Skip to main content

pyo3/impl_/
unindent.rs

1#![warn(clippy::undocumented_unsafe_blocks)]
2
3use crate::impl_::concat::slice_copy_from_slice;
4
5/// This is a reimplementation of the `indoc` crate's unindent functionality:
6///
7/// 1. Count the leading spaces of each line, ignoring the first line and any lines that are empty or contain spaces only.
8/// 2. Take the minimum.
9/// 3. If the first line is empty i.e. the string begins with a newline, remove the first line.
10/// 4. Remove the computed number of spaces from the beginning of each line.
11const fn unindent_bytes(bytes: &mut [u8]) -> usize {
12    // (1) + (2) - count leading spaces, take the minimum
13    let Some(to_unindent) = get_minimum_leading_spaces(bytes) else {
14        // all lines were empty, nothing to unindent
15        return bytes.len();
16    };
17
18    // now copy from the original buffer, bringing values forward as needed
19    let mut read_idx = 0;
20    let mut write_idx = 0;
21
22    // (3) - remove first line if it is empty
23    match consume_eol(bytes, read_idx) {
24        // skip empty first line
25        Some(eol) => read_idx = eol,
26        // copy non-empty first line as-is
27        None => {
28            (read_idx, write_idx) = copy_forward_until_eol(bytes, read_idx, write_idx);
29        }
30    };
31
32    // (4) - unindent remaining lines
33    while read_idx < bytes.len() {
34        let leading_spaces = count_spaces(bytes, read_idx);
35
36        if leading_spaces < to_unindent {
37            read_idx += leading_spaces;
38            assert!(
39                consume_eol(bytes, read_idx).is_some(),
40                "removed fewer spaces than expected on non-empty line"
41            );
42        } else {
43            // leading_spaces may be equal to or larger than to_unindent, only need to unindent
44            // the required amount, additional indentation is meaningful
45            read_idx += to_unindent;
46        }
47
48        // copy remainder of line
49        (read_idx, write_idx) = copy_forward_until_eol(bytes, read_idx, write_idx);
50    }
51
52    write_idx
53}
54
55/// Counts the minimum leading spaces of all non-empty lines except the first line.
56///
57/// Returns `None` if there are no non-empty lines except the first line.
58const fn get_minimum_leading_spaces(bytes: &[u8]) -> Option<usize> {
59    // scan for leading spaces (ignoring first line and empty lines)
60    let mut i = 0;
61
62    // skip first line
63    i = advance_to_next_line(bytes, i);
64
65    let mut to_unindent = None;
66
67    // for remaining lines, count leading spaces
68    while i < bytes.len() {
69        let line_leading_spaces = count_spaces(bytes, i);
70        i += line_leading_spaces;
71
72        // line only had spaces, ignore for the count
73        if let Some(eol) = consume_eol(bytes, i) {
74            i = eol;
75            continue;
76        }
77
78        // this line has content, consider its leading spaces
79        if let Some(current) = to_unindent {
80            // .unwrap_or(usize::MAX) not available in const fn
81            if line_leading_spaces < current {
82                to_unindent = Some(line_leading_spaces);
83            }
84        } else {
85            to_unindent = Some(line_leading_spaces);
86        }
87
88        i = advance_to_next_line(bytes, i);
89    }
90
91    to_unindent
92}
93
94const fn advance_to_next_line(bytes: &[u8], mut i: usize) -> usize {
95    while i < bytes.len() {
96        if let Some(eol) = consume_eol(bytes, i) {
97            return eol;
98        }
99        i += 1;
100    }
101    i
102}
103
/// Moves one line of `bytes` towards the front of the buffer.
///
/// Bytes are copied one at a time from `read_idx` to `write_idx` until a `\n` has been
/// copied (inclusive) or `read_idx` reaches the end of `bytes`. Returns the advanced
/// `(read_idx, write_idx)` pair.
///
/// # Panics
///
/// Panics if `read_idx` is smaller than `write_idx`.
const fn copy_forward_until_eol(
    bytes: &mut [u8],
    mut read_idx: usize,
    mut write_idx: usize,
) -> (usize, usize) {
    assert!(read_idx >= write_idx);
    let len = bytes.len();
    let mut line_done = false;
    while !line_done && read_idx < len {
        let byte = bytes[read_idx];
        bytes[write_idx] = byte;
        // the newline itself is copied before the loop stops
        line_done = byte == b'\n';
        read_idx += 1;
        write_idx += 1;
    }
    (read_idx, write_idx)
}
124
/// Counts the consecutive `b' '` bytes in `bytes` starting at index `i`.
///
/// Only the ASCII space counts; tabs and other whitespace end the run. Returns `0` when
/// `i` is at or beyond the end of `bytes`.
const fn count_spaces(bytes: &[u8], i: usize) -> usize {
    let mut end = i;
    while end < bytes.len() && bytes[end] == b' ' {
        end += 1;
    }
    end - i
}
133
/// Checks whether a line ends at index `i` and, if so, returns the index where the next
/// line starts.
///
/// - `i == bytes.len()`: the end of the buffer counts as a line ending that consumes
///   nothing, so `Some(i)` is returned.
/// - `\n` at `i`: returns `Some(i + 1)`.
/// - `\r\n` at `i`: returns `Some(i + 2)`.
/// - anything else (including a `\r` that is not followed by `\n`): returns `None`.
///
/// # Panics
///
/// Panics with an out-of-bounds index if `i` is greater than `bytes.len()`.
const fn consume_eol(bytes: &[u8], i: usize) -> Option<usize> {
    let len = bytes.len();
    if i == len {
        // special case: end of buffer is an EOL which consumes no bytes
        return Some(i);
    }
    match bytes[i] {
        b'\n' => Some(i + 1),
        b'\r' if i + 1 < len && bytes[i + 1] == b'\n' => Some(i + 2),
        _ => None,
    }
}
146
147pub const fn unindent_sized<const N: usize>(src: &[u8]) -> ([u8; N], usize) {
148    let mut out: [u8; N] = [0; N];
149    slice_copy_from_slice(&mut out, src);
150    let new_len = unindent_bytes(&mut out);
151    (out, new_len)
152}
153
/// Helper for `py_run!` macro which unindents a string at compile time.
///
/// `$value` must be usable as the initializer of a `const` of type `&str`. The macro
/// expands to a block expression evaluating to a `&str` holding the unindented text.
#[macro_export]
#[doc(hidden)]
macro_rules! unindent {
    ($value:expr) => {{
        const RAW: &str = $value;
        // unindenting never grows the text, so a buffer as long as the input is enough
        const LEN: usize = RAW.len();
        // `.0` is the buffer, `.1` is how many of its leading bytes hold the result
        const UNINDENTED: ([u8; LEN], usize) =
            $crate::impl_::unindent::unindent_sized::<LEN>(RAW.as_bytes());
        // SAFETY: unindenting only drops whole ASCII bytes (spaces and a leading line
        // ending) and preserves all other contents, so the kept prefix is valid UTF-8
        unsafe { ::core::str::from_utf8_unchecked(UNINDENTED.0.split_at(UNINDENTED.1).0) }
    }};
}
167
168pub use crate::unindent;
169
170/// Equivalent of the `unindent!` macro, but works at runtime.
171pub fn unindent(s: &str) -> String {
172    let mut bytes = s.as_bytes().to_owned();
173    let unindented_size = unindent_bytes(&mut bytes);
174    bytes.resize(unindented_size, 0);
175    String::from_utf8(bytes).unwrap()
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    // The samples are written with explicit `\n` / `\r\n` escapes so that the exact
    // whitespace under test is visible and cannot be altered by editors or formatters.

    /// The first line has content, so it is kept as-is and excluded from the minimum.
    /// The trailing spaces-only line is indented less than the minimum and is emptied.
    const SAMPLE_1_WITH_FIRST_LINE: &str =
        "  first line\n        line one\n\n          line two\n    ";

    const UNINDENTED_1: &str = "  first line\nline one\n\n  line two\n";

    /// The string starts with a newline, so the empty first line is removed.
    const SAMPLE_2_EMPTY_FIRST_LINE: &str =
        "\n            line one\n\n              line two\n        ";

    const UNINDENTED_2: &str = "line one\n\n  line two\n";

    /// The minimum indentation is zero; only the empty first line is removed.
    const SAMPLE_3_NO_INDENT: &str = "\nno indent\n  here";

    const UNINDENTED_3: &str = "no indent\n  here";

    /// Non-empty first line and a minimum indentation of zero: nothing changes.
    const SAMPLE_4_NOOP: &str = "no indent\nhere\n  but here";

    /// No line after the first has content, so the input is returned unchanged.
    const SAMPLE_5_EMPTY: &str = "   \n   \n";

    /// Same shape as sample 2 but with `\r\n` line endings, which must be preserved.
    const SAMPLE_6_CRLF: &str = "\r\n    line one\r\n\r\n      line two\r\n    ";

    const UNINDENTED_6: &str = "line one\r\n\r\n  line two\r\n";

    const ALL_CASES: &[(&str, &str)] = &[
        (SAMPLE_1_WITH_FIRST_LINE, UNINDENTED_1),
        (SAMPLE_2_EMPTY_FIRST_LINE, UNINDENTED_2),
        (SAMPLE_3_NO_INDENT, UNINDENTED_3),
        (SAMPLE_4_NOOP, SAMPLE_4_NOOP),
        (SAMPLE_5_EMPTY, SAMPLE_5_EMPTY),
        (SAMPLE_6_CRLF, UNINDENTED_6),
    ];

    // run const tests for each sample to ensure they work at compile time

    #[test]
    fn test_unindent_const() {
        const UNINDENTED: &str = unindent!(SAMPLE_1_WITH_FIRST_LINE);
        assert_eq!(UNINDENTED, UNINDENTED_1);
    }

    #[test]
    fn test_unindent_const_removes_empty_first_line() {
        const UNINDENTED: &str = unindent!(SAMPLE_2_EMPTY_FIRST_LINE);
        assert_eq!(UNINDENTED, UNINDENTED_2);
    }

    #[test]
    fn test_unindent_const_no_indent() {
        const UNINDENTED: &str = unindent!(SAMPLE_3_NO_INDENT);
        assert_eq!(UNINDENTED, UNINDENTED_3);
    }

    #[test]
    fn test_unindent_const_noop() {
        const UNINDENTED: &str = unindent!(SAMPLE_4_NOOP);
        assert_eq!(UNINDENTED, SAMPLE_4_NOOP);
    }

    #[test]
    fn test_unindent_const_whitespace_only() {
        const UNINDENTED: &str = unindent!(SAMPLE_5_EMPTY);
        assert_eq!(UNINDENTED, SAMPLE_5_EMPTY);
    }

    #[test]
    fn test_unindent_const_crlf() {
        const UNINDENTED: &str = unindent!(SAMPLE_6_CRLF);
        assert_eq!(UNINDENTED, UNINDENTED_6);
    }

    #[test]
    fn test_unindent_macro_runtime() {
        // this variation on the test ensures full coverage (const eval not included in coverage)
        const INDENTED: &str = SAMPLE_1_WITH_FIRST_LINE;
        const LEN: usize = INDENTED.len();
        let (unindented, unindented_size) = unindent_sized::<LEN>(INDENTED.as_bytes());
        let unindented = std::str::from_utf8(&unindented[..unindented_size]).unwrap();
        assert_eq!(unindented, UNINDENTED_1);
    }

    #[test]
    fn test_unindent_function() {
        for &(indented, expected) in ALL_CASES {
            assert_eq!(unindent(indented), expected);
        }
    }
}
⚠️ Internal Docs ⚠️ Not Public API 👉 Official Docs Here