const_str/__ctfe/
split.rs

1use crate::slice::advance;
2use crate::slice::subslice;
3use crate::utf8::CharEncodeUtf8;
4
5use core::str;
6
/// Shared state for the split macros in this module.
///
/// The public wrappers build one of these and delegate both the length
/// computation and the actual splitting to it.
struct SplitImpl<'input, 'pat> {
    /// The string that gets split.
    input: &'input str,

    /// The separator to look for; an empty pattern is handled as a special
    /// case by the methods of this type.
    pattern: &'pat str,

    /// `true` keeps each separator at the end of the piece it terminates,
    /// `false` drops the separators from the output.
    inclusive: bool,
}
12
13impl<'input> SplitImpl<'input, '_> {
14    const fn output_len(&self) -> usize {
15        let mut input = self.input;
16        let pat = self.pattern;
17
18        if pat.is_empty() {
19            crate::utf8::str_count_chars(input) + 2
20        } else {
21            let mut ans = 0;
22            while let Some((_, remain)) = crate::str::next_match(input, pat) {
23                ans += 1;
24                input = remain
25            }
26            if self.inclusive {
27                if !input.is_empty() {
28                    ans += 1;
29                }
30            } else {
31                ans += 1;
32            }
33            ans
34        }
35    }
36
37    #[allow(unsafe_code)]
38    const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
39        let mut input = self.input;
40        let pat = self.pattern;
41
42        let mut buf: [&str; N] = [""; N];
43        let mut pos = 0;
44
45        if pat.is_empty() {
46            let mut input = input.as_bytes();
47
48            {
49                buf[pos] = unsafe { str::from_utf8_unchecked(subslice(input, 0..0)) };
50                pos += 1;
51            }
52
53            while let Some((_, count)) = crate::utf8::next_char(input) {
54                buf[pos] = unsafe { str::from_utf8_unchecked(subslice(input, 0..count)) };
55                pos += 1;
56                input = advance(input, count);
57            }
58
59            {
60                buf[pos] = unsafe { str::from_utf8_unchecked(subslice(input, 0..0)) };
61                pos += 1;
62            }
63        } else {
64            while let Some((m, remain)) = crate::str::next_match(input, pat) {
65                let substr = if self.inclusive {
66                    subslice(input.as_bytes(), 0..m + pat.len())
67                } else {
68                    subslice(input.as_bytes(), 0..m)
69                };
70                buf[pos] = unsafe { str::from_utf8_unchecked(substr) };
71                pos += 1;
72                input = remain;
73            }
74            if self.inclusive {
75                if !input.is_empty() {
76                    buf[pos] = input;
77                    pos += 1;
78                }
79            } else {
80                buf[pos] = input;
81                pos += 1;
82            }
83        }
84        assert!(pos == N);
85        buf
86    }
87}
88
/// Argument carrier for the `split!` macro: `.0` is the input string and
/// `.1` is the pattern to split on.
pub struct Split<T, P>(pub T, pub P);
90
91impl<'input, 'pat> Split<&'input str, &'pat str> {
92    const fn to_impl(&self) -> SplitImpl<'input, 'pat> {
93        SplitImpl {
94            input: self.0,
95            pattern: self.1,
96            inclusive: false,
97        }
98    }
99
100    pub const fn output_len(&self) -> usize {
101        self.to_impl().output_len()
102    }
103
104    pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
105        self.to_impl().const_eval()
106    }
107}
108
109impl<'input> Split<&'input str, char> {
110    const fn to_impl<'pat>(&self, ch: &'pat CharEncodeUtf8) -> SplitImpl<'input, 'pat> {
111        SplitImpl {
112            input: self.0,
113            pattern: ch.as_str(),
114            inclusive: false,
115        }
116    }
117
118    pub const fn output_len(&self) -> usize {
119        let ch = CharEncodeUtf8::new(self.1);
120        self.to_impl(&ch).output_len()
121    }
122
123    pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
124        let ch = CharEncodeUtf8::new(self.1);
125        self.to_impl(&ch).const_eval()
126    }
127}
128
/// Splits a string slice on every match of a pattern and returns the
/// substrings as an array.
///
/// The matched separators are not part of the returned substrings.
///
/// The pattern type must be one of
///
/// + [`&str`](prim@str)
/// + [`char`]
///
/// This macro is [const-context only](./index.html#const-context-only).
///
/// See also [`str::split`](https://doc.rust-lang.org/std/primitive.str.html#method.split).
///
/// # Examples
///
/// ```
/// const SEPARATOR: &str = ", ";
/// const TEXT: &str = "lion, tiger, leopard";
///
/// const ANIMALS_ARRAY: [&str;3] = const_str::split!(TEXT, SEPARATOR);
/// const ANIMALS_SLICE: &[&str] = &const_str::split!(TEXT, SEPARATOR);
///
/// assert_eq!(ANIMALS_ARRAY, ANIMALS_SLICE);
/// assert_eq!(ANIMALS_SLICE, &["lion", "tiger", "leopard"]);
/// ```
#[macro_export]
macro_rules! split {
    ($s:expr, $pat:expr) => {{
        // The array length has to be known before the array itself can be
        // declared, hence the two separate const evaluations.
        const INPUT: &str = $s;
        const OUTPUT_LEN: usize = $crate::__ctfe::Split(INPUT, $pat).output_len();
        const OUTPUT_BUF: [&str; OUTPUT_LEN] =
            $crate::__ctfe::Split(INPUT, $pat).const_eval::<OUTPUT_LEN>();
        OUTPUT_BUF
    }};
}
161
/// Argument carrier for the `split_inclusive!` macro: `.0` is the input
/// string and `.1` is the pattern to split on.
pub struct SplitInclusive<T, P>(pub T, pub P);
163
164impl<'input, 'pat> SplitInclusive<&'input str, &'pat str> {
165    const fn to_impl(&self) -> SplitImpl<'input, 'pat> {
166        SplitImpl {
167            input: self.0,
168            pattern: self.1,
169            inclusive: true,
170        }
171    }
172
173    pub const fn output_len(&self) -> usize {
174        self.to_impl().output_len()
175    }
176
177    pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
178        self.to_impl().const_eval()
179    }
180}
181
182impl<'input> SplitInclusive<&'input str, char> {
183    const fn to_impl<'pat>(&self, ch: &'pat CharEncodeUtf8) -> SplitImpl<'input, 'pat> {
184        SplitImpl {
185            input: self.0,
186            pattern: ch.as_str(),
187            inclusive: true,
188        }
189    }
190
191    pub const fn output_len(&self) -> usize {
192        let ch = CharEncodeUtf8::new(self.1);
193        self.to_impl(&ch).output_len()
194    }
195
196    pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
197        let ch = CharEncodeUtf8::new(self.1);
198        self.to_impl(&ch).const_eval()
199    }
200}
201
/// Splits a string slice on every match of a pattern and returns the
/// substrings as an array, keeping each match at the end of its substring.
///
/// Differs from the array produced by [`split!`] in that
/// [`split_inclusive!`](crate::split_inclusive) leaves the matched part as the terminator of the substring.
///
/// If the last element of the string is matched,
/// that element will be considered the terminator of the preceding substring.
/// That substring will be the last item of the returned array.
///
/// The pattern type must be one of
///
/// + [`&str`](prim@str)
/// + [`char`]
///
/// This macro is [const-context only](./index.html#const-context-only).
///
/// See also [`str::split_inclusive`](https://doc.rust-lang.org/std/primitive.str.html#method.split_inclusive).
///
/// # Examples
/// ```
/// const TEXT: &str = "Mary had a little lamb\nlittle lamb\nlittle lamb.";
/// const ANSWER:&[&str] = &const_str::split_inclusive!(TEXT, "\n");
/// assert_eq!(ANSWER, &["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
/// ```
/// ```
/// const TEXT: &str = "\nA\nB\nC\n";
/// const ANSWER:&[&str] = &const_str::split_inclusive!(TEXT, "\n");
/// assert_eq!(ANSWER, &["\n", "A\n", "B\n", "C\n"]);
/// ```
#[macro_export]
macro_rules! split_inclusive {
    ($s:expr, $pat:expr) => {{
        // The array length has to be known before the array itself can be
        // declared, hence the two separate const evaluations.
        const INPUT: &str = $s;
        const OUTPUT_LEN: usize = $crate::__ctfe::SplitInclusive(INPUT, $pat).output_len();
        const OUTPUT_BUF: [&str; OUTPUT_LEN] =
            $crate::__ctfe::SplitInclusive(INPUT, $pat).const_eval::<OUTPUT_LEN>();
        OUTPUT_BUF
    }};
}
241
242pub const fn map_lines<const N: usize>(mut lines: [&str; N]) -> [&str; N] {
243    let mut i = 0;
244    while i < N {
245        let s = lines[i];
246        match crate::str::strip_suffix(s, "\r\n") {
247            Some(s) => lines[i] = s,
248            None => match crate::str::strip_suffix(s, "\n") {
249                Some(s) => lines[i] = s,
250                None => lines[i] = s,
251            },
252        }
253        i += 1;
254    }
255    lines
256}
257
/// Splits a string into its lines and returns them as an array.
///
/// A line ends with LF (`\n`) or CRLF (`\r\n`).
///
/// The line terminators are stripped from the returned lines.
///
/// The final line ending is optional.
/// A string that ends with a final line ending will return the same lines
/// as an otherwise identical string without a final line ending.
///
/// This macro is [const-context only](./index.html#const-context-only).
///
/// See also [`str::lines`](https://doc.rust-lang.org/std/primitive.str.html#method.lines)
///
/// # Examples
/// ```rust
/// const TEXT: &str = "foo\r\nbar\n\nbaz\r";
/// const LINES_ARRAY: [&str;4] = const_str::split_lines!(TEXT);
/// const LINES_SLICE: &[&str] = &const_str::split_lines!(TEXT);
///
/// assert_eq!(LINES_ARRAY, LINES_SLICE);
/// assert_eq!(LINES_SLICE, &["foo", "bar", "", "baz\r"]);
/// ```
/// ```rust
/// const TEXT1: &str = "1\r\n2\r\n3\r\n";
/// const TEXT2: &str = "1\n2\n3\n";
/// const TEXT3: &str = "1\n2\n3";
/// const LINES1: &[&str] = &const_str::split_lines!(TEXT1);
/// const LINES2: &[&str] = &const_str::split_lines!(TEXT2);
/// const LINES3: &[&str] = &const_str::split_lines!(TEXT3);
/// assert_eq!(LINES1, LINES2);
/// assert_eq!(LINES2, LINES3);
/// ```
#[macro_export]
macro_rules! split_lines {
    ($s:expr) => {{
        // Split after every `\n`, then strip the terminator from each piece.
        $crate::__ctfe::map_lines($crate::split_inclusive!($s, "\n"))
    }};
}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// Compares `split!` with `str::split` for string and char patterns.
    #[test]
    fn test_split() {
        macro_rules! testcase {
            ($input: expr, $pat: expr) => {{
                const OUTPUT: &[&str] = &$crate::split!($input, $pat);

                let ans = $input.split($pat).collect::<Vec<_>>();
                assert_eq!(OUTPUT.len(), ans.len());
                assert_eq!(OUTPUT, &*ans, "ans = {:?}", ans);
            }};
        }

        testcase!("", "");
        testcase!("a中1😂1!", "");
        testcase!("a中1😂1!", "a");
        testcase!("a中1😂1!", "中");
        testcase!("a中1😂1!", "1");
        testcase!("a中1😂1!", "😂");
        testcase!("a中1😂1!", "!");
        testcase!("11111", "1");
        testcase!("222", "22");
        testcase!("啊哈哈哈", "哈哈");
        testcase!("some string:another string", ":");

        testcase!("11111", '1');
        testcase!("a中1😂1!", 'a');
        testcase!("a中1😂1!", '中');
        testcase!("a中1😂1!", '1');
        testcase!("a中1😂1!", '😂');
        testcase!("a中1😂1!", '!');
    }

    /// Compares `split_inclusive!` with `str::split_inclusive` for non-empty
    /// string patterns and char patterns.
    #[test]
    fn test_split_inclusive() {
        macro_rules! testcase {
            ($input: expr, $pat: expr) => {{
                const OUTPUT: &[&str] = &$crate::split_inclusive!($input, $pat);

                let ans = $input.split_inclusive($pat).collect::<Vec<_>>();
                assert_eq!(OUTPUT.len(), ans.len());
                assert_eq!(OUTPUT, &*ans, "ans = {:?}", ans);
            }};
        }

        testcase!("", "a");
        testcase!("a中1😂1!", "a");
        testcase!("a中1😂1!", "中");
        testcase!("a中1😂1!", "1");
        testcase!("a中1😂1!", "😂");
        testcase!("a中1😂1!", "!");
        testcase!("11111", "1");
        testcase!("222", "22");
        testcase!("啊哈哈哈", "哈哈");
        testcase!("some string:another string", ":");
        testcase!("\nA\nB\nC\n", "\n");

        testcase!("11111", '1');
        testcase!("a中1😂1!", 'a');
        testcase!("a中1😂1!", '中');
        testcase!("a中1😂1!", '1');
        testcase!("a中1😂1!", '😂');
        testcase!("a中1😂1!", '!');
    }

    /// Checks `split_lines!` against spelled-out expectations.
    ///
    /// Literal expectations are used instead of `str::lines` so the test does
    /// not depend on how the toolchain's `str::lines` treats a trailing bare
    /// `\r`.
    #[test]
    fn test_split_lines() {
        macro_rules! testcase {
            ($input: expr, $expected: expr) => {{
                const OUTPUT: &[&str] = &$crate::split_lines!($input);

                let ans: &[&str] = &$expected;
                assert_eq!(OUTPUT.len(), ans.len());
                assert_eq!(OUTPUT, ans, "ans = {:?}", ans);
            }};
        }

        testcase!("", []);
        testcase!("\n", [""]);
        testcase!("\r\n", [""]);
        testcase!("1\n2\n3", ["1", "2", "3"]);
        testcase!("1\n2\n3\n", ["1", "2", "3"]);
        testcase!("1\r\n2\r\n3\r\n", ["1", "2", "3"]);
        testcase!("foo\r\nbar\n\nbaz\r", ["foo", "bar", "", "baz\r"]);
    }
}