os_str_bytes/
iter.rs

1//! Iterators provided by this crate.
2
3#![cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
4
5use std::ffi::OsStr;
6use std::fmt;
7use std::fmt::Debug;
8use std::fmt::Formatter;
9use std::iter::FusedIterator;
10use std::mem;
11use std::str;
12
13use super::ext;
14use super::pattern::Encoded;
15use super::NonUnicodeOsStr;
16use super::OsStrBytesExt;
17use super::Pattern;
18use super::RawOsStr;
19
// Generates a pair of splitting iterators that share one implementation:
// `$name` yields `&OsStr` substrings, and `$raw_name` is a thin wrapper around
// it that yields the same substrings as `&RawOsStr`.
//
// - `$split_method` is the method called on the remaining string to split it
//   once around the pattern.
// - `$reverse` decides whether the two halves returned by `$split_method` are
//   swapped before use, so that the second half is yielded and the first half
//   is kept for later calls.
macro_rules! r#impl {
    (
        $(#[ $attr:meta ])* $name:ident ,
        $(#[ $raw_attr:meta ])* $raw_name:ident ,
        $split_method:ident ,
        $reverse:expr ,
    ) => {
        // [memchr::memmem::FindIter] would make this struct self-referential.
        #[must_use]
        $(#[$attr])*
        pub struct $name<'a, P>
        where
            P: Pattern,
        {
            // The part of the string that has not been yielded yet, or `None`
            // once the final substring has been returned.
            string: Option<&'a OsStr>,
            // The encoded form of the pattern, computed once in `new`.
            pat: P::__Encoded,
        }

        impl<'a, P> $name<'a, P>
        where
            P: Pattern,
        {
            /// Creates an iterator over the substrings of `string` separated
            /// by `pat`.
            ///
            /// # Panics
            ///
            /// Panics if the encoded pattern is empty.
            #[track_caller]
            pub(super) fn new(string: &'a OsStr, pat: P) -> Self {
                let encoded = pat.__encode();
                assert!(
                    !encoded.__as_str().is_empty(),
                    "cannot split using an empty pattern",
                );
                Self {
                    string: Some(string),
                    pat: encoded,
                }
            }
        }

        // Implemented by hand so that no `Clone` bound is placed on `P`
        // itself; only the encoded pattern is cloned.
        impl<P> Clone for $name<'_, P>
        where
            P: Pattern,
        {
            #[inline]
            fn clone(&self) -> Self {
                let pat = self.pat.clone();
                Self {
                    string: self.string,
                    pat,
                }
            }
        }

        impl<P> Debug for $name<'_, P>
        where
            P: Pattern,
        {
            #[inline]
            fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
                f.debug_struct(stringify!($name))
                    .field("string", &self.string)
                    .field("pat", &self.pat)
                    .finish()
            }
        }

        // Once `string` is `None`, `next` always returns `None`.
        impl<P> FusedIterator for $name<'_, P> where P: Pattern {}

        impl<'a, P> Iterator for $name<'a, P>
        where
            P: Pattern,
        {
            type Item = &'a OsStr;

            #[inline]
            fn next(&mut self) -> Option<Self::Item> {
                let remaining = self.string?;
                match remaining.$split_method(self.pat.__as_str()) {
                    Some((mut item, mut rest)) => {
                        if $reverse {
                            mem::swap(&mut item, &mut rest);
                        }
                        self.string = Some(rest);
                        Some(item)
                    }
                    // The pattern no longer occurs, so whatever is left is
                    // the final item and the iterator becomes exhausted.
                    None => self.string.take(),
                }
            }
        }

        #[must_use]
        $(#[$raw_attr])*
        pub struct $raw_name<'a, P>($name<'a, P>)
        where
            P: Pattern;

        impl<'a, P> $raw_name<'a, P>
        where
            P: Pattern,
        {
            /// Creates an iterator over the substrings of `string` separated
            /// by `pat`.
            ///
            /// # Panics
            ///
            /// Panics if the encoded pattern is empty.
            #[track_caller]
            pub(super) fn new(string: &'a RawOsStr, pat: P) -> Self {
                let inner = $name::new(string.as_os_str(), pat);
                Self(inner)
            }
        }

        impl<P> Clone for $raw_name<'_, P>
        where
            P: Pattern,
        {
            #[inline]
            fn clone(&self) -> Self {
                let inner = self.0.clone();
                Self(inner)
            }
        }

        impl<P> Debug for $raw_name<'_, P>
        where
            P: Pattern,
        {
            #[inline]
            fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
                f.debug_tuple(stringify!($raw_name)).field(&self.0).finish()
            }
        }

        impl<P> FusedIterator for $raw_name<'_, P> where P: Pattern {}

        impl<'a, P> Iterator for $raw_name<'a, P>
        where
            P: Pattern,
        {
            type Item = &'a RawOsStr;

            #[inline]
            fn next(&mut self) -> Option<Self::Item> {
                // Delegate to the wrapped iterator and convert its item.
                let item = self.0.next()?;
                Some(RawOsStr::new(item))
            }
        }
    };
}
// Forward splitting: the remaining string is split with `split_once`, and the
// halves are not swapped, so the first half is yielded and the second half is
// kept for the next call.
r#impl!(
    /// The iterator returned by [`OsStrBytesExt::split`].
    Split,
    /// The iterator returned by [`RawOsStr::split`].
    RawSplit,
    split_once,
    false,
);
// Reverse splitting: the remaining string is split with `rsplit_once`, and the
// halves are swapped, so the second half is yielded and the first half is kept
// for the next call.
r#impl!(
    /// The iterator returned by [`OsStrBytesExt::rsplit`].
    RSplit,
    /// The iterator returned by [`RawOsStr::rsplit`].
    RawRSplit,
    rsplit_once,
    true,
);
172
/// The iterator returned by [`OsStrBytesExt::utf8_chunks`].
///
/// Each item is a pair of an invalid chunk followed by the valid UTF-8 string
/// that comes directly after it. The invalid chunk of the first item is empty
/// when the string starts with valid UTF-8, and the valid string of the last
/// item is empty when the string ends with invalid bytes.
///
/// [`OsStrBytesExt::utf8_chunks`]: super::OsStrBytesExt::utf8_chunks
#[derive(Clone, Debug)]
#[must_use]
pub struct Utf8Chunks<'a> {
    /// The part of the original string that has not been yielded yet. It is
    /// empty once the iterator is exhausted.
    string: &'a OsStr,
    /// The number of bytes at the start of `string` that are already known
    /// not to be valid UTF-8; they become the invalid chunk of the next item.
    invalid_length: usize,
}
182
183impl<'a> Utf8Chunks<'a> {
184    pub(super) fn new(string: &'a OsStr) -> Self {
185        Self {
186            string,
187            invalid_length: 0,
188        }
189    }
190}
191
// Once the remaining string is empty, `next` returns `None` without changing
// any state, so every later call returns `None` as well.
impl FusedIterator for Utf8Chunks<'_> {}
193
194impl<'a> Iterator for Utf8Chunks<'a> {
195    type Item = (&'a NonUnicodeOsStr, &'a str);
196
197    fn next(&mut self) -> Option<Self::Item> {
198        let string = self.string.as_encoded_bytes();
199        if string.is_empty() {
200            debug_assert_eq!(0, self.invalid_length);
201            return None;
202        }
203
204        loop {
205            let (invalid, substring) = string.split_at(self.invalid_length);
206
207            let valid = match str::from_utf8(substring) {
208                Ok(valid) => {
209                    self.string = OsStr::new("");
210                    self.invalid_length = 0;
211                    valid
212                }
213                Err(error) => {
214                    let (valid, substring) =
215                        substring.split_at(error.valid_up_to());
216
217                    let invalid_length =
218                        error.error_len().unwrap_or_else(|| substring.len());
219                    if valid.is_empty() {
220                        self.invalid_length += invalid_length;
221                        continue;
222                    }
223                    // SAFETY: This substring was separated by a UTF-8 string.
224                    self.string = unsafe { ext::os_str(substring) };
225                    self.invalid_length = invalid_length;
226
227                    // SAFETY: This slice was validated to be UTF-8.
228                    unsafe { str::from_utf8_unchecked(valid) }
229                }
230            };
231
232            // SAFETY: This substring was separated by a UTF-8 string and
233            // validated to not be UTF-8.
234            let invalid = unsafe { NonUnicodeOsStr::new_unchecked(invalid) };
235            return Some((invalid, valid));
236        }
237    }
238}