1use std::mem::take;
2use std::str::from_utf8_unchecked;
3
4use crate::TokenSource;
5
6pub fn lines(data: &str) -> Lines<'_, false> {
12 Lines(ByteLines(data.as_bytes()))
13}
14
15pub fn lines_with_terminator(data: &str) -> Lines<'_, true> {
21 Lines(ByteLines(data.as_bytes()))
22}
23
/// Returns an iterator over the lines of `data` as byte slices, where each
/// yielded line keeps its trailing terminator (`\n` or `\r\n`) if it has one.
pub fn byte_lines_with_terminator(data: &[u8]) -> ByteLines<'_, true> {
    ByteLines(data)
}
33
/// Returns an iterator over the lines of `data` as byte slices, with the
/// line terminator (`\n` or `\r\n`) stripped from each yielded line.
pub fn byte_lines(data: &[u8]) -> ByteLines<'_, false> {
    ByteLines(data)
}
42
43impl<'a> TokenSource for &'a str {
45 type Token = &'a str;
46
47 type Tokenizer = Lines<'a, false>;
48
49 fn tokenize(&self) -> Self::Tokenizer {
50 lines(self)
51 }
52
53 fn estimate_tokens(&self) -> u32 {
54 lines_with_terminator(self).estimate_tokens()
55 }
56}
57
58impl<'a> TokenSource for &'a [u8] {
60 type Token = Self;
61 type Tokenizer = ByteLines<'a, false>;
62
63 fn tokenize(&self) -> Self::Tokenizer {
64 byte_lines(self)
65 }
66
67 fn estimate_tokens(&self) -> u32 {
68 byte_lines(self).estimate_tokens()
69 }
70}
71
/// An iterator over the lines of a `&str`, built by [`lines`] /
/// [`lines_with_terminator`]. When `INCLUDE_LINE_TERMINATOR` is `true`,
/// each yielded line keeps its trailing `\n` / `\r\n`.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct Lines<'a, const INCLUDE_LINE_TERMINATOR: bool>(ByteLines<'a, INCLUDE_LINE_TERMINATOR>);
76
impl<'a, const INCLUDE_LINE_TERMINATOR: bool> Iterator for Lines<'a, INCLUDE_LINE_TERMINATOR> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        // SAFETY: the inner `ByteLines` is constructed from a `&str` (see
        // `lines`/`lines_with_terminator`) and only ever splits the buffer at
        // the ASCII bytes `\n`/`\r`, which can never occur inside a multi-byte
        // UTF-8 sequence — so every yielded slice is valid UTF-8.
        self.0.next().map(|it| unsafe { from_utf8_unchecked(it) })
    }
}
86
87impl<'a, const INCLUDE_LINE_TERMINATOR: bool> TokenSource for Lines<'a, INCLUDE_LINE_TERMINATOR> {
89 type Token = &'a str;
90
91 type Tokenizer = Self;
92
93 fn tokenize(&self) -> Self::Tokenizer {
94 *self
95 }
96
97 fn estimate_tokens(&self) -> u32 {
98 self.0.estimate_tokens()
99 }
100}
101
/// An iterator over the lines of a byte slice. When
/// `INCLUDE_LINE_TERMINATOR` is `true`, each yielded line keeps its trailing
/// `\n` / `\r\n`.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct ByteLines<'a, const INCLUDE_LINE_TERMINATOR: bool>(&'a [u8]);

impl<'a, const INCLUDE_LINE_TERMINATOR: bool> Iterator for ByteLines<'a, INCLUDE_LINE_TERMINATOR> {
    type Item = &'a [u8];

    fn next(&mut self) -> Option<Self::Item> {
        // Everything up to and including the next `\n` is one line.
        match self.0.iter().position(|&byte| byte == b'\n') {
            Some(newline_idx) => {
                let (line, rest) = self.0.split_at(newline_idx + 1);
                self.0 = rest;
                if INCLUDE_LINE_TERMINATOR {
                    return Some(line);
                }
                // Strip the `\n` and, for CRLF input, the preceding `\r`.
                let mut end = newline_idx;
                if line[..end].ends_with(b"\r") {
                    end -= 1;
                }
                Some(&line[..end])
            }
            // No terminator left: yield the final (unterminated) line, if
            // any. There is no terminator to strip here, so the remainder is
            // returned as-is in both modes.
            None => (!self.0.is_empty()).then(|| take(&mut self.0)),
        }
    }
}
130
131impl<'a, const INCLUDE_LINE_TERMINATOR: bool> TokenSource
133 for ByteLines<'a, INCLUDE_LINE_TERMINATOR>
134{
135 type Token = &'a [u8];
136
137 type Tokenizer = Self;
138
139 fn tokenize(&self) -> Self::Tokenizer {
140 *self
141 }
142
143 fn estimate_tokens(&self) -> u32 {
144 let len: usize = self.take(20).map(|line| line.len()).sum();
145 if len == 0 {
146 100
147 } else {
148 (self.0.len() * 20 / len) as u32
149 }
150 }
151}