surrealdb_core/syn/lexer/
reader.rs1use thiserror::Error;
2
3use crate::syn::{error::SyntaxError, token::Span};
4
5#[derive(Error, Debug)]
6#[non_exhaustive]
7pub enum CharError {
8 #[error("found eof inside multi byte character")]
9 Eof,
10 #[error("string is not valid utf-8")]
11 Unicode,
12}
13
14impl From<CharError> for SyntaxError {
15 fn from(value: CharError) -> Self {
16 let e = SyntaxError::new("Invalid, non valid UTF-8 bytes, in source");
17 if let CharError::Eof = value {
18 e.with_data_pending()
19 } else {
20 e
21 }
22 }
23}
24
/// A cursor over a borrowed byte slice.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct BytesReader<'a> {
    /// The complete underlying slice, including bytes already consumed.
    data: &'a [u8],
    /// Index into `data` of the next byte to be read.
    current: usize,
}
31
32impl<'a> BytesReader<'a> {
33 pub fn new(slice: &'a [u8]) -> Self {
34 BytesReader {
35 data: slice,
36 current: 0,
37 }
38 }
39
40 #[inline]
41 pub fn remaining(&self) -> &'a [u8] {
42 &self.data[self.current..]
43 }
44
45 #[inline]
46 pub fn len(&self) -> usize {
47 self.remaining().len()
48 }
49
50 #[inline]
51 pub fn offset(&self) -> usize {
52 self.current
53 }
54
55 #[inline]
56 pub fn backup(&mut self, offset: usize) {
57 assert!(offset <= self.offset());
58 self.current = offset;
59 }
60
61 #[inline]
62 pub fn is_empty(&self) -> bool {
63 self.remaining().is_empty()
64 }
65
66 #[inline]
67 pub fn peek(&self) -> Option<u8> {
68 self.remaining().first().copied()
69 }
70
71 #[inline]
72 pub fn peek1(&self) -> Option<u8> {
73 self.remaining().get(1).copied()
74 }
75
76 #[inline]
77 pub fn span(&self, span: Span) -> &'a [u8] {
78 &self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
79 }
80
81 #[inline]
82 pub fn next_continue_byte(&mut self) -> Result<u8, CharError> {
83 const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000;
84 const CONTINUE_BYTE_MASK: u8 = 0b0011_1111;
85
86 let byte = self.next().ok_or(CharError::Eof)?;
87 if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 {
88 return Err(CharError::Unicode);
89 }
90
91 Ok(byte & CONTINUE_BYTE_MASK)
92 }
93
94 pub fn convert_to_char(&mut self, start: u8) -> Result<char, CharError> {
95 if start.is_ascii() {
96 return Ok(start as char);
97 }
98 self.complete_char(start)
99 }
100
101 pub fn complete_char(&mut self, start: u8) -> Result<char, CharError> {
102 match start & 0b1111_1000 {
103 0b1100_0000 | 0b1101_0000 | 0b1100_1000 | 0b1101_1000 => {
104 let mut val = (start & 0b0001_1111) as u32;
105 val <<= 6;
106 let next = self.next_continue_byte()?;
107 val |= next as u32;
108 char::from_u32(val).ok_or(CharError::Unicode)
109 }
110 0b1110_0000 | 0b1110_1000 => {
111 let mut val = (start & 0b0000_1111) as u32;
112 val <<= 6;
113 let next = self.next_continue_byte()?;
114 val |= next as u32;
115 val <<= 6;
116 let next = self.next_continue_byte()?;
117 val |= next as u32;
118 char::from_u32(val).ok_or(CharError::Unicode)
119 }
120 0b1111_0000 => {
121 let mut val = (start & 0b0000_0111) as u32;
122 val <<= 6;
123 let next = self.next_continue_byte()?;
124 val |= next as u32;
125 val <<= 6;
126 let next = self.next_continue_byte()?;
127 val |= next as u32;
128 val <<= 6;
129 let next = self.next_continue_byte()?;
130 val |= next as u32;
131 char::from_u32(val).ok_or(CharError::Unicode)
132 }
133 _ => Err(CharError::Unicode),
134 }
135 }
136}
137
138impl Iterator for BytesReader<'_> {
139 type Item = u8;
140
141 #[inline]
142 fn next(&mut self) -> Option<Self::Item> {
143 let res = self.peek()?;
144 self.current += 1;
145 Some(res)
146 }
147 fn size_hint(&self) -> (usize, Option<usize>) {
148 let len = self.len();
149 (len, Some(len))
150 }
151}
152
153impl ExactSizeIterator for BytesReader<'_> {
154 fn len(&self) -> usize {
155 self.len()
156 }
157}