1#![cfg(feature = "backends")]
2
3use super::Backend;
4use crate::{symbol::expect_valid_symbol, DefaultSymbol, Symbol};
5use alloc::vec::Vec;
6use core::{marker::PhantomData, mem, str};
7
8#[derive(Debug)]
37pub struct BufferBackend<S = DefaultSymbol> {
38 len_strings: usize,
39 buffer: Vec<u8>,
40 marker: PhantomData<fn() -> S>,
41}
42
43impl<S> PartialEq for BufferBackend<S>
44where
45 S: Symbol,
46{
47 fn eq(&self, other: &Self) -> bool {
48 self.len_strings.eq(&other.len_strings) && self.buffer.eq(&other.buffer)
49 }
50}
51
52impl<S> Eq for BufferBackend<S> where S: Symbol {}
53
54impl<S> Clone for BufferBackend<S> {
55 fn clone(&self) -> Self {
56 Self {
57 len_strings: self.len_strings,
58 buffer: self.buffer.clone(),
59 marker: Default::default(),
60 }
61 }
62}
63
64impl<S> Default for BufferBackend<S> {
65 #[cfg_attr(feature = "inline-more", inline)]
66 fn default() -> Self {
67 Self {
68 len_strings: 0,
69 buffer: Default::default(),
70 marker: Default::default(),
71 }
72 }
73}
74
75impl<S> BufferBackend<S>
76where
77 S: Symbol,
78{
79 #[inline]
81 fn next_symbol(&self) -> S {
82 expect_valid_symbol(self.buffer.len())
83 }
84
85 fn resolve_index_to_str(&self, index: usize) -> Option<(&[u8], usize)> {
92 let bytes = self.buffer.get(index..)?;
93 let (str_len, str_len_bytes) = decode_var_usize(bytes)?;
94 let index_str = index + str_len_bytes;
95 let str_bytes = self.buffer.get(index_str..index_str + str_len)?;
96 Some((str_bytes, index_str + str_len))
97 }
98
99 unsafe fn resolve_index_to_str_unchecked(&self, index: usize) -> &str {
110 let bytes = unsafe { self.buffer.get_unchecked(index..) };
113 let (str_len, str_len_bytes) = unsafe { decode_var_usize_unchecked(bytes) };
116 let index_str = index + str_len_bytes;
117 let str_bytes =
118 unsafe { self.buffer.get_unchecked(index_str..index_str + str_len) };
121 unsafe { str::from_utf8_unchecked(str_bytes) }
124 }
125
126 #[inline]
130 fn encode_var_usize(&mut self, value: usize) -> usize {
131 encode_var_usize(&mut self.buffer, value)
132 }
133
134 fn push_string(&mut self, string: &str) -> S {
140 let symbol = self.next_symbol();
141 let str_len = string.len();
142 let str_bytes = string.as_bytes();
143 self.encode_var_usize(str_len);
144 self.buffer.extend(str_bytes);
145 self.len_strings += 1;
146 symbol
147 }
148}
149
150impl<S> Backend for BufferBackend<S>
151where
152 S: Symbol,
153{
154 type Symbol = S;
155 type Iter<'a>
156 = Iter<'a, S>
157 where
158 Self: 'a;
159
160 #[cfg_attr(feature = "inline-more", inline)]
161 fn with_capacity(capacity: usize) -> Self {
162 const LEN_USIZE: usize = mem::size_of::<usize>();
164 const DEFAULT_STR_LEN: usize = 5;
166 let bytes_per_string = DEFAULT_STR_LEN + LEN_USIZE;
167 Self {
168 len_strings: 0,
169 buffer: Vec::with_capacity(capacity * bytes_per_string),
170 marker: Default::default(),
171 }
172 }
173
174 #[inline]
175 fn intern(&mut self, string: &str) -> Self::Symbol {
176 self.push_string(string)
177 }
178
179 #[inline]
180 fn resolve(&self, symbol: Self::Symbol) -> Option<&str> {
181 match self.resolve_index_to_str(symbol.to_usize()) {
182 None => None,
183 Some((bytes, _)) => str::from_utf8(bytes).ok(),
184 }
185 }
186
187 fn shrink_to_fit(&mut self) {
188 self.buffer.shrink_to_fit();
189 }
190
191 #[inline]
192 unsafe fn resolve_unchecked(&self, symbol: Self::Symbol) -> &str {
193 unsafe { self.resolve_index_to_str_unchecked(symbol.to_usize()) }
196 }
197
198 #[inline]
199 fn iter(&self) -> Self::Iter<'_> {
200 Iter::new(self)
201 }
202}
203
204#[inline]
208fn encode_var_usize(buffer: &mut Vec<u8>, mut value: usize) -> usize {
209 if value <= 0x7F {
210 buffer.push(value as u8);
212 return 1;
213 }
214 let mut len_chunks = 0;
215 loop {
216 let mut chunk = (value as u8) & 0x7F_u8;
217 value >>= 7;
218 chunk |= ((value != 0) as u8) << 7;
219 buffer.push(chunk);
220 len_chunks += 1;
221 if value == 0 {
222 break;
223 }
224 }
225 len_chunks
226}
227
228#[inline]
238unsafe fn decode_var_usize_unchecked(buffer: &[u8]) -> (usize, usize) {
239 let first = unsafe { *buffer.get_unchecked(0) };
240 match first {
241 byte if byte <= 0x7F_u8 => (byte as usize, 1),
242 _ => unsafe { decode_var_usize_unchecked_cold(buffer) },
243 }
244}
245
246#[inline]
258#[cold]
259unsafe fn decode_var_usize_unchecked_cold(buffer: &[u8]) -> (usize, usize) {
260 let mut result: usize = 0;
261 let mut i = 0;
262 loop {
263 let byte = unsafe { *buffer.get_unchecked(i) };
264 let shifted = ((byte & 0x7F_u8) as usize) << ((i * 7) as u32);
265 result += shifted;
266 if (byte & 0x80) == 0 {
267 break;
268 }
269 i += 1;
270 }
271 (result, i + 1)
272}
273
274#[inline]
279fn decode_var_usize(buffer: &[u8]) -> Option<(usize, usize)> {
280 match buffer.first() {
281 None => None,
282 Some(&byte) if byte <= 0x7F_u8 => Some((byte as usize, 1)),
283 _ => decode_var_usize_cold(buffer),
284 }
285}
286
287#[inline]
294#[cold]
295fn decode_var_usize_cold(buffer: &[u8]) -> Option<(usize, usize)> {
296 let mut result: usize = 0;
297 let mut i = 0;
298 loop {
299 let byte = *buffer.get(i)?;
300 let shifted = ((byte & 0x7F_u8) as usize).checked_shl((i * 7) as u32)?;
301 result = result.checked_add(shifted)?;
302 if (byte & 0x80) == 0 {
303 break;
304 }
305 i += 1;
306 }
307 Some((result, i + 1))
308}
309
310impl<'a, S> IntoIterator for &'a BufferBackend<S>
311where
312 S: Symbol,
313{
314 type Item = (S, &'a str);
315 type IntoIter = Iter<'a, S>;
316
317 #[cfg_attr(feature = "inline-more", inline)]
318 fn into_iter(self) -> Self::IntoIter {
319 self.iter()
320 }
321}
322
323pub struct Iter<'a, S> {
324 backend: &'a BufferBackend<S>,
325 remaining: usize,
326 next: usize,
327}
328
329impl<'a, S> Iter<'a, S> {
330 #[cfg_attr(feature = "inline-more", inline)]
331 pub fn new(backend: &'a BufferBackend<S>) -> Self {
332 Self {
333 backend,
334 remaining: backend.len_strings,
335 next: 0,
336 }
337 }
338}
339
340impl<'a, S> Iterator for Iter<'a, S>
341where
342 S: Symbol,
343{
344 type Item = (S, &'a str);
345
346 #[inline]
347 fn size_hint(&self) -> (usize, Option<usize>) {
348 let remaining = self.len();
349 (remaining, Some(remaining))
350 }
351
352 #[inline]
353 fn next(&mut self) -> Option<Self::Item> {
354 self.backend
355 .resolve_index_to_str(self.next)
356 .and_then(|(bytes, next)| {
357 let string = unsafe { str::from_utf8_unchecked(bytes) };
360 let symbol = S::try_from_usize(self.next)?;
361 self.next = next;
362 self.remaining -= 1;
363 Some((symbol, string))
364 })
365 }
366}
367
368impl<S> ExactSizeIterator for Iter<'_, S>
369where
370 S: Symbol,
371{
372 #[inline]
373 fn len(&self) -> usize {
374 self.remaining
375 }
376}
377
378#[cfg(test)]
379mod tests {
380 use super::{decode_var_usize, encode_var_usize};
381 use alloc::vec::Vec;
382
383 #[test]
384 fn encode_var_usize_1_byte_works() {
385 let mut buffer = Vec::new();
386 for i in 0..2usize.pow(7) {
387 buffer.clear();
388 assert_eq!(encode_var_usize(&mut buffer, i), 1);
389 assert_eq!(buffer, [i as u8]);
390 assert_eq!(decode_var_usize(&buffer), Some((i, 1)));
391 }
392 }
393
394 #[test]
395 fn encode_var_usize_2_bytes_works() {
396 let mut buffer = Vec::new();
397 for i in 2usize.pow(7)..2usize.pow(14) {
398 buffer.clear();
399 assert_eq!(encode_var_usize(&mut buffer, i), 2);
400 assert_eq!(buffer, [0x80 | ((i & 0x7F) as u8), (0x7F & (i >> 7) as u8)]);
401 assert_eq!(decode_var_usize(&buffer), Some((i, 2)));
402 }
403 }
404
405 #[test]
406 #[cfg_attr(any(miri), ignore)]
407 fn encode_var_usize_3_bytes_works() {
408 let mut buffer = Vec::new();
409 for i in 2usize.pow(14)..2usize.pow(21) {
410 buffer.clear();
411 assert_eq!(encode_var_usize(&mut buffer, i), 3);
412 assert_eq!(
413 buffer,
414 [
415 0x80 | ((i & 0x7F) as u8),
416 0x80 | (0x7F & (i >> 7) as u8),
417 (0x7F & (i >> 14) as u8),
418 ]
419 );
420 assert_eq!(decode_var_usize(&buffer), Some((i, 3)));
421 }
422 }
423
424 #[cfg_attr(any(miri), ignore)]
426 fn assert_encode_var_usize_4_bytes(range: core::ops::Range<usize>) {
427 let mut buffer = Vec::new();
428 for i in range {
429 buffer.clear();
430 assert_eq!(encode_var_usize(&mut buffer, i), 4);
431 assert_eq!(
432 buffer,
433 [
434 0x80 | ((i & 0x7F) as u8),
435 0x80 | (0x7F & (i >> 7) as u8),
436 0x80 | (0x7F & (i >> 14) as u8),
437 (0x7F & (i >> 21) as u8),
438 ]
439 );
440 assert_eq!(decode_var_usize(&buffer), Some((i, 4)));
441 }
442 }
443
444 #[test]
445 #[cfg_attr(any(miri), ignore)]
446 fn encode_var_usize_4_bytes_01_works() {
447 assert_encode_var_usize_4_bytes(2usize.pow(21)..2usize.pow(24));
448 }
449
450 #[test]
451 #[cfg_attr(any(miri), ignore)]
452 fn encode_var_usize_4_bytes_02_works() {
453 assert_encode_var_usize_4_bytes(2usize.pow(24)..2usize.pow(26));
454 }
455
456 #[test]
457 #[cfg_attr(any(miri), ignore)]
458 fn encode_var_usize_4_bytes_03_works() {
459 assert_encode_var_usize_4_bytes(2usize.pow(26)..2usize.pow(27));
460 }
461
462 #[test]
463 #[cfg_attr(any(miri), ignore)]
464 fn encode_var_usize_4_bytes_04_works() {
465 assert_encode_var_usize_4_bytes(2usize.pow(27)..2usize.pow(28));
466 }
467
468 #[test]
469 fn encode_var_u32_max_works() {
470 let mut buffer = Vec::new();
471 let i = u32::MAX as usize;
472 assert_eq!(encode_var_usize(&mut buffer, i), 5);
473 assert_eq!(buffer, [0xFF, 0xFF, 0xFF, 0xFF, 0x0F]);
474 assert_eq!(decode_var_usize(&buffer), Some((i, 5)));
475 }
476
477 #[test]
478 fn encode_var_u64_max_works() {
479 let mut buffer = Vec::new();
480 let i = u64::MAX as usize;
481 assert_eq!(encode_var_usize(&mut buffer, i), 10);
482 assert_eq!(
483 buffer,
484 [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01]
485 );
486 assert_eq!(decode_var_usize(&buffer), Some((i, 10)));
487 }
488
489 #[test]
490 fn decode_var_fail() {
491 assert_eq!(decode_var_usize(&[]), None);
493 assert_eq!(decode_var_usize(&[0x80]), None);
495 }
503}