1use std::ops::Range;
2
3use crate::charutils::{codepoint_to_utf8, hex_to_u32_nocheck};
4use crate::error::ErrorType;
5use crate::safer_unchecked::GetSaferUnchecked;
6
/// Byte-indexed lookup table for single-character escapes.
///
/// The entry at the index of an escape character holds the byte that the
/// escape stands for; every other entry is `0`. Note that `u` maps to `0`:
/// `\u` escapes are not resolved through this table.
///
/// NOTE(review): presumably indexed with the byte that follows a backslash
/// and a `0` result is treated as "invalid escape" — the lookup site is not
/// in this part of the file, confirm against the caller.
pub(crate) const ESCAPE_MAP: [u8; 256] = {
    let mut table = [0u8; 256];
    // Characters that escape to themselves.
    table[b'"' as usize] = b'"'; // 0x22
    table[b'/' as usize] = b'/'; // 0x2f
    table[b'\\' as usize] = b'\\'; // 0x5c
    // Letter escapes that map to control characters.
    table[b'b' as usize] = 0x08; // backspace
    table[b'f' as usize] = 0x0c; // form feed
    table[b'n' as usize] = 0x0a; // line feed
    table[b'r' as usize] = 0x0d; // carriage return
    table[b't' as usize] = 0x09; // horizontal tab
    table
};
24
/// UTF-16 high (leading) surrogate code units: `0xd800` up to, but not
/// including, `0xdc00`.
const HIGH_SURROGATES: Range<u32> = Range {
    start: 0xd800,
    end: 0xdc00,
};
/// UTF-16 low (trailing) surrogate code units: `0xdc00` up to, but not
/// including, `0xe000`.
const LOW_SURROGATES: Range<u32> = Range {
    start: 0xdc00,
    end: 0xe000,
};
27
28#[cfg_attr(not(feature = "no-inline"), inline)]
31#[allow(dead_code)]
32pub(crate) fn handle_unicode_codepoint(
33 src_ptr: &[u8],
34 dst_ptr: &mut [u8],
35) -> Result<(usize, usize), ErrorType> {
36 let (code_point, src_offset) = get_unicode_codepoint(src_ptr)?;
37 let offset: usize = codepoint_to_utf8(code_point, dst_ptr);
38 Ok((offset, src_offset))
39}
40
41#[cfg_attr(not(feature = "no-inline"), inline)]
48pub(crate) fn get_unicode_codepoint(mut src_ptr: &[u8]) -> Result<(u32, usize), ErrorType> {
49 let mut code_point: u32 = hex_to_u32_nocheck(unsafe { src_ptr.get_kinda_unchecked(2..) });
53 src_ptr = unsafe { src_ptr.get_kinda_unchecked(6..) };
54 let mut src_offset = 6;
55 if HIGH_SURROGATES.contains(&code_point) {
58 if (unsafe { *src_ptr.get_kinda_unchecked(0) } != b'\\')
59 || unsafe { *src_ptr.get_kinda_unchecked(1) } != b'u'
60 {
61 return Ok((0, src_offset));
62 }
63
64 let code_point_2: u32 = hex_to_u32_nocheck(unsafe { src_ptr.get_kinda_unchecked(2..) });
65
66 if ((code_point | code_point_2) >> 16) != 0 {
72 return Ok((0, src_offset));
73 }
74 let Some(c1) = code_point.checked_sub(0xd800) else {
75 return Err(ErrorType::InvalidUtf8);
76 };
77 let Some(c2) = code_point_2.checked_sub(0xdc00) else {
78 return Err(ErrorType::InvalidUtf8);
79 };
80 code_point = ((c1 << 10) | c2) + 0x10000;
81 src_offset += 6;
82 } else if LOW_SURROGATES.contains(&code_point) {
83 return Err(ErrorType::InvalidUtf8);
85 }
86 Ok((code_point, src_offset))
87}