#![allow(dead_code)]
use crate::sse42::utf8check::*;
use crate::*;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::mem;
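/// Extra readable space, two SSE registers wide, that input buffers are padded
/// with so the 64-byte SIMD loads in this module can safely touch memory past
/// the logical end of the input.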
pub const SIMDJSON_PADDING: usize = mem::size_of::<__m128i>() * 2;
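/// A 64-byte block of input held in four 128-bit SSE registers.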
#[derive(Debug)]
struct SimdInput {
v0: __m128i,
v1: __m128i,
v2: __m128i,
v3: __m128i,
}
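/// Loads the next 64 bytes starting at `ptr` into a `SimdInput` using unaligned
/// loads; the caller must guarantee at least 64 readable bytes.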
fn fill_input(ptr: &[u8]) -> SimdInput {
unsafe {
#[allow(clippy::cast_ptr_alignment)]
SimdInput {
v0: _mm_loadu_si128(ptr.as_ptr() as *const __m128i),
v1: _mm_loadu_si128(ptr.as_ptr().add(16) as *const __m128i),
v2: _mm_loadu_si128(ptr.as_ptr().add(32) as *const __m128i),
v3: _mm_loadu_si128(ptr.as_ptr().add(48) as *const __m128i),
}
}
}
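/// Validates that this 64-byte block continues a well-formed UTF-8 stream.
/// Each 32-byte half takes a fast path when it is pure ASCII (no high bit set):
/// only the continuation state carried over from the preceding bytes is checked.
/// Otherwise the full range-based validator runs and updates `previous`.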
#[cfg_attr(not(feature = "no-inline"), inline(always))]
unsafe fn check_utf8(
input: &SimdInput,
has_error: &mut __m128i,
previous: &mut AvxProcessedUtfBytes,
) {
let highbit: __m128i = _mm_set1_epi8(static_cast_i8!(0x80u8));
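// fast path: the first 32 bytes are pure ASCII, so we only need to verify that
// no multi-byte sequence from the preceding bytes still expects continuation bytes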
if _mm_testz_si128(_mm_or_si128(input.v0, input.v1), highbit) == 1 {
*has_error = _mm_or_si128(
_mm_cmpgt_epi8(
previous.carried_continuations,
_mm_setr_epi8(
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1,
),
),
*has_error,
);
} else {
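// non-ASCII bytes present: run the full UTF-8 validator on both vectors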
*previous = avxcheck_utf8_bytes(input.v0, &previous, has_error);
*previous = avxcheck_utf8_bytes(input.v1, &previous, has_error);
}
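// same check for the second 32 bytes of the block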
if _mm_testz_si128(_mm_or_si128(input.v2, input.v3), highbit) == 1 {
*has_error = _mm_or_si128(
_mm_cmpgt_epi8(
previous.carried_continuations,
_mm_setr_epi8(
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1,
),
),
*has_error,
);
} else {
*previous = avxcheck_utf8_bytes(input.v2, &previous, has_error);
*previous = avxcheck_utf8_bytes(input.v3, &previous, has_error);
}
}
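/// Compares all 64 input bytes against `m` and returns a bitmask with bit `i`
/// set when input byte `i` equals `m`.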
#[cfg_attr(not(feature = "no-inline"), inline(always))]
fn cmp_mask_against_input(input: &SimdInput, m: u8) -> u64 {
unsafe {
let mask: __m128i = _mm_set1_epi8(m as i8);
let cmp_res_0: __m128i = _mm_cmpeq_epi8(input.v0, mask);
let res_0: u64 = u64::from(static_cast_u32!(_mm_movemask_epi8(cmp_res_0)));
let cmp_res_1: __m128i = _mm_cmpeq_epi8(input.v1, mask);
let res_1: u64 = _mm_movemask_epi8(cmp_res_1) as u64;
let cmp_res_2: __m128i = _mm_cmpeq_epi8(input.v2, mask);
let res_2: u64 = _mm_movemask_epi8(cmp_res_2) as u64;
let cmp_res_3: __m128i = _mm_cmpeq_epi8(input.v3, mask);
let res_3: u64 = _mm_movemask_epi8(cmp_res_3) as u64;
res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48)
}
}
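/// Returns a bitmask with bit `i` set when input byte `i` is less than or equal
/// to the corresponding byte of `maxval`, using unsigned comparison
/// (`max(maxval, x) == maxval` exactly when `x <= maxval`).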
#[cfg_attr(not(feature = "no-inline"), inline(always))]
fn unsigned_lteq_against_input(input: &SimdInput, maxval: __m128i) -> u64 {
unsafe {
let cmp_res_0: __m128i = _mm_cmpeq_epi8(_mm_max_epu8(maxval, input.v0), maxval);
let res_0: u64 = u64::from(static_cast_u32!(_mm_movemask_epi8(cmp_res_0)));
let cmp_res_1: __m128i = _mm_cmpeq_epi8(_mm_max_epu8(maxval, input.v1), maxval);
let res_1: u64 = _mm_movemask_epi8(cmp_res_1) as u64;
let cmp_res_2: __m128i = _mm_cmpeq_epi8(_mm_max_epu8(maxval, input.v2), maxval);
let res_2: u64 = _mm_movemask_epi8(cmp_res_2) as u64;
let cmp_res_3: __m128i = _mm_cmpeq_epi8(_mm_max_epu8(maxval, input.v3), maxval);
let res_3: u64 = _mm_movemask_epi8(cmp_res_3) as u64;
res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48)
}
}
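/// Returns a mask of the characters that are escaped by a backslash, i.e. the
/// positions immediately following an odd-length run of backslashes.
/// `prev_iter_ends_odd_backslash` carries the parity of a run that crosses the
/// 64-byte block boundary.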
#[cfg_attr(not(feature = "no-inline"), inline(always))]
fn find_odd_backslash_sequences(input: &SimdInput, prev_iter_ends_odd_backslash: &mut u64) -> u64 {
const EVEN_BITS: u64 = 0x5555_5555_5555_5555;
const ODD_BITS: u64 = !EVEN_BITS;
let bs_bits: u64 = cmp_mask_against_input(&input, b'\\');
let start_edges: u64 = bs_bits & !(bs_bits << 1);
let even_start_mask: u64 = EVEN_BITS ^ *prev_iter_ends_odd_backslash;
let even_starts: u64 = start_edges & even_start_mask;
let odd_starts: u64 = start_edges & !even_start_mask;
let even_carries: u64 = bs_bits.wrapping_add(even_starts);
let (mut odd_carries, iter_ends_odd_backslash) = bs_bits.overflowing_add(odd_starts);
odd_carries |= *prev_iter_ends_odd_backslash;
*prev_iter_ends_odd_backslash = if iter_ends_odd_backslash { 0x1 } else { 0x0 };
let even_carry_ends: u64 = even_carries & !bs_bits;
let odd_carry_ends: u64 = odd_carries & !bs_bits;
let even_start_odd_end: u64 = even_carry_ends & ODD_BITS;
let odd_start_even_end: u64 = odd_carry_ends & EVEN_BITS;
let odd_ends: u64 = even_start_odd_end | odd_start_even_end;
odd_ends
}
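/// Computes `quote_bits` (positions of unescaped `"` characters) and returns the
/// quote mask: 1-bits over string interiors, including each opening quote but
/// excluding the closing quote. Unescaped control characters found inside
/// strings are accumulated in `error_mask`, and `prev_iter_inside_quote` carries
/// the in-string state into the next block.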
#[cfg_attr(not(feature = "no-inline"), inline(always))]
unsafe fn find_quote_mask_and_bits(
input: &SimdInput,
odd_ends: u64,
prev_iter_inside_quote: &mut u64,
quote_bits: &mut u64,
error_mask: &mut u64,
) -> u64 {
*quote_bits = cmp_mask_against_input(&input, b'"');
*quote_bits &= !odd_ends;
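// a carry-less multiply by an all-ones operand computes the prefix XOR of
// `quote_bits`, turning isolated quote bits into a mask over string interiors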
#[allow(overflowing_literals)]
let mut quote_mask: u64 = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
_mm_set_epi64x(0, static_cast_i64!(*quote_bits)),
_mm_set1_epi8(0xFF),
0,
)) as u64;
quote_mask ^= *prev_iter_inside_quote;
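// unescaped ASCII control characters (bytes <= 0x1F) are an error inside strings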
let unescaped: u64 = unsigned_lteq_against_input(&input, _mm_set1_epi8(0x1F));
*error_mask |= quote_mask & unescaped;
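// arithmetic right shift broadcasts the top bit: all ones if this block ends
// inside a string, all zeros otherwise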
*prev_iter_inside_quote = static_cast_u64!(static_cast_i64!(quote_mask) >> 63);
quote_mask
}
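/// Sets the `structurals` and `whitespace` bitmasks for the 64 input bytes using
/// the "shufti" nibble-lookup technique.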
#[cfg_attr(not(feature = "no-inline"), inline(always))]
unsafe fn find_whitespace_and_structurals(
input: &SimdInput,
whitespace: &mut u64,
structurals: &mut u64,
) {
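// each byte indexes two PSHUFB lookup tables, one by its low nibble and one by
// its high nibble; the results are ANDed, and a byte belongs to a class when
// the surviving bits intersect that class's mask (0x7 = structural, 0x18 = whitespace)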
let low_nibble_mask: __m128i = _mm_setr_epi8(
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0,
);
let high_nibble_mask: __m128i = _mm_setr_epi8(
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0,
);
let structural_shufti_mask: __m128i = _mm_set1_epi8(0x7);
let whitespace_shufti_mask: __m128i = _mm_set1_epi8(0x18);
let v_v0: __m128i = _mm_and_si128(
_mm_shuffle_epi8(low_nibble_mask, input.v0),
_mm_shuffle_epi8(
high_nibble_mask,
_mm_and_si128(_mm_srli_epi32(input.v0, 4), _mm_set1_epi8(0x7f)),
),
);
let v_v1: __m128i = _mm_and_si128(
_mm_shuffle_epi8(low_nibble_mask, input.v1),
_mm_shuffle_epi8(
high_nibble_mask,
_mm_and_si128(_mm_srli_epi32(input.v1, 4), _mm_set1_epi8(0x7f)),
),
);
let v_v2: __m128i = _mm_and_si128(
_mm_shuffle_epi8(low_nibble_mask, input.v2),
_mm_shuffle_epi8(
high_nibble_mask,
_mm_and_si128(_mm_srli_epi32(input.v2, 4), _mm_set1_epi8(0x7f)),
),
);
let v_v3: __m128i = _mm_and_si128(
_mm_shuffle_epi8(low_nibble_mask, input.v3),
_mm_shuffle_epi8(
high_nibble_mask,
_mm_and_si128(_mm_srli_epi32(input.v3, 4), _mm_set1_epi8(0x7f)),
),
);
let tmp_v0: __m128i = _mm_cmpeq_epi8(
_mm_and_si128(v_v0, structural_shufti_mask),
_mm_set1_epi8(0),
);
let tmp_v1: __m128i = _mm_cmpeq_epi8(
_mm_and_si128(v_v1, structural_shufti_mask),
_mm_set1_epi8(0),
);
let tmp_v2: __m128i = _mm_cmpeq_epi8(
_mm_and_si128(v_v2, structural_shufti_mask),
_mm_set1_epi8(0),
);
let tmp_v3: __m128i = _mm_cmpeq_epi8(
_mm_and_si128(v_v3, structural_shufti_mask),
_mm_set1_epi8(0),
);
let structural_res_0: u64 = u64::from(static_cast_u32!(_mm_movemask_epi8(tmp_v0)));
let structural_res_1: u64 = _mm_movemask_epi8(tmp_v1) as u64;
let structural_res_2: u64 = _mm_movemask_epi8(tmp_v2) as u64;
let structural_res_3: u64 = _mm_movemask_epi8(tmp_v3) as u64;
*structurals = !(structural_res_0 | (structural_res_1 << 16) | (structural_res_2 << 32) | (structural_res_3 << 48));
let tmp_ws_v0: __m128i = _mm_cmpeq_epi8(
_mm_and_si128(v_v0, whitespace_shufti_mask),
_mm_set1_epi8(0),
);
let tmp_ws_v1: __m128i = _mm_cmpeq_epi8(
_mm_and_si128(v_v1, whitespace_shufti_mask),
_mm_set1_epi8(0),
);
let tmp_ws_v2: __m128i = _mm_cmpeq_epi8(
_mm_and_si128(v_v2, whitespace_shufti_mask),
_mm_set1_epi8(0),
);
let tmp_ws_v3: __m128i = _mm_cmpeq_epi8(
_mm_and_si128(v_v3, whitespace_shufti_mask),
_mm_set1_epi8(0),
);
let ws_res_0: u64 = u64::from(static_cast_u32!(_mm_movemask_epi8(tmp_ws_v0)));
let ws_res_1: u64 = _mm_movemask_epi8(tmp_ws_v1) as u64;
let ws_res_2: u64 = _mm_movemask_epi8(tmp_ws_v2) as u64;
let ws_res_3: u64 = _mm_movemask_epi8(tmp_ws_v3) as u64;
*whitespace = !(ws_res_0 | (ws_res_1 << 16) | (ws_res_2 << 32) | (ws_res_3 << 48));
}
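/// Appends the position of every set bit in `bits` to `base`. `idx` is the start
/// of the block that follows the one `bits` was computed for, hence the
/// `idx - 64` base offset. Four indices are decoded per iteration and written
/// with a single unaligned store; the store may spill a few scratch lanes past
/// the `cnt` valid entries, which stay invisible because `set_len(l + cnt)` only
/// exposes the valid ones and `reserve(64)` guarantees the capacity.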
#[cfg_attr(not(feature = "no-inline"), inline(always))]
fn flatten_bits(base: &mut Vec<u32>, idx: u32, mut bits: u64) {
let cnt: usize = bits.count_ones() as usize;
let mut l = base.len();
let idx_minus_64 = idx.wrapping_sub(64);
let idx_64_v = unsafe {
_mm_set_epi32(
static_cast_i32!(idx_minus_64),
static_cast_i32!(idx_minus_64),
static_cast_i32!(idx_minus_64),
static_cast_i32!(idx_minus_64),
)
};
base.reserve(64);
unsafe {
base.set_len(l + cnt);
}
while bits != 0 {
unsafe {
let v0 = bits.trailing_zeros() as i32;
bits &= bits.wrapping_sub(1);
let v1 = bits.trailing_zeros() as i32;
bits &= bits.wrapping_sub(1);
let v2 = bits.trailing_zeros() as i32;
bits &= bits.wrapping_sub(1);
let v3 = bits.trailing_zeros() as i32;
bits &= bits.wrapping_sub(1);
let v: __m128i = _mm_set_epi32(v3, v2, v1, v0);
let v: __m128i = _mm_add_epi32(idx_64_v, v);
#[allow(clippy::cast_ptr_alignment)]
_mm_storeu_si128(base.as_mut_ptr().add(l) as *mut __m128i, v);
}
l += 4;
}
}
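/// Folds quote information into the structural mask: structurals inside strings
/// are cleared, quote characters are added, and a pseudo-structural bit is set
/// on any non-whitespace character outside a string that follows whitespace or a
/// structural character, so bare values (numbers, true, false, null) get an index
/// at their first byte. Closing quotes are switched off again at the end.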
#[cfg_attr(not(feature = "no-inline"), inline(always))]
fn finalize_structurals(
mut structurals: u64,
whitespace: u64,
quote_mask: u64,
quote_bits: u64,
prev_iter_ends_pseudo_pred: &mut u64,
) -> u64 {
structurals &= !quote_mask;
structurals |= quote_bits;
let pseudo_pred: u64 = structurals | whitespace;
let shifted_pseudo_pred: u64 = (pseudo_pred << 1) | *prev_iter_ends_pseudo_pred;
*prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
let pseudo_structurals: u64 = shifted_pseudo_pred & (!whitespace) & (!quote_mask);
structurals |= pseudo_structurals;
structurals &= !(quote_bits & !quote_mask);
structurals
}
impl<'de> Deserializer<'de> {
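/// Stage 1: scans `input` 64 bytes at a time and returns the indexes of all
/// structural and pseudo-structural characters, validating UTF-8 and detecting
/// unescaped control characters and unterminated strings along the way.
/// The structural bits computed for a block are flattened at the start of the
/// next iteration (and once more after the loops), mirroring upstream simdjson's
/// pipelining of that work.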
pub unsafe fn find_structural_bits(input: &[u8]) -> std::result::Result<Vec<u32>, ErrorType> {
let len = input.len();
let mut structural_indexes = Vec::with_capacity(len / 6);
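// index 0 is pushed up front as an extra root element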
structural_indexes.push(0);
let mut has_error: __m128i = _mm_setzero_si128();
let mut previous = AvxProcessedUtfBytes::default();
let mut prev_iter_ends_odd_backslash: u64 = 0;
let mut prev_iter_inside_quote: u64 = 0;
let mut prev_iter_ends_pseudo_pred: u64 = 1;
let mut structurals: u64 = 0;
let lenminus64: usize = if len < 64 { 0 } else { len - 64 };
let mut idx: usize = 0;
let mut error_mask: u64 = 0;
while idx < lenminus64 {
let input: SimdInput = fill_input(input.get_unchecked(idx..));
check_utf8(&input, &mut has_error, &mut previous);
let odd_ends: u64 =
find_odd_backslash_sequences(&input, &mut prev_iter_ends_odd_backslash);
let mut quote_bits: u64 = 0;
let quote_mask: u64 = find_quote_mask_and_bits(
&input,
odd_ends,
&mut prev_iter_inside_quote,
&mut quote_bits,
&mut error_mask,
);
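// flatten the structural bits found in the previous 64-byte block; on the first
// iteration `structurals` is still 0, so nothing is written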
flatten_bits(&mut structural_indexes, idx as u32, structurals);
let mut whitespace: u64 = 0;
find_whitespace_and_structurals(&input, &mut whitespace, &mut structurals);
structurals = finalize_structurals(
structurals,
whitespace,
quote_mask,
quote_bits,
&mut prev_iter_ends_pseudo_pred,
);
idx += 64;
}
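// process any trailing partial block by copying it into a 64-byte buffer padded with spaces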
if idx < len {
let mut tmpbuf: [u8; 64] = [0x20; 64];
tmpbuf
.as_mut_ptr()
.copy_from(input.as_ptr().add(idx), len - idx);
let input: SimdInput = fill_input(&tmpbuf);
check_utf8(&input, &mut has_error, &mut previous);
let odd_ends: u64 =
find_odd_backslash_sequences(&input, &mut prev_iter_ends_odd_backslash);
let mut quote_bits: u64 = 0;
let quote_mask: u64 = find_quote_mask_and_bits(
&input,
odd_ends,
&mut prev_iter_inside_quote,
&mut quote_bits,
&mut error_mask,
);
flatten_bits(&mut structural_indexes, idx as u32, structurals);
let mut whitespace: u64 = 0;
find_whitespace_and_structurals(&input, &mut whitespace, &mut structurals);
structurals = finalize_structurals(
structurals,
whitespace,
quote_mask,
quote_bits,
&mut prev_iter_ends_pseudo_pred,
);
idx += 64;
}
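// a quote state still open at the end of the input means an unterminated string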
if prev_iter_inside_quote != 0 {
return Err(ErrorType::Syntax);
}
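// flush the structural bits of the final processed block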
flatten_bits(&mut structural_indexes, idx as u32, structurals);
if structural_indexes.len() == 1 {
return Err(ErrorType::EOF);
}
if structural_indexes.last() > Some(&(len as u32)) {
return Err(ErrorType::InternalError);
}
if error_mask != 0 {
return Err(ErrorType::Syntax);
}
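// _mm_testz_si128(v, v) returns 1 exactly when `v` is all zeros, i.e. the UTF-8
// validator never recorded an error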
if _mm_testz_si128(has_error, has_error) != 0 {
Ok(structural_indexes)
} else {
Err(ErrorType::InvalidUTF8)
}
}
}