#[cfg(not(PyPy))]
use crate::Py_hash_t;
use crate::{PyObject, Py_UCS1, Py_UCS2, Py_UCS4, Py_UNICODE, Py_ssize_t};
#[cfg(not(Py_3_12))]
use libc::wchar_t;
use std::os::raw::{c_char, c_int, c_uint, c_void};
/// Byte buffer treated as a run of individually addressable bits.
///
/// `Storage` is any byte container usable through `AsRef<[u8]>` / `AsMut<[u8]>`.
/// Bit positions count from the start of the buffer. On big-endian targets the
/// position of a bit inside its byte, and the order of bits inside a multi-bit
/// field, are both mirrored.
#[repr(C)]
struct BitfieldUnit<Storage> {
    storage: Storage,
}

impl<Storage> BitfieldUnit<Storage> {
    /// Wraps `storage` as-is.
    #[inline]
    pub const fn new(storage: Storage) -> Self {
        Self { storage }
    }
}

impl<Storage> BitfieldUnit<Storage>
where
    Storage: AsRef<[u8]> + AsMut<[u8]>,
{
    /// Returns `true` when the bit at absolute position `index` is set.
    #[inline]
    fn get_bit(&self, index: usize) -> bool {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let within_byte = index % 8;
        let shift = if cfg!(target_endian = "big") {
            7 - within_byte
        } else {
            within_byte
        };
        let selector: u8 = 1 << shift;
        (self.storage.as_ref()[index / 8] & selector) != 0
    }

    /// Sets (`val == true`) or clears (`val == false`) the bit at absolute position `index`.
    #[inline]
    fn set_bit(&mut self, index: usize, val: bool) {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let within_byte = index % 8;
        let shift = if cfg!(target_endian = "big") {
            7 - within_byte
        } else {
            within_byte
        };
        let selector: u8 = 1 << shift;
        let slot = &mut self.storage.as_mut()[index / 8];
        *slot = if val {
            *slot | selector
        } else {
            *slot & !selector
        };
    }

    /// Reads the `bit_width`-bit field starting at `bit_offset` as an unsigned integer.
    #[inline]
    fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
        let width = bit_width as usize;
        (0..width)
            .filter(|&source| self.get_bit(source + bit_offset))
            .fold(0u64, |acc, source| {
                // Storage bit `source` maps to result bit `target`.
                let target = if cfg!(target_endian = "big") {
                    width - 1 - source
                } else {
                    source
                };
                acc | (1u64 << target)
            })
    }

    /// Writes the low `bit_width` bits of `val` into the field starting at `bit_offset`.
    ///
    /// Bits of `val` above `bit_width` are ignored.
    #[inline]
    fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
        let width = bit_width as usize;
        for source in 0..width {
            let source_is_set = (val & (1u64 << source)) != 0;
            // Value bit `source` maps to storage bit `target`.
            let target = if cfg!(target_endian = "big") {
                width - 1 - source
            } else {
                source
            };
            self.set_bit(target + bit_offset, source_is_set);
        }
    }
}
// Bit layout of the packed unicode `state` word. Each field starts where the
// previous one ends: interned = bits 0..2, kind = bits 2..5, compact = bit 5,
// ascii = bit 6, and (only when `Py_3_12` is not set) ready = bit 7.

/// First bit of the `interned` field.
const STATE_INTERNED_INDEX: usize = 0;
/// Width in bits of the `interned` field.
const STATE_INTERNED_WIDTH: u8 = 2;
/// First bit of the `kind` field (directly after `interned`).
const STATE_KIND_INDEX: usize = STATE_INTERNED_WIDTH as usize;
/// Width in bits of the `kind` field.
const STATE_KIND_WIDTH: u8 = 3;
/// First bit of the `compact` flag (directly after `kind`).
const STATE_COMPACT_INDEX: usize = (STATE_INTERNED_WIDTH + STATE_KIND_WIDTH) as usize;
/// Width in bits of the `compact` flag.
const STATE_COMPACT_WIDTH: u8 = 1;
/// First bit of the `ascii` flag (directly after `compact`).
const STATE_ASCII_INDEX: usize =
(STATE_INTERNED_WIDTH + STATE_KIND_WIDTH + STATE_COMPACT_WIDTH) as usize;
/// Width in bits of the `ascii` flag.
const STATE_ASCII_WIDTH: u8 = 1;
/// First bit of the `ready` flag (directly after `ascii`); only defined when `Py_3_12` is not set.
#[cfg(not(Py_3_12))]
const STATE_READY_INDEX: usize =
(STATE_INTERNED_WIDTH + STATE_KIND_WIDTH + STATE_COMPACT_WIDTH + STATE_ASCII_WIDTH) as usize;
/// Width in bits of the `ready` flag; only defined when `Py_3_12` is not set.
#[cfg(not(Py_3_12))]
const STATE_READY_WIDTH: u8 = 1;
/// Bitfield view over the four bytes of a packed unicode `state` word.
///
/// The zero-length `bitfield_align` array occupies no space; the explicit
/// `align(4)` gives the struct the alignment of a `u32`. Field positions and
/// widths come from the `STATE_*` constants.
#[repr(C)]
#[repr(align(4))]
struct PyASCIIObjectState {
    bitfield_align: [u8; 0],
    bitfield: BitfieldUnit<[u8; 4usize]>,
}

// The accessors below use plain integer conversions. Getters narrow the `u64`
// returned by `BitfieldUnit::get` with `as`; this cannot lose information
// because every field is at most 3 bits wide. Setters widen losslessly with
// `u64::from`; bits of `val` beyond the field width are dropped by
// `BitfieldUnit::set`.
impl PyASCIIObjectState {
    /// Reads the 2-bit `interned` field.
    #[inline]
    unsafe fn interned(&self) -> c_uint {
        self.bitfield
            .get(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH) as c_uint
    }

    /// Overwrites the 2-bit `interned` field with the low bits of `val`.
    #[inline]
    unsafe fn set_interned(&mut self, val: c_uint) {
        self.bitfield
            .set(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH, u64::from(val))
    }

    /// Reads the 3-bit `kind` field.
    #[inline]
    unsafe fn kind(&self) -> c_uint {
        self.bitfield.get(STATE_KIND_INDEX, STATE_KIND_WIDTH) as c_uint
    }

    /// Overwrites the 3-bit `kind` field with the low bits of `val`.
    #[inline]
    unsafe fn set_kind(&mut self, val: c_uint) {
        self.bitfield
            .set(STATE_KIND_INDEX, STATE_KIND_WIDTH, u64::from(val))
    }

    /// Reads the 1-bit `compact` flag.
    #[inline]
    unsafe fn compact(&self) -> c_uint {
        self.bitfield.get(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH) as c_uint
    }

    /// Overwrites the 1-bit `compact` flag with the lowest bit of `val`.
    #[inline]
    unsafe fn set_compact(&mut self, val: c_uint) {
        self.bitfield
            .set(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH, u64::from(val))
    }

    /// Reads the 1-bit `ascii` flag.
    #[inline]
    unsafe fn ascii(&self) -> c_uint {
        self.bitfield.get(STATE_ASCII_INDEX, STATE_ASCII_WIDTH) as c_uint
    }

    /// Overwrites the 1-bit `ascii` flag with the lowest bit of `val`.
    #[inline]
    unsafe fn set_ascii(&mut self, val: c_uint) {
        self.bitfield
            .set(STATE_ASCII_INDEX, STATE_ASCII_WIDTH, u64::from(val))
    }

    /// Reads the 1-bit `ready` flag (only available when `Py_3_12` is not set).
    #[cfg(not(Py_3_12))]
    #[inline]
    unsafe fn ready(&self) -> c_uint {
        self.bitfield.get(STATE_READY_INDEX, STATE_READY_WIDTH) as c_uint
    }

    /// Overwrites the 1-bit `ready` flag with the lowest bit of `val`
    /// (only available when `Py_3_12` is not set).
    #[cfg(not(Py_3_12))]
    #[inline]
    unsafe fn set_ready(&mut self, val: c_uint) {
        self.bitfield
            .set(STATE_READY_INDEX, STATE_READY_WIDTH, u64::from(val))
    }
}
/// Builds the bitfield view from a raw `state` word, keeping the native byte order.
impl From<u32> for PyASCIIObjectState {
    #[inline]
    fn from(value: u32) -> Self {
        let raw_bytes = value.to_ne_bytes();
        Self {
            bitfield_align: [],
            bitfield: BitfieldUnit::new(raw_bytes),
        }
    }
}
impl From<PyASCIIObjectState> for u32 {
#[inline]
fn from(value: PyASCIIObjectState) -> Self {
u32::from_ne_bytes(value.bitfield.storage)
}
}
#[repr(C)]
pub struct PyASCIIObject {
pub ob_base: PyObject,
pub length: Py_ssize_t,
#[cfg(not(PyPy))]
pub hash: Py_hash_t,
pub state: u32,
#[cfg(not(Py_3_12))]
pub wstr: *mut wchar_t,
}
impl PyASCIIObject {
#[inline]
pub unsafe fn interned(&self) -> c_uint {
PyASCIIObjectState::from(self.state).interned()
}
#[inline]
pub unsafe fn set_interned(&mut self, val: c_uint) {
let mut state = PyASCIIObjectState::from(self.state);
state.set_interned(val);
self.state = u32::from(state);
}
#[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
#[inline]
pub unsafe fn kind(&self) -> c_uint {
PyASCIIObjectState::from(self.state).kind()
}
#[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
#[inline]
pub unsafe fn set_kind(&mut self, val: c_uint) {
let mut state = PyASCIIObjectState::from(self.state);
state.set_kind(val);
self.state = u32::from(state);
}
#[inline]
pub unsafe fn compact(&self) -> c_uint {
PyASCIIObjectState::from(self.state).compact()
}
#[inline]
pub unsafe fn set_compact(&mut self, val: c_uint) {
let mut state = PyASCIIObjectState::from(self.state);
state.set_compact(val);
self.state = u32::from(state);
}
#[inline]
pub unsafe fn ascii(&self) -> c_uint {
PyASCIIObjectState::from(self.state).ascii()
}
#[inline]
pub unsafe fn set_ascii(&mut self, val: c_uint) {
let mut state = PyASCIIObjectState::from(self.state);
state.set_ascii(val);
self.state = u32::from(state);
}
#[cfg(not(Py_3_12))]
#[inline]
pub unsafe fn ready(&self) -> c_uint {
PyASCIIObjectState::from(self.state).ready()
}
#[cfg(not(Py_3_12))]
#[inline]
pub unsafe fn set_ready(&mut self, val: c_uint) {
let mut state = PyASCIIObjectState::from(self.state);
state.set_ready(val);
self.state = u32::from(state);
}
}
/// `#[repr(C)]` layout that extends [`PyASCIIObject`] with UTF-8 bookkeeping fields.
///
/// `_PyUnicode_COMPACT_DATA` treats the memory directly after this struct as
/// the character data of a compact object whose `ascii` flag is 0.
#[repr(C)]
pub struct PyCompactUnicodeObject {
/// Embedded [`PyASCIIObject`] header; must stay the first field so a pointer
/// to this struct can also be read as a `PyASCIIObject`.
pub _base: PyASCIIObject,
/// NOTE(review): presumably the byte length of `utf8` — confirm against the CPython header.
pub utf8_length: Py_ssize_t,
/// NOTE(review): presumably a UTF-8 representation of the string — confirm against the CPython header.
pub utf8: *mut c_char,
/// Only present when `Py_3_12` is not set.
#[cfg(not(Py_3_12))]
pub wstr_length: Py_ssize_t,
}
/// Pointer to the character buffer of a [`PyUnicodeObject`], viewable with
/// several element types. All variants share the same pointer-sized storage.
#[repr(C)]
pub union PyUnicodeObjectData {
/// Untyped view; this is the variant read by `_PyUnicode_NONCOMPACT_DATA`.
pub any: *mut c_void,
/// View as `Py_UCS1` elements.
pub latin1: *mut Py_UCS1,
/// View as `Py_UCS2` elements.
pub ucs2: *mut Py_UCS2,
/// View as `Py_UCS4` elements.
pub ucs4: *mut Py_UCS4,
}
/// `#[repr(C)]` layout that extends [`PyCompactUnicodeObject`] with a pointer
/// to the character data; used for objects whose `compact` flag is 0.
#[repr(C)]
pub struct PyUnicodeObject {
/// Embedded [`PyCompactUnicodeObject`] header; must stay the first field.
pub _base: PyCompactUnicodeObject,
/// Pointer to the character data, read by `_PyUnicode_NONCOMPACT_DATA`.
pub data: PyUnicodeObjectData,
}
extern "C" {
/// Foreign declaration of `_PyUnicode_CheckConsistency`; not declared when building for PyPy.
///
/// NOTE(review): judging by the name, this validates the internal state of
/// `op`, with `check_content` controlling whether the character data is
/// inspected too — confirm against the CPython sources.
#[cfg(not(PyPy))]
pub fn _PyUnicode_CheckConsistency(op: *mut PyObject, check_content: c_int) -> c_int;
}
// Named values for the 2-bit `interned` field of the packed unicode state
// (see `PyASCIIObject::interned`). Meanings are inferred from the names.

/// `interned` value 0: not interned.
pub const SSTATE_NOT_INTERNED: c_uint = 0;
/// `interned` value 1: interned, mortal.
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
/// `interned` value 2: interned, immortal.
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
/// `interned` value 3: interned, immortal, static. Only defined when `Py_3_12` is set.
#[cfg(Py_3_12)]
pub const SSTATE_INTERNED_IMMORTAL_STATIC: c_uint = 3;
/// Returns the `ascii` flag (0 or 1) of the unicode object `op`.
///
/// Debug builds additionally assert that `op` passes `PyUnicode_Check` and,
/// when `Py_3_12` is not set, that it is ready.
///
/// # Safety
///
/// `op` is dereferenced as a [`PyASCIIObject`]; it must be a valid pointer to
/// an object with that layout.
#[inline]
pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
    debug_assert!(crate::PyUnicode_Check(op) != 0);
    #[cfg(not(Py_3_12))]
    debug_assert!(PyUnicode_IS_READY(op) != 0);
    let header = op.cast::<PyASCIIObject>();
    (*header).ascii()
}
/// Returns the `compact` flag (0 or 1) of the unicode object `op`.
///
/// # Safety
///
/// `op` is dereferenced as a [`PyASCIIObject`]; it must be a valid pointer to
/// an object with that layout.
#[inline]
pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
    let header = op.cast::<PyASCIIObject>();
    (*header).compact()
}
/// Returns 1 when both the `ascii` and `compact` flags of `op` are set, else 0.
///
/// The `compact` flag is only inspected when `ascii` is set.
///
/// # Safety
///
/// `op` is dereferenced as a [`PyASCIIObject`]; it must be a valid pointer to
/// an object with that layout.
#[inline]
pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
    let header = op.cast::<PyASCIIObject>();
    let ascii_flag_set = (*header).ascii() != 0;
    c_uint::from(ascii_flag_set && PyUnicode_IS_COMPACT(op) != 0)
}
// Named values for the 3-bit `kind` field of the packed unicode state
// (see `PyASCIIObject::kind` / `PyUnicode_KIND`).

/// `kind` value 0. Only defined when `Py_3_12` is not set, and deprecated
/// because it was removed in Python 3.12.
#[cfg(not(Py_3_12))]
#[deprecated(note = "Removed in Python 3.12")]
pub const PyUnicode_WCHAR_KIND: c_uint = 0;
/// `kind` value 1; by its name, one byte per character (cf. `PyUnicode_1BYTE_DATA`).
pub const PyUnicode_1BYTE_KIND: c_uint = 1;
/// `kind` value 2; by its name, two bytes per character (cf. `PyUnicode_2BYTE_DATA`).
pub const PyUnicode_2BYTE_KIND: c_uint = 2;
/// `kind` value 4; by its name, four bytes per character (cf. `PyUnicode_4BYTE_DATA`).
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
/// Returns the character buffer of `op` (see `PyUnicode_DATA`) typed as `*mut Py_UCS1`.
///
/// No check is made that the object's `kind` matches this element type.
///
/// # Safety
///
/// Same requirements as `PyUnicode_DATA`, which this function calls.
#[inline]
pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
    PyUnicode_DATA(op).cast::<Py_UCS1>()
}
/// Returns the character buffer of `op` (see `PyUnicode_DATA`) typed as `*mut Py_UCS2`.
///
/// No check is made that the object's `kind` matches this element type.
///
/// # Safety
///
/// Same requirements as `PyUnicode_DATA`, which this function calls.
#[inline]
pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
    PyUnicode_DATA(op).cast::<Py_UCS2>()
}
/// Returns the character buffer of `op` (see `PyUnicode_DATA`) typed as `*mut Py_UCS4`.
///
/// No check is made that the object's `kind` matches this element type.
///
/// # Safety
///
/// Same requirements as `PyUnicode_DATA`, which this function calls.
#[inline]
pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
    PyUnicode_DATA(op).cast::<Py_UCS4>()
}
/// Returns the `kind` field of the unicode object `op` (see the
/// `PyUnicode_*_KIND` constants).
///
/// Debug builds additionally assert that `op` passes `PyUnicode_Check` and,
/// when `Py_3_12` is not set, that it is ready.
///
/// # Safety
///
/// `op` is dereferenced as a [`PyASCIIObject`]; it must be a valid pointer to
/// an object with that layout.
#[inline]
pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
    debug_assert!(crate::PyUnicode_Check(op) != 0);
    #[cfg(not(Py_3_12))]
    debug_assert!(PyUnicode_IS_READY(op) != 0);
    let header = op.cast::<PyASCIIObject>();
    (*header).kind()
}
/// Computes the address of the character data stored inline after the header
/// of a compact unicode object.
///
/// When the `ascii` flag is set, the data starts one [`PyASCIIObject`] past
/// `op`. Otherwise it starts one [`PyCompactUnicodeObject`] past `op`.
///
/// # Safety
///
/// `op` must be a valid unicode object (it is passed to `PyUnicode_IS_ASCII`).
/// The result is only meaningful if the object really stores its data inline;
/// this function does not check the `compact` flag.
#[inline]
pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
    if PyUnicode_IS_ASCII(op) != 0 {
        op.cast::<PyASCIIObject>().add(1).cast::<c_void>()
    } else {
        op.cast::<PyCompactUnicodeObject>().add(1).cast::<c_void>()
    }
}
/// Returns the `data.any` pointer stored in a non-compact unicode object.
///
/// Debug builds assert that the stored pointer is not null.
///
/// # Safety
///
/// `op` is dereferenced as a [`PyUnicodeObject`]; it must be a valid pointer
/// to an object with that layout.
#[inline]
pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
    debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
    let full_object = op.cast::<PyUnicodeObject>();
    (*full_object).data.any
}
/// Returns an untyped pointer to the character data of the unicode object `op`.
///
/// Dispatches on the `compact` flag: compact objects go through
/// `_PyUnicode_COMPACT_DATA`, all others through `_PyUnicode_NONCOMPACT_DATA`.
/// Debug builds assert that `op` passes `PyUnicode_Check`.
///
/// # Safety
///
/// `op` must be a valid pointer to a unicode object; it is dereferenced by the
/// helpers this function calls.
#[inline]
pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
    debug_assert!(crate::PyUnicode_Check(op) != 0);
    match PyUnicode_IS_COMPACT(op) {
        0 => _PyUnicode_NONCOMPACT_DATA(op),
        _ => _PyUnicode_COMPACT_DATA(op),
    }
}
/// Returns the `length` field of the unicode object `op`.
///
/// Debug builds additionally assert that `op` passes `PyUnicode_Check` and,
/// when `Py_3_12` is not set, that it is ready.
///
/// # Safety
///
/// `op` is dereferenced as a [`PyASCIIObject`]; it must be a valid pointer to
/// an object with that layout.
#[inline]
pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
    debug_assert!(crate::PyUnicode_Check(op) != 0);
    #[cfg(not(Py_3_12))]
    debug_assert!(PyUnicode_IS_READY(op) != 0);
    let header = op.cast::<PyASCIIObject>();
    (*header).length
}
/// Variant used when `Py_3_12` is set: always returns 1 and never touches `_op`.
#[cfg(Py_3_12)]
#[inline]
pub unsafe fn PyUnicode_IS_READY(_op: *mut PyObject) -> c_uint {
    1
}

/// Variant used when `Py_3_12` is not set: returns the `ready` flag (0 or 1) of `op`.
///
/// # Safety
///
/// `op` is dereferenced as a [`PyASCIIObject`]; it must be a valid pointer to
/// an object with that layout.
#[cfg(not(Py_3_12))]
#[inline]
pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
    let header = op.cast::<PyASCIIObject>();
    (*header).ready()
}
/// Variant used when `Py_3_12` is set: does nothing and returns 0.
#[cfg(Py_3_12)]
#[inline]
pub unsafe fn PyUnicode_READY(_op: *mut PyObject) -> c_int {
    0
}

/// Variant used when `Py_3_12` is not set.
///
/// Returns 0 immediately if `op` is already ready; otherwise forwards to
/// `_PyUnicode_Ready` and returns its result. Debug builds assert that `op`
/// passes `PyUnicode_Check`.
///
/// # Safety
///
/// `op` must be a valid pointer to a unicode object; it is dereferenced by
/// `PyUnicode_IS_READY` and handed to the foreign `_PyUnicode_Ready`.
#[cfg(not(Py_3_12))]
#[inline]
pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
    debug_assert!(crate::PyUnicode_Check(op) != 0);
    if PyUnicode_IS_READY(op) == 0 {
        return _PyUnicode_Ready(op);
    }
    0
}
// Foreign declarations for creating, filling and converting unicode objects.
// When building for PyPy, the `link_name` attributes redirect the symbols to
// their `PyPy`-prefixed counterparts; items marked `cfg(not(PyPy))` are not
// declared there at all. Behavioural notes below are inferred from names and
// signatures only — consult the CPython C-API documentation before relying on them.
extern "C" {
/// NOTE(review): presumably creates a new unicode object with room for `size`
/// characters no larger than `maxchar` — confirm.
#[cfg_attr(PyPy, link_name = "PyPyUnicode_New")]
pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
/// Called by the pre-3.12 `PyUnicode_READY` wrapper when the object's `ready` flag is 0.
#[cfg_attr(PyPy, link_name = "_PyPyUnicode_Ready")]
pub fn _PyUnicode_Ready(unicode: *mut PyObject) -> c_int;
/// NOTE(review): presumably copies `how_many` characters from `from` (starting
/// at `from_start`) into `to` (starting at `to_start`) — confirm. Not declared for PyPy.
#[cfg(not(PyPy))]
pub fn PyUnicode_CopyCharacters(
to: *mut PyObject,
to_start: Py_ssize_t,
from: *mut PyObject,
from_start: Py_ssize_t,
how_many: Py_ssize_t,
) -> Py_ssize_t;
/// NOTE(review): presumably writes `fill_char` into `length` positions of
/// `unicode` beginning at `start` — confirm. Not declared for PyPy.
#[cfg(not(PyPy))]
pub fn PyUnicode_Fill(
unicode: *mut PyObject,
start: Py_ssize_t,
length: Py_ssize_t,
fill_char: Py_UCS4,
) -> Py_ssize_t;
/// Deprecated; only declared when `Py_3_12` is not set. Takes a `Py_UNICODE` buffer and a size.
#[cfg(not(Py_3_12))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")]
pub fn PyUnicode_FromUnicode(u: *const Py_UNICODE, size: Py_ssize_t) -> *mut PyObject;
/// NOTE(review): `kind` is presumably one of the `PyUnicode_*_KIND` values
/// describing the element width of `buffer`. It is typed `c_int` here, while
/// those constants are `c_uint`, so callers need a cast — confirm.
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromKindAndData")]
pub fn PyUnicode_FromKindAndData(
kind: c_int,
buffer: *const c_void,
size: Py_ssize_t,
) -> *mut PyObject;
/// Deprecated; only declared when `Py_3_12` is not set. Returns a `Py_UNICODE` pointer for `unicode`.
#[cfg(not(Py_3_12))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")]
pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut Py_UNICODE;
/// Deprecated; only declared when `Py_3_12` is not set. Like `PyUnicode_AsUnicode`
/// with an extra `size` out-pointer.
#[cfg(not(Py_3_12))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")]
pub fn PyUnicode_AsUnicodeAndSize(
unicode: *mut PyObject,
size: *mut Py_ssize_t,
) -> *mut Py_UNICODE;
}
// Foreign declarations for UTF-8 access and for codec entry points that take a
// raw `Py_UNICODE` buffer plus a length. Declarations with a `link_name`
// attribute resolve to `PyPy`-prefixed symbols when building for PyPy.
// NOTE(review): none of these are gated on a Python version here. The
// `Py_UNICODE`-buffer functions look like CPython's legacy codec API, which
// may be missing from newer interpreters — confirm availability before calling.
extern "C" {
/// NOTE(review): presumably returns a UTF-8 encoded, NUL-terminated view of
/// `unicode` owned by the object — confirm.
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;
/// Encodes the `size`-element buffer `s` using the codec named by `encoding`;
/// `errors` selects the error handler (semantics inferred from parameter names).
pub fn PyUnicode_Encode(
s: *const Py_UNICODE,
size: Py_ssize_t,
encoding: *const c_char,
errors: *const c_char,
) -> *mut PyObject;
/// UTF-7 encoder for a raw `Py_UNICODE` buffer.
pub fn PyUnicode_EncodeUTF7(
data: *const Py_UNICODE,
length: Py_ssize_t,
base64SetO: c_int,
base64WhiteSpace: c_int,
errors: *const c_char,
) -> *mut PyObject;
/// UTF-8 encoder for a raw `Py_UNICODE` buffer.
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")]
pub fn PyUnicode_EncodeUTF8(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
/// UTF-32 encoder for a raw `Py_UNICODE` buffer; `byteorder` selects the output byte order.
pub fn PyUnicode_EncodeUTF32(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
byteorder: c_int,
) -> *mut PyObject;
/// UTF-16 encoder for a raw `Py_UNICODE` buffer; `byteorder` selects the output byte order.
pub fn PyUnicode_EncodeUTF16(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
byteorder: c_int,
) -> *mut PyObject;
/// Unicode-escape encoder for a raw `Py_UNICODE` buffer.
pub fn PyUnicode_EncodeUnicodeEscape(
data: *const Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
/// Raw-unicode-escape encoder for a raw `Py_UNICODE` buffer.
pub fn PyUnicode_EncodeRawUnicodeEscape(
data: *const Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
/// Latin-1 encoder for a raw `Py_UNICODE` buffer.
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")]
pub fn PyUnicode_EncodeLatin1(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
/// ASCII encoder for a raw `Py_UNICODE` buffer.
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")]
pub fn PyUnicode_EncodeASCII(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
/// Charmap encoder for a raw `Py_UNICODE` buffer, driven by the `mapping` object.
pub fn PyUnicode_EncodeCharmap(
data: *const Py_UNICODE,
length: Py_ssize_t,
mapping: *mut PyObject,
errors: *const c_char,
) -> *mut PyObject;
/// Translates a raw `Py_UNICODE` buffer through the `table` object.
pub fn PyUnicode_TranslateCharmap(
data: *const Py_UNICODE,
length: Py_ssize_t,
table: *mut PyObject,
errors: *const c_char,
) -> *mut PyObject;
/// Writes its result into the caller-supplied `output` buffer and returns a
/// `c_int` status instead of a new object. Takes a mutable `s` pointer.
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")]
pub fn PyUnicode_EncodeDecimal(
s: *mut Py_UNICODE,
length: Py_ssize_t,
output: *mut c_char,
errors: *const c_char,
) -> c_int;
/// NOTE(review): presumably returns a new object with decimal digits replaced
/// by their ASCII equivalents — confirm.
#[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")]
pub fn PyUnicode_TransformDecimalToASCII(
s: *mut Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
}