1#[cfg(any(Py_3_11, not(PyPy)))]
2use crate::Py_hash_t;
3use crate::{PyObject, Py_UCS1, Py_UCS2, Py_UCS4, Py_ssize_t};
4use libc::wchar_t;
5use std::os::raw::{c_char, c_int, c_uint, c_void};
6
/// Byte-array storage for a packed group of C bitfields.
///
/// `Storage` is expected to be a byte container such as `[u8; N]`; the bit
/// accessors are only available when it implements `AsRef<[u8]>` and
/// `AsMut<[u8]>`.
#[repr(C)]
struct BitfieldUnit<Storage> {
    storage: Storage,
}

impl<Storage> BitfieldUnit<Storage> {
    /// Wraps `storage` as-is, without touching any bit.
    #[inline]
    pub const fn new(storage: Storage) -> Self {
        BitfieldUnit { storage }
    }
}

#[cfg(not(GraalPy))]
impl<Storage: AsRef<[u8]> + AsMut<[u8]>> BitfieldUnit<Storage> {
    /// Returns whether the bit at absolute position `index` is set.
    ///
    /// Bits are numbered from the least significant bit of each byte on
    /// little-endian targets and from the most significant bit on big-endian
    /// targets.
    #[inline]
    fn get_bit(&self, index: usize) -> bool {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let bytes = self.storage.as_ref();
        let within_byte = index % 8;
        let shift = if cfg!(target_endian = "big") {
            7 - within_byte
        } else {
            within_byte
        };
        (bytes[index / 8] >> shift) & 1 != 0
    }

    /// Sets (`val == true`) or clears (`val == false`) the bit at absolute
    /// position `index`, using the same bit numbering as [`Self::get_bit`].
    #[inline]
    fn set_bit(&mut self, index: usize, val: bool) {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let slot = &mut self.storage.as_mut()[index / 8];
        let within_byte = index % 8;
        let shift = if cfg!(target_endian = "big") {
            7 - within_byte
        } else {
            within_byte
        };
        let mask: u8 = 1 << shift;
        *slot = if val { *slot | mask } else { *slot & !mask };
    }

    /// Reads the `bit_width`-bit field that starts at `bit_offset` and returns
    /// it as an unsigned integer.
    ///
    /// On big-endian targets the bit order inside the field is mirrored so the
    /// numeric value comes out the same as on little-endian targets.
    #[inline]
    fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
        let width = bit_width as usize;
        (0..width)
            .filter(|&src| self.get_bit(src + bit_offset))
            .fold(0u64, |acc, src| {
                let dest = if cfg!(target_endian = "big") {
                    width - 1 - src
                } else {
                    src
                };
                acc | (1 << dest)
            })
    }

    /// Writes the low `bit_width` bits of `val` into the field that starts at
    /// `bit_offset`; bits outside the field are left untouched.
    #[inline]
    fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
        let width = bit_width as usize;
        for src in 0..width {
            let bit_is_set = (val >> src) & 1 == 1;
            let dest = if cfg!(target_endian = "big") {
                width - 1 - src
            } else {
                src
            };
            self.set_bit(dest + bit_offset, bit_is_set);
        }
    }
}
119
// Bit layout of the packed `state` word: each field is described by the bit
// offset where it starts (`*_INDEX`) and its size in bits (`*_WIDTH`). Every
// field begins right where the previous one ends.
#[cfg(not(GraalPy))]
const STATE_INTERNED_INDEX: usize = 0;
#[cfg(not(GraalPy))]
const STATE_INTERNED_WIDTH: u8 = 2;

#[cfg(not(GraalPy))]
const STATE_KIND_INDEX: usize = STATE_INTERNED_INDEX + STATE_INTERNED_WIDTH as usize;
#[cfg(not(GraalPy))]
const STATE_KIND_WIDTH: u8 = 3;

#[cfg(not(GraalPy))]
const STATE_COMPACT_INDEX: usize = STATE_KIND_INDEX + STATE_KIND_WIDTH as usize;
#[cfg(not(GraalPy))]
const STATE_COMPACT_WIDTH: u8 = 1;

#[cfg(not(GraalPy))]
const STATE_ASCII_INDEX: usize = STATE_COMPACT_INDEX + STATE_COMPACT_WIDTH as usize;
#[cfg(not(GraalPy))]
const STATE_ASCII_WIDTH: u8 = 1;

// The `ready` flag only exists before Python 3.12.
#[cfg(not(any(Py_3_12, GraalPy)))]
const STATE_READY_INDEX: usize = STATE_ASCII_INDEX + STATE_ASCII_WIDTH as usize;
#[cfg(not(any(Py_3_12, GraalPy)))]
const STATE_READY_WIDTH: u8 = 1;
146
147#[repr(C)]
157#[repr(align(4))]
158struct PyASCIIObjectState {
159 bitfield_align: [u8; 0],
160 bitfield: BitfieldUnit<[u8; 4usize]>,
161}
162
163#[cfg(not(GraalPy))]
165#[allow(clippy::useless_transmute)]
166impl PyASCIIObjectState {
167 #[inline]
168 unsafe fn interned(&self) -> c_uint {
169 std::mem::transmute(
170 self.bitfield
171 .get(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH) as u32,
172 )
173 }
174
175 #[inline]
176 unsafe fn set_interned(&mut self, val: c_uint) {
177 let val: u32 = std::mem::transmute(val);
178 self.bitfield
179 .set(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH, val as u64)
180 }
181
182 #[inline]
183 unsafe fn kind(&self) -> c_uint {
184 std::mem::transmute(self.bitfield.get(STATE_KIND_INDEX, STATE_KIND_WIDTH) as u32)
185 }
186
187 #[inline]
188 unsafe fn set_kind(&mut self, val: c_uint) {
189 let val: u32 = std::mem::transmute(val);
190 self.bitfield
191 .set(STATE_KIND_INDEX, STATE_KIND_WIDTH, val as u64)
192 }
193
194 #[inline]
195 unsafe fn compact(&self) -> c_uint {
196 std::mem::transmute(self.bitfield.get(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH) as u32)
197 }
198
199 #[inline]
200 unsafe fn set_compact(&mut self, val: c_uint) {
201 let val: u32 = std::mem::transmute(val);
202 self.bitfield
203 .set(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH, val as u64)
204 }
205
206 #[inline]
207 unsafe fn ascii(&self) -> c_uint {
208 std::mem::transmute(self.bitfield.get(STATE_ASCII_INDEX, STATE_ASCII_WIDTH) as u32)
209 }
210
211 #[inline]
212 unsafe fn set_ascii(&mut self, val: c_uint) {
213 let val: u32 = std::mem::transmute(val);
214 self.bitfield
215 .set(STATE_ASCII_INDEX, STATE_ASCII_WIDTH, val as u64)
216 }
217
218 #[cfg(not(Py_3_12))]
219 #[inline]
220 unsafe fn ready(&self) -> c_uint {
221 std::mem::transmute(self.bitfield.get(STATE_READY_INDEX, STATE_READY_WIDTH) as u32)
222 }
223
224 #[cfg(not(Py_3_12))]
225 #[inline]
226 unsafe fn set_ready(&mut self, val: c_uint) {
227 let val: u32 = std::mem::transmute(val);
228 self.bitfield
229 .set(STATE_READY_INDEX, STATE_READY_WIDTH, val as u64)
230 }
231}
232
233impl From<u32> for PyASCIIObjectState {
234 #[inline]
235 fn from(value: u32) -> Self {
236 PyASCIIObjectState {
237 bitfield_align: [],
238 bitfield: BitfieldUnit::new(value.to_ne_bytes()),
239 }
240 }
241}
242
243impl From<PyASCIIObjectState> for u32 {
244 #[inline]
245 fn from(value: PyASCIIObjectState) -> Self {
246 u32::from_ne_bytes(value.bitfield.storage)
247 }
248}
249
/// `#[repr(C)]` mirror of the leading part of a unicode object.
///
/// Field order and the `cfg` gates define the C layout, so they must stay
/// exactly as declared.
#[repr(C)]
pub struct PyASCIIObject {
    /// Common object header.
    pub ob_base: PyObject,
    /// Value returned by [`PyUnicode_GET_LENGTH`].
    pub length: Py_ssize_t,
    /// Only present on Python 3.11+ or on non-PyPy interpreters.
    #[cfg(any(Py_3_11, not(PyPy)))]
    pub hash: Py_hash_t,
    /// Packed bit field. Use the accessor methods on this type (`interned`,
    /// `kind`, `compact`, `ascii`, and `ready` before Python 3.12) rather than
    /// decoding it by hand.
    pub state: u32,
    /// Only present before Python 3.12.
    #[cfg(not(Py_3_12))]
    pub wstr: *mut wchar_t,
}
271
272#[cfg(not(GraalPy))]
274impl PyASCIIObject {
275 #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] #[inline]
281 pub unsafe fn interned(&self) -> c_uint {
282 PyASCIIObjectState::from(self.state).interned()
283 }
284
285 #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] #[inline]
292 pub unsafe fn set_interned(&mut self, val: c_uint) {
293 let mut state = PyASCIIObjectState::from(self.state);
294 state.set_interned(val);
295 self.state = u32::from(state);
296 }
297
298 #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
302 #[inline]
304 pub unsafe fn kind(&self) -> c_uint {
305 PyASCIIObjectState::from(self.state).kind()
306 }
307
308 #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
312 #[inline]
314 pub unsafe fn set_kind(&mut self, val: c_uint) {
315 let mut state = PyASCIIObjectState::from(self.state);
316 state.set_kind(val);
317 self.state = u32::from(state);
318 }
319
320 #[inline]
324 pub unsafe fn compact(&self) -> c_uint {
325 PyASCIIObjectState::from(self.state).compact()
326 }
327
328 #[inline]
332 pub unsafe fn set_compact(&mut self, val: c_uint) {
333 let mut state = PyASCIIObjectState::from(self.state);
334 state.set_compact(val);
335 self.state = u32::from(state);
336 }
337
338 #[inline]
342 pub unsafe fn ascii(&self) -> c_uint {
343 PyASCIIObjectState::from(self.state).ascii()
344 }
345
346 #[inline]
350 pub unsafe fn set_ascii(&mut self, val: c_uint) {
351 let mut state = PyASCIIObjectState::from(self.state);
352 state.set_ascii(val);
353 self.state = u32::from(state);
354 }
355
356 #[cfg(not(Py_3_12))]
360 #[inline]
361 pub unsafe fn ready(&self) -> c_uint {
362 PyASCIIObjectState::from(self.state).ready()
363 }
364
365 #[cfg(not(Py_3_12))]
369 #[inline]
370 pub unsafe fn set_ready(&mut self, val: c_uint) {
371 let mut state = PyASCIIObjectState::from(self.state);
372 state.set_ready(val);
373 self.state = u32::from(state);
374 }
375}
376
/// `#[repr(C)]` extension of [`PyASCIIObject`] that adds the UTF-8 fields.
///
/// `_base` must stay the first field so a pointer to this struct can also be
/// used as a pointer to [`PyASCIIObject`].
#[repr(C)]
pub struct PyCompactUnicodeObject {
    pub _base: PyASCIIObject,
    /// NOTE(review): presumably the byte length of `utf8` — confirm against CPython.
    pub utf8_length: Py_ssize_t,
    /// NOTE(review): presumably a cached UTF-8 representation — confirm against CPython.
    pub utf8: *mut c_char,
    /// Only present before Python 3.12.
    #[cfg(not(Py_3_12))]
    pub wstr_length: Py_ssize_t,
}
385
/// The character-buffer pointer of a [`PyUnicodeObject`], viewable untyped
/// (`any`) or as a pointer to one of the three code-unit types.
///
/// All members share the same storage; reading any of them is `unsafe`.
#[repr(C)]
pub union PyUnicodeObjectData {
    pub any: *mut c_void,
    pub latin1: *mut Py_UCS1,
    pub ucs2: *mut Py_UCS2,
    pub ucs4: *mut Py_UCS4,
}
393
/// `#[repr(C)]` extension of [`PyCompactUnicodeObject`] that adds an explicit
/// data pointer.
///
/// [`_PyUnicode_NONCOMPACT_DATA`] reads `data.any` from this struct.
#[repr(C)]
pub struct PyUnicodeObject {
    pub _base: PyCompactUnicodeObject,
    pub data: PyUnicodeObjectData,
}
399
extern "C" {
    /// Raw binding to CPython's `_PyUnicode_CheckConsistency`.
    ///
    /// Not declared when building for PyPy or GraalPy.
    /// NOTE(review): semantics of `check_content` and of the return value are
    /// defined by CPython — confirm there before relying on them.
    #[cfg(not(any(PyPy, GraalPy)))]
    pub fn _PyUnicode_CheckConsistency(op: *mut PyObject, check_content: c_int) -> c_int;
}
404
// Named values for the interning state of a string.
// NOTE(review): presumably these are the values stored in the `interned`
// bit field of `PyASCIIObject::state` — confirm against CPython.

/// Interning state value `0`.
pub const SSTATE_NOT_INTERNED: c_uint = 0;
/// Interning state value `1`.
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
/// Interning state value `2`.
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
/// Interning state value `3`; only defined on Python 3.12+.
#[cfg(Py_3_12)]
pub const SSTATE_INTERNED_IMMORTAL_STATIC: c_uint = 3;
415
416#[cfg(not(GraalPy))]
417#[inline]
418pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
419 debug_assert!(crate::PyUnicode_Check(op) != 0);
420 #[cfg(not(Py_3_12))]
421 debug_assert!(PyUnicode_IS_READY(op) != 0);
422
423 (*(op as *mut PyASCIIObject)).ascii()
424}
425
426#[cfg(not(GraalPy))]
427#[inline]
428pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
429 (*(op as *mut PyASCIIObject)).compact()
430}
431
432#[cfg(not(GraalPy))]
433#[inline]
434pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
435 ((*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0).into()
436}
437
// Named values for the `kind` of a unicode object, as returned by
// `PyUnicode_KIND`.

/// Kind value `0`; only defined before Python 3.12 and deprecated.
#[cfg(not(Py_3_12))]
#[deprecated(note = "Removed in Python 3.12")]
pub const PyUnicode_WCHAR_KIND: c_uint = 0;

/// Kind value `1`.
pub const PyUnicode_1BYTE_KIND: c_uint = 1;
/// Kind value `2`.
pub const PyUnicode_2BYTE_KIND: c_uint = 2;
/// Kind value `4`.
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
445
446#[cfg(not(any(GraalPy, PyPy)))]
447#[inline]
448pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
449 PyUnicode_DATA(op) as *mut Py_UCS1
450}
451
452#[cfg(not(any(GraalPy, PyPy)))]
453#[inline]
454pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
455 PyUnicode_DATA(op) as *mut Py_UCS2
456}
457
458#[cfg(not(any(GraalPy, PyPy)))]
459#[inline]
460pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
461 PyUnicode_DATA(op) as *mut Py_UCS4
462}
463
464#[cfg(not(GraalPy))]
465#[inline]
466pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
467 debug_assert!(crate::PyUnicode_Check(op) != 0);
468 #[cfg(not(Py_3_12))]
469 debug_assert!(PyUnicode_IS_READY(op) != 0);
470
471 (*(op as *mut PyASCIIObject)).kind()
472}
473
474#[cfg(not(GraalPy))]
475#[inline]
476pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
477 if PyUnicode_IS_ASCII(op) != 0 {
478 (op as *mut PyASCIIObject).offset(1) as *mut c_void
479 } else {
480 (op as *mut PyCompactUnicodeObject).offset(1) as *mut c_void
481 }
482}
483
484#[cfg(not(any(GraalPy, PyPy)))]
485#[inline]
486pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
487 debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
488
489 (*(op as *mut PyUnicodeObject)).data.any
490}
491
492#[cfg(not(any(GraalPy, PyPy)))]
493#[inline]
494pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
495 debug_assert!(crate::PyUnicode_Check(op) != 0);
496
497 if PyUnicode_IS_COMPACT(op) != 0 {
498 _PyUnicode_COMPACT_DATA(op)
499 } else {
500 _PyUnicode_NONCOMPACT_DATA(op)
501 }
502}
503
504#[cfg(not(GraalPy))]
509#[inline]
510pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
511 debug_assert!(crate::PyUnicode_Check(op) != 0);
512 #[cfg(not(Py_3_12))]
513 debug_assert!(PyUnicode_IS_READY(op) != 0);
514
515 (*(op as *mut PyASCIIObject)).length
516}
517
/// Variant used on Python 3.12+ and on GraalPy: always returns `1` and never
/// reads `_op`.
#[cfg(any(Py_3_12, GraalPy))]
#[inline]
pub unsafe fn PyUnicode_IS_READY(_op: *mut PyObject) -> c_uint {
    1
}
524
525#[cfg(not(any(GraalPy, Py_3_12)))]
526#[inline]
527pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
528 (*(op as *mut PyASCIIObject)).ready()
529}
530
/// Variant used on Python 3.12+ and on GraalPy: does nothing and always
/// returns `0`; `_op` is never read.
#[cfg(any(Py_3_12, GraalPy))]
#[inline]
pub unsafe fn PyUnicode_READY(_op: *mut PyObject) -> c_int {
    0
}
536
537#[cfg(not(any(Py_3_12, GraalPy)))]
538#[inline]
539pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
540 debug_assert!(crate::PyUnicode_Check(op) != 0);
541
542 if PyUnicode_IS_READY(op) != 0 {
543 0
544 } else {
545 _PyUnicode_Ready(op)
546 }
547}
548
// Raw declarations of CPython unicode C-API functions. Each function's
// contract is defined by CPython; only what the attributes below show
// (availability and link names) is stated here.
extern "C" {
    /// Linked as `PyPyUnicode_New` on PyPy.
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_New")]
    pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
    /// Linked as `_PyPyUnicode_Ready` on PyPy. Called by the pre-3.12
    /// `PyUnicode_READY` when the object is not yet ready.
    #[cfg_attr(PyPy, link_name = "_PyPyUnicode_Ready")]
    pub fn _PyUnicode_Ready(unicode: *mut PyObject) -> c_int;

    /// Not declared on PyPy.
    #[cfg(not(PyPy))]
    pub fn PyUnicode_CopyCharacters(
        to: *mut PyObject,
        to_start: Py_ssize_t,
        from: *mut PyObject,
        from_start: Py_ssize_t,
        how_many: Py_ssize_t,
    ) -> Py_ssize_t;

    /// Not declared on PyPy.
    #[cfg(not(PyPy))]
    pub fn PyUnicode_Fill(
        unicode: *mut PyObject,
        start: Py_ssize_t,
        length: Py_ssize_t,
        fill_char: Py_UCS4,
    ) -> Py_ssize_t;

    /// Deprecated; only declared before Python 3.12. Linked as
    /// `PyPyUnicode_FromUnicode` on PyPy.
    #[cfg(not(Py_3_12))]
    #[deprecated]
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")]
    pub fn PyUnicode_FromUnicode(u: *const wchar_t, size: Py_ssize_t) -> *mut PyObject;

    /// Linked as `PyPyUnicode_FromKindAndData` on PyPy.
    /// NOTE(review): `kind` is presumably one of the `PyUnicode_*_KIND`
    /// constants — confirm against CPython.
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromKindAndData")]
    pub fn PyUnicode_FromKindAndData(
        kind: c_int,
        buffer: *const c_void,
        size: Py_ssize_t,
    ) -> *mut PyObject;

    /// Deprecated; only declared before Python 3.12. Linked as
    /// `PyPyUnicode_AsUnicode` on PyPy.
    #[cfg(not(Py_3_12))]
    #[deprecated]
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")]
    pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut wchar_t;

    /// Deprecated; only declared before Python 3.12. Linked as
    /// `PyPyUnicode_AsUnicodeAndSize` on PyPy.
    #[cfg(not(Py_3_12))]
    #[deprecated]
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")]
    pub fn PyUnicode_AsUnicodeAndSize(
        unicode: *mut PyObject,
        size: *mut Py_ssize_t,
    ) -> *mut wchar_t;

}
614
// Raw declarations of `PyUnicode_AsUTF8` and of the encoder family that takes
// a `wchar_t` buffer plus a length. Each function's contract is defined by
// CPython; only link-name overrides visible in the attributes are stated here.
// NOTE(review): none of the `wchar_t` encoders below carries a version `cfg` —
// confirm they exist in every supported interpreter before calling them.
extern "C" {
    /// Linked as `PyPyUnicode_AsUTF8` on PyPy.
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
    pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;

    pub fn PyUnicode_Encode(
        s: *const wchar_t,
        size: Py_ssize_t,
        encoding: *const c_char,
        errors: *const c_char,
    ) -> *mut PyObject;

    pub fn PyUnicode_EncodeUTF7(
        data: *const wchar_t,
        length: Py_ssize_t,
        base64SetO: c_int,
        base64WhiteSpace: c_int,
        errors: *const c_char,
    ) -> *mut PyObject;

    /// Linked as `PyPyUnicode_EncodeUTF8` on PyPy.
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")]
    pub fn PyUnicode_EncodeUTF8(
        data: *const wchar_t,
        length: Py_ssize_t,
        errors: *const c_char,
    ) -> *mut PyObject;

    pub fn PyUnicode_EncodeUTF32(
        data: *const wchar_t,
        length: Py_ssize_t,
        errors: *const c_char,
        byteorder: c_int,
    ) -> *mut PyObject;

    pub fn PyUnicode_EncodeUTF16(
        data: *const wchar_t,
        length: Py_ssize_t,
        errors: *const c_char,
        byteorder: c_int,
    ) -> *mut PyObject;

    pub fn PyUnicode_EncodeUnicodeEscape(data: *const wchar_t, length: Py_ssize_t)
        -> *mut PyObject;

    pub fn PyUnicode_EncodeRawUnicodeEscape(
        data: *const wchar_t,
        length: Py_ssize_t,
    ) -> *mut PyObject;

    /// Linked as `PyPyUnicode_EncodeLatin1` on PyPy.
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")]
    pub fn PyUnicode_EncodeLatin1(
        data: *const wchar_t,
        length: Py_ssize_t,
        errors: *const c_char,
    ) -> *mut PyObject;

    /// Linked as `PyPyUnicode_EncodeASCII` on PyPy.
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")]
    pub fn PyUnicode_EncodeASCII(
        data: *const wchar_t,
        length: Py_ssize_t,
        errors: *const c_char,
    ) -> *mut PyObject;

    pub fn PyUnicode_EncodeCharmap(
        data: *const wchar_t,
        length: Py_ssize_t,
        mapping: *mut PyObject,
        errors: *const c_char,
    ) -> *mut PyObject;

    pub fn PyUnicode_TranslateCharmap(
        data: *const wchar_t,
        length: Py_ssize_t,
        table: *mut PyObject,
        errors: *const c_char,
    ) -> *mut PyObject;

    /// Linked as `PyPyUnicode_EncodeDecimal` on PyPy.
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")]
    pub fn PyUnicode_EncodeDecimal(
        s: *mut wchar_t,
        length: Py_ssize_t,
        output: *mut c_char,
        errors: *const c_char,
    ) -> c_int;

    /// Linked as `PyPyUnicode_TransformDecimalToASCII` on PyPy.
    #[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")]
    pub fn PyUnicode_TransformDecimalToASCII(s: *mut wchar_t, length: Py_ssize_t) -> *mut PyObject;

}
739
740