// pyo3_ffi/unicodeobject.rs

use crate::object::*;
use crate::pyport::Py_ssize_t;
use libc::wchar_t;
use std::os::raw::{c_char, c_int, c_void};
#[cfg(not(PyPy))]
use std::ptr::addr_of_mut;
7
8#[cfg(not(Py_LIMITED_API))]
9#[cfg_attr(
10    Py_3_13,
11    deprecated(note = "Deprecated since Python 3.13. Use `libc::wchar_t` instead.")
12)]
13pub type Py_UNICODE = wchar_t;
14
/// 32-bit unsigned character unit (`u32`).
pub type Py_UCS4 = u32;
/// 16-bit unsigned character unit (`u16`).
pub type Py_UCS2 = u16;
/// 8-bit unsigned character unit (`u8`).
pub type Py_UCS1 = u8;
18
19#[cfg_attr(windows, link(name = "pythonXY"))]
20extern "C" {
21    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Type")]
22    pub static mut PyUnicode_Type: PyTypeObject;
23    pub static mut PyUnicodeIter_Type: PyTypeObject;
24
25    #[cfg(PyPy)]
26    #[link_name = "PyPyUnicode_Check"]
27    pub fn PyUnicode_Check(op: *mut PyObject) -> c_int;
28
29    #[cfg(PyPy)]
30    #[link_name = "PyPyUnicode_CheckExact"]
31    pub fn PyUnicode_CheckExact(op: *mut PyObject) -> c_int;
32}
33
34#[inline]
35#[cfg(not(PyPy))]
36pub unsafe fn PyUnicode_Check(op: *mut PyObject) -> c_int {
37    PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
38}
39
40#[inline]
41#[cfg(not(PyPy))]
42pub unsafe fn PyUnicode_CheckExact(op: *mut PyObject) -> c_int {
43    (Py_TYPE(op) == addr_of_mut!(PyUnicode_Type)) as c_int
44}
45
46pub const Py_UNICODE_REPLACEMENT_CHARACTER: Py_UCS4 = 0xFFFD;
47
48extern "C" {
49
50    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromStringAndSize")]
51    pub fn PyUnicode_FromStringAndSize(u: *const c_char, size: Py_ssize_t) -> *mut PyObject;
52    pub fn PyUnicode_FromString(u: *const c_char) -> *mut PyObject;
53
54    pub fn PyUnicode_Substring(
55        str: *mut PyObject,
56        start: Py_ssize_t,
57        end: Py_ssize_t,
58    ) -> *mut PyObject;
59    pub fn PyUnicode_AsUCS4(
60        unicode: *mut PyObject,
61        buffer: *mut Py_UCS4,
62        buflen: Py_ssize_t,
63        copy_null: c_int,
64    ) -> *mut Py_UCS4;
65    pub fn PyUnicode_AsUCS4Copy(unicode: *mut PyObject) -> *mut Py_UCS4;
66    #[cfg_attr(PyPy, link_name = "PyPyUnicode_GetLength")]
67    pub fn PyUnicode_GetLength(unicode: *mut PyObject) -> Py_ssize_t;
68    #[cfg(not(Py_3_12))]
69    #[deprecated(note = "Removed in Python 3.12")]
70    #[cfg_attr(PyPy, link_name = "PyPyUnicode_GetSize")]
71    pub fn PyUnicode_GetSize(unicode: *mut PyObject) -> Py_ssize_t;
72    pub fn PyUnicode_ReadChar(unicode: *mut PyObject, index: Py_ssize_t) -> Py_UCS4;
73    pub fn PyUnicode_WriteChar(
74        unicode: *mut PyObject,
75        index: Py_ssize_t,
76        character: Py_UCS4,
77    ) -> c_int;
78    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Resize")]
79    pub fn PyUnicode_Resize(unicode: *mut *mut PyObject, length: Py_ssize_t) -> c_int;
80    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromEncodedObject")]
81    pub fn PyUnicode_FromEncodedObject(
82        obj: *mut PyObject,
83        encoding: *const c_char,
84        errors: *const c_char,
85    ) -> *mut PyObject;
86    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromObject")]
87    pub fn PyUnicode_FromObject(obj: *mut PyObject) -> *mut PyObject;
88    // #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromFormatV")]
89    // pub fn PyUnicode_FromFormatV(format: *const c_char, vargs: va_list) -> *mut PyObject;
90    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromFormat")]
91    pub fn PyUnicode_FromFormat(format: *const c_char, ...) -> *mut PyObject;
92    #[cfg_attr(PyPy, link_name = "PyPyUnicode_InternInPlace")]
93    pub fn PyUnicode_InternInPlace(arg1: *mut *mut PyObject);
94    #[cfg(not(Py_3_12))]
95    #[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
96    pub fn PyUnicode_InternImmortal(arg1: *mut *mut PyObject);
97    #[cfg_attr(PyPy, link_name = "PyPyUnicode_InternFromString")]
98    pub fn PyUnicode_InternFromString(u: *const c_char) -> *mut PyObject;
99    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromWideChar")]
100    pub fn PyUnicode_FromWideChar(w: *const wchar_t, size: Py_ssize_t) -> *mut PyObject;
101    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsWideChar")]
102    pub fn PyUnicode_AsWideChar(
103        unicode: *mut PyObject,
104        w: *mut wchar_t,
105        size: Py_ssize_t,
106    ) -> Py_ssize_t;
107    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsWideCharString")]
108    pub fn PyUnicode_AsWideCharString(
109        unicode: *mut PyObject,
110        size: *mut Py_ssize_t,
111    ) -> *mut wchar_t;
112    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromOrdinal")]
113    pub fn PyUnicode_FromOrdinal(ordinal: c_int) -> *mut PyObject;
114    pub fn PyUnicode_ClearFreeList() -> c_int;
115    #[cfg_attr(PyPy, link_name = "PyPyUnicode_GetDefaultEncoding")]
116    pub fn PyUnicode_GetDefaultEncoding() -> *const c_char;
117    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Decode")]
118    pub fn PyUnicode_Decode(
119        s: *const c_char,
120        size: Py_ssize_t,
121        encoding: *const c_char,
122        errors: *const c_char,
123    ) -> *mut PyObject;
124    pub fn PyUnicode_AsDecodedObject(
125        unicode: *mut PyObject,
126        encoding: *const c_char,
127        errors: *const c_char,
128    ) -> *mut PyObject;
129    pub fn PyUnicode_AsDecodedUnicode(
130        unicode: *mut PyObject,
131        encoding: *const c_char,
132        errors: *const c_char,
133    ) -> *mut PyObject;
134    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsEncodedObject")]
135    pub fn PyUnicode_AsEncodedObject(
136        unicode: *mut PyObject,
137        encoding: *const c_char,
138        errors: *const c_char,
139    ) -> *mut PyObject;
140    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsEncodedString")]
141    pub fn PyUnicode_AsEncodedString(
142        unicode: *mut PyObject,
143        encoding: *const c_char,
144        errors: *const c_char,
145    ) -> *mut PyObject;
146    pub fn PyUnicode_AsEncodedUnicode(
147        unicode: *mut PyObject,
148        encoding: *const c_char,
149        errors: *const c_char,
150    ) -> *mut PyObject;
151    pub fn PyUnicode_BuildEncodingMap(string: *mut PyObject) -> *mut PyObject;
152    pub fn PyUnicode_DecodeUTF7(
153        string: *const c_char,
154        length: Py_ssize_t,
155        errors: *const c_char,
156    ) -> *mut PyObject;
157    pub fn PyUnicode_DecodeUTF7Stateful(
158        string: *const c_char,
159        length: Py_ssize_t,
160        errors: *const c_char,
161        consumed: *mut Py_ssize_t,
162    ) -> *mut PyObject;
163    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF8")]
164    pub fn PyUnicode_DecodeUTF8(
165        string: *const c_char,
166        length: Py_ssize_t,
167        errors: *const c_char,
168    ) -> *mut PyObject;
169    pub fn PyUnicode_DecodeUTF8Stateful(
170        string: *const c_char,
171        length: Py_ssize_t,
172        errors: *const c_char,
173        consumed: *mut Py_ssize_t,
174    ) -> *mut PyObject;
175    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8String")]
176    pub fn PyUnicode_AsUTF8String(unicode: *mut PyObject) -> *mut PyObject;
177    #[cfg(any(Py_3_10, not(Py_LIMITED_API)))]
178    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
179    pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *const c_char;
180    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF32")]
181    pub fn PyUnicode_DecodeUTF32(
182        string: *const c_char,
183        length: Py_ssize_t,
184        errors: *const c_char,
185        byteorder: *mut c_int,
186    ) -> *mut PyObject;
187    pub fn PyUnicode_DecodeUTF32Stateful(
188        string: *const c_char,
189        length: Py_ssize_t,
190        errors: *const c_char,
191        byteorder: *mut c_int,
192        consumed: *mut Py_ssize_t,
193    ) -> *mut PyObject;
194    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF32String")]
195    pub fn PyUnicode_AsUTF32String(unicode: *mut PyObject) -> *mut PyObject;
196    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeUTF16")]
197    pub fn PyUnicode_DecodeUTF16(
198        string: *const c_char,
199        length: Py_ssize_t,
200        errors: *const c_char,
201        byteorder: *mut c_int,
202    ) -> *mut PyObject;
203    pub fn PyUnicode_DecodeUTF16Stateful(
204        string: *const c_char,
205        length: Py_ssize_t,
206        errors: *const c_char,
207        byteorder: *mut c_int,
208        consumed: *mut Py_ssize_t,
209    ) -> *mut PyObject;
210    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF16String")]
211    pub fn PyUnicode_AsUTF16String(unicode: *mut PyObject) -> *mut PyObject;
212    pub fn PyUnicode_DecodeUnicodeEscape(
213        string: *const c_char,
214        length: Py_ssize_t,
215        errors: *const c_char,
216    ) -> *mut PyObject;
217    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeEscapeString")]
218    pub fn PyUnicode_AsUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject;
219    pub fn PyUnicode_DecodeRawUnicodeEscape(
220        string: *const c_char,
221        length: Py_ssize_t,
222        errors: *const c_char,
223    ) -> *mut PyObject;
224    pub fn PyUnicode_AsRawUnicodeEscapeString(unicode: *mut PyObject) -> *mut PyObject;
225    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeLatin1")]
226    pub fn PyUnicode_DecodeLatin1(
227        string: *const c_char,
228        length: Py_ssize_t,
229        errors: *const c_char,
230    ) -> *mut PyObject;
231    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsLatin1String")]
232    pub fn PyUnicode_AsLatin1String(unicode: *mut PyObject) -> *mut PyObject;
233    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeASCII")]
234    pub fn PyUnicode_DecodeASCII(
235        string: *const c_char,
236        length: Py_ssize_t,
237        errors: *const c_char,
238    ) -> *mut PyObject;
239    #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsASCIIString")]
240    pub fn PyUnicode_AsASCIIString(unicode: *mut PyObject) -> *mut PyObject;
241    pub fn PyUnicode_DecodeCharmap(
242        string: *const c_char,
243        length: Py_ssize_t,
244        mapping: *mut PyObject,
245        errors: *const c_char,
246    ) -> *mut PyObject;
247    pub fn PyUnicode_AsCharmapString(
248        unicode: *mut PyObject,
249        mapping: *mut PyObject,
250    ) -> *mut PyObject;
251    pub fn PyUnicode_DecodeLocaleAndSize(
252        str: *const c_char,
253        len: Py_ssize_t,
254        errors: *const c_char,
255    ) -> *mut PyObject;
256    pub fn PyUnicode_DecodeLocale(str: *const c_char, errors: *const c_char) -> *mut PyObject;
257    pub fn PyUnicode_EncodeLocale(unicode: *mut PyObject, errors: *const c_char) -> *mut PyObject;
258    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FSConverter")]
259    pub fn PyUnicode_FSConverter(arg1: *mut PyObject, arg2: *mut c_void) -> c_int;
260    #[cfg_attr(PyPy, link_name = "PyPyUnicode_FSDecoder")]
261    pub fn PyUnicode_FSDecoder(arg1: *mut PyObject, arg2: *mut c_void) -> c_int;
262    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeFSDefault")]
263    pub fn PyUnicode_DecodeFSDefault(s: *const c_char) -> *mut PyObject;
264    #[cfg_attr(PyPy, link_name = "PyPyUnicode_DecodeFSDefaultAndSize")]
265    pub fn PyUnicode_DecodeFSDefaultAndSize(s: *const c_char, size: Py_ssize_t) -> *mut PyObject;
266    #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeFSDefault")]
267    pub fn PyUnicode_EncodeFSDefault(unicode: *mut PyObject) -> *mut PyObject;
268    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Concat")]
269    pub fn PyUnicode_Concat(left: *mut PyObject, right: *mut PyObject) -> *mut PyObject;
270    pub fn PyUnicode_Append(pleft: *mut *mut PyObject, right: *mut PyObject);
271    pub fn PyUnicode_AppendAndDel(pleft: *mut *mut PyObject, right: *mut PyObject);
272    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Split")]
273    pub fn PyUnicode_Split(
274        s: *mut PyObject,
275        sep: *mut PyObject,
276        maxsplit: Py_ssize_t,
277    ) -> *mut PyObject;
278    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Splitlines")]
279    pub fn PyUnicode_Splitlines(s: *mut PyObject, keepends: c_int) -> *mut PyObject;
280    pub fn PyUnicode_Partition(s: *mut PyObject, sep: *mut PyObject) -> *mut PyObject;
281    pub fn PyUnicode_RPartition(s: *mut PyObject, sep: *mut PyObject) -> *mut PyObject;
282    pub fn PyUnicode_RSplit(
283        s: *mut PyObject,
284        sep: *mut PyObject,
285        maxsplit: Py_ssize_t,
286    ) -> *mut PyObject;
287    pub fn PyUnicode_Translate(
288        str: *mut PyObject,
289        table: *mut PyObject,
290        errors: *const c_char,
291    ) -> *mut PyObject;
292    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Join")]
293    pub fn PyUnicode_Join(separator: *mut PyObject, seq: *mut PyObject) -> *mut PyObject;
294    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Tailmatch")]
295    pub fn PyUnicode_Tailmatch(
296        str: *mut PyObject,
297        substr: *mut PyObject,
298        start: Py_ssize_t,
299        end: Py_ssize_t,
300        direction: c_int,
301    ) -> Py_ssize_t;
302    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Find")]
303    pub fn PyUnicode_Find(
304        str: *mut PyObject,
305        substr: *mut PyObject,
306        start: Py_ssize_t,
307        end: Py_ssize_t,
308        direction: c_int,
309    ) -> Py_ssize_t;
310    pub fn PyUnicode_FindChar(
311        str: *mut PyObject,
312        ch: Py_UCS4,
313        start: Py_ssize_t,
314        end: Py_ssize_t,
315        direction: c_int,
316    ) -> Py_ssize_t;
317    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Count")]
318    pub fn PyUnicode_Count(
319        str: *mut PyObject,
320        substr: *mut PyObject,
321        start: Py_ssize_t,
322        end: Py_ssize_t,
323    ) -> Py_ssize_t;
324    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Replace")]
325    pub fn PyUnicode_Replace(
326        str: *mut PyObject,
327        substr: *mut PyObject,
328        replstr: *mut PyObject,
329        maxcount: Py_ssize_t,
330    ) -> *mut PyObject;
331    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Compare")]
332    pub fn PyUnicode_Compare(left: *mut PyObject, right: *mut PyObject) -> c_int;
333    #[cfg_attr(PyPy, link_name = "PyPyUnicode_CompareWithASCIIString")]
334    pub fn PyUnicode_CompareWithASCIIString(left: *mut PyObject, right: *const c_char) -> c_int;
335    #[cfg(Py_3_13)]
336    pub fn PyUnicode_EqualToUTF8(unicode: *mut PyObject, string: *const c_char) -> c_int;
337    #[cfg(Py_3_13)]
338    pub fn PyUnicode_EqualToUTF8AndSize(
339        unicode: *mut PyObject,
340        string: *const c_char,
341        size: Py_ssize_t,
342    ) -> c_int;
343
344    pub fn PyUnicode_RichCompare(
345        left: *mut PyObject,
346        right: *mut PyObject,
347        op: c_int,
348    ) -> *mut PyObject;
349    #[cfg_attr(PyPy, link_name = "PyPyUnicode_Format")]
350    pub fn PyUnicode_Format(format: *mut PyObject, args: *mut PyObject) -> *mut PyObject;
351    pub fn PyUnicode_Contains(container: *mut PyObject, element: *mut PyObject) -> c_int;
352    pub fn PyUnicode_IsIdentifier(s: *mut PyObject) -> c_int;
353}