1use std::borrow::Cow;
2use std::ffi::{CStr, CString, OsStr};
3#[cfg(unix)]
4use std::os::unix::ffi::OsStrExt;
5#[cfg(target_os = "wasi")]
6use std::os::wasi::ffi::OsStrExt;
7use std::{io, str};
8
9pub fn bytes_to_host(bytes: &[u8]) -> io::Result<CString> {
12 let s = str::from_utf8(bytes).map_err(|_| encoding_error())?;
13 str_to_host(s)
14}
15
16pub fn str_to_host(s: &str) -> io::Result<CString> {
19 match CString::new(s) {
20 Ok(c_string) => Ok(c_string),
21 Err(e) => from_arf(s, e.nul_position()),
22 }
23}
24
25pub fn host_os_str_to_str(host: &OsStr) -> io::Result<Cow<str>> {
29 if host.as_bytes().contains(&b'\0') {
30 return Err(encoding_error());
31 }
32 Ok(if let Ok(s) = str::from_utf8(host.as_bytes()) {
33 Cow::Borrowed(s)
34 } else {
35 Cow::Owned(to_arf(host.as_bytes()))
36 })
37}
38
39pub fn host_os_str_to_bytes(host: &OsStr) -> io::Result<Cow<[u8]>> {
43 Ok(match host_os_str_to_str(host)? {
44 Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
45 Cow::Owned(b) => Cow::Owned(b.into_bytes()),
46 })
47}
48
49pub fn host_c_str_to_str(host: &CStr) -> Cow<str> {
52 if let Ok(s) = str::from_utf8(host.to_bytes()) {
53 Cow::Borrowed(s)
54 } else {
55 Cow::Owned(to_arf(host.to_bytes()))
56 }
57}
58
59pub fn host_c_str_to_bytes(host: &CStr) -> Cow<[u8]> {
62 let bytes = host_c_str_to_str(host);
63 match bytes {
64 Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
65 Cow::Owned(b) => Cow::Owned(b.into_bytes()),
66 }
67}
68
69#[cold]
71fn from_arf(s: &str, nul: usize) -> io::Result<CString> {
72 if !s.starts_with('\u{feff}') {
73 return Err(encoding_error());
74 }
75
76 let mut lossy = s.bytes().skip('\u{feff}'.len_utf8());
77 let mut nul_escaped = s.bytes().skip(nul + 1);
78 let mut any_invalid = false;
79 let mut vec = Vec::new();
80 while let Some(b) = nul_escaped.next() {
81 if b == b'\0' {
82 let more = nul_escaped.next().ok_or_else(encoding_error)?;
83 if (more & 0x80) != 0 {
84 return Err(encoding_error());
85 }
86 let l0 = lossy.next().ok_or_else(encoding_error)?;
88 let l1 = lossy.next().ok_or_else(encoding_error)?;
89 let l2 = lossy.next().ok_or_else(encoding_error)?;
90 if [l0, l1, l2] != [0xef, 0xbf, 0xbd] {
91 return Err(encoding_error());
92 }
93 any_invalid = true;
94 vec.push(more | 0x80);
95 } else {
96 if lossy.next() != Some(b) {
97 return Err(encoding_error());
98 }
99 vec.push(b);
100 }
101 }
102 if !any_invalid {
103 return Err(encoding_error());
104 }
105 if lossy.next() != Some(b'\0') {
106 return Err(encoding_error());
107 }
108
109 Ok(unsafe { CString::from_vec_unchecked(vec) })
111}
112
/// Encodes a byte string as an ARF string.
///
/// The result consists of, in order:
///
/// 1. U+FEFF,
/// 2. `bytes` with every byte that fails UTF-8 validation replaced by U+FFFD,
/// 3. a NUL,
/// 4. `bytes` with every byte that fails UTF-8 validation replaced by a NUL
///    followed by that byte with its high bit cleared.
///
/// Validation restarts immediately after each rejected byte, so each byte of
/// a malformed sequence is escaped individually.
#[cold]
fn to_arf(bytes: &[u8]) -> String {
    // Both renderings are built in a single scan and joined at the end.
    let mut lossy = String::from('\u{feff}');
    let mut escaped = String::from('\0');

    let mut rest = bytes;
    loop {
        // Length of the longest valid UTF-8 prefix of what is left.
        let valid_len = match str::from_utf8(rest) {
            Ok(text) => text.len(),
            Err(error) => error.valid_up_to(),
        };
        let (valid, tail) = rest.split_at(valid_len);

        // SAFETY: `valid` is exactly the prefix that `str::from_utf8` just
        // reported as well-formed (the whole slice on success, the first
        // `valid_up_to()` bytes on failure).
        let valid = unsafe { str::from_utf8_unchecked(valid) };
        lossy.push_str(valid);
        escaped.push_str(valid);

        match tail.split_first() {
            Some((&bad, after)) => {
                lossy.push('\u{FFFD}');
                escaped.push('\0');
                escaped.push(char::from(bad & 0x7f));
                rest = after;
            }
            // Nothing left after the valid prefix: the input is exhausted.
            None => break,
        }
    }

    lossy.push_str(&escaped);
    lossy
}
172
173#[cold]
174fn encoding_error() -> io::Error {
175 ::rustix::io::Errno::ILSEQ.into()
176}
177
/// NUL-free UTF-8 strings must reach the host byte-for-byte unchanged, even
/// when they contain U+FFFD or start with U+FEFF.
#[test]
fn utf8_inputs() {
    let inputs = ["", "f", "foo", "\u{fffd}", "\u{fffd}foo", "\u{feff}foo"];
    for &input in &inputs {
        assert_eq!(str_to_host(input).unwrap().to_bytes(), input.as_bytes());
    }
}
196
/// Well-formed ARF strings must decode to the host bytes they escape.
#[test]
fn arf_inputs() {
    // (ARF string, expected host bytes)
    let cases: &[(&str, &[u8])] = &[
        (
            "\u{feff}hello\u{fffd}world\0hello\0\x05world",
            b"hello\x85world",
        ),
        ("\u{feff}hello\u{fffd}\0hello\0\x05", b"hello\x85"),
    ];
    for &(arf, expected) in cases {
        assert_eq!(str_to_host(arf).unwrap().to_bytes(), expected);
    }
}
212
/// Byte strings that are not valid UTF-8 must be rejected.
#[test]
fn errors_from_bytes() {
    let invalid: &[&[u8]] = &[b"\xfe", b"\xc0\xff"];
    for &bytes in invalid {
        assert!(bytes_to_host(bytes).is_err());
    }
}
218
/// Strings that contain a NUL but are not well-formed ARF must be rejected.
#[test]
fn errors_from_str() {
    let malformed = [
        "\u{feff}hello world\0hello world",
        "\u{feff}hello world\0\0hello world\0",
        "\u{feff}hello\u{fffd}world\0\0hello\0\x05world\0",
        "\u{fffe}hello\u{fffd}world\0hello\0\x05world",
        "\u{feff}hello\u{fffd}\0hello\0",
    ];
    for &input in &malformed {
        assert!(str_to_host(input).is_err());
    }
}
227
/// Host strings that are already valid UTF-8 must come back unchanged, both
/// through the `OsStr` entry point and through the `CStr` entry point.
#[test]
fn valid_utf8() {
    // (host bytes including the trailing NUL, expected string)
    let cases: &[(&[u8], &str)] = &[(b"\0", ""), (b"foo\0", "foo")];
    for &(with_nul, expected) in cases {
        let without_nul = &with_nul[..with_nul.len() - 1];
        assert_eq!(
            host_os_str_to_str(OsStr::from_bytes(without_nul)).unwrap(),
            expected
        );
        assert_eq!(
            host_c_str_to_str(CStr::from_bytes_with_nul(with_nul).unwrap()),
            expected
        );
    }
}
246
/// Checks the exact string produced for a set of host byte strings, through
/// both the `OsStr` entry point and the `CStr` entry point.
#[test]
fn not_utf8() {
    // (host bytes including the trailing NUL, expected string)
    let cases: &[(&[u8], &str)] = &[
        (b"\xfe\0", "\u{feff}\u{fffd}\0\0\u{7e}"),
        (
            b"\xc0\xff\0",
            "\u{feff}\u{fffd}\u{fffd}\0\0\u{40}\0\u{7f}",
        ),
        (b"\xef\xbb\xbf\0", "\u{feff}"),
        (
            b"\xef\xbb\xbf\xfd\0",
            "\u{feff}\u{feff}\u{fffd}\0\u{feff}\0\x7d",
        ),
        (
            b"\xe2\x98\0",
            "\u{feff}\u{fffd}\u{fffd}\0\0\u{62}\0\u{18}",
        ),
        (
            b"\xf0\x9f\0",
            "\u{feff}\u{fffd}\u{fffd}\0\0\u{70}\0\u{1f}",
        ),
        (
            b"\xf0\x9f\x92\0",
            "\u{feff}\u{fffd}\u{fffd}\u{fffd}\0\0\u{70}\0\u{1f}\0\u{12}",
        ),
    ];
    for &(with_nul, expected) in cases {
        let without_nul = &with_nul[..with_nul.len() - 1];
        assert_eq!(
            host_os_str_to_str(OsStr::from_bytes(without_nul)).unwrap(),
            expected
        );
        assert_eq!(
            host_c_str_to_str(CStr::from_bytes_with_nul(with_nul).unwrap()),
            expected
        );
    }
}
308
/// Converting in one direction and then back must reproduce the input.
#[test]
fn round_trip() {
    // UTF-8 -> host -> UTF-8.
    for &text in &["", "hello"] {
        let host = bytes_to_host(text.as_bytes()).unwrap();
        assert_eq!(
            host_os_str_to_str(OsStr::from_bytes(host.as_bytes())).unwrap(),
            text
        );
    }

    // host -> UTF-8 -> host, via both the `OsStr` and the `CStr` entry
    // points. Each entry includes the trailing NUL required by `CStr`.
    let hosts: &[&[u8]] = &[
        b"hello\0",
        b"h\xc0ello\xc1\0",
        b"\xf5\xff\0",
        b"\0",
        b"\xe6\x96\0",
    ];
    for &with_nul in hosts {
        let raw = &with_nul[..with_nul.len() - 1];

        let via_os_str = host_os_str_to_str(OsStr::from_bytes(raw)).unwrap();
        assert_eq!(str_to_host(&via_os_str).unwrap().as_bytes(), raw);

        let via_c_str = host_c_str_to_str(CStr::from_bytes_with_nul(with_nul).unwrap());
        assert_eq!(str_to_host(&via_c_str).unwrap().as_bytes(), raw);
    }
}