1use unic_normal::StrNormalForm;
13use unic_ucd_bidi::{bidi_class, BidiClass};
14use unic_ucd_normal::is_combining_mark;
15
16use crate::mapping::Mapping;
17use crate::punycode;
18
/// Prefix that marks a label as Punycode-encoded: labels starting with it are
/// decoded during processing, and `to_ascii` prepends it to every label it encodes.
pub static PUNYCODE_PREFIX: &str = "xn--";
21
22fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec<Error>) {
23 match Mapping::of(codepoint) {
24 Mapping::Valid => output.push(codepoint),
25 Mapping::Ignored => {}
26 Mapping::Mapped(slice) => output.push_str(slice),
27 Mapping::Deviation(slice) => {
28 if flags.transitional_processing {
29 output.push_str(slice)
30 } else {
31 output.push(codepoint)
32 }
33 }
34 Mapping::Disallowed => {
35 errors.push(Error::DissallowedCharacter);
36 output.push(codepoint);
37 }
38 Mapping::DisallowedStd3Valid => {
39 if flags.use_std3_ascii_rules {
40 errors.push(Error::DissallowedByStd3AsciiRules);
41 }
42 output.push(codepoint)
43 }
44 Mapping::DisallowedStd3Mapped(slice) => {
45 if flags.use_std3_ascii_rules {
46 errors.push(Error::DissallowedMappedInStd3);
47 }
48 output.push_str(slice)
49 }
50 }
51}
52
53fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
55 use self::bidi_class::abbr_names::*;
56
57 if !is_bidi_domain {
60 return true;
61 }
62
63 let mut chars = label.chars();
64 let first_char_class = match chars.next() {
65 Some(c) => BidiClass::of(c),
66 None => return true, };
68
69 match first_char_class {
70 L => {
72 while let Some(c) = chars.next() {
74 if !matches!(BidiClass::of(c), L | EN | ES | CS | ET | ON | BN | NSM) {
75 return false;
76 }
77 }
78
79 let mut rev_chars = label.chars().rev();
82 let mut last_non_nsm = rev_chars.next();
83 loop {
84 match last_non_nsm {
85 Some(c) if BidiClass::of(c) == NSM => {
86 last_non_nsm = rev_chars.next();
87 continue;
88 }
89 _ => {
90 break;
91 }
92 }
93 }
94 match last_non_nsm {
95 Some(c) if BidiClass::of(c) == L || BidiClass::of(c) == EN => {}
96 Some(_) => {
97 return false;
98 }
99 _ => {}
100 }
101 }
102
103 R | AL => {
105 let mut found_en = false;
106 let mut found_an = false;
107
108 for c in chars {
110 let char_class = BidiClass::of(c);
111
112 if char_class == EN {
113 found_en = true;
114 }
115 if char_class == AN {
116 found_an = true;
117 }
118
119 if !matches!(char_class, R | AL | AN | EN | ES | CS | ET | ON | BN | NSM) {
120 return false;
121 }
122 }
123
124 let mut rev_chars = label.chars().rev();
126 let mut last = rev_chars.next();
127 loop {
128 match last {
130 Some(c) if BidiClass::of(c) == NSM => {
131 last = rev_chars.next();
132 continue;
133 }
134 _ => {
135 break;
136 }
137 }
138 }
139 match last {
140 Some(c) if matches!(BidiClass::of(c), R | AL | EN | AN) => {}
141 _ => {
142 return false;
143 }
144 }
145
146 if found_an && found_en {
148 return false;
149 }
150 }
151
152 _ => {
154 return false;
155 }
156 }
157
158 true
159}
160
161#[cfg_attr(feature = "cargo-clippy", allow(if_same_then_else))]
163fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Error>) {
164 let first_char = label.chars().next();
165
166 if first_char == None {
167 }
169 else if label.nfc().ne(label.chars()) {
171 errors.push(Error::ValidityCriteria);
172 }
173 else if label.starts_with('-') || label.ends_with('-') {
183 errors.push(Error::ValidityCriteria);
184 }
185 else if is_combining_mark(first_char.unwrap()) {
191 errors.push(Error::ValidityCriteria);
192 }
193 else if label.chars().any(|c| match Mapping::of(c) {
195 Mapping::Valid => false,
196 Mapping::Deviation(_) => flags.transitional_processing,
197 Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules,
198 _ => true,
199 }) {
200 errors.push(Error::ValidityCriteria);
201 }
202 else if !passes_bidi(label, is_bidi_domain) {
210 errors.push(Error::ValidityCriteria);
211 }
212}
213
214fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
216 use self::bidi_class::abbr_names::*;
217
218 let mut mapped = String::new();
219 for c in domain.chars() {
220 map_char(c, flags, &mut mapped, errors)
221 }
222 let normalized: String = mapped.nfc().collect();
223
224 let mut is_bidi_domain = domain
228 .chars()
229 .any(|c| matches!(BidiClass::of(c), R | AL | AN));
230 if !is_bidi_domain {
231 for label in normalized.split('.') {
233 if label.starts_with(PUNYCODE_PREFIX) {
234 match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) {
235 Some(decoded_label) => {
236 if decoded_label
237 .chars()
238 .any(|c| matches!(BidiClass::of(c), R | AL | AN))
239 {
240 is_bidi_domain = true;
241 }
242 }
243 None => {
244 is_bidi_domain = true;
245 }
246 }
247 }
248 }
249 }
250
251 let mut validated = String::new();
252 let mut first = true;
253 for label in normalized.split('.') {
254 if !first {
255 validated.push('.');
256 }
257 first = false;
258 if label.starts_with(PUNYCODE_PREFIX) {
259 match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) {
260 Some(decoded_label) => {
261 let flags = Flags {
262 transitional_processing: false,
263 ..flags
264 };
265 validate(&decoded_label, is_bidi_domain, flags, errors);
266 validated.push_str(&decoded_label)
267 }
268 None => errors.push(Error::PunycodeError),
269 }
270 } else {
271 validate(label, is_bidi_domain, flags, errors);
272 validated.push_str(label)
273 }
274 }
275 validated
276}
277
/// Options controlling how a domain name is processed.
#[derive(Clone, Copy, Debug)]
pub struct Flags {
    /// When set, characters with a `DisallowedStd3Valid` or
    /// `DisallowedStd3Mapped` mapping status are reported as errors.
    pub use_std3_ascii_rules: bool,

    /// When set, characters with a `Deviation` mapping status are replaced by
    /// their mapping instead of being kept. `to_unicode` always turns this off,
    /// and Punycode-decoded labels are always validated with it off.
    pub transitional_processing: bool,

    /// When set, `to_ascii` additionally rejects empty domains or labels,
    /// domains longer than 253 bytes and labels longer than 63 bytes.
    pub verify_dns_length: bool,
}
297
/// The individual problems that processing a domain name can report.
#[cfg_attr(feature = "cargo-clippy", allow(enum_variant_names))]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Error {
    /// A label could not be Punycode-decoded or Punycode-encoded.
    PunycodeError,
    /// A label failed one of the checks performed by `validate`.
    ValidityCriteria,
    /// A `DisallowedStd3Valid` character was found while STD3 rules are on.
    DissallowedByStd3AsciiRules,
    /// A `DisallowedStd3Mapped` character was found while STD3 rules are on.
    DissallowedMappedInStd3,
    /// A character with the `Disallowed` mapping status was found.
    DissallowedCharacter,
    /// The domain exceeds 253 bytes or a label exceeds 63 bytes.
    TooLongForDns,
    /// The domain or one of its labels is empty.
    TooShortForDns,
}
310
311#[derive(Debug, Eq, PartialEq)]
316pub struct Errors(Vec<Error>);
317
318pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
320 let mut errors = Vec::new();
321 let mut result = String::new();
322 let mut first = true;
323 for label in processing(domain, flags, &mut errors).split('.') {
324 if !first {
325 result.push('.');
326 }
327 first = false;
328 if label.is_ascii() {
329 result.push_str(label);
330 } else {
331 match punycode::encode_str(label) {
332 Some(x) => {
333 result.push_str(PUNYCODE_PREFIX);
334 result.push_str(&x);
335 }
336 None => errors.push(Error::PunycodeError),
337 }
338 }
339 }
340
341 if flags.verify_dns_length {
342 let domain = if result.ends_with('.') {
343 &result[..result.len() - 1]
344 } else {
345 &*result
346 };
347 if domain.is_empty() || domain.split('.').any(|label| label.is_empty()) {
348 errors.push(Error::TooShortForDns)
349 }
350 if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
351 errors.push(Error::TooLongForDns)
352 }
353 }
354 if errors.is_empty() {
355 Ok(result)
356 } else {
357 Err(Errors(errors))
358 }
359}
360
361pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) {
365 flags.transitional_processing = false;
366 let mut errors = Vec::new();
367 let domain = processing(domain, flags, &mut errors);
368 let errors = if errors.is_empty() {
369 Ok(())
370 } else {
371 Err(Errors(errors))
372 };
373 (domain, errors)
374}
375
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `to_ascii` non-transitionally with STD3 rules enabled; only the
    /// DNS length verification differs between the tests.
    fn strict_to_ascii(domain: &str, verify_dns_length: bool) -> Result<String, Errors> {
        let flags = Flags {
            use_std3_ascii_rules: true,
            transitional_processing: false,
            verify_dns_length,
        };
        to_ascii(domain, flags)
    }

    /// With length verification, a bare Punycode prefix never yields a valid domain.
    #[test]
    fn test_punycode_prefix_with_length_check() {
        let rejected = [
            "xn--",
            "xn---",
            "xn-----",
            "xn--.",
            "xn--...",
            ".xn--",
            "...xn--",
            "xn--.xn--",
            "xn--.example.org",
        ];
        for domain in rejected.iter() {
            assert!(
                strict_to_ascii(domain, true).is_err(),
                "{:?} should be rejected",
                domain
            );
        }
    }

    /// Without length verification, a bare Punycode prefix decodes to an empty label.
    #[test]
    fn test_punycode_prefix_without_length_check() {
        // `Some(output)` is the expected ASCII form, `None` means an error is expected.
        let cases: [(&str, Option<&str>); 9] = [
            ("xn--", Some("")),
            ("xn---", None),
            ("xn-----", None),
            ("xn--.", Some(".")),
            ("xn--...", Some("...")),
            (".xn--", Some(".")),
            ("...xn--", Some("...")),
            ("xn--.xn--", Some(".")),
            ("xn--.example.org", Some(".example.org")),
        ];
        for &(domain, expected) in cases.iter() {
            let actual = strict_to_ascii(domain, false);
            match expected {
                Some(output) => assert_eq!(actual, Ok(output.to_owned())),
                None => assert!(actual.is_err(), "{:?} should be rejected", domain),
            }
        }
    }

    /// A label must not begin with a combining mark.
    #[test]
    fn test_v5() {
        assert!(is_combining_mark('\u{11C3A}'));

        let rejected = [
            "\u{11C3A}",
            "\u{850f}.\u{11C3A}",
            "\u{850f}\u{ff61}\u{11C3A}",
        ];
        for domain in rejected.iter() {
            assert!(
                strict_to_ascii(domain, true).is_err(),
                "{:?} should be rejected",
                domain
            );
        }
    }

    /// Bidi restrictions apply to every label of a domain containing RTL characters.
    #[test]
    fn test_v8_bidi_rules() {
        let accepted = [
            ("abc", "abc"),
            ("123", "123"),
            ("אבּג", "xn--kdb3bdf"),
            ("ابج", "xn--mgbcm"),
            ("abc.ابج", "abc.xn--mgbcm"),
            ("אבּג.ابج", "xn--kdb3bdf.xn--mgbcm"),
        ];
        for &(domain, expected) in accepted.iter() {
            assert_eq!(strict_to_ascii(domain, true), Ok(expected.to_owned()));
        }

        let rejected = [
            // In a Bidi domain a label cannot start with a digit.
            "0a.\u{05D0}",
            "0à.\u{05D0}",
            // Presumably exercises Bidi characters hidden behind Punycode encoding.
            "xn--0ca24w",
        ];
        for domain in rejected.iter() {
            assert!(
                strict_to_ascii(domain, true).is_err(),
                "{:?} should be rejected",
                domain
            );
        }
    }
}