yaml_rust/
emitter.rs

1use std::convert::From;
2use std::error::Error;
3use std::fmt::{self, Display};
4use crate::yaml::{Hash, Yaml};
5
/// Errors that can be reported while emitting a YAML document.
#[derive(Clone, Copy, Debug)]
pub enum EmitError {
    /// The underlying `fmt::Write` sink reported a failure.
    FmtError(fmt::Error),
    /// A hash key could not be emitted.
    ///
    /// NOTE(review): no code visible in this file constructs this variant.
    BadHashmapKey,
}
11
12impl Error for EmitError {
13    fn cause(&self) -> Option<&dyn Error> {
14        None
15    }
16}
17
18impl Display for EmitError {
19    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
20        match *self {
21            EmitError::FmtError(ref err) => Display::fmt(err, formatter),
22            EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"),
23        }
24    }
25}
26
27impl From<fmt::Error> for EmitError {
28    fn from(f: fmt::Error) -> Self {
29        EmitError::FmtError(f)
30    }
31}
32
/// Serializes a `Yaml` tree as text into a borrowed `fmt::Write` sink.
pub struct YamlEmitter<'a> {
    /// Sink that receives the emitted text.
    writer: &'a mut dyn fmt::Write,
    /// Number of spaces written per indentation level.
    best_indent: usize,
    /// Whether a nested collection may start on the same line as the
    /// `-` / `?` / `:` marker that introduces it.
    compact: bool,
    /// Current nesting depth; `-1` until the first non-empty collection is
    /// entered.
    level: isize,
}
40
/// Result type returned by the emitter's methods: `Ok(())` on success, or an
/// `EmitError` describing why emission stopped.
pub type EmitResult = Result<(), EmitError>;
42
/// Writes `v` to `wr` as a double-quoted scalar, escaping everything that
/// cannot appear verbatim between the quotes.
///
/// The double quote and the backslash are backslash-escaped. Every byte in
/// `0x00..=0x1f`, plus `0x7f`, is written either as a short escape (`\b`,
/// `\t`, `\n`, `\f`, `\r`) or as a `\u00XX` sequence. All other bytes,
/// including those of multi-byte UTF-8 characters, are copied through
/// unchanged.
///
/// Adapted from the string escaper in `serialize::json`.
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
    /// Escape text for the control bytes `0x00..=0x1f`, indexed by byte value.
    const CONTROL_ESCAPES: [&str; 32] = [
        "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", "\\u0007",
        "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", "\\u000e", "\\u000f",
        "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
        "\\u0018", "\\u0019", "\\u001a", "\\u001b", "\\u001c", "\\u001d", "\\u001e", "\\u001f",
    ];

    wr.write_str("\"")?;

    // Byte offset where the not-yet-written run of literal text begins.
    let mut pending = 0;

    for (pos, byte) in v.bytes().enumerate() {
        let replacement = match byte {
            b'"' => "\\\"",
            b'\\' => "\\\\",
            0x00..=0x1f => CONTROL_ESCAPES[usize::from(byte)],
            0x7f => "\\u007f",
            _ => continue,
        };

        // Flush the literal run that precedes this byte. Every escaped byte
        // is ASCII, so `pos` always falls on a character boundary.
        if pending < pos {
            wr.write_str(&v[pending..pos])?;
        }

        wr.write_str(replacement)?;
        pending = pos + 1;
    }

    // Whatever follows the last escaped byte is written as-is.
    if pending < v.len() {
        wr.write_str(&v[pending..])?;
    }

    wr.write_str("\"")
}
105
106impl<'a> YamlEmitter<'a> {
107    pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter {
108        YamlEmitter {
109            writer,
110            best_indent: 2,
111            compact: true,
112            level: -1,
113        }
114    }
115
116    /// Set 'compact inline notation' on or off, as described for block
117    /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382)
118    /// and
119    /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057).
120    ///
121    /// In this form, blocks cannot have any properties (such as anchors
122    /// or tags), which should be OK, because this emitter doesn't
123    /// (currently) emit those anyways.
124    pub fn compact(&mut self, compact: bool) {
125        self.compact = compact;
126    }
127
128    /// Determine if this emitter is using 'compact inline notation'.
129    pub fn is_compact(&self) -> bool {
130        self.compact
131    }
132
133    pub fn dump(&mut self, doc: &Yaml) -> EmitResult {
134        // write DocumentStart
135        writeln!(self.writer, "---")?;
136        self.level = -1;
137        self.emit_node(doc)
138    }
139
140    fn write_indent(&mut self) -> EmitResult {
141        if self.level <= 0 {
142            return Ok(());
143        }
144        for _ in 0..self.level {
145            for _ in 0..self.best_indent {
146                write!(self.writer, " ")?;
147            }
148        }
149        Ok(())
150    }
151
152    fn emit_node(&mut self, node: &Yaml) -> EmitResult {
153        match *node {
154            Yaml::Array(ref v) => self.emit_array(v),
155            Yaml::Hash(ref h) => self.emit_hash(h),
156            Yaml::String(ref v) => {
157                if need_quotes(v) {
158                    escape_str(self.writer, v)?;
159                } else {
160                    write!(self.writer, "{}", v)?;
161                }
162                Ok(())
163            }
164            Yaml::Boolean(v) => {
165                if v {
166                    self.writer.write_str("true")?;
167                } else {
168                    self.writer.write_str("false")?;
169                }
170                Ok(())
171            }
172            Yaml::Integer(v) => {
173                write!(self.writer, "{}", v)?;
174                Ok(())
175            }
176            Yaml::Real(ref v) => {
177                write!(self.writer, "{}", v)?;
178                Ok(())
179            }
180            Yaml::Null | Yaml::BadValue => {
181                write!(self.writer, "~")?;
182                Ok(())
183            }
184            // XXX(chenyh) Alias
185            _ => Ok(()),
186        }
187    }
188
189    fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
190        if v.is_empty() {
191            write!(self.writer, "[]")?;
192        } else {
193            self.level += 1;
194            for (cnt, x) in v.iter().enumerate() {
195                if cnt > 0 {
196                    writeln!(self.writer)?;
197                    self.write_indent()?;
198                }
199                write!(self.writer, "-")?;
200                self.emit_val(true, x)?;
201            }
202            self.level -= 1;
203        }
204        Ok(())
205    }
206
207    fn emit_hash(&mut self, h: &Hash) -> EmitResult {
208        if h.is_empty() {
209            self.writer.write_str("{}")?;
210        } else {
211            self.level += 1;
212            for (cnt, (k, v)) in h.iter().enumerate() {
213                let complex_key = match *k {
214                    Yaml::Hash(_) | Yaml::Array(_) => true,
215                    _ => false,
216                };
217                if cnt > 0 {
218                    writeln!(self.writer)?;
219                    self.write_indent()?;
220                }
221                if complex_key {
222                    write!(self.writer, "?")?;
223                    self.emit_val(true, k)?;
224                    writeln!(self.writer)?;
225                    self.write_indent()?;
226                    write!(self.writer, ":")?;
227                    self.emit_val(true, v)?;
228                } else {
229                    self.emit_node(k)?;
230                    write!(self.writer, ":")?;
231                    self.emit_val(false, v)?;
232                }
233            }
234            self.level -= 1;
235        }
236        Ok(())
237    }
238
239    /// Emit a yaml as a hash or array value: i.e., which should appear
240    /// following a ":" or "-", either after a space, or on a new line.
241    /// If `inline` is true, then the preceding characters are distinct
242    /// and short enough to respect the compact flag.
243    fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
244        match *val {
245            Yaml::Array(ref v) => {
246                if (inline && self.compact) || v.is_empty() {
247                    write!(self.writer, " ")?;
248                } else {
249                    writeln!(self.writer)?;
250                    self.level += 1;
251                    self.write_indent()?;
252                    self.level -= 1;
253                }
254                self.emit_array(v)
255            }
256            Yaml::Hash(ref h) => {
257                if (inline && self.compact) || h.is_empty() {
258                    write!(self.writer, " ")?;
259                } else {
260                    writeln!(self.writer)?;
261                    self.level += 1;
262                    self.write_indent()?;
263                    self.level -= 1;
264                }
265                self.emit_hash(h)
266            }
267            _ => {
268                write!(self.writer, " ")?;
269                self.emit_node(val)
270            }
271        }
272    }
273}
274
/// Decides whether `string` must be written as a quoted scalar.
///
/// Quoting is required when the string:
///
/// * is empty, or starts or ends with a space;
/// * starts with one of `&`, `*`, `?`, `|`, `-`, `<`, `>`, `=`, `!`, `%`, `@`;
/// * contains, anywhere, one of `:`, `{`, `}`, `[`, `]`, `,`, `#`, `` ` ``,
///   `"`, `'`, `\`;
/// * contains any control character in `\x00..=\x1f` or `\x7f` (this covers
///   `\a`, `\b`, `\t`, `\n`, `\v`, `\f`, `\r` and `\e`), since these need a
///   double-quoted escape sequence;
/// * is spelled like a boolean (`true`, `yes`, `on`, ... in the listed
///   capitalizations) or like null (`null`, `Null`, `NULL`, `~`);
/// * starts with `.` or `0x`, or parses as an `i64` or an `f64`, and so
///   would read back as a number (e.g. `2`, `2.6`, `12e7`).
///
/// Date-like strings such as `2014-12-31` are *not* quoted.
fn need_quotes(string: &str) -> bool {
    fn need_quotes_spaces(string: &str) -> bool {
        string.starts_with(' ') || string.ends_with(' ')
    }

    string.is_empty()
        || need_quotes_spaces(string)
        || string.starts_with(|character: char| match character {
            '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' => true,
            _ => false,
        })
        || string.contains(|character: char| match character {
            ':'
            | '{'
            | '}'
            | '['
            | ']'
            | ','
            | '#'
            | '`'
            | '\"'
            | '\''
            | '\\'
            // Every C0 control character and DEL. The earlier ranges skipped
            // \x07, \x08, \x0b, \x0c, \x1b and \x7f, so strings holding them
            // were written unquoted with the raw control character inside.
            | '\0'..='\x1f'
            | '\x7f' => true,
            _ => false,
        })
        || [
            // http://yaml.org/type/bool.html
            // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse
            // them as string, not booleans, although it is violating the YAML 1.1 specification.
            // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088.
            "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE",
            "false", "on", "On", "ON", "off", "Off", "OFF",
            // http://yaml.org/type/null.html
            "null", "Null", "NULL", "~",
        ]
        .contains(&string)
        || string.starts_with('.')
        || string.starts_with("0x")
        || string.parse::<i64>().is_ok()
        || string.parse::<f64>().is_ok()
}
336
#[cfg(test)]
mod test {
    use super::*;
    use crate::yaml::Yaml;
    use crate::YamlLoader;

    /// Parses `input` and returns its first document, panicking on a parse
    /// error or when no document is present.
    fn load_first(input: &str) -> Yaml {
        YamlLoader::load_from_str(input).unwrap().remove(0)
    }

    /// Parses text produced by the emitter and returns its first document,
    /// panicking with the scanner's own message if it does not parse.
    fn reload(emitted: &str) -> Yaml {
        match YamlLoader::load_from_str(emitted) {
            Ok(mut docs) => docs.remove(0),
            Err(e) => panic!("{}", e),
        }
    }

    /// Dumps `doc` into a fresh `String` with the given compact setting
    /// (`true` is the emitter's default).
    fn emit(doc: &Yaml, compact: bool) -> String {
        let mut writer = String::new();
        {
            let mut emitter = YamlEmitter::new(&mut writer);
            emitter.compact(compact);
            emitter.dump(doc).unwrap();
        }
        writer
    }

    /// Emitting and re-parsing a simple document yields an equal document.
    #[test]
    fn test_emit_simple() {
        let s = "
# comment
a0 bb: val
a1:
    b1: 4
    b2: d
a2: 4 # i'm comment
a3: [1, 2, 3]
a4:
    - [a1, a2]
    - 2
";

        let doc = load_first(s);
        let writer = emit(&doc, true);
        println!("original:\n{}", s);
        println!("emitted:\n{}", writer);
        let doc_new = reload(&writer);

        assert_eq!(doc, doc_new);
    }

    /// Round-trips a document with anchors, aliases and non-string keys.
    #[test]
    fn test_emit_complex() {
        let s = r#"
cataloge:
  product: &coffee   { name: Coffee,    price: 2.5  ,  unit: 1l  }
  product: &cookies  { name: Cookies!,  price: 3.40 ,  unit: 400g}

products:
  *coffee:
    amount: 4
  *cookies:
    amount: 4
  [1,2,3,4]:
    array key
  2.4:
    real key
  true:
    bool key
  {}:
    empty hash key
            "#;
        let doc = load_first(s);
        let writer = emit(&doc, true);
        let doc_new = reload(&writer);
        assert_eq!(doc, doc_new);
    }

    /// Strings are quoted only where a plain scalar would be misread.
    #[test]
    fn test_emit_avoid_quotes() {
        let s = r#"---
a7: 你好
boolean: "true"
boolean2: "false"
date: 2014-12-31
empty_string: ""
empty_string1: " "
empty_string2: "    a"
empty_string3: "    a "
exp: "12e7"
field: ":"
field2: "{"
field3: "\\"
field4: "\n"
field5: "can't avoid quote"
float: "2.6"
int: "4"
nullable: "null"
nullable2: "~"
products:
  "*coffee":
    amount: 4
  "*cookies":
    amount: 4
  ".milk":
    amount: 1
  "2.4": real key
  "[1,2,3,4]": array key
  "true": bool key
  "{}": empty hash key
x: test
y: avoid quoting here
z: string with spaces"#;

        let doc = load_first(s);
        let writer = emit(&doc, true);

        assert_eq!(s, writer, "actual:\n\n{}\n", writer);
    }

    /// Boolean-looking strings are quoted while real booleans stay bare.
    #[test]
    fn emit_quoted_bools() {
        let input = r#"---
string0: yes
string1: no
string2: "true"
string3: "false"
string4: "~"
null0: ~
[true, false]: real_bools
[True, TRUE, False, FALSE, y,Y,yes,Yes,YES,n,N,no,No,NO,on,On,ON,off,Off,OFF]: false_bools
bool0: true
bool1: false"#;
        let expected = r#"---
string0: "yes"
string1: "no"
string2: "true"
string3: "false"
string4: "~"
null0: ~
? - true
  - false
: real_bools
? - "True"
  - "TRUE"
  - "False"
  - "FALSE"
  - y
  - Y
  - "yes"
  - "Yes"
  - "YES"
  - n
  - N
  - "no"
  - "No"
  - "NO"
  - "on"
  - "On"
  - "ON"
  - "off"
  - "Off"
  - "OFF"
: false_bools
bool0: true
bool1: false"#;

        let doc = load_first(input);
        let writer = emit(&doc, true);

        assert_eq!(
            expected, writer,
            "expected:\n{}\nactual:\n{}\n",
            expected, writer
        );
    }

    #[test]
    fn test_empty_and_nested() {
        test_empty_and_nested_flag(false)
    }

    #[test]
    fn test_empty_and_nested_compact() {
        test_empty_and_nested_flag(true)
    }

    /// Checks the exact layout of empty and nested collections for the
    /// given compact setting.
    fn test_empty_and_nested_flag(compact: bool) {
        let s = if compact {
            r#"---
a:
  b:
    c: hello
  d: {}
e:
  - f
  - g
  - h: []"#
        } else {
            r#"---
a:
  b:
    c: hello
  d: {}
e:
  - f
  - g
  -
    h: []"#
        };

        let doc = load_first(s);
        let writer = emit(&doc, compact);

        assert_eq!(s, writer);
    }

    #[test]
    fn test_nested_arrays() {
        let s = r#"---
a:
  - b
  - - c
    - d
    - - e
      - f"#;

        let doc = load_first(s);
        let writer = emit(&doc, true);
        println!("original:\n{}", s);
        println!("emitted:\n{}", writer);

        assert_eq!(s, writer);
    }

    #[test]
    fn test_deeply_nested_arrays() {
        let s = r#"---
a:
  - b
  - - c
    - d
    - - e
      - - f
      - - e"#;

        let doc = load_first(s);
        let writer = emit(&doc, true);
        println!("original:\n{}", s);
        println!("emitted:\n{}", writer);

        assert_eq!(s, writer);
    }

    #[test]
    fn test_nested_hashes() {
        let s = r#"---
a:
  b:
    c:
      d:
        e: f"#;

        let doc = load_first(s);
        let writer = emit(&doc, true);
        println!("original:\n{}", s);
        println!("emitted:\n{}", writer);

        assert_eq!(s, writer);
    }
}