hickory_proto/serialize/txt/
zone.rs

1// Copyright 2015-2023 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// https://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use std::{
9    borrow::Cow,
10    collections::BTreeMap,
11    fs, mem,
12    path::{Path, PathBuf},
13    str::FromStr,
14};
15
16use crate::{
17    rr::{DNSClass, LowerName, Name, RData, Record, RecordSet, RecordType, RrKey},
18    serialize::txt::{
19        parse_rdata::RDataParser,
20        zone_lex::{Lexer, Token},
21        ParseError, ParseErrorKind, ParseResult,
22    },
23};
24
25/// ```text
26/// 5. ZONE FILES
27///
28/// Zone files are text files that contain RRs in text form.  Since the
29/// contents of a zone can be expressed in the form of a list of RRs a
30/// Zone File is most often used to define a zone, though it can be used
31/// to list a cache's contents.  Hence, this section first discusses the
32/// format of RRs in a Zone File, and then the special considerations when
33/// a Zone File is used to create a zone in some name server.
34///
35/// 5.1. Format
36///
37/// The format of these files is a sequence of entries.  Entries are
38/// predominantly line-oriented, though parentheses can be used to continue
39/// a list of items across a line boundary, and text literals can contain
40/// CRLF within the text.  Any combination of tabs and spaces act as a
41/// delimiter between the separate items that make up an entry.  The end of
42/// any line in the Zone File can end with a comment.  The comment starts
43/// with a ";" (semicolon).
44///
45/// The following entries are defined:
46///
47///     <blank>[<comment>]
48///
49///     $ORIGIN <domain-name> [<comment>]
50///
51///     $INCLUDE <file-name> [<domain-name>] [<comment>]
52///
53///     <domain-name><rr> [<comment>]
54///
55///     <blank><rr> [<comment>]
56///
57/// Blank lines, with or without comments, are allowed anywhere in the file.
58///
59/// Two control entries are defined: $ORIGIN and $INCLUDE.  $ORIGIN is
60/// followed by a domain name, and resets the current origin for relative
61/// domain names to the stated name.  $INCLUDE inserts the named file into
62/// the current file, and may optionally specify a domain name that sets the
63/// relative domain name origin for the included file.  $INCLUDE may also
64/// have a comment.  Note that a $INCLUDE entry never changes the relative
65/// origin of the parent file, regardless of changes to the relative origin
66/// made within the included file.
67///
68/// The last two forms represent RRs.  If an entry for an RR begins with a
69/// blank, then the RR is assumed to be owned by the last stated owner.  If
70/// an RR entry begins with a <domain-name>, then the owner name is reset.
71///
72/// <rr> contents take one of the following forms:
73///
74///     [<TTL>] [<class>] <type> <RDATA>
75///
76///     [<class>] [<TTL>] <type> <RDATA>
77///
78/// The RR begins with optional TTL and class fields, followed by a type and
79/// RDATA field appropriate to the type and class.  Class and type use the
80/// standard mnemonics, TTL is a decimal integer.  Omitted class and TTL
81/// values are default to the last explicitly stated values.  Since type and
82/// class mnemonics are disjoint, the parse is unique.  (Note that this
83/// order is different from the order used in examples and the order used in
84/// the actual RRs; the given order allows easier parsing and defaulting.)
85///
86/// <domain-name>s make up a large share of the data in the Zone File.
87/// The labels in the domain name are expressed as character strings and
88/// separated by dots.  Quoting conventions allow arbitrary characters to be
89/// stored in domain names.  Domain names that end in a dot are called
90/// absolute, and are taken as complete.  Domain names which do not end in a
91/// dot are called relative; the actual domain name is the concatenation of
92/// the relative part with an origin specified in a $ORIGIN, $INCLUDE, or as
93/// an argument to the Zone File loading routine.  A relative name is an
94/// error when no origin is available.
95///
96/// <character-string> is expressed in one or two ways: as a contiguous set
97/// of characters without interior spaces, or as a string beginning with a "
98/// and ending with a ".  Inside a " delimited string any character can
99/// occur, except for a " itself, which must be quoted using \ (back slash).
100///
101/// Because these files are text files several special encodings are
102/// necessary to allow arbitrary data to be loaded.  In particular:
103///
104///                 of the root.
105///
106/// @               A free standing @ is used to denote the current origin.
107///
108/// \X              where X is any character other than a digit (0-9), is
109///                 used to quote that character so that its special meaning
110///                 does not apply.  For example, "\." can be used to place
111///                 a dot character in a label.
112///
113/// \DDD            where each D is a digit is the octet corresponding to
114///                 the decimal number described by DDD.  The resulting
115///                 octet is assumed to be text and is not checked for
116///                 special meaning.
117///
118/// ( )             Parentheses are used to group data that crosses a line
119///                 boundary.  In effect, line terminations are not
120///                 recognized within parentheses.
121///
122/// ;               Semicolon is used to start a comment; the remainder of
123///                 the line is ignored.
124/// ```
125pub struct Parser<'a> {
126    lexers: Vec<(Lexer<'a>, Option<PathBuf>)>,
127    origin: Option<Name>,
128}
129
130impl<'a> Parser<'a> {
131    /// Returns a new Zone file parser
132    ///
133    /// The `path` argument's parent directory is used to resolve relative `$INCLUDE` paths.
134    /// Relative `$INCLUDE` paths will yield an error if `path` is `None`.
135    pub fn new(
136        input: impl Into<Cow<'a, str>>,
137        path: Option<PathBuf>,
138        origin: Option<Name>,
139    ) -> Self {
140        Self {
141            lexers: vec![(Lexer::new(input), path)],
142            origin,
143        }
144    }
145
146    /// Parse a file from the Lexer
147    ///
148    /// # Return
149    ///
150    /// A pair of the Zone origin name and a map of all Keys to RecordSets
151    pub fn parse(mut self) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
152        let mut origin = self.origin;
153        let mut records: BTreeMap<RrKey, RecordSet> = BTreeMap::new();
154        let mut class: DNSClass = DNSClass::IN;
155        let mut current_name: Option<Name> = None;
156        let mut rtype: Option<RecordType> = None;
157        let mut ttl: Option<u32> = None;
158        let mut state = State::StartLine;
159        let mut stack = self.lexers.len();
160
161        'outer: while let Some((lexer, path)) = self.lexers.last_mut() {
162            while let Some(t) = lexer.next_token()? {
163                state = match state {
164                    State::StartLine => {
165                        // current_name is not reset on the next line b/c it might be needed from the previous
166                        rtype = None;
167
168                        match t {
169                            // if Dollar, then $INCLUDE or $ORIGIN
170                            Token::Include => State::Include(None),
171                            Token::Origin => State::Origin,
172                            Token::Ttl => State::Ttl,
173
174                            // if CharData, then Name then ttl_class_type
175                            Token::CharData(data) => {
176                                current_name = Some(Name::parse(&data, origin.as_ref())?);
177                                State::TtlClassType
178                            }
179
180                            // @ is a placeholder for specifying the current origin
181                            Token::At => {
182                                current_name = origin.clone(); // TODO a COW or RC would reduce copies...
183                                State::TtlClassType
184                            }
185
186                            // if blank, then nothing or ttl_class_type
187                            Token::Blank => State::TtlClassType,
188                            Token::EOL => State::StartLine, // probably a comment
189                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
190                        }
191                    }
192                    State::Ttl => match t {
193                        Token::CharData(data) => {
194                            ttl = Some(Self::parse_time(&data)?);
195                            State::StartLine
196                        }
197                        _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
198                    },
199                    State::Origin => {
200                        match t {
201                            Token::CharData(data) => {
202                                // TODO an origin was specified, should this be legal? definitely confusing...
203                                origin = Some(Name::parse(&data, None)?);
204                                State::StartLine
205                            }
206                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
207                        }
208                    }
209                    State::Include(include_path) => match (t, include_path) {
210                        (Token::CharData(data), None) => State::Include(Some(data)),
211                        (Token::EOL, Some(include_path)) => {
212                            // RFC1035 (section 5) does not specify how filename for $INCLUDE
213                            // should be resolved into file path. The underlying code implements the
214                            // following:
215                            // * if the path is absolute (relies on Path::is_absolute), it uses normalized path
216                            // * otherwise, it joins the path with parent root of the current file
217                            //
218                            // TODO: Inlining files specified using non-relative path might potentially introduce
219                            // security issue in some cases (e.g. when working with zone files from untrusted sources)
220                            // and should probably be configurable by user.
221
222                            if stack > MAX_INCLUDE_LEVEL {
223                                return Err(ParseErrorKind::Message(
224                                    "Max depth level for nested $INCLUDE is reached",
225                                )
226                                .into());
227                            }
228
229                            let include = Path::new(&include_path);
230                            let include = match (include.is_absolute(), path) {
231                                (true, _) => include.to_path_buf(),
232                                (false, Some(path)) => path
233                                    .parent()
234                                    .expect("file has to have parent folder")
235                                    .join(include),
236                                (false, None) => {
237                                    return Err(ParseErrorKind::Message(
238                                        "Relative $INCLUDE is not supported",
239                                    )
240                                    .into());
241                                }
242                            };
243
244                            let input = fs::read_to_string(&include)?;
245                            let lexer = Lexer::new(input);
246                            self.lexers.push((lexer, Some(include)));
247                            stack += 1;
248                            state = State::StartLine;
249                            continue 'outer;
250                        }
251                        (Token::CharData(_), Some(_)) => {
252                            return Err(ParseErrorKind::Message(
253                                "Domain name for $INCLUDE is not supported",
254                            )
255                            .into());
256                        }
257                        (t, _) => {
258                            return Err(ParseErrorKind::UnexpectedToken(t).into());
259                        }
260                    },
261                    State::TtlClassType => {
262                        match t {
263                            // if number, TTL
264                            // Token::Number(ref num) => ttl = Some(*num),
265                            // One of Class or Type (these cannot be overlapping!)
266                            Token::CharData(mut data) => {
267                                // if it's a number it's a ttl
268                                let result: ParseResult<u32> = Self::parse_time(&data);
269                                if result.is_ok() {
270                                    ttl = result.ok();
271                                    State::TtlClassType // hm, should this go to just ClassType?
272                                } else {
273                                    // if can parse DNSClass, then class
274                                    data.make_ascii_uppercase();
275                                    let result = DNSClass::from_str(&data);
276                                    if let Ok(parsed) = result {
277                                        class = parsed;
278                                        State::TtlClassType
279                                    } else {
280                                        // if can parse RecordType, then RecordType
281                                        rtype = Some(RecordType::from_str(&data)?);
282                                        State::Record(vec![])
283                                    }
284                                }
285                            }
286                            // could be nothing if started with blank and is a comment, i.e. EOL
287                            Token::EOL => {
288                                State::StartLine // next line
289                            }
290                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
291                        }
292                    }
293                    State::Record(record_parts) => {
294                        // b/c of ownership rules, perhaps, just collect all the RData components as a list of
295                        //  tokens to pass into the processor
296                        match t {
297                            Token::EOL => {
298                                Self::flush_record(
299                                    record_parts,
300                                    &origin,
301                                    &current_name,
302                                    rtype,
303                                    &mut ttl,
304                                    class,
305                                    &mut records,
306                                )?;
307                                State::StartLine
308                            }
309                            Token::CharData(part) => {
310                                let mut record_parts = record_parts;
311                                record_parts.push(part);
312                                State::Record(record_parts)
313                            }
314                            // TODO: we should not tokenize the list...
315                            Token::List(list) => {
316                                let mut record_parts = record_parts;
317                                record_parts.extend(list);
318                                State::Record(record_parts)
319                            }
320                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
321                        }
322                    }
323                };
324            }
325
326            // Extra flush at the end for the case of missing endline
327            if let State::Record(record_parts) = mem::replace(&mut state, State::StartLine) {
328                Self::flush_record(
329                    record_parts,
330                    &origin,
331                    &current_name,
332                    rtype,
333                    &mut ttl,
334                    class,
335                    &mut records,
336                )?;
337            }
338
339            stack -= 1;
340            self.lexers.pop();
341        }
342
343        //
344        // build the Authority and return.
345        let origin = origin.ok_or_else(|| {
346            ParseError::from(ParseErrorKind::Message("$ORIGIN was not specified"))
347        })?;
348        Ok((origin, records))
349    }
350
351    fn flush_record(
352        record_parts: Vec<String>,
353        origin: &Option<Name>,
354        current_name: &Option<Name>,
355        rtype: Option<RecordType>,
356        ttl: &mut Option<u32>,
357        class: DNSClass,
358        records: &mut BTreeMap<RrKey, RecordSet>,
359    ) -> ParseResult<()> {
360        // call out to parsers for difference record types
361        // all tokens as part of the Record should be chardata...
362        let rtype = rtype.ok_or_else(|| {
363            ParseError::from(ParseErrorKind::Message("record type not specified"))
364        })?;
365        let rdata = RData::parse(
366            rtype,
367            record_parts.iter().map(AsRef::as_ref),
368            origin.as_ref(),
369        )?;
370
371        // verify that we have everything we need for the record
372        let mut record = Record::new();
373        // TODO COW or RC would reduce mem usage, perhaps Name should have an intern()...
374        //  might want to wait until RC.weak() stabilizes, as that would be needed for global
375        //  memory where you want
376        record.set_name(current_name.clone().ok_or_else(|| {
377            ParseError::from(ParseErrorKind::Message("record name not specified"))
378        })?);
379        record.set_rr_type(rtype);
380        record.set_dns_class(class);
381
382        // slightly annoying, need to grab the TTL, then move rdata into the record,
383        //  then check the Type again and have custom add logic.
384        match rtype {
385            RecordType::SOA => {
386                // TTL for the SOA is set internally...
387                // expire is for the SOA, minimum is default for records
388                if let RData::SOA(ref soa) = rdata {
389                    // TODO, this looks wrong, get_expire() should be get_minimum(), right?
390                    record.set_ttl(soa.expire() as u32); // the spec seems a little inaccurate with u32 and i32
391                    if ttl.is_none() {
392                        *ttl = Some(soa.minimum());
393                    } // TODO: should this only set it if it's not set?
394                } else {
395                    let msg = format!("Invalid RData here, expected SOA: {rdata:?}");
396                    return ParseResult::Err(ParseError::from(ParseErrorKind::Msg(msg)));
397                }
398            }
399            _ => {
400                record.set_ttl(ttl.ok_or_else(|| {
401                    ParseError::from(ParseErrorKind::Message("record ttl not specified"))
402                })?);
403            }
404        }
405
406        // TODO: validate record, e.g. the name of SRV record allows _ but others do not.
407
408        // move the rdata into record...
409        record.set_data(Some(rdata));
410
411        // add to the map
412        let key = RrKey::new(LowerName::new(record.name()), record.record_type());
413        match rtype {
414            RecordType::SOA => {
415                let set = record.into();
416                if records.insert(key, set).is_some() {
417                    return Err(ParseErrorKind::Message("SOA is already specified").into());
418                }
419            }
420            _ => {
421                // add a Vec if it's not there, then add the record to the list
422                let set = records
423                    .entry(key)
424                    .or_insert_with(|| RecordSet::new(record.name(), record.record_type(), 0));
425                set.insert(record, 0);
426            }
427        }
428        Ok(())
429    }
430
431    /// parses the string following the rules from:
432    ///  <https://tools.ietf.org/html/rfc2308> (NXCaching RFC) and
433    ///  <https://www.zytrax.com/books/dns/apa/time.html>
434    ///
435    /// default is seconds
436    /// #s = seconds = # x 1 seconds (really!)
437    /// #m = minutes = # x 60 seconds
438    /// #h = hours   = # x 3600 seconds
439    /// #d = day     = # x 86400 seconds
440    /// #w = week    = # x 604800 seconds
441    ///
442    /// returns the result of the parsing or and error
443    ///
444    /// # Example
445    /// ```
446    /// use hickory_proto::serialize::txt::Parser;
447    ///
448    /// assert_eq!(Parser::parse_time("0").unwrap(),  0);
449    /// assert!(Parser::parse_time("s").is_err());
450    /// assert!(Parser::parse_time("").is_err());
451    /// assert_eq!(Parser::parse_time("0s").unwrap(), 0);
452    /// assert_eq!(Parser::parse_time("1").unwrap(),  1);
453    /// assert_eq!(Parser::parse_time("1S").unwrap(), 1);
454    /// assert_eq!(Parser::parse_time("1s").unwrap(), 1);
455    /// assert_eq!(Parser::parse_time("1M").unwrap(), 60);
456    /// assert_eq!(Parser::parse_time("1m").unwrap(), 60);
457    /// assert_eq!(Parser::parse_time("1H").unwrap(), 3600);
458    /// assert_eq!(Parser::parse_time("1h").unwrap(), 3600);
459    /// assert_eq!(Parser::parse_time("1D").unwrap(), 86400);
460    /// assert_eq!(Parser::parse_time("1d").unwrap(), 86400);
461    /// assert_eq!(Parser::parse_time("1W").unwrap(), 604800);
462    /// assert_eq!(Parser::parse_time("1w").unwrap(), 604800);
463    /// assert_eq!(Parser::parse_time("1s2d3w4h2m").unwrap(), 1+2*86400+3*604800+4*3600+2*60);
464    /// assert_eq!(Parser::parse_time("3w3w").unwrap(), 3*604800+3*604800);
465    /// assert!(Parser::parse_time("7102w").is_err());
466    /// ```
467    pub fn parse_time(ttl_str: &str) -> ParseResult<u32> {
468        if ttl_str.is_empty() {
469            return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into());
470        }
471
472        let (mut state, mut value) = (None, 0_u32);
473        for (i, c) in ttl_str.chars().enumerate() {
474            let start = match (state, c) {
475                (None, '0'..='9') => {
476                    state = Some(i);
477                    continue;
478                }
479                (Some(_), '0'..='9') => continue,
480                (Some(start), 'S' | 's' | 'M' | 'm' | 'H' | 'h' | 'D' | 'd' | 'W' | 'w') => start,
481                _ => return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into()),
482            };
483
484            // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
485            let number = u32::from_str(&ttl_str[start..i])
486                .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
487
488            let multiplier = match c {
489                'S' | 's' => 1,
490                'M' | 'm' => 60,
491                'H' | 'h' => 3_600,
492                'D' | 'd' => 86_400,
493                'W' | 'w' => 604_800,
494                _ => unreachable!(),
495            };
496
497            value = number
498                .checked_mul(multiplier)
499                .and_then(|add| value.checked_add(add))
500                .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
501
502            state = None;
503        }
504
505        if let Some(start) = state {
506            // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
507            let number = u32::from_str(&ttl_str[start..])
508                .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
509            value = value
510                .checked_add(number)
511                .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
512        }
513
514        Ok(value)
515    }
516}
517
/// States of the zone file parser; each names what the parser expects to read next
#[allow(unused)]
enum State {
    /// start of line, @, $<WORD>, Name, Blank
    StartLine,
    /// [<TTL>] [<class>] <type>,
    TtlClassType,
    /// $TTL <time>
    Ttl,
    /// RDATA tokens collected so far for the record on the current line
    Record(Vec<String>),
    /// $INCLUDE <filename>, holding the filename once it has been read
    Include(Option<String>),
    /// $ORIGIN <domain-name>
    Origin,
}
527
/// Max traversal depth for $INCLUDE files
///
/// A `$INCLUDE` is rejected once the number of files currently being parsed, the root file
/// included, is greater than this value.
const MAX_INCLUDE_LEVEL: usize = 256;
530
#[cfg(test)]
mod tests {
    use super::*;

    /// An unknown record type has to be rejected, and the error has to name the upper-cased token
    #[test]
    fn test_zone_parse() {
        let domain = Name::from_str("parameter.origin.org.").unwrap();

        // the record line starts with a blank, so its first word is read as TTL, class or type
        let zone_data = r#"$ORIGIN parsed.zone.origin.org.
 faulty-record-type 60 IN A 1.2.3.4
"#;

        // match instead of `is_err() & unwrap_err()`: the non-short-circuit `&` always ran
        // `unwrap_err()`, which panicked on success before the assertion message was shown
        let error = match Parser::new(zone_data, None, Some(domain)).parse() {
            Ok(parsed) => panic!("unexpected success: {parsed:#?}"),
            Err(error) => error,
        };

        let message = error.to_string();
        assert!(
            message.contains("FAULTY-RECORD-TYPE"),
            "unexpected error: {message}"
        );
    }
}
554        );
555    }
556}