1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
// Copyright 2015-2023 Benjamin Fry <benjaminfry@me.com>
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// https://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

use std::{
    borrow::Cow,
    collections::BTreeMap,
    fs, mem,
    path::{Path, PathBuf},
    str::FromStr,
};

use crate::{
    rr::{DNSClass, LowerName, Name, RData, Record, RecordSet, RecordType, RrKey},
    serialize::txt::{
        parse_rdata::RDataParser,
        zone_lex::{Lexer, Token},
        ParseError, ParseErrorKind, ParseResult,
    },
};

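/// A parser for zone files written in the text format of RFC 1035, section 5.
///
/// The format description below is adapted from that section:
///
/// ```text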
/// 5. ZONE FILES
///
/// Zone files are text files that contain RRs in text form.  Since the
/// contents of a zone can be expressed in the form of a list of RRs a
/// Zone File is most often used to define a zone, though it can be used
/// to list a cache's contents.  Hence, this section first discusses the
/// format of RRs in a Zone File, and then the special considerations when
/// a Zone File is used to create a zone in some name server.
///
/// 5.1. Format
///
/// The format of these files is a sequence of entries.  Entries are
/// predominantly line-oriented, though parentheses can be used to continue
/// a list of items across a line boundary, and text literals can contain
/// CRLF within the text.  Any combination of tabs and spaces act as a
/// delimiter between the separate items that make up an entry.  The end of
/// any line in the Zone File can end with a comment.  The comment starts
/// with a ";" (semicolon).
///
/// The following entries are defined:
///
///     <blank>[<comment>]
///
///     $ORIGIN <domain-name> [<comment>]
///
///     $INCLUDE <file-name> [<domain-name>] [<comment>]
///
///     <domain-name><rr> [<comment>]
///
///     <blank><rr> [<comment>]
///
/// Blank lines, with or without comments, are allowed anywhere in the file.
///
/// Two control entries are defined: $ORIGIN and $INCLUDE.  $ORIGIN is
/// followed by a domain name, and resets the current origin for relative
/// domain names to the stated name.  $INCLUDE inserts the named file into
/// the current file, and may optionally specify a domain name that sets the
/// relative domain name origin for the included file.  $INCLUDE may also
/// have a comment.  Note that a $INCLUDE entry never changes the relative
/// origin of the parent file, regardless of changes to the relative origin
/// made within the included file.
///
/// The last two forms represent RRs.  If an entry for an RR begins with a
/// blank, then the RR is assumed to be owned by the last stated owner.  If
/// an RR entry begins with a <domain-name>, then the owner name is reset.
///
/// <rr> contents take one of the following forms:
///
///     [<TTL>] [<class>] <type> <RDATA>
///
///     [<class>] [<TTL>] <type> <RDATA>
///
/// The RR begins with optional TTL and class fields, followed by a type and
/// RDATA field appropriate to the type and class.  Class and type use the
/// standard mnemonics, TTL is a decimal integer.  Omitted class and TTL
/// values are default to the last explicitly stated values.  Since type and
/// class mnemonics are disjoint, the parse is unique.  (Note that this
/// order is different from the order used in examples and the order used in
/// the actual RRs; the given order allows easier parsing and defaulting.)
///
/// <domain-name>s make up a large share of the data in the Zone File.
/// The labels in the domain name are expressed as character strings and
/// separated by dots.  Quoting conventions allow arbitrary characters to be
/// stored in domain names.  Domain names that end in a dot are called
/// absolute, and are taken as complete.  Domain names which do not end in a
/// dot are called relative; the actual domain name is the concatenation of
/// the relative part with an origin specified in a $ORIGIN, $INCLUDE, or as
/// an argument to the Zone File loading routine.  A relative name is an
/// error when no origin is available.
///
/// <character-string> is expressed in one or two ways: as a contiguous set
/// of characters without interior spaces, or as a string beginning with a "
/// and ending with a ".  Inside a " delimited string any character can
/// occur, except for a " itself, which must be quoted using \ (back slash).
///
/// Because these files are text files several special encodings are
/// necessary to allow arbitrary data to be loaded.  In particular:
///
///                 of the root.
///
/// @               A free standing @ is used to denote the current origin.
///
/// \X              where X is any character other than a digit (0-9), is
///                 used to quote that character so that its special meaning
///                 does not apply.  For example, "\." can be used to place
///                 a dot character in a label.
///
/// \DDD            where each D is a digit is the octet corresponding to
///                 the decimal number described by DDD.  The resulting
///                 octet is assumed to be text and is not checked for
///                 special meaning.
///
/// ( )             Parentheses are used to group data that crosses a line
///                 boundary.  In effect, line terminations are not
///                 recognized within parentheses.
///
/// ;               Semicolon is used to start a comment; the remainder of
///                 the line is ignored.
/// ```
pub struct Parser<'a> {
    /// Stack of lexers, each paired with the path of the file it reads (when known).
    ///
    /// The last entry is the one currently being read; `$INCLUDE` pushes a new entry and a
    /// lexer is popped once it runs out of tokens.
    lexers: Vec<(Lexer<'a>, Option<PathBuf>)>,
    /// Origin supplied by the caller; used for relative names until a `$ORIGIN` entry replaces it.
    origin: Option<Name>,
}

impl<'a> Parser<'a> {
    /// Creates a zone file parser that reads its entries from `input`.
    ///
    /// `path` is the location of the file `input` was taken from. Its parent directory serves as
    /// the base for relative `$INCLUDE` paths; when `path` is `None`, a relative `$INCLUDE`
    /// makes parsing fail with an error.
    ///
    /// `origin` is the origin in effect before the first `$ORIGIN` entry is read.
    pub fn new(
        input: impl Into<Cow<'a, str>>,
        path: Option<PathBuf>,
        origin: Option<Name>,
    ) -> Self {
        // the top-level input is the first (and initially only) entry of the lexer stack
        let root = (Lexer::new(input), path);

        Self {
            lexers: vec![root],
            origin,
        }
    }

    /// Parse a file from the Lexer
    ///
    /// Tokens are always taken from the most recently pushed lexer: a `$INCLUDE` entry pushes a
    /// lexer for the named file, which is read to its end before the including file resumes.
    /// The origin, class, TTL and last owner name are tracked in a single set of variables that
    /// is shared by the top-level input and every included file.
    ///
    /// NOTE(review): because the origin is shared, a `$ORIGIN` inside an included file stays in
    /// effect after returning to the including file -- confirm whether that is intended.
    ///
    /// # Return
    ///
    /// A pair of the Zone origin name and a map of all Keys to RecordSets. The origin is the one
    /// in effect once all input has been consumed.
    ///
    /// # Errors
    ///
    /// Returns an error when
    ///
    /// * the lexer fails or yields a token that is not valid in the current state,
    /// * a name, TTL or record type cannot be parsed, or a record cannot be built from its parts,
    /// * a `$INCLUDE` is nested too deeply, is followed by a domain name, names a file that
    ///   cannot be read, or is relative while the including file's path is unknown or has no
    ///   parent directory,
    /// * no origin was passed to [`Parser::new`] and no `$ORIGIN` entry was read.
    pub fn parse(mut self) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
        let mut origin = self.origin;
        let mut records: BTreeMap<RrKey, RecordSet> = BTreeMap::new();
        let mut class: DNSClass = DNSClass::IN;
        let mut current_name: Option<Name> = None;
        let mut rtype: Option<RecordType> = None;
        let mut ttl: Option<u32> = None;
        let mut state = State::StartLine;
        // number of inputs currently on the lexer stack, the top-level input included
        let mut stack = self.lexers.len();

        // always continue with the innermost (most recently included) input
        'outer: while let Some((lexer, path)) = self.lexers.last_mut() {
            while let Some(t) = lexer.next_token()? {
                state = match state {
                    State::StartLine => {
                        // current_name is not reset on the next line b/c it might be needed from the previous
                        rtype = None;

                        match t {
                            // if Dollar, then $INCLUDE or $ORIGIN
                            Token::Include => State::Include(None),
                            Token::Origin => State::Origin,
                            Token::Ttl => State::Ttl,

                            // if CharData, then Name then ttl_class_type
                            Token::CharData(data) => {
                                current_name = Some(Name::parse(&data, origin.as_ref())?);
                                State::TtlClassType
                            }

                            // @ is a placeholder for specifying the current origin
                            Token::At => {
                                current_name = origin.clone(); // TODO a COW or RC would reduce copies...
                                State::TtlClassType
                            }

                            // if blank, then nothing or ttl_class_type
                            Token::Blank => State::TtlClassType,
                            Token::EOL => State::StartLine, // probably a comment
                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
                        }
                    }
                    State::Ttl => match t {
                        Token::CharData(data) => {
                            ttl = Some(Self::parse_time(&data)?);
                            State::StartLine
                        }
                        _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
                    },
                    State::Origin => {
                        match t {
                            Token::CharData(data) => {
                                // TODO an origin was specified, should this be legal? definitely confusing...
                                origin = Some(Name::parse(&data, None)?);
                                State::StartLine
                            }
                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
                        }
                    }
                    State::Include(include_path) => match (t, include_path) {
                        // the file name; the include itself is performed at the end of the line
                        (Token::CharData(data), None) => State::Include(Some(data)),
                        (Token::EOL, Some(include_path)) => {
                            // RFC1035 (section 5) does not specify how filename for $INCLUDE
                            // should be resolved into file path. The underlying code implements the
                            // following:
                            // * if the path is absolute (relies on Path::is_absolute), it uses normalized path
                            // * otherwise, it joins the path with parent root of the current file
                            //
                            // TODO: Inlining files specified using non-relative path might potentially introduce
                            // security issue in some cases (e.g. when working with zone files from untrusted sources)
                            // and should probably be configurable by user.

                            if stack > MAX_INCLUDE_LEVEL {
                                return Err(ParseErrorKind::Message(
                                    "Max depth level for nested $INCLUDE is reached",
                                )
                                .into());
                            }

                            let include = Path::new(&include_path);
                            let include = match (include.is_absolute(), path) {
                                (true, _) => include.to_path_buf(),
                                // The path comes from the caller (or an earlier $INCLUDE), so a
                                // missing parent is reported as an error rather than a panic.
                                (false, Some(path)) => path
                                    .parent()
                                    .ok_or_else(|| {
                                        ParseError::from(ParseErrorKind::Message(
                                            "Zone file path has no parent directory for relative $INCLUDE",
                                        ))
                                    })?
                                    .join(include),
                                (false, None) => {
                                    return Err(ParseErrorKind::Message(
                                        "Relative $INCLUDE is not supported",
                                    )
                                    .into());
                                }
                            };

                            let input = fs::read_to_string(&include)?;
                            let lexer = Lexer::new(input);
                            self.lexers.push((lexer, Some(include)));
                            stack += 1;
                            state = State::StartLine;
                            // restart the outer loop so that tokens now come from the included file
                            continue 'outer;
                        }
                        (Token::CharData(_), Some(_)) => {
                            return Err(ParseErrorKind::Message(
                                "Domain name for $INCLUDE is not supported",
                            )
                            .into());
                        }
                        (t, _) => {
                            return Err(ParseErrorKind::UnexpectedToken(t).into());
                        }
                    },
                    State::TtlClassType => {
                        match t {
                            // if number, TTL
                            // Token::Number(ref num) => ttl = Some(*num),
                            // One of Class or Type (these cannot be overlapping!)
                            Token::CharData(mut data) => {
                                // if it's a number it's a ttl
                                if let Ok(parsed) = Self::parse_time(&data) {
                                    ttl = Some(parsed);
                                    State::TtlClassType // hm, should this go to just ClassType?
                                } else {
                                    // if can parse DNSClass, then class
                                    data.make_ascii_uppercase();
                                    if let Ok(parsed) = DNSClass::from_str(&data) {
                                        class = parsed;
                                        State::TtlClassType
                                    } else {
                                        // if can parse RecordType, then RecordType
                                        rtype = Some(RecordType::from_str(&data)?);
                                        State::Record(vec![])
                                    }
                                }
                            }
                            // could be nothing if started with blank and is a comment, i.e. EOL
                            Token::EOL => {
                                State::StartLine // next line
                            }
                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
                        }
                    }
                    State::Record(mut record_parts) => {
                        // b/c of ownership rules, perhaps, just collect all the RData components as a list of
                        //  tokens to pass into the processor
                        match t {
                            Token::EOL => {
                                Self::flush_record(
                                    record_parts,
                                    &origin,
                                    &current_name,
                                    rtype,
                                    &mut ttl,
                                    class,
                                    &mut records,
                                )?;
                                State::StartLine
                            }
                            Token::CharData(part) => {
                                record_parts.push(part);
                                State::Record(record_parts)
                            }
                            // TODO: we should not tokenize the list...
                            Token::List(list) => {
                                record_parts.extend(list);
                                State::Record(record_parts)
                            }
                            _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
                        }
                    }
                };
            }

            // Extra flush at the end for the case of missing endline
            //
            // NOTE(review): any other state is discarded here, so an input that ends right after
            // `$INCLUDE <file>` without an end-of-line token skips the include silently --
            // confirm whether the lexer can end an input that way.
            if let State::Record(record_parts) = mem::replace(&mut state, State::StartLine) {
                Self::flush_record(
                    record_parts,
                    &origin,
                    &current_name,
                    rtype,
                    &mut ttl,
                    class,
                    &mut records,
                )?;
            }

            // this input is exhausted, resume the including one (if any)
            stack -= 1;
            self.lexers.pop();
        }

        //
        // build the Authority and return.
        let origin = origin.ok_or_else(|| {
            ParseError::from(ParseErrorKind::Message("$ORIGIN was not specified"))
        })?;
        Ok((origin, records))
    }

    /// Builds one record from the collected RDATA parts and stores it in `records`.
    ///
    /// * `record_parts` - the RDATA tokens of the entry, handed to `RData::parse`
    /// * `origin` - current origin, passed on to the RDATA parser
    /// * `current_name` - owner name of the record
    /// * `rtype` - record type read for this entry
    /// * `ttl` - current default TTL; an SOA record fills it in from its `minimum` field when it
    ///   is still unset
    /// * `class` - class assigned to the record
    /// * `records` - destination map, keyed by lower-cased owner name and record type
    ///
    /// Fails when the type, owner name or (for anything but SOA) TTL is missing, when the RDATA
    /// does not parse, or when a second SOA record is stored under the same key.
    fn flush_record(
        record_parts: Vec<String>,
        origin: &Option<Name>,
        current_name: &Option<Name>,
        rtype: Option<RecordType>,
        ttl: &mut Option<u32>,
        class: DNSClass,
        records: &mut BTreeMap<RrKey, RecordSet>,
    ) -> ParseResult<()> {
        // the type decides which RDATA parser is used, so it has to be known first
        let rtype = match rtype {
            Some(rtype) => rtype,
            None => return Err(ParseErrorKind::Message("record type not specified").into()),
        };

        // every part of the record is expected to be plain character data
        let parts = record_parts.iter().map(String::as_str);
        let rdata = RData::parse(rtype, parts, origin.as_ref())?;

        // TODO COW or RC would reduce mem usage, perhaps Name should have an intern()...
        let owner = match current_name {
            Some(name) => name.clone(),
            None => return Err(ParseErrorKind::Message("record name not specified").into()),
        };

        let mut record = Record::new();
        record.set_name(owner);
        record.set_rr_type(rtype);
        record.set_dns_class(class);

        // pick the TTL before the rdata is moved into the record
        let record_ttl = match (rtype, &rdata) {
            (RecordType::SOA, RData::SOA(soa)) => {
                // the SOA carries its own timing: `minimum` becomes the default TTL of the
                // following records unless one was already given
                if ttl.is_none() {
                    *ttl = Some(soa.minimum());
                } // TODO: should this only set it if it's not set?

                // TODO, this looks wrong, get_expire() should be get_minimum(), right?
                soa.expire() as u32 // the spec seems a little inaccurate with u32 and i32
            }
            (RecordType::SOA, _) => {
                let msg = format!("Invalid RData here, expected SOA: {rdata:?}");
                return Err(ParseErrorKind::Msg(msg).into());
            }
            _ => match *ttl {
                Some(seconds) => seconds,
                None => return Err(ParseErrorKind::Message("record ttl not specified").into()),
            },
        };
        record.set_ttl(record_ttl);

        // TODO: validate record, e.g. the name of SRV record allows _ but others do not.

        record.set_data(Some(rdata));

        let key = RrKey::new(LowerName::new(record.name()), record.record_type());
        if let RecordType::SOA = rtype {
            // an SOA forms a record set of its own and may appear only once per key
            let replaced = records.insert(key, record.into());
            if replaced.is_some() {
                return Err(ParseErrorKind::Message("SOA is already specified").into());
            }
        } else {
            // create the record set on first use, then append the record to it
            let set = records
                .entry(key)
                .or_insert_with(|| RecordSet::new(record.name(), record.record_type(), 0));
            set.insert(record, 0);
        }

        Ok(())
    }

    /// parses the string following the rules from:
    ///  <https://tools.ietf.org/html/rfc2308> (NXCaching RFC) and
    ///  <https://www.zytrax.com/books/dns/apa/time.html>
    ///
    /// default is seconds
    /// #s = seconds = # x 1 seconds (really!)
    /// #m = minutes = # x 60 seconds
    /// #h = hours   = # x 3600 seconds
    /// #d = day     = # x 86400 seconds
    /// #w = week    = # x 604800 seconds
    ///
    /// returns the result of the parsing or and error
    ///
    /// # Example
    /// ```
    /// use hickory_proto::serialize::txt::Parser;
    ///
    /// assert_eq!(Parser::parse_time("0").unwrap(),  0);
    /// assert!(Parser::parse_time("s").is_err());
    /// assert!(Parser::parse_time("").is_err());
    /// assert_eq!(Parser::parse_time("0s").unwrap(), 0);
    /// assert_eq!(Parser::parse_time("1").unwrap(),  1);
    /// assert_eq!(Parser::parse_time("1S").unwrap(), 1);
    /// assert_eq!(Parser::parse_time("1s").unwrap(), 1);
    /// assert_eq!(Parser::parse_time("1M").unwrap(), 60);
    /// assert_eq!(Parser::parse_time("1m").unwrap(), 60);
    /// assert_eq!(Parser::parse_time("1H").unwrap(), 3600);
    /// assert_eq!(Parser::parse_time("1h").unwrap(), 3600);
    /// assert_eq!(Parser::parse_time("1D").unwrap(), 86400);
    /// assert_eq!(Parser::parse_time("1d").unwrap(), 86400);
    /// assert_eq!(Parser::parse_time("1W").unwrap(), 604800);
    /// assert_eq!(Parser::parse_time("1w").unwrap(), 604800);
    /// assert_eq!(Parser::parse_time("1s2d3w4h2m").unwrap(), 1+2*86400+3*604800+4*3600+2*60);
    /// assert_eq!(Parser::parse_time("3w3w").unwrap(), 3*604800+3*604800);
    /// assert!(Parser::parse_time("7102w").is_err());
    /// ```
    pub fn parse_time(ttl_str: &str) -> ParseResult<u32> {
        if ttl_str.is_empty() {
            return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into());
        }

        let (mut state, mut value) = (None, 0_u32);
        for (i, c) in ttl_str.chars().enumerate() {
            let start = match (state, c) {
                (None, '0'..='9') => {
                    state = Some(i);
                    continue;
                }
                (Some(_), '0'..='9') => continue,
                (Some(start), 'S' | 's' | 'M' | 'm' | 'H' | 'h' | 'D' | 'd' | 'W' | 'w') => start,
                _ => return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into()),
            };

            // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
            let number = u32::from_str(&ttl_str[start..i])
                .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;

            let multiplier = match c {
                'S' | 's' => 1,
                'M' | 'm' => 60,
                'H' | 'h' => 3_600,
                'D' | 'd' => 86_400,
                'W' | 'w' => 604_800,
                _ => unreachable!(),
            };

            value = number
                .checked_mul(multiplier)
                .and_then(|add| value.checked_add(add))
                .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;

            state = None;
        }

        if let Some(start) = state {
            // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
            let number = u32::from_str(&ttl_str[start..])
                .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
            value = value
                .checked_add(number)
                .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
        }

        Ok(value)
    }
}

/// States of the token-driven state machine in `Parser::parse`.
///
/// Each state names what the parser expects from the next token.
#[allow(unused)]
enum State {
    StartLine,    // start of line, @, $<WORD>, Name, Blank
    TtlClassType, // [<TTL>] [<class>] <type>,
    Ttl,          // $TTL <time>
    // RDATA parts collected so far; the record is built at end of line (or end of input)
    Record(Vec<String>),
    Include(Option<String>), // $INCLUDE <filename>
    // $ORIGIN <domain-name>
    Origin,
}

/// Max traversal depth for $INCLUDE files
///
/// `Parser::parse` compares this against the number of inputs on its lexer stack (the top-level
/// input included) and fails a further `$INCLUDE` once that number is greater than this value.
const MAX_INCLUDE_LEVEL: usize = 256;

#[cfg(test)]
mod tests {
    use super::*;

    /// An entry starting with a blank has no owner name, so its first word is read as a
    /// TTL, class or type; an unknown word there must fail as an unparsable record type.
    #[test]
    fn test_zone_parse() {
        let domain = Name::from_str("parameter.origin.org.").unwrap();

        let zone_data = r#"$ORIGIN parsed.zone.origin.org.
 faulty-record-type 60 IN A 1.2.3.4
"#;

        // Success and the wrong kind of failure are reported separately, each with the value
        // that explains it.
        match Parser::new(zone_data, None, Some(domain)).parse() {
            Ok(parsed) => panic!("unexpected success: {parsed:#?}"),
            Err(error) => {
                // the parser upper-cases the word before trying it as a class or type
                let message = error.to_string();
                assert!(
                    message.contains("FAULTY-RECORD-TYPE"),
                    "unexpected error: {message}"
                );
            }
        }
    }
}