hickory_proto/serialize/txt/zone.rs
1// Copyright 2015-2023 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// https://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use std::{
9 borrow::Cow,
10 collections::BTreeMap,
11 fs, mem,
12 path::{Path, PathBuf},
13 str::FromStr,
14};
15
16use crate::{
17 rr::{DNSClass, LowerName, Name, RData, Record, RecordSet, RecordType, RrKey},
18 serialize::txt::{
19 parse_rdata::RDataParser,
20 zone_lex::{Lexer, Token},
21 ParseError, ParseErrorKind, ParseResult,
22 },
23};
24
25/// ```text
26/// 5. ZONE FILES
27///
28/// Zone files are text files that contain RRs in text form. Since the
29/// contents of a zone can be expressed in the form of a list of RRs a
30/// Zone File is most often used to define a zone, though it can be used
31/// to list a cache's contents. Hence, this section first discusses the
32/// format of RRs in a Zone File, and then the special considerations when
33/// a Zone File is used to create a zone in some name server.
34///
35/// 5.1. Format
36///
37/// The format of these files is a sequence of entries. Entries are
38/// predominantly line-oriented, though parentheses can be used to continue
39/// a list of items across a line boundary, and text literals can contain
40/// CRLF within the text. Any combination of tabs and spaces act as a
41/// delimiter between the separate items that make up an entry. The end of
42/// any line in the Zone File can end with a comment. The comment starts
43/// with a ";" (semicolon).
44///
45/// The following entries are defined:
46///
47/// <blank>[<comment>]
48///
49/// $ORIGIN <domain-name> [<comment>]
50///
51/// $INCLUDE <file-name> [<domain-name>] [<comment>]
52///
53/// <domain-name><rr> [<comment>]
54///
55/// <blank><rr> [<comment>]
56///
57/// Blank lines, with or without comments, are allowed anywhere in the file.
58///
59/// Two control entries are defined: $ORIGIN and $INCLUDE. $ORIGIN is
60/// followed by a domain name, and resets the current origin for relative
61/// domain names to the stated name. $INCLUDE inserts the named file into
62/// the current file, and may optionally specify a domain name that sets the
63/// relative domain name origin for the included file. $INCLUDE may also
64/// have a comment. Note that a $INCLUDE entry never changes the relative
65/// origin of the parent file, regardless of changes to the relative origin
66/// made within the included file.
67///
68/// The last two forms represent RRs. If an entry for an RR begins with a
69/// blank, then the RR is assumed to be owned by the last stated owner. If
70/// an RR entry begins with a <domain-name>, then the owner name is reset.
71///
72/// <rr> contents take one of the following forms:
73///
74/// [<TTL>] [<class>] <type> <RDATA>
75///
76/// [<class>] [<TTL>] <type> <RDATA>
77///
78/// The RR begins with optional TTL and class fields, followed by a type and
79/// RDATA field appropriate to the type and class. Class and type use the
80/// standard mnemonics, TTL is a decimal integer. Omitted class and TTL
81/// values are default to the last explicitly stated values. Since type and
82/// class mnemonics are disjoint, the parse is unique. (Note that this
83/// order is different from the order used in examples and the order used in
84/// the actual RRs; the given order allows easier parsing and defaulting.)
85///
86/// <domain-name>s make up a large share of the data in the Zone File.
87/// The labels in the domain name are expressed as character strings and
88/// separated by dots. Quoting conventions allow arbitrary characters to be
89/// stored in domain names. Domain names that end in a dot are called
90/// absolute, and are taken as complete. Domain names which do not end in a
91/// dot are called relative; the actual domain name is the concatenation of
92/// the relative part with an origin specified in a $ORIGIN, $INCLUDE, or as
93/// an argument to the Zone File loading routine. A relative name is an
94/// error when no origin is available.
95///
96/// <character-string> is expressed in one or two ways: as a contiguous set
97/// of characters without interior spaces, or as a string beginning with a "
98/// and ending with a ". Inside a " delimited string any character can
99/// occur, except for a " itself, which must be quoted using \ (back slash).
100///
101/// Because these files are text files several special encodings are
102/// necessary to allow arbitrary data to be loaded. In particular:
103///
104/// of the root.
105///
106/// @ A free standing @ is used to denote the current origin.
107///
108/// \X where X is any character other than a digit (0-9), is
109/// used to quote that character so that its special meaning
110/// does not apply. For example, "\." can be used to place
111/// a dot character in a label.
112///
113/// \DDD where each D is a digit is the octet corresponding to
114/// the decimal number described by DDD. The resulting
115/// octet is assumed to be text and is not checked for
116/// special meaning.
117///
118/// ( ) Parentheses are used to group data that crosses a line
119/// boundary. In effect, line terminations are not
120/// recognized within parentheses.
121///
122/// ; Semicolon is used to start a comment; the remainder of
123/// the line is ignored.
124/// ```
125pub struct Parser<'a> {
126 lexers: Vec<(Lexer<'a>, Option<PathBuf>)>,
127 origin: Option<Name>,
128}
129
130impl<'a> Parser<'a> {
131 /// Returns a new Zone file parser
132 ///
133 /// The `path` argument's parent directory is used to resolve relative `$INCLUDE` paths.
134 /// Relative `$INCLUDE` paths will yield an error if `path` is `None`.
135 pub fn new(
136 input: impl Into<Cow<'a, str>>,
137 path: Option<PathBuf>,
138 origin: Option<Name>,
139 ) -> Self {
140 Self {
141 lexers: vec![(Lexer::new(input), path)],
142 origin,
143 }
144 }
145
146 /// Parse a file from the Lexer
147 ///
148 /// # Return
149 ///
150 /// A pair of the Zone origin name and a map of all Keys to RecordSets
151 pub fn parse(mut self) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
152 let mut origin = self.origin;
153 let mut records: BTreeMap<RrKey, RecordSet> = BTreeMap::new();
154 let mut class: DNSClass = DNSClass::IN;
155 let mut current_name: Option<Name> = None;
156 let mut rtype: Option<RecordType> = None;
157 let mut ttl: Option<u32> = None;
158 let mut state = State::StartLine;
159 let mut stack = self.lexers.len();
160
161 'outer: while let Some((lexer, path)) = self.lexers.last_mut() {
162 while let Some(t) = lexer.next_token()? {
163 state = match state {
164 State::StartLine => {
165 // current_name is not reset on the next line b/c it might be needed from the previous
166 rtype = None;
167
168 match t {
169 // if Dollar, then $INCLUDE or $ORIGIN
170 Token::Include => State::Include(None),
171 Token::Origin => State::Origin,
172 Token::Ttl => State::Ttl,
173
174 // if CharData, then Name then ttl_class_type
175 Token::CharData(data) => {
176 current_name = Some(Name::parse(&data, origin.as_ref())?);
177 State::TtlClassType
178 }
179
180 // @ is a placeholder for specifying the current origin
181 Token::At => {
182 current_name = origin.clone(); // TODO a COW or RC would reduce copies...
183 State::TtlClassType
184 }
185
186 // if blank, then nothing or ttl_class_type
187 Token::Blank => State::TtlClassType,
188 Token::EOL => State::StartLine, // probably a comment
189 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
190 }
191 }
192 State::Ttl => match t {
193 Token::CharData(data) => {
194 ttl = Some(Self::parse_time(&data)?);
195 State::StartLine
196 }
197 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
198 },
199 State::Origin => {
200 match t {
201 Token::CharData(data) => {
202 // TODO an origin was specified, should this be legal? definitely confusing...
203 origin = Some(Name::parse(&data, None)?);
204 State::StartLine
205 }
206 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
207 }
208 }
209 State::Include(include_path) => match (t, include_path) {
210 (Token::CharData(data), None) => State::Include(Some(data)),
211 (Token::EOL, Some(include_path)) => {
212 // RFC1035 (section 5) does not specify how filename for $INCLUDE
213 // should be resolved into file path. The underlying code implements the
214 // following:
215 // * if the path is absolute (relies on Path::is_absolute), it uses normalized path
216 // * otherwise, it joins the path with parent root of the current file
217 //
218 // TODO: Inlining files specified using non-relative path might potentially introduce
219 // security issue in some cases (e.g. when working with zone files from untrusted sources)
220 // and should probably be configurable by user.
221
222 if stack > MAX_INCLUDE_LEVEL {
223 return Err(ParseErrorKind::Message(
224 "Max depth level for nested $INCLUDE is reached",
225 )
226 .into());
227 }
228
229 let include = Path::new(&include_path);
230 let include = match (include.is_absolute(), path) {
231 (true, _) => include.to_path_buf(),
232 (false, Some(path)) => path
233 .parent()
234 .expect("file has to have parent folder")
235 .join(include),
236 (false, None) => {
237 return Err(ParseErrorKind::Message(
238 "Relative $INCLUDE is not supported",
239 )
240 .into());
241 }
242 };
243
244 let input = fs::read_to_string(&include)?;
245 let lexer = Lexer::new(input);
246 self.lexers.push((lexer, Some(include)));
247 stack += 1;
248 state = State::StartLine;
249 continue 'outer;
250 }
251 (Token::CharData(_), Some(_)) => {
252 return Err(ParseErrorKind::Message(
253 "Domain name for $INCLUDE is not supported",
254 )
255 .into());
256 }
257 (t, _) => {
258 return Err(ParseErrorKind::UnexpectedToken(t).into());
259 }
260 },
261 State::TtlClassType => {
262 match t {
263 // if number, TTL
264 // Token::Number(ref num) => ttl = Some(*num),
265 // One of Class or Type (these cannot be overlapping!)
266 Token::CharData(mut data) => {
267 // if it's a number it's a ttl
268 let result: ParseResult<u32> = Self::parse_time(&data);
269 if result.is_ok() {
270 ttl = result.ok();
271 State::TtlClassType // hm, should this go to just ClassType?
272 } else {
273 // if can parse DNSClass, then class
274 data.make_ascii_uppercase();
275 let result = DNSClass::from_str(&data);
276 if let Ok(parsed) = result {
277 class = parsed;
278 State::TtlClassType
279 } else {
280 // if can parse RecordType, then RecordType
281 rtype = Some(RecordType::from_str(&data)?);
282 State::Record(vec![])
283 }
284 }
285 }
286 // could be nothing if started with blank and is a comment, i.e. EOL
287 Token::EOL => {
288 State::StartLine // next line
289 }
290 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
291 }
292 }
293 State::Record(record_parts) => {
294 // b/c of ownership rules, perhaps, just collect all the RData components as a list of
295 // tokens to pass into the processor
296 match t {
297 Token::EOL => {
298 Self::flush_record(
299 record_parts,
300 &origin,
301 ¤t_name,
302 rtype,
303 &mut ttl,
304 class,
305 &mut records,
306 )?;
307 State::StartLine
308 }
309 Token::CharData(part) => {
310 let mut record_parts = record_parts;
311 record_parts.push(part);
312 State::Record(record_parts)
313 }
314 // TODO: we should not tokenize the list...
315 Token::List(list) => {
316 let mut record_parts = record_parts;
317 record_parts.extend(list);
318 State::Record(record_parts)
319 }
320 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
321 }
322 }
323 };
324 }
325
326 // Extra flush at the end for the case of missing endline
327 if let State::Record(record_parts) = mem::replace(&mut state, State::StartLine) {
328 Self::flush_record(
329 record_parts,
330 &origin,
331 ¤t_name,
332 rtype,
333 &mut ttl,
334 class,
335 &mut records,
336 )?;
337 }
338
339 stack -= 1;
340 self.lexers.pop();
341 }
342
343 //
344 // build the Authority and return.
345 let origin = origin.ok_or_else(|| {
346 ParseError::from(ParseErrorKind::Message("$ORIGIN was not specified"))
347 })?;
348 Ok((origin, records))
349 }
350
351 fn flush_record(
352 record_parts: Vec<String>,
353 origin: &Option<Name>,
354 current_name: &Option<Name>,
355 rtype: Option<RecordType>,
356 ttl: &mut Option<u32>,
357 class: DNSClass,
358 records: &mut BTreeMap<RrKey, RecordSet>,
359 ) -> ParseResult<()> {
360 // call out to parsers for difference record types
361 // all tokens as part of the Record should be chardata...
362 let rtype = rtype.ok_or_else(|| {
363 ParseError::from(ParseErrorKind::Message("record type not specified"))
364 })?;
365 let rdata = RData::parse(
366 rtype,
367 record_parts.iter().map(AsRef::as_ref),
368 origin.as_ref(),
369 )?;
370
371 // verify that we have everything we need for the record
372 let mut record = Record::new();
373 // TODO COW or RC would reduce mem usage, perhaps Name should have an intern()...
374 // might want to wait until RC.weak() stabilizes, as that would be needed for global
375 // memory where you want
376 record.set_name(current_name.clone().ok_or_else(|| {
377 ParseError::from(ParseErrorKind::Message("record name not specified"))
378 })?);
379 record.set_rr_type(rtype);
380 record.set_dns_class(class);
381
382 // slightly annoying, need to grab the TTL, then move rdata into the record,
383 // then check the Type again and have custom add logic.
384 match rtype {
385 RecordType::SOA => {
386 // TTL for the SOA is set internally...
387 // expire is for the SOA, minimum is default for records
388 if let RData::SOA(ref soa) = rdata {
389 // TODO, this looks wrong, get_expire() should be get_minimum(), right?
390 record.set_ttl(soa.expire() as u32); // the spec seems a little inaccurate with u32 and i32
391 if ttl.is_none() {
392 *ttl = Some(soa.minimum());
393 } // TODO: should this only set it if it's not set?
394 } else {
395 let msg = format!("Invalid RData here, expected SOA: {rdata:?}");
396 return ParseResult::Err(ParseError::from(ParseErrorKind::Msg(msg)));
397 }
398 }
399 _ => {
400 record.set_ttl(ttl.ok_or_else(|| {
401 ParseError::from(ParseErrorKind::Message("record ttl not specified"))
402 })?);
403 }
404 }
405
406 // TODO: validate record, e.g. the name of SRV record allows _ but others do not.
407
408 // move the rdata into record...
409 record.set_data(Some(rdata));
410
411 // add to the map
412 let key = RrKey::new(LowerName::new(record.name()), record.record_type());
413 match rtype {
414 RecordType::SOA => {
415 let set = record.into();
416 if records.insert(key, set).is_some() {
417 return Err(ParseErrorKind::Message("SOA is already specified").into());
418 }
419 }
420 _ => {
421 // add a Vec if it's not there, then add the record to the list
422 let set = records
423 .entry(key)
424 .or_insert_with(|| RecordSet::new(record.name(), record.record_type(), 0));
425 set.insert(record, 0);
426 }
427 }
428 Ok(())
429 }
430
431 /// parses the string following the rules from:
432 /// <https://tools.ietf.org/html/rfc2308> (NXCaching RFC) and
433 /// <https://www.zytrax.com/books/dns/apa/time.html>
434 ///
435 /// default is seconds
436 /// #s = seconds = # x 1 seconds (really!)
437 /// #m = minutes = # x 60 seconds
438 /// #h = hours = # x 3600 seconds
439 /// #d = day = # x 86400 seconds
440 /// #w = week = # x 604800 seconds
441 ///
442 /// returns the result of the parsing or and error
443 ///
444 /// # Example
445 /// ```
446 /// use hickory_proto::serialize::txt::Parser;
447 ///
448 /// assert_eq!(Parser::parse_time("0").unwrap(), 0);
449 /// assert!(Parser::parse_time("s").is_err());
450 /// assert!(Parser::parse_time("").is_err());
451 /// assert_eq!(Parser::parse_time("0s").unwrap(), 0);
452 /// assert_eq!(Parser::parse_time("1").unwrap(), 1);
453 /// assert_eq!(Parser::parse_time("1S").unwrap(), 1);
454 /// assert_eq!(Parser::parse_time("1s").unwrap(), 1);
455 /// assert_eq!(Parser::parse_time("1M").unwrap(), 60);
456 /// assert_eq!(Parser::parse_time("1m").unwrap(), 60);
457 /// assert_eq!(Parser::parse_time("1H").unwrap(), 3600);
458 /// assert_eq!(Parser::parse_time("1h").unwrap(), 3600);
459 /// assert_eq!(Parser::parse_time("1D").unwrap(), 86400);
460 /// assert_eq!(Parser::parse_time("1d").unwrap(), 86400);
461 /// assert_eq!(Parser::parse_time("1W").unwrap(), 604800);
462 /// assert_eq!(Parser::parse_time("1w").unwrap(), 604800);
463 /// assert_eq!(Parser::parse_time("1s2d3w4h2m").unwrap(), 1+2*86400+3*604800+4*3600+2*60);
464 /// assert_eq!(Parser::parse_time("3w3w").unwrap(), 3*604800+3*604800);
465 /// assert!(Parser::parse_time("7102w").is_err());
466 /// ```
467 pub fn parse_time(ttl_str: &str) -> ParseResult<u32> {
468 if ttl_str.is_empty() {
469 return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into());
470 }
471
472 let (mut state, mut value) = (None, 0_u32);
473 for (i, c) in ttl_str.chars().enumerate() {
474 let start = match (state, c) {
475 (None, '0'..='9') => {
476 state = Some(i);
477 continue;
478 }
479 (Some(_), '0'..='9') => continue,
480 (Some(start), 'S' | 's' | 'M' | 'm' | 'H' | 'h' | 'D' | 'd' | 'W' | 'w') => start,
481 _ => return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into()),
482 };
483
484 // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
485 let number = u32::from_str(&ttl_str[start..i])
486 .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
487
488 let multiplier = match c {
489 'S' | 's' => 1,
490 'M' | 'm' => 60,
491 'H' | 'h' => 3_600,
492 'D' | 'd' => 86_400,
493 'W' | 'w' => 604_800,
494 _ => unreachable!(),
495 };
496
497 value = number
498 .checked_mul(multiplier)
499 .and_then(|add| value.checked_add(add))
500 .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
501
502 state = None;
503 }
504
505 if let Some(start) = state {
506 // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
507 let number = u32::from_str(&ttl_str[start..])
508 .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
509 value = value
510 .checked_add(number)
511 .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
512 }
513
514 Ok(value)
515 }
516}
517
/// States of the zone file parser; `Parser::parse` moves between them one token at a time.
#[allow(unused)]
enum State {
    /// start of line, @, $<WORD>, Name, Blank
    StartLine,
    /// [<TTL>] [<class>] <type>,
    TtlClassType,
    /// $TTL <time>
    Ttl,
    /// RDATA tokens collected so far for the record on the current line
    Record(Vec<String>),
    /// $INCLUDE <filename>; holds the file name once it has been read
    Include(Option<String>),
    /// $ORIGIN <domain-name>
    Origin,
}
527
/// Upper bound on `$INCLUDE` nesting: once the parser's lexer stack is deeper than this,
/// a further `$INCLUDE` is rejected with an error.
const MAX_INCLUDE_LEVEL: usize = 256;
530
#[cfg(test)]
mod tests {
    use super::*;

    /// A word that is neither a TTL, a class nor a known record type must make parsing
    /// fail, and the error has to name the offending (upper-cased) word.
    #[test]
    fn test_zone_parse() {
        let domain = Name::from_str("parameter.origin.org.").unwrap();

        // NOTE(review): the record line deliberately starts with a blank; presumably the
        // lexer reports it as a blank token, so the first word is read where a TTL, class
        // or type is expected instead of as an owner name -- confirm against the lexer.
        let zone_data = r#"$ORIGIN parsed.zone.origin.org.
 faulty-record-type 60 IN A 1.2.3.4
"#;

        // Check success and error content separately so each failure reports what
        // actually went wrong.
        let err = match Parser::new(zone_data, None, Some(domain)).parse() {
            Ok(parsed) => panic!("unexpected success: {parsed:#?}"),
            Err(err) => err,
        };

        let message = err.to_string();
        assert!(
            message.contains("FAULTY-RECORD-TYPE"),
            "error does not name the faulty record type: {message}"
        );
    }
}
556}