hickory_proto/serialize/txt/zone.rs
1// Copyright 2015-2023 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// https://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use alloc::{
9 borrow::Cow,
10 collections::BTreeMap,
11 string::{String, ToString},
12 vec::Vec,
13};
14use core::{mem, str::FromStr};
15use std::{
16 fs,
17 path::{Path, PathBuf},
18};
19
20use crate::{
21 rr::{DNSClass, LowerName, Name, RData, Record, RecordSet, RecordType, RrKey},
22 serialize::txt::{
23 ParseError, ParseErrorKind, ParseResult,
24 parse_rdata::RDataParser,
25 zone_lex::{Lexer, Token},
26 },
27};
28
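/// A parser for DNS zone files written in the RFC 1035 master-file text format.
///
/// In addition to the entries described below, the parser accepts a `$TTL <time>`
/// control entry. The optional `<domain-name>` argument of `$INCLUDE` is not
/// supported and is rejected with an error.
///
/// The format is quoted here from
/// [RFC 1035, section 5](https://tools.ietf.org/html/rfc1035#section-5):
///
/// ```text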
30/// 5. ZONE FILES
31///
32/// Zone files are text files that contain RRs in text form. Since the
33/// contents of a zone can be expressed in the form of a list of RRs a
34/// Zone File is most often used to define a zone, though it can be used
35/// to list a cache's contents. Hence, this section first discusses the
36/// format of RRs in a Zone File, and then the special considerations when
37/// a Zone File is used to create a zone in some name server.
38///
39/// 5.1. Format
40///
41/// The format of these files is a sequence of entries. Entries are
42/// predominantly line-oriented, though parentheses can be used to continue
43/// a list of items across a line boundary, and text literals can contain
44/// CRLF within the text. Any combination of tabs and spaces act as a
45/// delimiter between the separate items that make up an entry. The end of
46/// any line in the Zone File can end with a comment. The comment starts
47/// with a ";" (semicolon).
48///
49/// The following entries are defined:
50///
51/// <blank>[<comment>]
52///
53/// $ORIGIN <domain-name> [<comment>]
54///
55/// $INCLUDE <file-name> [<domain-name>] [<comment>]
56///
57/// <domain-name><rr> [<comment>]
58///
59/// <blank><rr> [<comment>]
60///
61/// Blank lines, with or without comments, are allowed anywhere in the file.
62///
63/// Two control entries are defined: $ORIGIN and $INCLUDE. $ORIGIN is
64/// followed by a domain name, and resets the current origin for relative
65/// domain names to the stated name. $INCLUDE inserts the named file into
66/// the current file, and may optionally specify a domain name that sets the
67/// relative domain name origin for the included file. $INCLUDE may also
68/// have a comment. Note that a $INCLUDE entry never changes the relative
69/// origin of the parent file, regardless of changes to the relative origin
70/// made within the included file.
71///
72/// The last two forms represent RRs. If an entry for an RR begins with a
73/// blank, then the RR is assumed to be owned by the last stated owner. If
74/// an RR entry begins with a <domain-name>, then the owner name is reset.
75///
76/// <rr> contents take one of the following forms:
77///
78/// [<TTL>] [<class>] <type> <RDATA>
79///
80/// [<class>] [<TTL>] <type> <RDATA>
81///
82/// The RR begins with optional TTL and class fields, followed by a type and
83/// RDATA field appropriate to the type and class. Class and type use the
84/// standard mnemonics, TTL is a decimal integer. Omitted class and TTL
85/// values are default to the last explicitly stated values. Since type and
86/// class mnemonics are disjoint, the parse is unique. (Note that this
87/// order is different from the order used in examples and the order used in
88/// the actual RRs; the given order allows easier parsing and defaulting.)
89///
90/// <domain-name>s make up a large share of the data in the Zone File.
91/// The labels in the domain name are expressed as character strings and
92/// separated by dots. Quoting conventions allow arbitrary characters to be
93/// stored in domain names. Domain names that end in a dot are called
94/// absolute, and are taken as complete. Domain names which do not end in a
95/// dot are called relative; the actual domain name is the concatenation of
96/// the relative part with an origin specified in a $ORIGIN, $INCLUDE, or as
97/// an argument to the Zone File loading routine. A relative name is an
98/// error when no origin is available.
99///
100/// <character-string> is expressed in one or two ways: as a contiguous set
101/// of characters without interior spaces, or as a string beginning with a "
102/// and ending with a ". Inside a " delimited string any character can
103/// occur, except for a " itself, which must be quoted using \ (back slash).
104///
105/// Because these files are text files several special encodings are
106/// necessary to allow arbitrary data to be loaded. In particular:
107///
108/// of the root.
109///
110/// @ A free standing @ is used to denote the current origin.
111///
112/// \X where X is any character other than a digit (0-9), is
113/// used to quote that character so that its special meaning
114/// does not apply. For example, "\." can be used to place
115/// a dot character in a label.
116///
117/// \DDD where each D is a digit is the octet corresponding to
118/// the decimal number described by DDD. The resulting
119/// octet is assumed to be text and is not checked for
120/// special meaning.
121///
122/// ( ) Parentheses are used to group data that crosses a line
123/// boundary. In effect, line terminations are not
124/// recognized within parentheses.
125///
126/// ; Semicolon is used to start a comment; the remainder of
127/// the line is ignored.
128/// ```
pub struct Parser<'a> {
    /// Stack of lexers, each paired with the path of the file it reads (if known).
    ///
    /// The last entry is the source currently being tokenized: `$INCLUDE` pushes a
    /// new entry and an exhausted lexer is popped, resuming the one below it.
    lexers: Vec<(Lexer<'a>, Option<PathBuf>)>,
    /// Origin supplied by the caller; relative names are resolved against it until
    /// a `$ORIGIN` entry replaces it.
    origin: Option<Name>,
}
133
134impl<'a> Parser<'a> {
135 /// Returns a new Zone file parser
136 ///
137 /// The `path` argument's parent directory is used to resolve relative `$INCLUDE` paths.
138 /// Relative `$INCLUDE` paths will yield an error if `path` is `None`.
139 pub fn new(
140 input: impl Into<Cow<'a, str>>,
141 path: Option<PathBuf>,
142 origin: Option<Name>,
143 ) -> Self {
144 Self {
145 lexers: vec![(Lexer::new(input), path)],
146 origin,
147 }
148 }
149
150 /// Parse a file from the Lexer
151 ///
152 /// # Return
153 ///
154 /// A pair of the Zone origin name and a map of all Keys to RecordSets
155 pub fn parse(mut self) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
156 let mut origin = self.origin;
157 let mut records: BTreeMap<RrKey, RecordSet> = BTreeMap::new();
158 let mut class: DNSClass = DNSClass::IN;
159 let mut current_name: Option<Name> = None;
160 let mut rtype: Option<RecordType> = None;
161 let mut ttl: Option<u32> = None;
162 let mut state = State::StartLine;
163 let mut stack = self.lexers.len();
164
165 'outer: while let Some((lexer, path)) = self.lexers.last_mut() {
166 while let Some(t) = lexer.next_token()? {
167 state = match state {
168 State::StartLine => {
169 // current_name is not reset on the next line b/c it might be needed from the previous
170 rtype = None;
171
172 match t {
173 // if Dollar, then $INCLUDE or $ORIGIN
174 Token::Include => State::Include(None),
175 Token::Origin => State::Origin,
176 Token::Ttl => State::Ttl,
177
178 // if CharData, then Name then ttl_class_type
179 Token::CharData(data) => {
180 current_name = Some(Name::parse(&data, origin.as_ref())?);
181 State::TtlClassType
182 }
183
184 // @ is a placeholder for specifying the current origin
185 Token::At => {
186 current_name.clone_from(&origin); // TODO a COW or RC would reduce copies...
187 State::TtlClassType
188 }
189
190 // if blank, then nothing or ttl_class_type
191 Token::Blank => State::TtlClassType,
192 Token::EOL => State::StartLine, // probably a comment
193 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
194 }
195 }
196 State::Ttl => match t {
197 Token::CharData(data) => {
198 ttl = Some(Self::parse_time(&data)?);
199 State::StartLine
200 }
201 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
202 },
203 State::Origin => {
204 match t {
205 Token::CharData(data) => {
206 // TODO an origin was specified, should this be legal? definitely confusing...
207 origin = Some(Name::parse(&data, None)?);
208 State::StartLine
209 }
210 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
211 }
212 }
213 State::Include(include_path) => match (t, include_path) {
214 (Token::CharData(data), None) => State::Include(Some(data)),
215 (Token::EOL, Some(include_path)) => {
216 // RFC1035 (section 5) does not specify how filename for $INCLUDE
217 // should be resolved into file path. The underlying code implements the
218 // following:
219 // * if the path is absolute (relies on Path::is_absolute), it uses normalized path
220 // * otherwise, it joins the path with parent root of the current file
221 //
222 // TODO: Inlining files specified using non-relative path might potentially introduce
223 // security issue in some cases (e.g. when working with zone files from untrusted sources)
224 // and should probably be configurable by user.
225
226 if stack > MAX_INCLUDE_LEVEL {
227 return Err(ParseErrorKind::Message(
228 "Max depth level for nested $INCLUDE is reached",
229 )
230 .into());
231 }
232
233 let include = Path::new(&include_path);
234 let include = match (include.is_absolute(), path) {
235 (true, _) => include.to_path_buf(),
236 (false, Some(path)) => path
237 .parent()
238 .expect("file has to have parent folder")
239 .join(include),
240 (false, None) => {
241 return Err(ParseErrorKind::Message(
242 "Relative $INCLUDE is not supported",
243 )
244 .into());
245 }
246 };
247
248 let input = fs::read_to_string(&include)?;
249 let lexer = Lexer::new(input);
250 self.lexers.push((lexer, Some(include)));
251 stack += 1;
252 state = State::StartLine;
253 continue 'outer;
254 }
255 (Token::CharData(_), Some(_)) => {
256 return Err(ParseErrorKind::Message(
257 "Domain name for $INCLUDE is not supported",
258 )
259 .into());
260 }
261 (t, _) => {
262 return Err(ParseErrorKind::UnexpectedToken(t).into());
263 }
264 },
265 State::TtlClassType => {
266 match t {
267 // if number, TTL
268 // Token::Number(num) => ttl = Some(*num),
269 // One of Class or Type (these cannot be overlapping!)
270 Token::CharData(mut data) => {
271 // if it's a number it's a ttl
272 let result: ParseResult<u32> = Self::parse_time(&data);
273 if result.is_ok() {
274 ttl = result.ok();
275 State::TtlClassType // hm, should this go to just ClassType?
276 } else {
277 // if can parse DNSClass, then class
278 data.make_ascii_uppercase();
279 let result = DNSClass::from_str(&data);
280 if let Ok(parsed) = result {
281 class = parsed;
282 State::TtlClassType
283 } else {
284 // if can parse RecordType, then RecordType
285 rtype = Some(RecordType::from_str(&data)?);
286 State::Record(vec![])
287 }
288 }
289 }
290 // could be nothing if started with blank and is a comment, i.e. EOL
291 Token::EOL => {
292 State::StartLine // next line
293 }
294 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
295 }
296 }
297 State::Record(record_parts) => {
298 // b/c of ownership rules, perhaps, just collect all the RData components as a list of
299 // tokens to pass into the processor
300 match t {
301 Token::EOL => {
302 Self::flush_record(
303 record_parts,
304 &origin,
305 ¤t_name,
306 rtype,
307 &mut ttl,
308 class,
309 &mut records,
310 )?;
311 State::StartLine
312 }
313 Token::CharData(part) => {
314 let mut record_parts = record_parts;
315 record_parts.push(part);
316 State::Record(record_parts)
317 }
318 // TODO: we should not tokenize the list...
319 Token::List(list) => {
320 let mut record_parts = record_parts;
321 record_parts.extend(list);
322 State::Record(record_parts)
323 }
324 _ => return Err(ParseErrorKind::UnexpectedToken(t).into()),
325 }
326 }
327 };
328 }
329
330 // Extra flush at the end for the case of missing endline
331 if let State::Record(record_parts) = mem::replace(&mut state, State::StartLine) {
332 Self::flush_record(
333 record_parts,
334 &origin,
335 ¤t_name,
336 rtype,
337 &mut ttl,
338 class,
339 &mut records,
340 )?;
341 }
342
343 stack -= 1;
344 self.lexers.pop();
345 }
346
347 //
348 // build the Authority and return.
349 let origin = origin.ok_or_else(|| {
350 ParseError::from(ParseErrorKind::Message("$ORIGIN was not specified"))
351 })?;
352 Ok((origin, records))
353 }
354
355 fn flush_record(
356 record_parts: Vec<String>,
357 origin: &Option<Name>,
358 current_name: &Option<Name>,
359 rtype: Option<RecordType>,
360 ttl: &mut Option<u32>,
361 class: DNSClass,
362 records: &mut BTreeMap<RrKey, RecordSet>,
363 ) -> ParseResult<()> {
364 // call out to parsers for difference record types
365 // all tokens as part of the Record should be chardata...
366 let rtype = rtype.ok_or_else(|| {
367 ParseError::from(ParseErrorKind::Message("record type not specified"))
368 })?;
369 let rdata = RData::parse(
370 rtype,
371 record_parts.iter().map(AsRef::as_ref),
372 origin.as_ref(),
373 )?;
374
375 // verify that we have everything we need for the record
376 // TODO COW or RC would reduce mem usage, perhaps Name should have an intern()...
377 // might want to wait until RC.weak() stabilizes, as that would be needed for global
378 // memory where you want
379 let name = current_name.clone().ok_or_else(|| {
380 ParseError::from(ParseErrorKind::Message("record name not specified"))
381 })?;
382
383 // slightly annoying, need to grab the TTL, then move rdata into the record,
384 // then check the Type again and have custom add logic.
385 let set_ttl = match rtype {
386 RecordType::SOA => {
387 // TTL for the SOA is set internally...
388 // expire is for the SOA, minimum is default for records
389 if let RData::SOA(soa) = &rdata {
390 // TODO, this looks wrong, get_expire() should be get_minimum(), right?
391 let set_ttl = soa.expire() as u32; // the spec seems a little inaccurate with u32 and i32
392 if ttl.is_none() {
393 *ttl = Some(soa.minimum());
394 } // TODO: should this only set it if it's not set?
395 set_ttl
396 } else {
397 let msg = format!("Invalid RData here, expected SOA: {rdata:?}");
398 return ParseResult::Err(ParseError::from(ParseErrorKind::Msg(msg)));
399 }
400 }
401 _ => ttl.ok_or_else(|| {
402 ParseError::from(ParseErrorKind::Message("record ttl not specified"))
403 })?,
404 };
405
406 // TODO: validate record, e.g. the name of SRV record allows _ but others do not.
407
408 // move the rdata into record...
409 let mut record = Record::from_rdata(name, set_ttl, rdata);
410 record.set_dns_class(class);
411
412 // add to the map
413 let key = RrKey::new(LowerName::new(record.name()), record.record_type());
414 match rtype {
415 RecordType::SOA => {
416 let set = record.into();
417 if records.insert(key, set).is_some() {
418 return Err(ParseErrorKind::Message("SOA is already specified").into());
419 }
420 }
421 _ => {
422 // add a Vec if it's not there, then add the record to the list
423 let set = records.entry(key).or_insert_with(|| {
424 RecordSet::new(record.name().clone(), record.record_type(), 0)
425 });
426 set.insert(record, 0);
427 }
428 }
429 Ok(())
430 }
431
432 /// parses the string following the rules from:
433 /// <https://tools.ietf.org/html/rfc2308> (NXCaching RFC) and
434 /// <https://www.zytrax.com/books/dns/apa/time.html>
435 ///
436 /// default is seconds
437 /// #s = seconds = # x 1 seconds (really!)
438 /// #m = minutes = # x 60 seconds
439 /// #h = hours = # x 3600 seconds
440 /// #d = day = # x 86400 seconds
441 /// #w = week = # x 604800 seconds
442 ///
443 /// returns the result of the parsing or and error
444 ///
445 /// # Example
446 /// ```
447 /// use hickory_proto::serialize::txt::Parser;
448 ///
449 /// assert_eq!(Parser::parse_time("0").unwrap(), 0);
450 /// assert!(Parser::parse_time("s").is_err());
451 /// assert!(Parser::parse_time("").is_err());
452 /// assert_eq!(Parser::parse_time("0s").unwrap(), 0);
453 /// assert_eq!(Parser::parse_time("1").unwrap(), 1);
454 /// assert_eq!(Parser::parse_time("1S").unwrap(), 1);
455 /// assert_eq!(Parser::parse_time("1s").unwrap(), 1);
456 /// assert_eq!(Parser::parse_time("1M").unwrap(), 60);
457 /// assert_eq!(Parser::parse_time("1m").unwrap(), 60);
458 /// assert_eq!(Parser::parse_time("1H").unwrap(), 3600);
459 /// assert_eq!(Parser::parse_time("1h").unwrap(), 3600);
460 /// assert_eq!(Parser::parse_time("1D").unwrap(), 86400);
461 /// assert_eq!(Parser::parse_time("1d").unwrap(), 86400);
462 /// assert_eq!(Parser::parse_time("1W").unwrap(), 604800);
463 /// assert_eq!(Parser::parse_time("1w").unwrap(), 604800);
464 /// assert_eq!(Parser::parse_time("1s2d3w4h2m").unwrap(), 1+2*86400+3*604800+4*3600+2*60);
465 /// assert_eq!(Parser::parse_time("3w3w").unwrap(), 3*604800+3*604800);
466 /// assert!(Parser::parse_time("7102w").is_err());
467 /// ```
468 pub fn parse_time(ttl_str: &str) -> ParseResult<u32> {
469 if ttl_str.is_empty() {
470 return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into());
471 }
472
473 let (mut state, mut value) = (None, 0_u32);
474 for (i, c) in ttl_str.chars().enumerate() {
475 let start = match (state, c) {
476 (None, '0'..='9') => {
477 state = Some(i);
478 continue;
479 }
480 (Some(_), '0'..='9') => continue,
481 (Some(start), 'S' | 's' | 'M' | 'm' | 'H' | 'h' | 'D' | 'd' | 'W' | 'w') => start,
482 _ => return Err(ParseErrorKind::ParseTime(ttl_str.to_string()).into()),
483 };
484
485 // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
486 let number = u32::from_str(&ttl_str[start..i])
487 .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
488
489 let multiplier = match c {
490 'S' | 's' => 1,
491 'M' | 'm' => 60,
492 'H' | 'h' => 3_600,
493 'D' | 'd' => 86_400,
494 'W' | 'w' => 604_800,
495 _ => unreachable!(),
496 };
497
498 value = number
499 .checked_mul(multiplier)
500 .and_then(|add| value.checked_add(add))
501 .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
502
503 state = None;
504 }
505
506 if let Some(start) = state {
507 // All allowed chars are ASCII, so using char indexes to slice &[u8] is OK
508 let number = u32::from_str(&ttl_str[start..])
509 .map_err(|_| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
510 value = value
511 .checked_add(number)
512 .ok_or_else(|| ParseErrorKind::ParseTime(ttl_str.to_string()))?;
513 }
514
515 Ok(value)
516 }
517}
518
// NOTE(review): every variant looks to be constructed by `Parser::parse`; confirm whether
// this `allow` is still required.
#[allow(unused)]
/// Position of the zone file parser within the entry it is currently reading.
enum State {
    /// start of line, @, $<WORD>, Name, Blank
    StartLine,
    /// $ORIGIN, waiting for the domain name
    Origin,
    /// $INCLUDE <filename>; holds the file name once it has been read
    Include(Option<String>),
    /// $TTL <time>
    Ttl,
    /// [<TTL>] [<class>] <type>,
    TtlClassType,
    /// RDATA tokens collected so far for the current record
    Record(Vec<String>),
}
528
/// Max traversal depth for `$INCLUDE` files
///
/// `Parser::parse` returns an error instead of opening another `$INCLUDE` once the number
/// of sources being parsed (the top-level input plus the chain of nested includes) exceeds
/// this value.
const MAX_INCLUDE_LEVEL: usize = 256;
531
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use super::*;

    /// A token that is neither a TTL, a class nor a known record type must be reported as
    /// an error that names the (upper-cased) offending token.
    #[test]
    #[allow(clippy::uninlined_format_args)]
    fn test_zone_parse() {
        let domain = Name::from_str("parameter.origin.org.").unwrap();

        let zone_data = r#"$ORIGIN parsed.zone.origin.org.
 faulty-record-type 60 IN A 1.2.3.4
"#;

        let result = Parser::new(zone_data, None, Some(domain)).parse();
        // `&&` short-circuits: with `&`, `unwrap_err()` would also run on `Ok` and panic
        // before the assertion message below could be printed.
        assert!(
            result.is_err()
                && result
                    .as_ref()
                    .unwrap_err()
                    .to_string()
                    .contains("FAULTY-RECORD-TYPE"),
            "unexpected success: {:#?}",
            result
        );
    }
}