chrono_tz_build/
lib.rs

1extern crate parse_zoneinfo;
2#[cfg(feature = "filter-by-regex")]
3extern crate regex;
4
5use std::collections::BTreeSet;
6use std::env;
7use std::fs::File;
8use std::io::{self, BufRead, BufReader, Write};
9use std::path::Path;
10
11use parse_zoneinfo::line::{Line, LineParser};
12use parse_zoneinfo::structure::{Child, Structure};
13use parse_zoneinfo::table::{Table, TableBuilder};
14use parse_zoneinfo::transitions::FixedTimespan;
15use parse_zoneinfo::transitions::TableTransitions;
16
/// The name of the environment variable which possibly holds the filter regex.
///
/// `main` prints a `cargo:rerun-if-env-changed` directive for this name, and
/// the `filter-by-regex` variant of the `filter` module reads the variable to
/// decide which time zones to keep.
const FILTER_ENV_VAR_NAME: &str = "CHRONO_TZ_TIMEZONE_FILTER";
19
// Drop a trailing `#` comment from a tz source line.
//
// Everything from the first '#' to the end of the line is removed; a line
// without '#' is returned unchanged. Needed until parse_zoneinfo handles
// comments itself. Strictly speaking a '#' between double quotes is not a
// comment and should be kept, but the tz database currently contains no such
// line, so that case is not handled.
fn strip_comments(mut line: String) -> String {
    // Truncating at `len()` is a no-op, which covers the "no comment" case.
    let keep = line.find('#').unwrap_or(line.len());
    line.truncate(keep);
    line
}
30
31// Generate a list of the time zone periods beyond the first that apply
32// to this zone, as a string representation of a static slice.
33fn format_rest(rest: Vec<(i64, FixedTimespan)>) -> String {
34    let mut ret = "&[\n".to_string();
35    for (
36        start,
37        FixedTimespan {
38            utc_offset,
39            dst_offset,
40            name,
41        },
42    ) in rest
43    {
44        let timespan_name = match name.as_ref() {
45            "%z" => None,
46            name => Some(name),
47        };
48        ret.push_str(&format!(
49            "                    ({start}, FixedTimespan {{ \
50             utc_offset: {utc}, dst_offset: {dst}, name: {name:?} \
51             }}),\n",
52            start = start,
53            utc = utc_offset,
54            dst = dst_offset,
55            name = timespan_name,
56        ));
57    }
58    ret.push_str("                ]");
59    ret
60}
61
// Turn a tz database name into something usable as a Rust identifier:
//
// * every '/' becomes "__";
// * every '+' becomes "Plus";
// * a '-' directly followed by a numeric character is a minus sign and
//   becomes "Minus" (e.g. "Etc/GMT-14" -> "Etc__GMTMinus14");
// * any other '-' is a hyphen and is dropped
//   (e.g. "America/Port-au-Prince" -> "America__PortauPrince").
//
// Each '-' is classified on its own, so a name that contains both a hyphen
// and a minus sign keeps the minus sign instead of having every dash treated
// like the first one.
fn convert_bad_chars(name: &str) -> String {
    let mut ident = String::with_capacity(name.len() + 8);
    let mut chars = name.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '/' => ident.push_str("__"),
            '+' => ident.push_str("Plus"),
            '-' => {
                // Only the character right after the dash decides its meaning.
                if chars.peek().map_or(false, |next| next.is_numeric()) {
                    ident.push_str("Minus");
                }
            }
            other => ident.push(other),
        }
    }
    ident
}
82
83// The timezone file contains impls of `Timespans` for all timezones in the
84// database. The `Wrap` wrapper in the `timezone_impl` module then implements
85// TimeZone for any contained struct that implements `Timespans`.
86fn write_timezone_file(timezone_file: &mut File, table: &Table) -> io::Result<()> {
87    let zones = table
88        .zonesets
89        .keys()
90        .chain(table.links.keys())
91        .collect::<BTreeSet<_>>();
92    writeln!(
93        timezone_file,
94        "use core::fmt::{{self, Debug, Display, Formatter}};",
95    )?;
96    writeln!(timezone_file, "use core::str::FromStr;\n",)?;
97    writeln!(
98        timezone_file,
99        "use crate::timezone_impl::{{TimeSpans, FixedTimespanSet, FixedTimespan}};\n",
100    )?;
101    writeln!(
102        timezone_file,
103        "/// TimeZones built at compile time from the tz database
104///
105/// This implements [`chrono::TimeZone`] so that it may be used in and to
106/// construct chrono's DateTime type. See the root module documentation
107/// for details."
108    )?;
109    writeln!(timezone_file, "#[derive(Clone, Copy, PartialEq, Eq, Hash)]")?;
110    writeln!(
111        timezone_file,
112        r#"#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]"#
113    )?;
114    writeln!(timezone_file, "pub enum Tz {{")?;
115    for zone in &zones {
116        let zone_name = convert_bad_chars(zone);
117        writeln!(
118            timezone_file,
119            "    /// {raw_zone_name}\n    {zone},",
120            zone = zone_name,
121            raw_zone_name = zone
122        )?;
123    }
124    writeln!(timezone_file, "}}")?;
125
126    let mut map = phf_codegen::Map::new();
127    for zone in &zones {
128        map.entry(zone, &format!("Tz::{}", convert_bad_chars(zone)));
129    }
130    writeln!(
131        timezone_file,
132        "static TIMEZONES: ::phf::Map<&'static str, Tz> = \n{};",
133        map.build()
134    )?;
135
136    #[cfg(feature = "case-insensitive")]
137    {
138        writeln!(timezone_file, "use uncased::UncasedStr;\n",)?;
139        let mut map = phf_codegen::Map::new();
140        for zone in &zones {
141            map.entry(
142                uncased::UncasedStr::new(zone),
143                &format!("Tz::{}", convert_bad_chars(zone)),
144            );
145        }
146        writeln!(
147            timezone_file,
148            "static TIMEZONES_UNCASED: ::phf::Map<&'static uncased::UncasedStr, Tz> = \n{};",
149            map.build()
150        )?;
151    }
152
153    writeln!(
154        timezone_file,
155        r#"#[derive(Copy, Clone, Debug, PartialEq, Eq)]
156pub struct ParseError(());
157
158impl Display for ParseError {{
159    fn fmt(&self, f: &mut Formatter) -> fmt::Result {{
160        f.write_str("failed to parse timezone")
161    }}
162}}
163
164#[cfg(feature = "std")]
165impl std::error::Error for ParseError {{}}
166
167impl FromStr for Tz {{
168    type Err = ParseError;
169    fn from_str(s: &str) -> Result<Self, Self::Err> {{
170        return TIMEZONES.get(s).cloned().ok_or(ParseError(()));
171    }}
172}}
173"#
174    )?;
175
176    writeln!(
177        timezone_file,
178        "impl Tz {{
179    pub fn name(self) -> &'static str {{
180        match self {{"
181    )?;
182    for zone in &zones {
183        let zone_name = convert_bad_chars(zone);
184        writeln!(
185            timezone_file,
186            "            Tz::{zone} => \"{raw_zone_name}\",",
187            zone = zone_name,
188            raw_zone_name = zone
189        )?;
190    }
191    writeln!(
192        timezone_file,
193        "        }}
194    }}"
195    )?;
196
197    #[cfg(feature = "case-insensitive")]
198    {
199        writeln!(
200            timezone_file,
201            r#"
202    #[cfg(feature = "case-insensitive")]
203    /// Parses a timezone string in a case-insensitive way
204    pub fn from_str_insensitive(s: &str) -> Result<Self, ParseError> {{
205        return TIMEZONES_UNCASED.get(s.into()).cloned().ok_or(ParseError(()));
206    }}"#
207        )?;
208    }
209
210    writeln!(timezone_file, "}}")?;
211
212    writeln!(
213        timezone_file,
214        "impl Debug for Tz {{
215    fn fmt(&self, f: &mut Formatter) -> fmt::Result {{
216        f.write_str(self.name().as_ref())
217    }}
218}}\n"
219    )?;
220    writeln!(
221        timezone_file,
222        "impl Display for Tz {{
223    fn fmt(&self, f: &mut Formatter) -> fmt::Result {{
224        f.write_str(self.name().as_ref())
225    }}
226}}\n"
227    )?;
228    writeln!(
229        timezone_file,
230        "impl TimeSpans for Tz {{
231    fn timespans(&self) -> FixedTimespanSet {{
232        match *self {{"
233    )?;
234    for zone in &zones {
235        let timespans = table.timespans(zone).unwrap();
236        let zone_name = convert_bad_chars(zone);
237        let timespan_name = match timespans.first.name.as_ref() {
238            "%z" => None,
239            name => Some(name),
240        };
241        writeln!(
242            timezone_file,
243            "            Tz::{zone} => {{
244                const REST: &[(i64, FixedTimespan)] = {rest};
245                FixedTimespanSet {{
246                    first: FixedTimespan {{
247                        utc_offset: {utc},
248                        dst_offset: {dst},
249                        name: {name:?},
250                    }},
251                    rest: REST
252                }}
253            }},\n",
254            zone = zone_name,
255            rest = format_rest(timespans.rest),
256            utc = timespans.first.utc_offset,
257            dst = timespans.first.dst_offset,
258            name = timespan_name,
259        )?;
260    }
261    write!(
262        timezone_file,
263        "         }}
264    }}
265}}\n"
266    )?;
267    write!(
268        timezone_file,
269        "/// An array of every known variant
270///
271/// Useful for iterating over known timezones:
272///
273/// ```
274/// use chrono_tz::{{TZ_VARIANTS, Tz}};
275/// assert!(TZ_VARIANTS.iter().any(|v| *v == Tz::UTC));
276/// ```
277pub static TZ_VARIANTS: [Tz; {num}] = [
278",
279        num = zones.len()
280    )?;
281    for zone in &zones {
282        writeln!(
283            timezone_file,
284            "    Tz::{zone},",
285            zone = convert_bad_chars(zone)
286        )?;
287    }
288    write!(timezone_file, "];")?;
289    Ok(())
290}
291
292// Create a file containing nice-looking re-exports such as Europe::London
293// instead of having to use chrono_tz::timezones::Europe__London
294fn write_directory_file(directory_file: &mut File, table: &Table, version: &str) -> io::Result<()> {
295    // expose the underlying IANA TZDB version
296    writeln!(
297        directory_file,
298        "pub const IANA_TZDB_VERSION : &str = \"{version}\";\n"
299    )?;
300    // add the `loose' zone definitions first
301    writeln!(directory_file, "use crate::timezones::Tz;\n")?;
302    let zones = table
303        .zonesets
304        .keys()
305        .chain(table.links.keys())
306        .filter(|zone| !zone.contains('/'))
307        .collect::<BTreeSet<_>>();
308    for zone in zones {
309        let zone = convert_bad_chars(zone);
310        writeln!(
311            directory_file,
312            "pub const {name} : Tz = Tz::{name};",
313            name = zone
314        )?;
315    }
316    writeln!(directory_file)?;
317
318    // now add the `structured' zone names in submodules
319    for entry in table.structure() {
320        if entry.name.contains('/') {
321            continue;
322        }
323        let module_name = convert_bad_chars(entry.name);
324        writeln!(directory_file, "pub mod {name} {{", name = module_name)?;
325        writeln!(directory_file, "    use crate::timezones::Tz;\n",)?;
326        for child in entry.children {
327            match child {
328                Child::Submodule(name) => {
329                    let submodule_name = convert_bad_chars(name);
330                    writeln!(
331                        directory_file,
332                        "    pub mod {name} {{",
333                        name = submodule_name
334                    )?;
335                    writeln!(directory_file, "        use crate::timezones::Tz;\n",)?;
336                    let full_name = entry.name.to_string() + "/" + name;
337                    for entry in table.structure() {
338                        if entry.name == full_name {
339                            for child in entry.children {
340                                match child {
341                                    Child::Submodule(_) => {
342                                        panic!("Depth of > 3 nested submodules not implemented!")
343                                    }
344                                    Child::TimeZone(name) => {
345                                        let converted_name = convert_bad_chars(name);
346                                        writeln!(directory_file,
347                                    "        pub const {name} : Tz = Tz::{module_name}__{submodule_name}__{name};",
348                                            module_name = module_name,
349                                            submodule_name = submodule_name,
350                                            name = converted_name,
351                                        )?;
352                                    }
353                                }
354                            }
355                        }
356                    }
357                    writeln!(directory_file, "    }}\n")?;
358                }
359                Child::TimeZone(name) => {
360                    let name = convert_bad_chars(name);
361                    writeln!(
362                        directory_file,
363                        "    pub const {name} : Tz = Tz::{module_name}__{name};",
364                        module_name = module_name,
365                        name = name
366                    )?;
367                }
368            }
369        }
370        writeln!(directory_file, "}}\n")?;
371    }
372    Ok(())
373}
374
375/// Stub module because filter-by-regex feature is not enabled
376#[cfg(not(feature = "filter-by-regex"))]
377mod filter {
378    /// stub function because filter-by-regex feature is not enabled
379    pub(crate) fn maybe_filter_timezone_table(_table: &mut super::Table) {}
380}
381
/// Support code for the `filter-by-regex` feature.
///
/// The "GMT" and "UTC" time zones are always included.
#[cfg(feature = "filter-by-regex")]
mod filter {
    use std::collections::HashSet;
    use std::env;

    use regex::Regex;

    use crate::{Table, FILTER_ENV_VAR_NAME};

    /// Upper bound on the number of closure passes over the links.
    const MAX_CLOSURE_ROUNDS: u32 = 50;

    /// Filter `table` in place if [`FILTER_ENV_VAR_NAME`] holds a regex;
    /// otherwise leave it untouched.
    pub(crate) fn maybe_filter_timezone_table(table: &mut Table) {
        if let Some(regex) = get_filter_regex() {
            filter_timezone_table(table, regex);
        }
    }

    /// Read the `CHRONO_TZ_TIMEZONE_FILTER` environment variable and compile
    /// it into a regex.
    ///
    /// Returns `None` when the variable is unset or blank. Panics when the
    /// value is not valid Unicode or not a valid regex, because a bad filter
    /// should fail the build.
    fn get_filter_regex() -> Option<Regex> {
        let raw = match env::var(FILTER_ENV_VAR_NAME) {
            Ok(raw) => raw,
            Err(env::VarError::NotPresent) => return None,
            Err(env::VarError::NotUnicode(s)) => panic!(
                "The value '{:?}' for environment variable {} is not valid Unicode",
                s, FILTER_ENV_VAR_NAME
            ),
        };

        let pattern = raw.trim();
        if pattern.is_empty() {
            return None;
        }

        let regex = Regex::new(pattern).unwrap_or_else(|err| {
            panic!(
                "The value '{:?}' for environment variable {} is not a valid regex, err={}",
                pattern, FILTER_ENV_VAR_NAME, err
            )
        });
        Some(regex)
    }

    /// Record `new_value` as a name to keep.
    ///
    /// A name with exactly three '/'-separated parts also records its two-part
    /// prefix; without that, half of Indiana would be lost in `directory.rs`.
    /// One-part prefixes are deliberately *not* recorded: that would
    /// inevitably add 'America' and keep far too much.
    fn insert_keep_entry(keep: &mut HashSet<String>, new_value: &str) {
        let parts: Vec<&str> = new_value.split('/').collect();
        if let [region, subregion, _] = parts.as_slice() {
            keep.insert(format!("{}/{}", region, subregion));
        }

        keep.insert(new_value.to_string());
    }

    /// Filter `table` by applying `filter_regex`.
    fn filter_timezone_table(table: &mut Table, filter_regex: Regex) {
        // Compute the transitive closure of the names to keep. Doing this,
        // rather than simply filtering `zonesets` and `links` by the regex,
        // helps to keep the `structure()` intact.
        let mut keep = HashSet::new();

        // Seed: every link whose name or target matches, plus GMT and UTC.
        for (k, v) in &table.links {
            let always_kept = k == "GMT" || k == "UTC";
            if always_kept || filter_regex.is_match(k) {
                insert_keep_entry(&mut keep, k);
            }
            if always_kept || filter_regex.is_match(v) {
                insert_keep_entry(&mut keep, v);
            }
        }

        // Grow: keeping one end of a link keeps the other end too. Repeat
        // until a pass adds nothing, giving up after a bounded number of
        // passes.
        for round in 1..=MAX_CLOSURE_ROUNDS {
            let size_before = keep.len();

            for (k, v) in &table.links {
                if keep.contains(k) && !keep.contains(v) {
                    insert_keep_entry(&mut keep, v);
                }
                if keep.contains(v) && !keep.contains(k) {
                    insert_keep_entry(&mut keep, k);
                }
            }

            if keep.len() == size_before {
                break;
            }
            if round == MAX_CLOSURE_ROUNDS {
                println!("cargo:warning=Recursion limit reached while building filter list");
            }
        }

        // Actually do the filtering.
        table
            .links
            .retain(|k, v| keep.contains(k) || keep.contains(v));

        table.zonesets.retain(|k, _| {
            filter_regex.is_match(k) || keep.iter().any(|prefix| k.starts_with(prefix))
        });
    }
}
492
/// Read the IANA tz database version from `tz/NEWS` under `CARGO_MANIFEST_DIR`.
///
/// The version is taken from the first line of the form
/// `Release <version> - <anything>`; lines that start with `Release ` but
/// contain no ` - ` separator are skipped.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not set, if the NEWS file cannot be
/// opened or read, or if no release line is found. All of these should fail
/// the build rather than produce a bogus version string.
fn detect_iana_db_version() -> String {
    let root = env::var("CARGO_MANIFEST_DIR").expect("no Cargo build context");
    let path = Path::new(&root).join("tz/NEWS");
    let file = File::open(&path)
        .unwrap_or_else(|e| panic!("cannot open {}: {}", path.display(), e));

    for line in BufReader::new(file).lines() {
        // Report a read failure directly instead of silently ending the scan.
        let line = line.unwrap_or_else(|e| panic!("cannot read {}: {}", path.display(), e));
        let version = line
            .strip_prefix("Release ")
            .and_then(|rest| rest.split_once(" - "))
            .map(|(version, _)| version);
        if let Some(version) = version {
            return version.to_owned();
        }
    }

    // Reachable with a truncated or reformatted NEWS file, so this is a
    // regular panic rather than `unreachable!`.
    panic!("no release version found in {}", path.display())
}
513
514pub fn main() {
515    println!("cargo:rerun-if-env-changed={}", FILTER_ENV_VAR_NAME);
516
517    let parser = LineParser::default();
518    let mut table = TableBuilder::new();
519
520    let tzfiles = [
521        "tz/africa",
522        "tz/antarctica",
523        "tz/asia",
524        "tz/australasia",
525        "tz/backward",
526        "tz/etcetera",
527        "tz/europe",
528        "tz/northamerica",
529        "tz/southamerica",
530    ];
531
532    let lines = tzfiles
533        .iter()
534        .map(Path::new)
535        .map(|p| {
536            Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| String::new())).join(p)
537        })
538        .map(|path| {
539            File::open(&path).unwrap_or_else(|e| panic!("cannot open {}: {}", path.display(), e))
540        })
541        .map(BufReader::new)
542        .flat_map(BufRead::lines)
543        .map(Result::unwrap)
544        .map(strip_comments);
545
546    for line in lines {
547        match parser.parse_str(&line).unwrap() {
548            Line::Zone(zone) => table.add_zone_line(zone).unwrap(),
549            Line::Continuation(cont) => table.add_continuation_line(cont).unwrap(),
550            Line::Rule(rule) => table.add_rule_line(rule).unwrap(),
551            Line::Link(link) => table.add_link_line(link).unwrap(),
552            Line::Space => {}
553        }
554    }
555
556    let mut table = table.build();
557    filter::maybe_filter_timezone_table(&mut table);
558
559    let timezone_path = Path::new(&env::var("OUT_DIR").unwrap()).join("timezones.rs");
560    let mut timezone_file = File::create(timezone_path).unwrap();
561    write_timezone_file(&mut timezone_file, &table).unwrap();
562
563    let directory_path = Path::new(&env::var("OUT_DIR").unwrap()).join("directory.rs");
564    let mut directory_file = File::create(directory_path).unwrap();
565    let version = detect_iana_db_version();
566    write_directory_file(&mut directory_file, &table, &version).unwrap();
567}