use std::path::Path;
use crate::{
common::{
parse_codepoint_sequence, Codepoint, CodepointIter, UcdFile,
UcdFileByCodepoint,
},
error::Error,
};
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct SpecialCaseMapping {
pub codepoint: Codepoint,
pub lowercase: Vec<Codepoint>,
pub titlecase: Vec<Codepoint>,
pub uppercase: Vec<Codepoint>,
pub conditions: Vec<String>,
}
impl UcdFile for SpecialCaseMapping {
fn relative_file_path() -> &'static Path {
Path::new("SpecialCasing.txt")
}
}
impl UcdFileByCodepoint for SpecialCaseMapping {
fn codepoints(&self) -> CodepointIter {
self.codepoint.into_iter()
}
}
impl std::str::FromStr for SpecialCaseMapping {
type Err = Error;
fn from_str(line: &str) -> Result<SpecialCaseMapping, Error> {
let re_parts = regex!(
r"(?x)
^
\s*(?P<codepoint>[^\s;]+)\s*;
\s*(?P<lower>[^;]+)\s*;
\s*(?P<title>[^;]+)\s*;
\s*(?P<upper>[^;]+)\s*;
\s*(?P<conditions>[^;\x23]+)?
",
);
let caps = match re_parts.captures(line.trim()) {
Some(caps) => caps,
None => return err!("invalid SpecialCasing line: '{}'", line),
};
let conditions = caps
.name("conditions")
.map(|x| {
x.as_str()
.trim()
.split_whitespace()
.map(|c| c.to_string())
.collect()
})
.unwrap_or(vec![]);
Ok(SpecialCaseMapping {
codepoint: caps["codepoint"].parse()?,
lowercase: parse_codepoint_sequence(&caps["lower"])?,
titlecase: parse_codepoint_sequence(&caps["title"])?,
uppercase: parse_codepoint_sequence(&caps["upper"])?,
conditions,
})
}
}
#[cfg(test)]
mod tests {
use super::SpecialCaseMapping;
#[test]
fn parse_no_conds() {
let line = "1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA\n";
let row: SpecialCaseMapping = line.parse().unwrap();
assert_eq!(row.codepoint, 0x1F52);
assert_eq!(row.lowercase, vec![0x1F52]);
assert_eq!(row.titlecase, vec![0x03A5, 0x0313, 0x0300]);
assert_eq!(row.uppercase, vec![0x03A5, 0x0313, 0x0300]);
assert!(row.conditions.is_empty());
}
#[test]
fn parse_conds() {
let line = "0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE\n";
let row: SpecialCaseMapping = line.parse().unwrap();
assert_eq!(row.codepoint, 0x0307);
assert!(row.lowercase.is_empty());
assert_eq!(row.titlecase, vec![0x0307]);
assert_eq!(row.uppercase, vec![0x0307]);
assert_eq!(row.conditions, vec!["tr", "After_I"]);
}
}