use std::path::Path;
use crate::{
common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint},
error::Error,
};
/// A single parsed row of the `CaseFolding.txt` file.
///
/// Values of this type are produced by the `FromStr` implementation, which
/// reads three `;`-terminated fields from one line of the file.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct CaseFold {
/// The codepoint taken from the first field of the line.
///
/// NOTE(review): presumably the codepoint that is being case folded.
pub codepoint: Codepoint,
/// The status taken from the second field of the line.
pub status: CaseStatus,
/// The codepoints taken from the third field of the line, in the order
/// they appear (the field is split on whitespace).
///
/// NOTE(review): presumably the case folded form of `codepoint`.
pub mapping: Vec<Codepoint>,
}
impl UcdFile for CaseFold {
/// Returns the file name `CaseFolding.txt` as the relative path of the
/// file whose rows this type represents.
fn relative_file_path() -> &'static Path {
Path::new("CaseFolding.txt")
}
}
impl UcdFileByCodepoint for CaseFold {
/// Returns the codepoint iterator obtained from this row's `codepoint`
/// field (the `mapping` codepoints are not included).
fn codepoints(&self) -> CodepointIter {
self.codepoint.into_iter()
}
}
impl std::str::FromStr for CaseFold {
type Err = Error;
fn from_str(line: &str) -> Result<CaseFold, Error> {
let re_parts = regex!(
r"(?x)
^
\s*(?P<codepoint>[^\s;]+)\s*;
\s*(?P<status>[^\s;]+)\s*;
\s*(?P<mapping>[^;]+)\s*;
",
);
let caps = match re_parts.captures(line.trim()) {
Some(caps) => caps,
None => return err!("invalid CaseFolding line: '{}'", line),
};
let mut mapping = vec![];
for cp in caps["mapping"].split_whitespace() {
mapping.push(cp.parse()?);
}
Ok(CaseFold {
codepoint: caps["codepoint"].parse()?,
status: caps["status"].parse()?,
mapping,
})
}
}
/// The status of a row in `CaseFolding.txt`.
///
/// Each variant corresponds to one of the single-letter codes accepted by the
/// `FromStr` implementation of this type.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CaseStatus {
    /// Status code `C`.
    Common,
    /// Status code `F`.
    Full,
    /// Status code `S`.
    Simple,
    /// Status code `T`.
    Special,
}

impl Default for CaseStatus {
    /// The default status is [`CaseStatus::Common`].
    fn default() -> CaseStatus {
        Self::Common
    }
}

impl CaseStatus {
    /// Returns `true` for every status except [`CaseStatus::Full`].
    ///
    /// NOTE(review): per the Unicode description of `CaseFolding.txt`, full
    /// mappings are the ones that may expand to several codepoints, so this
    /// presumably signals "the mapping is a single codepoint" — confirm
    /// against callers.
    pub fn is_fixed(&self) -> bool {
        match *self {
            CaseStatus::Full => false,
            CaseStatus::Common | CaseStatus::Simple | CaseStatus::Special => true,
        }
    }
}
impl std::str::FromStr for CaseStatus {
    type Err = Error;

    /// Converts a single-letter status code into a [`CaseStatus`].
    ///
    /// Accepted codes are `C` (common), `F` (full), `S` (simple) and `T`
    /// (special). The comparison is exact: no trimming or case conversion is
    /// applied to `s`.
    ///
    /// # Errors
    ///
    /// Returns an error naming the offending input when `s` is not one of the
    /// four accepted codes.
    fn from_str(s: &str) -> Result<CaseStatus, Error> {
        let status = match s {
            "C" => CaseStatus::Common,
            "F" => CaseStatus::Full,
            "S" => CaseStatus::Simple,
            "T" => CaseStatus::Special,
            _ => {
                return err!(
                    "unrecognized case status: '{}' \
                     (must be one of C, F, S or T)",
                    s
                );
            }
        };
        Ok(status)
    }
}
#[cfg(test)]
mod tests {
    use super::{CaseFold, CaseStatus};

    /// Parses `line` as a `CaseFold` row, panicking if the line is rejected.
    fn row(line: &str) -> CaseFold {
        line.parse().unwrap()
    }

    /// A `C` row with a single-codepoint mapping.
    #[test]
    fn parse_common() {
        let parsed =
            row("0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE\n");
        assert_eq!(parsed.codepoint, 0x0150);
        assert_eq!(parsed.status, CaseStatus::Common);
        assert_eq!(parsed.mapping, vec![0x0151]);
    }

    /// An `F` row whose mapping lists three codepoints.
    #[test]
    fn parse_full() {
        let parsed = row("03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS\n");
        assert_eq!(parsed.codepoint, 0x03B0);
        assert_eq!(parsed.status, CaseStatus::Full);
        assert_eq!(parsed.mapping, vec![0x03C5, 0x0308, 0x0301]);
    }

    /// An `S` row with a single-codepoint mapping.
    #[test]
    fn parse_simple() {
        let parsed = row("1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI\n");
        assert_eq!(parsed.codepoint, 0x1F8F);
        assert_eq!(parsed.status, CaseStatus::Simple);
        assert_eq!(parsed.mapping, vec![0x1F87]);
    }

    /// A `T` row with a single-codepoint mapping.
    #[test]
    fn parse_special() {
        let parsed = row("0049; T; 0131; # LATIN CAPITAL LETTER I\n");
        assert_eq!(parsed.codepoint, 0x0049);
        assert_eq!(parsed.status, CaseStatus::Special);
        assert_eq!(parsed.mapping, vec![0x0131]);
    }
}