#[path = "src/tables.rs"]
mod tables;
use std::cmp::Ordering;
use std::convert::TryFrom;
use std::env;
use std::fs::File;
use std::io::Write;
use std::path::{Path, PathBuf};
use block::{Block, LAST_INDEX};
use tables::{GeneralCategory, GENERAL_CATEGORY};
/// Number of bits a code point is shifted right by to obtain its block number.
///
/// Computed as the number of set bits in `block::LAST_INDEX`; this equals
/// log2(`block::SIZE`) as long as `block::SIZE` is a power of two.
const SHIFT: u32 = block::LAST_INDEX.count_ones();
/// Entry point: compiles the category table and writes it to `category.rs`
/// inside the directory named by the `OUT_DIR` environment variable.
///
/// # Panics
///
/// Panics if `OUT_DIR` is not set (or is not valid Unicode), or if compiling
/// or writing the table panics.
fn main() {
    let out_dir = env::var("OUT_DIR").unwrap();

    let mut output_path = PathBuf::from(out_dir);
    output_path.push("category.rs");

    let table = compile_table();
    write_table(&output_path, &table);
}
/// Output of `compile_table`: the category data split into fixed-size blocks,
/// with identical blocks stored only once.
struct CompiledTable {
/// The distinct blocks, each paired with the address (first code point) of
/// the block-sized span in which that content was first encountered.
blocks: Vec<(u32, Block)>,
/// One entry per block-sized span, in ascending address order: the span's
/// address and the index into `blocks` of the block holding its content.
address_to_block_index: Vec<(u32, usize)>,
/// The largest range end found in `GENERAL_CATEGORY`.
last_code_point: u32,
}
fn compile_table() -> CompiledTable {
let mut blocks = Vec::new();
let mut address_to_block_index = Vec::new();
let &(start, _, _) = GENERAL_CATEGORY
.iter()
.min_by_key(|(start, _, _)| start)
.unwrap();
let &(_, end, _) = GENERAL_CATEGORY
.iter()
.max_by_key(|(_, end, _)| end)
.unwrap();
let last_code_point = end;
let end_block_address = end & (!LAST_INDEX as u32);
let end = end_block_address + block::SIZE as u32;
let mut block = Block::new();
for codepoint in start..=end {
let category = lookup(codepoint);
let block_address = (codepoint >> SHIFT).saturating_sub(1) << SHIFT;
if codepoint != 0 && (codepoint & u32::try_from(block::LAST_INDEX).unwrap()) == 0 {
if let Some(index) = blocks.iter().position(|(_, candidate)| candidate == &block) {
address_to_block_index.push((block_address, index));
} else {
address_to_block_index.push((block_address, blocks.len()));
blocks.push((block_address, block.clone()));
}
block.reset();
}
block[usize::try_from(codepoint).unwrap() & block::LAST_INDEX] = category;
}
CompiledTable {
blocks,
address_to_block_index,
last_code_point,
}
}
/// Writes `compiled_table` to `path` as Rust source code.
///
/// The generated file contains, in order:
/// * `use` lines for `crate::GeneralCategory` and its variants,
/// * the `LAST_CODEPOINT` and `BLOCK_SIZE` constants,
/// * `CATEGORY_BLOCKS`, the concatenated contents of every distinct block,
/// * one `BLOCK_OFFSET_<address>` constant per distinct block giving its
///   starting offset inside `CATEGORY_BLOCKS`,
/// * `CATEGORY_BLOCK_OFFSETS`, which lists the matching offset constant for
///   every entry of `address_to_block_index`.
///
/// Output is buffered and flushed explicitly so that a failed write is
/// reported instead of being silently dropped.
///
/// # Panics
///
/// Panics if the file cannot be created or if any write or the final flush
/// fails.
fn write_table(path: &Path, compiled_table: &CompiledTable) {
    // Build the panic message only on failure; the format mirrors what
    // `Result::expect` would print.
    let file = File::create(path)
        .unwrap_or_else(|err| panic!("unable to open {}: {:?}", path.to_string_lossy(), err));
    let mut output = std::io::BufWriter::new(file);

    writeln!(output, "use crate::GeneralCategory;").unwrap();
    writeln!(output, "use crate::GeneralCategory::*;").unwrap();
    writeln!(
        output,
        "\nconst LAST_CODEPOINT: u32 = 0x{:X};",
        compiled_table.last_code_point
    )
    .unwrap();
    writeln!(output, "\nconst BLOCK_SIZE: usize = {};", block::SIZE).unwrap();

    // The contents of all distinct blocks, back to back.
    writeln!(
        output,
        "\nconst CATEGORY_BLOCKS: [GeneralCategory; {}] = [",
        compiled_table.blocks.len() * block::SIZE
    )
    .unwrap();
    for (address, categories) in &compiled_table.blocks {
        writeln!(output, "// BLOCK: {:04X}\n", address).unwrap();
        for (i, category) in categories.iter().enumerate() {
            // Break the line after every 16 entries.
            if i != 0 && (i & 0xF) == 0 {
                writeln!(output).unwrap();
            }
            write!(output, "{:?},", category).unwrap();
        }
        write!(output, "\n\n").unwrap();
    }
    writeln!(output, "];").unwrap();
    write!(output, "\n\n").unwrap();

    // Named offsets of each distinct block inside CATEGORY_BLOCKS.
    for (index, (address, _)) in compiled_table.blocks.iter().enumerate() {
        writeln!(
            output,
            "const BLOCK_OFFSET_{:04X}: u16 = 0x{:04X};",
            address,
            index * block::SIZE
        )
        .unwrap();
    }

    // For every block-sized span, the offset constant of the block it uses.
    writeln!(
        output,
        "\nconst CATEGORY_BLOCK_OFFSETS: [u16; {}] = [",
        compiled_table.address_to_block_index.len()
    )
    .unwrap();
    for &(_, index) in &compiled_table.address_to_block_index {
        let block_address = compiled_table.blocks[index].0;
        writeln!(output, " BLOCK_OFFSET_{:04X},", block_address).unwrap();
    }
    writeln!(output, "];").unwrap();

    // Dropping a BufWriter ignores flush errors, so flush explicitly.
    output.flush().unwrap();
}
/// Returns the general category of `codepoint`.
///
/// `GENERAL_CATEGORY` is binary-searched for an inclusive `(start, end, _)`
/// range containing `codepoint`, so the table must be sorted by range and the
/// ranges must not overlap. Code points that fall outside every range are
/// reported as `GeneralCategory::Unassigned`, the same default that
/// `Block::new` and `Block::reset` use.
fn lookup(codepoint: u32) -> GeneralCategory {
    let found = GENERAL_CATEGORY.binary_search_by(|&(start, end, _)| {
        // Order each range relative to the code point being searched for.
        if codepoint < start {
            Ordering::Greater
        } else if codepoint > end {
            Ordering::Less
        } else {
            Ordering::Equal
        }
    });

    match found {
        Ok(idx) => GENERAL_CATEGORY[idx].2,
        // Not listed in the table: unassigned, not private use.
        Err(_) => GeneralCategory::Unassigned,
    }
}
/// Fixed-size storage for the categories of one run of `SIZE` code points.
mod block {
    use std::ops::{Index, IndexMut};

    use super::GeneralCategory;

    /// Number of categories stored in every block.
    pub const SIZE: usize = 256;
    /// Largest valid index into a block.
    pub const LAST_INDEX: usize = SIZE - 1;

    /// A run of `SIZE` categories, indexable by position within the block.
    #[derive(Debug, PartialEq, Eq, Hash, Clone)]
    pub struct Block {
        data: Vec<GeneralCategory>,
    }

    impl Block {
        /// Creates a block in which every entry is `Unassigned`.
        pub fn new() -> Self {
            let mut data = Vec::with_capacity(SIZE);
            data.resize(SIZE, GeneralCategory::Unassigned);
            Block { data }
        }

        /// Sets every entry back to `Unassigned`.
        pub fn reset(&mut self) {
            for slot in self.data.iter_mut() {
                *slot = GeneralCategory::Unassigned;
            }
        }

        /// Iterates over the entries in index order.
        pub fn iter(&self) -> impl Iterator<Item = &GeneralCategory> {
            self.data.iter()
        }
    }

    impl Index<usize> for Block {
        type Output = GeneralCategory;

        /// Panics if `position` is not smaller than the block length.
        fn index(&self, position: usize) -> &Self::Output {
            self.data.index(position)
        }
    }

    impl IndexMut<usize> for Block {
        /// Panics if `position` is not smaller than the block length.
        fn index_mut(&mut self, position: usize) -> &mut Self::Output {
            &mut self.data[position]
        }
    }
}