use std::collections::HashMap;
use std::convert::TryInto;
use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::path::PathBuf;
use std::{env, io};
use serde::Deserialize;
enum Table {
Complete([char; 128]),
Incomplete([Option<char>; 128]),
}
struct CodeTables {
created: String,
tables: Vec<(u16, Table)>,
}
fn main() -> io::Result<()> {
generate_tables()?;
Ok(())
}
fn generate_tables() -> io::Result<()> {
let code_tables = parse_code_tables()?;
let mut output = open_output()?;
write_header(&mut output, code_tables.created)?;
for (code_page, table) in &code_tables.tables {
write_decoding(&mut output, *code_page, table)?;
}
for (code_page, table) in &code_tables.tables {
write_encoding(&mut output, *code_page, table)?;
}
write_decoding_table_cp_map(&mut output, &code_tables.tables)?;
write_encoding_table_cp_map(&mut output, &code_tables.tables)?;
write_footer(&mut output)?;
Ok(())
}
fn open_output() -> io::Result<BufWriter<File>> {
let path = {
let mut path = PathBuf::from(env::var("OUT_DIR").unwrap());
path.push("code_table.rs");
path
};
let output = BufWriter::new(File::create(path)?);
Ok(output)
}
fn parse_code_tables() -> io::Result<CodeTables> {
let path = {
let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
path.push("assets");
path.push("code_tables.json");
path
};
let file = BufReader::new(File::open(path)?);
#[derive(Deserialize)]
struct JsonCodeTables {
created: String,
tables: HashMap<String, Vec<Option<u32>>>,
}
let JsonCodeTables { created, tables } = serde_json::from_reader(file).unwrap();
let mut tables = tables
.into_iter()
.map(|(code_page, table)| {
let complete = table.iter().all(Option::is_some);
let code_page = code_page.parse().unwrap();
let table = table
.into_iter()
.skip(128)
.map(|i| i.map(|i| char::from_u32(i).unwrap()));
let table = if complete {
Table::Complete(
table
.map(Option::unwrap)
.collect::<Vec<_>>()
.try_into()
.unwrap(),
)
} else {
Table::Incomplete(table.collect::<Vec<_>>().try_into().unwrap())
};
(code_page, table)
})
.collect::<Vec<_>>();
tables.sort_unstable_by_key(|(code_page, _table)| *code_page);
Ok(CodeTables { created, tables })
}
fn write_header(mut dst: impl Write, created: String) -> io::Result<()> {
writeln!(
&mut dst,
"/// Code table
/// Generated at {created}
pub mod code_table {{
use super::code_table_type::TableType;
use super::OEMCPHashMap;
use TableType::*;
"
)
}
fn write_decoding(mut dst: impl Write, code_page: u16, table: &Table) -> io::Result<()> {
writeln!(&mut dst, "/// Decoding table (CP{code_page} to Unicode)")?;
match table {
Table::Complete(table) => {
writeln!(
&mut dst,
"pub static DECODING_TABLE_CP{code_page}: [char; 128] = {table:?};"
)?;
}
Table::Incomplete(table) => {
writeln!(
&mut dst,
"pub static DECODING_TABLE_CP{code_page}: [Option<char>; 128] = {table:?};"
)?;
}
}
writeln!(&mut dst)?;
Ok(())
}
fn write_encoding(mut dst: impl Write, code_page: u16, table: &Table) -> io::Result<()> {
let mut map = phf_codegen::Map::new();
match table {
Table::Complete(table) => {
for (i, c) in table
.iter()
.copied()
.enumerate()
.map(|(i, c)| (i + 0x80, c))
{
map.entry(c, &i.to_string());
}
}
Table::Incomplete(table) => {
for (i, c) in table
.iter()
.copied()
.enumerate()
.filter_map(|(i, c)| c.map(|c| (i + 0x80, c)))
{
map.entry(c, &i.to_string());
}
}
}
write!(
&mut dst,
"/// Encoding table (Unicode to CP{code_page})
pub static ENCODING_TABLE_CP{code_page}: OEMCPHashMap<char, u8> = {map};",
map = map.build()
)?;
Ok(())
}
fn write_decoding_table_cp_map(mut dst: impl Write, tables: &[(u16, Table)]) -> io::Result<()> {
let mut map = phf_codegen::Map::new();
for (code_page, table) in tables {
let ty = match table {
Table::Complete(_) => "Complete",
Table::Incomplete(_) => "Incomplete",
};
map.entry(code_page, &format!("{ty}(&DECODING_TABLE_CP{code_page})"));
}
writeln!(
&mut dst,
r#"/// map from codepage to decoding table
///
/// `.get` returns `code_table_type::{{Complete,Incomplete}}`.
///
/// * `Complete`: the decoding table doesn't have undefined mapping.
/// * `Incomplete`: it have some undefined mapping.
///
/// This enumerate provides methods `decode_string_lossy` and `decode_string_checked`.
/// The following examples show the use of them. `if let Some(decoder) = *snip* decoder.decode_string_*snip*` is convenient for practical use.
///
/// # Examples
///
/// ```
/// use oem_cp::code_table::{{DECODING_TABLE_CP_MAP, DECODING_TABLE_CP437}};
/// use oem_cp::code_table_type::TableType::*;
/// assert_eq!(DECODING_TABLE_CP_MAP.get(&437).unwrap().decode_string_lossy(vec![0x31, 0xF6, 0xAB, 0x3D, 0x32]), "1÷½=2".to_string());
/// if let Some(cp874_table) = DECODING_TABLE_CP_MAP.get(&874) {{
/// // means shrimp in Thai (U+E49 => 0xE9)
/// assert_eq!(cp874_table.decode_string_checked(vec![0xA1, 0xD8, 0xE9, 0xA7]), Some("กุ้ง".to_string()));
/// // undefined mapping 0xDB for CP874 Windows dialect (strict mode with MB_ERR_INVALID_CHARS)
/// assert_eq!(cp874_table.decode_string_checked(vec![0xDB]), None);
/// }} else {{
/// panic!("CP874 must be defined in DECODING_TABLE_CP_MAP");
/// }}
/// ```
pub static DECODING_TABLE_CP_MAP: OEMCPHashMap<u16, TableType> = {map};"#,
map = map.build()
)?;
Ok(())
}
fn write_encoding_table_cp_map(mut dst: impl Write, tables: &[(u16, Table)]) -> io::Result<()> {
let mut map = phf_codegen::Map::new();
for (code_page, _table) in tables {
map.entry(*code_page, &format!("&ENCODING_TABLE_CP{code_page}"));
}
writeln!(
&mut dst,
r#"/// map from codepage to encoding table
///
/// # Examples
///
/// ```
/// # use std::ptr;
/// use oem_cp::code_table::{{ENCODING_TABLE_CP_MAP, ENCODING_TABLE_CP437}};
/// assert!(ptr::eq(*ENCODING_TABLE_CP_MAP.get(&437).unwrap(), &ENCODING_TABLE_CP437));
/// // CP932 (Shift-JIS; Japanese MBCS) is unsupported
/// assert!(ENCODING_TABLE_CP_MAP.get(&932).is_none());
///
/// use oem_cp::encode_string_checked;
///
/// if let Some(cp437_table) = ENCODING_TABLE_CP_MAP.get(&437) {{
/// assert_eq!(encode_string_checked("π≈22/7", cp437_table), Some(vec![0xE3, 0xF7, 0x32, 0x32, 0x2F, 0x37]));
/// }} else {{
/// panic!("CP437 must be registered in ENCODING_TABLE_CP_MAP");
/// }}
/// ```
pub static ENCODING_TABLE_CP_MAP: OEMCPHashMap<u16, &'static OEMCPHashMap<char, u8>> = {map};"#,
map = map.build()
)?;
Ok(())
}
fn write_footer(mut dst: impl Write) -> io::Result<()> {
writeln!(&mut dst, "}}")
}