#![allow(dead_code)]
#[cfg(feature = "embed_all")] extern crate pocket_resources;
#[cfg(any(feature = "nfc", feature = "nfd", feature = "nfkc", feature = "nfkd"))]
extern crate unicode_normalization;
extern crate bincode;
extern crate fst;
extern crate hyphenation_commons;
extern crate serde;
use bincode as bin;
use serde::ser;
use std::collections::HashMap;
use std::env;
use std::error;
use std::fmt;
use std::fs::{self, File};
use std::io;
use std::io::prelude::*;
use std::iter::FromIterator;
use std::path::{Path, PathBuf};
use hyphenation_commons::dictionary::extended as ext;
use hyphenation_commons::dictionary::{self, *};
use hyphenation_commons::parse::Parse;
use hyphenation_commons::Language::{self, *};
use configuration::*;
/// Compile-time choice of the text normalization applied while parsing
/// pattern files.
///
/// Exactly one `normalize` is compiled in, picked by the `nfc` / `nfd` /
/// `nfkc` / `nfkd` cargo features. Enabling more than one of them at once
/// would define `normalize` several times and fail to compile.
mod configuration {
    #[cfg(any(feature = "nfc", feature = "nfd", feature = "nfkc", feature = "nfkd"))]
    use unicode_normalization::*;

    /// No normalization feature selected: the input is returned unchanged as
    /// an owned `String`.
    #[cfg(not(any(feature = "nfc", feature = "nfd", feature = "nfkc", feature = "nfkd")))]
    pub fn normalize(s : &str) -> String {
        String::from(s)
    }

    /// Collects the `nfc()` iterator over `s` into a `String`.
    #[cfg(feature = "nfc")]
    pub fn normalize(s : &str) -> String {
        s.nfc().collect::<String>()
    }

    /// Collects the `nfd()` iterator over `s` into a `String`.
    #[cfg(feature = "nfd")]
    pub fn normalize(s : &str) -> String {
        s.nfd().collect::<String>()
    }

    /// Collects the `nfkc()` iterator over `s` into a `String`.
    #[cfg(feature = "nfkc")]
    pub fn normalize(s : &str) -> String {
        s.nfkc().collect::<String>()
    }

    /// Collects the `nfkd()` iterator over `s` into a `String`.
    #[cfg(feature = "nfkd")]
    pub fn normalize(s : &str) -> String {
        s.nfkd().collect::<String>()
    }
}
trait TryFromIterator<Tally>: Sized {
fn try_from_iter<I>(iter : I) -> Result<Self, Error>
where I : IntoIterator<Item = (String, Tally)>;
}
impl TryFromIterator<<Patterns as Parse>::Tally> for Patterns {
fn try_from_iter<I>(iter : I) -> Result<Self, Error>
where I : IntoIterator<Item = (String, <Patterns as Parse>::Tally)>
{
Ok(Patterns::from_iter(iter)?)
}
}
impl TryFromIterator<<Exceptions as Parse>::Tally> for Exceptions {
fn try_from_iter<I>(iter : I) -> Result<Self, Error>
where I : IntoIterator<Item = (String, <Exceptions as Parse>::Tally)>
{
Ok(Exceptions(HashMap::from_iter(iter)))
}
}
impl TryFromIterator<<ext::Patterns as Parse>::Tally> for ext::Patterns {
fn try_from_iter<I>(iter : I) -> Result<Self, Error>
where I : IntoIterator<Item = (String, <ext::Patterns as Parse>::Tally)>
{
Ok(ext::Patterns::from_iter(iter)?)
}
}
/// The two root directories the build script works with.
#[derive(Debug, Clone)]
struct Paths {
    /// Root that input files are resolved against.
    source : PathBuf,
    /// Root that generated or copied files are placed under.
    out : PathBuf,
}
impl Paths {
    /// Reads the source root from `CARGO_MANIFEST_DIR` and the output root
    /// from `OUT_DIR`.
    ///
    /// # Errors
    /// Returns `Error::Env` if either variable cannot be read.
    fn new() -> Result<Self, Error> {
        let source = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
        let out = PathBuf::from(env::var("OUT_DIR")?);
        Ok(Paths { source, out })
    }

    /// Resolves `p` against the output root.
    fn place_item<P : AsRef<Path>>(&self, p : P) -> PathBuf {
        self.out.join(p)
    }

    /// Resolves `p` against the source root.
    fn source_item<P : AsRef<Path>>(&self, p : P) -> PathBuf {
        self.source.join(p)
    }

    /// Path of the text source for `lang`:
    /// `<source>/patterns/hyph-<code>.<suffix>.txt`.
    fn source_pattern(&self, lang : Language, suffix : &str) -> PathBuf {
        let mut location = self.source_item("patterns");
        location.push(format!("hyph-{}.{}.txt", lang.code(), suffix));
        location
    }

    /// Path of the serialized dictionary for `lang`:
    /// `<out>/dictionaries/<code>.<suffix>.bincode`.
    fn place_dict(&self, lang : Language, suffix : &str) -> PathBuf {
        let mut location = self.place_item("dictionaries");
        location.push(Self::dict_name(lang, suffix));
        location
    }

    /// File name of the serialized dictionary: `<code>.<suffix>.bincode`.
    fn dict_name(lang : Language, suffix : &str) -> String {
        format!("{}.{}.bincode", lang.code(), suffix)
    }
}
/// A dictionary component that can be built from one of the text files under
/// `patterns/`.
trait Build: Sized + Parse + TryFromIterator<<Self as Parse>::Tally> {
    /// The `<suffix>` part of the source file name
    /// `hyph-<code>.<suffix>.txt` for this component.
    fn suffix() -> &'static str;

    /// Location of the source file for `lang`.
    fn sourcepath(lang : Language, paths : &Paths) -> PathBuf {
        paths.source_pattern(lang, Self::suffix())
    }

    /// Reads the source file for `lang` line by line, turns every line into
    /// a `(text, tally)` pair via `Parse::pair` (using the configured
    /// `normalize`), and assembles the component from those pairs.
    ///
    /// # Errors
    /// * `Error::IO` if the file cannot be opened or a line cannot be read
    ///   (a failed line read used to panic; it is now reported to the
    ///   caller like any other I/O failure).
    /// * Whatever `try_from_iter` reports for the parsed pairs.
    fn build(lang : Language, paths : &Paths) -> Result<Self, Error> {
        let file = File::open(Self::sourcepath(lang, paths))?;
        // Collect into a `Result` so the first failing line aborts the read
        // and is propagated instead of unwrapped.
        let pairs = io::BufReader::new(file)
            .lines()
            .map(|line| line.map(|text| Self::pair(&text, normalize)))
            .collect::<Result<Vec<_>, io::Error>>()?;
        Self::try_from_iter(pairs)
    }
}
impl Build for Patterns {
    /// Standard patterns are read from `hyph-<code>.pat.txt`.
    fn suffix() -> &'static str {
        "pat"
    }
}
impl Build for Exceptions {
    /// Exceptions are read from `hyph-<code>.hyp.txt`.
    fn suffix() -> &'static str {
        "hyp"
    }
}
impl Build for ext::Patterns {
    /// Extended patterns are read from `hyph-<code>.ext.txt`.
    fn suffix() -> &'static str {
        "ext"
    }
}
fn write<T>(item : &T, path : &Path) -> Result<(), Error>
where T : ser::Serialize
{
let mut buffer = File::create(&path).map(io::BufWriter::new)?;
bin::serialize_into(&mut buffer, item)?;
Ok(())
}
/// Copies every entry directly inside `from` into `to`, keeping file names.
///
/// Each entry is handed to `fs::copy`; nothing here descends into
/// subdirectories. `to` must already exist.
///
/// # Errors
/// Returns `Error::IO` on the first directory-listing or copy failure.
fn copy_dir(from : &Path, to : &Path) -> Result<(), Error> {
    for listed in fs::read_dir(from)? {
        let item = listed?;
        let target = to.join(item.file_name());
        fs::copy(item.path(), target)?;
    }
    Ok(())
}
/// Build-script entry point.
///
/// Populates `<OUT_DIR>/dictionaries` in one of two ways, selected by cargo
/// features:
///
/// * none of `build_dictionaries` / `nfc` / `nfd` / `nfkc` / `nfkd`: the
///   files in `<CARGO_MANIFEST_DIR>/dictionaries` are copied over as they are;
/// * any of those features: every dictionary is rebuilt from the text sources
///   under `patterns/` and serialized with bincode.
///
/// With `embed_en-us` or `embed_all`, the selected dictionary file names are
/// additionally handed to `pocket_resources::package`.
///
/// # Panics
/// Panics (failing the build) on any error other than a failed `Exceptions`
/// build, which falls back to an empty exception set.
fn main() {
    let dict_folder = Path::new("dictionaries");
    // Underscore-prefixed bindings are unused under some feature combinations.
    let _std_out = "standard";
    let _ext_out = "extended";

    let paths = Paths::new().expect("CARGO_MANIFEST_DIR and OUT_DIR must be readable");
    let _dict_source = paths.source_item(dict_folder);
    let dict_out = paths.place_item(dict_folder);

    let _ext_langs = vec![Catalan, Hungarian];
    let _std_langs = vec![Afrikaans,
                          Albanian,
                          Armenian,
                          Assamese,
                          Basque,
                          Belarusian,
                          Bengali,
                          Bulgarian,
                          Catalan,
                          Chinese,
                          Coptic,
                          Croatian,
                          Czech,
                          Danish,
                          Dutch,
                          EnglishGB,
                          EnglishUS,
                          Esperanto,
                          Estonian,
                          Ethiopic,
                          Finnish,
                          FinnishScholastic,
                          French,
                          Friulan,
                          Galician,
                          Georgian,
                          German1901,
                          German1996,
                          GermanSwiss,
                          GreekAncient,
                          GreekMono,
                          GreekPoly,
                          Gujarati,
                          Hindi,
                          Hungarian,
                          Icelandic,
                          Indonesian,
                          Interlingua,
                          Irish,
                          Italian,
                          Kannada,
                          Kurmanji,
                          Latin,
                          LatinClassic,
                          LatinLiturgical,
                          Latvian,
                          Lithuanian,
                          Macedonian,
                          Malayalam,
                          Marathi,
                          Mongolian,
                          NorwegianBokmal,
                          NorwegianNynorsk,
                          Occitan,
                          Oriya,
                          Pali,
                          Panjabi,
                          Piedmontese,
                          Polish,
                          Portuguese,
                          Romanian,
                          Romansh,
                          Russian,
                          Sanskrit,
                          SerbianCyrillic,
                          SerbocroatianCyrillic,
                          SerbocroatianLatin,
                          SlavonicChurch,
                          Slovak,
                          Slovenian,
                          Spanish,
                          Swedish,
                          Tamil,
                          Telugu,
                          Thai,
                          Turkish,
                          Turkmen,
                          Ukrainian,
                          Uppersorbian,
                          Welsh];

    fs::create_dir_all(&dict_out).expect("could not create the dictionary output directory");

    // Default path: reuse the dictionaries found in the source tree.
    #[cfg(not(any(feature = "build_dictionaries",
                  feature = "nfc",
                  feature = "nfd",
                  feature = "nfkc",
                  feature = "nfkd")))]
    {
        copy_dir(_dict_source.as_path(), dict_out.as_path())
            .expect("could not copy the bundled dictionaries");
    }

    // Rebuild path: requested explicitly, or implied by a normalization
    // feature (which changes what `normalize` does to the pattern text).
    #[cfg(any(feature = "build_dictionaries",
              feature = "nfc",
              feature = "nfd",
              feature = "nfkc",
              feature = "nfkd"))]
    {
        println!("Building `Standard` dictionaries:");
        for &language in _std_langs.iter() {
            println!("  - {:?}", language);
            let patterns = Patterns::build(language, &paths)
                .expect("could not build standard patterns");
            // Any failure to build the exceptions is deliberately treated as
            // "no exceptions" (presumably because not every language ships a
            // `.hyp.txt` file — NOTE(review): this also hides read errors).
            let exceptions = Exceptions::build(language, &paths).unwrap_or_default();
            let builder = Builder { language,
                                    patterns,
                                    exceptions };
            let dict = Standard::from(builder);
            write(&dict, &paths.place_dict(language, _std_out))
                .expect("could not write standard dictionary");
        }

        println!("Building `Extended` dictionaries:");
        for &language in _ext_langs.iter() {
            println!("  - {:?}", language);
            let patterns = ext::Patterns::build(language, &paths)
                .expect("could not build extended patterns");
            let builder = ext::Builder { language,
                                         patterns,
                                         exceptions : ext::Exceptions::default() };
            let dict = ext::Extended::from(builder);
            write(&dict, &paths.place_dict(language, _ext_out))
                .expect("could not write extended dictionary");
        }
    }

    // Embed only the standard US English dictionary.
    #[cfg(all(feature = "embed_en-us", not(feature = "embed_all")))]
    {
        use std::iter;
        let dict = (&dict_folder, Paths::dict_name(EnglishUS, _std_out));
        pocket_resources::package(iter::once(&dict)).expect("could not embed dictionary");
    }

    // Embed every standard and extended dictionary.
    #[cfg(feature = "embed_all")]
    {
        let std_p = _std_langs.iter()
                              .map(|&l| (&dict_folder, Paths::dict_name(l, _std_out)));
        let ext_p = _ext_langs.iter()
                              .map(|&l| (&dict_folder, Paths::dict_name(l, _ext_out)));
        let all_paths : Vec<_> = std_p.chain(ext_p).collect();
        pocket_resources::package(all_paths.iter()).expect("could not embed dictionaries");
    }

    println!("cargo:rerun-if-changed=build.rs");
}
/// Everything that can go wrong while preparing the dictionaries.
#[derive(Debug)]
pub enum Error {
    /// A dictionary could not be constructed (wraps the underlying
    /// `fst::Error`).
    Build(fst::Error),
    /// A required environment variable could not be read.
    Env(env::VarError),
    /// A file-system operation failed.
    IO(io::Error),
    /// bincode serialization failed.
    Serialization(bin::Error),
    /// A dictionary could not be embedded.
    Resource,
}
impl error::Error for Error {
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
match *self {
Error::Build(ref e) => Some(e),
Error::Env(ref e) => Some(e),
Error::IO(ref e) => Some(e),
Error::Serialization(ref e) => Some(e),
_ => None,
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::Build(ref e) => e.fmt(f),
Error::Env(ref e) => e.fmt(f),
Error::IO(ref e) => e.fmt(f),
Error::Serialization(ref e) => e.fmt(f),
Error::Resource => f.write_str("dictionary could not be embedded"),
}
}
}
impl From<io::Error> for Error {
fn from(err : io::Error) -> Error { Error::IO(err) }
}
impl From<env::VarError> for Error {
fn from(err : env::VarError) -> Error { Error::Env(err) }
}
impl From<bin::Error> for Error {
fn from(err : bin::Error) -> Error { Error::Serialization(err) }
}
impl From<fst::Error> for Error {
fn from(err : fst::Error) -> Error { Error::Build(err) }
}
impl From<dictionary::Error> for Error {
fn from(err : dictionary::Error) -> Error { Error::Build(err.0) }
}