extern crate serde;
#[macro_use]
extern crate serde_derive;
use std::borrow::Cow;
mod snowball;
use snowball::SnowballEnv;
use snowball::algorithms;
/// Language selector passed to `Stemmer::create`.
///
/// Each variant is mapped by `Stemmer::create` to the `stem` function of the
/// matching `snowball::algorithms::<language>` module. The enum is a plain
/// field-less value type: it is `Copy`, comparable for equality, printable via
/// `Debug`, and derives serde's `Serialize` / `Deserialize`.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Copy, Clone)]
pub enum Algorithm {
/// Selects `algorithms::arabic::stem`.
Arabic,
/// Selects `algorithms::danish::stem`.
Danish,
/// Selects `algorithms::dutch::stem`.
Dutch,
/// Selects `algorithms::english::stem`.
English,
/// Selects `algorithms::finnish::stem`.
Finnish,
/// Selects `algorithms::french::stem`.
French,
/// Selects `algorithms::german::stem`.
German,
/// Selects `algorithms::greek::stem`.
Greek,
/// Selects `algorithms::hungarian::stem`.
Hungarian,
/// Selects `algorithms::italian::stem`.
Italian,
/// Selects `algorithms::norwegian::stem`.
Norwegian,
/// Selects `algorithms::portuguese::stem`.
Portuguese,
/// Selects `algorithms::romanian::stem`.
Romanian,
/// Selects `algorithms::russian::stem`.
Russian,
/// Selects `algorithms::spanish::stem`.
Spanish,
/// Selects `algorithms::swedish::stem`.
Swedish,
/// Selects `algorithms::tamil::stem`.
Tamil,
/// Selects `algorithms::turkish::stem`.
Turkish
}
/// A stemmer bound to one language.
///
/// Instances are built with `Stemmer::create`, which stores the `stem`
/// function of the chosen language; `Stemmer::stem` then runs that function
/// on a fresh `SnowballEnv` for every input word.
pub struct Stemmer {
// Language-specific stemming routine. It mutates the environment in place;
// its `bool` return value is not inspected by `Stemmer::stem`.
stemmer: fn(&mut SnowballEnv) -> bool,
}
impl Stemmer {
pub fn create(lang: Algorithm) -> Self {
match lang {
Algorithm::Arabic => Stemmer { stemmer: algorithms::arabic::stem },
Algorithm::Danish => Stemmer { stemmer: algorithms::danish::stem },
Algorithm::Dutch => Stemmer { stemmer: algorithms::dutch::stem },
Algorithm::English => Stemmer { stemmer: algorithms::english::stem },
Algorithm::Finnish => Stemmer { stemmer: algorithms::finnish::stem },
Algorithm::French => Stemmer { stemmer: algorithms::french::stem },
Algorithm::German => Stemmer { stemmer: algorithms::german::stem },
Algorithm::Greek => Stemmer { stemmer: algorithms::greek::stem },
Algorithm::Hungarian => Stemmer { stemmer: algorithms::hungarian::stem },
Algorithm::Italian => Stemmer { stemmer: algorithms::italian::stem },
Algorithm::Norwegian => Stemmer { stemmer: algorithms::norwegian::stem },
Algorithm::Portuguese => Stemmer { stemmer: algorithms::portuguese::stem },
Algorithm::Romanian => Stemmer { stemmer: algorithms::romanian::stem },
Algorithm::Russian => Stemmer { stemmer: algorithms::russian::stem },
Algorithm::Spanish => Stemmer { stemmer: algorithms::spanish::stem },
Algorithm::Swedish => Stemmer { stemmer: algorithms::swedish::stem },
Algorithm::Tamil => Stemmer { stemmer: algorithms::tamil::stem },
Algorithm::Turkish => Stemmer { stemmer: algorithms::turkish::stem },
}
}
pub fn stem<'a>(&self, input: &'a str) -> Cow<'a, str> {
let mut env = SnowballEnv::create(input);
(self.stemmer)(&mut env);
env.get_current()
}
}
#[cfg(test)]
mod tests {
    //! Vocabulary-driven regression tests.
    //!
    //! Every test reads `test_data/voc_<code>.txt` (one input word per line)
    //! alongside `test_data/res_<code>.txt` (the expected stem on the same
    //! line) and checks that the stemmer maps each word to its expected stem.

    use super::{Algorithm, Stemmer};
    use std::fs::File;
    use std::io::{self, BufRead, BufReader, Lines};

    /// Opens `path` and returns an iterator over its lines.
    ///
    /// Panics with the offending path when the file cannot be opened, so a
    /// missing fixture is reported by name.
    fn open_lines(path: &str) -> Lines<BufReader<File>> {
        match File::open(path) {
            Ok(file) => BufReader::new(file).lines(),
            Err(err) => panic!("cannot open {}: {}", path, err),
        }
    }

    /// Unwraps one line read from `path`, panicking with file and line number
    /// when the read failed.
    fn read_line(entry: io::Result<String>, path: &str, line_no: usize) -> String {
        match entry {
            Ok(text) => text,
            Err(err) => panic!("{}:{}: cannot read line: {}", path, line_no, err),
        }
    }

    /// Checks every word of `test_data/voc_<code>.txt` against the expected
    /// stem on the same line of `test_data/res_<code>.txt`.
    ///
    /// Unlike a plain `zip`, which stops silently at the end of the shorter
    /// file, this fails when the two files have a different number of lines,
    /// so a truncated or empty fixture cannot make the test pass vacuously.
    fn check_vocabulary(code: &str, algorithm: Algorithm) {
        let voc_path = format!("test_data/voc_{}.txt", code);
        let res_path = format!("test_data/res_{}.txt", code);

        // One stemmer serves the whole file; it holds no per-word state.
        let stemmer = Stemmer::create(algorithm);
        let mut words = open_lines(&voc_path);
        let mut stems = open_lines(&res_path);

        let mut line_no = 0usize;
        loop {
            line_no += 1;
            match (words.next(), stems.next()) {
                (Some(word), Some(stem)) => {
                    let word = read_line(word, &voc_path, line_no);
                    let stem = read_line(stem, &res_path, line_no);
                    assert_eq!(
                        stemmer.stem(word.as_str()),
                        stem.as_str(),
                        "{}:{}: wrong stem for {:?}",
                        voc_path,
                        line_no,
                        word
                    );
                }
                (None, None) => break,
                (Some(_), None) => panic!(
                    "{} has more lines than {} (first extra line: {})",
                    voc_path, res_path, line_no
                ),
                (None, Some(_)) => panic!(
                    "{} has more lines than {} (first extra line: {})",
                    res_path, voc_path, line_no
                ),
            }
        }
    }

    #[test]
    fn german_test() {
        check_vocabulary("ger", Algorithm::German);
    }

    #[test]
    fn english_test() {
        check_vocabulary("en", Algorithm::English);
    }

    #[test]
    fn french_test() {
        check_vocabulary("fr", Algorithm::French);
    }

    #[test]
    fn spanish_test() {
        check_vocabulary("es", Algorithm::Spanish);
    }

    #[test]
    fn portuguese_test() {
        check_vocabulary("pt", Algorithm::Portuguese);
    }

    #[test]
    fn italian_test() {
        check_vocabulary("it", Algorithm::Italian);
    }

    #[test]
    fn romanian_test() {
        check_vocabulary("ro", Algorithm::Romanian);
    }

    #[test]
    fn russian_test() {
        check_vocabulary("ru", Algorithm::Russian);
    }

    #[test]
    fn arabic_test() {
        check_vocabulary("ar", Algorithm::Arabic);
    }

    #[test]
    fn finnish_test() {
        check_vocabulary("fi", Algorithm::Finnish);
    }

    #[test]
    fn greek_test() {
        check_vocabulary("el", Algorithm::Greek);
    }

    #[test]
    fn norwegian_test() {
        check_vocabulary("no", Algorithm::Norwegian);
    }
}