unic_normal/
decompose.rs

1// Copyright 2012-2015 The Rust Project Developers.
2// Copyright 2017 The UNIC Project Developers.
3//
4// See the COPYRIGHT file at the top-level directory of this distribution.
5//
6// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9// option. This file may not be copied, modified, or distributed
10// except according to those terms.
11
12use std::collections::VecDeque;
13use std::fmt::{self, Write};
14
15use unic_ucd_normal::{decompose_canonical, decompose_compatible, CanonicalCombiningClass};
16
17// Helper functions used for Unicode normalization
18fn canonical_sort(comb: &mut VecDeque<(char, CanonicalCombiningClass)>) {
19    let len = comb.len();
20    for i in 0..len {
21        let mut swapped = false;
22        for j in 1..len - i {
23            let class_a = comb[j - 1].1;
24            let class_b = comb[j].1;
25            if class_a.is_reordered() && class_b.is_reordered() && class_a > class_b {
26                comb.swap(j - 1, j);
27                swapped = true;
28            }
29        }
30        if !swapped {
31            break;
32        }
33    }
34}
35
/// Selects which decomposition routine a `Decompositions` iterator feeds its
/// input characters through.
///
/// The enum is fieldless, so it is cheap to copy and compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum DecompositionType {
    /// Input characters are passed to `decompose_canonical`.
    Canonical,
    /// Input characters are passed to `decompose_compatible`.
    Compatible,
}
41
42/// External iterator for a string decomposition's characters.
43#[derive(Clone, Debug)]
44pub struct Decompositions<I> {
45    kind: DecompositionType,
46    iter: I,
47    buffer: VecDeque<(char, CanonicalCombiningClass)>,
48    sorted: bool,
49}
50
51#[inline]
52pub fn new_canonical<I: Iterator<Item = char>>(iter: I) -> Decompositions<I> {
53    Decompositions {
54        iter,
55        buffer: VecDeque::new(),
56        sorted: false,
57        kind: self::DecompositionType::Canonical,
58    }
59}
60
61#[inline]
62pub fn new_compatible<I: Iterator<Item = char>>(iter: I) -> Decompositions<I> {
63    Decompositions {
64        iter,
65        buffer: VecDeque::new(),
66        sorted: false,
67        kind: self::DecompositionType::Compatible,
68    }
69}
70
71impl<I: Iterator<Item = char>> Iterator for Decompositions<I> {
72    type Item = char;
73
74    #[inline]
75    fn next(&mut self) -> Option<char> {
76        use self::DecompositionType::*;
77
78        match self.buffer.front() {
79            Some(&(c, CanonicalCombiningClass::NotReordered)) => {
80                self.sorted = false;
81                self.buffer.pop_front();
82                return Some(c);
83            }
84            Some(&(c, _)) if self.sorted => {
85                self.buffer.pop_front();
86                return Some(c);
87            }
88            _ => self.sorted = false,
89        }
90
91        if !self.sorted {
92            for ch in self.iter.by_ref() {
93                let buffer = &mut self.buffer;
94                let sorted = &mut self.sorted;
95                {
96                    let callback = |d| {
97                        let ccc = CanonicalCombiningClass::of(d);
98                        if ccc.is_not_reordered() && !*sorted {
99                            canonical_sort(buffer);
100                            *sorted = true;
101                        }
102                        buffer.push_back((d, ccc));
103                    };
104                    match self.kind {
105                        Canonical => decompose_canonical(ch, callback),
106                        Compatible => decompose_compatible(ch, callback),
107                    }
108                }
109                if *sorted {
110                    break;
111                }
112            }
113        }
114
115        if !self.sorted {
116            canonical_sort(&mut self.buffer);
117            self.sorted = true;
118        }
119
120        if self.buffer.is_empty() {
121            None
122        } else {
123            match self.buffer.pop_front() {
124                Some((c, CanonicalCombiningClass::NotReordered)) => {
125                    self.sorted = false;
126                    Some(c)
127                }
128                Some((c, _)) => Some(c),
129                None => unreachable!(),
130            }
131        }
132    }
133
134    fn size_hint(&self) -> (usize, Option<usize>) {
135        let (lower, _) = self.iter.size_hint();
136        (lower, None)
137    }
138}
139
140impl<I: Iterator<Item = char> + Clone> fmt::Display for Decompositions<I> {
141    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
142        for c in self.clone() {
143            f.write_char(c)?;
144        }
145        Ok(())
146    }
147}