unic_normal/
recompose.rs

// Copyright 2012-2015 The Rust Project Developers.
// Copyright 2017 The UNIC Project Developers.
//
// See the COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
11
12use std::collections::VecDeque;
13use std::fmt::{self, Write};
14
15use unic_ucd_normal::{compose, CanonicalCombiningClass};
16
17use crate::decompose::Decompositions;
18
/// Phase of the recomposition state machine driven by `Recompositions::next`.
///
/// A fresh iterator starts in `Composing`. It alternates between `Composing`
/// and `Purging` while input is available and settles in `Finished` once the
/// underlying source has been exhausted.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RecompositionState {
    /// Pulling characters from the source and trying to merge them into the
    /// pending composee.
    Composing,
    /// Emitting the characters that were buffered behind the composee that
    /// has just been returned; switches back to `Composing` when the buffer
    /// is empty.
    Purging,
    /// The source is exhausted; only the buffered characters remain to be
    /// emitted.
    Finished,
}
25
26/// External iterator for a string recomposition's characters.
27#[derive(Clone, Debug)]
28pub struct Recompositions<I> {
29    iter: Decompositions<I>,
30    state: RecompositionState,
31    buffer: VecDeque<char>,
32    composee: Option<char>,
33    last_ccc: Option<CanonicalCombiningClass>,
34}
35
36#[inline]
37pub fn new_canonical<I: Iterator<Item = char>>(iter: I) -> Recompositions<I> {
38    Recompositions {
39        iter: super::decompose::new_canonical(iter),
40        state: self::RecompositionState::Composing,
41        buffer: VecDeque::new(),
42        composee: None,
43        last_ccc: None,
44    }
45}
46
47#[inline]
48pub fn new_compatible<I: Iterator<Item = char>>(iter: I) -> Recompositions<I> {
49    Recompositions {
50        iter: super::decompose::new_compatible(iter),
51        state: self::RecompositionState::Composing,
52        buffer: VecDeque::new(),
53        composee: None,
54        last_ccc: None,
55    }
56}
57
58impl<I: Iterator<Item = char>> Iterator for Recompositions<I> {
59    type Item = char;
60
61    #[inline]
62    fn next(&mut self) -> Option<char> {
63        use self::RecompositionState::*;
64
65        loop {
66            match self.state {
67                Composing => {
68                    for ch in self.iter.by_ref() {
69                        let ch_ccc = CanonicalCombiningClass::of(ch);
70                        if self.composee.is_none() {
71                            if ch_ccc.is_reordered() {
72                                return Some(ch);
73                            }
74                            self.composee = Some(ch);
75                            continue;
76                        }
77                        let k = self.composee.unwrap();
78
79                        match self.last_ccc {
80                            None => match compose(k, ch) {
81                                Some(r) => {
82                                    self.composee = Some(r);
83                                    continue;
84                                }
85                                None => {
86                                    if ch_ccc.is_not_reordered() {
87                                        self.composee = Some(ch);
88                                        return Some(k);
89                                    }
90                                    self.buffer.push_back(ch);
91                                    self.last_ccc = Some(ch_ccc);
92                                }
93                            },
94                            Some(last_ccc) => {
95                                if last_ccc >= ch_ccc {
96                                    // `ch` is blocked from `composee`
97                                    if ch_ccc.is_not_reordered() {
98                                        self.composee = Some(ch);
99                                        self.last_ccc = None;
100                                        self.state = Purging;
101                                        return Some(k);
102                                    }
103                                    self.buffer.push_back(ch);
104                                    self.last_ccc = Some(ch_ccc);
105                                    continue;
106                                }
107                                match compose(k, ch) {
108                                    Some(r) => {
109                                        self.composee = Some(r);
110                                        continue;
111                                    }
112                                    None => {
113                                        self.buffer.push_back(ch);
114                                        self.last_ccc = Some(ch_ccc);
115                                    }
116                                }
117                            }
118                        }
119                    }
120                    self.state = Finished;
121                    if self.composee.is_some() {
122                        return self.composee.take();
123                    }
124                }
125                Purging => match self.buffer.pop_front() {
126                    None => self.state = Composing,
127                    s => return s,
128                },
129                Finished => match self.buffer.pop_front() {
130                    None => return self.composee.take(),
131                    s => return s,
132                },
133            }
134        }
135    }
136}
137
138impl<I: Iterator<Item = char> + Clone> fmt::Display for Recompositions<I> {
139    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
140        for c in self.clone() {
141            f.write_char(c)?;
142        }
143        Ok(())
144    }
145}