unicode_normalization_alignments/
recompose.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11use decompose::Decompositions;
12use smallvec::SmallVec;
13use std::fmt::{self, Write};
14
/// Phase of the `Recompositions` state machine.
///
/// The `usize` payloads are indices into the pending-character buffer and
/// identify the next buffered item to emit.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RecompositionState {
    /// Pulling characters from the decomposition and trying to merge them
    /// into the current composee.
    Composing,
    /// Emitting buffered characters starting at the given index; composing
    /// resumes once the buffer has been drained.
    Purging(usize),
    /// The input is exhausted; emitting the remaining buffered characters
    /// starting at the given index.
    Finished(usize),
}
21
/// External iterator for a string recomposition's characters.
///
/// Items are `(char, isize)` pairs: the output character plus a signed
/// change value carried through from the decomposition stage.
/// NOTE(review): the change value looks like an alignment offset relative to
/// the input characters — confirm against the `decompose` module.
#[derive(Clone)]
pub struct Recompositions<I> {
    /// Decomposition stage supplying the `(char, isize)` pairs to recompose.
    iter: Decompositions<I>,
    /// Current phase of the recomposition state machine.
    state: RecompositionState,
    /// Characters that could not be merged into `composee` and are waiting to
    /// be emitted after it.
    buffer: SmallVec<[(char, isize); 4]>,
    /// Character (with its accumulated change value) that following
    /// characters are tentatively composed with; `None` when there is none.
    composee: Option<(char, isize)>,
    /// Combining class of the most recently buffered character; `None` while
    /// nothing has been buffered since the current composee was set.
    last_ccc: Option<u8>,
}
31
32#[inline]
33pub fn new_canonical<I: Iterator<Item = char>>(iter: I) -> Recompositions<I> {
34    Recompositions {
35        iter: super::decompose::new_canonical(iter),
36        state: self::RecompositionState::Composing,
37        buffer: SmallVec::new(),
38        composee: None,
39        last_ccc: None,
40    }
41}
42
43#[inline]
44pub fn new_compatible<I: Iterator<Item = char>>(iter: I) -> Recompositions<I> {
45    Recompositions {
46        iter: super::decompose::new_compatible(iter),
47        state: self::RecompositionState::Composing,
48        buffer: SmallVec::new(),
49        composee: None,
50        last_ccc: None,
51    }
52}
53
54impl<I: Iterator<Item = char>> Iterator for Recompositions<I> {
55    type Item = (char, isize);
56
57    #[inline]
58    fn next(&mut self) -> Option<(char, isize)> {
59        use self::RecompositionState::*;
60
61        loop {
62            match self.state {
63                Composing => {
64                    for (ch, change) in self.iter.by_ref() {
65                        let ch_class = super::char::canonical_combining_class(ch);
66                        let k = match self.composee {
67                            None => {
68                                if ch_class != 0 {
69                                    return Some((ch, change));
70                                }
71                                self.composee = Some((ch, change));
72                                continue;
73                            }
74                            Some(k) => k,
75                        };
76                        match self.last_ccc {
77                            None => match super::char::compose(k.0, ch) {
78                                Some(r) => {
79                                    self.composee = Some((r, k.1 + change - 1));
80                                    continue;
81                                }
82                                None => {
83                                    if ch_class == 0 {
84                                        self.composee = Some((ch, change));
85                                        return Some(k);
86                                    }
87                                    self.buffer.push((ch, change));
88                                    self.last_ccc = Some(ch_class);
89                                }
90                            },
91                            Some(l_class) => {
92                                if l_class >= ch_class {
93                                    // `ch` is blocked from `composee`
94                                    if ch_class == 0 {
95                                        self.composee = Some((ch, change));
96                                        self.last_ccc = None;
97                                        self.state = Purging(0);
98                                        return Some(k);
99                                    }
100                                    self.buffer.push((ch, change));
101                                    self.last_ccc = Some(ch_class);
102                                    continue;
103                                }
104                                match super::char::compose(k.0, ch) {
105                                    Some(r) => {
106                                        self.composee = Some((r, k.1 + change - 1));
107                                        continue;
108                                    }
109                                    None => {
110                                        self.buffer.push((ch, change));
111                                        self.last_ccc = Some(ch_class);
112                                    }
113                                }
114                            }
115                        }
116                    }
117                    self.state = Finished(0);
118                    if self.composee.is_some() {
119                        return self.composee.take();
120                    }
121                }
122                Purging(next) => match self.buffer.get(next).cloned() {
123                    None => {
124                        self.buffer.clear();
125                        self.state = Composing;
126                    }
127                    s => {
128                        self.state = Purging(next + 1);
129                        return s;
130                    }
131                },
132                Finished(next) => match self.buffer.get(next).cloned() {
133                    None => {
134                        self.buffer.clear();
135                        return self.composee.take();
136                    }
137                    s => {
138                        self.state = Finished(next + 1);
139                        return s;
140                    }
141                },
142            }
143        }
144    }
145}
146
147impl<I: Iterator<Item = char> + Clone> fmt::Display for Recompositions<I> {
148    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
149        for c in self.clone() {
150            f.write_char(c.0)?;
151        }
152        Ok(())
153    }
154}