wezterm_bidi/
lib.rs

1use level::MAX_DEPTH;
2use level_stack::{LevelStack, Override};
3use log::trace;
4use std::borrow::Cow;
5use std::ops::Range;
6use wezterm_dynamic::{FromDynamic, ToDynamic};
7
8mod bidi_brackets;
9mod bidi_class;
10mod direction;
11mod level;
12mod level_stack;
13
14use bidi_brackets::BracketType;
15pub use bidi_class::BidiClass;
16pub use direction::Direction;
17pub use level::Level;
18
19/// Placeholder codepoint index that corresponds to NO_LEVEL
20const DELETED: usize = usize::max_value();
21
22#[derive(Debug, Clone, Copy, PartialEq, Eq, FromDynamic, ToDynamic)]
23pub enum ParagraphDirectionHint {
24    LeftToRight,
25    RightToLeft,
26    /// Attempt to auto-detect but fall back to LTR
27    AutoLeftToRight,
28    /// Attempt to auto-detect but fall back to RTL
29    AutoRightToLeft,
30}
31
32impl Default for ParagraphDirectionHint {
33    fn default() -> Self {
34        Self::LeftToRight
35    }
36}
37
38impl ParagraphDirectionHint {
39    /// Returns just the direction portion of the hint, independent
40    /// of the auto-detection state.
41    pub fn direction(self) -> Direction {
42        match self {
43            ParagraphDirectionHint::AutoLeftToRight | ParagraphDirectionHint::LeftToRight => {
44                Direction::LeftToRight
45            }
46            ParagraphDirectionHint::AutoRightToLeft | ParagraphDirectionHint::RightToLeft => {
47                Direction::RightToLeft
48            }
49        }
50    }
51}
52
53#[derive(Debug, Default)]
54pub struct BidiContext {
55    orig_char_types: Vec<BidiClass>,
56    char_types: Vec<BidiClass>,
57    levels: Vec<Level>,
58    base_level: Level,
59    runs: Vec<Run>,
60    reorder_nsm: bool,
61}
62
63/// Represents a formatting character that has been removed by the X9 rule
64pub const NO_LEVEL: i8 = -1;
65
66/// A `BidiRun` represents a run which is a contiguous sequence of codepoints
67/// from the original paragraph that have been resolved to the same embedding
68/// level, and that thus all have the same direction.
69///
70/// The `range` field encapsulates the starting and ending codepoint indices
71/// into the original paragraph.
72///
73/// Note: while the run sequence has the same level throughout, the X9 portion
74/// of the bidi algorithm can logically delete some control characters.
75/// I haven't been able to prove to myself that those control characters
76/// never manifest in the middle of a run, so it is recommended that you use the `indices`
77/// method to skip over any such elements if your shaper doesn't want them.
78#[derive(Debug, Clone, PartialEq, Eq)]
79pub struct BidiRun {
80    /// The direction for this run.  Derived from the level.
81    pub direction: Direction,
82
83    /// Embedding level of this run.
84    pub level: Level,
85
86    /// The starting and ending codepoint indices for this run
87    pub range: Range<usize>,
88
89    /// the list of control codepoint indices that were removed from the text
90    /// by the X9 portion of the bidi algorithm.
91    // Expected to have low cardinality and be generally empty, so we're
92    // using a simple vec for this.
93    pub removed_by_x9: Vec<usize>,
94}
95
96impl BidiRun {
97    pub fn indices<'a>(&'a self) -> impl Iterator<Item = usize> + 'a {
98        struct Iter<'a> {
99            range: Range<usize>,
100            removed_by_x9: &'a [usize],
101        }
102
103        impl<'a> Iterator for Iter<'a> {
104            type Item = usize;
105            fn next(&mut self) -> Option<usize> {
106                for idx in self.range.by_ref() {
107                    if self.removed_by_x9.iter().any(|&i| i == idx) {
108                        // Skip it
109                        continue;
110                    }
111                    return Some(idx);
112                }
113                None
114            }
115        }
116
117        Iter {
118            range: self.range.clone(),
119            removed_by_x9: &self.removed_by_x9,
120        }
121    }
122}
123
124struct RunIter<'a> {
125    pos: usize,
126    levels: Cow<'a, [Level]>,
127    line_range: Range<usize>,
128}
129
130impl<'a> Iterator for RunIter<'a> {
131    type Item = BidiRun;
132
133    fn next(&mut self) -> Option<BidiRun> {
134        loop {
135            if self.pos >= self.levels.len() {
136                return None;
137            }
138
139            let start = self.pos;
140            let len = span_len(start, &self.levels);
141            self.pos += len;
142
143            let level = self.levels[start];
144            if !level.removed_by_x9() {
145                let range = start..start + len;
146
147                let mut removed_by_x9 = vec![];
148                for idx in range.clone() {
149                    if self.levels[idx].removed_by_x9() {
150                        removed_by_x9.push(idx + self.line_range.start);
151                    }
152                }
153
154                return Some(BidiRun {
155                    direction: level.direction(),
156                    level,
157                    range: self.line_range.start + range.start..self.line_range.start + range.end,
158                    removed_by_x9,
159                });
160            }
161        }
162    }
163}
164
165#[derive(Debug, Clone, PartialEq, Eq)]
166pub struct ReorderedRun {
167    /// The direction for this run.  Derived from the level.
168    pub direction: Direction,
169
170    /// Embedding level of this run.
171    pub level: Level,
172
173    /// The starting and ending codepoint indices for this run
174    pub range: Range<usize>,
175
176    /// The indices in their adjusted order
177    pub indices: Vec<usize>,
178}
179
180fn span_len(start: usize, levels: &[Level]) -> usize {
181    let starting_level = levels[start];
182    levels
183        .iter()
184        .skip(start + 1)
185        .position(|&l| l != starting_level)
186        .unwrap_or(levels.len() - (start + 1))
187        + 1
188}
189
190impl BidiContext {
191    pub fn new() -> Self {
192        Self::default()
193    }
194
195    pub fn base_level(&self) -> Level {
196        self.base_level
197    }
198
199    /// When `reorder` is set to true, reordering will apply rule L3 to
200    /// non-spacing marks.  This is likely more desirable for terminal
201    /// based applications than it is for more modern GUI applications
202    /// that feed into eg: harfbuzz.
203    pub fn set_reorder_non_spacing_marks(&mut self, reorder: bool) {
204        self.reorder_nsm = reorder;
205    }
206
207    /// Produces a sequence of `BidiRun` structs that represent runs of
208    /// text and their direction (and level) across the entire paragraph.
209    pub fn runs<'a>(&'a self) -> impl Iterator<Item = BidiRun> + 'a {
210        RunIter {
211            pos: 0,
212            levels: Cow::Borrowed(&self.levels),
213            line_range: 0..self.levels.len(),
214        }
215    }
216
217    /// Given a line_range (a subslice of the current paragraph that represents
218    /// a single wrapped line), this method resets whitespace levels for the line
219    /// boundaries, and then returns the set of runs for that line.
220    pub fn line_runs(&self, line_range: Range<usize>) -> impl Iterator<Item = BidiRun> {
221        let levels = self.reset_whitespace_levels(line_range.clone());
222
223        RunIter {
224            pos: 0,
225            levels: levels.into(),
226            line_range,
227        }
228    }
229
230    pub fn reordered_runs(&self, line_range: Range<usize>) -> Vec<ReorderedRun> {
231        // reorder_line's `level` result includes entries that were
232        // removed_by_x9() but `reordered` does NOT (for compatibility with
233        // the UCD test suite).
234        // We need to account for that when we reorder the levels here!
235        let (levels, reordered) = self.reorder_line(line_range);
236        let mut reordered_levels = vec![Level(NO_LEVEL); reordered.len()];
237
238        for (vis_idx, &log_idx) in reordered.iter().enumerate() {
239            reordered_levels[vis_idx] = levels[log_idx];
240        }
241
242        reordered_levels.retain(|l| !l.removed_by_x9());
243
244        let mut runs = vec![];
245
246        let mut idx = 0;
247        while idx < reordered_levels.len() {
248            let len = span_len(idx, &reordered_levels);
249            let level = reordered_levels[idx];
250            if !level.removed_by_x9() {
251                let idx_range = idx..idx + len;
252                let start = reordered[idx_range.clone()].iter().min().unwrap();
253                let end = reordered[idx_range.clone()].iter().max().unwrap();
254                runs.push(ReorderedRun {
255                    direction: level.direction(),
256                    level,
257                    range: *start..*end + 1,
258                    indices: reordered[idx_range].to_vec(),
259                });
260            }
261            idx += len;
262        }
263
264        runs
265    }
266
267    /// `line_range` indicates a contiguous range of character indices
268    /// in the paragraph set via `resolve_paragraph`.
269    /// This method returns the reordered set of indices for display
270    /// purposes.
271    pub fn reorder_line(&self, line_range: Range<usize>) -> (Vec<Level>, Vec<usize>) {
272        self.dump_state("before L1");
273        let mut levels = self.reset_whitespace_levels(line_range.clone());
274        assert_eq!(levels.len(), line_range.end - line_range.start);
275        let reordered = self.reverse_levels(line_range.start, &mut levels);
276
277        (levels, reordered)
278    }
279
280    /// Performs Rule L3.
281    /// This rule is optional and must be enabled by calling the
282    /// set_reorder_non_spacing_marks method
283    fn reorder_non_spacing_marks(&self, levels: &mut [Level], visual: &mut [usize]) {
284        let mut idx = levels.len() - 1;
285        loop {
286            if idx > 0
287                && !levels[idx].removed_by_x9()
288                && levels[idx].direction() == Direction::RightToLeft
289                && self.orig_char_types[visual[idx]] == BidiClass::NonspacingMark
290            {
291                // Keep scanning backwards within this level
292                let level = levels[idx];
293                let seq_end = idx;
294
295                idx -= 1;
296                while idx > 0 && levels[idx].removed_by_x9()
297                    || (levels[idx] == level
298                        && matches!(
299                            self.orig_char_types[visual[idx]],
300                            BidiClass::LeftToRightEmbedding
301                                | BidiClass::RightToLeftEmbedding
302                                | BidiClass::LeftToRightOverride
303                                | BidiClass::RightToLeftOverride
304                                | BidiClass::PopDirectionalFormat
305                                | BidiClass::BoundaryNeutral
306                                | BidiClass::NonspacingMark
307                        ))
308                {
309                    idx -= 1;
310                }
311
312                if levels[idx] != level {
313                    idx += 1;
314                }
315
316                if seq_end > idx {
317                    visual[idx..=seq_end].reverse();
318                    levels[idx..=seq_end].reverse();
319                }
320            }
321
322            if idx == 0 {
323                return;
324            }
325            idx -= 1;
326        }
327    }
328
329    /// This function runs Rule L2.
330    ///
331    /// Find the highest level among the resolved levels.
332    /// Then from that highest level down to the lowest odd
333    /// level, reverse any contiguous runs at that level or higher.
334    fn reverse_levels(&self, first_cidx: usize, levels: &mut [Level]) -> Vec<usize> {
335        // Not typed as Level because the Step trait required by the loop
336        // below is nightly only
337        let mut highest_level = 0;
338        let mut lowest_odd_level = MAX_DEPTH as i8 + 1;
339        let mut no_levels = true;
340
341        for &level in levels.iter() {
342            if level.removed_by_x9() {
343                continue;
344            }
345
346            // Found something other than NO_LEVEL
347            no_levels = false;
348            highest_level = highest_level.max(level.0);
349            if level.0 % 2 == 1 && level.0 < lowest_odd_level {
350                lowest_odd_level = level.0;
351            }
352        }
353
354        if no_levels {
355            return vec![];
356        }
357
358        // Initial visual order
359        let mut visual = vec![];
360        for i in 0..levels.len() {
361            if levels[i].removed_by_x9() {
362                visual.push(DELETED);
363            } else {
364                visual.push(i + first_cidx);
365            }
366        }
367
368        // Apply L3. UAX9 has this occur after L2, but we do it
369        // before that for consistency with FriBidi's implementation.
370        if self.reorder_nsm {
371            self.reorder_non_spacing_marks(levels, &mut visual);
372        }
373
374        // Apply L2.
375        for level in (lowest_odd_level..=highest_level).rev() {
376            let level = Level(level);
377            let mut i = 0;
378            let mut in_range = false;
379            let mut significant_range = false;
380            let mut first_pos = None;
381            let mut last_pos = None;
382
383            while i < levels.len() {
384                if levels[i] >= level {
385                    if !in_range {
386                        in_range = true;
387                        first_pos.replace(i);
388                    } else {
389                        // Hit a second explicit level
390                        significant_range = true;
391                        last_pos.replace(i);
392                    }
393                } else if levels[i].removed_by_x9() {
394                    // Don't break ranges for deleted controls
395                    if in_range {
396                        last_pos.replace(i);
397                    }
398                } else {
399                    // End of a range.  Reset the range flag
400                    // and rever the range.
401                    in_range = false;
402                    match (last_pos, first_pos, significant_range) {
403                        (Some(last_pos), Some(first_pos), true) if last_pos > first_pos => {
404                            visual[first_pos..=last_pos].reverse();
405                        }
406                        _ => {}
407                    }
408                    first_pos = None;
409                    last_pos = None;
410                }
411                i += 1;
412            }
413
414            if in_range && significant_range {
415                match (last_pos, first_pos) {
416                    (Some(last_pos), Some(first_pos)) if last_pos > first_pos => {
417                        visual[first_pos..=last_pos].reverse();
418                    }
419                    _ => {}
420                }
421            }
422        }
423
424        visual.retain(|&i| i != DELETED);
425        visual
426    }
427
428    /// <http://unicode.org/reports/tr9/>
429    pub fn resolve_paragraph(&mut self, paragraph: &[char], hint: ParagraphDirectionHint) {
430        self.populate_char_types(paragraph);
431        self.resolve(hint, paragraph);
432    }
433
434    /// BD1: The bidirectional character types are values assigned to each
435    /// Unicode character, including unassigned characters
436    fn populate_char_types(&mut self, paragraph: &[char]) {
437        self.orig_char_types.clear();
438        self.orig_char_types.reserve(paragraph.len());
439        self.orig_char_types
440            .extend(paragraph.iter().map(|&c| bidi_class_for_char(c)));
441    }
442
443    pub fn set_char_types(&mut self, char_types: &[BidiClass], hint: ParagraphDirectionHint) {
444        self.orig_char_types.clear();
445        self.orig_char_types.extend(char_types);
446        self.resolve(hint, &[]);
447    }
448
449    fn resolve(&mut self, hint: ParagraphDirectionHint, paragraph: &[char]) {
450        trace!("\n**** resolve \n");
451        self.char_types.clear();
452        self.char_types.extend(self.orig_char_types.iter());
453
454        self.base_level = match hint {
455            ParagraphDirectionHint::LeftToRight => Level(0),
456            ParagraphDirectionHint::RightToLeft => Level(1),
457            ParagraphDirectionHint::AutoLeftToRight => {
458                paragraph_level(&self.char_types, false, Direction::LeftToRight)
459            }
460            ParagraphDirectionHint::AutoRightToLeft => {
461                paragraph_level(&self.char_types, false, Direction::RightToLeft)
462            }
463        };
464
465        self.dump_state("before X1-X8");
466        self.explicit_embedding_levels();
467        self.dump_state("before X9");
468        self.delete_format_characters();
469        self.dump_state("after X9");
470        self.identify_runs();
471        let iso_runs = self.identify_isolating_run_sequences();
472
473        self.dump_state("before W1");
474        self.resolve_combining_marks(&iso_runs); // W1
475        self.dump_state("before W2");
476        self.resolve_european_numbers(&iso_runs); // W2
477        self.dump_state("before W3");
478        self.resolve_arabic_letters(&iso_runs); // W3
479        self.dump_state("before W4");
480        self.resolve_separators(&iso_runs); // W4
481        self.dump_state("before W5");
482        self.resolve_terminators(&iso_runs); // W5
483        self.dump_state("before W6");
484        self.resolve_es_cs_et(&iso_runs); // W6
485        self.dump_state("before W7");
486        self.resolve_en(&iso_runs); // W7
487
488        self.dump_state("before N0");
489        self.resolve_paired_brackets(&iso_runs, paragraph); // N0
490
491        self.dump_state("before N1");
492        self.resolve_neutrals_by_context(&iso_runs); // N1
493        self.dump_state("before N2");
494        self.resolve_neutrals_by_level(&iso_runs); // N2
495
496        self.dump_state("before I1, I2");
497        self.resolve_implicit_levels();
498    }
499
500    fn dump_state(&self, label: &str) {
501        trace!("State: {}", label);
502        trace!("BidiClass: {:?}", self.char_types);
503        trace!("Levels: {:?}", self.levels);
504        trace!("");
505    }
506
507    /// This is the method for Rule W1.
508    ///
509    /// Resolve combining marks for a single text chain.
510    ///
511    /// For each character in the text chain, examine its
512    /// Bidi_Class. For characters of bc=NSM, change the Bidi_Class
513    /// value to that of the preceding character. Formatting characters
514    /// (Bidi_Class RLE, LRE, RLO, LRO, PDF) and boundary neutral (Bidi_Class BN)
515    /// are skipped over in this calculation, because they have been
516    /// "deleted" by Rule X9.
517    ///
518    /// If a bc=NSM character occurs at the start of a text chain, it is given
519    /// the Bidi_Class of sot (either R or L).
520    fn resolve_combining_marks(&mut self, iso_runs: &[IsolatingRunSequence]) {
521        for iso_run in iso_runs {
522            let mut prior_bc = iso_run.sos;
523            for &idx in &iso_run.indices {
524                if self.char_types[idx] == BidiClass::NonspacingMark {
525                    self.char_types[idx] = prior_bc;
526                } else if !self.levels[idx].removed_by_x9() {
527                    prior_bc = self.char_types[idx];
528                }
529            }
530        }
531    }
532
533    /// This is the method for Rule W2.
534    ///
535    /// Resolve European numbers for a single text chain.
536    ///
537    /// For each character in the text chain, examine its
538    /// Bidi_Class. For characters of bc=EN, scan back to find the first
539    /// character of strong type (or sot). If the strong type is bc=AL,
540    /// change the Bidi_Class EN to AN. Formatting characters
541    /// (Bidi_Class RLE, LRE, RLO, LRO, PDF) and boundary neutral (Bidi_Class BN)
542    /// are skipped over in this calculation, because they have been
543    /// "deleted" by Rule X9.
544    fn resolve_european_numbers(&mut self, iso_runs: &[IsolatingRunSequence]) {
545        for iso_run in iso_runs {
546            for (ridx, &cidx) in iso_run.indices.iter().enumerate() {
547                if self.char_types[cidx] == BidiClass::EuropeanNumber {
548                    // Scan backwards to find the first strong type
549                    let mut first_strong_bc = iso_run.sos;
550
551                    if ridx > 0 {
552                        for &pidx in iso_run.indices.get(0..ridx).unwrap().iter().rev() {
553                            match self.char_types[pidx] {
554                                bc @ BidiClass::LeftToRight
555                                | bc @ BidiClass::RightToLeft
556                                | bc @ BidiClass::ArabicLetter => {
557                                    first_strong_bc = bc;
558                                    break;
559                                }
560                                _ => {}
561                            }
562                        }
563                    }
564
565                    // Check if the first strong type is AL. If so
566                    // reset this EN to AN.
567                    if first_strong_bc == BidiClass::ArabicLetter {
568                        self.char_types[cidx] = BidiClass::ArabicNumber;
569                    }
570                }
571            }
572        }
573    }
574
575    /// This is the method for Rule W3.
576    ///
577    /// Resolve Bidi_Class=AL for a single text chain.
578    ///
579    /// For each character in the text chain, examine its
580    /// Bidi_Class. For characters of bc=AL, change the Bidi_Class
581    /// value to R.
582    fn resolve_arabic_letters(&mut self, iso_runs: &[IsolatingRunSequence]) {
583        for iso_run in iso_runs {
584            for &idx in &iso_run.indices {
585                if self.char_types[idx] == BidiClass::ArabicLetter {
586                    self.char_types[idx] = BidiClass::RightToLeft;
587                }
588            }
589        }
590    }
591
592    /// Look back ahead of `index_idx` and return true if the
593    /// bidi class == bc.  However, skip backwards over entries
594    /// that were removed by X9; they will have NO_LEVEL.
595    /// Returns the char index of the match.
596    fn is_prior_context(
597        &self,
598        index_idx: usize,
599        indices: &[usize],
600        bc: BidiClass,
601    ) -> Option<usize> {
602        if index_idx == 0 {
603            return None;
604        }
605        for &idx in indices[0..index_idx].iter().rev() {
606            if self.char_types[idx] == bc {
607                return Some(idx);
608            }
609            if !self.levels[idx].removed_by_x9() {
610                break;
611            }
612        }
613        None
614    }
615
616    /// Look ahead of `index_idx` and return true if the
617    /// bidi class == bc.  However, skip over entries
618    /// that were removed by X9; they will have NO_LEVEL.
619    /// Returns the char index of the match.
620    fn is_following_context(
621        &self,
622        index_idx: usize,
623        indices: &[usize],
624        bc: BidiClass,
625    ) -> Option<usize> {
626        for &idx in &indices[index_idx + 1..] {
627            if self.char_types[idx] == bc {
628                return Some(idx);
629            }
630            if !self.levels[idx].removed_by_x9() {
631                break;
632            }
633        }
634        None
635    }
636
637    fn is_in_context(&self, index_idx: usize, indices: &[usize], bc: BidiClass) -> bool {
638        self.is_prior_context(index_idx, indices, bc).is_some()
639            && self.is_following_context(index_idx, indices, bc).is_some()
640    }
641
642    /// This is the method for Rule W4.
643    ///
644    /// Resolve Bidi_Class=ES and CS for a single text chain.
645    ///
646    /// For each character in the text chain, examine its
647    /// Bidi_Class.
648    ///
649    /// For characters of bc=ES, check if they are *between* EN.
650    /// If so, change their Bidi_Class to EN.
651    ///
652    /// For characters of bc=CS, check if they are *between* EN
653    /// or between AN. If so, change their Bidi_Class to match.
654    ///
655    fn resolve_separators(&mut self, iso_runs: &[IsolatingRunSequence]) {
656        for iso_run in iso_runs {
657            for (index_idx, &idx) in iso_run.indices.iter().enumerate() {
658                if self.char_types[idx] == BidiClass::EuropeanSeparator {
659                    if self.is_in_context(index_idx, &iso_run.indices, BidiClass::EuropeanNumber) {
660                        self.char_types[idx] = BidiClass::EuropeanNumber;
661                    }
662                } else if self.char_types[idx] == BidiClass::CommonSeparator {
663                    if self.is_in_context(index_idx, &iso_run.indices, BidiClass::EuropeanNumber) {
664                        self.char_types[idx] = BidiClass::EuropeanNumber;
665                    } else if self.is_in_context(
666                        index_idx,
667                        &iso_run.indices,
668                        BidiClass::ArabicNumber,
669                    ) {
670                        self.char_types[idx] = BidiClass::ArabicNumber;
671                    }
672                }
673            }
674        }
675    }
676
677    /// This is the method for Rule W5.
678    ///
679    /// Resolve Bidi_Class=ET for a single text chain.
680    ///
681    /// For each character in the text chain, examine its
682    /// Bidi_Class.
683    ///
684    /// For characters of bc=ET, check if they are *next to* EN.
685    /// If so, change their Bidi_Class to EN. This includes
686    /// ET on either side of EN, so the context on both sides
687    /// needs to be checked.
688    ///
689    /// Because this rule applies to indefinite sequences of ET,
690    /// and because the context which triggers any change is
691    /// adjacency to EN, the strategy taken here is to seek for
692    /// EN first. If found, scan backwards, changing any eligible
693    /// ET to EN. Then scan forwards, changing any eligible ET
694    /// to EN. Then continue the search from the point of the
695    /// last ET changed (if any).
696    ///
697    fn resolve_terminators(&mut self, iso_runs: &[IsolatingRunSequence]) {
698        for iso_run in iso_runs {
699            for (index_idx, &idx) in iso_run.indices.iter().enumerate() {
700                if self.char_types[idx] == BidiClass::EuropeanNumber {
701                    for &prior_idx in iso_run.indices[0..index_idx].iter().rev() {
702                        if self.char_types[prior_idx] == BidiClass::EuropeanTerminator {
703                            self.char_types[prior_idx] = BidiClass::EuropeanNumber;
704                        } else if !self.levels[prior_idx].removed_by_x9() {
705                            break;
706                        }
707                    }
708                    for &next_idx in &iso_run.indices[index_idx + 1..] {
709                        if self.char_types[next_idx] == BidiClass::EuropeanTerminator {
710                            self.char_types[next_idx] = BidiClass::EuropeanNumber;
711                        } else if !self.levels[next_idx].removed_by_x9() {
712                            break;
713                        }
714                    }
715                }
716            }
717        }
718    }
719
720    /// This is the method for Rule W6.
721    ///
722    /// Resolve remaining Bidi_Class=ES, CS, or ET for a single text chain.
723    ///
724    /// For each character in the text chain, examine its
725    /// Bidi_Class. For characters of bc=ES, bc=CS, or bc=ET, change
726    /// the Bidi_Class value to ON. This resolves any remaining
727    /// separators or terminators which were not already processed
728    /// by Rules W4 and W5.
729    fn resolve_es_cs_et(&mut self, iso_runs: &[IsolatingRunSequence]) {
730        for iso_run in iso_runs {
731            for &idx in &iso_run.indices {
732                match self.char_types[idx] {
733                    BidiClass::EuropeanSeparator
734                    | BidiClass::CommonSeparator
735                    | BidiClass::EuropeanTerminator => {
736                        self.char_types[idx] = BidiClass::OtherNeutral;
737                    }
738                    _ => {}
739                }
740            }
741        }
742    }
743
744    /// This is the method for Rule W7.
745    ///
746    /// Resolve Bidi_Class=EN for a single level text chain.
747    ///
748    /// Process the text chain in reverse order. For each character in the text chain, examine its
749    /// Bidi_Class. For characters of bc=EN, scan back to find the first strong
750    /// directional type. If that type is L, change the Bidi_Class
751    /// value of the number to L.
752    fn resolve_en(&mut self, iso_runs: &[IsolatingRunSequence]) {
753        for iso_run in iso_runs {
754            for (ridx, &cidx) in iso_run.indices.iter().enumerate().rev() {
755                if self.char_types[cidx] == BidiClass::EuropeanNumber {
756                    // Scan backwards to find the first strong type
757                    let mut first_strong_bc = iso_run.sos;
758
759                    if ridx > 0 {
760                        for &pidx in iso_run.indices.get(0..ridx).unwrap().iter().rev() {
761                            match self.char_types[pidx] {
762                                bc @ BidiClass::LeftToRight | bc @ BidiClass::RightToLeft => {
763                                    first_strong_bc = bc;
764                                    break;
765                                }
766                                _ => {}
767                            }
768                        }
769                    }
770
771                    if first_strong_bc == BidiClass::LeftToRight {
772                        self.char_types[cidx] = BidiClass::LeftToRight;
773                    }
774                }
775            }
776        }
777    }
778
779    /// This is the method for Rule N0. (New in UBA63)
780    /// Resolve paired brackets for a single text chain.
781    ///
782    /// For each character in the text chain, examine its
783    /// Bidi_Class. For any character with the bpt value open or close,
784    /// scan its context seeking a matching paired bracket. If found,
785    /// resolve the type of both brackets to match the embedding
786    /// direction.
787    ///
788    /// For UBA63 (and unchanged in UBA70), the error handling for
789    /// a stack overflow was unspecified for this rule.
790    ///
791    /// Starting with UBA80, the exact stack size is specified (63),
792    /// and the specification declares that if a stack overflow
793    /// condition is encountered, the BD16 processing for this
794    /// particular isolating run ceases immediately. This condition
795    /// does not treated as a fatal error, however, so the rule
796    /// should not return an error code here, which would stop
797    /// all processing for *all* runs of the input string.
798    fn resolve_paired_brackets(&mut self, iso_runs: &[IsolatingRunSequence], paragraph: &[char]) {
799        if paragraph.is_empty() {
800            // BidiTest cases don't populate the paragraph, but they
801            // also don't contain any bracket related tests either,
802            // so we have nothing to do here.
803            return;
804        }
805
806        let mut stack = BracketStack::new();
807        for iso_run in iso_runs {
808            stack.clear();
809            for (ridx, &cidx) in iso_run.indices.iter().enumerate() {
810                if let Some((closing_bracket, bpt)) = lookup_closing(paragraph[cidx]) {
811                    trace!("ridx={} cidx={} {:?} bracket", ridx, cidx, paragraph[cidx]);
812                    if self.char_types[cidx] == BidiClass::OtherNeutral {
813                        if bpt == BracketType::Open {
814                            trace!("push open ridx={}", ridx);
815                            if !stack.push(closing_bracket, ridx) {
816                                // Stack overflow: halt processing
817                                return;
818                            }
819                        } else {
820                            // a closing bracket
821                            trace!("close at ridx={}, search for opener", ridx);
822                            stack.seek_matching_open_bracket(paragraph[cidx], ridx);
823                        }
824                    }
825                }
826            }
827
828            if stack.pairs.is_empty() {
829                // The pairList pointer will still be NULL if no paired brackets
830                // were found. In this case, no further processing is necessary.
831                continue;
832            }
833
834            // Because of the way the stack
835            // processing works, the pairs may not be in the best order
836            // in the pair list for further processing. Sort them
837            // by position order of the opening bracket.
838            stack.pairs.sort_unstable_by_key(|p| p.opening_pos);
839            trace!("\nPairs: {:?}", stack.pairs);
840
841            for pair in &stack.pairs {
842                // Now for each pair, we have the first and last position
843                // of the substring in this isolating run sequence
844                // enclosed by those brackets (inclusive
845                // of the brackets). Resolve that individual pair.
846                self.resolve_one_pair(pair, &iso_run);
847            }
848        }
849    }
850
851    /// Set the Bidi_Class of a bracket pair, based on the
852    /// direction determined by the N0 rule processing in
853    /// br_ResolveOnePair().
854    ///
855    /// The direction passed in will either be BIDI_R or BIDI_L.
856    ///
857    /// This setting is abstracted in a function here, rather than
858    /// simply being done inline, because of
859    /// an edge case added to rule N0 as of UBA80. For UBA63 (and
860    /// UBA70), no special handling of combining marks following
861    /// either of the brackets is done. However, starting with UBA80,
862    /// there is an edge case fix-up done which echoes the processing
863    /// of rule W1. The text run needs to be scanned to find any
864    /// combining marks (orig_bc=NSM) following a bracket which has
865    /// its Bidi_Class changed by N0. Then those combining marks
866    /// can again be adjusted to match the Bidi_Class of the
867    /// bracket they apply to. This is an odd edge case, as combining
868    /// marks do not typically occur with brackets, but the UBA80
869    /// specification is now explicit about requiring this fix-up
870    /// to be done.
871    fn set_bracket_pair_bc(
872        pair: &Pair,
873        indices: &[usize],
874        direction: Direction,
875        char_types: &mut [BidiClass],
876        orig_char_types: &[BidiClass],
877        levels: &[Level],
878    ) {
879        let opening_pos = indices[pair.opening_pos];
880        let closing_pos = indices[pair.closing_pos];
881        let bc = match direction {
882            Direction::LeftToRight => BidiClass::LeftToRight,
883            Direction::RightToLeft => BidiClass::RightToLeft,
884        };
885        trace!(
886            "set_bracket_pair_bc index={} from {:?} -> {:?}",
887            opening_pos,
888            char_types[opening_pos],
889            bc
890        );
891        trace!(
892            "set_bracket_pair_bc index={} from {:?} -> {:?}",
893            closing_pos,
894            char_types[closing_pos],
895            bc
896        );
897        char_types[opening_pos] = bc;
898        char_types[closing_pos] = bc;
899
900        // Here is the tricky part.
901        //
902        // First scan from the opening bracket for any subsequent
903        // character whose *original* Bidi_Class was NSM, and set
904        // the current bc for it to direction also, to match the bracket.
905        // Break out of the loop at the first character with any other
906        // original Bidi_Class, so that this change only impacts
907        // actual combining mark sequences.
908        //
909        // 2020-03-27 note: This scanning for original combining marks
910        // must also scan past any intervening NO_LEVEL characters,
911        // typically bc=BN characters removed earlier by rule X9.
912        // Such sequences may, for example involve a ZWJ or ZWNJ,
913        // or in bizarre edge cases involve other bc=BN characters
914        // such as ZWSP. The latter would be defective combining character
915        // sequences, but also need to be handled here.
916        //
917        // Then repeat the process for the matching closing bracket.
918        //
919        // The processing for the opening bracket is bounded to the
920        // right by the position of the matching closing bracket.
921        // The processing for the closing bracket is bounded to the
922        // right by the end of the text run.
923        for &cidx in &indices[pair.opening_pos + 1..pair.closing_pos] {
924            if orig_char_types[cidx] == BidiClass::NonspacingMark {
925                char_types[cidx] = bc;
926            } else if !levels[cidx].removed_by_x9() {
927                break;
928            }
929        }
930        for &cidx in &indices[pair.closing_pos + 1..] {
931            if orig_char_types[cidx] == BidiClass::NonspacingMark {
932                char_types[cidx] = bc;
933            } else if !levels[cidx].removed_by_x9() {
934                break;
935            }
936        }
937    }
938
939    /// Resolve the embedding levels of one pair of matched brackets.
940    ///
941    /// This determination is based on the embedding direction.
942    /// See BD3 in the UBA specification.
943    ///
944    /// If embedding level is even, embedding direction = L.
945    /// If embedding level is odd,  embedding direction = R.
946    fn resolve_one_pair(&mut self, pair: &Pair, iso_run: &IsolatingRunSequence) {
947        let embedding_direction = iso_run.level.direction();
948        let opposite_direction = embedding_direction.opposite();
949
950        let mut strong_type_found = false;
951        // Next check for a strong type (R or L)
952        // between the matched brackets. If a strong type is found
953        // which matches the embedding direction, then set the type of both
954        // brackets to match the embedding direction, too.
955        if pair.opening_pos < pair.closing_pos.saturating_sub(1) {
956            trace!("pair: {:?}", pair);
957            for &cidx in &iso_run.indices[pair.opening_pos + 1..pair.closing_pos] {
958                let direction = match self.char_types[cidx] {
959                    BidiClass::RightToLeft
960                    | BidiClass::EuropeanNumber
961                    | BidiClass::ArabicNumber => Some(Direction::RightToLeft),
962                    BidiClass::LeftToRight => Some(Direction::LeftToRight),
963                    _ => None,
964                };
965
966                if direction == Some(embedding_direction) {
967                    // N0 step b
968                    trace!("Strong direction e between brackets");
969                    Self::set_bracket_pair_bc(
970                        pair,
971                        &iso_run.indices,
972                        embedding_direction,
973                        &mut self.char_types,
974                        &self.orig_char_types,
975                        &self.levels,
976                    );
977                    return;
978                } else if direction == Some(opposite_direction) {
979                    strong_type_found = true;
980                }
981            }
982        }
983
984        if strong_type_found {
985            // First attempt to resolve direction by checking the prior context for
986            // a strong type matching the opposite direction. N0 Step c1.
987            if (opposite_direction == Direction::LeftToRight
988                && self.is_prior_context_left(pair.opening_pos, &iso_run.indices, iso_run.sos))
989                || (opposite_direction == Direction::RightToLeft
990                    && self.is_prior_context_right(pair.opening_pos, &iso_run.indices, iso_run.sos))
991            {
992                Self::set_bracket_pair_bc(
993                    pair,
994                    &iso_run.indices,
995                    opposite_direction,
996                    &mut self.char_types,
997                    &self.orig_char_types,
998                    &self.levels,
999                );
1000                return;
1001            } else {
1002                // No strong type matching the oppositedirection was found either
1003                // before or after these brackets in this text chain. Resolve the
1004                // brackets based on the embedding direction. N0 Step c2.
1005                Self::set_bracket_pair_bc(
1006                    pair,
1007                    &iso_run.indices,
1008                    embedding_direction,
1009                    &mut self.char_types,
1010                    &self.orig_char_types,
1011                    &self.levels,
1012                );
1013                return;
1014            }
1015        } else {
1016            // No strong type was found between the brackets. Leave
1017            // the brackets with unresolved direction.
1018        }
1019    }
1020
1021    /// This is the method for Rule N1.
1022    ///
1023    /// Resolve neutrals by context for a single text chain.
1024    ///
1025    /// For each character in the text chain, examine its
1026    /// Bidi_Class. For any character of neutral type, examine its
1027    /// context.
1028    ///
1029    /// L N L --> L L L
1030    /// R N R --> R R R [note that AN and EN count as R for this rule]
1031    ///
1032    /// Here "N" stands for "any sequence of neutrals", so the neutral
1033    /// does not have to be immediately adjacent to a strong type
1034    /// to be resolved this way.
1035    fn resolve_neutrals_by_context(&mut self, iso_runs: &[IsolatingRunSequence]) {
1036        for iso_run in iso_runs {
1037            for (ridx, &cidx) in iso_run.indices.iter().enumerate().rev() {
1038                if !self.char_types[cidx].is_neutral() {
1039                    continue;
1040                }
1041
1042                if self.is_prior_context_left(ridx, &iso_run.indices, iso_run.sos)
1043                    && self.is_following_context_left(ridx, &iso_run.indices, iso_run.eos)
1044                {
1045                    trace!(
1046                        "ridx={} cidx={} was {:?}, setting to LeftToRight",
1047                        ridx,
1048                        cidx,
1049                        self.char_types[cidx]
1050                    );
1051                    self.char_types[cidx] = BidiClass::LeftToRight;
1052                } else if self.is_prior_context_right(ridx, &iso_run.indices, iso_run.sos)
1053                    && self.is_following_context_right(ridx, &iso_run.indices, iso_run.eos)
1054                {
1055                    trace!(
1056                        "ridx={} cidx={} was {:?}, setting to RightToLeft",
1057                        ridx,
1058                        cidx,
1059                        self.char_types[cidx]
1060                    );
1061                    self.char_types[cidx] = BidiClass::RightToLeft;
1062                }
1063            }
1064        }
1065    }
1066
1067    /// Scan backwards in a text chain, checking if the first non-neutral character
1068    /// is an "L" type.  Skip over any "deleted" controls, which have NO_LEVEL,
1069    /// as well as any neutral types.
1070    fn is_prior_context_left(&self, index_idx: usize, indices: &[usize], sot: BidiClass) -> bool {
1071        if index_idx == 0 {
1072            trace!(
1073                "is_prior_context_left: short circuit because index_idx=0. sot is {:?}",
1074                sot
1075            );
1076            return sot == BidiClass::LeftToRight;
1077        }
1078        for &idx in indices[0..index_idx].iter().rev() {
1079            trace!(
1080                "is_prior_context_left considering idx={} {:?}",
1081                idx,
1082                self.char_types[idx]
1083            );
1084            if self.char_types[idx] == BidiClass::LeftToRight {
1085                return true;
1086            }
1087            if self.levels[idx].removed_by_x9() {
1088                continue;
1089            }
1090            if self.char_types[idx].is_neutral() {
1091                continue;
1092            }
1093            return false;
1094        }
1095        sot == BidiClass::LeftToRight
1096    }
1097
1098    /// Scan forwards in a text chain, checking if the first non-neutral character is an "L" type.
1099    /// Skip over any "deleted" controls, which have NO_LEVEL, as well as any neutral types.
1100    fn is_following_context_left(
1101        &self,
1102        index_idx: usize,
1103        indices: &[usize],
1104        eot: BidiClass,
1105    ) -> bool {
1106        trace!(
1107            "is_following_context_left index_idx={} vs. len {}",
1108            index_idx,
1109            indices.len()
1110        );
1111        for &idx in &indices[index_idx + 1..] {
1112            if self.char_types[idx] == BidiClass::LeftToRight {
1113                trace!("is_following_context_left true because idx={} is left", idx);
1114                return true;
1115            }
1116            if self.levels[idx].removed_by_x9() {
1117                continue;
1118            }
1119            if self.char_types[idx].is_neutral() {
1120                continue;
1121            }
1122            return false;
1123        }
1124        trace!(
1125            "is_following_context_left fall through to bottom, check against eot={:?}",
1126            eot
1127        );
1128        eot == BidiClass::LeftToRight
1129    }
1130
1131    /// Used by Rule N1.
1132    ///
1133    /// Scan backwards in a text chain, checking if the first non-neutral character is an "R" type.
1134    /// (BIDI_R, BIDI_AN, BIDI_EN) Skip over any "deleted" controls, which
1135    /// have NO_LEVEL, as well as any neutral types.
1136    fn is_prior_context_right(&self, index_idx: usize, indices: &[usize], sot: BidiClass) -> bool {
1137        if index_idx == 0 {
1138            return sot == BidiClass::RightToLeft;
1139        }
1140        for &idx in indices[0..index_idx].iter().rev() {
1141            match self.char_types[idx] {
1142                BidiClass::RightToLeft | BidiClass::ArabicNumber | BidiClass::EuropeanNumber => {
1143                    return true;
1144                }
1145                _ => {}
1146            }
1147            if self.levels[idx].removed_by_x9() {
1148                continue;
1149            }
1150            if self.char_types[idx].is_neutral() {
1151                continue;
1152            }
1153            return false;
1154        }
1155        sot == BidiClass::RightToLeft
1156    }
1157
1158    fn is_following_context_right(
1159        &self,
1160        index_idx: usize,
1161        indices: &[usize],
1162        eot: BidiClass,
1163    ) -> bool {
1164        for &idx in &indices[index_idx + 1..] {
1165            match self.char_types[idx] {
1166                BidiClass::RightToLeft | BidiClass::ArabicNumber | BidiClass::EuropeanNumber => {
1167                    return true;
1168                }
1169                _ => {}
1170            }
1171            if self.levels[idx].removed_by_x9() {
1172                continue;
1173            }
1174            if self.char_types[idx].is_neutral() {
1175                continue;
1176            }
1177            return false;
1178        }
1179        eot == BidiClass::RightToLeft
1180    }
1181
1182    /// This is the method for Rule N2.
1183    ///
1184    /// Resolve neutrals by level for a single text chain.
1185    ///
1186    /// For each character in the text chain, examine its
1187    /// Bidi_Class. For any character of neutral type, examine its
1188    /// embedding level and resolve accordingly.
1189    ///
1190    /// N --> e
1191    /// where e = L for an even level, R for an odd level
1192    fn resolve_neutrals_by_level(&mut self, iso_runs: &[IsolatingRunSequence]) {
1193        for iso_run in iso_runs {
1194            for &cidx in iso_run.indices.iter().rev() {
1195                if self.char_types[cidx].is_neutral() {
1196                    self.char_types[cidx] = self.levels[cidx].as_bidi_class();
1197                }
1198            }
1199        }
1200    }
1201
1202    /// This function runs Rules I1 and I2 together.
1203    fn resolve_implicit_levels(&mut self) {
1204        for (idx, level) in self.levels.iter_mut().enumerate() {
1205            if level.removed_by_x9() {
1206                continue;
1207            }
1208
1209            match level.direction() {
1210                Direction::LeftToRight => {
1211                    // I1
1212                    match self.char_types[idx] {
1213                        BidiClass::RightToLeft => {
1214                            level.0 += 1;
1215                        }
1216                        BidiClass::ArabicNumber | BidiClass::EuropeanNumber => {
1217                            level.0 += 2;
1218                        }
1219                        _ => {}
1220                    }
1221                }
1222                Direction::RightToLeft => {
1223                    // I2
1224                    match self.char_types[idx] {
1225                        BidiClass::LeftToRight
1226                        | BidiClass::ArabicNumber
1227                        | BidiClass::EuropeanNumber => {
1228                            level.0 += 1;
1229                        }
1230                        _ => {}
1231                    }
1232                }
1233            }
1234        }
1235    }
1236
1237    /// This function runs Rule L1.
1238    ///
1239    /// The strategy here for Rule L1 is to scan forward through
1240    /// the text searching for segment separators or paragraph
1241    /// separators. If a segment separator or paragraph
1242    /// separator is found, it is reset to the paragraph embedding
1243    /// level. Then scan backwards from the separator to
1244    /// find any contiguous stretch of whitespace characters
1245    /// and reset any which are found to the paragraph embedding
1246    /// level, as well. When we reach the *last* character in the
1247    /// text (which will also constitute, by definition, the last
1248    /// character in the line being processed here), check if it
1249    /// is whitespace. If so, reset it to the paragraph embedding
1250    /// level. Then scan backwards to find any contiguous stretch
1251    /// of whitespace characters and reset those as well.
1252    ///
1253    /// These checks for whitespace are done with the *original*
1254    /// Bidi_Class values for characters, not the resolved values.
1255    ///
1256    /// As for many rules, this rule simply ignores any character
1257    /// whose level has been set to NO_LEVEL, which is the way
1258    /// this reference algorithm "deletes" boundary neutrals and
1259    /// embedding and override controls from the text.
1260    fn reset_whitespace_levels(&self, line_range: Range<usize>) -> Vec<Level> {
1261        fn reset_contiguous_whitespace_before(
1262            line_range: Range<usize>,
1263            base_level: Level,
1264            orig_char_types: &[BidiClass],
1265            levels: &mut Vec<Level>,
1266        ) {
1267            for i in line_range.rev() {
1268                if orig_char_types[i] == BidiClass::WhiteSpace
1269                    || orig_char_types[i].is_iso_control()
1270                {
1271                    levels[i] = base_level;
1272                } else if levels[i].removed_by_x9() {
1273                    // Skip over deleted entries
1274                } else {
1275                    // end of contiguous section
1276                    break;
1277                }
1278            }
1279        }
1280
1281        let mut levels = self.levels.clone();
1282
1283        for (idx, orig_bc) in self
1284            .orig_char_types
1285            .iter()
1286            .enumerate()
1287            .skip(line_range.start)
1288            .take(line_range.end - line_range.start)
1289        {
1290            match orig_bc {
1291                // Explicit boundary
1292                BidiClass::SegmentSeparator | BidiClass::ParagraphSeparator => {
1293                    levels[idx] = self.base_level;
1294                    reset_contiguous_whitespace_before(
1295                        line_range.start..idx,
1296                        self.base_level,
1297                        &self.orig_char_types,
1298                        &mut levels,
1299                    );
1300                }
1301                _ => {}
1302            }
1303        }
1304
1305        reset_contiguous_whitespace_before(
1306            line_range.clone(),
1307            self.base_level,
1308            &self.orig_char_types,
1309            &mut levels,
1310        );
1311
1312        levels[line_range].to_vec()
1313    }
1314
1315    /// Rules X1 through X8
1316    fn explicit_embedding_levels(&mut self) {
1317        // X1: initialize stack and other variables
1318        let mut stack = LevelStack::new();
1319        stack.push(self.base_level, Override::Neutral, false);
1320
1321        let len = self.char_types.len();
1322        self.levels.resize(len, Level::default());
1323
1324        let mut overflow_isolate = 0;
1325        let mut overflow_embedding = 0;
1326        let mut valid_isolate = 0;
1327
1328        // X2..X8: process each character, setting embedding levels
1329        // and override status
1330        for idx in 0..len {
1331            let bc = self.char_types[idx];
1332            trace!("Considering idx={} {:?}", idx, bc);
1333            match bc {
1334                // X2
1335                BidiClass::RightToLeftEmbedding => {
1336                    if let Some(level) = stack.embedding_level().least_greater_odd() {
1337                        if overflow_isolate == 0 && overflow_embedding == 0 {
1338                            stack.push(level, Override::Neutral, false);
1339                            continue;
1340                        }
1341                    }
1342                    if overflow_isolate == 0 {
1343                        overflow_embedding += 1;
1344                    }
1345                }
1346                // X3
1347                BidiClass::LeftToRightEmbedding => {
1348                    if let Some(level) = stack.embedding_level().least_greater_even() {
1349                        if overflow_isolate == 0 && overflow_embedding == 0 {
1350                            stack.push(level, Override::Neutral, false);
1351                            continue;
1352                        }
1353                    }
1354                    if overflow_isolate == 0 {
1355                        overflow_embedding += 1;
1356                    }
1357                }
1358                // X4
1359                BidiClass::RightToLeftOverride => {
1360                    if let Some(level) = stack.embedding_level().least_greater_odd() {
1361                        if overflow_isolate == 0 && overflow_embedding == 0 {
1362                            stack.push(level, Override::RTL, false);
1363                            continue;
1364                        }
1365                    }
1366                    if overflow_isolate == 0 {
1367                        overflow_embedding += 1;
1368                    }
1369                }
1370                // X5
1371                BidiClass::LeftToRightOverride => {
1372                    if let Some(level) = stack.embedding_level().least_greater_even() {
1373                        if overflow_isolate == 0 && overflow_embedding == 0 {
1374                            stack.push(level, Override::LTR, false);
1375                            continue;
1376                        }
1377                    }
1378                    if overflow_isolate == 0 {
1379                        overflow_embedding += 1;
1380                    }
1381                }
1382                // X5a
1383                BidiClass::RightToLeftIsolate => {
1384                    self.levels[idx] = stack.embedding_level();
1385                    stack.apply_override(&mut self.char_types[idx]);
1386                    if let Some(level) = stack.embedding_level().least_greater_odd() {
1387                        if overflow_isolate == 0 && overflow_embedding == 0 {
1388                            valid_isolate += 1;
1389                            stack.push(level, Override::Neutral, true);
1390                            continue;
1391                        }
1392                    }
1393                    overflow_isolate += 1;
1394                }
1395                // X5b
1396                BidiClass::LeftToRightIsolate => {
1397                    self.levels[idx] = stack.embedding_level();
1398                    stack.apply_override(&mut self.char_types[idx]);
1399                    if let Some(level) = stack.embedding_level().least_greater_even() {
1400                        if overflow_isolate == 0 && overflow_embedding == 0 {
1401                            valid_isolate += 1;
1402                            stack.push(level, Override::Neutral, true);
1403                            continue;
1404                        }
1405                    }
1406                    overflow_isolate += 1;
1407                }
1408                // X5c
1409                BidiClass::FirstStrongIsolate => {
1410                    let level =
1411                        paragraph_level(&self.char_types[idx + 1..], true, Direction::LeftToRight);
1412                    self.levels[idx] = stack.embedding_level();
1413                    stack.apply_override(&mut self.char_types[idx]);
1414                    let level = if level.0 == 1 {
1415                        stack.embedding_level().least_greater_odd()
1416                    } else {
1417                        stack.embedding_level().least_greater_even()
1418                    };
1419                    trace!(
1420                        "picked {:?} based on current stack level {:?}",
1421                        level,
1422                        stack.embedding_level()
1423                    );
1424
1425                    if let Some(level) = level {
1426                        if overflow_isolate == 0 && overflow_embedding == 0 {
1427                            valid_isolate += 1;
1428                            stack.push(level, Override::Neutral, true);
1429                            continue;
1430                        }
1431                    }
1432                    overflow_isolate += 1;
1433                }
1434                // X6a
1435                BidiClass::PopDirectionalIsolate => {
1436                    if overflow_isolate > 0 {
1437                        overflow_isolate -= 1;
1438                    } else if valid_isolate == 0 {
1439                        // Do nothing
1440                    } else {
1441                        overflow_embedding = 0;
1442                        loop {
1443                            if stack.isolate_status() {
1444                                break;
1445                            }
1446                            stack.pop();
1447                        }
1448                        stack.pop();
1449                        valid_isolate -= 1;
1450                    }
1451
1452                    self.levels[idx] = stack.embedding_level();
1453                    stack.apply_override(&mut self.char_types[idx]);
1454                }
1455                // X7
1456                BidiClass::PopDirectionalFormat => {
1457                    if overflow_isolate > 0 {
1458                        // Do nothing
1459                    } else if overflow_embedding > 0 {
1460                        overflow_embedding -= 1;
1461                    } else {
1462                        if !stack.isolate_status() {
1463                            if stack.depth() >= 2 {
1464                                stack.pop();
1465                            }
1466                        }
1467                    }
1468                }
1469                BidiClass::BoundaryNeutral => {}
1470                // X8
1471                BidiClass::ParagraphSeparator => {
1472                    // Terminates all embedding contexts.
1473                    // Should only ever be the last character in
1474                    // a paragraph if present at all.
1475                    self.levels[idx] = self.base_level;
1476                }
1477                // X6
1478                _ => {
1479                    self.levels[idx] = stack.embedding_level();
1480                    stack.apply_override(&mut self.char_types[idx]);
1481                }
1482            }
1483        }
1484    }
1485
1486    /// X9
1487    fn delete_format_characters(&mut self) {
1488        for (bc, level) in self.char_types.iter().zip(&mut self.levels) {
1489            match bc {
1490                BidiClass::RightToLeftEmbedding
1491                | BidiClass::LeftToRightEmbedding
1492                | BidiClass::RightToLeftOverride
1493                | BidiClass::LeftToRightOverride
1494                | BidiClass::PopDirectionalFormat
1495                | BidiClass::BoundaryNeutral => {
1496                    *level = Level(NO_LEVEL);
1497                }
1498                _ => {}
1499            }
1500        }
1501    }
1502
1503    /// X10
1504    fn identify_runs(&mut self) {
1505        let mut idx = 0;
1506        let len = self.char_types.len();
1507        self.runs.clear();
1508
1509        while idx < len {
1510            let (span_level, span_len) = span_one_run(&self.char_types, &self.levels, idx);
1511            if !span_level.removed_by_x9() {
1512                self.runs.push(Run {
1513                    start: idx,
1514                    end: idx + span_len,
1515                    len: span_len,
1516                    seq_id: 0,
1517                    level: span_level,
1518                    sor: BidiClass::OtherNeutral,
1519                    eor: BidiClass::OtherNeutral,
1520                });
1521            }
1522
1523            assert!(span_len > 0);
1524            idx += span_len;
1525        }
1526
1527        self.calculate_sor_eor();
1528
1529        trace!("\nRuns: {:#?}", self.runs);
1530    }
1531
1532    fn calculate_sor_eor(&mut self) {
1533        let mut prior_run_level = self.base_level;
1534        let mut iter = self.runs.iter_mut().peekable();
1535        while let Some(run) = iter.next() {
1536            let next_run_level = match iter.peek() {
1537                Some(next) => next.level,
1538                None => self.base_level,
1539            };
1540
1541            // Set sor based on the higher of the prior_run_level and the current level.
1542            run.sor = prior_run_level.max(run.level).as_bidi_class();
1543            run.eor = next_run_level.max(run.level).as_bidi_class();
1544
1545            prior_run_level = run.level;
1546        }
1547    }
1548
1549    /// This function applies only to UBA63. Once the embedding
1550    /// levels are identified, UBA63 requires further processing
1551    /// to assign each of the level runs to an isolating run sequence.
1552    ///
1553    /// Each level run must be uniquely assigned to exactly one
1554    /// isolating run sequence. Each isolating run sequence must
1555    /// have at least one level run, but may have more.
1556    ///
1557    /// The exact details on how to match up isolating run sequences
1558    /// with level runs are specified in BD13.
1559    ///
1560    /// The strategy taken here is to scan the level runs in order.
1561    ///
1562    /// If a level run is not yet assigned to an isolating run sequence,
1563    /// its seqID will be zero. Create a new isolating run sequence
1564    /// and add this level run to it.
1565    ///
1566    /// If the last BIDIUNIT of *this* level run is an isolate
1567    /// initiator (LRI/RLI/FSI), then scan ahead in the list of
1568    /// level runs seeking the next level run which meets the
1569    /// following criteria:
1570    ///   1. seqID = 0 (not yet assigned to an isolating run sequence)
1571    ///   2. its level matches the level we are processing
1572    ///   3. the first BIDIUNIT is a PDI
1573    /// If all those conditions are met, assign that next level run
1574    /// to this isolating run sequence (set its seqID, and append to
1575    /// the list).
1576    ///
1577    /// Repeat until we hit a level run that doesn't terminate with
1578    /// an isolate initiator or we hit the end of the list of level
1579    /// runs.
1580    ///
1581    /// That terminates the definition of the isolating run sequence
1582    /// we are working on. Append it to the list of isolating run
1583    /// sequences in the UBACONTEXT.
1584    ///
1585    /// Then advance to the next level run which has not yet been
1586    /// assigned to an isolating run sequence and repeat the process.
1587    ///
1588    /// Continue until all level runs have been assigned to an
1589    /// isolating run sequence.
1590    fn identify_isolating_run_sequences(&mut self) -> Vec<IsolatingRunSequence> {
1591        let mut seq_id = 0;
1592        let mut iso_runs = vec![];
1593        let num_runs = self.runs.len();
1594
1595        for run_idx in 0..num_runs {
1596            let save_level;
1597
1598            {
1599                let run = &mut self.runs[run_idx];
1600                if run.seq_id != 0 {
1601                    continue;
1602                }
1603                seq_id += 1;
1604                iso_runs.push(Self::new_iso_run_seq(run_idx, run));
1605                run.seq_id = seq_id;
1606
1607                if !self.char_types[run.end - 1].is_iso_init() {
1608                    continue;
1609                }
1610                save_level = run.level;
1611            }
1612
1613            // Look ahead to find the run with the corresponding
1614            // PopDirectionalIsolate
1615            for idx in run_idx + 1..num_runs {
1616                let run = &mut self.runs[idx];
1617                if run.seq_id == 0
1618                    && run.level == save_level
1619                    && run.first_significant_bidi_class(&self.char_types, &self.levels)
1620                        == Some(BidiClass::PopDirectionalIsolate)
1621                {
1622                    // we matched the criteria for adding this run to the sequence.
1623                    let iso_run = iso_runs.last_mut().unwrap();
1624                    iso_run.runs.push(idx);
1625                    iso_run.len += run.len;
1626                    run.seq_id = seq_id;
1627
1628                    // Check if the last char in this run is also an
1629                    // isolate initiator. If not, this sequence is done.
1630                    if !self.char_types[run.end - 1].is_iso_init() {
1631                        break;
1632                    }
1633                }
1634            }
1635        }
1636        self.calculate_sos_eos(&mut iso_runs);
1637        self.build_text_chains(&mut iso_runs);
1638        iso_runs
1639    }
1640
1641    /// In order to simplify later rule processing, assemble the indices
1642    /// of the characters in the isolating runs so that there is just a
1643    /// single list to iterate
1644    fn build_text_chains(&mut self, iso_runs: &mut [IsolatingRunSequence]) {
1645        for iso_run in iso_runs {
1646            for &run_idx in &iso_run.runs {
1647                let run = &self.runs[run_idx];
1648                iso_run.indices.extend(run.start..run.end);
1649            }
1650        }
1651    }
1652
1653    /// Process the isolating run sequence list, calculating sos and eos values for
1654    /// each sequence. Each needs to be set to either L or R.
1655    ///
1656    /// Strategy: Instead of recalculating all the sos and eos values from
1657    /// scratch, as specified in X10, we can take a shortcut here, because
1658    /// we already have sor and eor values assigned to all the level runs.
1659    /// For any isolating run sequence, simply assign sos to the value of
1660    /// sor for the *first* run in that sequence, and assign eos to the
1661    /// value of eor for the *last* run in that sequence. This provides
1662    /// equivalent values, and is more straightforward to implement and
1663    /// understand.
1664    ///
1665    /// This strategy has to be modified for defective isolating run sequences,
1666    /// where the sequence ends with an LRI/RLI/FSI.
1667    /// In those cases the eot needs to be calculated based on
1668    /// the paragraph embedding level, rather than from the level run.
1669    /// Note that this only applies when an isolating run sequence
1670    /// terminating in an LRI/RLI/FSI but with no matching PDI.
1671    /// An example would be:
1672    ///
1673    ///    R  RLI    R
1674    /// <L-----R> <RR>
1675    /// <L------[          <== eot would be L, not R
1676    ///           <RR>
1677    ///
1678    fn calculate_sos_eos(&mut self, iso_runs: &mut [IsolatingRunSequence]) {
1679        for iso_run in iso_runs {
1680            // First inherit the sos and eos values from the
1681            // first and last runs in the sequence.
1682            let first_run_idx = iso_run.runs.first().cloned().expect("at least 1 run");
1683            let last_run_idx = iso_run.runs.last().cloned().expect("at least 1 run");
1684            iso_run.sos = self.runs[first_run_idx].sor;
1685            iso_run.eos = self.runs[last_run_idx].eor;
1686            // Next adjust for the special case when an isolating
1687            // run sequence terminates in an unmatched isolate
1688            // initiator.
1689            if self.char_types[self.runs[last_run_idx].end - 1].is_iso_init() {
1690                let higher_level = self.base_level.max(iso_run.level);
1691                iso_run.eos = higher_level.as_bidi_class();
1692            }
1693        }
1694    }
1695
1696    fn new_iso_run_seq(run_idx: usize, run: &Run) -> IsolatingRunSequence {
1697        let len = run.len;
1698        let level = run.level;
1699        IsolatingRunSequence {
1700            runs: vec![run_idx],
1701            len,
1702            level,
1703            sos: BidiClass::OtherNeutral,
1704            eos: BidiClass::OtherNeutral,
1705            indices: vec![],
1706        }
1707    }
1708}
1709
1710impl BidiClass {
1711    pub fn is_iso_init(self) -> bool {
1712        match self {
1713            BidiClass::RightToLeftIsolate
1714            | BidiClass::LeftToRightIsolate
1715            | BidiClass::FirstStrongIsolate => true,
1716            _ => false,
1717        }
1718    }
1719
1720    pub fn is_iso_control(self) -> bool {
1721        match self {
1722            BidiClass::RightToLeftIsolate
1723            | BidiClass::LeftToRightIsolate
1724            | BidiClass::PopDirectionalIsolate
1725            | BidiClass::FirstStrongIsolate => true,
1726            _ => false,
1727        }
1728    }
1729
1730    pub fn is_neutral(self) -> bool {
1731        match self {
1732            BidiClass::OtherNeutral
1733            | BidiClass::WhiteSpace
1734            | BidiClass::SegmentSeparator
1735            | BidiClass::ParagraphSeparator => true,
1736            _ => self.is_iso_control(),
1737        }
1738    }
1739}
1740
1741#[derive(Debug)]
1742struct Run {
1743    /// char indices for start, end of run
1744    start: usize,
1745    end: usize,
1746    /// length of run
1747    len: usize,
1748    /// isolating run sequence id
1749    seq_id: usize,
1750    /// Embedding level of this run
1751    level: Level,
1752    /// Direction of start of run
1753    sor: BidiClass,
1754    /// Direction of end of run
1755    eor: BidiClass,
1756}
1757
1758impl Run {
1759    fn first_significant_bidi_class(
1760        &self,
1761        types: &[BidiClass],
1762        levels: &[Level],
1763    ) -> Option<BidiClass> {
1764        for idx in self.start..self.end {
1765            if !levels[idx].removed_by_x9() {
1766                return types.get(idx).cloned();
1767            }
1768        }
1769        None
1770    }
1771}
1772
1773#[derive(Debug)]
1774struct IsolatingRunSequence {
1775    /// List of the runs in this sequence. The values are indices
1776    /// into the runs array
1777    runs: Vec<usize>,
1778    /// length of the run
1779    len: usize,
1780    /// Embedding level of this run
1781    level: Level,
1782    /// Direction of start of run
1783    sos: BidiClass,
1784    /// Direction of end of run
1785    eos: BidiClass,
1786    /// The sequence of indices into the original paragraph,
1787    /// across the contained set of runs
1788    indices: Vec<usize>,
1789}
1790
1791/// Starting from `start`, extract the first run containing characters
1792/// all with the same level
1793fn span_one_run(types: &[BidiClass], levels: &[Level], start: usize) -> (Level, usize) {
1794    let mut span_level = Level(NO_LEVEL);
1795    let mut isolate_init_found = false;
1796    let mut span_len = 0;
1797
1798    trace!(
1799        "span_one_run called with types: {:?}, levels: {:?}, start={}",
1800        types,
1801        levels,
1802        start
1803    );
1804
1805    for (idx, (bc, level)) in types
1806        .iter()
1807        .skip(start)
1808        .zip(levels.iter().skip(start))
1809        .enumerate()
1810    {
1811        trace!(
1812            "span_one_run: consider idx={} bc={:?} level={:?}",
1813            idx,
1814            bc,
1815            level
1816        );
1817        if !level.removed_by_x9() {
1818            if bc.is_iso_init() {
1819                isolate_init_found = true;
1820            }
1821            if span_level.removed_by_x9() {
1822                span_level = *level;
1823            } else if *level != span_level {
1824                // End of run
1825                break;
1826            }
1827        }
1828        span_len = idx;
1829        if isolate_init_found {
1830            break;
1831        }
1832    }
1833
1834    (span_level, span_len + 1)
1835}
1836
1837/// 3.3.1 Paragraph level.
1838/// We've been fed a single paragraph, which takes care of rule P1.
1839/// This function implements rules P2 and P3.
1840fn paragraph_level(types: &[BidiClass], respect_pdi: bool, fallback: Direction) -> Level {
1841    let mut isolate_count = 0;
1842    for &t in types {
1843        match t {
1844            BidiClass::RightToLeftIsolate
1845            | BidiClass::LeftToRightIsolate
1846            | BidiClass::FirstStrongIsolate => isolate_count += 1,
1847            BidiClass::PopDirectionalIsolate => {
1848                if isolate_count > 0 {
1849                    isolate_count -= 1;
1850                } else if respect_pdi {
1851                    break;
1852                }
1853            }
1854            BidiClass::LeftToRight if isolate_count == 0 => return Level(0),
1855            BidiClass::RightToLeft | BidiClass::ArabicLetter if isolate_count == 0 => {
1856                return Level(1)
1857            }
1858            _ => {}
1859        }
1860    }
1861    if fallback == Direction::LeftToRight {
1862        Level(0)
1863    } else {
1864        Level(1)
1865    }
1866}
1867
1868struct Pair {
1869    opening_pos: usize,
1870    closing_pos: usize,
1871}
1872
1873impl std::fmt::Debug for Pair {
1874    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
1875        write!(fmt, "Pair{{{},{}}}", self.opening_pos, self.closing_pos)
1876    }
1877}
1878
1879const MAX_PAIRING_DEPTH: usize = 63;
1880struct BracketStack {
1881    closing_bracket: [char; MAX_PAIRING_DEPTH],
1882    position: [usize; MAX_PAIRING_DEPTH],
1883    depth: usize,
1884    pairs: Vec<Pair>,
1885}
1886
1887impl std::fmt::Debug for BracketStack {
1888    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
1889        fmt.debug_struct("BracketStack")
1890            .field("closing_bracket", &&self.closing_bracket[0..self.depth])
1891            .field("position", &&self.position[0..self.depth])
1892            .field("depth", &self.depth)
1893            .field("pairs", &self.pairs)
1894            .finish()
1895    }
1896}
1897
1898impl BracketStack {
1899    pub fn new() -> Self {
1900        Self {
1901            closing_bracket: [' '; MAX_PAIRING_DEPTH],
1902            position: [0; MAX_PAIRING_DEPTH],
1903            depth: 0,
1904            pairs: vec![],
1905        }
1906    }
1907
1908    pub fn clear(&mut self) {
1909        self.pairs.clear();
1910        self.depth = 0;
1911    }
1912
1913    pub fn push(&mut self, closing_bracket: char, pos: usize) -> bool {
1914        let depth = self.depth;
1915        if depth >= MAX_PAIRING_DEPTH {
1916            return false;
1917        }
1918        self.closing_bracket[depth] = closing_bracket;
1919        self.position[depth] = pos;
1920        self.depth += 1;
1921        true
1922    }
1923
1924    /// Seek an opening bracket pair for the closing bracket
1925    /// passed in.
1926    ///
1927    /// This is a stack based search.
1928    /// Start with the top element in the stack and search
1929    /// downwards until we either find a match or reach the
1930    /// bottom of the stack.
1931    ///
1932    /// If we find a match, construct and append the bracket
1933    /// pair to the pairList. Then pop the stack for all the
1934    /// levels down to the level where we found the match.
1935    /// (This approach is designed to discard pairs that
1936    /// are not cleanly nested.)
1937    ///
1938    /// If we search all the way to the bottom of the stack
1939    /// without finding a match, just return without changing
1940    /// state. This represents a closing bracket with no
1941    /// opening bracket to match it. Just discard and move on.
1942    pub fn seek_matching_open_bracket(&mut self, closing_bracket: char, pos: usize) -> bool {
1943        trace!(
1944            "seek_matching_open_bracket: closing_bracket={:?} pos={}\n{:?}",
1945            closing_bracket,
1946            pos,
1947            self
1948        );
1949        for depth in (0..self.depth).rev() {
1950            trace!("seek_matching_open_bracket: consider depth={}", depth);
1951            // The basic test is for the closingcp equal to the bpb value
1952            // stored in the bracketData. But to account for the canonical
1953            // equivalences for U+2329 and U+232A, tack on extra checks here
1954            // for the asymmetrical matches. This hard-coded check avoids
1955            // having to require full normalization of all the bracket code
1956            // points before checking. It is highly unlikely that additional
1957            // canonical singletons for bracket pairs will be added to future
1958            // versions of the UCD.
1959            if self.closing_bracket[depth] == closing_bracket
1960                || (self.closing_bracket[depth] == '\u{232a}' && closing_bracket == '\u{3009}')
1961                || (self.closing_bracket[depth] == '\u{3009}' && closing_bracket == '\u{232a}')
1962            {
1963                self.pairs.push(Pair {
1964                    opening_pos: self.position[depth],
1965                    closing_pos: pos,
1966                });
1967                // Pop back to this depth, pruning out any intermediates;
1968                // they are mismatched brackets
1969                self.depth = depth;
1970                return true;
1971            }
1972        }
1973        false
1974    }
1975}
1976
1977fn lookup_closing(c: char) -> Option<(char, BracketType)> {
1978    use bidi_brackets::BIDI_BRACKETS;
1979    if let Ok(idx) = BIDI_BRACKETS.binary_search_by_key(&c, |&(left, _, _)| left) {
1980        let entry = &BIDI_BRACKETS[idx];
1981        return Some((entry.1, entry.2));
1982    }
1983    None
1984}
1985
1986pub fn bidi_class_for_char(c: char) -> BidiClass {
1987    use std::cmp::Ordering;
1988    if let Ok(idx) = bidi_class::BIDI_CLASS.binary_search_by(|&(lower, upper, _)| {
1989        if c >= lower && c <= upper {
1990            Ordering::Equal
1991        } else if c < lower {
1992            Ordering::Greater
1993        } else if c > upper {
1994            Ordering::Less
1995        } else {
1996            unreachable!()
1997        }
1998    }) {
1999        let entry = &bidi_class::BIDI_CLASS[idx];
2000        if c >= entry.0 && c <= entry.1 {
2001            return entry.2;
2002        }
2003    }
2004    // extracted/DerivedBidiClass.txt says:
2005    // All code points not explicitly listed for Bidi_Class
2006    //  have the value Left_To_Right (L).
2007    BidiClass::LeftToRight
2008}
2009
2010#[cfg(test)]
2011mod tests {
2012    use super::*;
2013    use k9::assert_equal as assert_eq;
2014
2015    #[test]
2016    fn runs() {
2017        let text = vec!['א', 'ב', 'ג', 'a', 'b', 'c'];
2018
2019        let mut context = BidiContext::new();
2020        context.resolve_paragraph(&text, ParagraphDirectionHint::AutoLeftToRight);
2021        k9::snapshot!(
2022            context.runs().collect::<Vec<_>>(),
2023            "
2024[
2025    BidiRun {
2026        direction: RightToLeft,
2027        level: Level(
2028            1,
2029        ),
2030        range: 0..3,
2031        removed_by_x9: [],
2032    },
2033    BidiRun {
2034        direction: LeftToRight,
2035        level: Level(
2036            2,
2037        ),
2038        range: 3..6,
2039        removed_by_x9: [],
2040    },
2041]
2042"
2043        );
2044    }
2045
2046    #[test]
2047    fn mirror() {
2048        assert_eq!(lookup_closing('{'), Some(('}', BracketType::Open)));
2049        assert_eq!(lookup_closing('['), Some((']', BracketType::Open)));
2050        assert_eq!(lookup_closing(']'), Some(('[', BracketType::Close)));
2051    }
2052
2053    #[test]
2054    fn bidi_class_resolve() {
2055        assert_eq!(bidi_class_for_char('\u{0}'), BidiClass::BoundaryNeutral);
2056        assert_eq!(bidi_class_for_char('\u{9}'), BidiClass::SegmentSeparator);
2057        assert_eq!(bidi_class_for_char(' '), BidiClass::WhiteSpace);
2058        assert_eq!(bidi_class_for_char('a'), BidiClass::LeftToRight);
2059        assert_eq!(bidi_class_for_char('\u{590}'), BidiClass::RightToLeft);
2060        assert_eq!(bidi_class_for_char('\u{5d0}'), BidiClass::RightToLeft);
2061        assert_eq!(bidi_class_for_char('\u{5d1}'), BidiClass::RightToLeft);
2062    }
2063
2064    /// This example is taken from
2065    /// <https://terminal-wg.pages.freedesktop.org/bidi/recommendation/combining.html>
2066    #[test]
2067    fn reorder_nsm() {
2068        let shalom: Vec<char> = vec![
2069            '\u{5e9}', '\u{5b8}', '\u{5c1}', '\u{5dc}', '\u{5d5}', '\u{05b9}', '\u{5dd}',
2070        ];
2071        let mut context = BidiContext::new();
2072        context.set_reorder_non_spacing_marks(true);
2073        context.resolve_paragraph(&shalom, ParagraphDirectionHint::LeftToRight);
2074
2075        let mut reordered = vec![];
2076        for run in context.reordered_runs(0..shalom.len()) {
2077            for idx in run.indices {
2078                reordered.push(shalom[idx]);
2079            }
2080        }
2081
2082        let explicit_ltr = vec![
2083            '\u{5dd}', '\u{5d5}', '\u{5b9}', '\u{5dc}', '\u{5e9}', '\u{5b8}', '\u{5c1}',
2084        ];
2085        assert_eq!(reordered, explicit_ltr);
2086    }
2087}