read_fonts/tables/gsub/
closure.rs

1//! Computing the closure over a set of glyphs
2//!
3//! This means taking a set of glyphs and updating it to include any other glyphs
4//! reachable from those glyphs via substitution, recursively.
5
6use std::collections::HashSet;
7
8use font_types::GlyphId16;
9
10use crate::{
11    tables::layout::{
12        ChainedSequenceContextFormat1, ChainedSequenceContextFormat2,
13        ChainedSequenceContextFormat3, ExtensionLookup, SequenceContextFormat1,
14        SequenceContextFormat2, SequenceContextFormat3, Subtables,
15    },
16    FontRead, ReadError,
17};
18
19use super::{
20    AlternateSubstFormat1, ChainedSequenceContext, ClassDef, Gsub, LigatureSubstFormat1,
21    MultipleSubstFormat1, ReverseChainSingleSubstFormat1, SequenceContext, SingleSubst,
22    SingleSubstFormat1, SingleSubstFormat2, SubstitutionSubtables,
23};
24
25/// A trait for tables which participate in closure
26pub(crate) trait GlyphClosure {
27    /// Update the set of glyphs with any glyphs reachable via substitution.
28    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError>;
29}
30
31impl Gsub<'_> {
32    /// Return the set of glyphs reachable from the input set via any substitution.
33    pub fn closure_glyphs(
34        &self,
35        mut glyphs: HashSet<GlyphId16>,
36    ) -> Result<HashSet<GlyphId16>, ReadError> {
37        // we need to do this iteratively, since any glyph found in one pass
38        // over the lookups could also be the target of substitutions.
39
40        // we always call this once, and then keep calling if it produces
41        // additional glyphs
42        let mut prev_glyph_count = glyphs.len();
43        self.closure_glyphs_once(&mut glyphs)?;
44        let mut new_glyph_count = glyphs.len();
45
46        while prev_glyph_count != new_glyph_count {
47            prev_glyph_count = new_glyph_count;
48            self.closure_glyphs_once(&mut glyphs)?;
49            new_glyph_count = glyphs.len();
50        }
51
52        Ok(glyphs)
53    }
54
55    fn closure_glyphs_once(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
56        let lookups_to_use = self.find_reachable_lookups(glyphs)?;
57        let lookup_list = self.lookup_list()?;
58        for (i, lookup) in lookup_list.lookups().iter().enumerate() {
59            if !lookups_to_use.contains(&(i as u16)) {
60                continue;
61            }
62            let subtables = lookup?.subtables()?;
63            subtables.add_reachable_glyphs(glyphs)?;
64        }
65        Ok(())
66    }
67
68    fn find_reachable_lookups(
69        &self,
70        glyphs: &HashSet<GlyphId16>,
71    ) -> Result<HashSet<u16>, ReadError> {
72        let feature_list = self.feature_list()?;
73        let lookup_list = self.lookup_list()?;
74        // first we want to get the lookups that are directly referenced by a feature
75        // (including in a feature variation table)
76        let mut lookup_ids = HashSet::with_capacity(lookup_list.lookup_count() as _);
77        let feature_variations = self
78            .feature_variations()
79            .transpose()?
80            .map(|vars| {
81                let data = vars.offset_data();
82                vars.feature_variation_records()
83                    .iter()
84                    .filter_map(move |rec| {
85                        rec.feature_table_substitution(data)
86                            .transpose()
87                            .ok()
88                            .flatten()
89                    })
90                    .flat_map(|subs| {
91                        subs.substitutions()
92                            .iter()
93                            .map(move |sub| sub.alternate_feature(subs.offset_data()))
94                    })
95            })
96            .into_iter()
97            .flatten();
98        for feature in feature_list
99            .feature_records()
100            .iter()
101            .map(|rec| rec.feature(feature_list.offset_data()))
102            .chain(feature_variations)
103        {
104            lookup_ids.extend(feature?.lookup_list_indices().iter().map(|idx| idx.get()));
105        }
106
107        // and now we need to add lookups referenced by contextual lookups,
108        // IFF they are reachable via the current set of glyphs:
109        for lookup in lookup_list.lookups().iter() {
110            let subtables = lookup?.subtables()?;
111            match subtables {
112                SubstitutionSubtables::Contextual(tables) => tables
113                    .iter()
114                    .try_for_each(|t| t?.add_reachable_lookups(glyphs, &mut lookup_ids)),
115                SubstitutionSubtables::ChainContextual(tables) => tables
116                    .iter()
117                    .try_for_each(|t| t?.add_reachable_lookups(glyphs, &mut lookup_ids)),
118                _ => Ok(()),
119            }?;
120        }
121        Ok(lookup_ids)
122    }
123}
124
125impl GlyphClosure for SubstitutionSubtables<'_> {
126    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
127        match self {
128            SubstitutionSubtables::Single(tables) => tables.add_reachable_glyphs(glyphs),
129            SubstitutionSubtables::Multiple(tables) => tables.add_reachable_glyphs(glyphs),
130            SubstitutionSubtables::Alternate(tables) => tables.add_reachable_glyphs(glyphs),
131            SubstitutionSubtables::Ligature(tables) => tables.add_reachable_glyphs(glyphs),
132            SubstitutionSubtables::Reverse(tables) => tables.add_reachable_glyphs(glyphs),
133            _ => Ok(()),
134        }
135    }
136}
137
138impl<'a, T: FontRead<'a> + GlyphClosure + 'a, Ext: ExtensionLookup<'a, T> + 'a> GlyphClosure
139    for Subtables<'a, T, Ext>
140{
141    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
142        self.iter()
143            .try_for_each(|t| t?.add_reachable_glyphs(glyphs))
144    }
145}
146
147impl GlyphClosure for SingleSubst<'_> {
148    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
149        for (target, replacement) in self.iter_subs()? {
150            if glyphs.contains(&target) {
151                glyphs.insert(replacement);
152            }
153        }
154        Ok(())
155    }
156}
157
158impl SingleSubst<'_> {
159    fn iter_subs(&self) -> Result<impl Iterator<Item = (GlyphId16, GlyphId16)> + '_, ReadError> {
160        let (left, right) = match self {
161            SingleSubst::Format1(t) => (Some(t.iter_subs()?), None),
162            SingleSubst::Format2(t) => (None, Some(t.iter_subs()?)),
163        };
164        Ok(left
165            .into_iter()
166            .flatten()
167            .chain(right.into_iter().flatten()))
168    }
169}
170
171impl SingleSubstFormat1<'_> {
172    fn iter_subs(&self) -> Result<impl Iterator<Item = (GlyphId16, GlyphId16)> + '_, ReadError> {
173        let delta = self.delta_glyph_id();
174        let coverage = self.coverage()?;
175        Ok(coverage.iter().filter_map(move |gid| {
176            let raw = (gid.to_u16() as i32).checked_add(delta as i32);
177            let raw = raw.and_then(|raw| u16::try_from(raw).ok())?;
178            Some((gid, GlyphId16::new(raw)))
179        }))
180    }
181}
182
183impl SingleSubstFormat2<'_> {
184    fn iter_subs(&self) -> Result<impl Iterator<Item = (GlyphId16, GlyphId16)> + '_, ReadError> {
185        let coverage = self.coverage()?;
186        let subs = self.substitute_glyph_ids();
187        Ok(coverage.iter().zip(subs.iter().map(|id| id.get())))
188    }
189}
190
191impl GlyphClosure for MultipleSubstFormat1<'_> {
192    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
193        let coverage = self.coverage()?;
194        let sequences = self.sequences();
195        for (gid, replacements) in coverage.iter().zip(sequences.iter()) {
196            let replacements = replacements?;
197            if glyphs.contains(&gid) {
198                glyphs.extend(
199                    replacements
200                        .substitute_glyph_ids()
201                        .iter()
202                        .map(|gid| gid.get()),
203                );
204            }
205        }
206        Ok(())
207    }
208}
209
210impl GlyphClosure for AlternateSubstFormat1<'_> {
211    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
212        let coverage = self.coverage()?;
213        let alts = self.alternate_sets();
214        for (gid, alts) in coverage.iter().zip(alts.iter()) {
215            let alts = alts?;
216            if glyphs.contains(&gid) {
217                glyphs.extend(alts.alternate_glyph_ids().iter().map(|gid| gid.get()));
218            }
219        }
220        Ok(())
221    }
222}
223
224impl GlyphClosure for LigatureSubstFormat1<'_> {
225    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
226        let coverage = self.coverage()?;
227        let ligs = self.ligature_sets();
228        for (gid, lig_set) in coverage.iter().zip(ligs.iter()) {
229            let lig_set = lig_set?;
230            if glyphs.contains(&gid) {
231                for lig in lig_set.ligatures().iter() {
232                    let lig = lig?;
233                    if lig
234                        .component_glyph_ids()
235                        .iter()
236                        .all(|gid| glyphs.contains(&gid.get()))
237                    {
238                        glyphs.insert(lig.ligature_glyph());
239                    }
240                }
241            }
242        }
243        Ok(())
244    }
245}
246
247impl GlyphClosure for ReverseChainSingleSubstFormat1<'_> {
248    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
249        for coverage in self
250            .backtrack_coverages()
251            .iter()
252            .chain(self.lookahead_coverages().iter())
253        {
254            if !coverage?.iter().any(|gid| glyphs.contains(&gid)) {
255                return Ok(());
256            }
257        }
258
259        for (gid, sub) in self.coverage()?.iter().zip(self.substitute_glyph_ids()) {
260            if glyphs.contains(&gid) {
261                glyphs.insert(sub.get());
262            }
263        }
264
265        Ok(())
266    }
267}
268
269impl SequenceContext<'_> {
270    fn add_reachable_lookups(
271        &self,
272        glyphs: &HashSet<GlyphId16>,
273        lookups: &mut HashSet<u16>,
274    ) -> Result<(), ReadError> {
275        match self {
276            SequenceContext::Format1(table) => table.add_reachable_lookups(glyphs, lookups),
277            SequenceContext::Format2(table) => table.add_reachable_lookups(glyphs, lookups),
278            SequenceContext::Format3(table) => table.add_reachable_lookups(glyphs, lookups),
279        }
280    }
281}
282
283impl SequenceContextFormat1<'_> {
284    fn add_reachable_lookups(
285        &self,
286        glyphs: &HashSet<GlyphId16>,
287        lookups: &mut HashSet<u16>,
288    ) -> Result<(), ReadError> {
289        let coverage = self.coverage()?;
290        for seq in coverage
291            .iter()
292            .zip(self.seq_rule_sets().iter())
293            .filter_map(|(gid, seq)| seq.filter(|_| glyphs.contains(&gid)))
294        {
295            for rule in seq?.seq_rules().iter() {
296                let rule = rule?;
297                if rule
298                    .input_sequence()
299                    .iter()
300                    .all(|gid| glyphs.contains(&gid.get()))
301                {
302                    lookups.extend(
303                        rule.seq_lookup_records()
304                            .iter()
305                            .map(|rec| rec.lookup_list_index()),
306                    );
307                }
308            }
309        }
310        Ok(())
311    }
312}
313
314impl SequenceContextFormat2<'_> {
315    fn add_reachable_lookups(
316        &self,
317        glyphs: &HashSet<GlyphId16>,
318        lookups: &mut HashSet<u16>,
319    ) -> Result<(), ReadError> {
320        let classdef = self.class_def()?;
321        let our_classes = make_class_set(glyphs, &classdef);
322        for seq in self
323            .class_seq_rule_sets()
324            .iter()
325            .enumerate()
326            .filter_map(|(i, seq)| seq.filter(|_| our_classes.contains(&(i as u16))))
327        {
328            for rule in seq?.class_seq_rules().iter() {
329                let rule = rule?;
330                if rule
331                    .input_sequence()
332                    .iter()
333                    .all(|class_id| our_classes.contains(&class_id.get()))
334                {
335                    lookups.extend(
336                        rule.seq_lookup_records()
337                            .iter()
338                            .map(|rec| rec.lookup_list_index()),
339                    )
340                }
341            }
342        }
343        Ok(())
344    }
345}
346
347impl SequenceContextFormat3<'_> {
348    fn add_reachable_lookups(
349        &self,
350        glyphs: &HashSet<GlyphId16>,
351        lookups: &mut HashSet<u16>,
352    ) -> Result<(), ReadError> {
353        for coverage in self.coverages().iter() {
354            if !coverage?.iter().any(|gid| glyphs.contains(&gid)) {
355                return Ok(());
356            }
357        }
358        lookups.extend(
359            self.seq_lookup_records()
360                .iter()
361                .map(|rec| rec.lookup_list_index()),
362        );
363        Ok(())
364    }
365}
366
367impl ChainedSequenceContext<'_> {
368    fn add_reachable_lookups(
369        &self,
370        glyphs: &HashSet<GlyphId16>,
371        lookups: &mut HashSet<u16>,
372    ) -> Result<(), ReadError> {
373        match self {
374            ChainedSequenceContext::Format1(table) => table.add_reachable_lookups(glyphs, lookups),
375            ChainedSequenceContext::Format2(table) => table.add_reachable_lookups(glyphs, lookups),
376            ChainedSequenceContext::Format3(table) => table.add_reachable_lookups(glyphs, lookups),
377        }
378    }
379}
380
381impl ChainedSequenceContextFormat1<'_> {
382    fn add_reachable_lookups(
383        &self,
384        glyphs: &HashSet<GlyphId16>,
385        lookups: &mut HashSet<u16>,
386    ) -> Result<(), ReadError> {
387        let coverage = self.coverage()?;
388        for seq in coverage
389            .iter()
390            .zip(self.chained_seq_rule_sets().iter())
391            .filter_map(|(gid, seq)| seq.filter(|_| glyphs.contains(&gid)))
392        {
393            for rule in seq?.chained_seq_rules().iter() {
394                let rule = rule?;
395                if rule
396                    .input_sequence()
397                    .iter()
398                    .chain(rule.backtrack_sequence())
399                    .chain(rule.lookahead_sequence())
400                    .all(|gid| glyphs.contains(&gid.get()))
401                {
402                    lookups.extend(
403                        rule.seq_lookup_records()
404                            .iter()
405                            .map(|rec| rec.lookup_list_index()),
406                    );
407                }
408            }
409        }
410        Ok(())
411    }
412}
413
414impl ChainedSequenceContextFormat2<'_> {
415    fn add_reachable_lookups(
416        &self,
417        glyphs: &HashSet<GlyphId16>,
418        lookups: &mut HashSet<u16>,
419    ) -> Result<(), ReadError> {
420        let input = self.input_class_def()?;
421        let backtrack = self.backtrack_class_def()?;
422        let lookahead = self.lookahead_class_def()?;
423
424        let input_classes = make_class_set(glyphs, &input);
425        let backtrack_classes = make_class_set(glyphs, &backtrack);
426        let lookahead_classes = make_class_set(glyphs, &lookahead);
427        for seq in self
428            .chained_class_seq_rule_sets()
429            .iter()
430            .enumerate()
431            .filter_map(|(i, seq)| seq.filter(|_| input_classes.contains(&(i as u16))))
432        {
433            for rule in seq?.chained_class_seq_rules().iter() {
434                let rule = rule?;
435                if rule
436                    .input_sequence()
437                    .iter()
438                    .all(|cls| input_classes.contains(&cls.get()))
439                    && rule
440                        .backtrack_sequence()
441                        .iter()
442                        .all(|cls| backtrack_classes.contains(&cls.get()))
443                    && rule
444                        .lookahead_sequence()
445                        .iter()
446                        .all(|cls| lookahead_classes.contains(&cls.get()))
447                {
448                    lookups.extend(
449                        rule.seq_lookup_records()
450                            .iter()
451                            .map(|rec| rec.lookup_list_index()),
452                    )
453                }
454            }
455        }
456        Ok(())
457    }
458}
459
460impl ChainedSequenceContextFormat3<'_> {
461    fn add_reachable_lookups(
462        &self,
463        glyphs: &HashSet<GlyphId16>,
464        lookups: &mut HashSet<u16>,
465    ) -> Result<(), ReadError> {
466        for coverage in self
467            .backtrack_coverages()
468            .iter()
469            .chain(self.input_coverages().iter())
470            .chain(self.lookahead_coverages().iter())
471        {
472            if !coverage?.iter().any(|gid| glyphs.contains(&gid)) {
473                return Ok(());
474            }
475        }
476        lookups.extend(
477            self.seq_lookup_records()
478                .iter()
479                .map(|rec| rec.lookup_list_index()),
480        );
481        Ok(())
482    }
483}
484
485fn make_class_set(glyphs: &HashSet<GlyphId16>, classdef: &ClassDef) -> HashSet<u16> {
486    glyphs.iter().map(|gid| classdef.get(*gid)).collect()
487}
488
489#[cfg(test)]
490mod tests {
491    use std::collections::HashMap;
492
493    use crate::{FontRef, TableProvider};
494
495    use super::*;
496    use font_test_data::closure as test_data;
497
498    struct GlyphMap {
499        to_gid: HashMap<&'static str, GlyphId16>,
500        from_gid: HashMap<GlyphId16, &'static str>,
501    }
502
503    impl GlyphMap {
504        fn new(raw_order: &'static str) -> GlyphMap {
505            let to_gid: HashMap<_, _> = raw_order
506                .split('\n')
507                .map(|line| line.trim())
508                .filter(|line| !(line.starts_with('#') || line.is_empty()))
509                .enumerate()
510                .map(|(gid, name)| (name, GlyphId16::new(gid.try_into().unwrap())))
511                .collect();
512            let from_gid = to_gid.iter().map(|(name, gid)| (*gid, *name)).collect();
513            GlyphMap { from_gid, to_gid }
514        }
515
516        fn get_gid(&self, name: &str) -> Option<GlyphId16> {
517            self.to_gid.get(name).copied()
518        }
519
520        fn get_name(&self, gid: GlyphId16) -> Option<&str> {
521            self.from_gid.get(&gid).copied()
522        }
523    }
524
525    fn get_gsub(test_data: &'static [u8]) -> Gsub<'static> {
526        let font = FontRef::new(test_data).unwrap();
527        font.gsub().unwrap()
528    }
529
530    fn compute_closure(gsub: &Gsub, glyph_map: &GlyphMap, input: &[&str]) -> HashSet<GlyphId16> {
531        let input_glyphs = input
532            .iter()
533            .map(|name| glyph_map.get_gid(name).unwrap())
534            .collect();
535        gsub.closure_glyphs(input_glyphs).unwrap()
536    }
537
538    /// assert a set of glyph ids matches a slice of names
539    macro_rules! assert_closure_result {
540        ($glyph_map:expr, $result:expr, $expected:expr) => {
541            let result = $result
542                .iter()
543                .map(|gid| $glyph_map.get_name(*gid).unwrap())
544                .collect::<HashSet<_>>();
545            let expected = $expected.iter().copied().collect::<HashSet<_>>();
546            if expected != result {
547                let in_output = result.difference(&expected).collect::<Vec<_>>();
548                let in_expected = expected.difference(&result).collect::<Vec<_>>();
549                let mut msg = format!("Closure output does not match\n");
550                if !in_expected.is_empty() {
551                    msg.push_str(format!("missing {in_expected:?}\n").as_str());
552                }
553                if !in_output.is_empty() {
554                    msg.push_str(format!("unexpected {in_output:?}").as_str());
555                }
556                panic!("{msg}")
557            }
558        };
559    }
560
561    #[test]
562    fn smoke_test() {
563        // tests various lookup types.
564        // test input is font-test-data/test_data/fea/simple_closure.fea
565        let gsub = get_gsub(test_data::SIMPLE);
566        let glyph_map = GlyphMap::new(test_data::SIMPLE_GLYPHS);
567        let result = compute_closure(&gsub, &glyph_map, &["a"]);
568
569        assert_closure_result!(
570            glyph_map,
571            result,
572            &["a", "A", "b", "c", "d", "a_a", "a.1", "a.2", "a.3"]
573        );
574    }
575
576    #[test]
577    fn recursive() {
578        // a scenario in which one substitution adds glyphs that trigger additional
579        // substitutions.
580        //
581        // test input is font-test-data/test_data/fea/recursive_closure.fea
582        let gsub = get_gsub(test_data::RECURSIVE);
583        let glyph_map = GlyphMap::new(test_data::RECURSIVE_GLYPHS);
584        let result = compute_closure(&gsub, &glyph_map, &["a"]);
585        assert_closure_result!(glyph_map, result, &["a", "b", "c", "d"]);
586    }
587
588    #[test]
589    fn contextual_lookups() {
590        let gsub = get_gsub(test_data::CONTEXTUAL);
591        let glyph_map = GlyphMap::new(test_data::CONTEXTUAL_GLYPHS);
592
593        // these match the lookups but not the context
594        let nop = compute_closure(&gsub, &glyph_map, &["three", "four", "e", "f"]);
595        assert_closure_result!(glyph_map, nop, &["three", "four", "e", "f"]);
596
597        let gsub6f1 = compute_closure(
598            &gsub,
599            &glyph_map,
600            &["one", "two", "three", "four", "five", "six", "seven"],
601        );
602        assert_closure_result!(
603            glyph_map,
604            gsub6f1,
605            &["one", "two", "three", "four", "five", "six", "seven", "X", "Y"]
606        );
607
608        let gsub6f3 = compute_closure(&gsub, &glyph_map, &["space", "e"]);
609        assert_closure_result!(glyph_map, gsub6f3, &["space", "e", "e.2"]);
610
611        let gsub5f3 = compute_closure(&gsub, &glyph_map, &["f", "g"]);
612        assert_closure_result!(glyph_map, gsub5f3, &["f", "g", "f.2"]);
613    }
614
615    #[test]
616    fn recursive_context() {
617        let gsub = get_gsub(test_data::RECURSIVE_CONTEXTUAL);
618        let glyph_map = GlyphMap::new(test_data::RECURSIVE_CONTEXTUAL_GLYPHS);
619
620        let nop = compute_closure(&gsub, &glyph_map, &["b", "B"]);
621        assert_closure_result!(glyph_map, nop, &["b", "B"]);
622
623        let full = compute_closure(&gsub, &glyph_map, &["a", "b", "c"]);
624        assert_closure_result!(glyph_map, full, &["a", "b", "c", "B", "B.2", "B.3"]);
625
626        let intermediate = compute_closure(&gsub, &glyph_map, &["a", "B.2"]);
627        assert_closure_result!(glyph_map, intermediate, &["a", "B.2", "B.3"]);
628    }
629
630    #[test]
631    fn feature_variations() {
632        let gsub = get_gsub(test_data::VARIATIONS_CLOSURE);
633        let glyph_map = GlyphMap::new(test_data::VARIATIONS_GLYPHS);
634
635        let input = compute_closure(&gsub, &glyph_map, &["a"]);
636        assert_closure_result!(glyph_map, input, &["a", "b", "c"]);
637    }
638}