read_fonts/tables/
cmap.rs

1//! The [cmap](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap) table
2
3include!("../../generated/generated_cmap.rs");
4
5#[cfg(feature = "std")]
6use crate::collections::IntSet;
7use crate::{FontRef, TableProvider};
8use std::ops::Range;
9
10/// Result of mapping a codepoint with a variation selector.
11#[derive(Copy, Clone, PartialEq, Eq, Debug)]
12pub enum MapVariant {
13    /// The variation selector should be ignored and the default mapping
14    /// of the character should be used.
15    UseDefault,
16    /// The variant glyph mapped by a codepoint and associated variation
17    /// selector.
18    Variant(GlyphId),
19}
20
21impl Cmap<'_> {
22    /// Map a codepoint to a nominal glyph identifier
23    ///
24    /// This uses the first available subtable that provides a valid mapping.
25    ///
26    /// # Note:
27    ///
28    /// Mapping logic is currently only implemented for the most common subtable
29    /// formats.
30    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
31        let codepoint = codepoint.into();
32        for record in self.encoding_records() {
33            if let Ok(subtable) = record.subtable(self.offset_data()) {
34                if let Some(gid) = match subtable {
35                    CmapSubtable::Format4(format4) => format4.map_codepoint(codepoint),
36                    CmapSubtable::Format12(format12) => format12.map_codepoint(codepoint),
37                    _ => None,
38                } {
39                    return Some(gid);
40                }
41            }
42        }
43        None
44    }
45
46    #[cfg(feature = "std")]
47    pub fn closure_glyphs(&self, unicodes: &IntSet<u32>, glyph_set: &mut IntSet<GlyphId>) {
48        for record in self.encoding_records() {
49            if let Ok(subtable) = record.subtable(self.offset_data()) {
50                match subtable {
51                    CmapSubtable::Format14(format14) => {
52                        format14.closure_glyphs(unicodes, glyph_set);
53                        return;
54                    }
55                    _ => {
56                        continue;
57                    }
58                }
59            }
60        }
61    }
62}
63
64impl CmapSubtable<'_> {
65    pub fn language(&self) -> u32 {
66        match self {
67            Self::Format0(item) => item.language() as u32,
68            Self::Format2(item) => item.language() as u32,
69            Self::Format4(item) => item.language() as u32,
70            Self::Format6(item) => item.language() as u32,
71            Self::Format10(item) => item.language(),
72            Self::Format12(item) => item.language(),
73            Self::Format13(item) => item.language(),
74            _ => 0,
75        }
76    }
77}
78
79impl<'a> Cmap4<'a> {
80    /// Maps a codepoint to a nominal glyph identifier.
81    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
82        let codepoint = codepoint.into();
83        if codepoint > 0xFFFF {
84            return None;
85        }
86        let codepoint = codepoint as u16;
87        let mut lo = 0;
88        let mut hi = self.seg_count_x2() as usize / 2;
89        let start_codes = self.start_code();
90        let end_codes = self.end_code();
91        while lo < hi {
92            let i = (lo + hi) / 2;
93            let start_code = start_codes.get(i)?.get();
94            if codepoint < start_code {
95                hi = i;
96            } else if codepoint > end_codes.get(i)?.get() {
97                lo = i + 1;
98            } else {
99                return self.lookup_glyph_id(codepoint, i, start_code);
100            }
101        }
102        None
103    }
104
105    /// Returns an iterator over all (codepoint, glyph identifier) pairs
106    /// in the subtable.
107    pub fn iter(&self) -> Cmap4Iter<'a> {
108        Cmap4Iter::new(self.clone())
109    }
110
111    /// Does the final phase of glyph id lookup.
112    ///
113    /// Shared between Self::map and Cmap4Iter.
114    fn lookup_glyph_id(&self, codepoint: u16, index: usize, start_code: u16) -> Option<GlyphId> {
115        let deltas = self.id_delta();
116        let range_offsets = self.id_range_offsets();
117        let delta = deltas.get(index)?.get() as i32;
118        let range_offset = range_offsets.get(index)?.get() as usize;
119        if range_offset == 0 {
120            return Some(GlyphId::from((codepoint as i32 + delta) as u16));
121        }
122        let mut offset = range_offset / 2 + (codepoint - start_code) as usize;
123        offset = offset.saturating_sub(range_offsets.len() - index);
124        let gid = self.glyph_id_array().get(offset)?.get();
125        (gid != 0).then_some(GlyphId::from((gid as i32 + delta) as u16))
126    }
127
128    /// Returns the [start_code, end_code] range at the given index.
129    fn code_range(&self, index: usize) -> Option<Range<u32>> {
130        // Extend to u32 to ensure we don't overflow on the end + 1 bound
131        // below.
132        let start = self.start_code().get(index)?.get() as u32;
133        let end = self.end_code().get(index)?.get() as u32;
134        // Use end + 1 here because the range in the table is inclusive
135        Some(start..end + 1)
136    }
137}
138
139/// Iterator over all (codepoint, glyph identifier) pairs in
140/// the subtable.
141#[derive(Clone)]
142pub struct Cmap4Iter<'a> {
143    subtable: Cmap4<'a>,
144    cur_range: Range<u32>,
145    cur_start_code: u16,
146    cur_range_ix: usize,
147}
148
149impl<'a> Cmap4Iter<'a> {
150    fn new(subtable: Cmap4<'a>) -> Self {
151        let cur_range = subtable.code_range(0).unwrap_or_default();
152        let cur_start_code = cur_range.start as u16;
153        Self {
154            subtable,
155            cur_range,
156            cur_start_code,
157            cur_range_ix: 0,
158        }
159    }
160}
161
162impl Iterator for Cmap4Iter<'_> {
163    type Item = (u32, GlyphId);
164
165    fn next(&mut self) -> Option<Self::Item> {
166        loop {
167            if let Some(codepoint) = self.cur_range.next() {
168                let Some(glyph_id) = self.subtable.lookup_glyph_id(
169                    codepoint as u16,
170                    self.cur_range_ix,
171                    self.cur_start_code,
172                ) else {
173                    continue;
174                };
175                return Some((codepoint, glyph_id));
176            } else {
177                self.cur_range_ix += 1;
178                let next_range = self.subtable.code_range(self.cur_range_ix)?;
179                // Groups should be in order and non-overlapping so make sure
180                // that the start code of next group is at least current_end + 1.
181                // Also avoid start sliding backwards if we see data where end < start by taking the max
182                // of next.end and curr.end as the new end.
183                // This prevents timeout and bizarre results in the face of numerous overlapping ranges
184                // https://github.com/googlefonts/fontations/issues/1100
185                // cmap4 ranges are u16 so no need to stress about values past char::MAX
186                self.cur_range = next_range.start.max(self.cur_range.end)
187                    ..next_range.end.max(self.cur_range.end);
188                self.cur_start_code = self.cur_range.start as u16;
189            }
190        }
191    }
192}
193
194impl<'a> Cmap12<'a> {
195    /// Maps a codepoint to a nominal glyph identifier.
196    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
197        let codepoint = codepoint.into();
198        let groups = self.groups();
199        let mut lo = 0;
200        let mut hi = groups.len();
201        while lo < hi {
202            let i = (lo + hi) / 2;
203            let group = groups.get(i)?;
204            if codepoint < group.start_char_code() {
205                hi = i;
206            } else if codepoint > group.end_char_code() {
207                lo = i + 1;
208            } else {
209                return Some(self.lookup_glyph_id(
210                    codepoint,
211                    group.start_char_code(),
212                    group.start_glyph_id(),
213                ));
214            }
215        }
216        None
217    }
218
219    /// Returns an iterator over all (codepoint, glyph identifier) pairs
220    /// in the subtable.
221    ///
222    /// Malicious and malformed fonts can produce a large number of invalid
223    /// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence
224    /// that is limited to reasonable values.
225    pub fn iter(&self) -> Cmap12Iter<'a> {
226        Cmap12Iter::new(self.clone(), None)
227    }
228
229    /// Returns an iterator over all (codepoint, glyph identifier) pairs
230    /// in the subtable within the given limits.
231    pub fn iter_with_limits(&self, limits: Cmap12IterLimits) -> Cmap12Iter<'a> {
232        Cmap12Iter::new(self.clone(), Some(limits))
233    }
234
235    /// Does the final phase of glyph id lookup.
236    ///
237    /// Shared between Self::map and Cmap12Iter.
238    fn lookup_glyph_id(
239        &self,
240        codepoint: u32,
241        start_char_code: u32,
242        start_glyph_id: u32,
243    ) -> GlyphId {
244        GlyphId::new(start_glyph_id.wrapping_add(codepoint.wrapping_sub(start_char_code)))
245    }
246
247    /// Returns the codepoint range and start glyph id for the group
248    /// at the given index.
249    fn group(&self, index: usize, limits: &Option<Cmap12IterLimits>) -> Option<Cmap12Group> {
250        let group = self.groups().get(index)?;
251        let start_code = group.start_char_code();
252        // Change to exclusive range. This can never overflow since the source
253        // is a 32-bit value
254        let end_code = group.end_char_code() as u64 + 1;
255        let start_glyph_id = group.start_glyph_id();
256        let end_code = if let Some(limits) = limits {
257            // Set our end code to the minimum of our character and glyph
258            // count limit
259            (limits.glyph_count as u64)
260                .saturating_sub(start_glyph_id as u64)
261                .saturating_add(start_code as u64)
262                .min(end_code.min(limits.max_char as u64))
263        } else {
264            end_code
265        };
266        Some(Cmap12Group {
267            range: start_code as u64..end_code,
268            start_code,
269            start_glyph_id,
270        })
271    }
272}
273
/// A single format 12 group, prepared for iteration.
#[derive(Debug, Clone)]
struct Cmap12Group {
    /// Codepoints left to visit in this group; the end bound is exclusive.
    range: Range<u64>,
    /// First character code of the group as stored in the table.
    start_code: u32,
    /// Glyph id that the group assigns to `start_code`.
    start_glyph_id: u32,
}
280
281/// Character and glyph limits for iterating format 12 subtables.
282#[derive(Copy, Clone, Debug)]
283pub struct Cmap12IterLimits {
284    /// The maximum valid character.
285    pub max_char: u32,
286    /// The number of glyphs in the font.
287    pub glyph_count: u32,
288}
289
290impl Cmap12IterLimits {
291    /// Returns the default limits for the given font.
292    ///
293    /// This will limit pairs to `char::MAX` and the number of glyphs contained
294    /// in the font. If the font is missing a `maxp` table, the number of
295    /// glyphs will be limited to `u16::MAX`.
296    pub fn default_for_font(font: &FontRef) -> Self {
297        let glyph_count = font
298            .maxp()
299            .map(|maxp| maxp.num_glyphs())
300            .unwrap_or(u16::MAX) as u32;
301        Self {
302            // Limit to the valid range of Unicode characters
303            // per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184
304            max_char: char::MAX as u32,
305            glyph_count,
306        }
307    }
308}
309
310impl Default for Cmap12IterLimits {
311    fn default() -> Self {
312        Self {
313            max_char: char::MAX as u32,
314            // Revisit this when we actually support big glyph ids
315            glyph_count: u16::MAX as u32,
316        }
317    }
318}
319
320/// Iterator over all (codepoint, glyph identifier) pairs in
321/// the subtable.
322#[derive(Clone)]
323pub struct Cmap12Iter<'a> {
324    subtable: Cmap12<'a>,
325    cur_group: Option<Cmap12Group>,
326    cur_group_ix: usize,
327    limits: Option<Cmap12IterLimits>,
328}
329
330impl<'a> Cmap12Iter<'a> {
331    fn new(subtable: Cmap12<'a>, limits: Option<Cmap12IterLimits>) -> Self {
332        let cur_group = subtable.group(0, &limits);
333        Self {
334            subtable,
335            cur_group,
336            cur_group_ix: 0,
337            limits,
338        }
339    }
340}
341
342impl Iterator for Cmap12Iter<'_> {
343    type Item = (u32, GlyphId);
344
345    fn next(&mut self) -> Option<Self::Item> {
346        loop {
347            let group = self.cur_group.as_mut()?;
348            if let Some(codepoint) = group.range.next() {
349                let codepoint = codepoint as u32;
350                let glyph_id = self.subtable.lookup_glyph_id(
351                    codepoint,
352                    group.start_code,
353                    group.start_glyph_id,
354                );
355                return Some((codepoint, glyph_id));
356            } else {
357                self.cur_group_ix += 1;
358                let mut next_group = self.subtable.group(self.cur_group_ix, &self.limits)?;
359                // Groups should be in order and non-overlapping so make sure
360                // that the start code of next group is at least
361                // current_end.
362                if next_group.range.start < group.range.end {
363                    next_group.range = group.range.end..next_group.range.end;
364                }
365                self.cur_group = Some(next_group);
366            }
367        }
368    }
369}
370
371impl<'a> Cmap14<'a> {
372    /// Maps a codepoint and variation selector to a nominal glyph identifier.
373    pub fn map_variant(
374        &self,
375        codepoint: impl Into<u32>,
376        selector: impl Into<u32>,
377    ) -> Option<MapVariant> {
378        let codepoint = codepoint.into();
379        let selector = selector.into();
380        let selector_records = self.var_selector();
381        // Variation selector records are sorted in order of var_selector. Binary search to find
382        // the appropriate record.
383        let selector_record = selector_records
384            .binary_search_by(|rec| {
385                let rec_selector: u32 = rec.var_selector().into();
386                rec_selector.cmp(&selector)
387            })
388            .ok()
389            .and_then(|idx| selector_records.get(idx))?;
390        // If a default UVS table is present in this selector record, binary search on the ranges
391        // (start_unicode_value, start_unicode_value + additional_count) to find the requested codepoint.
392        // If found, ignore the selector and return a value indicating that the default cmap mapping
393        // should be used.
394        if let Some(Ok(default_uvs)) = selector_record.default_uvs(self.offset_data()) {
395            use core::cmp::Ordering;
396            let found_default_uvs = default_uvs
397                .ranges()
398                .binary_search_by(|range| {
399                    let start = range.start_unicode_value().into();
400                    if codepoint < start {
401                        Ordering::Greater
402                    } else if codepoint > (start + range.additional_count() as u32) {
403                        Ordering::Less
404                    } else {
405                        Ordering::Equal
406                    }
407                })
408                .is_ok();
409            if found_default_uvs {
410                return Some(MapVariant::UseDefault);
411            }
412        }
413        // Binary search the non-default UVS table if present. This maps codepoint+selector to a variant glyph.
414        let non_default_uvs = selector_record.non_default_uvs(self.offset_data())?.ok()?;
415        let mapping = non_default_uvs.uvs_mapping();
416        let ix = mapping
417            .binary_search_by(|map| {
418                let map_codepoint: u32 = map.unicode_value().into();
419                map_codepoint.cmp(&codepoint)
420            })
421            .ok()?;
422        Some(MapVariant::Variant(GlyphId::from(
423            mapping.get(ix)?.glyph_id(),
424        )))
425    }
426
427    /// Returns an iterator over all (codepoint, selector, mapping variant)
428    /// triples in the subtable.
429    pub fn iter(&self) -> Cmap14Iter<'a> {
430        Cmap14Iter::new(self.clone())
431    }
432
433    fn selector(
434        &self,
435        index: usize,
436    ) -> (
437        Option<VariationSelector>,
438        Option<DefaultUvs<'a>>,
439        Option<NonDefaultUvs<'a>>,
440    ) {
441        let selector = self.var_selector().get(index).cloned();
442        let default_uvs = selector.as_ref().and_then(|selector| {
443            selector
444                .default_uvs(self.offset_data())
445                .transpose()
446                .ok()
447                .flatten()
448        });
449        let non_default_uvs = selector.as_ref().and_then(|selector| {
450            selector
451                .non_default_uvs(self.offset_data())
452                .transpose()
453                .ok()
454                .flatten()
455        });
456        (selector, default_uvs, non_default_uvs)
457    }
458
459    #[cfg(feature = "std")]
460    pub fn closure_glyphs(&self, unicodes: &IntSet<u32>, glyph_set: &mut IntSet<GlyphId>) {
461        for selector in self.var_selector() {
462            if !unicodes.contains(selector.var_selector().to_u32()) {
463                continue;
464            }
465            if let Some(non_default_uvs) = selector
466                .non_default_uvs(self.offset_data())
467                .transpose()
468                .ok()
469                .flatten()
470            {
471                glyph_set.extend(
472                    non_default_uvs
473                        .uvs_mapping()
474                        .iter()
475                        .filter(|m| unicodes.contains(m.unicode_value().to_u32()))
476                        .map(|m| m.glyph_id().into()),
477                );
478            }
479        }
480    }
481}
482
483/// Iterator over all (codepoint, selector, mapping variant) triples
484/// in the subtable.
485#[derive(Clone)]
486pub struct Cmap14Iter<'a> {
487    subtable: Cmap14<'a>,
488    selector_record: Option<VariationSelector>,
489    default_uvs: Option<DefaultUvsIter<'a>>,
490    non_default_uvs: Option<NonDefaultUvsIter<'a>>,
491    cur_selector_ix: usize,
492}
493
494impl<'a> Cmap14Iter<'a> {
495    fn new(subtable: Cmap14<'a>) -> Self {
496        let (selector_record, default_uvs, non_default_uvs) = subtable.selector(0);
497        Self {
498            subtable,
499            selector_record,
500            default_uvs: default_uvs.map(DefaultUvsIter::new),
501            non_default_uvs: non_default_uvs.map(NonDefaultUvsIter::new),
502            cur_selector_ix: 0,
503        }
504    }
505}
506
507impl Iterator for Cmap14Iter<'_> {
508    type Item = (u32, u32, MapVariant);
509
510    fn next(&mut self) -> Option<Self::Item> {
511        loop {
512            let selector_record = self.selector_record.as_ref()?;
513            let selector: u32 = selector_record.var_selector().into();
514            if let Some(default_uvs) = self.default_uvs.as_mut() {
515                if let Some(codepoint) = default_uvs.next() {
516                    return Some((codepoint, selector, MapVariant::UseDefault));
517                }
518            }
519            if let Some(non_default_uvs) = self.non_default_uvs.as_mut() {
520                if let Some((codepoint, variant)) = non_default_uvs.next() {
521                    return Some((codepoint, selector, MapVariant::Variant(variant.into())));
522                }
523            }
524            self.cur_selector_ix += 1;
525            let (selector_record, default_uvs, non_default_uvs) =
526                self.subtable.selector(self.cur_selector_ix);
527            self.selector_record = selector_record;
528            self.default_uvs = default_uvs.map(DefaultUvsIter::new);
529            self.non_default_uvs = non_default_uvs.map(NonDefaultUvsIter::new);
530        }
531    }
532}
533
534#[derive(Clone)]
535struct DefaultUvsIter<'a> {
536    ranges: std::slice::Iter<'a, UnicodeRange>,
537    cur_range: Range<u32>,
538}
539
540impl<'a> DefaultUvsIter<'a> {
541    fn new(ranges: DefaultUvs<'a>) -> Self {
542        let mut ranges = ranges.ranges().iter();
543        let cur_range = if let Some(range) = ranges.next() {
544            let start: u32 = range.start_unicode_value().into();
545            let end = start + range.additional_count() as u32 + 1;
546            start..end
547        } else {
548            0..0
549        };
550        Self { ranges, cur_range }
551    }
552}
553
554impl Iterator for DefaultUvsIter<'_> {
555    type Item = u32;
556
557    fn next(&mut self) -> Option<Self::Item> {
558        loop {
559            if let Some(codepoint) = self.cur_range.next() {
560                return Some(codepoint);
561            }
562            let range = self.ranges.next()?;
563            let start: u32 = range.start_unicode_value().into();
564            let end = start + range.additional_count() as u32 + 1;
565            self.cur_range = start..end;
566        }
567    }
568}
569
570#[derive(Clone)]
571struct NonDefaultUvsIter<'a> {
572    iter: std::slice::Iter<'a, UvsMapping>,
573}
574
575impl<'a> NonDefaultUvsIter<'a> {
576    fn new(uvs: NonDefaultUvs<'a>) -> Self {
577        Self {
578            iter: uvs.uvs_mapping().iter(),
579        }
580    }
581}
582
583impl Iterator for NonDefaultUvsIter<'_> {
584    type Item = (u32, GlyphId16);
585
586    fn next(&mut self) -> Option<Self::Item> {
587        let mapping = self.iter.next()?;
588        let codepoint: u32 = mapping.unicode_value().into();
589        let glyph_id = GlyphId16::new(mapping.glyph_id());
590        Some((codepoint, glyph_id))
591    }
592}
593
594#[cfg(test)]
595mod tests {
596    use font_test_data::{be_buffer, bebuffer::BeBuffer};
597
598    use super::*;
599    use crate::{FontRef, GlyphId, TableProvider};
600
601    #[test]
602    fn map_codepoints() {
603        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
604        let cmap = font.cmap().unwrap();
605        assert_eq!(cmap.map_codepoint('A'), Some(GlyphId::new(1)));
606        assert_eq!(cmap.map_codepoint('À'), Some(GlyphId::new(2)));
607        assert_eq!(cmap.map_codepoint('`'), Some(GlyphId::new(3)));
608        assert_eq!(cmap.map_codepoint('B'), None);
609
610        let font = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap();
611        let cmap = font.cmap().unwrap();
612        assert_eq!(cmap.map_codepoint(' '), Some(GlyphId::new(1)));
613        assert_eq!(cmap.map_codepoint(0xE_u32), Some(GlyphId::new(2)));
614        assert_eq!(cmap.map_codepoint('B'), None);
615    }
616
617    #[test]
618    fn map_variants() {
619        use super::MapVariant::*;
620        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
621        let cmap = font.cmap().unwrap();
622        let cmap14 = find_cmap14(&cmap).unwrap();
623        let selector = '\u{e0100}';
624        assert_eq!(cmap14.map_variant('a', selector), None);
625        assert_eq!(cmap14.map_variant('\u{4e00}', selector), Some(UseDefault));
626        assert_eq!(cmap14.map_variant('\u{4e06}', selector), Some(UseDefault));
627        assert_eq!(
628            cmap14.map_variant('\u{4e08}', selector),
629            Some(Variant(GlyphId::new(25)))
630        );
631        assert_eq!(
632            cmap14.map_variant('\u{4e09}', selector),
633            Some(Variant(GlyphId::new(26)))
634        );
635    }
636
637    #[test]
638    #[cfg(feature = "std")]
639    fn cmap14_closure_glyphs() {
640        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
641        let cmap = font.cmap().unwrap();
642        let mut unicodes = IntSet::empty();
643        unicodes.insert(0x4e08_u32);
644        unicodes.insert(0xe0100_u32);
645
646        let mut glyph_set = IntSet::empty();
647        glyph_set.insert(GlyphId::new(18));
648        cmap.closure_glyphs(&unicodes, &mut glyph_set);
649
650        assert_eq!(glyph_set.len(), 2);
651        assert!(glyph_set.contains(GlyphId::new(18)));
652        assert!(glyph_set.contains(GlyphId::new(25)));
653    }
654
655    #[test]
656    fn cmap4_iter() {
657        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
658        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
659        let mut count = 0;
660        for (codepoint, glyph_id) in cmap4.iter() {
661            assert_eq!(cmap4.map_codepoint(codepoint), Some(glyph_id));
662            count += 1;
663        }
664        assert_eq!(count, 4);
665        let font = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap();
666        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
667        let mut count = 0;
668        for (codepoint, glyph_id) in cmap4.iter() {
669            assert_eq!(cmap4.map_codepoint(codepoint), Some(glyph_id));
670            count += 1;
671        }
672        assert_eq!(count, 3);
673    }
674
675    #[test]
676    fn cmap4_iter_explicit_notdef() {
677        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
678        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
679        let mut notdef_count = 0;
680        for (_, glyph_id) in cmap4.iter() {
681            notdef_count += (glyph_id == GlyphId::NOTDEF) as i32;
682        }
683        assert!(notdef_count > 0);
684        assert_eq!(cmap4.map_codepoint(0xFFFF_u32), Some(GlyphId::NOTDEF));
685    }
686
687    // Make sure we don't bail early when iterating ranges with holes.
688    // Encountered with Gentium Basic and Gentium Basic Book.
689    // See <https://github.com/googlefonts/fontations/issues/897>
690    #[test]
691    fn cmap4_iter_sparse_range() {
692        #[rustfmt::skip]
693        let cmap4_data: &[u16] = &[
694            // format, length, lang
695            4, 0, 0,
696            // segCountX2
697            4, 
698            // bin search data
699            0, 0, 0,
700            // end code
701            262, 0xFFFF, 
702            // reserved pad
703            0,
704            // start code
705            259, 0xFFFF,
706            // id delta
707            0, 1, 
708            // id range offset
709            4, 0,
710            // glyph ids
711            236, 0, 0, 326,
712        ];
713        let mut buf = BeBuffer::new();
714        for &word in cmap4_data {
715            buf = buf.push(word);
716        }
717        let cmap4 = Cmap4::read(FontData::new(&buf)).unwrap();
718        let mappings = cmap4
719            .iter()
720            .map(|(ch, gid)| (ch, gid.to_u32()))
721            .collect::<Vec<_>>();
722        assert_eq!(mappings, &[(259, 236), (262, 326), (65535, 0)]);
723    }
724
725    #[test]
726    fn cmap12_iter() {
727        let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
728        let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap();
729        let mut count = 0;
730        for (codepoint, glyph_id) in cmap12.iter() {
731            assert_eq!(cmap12.map_codepoint(codepoint), Some(glyph_id));
732            count += 1;
733        }
734        assert_eq!(count, 10);
735    }
736
737    // oss-fuzz: detected integer addition overflow in Cmap12::group()
738    // ref: https://oss-fuzz.com/testcase-detail/5141969742397440
739    // and https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69547
740    #[test]
741    fn cmap12_iter_avoid_overflow() {
742        // reconstructed cmap from <https://oss-fuzz.com/testcase-detail/5141969742397440>
743        let data = be_buffer! {
744            12u16,      // format
745            0u16,       // reserved, set to 0
746            0u32,       // length, ignored
747            0u32,       // language, ignored
748            2u32,       // numGroups
749            // groups: [startCode, endCode, startGlyphID]
750            [0xFFFFFFFA_u32, 0xFFFFFFFC, 0], // group 0
751            [0xFFFFFFFB_u32, 0xFFFFFFFF, 0] // group 1
752        };
753        let cmap12 = Cmap12::read(data.data().into()).unwrap();
754        let _ = cmap12.iter().count();
755    }
756
757    // oss-fuzz: timeout in Cmap12Iter
758    // ref: https://oss-fuzz.com/testcase-detail/4628971063934976
759    // and https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69540
760    #[test]
761    fn cmap12_iter_avoid_timeout() {
762        // ranges: [SequentialMapGroup { start_char_code: 170, end_char_code: 1330926671, start_glyph_id: 328960 }]
763        let cmap12_data = be_buffer! {
764            12u16,      // format
765            0u16,       // reserved, set to 0
766            0u32,       // length, ignored
767            0u32,       // language, ignored
768            1u32,       // numGroups
769            // groups: [startCode, endCode, startGlyphID]
770            [170u32, 1330926671, 328960] // group 0
771        };
772        let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
773        assert!(
774            cmap12.iter_with_limits(Cmap12IterLimits::default()).count() <= char::MAX as usize + 1
775        );
776    }
777
778    // oss-fuzz: timeout in outlines, caused by cmap 12 iter
779    // ref: <https://issues.oss-fuzz.com/issues/394638728>
780    #[test]
781    fn cmap12_iter_avoid_timeout2() {
782        let cmap12_data = be_buffer! {
783            12u16,      // format
784            0u16,       // reserved, set to 0
785            0u32,       // length, ignored
786            0u32,       // language, ignored
787            3u32,       // numGroups
788            // groups: [startCode, endCode, startGlyphID]
789            [199u32, 16777271, 2],
790            [262u32, 262, 3],
791            [268u32, 268, 4]
792        };
793        let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
794        // In the test case, maxp.numGlyphs = 8
795        const MAX_GLYPHS: u32 = 8;
796        let limits = Cmap12IterLimits {
797            glyph_count: MAX_GLYPHS,
798            ..Default::default()
799        };
800        assert_eq!(cmap12.iter_with_limits(limits).count(), MAX_GLYPHS as usize);
801    }
802
803    #[test]
804    fn cmap12_iter_glyph_limit() {
805        let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
806        let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap();
807        let mut limits = Cmap12IterLimits::default_for_font(&font);
808        // Ensure we obey the glyph count limit.
809        // This font has 11 glyphs
810        for glyph_count in 0..=11 {
811            limits.glyph_count = glyph_count;
812            assert_eq!(
813                cmap12.iter_with_limits(limits).count(),
814                // We always return one less than glyph count limit because
815                // notdef is not mapped
816                (glyph_count as usize).saturating_sub(1)
817            );
818        }
819    }
820
821    #[test]
822    fn cmap12_iter_range_clamping() {
823        let data = be_buffer! {
824            12u16,      // format
825            0u16,       // reserved, set to 0
826            0u32,       // length, ignored
827            0u32,       // language, ignored
828            2u32,       // numGroups
829            // groups: [startCode, endCode, startGlyphID]
830            [0u32, 16777215, 0], // group 0
831            [255u32, 0xFFFFFFFF, 0] // group 1
832        };
833        let cmap12 = Cmap12::read(data.data().into()).unwrap();
834        let ranges = cmap12
835            .groups()
836            .iter()
837            .map(|group| (group.start_char_code(), group.end_char_code()))
838            .collect::<Vec<_>>();
839        // These groups overlap and extend to the whole u32 range
840        assert_eq!(ranges, &[(0, 16777215), (255, u32::MAX)]);
841        // But we produce at most char::MAX + 1 results
842        let limits = Cmap12IterLimits {
843            glyph_count: u32::MAX,
844            ..Default::default()
845        };
846        assert!(cmap12.iter_with_limits(limits).count() <= char::MAX as usize + 1);
847    }
848
849    #[test]
850    fn cmap12_iter_explicit_notdef() {
851        let data = be_buffer! {
852            12u16,      // format
853            0u16,       // reserved, set to 0
854            0u32,       // length, ignored
855            0u32,       // language, ignored
856            1u32,       // numGroups
857            // groups: [startCode, endCode, startGlyphID]
858            [0_u32, 1_u32, 0] // group 0
859        };
860        let cmap12 = Cmap12::read(data.data().into()).unwrap();
861        for (i, (codepoint, glyph_id)) in cmap12.iter().enumerate() {
862            assert_eq!(codepoint as usize, i);
863            assert_eq!(glyph_id.to_u32() as usize, i);
864        }
865        assert_eq!(cmap12.iter().next().unwrap().1, GlyphId::NOTDEF);
866    }
867
868    #[test]
869    fn cmap14_iter() {
870        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
871        let cmap14 = find_cmap14(&font.cmap().unwrap()).unwrap();
872        let mut count = 0;
873        for (codepoint, selector, mapping) in cmap14.iter() {
874            assert_eq!(cmap14.map_variant(codepoint, selector), Some(mapping));
875            count += 1;
876        }
877        assert_eq!(count, 7);
878    }
879
880    fn find_cmap4<'a>(cmap: &Cmap<'a>) -> Option<Cmap4<'a>> {
881        cmap.encoding_records()
882            .iter()
883            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
884            .find_map(|subtable| match subtable {
885                CmapSubtable::Format4(cmap4) => Some(cmap4),
886                _ => None,
887            })
888    }
889
890    fn find_cmap12<'a>(cmap: &Cmap<'a>) -> Option<Cmap12<'a>> {
891        cmap.encoding_records()
892            .iter()
893            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
894            .find_map(|subtable| match subtable {
895                CmapSubtable::Format12(cmap12) => Some(cmap12),
896                _ => None,
897            })
898    }
899
900    fn find_cmap14<'a>(cmap: &Cmap<'a>) -> Option<Cmap14<'a>> {
901        cmap.encoding_records()
902            .iter()
903            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
904            .find_map(|subtable| match subtable {
905                CmapSubtable::Format14(cmap14) => Some(cmap14),
906                _ => None,
907            })
908    }
909
910    /// <https://github.com/googlefonts/fontations/issues/1100>
911    ///
912    /// Note that this doesn't demonstrate the timeout, merely that we've eliminated the underlying
913    /// enthusiasm for non-ascending ranges that enabled it
914    #[test]
915    fn cmap4_bad_data() {
916        let buf = font_test_data::cmap::repetitive_cmap4();
917        let cmap4 = Cmap4::read(FontData::new(buf.as_slice())).unwrap();
918
919        // we should have unique, ascending codepoints, not duplicates and overlaps
920        assert_eq!(
921            (6..=64).collect::<Vec<_>>(),
922            cmap4.iter().map(|(cp, _)| cp).collect::<Vec<_>>()
923        );
924    }
925}