read_fonts/tables/
cmap.rs

1//! The [cmap](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap) table
2
3include!("../../generated/generated_cmap.rs");
4
5#[cfg(feature = "std")]
6use crate::collections::IntSet;
7use crate::{FontRef, TableProvider};
8use std::ops::Range;
9
/// Result of mapping a codepoint with a variation selector.
///
/// Returned by [`Cmap14::map_variant`] and yielded as the third element of
/// the items produced by [`Cmap14Iter`].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum MapVariant {
    /// The variation selector should be ignored and the default mapping
    /// of the character should be used.
    UseDefault,
    /// The variant glyph mapped by a codepoint and associated variation
    /// selector.
    Variant(GlyphId),
}
20
21impl Cmap<'_> {
22    /// Map a codepoint to a nominal glyph identifier
23    ///
24    /// This uses the first available subtable that provides a valid mapping.
25    ///
26    /// # Note:
27    ///
28    /// Mapping logic is currently only implemented for the most common subtable
29    /// formats.
30    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
31        let codepoint = codepoint.into();
32        for record in self.encoding_records() {
33            if let Ok(subtable) = record.subtable(self.offset_data()) {
34                if let Some(gid) = match subtable {
35                    CmapSubtable::Format4(format4) => format4.map_codepoint(codepoint),
36                    CmapSubtable::Format12(format12) => format12.map_codepoint(codepoint),
37                    _ => None,
38                } {
39                    return Some(gid);
40                }
41            }
42        }
43        None
44    }
45
46    #[cfg(feature = "std")]
47    pub fn closure_glyphs(&self, unicodes: &IntSet<u32>, glyph_set: &mut IntSet<GlyphId>) {
48        for record in self.encoding_records() {
49            if let Ok(subtable) = record.subtable(self.offset_data()) {
50                match subtable {
51                    CmapSubtable::Format14(format14) => {
52                        format14.closure_glyphs(unicodes, glyph_set);
53                        return;
54                    }
55                    _ => {
56                        continue;
57                    }
58                }
59            }
60        }
61    }
62}
63
64impl CmapSubtable<'_> {
65    pub fn language(&self) -> u32 {
66        match self {
67            Self::Format0(item) => item.language() as u32,
68            Self::Format2(item) => item.language() as u32,
69            Self::Format4(item) => item.language() as u32,
70            Self::Format6(item) => item.language() as u32,
71            Self::Format10(item) => item.language(),
72            Self::Format12(item) => item.language(),
73            Self::Format13(item) => item.language(),
74            _ => 0,
75        }
76    }
77}
78
79impl<'a> Cmap4<'a> {
80    /// Maps a codepoint to a nominal glyph identifier.
81    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
82        let codepoint = codepoint.into();
83        if codepoint > 0xFFFF {
84            return None;
85        }
86        let codepoint = codepoint as u16;
87        let mut lo = 0;
88        let mut hi = self.seg_count_x2() as usize / 2;
89        let start_codes = self.start_code();
90        let end_codes = self.end_code();
91        while lo < hi {
92            let i = (lo + hi) / 2;
93            let start_code = start_codes.get(i)?.get();
94            if codepoint < start_code {
95                hi = i;
96            } else if codepoint > end_codes.get(i)?.get() {
97                lo = i + 1;
98            } else {
99                return self.lookup_glyph_id(codepoint, i, start_code);
100            }
101        }
102        None
103    }
104
105    /// Returns an iterator over all (codepoint, glyph identifier) pairs
106    /// in the subtable.
107    pub fn iter(&self) -> Cmap4Iter<'a> {
108        Cmap4Iter::new(self.clone())
109    }
110
111    /// Does the final phase of glyph id lookup.
112    ///
113    /// Shared between Self::map and Cmap4Iter.
114    fn lookup_glyph_id(&self, codepoint: u16, index: usize, start_code: u16) -> Option<GlyphId> {
115        let deltas = self.id_delta();
116        let range_offsets = self.id_range_offsets();
117        let delta = deltas.get(index)?.get() as i32;
118        let range_offset = range_offsets.get(index)?.get() as usize;
119        if range_offset == 0 {
120            return Some(GlyphId::from((codepoint as i32 + delta) as u16));
121        }
122        let mut offset = range_offset / 2 + (codepoint - start_code) as usize;
123        offset = offset.saturating_sub(range_offsets.len() - index);
124        let gid = self.glyph_id_array().get(offset)?.get();
125        (gid != 0).then_some(GlyphId::from((gid as i32 + delta) as u16))
126    }
127
128    /// Returns the [start_code, end_code] range at the given index.
129    fn code_range(&self, index: usize) -> Option<Range<u32>> {
130        // Extend to u32 to ensure we don't overflow on the end + 1 bound
131        // below.
132        let start = self.start_code().get(index)?.get() as u32;
133        let end = self.end_code().get(index)?.get() as u32;
134        // Use end + 1 here because the range in the table is inclusive
135        Some(start..end + 1)
136    }
137}
138
139/// Iterator over all (codepoint, glyph identifier) pairs in
140/// the subtable.
141#[derive(Clone)]
142pub struct Cmap4Iter<'a> {
143    subtable: Cmap4<'a>,
144    cur_range: Range<u32>,
145    cur_start_code: u16,
146    cur_range_ix: usize,
147}
148
149impl<'a> Cmap4Iter<'a> {
150    fn new(subtable: Cmap4<'a>) -> Self {
151        let cur_range = subtable.code_range(0).unwrap_or_default();
152        let cur_start_code = cur_range.start as u16;
153        Self {
154            subtable,
155            cur_range,
156            cur_start_code,
157            cur_range_ix: 0,
158        }
159    }
160}
161
162impl Iterator for Cmap4Iter<'_> {
163    type Item = (u32, GlyphId);
164
165    fn next(&mut self) -> Option<Self::Item> {
166        loop {
167            if let Some(codepoint) = self.cur_range.next() {
168                let Some(glyph_id) = self.subtable.lookup_glyph_id(
169                    codepoint as u16,
170                    self.cur_range_ix,
171                    self.cur_start_code,
172                ) else {
173                    continue;
174                };
175                // The table might explicitly map some codepoints to 0. Avoid
176                // returning those here.
177                if glyph_id == GlyphId::NOTDEF {
178                    continue;
179                }
180                return Some((codepoint, glyph_id));
181            } else {
182                self.cur_range_ix += 1;
183                let next_range = self.subtable.code_range(self.cur_range_ix)?;
184                // Groups should be in order and non-overlapping so make sure
185                // that the start code of next group is at least current_end + 1.
186                // Also avoid start sliding backwards if we see data where end < start by taking the max
187                // of next.end and curr.end as the new end.
188                // This prevents timeout and bizarre results in the face of numerous overlapping ranges
189                // https://github.com/googlefonts/fontations/issues/1100
190                // cmap4 ranges are u16 so no need to stress about values past char::MAX
191                self.cur_range = next_range.start.max(self.cur_range.end)
192                    ..next_range.end.max(self.cur_range.end);
193                self.cur_start_code = self.cur_range.start as u16;
194            }
195        }
196    }
197}
198
199impl<'a> Cmap12<'a> {
200    /// Maps a codepoint to a nominal glyph identifier.
201    pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
202        let codepoint = codepoint.into();
203        let groups = self.groups();
204        let mut lo = 0;
205        let mut hi = groups.len();
206        while lo < hi {
207            let i = (lo + hi) / 2;
208            let group = groups.get(i)?;
209            if codepoint < group.start_char_code() {
210                hi = i;
211            } else if codepoint > group.end_char_code() {
212                lo = i + 1;
213            } else {
214                return Some(self.lookup_glyph_id(
215                    codepoint,
216                    group.start_char_code(),
217                    group.start_glyph_id(),
218                ));
219            }
220        }
221        None
222    }
223
224    /// Returns an iterator over all (codepoint, glyph identifier) pairs
225    /// in the subtable.
226    ///
227    /// Malicious and malformed fonts can produce a large number of invalid
228    /// pairs. Use [`Self::iter_with_limits`] to generate a pruned sequence
229    /// that is limited to reasonable values.
230    pub fn iter(&self) -> Cmap12Iter<'a> {
231        Cmap12Iter::new(self.clone(), None)
232    }
233
234    /// Returns an iterator over all (codepoint, glyph identifier) pairs
235    /// in the subtable within the given limits.
236    pub fn iter_with_limits(&self, limits: Cmap12IterLimits) -> Cmap12Iter<'a> {
237        Cmap12Iter::new(self.clone(), Some(limits))
238    }
239
240    /// Does the final phase of glyph id lookup.
241    ///
242    /// Shared between Self::map and Cmap12Iter.
243    fn lookup_glyph_id(
244        &self,
245        codepoint: u32,
246        start_char_code: u32,
247        start_glyph_id: u32,
248    ) -> GlyphId {
249        GlyphId::new(start_glyph_id.wrapping_add(codepoint.wrapping_sub(start_char_code)))
250    }
251
252    /// Returns the codepoint range and start glyph id for the group
253    /// at the given index.
254    fn group(&self, index: usize, limits: &Option<Cmap12IterLimits>) -> Option<Cmap12Group> {
255        let group = self.groups().get(index)?;
256        let start_code = group.start_char_code();
257        // Change to exclusive range. This can never overflow since the source
258        // is a 32-bit value
259        let end_code = group.end_char_code() as u64 + 1;
260        let start_glyph_id = group.start_glyph_id();
261        let end_code = if let Some(limits) = limits {
262            // Set our end code to the minimum of our character and glyph
263            // count limit
264            (limits.glyph_count as u64)
265                .saturating_sub(start_glyph_id as u64)
266                .saturating_add(start_code as u64)
267                .min(end_code.min(limits.max_char as u64))
268        } else {
269            end_code
270        };
271        Some(Cmap12Group {
272            range: start_code as u64..end_code,
273            start_code,
274            start_glyph_id,
275        })
276    }
277}
278
/// A format 12 group prepared for iteration, as built by `Cmap12::group`.
#[derive(Clone, Debug)]
struct Cmap12Group {
    /// Codepoints of the group that remain to be visited. The end is
    /// exclusive, which is why the bounds are stored as `u64`.
    range: Range<u64>,
    /// The group's start character code as read from the table; used as the
    /// base when computing glyph ids.
    start_code: u32,
    /// The group's start glyph id as read from the table.
    start_glyph_id: u32,
}
285
286/// Character and glyph limits for iterating format 12 subtables.
287#[derive(Copy, Clone, Debug)]
288pub struct Cmap12IterLimits {
289    /// The maximum valid character.
290    pub max_char: u32,
291    /// The number of glyphs in the font.
292    pub glyph_count: u32,
293}
294
295impl Cmap12IterLimits {
296    /// Returns the default limits for the given font.
297    ///
298    /// This will limit pairs to `char::MAX` and the number of glyphs contained
299    /// in the font. If the font is missing a `maxp` table, the number of
300    /// glyphs will be limited to `u16::MAX`.
301    pub fn default_for_font(font: &FontRef) -> Self {
302        let glyph_count = font
303            .maxp()
304            .map(|maxp| maxp.num_glyphs())
305            .unwrap_or(u16::MAX) as u32;
306        Self {
307            // Limit to the valid range of Unicode characters
308            // per https://github.com/googlefonts/fontations/issues/952#issuecomment-2161510184
309            max_char: char::MAX as u32,
310            glyph_count,
311        }
312    }
313}
314
315impl Default for Cmap12IterLimits {
316    fn default() -> Self {
317        Self {
318            max_char: char::MAX as u32,
319            // Revisit this when we actually support big glyph ids
320            glyph_count: u16::MAX as u32,
321        }
322    }
323}
324
325/// Iterator over all (codepoint, glyph identifier) pairs in
326/// the subtable.
327#[derive(Clone)]
328pub struct Cmap12Iter<'a> {
329    subtable: Cmap12<'a>,
330    cur_group: Option<Cmap12Group>,
331    cur_group_ix: usize,
332    limits: Option<Cmap12IterLimits>,
333}
334
335impl<'a> Cmap12Iter<'a> {
336    fn new(subtable: Cmap12<'a>, limits: Option<Cmap12IterLimits>) -> Self {
337        let cur_group = subtable.group(0, &limits);
338        Self {
339            subtable,
340            cur_group,
341            cur_group_ix: 0,
342            limits,
343        }
344    }
345}
346
347impl Iterator for Cmap12Iter<'_> {
348    type Item = (u32, GlyphId);
349
350    fn next(&mut self) -> Option<Self::Item> {
351        loop {
352            let group = self.cur_group.as_mut()?;
353            if let Some(codepoint) = group.range.next() {
354                let codepoint = codepoint as u32;
355                let glyph_id = self.subtable.lookup_glyph_id(
356                    codepoint,
357                    group.start_code,
358                    group.start_glyph_id,
359                );
360                // The table might explicitly map some codepoints to 0. Avoid
361                // returning those here.
362                if glyph_id == GlyphId::NOTDEF {
363                    continue;
364                }
365                return Some((codepoint, glyph_id));
366            } else {
367                self.cur_group_ix += 1;
368                let mut next_group = self.subtable.group(self.cur_group_ix, &self.limits)?;
369                // Groups should be in order and non-overlapping so make sure
370                // that the start code of next group is at least
371                // current_end.
372                if next_group.range.start < group.range.end {
373                    next_group.range = group.range.end..next_group.range.end;
374                }
375                self.cur_group = Some(next_group);
376            }
377        }
378    }
379}
380
381impl<'a> Cmap14<'a> {
382    /// Maps a codepoint and variation selector to a nominal glyph identifier.
383    pub fn map_variant(
384        &self,
385        codepoint: impl Into<u32>,
386        selector: impl Into<u32>,
387    ) -> Option<MapVariant> {
388        let codepoint = codepoint.into();
389        let selector = selector.into();
390        let selector_records = self.var_selector();
391        // Variation selector records are sorted in order of var_selector. Binary search to find
392        // the appropriate record.
393        let selector_record = selector_records
394            .binary_search_by(|rec| {
395                let rec_selector: u32 = rec.var_selector().into();
396                rec_selector.cmp(&selector)
397            })
398            .ok()
399            .and_then(|idx| selector_records.get(idx))?;
400        // If a default UVS table is present in this selector record, binary search on the ranges
401        // (start_unicode_value, start_unicode_value + additional_count) to find the requested codepoint.
402        // If found, ignore the selector and return a value indicating that the default cmap mapping
403        // should be used.
404        if let Some(Ok(default_uvs)) = selector_record.default_uvs(self.offset_data()) {
405            use core::cmp::Ordering;
406            let found_default_uvs = default_uvs
407                .ranges()
408                .binary_search_by(|range| {
409                    let start = range.start_unicode_value().into();
410                    if codepoint < start {
411                        Ordering::Greater
412                    } else if codepoint > (start + range.additional_count() as u32) {
413                        Ordering::Less
414                    } else {
415                        Ordering::Equal
416                    }
417                })
418                .is_ok();
419            if found_default_uvs {
420                return Some(MapVariant::UseDefault);
421            }
422        }
423        // Binary search the non-default UVS table if present. This maps codepoint+selector to a variant glyph.
424        let non_default_uvs = selector_record.non_default_uvs(self.offset_data())?.ok()?;
425        let mapping = non_default_uvs.uvs_mapping();
426        let ix = mapping
427            .binary_search_by(|map| {
428                let map_codepoint: u32 = map.unicode_value().into();
429                map_codepoint.cmp(&codepoint)
430            })
431            .ok()?;
432        Some(MapVariant::Variant(GlyphId::from(
433            mapping.get(ix)?.glyph_id(),
434        )))
435    }
436
437    /// Returns an iterator over all (codepoint, selector, mapping variant)
438    /// triples in the subtable.
439    pub fn iter(&self) -> Cmap14Iter<'a> {
440        Cmap14Iter::new(self.clone())
441    }
442
443    fn selector(
444        &self,
445        index: usize,
446    ) -> (
447        Option<VariationSelector>,
448        Option<DefaultUvs<'a>>,
449        Option<NonDefaultUvs<'a>>,
450    ) {
451        let selector = self.var_selector().get(index).cloned();
452        let default_uvs = selector.as_ref().and_then(|selector| {
453            selector
454                .default_uvs(self.offset_data())
455                .transpose()
456                .ok()
457                .flatten()
458        });
459        let non_default_uvs = selector.as_ref().and_then(|selector| {
460            selector
461                .non_default_uvs(self.offset_data())
462                .transpose()
463                .ok()
464                .flatten()
465        });
466        (selector, default_uvs, non_default_uvs)
467    }
468
469    #[cfg(feature = "std")]
470    pub fn closure_glyphs(&self, unicodes: &IntSet<u32>, glyph_set: &mut IntSet<GlyphId>) {
471        for selector in self.var_selector() {
472            if !unicodes.contains(selector.var_selector().to_u32()) {
473                continue;
474            }
475            if let Some(non_default_uvs) = selector
476                .non_default_uvs(self.offset_data())
477                .transpose()
478                .ok()
479                .flatten()
480            {
481                glyph_set.extend(
482                    non_default_uvs
483                        .uvs_mapping()
484                        .iter()
485                        .filter(|m| unicodes.contains(m.unicode_value().to_u32()))
486                        .map(|m| m.glyph_id().into()),
487                );
488            }
489        }
490    }
491}
492
493/// Iterator over all (codepoint, selector, mapping variant) triples
494/// in the subtable.
495#[derive(Clone)]
496pub struct Cmap14Iter<'a> {
497    subtable: Cmap14<'a>,
498    selector_record: Option<VariationSelector>,
499    default_uvs: Option<DefaultUvsIter<'a>>,
500    non_default_uvs: Option<NonDefaultUvsIter<'a>>,
501    cur_selector_ix: usize,
502}
503
504impl<'a> Cmap14Iter<'a> {
505    fn new(subtable: Cmap14<'a>) -> Self {
506        let (selector_record, default_uvs, non_default_uvs) = subtable.selector(0);
507        Self {
508            subtable,
509            selector_record,
510            default_uvs: default_uvs.map(DefaultUvsIter::new),
511            non_default_uvs: non_default_uvs.map(NonDefaultUvsIter::new),
512            cur_selector_ix: 0,
513        }
514    }
515}
516
517impl Iterator for Cmap14Iter<'_> {
518    type Item = (u32, u32, MapVariant);
519
520    fn next(&mut self) -> Option<Self::Item> {
521        loop {
522            let selector_record = self.selector_record.as_ref()?;
523            let selector: u32 = selector_record.var_selector().into();
524            if let Some(default_uvs) = self.default_uvs.as_mut() {
525                if let Some(codepoint) = default_uvs.next() {
526                    return Some((codepoint, selector, MapVariant::UseDefault));
527                }
528            }
529            if let Some(non_default_uvs) = self.non_default_uvs.as_mut() {
530                if let Some((codepoint, variant)) = non_default_uvs.next() {
531                    return Some((codepoint, selector, MapVariant::Variant(variant.into())));
532                }
533            }
534            self.cur_selector_ix += 1;
535            let (selector_record, default_uvs, non_default_uvs) =
536                self.subtable.selector(self.cur_selector_ix);
537            self.selector_record = selector_record;
538            self.default_uvs = default_uvs.map(DefaultUvsIter::new);
539            self.non_default_uvs = non_default_uvs.map(NonDefaultUvsIter::new);
540        }
541    }
542}
543
544#[derive(Clone)]
545struct DefaultUvsIter<'a> {
546    ranges: std::slice::Iter<'a, UnicodeRange>,
547    cur_range: Range<u32>,
548}
549
550impl<'a> DefaultUvsIter<'a> {
551    fn new(ranges: DefaultUvs<'a>) -> Self {
552        let mut ranges = ranges.ranges().iter();
553        let cur_range = if let Some(range) = ranges.next() {
554            let start: u32 = range.start_unicode_value().into();
555            let end = start + range.additional_count() as u32 + 1;
556            start..end
557        } else {
558            0..0
559        };
560        Self { ranges, cur_range }
561    }
562}
563
564impl Iterator for DefaultUvsIter<'_> {
565    type Item = u32;
566
567    fn next(&mut self) -> Option<Self::Item> {
568        loop {
569            if let Some(codepoint) = self.cur_range.next() {
570                return Some(codepoint);
571            }
572            let range = self.ranges.next()?;
573            let start: u32 = range.start_unicode_value().into();
574            let end = start + range.additional_count() as u32 + 1;
575            self.cur_range = start..end;
576        }
577    }
578}
579
580#[derive(Clone)]
581struct NonDefaultUvsIter<'a> {
582    iter: std::slice::Iter<'a, UvsMapping>,
583}
584
585impl<'a> NonDefaultUvsIter<'a> {
586    fn new(uvs: NonDefaultUvs<'a>) -> Self {
587        Self {
588            iter: uvs.uvs_mapping().iter(),
589        }
590    }
591}
592
593impl Iterator for NonDefaultUvsIter<'_> {
594    type Item = (u32, GlyphId16);
595
596    fn next(&mut self) -> Option<Self::Item> {
597        let mapping = self.iter.next()?;
598        let codepoint: u32 = mapping.unicode_value().into();
599        let glyph_id = GlyphId16::new(mapping.glyph_id());
600        Some((codepoint, glyph_id))
601    }
602}
603
#[cfg(test)]
mod tests {
    use font_test_data::{be_buffer, bebuffer::BeBuffer};

    use super::*;
    use crate::{FontRef, GlyphId, TableProvider};

    // Nominal lookups through `Cmap::map_codepoint` on two test fonts.
    #[test]
    fn map_codepoints() {
        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
        let cmap = font.cmap().unwrap();
        assert_eq!(cmap.map_codepoint('A'), Some(GlyphId::new(1)));
        assert_eq!(cmap.map_codepoint('À'), Some(GlyphId::new(2)));
        assert_eq!(cmap.map_codepoint('`'), Some(GlyphId::new(3)));
        assert_eq!(cmap.map_codepoint('B'), None);

        let font = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap();
        let cmap = font.cmap().unwrap();
        assert_eq!(cmap.map_codepoint(' '), Some(GlyphId::new(1)));
        assert_eq!(cmap.map_codepoint(0xE_u32), Some(GlyphId::new(2)));
        assert_eq!(cmap.map_codepoint('B'), None);
    }

    // Variation sequence lookups covering the unknown, default and
    // non-default cases.
    #[test]
    fn map_variants() {
        use super::MapVariant::*;
        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
        let cmap = font.cmap().unwrap();
        let cmap14 = find_cmap14(&cmap).unwrap();
        let selector = '\u{e0100}';
        assert_eq!(cmap14.map_variant('a', selector), None);
        assert_eq!(cmap14.map_variant('\u{4e00}', selector), Some(UseDefault));
        assert_eq!(cmap14.map_variant('\u{4e06}', selector), Some(UseDefault));
        assert_eq!(
            cmap14.map_variant('\u{4e08}', selector),
            Some(Variant(GlyphId::new(25)))
        );
        assert_eq!(
            cmap14.map_variant('\u{4e09}', selector),
            Some(Variant(GlyphId::new(26)))
        );
    }

    // The closure must add the variant glyph and keep what was already in
    // the set.
    #[test]
    #[cfg(feature = "std")]
    fn cmap14_closure_glyphs() {
        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
        let cmap = font.cmap().unwrap();
        let mut unicodes = IntSet::empty();
        unicodes.insert(0x4e08_u32);
        unicodes.insert(0xe0100_u32);

        let mut glyph_set = IntSet::empty();
        glyph_set.insert(GlyphId::new(18));
        cmap.closure_glyphs(&unicodes, &mut glyph_set);

        assert_eq!(glyph_set.len(), 2);
        assert!(glyph_set.contains(GlyphId::new(18)));
        assert!(glyph_set.contains(GlyphId::new(25)));
    }

    // Every pair produced by the iterator must agree with `map_codepoint`.
    #[test]
    fn cmap4_iter() {
        let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
        let mut count = 0;
        for (codepoint, glyph_id) in cmap4.iter() {
            assert_eq!(cmap4.map_codepoint(codepoint), Some(glyph_id));
            count += 1;
        }
        assert_eq!(count, 3);
        let font = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap();
        let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap();
        let mut count = 0;
        for (codepoint, glyph_id) in cmap4.iter() {
            assert_eq!(cmap4.map_codepoint(codepoint), Some(glyph_id));
            count += 1;
        }
        assert_eq!(count, 2);
    }

    // Make sure we don't bail early when iterating ranges with holes.
    // Encountered with Gentium Basic and Gentium Basic Book.
    // See <https://github.com/googlefonts/fontations/issues/897>
    #[test]
    fn cmap4_iter_sparse_range() {
        #[rustfmt::skip]
        let cmap4_data: &[u16] = &[
            // format, length, lang
            4, 0, 0,
            // segCountX2
            4,
            // bin search data
            0, 0, 0,
            // end code
            262, 0xFFFF,
            // reserved pad
            0,
            // start code
            259, 0xFFFF,
            // id delta
            0, 1,
            // id range offset
            4, 0,
            // glyph ids
            236, 0, 0, 326,
        ];
        let mut buf = BeBuffer::new();
        for &word in cmap4_data {
            buf = buf.push(word);
        }
        let cmap4 = Cmap4::read(FontData::new(&buf)).unwrap();
        let mappings = cmap4
            .iter()
            .map(|(ch, gid)| (ch, gid.to_u32()))
            .collect::<Vec<_>>();
        assert_eq!(mappings, &[(259, 236), (262, 326)]);
    }

    // Every pair produced by the iterator must agree with `map_codepoint`.
    #[test]
    fn cmap12_iter() {
        let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
        let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap();
        let mut count = 0;
        for (codepoint, glyph_id) in cmap12.iter() {
            assert_eq!(cmap12.map_codepoint(codepoint), Some(glyph_id));
            count += 1;
        }
        assert_eq!(count, 10);
    }

    // reconstructed cmap from <https://oss-fuzz.com/testcase-detail/5141969742397440>
    fn cmap12_overflow_data() -> BeBuffer {
        be_buffer! {
            12u16,      // format
            0u16,       // reserved, set to 0
            0u32,       // length, ignored
            0u32,       // language, ignored
            2u32,       // numGroups
            // groups: [startCode, endCode, startGlyphID]
            [0u32, 16777215, 0], // group 0
            [255u32, 0xFFFFFFFF, 0] // group 1
        }
    }

    // oss-fuzz: detected integer addition overflow in Cmap12::group()
    // ref: https://oss-fuzz.com/testcase-detail/5141969742397440
    // and https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69547
    #[test]
    fn cmap12_iter_avoid_overflow() {
        let data = cmap12_overflow_data();
        let cmap12 = Cmap12::read(data.data().into()).unwrap();
        // The second group ends at u32::MAX, so an unlimited iterator would
        // walk roughly 2^32 codepoints. The `end + 1` computation under test
        // runs before any limit is applied, so iterating with limits still
        // covers the regression while keeping the test fast.
        let _ = cmap12
            .iter_with_limits(Cmap12IterLimits::default())
            .count();
    }

    // oss-fuzz: timeout in Cmap12Iter
    // ref: https://oss-fuzz.com/testcase-detail/4628971063934976
    // and https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69540
    #[test]
    fn cmap12_iter_avoid_timeout() {
        // ranges: [SequentialMapGroup { start_char_code: 170, end_char_code: 1330926671, start_glyph_id: 328960 }]
        let cmap12_data = be_buffer! {
            12u16,      // format
            0u16,       // reserved, set to 0
            0u32,       // length, ignored
            0u32,       // language, ignored
            1u32,       // numGroups
            // groups: [startCode, endCode, startGlyphID]
            [170u32, 1330926671, 328960] // group 0
        };
        let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
        assert!(
            cmap12.iter_with_limits(Cmap12IterLimits::default()).count() <= char::MAX as usize + 1
        );
    }

    // oss-fuzz: timeout in outlines, caused by cmap 12 iter
    // ref: <https://issues.oss-fuzz.com/issues/394638728>
    #[test]
    fn cmap12_iter_avoid_timeout2() {
        let cmap12_data = be_buffer! {
            12u16,      // format
            0u16,       // reserved, set to 0
            0u32,       // length, ignored
            0u32,       // language, ignored
            3u32,       // numGroups
            // groups: [startCode, endCode, startGlyphID]
            [199u32, 16777271, 2],
            [262u32, 262, 3],
            [268u32, 268, 4]
        };
        let cmap12 = Cmap12::read(cmap12_data.data().into()).unwrap();
        // In the test case, maxp.numGlyphs = 8
        const MAX_GLYPHS: u32 = 8;
        let limits = Cmap12IterLimits {
            glyph_count: MAX_GLYPHS,
            ..Default::default()
        };
        assert_eq!(cmap12.iter_with_limits(limits).count(), MAX_GLYPHS as usize);
    }

    #[test]
    fn cmap12_iter_glyph_limit() {
        let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
        let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap();
        let mut limits = Cmap12IterLimits::default_for_font(&font);
        // Ensure we obey the glyph count limit.
        // This font has 11 glyphs
        for glyph_count in 0..=11 {
            limits.glyph_count = glyph_count;
            assert_eq!(
                cmap12.iter_with_limits(limits).count(),
                // We always return one less than glyph count limit because
                // notdef is not mapped
                (glyph_count as usize).saturating_sub(1)
            );
        }
    }

    #[test]
    fn cmap12_iter_range_clamping() {
        let data = cmap12_overflow_data();
        let cmap12 = Cmap12::read(data.data().into()).unwrap();
        let ranges = cmap12
            .groups()
            .iter()
            .map(|group| (group.start_char_code(), group.end_char_code()))
            .collect::<Vec<_>>();
        // These groups overlap and extend to the whole u32 range
        assert_eq!(ranges, &[(0, 16777215), (255, u32::MAX)]);
        // But we produce at most char::MAX + 1 results
        let limits = Cmap12IterLimits {
            glyph_count: u32::MAX,
            ..Default::default()
        };
        assert!(cmap12.iter_with_limits(limits).count() <= char::MAX as usize + 1);
    }

    // Every triple produced by the iterator must agree with `map_variant`.
    #[test]
    fn cmap14_iter() {
        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
        let cmap14 = find_cmap14(&font.cmap().unwrap()).unwrap();
        let mut count = 0;
        for (codepoint, selector, mapping) in cmap14.iter() {
            assert_eq!(cmap14.map_variant(codepoint, selector), Some(mapping));
            count += 1;
        }
        assert_eq!(count, 7);
    }

    /// Returns the first format 4 subtable that resolves successfully.
    fn find_cmap4<'a>(cmap: &Cmap<'a>) -> Option<Cmap4<'a>> {
        cmap.encoding_records()
            .iter()
            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
            .find_map(|subtable| match subtable {
                CmapSubtable::Format4(cmap4) => Some(cmap4),
                _ => None,
            })
    }

    /// Returns the first format 12 subtable that resolves successfully.
    fn find_cmap12<'a>(cmap: &Cmap<'a>) -> Option<Cmap12<'a>> {
        cmap.encoding_records()
            .iter()
            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
            .find_map(|subtable| match subtable {
                CmapSubtable::Format12(cmap12) => Some(cmap12),
                _ => None,
            })
    }

    /// Returns the first format 14 subtable that resolves successfully.
    fn find_cmap14<'a>(cmap: &Cmap<'a>) -> Option<Cmap14<'a>> {
        cmap.encoding_records()
            .iter()
            .filter_map(|record| record.subtable(cmap.offset_data()).ok())
            .find_map(|subtable| match subtable {
                CmapSubtable::Format14(cmap14) => Some(cmap14),
                _ => None,
            })
    }

    /// <https://github.com/googlefonts/fontations/issues/1100>
    ///
    /// Note that this doesn't demonstrate the timeout, merely that we've eliminated the underlying
    /// enthusiasm for non-ascending ranges that enabled it
    #[test]
    fn cmap4_bad_data() {
        let buf = font_test_data::cmap::repetitive_cmap4();
        let cmap4 = Cmap4::read(FontData::new(buf.as_slice())).unwrap();

        // we should have unique, ascending codepoints, not duplicates and overlaps
        assert_eq!(
            (6..=64).collect::<Vec<_>>(),
            cmap4.iter().map(|(cp, _)| cp).collect::<Vec<_>>()
        );
    }
}