ttf_parser/tables/cmap/
format2.rs

1// This table has a pretty complex parsing algorithm.
2// A detailed explanation can be found here:
3// https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table
4// https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html
5// https://github.com/fonttools/fonttools/blob/a360252709a3d65f899915db0a5bd753007fdbb7/Lib/fontTools/ttLib/tables/_c_m_a_p.py#L360
6
7use core::convert::TryFrom;
8
9use crate::parser::{FromData, LazyArray16, Stream};
10use crate::GlyphId;
11
/// One sub-header record of a format 2 subtable.
///
/// Each record describes a contiguous range of low bytes and where the glyph
/// values for that range are stored.
#[derive(Clone, Copy)]
struct SubHeaderRecord {
    /// First low byte covered by this sub-header.
    first_code: u16,
    /// Number of consecutive low bytes covered, starting at `first_code`.
    entry_count: u16,
    /// Value added to a non-zero glyph value read from the glyph index array.
    id_delta: i16,
    /// Number of bytes from the location of this field to the glyph index
    /// array entry that corresponds to `first_code`.
    id_range_offset: u16,
}
19
20impl FromData for SubHeaderRecord {
21    const SIZE: usize = 8;
22
23    #[inline]
24    fn parse(data: &[u8]) -> Option<Self> {
25        let mut s = Stream::new(data);
26        Some(SubHeaderRecord {
27            first_code: s.read::<u16>()?,
28            entry_count: s.read::<u16>()?,
29            id_delta: s.read::<i16>()?,
30            id_range_offset: s.read::<u16>()?,
31        })
32    }
33}
34
/// A [format 2](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table)
/// subtable.
#[derive(Clone, Copy)]
pub struct Subtable2<'a> {
    // 256 entries indexed by the high byte of a code point.
    // Each value is a sub-header index multiplied by 8.
    sub_header_keys: LazyArray16<'a, u16>,
    // Offset of the first sub-header record, as reported by the stream
    // that reads `data` during parsing.
    sub_headers_offset: usize,
    // Sub-header records referenced by `sub_header_keys`.
    sub_headers: LazyArray16<'a, SubHeaderRecord>,
    // The whole subtable data.
    data: &'a [u8],
}
45
46impl<'a> Subtable2<'a> {
47    /// Parses a subtable from raw data.
48    pub fn parse(data: &'a [u8]) -> Option<Self> {
49        let mut s = Stream::new(data);
50        s.skip::<u16>(); // format
51        s.skip::<u16>(); // length
52        s.skip::<u16>(); // language
53        let sub_header_keys = s.read_array16::<u16>(256)?;
54        // The maximum index in a sub_header_keys is a sub_headers count.
55        let sub_headers_count = sub_header_keys.into_iter().map(|n| n / 8).max()? + 1;
56
57        // Remember sub_headers offset before reading. Will be used later.
58        let sub_headers_offset = s.offset();
59        let sub_headers = s.read_array16::<SubHeaderRecord>(sub_headers_count)?;
60
61        Some(Self {
62            sub_header_keys,
63            sub_headers_offset,
64            sub_headers,
65            data,
66        })
67    }
68
69    /// Returns a glyph index for a code point.
70    ///
71    /// Returns `None` when `code_point` is larger than `u16`.
72    pub fn glyph_index(&self, code_point: u32) -> Option<GlyphId> {
73        // This subtable supports code points only in a u16 range.
74        let code_point = u16::try_from(code_point).ok()?;
75        let high_byte = code_point >> 8;
76        let low_byte = code_point & 0x00FF;
77
78        let i = if code_point < 0xff {
79            // 'SubHeader 0 is special: it is used for single-byte character codes.'
80            0
81        } else {
82            // 'Array that maps high bytes to subHeaders: value is subHeader index × 8.'
83            self.sub_header_keys.get(high_byte)? / 8
84        };
85
86        let sub_header = self.sub_headers.get(i)?;
87
88        let first_code = sub_header.first_code;
89        let range_end = first_code.checked_add(sub_header.entry_count)?;
90        if low_byte < first_code || low_byte >= range_end {
91            return None;
92        }
93
94        // SubHeaderRecord::id_range_offset points to SubHeaderRecord::first_code
95        // in the glyphIndexArray. So we have to advance to our code point.
96        let index_offset = usize::from(low_byte.checked_sub(first_code)?) * u16::SIZE;
97
98        // 'The value of the idRangeOffset is the number of bytes
99        // past the actual location of the idRangeOffset'.
100        let offset = self.sub_headers_offset
101                // Advance to required subheader.
102                + SubHeaderRecord::SIZE * usize::from(i + 1)
103                // Move back to idRangeOffset start.
104                - u16::SIZE
105                // Use defined offset.
106                + usize::from(sub_header.id_range_offset)
107                // Advance to required index in the glyphIndexArray.
108                + index_offset;
109
110        let glyph: u16 = Stream::read_at(self.data, offset)?;
111        if glyph == 0 {
112            return None;
113        }
114
115        u16::try_from((i32::from(glyph) + i32::from(sub_header.id_delta)) % 65536)
116            .ok()
117            .map(GlyphId)
118    }
119
120    /// Calls `f` for each codepoint defined in this table.
121    pub fn codepoints(&self, f: impl FnMut(u32)) {
122        let _ = self.codepoints_inner(f);
123    }
124
125    #[inline]
126    fn codepoints_inner(&self, mut f: impl FnMut(u32)) -> Option<()> {
127        for first_byte in 0u16..256 {
128            let i = self.sub_header_keys.get(first_byte)? / 8;
129            let sub_header = self.sub_headers.get(i)?;
130            let first_code = sub_header.first_code;
131
132            if i == 0 {
133                // This is a single byte code.
134                let range_end = first_code.checked_add(sub_header.entry_count)?;
135                if first_byte >= first_code && first_byte < range_end {
136                    f(u32::from(first_byte));
137                }
138            } else {
139                // This is a two byte code.
140                let base = first_code.checked_add(first_byte << 8)?;
141                for k in 0..sub_header.entry_count {
142                    let code_point = base.checked_add(k)?;
143                    f(u32::from(code_point));
144                }
145            }
146        }
147
148        Some(())
149    }
150}
151
152impl core::fmt::Debug for Subtable2<'_> {
153    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
154        write!(f, "Subtable2 {{ ... }}")
155    }
156}
ttf_parser/tables/cmap/format2.rs

ttf_parser/tables/cmap/
format2.rs