read_fonts/tables/postscript/
charset.rs

//! CFF charset support.
2
3use super::{
4    CharsetFormat0, CharsetFormat1, CharsetFormat2, CharsetRange1, CharsetRange2, CustomCharset,
5    FontData, FontRead, GlyphId, ReadError, StringId,
6};
7
8/// Character set for mapping from glyph to string identifiers.
9///
10/// See <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=21>
11#[derive(Clone)]
12pub struct Charset<'a> {
13    kind: CharsetKind<'a>,
14    num_glyphs: u32,
15}
16
17impl<'a> Charset<'a> {
18    pub fn new(
19        cff_data: FontData<'a>,
20        charset_offset: usize,
21        num_glyphs: u32,
22    ) -> Result<Self, ReadError> {
23        let kind = match charset_offset {
24            0 => CharsetKind::IsoAdobe,
25            1 => CharsetKind::Expert,
26            2 => CharsetKind::ExpertSubset,
27            _ => {
28                let data = cff_data
29                    .split_off(charset_offset)
30                    .ok_or(ReadError::OutOfBounds)?;
31                CharsetKind::Custom(CustomCharset::read(data)?)
32            }
33        };
34        Ok(Self { kind, num_glyphs })
35    }
36
37    pub fn kind(&self) -> &CharsetKind<'a> {
38        &self.kind
39    }
40
41    pub fn num_glyphs(&self) -> u32 {
42        self.num_glyphs
43    }
44
45    /// Returns the string identifier for the given glyph identifier.
46    pub fn string_id(&self, glyph_id: GlyphId) -> Result<StringId, ReadError> {
47        let gid = glyph_id.to_u32();
48        if gid >= self.num_glyphs {
49            return Err(ReadError::OutOfBounds);
50        }
51        match &self.kind {
52            CharsetKind::IsoAdobe => {
53                // The ISOAdobe charset is an identity mapping of gid->sid up
54                // to 228 entries
55                // <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=45>
56                if gid <= 228 {
57                    Ok(StringId::new(gid as u16))
58                } else {
59                    Err(ReadError::OutOfBounds)
60                }
61            }
62            CharsetKind::Expert => EXPERT_CHARSET
63                .get(gid as usize)
64                .copied()
65                .ok_or(ReadError::OutOfBounds)
66                .map(StringId::new),
67            CharsetKind::ExpertSubset => EXPERT_SUBSET_CHARSET
68                .get(gid as usize)
69                .copied()
70                .ok_or(ReadError::OutOfBounds)
71                .map(StringId::new),
72            CharsetKind::Custom(custom) => match custom {
73                CustomCharset::Format0(fmt) => fmt.string_id(glyph_id),
74                CustomCharset::Format1(fmt) => fmt.string_id(glyph_id),
75                CustomCharset::Format2(fmt) => fmt.string_id(glyph_id),
76            },
77        }
78    }
79
80    /// Returns an iterator over all of the glyph and string identifier
81    /// mappings.
82    pub fn iter(&self) -> CharsetIter<'a> {
83        match &self.kind {
84            CharsetKind::IsoAdobe
85            | CharsetKind::Expert
86            | CharsetKind::ExpertSubset
87            | CharsetKind::Custom(CustomCharset::Format0(_)) => {
88                CharsetIter(Iter::Simple(self.clone(), 0))
89            }
90            CharsetKind::Custom(CustomCharset::Format1(custom)) => CharsetIter(Iter::Custom1(
91                RangeIter::new(custom.ranges(), self.num_glyphs),
92            )),
93            CharsetKind::Custom(CustomCharset::Format2(custom)) => CharsetIter(Iter::Custom2(
94                RangeIter::new(custom.ranges(), self.num_glyphs),
95            )),
96        }
97    }
98}
99
100/// Predefined and custom character sets.
101#[derive(Clone)]
102pub enum CharsetKind<'a> {
103    IsoAdobe,
104    Expert,
105    ExpertSubset,
106    Custom(CustomCharset<'a>),
107}
108
109impl CharsetFormat0<'_> {
110    fn string_id(&self, glyph_id: GlyphId) -> Result<StringId, ReadError> {
111        let gid = glyph_id.to_u32() as usize;
112        if gid == 0 {
113            Ok(StringId::new(0))
114        } else {
115            self.glyph()
116                .get(gid - 1)
117                .map(|id| StringId::new(id.get()))
118                .ok_or(ReadError::OutOfBounds)
119        }
120    }
121}
122
123impl CharsetFormat1<'_> {
124    fn string_id(&self, glyph_id: GlyphId) -> Result<StringId, ReadError> {
125        string_id_from_ranges(self.ranges(), glyph_id)
126    }
127}
128
129impl CharsetFormat2<'_> {
130    fn string_id(&self, glyph_id: GlyphId) -> Result<StringId, ReadError> {
131        string_id_from_ranges(self.ranges(), glyph_id)
132    }
133}
134
135fn string_id_from_ranges<T: CharsetRange>(
136    ranges: &[T],
137    glyph_id: GlyphId,
138) -> Result<StringId, ReadError> {
139    let mut gid = glyph_id.to_u32();
140    // The notdef glyph isn't explicitly mapped so we need to special case
141    // it and add -1 and +1 at a few places when processing ranges
142    if gid == 0 {
143        return Ok(StringId::new(0));
144    }
145    gid -= 1;
146    let mut end = 0u32;
147    // Each range provides the string ids for `n_left + 1` glyphs with
148    // the sequence of string ids starting at `first`. Since the counts
149    // are cumulative, we must scan them all in order until we find
150    // the range that contains our requested glyph.
151    for range in ranges {
152        let next_end = end
153            .checked_add(range.n_left() + 1)
154            .ok_or(ReadError::OutOfBounds)?;
155        if gid < next_end {
156            return (gid - end)
157                .checked_add(range.first())
158                .and_then(|sid| sid.try_into().ok())
159                .ok_or(ReadError::OutOfBounds)
160                .map(StringId::new);
161        }
162        end = next_end;
163    }
164    Err(ReadError::OutOfBounds)
165}
166
/// Common view over the range records of formats 1 and 2, which lets the
/// tricky search logic be written a single time.
trait CharsetRange {
    /// String identifier assigned to the first glyph of the range.
    fn first(&self) -> u32;

    /// Number of glyphs in the range beyond the first one.
    fn n_left(&self) -> u32;
}
173
174impl CharsetRange for CharsetRange1 {
175    fn first(&self) -> u32 {
176        self.first.get() as u32
177    }
178
179    fn n_left(&self) -> u32 {
180        self.n_left as u32
181    }
182}
183
184impl CharsetRange for CharsetRange2 {
185    fn first(&self) -> u32 {
186        self.first.get() as u32
187    }
188
189    fn n_left(&self) -> u32 {
190        self.n_left.get() as u32
191    }
192}
193
194/// Iterator over the glyph and string identifier mappings in a character set.
195#[derive(Clone)]
196pub struct CharsetIter<'a>(Iter<'a>);
197
198impl Iterator for CharsetIter<'_> {
199    type Item = (GlyphId, StringId);
200
201    fn next(&mut self) -> Option<Self::Item> {
202        match &mut self.0 {
203            Iter::Simple(charset, cur) => {
204                let gid = GlyphId::new(*cur);
205                let sid = charset.string_id(gid).ok()?;
206                *cur = cur.checked_add(1)?;
207                Some((gid, sid))
208            }
209            Iter::Custom1(custom) => custom.next(),
210            Iter::Custom2(custom) => custom.next(),
211        }
212    }
213}
214
215#[derive(Clone)]
216enum Iter<'a> {
217    /// Predefined sets and custom format 0 are just array lookups so we use
218    /// the builtin mapping function.
219    Simple(Charset<'a>, u32),
220    Custom1(RangeIter<'a, CharsetRange1>),
221    Custom2(RangeIter<'a, CharsetRange2>),
222}
223
/// Iterator specialised for the range based charset formats.
///
/// Looking up a single glyph means scanning the ranges linearly, so
/// iteration keeps its position in the range list instead of repeating
/// that scan for every glyph.
#[derive(Clone)]
struct RangeIter<'a, T> {
    /// Ranges that have not been loaded yet.
    ranges: std::slice::Iter<'a, T>,
    /// Total number of glyphs to yield, including glyph 0.
    num_glyphs: u32,
    /// Glyph id that the next call will yield.
    gid: u32,
    /// First string id of the currently loaded range.
    first: u32,
    /// Exclusive end of the currently loaded range, counted in glyphs
    /// after glyph 0.
    end: u32,
    /// Exclusive end of the previously loaded range, i.e. where the
    /// currently loaded range begins.
    prev_end: u32,
}
237
238impl<'a, T> RangeIter<'a, T>
239where
240    T: CharsetRange,
241{
242    fn new(ranges: &'a [T], num_glyphs: u32) -> Self {
243        let mut ranges = ranges.iter();
244        let (first, end) = next_range(&mut ranges).unwrap_or_default();
245        Self {
246            ranges,
247            num_glyphs,
248            gid: 0,
249            first,
250            end,
251            prev_end: 0,
252        }
253    }
254
255    fn next(&mut self) -> Option<(GlyphId, StringId)> {
256        if self.gid >= self.num_glyphs {
257            return None;
258        }
259        // The notdef glyph isn't explicitly mapped so we need to special case
260        // it and add -1 and +1 at a few places when processing ranges
261        if self.gid == 0 {
262            self.gid += 1;
263            return Some((GlyphId::new(0), StringId::new(0)));
264        }
265        let gid = self.gid - 1;
266        self.gid = self.gid.checked_add(1)?;
267        while gid >= self.end {
268            let (first, end) = next_range(&mut self.ranges)?;
269            self.prev_end = self.end;
270            self.first = first;
271            self.end = self.prev_end.checked_add(end)?;
272        }
273        let sid = self
274            .first
275            .checked_add(gid.checked_sub(self.prev_end)?)?
276            .try_into()
277            .ok()?;
278        Some((GlyphId::new(gid + 1), StringId::new(sid)))
279    }
280}
281
282fn next_range<T: CharsetRange>(ranges: &mut std::slice::Iter<T>) -> Option<(u32, u32)> {
283    ranges
284        .next()
285        .map(|range| (range.first(), range.n_left() + 1))
286}
287
288/// See "Expert" charset at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=47>
289#[rustfmt::skip]
290const EXPERT_CHARSET: &[u16] = &[
291      0,    1,  229,  230,  231,  232,  233,  234,  235,  236,  237,  238,   13,   14,   15,   99,
292    239,  240,  241,  242,  243,  244,  245,  246,  247,  248,   27,   28,  249,  250,  251,  252,
293    253,  254,  255,  256,  257,  258,  259,  260,  261,  262,  263,  264,  265,  266,  109,  110,
294    267,  268,  269,  270,  271,  272,  273,  274,  275,  276,  277,  278,  279,  280,  281,  282,
295    283,  284,  285,  286,  287,  288,  289,  290,  291,  292,  293,  294,  295,  296,  297,  298,
296    299,  300,  301,  302,  303,  304,  305,  306,  307,  308,  309,  310,  311,  312,  313,  314,
297    315,  316,  317,  318,  158,  155,  163,  319,  320,  321,  322,  323,  324,  325,  326,  150,
298    164,  169,  327,  328,  329,  330,  331,  332,  333,  334,  335,  336,  337,  338,  339,  340,
299    341,  342,  343,  344,  345,  346,  347,  348,  349,  350,  351,  352,  353,  354,  355,  356,
300    357,  358,  359,  360,  361,  362,  363,  364,  365,  366,  367,  368,  369,  370,  371,  372,
301    373,  374,  375,  376,  377,  378,
302];
303
/// String identifiers of the predefined "Expert Subset" charset, indexed by
/// glyph identifier.
///
/// See "Expert Subset" charset at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf#page=49>
#[rustfmt::skip]
const EXPERT_SUBSET_CHARSET: &[u16] = &[
      0,   1, 231, 232, 235, 236, 237, 238,
     13,  14,  15,  99, 239, 240, 241, 242,
    243, 244, 245, 246, 247, 248,  27,  28,
    249, 250, 251, 253, 254, 255, 256, 257,
    258, 259, 260, 261, 262, 263, 264, 265,
    266, 109, 110, 267, 268, 269, 270, 272,
    300, 301, 302, 305, 314, 315, 158, 155,
    163, 320, 321, 322, 323, 324, 325, 326,
    150, 164, 169, 327, 328, 329, 330, 331,
    332, 333, 334, 335, 336, 337, 338, 339,
    340, 341, 342, 343, 344, 345, 346,
];
314
#[cfg(test)]
mod tests {
    use super::*;
    use font_test_data::bebuffer::BeBuffer;

    #[test]
    fn iso_adobe_charset() {
        // A charset offset of 0 selects ISOAdobe, which maps every glyph id
        // to the identical string id.
        test_simple_mapping(0, 64, |gid| Some(gid.to_u32()));
    }

    #[test]
    fn expert_charset() {
        // A charset offset of 1 selects the array backed expert charset.
        test_simple_mapping(1, 64, |gid| {
            EXPERT_CHARSET
                .get(gid.to_u32() as usize)
                .map(|sid| *sid as u32)
        });
    }

    #[test]
    fn expert_subset_charset() {
        // A charset offset of 2 selects the array backed expert subset
        // charset.
        test_simple_mapping(2, 64, |gid| {
            EXPERT_SUBSET_CHARSET
                .get(gid.to_u32() as usize)
                .map(|sid| *sid as u32)
        });
    }

    /// Shared checks for the predefined (identity or array based) charsets.
    fn test_simple_mapping(
        charset_offset: usize,
        num_glyphs: u32,
        expected: impl Fn(GlyphId) -> Option<u32>,
    ) {
        let charset = Charset::new(FontData::new(&[]), charset_offset, num_glyphs).unwrap();
        for raw_gid in 0..num_glyphs {
            let gid = GlyphId::new(raw_gid);
            let sid = charset.string_id(gid).unwrap();
            assert_eq!(sid.to_u16() as u32, expected(gid).unwrap());
        }
        // Glyphs at or beyond num_glyphs must not be mapped
        for raw_gid in num_glyphs..u16::MAX as u32 {
            assert!(charset.string_id(GlyphId::new(raw_gid)).is_err());
        }
    }

    #[test]
    fn custom_mapping_format0() {
        let num_glyphs = 6;
        let buf = BeBuffer::new()
            // Padding so that the table offset is greater than 2
            .extend([0u8; 4])
            // format 0
            .push(0u8)
            // glyph array: each sid is gid * 2
            .extend([2u16, 4, 6, 8, 10]);
        let charset = Charset::new(FontData::new(buf.data()), 4, num_glyphs).unwrap();
        // Lookup code path
        for gid in 0..num_glyphs {
            let sid = charset.string_id(GlyphId::new(gid)).unwrap();
            assert_eq!(sid.to_u16() as u32, gid * 2);
        }
        // Iterator code path
        for (gid, sid) in charset.iter() {
            assert_eq!(sid.to_u16() as u32, gid.to_u32() * 2);
        }
        let yielded = charset.iter().count() as u32;
        assert_eq!(yielded, num_glyphs);
        // Out of bounds glyphs
        for gid in num_glyphs..u16::MAX as u32 {
            assert!(charset.string_id(GlyphId::new(gid)).is_err());
        }
    }

    #[test]
    fn custom_mapping_format1() {
        let num_glyphs = 7;
        let buf = BeBuffer::new()
            // Padding so that the table offset is greater than 2
            .extend([0u8; 4])
            // format 1
            .push(1u8)
            // Three disjoint range mappings: (first sid, n_left)
            .push(8u16)
            .push(2u8)
            .push(1200u16)
            .push(0u8)
            .push(20u16)
            .push(1u8);
        let expected_sids = [0, 8, 9, 10, 1200, 20, 21];
        test_range_mapping(buf.data(), num_glyphs, &expected_sids);
    }

    #[test]
    fn custom_mapping_format2() {
        let buf = BeBuffer::new()
            // Padding so that the table offset is greater than 2
            .extend([0u8; 4])
            // format 2
            .push(2u8)
            // Three disjoint range mappings: (first sid, n_left)
            .push(8u16)
            .push(2u16)
            .push(1200u16)
            .push(0u16)
            .push(20u16)
            .push(800u16);
        let mut expected_sids = vec![0, 8, 9, 10, 1200];
        // The last range covers 801 glyphs with sids 20 through 820
        expected_sids.extend(20..=820);
        let num_glyphs = expected_sids.len() as u32;
        test_range_mapping(buf.data(), num_glyphs, &expected_sids);
    }

    /// Shared checks for the range based custom charsets.
    fn test_range_mapping(data: &[u8], num_glyphs: u32, expected_sids: &[u32]) {
        let charset = Charset::new(FontData::new(data), 4, num_glyphs).unwrap();
        // Lookup code path
        for (gid, expected) in expected_sids.iter().enumerate() {
            let sid = charset.string_id(GlyphId::new(gid as u32)).unwrap();
            assert_eq!(sid.to_u16() as u32, *expected);
        }
        // Iterator code path
        let expected_pairs = expected_sids
            .iter()
            .enumerate()
            .map(|(gid, sid)| (GlyphId::new(gid as u32), StringId::new(*sid as u16)));
        assert!(charset.iter().eq(expected_pairs));
        assert_eq!(charset.iter().count() as u32, num_glyphs);
        // Out of bounds glyphs
        for gid in num_glyphs..u16::MAX as u32 {
            assert!(charset.string_id(GlyphId::new(gid)).is_err());
        }
    }
}