read_fonts/tables/
name.rs

1//! The [name (Naming)](https://docs.microsoft.com/en-us/typography/opentype/spec/name) table
2
3include!("../../generated/generated_name.rs");
4
5pub use types::NameId;
6
7impl<'a> Name<'a> {
8    /// The FontData containing the encoded name strings.
9    pub fn string_data(&self) -> FontData<'a> {
10        let base = self.offset_data();
11        let off = self.storage_offset();
12        base.split_off(off as usize).unwrap_or_default()
13    }
14}
15
16impl NameRecord {
17    /// Return a type that can decode the string data for this name entry.
18    pub fn string<'a>(&self, data: FontData<'a>) -> Result<NameString<'a>, ReadError> {
19        let start = self.string_offset().non_null().unwrap_or(0);
20        let end = start + self.length() as usize;
21
22        let data = data
23            .as_bytes()
24            .get(start..end)
25            .ok_or(ReadError::OutOfBounds)?;
26
27        let encoding = Encoding::new(self.platform_id(), self.encoding_id());
28        Ok(NameString { data, encoding })
29    }
30
31    // reference from fonttools:
32    // https://github.com/fonttools/fonttools/blob/c2119229cfb02cdb7c5a63374ef29d3d514259e8/Lib/fontTools/ttLib/tables/_n_a_m_e.py#L509
33    pub fn is_unicode(&self) -> bool {
34        self.platform_id() == 0
35            || (self.platform_id() == 3 && [0, 1, 10].contains(&self.encoding_id()))
36    }
37}
38
39impl LangTagRecord {
40    /// Return a type that can decode the string data for this name entry.
41    pub fn lang_tag<'a>(&self, data: FontData<'a>) -> Result<NameString<'a>, ReadError> {
42        let start = self.lang_tag_offset().non_null().unwrap_or(0);
43        let end = start + self.length() as usize;
44
45        let data = data
46            .as_bytes()
47            .get(start..end)
48            .ok_or(ReadError::OutOfBounds)?;
49
50        let encoding = Encoding::Utf16Be;
51        Ok(NameString { data, encoding })
52    }
53}
54
55//-- all this is from pinot https://github.com/dfrg/pinot/blob/eff5239018ca50290fb890a84da3dd51505da364/src/name.rs
56/// Entry for a name in the naming table.
57///
58/// This provides an iterator over characters.
59#[derive(Copy, Clone, PartialEq, Eq)]
60pub struct NameString<'a> {
61    data: &'a [u8],
62    encoding: Encoding,
63}
64
65impl<'a> NameString<'a> {
66    /// An iterator over the `char`s in this name.
67    pub fn chars(&self) -> CharIter<'a> {
68        CharIter {
69            data: self.data,
70            encoding: self.encoding,
71            pos: 0,
72        }
73    }
74}
75
#[cfg(feature = "experimental_traverse")]
impl<'a> traversal::SomeString<'a> for NameString<'a> {
    /// Exposes the decoded characters of this name as a boxed iterator.
    fn iter_chars(&self) -> Box<dyn Iterator<Item = char> + 'a> {
        let decoded = self.chars();
        Box::new(decoded)
    }
}
82
#[cfg(feature = "experimental_traverse")]
impl NameRecord {
    /// Builds the traversal field for this record's string: the raw offset plus
    /// the result of resolving it against `data` (which may be an error).
    fn traverse_string<'a>(&self, data: FontData<'a>) -> traversal::FieldType<'a> {
        let resolved = traversal::StringOffset {
            offset: self.string_offset().into(),
            target: self.string(data).map(|name| Box::new(name) as _),
        };
        FieldType::StringOffset(resolved)
    }
}
92
#[cfg(feature = "experimental_traverse")]
impl LangTagRecord {
    /// Builds the traversal field for this language tag: the raw offset plus the
    /// result of resolving it against `data` (which may be an error).
    fn traverse_lang_tag<'a>(&self, data: FontData<'a>) -> traversal::FieldType<'a> {
        let resolved = traversal::StringOffset {
            offset: self.lang_tag_offset().into(),
            target: self.lang_tag(data).map(|tag| Box::new(tag) as _),
        };
        FieldType::StringOffset(resolved)
    }
}
102
103impl<'a> IntoIterator for NameString<'a> {
104    type Item = char;
105    type IntoIter = CharIter<'a>;
106    fn into_iter(self) -> Self::IntoIter {
107        self.chars()
108    }
109}
110
111impl std::fmt::Display for NameString<'_> {
112    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
113        for c in self.chars() {
114            c.fmt(f)?;
115        }
116        Ok(())
117    }
118}
119
120impl std::fmt::Debug for NameString<'_> {
121    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
122        write!(f, "\"{self}\"")
123    }
124}
125
126/// An iterator over the chars of a name record.
127#[derive(Clone)]
128pub struct CharIter<'a> {
129    data: &'a [u8],
130    encoding: Encoding,
131    pos: usize,
132}
133
134impl CharIter<'_> {
135    fn bump_u16(&mut self) -> Option<u16> {
136        let result = self
137            .data
138            .get(self.pos..self.pos + 2)
139            .map(|x| u16::from_be_bytes(x.try_into().unwrap()))?;
140        self.pos += 2;
141        Some(result)
142    }
143
144    fn bump_u8(&mut self) -> Option<u8> {
145        let result = self.data.get(self.pos)?;
146        self.pos += 1;
147        Some(*result)
148    }
149}
150
151impl Iterator for CharIter<'_> {
152    type Item = char;
153
154    fn next(&mut self) -> Option<Self::Item> {
155        if self.pos >= self.data.len() {
156            return None;
157        }
158        let rep = core::char::REPLACEMENT_CHARACTER;
159        let raw_c = match self.encoding {
160            Encoding::Utf16Be => {
161                let c1 = self.bump_u16()? as u32;
162                if (0xD800..0xDC00).contains(&c1) {
163                    let Some(c2) = self.bump_u16() else {
164                        return Some(rep);
165                    };
166                    ((c1 & 0x3FF) << 10) + (c2 as u32 & 0x3FF) + 0x10000
167                } else {
168                    c1
169                }
170            }
171            Encoding::MacRoman => {
172                let c = self.bump_u8()?;
173                MacRomanMapping.decode(c) as u32
174            }
175            _ => return None,
176        };
177        Some(std::char::from_u32(raw_c).unwrap_or(rep))
178    }
179}
180
/// The string encodings this module distinguishes for name table entries.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// Big-endian UTF-16.
    Utf16Be,
    /// Mac OS Roman, one byte per character.
    MacRoman,
    /// Any platform/encoding combination not recognized by [`Encoding::new`].
    Unknown,
}

impl Encoding {
    /// Determine the encoding from a record's platform id and encoding id.
    ///
    /// * platform 0 (any encoding id) and platform 3 with encoding id 0, 1 or 10
    ///   map to [`Encoding::Utf16Be`];
    /// * platform 1 with encoding id 0 maps to [`Encoding::MacRoman`];
    /// * everything else is [`Encoding::Unknown`].
    pub fn new(platform_id: u16, encoding_id: u16) -> Encoding {
        match (platform_id, encoding_id) {
            (0, _) | (3, 0 | 1 | 10) => Encoding::Utf16Be,
            (1, 0) => Encoding::MacRoman,
            _ => Encoding::Unknown,
        }
    }
}
202
203/// A helper for encoding and decoding Mac OS Roman encoded strings.
204pub struct MacRomanMapping;
205
206impl MacRomanMapping {
207    const START_REMAP: u8 = 128;
208    /// Convert from a mac-roman encoded byte to a `char`
209    pub fn decode(self, raw: u8) -> char {
210        if raw < Self::START_REMAP {
211            raw as char
212        } else {
213            let idx = raw - Self::START_REMAP;
214            char::from_u32(MAC_ROMAN_DECODE[idx as usize] as u32).unwrap()
215        }
216    }
217
218    /// convert from a char to a mac-roman encoded byte, if the char is in the mac-roman charset.
219    pub fn encode(self, c: char) -> Option<u8> {
220        let raw_c = c as u32;
221        let raw_c: u16 = raw_c.try_into().ok()?;
222        if raw_c < Self::START_REMAP as u16 {
223            Some(raw_c as u8)
224        } else {
225            match MAC_ROMAN_ENCODE.binary_search_by_key(&raw_c, |(unic, _)| *unic) {
226                Ok(idx) => Some(MAC_ROMAN_ENCODE[idx].1),
227                Err(_) => None,
228            }
229        }
230    }
231}
232
/// Decode table for the upper half of the Mac Roman encoding.
///
/// Entry `i` is the Unicode code point for the Mac Roman byte `128 + i`, so the
/// table covers the bytes `128..=255`. Each row below holds eight entries; the
/// trailing comment gives the byte value of the row's first entry.
#[rustfmt::skip]
static MAC_ROMAN_DECODE: [u16; 128] = [
    196, 197, 199, 201, 209, 214, 220, 225,             // 0x80
    224, 226, 228, 227, 229, 231, 233, 232,             // 0x88
    234, 235, 237, 236, 238, 239, 241, 243,             // 0x90
    242, 244, 246, 245, 250, 249, 251, 252,             // 0x98
    8224, 176, 162, 163, 167, 8226, 182, 223,           // 0xA0
    174, 169, 8482, 180, 168, 8800, 198, 216,           // 0xA8
    8734, 177, 8804, 8805, 165, 181, 8706, 8721,        // 0xB0
    8719, 960, 8747, 170, 186, 937, 230, 248,           // 0xB8
    191, 161, 172, 8730, 402, 8776, 8710, 171,          // 0xC0
    187, 8230, 160, 192, 195, 213, 338, 339,            // 0xC8
    8211, 8212, 8220, 8221, 8216, 8217, 247, 9674,      // 0xD0
    255, 376, 8260, 8364, 8249, 8250, 64257, 64258,     // 0xD8
    8225, 183, 8218, 8222, 8240, 194, 202, 193,         // 0xE0
    203, 200, 205, 206, 207, 204, 211, 212,             // 0xE8
    63743, 210, 218, 219, 217, 305, 710, 732,           // 0xF0
    175, 728, 729, 730, 184, 733, 731, 711,             // 0xF8
];
248
/// Encode table for the upper half of the Mac Roman encoding.
///
/// Each entry is `(unicode code point, mac roman byte)`. The entries are sorted
/// by code point so that the table can be binary searched.
#[rustfmt::skip]
static MAC_ROMAN_ENCODE: [(u16, u8); 128] = [
    // U+00A0..=U+00FF
    (160, 202), (161, 193), (162, 162), (163, 163), (165, 180),
    (167, 164), (168, 172), (169, 169), (170, 187), (171, 199),
    (172, 194), (174, 168), (175, 248), (176, 161), (177, 177),
    (180, 171), (181, 181), (182, 166), (183, 225), (184, 252),
    (186, 188), (187, 200), (191, 192), (192, 203), (193, 231),
    (194, 229), (195, 204), (196, 128), (197, 129), (198, 174),
    (199, 130), (200, 233), (201, 131), (202, 230), (203, 232),
    (204, 237), (205, 234), (206, 235), (207, 236), (209, 132),
    (210, 241), (211, 238), (212, 239), (213, 205), (214, 133),
    (216, 175), (217, 244), (218, 242), (219, 243), (220, 134),
    (223, 167), (224, 136), (225, 135), (226, 137), (227, 139),
    (228, 138), (229, 140), (230, 190), (231, 141), (232, 143),
    (233, 142), (234, 144), (235, 145), (236, 147), (237, 146),
    (238, 148), (239, 149), (241, 150), (242, 152), (243, 151),
    (244, 153), (245, 155), (246, 154), (247, 214), (248, 191),
    (249, 157), (250, 156), (251, 158), (252, 159), (255, 216),
    // U+0131..=U+03C0
    (305, 245), (338, 206), (339, 207), (376, 217), (402, 196),
    (710, 246), (711, 255), (728, 249), (729, 250), (730, 251),
    (731, 254), (732, 247), (733, 253), (937, 189), (960, 185),
    // U+2013..=U+25CA
    (8211, 208), (8212, 209), (8216, 212), (8217, 213), (8218, 226),
    (8220, 210), (8221, 211), (8222, 227), (8224, 160), (8225, 224),
    (8226, 165), (8230, 201), (8240, 228), (8249, 220), (8250, 221),
    (8260, 218), (8364, 219), (8482, 170), (8706, 182), (8710, 198),
    (8719, 184), (8721, 183), (8730, 195), (8734, 176), (8747, 186),
    (8776, 197), (8800, 173), (8804, 178), (8805, 179), (9674, 215),
    // U+F8FF..=U+FB02
    (63743, 240), (64257, 222), (64258, 223),
];
285
#[cfg(test)]
mod tests {
    use super::*;

    /// Every char of a Mac Roman-representable string must survive an
    /// encode/decode round trip.
    #[test]
    fn mac_roman() {
        static INPUT: &str = "Joachim Müller-Lancé";
        let round_tripped: String = INPUT
            .chars()
            .map(|c| {
                let byte = MacRomanMapping.encode(c).unwrap();
                MacRomanMapping.decode(byte)
            })
            .collect();
        assert_eq!(round_tripped, INPUT);
    }

    /// An unpaired high surrogate at the very end of the data decodes to the
    /// replacement character.
    #[test]
    fn lone_surrogate_at_end() {
        // DEVANAGARI LETTER SHORT A (U+0904), unpaired high surrogate (0xD800)
        let bytes: &[u8] = &[0x09, 0x04, 0xD8, 0x00];
        let decoded: Vec<char> = CharIter {
            data: bytes,
            encoding: Encoding::Utf16Be,
            pos: 0,
        }
        .collect();
        assert_eq!(decoded, ['ऄ', std::char::REPLACEMENT_CHARACTER]);
    }
}