read_fonts/tables/layout/
script.rs

1//! Additional support for working with OpenType scripts and language systems.
2
3use super::{FeatureList, LangSys, ReadError, Script, ScriptList, Tag, TaggedElement};
4use std::ops::Deref;
5
6/// A script chosen from a set of candidate tags.
7///
8/// Returned by the [`ScriptList::select`] method.
9#[derive(Copy, Clone, PartialEq, Debug)]
10pub struct SelectedScript {
11    /// The actual OpenType tag of the chosen script.
12    pub tag: Tag,
13    /// Index of the script in the [`ScriptList`].
14    pub index: u16,
15    /// True if a script was chosen that wasn't in the requested list.
16    pub is_fallback: bool,
17}
18
19impl<'a> ScriptList<'a> {
20    /// Returns the index of the script with the given tag.
21    pub fn index_for_tag(&self, tag: Tag) -> Option<u16> {
22        self.script_records()
23            .binary_search_by_key(&tag, |rec| rec.script_tag())
24            .map(|index| index as u16)
25            .ok()
26    }
27
28    /// Returns the tag and script at the given index.
29    pub fn get(&self, index: u16) -> Result<TaggedElement<Script<'a>>, ReadError> {
30        self.script_records()
31            .get(index as usize)
32            .ok_or(ReadError::OutOfBounds)
33            .and_then(|rec| {
34                Ok(TaggedElement::new(
35                    rec.script_tag(),
36                    rec.script(self.offset_data())?,
37                ))
38            })
39    }
40
41    /// Finds the first available script that matches one of the given tags.
42    ///
43    /// When none of the requested scripts are available, then `DFLT`, `dflt`
44    /// and `latn` tags are tried in that order.
45    ///
46    /// If you're starting from a Unicode script code, use the
47    /// [`ScriptTags::from_unicode`] function to generate the appropriate set
48    /// of tags to pass to this method.
49    ///
50    /// See [`hb_ot_layout_table_select_script`](https://github.com/harfbuzz/harfbuzz/blob/2edc371e97d6d2c5ad0e085b26e9af0123501647/src/hb-ot-layout.cc#L547)
51    /// for the corresponding HarfBuzz function.
52    pub fn select(&self, tags: &[Tag]) -> Option<SelectedScript> {
53        for &tag in tags {
54            if let Some(index) = self.index_for_tag(tag) {
55                return Some(SelectedScript {
56                    tag,
57                    index,
58                    is_fallback: false,
59                });
60            }
61        }
62        for tag in [
63            // Try finding 'DFLT'
64            Tag::new(b"DFLT"),
65            // Try with 'dflt'; MS site has had typos and many fonts use it now :(
66            Tag::new(b"dflt"),
67            // try with 'latn'; some old fonts put their features there even though
68            // they're really trying to support Thai, for example :(
69            Tag::new(b"latn"),
70        ] {
71            if let Some(index) = self.index_for_tag(tag) {
72                return Some(SelectedScript {
73                    tag,
74                    index,
75                    is_fallback: true,
76                });
77            }
78        }
79        None
80    }
81}
82
83impl<'a> Script<'a> {
84    /// If the script contains a language system with the given tag, returns
85    /// the index.
86    pub fn lang_sys_index_for_tag(&self, tag: Tag) -> Option<u16> {
87        self.lang_sys_records()
88            .binary_search_by_key(&tag, |rec| rec.lang_sys_tag())
89            .map(|index| index as u16)
90            .ok()
91    }
92
93    /// Returns the language system with the given index.
94    pub fn lang_sys(&self, index: u16) -> Result<TaggedElement<LangSys<'a>>, ReadError> {
95        self.lang_sys_records()
96            .get(index as usize)
97            .ok_or(ReadError::OutOfBounds)
98            .and_then(|rec| {
99                Ok(TaggedElement::new(
100                    rec.lang_sys_tag(),
101                    rec.lang_sys(self.offset_data())?,
102                ))
103            })
104    }
105}
106
107impl LangSys<'_> {
108    /// If the language system references a feature with the given tag,
109    /// returns the index of that feature in the specified feature list.
110    ///
111    /// The feature list can be obtained from the `feature_list` method on
112    /// the parent [Gsub](crate::tables::gsub::Gsub) or
113    /// [Gpos](crate::tables::gpos::Gpos) tables.
114    pub fn feature_index_for_tag(&self, list: &FeatureList, tag: Tag) -> Option<u16> {
115        let records = list.feature_records();
116        self.feature_indices()
117            .iter()
118            .map(|ix| ix.get())
119            .find(|&feature_ix| {
120                records
121                    .get(feature_ix as usize)
122                    .map(|rec| rec.feature_tag())
123                    == Some(tag)
124            })
125    }
126}
127
128/// A prioritized list of OpenType script tags mapped from a Unicode script
129/// tag.
130///
131/// This is useful as input to [`ScriptList::select`] when you have a Unicode
132/// script and would like to find the appropriate OpenType script for shaping.
133#[derive(Copy, Clone, PartialEq, Eq, Default)]
134pub struct ScriptTags {
135    tags: [Tag; 3],
136    len: usize,
137}
138
139impl ScriptTags {
140    /// Given a [Unicode script code](https://unicode.org/iso15924/iso15924-codes.html),
141    /// returns a prioritized list of matching
142    /// [OpenType script tags](https://learn.microsoft.com/en-us/typography/opentype/spec/scripttags).
143    ///
144    /// See [hb_ot_all_tags_from_script](https://github.com/harfbuzz/harfbuzz/blob/63d09dbefcf7ad9f794ca96445d37b6d8c3c9124/src/hb-ot-tag.cc#L155C1-L155C27)
145    /// for the equivalent HarfBuzz function.    
146    pub fn from_unicode(unicode_script: Tag) -> Self {
147        let mut tags = [Tag::default(); 3];
148        let mut len = 0;
149        if let Some(new_tag) = new_tag_from_unicode(unicode_script) {
150            // Myanmar maps to mym2 but there is no mym3
151            if new_tag != Tag::new(b"mym2") {
152                let mut bytes = new_tag.to_be_bytes();
153                bytes[3] = b'3';
154                tags[len] = Tag::new(&bytes);
155                len += 1;
156            }
157            tags[len] = new_tag;
158            len += 1;
159        }
160        tags[len] = old_tag_from_unicode(unicode_script);
161        len += 1;
162        Self { tags, len }
163    }
164
165    /// Returns a slice containing the mapped script tags.
166    pub fn as_slice(&self) -> &[Tag] {
167        &self.tags[..self.len]
168    }
169}
170
171impl Deref for ScriptTags {
172    type Target = [Tag];
173
174    fn deref(&self) -> &Self::Target {
175        &self.tags[..self.len]
176    }
177}
178
179impl std::fmt::Debug for ScriptTags {
180    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
181        write!(f, "{:?}", self.as_slice())
182    }
183}
184
185// See <https://github.com/harfbuzz/harfbuzz/blob/63d09dbefcf7ad9f794ca96445d37b6d8c3c9124/src/hb-ot-tag.cc#L37>
186fn old_tag_from_unicode(unicode_script: Tag) -> Tag {
187    let mut bytes = unicode_script.to_be_bytes();
188    let tag_bytes = match &bytes {
189        b"Zmth" => b"math",
190        // Katakana and Hiragana both map to 'kana'
191        b"Hira" => b"kana",
192        // Spaces at the end are preserved, unlike ISO 15924
193        b"Laoo" => b"lao ",
194        b"Yiii" => b"yi  ",
195        // Unicode 5.0 additions
196        b"Nkoo" => b"nko ",
197        // Unicode 5.1 additions
198        b"Vaii" => b"vai ",
199        _ => {
200            // Else, just change the first char to lowercase
201            bytes[0] = bytes[0].to_ascii_lowercase();
202            &bytes
203        }
204    };
205    Tag::new(tag_bytes)
206}
207
208/// Mapping from Unicode script code to "new" OpenType script
209/// tags.
210#[doc(hidden)]
211pub const UNICODE_TO_NEW_OPENTYPE_SCRIPT_TAGS: &[(&[u8; 4], Tag)] = &[
212    (b"Beng", Tag::new(b"bng2")),
213    (b"Deva", Tag::new(b"dev2")),
214    (b"Gujr", Tag::new(b"gjr2")),
215    (b"Guru", Tag::new(b"gur2")),
216    (b"Knda", Tag::new(b"knd2")),
217    (b"Mlym", Tag::new(b"mlm2")),
218    (b"Mymr", Tag::new(b"mym2")),
219    (b"Orya", Tag::new(b"ory2")),
220    (b"Taml", Tag::new(b"tml2")),
221    (b"Telu", Tag::new(b"tel2")),
222];
223
224// See <https://github.com/harfbuzz/harfbuzz/blob/63d09dbefcf7ad9f794ca96445d37b6d8c3c9124/src/hb-ot-tag.cc#L84>
225fn new_tag_from_unicode(unicode_script: Tag) -> Option<Tag> {
226    let ix = UNICODE_TO_NEW_OPENTYPE_SCRIPT_TAGS
227        .binary_search_by_key(&unicode_script.to_be_bytes(), |entry| *entry.0)
228        .ok()?;
229    UNICODE_TO_NEW_OPENTYPE_SCRIPT_TAGS
230        .get(ix)
231        .map(|entry| entry.1)
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{FontRef, TableProvider};

    /// Each script tag resolves to its position in the GSUB script list.
    #[test]
    fn script_index_for_tag() {
        let font = FontRef::new(font_test_data::NOTOSERIFHEBREW_AUTOHINT_METRICS).unwrap();
        let gsub_scripts = font.gsub().unwrap().script_list().unwrap();
        // Tags in the order this test expects the font to list them
        let expected_order = [b"DFLT", b"cyrl", b"grek", b"hebr", b"latn"];
        for (position, &raw_tag) in expected_order.iter().enumerate() {
            assert_eq!(
                gsub_scripts.index_for_tag(Tag::new(raw_tag)),
                Some(position as u16)
            );
        }
    }

    /// Scripts without special handling map to exactly one tag.
    #[test]
    fn simple_script_tag_from_unicode() {
        for unicode_tag in [b"Cyrl", b"Grek", b"Hebr", b"Latn"] {
            // The single expected tag is the Unicode tag with its first
            // character lowercased
            let mut lowered = *unicode_tag;
            lowered[0].make_ascii_lowercase();
            let mapped = ScriptTags::from_unicode(Tag::new(unicode_tag));
            assert_eq!(mapped.as_slice(), &[Tag::new(&lowered)]);
        }
    }

    /// Scripts with hand-written mappings produce the dedicated tag.
    #[test]
    fn exception_script_tag_from_unicode() {
        // (Unicode, OpenType)
        let cases = [
            (b"Kana", b"kana"),
            // Hiragana maps to kana
            (b"Hira", b"kana"),
            // Unicode extends last char but OpenType pads with spaces
            // for tags < 4 bytes
            (b"Nkoo", b"nko "),
            (b"Yiii", b"yi  "),
            (b"Vaii", b"vai "),
        ];
        for (unicode_tag, ot_tag) in cases {
            let mapped = ScriptTags::from_unicode(Tag::new(unicode_tag));
            assert_eq!(mapped.as_slice(), &[Tag::new(ot_tag)]);
        }
    }

    /// Scripts with "new" tags produce several tags in priority order.
    #[test]
    fn multi_script_tags_from_unicode() {
        // (Unicode, OpenType)
        let cases = [
            (b"Beng", &[b"bng3", b"bng2", b"beng"][..]),
            (b"Orya", &[b"ory3", b"ory2", b"orya"]),
            (b"Mlym", &[b"mlm3", b"mlm2", b"mlym"]),
            // There's no version 3 tag for Myanmar
            (b"Mymr", &[b"mym2", b"mymr"]),
        ];
        for (unicode_tag, ot_tags) in cases {
            let mapped = ScriptTags::from_unicode(Tag::new(unicode_tag));
            let expected: Vec<Tag> = ot_tags.iter().map(|&bytes| Tag::new(bytes)).collect();
            assert_eq!(mapped.as_slice(), expected.as_slice());
        }
    }

    /// Selection prefers requested tags and otherwise reports a fallback.
    #[test]
    fn select_scripts_from_unicode() {
        let font = FontRef::new(font_test_data::NOTOSERIFHEBREW_AUTOHINT_METRICS).unwrap();
        let gsub_scripts = font.gsub().unwrap().script_list().unwrap();
        // We know Hebrew is available
        let hebrew_tags = ScriptTags::from_unicode(Tag::new(b"Hebr"));
        assert_eq!(
            gsub_scripts.select(hebrew_tags.as_slice()),
            Some(SelectedScript {
                tag: Tag::new(b"hebr"),
                index: 3,
                is_fallback: false,
            })
        );
        // But this font doesn't contain any Indic scripts so we'll
        // select a fallback for Bengali
        let bengali_tags = ScriptTags::from_unicode(Tag::new(b"Beng"));
        assert_eq!(
            gsub_scripts.select(bengali_tags.as_slice()),
            Some(SelectedScript {
                tag: Tag::new(b"DFLT"),
                index: 0,
                is_fallback: true,
            })
        );
    }

    /// `get` returns the script whose tag was used to find the index.
    #[test]
    fn script_list_get() {
        const LATN: Tag = Tag::new(b"latn");
        let font = FontRef::new(font_test_data::CANTARELL_VF_TRIMMED).unwrap();
        let script_list = font.gsub().unwrap().script_list().unwrap();
        let latn_index = script_list.index_for_tag(LATN).unwrap();
        assert_eq!(latn_index, 1);
        let latn_script = script_list.get(latn_index).unwrap();
        assert_eq!(latn_script.tag, LATN);
    }

    /// Language systems can be found by tag and fetched by index.
    #[test]
    fn script_lang_sys_helpers() {
        const TRK: Tag = Tag::new(b"TRK ");
        let font = FontRef::new(font_test_data::CANTARELL_VF_TRIMMED).unwrap();
        let script_list = font.gsub().unwrap().script_list().unwrap();
        let script = script_list.get(1).unwrap();
        let trk_index = script.lang_sys_index_for_tag(TRK).unwrap();
        assert_eq!(trk_index, 0);
        let trk_lang_sys = script.lang_sys(trk_index).unwrap();
        assert_eq!(trk_lang_sys.tag, TRK);
    }

    /// Feature lookup by tag covers both present and absent features.
    #[test]
    fn feature_index_for_tag() {
        let font = FontRef::new(font_test_data::MATERIAL_SYMBOLS_SUBSET).unwrap();
        let gsub = font.gsub().unwrap();
        let script_list = gsub.script_list().unwrap();
        let feature_list = gsub.feature_list().unwrap();
        let script = script_list.get(1).unwrap();
        let lang_sys = script.default_lang_sys().unwrap().unwrap();
        let expectations: [(&[u8; 4], Option<u16>); 5] = [
            (b"rclt", Some(0)),
            (b"rlig", Some(1)),
            (b"locl", None),
            (b"abvs", None),
            (b"liga", None),
        ];
        for (raw_tag, expected_index) in expectations {
            assert_eq!(
                lang_sys.feature_index_for_tag(&feature_list, Tag::new(raw_tag)),
                expected_index
            );
        }
    }
}