// read_fonts/tables/postscript/string.rs

//! PostScript string identifiers.
2
/// PostScript string identifier (SID).
///
/// A thin, copyable wrapper around the raw 16-bit identifier. Use
/// [`StringId::standard_string`] to find out whether the identifier names one
/// of the predefined standard strings or an entry of a font's own string
/// INDEX.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub struct StringId(u16);
6
7impl StringId {
8    /// Creates an identifier from a 16-bit unsigned integer.
9    pub const fn new(raw: u16) -> Self {
10        Self(raw)
11    }
12
13    /// Returns the underlying identifier as a 16-bit unsigned integer.
14    pub const fn to_u16(self) -> u16 {
15        self.0
16    }
17
18    /// Resolves the identifier as a standard string.
19    ///
20    /// If the identifier represents a standard string, returns `Ok(string)`,
21    /// otherwise returns `Err(index)` with the index that should be used to
22    /// retrieve the string from the CFF string INDEX.
23    ///
24    /// The standard string set is available in the section
25    /// "Appendix A - Standard Strings" at <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf>.
26    pub fn standard_string(self) -> Result<Latin1String<'static>, usize> {
27        let ix = self.0 as usize;
28        if let Some(string) = STANDARD_STRINGS.get(ix) {
29            // The standard strings are all ASCII so it's safe to interpret them
30            // as Latin-1. This is verified in a unit test.
31            Ok(Latin1String::new(string.as_bytes()))
32        } else {
33            Err(ix - STANDARD_STRINGS.len())
34        }
35    }
36}
37
38impl From<i32> for StringId {
39    fn from(value: i32) -> Self {
40        Self::new(value as u16)
41    }
42}
43
/// Reference to a Latin-1 encoded string.
///
/// Strings stored in all PostScript defined fonts are usually ASCII but are
/// technically encoded in Latin-1, where every byte is exactly one
/// character. This type wraps the raw string data to prevent attempts to
/// decode it as UTF-8.
///
/// This implements `PartialEq<&str>` to support easy comparison with UTF-8
/// strings.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Latin1String<'a> {
    /// Raw string data; each byte is one Latin-1 character.
    chars: &'a [u8],
}
56
57impl<'a> Latin1String<'a> {
58    /// Creates a new Latin-1 encoded string reference from the given bytes,
59    /// with each representing a character.
60    pub const fn new(chars: &'a [u8]) -> Self {
61        Self { chars }
62    }
63
64    /// Returns an iterator over the characters of the string.
65    ///
66    /// This simply converts each byte to `char`.
67    pub fn chars(&self) -> impl Iterator<Item = char> + Clone + 'a {
68        self.chars.iter().map(|b| *b as char)
69    }
70
71    /// Returns the raw bytes of the string.
72    pub fn bytes(&self) -> &'a [u8] {
73        self.chars
74    }
75}
76
77impl PartialEq<&str> for Latin1String<'_> {
78    fn eq(&self, other: &&str) -> bool {
79        self.chars().eq(other.chars())
80    }
81}
82
83impl std::fmt::Display for Latin1String<'_> {
84    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
85        for ch in self.chars() {
86            write!(f, "{}", ch)?;
87        }
88        Ok(())
89    }
90}
91
/// The PostScript standard string set.
///
/// The position of a string in this table is its string identifier (SID), so
/// the table holds the strings for SIDs 0 through 390 in order. All entries
/// are plain ASCII.
///
/// See "Appendix A - Standard Strings" in <https://adobe-type-tools.github.io/font-tech-notes/pdfs/5176.CFF.pdf>
pub const STANDARD_STRINGS: &[&str] = &[
    ".notdef",
    "space",
    "exclam",
    "quotedbl",
    "numbersign",
    "dollar",
    "percent",
    "ampersand",
    "quoteright",
    "parenleft",
    "parenright",
    "asterisk",
    "plus",
    "comma",
    "hyphen",
    "period",
    "slash",
    "zero",
    "one",
    "two",
    "three",
    "four",
    "five",
    "six",
    "seven",
    "eight",
    "nine",
    "colon",
    "semicolon",
    "less",
    "equal",
    "greater",
    "question",
    "at",
    "A",
    "B",
    "C",
    "D",
    "E",
    "F",
    "G",
    "H",
    "I",
    "J",
    "K",
    "L",
    "M",
    "N",
    "O",
    "P",
    "Q",
    "R",
    "S",
    "T",
    "U",
    "V",
    "W",
    "X",
    "Y",
    "Z",
    "bracketleft",
    "backslash",
    "bracketright",
    "asciicircum",
    "underscore",
    "quoteleft",
    "a",
    "b",
    "c",
    "d",
    "e",
    "f",
    "g",
    "h",
    "i",
    "j",
    "k",
    "l",
    "m",
    "n",
    "o",
    "p",
    "q",
    "r",
    "s",
    "t",
    "u",
    "v",
    "w",
    "x",
    "y",
    "z",
    "braceleft",
    "bar",
    "braceright",
    "asciitilde",
    "exclamdown",
    "cent",
    "sterling",
    "fraction",
    "yen",
    "florin",
    "section",
    "currency",
    "quotesingle",
    "quotedblleft",
    "guillemotleft",
    "guilsinglleft",
    "guilsinglright",
    "fi",
    "fl",
    "endash",
    "dagger",
    "daggerdbl",
    "periodcentered",
    "paragraph",
    "bullet",
    "quotesinglbase",
    "quotedblbase",
    "quotedblright",
    "guillemotright",
    "ellipsis",
    "perthousand",
    "questiondown",
    "grave",
    "acute",
    "circumflex",
    "tilde",
    "macron",
    "breve",
    "dotaccent",
    "dieresis",
    "ring",
    "cedilla",
    "hungarumlaut",
    "ogonek",
    "caron",
    "emdash",
    "AE",
    "ordfeminine",
    "Lslash",
    "Oslash",
    "OE",
    "ordmasculine",
    "ae",
    "dotlessi",
    "lslash",
    "oslash",
    "oe",
    "germandbls",
    "onesuperior",
    "logicalnot",
    "mu",
    "trademark",
    "Eth",
    "onehalf",
    "plusminus",
    "Thorn",
    "onequarter",
    "divide",
    "brokenbar",
    "degree",
    "thorn",
    "threequarters",
    "twosuperior",
    "registered",
    "minus",
    "eth",
    "multiply",
    "threesuperior",
    "copyright",
    "Aacute",
    "Acircumflex",
    "Adieresis",
    "Agrave",
    "Aring",
    "Atilde",
    "Ccedilla",
    "Eacute",
    "Ecircumflex",
    "Edieresis",
    "Egrave",
    "Iacute",
    "Icircumflex",
    "Idieresis",
    "Igrave",
    "Ntilde",
    "Oacute",
    "Ocircumflex",
    "Odieresis",
    "Ograve",
    "Otilde",
    "Scaron",
    "Uacute",
    "Ucircumflex",
    "Udieresis",
    "Ugrave",
    "Yacute",
    "Ydieresis",
    "Zcaron",
    "aacute",
    "acircumflex",
    "adieresis",
    "agrave",
    "aring",
    "atilde",
    "ccedilla",
    "eacute",
    "ecircumflex",
    "edieresis",
    "egrave",
    "iacute",
    "icircumflex",
    "idieresis",
    "igrave",
    "ntilde",
    "oacute",
    "ocircumflex",
    "odieresis",
    "ograve",
    "otilde",
    "scaron",
    "uacute",
    "ucircumflex",
    "udieresis",
    "ugrave",
    "yacute",
    "ydieresis",
    "zcaron",
    "exclamsmall",
    "Hungarumlautsmall",
    "dollaroldstyle",
    "dollarsuperior",
    "ampersandsmall",
    "Acutesmall",
    "parenleftsuperior",
    "parenrightsuperior",
    "twodotenleader",
    "onedotenleader",
    "zerooldstyle",
    "oneoldstyle",
    "twooldstyle",
    "threeoldstyle",
    "fouroldstyle",
    "fiveoldstyle",
    "sixoldstyle",
    "sevenoldstyle",
    "eightoldstyle",
    "nineoldstyle",
    "commasuperior",
    "threequartersemdash",
    "periodsuperior",
    "questionsmall",
    "asuperior",
    "bsuperior",
    "centsuperior",
    "dsuperior",
    "esuperior",
    "isuperior",
    "lsuperior",
    "msuperior",
    "nsuperior",
    "osuperior",
    "rsuperior",
    "ssuperior",
    "tsuperior",
    "ff",
    "ffi",
    "ffl",
    "parenleftinferior",
    "parenrightinferior",
    "Circumflexsmall",
    "hyphensuperior",
    "Gravesmall",
    "Asmall",
    "Bsmall",
    "Csmall",
    "Dsmall",
    "Esmall",
    "Fsmall",
    "Gsmall",
    "Hsmall",
    "Ismall",
    "Jsmall",
    "Ksmall",
    "Lsmall",
    "Msmall",
    "Nsmall",
    "Osmall",
    "Psmall",
    "Qsmall",
    "Rsmall",
    "Ssmall",
    "Tsmall",
    "Usmall",
    "Vsmall",
    "Wsmall",
    "Xsmall",
    "Ysmall",
    "Zsmall",
    "colonmonetary",
    "onefitted",
    "rupiah",
    "Tildesmall",
    "exclamdownsmall",
    "centoldstyle",
    "Lslashsmall",
    "Scaronsmall",
    "Zcaronsmall",
    "Dieresissmall",
    "Brevesmall",
    "Caronsmall",
    "Dotaccentsmall",
    "Macronsmall",
    "figuredash",
    "hypheninferior",
    "Ogoneksmall",
    "Ringsmall",
    "Cedillasmall",
    "questiondownsmall",
    "oneeighth",
    "threeeighths",
    "fiveeighths",
    "seveneighths",
    "onethird",
    "twothirds",
    "zerosuperior",
    "foursuperior",
    "fivesuperior",
    "sixsuperior",
    "sevensuperior",
    "eightsuperior",
    "ninesuperior",
    "zeroinferior",
    "oneinferior",
    "twoinferior",
    "threeinferior",
    "fourinferior",
    "fiveinferior",
    "sixinferior",
    "seveninferior",
    "eightinferior",
    "nineinferior",
    "centinferior",
    "dollarinferior",
    "periodinferior",
    "commainferior",
    "Agravesmall",
    "Aacutesmall",
    "Acircumflexsmall",
    "Atildesmall",
    "Adieresissmall",
    "Aringsmall",
    "AEsmall",
    "Ccedillasmall",
    "Egravesmall",
    "Eacutesmall",
    "Ecircumflexsmall",
    "Edieresissmall",
    "Igravesmall",
    "Iacutesmall",
    "Icircumflexsmall",
    "Idieresissmall",
    "Ethsmall",
    "Ntildesmall",
    "Ogravesmall",
    "Oacutesmall",
    "Ocircumflexsmall",
    "Otildesmall",
    "Odieresissmall",
    "OEsmall",
    "Oslashsmall",
    "Ugravesmall",
    "Uacutesmall",
    "Ucircumflexsmall",
    "Udieresissmall",
    "Yacutesmall",
    "Thornsmall",
    "Ydieresissmall",
    "001.000",
    "001.001",
    "001.002",
    "001.003",
    "Black",
    "Bold",
    "Book",
    "Light",
    "Medium",
    "Regular",
    "Roman",
    "Semibold",
];
488
#[cfg(test)]
mod tests {
    use super::{Latin1String, StringId, STANDARD_STRINGS};

    /// Latin-1 data compares equal to the matching UTF-8 text even though
    /// the two byte encodings differ.
    #[test]
    fn lets_latin1() {
        let latin1 = Latin1String::new(&[223, 214, 209, 208]);
        let utf8 = "ßÖÑÐ";
        assert_ne!(latin1.chars, utf8.as_bytes());
        assert_eq!(latin1, utf8);
    }

    /// Every index of the table resolves to the matching standard string.
    #[test]
    fn standard_strings() {
        for (i, &std_string) in STANDARD_STRINGS.iter().enumerate() {
            // `standard_string` hands out the UTF-8 bytes as Latin-1, which
            // is only correct for ASCII entries.
            assert!(std_string.is_ascii());
            let sid = StringId::new(i as _);
            let latin1 = sid.standard_string().unwrap();
            // Ensure we can compare directly with &str
            assert_eq!(latin1, std_string);
            // Ensure our to_string() conversion works (via the Display impl)
            assert_eq!(latin1.to_string(), std_string);
        }
    }

    /// Identifiers past the table are rebased to string INDEX positions.
    #[test]
    fn not_a_standard_string() {
        let sid = StringId::new(STANDARD_STRINGS.len() as _);
        assert!(sid.standard_string().is_err());
        assert_eq!(sid.standard_string().unwrap_err(), 0);
        // The largest identifier maps to the largest rebased index.
        let last = StringId::new(u16::MAX);
        assert_eq!(
            last.standard_string().unwrap_err(),
            u16::MAX as usize - STANDARD_STRINGS.len()
        );
    }
}
519}