ttf_parser/tables/cmap/format2.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
// This table has a pretty complex parsing algorithm.
// A detailed explanation can be found here:
// https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table
// https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html
// https://github.com/fonttools/fonttools/blob/a360252709a3d65f899915db0a5bd753007fdbb7/Lib/fontTools/ttLib/tables/_c_m_a_p.py#L360
use core::convert::TryFrom;
use crate::parser::{FromData, LazyArray16, Stream};
use crate::GlyphId;
#[derive(Clone, Copy)]
struct SubHeaderRecord {
first_code: u16,
entry_count: u16,
id_delta: i16,
id_range_offset: u16,
}
impl FromData for SubHeaderRecord {
const SIZE: usize = 8;
#[inline]
fn parse(data: &[u8]) -> Option<Self> {
let mut s = Stream::new(data);
Some(SubHeaderRecord {
first_code: s.read::<u16>()?,
entry_count: s.read::<u16>()?,
id_delta: s.read::<i16>()?,
id_range_offset: s.read::<u16>()?,
})
}
}
/// A [format 2](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table)
/// subtable.
#[derive(Clone, Copy)]
pub struct Subtable2<'a> {
sub_header_keys: LazyArray16<'a, u16>,
sub_headers_offset: usize,
sub_headers: LazyArray16<'a, SubHeaderRecord>,
// The whole subtable data.
data: &'a [u8],
}
impl<'a> Subtable2<'a> {
/// Parses a subtable from raw data.
pub fn parse(data: &'a [u8]) -> Option<Self> {
let mut s = Stream::new(data);
s.skip::<u16>(); // format
s.skip::<u16>(); // length
s.skip::<u16>(); // language
let sub_header_keys = s.read_array16::<u16>(256)?;
// The maximum index in a sub_header_keys is a sub_headers count.
let sub_headers_count = sub_header_keys.into_iter().map(|n| n / 8).max()? + 1;
// Remember sub_headers offset before reading. Will be used later.
let sub_headers_offset = s.offset();
let sub_headers = s.read_array16::<SubHeaderRecord>(sub_headers_count)?;
Some(Self {
sub_header_keys,
sub_headers_offset,
sub_headers,
data,
})
}
/// Returns a glyph index for a code point.
///
/// Returns `None` when `code_point` is larger than `u16`.
pub fn glyph_index(&self, code_point: u32) -> Option<GlyphId> {
// This subtable supports code points only in a u16 range.
let code_point = u16::try_from(code_point).ok()?;
let high_byte = code_point >> 8;
let low_byte = code_point & 0x00FF;
let i = if code_point < 0xff {
// 'SubHeader 0 is special: it is used for single-byte character codes.'
0
} else {
// 'Array that maps high bytes to subHeaders: value is subHeader index × 8.'
self.sub_header_keys.get(high_byte)? / 8
};
let sub_header = self.sub_headers.get(i)?;
let first_code = sub_header.first_code;
let range_end = first_code.checked_add(sub_header.entry_count)?;
if low_byte < first_code || low_byte >= range_end {
return None;
}
// SubHeaderRecord::id_range_offset points to SubHeaderRecord::first_code
// in the glyphIndexArray. So we have to advance to our code point.
let index_offset = usize::from(low_byte.checked_sub(first_code)?) * u16::SIZE;
// 'The value of the idRangeOffset is the number of bytes
// past the actual location of the idRangeOffset'.
let offset = self.sub_headers_offset
// Advance to required subheader.
+ SubHeaderRecord::SIZE * usize::from(i + 1)
// Move back to idRangeOffset start.
- u16::SIZE
// Use defined offset.
+ usize::from(sub_header.id_range_offset)
// Advance to required index in the glyphIndexArray.
+ index_offset;
let glyph: u16 = Stream::read_at(self.data, offset)?;
if glyph == 0 {
return None;
}
u16::try_from((i32::from(glyph) + i32::from(sub_header.id_delta)) % 65536)
.ok()
.map(GlyphId)
}
/// Calls `f` for each codepoint defined in this table.
pub fn codepoints(&self, f: impl FnMut(u32)) {
let _ = self.codepoints_inner(f);
}
#[inline]
fn codepoints_inner(&self, mut f: impl FnMut(u32)) -> Option<()> {
for first_byte in 0u16..256 {
let i = self.sub_header_keys.get(first_byte)? / 8;
let sub_header = self.sub_headers.get(i)?;
let first_code = sub_header.first_code;
if i == 0 {
// This is a single byte code.
let range_end = first_code.checked_add(sub_header.entry_count)?;
if first_byte >= first_code && first_byte < range_end {
f(u32::from(first_byte));
}
} else {
// This is a two byte code.
let base = first_code.checked_add(first_byte << 8)?;
for k in 0..sub_header.entry_count {
let code_point = base.checked_add(k)?;
f(u32::from(code_point));
}
}
}
Some(())
}
}
impl core::fmt::Debug for Subtable2<'_> {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
write!(f, "Subtable2 {{ ... }}")
}
}