read_fonts/
font_data.rs

//! Raw font bytes.
2
3#![deny(clippy::arithmetic_side_effects)]
4use std::ops::{Range, RangeBounds};
5
6use bytemuck::AnyBitPattern;
7use types::{BigEndian, FixedSize, Scalar};
8
9use crate::array::ComputedArray;
10use crate::read::{ComputeSize, FontReadWithArgs, ReadError};
11use crate::table_ref::TableRef;
12use crate::FontRead;
13
/// A borrowed view of raw binary font data.
///
/// This is a thin wrapper around a byte slice; its methods provide
/// bounds-checked helpers for parsing and validating that data.
#[derive(Clone, Copy, Debug, Default)]
pub struct FontData<'a> {
    /// The underlying bytes.
    bytes: &'a [u8],
}
22
23/// A cursor for validating bytes during parsing.
24///
25/// This type improves the ergonomics of validation blah blah
26///
27/// # Note
28///
29/// call `finish` when you're done to ensure you're in bounds
30#[derive(Debug, Default, Clone, Copy)]
31pub struct Cursor<'a> {
32    pos: usize,
33    data: FontData<'a>,
34}
35
36impl<'a> FontData<'a> {
37    /// Empty data, useful for some tests and examples
38    pub const EMPTY: FontData<'static> = FontData { bytes: &[] };
39
40    /// Create a new `FontData` with these bytes.
41    ///
42    /// You generally don't need to do this? It is handled for you when loading
43    /// data from disk, but may be useful in tests.
44    pub const fn new(bytes: &'a [u8]) -> Self {
45        FontData { bytes }
46    }
47
48    /// The length of the data, in bytes
49    pub fn len(&self) -> usize {
50        self.bytes.len()
51    }
52
53    /// `true` if the data has a length of zero bytes.
54    pub fn is_empty(&self) -> bool {
55        self.bytes.is_empty()
56    }
57
58    /// Returns self[pos..]
59    pub fn split_off(&self, pos: usize) -> Option<FontData<'a>> {
60        self.bytes.get(pos..).map(|bytes| FontData { bytes })
61    }
62
63    /// returns self[..pos], and updates self to = self[pos..];
64    pub fn take_up_to(&mut self, pos: usize) -> Option<FontData<'a>> {
65        if pos > self.len() {
66            return None;
67        }
68        let (head, tail) = self.bytes.split_at(pos);
69        self.bytes = tail;
70        Some(FontData { bytes: head })
71    }
72
73    pub fn slice(&self, range: impl RangeBounds<usize>) -> Option<FontData<'a>> {
74        let bounds = (range.start_bound().cloned(), range.end_bound().cloned());
75        self.bytes.get(bounds).map(|bytes| FontData { bytes })
76    }
77
78    /// Read a scalar at the provided location in the data.
79    pub fn read_at<T: Scalar>(&self, offset: usize) -> Result<T, ReadError> {
80        let end = offset
81            .checked_add(T::RAW_BYTE_LEN)
82            .ok_or(ReadError::OutOfBounds)?;
83        self.bytes
84            .get(offset..end)
85            .and_then(T::read)
86            .ok_or(ReadError::OutOfBounds)
87    }
88
89    /// Read a big-endian value at the provided location in the data.
90    pub fn read_be_at<T: Scalar>(&self, offset: usize) -> Result<BigEndian<T>, ReadError> {
91        let end = offset
92            .checked_add(T::RAW_BYTE_LEN)
93            .ok_or(ReadError::OutOfBounds)?;
94        self.bytes
95            .get(offset..end)
96            .and_then(BigEndian::from_slice)
97            .ok_or(ReadError::OutOfBounds)
98    }
99
100    pub fn read_with_args<T>(&self, range: Range<usize>, args: &T::Args) -> Result<T, ReadError>
101    where
102        T: FontReadWithArgs<'a>,
103    {
104        self.slice(range)
105            .ok_or(ReadError::OutOfBounds)
106            .and_then(|data| T::read_with_args(data, args))
107    }
108
109    fn check_in_bounds(&self, offset: usize) -> Result<(), ReadError> {
110        self.bytes
111            .get(..offset)
112            .ok_or(ReadError::OutOfBounds)
113            .map(|_| ())
114    }
115
116    /// Interpret the bytes at the provided offset as a reference to `T`.
117    ///
118    /// Returns an error if the slice `offset..` is shorter than `T::RAW_BYTE_LEN`.
119    ///
120    /// This is a wrapper around [`read_ref_unchecked`][], which panics if
121    /// the type does not uphold the required invariants.
122    ///
123    /// # Panics
124    ///
125    /// This function will panic if `T` is zero-sized, has an alignment
126    /// other than one, or has any internal padding.
127    ///
128    /// [`read_ref_unchecked`]: [Self::read_ref_unchecked]
129    pub fn read_ref_at<T: AnyBitPattern + FixedSize>(
130        &self,
131        offset: usize,
132    ) -> Result<&'a T, ReadError> {
133        let end = offset
134            .checked_add(T::RAW_BYTE_LEN)
135            .ok_or(ReadError::OutOfBounds)?;
136        self.bytes
137            .get(offset..end)
138            .ok_or(ReadError::OutOfBounds)
139            .map(bytemuck::from_bytes)
140    }
141
142    /// Interpret the bytes at the provided offset as a slice of `T`.
143    ///
144    /// Returns an error if `range` is out of bounds for the underlying data,
145    /// or if the length of the range is not a multiple of `T::RAW_BYTE_LEN`.
146    ///
147    /// This is a wrapper around [`read_array_unchecked`][], which panics if
148    /// the type does not uphold the required invariants.
149    ///
150    /// # Panics
151    ///
152    /// This function will panic if `T` is zero-sized, has an alignment
153    /// other than one, or has any internal padding.
154    ///
155    /// [`read_array_unchecked`]: [Self::read_array_unchecked]
156    pub fn read_array<T: AnyBitPattern + FixedSize>(
157        &self,
158        range: Range<usize>,
159    ) -> Result<&'a [T], ReadError> {
160        let bytes = self
161            .bytes
162            .get(range.clone())
163            .ok_or(ReadError::OutOfBounds)?;
164        if bytes
165            .len()
166            .checked_rem(std::mem::size_of::<T>())
167            .unwrap_or(1) // definitely != 0
168            != 0
169        {
170            return Err(ReadError::InvalidArrayLen);
171        };
172        Ok(bytemuck::cast_slice(bytes))
173    }
174
175    pub(crate) fn cursor(&self) -> Cursor<'a> {
176        Cursor {
177            pos: 0,
178            data: *self,
179        }
180    }
181
182    /// Return the data as a byte slice
183    pub fn as_bytes(&self) -> &'a [u8] {
184        self.bytes
185    }
186}
187
188impl<'a> Cursor<'a> {
189    pub(crate) fn advance<T: Scalar>(&mut self) {
190        self.pos = self.pos.saturating_add(T::RAW_BYTE_LEN);
191    }
192
193    pub(crate) fn advance_by(&mut self, n_bytes: usize) {
194        self.pos = self.pos.saturating_add(n_bytes);
195    }
196
197    /// Read a variable length u32 and advance the cursor
198    pub(crate) fn read_u32_var(&mut self) -> Result<u32, ReadError> {
199        let mut next = || self.read::<u8>().map(|v| v as u32);
200        let b0 = next()?;
201        // TODO this feels possible to simplify, e.g. compute length, loop taking one and shifting and or'ing
202        #[allow(clippy::arithmetic_side_effects)] // these are all checked
203        let result = match b0 {
204            _ if b0 < 0x80 => b0,
205            _ if b0 < 0xC0 => ((b0 - 0x80) << 8) | next()?,
206            _ if b0 < 0xE0 => ((b0 - 0xC0) << 16) | (next()? << 8) | next()?,
207            _ if b0 < 0xF0 => ((b0 - 0xE0) << 24) | (next()? << 16) | (next()? << 8) | next()?,
208            _ => {
209                // TODO: << 32 doesn't make sense. (b0 - 0xF0) << 32
210                (next()? << 24) | (next()? << 16) | (next()? << 8) | next()?
211            }
212        };
213
214        Ok(result)
215    }
216
217    /// Read a scalar and advance the cursor.
218    pub(crate) fn read<T: Scalar>(&mut self) -> Result<T, ReadError> {
219        let temp = self.data.read_at(self.pos);
220        self.advance::<T>();
221        temp
222    }
223
224    /// Read a big-endian value and advance the cursor.
225    pub(crate) fn read_be<T: Scalar>(&mut self) -> Result<BigEndian<T>, ReadError> {
226        let temp = self.data.read_be_at(self.pos);
227        self.advance::<T>();
228        temp
229    }
230
231    pub(crate) fn read_with_args<T>(&mut self, args: &T::Args) -> Result<T, ReadError>
232    where
233        T: FontReadWithArgs<'a> + ComputeSize,
234    {
235        let len = T::compute_size(args)?;
236        let range_end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
237        let temp = self.data.read_with_args(self.pos..range_end, args);
238        self.advance_by(len);
239        temp
240    }
241
242    // only used in records that contain arrays :/
243    pub(crate) fn read_computed_array<T>(
244        &mut self,
245        len: usize,
246        args: &T::Args,
247    ) -> Result<ComputedArray<'a, T>, ReadError>
248    where
249        T: FontReadWithArgs<'a> + ComputeSize,
250    {
251        let len = len
252            .checked_mul(T::compute_size(args)?)
253            .ok_or(ReadError::OutOfBounds)?;
254        let range_end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
255        let temp = self.data.read_with_args(self.pos..range_end, args);
256        self.advance_by(len);
257        temp
258    }
259
260    pub(crate) fn read_array<T: AnyBitPattern + FixedSize>(
261        &mut self,
262        n_elem: usize,
263    ) -> Result<&'a [T], ReadError> {
264        let len = n_elem
265            .checked_mul(T::RAW_BYTE_LEN)
266            .ok_or(ReadError::OutOfBounds)?;
267        let end = self.pos.checked_add(len).ok_or(ReadError::OutOfBounds)?;
268        let temp = self.data.read_array(self.pos..end);
269        self.advance_by(len);
270        temp
271    }
272
273    /// return the current position, or an error if we are out of bounds
274    pub(crate) fn position(&self) -> Result<usize, ReadError> {
275        self.data.check_in_bounds(self.pos).map(|_| self.pos)
276    }
277
278    // used when handling fields with an implicit length, which must be at the
279    // end of a table.
280    pub(crate) fn remaining_bytes(&self) -> usize {
281        self.data.len().saturating_sub(self.pos)
282    }
283
284    pub(crate) fn remaining(self) -> Option<FontData<'a>> {
285        self.data.split_off(self.pos)
286    }
287
288    pub fn is_empty(&self) -> bool {
289        self.pos >= self.data.len()
290    }
291
292    pub(crate) fn finish<T>(self, shape: T) -> Result<TableRef<'a, T>, ReadError> {
293        let data = self.data;
294        data.check_in_bounds(self.pos)?;
295        Ok(TableRef { data, shape })
296    }
297}
298
299// useful so we can have offsets that are just to data
300impl<'a> FontRead<'a> for FontData<'a> {
301    fn read(data: FontData<'a>) -> Result<Self, ReadError> {
302        Ok(data)
303    }
304}
305
306impl AsRef<[u8]> for FontData<'_> {
307    fn as_ref(&self) -> &[u8] {
308        self.bytes
309    }
310}
311
312impl<'a> From<&'a [u8]> for FontData<'a> {
313    fn from(src: &'a [u8]) -> FontData<'a> {
314        FontData::new(src)
315    }
316}
317
// Kind of ugly, but makes `FontData` work with `FontBuilder`. If `FontBuilder`
// stops using `Cow` in its API, we can probably get rid of this?
#[cfg(feature = "std")]
impl<'a> From<FontData<'a>> for std::borrow::Cow<'a, [u8]> {
    fn from(src: FontData<'a>) -> Self {
        // Borrow the bytes; nothing is copied.
        std::borrow::Cow::Borrowed(src.bytes)
    }
}