exif/
isobmff.rs

1//
2// Copyright (c) 2020 KAMADA Ken'ichi.
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions
7// are met:
8// 1. Redistributions of source code must retain the above copyright
9//    notice, this list of conditions and the following disclaimer.
10// 2. Redistributions in binary form must reproduce the above copyright
11//    notice, this list of conditions and the following disclaimer in the
12//    documentation and/or other materials provided with the distribution.
13//
14// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17// ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24// SUCH DAMAGE.
25//
26
27use std::io::{BufRead, ErrorKind, Seek, SeekFrom};
28
29use crate::endian::{Endian, BigEndian};
30use crate::error::Error;
31use crate::util::{read64, BufReadExt as _, ReadExt as _};
32
// Compatible brands that identify a HEIF file.
//
// Looking for "mif1" among the compatible brands should be enough, because
// "heic", "heix", "heim", and "heis" files shall list "mif1" as a
// compatible brand [ISO23008-12 B.4.1] [ISO23008-12 B.4.3].
// The same holds for "msf1" [ISO23008-12 B.4.2] [ISO23008-12 B.4.4].
static HEIF_BRANDS: &[[u8; 4]] = &[
    *b"mif1",
    *b"msf1",
];
38
// Upper bound, in bytes, on the Exif item data assembled from the item's
// extents; anything longer is rejected as "Exif data too large".
const MAX_EXIF_SIZE: usize = 0xffff;
40
41// Most errors in this file are Error::InvalidFormat.
42impl From<&'static str> for Error {
43    fn from(err: &'static str) -> Error {
44        Error::InvalidFormat(err)
45    }
46}
47
48pub fn get_exif_attr<R>(reader: &mut R) -> Result<Vec<u8>, Error>
49where R: BufRead + Seek {
50    let mut parser = Parser::new(reader);
51    match parser.parse() {
52        Err(Error::Io(ref e)) if e.kind() == ErrorKind::UnexpectedEof =>
53            Err("Broken HEIF file".into()),
54        Err(e) => Err(e),
55        Ok(mut buf) => {
56            if buf.len() < 4 {
57                return Err("ExifDataBlock too small".into());
58            }
59            let offset = BigEndian::loadu32(&buf, 0) as usize;
60            if buf.len() - 4 < offset {
61                return Err("Invalid Exif header offset".into());
62            }
63            buf.drain(.. 4 + offset);
64            Ok(buf)
65        },
66    }
67}
68
69#[derive(Debug)]
70struct Parser<R> {
71    reader: R,
72    // Whether the file type box has been checked.
73    ftyp_checked: bool,
74    // The item where Exif data is stored.
75    item_id: Option<u32>,
76    // The location of the item_id.
77    item_location: Option<Location>,
78}
79
// Storage location of one item, as read from an ItemLocationBox entry.
#[derive(Debug)]
struct Location {
    // How the extents are to be resolved (0: offsets into the file,
    // 1: offsets into the ItemDataBox; other values are not handled).
    construction_method: u8,
    // One (index, offset, length) triple per extent.
    extents: Vec<(u64, u64, u64)>,
    // Added to every extent offset.
    base_offset: u64,
}
87
88impl<R> Parser<R> where R: BufRead + Seek {
89    fn new(reader: R) -> Self {
90        Self {
91            reader: reader,
92            ftyp_checked: false,
93            item_id: None,
94            item_location: None,
95        }
96    }
97
98    fn parse(&mut self) -> Result<Vec<u8>, Error> {
99        while let Some((size, boxtype)) = self.read_box_header()? {
100            match &boxtype {
101                b"ftyp" => {
102                    let buf = self.read_file_level_box(size)?;
103                    self.parse_ftyp(BoxSplitter::new(&buf))?;
104                    self.ftyp_checked = true;
105                },
106                b"meta" => {
107                    if !self.ftyp_checked {
108                        return Err("MetaBox found before FileTypeBox".into());
109                    }
110                    let buf = self.read_file_level_box(size)?;
111                    let exif = self.parse_meta(BoxSplitter::new(&buf))?;
112                    return Ok(exif);
113                },
114                _ => self.skip_file_level_box(size)?,
115            }
116        }
117        Err(Error::NotFound("HEIF"))
118    }
119
120    // Reads size, type, and largesize,
121    // and returns body size and type.
122    // If no byte can be read due to EOF, None is returned.
123    fn read_box_header(&mut self) -> Result<Option<(u64, [u8; 4])>, Error> {
124        if self.reader.is_eof()? {
125            return Ok(None);
126        }
127        let mut buf = [0; 8];
128        self.reader.read_exact(&mut buf)?;
129        let size = match BigEndian::loadu32(&buf, 0) {
130            0 => Some(std::u64::MAX),
131            1 => read64(&mut self.reader)?.checked_sub(16),
132            x => u64::from(x).checked_sub(8),
133        }.ok_or("Invalid box size")?;
134        let boxtype = buf[4..8].try_into().expect("never fails");
135        Ok(Some((size, boxtype)))
136    }
137
138    fn read_file_level_box(&mut self, size: u64) -> Result<Vec<u8>, Error> {
139        let mut buf;
140        match size {
141            std::u64::MAX => {
142                buf = Vec::new();
143                self.reader.read_to_end(&mut buf)?;
144            },
145            _ => {
146                let size = size.try_into()
147                    .or(Err("Box is larger than the address space"))?;
148                buf = Vec::new();
149                self.reader.read_exact_len(&mut buf, size)?;
150            },
151        }
152        Ok(buf)
153    }
154
155    fn skip_file_level_box(&mut self, size: u64) -> Result<(), Error> {
156        match size {
157            std::u64::MAX => self.reader.seek(SeekFrom::End(0))?,
158            _ => self.reader.seek(SeekFrom::Current(
159                size.try_into().or(Err("Large seek not supported"))?))?,
160        };
161        Ok(())
162    }
163
164    fn parse_ftyp(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> {
165        let head = boxp.slice(8)?;
166        let _major_brand = &head[0..4];
167        let _minor_version = BigEndian::loadu32(&head, 4);
168        while let Ok(compat_brand) = boxp.array4() {
169            if HEIF_BRANDS.contains(&compat_brand) {
170                return Ok(());
171            }
172        }
173        Err("No compatible brand recognized in ISO base media file".into())
174    }
175
176    fn parse_meta(&mut self, mut boxp: BoxSplitter) -> Result<Vec<u8>, Error> {
177        let (version, _flags) = boxp.fullbox_header()?;
178        if version != 0 {
179            return Err("Unsupported MetaBox".into());
180        }
181        let mut idat = None;
182        let mut iloc = None;
183        while !boxp.is_empty() {
184            let (boxtype, mut body) = boxp.child_box()?;
185            match boxtype {
186                b"idat" => idat = Some(body.slice(body.len())?),
187                b"iinf" => self.parse_iinf(body)?,
188                b"iloc" => iloc = Some(body),
189                _ => {},
190            }
191        }
192
193        self.item_id.ok_or(Error::NotFound("HEIF"))?;
194        self.parse_iloc(iloc.ok_or("No ItemLocationBox")?)?;
195        let location = self.item_location.as_ref()
196            .ok_or("No matching item in ItemLocationBox")?;
197        let mut buf = Vec::new();
198        match location.construction_method {
199            0 => {
200                for &(_, off, len) in &location.extents {
201                    let off = location.base_offset.checked_add(off)
202                        .ok_or("Invalid offset")?;
203                    // Seeking beyond the EOF is allowed and
204                    // implementation-defined, but the subsequent read
205                    // should fail.
206                    self.reader.seek(SeekFrom::Start(off))?;
207                    match len {
208                        0 => { self.reader.read_to_end(&mut buf)?; },
209                        _ => {
210                            let len = len.try_into()
211                                .or(Err("Extent too large"))?;
212                            self.reader.read_exact_len(&mut buf, len)?;
213                        },
214                    }
215                    if buf.len() > MAX_EXIF_SIZE {
216                        return Err("Exif data too large".into());
217                    }
218                }
219            },
220            1 => {
221                let idat = idat.ok_or("No ItemDataBox")?;
222                for &(_, off, len) in &location.extents {
223                    let off = location.base_offset.checked_add(off)
224                        .ok_or("Invalid offset")?;
225                    let end = off.checked_add(len).ok_or("Invalid length")?;
226                    let off = off.try_into().or(Err("Offset too large"))?;
227                    let end = end.try_into().or(Err("Length too large"))?;
228                    buf.extend_from_slice(match len {
229                        0 => idat.get(off..),
230                        _ => idat.get(off..end),
231                    }.ok_or("Out of ItemDataBox")?);
232                    if buf.len() > MAX_EXIF_SIZE {
233                        return Err("Exif data too large".into());
234                    }
235                }
236            },
237            2 => return Err(Error::NotSupported(
238                "Construction by item offset is not supported")),
239            _ => return Err("Invalid construction_method".into()),
240        }
241        Ok(buf)
242    }
243
244    fn parse_iloc(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> {
245        let (version, _flags) = boxp.fullbox_header()?;
246        let tmp = boxp.uint16().map(usize::from)?;
247        let (offset_size, length_size, base_offset_size) =
248            (tmp >> 12, tmp >> 8 & 0xf, tmp >> 4 & 0xf);
249        let index_size = match version { 1 | 2 => tmp & 0xf, _ => 0 };
250        let item_count = match version {
251            0 | 1 => boxp.uint16()?.into(),
252            2 => boxp.uint32()?,
253            _ => return Err("Unsupported ItemLocationBox".into()),
254        };
255        for _ in 0..item_count {
256            let item_id = match version {
257                0 | 1 => boxp.uint16()?.into(),
258                2 => boxp.uint32()?,
259                _ => unreachable!(),
260            };
261            let construction_method = match version {
262                0 => 0,
263                1 | 2 => boxp.slice(2).map(|x| x[1] & 0xf)?,
264                _ => unreachable!(),
265            };
266            let data_ref_index = boxp.uint16()?;
267            if construction_method == 0 && data_ref_index != 0 {
268                return Err(Error::NotSupported(
269                    "External data reference is not supported"));
270            }
271            let base_offset = boxp.size048(base_offset_size)?
272                .ok_or("Invalid base_offset_size")?;
273            let extent_count = boxp.uint16()?.into();
274            if self.item_id == Some(item_id) {
275                let mut extents = Vec::with_capacity(extent_count);
276                for _ in 0..extent_count {
277                    let index = boxp.size048(index_size)?
278                        .ok_or("Invalid index_size")?;
279                    let offset = boxp.size048(offset_size)?
280                        .ok_or("Invalid offset_size")?;
281                    let length = boxp.size048(length_size)?
282                        .ok_or("Invalid length_size")?;
283                    extents.push((index, offset, length));
284                }
285                self.item_location = Some(Location {
286                    construction_method, extents, base_offset });
287            } else {
288                // (15 + 15 + 15) * u16::MAX never overflows.
289                boxp.slice((index_size + offset_size + length_size) *
290                           extent_count)?;
291            }
292        }
293        Ok(())
294    }
295
296    fn parse_iinf(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> {
297        let (version, _flags) = boxp.fullbox_header()?;
298        let entry_count = match version {
299            0 => boxp.uint16()?.into(),
300            _ => boxp.uint32()?,
301        };
302        for _ in 0..entry_count {
303            let (boxtype, body) = boxp.child_box()?;
304            match boxtype {
305                b"infe" => self.parse_infe(body)?,
306                _ => {},
307            }
308        }
309        Ok(())
310    }
311
312    fn parse_infe(&mut self, mut boxp: BoxSplitter) -> Result<(), Error> {
313        let (version, _flags) = boxp.fullbox_header()?;
314        let item_id = match version {
315            2 => boxp.uint16()?.into(),
316            3 => boxp.uint32()?,
317            _ => return Err("Unsupported ItemInfoEntry".into()),
318        };
319        let _item_protection_index = boxp.slice(2)?;
320        let item_type = boxp.slice(4)?;
321        if item_type == b"Exif" {
322            self.item_id = Some(item_id);
323        }
324        Ok(())
325    }
326}
327
328pub fn is_heif(buf: &[u8]) -> bool {
329    let mut boxp = BoxSplitter::new(buf);
330    while let Ok((boxtype, mut body)) = boxp.child_box() {
331        if boxtype == b"ftyp" {
332            let _major_brand_minor_version = if body.slice(8).is_err() {
333                return false;
334            };
335            while let Ok(compat_brand) = body.array4() {
336                if HEIF_BRANDS.contains(&compat_brand) {
337                    return true;
338                }
339            }
340            return false;
341        }
342    }
343    false
344}
345
346struct BoxSplitter<'a> {
347    inner: &'a [u8],
348}
349
350impl<'a> BoxSplitter<'a> {
351    fn new(slice: &'a [u8]) -> BoxSplitter<'a> {
352        Self { inner: slice }
353    }
354
355    fn is_empty(&self) -> bool {
356        self.inner.is_empty()
357    }
358
359    fn len(&self) -> usize {
360        self.inner.len()
361    }
362
363    // Returns type and body.
364    fn child_box(&mut self) -> Result<(&'a [u8], BoxSplitter<'a>), Error> {
365        let size = self.uint32()? as usize;
366        let boxtype = self.slice(4)?;
367        let body_len = match size {
368            0 => Some(self.len()),
369            1 => usize::try_from(self.uint64()?)
370                .or(Err("Box is larger than the address space"))?
371                .checked_sub(16),
372            _ => size.checked_sub(8),
373        }.ok_or("Invalid box size")?;
374        let body = self.slice(body_len)?;
375        Ok((boxtype, BoxSplitter::new(body)))
376    }
377
378    // Returns 0-, 4-, or 8-byte unsigned integer.
379    fn size048(&mut self, size: usize) -> Result<Option<u64>, Error> {
380        match size {
381            0 => Ok(Some(0)),
382            4 => self.uint32().map(u64::from).map(Some),
383            8 => self.uint64().map(Some),
384            _ => Ok(None),
385        }
386    }
387
388    // Returns version and flags.
389    fn fullbox_header(&mut self) -> Result<(u32, u32), Error> {
390        let tmp = self.uint32()?;
391        Ok((tmp >> 24, tmp & 0xffffff))
392    }
393
394    fn uint16(&mut self) -> Result<u16, Error> {
395        self.slice(2).map(|num| BigEndian::loadu16(num, 0))
396    }
397
398    fn uint32(&mut self) -> Result<u32, Error> {
399        self.slice(4).map(|num| BigEndian::loadu32(num, 0))
400    }
401
402    fn uint64(&mut self) -> Result<u64, Error> {
403        self.slice(8).map(|num| BigEndian::loadu64(num, 0))
404    }
405
406    fn array4(&mut self) -> Result<[u8; 4], Error> {
407        self.slice(4).map(|x| x.try_into().expect("never fails"))
408    }
409
410    fn slice(&mut self, at: usize) -> Result<&'a [u8], Error> {
411        let slice = self.inner.get(..at).ok_or("Box too small")?;
412        self.inner = &self.inner[at..];
413        Ok(slice)
414    }
415}
416
#[cfg(test)]
mod tests {
    use std::io::Cursor;
    use super::*;

    // Extracts the Exif data from the sample HEIC file.
    #[test]
    fn extract() {
        let file = std::fs::File::open("tests/exif.heic").unwrap();
        let mut reader = std::io::BufReader::new(&file);
        let exif = get_exif_attr(&mut reader).unwrap();
        assert_eq!(exif.len(), 79);
        assert!(exif.starts_with(b"MM\x00\x2a"));
        assert!(exif.ends_with(b"xif\0"));
    }

    // A box of an unknown type in front of the FileTypeBox is skipped.
    #[test]
    fn unknown_before_ftyp() {
        let data =
            b"\0\0\0\x09XXXXx\
              \0\0\0\x14ftypmif1\0\0\0\0mif1\
              \0\0\0\x57meta\0\0\0\0\
                  \0\0\0\x18iloc\x01\0\0\0\0\0\0\x01\x1e\x1d\0\x01\0\0\0\x01\
                  \0\0\0\x22iinf\0\0\0\0\0\x01\
                      \0\0\0\x14infe\x02\0\0\0\x1e\x1d\0\0Exif\
                  \0\0\0\x11idat\0\0\0\x01xabcd";
        assert!(is_heif(data));
        let mut cursor = Cursor::new(&data[..]);
        let exif = get_exif_attr(&mut cursor).unwrap();
        assert_eq!(exif, b"abcd");
    }

    // Malformed item data: an offset pointing past the end of the data,
    // and data too short to hold the offset field.
    #[test]
    fn bad_exif_data_block() {
        let offset_past_end =
            b"\0\0\0\x14ftypmif1\0\0\0\0mif1\
              \0\0\0\x52meta\0\0\0\0\
                  \0\0\0\x18iloc\x01\0\0\0\0\0\0\x01\x1e\x1d\0\x01\0\0\0\x01\
                  \0\0\0\x22iinf\0\0\0\0\0\x01\
                      \0\0\0\x14infe\x02\0\0\0\x1e\x1d\0\0Exif\
                  \0\0\0\x0cidat\0\0\0\x01";
        assert_err_pat!(
            get_exif_attr(&mut Cursor::new(&offset_past_end[..])),
            Error::InvalidFormat("Invalid Exif header offset"));

        let short_block =
            b"\0\0\0\x14ftypmif1\0\0\0\0mif1\
              \0\0\0\x51meta\0\0\0\0\
                  \0\0\0\x18iloc\x01\0\0\0\0\0\0\x01\x1e\x1d\0\x01\0\0\0\x01\
                  \0\0\0\x22iinf\0\0\0\0\0\x01\
                      \0\0\0\x14infe\x02\0\0\0\x1e\x1d\0\0Exif\
                  \0\0\0\x0bidat\0\0\0";
        assert_err_pat!(
            get_exif_attr(&mut Cursor::new(&short_block[..])),
            Error::InvalidFormat("ExifDataBlock too small"));
    }

    #[test]
    fn parser_box_header() {
        // Parses one box header from the given bytes.
        let header_of = |bytes: &'static [u8]|
            Parser::new(Cursor::new(bytes)).read_box_header();

        // size
        assert_eq!(header_of(b"\0\0\0\x08abcd").unwrap(),
                   Some((0, *b"abcd")));
        assert_err_pat!(header_of(b"\0\0\0\x08abc"), Error::Io(_));
        assert_err_pat!(header_of(b"\0\0\0\x07abcd"),
                        Error::InvalidFormat(_));
        // max size
        assert_eq!(header_of(b"\xff\xff\xff\xffabcd").unwrap(),
                   Some((0xffffffff - 8, *b"abcd")));
        // to the end of the file
        assert_eq!(header_of(b"\0\0\0\0abcd").unwrap(),
                   Some((u64::MAX, *b"abcd")));
        // largesize
        assert_eq!(
            header_of(b"\0\0\0\x01abcd\0\0\0\0\0\0\0\x10").unwrap(),
            Some((0, *b"abcd")));
        assert_err_pat!(
            header_of(b"\0\0\0\x01abcd\0\0\0\0\0\0\0"),
            Error::Io(_));
        assert_err_pat!(
            header_of(b"\0\0\0\x01abcd\0\0\0\0\0\0\0\x0f"),
            Error::InvalidFormat(_));
        // max largesize
        assert_eq!(
            header_of(b"\0\0\0\x01abcd\xff\xff\xff\xff\xff\xff\xff\xff")
                .unwrap(),
            Some((u64::MAX.wrapping_sub(16), *b"abcd")));
    }

    #[test]
    fn is_heif_test() {
        let accepted: &[&[u8]] = &[
            // HEIF (with any coding format)
            b"\0\0\0\x14ftypmif1\0\0\0\0mif1",
            // HEIC
            b"\0\0\0\x18ftypheic\0\0\0\0heicmif1",
            // HEIC image sequence
            b"\0\0\0\x18ftyphevc\0\0\0\0msf1hevc",
            // unknown major brand but compatible with HEIF
            b"\0\0\0\x18ftypXXXX\0\0\0\0XXXXmif1",
            // incomplete brand (OK to ignore?)
            b"\0\0\0\x15ftypmif1\0\0\0\0mif1h",
            b"\0\0\0\x16ftypmif1\0\0\0\0mif1he",
            b"\0\0\0\x17ftypmif1\0\0\0\0mif1hei",
        ];
        for &file in accepted {
            assert!(is_heif(file));
        }

        let rejected: &[&[u8]] = &[
            // ISO base media file but not a HEIF
            b"\0\0\0\x14ftypmp41\0\0\0\0mp41",
            // missing compatible brands (what should we do?)
            b"\0\0\0\x10ftypmif1\0\0\0\0",
            // short box size
            b"\0\0\0\x13ftypmif1\0\0\0\0mif1",
        ];
        for &file in rejected {
            assert!(!is_heif(file));
        }

        // truncated box: every proper prefix must be rejected
        let full: &[u8] = b"\0\0\0\x14ftypmif1\0\0\0\0mif1";
        for keep in (0..full.len()).rev() {
            assert!(!is_heif(&full[..keep]));
        }
    }

    #[test]
    fn box_splitter() {
        let buf = b"0123456789abcdef";

        // Requests larger than the remaining data fail.
        let mut whole = BoxSplitter::new(buf);
        assert_err_pat!(whole.slice(17), Error::InvalidFormat(_));
        assert_eq!(whole.slice(16).unwrap(), buf);
        assert_err_pat!(whole.slice(usize::MAX), Error::InvalidFormat(_));

        // Integers are consumed in order and read as big-endian.
        let mut fields = BoxSplitter::new(buf);
        assert_eq!(fields.slice(1).unwrap(), b"0");
        assert_eq!(fields.uint16().unwrap(), 0x3132);
        assert_eq!(fields.uint32().unwrap(), 0x33343536);
        assert_eq!(fields.uint64().unwrap(), 0x3738396162636465);
    }
}