lzma_rust/
lzma2_reader.rs

1use super::{
2    decoder::LZMADecoder,
3    lz::LZDecoder,
4    range_dec::{RangeDecoder, RangeDecoderBuffer},
5};
6use byteorder::{self, BigEndian, ReadBytesExt};
7use std::io::{ErrorKind, Read, Result};
/// Upper bound, in bytes, on the compressed payload of a single LZMA2 chunk.
///
/// The chunk header stores the compressed size as a 16-bit value minus one, so
/// the largest payload is `u16::MAX + 1` bytes. The range decoder's input
/// buffer is sized with this constant.
pub const COMPRESSED_SIZE_MAX: u32 = (u16::MAX as u32) + 1;
9
10/// Decompresses a raw LZMA2 stream (no XZ headers).
11/// # Examples
12/// ```
13/// use std::io::Read;
14/// use lzma_rust::LZMA2Reader;
15/// use lzma_rust::LZMA2Options;
16/// let compressed = [1, 0, 12, 72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33, 0];
17/// let mut reader = LZMA2Reader::new(compressed, LZMA2Options::DICT_SIZE_DEFAULT, None);
18/// let mut decompressed = Vec::new();
19/// reader.read_to_end(&mut decompressed);
20/// assert_eq!(&decompressed[..], b"Hello, world!");
21/// 
22/// ```
23pub struct LZMA2Reader<R> {
24    inner: R,
25    lz: LZDecoder,
26    rc: RangeDecoder<RangeDecoderBuffer>,
27    lzma: Option<LZMADecoder>,
28    uncompressed_size: usize,
29    is_lzma_chunk: bool,
30    need_dict_reset: bool,
31    need_props: bool,
32    end_reached: bool,
33    error: Option<std::io::Error>,
34}
35#[inline]
36pub fn get_memery_usage(dict_size: u32) -> u32 {
37    40 + COMPRESSED_SIZE_MAX / 1024 + get_dict_size(dict_size) / 1024
38}
39
/// Rounds `dict_size` up to the next multiple of 16 bytes.
///
/// The addition saturates, so inputs above `u32::MAX - 15` yield the largest
/// representable multiple of 16 (`0xFFFF_FFF0`) instead of panicking in debug
/// builds or wrapping around to a tiny value in release builds.
#[inline]
fn get_dict_size(dict_size: u32) -> u32 {
    // Parenthesised explicitly: `+` binds tighter than `&`, which is easy to misread.
    (dict_size.saturating_add(15)) & !15
}
44
45impl<R> LZMA2Reader<R> {
46    pub fn into_inner(self) -> R {
47        self.inner
48    }
49
50    pub fn get_ref(&self) -> &R {
51        &self.inner
52    }
53
54    pub fn get_mut(&mut self) -> &mut R {
55        &mut self.inner
56    }
57}
58
59impl<R: Read> LZMA2Reader<R> {
60    /// Create a new LZMA2 reader.
61    /// `inner` is the reader to read compressed data from.
62    /// `dict_size` is the dictionary size in bytes.
63    pub fn new(inner: R, dict_size: u32, preset_dict: Option<&[u8]>) -> Self {
64        let has_preset = preset_dict.as_ref().map(|a| a.len() > 0).unwrap_or(false);
65        let lz = LZDecoder::new(get_dict_size(dict_size) as _, preset_dict);
66        let rc = RangeDecoder::new_buffer(COMPRESSED_SIZE_MAX as _);
67        Self {
68            inner,
69            lz,
70            rc,
71            lzma: None,
72            uncompressed_size: 0,
73            is_lzma_chunk: false,
74            need_dict_reset: !has_preset,
75            need_props: true,
76            end_reached: false,
77            error: None,
78        }
79    }
80
81    fn decode_chunk_header(&mut self) -> Result<()> {
82        let control = self.inner.read_u8()?;
83        if control == 0x00 {
84            self.end_reached = true;
85            return Ok(());
86        }
87
88        if control >= 0xE0 || control == 0x01 {
89            self.need_props = true;
90            self.need_dict_reset = false;
91            self.lz.reset();
92        } else if self.need_dict_reset {
93            return Err(std::io::Error::new(
94                ErrorKind::InvalidInput,
95                "Corrupted input data (LZMA2:0)",
96            ));
97        }
98        if control >= 0x80 {
99            self.is_lzma_chunk = true;
100            self.uncompressed_size = ((control & 0x1F) as usize) << 16;
101            self.uncompressed_size += self.inner.read_u16::<BigEndian>()? as usize + 1;
102            let compressed_size = self.inner.read_u16::<BigEndian>()? as usize + 1;
103            if control >= 0xC0 {
104                self.need_props = false;
105                self.decode_props()?;
106            } else if self.need_props {
107                return Err(std::io::Error::new(
108                    ErrorKind::InvalidInput,
109                    "Corrupted input data (LZMA2:1)",
110                ));
111            } else if control >= 0xA0 {
112                self.lzma.as_mut().map(|l| l.reset());
113            }
114            self.rc.prepare(&mut self.inner, compressed_size)?;
115        } else if control > 0x02 {
116            return Err(std::io::Error::new(
117                ErrorKind::InvalidInput,
118                "Corrupted input data (LZMA2:2)",
119            ));
120        } else {
121            self.is_lzma_chunk = false;
122            self.uncompressed_size = (self.inner.read_u16::<BigEndian>()? + 1) as _;
123        }
124        Ok(())
125    }
126
127    fn decode_props(&mut self) -> std::io::Result<()> {
128        let props = self.inner.read_u8()?;
129        if props > (4 * 5 + 4) * 9 + 8 {
130            return Err(std::io::Error::new(
131                ErrorKind::InvalidInput,
132                "Corrupted input data (LZMA2:3)",
133            ));
134        }
135        let pb = props / (9 * 5);
136        let props = props - pb * 9 * 5;
137        let lp = props / 9;
138        let lc = props - lp * 9;
139        if lc + lp > 4 {
140            return Err(std::io::Error::new(
141                ErrorKind::InvalidInput,
142                "Corrupted input data (LZMA2:4)",
143            ));
144        }
145        self.lzma = Some(LZMADecoder::new(lc as _, lp as _, pb as _));
146
147        Ok(())
148    }
149
150    fn read_decode(&mut self, buf: &mut [u8]) -> Result<usize> {
151        if buf.len() == 0 {
152            return Ok(0);
153        }
154        if let Some(e) = &self.error {
155            return Err(std::io::Error::new(e.kind(), e.to_string()));
156        }
157
158        if self.end_reached {
159            return Ok(0);
160        }
161        let mut size = 0;
162        let mut len = buf.len();
163        let mut off = 0;
164        while len > 0 {
165            if self.uncompressed_size == 0 {
166                self.decode_chunk_header()?;
167                if self.end_reached {
168                    return Ok(size);
169                }
170            }
171
172            let copy_size_max = self.uncompressed_size.min(len);
173            if !self.is_lzma_chunk {
174                self.lz.copy_uncompressed(&mut self.inner, copy_size_max)?;
175            } else {
176                self.lz.set_limit(copy_size_max);
177                if let Some(lzma) = self.lzma.as_mut() {
178                    lzma.decode(&mut self.lz, &mut self.rc)?;
179                }
180            }
181
182            {
183                let copied_size = self.lz.flush(buf, off);
184                off += copied_size;
185                len -= copied_size;
186                size += copied_size;
187                self.uncompressed_size -= copied_size;
188                if self.uncompressed_size == 0 {
189                    if !self.rc.is_finished() || self.lz.has_pending() {
190                        return Err(std::io::Error::new(
191                            ErrorKind::InvalidInput,
192                            "rc not finished or lz has pending",
193                        ));
194                    }
195                }
196            }
197        }
198        Ok(size)
199    }
200}
201
202impl<R: Read> Read for LZMA2Reader<R> {
203    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
204        match self.read_decode(buf) {
205            Ok(size) => Ok(size),
206            Err(e) => {
207                let error = std::io::Error::new(e.kind(), e.to_string());
208                self.error = Some(e);
209                return Err(error);
210            }
211        }
212    }
213}