lzma_rust/
lzma_reader.rs

1use std::io::{Error, ErrorKind, Read, Result};
2
3use byteorder::{LittleEndian, ReadBytesExt};
4
5use super::decoder::LZMADecoder;
6use super::lz::LZDecoder;
7use super::range_dec::RangeDecoder;
8use super::*;
9
10pub fn get_memery_usage_by_props(dict_size: u32, props_byte: u8) -> Result<u32> {
11    if dict_size > DICT_SIZE_MAX {
12        return Err(Error::new(ErrorKind::InvalidInput, "dict size too large"));
13    }
14    if props_byte > (4 * 5 + 4) * 9 + 8 {
15        return Err(Error::new(ErrorKind::InvalidInput, "Invalid props byte"));
16    }
17    let props = props_byte % (9 * 5);
18    let lp = props / 9;
19    let lc = props - lp * 9;
20    get_memery_usage(dict_size, lc as u32, lp as u32)
21}
22pub fn get_memery_usage(dict_size: u32, lc: u32, lp: u32) -> Result<u32> {
23    if lc > 8 || lp > 4 {
24        return Err(Error::new(ErrorKind::InvalidInput, "Invalid lc or lp"));
25    }
26    return Ok(10 + get_dict_size(dict_size)? / 1024 + ((2 * 0x300) << (lc + lp)) / 1024);
27}
28
29fn get_dict_size(dict_size: u32) -> Result<u32> {
30    if dict_size > DICT_SIZE_MAX {
31        return Err(Error::new(ErrorKind::InvalidInput, "dict size too large"));
32    }
33    let dict_size = dict_size.max(4096);
34    Ok((dict_size + 15) & !15)
35}
36
37/// # Examples
38/// ```
39/// use std::io::Read;
40/// use lzma_rust::LZMAReader;
41/// let compressed = [93, 0, 0, 128, 0, 255, 255, 255, 255, 255, 255, 255, 255, 0, 36, 25, 73, 152, 111, 22, 2, 140, 232, 230, 91, 177, 71, 198, 206, 183, 99, 255, 255, 60, 172, 0, 0];
42/// let mut reader = LZMAReader::new(&compressed[..]).unwrap();
43/// let mut buf = [0; 1024];
44/// let mut out = Vec::new();
45/// loop {
46///    let n = reader.read(&mut buf).unwrap();
47///   if n == 0 {
48///      break;
49///   }
50///   out.extend_from_slice(&buf[..n]);
51/// }
52/// assert_eq!(out, b"Hello, world!");
53/// ```
54pub struct LZMAReader<R> {
55    lz: LZDecoder,
56    rc: RangeDecoder<R>,
57    lzma: LZMADecoder,
58    end_reached: bool,
59    relaxed_end_cond: bool,
60    remaining_size: u64,
61}
62
63impl<R> Drop for LZMAReader<R> {
64    fn drop(&mut self) {
65        // self.reader.clone().release();
66    }
67}
68
69impl<R: Read> LZMAReader<R> {
70    fn construct1(
71        reader: R,
72        uncomp_size: u64,
73        mut props: u8,
74        dict_size: u32,
75        preset_dict: Option<&[u8]>,
76    ) -> Result<Self> {
77        if props > (4 * 5 + 4) * 9 + 8 {
78            return Err(Error::new(ErrorKind::InvalidInput, "Invalid props byte"));
79        }
80        let pb = props / (9 * 5);
81        props -= pb * 9 * 5;
82        let lp = props / 9;
83        let lc = props - lp * 9;
84        if dict_size > DICT_SIZE_MAX {
85            return Err(Error::new(ErrorKind::InvalidInput, "dict size too large"));
86        }
87        Self::construct2(
88            reader,
89            uncomp_size,
90            lc as _,
91            lp as _,
92            pb as _,
93            dict_size,
94            preset_dict,
95        )
96    }
97
98    fn construct2(
99        reader: R,
100        uncomp_size: u64,
101        lc: u32,
102        lp: u32,
103        pb: u32,
104        dict_size: u32,
105        preset_dict: Option<&[u8]>,
106    ) -> Result<Self> {
107        if lc > 8 || lp > 4 || pb > 4 {
108            return Err(Error::new(
109                ErrorKind::InvalidInput,
110                "Invalid lc or lp or pb",
111            ));
112        }
113        let mut dict_size = get_dict_size(dict_size)?;
114        if uncomp_size <= u64::MAX / 2 && dict_size as u64 > uncomp_size {
115            dict_size = get_dict_size(uncomp_size as u32)?;
116        }
117        let rc = RangeDecoder::new_stream(reader);
118        let rc = match rc {
119            Ok(r) => r,
120            Err(e) => {
121                return Err(e);
122            }
123        };
124        let lz = LZDecoder::new(get_dict_size(dict_size)? as _, preset_dict);
125        let lzma = LZMADecoder::new(lc, lp, pb);
126        Ok(Self {
127            // reader,
128            lz,
129            rc,
130            lzma,
131            end_reached: false,
132            relaxed_end_cond: true,
133            remaining_size: uncomp_size,
134        })
135    }
136
137    ///
138    /// Creates a new .lzma file format decompressor with an optional memory usage limit.
139    /// - [mem_limit_kb] - memory usage limit in kibibytes (KiB). u32::MAX means no limit.
140    /// - [preset_dict] - preset dictionary or None to use no preset dictionary.
141    pub fn new_mem_limit(
142        mut reader: R,
143        mem_limit_kb: u32,
144        preset_dict: Option<&[u8]>,
145    ) -> Result<Self> {
146        let props = reader.read_u8()?;
147        let dict_size = reader.read_u32::<LittleEndian>()?;
148
149        let uncomp_size = reader.read_u64::<LittleEndian>()?;
150        let need_mem = get_memery_usage_by_props(dict_size, props)?;
151        if mem_limit_kb < need_mem {
152            return Err(Error::new(
153                ErrorKind::OutOfMemory,
154                format!(
155                    "{}kb memery needed,but limit was {}kb",
156                    need_mem, mem_limit_kb
157                ),
158            ));
159        }
160        Self::construct1(reader, uncomp_size, props, dict_size, preset_dict)
161    }
162
163    /// Creates a new input stream that decompresses raw LZMA data (no .lzma header) from `reader` optionally with a preset dictionary.
164    /// - [reader] - the reader to read compressed data from.
165    /// - [uncomp_size] - the uncompressed size of the data to be decompressed.
166    /// - [props] - the LZMA properties byte.
167    /// - [dict_size] - the LZMA dictionary size.
168    /// - [preset_dict] - preset dictionary or None to use no preset dictionary.
169    pub fn new_with_props(
170        reader: R,
171        uncomp_size: u64,
172        props: u8,
173        dict_size: u32,
174        preset_dict: Option<&[u8]>,
175    ) -> Result<Self> {
176        Self::construct1(reader, uncomp_size, props, dict_size, preset_dict)
177    }
178
179    /// Creates a new input stream that decompresses raw LZMA data (no .lzma header) from `reader` optionally with a preset dictionary.
180    /// - [reader] - the input stream to read compressed data from.
181    /// - [uncomp_size] - the uncompressed size of the data to be decompressed.
182    /// - [lc] - the number of literal context bits.
183    /// - [lp] - the number of literal position bits.
184    /// - [pb] - the number of position bits.
185    /// - [dict_size] - the LZMA dictionary size.
186    /// - [preset_dict] - preset dictionary or None to use no preset dictionary.
187    pub fn new(
188        reader: R,
189        uncomp_size: u64,
190        lc: u32,
191        lp: u32,
192        pb: u32,
193        dict_size: u32,
194        preset_dict: Option<&[u8]>,
195    ) -> Result<Self> {
196        Self::construct2(reader, uncomp_size, lc, lp, pb, dict_size, preset_dict)
197    }
198
199    fn read_decode(&mut self, buf: &mut [u8]) -> Result<usize> {
200        if buf.is_empty() {
201            return Ok(0);
202        }
203        if self.end_reached {
204            return Ok(0);
205        }
206        let mut size = 0;
207        let mut len = buf.len() as u32;
208        let mut off = 0u32;
209        while len > 0 {
210            let mut copy_size_max = len as u32;
211            if self.remaining_size <= u64::MAX / 2 && (self.remaining_size as u32) < len {
212                copy_size_max = self.remaining_size as u32;
213            }
214            self.lz.set_limit(copy_size_max as usize);
215
216            match self.lzma.decode(&mut self.lz, &mut self.rc) {
217                Ok(_) => {}
218                Err(e) => {
219                    if self.remaining_size != u64::MAX || !self.lzma.end_marker_detected() {
220                        return Err(e);
221                    }
222                    self.end_reached = true;
223                    self.rc.normalize()?;
224                }
225            }
226
227            let copied_size = self.lz.flush(buf, off as _) as u32;
228            off += copied_size;
229            len -= copied_size;
230            size += copied_size;
231            if self.remaining_size <= u64::MAX / 2 {
232                self.remaining_size -= copied_size as u64;
233                if self.remaining_size == 0 {
234                    self.end_reached = true;
235                }
236            }
237
238            if self.end_reached {
239                if self.lz.has_pending()
240                    || (!self.relaxed_end_cond && !self.rc.is_stream_finished())
241                {
242                    return Err(Error::new(
243                        ErrorKind::InvalidData,
244                        "end reached but not decoder finished",
245                    ));
246                }
247                return Ok(size as _);
248            }
249        }
250        Ok(size as _)
251    }
252}
253
254impl<R: Read> Read for LZMAReader<R> {
255    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
256        self.read_decode(buf)
257    }
258}