lzma_rust/
lzma2_reader.rs1use super::{
2 decoder::LZMADecoder,
3 lz::LZDecoder,
4 range_dec::{RangeDecoder, RangeDecoderBuffer},
5};
6use byteorder::{self, BigEndian, ReadBytesExt};
7use std::io::{ErrorKind, Read, Result};
8pub const COMPRESSED_SIZE_MAX: u32 = 1 << 16;
9
10pub struct LZMA2Reader<R> {
24 inner: R,
25 lz: LZDecoder,
26 rc: RangeDecoder<RangeDecoderBuffer>,
27 lzma: Option<LZMADecoder>,
28 uncompressed_size: usize,
29 is_lzma_chunk: bool,
30 need_dict_reset: bool,
31 need_props: bool,
32 end_reached: bool,
33 error: Option<std::io::Error>,
34}
35#[inline]
36pub fn get_memery_usage(dict_size: u32) -> u32 {
37 40 + COMPRESSED_SIZE_MAX / 1024 + get_dict_size(dict_size) / 1024
38}
39
/// Rounds `dict_size` up to the next multiple of 16.
///
/// Values above `u32::MAX - 15` have no representable multiple of 16 above
/// them; they saturate to `0xFFFF_FFF0`, the largest 16-aligned `u32`, instead
/// of overflowing (which would panic in debug builds and wrap to a tiny
/// dictionary in release builds).
#[inline]
fn get_dict_size(dict_size: u32) -> u32 {
    // `+` binds tighter than `&`; the method call makes the grouping explicit.
    dict_size.saturating_add(15) & !15
}
44
45impl<R> LZMA2Reader<R> {
46 pub fn into_inner(self) -> R {
47 self.inner
48 }
49
50 pub fn get_ref(&self) -> &R {
51 &self.inner
52 }
53
54 pub fn get_mut(&mut self) -> &mut R {
55 &mut self.inner
56 }
57}
58
59impl<R: Read> LZMA2Reader<R> {
60 pub fn new(inner: R, dict_size: u32, preset_dict: Option<&[u8]>) -> Self {
64 let has_preset = preset_dict.as_ref().map(|a| a.len() > 0).unwrap_or(false);
65 let lz = LZDecoder::new(get_dict_size(dict_size) as _, preset_dict);
66 let rc = RangeDecoder::new_buffer(COMPRESSED_SIZE_MAX as _);
67 Self {
68 inner,
69 lz,
70 rc,
71 lzma: None,
72 uncompressed_size: 0,
73 is_lzma_chunk: false,
74 need_dict_reset: !has_preset,
75 need_props: true,
76 end_reached: false,
77 error: None,
78 }
79 }
80
81 fn decode_chunk_header(&mut self) -> Result<()> {
82 let control = self.inner.read_u8()?;
83 if control == 0x00 {
84 self.end_reached = true;
85 return Ok(());
86 }
87
88 if control >= 0xE0 || control == 0x01 {
89 self.need_props = true;
90 self.need_dict_reset = false;
91 self.lz.reset();
92 } else if self.need_dict_reset {
93 return Err(std::io::Error::new(
94 ErrorKind::InvalidInput,
95 "Corrupted input data (LZMA2:0)",
96 ));
97 }
98 if control >= 0x80 {
99 self.is_lzma_chunk = true;
100 self.uncompressed_size = ((control & 0x1F) as usize) << 16;
101 self.uncompressed_size += self.inner.read_u16::<BigEndian>()? as usize + 1;
102 let compressed_size = self.inner.read_u16::<BigEndian>()? as usize + 1;
103 if control >= 0xC0 {
104 self.need_props = false;
105 self.decode_props()?;
106 } else if self.need_props {
107 return Err(std::io::Error::new(
108 ErrorKind::InvalidInput,
109 "Corrupted input data (LZMA2:1)",
110 ));
111 } else if control >= 0xA0 {
112 self.lzma.as_mut().map(|l| l.reset());
113 }
114 self.rc.prepare(&mut self.inner, compressed_size)?;
115 } else if control > 0x02 {
116 return Err(std::io::Error::new(
117 ErrorKind::InvalidInput,
118 "Corrupted input data (LZMA2:2)",
119 ));
120 } else {
121 self.is_lzma_chunk = false;
122 self.uncompressed_size = (self.inner.read_u16::<BigEndian>()? + 1) as _;
123 }
124 Ok(())
125 }
126
127 fn decode_props(&mut self) -> std::io::Result<()> {
128 let props = self.inner.read_u8()?;
129 if props > (4 * 5 + 4) * 9 + 8 {
130 return Err(std::io::Error::new(
131 ErrorKind::InvalidInput,
132 "Corrupted input data (LZMA2:3)",
133 ));
134 }
135 let pb = props / (9 * 5);
136 let props = props - pb * 9 * 5;
137 let lp = props / 9;
138 let lc = props - lp * 9;
139 if lc + lp > 4 {
140 return Err(std::io::Error::new(
141 ErrorKind::InvalidInput,
142 "Corrupted input data (LZMA2:4)",
143 ));
144 }
145 self.lzma = Some(LZMADecoder::new(lc as _, lp as _, pb as _));
146
147 Ok(())
148 }
149
150 fn read_decode(&mut self, buf: &mut [u8]) -> Result<usize> {
151 if buf.len() == 0 {
152 return Ok(0);
153 }
154 if let Some(e) = &self.error {
155 return Err(std::io::Error::new(e.kind(), e.to_string()));
156 }
157
158 if self.end_reached {
159 return Ok(0);
160 }
161 let mut size = 0;
162 let mut len = buf.len();
163 let mut off = 0;
164 while len > 0 {
165 if self.uncompressed_size == 0 {
166 self.decode_chunk_header()?;
167 if self.end_reached {
168 return Ok(size);
169 }
170 }
171
172 let copy_size_max = self.uncompressed_size.min(len);
173 if !self.is_lzma_chunk {
174 self.lz.copy_uncompressed(&mut self.inner, copy_size_max)?;
175 } else {
176 self.lz.set_limit(copy_size_max);
177 if let Some(lzma) = self.lzma.as_mut() {
178 lzma.decode(&mut self.lz, &mut self.rc)?;
179 }
180 }
181
182 {
183 let copied_size = self.lz.flush(buf, off);
184 off += copied_size;
185 len -= copied_size;
186 size += copied_size;
187 self.uncompressed_size -= copied_size;
188 if self.uncompressed_size == 0 {
189 if !self.rc.is_finished() || self.lz.has_pending() {
190 return Err(std::io::Error::new(
191 ErrorKind::InvalidInput,
192 "rc not finished or lz has pending",
193 ));
194 }
195 }
196 }
197 }
198 Ok(size)
199 }
200}
201
202impl<R: Read> Read for LZMA2Reader<R> {
203 fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
204 match self.read_decode(buf) {
205 Ok(size) => Ok(size),
206 Err(e) => {
207 let error = std::io::Error::new(e.kind(), e.to_string());
208 self.error = Some(e);
209 return Err(error);
210 }
211 }
212 }
213}