lzma_rust/enc/
lzma2_writer.rs

1use std::io::{ErrorKind, Write};
2
3use byteorder::WriteBytesExt;
4
5use super::counting::CountingWriter;
6
7use super::encoder::LZMAEncoderModes;
8use super::{
9    encoder::{EncodeMode, LZMAEncoder},
10    lz::MFType,
11    range_enc::{RangeEncoder, RangeEncoderBuffer},
12};
13
/// Encoder settings consumed by `LZMA2Writer::new`.
///
/// Values are normally filled in from a preset level via
/// `LZMA2Options::with_preset` / `LZMA2Options::set_preset`; `Default` selects
/// preset 6.
#[derive(Debug, Clone)]
pub struct LZMA2Options {
    /// Dictionary size handed to the LZMA encoder (presumably in bytes; the
    /// preset table uses values from `1 << 18` to `1 << 26`).
    pub dict_size: u32,
    /// LZMA `lc` parameter (conventionally the number of literal context bits).
    /// Packed into the properties byte by `get_props`.
    pub lc: u32,
    /// LZMA `lp` parameter (conventionally the number of literal position bits).
    /// Packed into the properties byte by `get_props`.
    pub lp: u32,
    /// LZMA `pb` parameter (conventionally the number of position bits).
    /// Packed into the properties byte by `get_props`.
    pub pb: u32,
    /// Encoder mode; presets 0-3 select `EncodeMode::Fast`, presets 4-9 select
    /// `EncodeMode::Normal`.
    pub mode: EncodeMode,
    /// "Nice length" passed to the encoder; presets use values between 16 and
    /// `NICE_LEN_MAX`.
    pub nice_len: u32,
    /// `MFType` used by the encoder (presumably the match finder); presets 0-3
    /// select `HC4`, presets 4-9 select `BT4`.
    pub mf: MFType,
    /// Depth limit passed to the encoder; the `BT4` presets set it to 0.
    pub depth_limit: i32,
    /// Optional preset dictionary handed to the encoder by `LZMA2Writer::new`.
    pub preset_dict: Option<Vec<u8>>,
}
26
27impl Default for LZMA2Options {
28    fn default() -> Self {
29        Self::with_preset(6)
30    }
31}
32impl LZMA2Options {
33    pub const LC_DEFAULT: u32 = 3;
34    pub const LP_DEFAULT: u32 = 0;
35    pub const PB_DEFAULT: u32 = 2;
36    pub const NICE_LEN_MAX: u32 = 273;
37    pub const NICE_LEN_MIN: u32 = 8;
38    pub const DICT_SIZE_DEFAULT: u32 = 8 << 20;
39    const PRESET_TO_DICT_SIZE: &'static [u32] = &[
40        1 << 18,
41        1 << 20,
42        1 << 21,
43        1 << 22,
44        1 << 22,
45        1 << 23,
46        1 << 23,
47        1 << 24,
48        1 << 25,
49        1 << 26,
50    ];
51    const PRESET_TO_DEPTH_LIMIT: &'static [i32] = &[4, 8, 24, 48];
52    pub fn new(
53        dict_size: u32,
54        lc: u32,
55        lp: u32,
56        pb: u32,
57        mode: EncodeMode,
58        nice_len: u32,
59        mf: MFType,
60        depth_limit: i32,
61    ) -> Self {
62        Self {
63            dict_size,
64            lc,
65            lp,
66            pb,
67            mode,
68            nice_len,
69            mf,
70            depth_limit,
71            preset_dict: None,
72        }
73    }
74
75    /// preset: [0..9]
76    #[inline]
77    pub fn with_preset(preset: u32) -> Self {
78        let mut opt = Self {
79            dict_size: Default::default(),
80            lc: Default::default(),
81            lp: Default::default(),
82            pb: Default::default(),
83            mode: EncodeMode::Normal,
84            nice_len: Default::default(),
85            mf: Default::default(),
86            depth_limit: Default::default(),
87            preset_dict: Default::default(),
88        };
89        opt.set_preset(preset);
90        opt
91    }
92
93    /// preset: [0..9]
94    pub fn set_preset(&mut self, preset: u32) {
95        if preset > 9 {
96            return;
97        }
98        self.lc = Self::LC_DEFAULT;
99        self.lp = Self::LP_DEFAULT;
100        self.pb = Self::PB_DEFAULT;
101        self.dict_size = Self::PRESET_TO_DICT_SIZE[preset as usize];
102        if preset <= 3 {
103            self.mode = EncodeMode::Fast;
104            self.mf = MFType::HC4;
105            self.nice_len = if preset <= 1 { 128 } else { Self::NICE_LEN_MAX };
106            self.depth_limit = Self::PRESET_TO_DEPTH_LIMIT[preset as usize];
107        } else {
108            self.mode = EncodeMode::Normal;
109            self.mf = MFType::BT4;
110            self.nice_len = if preset == 4 {
111                16
112            } else if preset == 5 {
113                32
114            } else {
115                64
116            };
117            self.depth_limit = 0;
118        }
119    }
120
121    pub fn get_memery_usage(&self) -> u32 {
122        let dict_size = self.dict_size;
123        let extra_size_before = get_extra_size_before(dict_size);
124        70 + LZMAEncoder::get_mem_usage(self.mode, dict_size, extra_size_before, self.mf)
125    }
126
127    #[inline(always)]
128    pub fn get_props(&self) -> u8 {
129        ((self.pb * 5 + self.lp) * 9 + self.lc) as u8
130    }
131}
/// 64 KiB: size of the range-encoder buffer and the largest chunk emitted by
/// the uncompressed-chunk path.
const COMPRESSED_SIZE_MAX: u32 = 64 << 10;

/// Returns by how much `COMPRESSED_SIZE_MAX` exceeds `dict_size`, or 0 when the
/// dictionary is at least that large.
pub fn get_extra_size_before(dict_size: u32) -> u32 {
    COMPRESSED_SIZE_MAX.saturating_sub(dict_size)
}
140
/// LZMA2 format writer.
///
/// Bytes passed to `write` are buffered in the encoder and emitted to the
/// wrapped writer as LZMA2 chunks. The stream is only terminated (end marker
/// `0x00`) by `finish`, or by calling `write` with an empty buffer; dropping
/// the writer does not terminate it.
///
/// # Examples
/// ```ignore
/// use std::io::Write;
/// use lzma_rust::enc::lzma2_writer::{LZMA2Options, LZMA2Writer};
/// // NOTE(review): `new` takes a `CountingWriter<W>`; this assumes a
/// // `CountingWriter::new(inner)` constructor is available — confirm against
/// // the `counting` module and add the matching `use`.
/// let sink = CountingWriter::new(Vec::new());
/// let mut writer = LZMA2Writer::new(sink, &LZMA2Options::default());
/// writer.write_all(b"hello world").unwrap();
/// // `finish` returns `io::Result<()>`; it does not hand back the sink.
/// writer.finish().unwrap();
/// ```
pub struct LZMA2Writer<W: Write> {
    /// Destination that receives chunk headers, chunk payloads and the end marker.
    inner: CountingWriter<W>,
    /// Range encoder backed by a buffer of `COMPRESSED_SIZE_MAX` bytes; its
    /// contents are written to `inner` once per LZMA chunk.
    rc: RangeEncoder<RangeEncoderBuffer>,
    /// LZMA encoder, including the window (`lz`) that `write` fills.
    lzma: LZMAEncoder,
    /// Mode state returned together with the encoder by `LZMAEncoder::new`.
    mode: LZMAEncoderModes,
    /// Properties byte from `LZMA2Options::get_props`, written in a chunk
    /// header whenever `props_needed` is set.
    props: u8,
    /// The next chunk must signal a dictionary reset.
    dict_reset_needed: bool,
    /// The next LZMA chunk must signal a state reset.
    state_reset_needed: bool,
    /// The next LZMA chunk must carry the properties byte.
    props_needed: bool,
    /// Number of input bytes accepted by `write` that have not yet been
    /// emitted in a chunk.
    pending_size: u32,
    /// Set once the end marker has been written.
    finished: bool,
}
163
164impl<W: Write> LZMA2Writer<W> {
165    pub fn new(inner: CountingWriter<W>, options: &LZMA2Options) -> Self {
166        let dict_size = options.dict_size;
167        let rc = RangeEncoder::new_buffer(COMPRESSED_SIZE_MAX as usize);
168        let (mut lzma, mode) = LZMAEncoder::new(
169            options.mode,
170            options.lc,
171            options.lp,
172            options.pb,
173            options.mf,
174            options.depth_limit,
175            options.dict_size,
176            options.nice_len as usize,
177        );
178
179        let props = options.get_props();
180        let mut dict_reset_needed = true;
181        if let Some(preset_dict) = &options.preset_dict {
182            lzma.lz.set_preset_dict(dict_size, preset_dict);
183            dict_reset_needed = false;
184        }
185        Self {
186            inner,
187            rc,
188            lzma,
189            mode,
190            props,
191            dict_reset_needed,
192            state_reset_needed: true,
193            props_needed: true,
194            pending_size: 0,
195            finished: false,
196        }
197    }
198
199    fn write_lzma(&mut self, uncompressed_size: u32, compressed_size: u32) -> std::io::Result<()> {
200        let mut control = if self.props_needed {
201            if self.dict_reset_needed {
202                0x80 + (3 << 5)
203            } else {
204                0x80 + (2 << 5)
205            }
206        } else {
207            if self.state_reset_needed {
208                0x80 + (1 << 5)
209            } else {
210                0x80
211            }
212        };
213        control = control | (uncompressed_size - 1) >> 16;
214        let mut chunk_header = [0u8; 6];
215        chunk_header[0] = control as u8;
216        chunk_header[1] = ((uncompressed_size - 1) >> 8) as u8;
217        chunk_header[2] = (uncompressed_size - 1) as u8;
218        chunk_header[3] = ((compressed_size - 1) >> 8) as u8;
219        chunk_header[4] = (compressed_size - 1) as u8;
220        if self.props_needed {
221            chunk_header[5] = self.props as u8;
222            self.inner.write_all(&chunk_header)?;
223        } else {
224            self.inner.write_all(&chunk_header[..5])?;
225        }
226
227        self.rc.write_to(&mut self.inner)?;
228        self.props_needed = false;
229        self.state_reset_needed = false;
230        self.dict_reset_needed = false;
231        Ok(())
232    }
233
234    fn write_uncompressed(&mut self, mut uncompressed_size: u32) -> std::io::Result<()> {
235        while uncompressed_size > 0 {
236            let chunk_size = uncompressed_size.min(COMPRESSED_SIZE_MAX as u32);
237            let mut chunk_header = [0u8; 3];
238            chunk_header[0] = if self.dict_reset_needed { 0x01 } else { 0x02 };
239            chunk_header[1] = ((chunk_size - 1) >> 8) as u8;
240            chunk_header[2] = (chunk_size - 1) as u8;
241            self.inner.write_all(&chunk_header)?;
242            self.lzma.lz.copy_uncompressed(
243                &mut self.inner,
244                uncompressed_size as i32,
245                chunk_size as usize,
246            )?;
247            uncompressed_size -= chunk_size;
248            self.dict_reset_needed = false;
249        }
250        self.state_reset_needed = true;
251        Ok(())
252    }
253    fn write_chunk(&mut self) -> std::io::Result<()> {
254        let compressed_size = self.rc.finish_buffer()?.unwrap_or_default() as u32;
255        let mut uncompressed_size = self.lzma.data.uncompressed_size;
256        assert!(compressed_size > 0);
257        assert!(
258            uncompressed_size > 0,
259            "uncompressed_size is 0, read_pos={}",
260            self.lzma.lz.read_pos
261        );
262        if compressed_size + 2 < uncompressed_size {
263            self.write_lzma(uncompressed_size, compressed_size)?;
264        } else {
265            self.lzma.reset(&mut self.mode);
266            uncompressed_size = self.lzma.data.uncompressed_size;
267            assert!(uncompressed_size > 0);
268            self.write_uncompressed(uncompressed_size)?;
269        }
270        self.pending_size -= uncompressed_size;
271        self.lzma.reset_uncompressed_size();
272        self.rc.reset_buffer();
273        Ok(())
274    }
275    fn write_end_marker(&mut self) -> std::io::Result<()> {
276        assert!(!self.finished);
277
278        self.lzma.lz.set_finishing();
279
280        while self.pending_size > 0 {
281            self.lzma.encode_for_lzma2(&mut self.rc, &mut self.mode)?;
282            self.write_chunk()?;
283        }
284
285        self.inner.write_u8(0x00)?;
286        self.finished = true;
287
288        Ok(())
289    }
290
291    pub fn finish(&mut self) -> std::io::Result<()> {
292        if !self.finished {
293            self.write_end_marker()?;
294        }
295        Ok(())
296    }
297}
298
impl<W: Write> Drop for LZMA2Writer<W> {
    /// Does nothing: dropping the writer neither encodes pending input nor
    /// writes the end marker. Call `finish` before dropping to terminate the
    /// stream.
    fn drop(&mut self) {}
}
302impl<W: Write> Write for LZMA2Writer<W> {
303    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
304        let mut len = buf.len();
305        if len == 0 && !self.finished {
306            self.finish()?;
307            self.inner.write(buf)?;
308            return Ok(0);
309        }
310        if self.finished {
311            return Err(std::io::Error::new(ErrorKind::Other, "LZMA2 finished"));
312        }
313
314        let mut off = 0;
315        while len > 0 {
316            let used = self.lzma.lz.fill_window(&buf[off..(off + len)]);
317            off += used;
318            len -= used;
319            self.pending_size += used as u32;
320            if self.lzma.encode_for_lzma2(&mut self.rc, &mut self.mode)? {
321                self.write_chunk()?;
322            }
323        }
324        Ok(off)
325    }
326
327    fn flush(&mut self) -> std::io::Result<()> {
328        if self.finished {
329            return Err(std::io::Error::new(
330                ErrorKind::Other,
331                "LZMA2 flush finished",
332            ));
333        }
334        self.lzma.lz.set_flushing();
335        while self.pending_size > 0 {
336            self.lzma.encode_for_lzma2(&mut self.rc, &mut self.mode)?;
337            self.write_chunk()?;
338        }
339        self.inner.flush()
340    }
341}