archive_to_parquet/
lib.rs1#![doc = include_str!("../README.md")]
2use byte_unit::{Byte, Unit};
3use indicatif::DecimalBytes;
4pub use parquet::basic::Compression;
5use std::fmt::Display;
6use std::num::NonZeroUsize;
7
8mod batch;
9mod channel;
10mod converter;
11mod hasher;
12mod progress;
13mod sink;
14mod visitor;
15
16pub use anyreader_walker::{AnyWalker, ArchiveStack, EntryDetails, FileEntry, FormatKind};
17pub use channel::{new_record_batch_channel, ConversionCounter, RecordBatchChannel};
18pub use converter::{Converter, ProgressBarConverter, StandardConverter};
19pub use sink::{new_parquet_writer, IncludeType, ParquetSink};
20pub use visitor::*;
21
22#[allow(clippy::too_many_arguments)]
23#[derive(Debug, Clone, derive_new::new)]
24pub struct ConvertionOptions {
25 pub threads: NonZeroUsize,
26 pub include: IncludeType,
27 pub unique: bool,
28 pub compression: Compression,
29 pub min_size: Option<Byte>,
30 pub max_size: Option<Byte>,
31 pub batch_count: usize,
32 pub batch_size: Byte,
33 pub extract_strings: bool,
34}
35
36impl ConvertionOptions {
37 pub const fn const_default() -> Self {
38 Self {
39 threads: NonZeroUsize::new(8).unwrap(),
40 include: IncludeType::All,
41 unique: false,
42 compression: Compression::SNAPPY,
43 min_size: None,
44 max_size: None,
45 batch_count: 14,
46 batch_size: Byte::from_u64_with_unit(100, Unit::MB).unwrap(),
48 extract_strings: false,
49 }
50 }
51
52 #[inline(always)]
53 pub fn get_size_range(&self) -> Option<std::ops::Range<Byte>> {
54 match (self.min_size, self.max_size) {
55 (Some(min), Some(max)) => Some(min..max),
56 (None, Some(max)) => Some(Byte::from(0u64)..max),
57 (Some(min), None) => Some(min..Byte::from(u64::MAX)),
58 (None, None) => None,
59 }
60 }
61}
62
63impl Display for ConvertionOptions {
64 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65 write!(
66 f,
67 "ConvertionOptions(threads={}, include={:?}, unique={}, compression={:?}",
68 self.threads, self.include, self.unique, self.compression
69 )?;
70 if let Some(min_size) = &self.min_size {
71 write!(f, ", min_size={}", DecimalBytes(min_size.as_u64()))?;
72 } else {
73 write!(f, ", min_size=None")?;
74 }
75
76 if let Some(max_size) = &self.max_size {
77 write!(f, ", max_size={}", DecimalBytes(max_size.as_u64()))?;
78 } else {
79 write!(f, ", size_range=None")?;
80 }
81 write!(
82 f,
83 ", batch_count={}, batch_size={:#.1})",
84 self.batch_count,
85 DecimalBytes(self.batch_size.as_u64())
86 )
87 }
88}