# archive-to-parquet 0.8.0
#
# Recursively convert archives to parquet files
# Documentation
# Tools used by the tasks in this file, with the version requested for each.
[tools]
python = "3.12"
rust = "stable"
pre-commit = "latest"
uv = "latest"
"pipx:maturin" = { version = "latest" }
# cross is taken from the main branch of its git repository rather than a
# tagged version.
"cargo:https://github.com/cross-rs/cross" = { version = "branch:main", crate = "cross", locked = false }
"cargo:cargo-release" = "latest"

# Populates test_data/gping/ with the same pinned gping commit in three
# forms: archive.zip, archive.tar.gz and the decompressed archive.tar.
[tasks.test-data-gping]
description = "Download gping source code"
shell = 'bash -c'
run = [
    "mkdir -p test_data/gping/",
    # Start from a clean directory so stale archives are never picked up.
    "rm -f test_data/gping/*",
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # error page as if it were the archive.
    "curl --fail --location --output test_data/gping/archive.zip https://github.com/orf/gping/archive/e66b02167fa0cb2c5bae957e730d19ac9c22de47.zip",
    "curl --fail --location --output test_data/gping/archive.tar.gz https://github.com/orf/gping/archive/e66b02167fa0cb2c5bae957e730d19ac9c22de47.tar.gz",
    # Keep the .tar.gz and write the decompressed copy next to it.
    "gzip -dc test_data/gping/archive.tar.gz > test_data/gping/archive.tar",
]

# Runs the workspace tests for all targets through cross, excluding the
# py-archive-to-parquet package.
[tasks.test]
description = "Run tests"
# NOTE(review): CROSS_NO_WARNINGS = "0" presumably stops cross from treating
# its own warnings as errors; confirm against the cross documentation.
env = { RUST_BACKTRACE = "1", CROSS_NO_WARNINGS = "0" }
run = "cross test --locked --workspace --all-targets --exclude=py-archive-to-parquet"

# Runs only the documentation tests (--doc) for the workspace through cross,
# excluding the py-archive-to-parquet package.
[tasks.doctests]
description = "Run doctests"
env = { RUST_BACKTRACE = "1", CROSS_NO_WARNINGS = "0" }
run = "cross test --locked --doc --workspace --exclude=py-archive-to-parquet"

# Runs the binary twice through cross with test_data/output.parquet as the
# first argument: once against the local files in test_data/gping/ and once
# against the GitHub tarball URL (--urls). The cross target is filled in from
# the task's "target" option.
[tasks.integration-test]
description = "Run integration test"
depends = ["test-data-gping"]
env = { RUST_BACKTRACE = "1", CROSS_NO_WARNINGS = "0" }
shell = "bash -c"
# Literal (single-quoted) strings are used because the template expressions
# contain double quotes.
run = [
    'cross run --locked --target={{option(name="target")}} -- test_data/output.parquet test_data/gping/* --unique',
    'cross run --locked --target={{option(name="target")}} -- test_data/output.parquet https://github.com/orf/gping/archive/e66b02167fa0cb2c5bae957e730d19ac9c22de47.tar.gz --urls --unique',
]

# Builds only the archive-to-parquet binary in release mode through cross.
[tasks.release-build]
description = "Build release"
env = { RUST_BACKTRACE = "1", CROSS_NO_WARNINGS = "0" }
run = "cross build --locked --release --bin=archive-to-parquet"

# Type-checks every workspace member with cargo (not cross).
[tasks.check]
description = "Run check"
run = "cargo check --workspace"

# Lints all targets and features across the workspace; `-D warnings` turns
# every warning into an error.
[tasks.clippy]
description = "Run clippy"
run = "cargo clippy --all-targets --all-features --locked --workspace -- -D warnings"

# Verifies formatting only (--check); it does not rewrite any files.
[tasks.rustfmt]
description = "Run rustfmt"
run = "cargo fmt --all -- --check"

# Runs the pre-commit hooks against every file, printing a diff when a hook
# fails.
[tasks.pre-commit]
description = "Run pre-commit"
run = "pre-commit run --show-diff-on-failure --color=always --all-files"

# Runs the workspace benchmarks through cross with the `bench` feature
# enabled, after the gping test data has been downloaded.
[tasks.bench]
description = "Run benchmarks"
depends = ["test-data-gping"]
# NOTE(review): rustc documents this codegen flag as `target-cpu`; confirm the
# underscore spelling is intended.
env = { RUSTFLAGS = "-C target_cpu=native" }
run = "cross bench --locked --workspace --all-targets --exclude=py-archive-to-parquet -F bench"

# Builds the Python package by invoking maturin.
[tasks.build-python]
description = "Build python"
run = "maturin build"

# Invokes cargo-release. A description is included so this task is described
# like every other task in the file.
[tasks.release]
description = "Run cargo release"
run = "cargo release"

#
#[tasks.bench]
#description = "Run benchmarks"
#run = "cargo bench --quiet -F bench"
#depends = ["test-data-linux", "test-data-gping"]
#env = { RUST_BACKTRACE = '1' }
#
#[tasks.bench-opt]
#description = "Run benchmarks"
#run = "cargo bench --quiet -F bench --profile=bench-opt"
#depends = ["test-data-linux", "test-data-gping"]
#env = { RUSTFLAGS = '-C target_cpu=native', RUST_BACKTRACE = '1' }
#
#[tasks.flamegraph]
#description = "Generate flamegraph profile"
#run = """
#cargo flamegraph -f bench --root --profile=flamegraph -- \
#    /dev/null test_data/python/blobs/sha256/ --unique --compression=uncompressed
#"""
#depends = ["test-data-docker"]
#env = { RUSTFLAGS = '-C target_cpu=native', RUST_BACKTRACE = '1' }
#
#[tasks.build-release]
#description = "Build release"
#run = "cargo build --release --bins"