solana_accounts_db/hardened_unpack.rs

use {
    bzip2::bufread::BzDecoder,
    log::*,
    rand::{thread_rng, Rng},
    solana_sdk::genesis_config::{GenesisConfig, DEFAULT_GENESIS_ARCHIVE, DEFAULT_GENESIS_FILE},
    std::{
        collections::HashMap,
        fs::{self, File},
        io::{BufReader, Read},
        path::{
            Component::{self, CurDir, Normal},
            Path, PathBuf,
        },
        time::Instant,
    },
    tar::{
        Archive,
        EntryType::{Directory, GNUSparse, Regular},
    },
    thiserror::Error,
};

#[derive(Error, Debug)]
pub enum UnpackError {
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    #[error("Archive error: {0}")]
    Archive(String),
}

pub type Result<T> = std::result::Result<T, UnpackError>;

// 64 TiB; leaves a safe margin below the 128 TiB maximum userspace VmSize on amd64 Linux
// (ref: https://unix.stackexchange.com/a/386555/364236).
// Note that this is directly related to the mmapped data size, so protect against insane
// values. This is the apparent file size, which for sparse files includes holes.
const MAX_SNAPSHOT_ARCHIVE_UNPACKED_APPARENT_SIZE: u64 = 64 * 1024 * 1024 * 1024 * 1024;

// 4 TiB; the actually consumed disk usage, which for sparse files excludes holes
const MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE: u64 = 4 * 1024 * 1024 * 1024 * 1024;

const MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT: u64 = 5_000_000;
pub const MAX_GENESIS_ARCHIVE_UNPACKED_SIZE: u64 = 10 * 1024 * 1024; // 10 MiB
const MAX_GENESIS_ARCHIVE_UNPACKED_COUNT: u64 = 100;

fn checked_total_size_sum(total_size: u64, entry_size: u64, limit_size: u64) -> Result<u64> {
    trace!(
        "checked_total_size_sum: {} + {} < {}",
        total_size,
        entry_size,
        limit_size,
    );
    let total_size = total_size.saturating_add(entry_size);
    if total_size > limit_size {
        return Err(UnpackError::Archive(format!(
            "too large archive: {total_size} exceeds limit: {limit_size}",
        )));
    }
    Ok(total_size)
}

fn checked_total_count_increment(total_count: u64, limit_count: u64) -> Result<u64> {
    let total_count = total_count + 1;
    if total_count > limit_count {
        return Err(UnpackError::Archive(format!(
            "too many files in snapshot: {total_count:?}"
        )));
    }
    Ok(total_count)
}

fn check_unpack_result(unpack_result: bool, path: String) -> Result<()> {
    if !unpack_result {
        return Err(UnpackError::Archive(format!("failed to unpack: {path:?}")));
    }
    Ok(())
}

#[derive(Debug, PartialEq, Eq)]
pub enum UnpackPath<'a> {
    Valid(&'a Path),
    Ignore,
    Invalid,
}

fn unpack_archive<'a, A, C, D>(
    archive: &mut Archive<A>,
    apparent_limit_size: u64,
    actual_limit_size: u64,
    limit_count: u64,
    mut entry_checker: C, // checks if entry is valid
    entry_processor: D,   // processes entry after setting permissions
) -> Result<()>
where
    A: Read,
    C: FnMut(&[&str], tar::EntryType) -> UnpackPath<'a>,
    D: Fn(PathBuf),
{
    let mut apparent_total_size: u64 = 0;
    let mut actual_total_size: u64 = 0;
    let mut total_count: u64 = 0;

    let mut total_entries = 0;
    for entry in archive.entries()? {
        let mut entry = entry?;
        let path = entry.path()?;
        let path_str = path.display().to_string();

        // Although the `tar` crate safely skips such entries at actual unpack
        // time, fail early ourselves when a path contains odd components like
        // `..` or a leading `/`, so the pattern matching below is easier to
        // reason about:
        //   https://docs.rs/tar/0.4.26/src/tar/entry.rs.html#371
        let parts = path
            .components()
            .map(|p| match p {
                CurDir => Ok("."),
                Normal(c) => c.to_str().ok_or(()),
                _ => Err(()), // Prefix (for Windows) and RootDir are forbidden
            })
            .collect::<std::result::Result<Vec<_>, _>>();

        // Reject old-style BSD directory entries that aren't explicitly tagged as directories
        let legacy_dir_entry =
            entry.header().as_ustar().is_none() && entry.path_bytes().ends_with(b"/");
        let kind = entry.header().entry_type();
        let reject_legacy_dir_entry = legacy_dir_entry && (kind != Directory);
        let (Ok(parts), false) = (parts, reject_legacy_dir_entry) else {
            return Err(UnpackError::Archive(format!(
                "invalid path found: {path_str:?}"
            )));
        };

        let unpack_dir = match entry_checker(parts.as_slice(), kind) {
            UnpackPath::Invalid => {
                return Err(UnpackError::Archive(format!(
                    "extra entry found: {:?} {:?}",
                    path_str,
                    entry.header().entry_type(),
                )));
            }
            UnpackPath::Ignore => {
                continue;
            }
            UnpackPath::Valid(unpack_dir) => unpack_dir,
        };

        apparent_total_size = checked_total_size_sum(
            apparent_total_size,
            entry.header().size()?,
            apparent_limit_size,
        )?;
        actual_total_size = checked_total_size_sum(
            actual_total_size,
            entry.header().entry_size()?,
            actual_limit_size,
        )?;
        total_count = checked_total_count_increment(total_count, limit_count)?;

        let account_filename = match parts.as_slice() {
            ["accounts", account_filename] => Some(PathBuf::from(account_filename)),
            _ => None,
        };
        let entry_path = if let Some(account) = account_filename {
            // Special case account files. We're unpacking an account entry inside one of the
            // account_paths returned by `entry_checker`. We want to unpack into
            // account_path/<account> instead of account_path/accounts/<account> so we strip the
            // accounts/ prefix.
            sanitize_path(&account, unpack_dir)
        } else {
            sanitize_path(&path, unpack_dir)
        }?; // ? handles file system errors
        let Some(entry_path) = entry_path else {
            continue; // skip it
        };

        let unpack = entry.unpack(&entry_path);
        check_unpack_result(unpack.map(|_unpack| true)?, path_str)?;

        // Sanitize permissions.
        let mode = match entry.header().entry_type() {
            GNUSparse | Regular => 0o644,
            _ => 0o755,
        };
        set_perms(&entry_path, mode)?;

        // Process entry after setting permissions
        entry_processor(entry_path);

        total_entries += 1;
    }
    info!("unpacked {} entries total", total_entries);

    return Ok(());

    #[cfg(unix)]
    fn set_perms(dst: &Path, mode: u32) -> std::io::Result<()> {
        use std::os::unix::fs::PermissionsExt;

        let perm = fs::Permissions::from_mode(mode as _);
        fs::set_permissions(dst, perm)
    }

    #[cfg(windows)]
    fn set_perms(dst: &Path, _mode: u32) -> std::io::Result<()> {
        let mut perm = fs::metadata(dst)?.permissions();
        // This is OK for Windows, but clippy doesn't realize we're doing this
        // only on Windows.
        #[allow(clippy::permissions_set_readonly_false)]
        perm.set_readonly(false);
        fs::set_permissions(dst, perm)
    }
}

// return Err on file system error
// return Some(path) if path is good
// return None if we should skip this file
fn sanitize_path(entry_path: &Path, dst: &Path) -> Result<Option<PathBuf>> {
    // We cannot call unpack_in because it errors if we try to use 2 account paths.
    // So, this code is borrowed from unpack_in
    // ref: https://docs.rs/tar/*/tar/struct.Entry.html#method.unpack_in
    let mut file_dst = dst.to_path_buf();
    const SKIP: Result<Option<PathBuf>> = Ok(None);
    {
        let path = entry_path;
        for part in path.components() {
            match part {
                // Leading '/' characters, root paths, and '.'
                // components are just ignored and treated as "empty
                // components"
                Component::Prefix(..) | Component::RootDir | Component::CurDir => continue,

                // If any part of the filename is '..', then skip over
                // unpacking the file to prevent directory traversal
                // security issues.  See, e.g.: CVE-2001-1267,
                // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
                Component::ParentDir => return SKIP,

                Component::Normal(part) => file_dst.push(part),
            }
        }
    }

    // Skip cases where only slashes or '.' parts were seen, because
    // this is effectively an empty filename.
    if *dst == *file_dst {
        return SKIP;
    }

    // Skip entries without a parent (i.e. outside of FS root)
    let Some(parent) = file_dst.parent() else {
        return SKIP;
    };

    fs::create_dir_all(parent)?;

    // Here we differ slightly from tar::unpack_in, whose internal call to unpack behaves a
    // little differently: we only use validate_inside_dst for its check and ignore its
    // return value.
    validate_inside_dst(dst, parent)?;
    let target = parent.join(entry_path.file_name().unwrap());

    Ok(Some(target))
}

// copied from:
// https://github.com/alexcrichton/tar-rs/blob/d90a02f582c03dfa0fd11c78d608d0974625ae5d/src/entry.rs#L781
fn validate_inside_dst(dst: &Path, file_dst: &Path) -> Result<PathBuf> {
    // Abort if target (canonical) parent is outside of `dst`
    let canon_parent = file_dst.canonicalize().map_err(|err| {
        UnpackError::Archive(format!(
            "{} while canonicalizing {}",
            err,
            file_dst.display()
        ))
    })?;
    let canon_target = dst.canonicalize().map_err(|err| {
        UnpackError::Archive(format!("{} while canonicalizing {}", err, dst.display()))
    })?;
    if !canon_parent.starts_with(&canon_target) {
        return Err(UnpackError::Archive(format!(
            "trying to unpack outside of destination path: {}",
            canon_target.display()
        )));
    }
    Ok(canon_target)
}

/// Map from AppendVec file name to unpacked file system location
pub type UnpackedAppendVecMap = HashMap<String, PathBuf>;

// Select only the items at offset `index` within each group of `divisions` items.
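// For example, `ParallelSelector { index: 1, divisions: 3 }` selects items 1, 4, 7, ... and
// skips the rest, so cooperating unpack passes can split an archive's entries without overlap.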
pub struct ParallelSelector {
    pub index: usize,
    pub divisions: usize,
}

impl ParallelSelector {
    pub fn select_index(&self, index: usize) -> bool {
        index % self.divisions == self.index
    }
}

/// Unpacks a snapshot and collects AppendVec file names & paths
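///
/// Typical usage is roughly the following sketch (`decompressed_reader` stands in for whatever
/// `Read` impl decodes the archive's compression, and the paths are illustrative):
/// `let mut archive = Archive::new(decompressed_reader);`
/// `let unpacked_map = unpack_snapshot(&mut archive, ledger_dir, &account_paths, None)?;`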
pub fn unpack_snapshot<A: Read>(
    archive: &mut Archive<A>,
    ledger_dir: &Path,
    account_paths: &[PathBuf],
    parallel_selector: Option<ParallelSelector>,
) -> Result<UnpackedAppendVecMap> {
    let mut unpacked_append_vec_map = UnpackedAppendVecMap::new();

    unpack_snapshot_with_processors(
        archive,
        ledger_dir,
        account_paths,
        parallel_selector,
        |file, path| {
            unpacked_append_vec_map.insert(file.to_string(), path.join("accounts").join(file));
        },
        |_| {},
    )
    .map(|_| unpacked_append_vec_map)
}

/// Unpacks a snapshot and sends entry file paths through the `sender` channel
pub fn streaming_unpack_snapshot<A: Read>(
    archive: &mut Archive<A>,
    ledger_dir: &Path,
    account_paths: &[PathBuf],
    parallel_selector: Option<ParallelSelector>,
    sender: &crossbeam_channel::Sender<PathBuf>,
) -> Result<()> {
    unpack_snapshot_with_processors(
        archive,
        ledger_dir,
        account_paths,
        parallel_selector,
        |_, _| {},
        |entry_path_buf| {
            if entry_path_buf.is_file() {
                let result = sender.send(entry_path_buf);
                if let Err(err) = result {
                    panic!(
                        "failed to send path '{}' from unpacker to rebuilder: {err}",
                        err.0.display(),
                    );
                }
            }
        },
    )
}

fn unpack_snapshot_with_processors<A, F, G>(
    archive: &mut Archive<A>,
    ledger_dir: &Path,
    account_paths: &[PathBuf],
    parallel_selector: Option<ParallelSelector>,
    mut accounts_path_processor: F,
    entry_processor: G,
) -> Result<()>
where
    A: Read,
    F: FnMut(&str, &Path),
    G: Fn(PathBuf),
{
    assert!(!account_paths.is_empty());
    let mut i = 0;

    unpack_archive(
        archive,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_APPARENT_SIZE,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT,
        |parts, kind| {
            if is_valid_snapshot_archive_entry(parts, kind) {
                i += 1;
                if let Some(parallel_selector) = &parallel_selector {
                    if !parallel_selector.select_index(i - 1) {
                        return UnpackPath::Ignore;
                    }
                };
                if let ["accounts", file] = parts {
                    // Randomly distribute the account files across the available `account_paths`.
                    let path_index = thread_rng().gen_range(0..account_paths.len());
                    match account_paths
                        .get(path_index)
                        .map(|path_buf| path_buf.as_path())
                    {
                        Some(path) => {
                            accounts_path_processor(file, path);
                            UnpackPath::Valid(path)
                        }
                        None => UnpackPath::Invalid,
                    }
                } else {
                    UnpackPath::Valid(ledger_dir)
                }
            } else {
                UnpackPath::Invalid
            }
        },
        entry_processor,
    )
}

fn all_digits(v: &str) -> bool {
    if v.is_empty() {
        return false;
    }
    for x in v.chars() {
        if !x.is_ascii_digit() {
            return false;
        }
    }
    true
}

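// Returns true for names shaped like account storage files: a run of ASCII digits, a single
// '.', and another run of digits (e.g. `123.456`).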
fn like_storage(v: &str) -> bool {
    let mut periods = 0;
    let mut saw_numbers = false;
    for x in v.chars() {
        if !x.is_ascii_digit() {
            if x == '.' {
                if periods > 0 || !saw_numbers {
                    return false;
                }
                saw_numbers = false;
                periods += 1;
            } else {
                return false;
            }
        } else {
            saw_numbers = true;
        }
    }
    saw_numbers && periods == 1
}

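// Accepts only the entries a snapshot archive is expected to contain: the `version` file, the
// `accounts/` directory with its storage files, and the `snapshots/` directory holding
// `status_cache` plus all-digit subdirectories and files.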
fn is_valid_snapshot_archive_entry(parts: &[&str], kind: tar::EntryType) -> bool {
    match (parts, kind) {
        (["version"], Regular) => true,
        (["accounts"], Directory) => true,
        (["accounts", file], GNUSparse) if like_storage(file) => true,
        (["accounts", file], Regular) if like_storage(file) => true,
        (["snapshots"], Directory) => true,
        (["snapshots", "status_cache"], GNUSparse) => true,
        (["snapshots", "status_cache"], Regular) => true,
        (["snapshots", dir, file], GNUSparse) if all_digits(dir) && all_digits(file) => true,
        (["snapshots", dir, file], Regular) if all_digits(dir) && all_digits(file) => true,
        (["snapshots", dir], Directory) if all_digits(dir) => true,
        _ => false,
    }
}

#[derive(Error, Debug)]
pub enum OpenGenesisConfigError {
    #[error("unpack error: {0}")]
    Unpack(#[from] UnpackError),
    #[error("Genesis load error: {0}")]
    Load(#[from] std::io::Error),
}

pub fn open_genesis_config(
    ledger_path: &Path,
    max_genesis_archive_unpacked_size: u64,
) -> std::result::Result<GenesisConfig, OpenGenesisConfigError> {
    match GenesisConfig::load(ledger_path) {
        Ok(genesis_config) => Ok(genesis_config),
        Err(load_err) => {
            warn!(
                "Failed to load genesis_config at {ledger_path:?}: {load_err}. \
                Will attempt to unpack genesis archive and then retry loading."
            );

            let genesis_package = ledger_path.join(DEFAULT_GENESIS_ARCHIVE);
            unpack_genesis_archive(
                &genesis_package,
                ledger_path,
                max_genesis_archive_unpacked_size,
            )?;
            GenesisConfig::load(ledger_path).map_err(OpenGenesisConfigError::Load)
        }
    }
}

pub fn unpack_genesis_archive(
    archive_filename: &Path,
    destination_dir: &Path,
    max_genesis_archive_unpacked_size: u64,
) -> std::result::Result<(), UnpackError> {
    info!("Extracting {:?}...", archive_filename);
    let extract_start = Instant::now();

    fs::create_dir_all(destination_dir)?;
    let tar_bz2 = File::open(archive_filename)?;
    let tar = BzDecoder::new(BufReader::new(tar_bz2));
    let mut archive = Archive::new(tar);
    unpack_genesis(
        &mut archive,
        destination_dir,
        max_genesis_archive_unpacked_size,
    )?;
    info!(
        "Extracted {:?} in {:?}",
        archive_filename,
        Instant::now().duration_since(extract_start)
    );
    Ok(())
}

fn unpack_genesis<A: Read>(
    archive: &mut Archive<A>,
    unpack_dir: &Path,
    max_genesis_archive_unpacked_size: u64,
) -> Result<()> {
    unpack_archive(
        archive,
        max_genesis_archive_unpacked_size,
        max_genesis_archive_unpacked_size,
        MAX_GENESIS_ARCHIVE_UNPACKED_COUNT,
        |p, k| is_valid_genesis_archive_entry(unpack_dir, p, k),
        |_| {},
    )
}

fn is_valid_genesis_archive_entry<'a>(
    unpack_dir: &'a Path,
    parts: &[&str],
    kind: tar::EntryType,
) -> UnpackPath<'a> {
    trace!("validating: {:?} {:?}", parts, kind);
    #[allow(clippy::match_like_matches_macro)]
    match (parts, kind) {
        ([DEFAULT_GENESIS_FILE], GNUSparse) => UnpackPath::Valid(unpack_dir),
        ([DEFAULT_GENESIS_FILE], Regular) => UnpackPath::Valid(unpack_dir),
        (["rocksdb"], Directory) => UnpackPath::Ignore,
        (["rocksdb", _], GNUSparse) => UnpackPath::Ignore,
        (["rocksdb", _], Regular) => UnpackPath::Ignore,
        (["rocksdb_fifo"], Directory) => UnpackPath::Ignore,
        (["rocksdb_fifo", _], GNUSparse) => UnpackPath::Ignore,
        (["rocksdb_fifo", _], Regular) => UnpackPath::Ignore,
        _ => UnpackPath::Invalid,
    }
}

#[cfg(test)]
mod tests {
    use {
        super::*,
        assert_matches::assert_matches,
        tar::{Builder, Header},
    };

    #[test]
    fn test_archive_is_valid_entry() {
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", ""],
            tar::EntryType::Directory
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots", "3"],
            tar::EntryType::Directory
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots", "3", "3"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["version"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["accounts"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", ""],
            tar::EntryType::Regular
        ));

        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "x0"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "0x"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "①"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "0", "aa"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["aaaa"],
            tar::EntryType::Regular
        ));
    }

    #[test]
    fn test_valid_snapshot_accounts() {
        solana_logger::setup();
        assert!(is_valid_snapshot_archive_entry(
            &["accounts", "0.0"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["accounts", "01829.077"],
            tar::EntryType::Regular
        ));

        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "1.2.34"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "12."],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", ".12"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "0x0"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "abc"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "232323"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "৬.¾"],
            tar::EntryType::Regular
        ));
    }

    #[test]
    fn test_archive_is_valid_archive_entry() {
        let path = Path::new("");
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["genesis.bin"], tar::EntryType::Regular),
            UnpackPath::Valid(path)
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["genesis.bin"], tar::EntryType::GNUSparse,),
            UnpackPath::Valid(path)
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb"], tar::EntryType::Directory),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb", "foo"], tar::EntryType::Regular),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb", "foo"], tar::EntryType::GNUSparse,),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo"], tar::EntryType::Directory),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo", "foo"], tar::EntryType::Regular),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo"],
                tar::EntryType::GNUSparse,
            ),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["aaaa"], tar::EntryType::Regular),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["aaaa"], tar::EntryType::GNUSparse,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb"], tar::EntryType::Regular),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb"], tar::EntryType::GNUSparse,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb", "foo"], tar::EntryType::Directory,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb", "foo", "bar"],
                tar::EntryType::Directory,
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb", "foo", "bar"],
                tar::EntryType::Regular
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb", "foo", "bar"],
                tar::EntryType::GNUSparse
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo"], tar::EntryType::Regular),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo"], tar::EntryType::GNUSparse,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo"],
                tar::EntryType::Directory,
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo", "bar"],
                tar::EntryType::Directory,
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo", "bar"],
                tar::EntryType::Regular
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo", "bar"],
                tar::EntryType::GNUSparse
            ),
            UnpackPath::Invalid
        );
    }

    fn with_finalize_and_unpack<C>(archive: tar::Builder<Vec<u8>>, checker: C) -> Result<()>
    where
        C: Fn(&mut Archive<BufReader<&[u8]>>, &Path) -> Result<()>,
    {
        let data = archive.into_inner().unwrap();
        let reader = BufReader::new(&data[..]);
        let mut archive: Archive<std::io::BufReader<&[u8]>> = Archive::new(reader);
        let temp_dir = tempfile::TempDir::new().unwrap();

        checker(&mut archive, temp_dir.path())?;
        // Check that there are no bad permissions preventing deletion.
        let result = temp_dir.close();
        assert_matches!(result, Ok(()));
        Ok(())
    }

    fn finalize_and_unpack_snapshot(archive: tar::Builder<Vec<u8>>) -> Result<()> {
        with_finalize_and_unpack(archive, |a, b| {
            unpack_snapshot_with_processors(a, b, &[PathBuf::new()], None, |_, _| {}, |_| {})
        })
    }

    fn finalize_and_unpack_genesis(archive: tar::Builder<Vec<u8>>) -> Result<()> {
        with_finalize_and_unpack(archive, |a, b| {
            unpack_genesis(a, b, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE)
        })
    }

    #[test]
    fn test_archive_unpack_snapshot_ok() {
        let mut header = Header::new_gnu();
        header.set_path("version").unwrap();
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();

        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_genesis_ok() {
        let mut header = Header::new_gnu();
        header.set_path("genesis.bin").unwrap();
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();

        let result = finalize_and_unpack_genesis(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_genesis_bad_perms() {
        let mut archive = Builder::new(Vec::new());

        let mut header = Header::new_gnu();
        header.set_path("rocksdb").unwrap();
        header.set_entry_type(Directory);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();

        let mut header = Header::new_gnu();
        header.set_path("rocksdb/test").unwrap();
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];
        archive.append(&header, data).unwrap();

        // Removing all permissions makes it harder to delete this directory
        // or work with files inside it.
        let mut header = Header::new_gnu();
        header.set_path("rocksdb").unwrap();
        header.set_entry_type(Directory);
        header.set_mode(0o000);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();

        let result = finalize_and_unpack_genesis(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_genesis_bad_rocksdb_subdir() {
        let mut archive = Builder::new(Vec::new());

        let mut header = Header::new_gnu();
        header.set_path("rocksdb").unwrap();
        header.set_entry_type(Directory);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();

        // tar-rs treats the following entry as a Directory to support old tar formats.
        let mut header = Header::new_gnu();
        header.set_path("rocksdb/test/").unwrap();
        header.set_entry_type(Regular);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();

        let result = finalize_and_unpack_genesis(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "invalid path found: \"rocksdb/test/\"");
    }

    #[test]
    fn test_archive_unpack_snapshot_invalid_path() {
        let mut header = Header::new_gnu();
        // bypass the sanitization of the .set_path()
        for (p, c) in header
            .as_old_mut()
            .name
            .iter_mut()
            .zip(b"foo/../../../dangerous".iter().chain(Some(&0)))
        {
            *p = *c;
        }
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "invalid path found: \"foo/../../../dangerous\"");
    }

    fn with_archive_unpack_snapshot_invalid_path(path: &str) -> Result<()> {
        let mut header = Header::new_gnu();
        // bypass the sanitization of the .set_path()
        for (p, c) in header
            .as_old_mut()
            .name
            .iter_mut()
            .zip(path.as_bytes().iter().chain(Some(&0)))
        {
            *p = *c;
        }
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        with_finalize_and_unpack(archive, |unpacking_archive, path| {
            for entry in unpacking_archive.entries()? {
                if !entry?.unpack_in(path)? {
                    return Err(UnpackError::Archive("failed!".to_string()));
                } else if !path.join(path).exists() {
                    return Err(UnpackError::Archive("not existing!".to_string()));
                }
            }
            Ok(())
        })
    }

    #[test]
    fn test_archive_unpack_itself() {
        assert_matches!(
            with_archive_unpack_snapshot_invalid_path("ryoqun/work"),
            Ok(())
        );
        // Absolute paths are neutralized as relative
        assert_matches!(
            with_archive_unpack_snapshot_invalid_path("/etc/passwd"),
            Ok(())
        );
        assert_matches!(with_archive_unpack_snapshot_invalid_path("../../../dangerous"), Err(UnpackError::Archive(ref message)) if message == "failed!");
    }
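
    // Illustrative sketch of `sanitize_path` behavior, derived from the implementation above:
    // `..` components and effectively-empty paths are skipped, while normal relative paths
    // resolve to a location under the destination directory.
    #[test]
    fn test_archive_sanitize_path() {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let dst = temp_dir.path();

        // Entries containing `..` are skipped entirely rather than unpacked.
        assert_eq!(sanitize_path(Path::new("../evil"), dst).unwrap(), None);
        // Entries with only `.`/root components are effectively empty and also skipped.
        assert_eq!(sanitize_path(Path::new("./"), dst).unwrap(), None);

        // A normal relative path is resolved under `dst` (its parent directory is created).
        let target = sanitize_path(Path::new("accounts/123.456"), dst)
            .unwrap()
            .unwrap();
        assert_eq!(target, dst.join("accounts").join("123.456"));
    }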

    #[test]
    fn test_archive_unpack_snapshot_invalid_entry() {
        let mut header = Header::new_gnu();
        header.set_path("foo").unwrap();
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "extra entry found: \"foo\" Regular");
    }

    #[test]
    fn test_archive_unpack_snapshot_too_large() {
        let mut header = Header::new_gnu();
        header.set_path("version").unwrap();
        header.set_size(1024 * 1024 * 1024 * 1024 * 1024);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(
            result,
            Err(UnpackError::Archive(ref message))
                if message == &format!(
                    "too large archive: 1125899906842624 exceeds limit: {MAX_SNAPSHOT_ARCHIVE_UNPACKED_APPARENT_SIZE}"
                )
        );
    }

    #[test]
    fn test_archive_unpack_snapshot_bad_unpack() {
        let result = check_unpack_result(false, "abc".to_string());
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "failed to unpack: \"abc\"");
    }

    #[test]
    fn test_archive_checked_total_size_sum() {
        let result = checked_total_size_sum(500, 500, MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE);
        assert_matches!(result, Ok(1000));

        let result =
            checked_total_size_sum(u64::MAX - 2, 2, MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE);
        assert_matches!(
            result,
            Err(UnpackError::Archive(ref message))
                if message == &format!(
                    "too large archive: 18446744073709551615 exceeds limit: {MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE}"
                )
        );
    }

    #[test]
    fn test_archive_checked_total_size_count() {
        let result = checked_total_count_increment(101, MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT);
        assert_matches!(result, Ok(102));

        let result =
            checked_total_count_increment(999_999_999_999, MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT);
        assert_matches!(
            result,
            Err(UnpackError::Archive(ref message))
                if message == "too many files in snapshot: 1000000000000"
        );
    }

    #[test]
    fn test_archive_unpack_account_path() {
        let mut header = Header::new_gnu();
        header.set_path("accounts/123.456").unwrap();
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = with_finalize_and_unpack(archive, |ar, tmp| {
            unpack_snapshot_with_processors(
                ar,
                tmp,
                &[tmp.join("accounts_dest")],
                None,
                |_, _| {},
                |path| assert_eq!(path, tmp.join("accounts_dest/123.456")),
            )
        });
        assert_matches!(result, Ok(()));
    }
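
    // Minimal sketch of how `ParallelSelector` strides over entry indexes; the values follow
    // directly from `select_index` above.
    #[test]
    fn test_parallel_selector_strides_indexes() {
        let selector = ParallelSelector {
            index: 1,
            divisions: 3,
        };
        // With 3 divisions, the selector at index 1 picks entries 1, 4, 7, ... and skips the rest.
        let selected: Vec<usize> = (0..9).filter(|i| selector.select_index(*i)).collect();
        assert_eq!(selected, vec![1, 4, 7]);
    }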
}