parcel_resolver/
cache.rs

1use bitflags::bitflags;
2use dashmap::DashSet;
3use rustc_hash::FxHasher;
4
5use crate::{
6  fs::FileKind,
7  package_json::PackageJson,
8  tsconfig::{TsConfig, TsConfigWrapper},
9  FileSystem, ResolverError,
10};
11use std::{
12  cell::UnsafeCell,
13  ffi::OsStr,
14  hash::{BuildHasherDefault, Hash, Hasher},
15  ops::Deref,
16  path::{is_separator, Component, Path, PathBuf},
17  sync::{
18    atomic::{AtomicU64, Ordering},
19    Arc, OnceLock,
20  },
21};
22
23/// Stores various cached info about file paths.
24pub struct Cache {
25  pub fs: Arc<dyn FileSystem>,
26  paths: DashSet<PathEntry<'static>, BuildHasherDefault<IdentityHasher>>,
27}
28
29/// An entry in the path cache. Can also be borrowed for lookups without allocations.
30enum PathEntry<'a> {
31  Owned(Arc<PathInfo>),
32  Borrowed { hash: u64, path: &'a Path },
33}
34
35impl<'a> Hash for PathEntry<'a> {
36  fn hash<H: Hasher>(&self, state: &mut H) {
37    match self {
38      PathEntry::Owned(info) => {
39        info.hash.hash(state);
40      }
41      PathEntry::Borrowed { hash, .. } => {
42        hash.hash(state);
43      }
44    }
45  }
46}
47
48impl<'a> PartialEq for PathEntry<'a> {
49  fn eq(&self, other: &Self) -> bool {
50    let self_path = match self {
51      PathEntry::Owned(info) => &info.path,
52      PathEntry::Borrowed { path, .. } => *path,
53    };
54    let other_path = match other {
55      PathEntry::Owned(info) => &info.path,
56      PathEntry::Borrowed { path, .. } => *path,
57    };
58    self_path.as_os_str() == other_path.as_os_str()
59  }
60}
61
62impl<'a> Eq for PathEntry<'a> {}
63
64#[cfg(not(target_arch = "wasm32"))]
65impl Default for Cache {
66  fn default() -> Self {
67    Cache::new(Arc::new(crate::fs::OsFileSystem))
68  }
69}
70
71impl Cache {
72  /// Creates an empty cache with the given file system.
73  pub fn new(fs: Arc<dyn FileSystem>) -> Cache {
74    Cache {
75      fs,
76      paths: DashSet::default(),
77    }
78  }
79
80  /// Returns cached info for a pre-normalized path.
81  pub fn get<P: AsRef<Path>>(&self, path: P) -> CachedPath {
82    self.get_path(path.as_ref())
83  }
84
85  /// Normalizes the given path and returns its cached info.
86  pub fn get_normalized<P: AsRef<Path>>(&self, path: P) -> CachedPath {
87    self.get_path(&normalize_path(path.as_ref()))
88  }
89
90  fn get_path(&self, path: &Path) -> CachedPath {
91    let mut hasher = FxHasher::default();
92    path.as_os_str().hash(&mut hasher);
93    let hash = hasher.finish();
94
95    let key = PathEntry::Borrowed { hash, path };
96
97    // A DashMap is just an array of RwLock<HashSet>, sharded by hash to reduce lock contention.
98    // This uses the low level raw API to avoid cloning the value when using the `entry` method.
99    // First, find which shard the value is in, and check to see if we already have a value in the map.
100    let shard = self.paths.determine_shard(hash as usize);
101    {
102      // Scope the read lock.
103      let map = self.paths.shards()[shard].read();
104      if let Some((PathEntry::Owned(entry), _)) = map.get(hash, |v| v.0 == key) {
105        return CachedPath(Arc::clone(entry));
106      }
107    }
108
109    // If that wasn't found, we need to create a new entry.
110    let parent = path
111      .parent()
112      .map(|p| CachedPath(Arc::clone(&self.get(p).0)));
113    let mut flags = parent.as_ref().map_or(PathFlags::empty(), |p| {
114      p.0.flags & PathFlags::IN_NODE_MODULES
115    });
116    if matches!(path.file_name(), Some(f) if f == "node_modules") {
117      flags |= PathFlags::IS_NODE_MODULES | PathFlags::IN_NODE_MODULES;
118    }
119
120    let info = Arc::new(PathInfo {
121      hash,
122      path: path.to_path_buf(),
123      parent,
124      flags,
125      kind: OnceLock::new(),
126      canonical: OnceLock::new(),
127      canonicalizing: AtomicU64::new(0),
128      package_json: OnceLock::new(),
129      tsconfig: OnceLock::new(),
130    });
131
132    self.paths.insert(PathEntry::Owned(Arc::clone(&info)));
133    CachedPath(info)
134  }
135}
136
137pub(crate) mod private {
138  use super::*;
139
140  #[allow(clippy::large_enum_variant)]
141  /// Special Cow implementation for a Cache that doesn't require Clone.
142  pub enum CacheCow<'a> {
143    Borrowed(&'a Cache),
144    Owned(Cache),
145  }
146
147  impl<'a> Deref for CacheCow<'a> {
148    type Target = Cache;
149
150    fn deref(&self) -> &Self::Target {
151      match self {
152        CacheCow::Borrowed(c) => c,
153        CacheCow::Owned(c) => c,
154      }
155    }
156  }
157
158  impl<'a> From<Cache> for CacheCow<'a> {
159    fn from(value: Cache) -> Self {
160      CacheCow::Owned(value)
161    }
162  }
163
164  impl<'a> From<&'a Cache> for CacheCow<'a> {
165    fn from(value: &'a Cache) -> Self {
166      CacheCow::Borrowed(value)
167    }
168  }
169}
170
171bitflags! {
172  struct PathFlags: u8 {
173    /// Whether this path is inside a node_modules directory.
174    const IN_NODE_MODULES = 1 << 0;
175    /// Whether this path is a node_modules directory.
176    const IS_NODE_MODULES = 1 << 1;
177  }
178}
179
180/// Cached info about a file path.
181struct PathInfo {
182  hash: u64,
183  path: PathBuf,
184  flags: PathFlags,
185  parent: Option<CachedPath>,
186  kind: OnceLock<FileKind>,
187  canonical: OnceLock<Result<CachedPath, ResolverError>>,
188  canonicalizing: AtomicU64,
189  package_json: OnceLock<Arc<Result<PackageJson, ResolverError>>>,
190  tsconfig: OnceLock<Arc<Result<TsConfigWrapper, ResolverError>>>,
191}
192
193#[derive(Clone)]
194pub struct CachedPath(Arc<PathInfo>);
195
196impl CachedPath {
197  /// Returns a std Path.
198  pub fn as_path(&self) -> &Path {
199    self.0.path.as_path()
200  }
201
202  /// Returns the parent path.
203  pub fn parent(&self) -> Option<&CachedPath> {
204    self.0.parent.as_ref()
205  }
206
207  fn kind(&self, fs: &dyn FileSystem) -> FileKind {
208    *self.0.kind.get_or_init(|| fs.kind(self.as_path()))
209  }
210
211  /// Returns whether the path is a file.
212  pub fn is_file(&self, fs: &dyn FileSystem) -> bool {
213    self.kind(fs).contains(FileKind::IS_FILE)
214  }
215
216  /// Returns whether the path is a directory.
217  pub fn is_dir(&self, fs: &dyn FileSystem) -> bool {
218    self.kind(fs).contains(FileKind::IS_DIR)
219  }
220
221  /// Returns whether the path is a node_modules directory.
222  pub fn is_node_modules(&self) -> bool {
223    self.0.flags.contains(PathFlags::IS_NODE_MODULES)
224  }
225
226  /// Returns whether the path is inside a node_modules directory.
227  pub fn in_node_modules(&self) -> bool {
228    self.0.flags.contains(PathFlags::IN_NODE_MODULES)
229  }
230
231  /// Returns the canonical path, resolving all symbolic links.
232  pub fn canonicalize(&self, cache: &Cache) -> Result<CachedPath, ResolverError> {
233    // Check if this thread is already canonicalizing. If so, we have found a circular symlink.
234    // If a different thread is canonicalizing, OnceLock will queue this thread to wait for the result.
235    let tid = THREAD_ID.with(|t| *t);
236    if self.0.canonicalizing.load(Ordering::Acquire) == tid {
237      return Err(std::io::Error::new(std::io::ErrorKind::NotFound, "Circular symlink").into());
238    }
239
240    self
241      .0
242      .canonical
243      .get_or_init(|| {
244        self.0.canonicalizing.store(tid, Ordering::Release);
245
246        let res = self
247          .parent()
248          .map(|parent| {
249            parent.canonicalize(cache).and_then(|parent_canonical| {
250              let path = parent_canonical.join(
251                self
252                  .as_path()
253                  .strip_prefix(parent.as_path())
254                  .map_err(|_| ResolverError::UnknownError)?,
255                cache,
256              );
257
258              if self.kind(&*cache.fs).contains(FileKind::IS_SYMLINK) {
259                let link = cache.fs.read_link(path.as_path())?;
260                if link.is_absolute() {
261                  return cache.get(&normalize_path(&link)).canonicalize(cache);
262                } else {
263                  return path.resolve(&link, cache).canonicalize(cache);
264                }
265              }
266
267              Ok(path)
268            })
269          })
270          .unwrap_or_else(|| Ok(self.clone()));
271
272        self.0.canonicalizing.store(0, Ordering::Release);
273        res
274      })
275      .clone()
276  }
277
278  /// Returns an iterator over all ancestor paths.
279  pub fn ancestors<'a>(&'a self) -> impl Iterator<Item = &'a CachedPath> {
280    std::iter::successors(Some(self), |p| p.parent())
281  }
282
283  /// Returns the file name of this path (the final path component).
284  pub fn file_name(&self) -> Option<&OsStr> {
285    self.as_path().file_name()
286  }
287
288  /// Returns the file extension of this path.
289  pub fn extension(&self) -> Option<&OsStr> {
290    self.as_path().extension()
291  }
292
293  /// Returns a new path with the given path segment appended to this path.
294  pub fn join<P: AsRef<OsStr>>(&self, segment: P, cache: &Cache) -> CachedPath {
295    SCRATCH_PATH.with(|path| {
296      let path = unsafe { &mut *path.get() };
297      path.clear();
298      path.as_mut_os_string().push(self.as_path().as_os_str());
299      push_normalized(path, segment.as_ref());
300      cache.get(path)
301    })
302  }
303
304  /// Returns a new path with the given node_modules directory appended to this path.
305  pub fn join_module(&self, module: &str, cache: &Cache) -> CachedPath {
306    SCRATCH_PATH.with(|path| {
307      let path = unsafe { &mut *path.get() };
308      path.clear();
309      path.as_mut_os_string().push(self.as_path().as_os_str());
310      path.push("node_modules");
311      push_normalized(path, module);
312      cache.get(path)
313    })
314  }
315
316  /// Returns a new path with the given node_modules directory and package subpath appended to this path.
317  pub fn join_package(&self, module: &str, subpath: &str, cache: &Cache) -> CachedPath {
318    SCRATCH_PATH.with(|path| {
319      let path = unsafe { &mut *path.get() };
320      path.clear();
321      path.as_mut_os_string().push(self.as_path().as_os_str());
322      push_normalized(path, module);
323      push_normalized(path, subpath);
324      cache.get(path)
325    })
326  }
327
328  /// Returns a new path by resolving the given subpath (including "." and ".." components) with this path.
329  pub fn resolve(&self, subpath: &Path, cache: &Cache) -> CachedPath {
330    SCRATCH_PATH.with(|path| {
331      let path = unsafe { &mut *path.get() };
332      path.clear();
333      if let Some(parent) = self.0.parent.as_ref() {
334        path.as_mut_os_string().push(parent.0.path.as_os_str());
335      }
336
337      for component in subpath.components() {
338        match component {
339          Component::Prefix(..) | Component::RootDir => unreachable!(),
340          Component::CurDir => {}
341          Component::ParentDir => {
342            path.pop();
343          }
344          Component::Normal(c) => {
345            path.push(c);
346          }
347        }
348      }
349
350      cache.get(path)
351    })
352  }
353
354  /// Returns a new path by appending the given file extension (without leading ".") with this path.
355  pub fn add_extension(&self, ext: &str, cache: &Cache) -> CachedPath {
356    SCRATCH_PATH.with(|path| {
357      let path = unsafe { &mut *path.get() };
358      path.clear();
359      let s = path.as_mut_os_string();
360      s.push(self.as_path().as_os_str());
361      s.push(".");
362      s.push(ext);
363      cache.get(path)
364    })
365  }
366
367  /// Returns the parsed package.json at this path.
368  pub fn package_json(&self, cache: &Cache) -> Arc<Result<PackageJson, ResolverError>> {
369    self
370      .0
371      .package_json
372      .get_or_init(|| Arc::new(PackageJson::read(self, cache)))
373      .clone()
374  }
375
376  /// Returns the parsed tsconfig.json at this path.
377  pub fn tsconfig<F: FnOnce(&mut TsConfigWrapper) -> Result<(), ResolverError>>(
378    &self,
379    cache: &Cache,
380    process: F,
381  ) -> Arc<Result<TsConfigWrapper, ResolverError>> {
382    self
383      .0
384      .tsconfig
385      .get_or_init(|| Arc::new(TsConfig::read(self, process, cache)))
386      .clone()
387  }
388}
389
390static THREAD_COUNT: AtomicU64 = AtomicU64::new(1);
391
392// Per-thread pre-allocated path that is used to perform operations on paths more quickly.
393thread_local! {
394  pub static SCRATCH_PATH: UnsafeCell<PathBuf> = UnsafeCell::new(PathBuf::with_capacity(256));
395  pub static THREAD_ID: u64 = THREAD_COUNT.fetch_add(1, Ordering::SeqCst);
396}
397
398#[cfg(windows)]
399#[inline]
400fn push_normalized<S: AsRef<OsStr>>(path: &mut PathBuf, s: S) {
401  // PathBuf::push does not normalize separators, so on Windows, push each part separately.
402  // Note that this does not use Path::components because that also strips the trailing separator.
403  let bytes = s.as_ref().as_encoded_bytes();
404  for part in bytes.split(|b| *b == b'/') {
405    path.push(unsafe { OsStr::from_encoded_bytes_unchecked(part) });
406  }
407}
408
409#[cfg(not(windows))]
410#[inline]
411fn push_normalized<S: AsRef<OsStr>>(path: &mut PathBuf, s: S) {
412  path.push(s.as_ref());
413}
414
415impl Hash for CachedPath {
416  fn hash<H: Hasher>(&self, state: &mut H) {
417    self.0.hash.hash(state);
418  }
419}
420
421impl PartialEq for CachedPath {
422  fn eq(&self, other: &Self) -> bool {
423    // Cached paths always point to unique values, so we only need to compare the pointers.
424    std::ptr::eq(Arc::as_ptr(&self.0), Arc::as_ptr(&other.0))
425  }
426}
427
428impl Eq for CachedPath {}
429
430impl std::fmt::Debug for CachedPath {
431  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
432    self.0.path.fmt(f)
433  }
434}
435
436/// A hasher that just passes through a value that is already a hash.
437#[derive(Default)]
438pub struct IdentityHasher {
439  hash: u64,
440}
441
442impl Hasher for IdentityHasher {
443  fn write(&mut self, bytes: &[u8]) {
444    if bytes.len() == 8 {
445      self.hash = u64::from_ne_bytes([
446        bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
447      ])
448    } else {
449      unreachable!()
450    }
451  }
452
453  fn finish(&self) -> u64 {
454    self.hash
455  }
456}
457
458pub fn normalize_path(path: &Path) -> PathBuf {
459  // Normalize path components to resolve ".." and "." segments.
460  // https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61
461  let mut components = path.components().peekable();
462  let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
463    components.next();
464    PathBuf::from(c.as_os_str())
465  } else {
466    PathBuf::new()
467  };
468
469  for component in components {
470    match component {
471      Component::Prefix(..) => unreachable!(),
472      Component::RootDir => {
473        ret.push(component.as_os_str());
474      }
475      Component::CurDir => {}
476      Component::ParentDir => {
477        ret.pop();
478      }
479      Component::Normal(c) => {
480        ret.push(c);
481      }
482    }
483  }
484
485  // If the path ends with a separator, add an additional empty component.
486  if matches!(path.as_os_str().as_encoded_bytes().last(), Some(b) if is_separator(*b as char)) {
487    ret.push("");
488  }
489
490  ret
491}
492
493#[cfg(test)]
494mod test {
495  use crate::OsFileSystem;
496
497  use super::*;
498  use assert_fs::prelude::*;
499
500  #[test]
501  fn test_canonicalize() -> Result<(), Box<dyn std::error::Error>> {
502    #[cfg(windows)]
503    if !is_elevated::is_elevated() {
504      println!("skipping symlink tests due to missing permissions");
505      return Ok(());
506    }
507
508    let dir = assert_fs::TempDir::new()?;
509    dir.child("foo/bar.js").write_str("")?;
510    dir.child("root.js").write_str("")?;
511
512    dir
513      .child("symlink")
514      .symlink_to_file(Path::new("foo").join("bar.js"))?;
515    dir
516      .child("foo/symlink")
517      .symlink_to_file(Path::new("..").join("root.js"))?;
518    dir
519      .child("absolute")
520      .symlink_to_file(dir.child("root.js").path())?;
521    dir
522      .child("recursive")
523      .symlink_to_file(Path::new("foo").join("symlink"))?;
524    dir.child("cycle").symlink_to_file("cycle1")?;
525    dir.child("cycle1").symlink_to_file("cycle")?;
526    dir
527      .child("absolute_cycle")
528      .symlink_to_file(dir.child("absolute_cycle1").path())?;
529    dir
530      .child("absolute_cycle1")
531      .symlink_to_file(dir.child("absolute_cycle").path())?;
532    dir.child("a/b/c").create_dir_all()?;
533    dir.child("a/b/e").symlink_to_file("..")?;
534    dir.child("a/d").symlink_to_file("..")?;
535    dir.child("a/b/c/x.txt").write_str("")?;
536    dir
537      .child("a/link")
538      .symlink_to_file(dir.child("a/b").path())?;
539
540    let fs = OsFileSystem::default();
541    let cache = Cache::new(Arc::new(fs));
542
543    assert_eq!(
544      cache
545        .get(dir.child("symlink").path())
546        .canonicalize(&cache)?,
547      cache
548        .get(dir.child("foo/bar.js").path())
549        .canonicalize(&cache)?
550    );
551    assert_eq!(
552      cache
553        .get(dir.child("foo/symlink").path())
554        .canonicalize(&cache)?,
555      cache
556        .get(dir.child("root.js").path())
557        .canonicalize(&cache)?
558    );
559    assert_eq!(
560      cache
561        .get(dir.child("absolute").path())
562        .canonicalize(&cache)?,
563      cache
564        .get(dir.child("root.js").path())
565        .canonicalize(&cache)?
566    );
567    assert_eq!(
568      cache
569        .get(dir.child("recursive").path())
570        .canonicalize(&cache)?,
571      cache
572        .get(dir.child("root.js").path())
573        .canonicalize(&cache)?
574    );
575    assert!(cache
576      .get(dir.child("cycle").path())
577      .canonicalize(&cache)
578      .is_err());
579    assert!(cache
580      .get(dir.child("absolute_cycle").path())
581      .canonicalize(&cache)
582      .is_err());
583    assert_eq!(
584      cache
585        .get(dir.child("a/b/e/d/a/b/e/d/a").path())
586        .canonicalize(&cache)?,
587      cache.get(dir.child("a").path()).canonicalize(&cache)?
588    );
589    assert_eq!(
590      cache
591        .get(dir.child("a/link/c/x.txt").path())
592        .canonicalize(&cache)?,
593      cache
594        .get(dir.child("a/b/c/x.txt").path())
595        .canonicalize(&cache)?
596    );
597
598    Ok(())
599  }
600}