1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
use std::sync::LazyLock;
use std::{
  fmt::Display,
  ops::Deref,
  path::PathBuf,
  sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
  },
};

use async_trait::async_trait;
use derivative::Derivative;
use regex::Regex;
use rspack_collections::{Identifiable, Identifier};
use rspack_error::Result;
use rspack_util::identifier::strip_zero_width_space_for_fragment;

use super::LoaderContext;

#[derive(Derivative)]
#[derivative(Debug)]
pub struct LoaderItem<Context> {
  #[derivative(Debug(format_with = "fmt_loader"))]
  loader: Arc<dyn Loader<Context>>,
  /// Loader identifier
  request: Identifier,
  /// An absolute path or a virtual path for represent the loader.
  /// The absolute path is used to represent a loader stayed on the JS side.
  /// `$` split chain may be used to represent a composed loader chain from the JS side.
  /// Virtual path with a builtin protocol to represent a loader from the native side. e.g "builtin:".
  path: PathBuf,
  /// Query of a loader, starts with `?`
  query: Option<String>,
  /// Fragment of a loader, starts with `#`.
  fragment: Option<String>,
  /// Data shared between pitching and normal
  data: serde_json::Value,
  r#type: String,
  pitch_executed: AtomicBool,
  normal_executed: AtomicBool,
}

impl<C> LoaderItem<C> {
  pub fn loader(&self) -> &Arc<dyn Loader<C>> {
    &self.loader
  }

  #[inline]
  pub fn request(&self) -> Identifier {
    self.request
  }

  #[inline]
  pub fn r#type(&self) -> &str {
    &self.r#type
  }

  #[inline]
  pub fn data(&self) -> &serde_json::Value {
    &self.data
  }

  #[inline]
  pub fn set_data(&mut self, data: serde_json::Value) {
    self.data = data;
  }

  #[inline]
  pub fn pitch_executed(&self) -> bool {
    self.pitch_executed.load(Ordering::Relaxed)
  }

  #[inline]
  pub fn normal_executed(&self) -> bool {
    self.normal_executed.load(Ordering::Relaxed)
  }

  #[inline]
  pub fn set_pitch_executed(&self) {
    self.pitch_executed.store(true, Ordering::Relaxed)
  }

  #[inline]
  pub fn set_normal_executed(&self) {
    self.normal_executed.store(true, Ordering::Relaxed)
  }
}

impl<C> Display for LoaderItem<C> {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    write!(f, "{}", self.loader.identifier())
  }
}

pub struct LoaderItemList<'a, Context>(pub &'a [LoaderItem<Context>]);

impl<'a, Context> Deref for LoaderItemList<'a, Context> {
  type Target = [LoaderItem<Context>];

  fn deref(&self) -> &Self::Target {
    self.0
  }
}

impl<'a, Context> Default for LoaderItemList<'a, Context> {
  fn default() -> Self {
    Self(&[])
  }
}

pub trait DisplayWithSuffix: Display {
  fn display_with_suffix(&self, suffix: &str) -> String {
    let s = self.to_string();
    if s.is_empty() {
      return suffix.to_string();
    }
    self.to_string() + "!" + suffix
  }
}

impl<'a, Context> DisplayWithSuffix for LoaderItemList<'a, Context> {}
impl<Context> DisplayWithSuffix for LoaderItem<Context> {}
impl<'a, Context> Display for LoaderItemList<'a, Context> {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    let s = self
      .0
      .iter()
      .map(|item| item.to_string())
      .collect::<Vec<_>>()
      .join("!");

    write!(f, "{s}")
  }
}

impl<C> Identifiable for LoaderItem<C> {
  fn identifier(&self) -> Identifier {
    self.loader.identifier()
  }
}

#[async_trait]
pub trait Loader<Context = ()>: Identifiable + Send + Sync {
  async fn run(&self, _loader_context: &mut LoaderContext<Context>) -> Result<()> {
    // noop
    Ok(())
  }
  async fn pitch(&self, _loader_context: &mut LoaderContext<Context>) -> Result<()> {
    // noop
    Ok(())
  }
}

pub fn fmt_loader<T>(
  loader: &Arc<dyn Loader<T>>,
  fmt: &mut std::fmt::Formatter<'_>,
) -> Result<(), std::fmt::Error> {
  write!(fmt, "{}", loader.identifier())
}

impl<C> From<Arc<dyn Loader<C>>> for LoaderItem<C> {
  fn from(loader: Arc<dyn Loader<C>>) -> Self {
    if let Some((r#type, ident)) = loader.identifier().split_once('|') {
      let ResourceParsedData {
        path,
        query,
        fragment,
      } = parse_resource(ident).expect("identifier should be valid");
      return Self {
        loader,
        request: ident.into(),
        path,
        query,
        fragment,
        data: serde_json::Value::Null,
        r#type: r#type.to_string(),
        pitch_executed: AtomicBool::new(false),
        normal_executed: AtomicBool::new(false),
      };
    }
    let ident = loader.identifier();
    let ResourceParsedData {
      path,
      query,
      fragment,
    } = parse_resource(&ident).expect("identifier should be valid");
    Self {
      loader,
      request: ident,
      path,
      query,
      fragment,
      data: serde_json::Value::Null,
      r#type: String::default(),
      pitch_executed: AtomicBool::new(false),
      normal_executed: AtomicBool::new(false),
    }
  }
}

#[derive(Debug)]
pub struct ResourceParsedData {
  pub path: PathBuf,
  pub query: Option<String>,
  pub fragment: Option<String>,
}

pub fn parse_resource(resource: &str) -> Option<ResourceParsedData> {
  let groups = PATH_QUERY_FRAGMENT_REGEXP.captures(resource)?;

  Some(ResourceParsedData {
    path: strip_zero_width_space_for_fragment(groups.get(1)?.as_str())
      .into_owned()
      .into(),
    query: groups
      .get(2)
      .map(|q| strip_zero_width_space_for_fragment(q.as_str()).into_owned()),
    fragment: groups.get(3).map(|q| q.as_str().to_owned()),
  })
}

static PATH_QUERY_FRAGMENT_REGEXP: LazyLock<Regex> = LazyLock::new(|| {
  Regex::new("^((?:\u{200b}.|[^?#\u{200b}])*)(\\?(?:\u{200b}.|[^#\u{200b}])*)?(#.*)?$")
    .expect("Failed to initialize `PATH_QUERY_FRAGMENT_REGEXP`")
});

#[cfg(test)]
pub(crate) mod test {
  use std::{path::PathBuf, sync::Arc};

  use rspack_collections::{Identifiable, Identifier};

  use super::{Loader, LoaderItem};

  #[allow(dead_code)]
  pub(crate) struct Custom;

  #[async_trait::async_trait]
  impl Loader<()> for Custom {}
  impl Identifiable for Custom {
    fn identifier(&self) -> Identifier {
      "/rspack/custom-loader-1/index.js?foo=1#baz".into()
    }
  }

  #[allow(dead_code)]
  pub(crate) struct Custom2;
  #[async_trait::async_trait]
  impl Loader<()> for Custom2 {}
  impl Identifiable for Custom2 {
    fn identifier(&self) -> Identifier {
      "/rspack/custom-loader-2/index.js?bar=2#baz".into()
    }
  }

  #[allow(dead_code)]
  pub(crate) struct Builtin;
  #[async_trait::async_trait]
  impl Loader<()> for Builtin {}
  impl Identifiable for Builtin {
    fn identifier(&self) -> Identifier {
      "builtin:test-loader".into()
    }
  }

  pub(crate) struct PosixNonLenBlankUnicode;

  #[async_trait::async_trait]
  impl Loader<()> for PosixNonLenBlankUnicode {}
  impl Identifiable for PosixNonLenBlankUnicode {
    fn identifier(&self) -> Identifier {
      "/a/b/c.js?{\"c\": \"\u{200b}#foo\"}".into()
    }
  }

  pub(crate) struct WinNonLenBlankUnicode;
  #[async_trait::async_trait]
  impl Loader<()> for WinNonLenBlankUnicode {}
  impl Identifiable for WinNonLenBlankUnicode {
    fn identifier(&self) -> Identifier {
      "\\a\\b\\c.js?{\"c\": \"\u{200b}#foo\"}".into()
    }
  }

  #[test]
  fn should_handle_posix_non_len_blank_unicode_correctly() {
    let c1 = Arc::new(PosixNonLenBlankUnicode) as Arc<dyn Loader<()>>;
    let l: LoaderItem<()> = c1.into();
    assert_eq!(l.path, PathBuf::from("/a/b/c.js"));
    assert_eq!(l.query, Some("?{\"c\": \"#foo\"}".into()));
    assert_eq!(l.fragment, None);
  }

  #[test]
  fn should_handle_win_non_len_blank_unicode_correctly() {
    let c1 = Arc::new(WinNonLenBlankUnicode) as Arc<dyn Loader<()>>;
    let l: LoaderItem<()> = c1.into();
    assert_eq!(l.path, PathBuf::from(r#"\a\b\c.js"#));
    assert_eq!(l.query, Some("?{\"c\": \"#foo\"}".into()));
    assert_eq!(l.fragment, None);
  }
}