1use std::convert::Infallible;
2
3use crate::Scheme;
4use bstr::{BStr, BString, ByteSlice};
5use percent_encoding::percent_decode_str;
6
/// The error returned when an input cannot be parsed as a URL, an SCP-like target, or a local path.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// The input, or a percent-decoded component of it, is not valid UTF-8.
    #[error("{} \"{url}\" is not valid UTF-8", kind.as_str())]
    Utf8 {
        /// The bytes that failed UTF-8 validation.
        url: BString,
        /// The syntactic form the input was being parsed as.
        kind: UrlKind,
        /// The underlying UTF-8 validation error.
        source: std::str::Utf8Error,
    },
    /// The `url` crate rejected the input (or the URL synthesized from it).
    #[error("{} {url:?} can not be parsed as valid URL", kind.as_str())]
    Url {
        /// The input as it was given to the parser.
        url: String,
        /// The syntactic form the input was being parsed as.
        kind: UrlKind,
        /// The underlying parse error.
        source: url::ParseError,
    },

    /// The protocol or the part of the URL leading up to its path exceeds the length limit.
    ///
    /// NOTE(review): the first `{}` of the message prints the length of `truncated_url`,
    /// not the measured length of the host portion.
    #[error("The host portion of the following URL is too long ({} bytes, {len} bytes total): {truncated_url:?}", truncated_url.len())]
    TooLong {
        /// A leading portion of the input, cut short so the error stays bounded in size.
        truncated_url: BString,
        /// The total length of the input in bytes.
        len: usize,
    },
    /// The input names no repository path, i.e. its path portion is empty or absent.
    #[error("{} \"{url}\" does not specify a path to a repository", kind.as_str())]
    MissingRepositoryPath {
        /// The input that lacks a path.
        url: BString,
        /// The syntactic form the input was being parsed as.
        kind: UrlKind,
    },
    /// The parsed URL cannot serve as a base, which is treated as being relative.
    #[error("URL {url:?} is relative which is not allowed in this context")]
    RelativeUrl {
        /// The input that was found to be relative.
        url: String,
    },
}
31
32impl From<Infallible> for Error {
33 fn from(_: Infallible) -> Self {
34 unreachable!("Cannot actually happen, but it seems there can't be a blanket impl for this")
35 }
36}
37
/// The syntactic form an input was interpreted as, used to label error messages.
#[derive(Debug, Clone, Copy)]
pub enum UrlKind {
    /// A URL with an explicit protocol, like `protocol://…`.
    Url,
    /// An SCP-like target of the form `host:path`.
    Scp,
    /// A path on the local file system.
    Local,
}

impl UrlKind {
    /// Returns the human-readable label of this kind as it appears in error messages.
    fn as_str(&self) -> &'static str {
        match *self {
            Self::Local => "local path",
            Self::Scp => "SCP-like target",
            Self::Url => "URL",
        }
    }
}
58
/// The syntactic form of an input, as classified by `find_scheme`.
pub(crate) enum InputScheme {
    /// The input contains `://`; `protocol_end` is the byte offset at which `://` starts.
    Url { protocol_end: usize },
    /// The input looks like an SCP-like `host:path` target; `colon` is the byte offset of its first `:`.
    Scp { colon: usize },
    /// Everything else, which is treated as a local path.
    Local,
}
64
65pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
66 if let Some(protocol_end) = input.find("://") {
69 return InputScheme::Url { protocol_end };
70 }
71
72 if let Some(colon) = input.find_byte(b':') {
73 let explicitly_local = &input[..colon].contains(&b'/');
76 let dos_driver_letter = cfg!(windows) && input[..colon].len() == 1;
77
78 if !explicitly_local && !dos_driver_letter {
79 return InputScheme::Scp { colon };
80 }
81 }
82
83 InputScheme::Local
84}
85
86pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error> {
87 const MAX_LEN: usize = 1024;
88 let bytes_to_path = input[protocol_end + "://".len()..]
89 .iter()
90 .filter(|b| !b.is_ascii_whitespace())
91 .skip_while(|b| **b == b'/' || **b == b'\\')
92 .position(|b| *b == b'/')
93 .unwrap_or(input.len() - protocol_end);
94 if bytes_to_path > MAX_LEN || protocol_end > MAX_LEN {
95 return Err(Error::TooLong {
96 truncated_url: input[..(protocol_end + "://".len() + MAX_LEN).min(input.len())].into(),
97 len: input.len(),
98 });
99 }
100 let (input, url) = input_to_utf8_and_url(input, UrlKind::Url)?;
101 let scheme = url.scheme().into();
102
103 if matches!(scheme, Scheme::Git | Scheme::Ssh) && url.path().is_empty() {
104 return Err(Error::MissingRepositoryPath {
105 url: input.into(),
106 kind: UrlKind::Url,
107 });
108 }
109
110 if url.cannot_be_a_base() {
111 return Err(Error::RelativeUrl { url: input.to_owned() });
112 }
113
114 Ok(crate::Url {
115 serialize_alternative_form: false,
116 scheme,
117 user: url_user(&url, UrlKind::Url)?,
118 password: url
119 .password()
120 .map(|s| percent_decoded_utf8(s, UrlKind::Url))
121 .transpose()?,
122 host: url.host_str().map(Into::into),
123 port: url.port(),
124 path: url.path().into(),
125 })
126}
127
128fn percent_decoded_utf8(s: &str, kind: UrlKind) -> Result<String, Error> {
129 Ok(percent_decode_str(s)
130 .decode_utf8()
131 .map_err(|err| Error::Utf8 {
132 url: s.into(),
133 kind,
134 source: err,
135 })?
136 .into_owned())
137}
138
139pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
140 let input = input_to_utf8(input, UrlKind::Scp)?;
141
142 let (host, path) = input.split_at(colon);
144 debug_assert_eq!(path.get(..1), Some(":"), "{path} should start with :");
145 let path = &path[1..];
146
147 if path.is_empty() {
148 return Err(Error::MissingRepositoryPath {
149 url: input.to_owned().into(),
150 kind: UrlKind::Scp,
151 });
152 }
153
154 let url = url::Url::parse(&format!("ssh://{host}")).map_err(|source| Error::Url {
159 url: input.to_owned(),
160 kind: UrlKind::Scp,
161 source,
162 })?;
163
164 Ok(crate::Url {
165 serialize_alternative_form: true,
166 scheme: url.scheme().into(),
167 user: url_user(&url, UrlKind::Scp)?,
168 password: url
169 .password()
170 .map(|s| percent_decoded_utf8(s, UrlKind::Scp))
171 .transpose()?,
172 host: url.host_str().map(Into::into),
173 port: url.port(),
174 path: path.into(),
175 })
176}
177
178fn url_user(url: &url::Url, kind: UrlKind) -> Result<Option<String>, Error> {
179 if url.username().is_empty() && url.password().is_none() {
180 Ok(None)
181 } else {
182 Ok(Some(percent_decoded_utf8(url.username(), kind)?))
183 }
184}
185
186pub(crate) fn file_url(input: &BStr, protocol_colon: usize) -> Result<crate::Url, Error> {
187 let input = input_to_utf8(input, UrlKind::Url)?;
188 let input_after_protocol = &input[protocol_colon + "://".len()..];
189
190 let Some(first_slash) = input_after_protocol
191 .find('/')
192 .or_else(|| cfg!(windows).then(|| input_after_protocol.find('\\')).flatten())
193 else {
194 return Err(Error::MissingRepositoryPath {
195 url: input.to_owned().into(),
196 kind: UrlKind::Url,
197 });
198 };
199
200 let windows_special_path = if cfg!(windows) {
208 let input_after_protocol = if first_slash == 0 {
212 &input_after_protocol[1..]
213 } else {
214 input_after_protocol
215 };
216 if input_after_protocol.chars().nth(1) == Some(':') {
218 Some(input_after_protocol)
219 } else {
220 None
221 }
222 } else {
223 None
224 };
225
226 let host = if windows_special_path.is_some() || first_slash == 0 {
227 None
229 } else {
230 Some(&input_after_protocol[..first_slash])
232 };
233
234 let path = windows_special_path.unwrap_or(&input_after_protocol[first_slash..]);
236
237 Ok(crate::Url {
238 serialize_alternative_form: false,
239 host: host.map(Into::into),
240 ..local(path.into())?
241 })
242}
243
244pub(crate) fn local(input: &BStr) -> Result<crate::Url, Error> {
245 if input.is_empty() {
246 return Err(Error::MissingRepositoryPath {
247 url: input.to_owned(),
248 kind: UrlKind::Local,
249 });
250 }
251
252 Ok(crate::Url {
253 serialize_alternative_form: true,
254 scheme: Scheme::File,
255 password: None,
256 user: None,
257 host: None,
258 port: None,
259 path: input.to_owned(),
260 })
261}
262
263fn input_to_utf8(input: &BStr, kind: UrlKind) -> Result<&str, Error> {
264 std::str::from_utf8(input).map_err(|source| Error::Utf8 {
265 url: input.to_owned(),
266 kind,
267 source,
268 })
269}
270
271fn input_to_utf8_and_url(input: &BStr, kind: UrlKind) -> Result<(&str, url::Url), Error> {
272 let input = input_to_utf8(input, kind)?;
273 url::Url::parse(input)
274 .map(|url| (input, url))
275 .map_err(|source| Error::Url {
276 url: input.to_owned(),
277 kind,
278 source,
279 })
280}