gix_url/lib.rs
1//! A library implementing a URL for use in git with access to its special capabilities.
2//! ## Feature Flags
3#![cfg_attr(
4 all(doc, feature = "document-features"),
5 doc = ::document_features::document_features!()
6)]
7#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))]
8#![deny(rust_2018_idioms, missing_docs)]
9#![forbid(unsafe_code)]
10
11use std::{borrow::Cow, path::PathBuf};
12
13use bstr::{BStr, BString};
14
15///
16pub mod expand_path;
17
18mod scheme;
19pub use scheme::Scheme;
20mod impls;
21
22///
23pub mod parse;
24
25/// Parse the given `bytes` as a [git url](Url).
26///
27/// # Note
28///
29/// We cannot and should never have to deal with UTF-16 encoded windows strings, so bytes input is acceptable.
30/// For file-paths, we don't expect UTF8 encoding either.
31pub fn parse(input: &BStr) -> Result<Url, parse::Error> {
32 use parse::InputScheme;
33 match parse::find_scheme(input) {
34 InputScheme::Local => parse::local(input),
35 InputScheme::Url { protocol_end } if input[..protocol_end].eq_ignore_ascii_case(b"file") => {
36 parse::file_url(input, protocol_end)
37 }
38 InputScheme::Url { protocol_end } => parse::url(input, protocol_end),
39 InputScheme::Scp { colon } => parse::scp(input, colon),
40 }
41}
42
43/// Expand `path` for the given `user`, which can be obtained by [`parse()`], resolving the home directories
44/// of `user` automatically.
45///
46/// If more precise control of the resolution mechanism is needed, then use the [expand_path::with()] function.
47pub fn expand_path(user: Option<&expand_path::ForUser>, path: &BStr) -> Result<PathBuf, expand_path::Error> {
48 expand_path::with(user, path, |user| match user {
49 expand_path::ForUser::Current => gix_path::env::home_dir(),
50 expand_path::ForUser::Name(user) => {
51 gix_path::env::home_dir().and_then(|home| home.parent().map(|home_dirs| home_dirs.join(user.to_string())))
52 }
53 })
54}
55
/// Classification of a portion of a URL by whether it is *syntactically* safe to pass as an argument to a command-line program.
///
/// Various parts of URLs can be specified to begin with `-`. If they are used as options to a command-line application
/// such as an SSH client, they will be treated as options rather than as non-option arguments as the developer intended.
/// This is a security risk, because URLs are not always trusted and can often be composed or influenced by an attacker.
/// See <https://secure.phabricator.com/T12961> for details.
///
/// # Security Warning
///
/// This type only expresses known *syntactic* risk. It does not cover other risks, such as passing a personal access
/// token as a username rather than a password in an application that logs usernames.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum ArgumentSafety<'a> {
    /// May be safe. There is nothing to pass, so there is nothing dangerous.
    Absent,
    /// May be safe. The argument does not begin with a `-` and so will not be confused as an option.
    /// The contained value is the argument itself.
    Usable(&'a str),
    /// Dangerous! Begins with `-` and could be treated as an option. Use the value in error messages only,
    /// never as an actual argument.
    Dangerous(&'a str),
}
76
/// A URL with support for specialized git related capabilities.
///
/// Additionally there is support for [deserialization](Url::from_bytes()) and [serialization](Url::to_bstring()).
///
/// The `user`, `password` and `host` fields are private; read them through the accessor methods of the same
/// name, and change `user` and `password` with [Url::set_user()] and [Url::set_password()].
///
/// # Security Warning
///
/// URLs may contain passwords and using standard [formatting](std::fmt::Display) will redact
/// such password, whereas [lossless serialization](Url::to_bstring()) will contain all parts of the
/// URL.
/// **Beware that some URLs still print secrets if they use them outside of the designated password fields.**
///
/// Also note that URLs that fail to parse are typically stored in [the resulting error](parse::Error) type
/// and printed in full using its display implementation.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Url {
    /// The URL scheme.
    pub scheme: Scheme,
    /// The user to impersonate on the remote.
    user: Option<String>,
    /// The password associated with a user.
    password: Option<String>,
    /// The host to which to connect. Localhost is implied if `None`.
    host: Option<String>,
    /// When serializing, use the alternative form as the URL was parsed as such.
    serialize_alternative_form: bool,
    /// The port to use when connecting to a host. If `None`, standard ports depending on `scheme` will be used.
    pub port: Option<u16>,
    /// The path portion of the URL, usually the location of the git repository.
    ///
    /// # Security Warning
    ///
    /// URLs allow paths to start with `-` which makes it possible to mask command-line arguments as path which then leads to
    /// the invocation of programs from an attacker controlled URL. See <https://secure.phabricator.com/T12961> for details.
    ///
    /// If this value is ever going to be passed to a command-line application, call [Self::path_argument_safe()] instead.
    pub path: BString,
}
115
116/// Instantiation
117impl Url {
118 /// Create a new instance from the given parts, including a password, which will be validated by parsing them back.
119 pub fn from_parts(
120 scheme: Scheme,
121 user: Option<String>,
122 password: Option<String>,
123 host: Option<String>,
124 port: Option<u16>,
125 path: BString,
126 serialize_alternative_form: bool,
127 ) -> Result<Self, parse::Error> {
128 parse(
129 Url {
130 scheme,
131 user,
132 password,
133 host,
134 port,
135 path,
136 serialize_alternative_form,
137 }
138 .to_bstring()
139 .as_ref(),
140 )
141 }
142}
143
144/// Modification
145impl Url {
146 /// Set the given `user`, or unset it with `None`. Return the previous value.
147 pub fn set_user(&mut self, user: Option<String>) -> Option<String> {
148 let prev = self.user.take();
149 self.user = user;
150 prev
151 }
152
153 /// Set the given `password`, or unset it with `None`. Return the previous value.
154 pub fn set_password(&mut self, password: Option<String>) -> Option<String> {
155 let prev = self.password.take();
156 self.password = password;
157 prev
158 }
159}
160
161/// Builder
162impl Url {
163 /// Enable alternate serialization for this url, e.g. `file:///path` becomes `/path`.
164 ///
165 /// This is automatically set correctly for parsed URLs, but can be set here for urls
166 /// created by constructor.
167 pub fn serialize_alternate_form(mut self, use_alternate_form: bool) -> Self {
168 self.serialize_alternative_form = use_alternate_form;
169 self
170 }
171
172 /// Turn a file url like `file://relative` into `file:///root/relative`, hence it assures the url's path component is absolute,
173 /// using `current_dir` if needed to achieve that.
174 pub fn canonicalize(&mut self, current_dir: &std::path::Path) -> Result<(), gix_path::realpath::Error> {
175 if self.scheme == Scheme::File {
176 let path = gix_path::from_bstr(Cow::Borrowed(self.path.as_ref()));
177 let abs_path = gix_path::realpath_opts(path.as_ref(), current_dir, gix_path::realpath::MAX_SYMLINKS)?;
178 self.path = gix_path::into_bstr(abs_path).into_owned();
179 }
180 Ok(())
181 }
182}
183
184/// Access
185impl Url {
186 /// Return the username mentioned in the URL, if present.
187 ///
188 /// # Security Warning
189 ///
190 /// URLs allow usernames to start with `-` which makes it possible to mask command-line arguments as username which then leads to
191 /// the invocation of programs from an attacker controlled URL. See <https://secure.phabricator.com/T12961> for details.
192 ///
193 /// If this value is ever going to be passed to a command-line application, call [Self::user_argument_safe()] instead.
194 pub fn user(&self) -> Option<&str> {
195 self.user.as_deref()
196 }
197
198 /// Classify the username of this URL by whether it is safe to pass as a command-line argument.
199 ///
200 /// Use this method instead of [Self::user()] if the host is going to be passed to a command-line application.
201 /// If the unsafe and absent cases need not be distinguished, [Self::user_argument_safe()] may also be used.
202 pub fn user_as_argument(&self) -> ArgumentSafety<'_> {
203 match self.user() {
204 Some(user) if looks_like_command_line_option(user.as_bytes()) => ArgumentSafety::Dangerous(user),
205 Some(user) => ArgumentSafety::Usable(user),
206 None => ArgumentSafety::Absent,
207 }
208 }
209
210 /// Return the username of this URL if present *and* if it can't be mistaken for a command-line argument.
211 ///
212 /// Use this method or [Self::user_as_argument()] instead of [Self::user()] if the host is going to be
213 /// passed to a command-line application. Prefer [Self::user_as_argument()] unless the unsafe and absent
214 /// cases need not be distinguished from each other.
215 pub fn user_argument_safe(&self) -> Option<&str> {
216 match self.user_as_argument() {
217 ArgumentSafety::Usable(user) => Some(user),
218 _ => None,
219 }
220 }
221
222 /// Return the password mentioned in the url, if present.
223 pub fn password(&self) -> Option<&str> {
224 self.password.as_deref()
225 }
226
227 /// Return the host mentioned in the URL, if present.
228 ///
229 /// # Security Warning
230 ///
231 /// URLs allow hosts to start with `-` which makes it possible to mask command-line arguments as host which then leads to
232 /// the invocation of programs from an attacker controlled URL. See <https://secure.phabricator.com/T12961> for details.
233 ///
234 /// If this value is ever going to be passed to a command-line application, call [Self::host_as_argument()]
235 /// or [Self::host_argument_safe()] instead.
236 pub fn host(&self) -> Option<&str> {
237 self.host.as_deref()
238 }
239
240 /// Classify the host of this URL by whether it is safe to pass as a command-line argument.
241 ///
242 /// Use this method instead of [Self::host()] if the host is going to be passed to a command-line application.
243 /// If the unsafe and absent cases need not be distinguished, [Self::host_argument_safe()] may also be used.
244 pub fn host_as_argument(&self) -> ArgumentSafety<'_> {
245 match self.host() {
246 Some(host) if looks_like_command_line_option(host.as_bytes()) => ArgumentSafety::Dangerous(host),
247 Some(host) => ArgumentSafety::Usable(host),
248 None => ArgumentSafety::Absent,
249 }
250 }
251
252 /// Return the host of this URL if present *and* if it can't be mistaken for a command-line argument.
253 ///
254 /// Use this method or [Self::host_as_argument()] instead of [Self::host()] if the host is going to be
255 /// passed to a command-line application. Prefer [Self::host_as_argument()] unless the unsafe and absent
256 /// cases need not be distinguished from each other.
257 pub fn host_argument_safe(&self) -> Option<&str> {
258 match self.host_as_argument() {
259 ArgumentSafety::Usable(host) => Some(host),
260 _ => None,
261 }
262 }
263
264 /// Return the path of this URL *if* it can't be mistaken for a command-line argument.
265 /// Note that it always begins with a slash, which is ignored for this comparison.
266 ///
267 /// Use this method instead of accessing [Self::path] directly if the path is going to be passed to a
268 /// command-line application, unless it is certain that the leading `/` will always be included.
269 pub fn path_argument_safe(&self) -> Option<&BStr> {
270 self.path
271 .get(1..)
272 .and_then(|truncated| (!looks_like_command_line_option(truncated)).then_some(self.path.as_ref()))
273 }
274
275 /// Return true if the path portion of the URL is `/`.
276 pub fn path_is_root(&self) -> bool {
277 self.path == "/"
278 }
279
280 /// Return the actual or default port for use according to the URL scheme.
281 /// Note that there may be no default port either.
282 pub fn port_or_default(&self) -> Option<u16> {
283 self.port.or_else(|| {
284 use Scheme::*;
285 Some(match self.scheme {
286 Http => 80,
287 Https => 443,
288 Ssh => 22,
289 Git => 9418,
290 File | Ext(_) => return None,
291 })
292 })
293 }
294}
295
/// Return `true` if `b` starts with `-`, which is what makes command-line programs treat an argument as an option.
fn looks_like_command_line_option(b: &[u8]) -> bool {
    b.starts_with(b"-")
}
299
300/// Transformation
301impl Url {
302 /// Turn a file URL like `file://relative` into `file:///root/relative`, hence it assures the URL's path component is absolute, using
303 /// `current_dir` if necessary.
304 pub fn canonicalized(&self, current_dir: &std::path::Path) -> Result<Self, gix_path::realpath::Error> {
305 let mut res = self.clone();
306 res.canonicalize(current_dir)?;
307 Ok(res)
308 }
309}
310
311fn percent_encode(s: &str) -> Cow<'_, str> {
312 percent_encoding::utf8_percent_encode(s, percent_encoding::NON_ALPHANUMERIC).into()
313}
314
315/// Serialization
316impl Url {
317 /// Write this URL losslessly to `out`, ready to be parsed again.
318 pub fn write_to(&self, mut out: &mut dyn std::io::Write) -> std::io::Result<()> {
319 if !(self.serialize_alternative_form && (self.scheme == Scheme::File || self.scheme == Scheme::Ssh)) {
320 out.write_all(self.scheme.as_str().as_bytes())?;
321 out.write_all(b"://")?;
322 }
323 match (&self.user, &self.host) {
324 (Some(user), Some(host)) => {
325 out.write_all(percent_encode(user).as_bytes())?;
326 if let Some(password) = &self.password {
327 out.write_all(b":")?;
328 out.write_all(percent_encode(password).as_bytes())?;
329 }
330 out.write_all(b"@")?;
331 out.write_all(host.as_bytes())?;
332 }
333 (None, Some(host)) => {
334 out.write_all(host.as_bytes())?;
335 }
336 (None, None) => {}
337 (Some(_user), None) => unreachable!("BUG: should not be possible to have a user but no host"),
338 };
339 if let Some(port) = &self.port {
340 write!(&mut out, ":{port}")?;
341 }
342 if self.serialize_alternative_form && self.scheme == Scheme::Ssh {
343 out.write_all(b":")?;
344 }
345 out.write_all(&self.path)?;
346 Ok(())
347 }
348
349 /// Transform ourselves into a binary string, losslessly, or fail if the URL is malformed due to host or user parts being incorrect.
350 pub fn to_bstring(&self) -> BString {
351 let mut buf = Vec::with_capacity(
352 (5 + 3)
353 + self.user.as_ref().map(String::len).unwrap_or_default()
354 + 1
355 + self.host.as_ref().map(String::len).unwrap_or_default()
356 + self.port.map(|_| 5).unwrap_or_default()
357 + self.path.len(),
358 );
359 self.write_to(&mut buf).expect("io cannot fail in memory");
360 buf.into()
361 }
362}
363
/// Deserialization
impl Url {
    /// Parse a URL from `bytes`.
    ///
    /// This is the same as calling the free-standing [`parse()`] function with `bytes`.
    pub fn from_bytes(bytes: &BStr) -> Result<Self, parse::Error> {
        parse(bytes)
    }
}
371
/// This module contains extensions to the [Url] struct which are only intended to be used
/// for testing code. Do not use this module in production! For all intents and purposes the APIs of
/// all functions and types exposed by this module are considered unstable and are allowed to break
/// even in patch releases!
///
/// Note that the module is only compiled when `debug_assertions` are enabled.
#[doc(hidden)]
#[cfg(debug_assertions)]
pub mod testing {
    use bstr::BString;

    use crate::{Scheme, Url};

    /// Additional functions for [Url] which are only intended to be used for tests.
    pub trait TestUrlExtension {
        /// Create a new instance from the given parts without validating them.
        ///
        /// The parts are stored as they are, hence the resulting URL may be one that could never
        /// be obtained by parsing.
        ///
        /// This function is primarily intended for testing purposes. For production code please
        /// consider using [Url::from_parts] instead!
        fn from_parts_unchecked(
            scheme: Scheme,
            user: Option<String>,
            password: Option<String>,
            host: Option<String>,
            port: Option<u16>,
            path: BString,
            serialize_alternative_form: bool,
        ) -> Url {
            Url {
                scheme,
                user,
                password,
                host,
                port,
                path,
                serialize_alternative_form,
            }
        }
    }

    // The default method is all that's needed.
    impl TestUrlExtension for Url {}
}
411}