quickwit_common/
uri.rs

// Copyright (C) 2021 Quickwit, Inc.
//
// Quickwit is offered under the AGPL v3.0 and as commercial software.
// For commercial licensing, contact us at hello@quickwit.io.
//
// AGPL:
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20use std::env;
21use std::ffi::OsStr;
22use std::fmt::Display;
23use std::path::{Component, Path, PathBuf};
24
25use anyhow::{bail, Context};
26use serde::{Serialize, Serializer};
27
/// Separator between the protocol and the path of a URI: `<protocol>://<path>`.
const PROTOCOL_SEPARATOR: &str = "://";

/// Protocol of local file URIs (`file://`).
/// This is the protocol assumed when a raw URI does not specify one.
pub const FILE_PROTOCOL: &str = "file";

/// Protocol of S3 URIs (`s3://`).
pub const S3_PROTOCOL: &str = "s3";

/// Protocol of PostgreSQL URIs (`postgres://`).
const POSTGRES_PROTOCOL: &str = "postgres";

/// Alternative spelling of the PostgreSQL protocol (`postgresql://`).
const POSTGRESQL_PROTOCOL: &str = "postgresql";
39
/// File extension carried by a URI.
#[derive(Debug, PartialEq)]
pub enum Extension {
    /// The `json` extension.
    Json,
    /// The `toml` extension.
    Toml,
    /// Any non-empty extension that is not recognized; holds the extension verbatim.
    Unknown(String),
    /// The `yaml` or `yml` extension.
    Yaml,
}

impl Extension {
    /// Maps an extension (without the leading dot) to an [`Extension`].
    ///
    /// Returns `None` for the empty string. Matching is case-sensitive: anything other than
    /// `json`, `toml`, `yaml` or `yml` is returned as [`Extension::Unknown`].
    fn maybe_new(extension: &str) -> Option<Self> {
        if extension.is_empty() {
            return None;
        }
        let parsed = match extension {
            "json" => Self::Json,
            "toml" => Self::Toml,
            "yaml" | "yml" => Self::Yaml,
            other => Self::Unknown(other.to_string()),
        };
        Some(parsed)
    }
}
59
60/// Encapsulates the URI type.
61#[derive(Debug, PartialEq, Eq, Hash, Clone)]
62pub struct Uri {
63    uri: String,
64    protocol_idx: usize,
65}
66
67impl Uri {
68    /// Attempts to construct a [`Uri`] from a raw string slice.
69    /// A `file://` protocol is assumed if not specified.
70    /// File URIs are resolved (normalized) relative to the current working directory
71    /// unless an absolute path is specified.
72    /// Handles special characters like `~`, `.`, `..`.
73    pub fn try_new(uri: &str) -> anyhow::Result<Self> {
74        if uri.is_empty() {
75            bail!("URI is empty.");
76        }
77        let (protocol, mut path) = match uri.split_once(PROTOCOL_SEPARATOR) {
78            None => (FILE_PROTOCOL, uri.to_string()),
79            Some((protocol, path)) => (protocol, path.to_string()),
80        };
81        if protocol == FILE_PROTOCOL {
82            if path.starts_with('~') {
83                // We only accept `~` (alias to the home directory) and `~/path/to/something`.
84                // If there is something following the `~` that is not `/`, we bail out.
85                if path.len() > 1 && !path.starts_with("~/") {
86                    bail!("Path syntax `{}` is not supported.", uri);
87                }
88
89                let home_dir_path = home::home_dir()
90                    .context("Failed to resolve home directory.")?
91                    .to_string_lossy()
92                    .to_string();
93
94                path.replace_range(0..1, &home_dir_path);
95            }
96            if Path::new(&path).is_relative() {
97                let current_dir = env::current_dir().context(
98                    "Failed to resolve current working directory: dir does not exist or \
99                     insufficient permissions.",
100                )?;
101                path = current_dir.join(path).to_string_lossy().to_string();
102            }
103            path = normalize_path(Path::new(&path))
104                .to_string_lossy()
105                .to_string();
106        }
107        Ok(Self {
108            uri: format!("{}{}{}", protocol, PROTOCOL_SEPARATOR, path),
109            protocol_idx: protocol.len(),
110        })
111    }
112
113    /// Constructs a [`Uri`] from a properly formatted string `<protocol>://<path>` where `path` is
114    /// normalized. Use this method exclusively for trusted input.
115    pub fn new(uri: String) -> Self {
116        let protocol_idx = uri
117            .find(PROTOCOL_SEPARATOR)
118            .expect("URI lacks protocol separator. Use `Uri::new` exclusively for trusted input.");
119        Self { uri, protocol_idx }
120    }
121
122    /// Returns the extension of the URI.
123    pub fn extension(&self) -> Option<Extension> {
124        Path::new(&self.uri)
125            .extension()
126            .and_then(OsStr::to_str)
127            .and_then(Extension::maybe_new)
128    }
129
130    /// Returns the URI as a string slice.
131    pub fn as_str(&self) -> &str {
132        &self.uri
133    }
134
135    /// Returns the protocol of the URI.
136    pub fn protocol(&self) -> &str {
137        &self.uri[..self.protocol_idx]
138    }
139
140    /// Returns the file path of the URI.
141    /// Applies only to `file://` URIs.
142    pub fn filepath(&self) -> Option<&Path> {
143        if self.protocol() == FILE_PROTOCOL {
144            self.uri.strip_prefix("file://").map(Path::new)
145        } else {
146            None
147        }
148    }
149
150    /// Consumes the [`Uri`] struct and returns the normalized URI as a string.
151    pub fn into_string(self) -> String {
152        self.uri
153    }
154
155    /// Creates a new [`Uri`] with `path` adjoined to `self`.
156    /// Fails if `path` is absolute.
157    pub fn join(&self, path: &str) -> anyhow::Result<Self> {
158        if Path::new(path).is_absolute() {
159            bail!(
160                "Cannot join URI `{}` with absolute path `{}`.",
161                self.uri,
162                path
163            );
164        }
165        let joined = match self.protocol() {
166            FILE_PROTOCOL => Path::new(&self.uri)
167                .join(path)
168                .to_string_lossy()
169                .to_string(),
170            POSTGRES_PROTOCOL | POSTGRESQL_PROTOCOL => bail!(
171                "Cannot join PostgreSQL URI `{}` with path `{}`.",
172                self.uri,
173                path
174            ),
175            _ => format!(
176                "{}{}{}",
177                self.uri,
178                if self.uri.ends_with('/') { "" } else { "/" },
179                path
180            ),
181        };
182        Ok(Self {
183            uri: joined,
184            protocol_idx: self.protocol_idx,
185        })
186    }
187}
188
189impl AsRef<str> for Uri {
190    fn as_ref(&self) -> &str {
191        &self.uri
192    }
193}
194
195impl Display for Uri {
196    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
197        write!(formatter, "{}", self.uri)
198    }
199}
200
201impl PartialEq<&str> for Uri {
202    fn eq(&self, other: &&str) -> bool {
203        &self.uri == other
204    }
205}
206impl PartialEq<String> for Uri {
207    fn eq(&self, other: &String) -> bool {
208        &self.uri == other
209    }
210}
211
212impl Serialize for Uri {
213    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
214    where S: Serializer {
215        serializer.serialize_str(&self.uri)
216    }
217}
218
/// Normalizes a path lexically by resolving its `.` and `..` components.
///
/// Unlike `Path::canonicalize`, this helper never touches the file system: it does not check
/// that the path exists and it does not resolve symbolic links. A `..` component removes the
/// last component accumulated so far, if any.
/// <https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61>
fn normalize_path(path: &Path) -> PathBuf {
    let mut parts = path.components();
    // A prefix can only be the very first component: seed the result with it and skip it.
    let mut normalized = match parts.clone().next() {
        Some(prefix @ Component::Prefix(..)) => {
            parts.next();
            PathBuf::from(prefix.as_os_str())
        }
        _ => PathBuf::new(),
    };

    for part in parts {
        match part {
            Component::Prefix(..) => unreachable!(),
            Component::CurDir => {}
            Component::ParentDir => {
                normalized.pop();
            }
            Component::RootDir => normalized.push(part.as_os_str()),
            Component::Normal(name) => normalized.push(name),
        }
    }
    normalized
}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `raw` is accepted by `Uri::try_new` and resolves to exactly `expected`.
    #[track_caller]
    fn assert_parses_to(raw: &str, expected: &str) {
        let parsed = Uri::try_new(raw).unwrap();
        assert_eq!(parsed, expected);
    }

    #[test]
    fn test_try_new_uri() {
        Uri::try_new("").unwrap_err();

        let home_dir = home::home_dir().unwrap();
        let current_dir = env::current_dir().unwrap();
        let home = home_dir.display();
        let cwd = current_dir.display();

        let uri = Uri::try_new("file:///home/foo/bar").unwrap();
        assert_eq!(uri.protocol(), "file");
        assert_eq!(uri.filepath(), Some(Path::new("/home/foo/bar")));
        assert_eq!(uri, "file:///home/foo/bar");
        assert_eq!(uri, "file:///home/foo/bar".to_string());

        // Unsupported `~user` syntax is rejected with a dedicated message.
        assert_eq!(
            Uri::try_new("~anything/bar").unwrap_err().to_string(),
            "Path syntax `~anything/bar` is not supported."
        );

        // (raw input, expected normalized URI)
        let expectations: Vec<(&str, String)> = vec![
            (
                "home/homer/docs/dognuts",
                format!("file://{}/home/homer/docs/dognuts", cwd),
            ),
            (
                "home/homer/docs/../dognuts",
                format!("file://{}/home/homer/dognuts", cwd),
            ),
            (
                "home/homer/docs/../../dognuts",
                format!("file://{}/home/dognuts", cwd),
            ),
            (
                "/home/homer/docs/dognuts",
                "file:///home/homer/docs/dognuts".to_string(),
            ),
            ("~", format!("file://{}", home)),
            ("~/", format!("file://{}", home)),
            ("~/.", format!("file://{}", home)),
            (
                "~/..",
                format!("file://{}", home_dir.parent().unwrap().display()),
            ),
            ("file://", format!("file://{}", cwd)),
            ("file:///", "file:///".to_string()),
            ("file://.", format!("file://{}", cwd)),
            (
                "file://..",
                format!("file://{}", current_dir.parent().unwrap().display()),
            ),
            (
                "s3://home/homer/docs/dognuts",
                "s3://home/homer/docs/dognuts".to_string(),
            ),
            (
                "s3://home/homer/docs/../dognuts",
                "s3://home/homer/docs/../dognuts".to_string(),
            ),
        ];
        for (raw, expected) in &expectations {
            assert_parses_to(raw, expected);
        }
    }

    #[test]
    fn test_uri_extension() {
        let no_extension_uri = Uri::try_new("s3://").unwrap();
        assert!(no_extension_uri.extension().is_none());

        let json_uri = Uri::try_new("s3://config.json").unwrap();
        assert_eq!(json_uri.extension(), Some(Extension::Json));

        let unknown_uri = Uri::try_new("s3://config.foo").unwrap();
        assert_eq!(
            unknown_uri.extension(),
            Some(Extension::Unknown("foo".to_string()))
        );
    }

    #[test]
    fn test_uri_join() {
        // (base URI, joined path, expected URI)
        let successful_joins = [
            ("file:///", "foo", "file:///foo"),
            ("file:///foo", "bar", "file:///foo/bar"),
            ("file:///foo/", "bar", "file:///foo/bar"),
            ("ram://foo", "bar", "ram://foo/bar"),
            ("s3://bucket/", "key", "s3://bucket/key"),
        ];
        for (base, suffix, expected) in successful_joins.iter() {
            let joined = Uri::new(base.to_string()).join(suffix).unwrap();
            assert_eq!(joined, *expected);
        }

        // Absolute paths cannot be joined.
        let bucket_uri = Uri::new("s3://bucket/".to_string());
        bucket_uri.join("/key").unwrap_err();

        // PostgreSQL URIs cannot be joined at all.
        let postgres_uri =
            Uri::new("postgres://username:password@localhost:5432/metastore".to_string());
        postgres_uri.join("table").unwrap_err();
    }

    #[test]
    fn test_uri_serialize() {
        let uri = Uri::try_new("s3://bucket/key").unwrap();
        let serialized = serde_json::to_value(&uri).unwrap();
        let expected = serde_json::Value::String("s3://bucket/key".to_string());
        assert_eq!(serialized, expected);
    }
}