crates_io/
lib.rs

1//! > This crate is maintained by the Cargo team for use by the wider
2//! > ecosystem. This crate follows semver compatibility for its APIs.
3
4use std::collections::BTreeMap;
5use std::fs::File;
6use std::io::prelude::*;
7use std::io::{Cursor, SeekFrom};
8use std::time::Instant;
9
10use curl::easy::{Easy, List};
11use percent_encoding::{percent_encode, NON_ALPHANUMERIC};
12use serde::{Deserialize, Serialize};
13use url::Url;
14
/// Result alias used by this crate's API, with [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;
16
/// Client for a registry's web API.
///
/// Requests are issued through the stored curl handle against URLs built
/// from `host`.
pub struct Registry {
    /// The base URL for issuing API requests.
    host: String,
    /// Optional authorization token.
    /// If None, commands requiring authorization will fail.
    token: Option<String>,
    /// Curl handle for issuing requests.
    handle: Easy,
    /// Whether to include the authorization token with all requests.
    auth_required: bool,
}
28
/// Whether a request must carry the `Authorization` header.
///
/// A request marked [`Auth::Unauthorized`] still gets the header when the
/// registry was created with `auth_required` set.
#[derive(PartialEq, Clone, Copy)]
pub enum Auth {
    /// The token is always sent with the request.
    Authorized,
    /// The token is sent only if the registry requires it for all requests.
    Unauthorized,
}
34
/// A crate entry deserialized from a search response.
#[derive(Deserialize)]
pub struct Crate {
    /// Name of the crate.
    pub name: String,
    /// Description of the crate, if the response provides one.
    pub description: Option<String>,
    /// Version string reported in the `max_version` field of the response.
    pub max_version: String,
}
41
/// This struct is serialized as JSON and sent as metadata ahead of the crate
/// tarball when publishing crates to a crate registry like crates.io.
///
/// `Registry::publish` serializes it with `serde_json::to_string` and places
/// the result, length-prefixed, in front of the tarball bytes.
///
/// see <https://doc.rust-lang.org/cargo/reference/registry-web-api.html#publish>
#[derive(Serialize, Deserialize)]
pub struct NewCrate {
    pub name: String,
    pub vers: String,
    /// Dependency entries; see [`NewCrateDependency`].
    pub deps: Vec<NewCrateDependency>,
    pub features: BTreeMap<String, Vec<String>>,
    pub authors: Vec<String>,
    pub description: Option<String>,
    pub documentation: Option<String>,
    pub homepage: Option<String>,
    pub readme: Option<String>,
    pub readme_file: Option<String>,
    pub keywords: Vec<String>,
    pub categories: Vec<String>,
    pub license: Option<String>,
    pub license_file: Option<String>,
    pub repository: Option<String>,
    pub badges: BTreeMap<String, BTreeMap<String, String>>,
    pub links: Option<String>,
    pub rust_version: Option<String>,
}
67
/// A single dependency entry inside [`NewCrate::deps`].
///
/// The fields annotated with `skip_serializing_if = "Option::is_none"` are
/// left out of the JSON when they are `None`. `lib` is left out when it is
/// `false` and defaults to `false` when missing during deserialization.
#[derive(Serialize, Deserialize)]
pub struct NewCrateDependency {
    pub optional: bool,
    pub default_features: bool,
    pub name: String,
    pub features: Vec<String>,
    pub version_req: String,
    pub target: Option<String>,
    pub kind: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub registry: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub explicit_name_in_toml: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub artifact: Option<Vec<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bindep_target: Option<String>,
    #[serde(default, skip_serializing_if = "is_false")]
    pub lib: bool,
}
88
/// Predicate for serde's `skip_serializing_if`: returns `true` when the
/// referenced flag is `false`.
fn is_false(x: &bool) -> bool {
    !*x
}
92
/// A user record deserialized from an owners listing response.
#[derive(Deserialize)]
pub struct User {
    pub id: u32,
    pub login: String,
    pub avatar: Option<String>,
    pub email: Option<String>,
    pub name: Option<String>,
}
101
/// Warnings extracted from the `warnings` object of a publish response.
///
/// Each list is empty when the corresponding key is absent or not an array.
pub struct Warnings {
    /// String entries of `warnings.invalid_categories`.
    pub invalid_categories: Vec<String>,
    /// String entries of `warnings.invalid_badges`.
    pub invalid_badges: Vec<String>,
    /// String entries of `warnings.other`.
    pub other: Vec<String>,
}
107
/// Response body carrying only an `ok` flag; parsed by `yank` and `unyank`.
#[derive(Deserialize)]
struct R {
    ok: bool,
}
/// Response body of the owner add/remove endpoints: an `ok` flag plus a
/// message string.
#[derive(Deserialize)]
struct OwnerResponse {
    ok: bool,
    msg: String,
}
/// Error payload shape: a top-level `errors` array. Any response body that
/// deserializes into this is reported as `Error::Api`.
#[derive(Deserialize)]
struct ApiErrorList {
    errors: Vec<ApiError>,
}
/// One entry of [`ApiErrorList::errors`]; only its `detail` text is kept.
#[derive(Deserialize)]
struct ApiError {
    detail: String,
}
/// JSON request body sent when adding or removing owners.
#[derive(Serialize)]
struct OwnersReq<'a> {
    users: &'a [&'a str],
}
/// Response body of the owners listing endpoint.
#[derive(Deserialize)]
struct Users {
    users: Vec<User>,
}
/// The `meta` object of a search response, holding the `total` count.
#[derive(Deserialize)]
struct TotalCrates {
    total: u32,
}
/// Response body of the search endpoint: the returned crates plus metadata.
#[derive(Deserialize)]
struct Crates {
    crates: Vec<Crate>,
    meta: TotalCrates,
}
142
/// Error returned when interacting with a registry.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// Error from libcurl.
    #[error(transparent)]
    Curl(#[from] curl::Error),

    /// Error from serializing the request payload or deserializing the
    /// response body (e.g. the response body didn't match the expected
    /// structure).
    #[error(transparent)]
    Json(#[from] serde_json::Error),

    /// Error from IO. Mostly from reading the tarball to upload.
    #[error("failed to seek tarball")]
    Io(#[from] std::io::Error),

    /// Response body was not valid UTF-8.
    #[error("invalid response body from server")]
    Utf8(#[from] std::string::FromUtf8Error),

    /// Error from an API response whose JSON body contains an `errors` array;
    /// `errors` holds the `detail` string of each entry.
    #[error(
        "the remote server responded with an error{}: {}",
        status(*code),
        errors.join(", "),
    )]
    Api {
        code: u32,
        headers: Vec<String>,
        errors: Vec<String>,
    },

    /// Error from a non-success API response whose body did not contain the
    /// structured `errors` array; the raw body is kept instead.
    #[error(
        "failed to get a 200 OK response, got {code}\nheaders:\n\t{}\nbody:\n{body}",
        headers.join("\n\t"),
    )]
    Code {
        code: u32,
        headers: Vec<String>,
        body: String,
    },

    /// Reason why the token was invalid.
    #[error("{0}")]
    InvalidToken(&'static str),

    /// Server was unavailable and the request timed out. Happens when
    /// uploading a way too large tarball to crates.io. The payload is the
    /// tarball length that `publish` measured.
    #[error(
        "Request timed out after 30 seconds. If you're trying to \
         upload a crate it may be too large. If the crate is under \
         10MB in size, you can email help@crates.io for assistance.\n\
         Total size was {0}."
    )]
    Timeout(u64),
}
200
201impl Registry {
202    /// Creates a new `Registry`.
203    ///
204    /// ## Example
205    ///
206    /// ```rust
207    /// use curl::easy::Easy;
208    /// use crates_io::Registry;
209    ///
210    /// let mut handle = Easy::new();
211    /// // If connecting to crates.io, a user-agent is required.
212    /// handle.useragent("my_crawler (example.com/info)");
213    /// let mut reg = Registry::new_handle(String::from("https://crates.io"), None, handle, true);
214    /// ```
215    pub fn new_handle(
216        host: String,
217        token: Option<String>,
218        handle: Easy,
219        auth_required: bool,
220    ) -> Registry {
221        Registry {
222            host,
223            token,
224            handle,
225            auth_required,
226        }
227    }
228
    /// Replaces the stored authorization token; `None` clears it.
    pub fn set_token(&mut self, token: Option<String>) {
        self.token = token;
    }
232
233    fn token(&self) -> Result<&str> {
234        let token = self.token.as_ref().ok_or_else(|| {
235            Error::InvalidToken("no upload token found, please run `cargo login`")
236        })?;
237        check_token(token)?;
238        Ok(token)
239    }
240
241    pub fn host(&self) -> &str {
242        &self.host
243    }
244
245    pub fn host_is_crates_io(&self) -> bool {
246        is_url_crates_io(&self.host)
247    }
248
249    pub fn add_owners(&mut self, krate: &str, owners: &[&str]) -> Result<String> {
250        let body = serde_json::to_string(&OwnersReq { users: owners })?;
251        let body = self.put(&format!("/crates/{}/owners", krate), body.as_bytes())?;
252        assert!(serde_json::from_str::<OwnerResponse>(&body)?.ok);
253        Ok(serde_json::from_str::<OwnerResponse>(&body)?.msg)
254    }
255
256    pub fn remove_owners(&mut self, krate: &str, owners: &[&str]) -> Result<()> {
257        let body = serde_json::to_string(&OwnersReq { users: owners })?;
258        let body = self.delete(&format!("/crates/{}/owners", krate), Some(body.as_bytes()))?;
259        assert!(serde_json::from_str::<OwnerResponse>(&body)?.ok);
260        Ok(())
261    }
262
263    pub fn list_owners(&mut self, krate: &str) -> Result<Vec<User>> {
264        let body = self.get(&format!("/crates/{}/owners", krate))?;
265        Ok(serde_json::from_str::<Users>(&body)?.users)
266    }
267
268    pub fn publish(&mut self, krate: &NewCrate, mut tarball: &File) -> Result<Warnings> {
269        let json = serde_json::to_string(krate)?;
270        // Prepare the body. The format of the upload request is:
271        //
272        //      <le u32 of json>
273        //      <json request> (metadata for the package)
274        //      <le u32 of tarball>
275        //      <source tarball>
276
277        // NOTE: This can be replaced with `stream_len` if it is ever stabilized.
278        //
279        // This checks the length using seeking instead of metadata, because
280        // on some filesystems, getting the metadata will fail because
281        // the file was renamed in ops::package.
282        let tarball_len = tarball.seek(SeekFrom::End(0))?;
283        tarball.seek(SeekFrom::Start(0))?;
284        let header = {
285            let mut w = Vec::new();
286            w.extend(&(json.len() as u32).to_le_bytes());
287            w.extend(json.as_bytes().iter().cloned());
288            w.extend(&(tarball_len as u32).to_le_bytes());
289            w
290        };
291        let size = tarball_len as usize + header.len();
292        let mut body = Cursor::new(header).chain(tarball);
293
294        let url = format!("{}/api/v1/crates/new", self.host);
295
296        self.handle.put(true)?;
297        self.handle.url(&url)?;
298        self.handle.in_filesize(size as u64)?;
299        let mut headers = List::new();
300        headers.append("Accept: application/json")?;
301        headers.append(&format!("Authorization: {}", self.token()?))?;
302        self.handle.http_headers(headers)?;
303
304        let started = Instant::now();
305        let body = self
306            .handle(&mut |buf| body.read(buf).unwrap_or(0))
307            .map_err(|e| match e {
308                Error::Code { code, .. }
309                    if code == 503
310                        && started.elapsed().as_secs() >= 29
311                        && self.host_is_crates_io() =>
312                {
313                    Error::Timeout(tarball_len)
314                }
315                _ => e.into(),
316            })?;
317
318        let response = if body.is_empty() {
319            "{}".parse()?
320        } else {
321            body.parse::<serde_json::Value>()?
322        };
323
324        let invalid_categories: Vec<String> = response
325            .get("warnings")
326            .and_then(|j| j.get("invalid_categories"))
327            .and_then(|j| j.as_array())
328            .map(|x| x.iter().flat_map(|j| j.as_str()).map(Into::into).collect())
329            .unwrap_or_else(Vec::new);
330
331        let invalid_badges: Vec<String> = response
332            .get("warnings")
333            .and_then(|j| j.get("invalid_badges"))
334            .and_then(|j| j.as_array())
335            .map(|x| x.iter().flat_map(|j| j.as_str()).map(Into::into).collect())
336            .unwrap_or_else(Vec::new);
337
338        let other: Vec<String> = response
339            .get("warnings")
340            .and_then(|j| j.get("other"))
341            .and_then(|j| j.as_array())
342            .map(|x| x.iter().flat_map(|j| j.as_str()).map(Into::into).collect())
343            .unwrap_or_else(Vec::new);
344
345        Ok(Warnings {
346            invalid_categories,
347            invalid_badges,
348            other,
349        })
350    }
351
352    pub fn search(&mut self, query: &str, limit: u32) -> Result<(Vec<Crate>, u32)> {
353        let formatted_query = percent_encode(query.as_bytes(), NON_ALPHANUMERIC);
354        let body = self.req(
355            &format!("/crates?q={}&per_page={}", formatted_query, limit),
356            None,
357            Auth::Unauthorized,
358        )?;
359
360        let crates = serde_json::from_str::<Crates>(&body)?;
361        Ok((crates.crates, crates.meta.total))
362    }
363
364    pub fn yank(&mut self, krate: &str, version: &str) -> Result<()> {
365        let body = self.delete(&format!("/crates/{}/{}/yank", krate, version), None)?;
366        assert!(serde_json::from_str::<R>(&body)?.ok);
367        Ok(())
368    }
369
370    pub fn unyank(&mut self, krate: &str, version: &str) -> Result<()> {
371        let body = self.put(&format!("/crates/{}/{}/unyank", krate, version), &[])?;
372        assert!(serde_json::from_str::<R>(&body)?.ok);
373        Ok(())
374    }
375
376    fn put(&mut self, path: &str, b: &[u8]) -> Result<String> {
377        self.handle.put(true)?;
378        self.req(path, Some(b), Auth::Authorized)
379    }
380
381    fn get(&mut self, path: &str) -> Result<String> {
382        self.handle.get(true)?;
383        self.req(path, None, Auth::Authorized)
384    }
385
386    fn delete(&mut self, path: &str, b: Option<&[u8]>) -> Result<String> {
387        self.handle.custom_request("DELETE")?;
388        self.req(path, b, Auth::Authorized)
389    }
390
391    fn req(&mut self, path: &str, body: Option<&[u8]>, authorized: Auth) -> Result<String> {
392        self.handle.url(&format!("{}/api/v1{}", self.host, path))?;
393        let mut headers = List::new();
394        headers.append("Accept: application/json")?;
395        if body.is_some() {
396            headers.append("Content-Type: application/json")?;
397        }
398
399        if self.auth_required || authorized == Auth::Authorized {
400            headers.append(&format!("Authorization: {}", self.token()?))?;
401        }
402        self.handle.http_headers(headers)?;
403        match body {
404            Some(mut body) => {
405                self.handle.upload(true)?;
406                self.handle.in_filesize(body.len() as u64)?;
407                self.handle(&mut |buf| body.read(buf).unwrap_or(0))
408                    .map_err(|e| e.into())
409            }
410            None => self.handle(&mut |_| 0).map_err(|e| e.into()),
411        }
412    }
413
414    fn handle(&mut self, read: &mut dyn FnMut(&mut [u8]) -> usize) -> Result<String> {
415        let mut headers = Vec::new();
416        let mut body = Vec::new();
417        {
418            let mut handle = self.handle.transfer();
419            handle.read_function(|buf| Ok(read(buf)))?;
420            handle.write_function(|data| {
421                body.extend_from_slice(data);
422                Ok(data.len())
423            })?;
424            handle.header_function(|data| {
425                // Headers contain trailing \r\n, trim them to make it easier
426                // to work with.
427                let s = String::from_utf8_lossy(data).trim().to_string();
428                // Don't let server sneak extra lines anywhere.
429                if s.contains('\n') {
430                    return true;
431                }
432                headers.push(s);
433                true
434            })?;
435            handle.perform()?;
436        }
437
438        let body = String::from_utf8(body)?;
439        let errors = serde_json::from_str::<ApiErrorList>(&body)
440            .ok()
441            .map(|s| s.errors.into_iter().map(|s| s.detail).collect::<Vec<_>>());
442
443        match (self.handle.response_code()?, errors) {
444            (0, None) => Ok(body),
445            (code, None) if is_success(code) => Ok(body),
446            (code, Some(errors)) => Err(Error::Api {
447                code,
448                headers,
449                errors,
450            }),
451            (code, None) => Err(Error::Code {
452                code,
453                headers,
454                body,
455            }),
456        }
457    }
458}
459
/// Returns `true` for HTTP status codes in the 2xx range (200 through 299).
fn is_success(code: u32) -> bool {
    (200..300).contains(&code)
}
463
464fn status(code: u32) -> String {
465    if is_success(code) {
466        String::new()
467    } else {
468        let reason = reason(code);
469        format!(" (status {code} {reason})")
470    }
471}
472
/// Maps an HTTP status code to its reason phrase, or `"<unknown>"` when the
/// code is not in the table.
fn reason(code: u32) -> &'static str {
    // Taken from https://developer.mozilla.org/en-US/docs/Web/HTTP/Status
    const REASONS: &[(u32, &str)] = &[
        (100, "Continue"),
        (101, "Switching Protocol"),
        (103, "Early Hints"),
        (200, "OK"),
        (201, "Created"),
        (202, "Accepted"),
        (203, "Non-Authoritative Information"),
        (204, "No Content"),
        (205, "Reset Content"),
        (206, "Partial Content"),
        (300, "Multiple Choice"),
        (301, "Moved Permanently"),
        (302, "Found"),
        (303, "See Other"),
        (304, "Not Modified"),
        (307, "Temporary Redirect"),
        (308, "Permanent Redirect"),
        (400, "Bad Request"),
        (401, "Unauthorized"),
        (402, "Payment Required"),
        (403, "Forbidden"),
        (404, "Not Found"),
        (405, "Method Not Allowed"),
        (406, "Not Acceptable"),
        (407, "Proxy Authentication Required"),
        (408, "Request Timeout"),
        (409, "Conflict"),
        (410, "Gone"),
        (411, "Length Required"),
        (412, "Precondition Failed"),
        (413, "Payload Too Large"),
        (414, "URI Too Long"),
        (415, "Unsupported Media Type"),
        (416, "Request Range Not Satisfiable"),
        (417, "Expectation Failed"),
        (429, "Too Many Requests"),
        (431, "Request Header Fields Too Large"),
        (500, "Internal Server Error"),
        (501, "Not Implemented"),
        (502, "Bad Gateway"),
        (503, "Service Unavailable"),
        (504, "Gateway Timeout"),
    ];

    REASONS
        .iter()
        .find(|&&(known, _)| known == code)
        .map_or("<unknown>", |&(_, phrase)| phrase)
}
521
522/// Returns `true` if the host of the given URL is "crates.io".
523pub fn is_url_crates_io(url: &str) -> bool {
524    Url::parse(url)
525        .map(|u| u.host_str() == Some("crates.io"))
526        .unwrap_or(false)
527}
528
529/// Checks if a token is valid or malformed.
530///
531/// This check is necessary to prevent sending tokens which create an invalid HTTP request.
532/// It would be easier to check just for alphanumeric tokens, but we can't be sure that all
533/// registries only create tokens in that format so that is as less restricted as possible.
534pub fn check_token(token: &str) -> Result<()> {
535    if token.is_empty() {
536        return Err(Error::InvalidToken("please provide a non-empty token"));
537    }
538    if token.bytes().all(|b| {
539        // This is essentially the US-ASCII limitation of
540        // https://www.rfc-editor.org/rfc/rfc9110#name-field-values. That is,
541        // visible ASCII characters (0x21-0x7e), space, and tab. We want to be
542        // able to pass this in an HTTP header without encoding.
543        b >= 32 && b < 127 || b == b'\t'
544    }) {
545        Ok(())
546    } else {
547        Err(Error::InvalidToken(
548            "token contains invalid characters.\nOnly printable ISO-8859-1 characters \
549             are allowed as it is sent in a HTTPS header.",
550        ))
551    }
552}