crates_index/git/
changes.rs

1use crate::error::GixError;
2use crate::git::{fetch_remote, Change};
3use crate::Error;
4use crate::GitIndex;
5use gix::bstr::ByteSlice;
6use gix::prelude::TreeEntryRefExt;
7use std::collections::{HashSet, VecDeque};
8use std::time::{Duration, SystemTime};
9
/// Remote that is fetched from when history continues in a commit that is named in a
/// "Previous HEAD was …" commit message but is not present in the local repository.
const INDEX_GIT_ARCHIVE_URL: &str = "https://github.com/rust-lang/crates.io-index-archive";
11
/// An iterator over individual changes, see [`GitIndex::changes`] for more.
///
/// Each step compares the tree of the current commit against the tree of its parent,
/// queues one [`Change`] per added or modified crate file, and then moves on to the parent.
pub struct Changes<'repo> {
    /// Repository of the index this iterator was created from; used for object lookups.
    repo: &'repo gix::Repository,
    /// Commit whose changes (relative to its parent) are computed next.
    current: gix::Commit<'repo>,
    /// Tree of `current`, kept so it does not have to be looked up again for each step.
    current_tree: gix::Tree<'repo>,
    /// Changes that were already computed but not yet handed out by `next()`.
    out: VecDeque<Change>,
}
19
20impl<'repo> Iterator for Changes<'repo> {
21    type Item = Result<Change, Error>;
22
23    fn next(&mut self) -> Option<Self::Item> {
24        while self.out.is_empty() {
25            let parent = match self.get_parent() {
26                Ok(Some(parent)) => parent,
27                Ok(None) => return None,
28                Err(e) => return Some(Err(e.into())),
29            };
30            let parent_tree = parent.tree().ok()?;
31            let time = SystemTime::UNIX_EPOCH + Duration::from_secs(self.current.time().ok()?.seconds.max(0) as _);
32            Self::tree_additions(
33                &self.repo,
34                &mut self.out,
35                time,
36                &self.current.id(),
37                &self.current_tree,
38                &parent_tree,
39            )
40            .ok()?;
41            self.current_tree = parent_tree;
42            self.current = parent;
43        }
44        self.out.pop_front().map(Ok)
45    }
46}
47
48impl<'repo> Changes<'repo> {
49    pub(crate) fn new(index: &'repo GitIndex) -> Result<Self, GixError> {
50        let current = index.repo.find_object(index.head_commit)?.peel_to_commit()?;
51        let current_tree = current.tree()?;
52
53        Ok(Self {
54            repo: &index.repo,
55            current,
56            current_tree,
57            out: VecDeque::new(),
58        })
59    }
60
61    fn get_parent(&self) -> Result<Option<gix::Commit<'repo>>, GixError> {
62        match self
63            .current
64            .parent_ids()
65            .next()
66            .map(|id| id.try_object())
67            .transpose()?
68            .flatten()
69        {
70            Some(obj) => Ok(Some(obj.try_into_commit()?)),
71            None => {
72                let msg = self.current.message_raw_sloppy().to_str_lossy();
73                let (oid, branch) = match oid_and_branch_from_commit_message(msg.as_ref()) {
74                    Some(res) => res,
75                    None => return Ok(None),
76                };
77                match self.repo.try_find_object(oid)? {
78                    Some(obj) => Ok(Some(obj.try_into_commit()?)),
79                    None => {
80                        let mut remote = self.repo.remote_at(INDEX_GIT_ARCHIVE_URL)?;
81                        fetch_remote(&mut remote, &[&format!("+refs/heads/{}", branch)])?;
82                        Ok(Some(self.repo.find_object(oid)?.try_into_commit()?))
83                    }
84                }
85            }
86        }
87    }
88
89    fn tree_additions(
90        repo: &gix::Repository,
91        out: &mut VecDeque<Change>,
92        change_time: SystemTime,
93        commit: &gix::hash::oid,
94        new: &gix::Tree<'_>,
95        old: &gix::Tree<'_>,
96    ) -> Result<(), GixError> {
97        let old_oids = old
98            .iter()
99            .map(|old| old.map(|e| e.object_id()))
100            .collect::<Result<HashSet<_>, _>>()?;
101        let old = old.decode()?;
102        for new_entry in new.iter().filter_map(Result::ok) {
103            if old_oids.contains(new_entry.oid()) {
104                continue;
105            }
106            if new_entry.mode().is_tree() {
107                let new_tree = new_entry.object()?.into_tree();
108                let name = new_entry.filename();
109                // Recurse only into crate subdirs, and they all happen to be 1 or 2 letters long
110                let is_crates_subdir = name.len() <= 2 && name.iter().copied().all(valid_crate_name_char);
111                let old_obj = if is_crates_subdir {
112                    old.bisect_entry(name, true).map(|entry| entry.attach(repo))
113                } else {
114                    None
115                }
116                .map(|o| o.object())
117                .transpose()?;
118                let old_tree = match old_obj.and_then(|o| o.try_into_tree().ok()) {
119                    Some(t) => t,
120                    None => repo.empty_tree(),
121                };
122                Self::tree_additions(repo, out, change_time, commit, &new_tree, &old_tree)?;
123            } else {
124                let name = new_entry.filename();
125                // filter out config.json
126                if name.iter().copied().all(valid_crate_name_char) {
127                    out.push_back(Change {
128                        time: change_time,
129                        crate_name: name.to_string().into(),
130                        commit: commit.into(),
131                    });
132                }
133            }
134        }
135        Ok(())
136    }
137}
138
/// Returns `true` if `c` is an ASCII letter, an ASCII digit, `-` or `_`.
#[inline]
fn valid_crate_name_char(c: u8) -> bool {
    matches!(c, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'-' | b'_')
}
143
144fn oid_and_branch_from_commit_message(msg: &str) -> Option<(gix::ObjectId, &str)> {
145    let hash_start = msg
146        .split_once("Previous HEAD was ")?
147        .1
148        .trim_start_matches(|c: char| !c.is_ascii_hexdigit());
149    let (hash_str, rest) = hash_start.split_once(|c: char| !c.is_ascii_hexdigit())?;
150    let hash = gix::ObjectId::from_hex(hash_str.as_bytes()).ok()?;
151    let snapshot_start = rest.find("snapshot-")?;
152    let branch = rest.get(snapshot_start..snapshot_start + "snapshot-xxxx-xx-xx".len())?;
153
154    Some((hash, branch))
155}
156
#[cfg(test)]
pub(crate) mod test {
    use super::oid_and_branch_from_commit_message;

    /// A full multi-line squash message yields the previous head and its snapshot branch.
    #[test]
    fn changes_parse_split_message() {
        let message = "Previous HEAD was 4181c62812c70fafb2b56cbbd66c31056671b445, now on the `snapshot-2021-07-02` branch

More information about this change can be found [online] and on [this issue].

[online]: https://internals.rust-lang.org/t/cargos-crate-index-upcoming-squash-into-one-commit/8440
[this issue]: https://github.com/rust-lang/crates-io-cargo-teams/issues/47";

        let parsed = oid_and_branch_from_commit_message(message);
        let (id, branch) = parsed.unwrap();

        assert_eq!("4181c62812c70fafb2b56cbbd66c31056671b445", id.to_string());
        assert_eq!("snapshot-2021-07-02", branch);
    }
}