atuin_client/import/
bash.rs

1use std::{path::PathBuf, str};
2
3use async_trait::async_trait;
4use directories::UserDirs;
5use eyre::{Result, eyre};
6use itertools::Itertools;
7use time::{Duration, OffsetDateTime};
8
9use super::{Importer, Loader, get_histfile_path, unix_byte_lines};
10use crate::history::History;
11use crate::import::read_to_end;
12
/// Importer for bash history files.
///
/// Holds the complete, raw contents of the history file; the bytes are only
/// split into lines and decoded when entries are counted or loaded.
#[derive(Debug)]
pub struct Bash {
    /// Raw bytes of the history file, exactly as read from disk.
    bytes: Vec<u8>,
}
17
18fn default_histpath() -> Result<PathBuf> {
19    let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?;
20    let home_dir = user_dirs.home_dir();
21
22    Ok(home_dir.join(".bash_history"))
23}
24
25#[async_trait]
26impl Importer for Bash {
27    const NAME: &'static str = "bash";
28
29    async fn new() -> Result<Self> {
30        let bytes = read_to_end(get_histfile_path(default_histpath)?)?;
31        Ok(Self { bytes })
32    }
33
34    async fn entries(&mut self) -> Result<usize> {
35        let count = unix_byte_lines(&self.bytes)
36            .map(LineType::from)
37            .filter(|line| matches!(line, LineType::Command(_)))
38            .count();
39        Ok(count)
40    }
41
42    async fn load(self, h: &mut impl Loader) -> Result<()> {
43        let lines = unix_byte_lines(&self.bytes)
44            .map(LineType::from)
45            .filter(|line| !matches!(line, LineType::NotUtf8)) // invalid utf8 are ignored
46            .collect_vec();
47
48        let (commands_before_first_timestamp, first_timestamp) = lines
49            .iter()
50            .enumerate()
51            .find_map(|(i, line)| match line {
52                LineType::Timestamp(t) => Some((i, *t)),
53                _ => None,
54            })
55            // if no known timestamps, use now as base
56            .unwrap_or((lines.len(), OffsetDateTime::now_utc()));
57
58        // if no timestamp is recorded, then use this increment to set an arbitrary timestamp
59        // to preserve ordering
60        // this increment is deliberately very small to prevent particularly fast fingers
61        // causing ordering issues; it also helps in handling the "here document" syntax,
62        // where several lines are recorded in succession without individual timestamps
63        let timestamp_increment = Duration::milliseconds(1);
64
65        // make sure there is a minimum amount of time before the first known timestamp
66        // to fit all commands, given the default increment
67        let mut next_timestamp =
68            first_timestamp - timestamp_increment * commands_before_first_timestamp as i32;
69
70        for line in lines.into_iter() {
71            match line {
72                LineType::NotUtf8 => unreachable!(), // already filtered
73                LineType::Empty => {}                // do nothing
74                LineType::Timestamp(t) => {
75                    if t < next_timestamp {
76                        warn!(
77                            "Time reversal detected in Bash history! Commands may be ordered incorrectly."
78                        );
79                    }
80                    next_timestamp = t;
81                }
82                LineType::Command(c) => {
83                    let imported = History::import().timestamp(next_timestamp).command(c);
84
85                    h.push(imported.build().into()).await?;
86                    next_timestamp += timestamp_increment;
87                }
88            }
89        }
90
91        Ok(())
92    }
93}
94
95#[derive(Debug, Clone)]
96enum LineType<'a> {
97    NotUtf8,
98    /// Can happen when using the "here document" syntax.
99    Empty,
100    /// A timestamp line start with a '#', followed immediately by an integer
101    /// that represents seconds since UNIX epoch.
102    Timestamp(OffsetDateTime),
103    /// Anything else.
104    Command(&'a str),
105}
106impl<'a> From<&'a [u8]> for LineType<'a> {
107    fn from(bytes: &'a [u8]) -> Self {
108        let Ok(line) = str::from_utf8(bytes) else {
109            return LineType::NotUtf8;
110        };
111        if line.is_empty() {
112            return LineType::Empty;
113        }
114        let parsed = match try_parse_line_as_timestamp(line) {
115            Some(time) => LineType::Timestamp(time),
116            None => LineType::Command(line),
117        };
118        parsed
119    }
120}
121
122fn try_parse_line_as_timestamp(line: &str) -> Option<OffsetDateTime> {
123    let seconds = line.strip_prefix('#')?.parse().ok()?;
124    OffsetDateTime::from_unix_timestamp(seconds).ok()
125}
126
#[cfg(test)]
mod test {
    use std::cmp::Ordering;

    use itertools::{Itertools, assert_equal};

    use crate::import::{Importer, tests::TestLoader};

    use super::Bash;

    /// Runs a full import over `bytes`: checks that `entries` reports
    /// `expected_entries`, then loads everything into a fresh `TestLoader`
    /// and hands it back for inspection.
    async fn import(bytes: Vec<u8>, expected_entries: usize) -> TestLoader {
        let mut bash = Bash { bytes };
        assert_eq!(bash.entries().await.unwrap(), expected_entries);

        let mut loader = TestLoader::default();
        bash.load(&mut loader).await.unwrap();
        loader
    }

    /// Without any timestamp lines the commands must keep their order and
    /// receive strictly increasing timestamps.
    #[tokio::test]
    async fn parse_no_timestamps() {
        let history = r"cargo install atuin
cargo update
cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
";

        let loader = import(history.as_bytes().to_owned(), 3).await;

        let commands = loader.buf.iter().map(|h| h.command.as_str());
        assert_equal(
            commands,
            [
                "cargo install atuin",
                "cargo update",
                "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷",
            ],
        );

        let timestamps = loader.buf.iter().map(|h| h.timestamp);
        assert!(is_strictly_sorted(timestamps));
    }

    /// When every command has its own timestamp line, that exact time is
    /// attached to the command.
    #[tokio::test]
    async fn parse_with_timestamps() {
        let history = b"#1672918999
git reset
#1672919006
git clean -dxf
#1672919020
cd ../
";

        let loader = import(history.to_vec(), 3).await;

        let commands = loader.buf.iter().map(|h| h.command.as_str());
        assert_equal(commands, ["git reset", "git clean -dxf", "cd ../"]);

        let seconds = loader.buf.iter().map(|h| h.timestamp.unix_timestamp());
        assert_equal(seconds, [1672918999, 1672919006, 1672919020]);
    }

    /// With only some timestamp lines present, the commands must still come
    /// out in order with strictly increasing timestamps.
    #[tokio::test]
    async fn parse_with_partial_timestamps() {
        let history = b"git reset
#1672919006
git clean -dxf
cd ../
";

        let loader = import(history.to_vec(), 3).await;

        let commands = loader.buf.iter().map(|h| h.command.as_str());
        assert_equal(commands, ["git reset", "git clean -dxf", "cd ../"]);

        let timestamps = loader.buf.iter().map(|h| h.timestamp);
        assert!(is_strictly_sorted(timestamps));
    }

    /// Returns `true` when every item compares strictly less than the item
    /// that follows it. Inputs with fewer than two items are sorted.
    fn is_strictly_sorted<T>(iter: impl IntoIterator<Item = T>) -> bool
    where
        T: Clone + PartialOrd,
    {
        iter.into_iter()
            .tuple_windows()
            .all(|(earlier, later)| earlier.partial_cmp(&later) == Some(Ordering::Less))
    }
}