line_col/
lib.rs

1#[cfg(feature = "grapheme-clusters")]
2use unicode_segmentation::UnicodeSegmentation;
3use std::{cell::{Ref, RefCell}};
4
5/// Pre-cached line/column lookup table for a string slice.
6pub struct LineColLookup<'source> {
7    src: &'source str,
8    line_heads: RefCell<Option<Vec<usize>>>,
9}
10
11impl<'source> LineColLookup<'source> {
12    /// Creates a new line/col lookup table. The `src` parameter provides the input string used to calculate lines and columns.
13    ///
14    /// Internally, this scans `src` and caches the starting positions of all lines. This means this is an O(n) operation.
15    pub fn new(src: &'source str) -> Self {
16        Self {
17            src,
18            line_heads: RefCell::new(None),
19        }
20    }
21
22    fn heads(&self) -> Ref<'_, Option<Vec<usize>>> {
23        if self.line_heads.borrow().is_none() {
24            let line_heads: Vec<usize> = std::iter::once(0)
25            .chain(self.src
26                .char_indices()
27                .filter_map(|(i, c)| Some(i + 1).filter(|_| c == '\n')))
28            .collect();
29            self.line_heads.replace(Some(line_heads));
30        }
31
32        self.line_heads.borrow()
33    }
34
35    /// Looks up the 1-based line and column numbers of the specified byte index.
36    ///
37    /// Returns a tuple with the line number first, then column number. 
38    ///
39    /// # Example
40    /// ```rust
41    /// use line_col::*;
42    /// let text = "One\nTwo";
43    /// let lookup = LineColLookup::new(text);
44    /// assert_eq!(lookup.get(0), (1, 1)); // 'O' (line 1, col 1)
45    /// assert_eq!(lookup.get(1), (1, 2)); // 'n' (line 1, col 2)
46    /// assert_eq!(lookup.get(2), (1, 3)); // 'e' (line 1, col 3)
47    /// assert_eq!(lookup.get(4), (2, 1)); // 'T' (line 2, col 1)
48    /// assert_eq!(lookup.get(5), (2, 2)); // 'w' (line 2, col 2)
49    /// assert_eq!(lookup.get(6), (2, 3)); // 'o' (line 2, col 3)
50    /// assert_eq!(lookup.get(7), (2, 4)); // <end> (line 2, col 4)
51    /// ```
52    ///
53    /// # Panics
54    ///
55    /// Panics if `index` is greater than the length of the input `&str`.
56    ///
57    /// # Notes
58    /// This function uses a binary search to locate the line on which `index` resides.
59    /// This means that it runs in approximately O(log n) time.
60    pub fn get(&self, index: usize) -> (usize, usize) {
61        if index > self.src.len() {
62            panic!("Index cannot be greater than the length of the input slice.");
63        }
64
65        if let Some(heads) = self.heads().as_ref() {
66            // Perform a binary search to locate the line on which `index` resides
67            let mut line_range = 0..heads.len();
68            while line_range.end - line_range.start > 1 {
69                let range_middle = line_range.start + (line_range.end - line_range.start) / 2;
70                let (left, right) = (line_range.start..range_middle, range_middle..line_range.end);
71                // Check which line window contains our character index
72                if (heads[left.start] .. heads[left.end]).contains(&index) {
73                    line_range = left;
74                } else {
75                    line_range = right;
76                }
77            }
78
79            let line_start_index = heads[line_range.start];
80            let line = line_range.start + 1;
81            let col = index - line_start_index + 1;
82
83            return (line, col)
84        }
85
86        unreachable!()
87    }
88
89    /// Looks up the 1-based line and column numbers of the specified byte index.
90    /// The column number correlates to the number of grapheme clusters up to and at the specified index.
91    ///
92    /// Returns a tuple with the line number first, then column number. 
93    ///
94    /// # Panics
95    ///
96    /// Panics if `index` is greater than the length of the input `&str`.
97    ///
98    /// # Notes
99    /// This function uses a binary search to locate the line on which `index` resides.
100    /// This means that it runs in approximately O(log n) time.
101    #[cfg(feature = "grapheme-clusters")]
102    pub fn get_by_cluster(&self, index: usize) -> (usize, usize) {
103        if index > self.src.len() {
104            panic!("Index cannot be greater than the length of the input slice.");
105        }
106
107        if let Some(heads) = self.heads().as_ref() {
108            // Perform a binary search to locate the line on which `index` resides
109            let mut line_range = 0..heads.len();
110            while line_range.end - line_range.start > 1 {
111                let range_middle = line_range.start + (line_range.end - line_range.start) / 2;
112                let (left, right) = (line_range.start..range_middle, range_middle..line_range.end);
113                // Check which line window contains our character index
114                if (heads[left.start] .. heads[left.end]).contains(&index) {
115                    line_range = left;
116                } else {
117                    line_range = right;
118                }
119            }
120
121            let line_start_index = heads[line_range.start];
122            let line = line_range.start + 1;
123            let col = UnicodeSegmentation::graphemes(&self.src[line_start_index..index], true).count() + 1;
124
125            return (line, col)
126        }
127
128        unreachable!()
129    }
130}
131
132#[cfg(test)]
133mod tests {
134    use crate::*;
135
136    #[test]
137    fn empty_str() {
138        let text = "";
139        let lookup = LineColLookup::new(text);
140        assert_eq!(lookup.get(0), (1, 1));
141    }
142
143    #[test]
144    fn line_col_iter_by_codepoints() {
145        let text = "a\nab\nabc";
146        let lookup = LineColLookup::new(text);
147        assert_eq!(lookup.get(0), (1, 1));
148        assert_eq!(lookup.get(1), (1, 2));
149        assert_eq!(lookup.get(2), (2, 1));
150        assert_eq!(lookup.get(3), (2, 2));
151        assert_eq!(lookup.get(4), (2, 3));
152        assert_eq!(lookup.get(5), (3, 1));
153        assert_eq!(lookup.get(6), (3, 2));
154        assert_eq!(lookup.get(7), (3, 3));
155        assert_eq!(lookup.get(8), (3, 4));
156    }
157
158    #[test]
159    #[cfg(feature = "grapheme-clusters")]
160    fn emoji_text_by_grapheme_clusters() {
161        let text = "The 👨‍👩‍👦 emoji is made of 5 code points and 18 bytes in UTF-8.";
162        let lookup = LineColLookup::new(text);
163        assert_eq!(lookup.get_by_cluster(4), (1, 5));
164        assert_eq!(lookup.get_by_cluster(22), (1, 6));
165    }
166
167    #[test]
168    fn emoji_text_by_codepoints() {
169        let text = "The 👨‍👩‍👦 emoji is made of 5 code points and 18 bytes in UTF-8.";
170        let lookup = LineColLookup::new(text);
171        assert_eq!(lookup.get(4), (1, 5));
172        assert_eq!(lookup.get(22), (1, 23));
173    }
174}