1#[cfg(feature = "grapheme-clusters")]
2use unicode_segmentation::UnicodeSegmentation;
3use std::{cell::{Ref, RefCell}};
4
5pub struct LineColLookup<'source> {
7 src: &'source str,
8 line_heads: RefCell<Option<Vec<usize>>>,
9}
10
11impl<'source> LineColLookup<'source> {
12 pub fn new(src: &'source str) -> Self {
16 Self {
17 src,
18 line_heads: RefCell::new(None),
19 }
20 }
21
22 fn heads(&self) -> Ref<'_, Option<Vec<usize>>> {
23 if self.line_heads.borrow().is_none() {
24 let line_heads: Vec<usize> = std::iter::once(0)
25 .chain(self.src
26 .char_indices()
27 .filter_map(|(i, c)| Some(i + 1).filter(|_| c == '\n')))
28 .collect();
29 self.line_heads.replace(Some(line_heads));
30 }
31
32 self.line_heads.borrow()
33 }
34
35 pub fn get(&self, index: usize) -> (usize, usize) {
61 if index > self.src.len() {
62 panic!("Index cannot be greater than the length of the input slice.");
63 }
64
65 if let Some(heads) = self.heads().as_ref() {
66 let mut line_range = 0..heads.len();
68 while line_range.end - line_range.start > 1 {
69 let range_middle = line_range.start + (line_range.end - line_range.start) / 2;
70 let (left, right) = (line_range.start..range_middle, range_middle..line_range.end);
71 if (heads[left.start] .. heads[left.end]).contains(&index) {
73 line_range = left;
74 } else {
75 line_range = right;
76 }
77 }
78
79 let line_start_index = heads[line_range.start];
80 let line = line_range.start + 1;
81 let col = index - line_start_index + 1;
82
83 return (line, col)
84 }
85
86 unreachable!()
87 }
88
89 #[cfg(feature = "grapheme-clusters")]
102 pub fn get_by_cluster(&self, index: usize) -> (usize, usize) {
103 if index > self.src.len() {
104 panic!("Index cannot be greater than the length of the input slice.");
105 }
106
107 if let Some(heads) = self.heads().as_ref() {
108 let mut line_range = 0..heads.len();
110 while line_range.end - line_range.start > 1 {
111 let range_middle = line_range.start + (line_range.end - line_range.start) / 2;
112 let (left, right) = (line_range.start..range_middle, range_middle..line_range.end);
113 if (heads[left.start] .. heads[left.end]).contains(&index) {
115 line_range = left;
116 } else {
117 line_range = right;
118 }
119 }
120
121 let line_start_index = heads[line_range.start];
122 let line = line_range.start + 1;
123 let col = UnicodeSegmentation::graphemes(&self.src[line_start_index..index], true).count() + 1;
124
125 return (line, col)
126 }
127
128 unreachable!()
129 }
130}
131
132#[cfg(test)]
133mod tests {
134 use crate::*;
135
136 #[test]
137 fn empty_str() {
138 let text = "";
139 let lookup = LineColLookup::new(text);
140 assert_eq!(lookup.get(0), (1, 1));
141 }
142
143 #[test]
144 fn line_col_iter_by_codepoints() {
145 let text = "a\nab\nabc";
146 let lookup = LineColLookup::new(text);
147 assert_eq!(lookup.get(0), (1, 1));
148 assert_eq!(lookup.get(1), (1, 2));
149 assert_eq!(lookup.get(2), (2, 1));
150 assert_eq!(lookup.get(3), (2, 2));
151 assert_eq!(lookup.get(4), (2, 3));
152 assert_eq!(lookup.get(5), (3, 1));
153 assert_eq!(lookup.get(6), (3, 2));
154 assert_eq!(lookup.get(7), (3, 3));
155 assert_eq!(lookup.get(8), (3, 4));
156 }
157
158 #[test]
159 #[cfg(feature = "grapheme-clusters")]
160 fn emoji_text_by_grapheme_clusters() {
161 let text = "The 👨👩👦 emoji is made of 5 code points and 18 bytes in UTF-8.";
162 let lookup = LineColLookup::new(text);
163 assert_eq!(lookup.get_by_cluster(4), (1, 5));
164 assert_eq!(lookup.get_by_cluster(22), (1, 6));
165 }
166
167 #[test]
168 fn emoji_text_by_codepoints() {
169 let text = "The 👨👩👦 emoji is made of 5 code points and 18 bytes in UTF-8.";
170 let lookup = LineColLookup::new(text);
171 assert_eq!(lookup.get(4), (1, 5));
172 assert_eq!(lookup.get(22), (1, 23));
173 }
174}