tantivy_common/
json_path_writer.rs

1use crate::replace_in_place;
2
/// Separates the different segments of a json path.
pub const JSON_PATH_SEGMENT_SEP: u8 = 1u8;
/// [`JSON_PATH_SEGMENT_SEP`] as a one-character string slice, convenient for
/// appending to a `String` path.
///
/// The conversion is checked at compile time: if the separator byte is ever
/// changed to something that is not a valid single-byte UTF-8 sequence, the
/// build fails instead of silently producing an invalid `&str`.
pub const JSON_PATH_SEGMENT_SEP_STR: &str = match std::str::from_utf8(&[JSON_PATH_SEGMENT_SEP]) {
    Ok(separator) => separator,
    Err(_) => panic!("JSON_PATH_SEGMENT_SEP must be a single-byte UTF-8 code point"),
};
7
/// Builder for the flattened json paths used by tantivy.
///
/// Segments are pushed and popped like a stack, while the joined result is
/// kept in a single `String`.
#[derive(Debug, Clone, Default)]
pub struct JsonPathWriter {
    /// Flattened path assembled from the segments pushed so far.
    path: String,
    /// Byte length of `path` recorded just before each pushed segment, so that
    /// the segment can be removed again by truncating back to that length.
    indices: Vec<usize>,
    /// Whether `.` inside pushed segments is rewritten to the segment separator.
    expand_dots: bool,
}
15
16impl JsonPathWriter {
17    pub fn new() -> Self {
18        JsonPathWriter {
19            path: String::new(),
20            indices: Vec::new(),
21            expand_dots: false,
22        }
23    }
24
25    /// When expand_dots is enabled, json object like
26    /// `{"k8s.node.id": 5}` is processed as if it was
27    /// `{"k8s": {"node": {"id": 5}}}`.
28    /// This option has the merit of allowing users to
29    /// write queries  like `k8s.node.id:5`.
30    /// On the other, enabling that feature can lead to
31    /// ambiguity.
32    #[inline]
33    pub fn set_expand_dots(&mut self, expand_dots: bool) {
34        self.expand_dots = expand_dots;
35    }
36
37    /// Push a new segment to the path.
38    #[inline]
39    pub fn push(&mut self, segment: &str) {
40        let len_path = self.path.len();
41        self.indices.push(len_path);
42        if !self.path.is_empty() {
43            self.path.push_str(JSON_PATH_SEGMENT_SEP_STR);
44        }
45        self.path.push_str(segment);
46        if self.expand_dots {
47            // This might include the separation byte, which is ok because it is not a dot.
48            let appended_segment = &mut self.path[len_path..];
49            // The unsafe below is safe as long as b'.' and JSON_PATH_SEGMENT_SEP are
50            // valid single byte ut8 strings.
51            // By utf-8 design, they cannot be part of another codepoint.
52            unsafe {
53                replace_in_place(b'.', JSON_PATH_SEGMENT_SEP, appended_segment.as_bytes_mut())
54            };
55        }
56    }
57
58    /// Remove the last segment. Does nothing if the path is empty.
59    #[inline]
60    pub fn pop(&mut self) {
61        if let Some(last_idx) = self.indices.pop() {
62            self.path.truncate(last_idx);
63        }
64    }
65
66    /// Clear the path.
67    #[inline]
68    pub fn clear(&mut self) {
69        self.path.clear();
70        self.indices.clear();
71    }
72
73    /// Get the current path.
74    #[inline]
75    pub fn as_str(&self) -> &str {
76        &self.path
77    }
78}
79
80impl From<JsonPathWriter> for String {
81    #[inline]
82    fn from(value: JsonPathWriter) -> Self {
83        value.path
84    }
85}
86
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the writer currently holds exactly `expected`.
    fn assert_path(path_writer: &JsonPathWriter, expected: &str) {
        assert_eq!(path_writer.as_str(), expected);
    }

    /// Walks one writer through push/pop, first with dot expansion off
    /// (the default) and then with it switched on.
    #[test]
    fn json_path_writer_test() {
        let mut path_writer = JsonPathWriter::new();

        // The first segment is written without a leading separator.
        path_writer.push("root");
        assert_path(&path_writer, "root");

        // Nested segments are joined with the separator byte.
        path_writer.push("child");
        assert_path(&path_writer, "root\u{1}child");

        // Popping drops the segment together with its separator.
        path_writer.pop();
        assert_path(&path_writer, "root");

        // Dots are kept verbatim while expansion is disabled.
        path_writer.push("k8s.node.id");
        assert_path(&path_writer, "root\u{1}k8s.node.id");

        // With expansion enabled, every dot of a newly pushed segment
        // becomes a separator.
        path_writer.set_expand_dots(true);
        path_writer.pop();
        path_writer.push("k8s.node.id");
        assert_path(&path_writer, "root\u{1}k8s\u{1}node\u{1}id");
    }
}