anstyle_parse/
lib.rs

1//! Parser for implementing virtual terminal emulators
2//!
3//! [`Parser`] is implemented according to [Paul Williams' ANSI parser
4//! state machine]. The state machine doesn't assign meaning to the parsed data
5//! and is thus not itself sufficient for writing a terminal emulator. Instead,
6//! it is expected that an implementation of [`Perform`] is provided which does
//! something useful with the parsed data. The [`Parser`] handles the
//! bookkeeping, and the [`Perform`] gets to simply handle actions.
9//!
10//! # Examples
11//!
//! For an example of using the [`Parser`] please see the examples folder. The example included
//! there simply logs all the actions [`Perform`] does. One quick way to see it in action is to
//! pipe `vim` into it:
15//!
16//! ```sh
17//! cargo build --release --example parselog
18//! vim | target/release/examples/parselog
19//! ```
20//!
21//! Just type `:q` to exit.
22//!
23//! # Differences from original state machine description
24//!
25//! * UTF-8 Support for Input
26//! * OSC Strings can be terminated by 0x07
27//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in
28//!   all states.
29//!
30//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser
31#![cfg_attr(not(test), no_std)]
32#![cfg_attr(docsrs, feature(doc_auto_cfg))]
33#![allow(missing_docs)]
34#![warn(clippy::print_stderr)]
35#![warn(clippy::print_stdout)]
36
37#[cfg(not(feature = "core"))]
38extern crate alloc;
39
40use core::mem::MaybeUninit;
41
42#[cfg(feature = "core")]
43use arrayvec::ArrayVec;
44#[cfg(feature = "utf8")]
45use utf8parse as utf8;
46
47mod params;
48pub mod state;
49
50pub use params::{Params, ParamsIter};
51
52use state::{state_change, Action, State};
53
/// Number of intermediate bytes stored per sequence; collecting more than this sets the
/// parser's `ignoring` flag instead of storing the byte.
const MAX_INTERMEDIATES: usize = 2;
/// Number of OSC parameter ranges tracked per OSC string; separators beyond this are dropped.
const MAX_OSC_PARAMS: usize = 16;
/// Capacity in bytes of the fixed-size OSC buffer used when the `core` feature is enabled.
#[cfg(feature = "core")]
const MAX_OSC_RAW: usize = 1024;
58
/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
///
/// `C` is the [`CharAccumulator`] used to assemble characters from bytes while the parser is
/// in its UTF-8 state.
#[allow(unused_qualifications)]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Parser<C = DefaultCharAccumulator> {
    // Current state of the state machine.
    state: State,
    // Intermediate bytes stored by the `Collect` action; only the first
    // `intermediate_idx` entries are meaningful.
    intermediates: [u8; MAX_INTERMEDIATES],
    intermediate_idx: usize,
    // Parameters that have already been completed for the current sequence.
    params: Params,
    // Value of the parameter currently being accumulated digit by digit.
    param: u16,
    // Bytes of the current OSC string, with the `;` separators left out.
    #[cfg(feature = "core")]
    osc_raw: ArrayVec<u8, MAX_OSC_RAW>,
    #[cfg(not(feature = "core"))]
    osc_raw: alloc::vec::Vec<u8>,
    // `(begin, end)` byte ranges into `osc_raw`, one per OSC parameter; only the first
    // `osc_num_params` entries are meaningful.
    osc_params: [(usize, usize); MAX_OSC_PARAMS],
    osc_num_params: usize,
    // Set when intermediates or parameters overflow; handed to the performer as `ignore`.
    ignoring: bool,
    // Accumulates bytes into a `char` while in the UTF-8 state.
    utf8_parser: C,
}
77
78impl<C> Parser<C>
79where
80    C: CharAccumulator,
81{
82    /// Create a new Parser
83    pub fn new() -> Parser {
84        Parser::default()
85    }
86
87    #[inline]
88    fn params(&self) -> &Params {
89        &self.params
90    }
91
92    #[inline]
93    fn intermediates(&self) -> &[u8] {
94        &self.intermediates[..self.intermediate_idx]
95    }
96
97    /// Advance the parser state
98    ///
99    /// Requires a [`Perform`] in case `byte` triggers an action
100    #[inline]
101    pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) {
102        // Utf8 characters are handled out-of-band.
103        if let State::Utf8 = self.state {
104            self.process_utf8(performer, byte);
105            return;
106        }
107
108        let (state, action) = state_change(self.state, byte);
109        self.perform_state_change(performer, state, action, byte);
110    }
111
112    #[inline]
113    fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
114    where
115        P: Perform,
116    {
117        if let Some(c) = self.utf8_parser.add(byte) {
118            performer.print(c);
119            self.state = State::Ground;
120        }
121    }
122
123    #[inline]
124    fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8)
125    where
126        P: Perform,
127    {
128        match state {
129            State::Anywhere => {
130                // Just run the action
131                self.perform_action(performer, action, byte);
132            }
133            state => {
134                match self.state {
135                    State::DcsPassthrough => {
136                        self.perform_action(performer, Action::Unhook, byte);
137                    }
138                    State::OscString => {
139                        self.perform_action(performer, Action::OscEnd, byte);
140                    }
141                    _ => (),
142                }
143
144                match action {
145                    Action::Nop => (),
146                    action => {
147                        self.perform_action(performer, action, byte);
148                    }
149                }
150
151                match state {
152                    State::CsiEntry | State::DcsEntry | State::Escape => {
153                        self.perform_action(performer, Action::Clear, byte);
154                    }
155                    State::DcsPassthrough => {
156                        self.perform_action(performer, Action::Hook, byte);
157                    }
158                    State::OscString => {
159                        self.perform_action(performer, Action::OscStart, byte);
160                    }
161                    _ => (),
162                }
163
164                // Assume the new state
165                self.state = state;
166            }
167        }
168    }
169
170    /// Separate method for `osc_dispatch` that borrows self as read-only
171    ///
172    /// The aliasing is needed here for multiple slices into `self.osc_raw`
173    #[inline]
174    fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) {
175        let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] =
176            unsafe { MaybeUninit::uninit().assume_init() };
177
178        for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) {
179            let indices = self.osc_params[i];
180            *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]);
181        }
182
183        unsafe {
184            let num_params = self.osc_num_params;
185            let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]];
186            performer.osc_dispatch(&*params, byte == 0x07);
187        }
188    }
189
190    #[inline]
191    fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) {
192        match action {
193            Action::Print => performer.print(byte as char),
194            Action::Execute => performer.execute(byte),
195            Action::Hook => {
196                if self.params.is_full() {
197                    self.ignoring = true;
198                } else {
199                    self.params.push(self.param);
200                }
201
202                performer.hook(self.params(), self.intermediates(), self.ignoring, byte);
203            }
204            Action::Put => performer.put(byte),
205            Action::OscStart => {
206                self.osc_raw.clear();
207                self.osc_num_params = 0;
208            }
209            Action::OscPut => {
210                #[cfg(feature = "core")]
211                {
212                    if self.osc_raw.is_full() {
213                        return;
214                    }
215                }
216
217                let idx = self.osc_raw.len();
218
219                // Param separator
220                if byte == b';' {
221                    let param_idx = self.osc_num_params;
222                    match param_idx {
223                        // Only process up to MAX_OSC_PARAMS
224                        MAX_OSC_PARAMS => return,
225
226                        // First param is special - 0 to current byte index
227                        0 => {
228                            self.osc_params[param_idx] = (0, idx);
229                        }
230
231                        // All other params depend on previous indexing
232                        _ => {
233                            let prev = self.osc_params[param_idx - 1];
234                            let begin = prev.1;
235                            self.osc_params[param_idx] = (begin, idx);
236                        }
237                    }
238
239                    self.osc_num_params += 1;
240                } else {
241                    self.osc_raw.push(byte);
242                }
243            }
244            Action::OscEnd => {
245                let param_idx = self.osc_num_params;
246                let idx = self.osc_raw.len();
247
248                match param_idx {
249                    // Finish last parameter if not already maxed
250                    MAX_OSC_PARAMS => (),
251
252                    // First param is special - 0 to current byte index
253                    0 => {
254                        self.osc_params[param_idx] = (0, idx);
255                        self.osc_num_params += 1;
256                    }
257
258                    // All other params depend on previous indexing
259                    _ => {
260                        let prev = self.osc_params[param_idx - 1];
261                        let begin = prev.1;
262                        self.osc_params[param_idx] = (begin, idx);
263                        self.osc_num_params += 1;
264                    }
265                }
266                self.osc_dispatch(performer, byte);
267            }
268            Action::Unhook => performer.unhook(),
269            Action::CsiDispatch => {
270                if self.params.is_full() {
271                    self.ignoring = true;
272                } else {
273                    self.params.push(self.param);
274                }
275
276                performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte);
277            }
278            Action::EscDispatch => {
279                performer.esc_dispatch(self.intermediates(), self.ignoring, byte);
280            }
281            Action::Collect => {
282                if self.intermediate_idx == MAX_INTERMEDIATES {
283                    self.ignoring = true;
284                } else {
285                    self.intermediates[self.intermediate_idx] = byte;
286                    self.intermediate_idx += 1;
287                }
288            }
289            Action::Param => {
290                if self.params.is_full() {
291                    self.ignoring = true;
292                    return;
293                }
294
295                if byte == b';' {
296                    self.params.push(self.param);
297                    self.param = 0;
298                } else if byte == b':' {
299                    self.params.extend(self.param);
300                    self.param = 0;
301                } else {
302                    // Continue collecting bytes into param
303                    self.param = self.param.saturating_mul(10);
304                    self.param = self.param.saturating_add((byte - b'0') as u16);
305                }
306            }
307            Action::Clear => {
308                // Reset everything on ESC/CSI/DCS entry
309                self.intermediate_idx = 0;
310                self.ignoring = false;
311                self.param = 0;
312
313                self.params.clear();
314            }
315            Action::BeginUtf8 => self.process_utf8(performer, byte),
316            Action::Ignore => (),
317            Action::Nop => (),
318        }
319    }
320}
321
/// Build a `char` out of bytes
///
/// The [`Parser`] feeds bytes to its accumulator one at a time and prints the character the
/// accumulator eventually returns.
pub trait CharAccumulator: Default {
    /// Feed one more byte into the accumulator
    ///
    /// Return `Some` with the resulting `char` once one is available, or `None` when more
    /// data is needed
    fn add(&mut self, byte: u8) -> Option<char>;
}
329
/// Most flexible [`CharAccumulator`] for [`Parser`] based on active features
///
/// With the `utf8` feature enabled this is [`Utf8Parser`].
#[cfg(feature = "utf8")]
pub type DefaultCharAccumulator = Utf8Parser;
/// Most flexible [`CharAccumulator`] for [`Parser`] based on active features
///
/// Without the `utf8` feature this is [`AsciiParser`].
#[cfg(not(feature = "utf8"))]
pub type DefaultCharAccumulator = AsciiParser;
335
/// Only allow parsing 7-bit ASCII
///
/// # Panics
///
/// [`CharAccumulator::add`] on this type panics every time it is called.
// NOTE(review): `Parser` calls `add` while in its UTF-8 state; presumably that state is only
// entered for non-ASCII lead bytes, in which case such input panics with this accumulator.
// Confirm against the state table.
#[allow(clippy::exhaustive_structs)]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct AsciiParser;

impl CharAccumulator for AsciiParser {
    /// Always panics: this accumulator cannot assemble multi-byte characters.
    fn add(&mut self, _byte: u8) -> Option<char> {
        unreachable!("multi-byte UTF8 characters are unsupported")
    }
}
346
/// Allow parsing UTF-8
///
/// Wraps a `utf8parse` parser and reports the characters it produces.
#[cfg(feature = "utf8")]
#[derive(Default, Clone, Debug, PartialEq, Eq)]
pub struct Utf8Parser {
    utf8_parser: utf8::Parser,
}

#[cfg(feature = "utf8")]
impl CharAccumulator for Utf8Parser {
    /// Push `byte` into the wrapped parser and return whatever character it reported for
    /// this byte, if any.
    fn add(&mut self, byte: u8) -> Option<char> {
        let mut decoded = None;
        self.utf8_parser
            .advance(&mut VtUtf8Receiver(&mut decoded), byte);
        decoded
    }
}
363
/// Receiver that stores the most recently reported character in the borrowed slot.
#[cfg(feature = "utf8")]
struct VtUtf8Receiver<'a>(&'a mut Option<char>);

#[cfg(feature = "utf8")]
impl utf8::Receiver for VtUtf8Receiver<'_> {
    /// Store a successfully decoded character.
    fn codepoint(&mut self, decoded: char) {
        *self.0 = Some(decoded);
    }

    /// Store U+FFFD in place of an invalid byte sequence.
    fn invalid_sequence(&mut self) {
        *self.0 = Some(char::REPLACEMENT_CHARACTER);
    }
}
377
/// Performs actions requested by the [`Parser`]
///
/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
/// movement, or simply printing characters to the screen.
///
/// The methods on this type correspond to actions described in
/// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
/// a useful way in my own words for completeness, but the site should be
/// referenced if something isn't clear. If the site disappears at some point in
/// the future, consider checking archive.org.
///
/// Every method has an empty default implementation, so implementors only need to override
/// the callbacks they care about.
pub trait Perform {
    /// Draw a character to the screen and update states.
    fn print(&mut self, _c: char) {}

    /// Execute a C0 or C1 control function.
    fn execute(&mut self, _byte: u8) {}

    /// Invoked when a final character arrives in first part of device control string.
    ///
    /// The control function should be determined from the private marker, final character, and
    /// execute with a parameter list. A handler should be selected for remaining characters in the
    /// string; the handler function should subsequently be called by `put` for every character in
    /// the control string.
    ///
    /// The `ignore` flag indicates that either more than two intermediates arrived
    /// or the number of parameters exceeded the maximum supported length,
    /// and subsequent characters were ignored.
    fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {}

    /// Pass bytes as part of a device control string to the handler chosen in `hook`. C0 controls
    /// will also be passed to the handler.
    fn put(&mut self, _byte: u8) {}

    /// Called when a device control string is terminated.
    ///
    /// The previously selected handler should be notified that the DCS has
    /// terminated.
    fn unhook(&mut self) {}

    /// Dispatch an operating system command.
    ///
    /// `params` holds the raw bytes of each `;`-separated parameter of the OSC string.
    /// `bell_terminated` is `true` when the byte that ended the string was `0x07`.
    fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {}

    /// A final character has arrived for a CSI sequence
    ///
    /// The `ignore` flag indicates that either more than two intermediates arrived
    /// or the number of parameters exceeded the maximum supported length,
    /// and subsequent characters were ignored.
    fn csi_dispatch(
        &mut self,
        _params: &Params,
        _intermediates: &[u8],
        _ignore: bool,
        _action: u8,
    ) {
    }

    /// The final character of an escape sequence has arrived.
    ///
    /// The `ignore` flag indicates that more than two intermediates arrived and
    /// subsequent characters were ignored.
    fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
}