regex_automata/util/
escape.rs

/*!
Provides convenience routines for escaping raw bytes.

Since this crate tends to deal with `&[u8]` everywhere and the default
`Debug` implementation just shows decimal integers, it makes debugging those
representations quite difficult. This module provides types that show `&[u8]`
as if it were a string, with invalid UTF-8 escaped into its byte-by-byte hex
representation.
*/
10
11use crate::util::utf8;
12
/// A thin wrapper that gives a single `u8` a readable `Debug` rendering.
///
/// The byte is interpreted as ASCII. A space is shown as `' '`, bytes that
/// `core::ascii::escape_default` leaves alone or gives a short escape
/// (such as `\n` or `\\`) are shown that way, and every remaining byte is
/// shown as a `\xNN` hex escape with upper-case hex digits.
#[derive(Clone, Copy)]
pub struct DebugByte(pub u8);

impl core::fmt::Debug for DebugByte {
    /// Writes the escaped form of the wrapped byte to `f`.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // A bare space is nearly invisible in debug output, so it is quoted.
        if self.0 == b' ' {
            return write!(f, "' '");
        }
        // `ascii::escape_default` never yields more than a handful of bytes,
        // so a 10 byte scratch buffer is comfortably large enough.
        let mut scratch = [0u8; 10];
        let mut used = 0;
        for (pos, byte) in core::ascii::escape_default(self.0).enumerate() {
            scratch[used] = match byte {
                // Only the hex digits of a `\xab` escape can sit at index 2
                // or later; upper-case them so the result reads `\xAB`.
                b'a'..=b'f' if pos >= 2 => byte.to_ascii_uppercase(),
                _ => byte,
            };
            used += 1;
        }
        // The escaped form is pure ASCII, so the UTF-8 check cannot fail.
        f.write_str(core::str::from_utf8(&scratch[..used]).unwrap())
    }
}
43
44/// Provides a convenient `Debug` implementation for `&[u8]`.
45///
46/// This generally works best when the bytes are presumed to be mostly UTF-8,
47/// but will work for anything. For any bytes that aren't UTF-8, they are
48/// emitted as hex escape sequences.
49pub struct DebugHaystack<'a>(pub &'a [u8]);
50
51impl<'a> core::fmt::Debug for DebugHaystack<'a> {
52    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
53        write!(f, "\"")?;
54        // This is a sad re-implementation of a similar impl found in bstr.
55        let mut bytes = self.0;
56        while let Some(result) = utf8::decode(bytes) {
57            let ch = match result {
58                Ok(ch) => ch,
59                Err(byte) => {
60                    write!(f, r"\x{:02x}", byte)?;
61                    bytes = &bytes[1..];
62                    continue;
63                }
64            };
65            bytes = &bytes[ch.len_utf8()..];
66            match ch {
67                '\0' => write!(f, "\\0")?,
68                // ASCII control characters except \0, \n, \r, \t
69                '\x01'..='\x08'
70                | '\x0b'
71                | '\x0c'
72                | '\x0e'..='\x19'
73                | '\x7f' => {
74                    write!(f, "\\x{:02x}", u32::from(ch))?;
75                }
76                '\n' | '\r' | '\t' | _ => {
77                    write!(f, "{}", ch.escape_debug())?;
78                }
79            }
80        }
81        write!(f, "\"")?;
82        Ok(())
83    }
84}