#![recursion_limit = "196"]
#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
mod error;
mod generator;
#[cfg(not(feature = "fuzzing"))]
mod graph;
#[cfg(feature = "fuzzing")]
pub mod graph;
mod leaf;
#[cfg(not(feature = "fuzzing"))]
mod mir;
#[cfg(feature = "fuzzing")]
pub mod mir;
mod parser;
mod util;
#[macro_use]
#[allow(missing_docs)]
mod macros;
use generator::Generator;
use graph::{DisambiguationError, Fork, Graph, Rope};
use leaf::Leaf;
use parser::{IgnoreFlags, Mode, Parser};
use quote::ToTokens;
use util::MaybeVoid;
use proc_macro2::{Delimiter, TokenStream, TokenTree};
use quote::quote;
use syn::parse_quote;
use syn::spanned::Spanned;
use syn::{Fields, ItemEnum};
const LOGOS_ATTR: &str = "logos";
const ERROR_ATTR: &str = "error";
const TOKEN_ATTR: &str = "token";
const REGEX_ATTR: &str = "regex";
pub fn generate(input: TokenStream) -> TokenStream {
debug!("Reading input token streams");
let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
let name = &item.ident;
let mut parser = Parser::default();
for param in item.generics.params {
parser.parse_generic(param);
}
for attr in &mut item.attrs {
parser.try_parse_logos(attr);
}
let mut ropes = Vec::new();
let mut regex_ids = Vec::new();
let mut graph = Graph::new();
{
let errors = &mut parser.errors;
for literal in &parser.skips {
match literal.to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors) {
Ok(mir) => {
let then = graph.push(Leaf::new_skip(literal.span()).priority(mir.priority()));
let id = graph.regex(mir, then);
regex_ids.push(id);
}
Err(err) => {
errors.err(err, literal.span());
}
}
}
}
debug!("Iterating through enum variants");
for variant in &mut item.variants {
let field = match &mut variant.fields {
Fields::Unit => MaybeVoid::Void,
Fields::Unnamed(fields) => {
if fields.unnamed.len() != 1 {
parser.err(
format!(
"Logos currently only supports variants with one field, found {}",
fields.unnamed.len(),
),
fields.span(),
);
}
let ty = &mut fields
.unnamed
.first_mut()
.expect("Already checked len; qed")
.ty;
let ty = parser.get_type(ty);
MaybeVoid::Some(ty)
}
Fields::Named(fields) => {
parser.err("Logos doesn't support named fields yet.", fields.span());
MaybeVoid::Void
}
};
let var_ident = &variant.ident;
let leaf = move |span| Leaf::new(var_ident, span).field(field.clone());
for attr in &mut variant.attrs {
let attr_name = match attr.path().get_ident() {
Some(ident) => ident.to_string(),
None => continue,
};
match attr_name.as_str() {
ERROR_ATTR => {
parser.err(
"\
Since 0.13 Logos no longer requires the #[error] variant.\n\
\n\
For help with migration see release notes: \
https://github.com/maciejhirsz/logos/releases\
",
attr.span(),
);
}
TOKEN_ATTR => {
let definition = match parser.parse_definition(attr) {
Some(definition) => definition,
None => {
parser.err("Expected #[token(...)]", attr.span());
continue;
}
};
if definition.ignore_flags.is_empty() {
let bytes = definition.literal.to_bytes();
let then = graph.push(
leaf(definition.literal.span())
.priority(definition.priority.unwrap_or(bytes.len() * 2))
.callback(definition.callback),
);
ropes.push(Rope::new(bytes, then));
} else {
let mir = definition
.literal
.escape_regex()
.to_mir(
&Default::default(),
definition.ignore_flags,
&mut parser.errors,
)
.expect("The literal should be perfectly valid regex");
let then = graph.push(
leaf(definition.literal.span())
.priority(definition.priority.unwrap_or_else(|| mir.priority()))
.callback(definition.callback),
);
let id = graph.regex(mir, then);
regex_ids.push(id);
}
}
REGEX_ATTR => {
let definition = match parser.parse_definition(attr) {
Some(definition) => definition,
None => {
parser.err("Expected #[regex(...)]", attr.span());
continue;
}
};
let mir = match definition.literal.to_mir(
&parser.subpatterns,
definition.ignore_flags,
&mut parser.errors,
) {
Ok(mir) => mir,
Err(err) => {
parser.err(err, definition.literal.span());
continue;
}
};
let then = graph.push(
leaf(definition.literal.span())
.priority(definition.priority.unwrap_or_else(|| mir.priority()))
.callback(definition.callback),
);
let id = graph.regex(mir, then);
regex_ids.push(id);
}
_ => (),
}
}
}
let mut root = Fork::new();
debug!("Parsing additional options (extras, source, ...)");
let error_type = parser.error_type.take();
let extras = parser.extras.take();
let source = parser
.source
.take()
.map(strip_wrapping_parens)
.unwrap_or(match parser.mode {
Mode::Utf8 => quote!(str),
Mode::Binary => quote!([u8]),
});
let logos_path = parser
.logos_path
.take()
.unwrap_or_else(|| parse_quote!(::logos));
let generics = parser.generics();
let this = quote!(#name #generics);
let impl_logos = |body| {
quote! {
impl<'s> #logos_path::Logos<'s> for #this {
type Error = #error_type;
type Extras = #extras;
type Source = #source;
fn lex(lex: &mut #logos_path::Lexer<'s, Self>) {
#body
}
}
}
};
for id in regex_ids {
let fork = graph.fork_off(id);
root.merge(fork, &mut graph);
}
for rope in ropes {
root.merge(rope.into_fork(&mut graph), &mut graph);
}
while let Some(id) = root.miss.take() {
let fork = graph.fork_off(id);
if fork.branches().next().is_some() {
root.merge(fork, &mut graph);
} else {
break;
}
}
debug!("Checking if any two tokens have the same priority");
for &DisambiguationError(a, b) in graph.errors() {
let a = graph[a].unwrap_leaf();
let b = graph[b].unwrap_leaf();
let disambiguate = a.priority + 1;
let mut err = |a: &Leaf, b: &Leaf| {
parser.err(
format!(
"\
A definition of variant `{a}` can match the same input as another definition of variant `{b}`.\n\
\n\
hint: Consider giving one definition a higher priority: \
#[regex(..., priority = {disambiguate})]\
",
),
a.span
);
};
err(a, b);
err(b, a);
}
if let Some(errors) = parser.errors.render() {
return impl_logos(errors);
}
let root = graph.push(root);
graph.shake(root);
debug!("Generating code from graph:\n{graph:#?}");
let generator = Generator::new(name, &this, root, &graph);
let body = generator.generate();
impl_logos(quote! {
use #logos_path::internal::{LexerInternal, CallbackResult};
type Lexer<'s> = #logos_path::Lexer<'s, #this>;
fn _end<'s>(lex: &mut Lexer<'s>) {
lex.end()
}
fn _error<'s>(lex: &mut Lexer<'s>) {
lex.bump_unchecked(1);
lex.error();
}
#body
})
}
pub fn strip_attributes(input: TokenStream) -> TokenStream {
let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
strip_attrs_from_vec(&mut item.attrs);
for attr in &mut item.attrs {
if let syn::Meta::List(meta) = &mut attr.meta {
if meta.path.is_ident("derive") {
let mut tokens =
std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
while let Some(TokenTree::Ident(ident)) = tokens.next() {
let punct = tokens.next();
if ident == "Logos" {
continue;
}
meta.tokens.extend([TokenTree::Ident(ident)]);
meta.tokens.extend(punct);
}
}
}
}
for variant in &mut item.variants {
strip_attrs_from_vec(&mut variant.attrs);
for field in &mut variant.fields {
strip_attrs_from_vec(&mut field.attrs);
}
}
item.to_token_stream()
}
fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
attrs.retain(|attr| !is_logos_attr(attr))
}
fn is_logos_attr(attr: &syn::Attribute) -> bool {
attr.path().is_ident(LOGOS_ATTR)
|| attr.path().is_ident(TOKEN_ATTR)
|| attr.path().is_ident(REGEX_ATTR)
}
fn strip_wrapping_parens(t: TokenStream) -> TokenStream {
let tts: Vec<TokenTree> = t.into_iter().collect();
if tts.len() != 1 {
tts.into_iter().collect()
} else {
match tts.into_iter().next().unwrap() {
TokenTree::Group(g) => {
if g.delimiter() == Delimiter::Parenthesis {
g.stream()
} else {
core::iter::once(TokenTree::Group(g)).collect()
}
}
tt => core::iter::once(tt).collect(),
}
}
}