wasmer_wit_parser/
lib.rs

1use anyhow::{anyhow, bail, Context, Result};
2use id_arena::{Arena, Id};
3use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag};
4use std::collections::{HashMap, HashSet};
5use std::fs;
6use std::path::{Path, PathBuf};
7
8pub mod abi;
9mod ast;
10mod sizealign;
11pub use sizealign::*;
12
13/// Checks if the given string is a legal identifier in wit.
14pub fn validate_id(s: &str) -> Result<()> {
15    ast::validate_id(0, s)?;
16    Ok(())
17}
18
19#[derive(Debug, Clone, Default, PartialEq)]
20pub struct Interface {
21    pub name: String,
22    /// The module name to use for bindings generation.
23    ///
24    /// If `None`, then the interface name will be used.
25    ///
26    /// If `Some`, then this value is used to format an export
27    /// name of `<module>#<name>` for exports or an import module
28    /// name of `<module>` for imports.
29    pub module: Option<String>,
30    pub types: Arena<TypeDef>,
31    pub type_lookup: HashMap<String, TypeId>,
32    pub resources: Arena<Resource>,
33    pub resource_lookup: HashMap<String, ResourceId>,
34    pub interfaces: Arena<Interface>,
35    pub interface_lookup: HashMap<String, InterfaceId>,
36    pub functions: Vec<Function>,
37    pub globals: Vec<Global>,
38}
39
40pub type TypeId = Id<TypeDef>;
41pub type ResourceId = Id<Resource>;
42pub type InterfaceId = Id<Interface>;
43
44#[derive(Debug, Clone, PartialEq)]
45pub struct TypeDef {
46    pub docs: Docs,
47    pub kind: TypeDefKind,
48    pub name: Option<String>,
49    /// `None` if this type is originally declared in this instance or
50    /// otherwise `Some` if it was originally defined in a different module.
51    pub foreign_module: Option<String>,
52}
53
54#[derive(Debug, Clone, PartialEq)]
55pub enum TypeDefKind {
56    Record(Record),
57    Flags(Flags),
58    Tuple(Tuple),
59    Variant(Variant),
60    Enum(Enum),
61    Option(Type),
62    Expected(Expected),
63    Union(Union),
64    List(Type),
65    Future(Type),
66    Stream(Stream),
67    Type(Type),
68}
69
70#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
71pub enum Type {
72    Unit,
73    Bool,
74    U8,
75    U16,
76    U32,
77    U64,
78    S8,
79    S16,
80    S32,
81    S64,
82    Float32,
83    Float64,
84    Char,
85    String,
86    Handle(ResourceId),
87    Id(TypeId),
88}
89
/// Unsigned integer widths, as returned by the `tag` methods of
/// [`Variant`], [`Enum`] and [`Union`].
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Int {
    U8,
    U16,
    U32,
    U64,
}
97
98#[derive(Debug, Clone, PartialEq)]
99pub struct Record {
100    pub fields: Vec<Field>,
101}
102
103#[derive(Debug, Clone, PartialEq)]
104pub struct Field {
105    pub docs: Docs,
106    pub name: String,
107    pub ty: Type,
108}
109
110#[derive(Debug, Clone, PartialEq)]
111pub struct Flags {
112    pub flags: Vec<Flag>,
113}
114
115#[derive(Debug, Clone, PartialEq)]
116pub struct Flag {
117    pub docs: Docs,
118    pub name: String,
119}
120
/// Representation chosen for a [`Flags`] definition by [`Flags::repr`].
#[derive(Debug)]
pub enum FlagsRepr {
    /// Chosen when there are at most 8 flags.
    U8,
    /// Chosen when there are between 9 and 16 flags.
    U16,
    /// Chosen when there are more than 16 flags; the payload is the value
    /// reported by [`FlagsRepr::count`].
    U32(usize),
}
127
128impl Flags {
129    pub fn repr(&self) -> FlagsRepr {
130        match self.flags.len() {
131            n if n <= 8 => FlagsRepr::U8,
132            n if n <= 16 => FlagsRepr::U16,
133            n => FlagsRepr::U32(sizealign::align_to(n, 32) / 32),
134        }
135    }
136}
137
138impl FlagsRepr {
139    pub fn count(&self) -> usize {
140        match self {
141            FlagsRepr::U8 => 1,
142            FlagsRepr::U16 => 1,
143            FlagsRepr::U32(n) => *n,
144        }
145    }
146}
147
148#[derive(Debug, Clone, PartialEq)]
149pub struct Tuple {
150    pub types: Vec<Type>,
151}
152
153#[derive(Debug, Clone, PartialEq)]
154pub struct Variant {
155    pub cases: Vec<Case>,
156}
157
158#[derive(Debug, Clone, PartialEq)]
159pub struct Case {
160    pub docs: Docs,
161    pub name: String,
162    pub ty: Type,
163}
164
165impl Variant {
166    pub fn tag(&self) -> Int {
167        match self.cases.len() {
168            n if n <= u8::max_value() as usize => Int::U8,
169            n if n <= u16::max_value() as usize => Int::U16,
170            n if n <= u32::max_value() as usize => Int::U32,
171            _ => panic!("too many cases to fit in a repr"),
172        }
173    }
174}
175
176#[derive(Debug, Clone, PartialEq)]
177pub struct Enum {
178    pub cases: Vec<EnumCase>,
179}
180
181#[derive(Debug, Clone, PartialEq)]
182pub struct EnumCase {
183    pub docs: Docs,
184    pub name: String,
185}
186
187impl Enum {
188    pub fn tag(&self) -> Int {
189        match self.cases.len() {
190            n if n <= u8::max_value() as usize => Int::U8,
191            n if n <= u16::max_value() as usize => Int::U16,
192            n if n <= u32::max_value() as usize => Int::U32,
193            _ => panic!("too many cases to fit in a repr"),
194        }
195    }
196}
197
198#[derive(Debug, Clone, PartialEq)]
199pub struct Expected {
200    pub ok: Type,
201    pub err: Type,
202}
203
204#[derive(Debug, Clone, PartialEq)]
205pub struct Union {
206    pub cases: Vec<UnionCase>,
207}
208
209#[derive(Debug, Clone, PartialEq)]
210pub struct UnionCase {
211    pub docs: Docs,
212    pub ty: Type,
213}
214
215impl Union {
216    pub fn tag(&self) -> Int {
217        match self.cases.len() {
218            n if n <= u8::max_value() as usize => Int::U8,
219            n if n <= u16::max_value() as usize => Int::U16,
220            n if n <= u32::max_value() as usize => Int::U32,
221            _ => panic!("too many cases to fit in a repr"),
222        }
223    }
224}
225
226#[derive(Debug, Clone, PartialEq)]
227pub struct Stream {
228    pub element: Type,
229    pub end: Type,
230}
231
/// Documentation text attached to an item.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Docs {
    /// The documentation text, or `None` when the item has none.
    pub contents: Option<String>,
}
236
237#[derive(Debug, Clone, PartialEq)]
238pub struct Resource {
239    pub docs: Docs,
240    pub name: String,
241    pub supertype: Option<String>,
242    /// `None` if this resource is defined within the containing instance,
243    /// otherwise `Some` if it's defined in an instance named here.
244    pub foreign_module: Option<String>,
245}
246
247#[derive(Debug, Clone, PartialEq)]
248pub struct Global {
249    pub docs: Docs,
250    pub name: String,
251    pub ty: Type,
252}
253
254#[derive(Debug, Clone, PartialEq)]
255pub struct Function {
256    pub is_async: bool,
257    pub docs: Docs,
258    pub name: String,
259    pub kind: FunctionKind,
260    pub params: Vec<(String, Type)>,
261    pub result: Type,
262}
263
264#[derive(Debug, Clone, PartialEq)]
265pub enum FunctionKind {
266    Freestanding,
267    Static { resource: ResourceId, name: String },
268    Method { resource: ResourceId, name: String },
269}
270
271impl Function {
272    pub fn item_name(&self) -> &str {
273        match &self.kind {
274            FunctionKind::Freestanding => &self.name,
275            FunctionKind::Static { name, .. } => name,
276            FunctionKind::Method { name, .. } => name,
277        }
278    }
279}
280
281fn unwrap_md(contents: &str) -> String {
282    let mut wit = String::new();
283    let mut last_pos = 0;
284    let mut in_wit_code_block = false;
285    Parser::new_ext(contents, Options::empty())
286        .into_offset_iter()
287        .for_each(|(event, range)| match (event, range) {
288            (Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed("wit")))), _) => {
289                in_wit_code_block = true;
290            }
291            (Event::Text(text), range) if in_wit_code_block => {
292                // Ensure that offsets are correct by inserting newlines to
293                // cover the Markdown content outside of wit code blocks.
294                for _ in contents[last_pos..range.start].lines() {
295                    wit.push_str("\n");
296                }
297                wit.push_str(&text);
298                last_pos = range.end;
299            }
300            (Event::End(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed("wit")))), _) => {
301                in_wit_code_block = false;
302            }
303            _ => {}
304        });
305    wit
306}
307
308impl Interface {
309    pub fn parse(name: &str, input: &str) -> Result<Interface> {
310        Interface::parse_with(name, input, |f| {
311            Err(anyhow!("cannot load submodule `{}`", f))
312        })
313    }
314
315    pub fn parse_file(path: impl AsRef<Path>) -> Result<Interface> {
316        let path = path.as_ref();
317        let parent = path.parent().unwrap();
318        let contents = std::fs::read_to_string(&path)
319            .with_context(|| format!("failed to read: {}", path.display()))?;
320        Interface::parse_with(path, &contents, |path| load_fs(parent, path))
321    }
322
323    pub fn parse_with(
324        filename: impl AsRef<Path>,
325        contents: &str,
326        mut load: impl FnMut(&str) -> Result<(PathBuf, String)>,
327    ) -> Result<Interface> {
328        Interface::_parse_with(
329            filename.as_ref(),
330            contents,
331            &mut load,
332            &mut HashSet::new(),
333            &mut HashMap::new(),
334        )
335    }
336
337    fn _parse_with(
338        filename: &Path,
339        contents: &str,
340        load: &mut dyn FnMut(&str) -> Result<(PathBuf, String)>,
341        visiting: &mut HashSet<PathBuf>,
342        map: &mut HashMap<String, Interface>,
343    ) -> Result<Interface> {
344        let name = filename
345            .file_name()
346            .context("wit path must end in a file name")?
347            .to_str()
348            .context("wit filename must be valid unicode")?
349            // TODO: replace with `file_prefix` if/when that gets stabilized.
350            .split(".")
351            .next()
352            .unwrap();
353        let mut contents = contents;
354
355        // If we have a ".md" file, it's a wit file wrapped in a markdown file;
356        // parse the markdown to extract the `wit` code blocks.
357        let md_contents;
358        if filename.extension().and_then(|s| s.to_str()) == Some("md") {
359            md_contents = unwrap_md(contents);
360            contents = &md_contents[..];
361        }
362
363        // Parse the `contents `into an AST
364        let ast = match ast::Ast::parse(contents) {
365            Ok(ast) => ast,
366            Err(mut e) => {
367                let file = filename.display().to_string();
368                ast::rewrite_error(&mut e, &file, contents);
369                return Err(e);
370            }
371        };
372
373        // Load up any modules into our `map` that have not yet been parsed.
374        if !visiting.insert(filename.to_path_buf()) {
375            bail!("file `{}` recursively imports itself", filename.display())
376        }
377        for item in ast.items.iter() {
378            let u = match item {
379                ast::Item::Use(u) => u,
380                _ => continue,
381            };
382            if map.contains_key(&*u.from[0].name) {
383                continue;
384            }
385            let (filename, contents) = load(&u.from[0].name)
386                // TODO: insert context here about `u.name.span` and `filename`
387                ?;
388            let instance = Interface::_parse_with(&filename, &contents, load, visiting, map)?;
389            map.insert(u.from[0].name.to_string(), instance);
390        }
391        visiting.remove(filename);
392
393        // and finally resolve everything into our final instance
394        match ast.resolve(name, map) {
395            Ok(i) => Ok(i),
396            Err(mut e) => {
397                let file = filename.display().to_string();
398                ast::rewrite_error(&mut e, &file, contents);
399                Err(e)
400            }
401        }
402    }
403
404    pub fn topological_types(&self) -> Vec<TypeId> {
405        let mut ret = Vec::new();
406        let mut visited = HashSet::new();
407        for (id, _) in self.types.iter() {
408            self.topo_visit(id, &mut ret, &mut visited);
409        }
410        ret
411    }
412
413    fn topo_visit(&self, id: TypeId, list: &mut Vec<TypeId>, visited: &mut HashSet<TypeId>) {
414        if !visited.insert(id) {
415            return;
416        }
417        match &self.types[id].kind {
418            TypeDefKind::Flags(_) | TypeDefKind::Enum(_) => {}
419            TypeDefKind::Type(t) | TypeDefKind::List(t) => self.topo_visit_ty(t, list, visited),
420            TypeDefKind::Record(r) => {
421                for f in r.fields.iter() {
422                    self.topo_visit_ty(&f.ty, list, visited);
423                }
424            }
425            TypeDefKind::Tuple(t) => {
426                for t in t.types.iter() {
427                    self.topo_visit_ty(t, list, visited);
428                }
429            }
430            TypeDefKind::Variant(v) => {
431                for v in v.cases.iter() {
432                    self.topo_visit_ty(&v.ty, list, visited);
433                }
434            }
435            TypeDefKind::Option(ty) => self.topo_visit_ty(ty, list, visited),
436            TypeDefKind::Expected(e) => {
437                self.topo_visit_ty(&e.ok, list, visited);
438                self.topo_visit_ty(&e.err, list, visited);
439            }
440            TypeDefKind::Union(u) => {
441                for t in u.cases.iter() {
442                    self.topo_visit_ty(&t.ty, list, visited);
443                }
444            }
445            TypeDefKind::Future(ty) => {
446                self.topo_visit_ty(ty, list, visited);
447            }
448            TypeDefKind::Stream(s) => {
449                self.topo_visit_ty(&s.element, list, visited);
450                self.topo_visit_ty(&s.end, list, visited);
451            }
452        }
453        list.push(id);
454    }
455
456    fn topo_visit_ty(&self, ty: &Type, list: &mut Vec<TypeId>, visited: &mut HashSet<TypeId>) {
457        if let Type::Id(id) = ty {
458            self.topo_visit(*id, list, visited);
459        }
460    }
461
462    pub fn all_bits_valid(&self, ty: &Type) -> bool {
463        match ty {
464            Type::Unit
465            | Type::U8
466            | Type::S8
467            | Type::U16
468            | Type::S16
469            | Type::U32
470            | Type::S32
471            | Type::U64
472            | Type::S64
473            | Type::Float32
474            | Type::Float64 => true,
475
476            Type::Bool | Type::Char | Type::Handle(_) | Type::String => false,
477
478            Type::Id(id) => match &self.types[*id].kind {
479                TypeDefKind::List(_)
480                | TypeDefKind::Variant(_)
481                | TypeDefKind::Enum(_)
482                | TypeDefKind::Option(_)
483                | TypeDefKind::Expected(_)
484                | TypeDefKind::Future(_)
485                | TypeDefKind::Stream(_)
486                | TypeDefKind::Union(_) => false,
487                TypeDefKind::Type(t) => self.all_bits_valid(t),
488                TypeDefKind::Record(r) => r.fields.iter().all(|f| self.all_bits_valid(&f.ty)),
489                TypeDefKind::Tuple(t) => t.types.iter().all(|t| self.all_bits_valid(t)),
490
491                // FIXME: this could perhaps be `true` for multiples-of-32 but
492                // seems better to probably leave this as unconditionally
493                // `false` for now, may want to reconsider later?
494                TypeDefKind::Flags(_) => false,
495            },
496        }
497    }
498
499    pub fn get_variant(&self, ty: &Type) -> Option<&Variant> {
500        if let Type::Id(id) = ty {
501            match &self.types[*id].kind {
502                TypeDefKind::Variant(v) => Some(v),
503                _ => None,
504            }
505        } else {
506            None
507        }
508    }
509}
510
511fn load_fs(root: &Path, name: &str) -> Result<(PathBuf, String)> {
512    let wit = root.join(name).with_extension("wit");
513
514    // Attempt to read a ".wit" file.
515    match fs::read_to_string(&wit) {
516        Ok(contents) => Ok((wit, contents)),
517
518        // If no such file was found, attempt to read a ".wit.md" file.
519        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
520            let wit_md = wit.with_extension("wit.md");
521            match fs::read_to_string(&wit_md) {
522                Ok(contents) => Ok((wit_md, contents)),
523                Err(_err) => Err(err.into()),
524            }
525        }
526
527        Err(err) => return Err(err.into()),
528    }
529}