From b0020ff838bb0ce614e674ee18b07bc91a4875ea Mon Sep 17 00:00:00 2001 From: nothendev Date: Thu, 4 May 2023 14:19:32 +0300 Subject: [PATCH] aidl commit almost mvp --- programs/aidl/assets/vfs.idl | 5 +- programs/aidl/src/ast.rs | 43 ++++++++++ programs/aidl/src/lexer.rs | 111 ++++++++++++++++++++++++++ programs/aidl/src/main.rs | 76 +++++------------- programs/aidl/src/parser.rs | 150 +++++++++++++++++++++++++++++++++++ rust-toolchain | 1 - rust-toolchain.toml | 3 + 7 files changed, 329 insertions(+), 60 deletions(-) create mode 100644 programs/aidl/src/ast.rs create mode 100644 programs/aidl/src/lexer.rs create mode 100644 programs/aidl/src/parser.rs delete mode 100644 rust-toolchain create mode 100644 rust-toolchain.toml diff --git a/programs/aidl/assets/vfs.idl b/programs/aidl/assets/vfs.idl index c766843..3a3648b 100644 --- a/programs/aidl/assets/vfs.idl +++ b/programs/aidl/assets/vfs.idl @@ -1,5 +1,4 @@ -// core provides lots of useful types like String and Byte -use core; +Use core; Constant VERSION Version{ major: 1, @@ -21,4 +20,4 @@ Interface File{ function open accepts(Path) returns(File); function close accepts(File) returns(None); -} \ No newline at end of file +} diff --git a/programs/aidl/src/ast.rs b/programs/aidl/src/ast.rs new file mode 100644 index 0000000..eb2290a --- /dev/null +++ b/programs/aidl/src/ast.rs @@ -0,0 +1,43 @@ +//! **note** the order of fields is the order of parsing. + +/// An IDL module. +/// +/// Parsing order: +/// - use declarations, +/// - items +#[derive(Debug)] +pub struct IDLModule { + // why: only allow use before other items + // parser will error if use is present in any other place + pub uses: Vec, + pub items: Vec +} + +#[derive(Debug)] +pub enum Item { + Interface(ItemInterface), + Type(ItemType) +} + +#[derive(Debug)] +pub struct Function { + pub name: String, + +} + +#[derive(Debug)] +pub struct ItemInterface { + pub name: String, + pub functions: Vec +} + +#[derive(Debug)] +pub struct ItemType { + pub name: String, + pub referree: String +} + +#[derive(Debug)] +pub struct UseDecl { + pub module: String +} diff --git a/programs/aidl/src/lexer.rs b/programs/aidl/src/lexer.rs new file mode 100644 index 0000000..c1d068f --- /dev/null +++ b/programs/aidl/src/lexer.rs @@ -0,0 +1,111 @@ +use std::ops::{Range, Add}; + +use logos::Logos; + +#[derive(Logos, Debug, PartialEq)] +#[logos(skip r"[ \t\n\f]+")] +pub enum Token { + #[token("{")] + LeftBrace, + + #[token("}")] + RightBrace, + + #[token("(")] + LeftParen, + + #[token(")")] + RightParen, + + #[token(";")] + Semicolon, + + #[token(":")] + Colon, + + #[token("<")] + LeftArrow, + + #[token(">")] + RightArrow, + + #[token(",")] + Comma, + + #[token("=")] + Equals, + + //#[regex(r#"[A-z]+"#, |lex| lex.slice().parse().ok())] + //Literal(String), + + #[regex(r#"[A-z]+"#, |lex| Ident::lexer(lex.slice()).next().and_then(Result::ok))] + Ident(Ident), + + #[regex("use [a-zA-Z/]+;", |lex| lex.slice().parse().ok())] + Component(String), + + #[regex("U[0-9]+", |lex| lex.slice().parse().ok())] + UnsignedType(String), + + #[regex("I[0-9]+", |lex| lex.slice().parse().ok())] + SignedType(String), + + #[regex(r"//.*", |lex| lex.slice().parse().ok())] + Comment(String), +} + +#[derive(Logos, Debug, PartialEq, Eq)] +pub enum Ident { + #[token("Interface")] + Interface, + #[token("Function")] + Function, + #[token("Constant")] + Constant, + #[token("Structure")] + Structure, + #[token("Type")] + Type, + #[token("Use")] + Use, + #[regex(r"[A-z]+", |lex| lex.slice().parse().ok())] + Other(String) +} + +#[derive(Debug, Clone)] +pub struct Span(pub Range); +impl Span { + pub const ZERO: Self = Self(0..0); + + pub fn lower(&self) -> usize { + self.0.start + } + pub fn upper(&self) -> usize { + self.0.end + } + + pub fn concat(self, other: Span) -> Self { + use std::cmp::{min, max}; + + Self(min(self.lower(), other.lower())..max(self.upper(), other.upper())) + } +} +impl Add for Span { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + self.concat(rhs) + } +} + +#[derive(Debug, Clone)] +pub struct Spanned(pub T, pub Span); + +impl Spanned { + pub fn new(thing: T, spans: [Span; N]) -> Self { + Self(thing, spans.into_iter().fold(Span::ZERO, Span::concat)) + } + pub fn map(self, f: impl Fn(T) -> R) -> Spanned { + Spanned(f(self.0), self.1) + } +} diff --git a/programs/aidl/src/main.rs b/programs/aidl/src/main.rs index c3e2ccc..e75f71e 100644 --- a/programs/aidl/src/main.rs +++ b/programs/aidl/src/main.rs @@ -1,62 +1,26 @@ +#![allow(non_snake_case)] + use logos::Logos; +use parser::Parser; -#[derive(Logos, Debug, PartialEq)] -#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens -enum Token { - #[token("{")] - LeftBrace, +use crate::lexer::Token; +mod ast; +mod lexer; +mod parser; - #[token("}")] - RightBrace, - - #[token("(")] - LeftParen, - - #[token(")")] - RightParen, - - #[token(";")] - Semicolon, - - #[token(":")] - Colon, - - #[token("<")] - LeftArrow, - - #[token(">")] - RightArrow, - - #[token(",")] - Comma, - - #[token("=")] - Equals, - - #[regex(r#"[A-z]+"#, |lex| lex.slice().parse().ok())] - Literal(String), - - #[regex("use [a-zA-Z/]+;", |lex| lex.slice().parse().ok())] - Component(String), - - #[regex("U[0-9]+", |lex| lex.slice().parse().ok())] - UnsignedType(String), - - #[regex("I[0-9]+", |lex| lex.slice().parse().ok())] - SignedType(String), - - #[regex(r"//[ a-zA-Z!-+]+", |lex| lex.slice().parse().ok())] - Comment(String), -} +const VFS: &str = include_str!("../assets/vfs.idl"); fn main() { - let mut lex = Token::lexer(include_str!("../../../programs/aidl/assets/vfs.idl")); - - for token in lex { - // let ok_token = token.ok(); - // if ok_token.is_some() { - // println!("{:?}", ok_token.unwrap()); - // } - println!("{:?}", token); - } + dbg!(Token::lexer(VFS).for_each(|a| println!("{:#?}", a))); + dbg!(Parser::new(VFS).parse()); +} + +#[macro_export] +macro_rules! unwrap_match { + ($x:expr, $m:pat => $a:expr) => { + match $x { + $m => $a, + _ => unreachable!() + } + }; } diff --git a/programs/aidl/src/parser.rs b/programs/aidl/src/parser.rs new file mode 100644 index 0000000..71ea971 --- /dev/null +++ b/programs/aidl/src/parser.rs @@ -0,0 +1,150 @@ +use logos::{Logos, SpannedIter}; + +use crate::{ + ast::{IDLModule, Item, ItemInterface, ItemType, UseDecl}, + lexer::{Span, Spanned, Token, Ident}, +}; +use std::{ + iter::{Iterator, Peekable, Filter}, + ops::Range, +}; + +type Wtf<'a> = Peekable< + Filter, Box, Range)) -> bool>>, + >; + +struct TokenIterator<'a> { + spanned: Wtf<'a>, +} + +fn token_is_not_comment((ref a, ..): &(Result, Range)) -> bool { + !matches!(a, Err(_) | Ok(Token::Comment(..))) +} + +impl<'a> TokenIterator<'a> { + pub fn new(src: &'a str) -> Self { + let spanned = Token::lexer(src) + .spanned() + .filter( + Box::new(token_is_not_comment) as Box, Range)) -> bool>, + ) + .peekable(); + + Self { spanned } + } + pub fn next(&mut self) -> Option> { + let nxt = self + .spanned + .next() + .and_then(|(token, span)| Some(Spanned(token.ok()?, Span(span)))); + println!("[NEXT] {:#?}", nxt); + nxt + } + + pub fn peek(&mut self) -> Option> { + let peek = self + .spanned + .peek() + .and_then(|(token, span)| Some(Spanned(token.as_ref().ok()?, Span(span.clone())))); + println!("[PEEK] {:#?}", peek); + peek + } +} + +pub struct Parser<'a> { + tokens: TokenIterator<'a>, +} + +impl<'a> Parser<'a> { + pub fn new(src: &'a str) -> Self { + Self { + tokens: TokenIterator::new(src), + } + } + + fn get_real(&mut self, matcher: impl Fn(&Token) -> bool) -> Option> { + if matcher(self.tokens.peek()?.0) { + self.tokens.next() + } else { + None + } + } + + fn semi(&mut self) -> Option { + Some(self.get_real(|token| matches!(token, Token::Semicolon))?.1) + } + + fn ask_ident(&mut self) -> Option> { + Some(crate::unwrap_match!( + self.get_real(|token| matches!(token, Token::Ident(Ident::Other(_))))?, + Spanned(Token::Ident(Ident::Other(ident)), span) => + Spanned(ident, span) + )) + } + + fn ask_interface(&mut self) -> Option> { + let Spanned(_, kSp) = + self.get_real(|token| matches!(token, Token::Ident(Ident::Interface)))?; + let Spanned(ident, iSp) = self.ask_ident()?; + + Some(Spanned::new( + ItemInterface { + name: ident, + functions: vec![], + }, + [kSp, iSp, self.semi()?], + )) + } + + fn ask_typealias(&mut self) -> Option> { + let Spanned(_, kSp) = + self.get_real(|token| matches!(token, Token::Ident(Ident::Type)))?; + let Spanned(name, nSp) = self.ask_ident()?; + + let Spanned(_, eqSp) = self.get_real(|token| matches!(token, Token::Equals))?; + let Spanned(referree, rSp) = self.ask_ident()?; + + Some(Spanned::new( + ItemType { name, referree }, + [kSp, nSp, eqSp, rSp, self.semi()?], + )) + } + + fn ask_item(&mut self) -> Option> { + Some(match self.tokens.peek()?.0 { + Token::Ident(Ident::Other(_)) => None?, + Token::Ident(keyword) => match keyword { + Ident::Interface => self.ask_interface()?.map(Item::Interface), + Ident::Type => self.ask_typealias()?.map(Item::Type), + _ => None?, + }, + _ => None?, + }) + } + + fn ask_use(&mut self) -> Option> { + let Spanned(_, kSp) = + self.get_real(|token| matches!(token, Token::Ident(Ident::Use)))?; + let Spanned(name, nSp) = self.ask_ident()?; + + Some(Spanned::new( + UseDecl { module: name }, + [kSp, nSp, self.semi()?], + )) + } + + pub fn parse(mut self) -> IDLModule { + IDLModule { + uses: fill_while(|| self.ask_use()), + items: fill_while(|| self.ask_item()), + } + } +} + +fn fill_while(mut f: impl FnMut() -> Option>) -> Vec { + let mut real = vec![]; + while let Some(Spanned(t, _)) = f() { + real.push(t); + } + real +} diff --git a/rust-toolchain b/rust-toolchain deleted file mode 100644 index 07ade69..0000000 --- a/rust-toolchain +++ /dev/null @@ -1 +0,0 @@ -nightly \ No newline at end of file diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..f11a1d4 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "nightly" +components = ["cargo", "clippy", "rustfmt", "rust-analyzer"]