From 7a721f586db862c1db42e45a10a162d361e2649f Mon Sep 17 00:00:00 2001 From: nothendev Date: Thu, 4 May 2023 15:44:49 +0300 Subject: [PATCH] normal syntax tree i guess --- programs/aidl/Cargo.toml | 3 +- programs/aidl/assets/core.idl | 6 +- programs/aidl/assets/vfs.idl | 7 +- programs/aidl/assets/why.idl | 3 + programs/aidl/src/ast.rs | 47 ++++++-- programs/aidl/src/lexer.rs | 20 ++-- programs/aidl/src/main.rs | 11 +- programs/aidl/src/parser.rs | 194 ++++++++++++++++++++++------------ 8 files changed, 197 insertions(+), 94 deletions(-) create mode 100644 programs/aidl/assets/why.idl diff --git a/programs/aidl/Cargo.toml b/programs/aidl/Cargo.toml index 5596c9f..abea912 100644 --- a/programs/aidl/Cargo.toml +++ b/programs/aidl/Cargo.toml @@ -6,4 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -logos = "0.13.0" +logos = "0" +thiserror = "1" diff --git a/programs/aidl/assets/core.idl b/programs/aidl/assets/core.idl index eaebf0c..2049a70 100644 --- a/programs/aidl/assets/core.idl +++ b/programs/aidl/assets/core.idl @@ -1,7 +1,7 @@ Type Byte = U8; Type String = Vector; -Enumurate Boolean{ +Enumurate Boolean { False = 0, True = 1, } @@ -11,10 +11,8 @@ Union Option{ Some } - - Structure Version { major: Byte, minor: Byte, patch: Byte, -}; \ No newline at end of file +}; diff --git a/programs/aidl/assets/vfs.idl b/programs/aidl/assets/vfs.idl index 3a3648b..7b97604 100644 --- a/programs/aidl/assets/vfs.idl +++ b/programs/aidl/assets/vfs.idl @@ -1,19 +1,20 @@ +// core provides lots of useful types like String and Byte Use core; -Constant VERSION Version{ +Constant VERSION Version { major: 1, minor: 0, patch: 0, } -Type Path = String; +Alias Path = String; Structure File { name: String, data: Vector, } -Interface File{ +Interface File { function new accepts(Path) returns(None); // Open in this iteration assumes the file exists diff --git a/programs/aidl/assets/why.idl b/programs/aidl/assets/why.idl new file mode 100644 index 0000000..c6edc6d --- /dev/null +++ b/programs/aidl/assets/why.idl @@ -0,0 +1,3 @@ +Use core; + +Alias Thing = Byte; diff --git a/programs/aidl/src/ast.rs b/programs/aidl/src/ast.rs index eb2290a..db56496 100644 --- a/programs/aidl/src/ast.rs +++ b/programs/aidl/src/ast.rs @@ -10,34 +10,67 @@ pub struct IDLModule { // why: only allow use before other items // parser will error if use is present in any other place pub uses: Vec, - pub items: Vec + pub items: Vec, } #[derive(Debug)] pub enum Item { Interface(ItemInterface), - Type(ItemType) + Type(ItemAlias), + Constant(ItemConstant), } #[derive(Debug)] pub struct Function { pub name: String, - } #[derive(Debug)] pub struct ItemInterface { pub name: String, - pub functions: Vec + pub functions: Vec, } #[derive(Debug)] -pub struct ItemType { +pub struct ItemAlias { + pub name: String, + pub referree: String, +} + +#[derive(Debug)] +pub struct ItemConstant { pub name: String, - pub referree: String } #[derive(Debug)] pub struct UseDecl { - pub module: String + pub module: String, +} + +#[derive(Debug)] +pub enum Expr { + Literal(Literal), +} + +#[derive(Debug)] +pub enum Literal { + String(String), + Number(NumberLiteral), +} + +#[derive(Debug)] +pub enum NumberLiteral { + Ptr(usize), + + U8(u8), + I8(i8), + + U16(u16), + I16(i16), + + U32(u32), + I32(i32), + + U64(u64), + I64(i64), } diff --git a/programs/aidl/src/lexer.rs b/programs/aidl/src/lexer.rs index c1d068f..55806e4 100644 --- a/programs/aidl/src/lexer.rs +++ b/programs/aidl/src/lexer.rs @@ -1,4 +1,4 @@ -use std::ops::{Range, Add}; +use std::{ops::{Range, Add}, fmt::Display}; use logos::Logos; @@ -35,9 +35,6 @@ pub enum Token { #[token("=")] Equals, - //#[regex(r#"[A-z]+"#, |lex| lex.slice().parse().ok())] - //Literal(String), - #[regex(r#"[A-z]+"#, |lex| Ident::lexer(lex.slice()).next().and_then(Result::ok))] Ident(Ident), @@ -64,8 +61,8 @@ pub enum Ident { Constant, #[token("Structure")] Structure, - #[token("Type")] - Type, + #[token("Alias")] + Alias, #[token("Use")] Use, #[regex(r"[A-z]+", |lex| lex.slice().parse().ok())] @@ -90,6 +87,11 @@ impl Span { Self(min(self.lower(), other.lower())..max(self.upper(), other.upper())) } } +impl Display for Span { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}..{}", self.lower(), self.upper()) + } +} impl Add for Span { type Output = Self; @@ -109,3 +111,9 @@ impl Spanned { Spanned(f(self.0), self.1) } } + +impl Display for Spanned { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} @ {}", self.0, self.1) + } +} diff --git a/programs/aidl/src/main.rs b/programs/aidl/src/main.rs index e75f71e..1ddd593 100644 --- a/programs/aidl/src/main.rs +++ b/programs/aidl/src/main.rs @@ -1,18 +1,19 @@ #![allow(non_snake_case)] -use logos::Logos; use parser::Parser; -use crate::lexer::Token; mod ast; mod lexer; mod parser; -const VFS: &str = include_str!("../assets/vfs.idl"); +const TEST: &str = include_str!("../assets/why.idl"); fn main() { - dbg!(Token::lexer(VFS).for_each(|a| println!("{:#?}", a))); - dbg!(Parser::new(VFS).parse()); + let res = Parser::new(TEST).parse(); + match res { + Ok(ast) => { dbg!(ast); } + Err(e) => println!("{}", e) + } } #[macro_export] diff --git a/programs/aidl/src/parser.rs b/programs/aidl/src/parser.rs index 71ea971..81f19eb 100644 --- a/programs/aidl/src/parser.rs +++ b/programs/aidl/src/parser.rs @@ -1,53 +1,56 @@ -use logos::{Logos, SpannedIter}; +use logos::{Lexer, Logos, SpannedIter}; use crate::{ - ast::{IDLModule, Item, ItemInterface, ItemType, UseDecl}, - lexer::{Span, Spanned, Token, Ident}, -}; -use std::{ - iter::{Iterator, Peekable, Filter}, - ops::Range, + ast::{IDLModule, Item, ItemAlias, ItemConstant, ItemInterface, UseDecl}, + lexer::{Ident, Span, Spanned, Token}, }; +use std::iter::{Filter, Iterator, Peekable}; -type Wtf<'a> = Peekable< - Filter, Box, Range)) -> bool>>, - >; +type Wtf<'a> = Peekable, Box) -> bool>>>; struct TokenIterator<'a> { - spanned: Wtf<'a>, + lexer: Lexer<'a, Token>, + peeked: Option>, } -fn token_is_not_comment((ref a, ..): &(Result, Range)) -> bool { +fn token_is_not_comment(a: &Result) -> bool { !matches!(a, Err(_) | Ok(Token::Comment(..))) } impl<'a> TokenIterator<'a> { pub fn new(src: &'a str) -> Self { - let spanned = Token::lexer(src) - .spanned() - .filter( - Box::new(token_is_not_comment) as Box, Range)) -> bool>, - ) - .peekable(); + let lexer = Token::lexer(src); - Self { spanned } + Self { + lexer, + peeked: None, + } } - pub fn next(&mut self) -> Option> { - let nxt = self - .spanned - .next() - .and_then(|(token, span)| Some(Spanned(token.ok()?, Span(span)))); + pub fn next(&mut self) -> Result, ParserError> { + let n = match self.peeked.take() { + Some(thing) => thing, + None => self.lexer.find(token_is_not_comment).and_then(Result::ok), + }; + let nxt = n.map(|token| Spanned(token, Span(self.lexer.span()))); println!("[NEXT] {:#?}", nxt); - nxt + nxt.ok_or(ParserError::UnexpectedEOF) } - pub fn peek(&mut self) -> Option> { - let peek = self - .spanned - .peek() - .and_then(|(token, span)| Some(Spanned(token.as_ref().ok()?, Span(span.clone())))); + fn _peek(&mut self) -> Option<&Token> { + self.peeked + .get_or_insert_with(|| self.lexer.find(token_is_not_comment).and_then(Result::ok)) + .as_ref() + } + + pub fn peek(&mut self) -> Result, ParserError> { + let span = Span(self.lexer.span()); + let peek = self._peek().map(|token| Spanned(token, span)); println!("[PEEK] {:#?}", peek); - peek + peek.ok_or(ParserError::UnexpectedEOF) + } + + pub fn current(&self) -> Spanned { + Spanned(self.lexer.slice().to_owned(), Span(self.lexer.span())) } } @@ -62,32 +65,44 @@ impl<'a> Parser<'a> { } } - fn get_real(&mut self, matcher: impl Fn(&Token) -> bool) -> Option> { + fn get_real( + &mut self, + matcher: impl Fn(&Token) -> bool, + expected: &'static str, + ) -> Result, ParserError> { if matcher(self.tokens.peek()?.0) { self.tokens.next() } else { - None + Err(self.unexpected(expected)) } } - fn semi(&mut self) -> Option { - Some(self.get_real(|token| matches!(token, Token::Semicolon))?.1) + fn unexpected(&self, expected: &'static str) -> ParserError { + ParserError::Unexpected(expected.to_owned(), self.tokens.current()) } - fn ask_ident(&mut self) -> Option> { - Some(crate::unwrap_match!( - self.get_real(|token| matches!(token, Token::Ident(Ident::Other(_))))?, + fn semi(&mut self) -> Result { + Ok(self + .get_real(|token| matches!(token, Token::Semicolon), "a semicolon")? + .1) + } + + fn ask_ident(&mut self) -> Result, ParserError> { + Ok(crate::unwrap_match!( + self.get_real(|token| matches!(token, Token::Ident(Ident::Other(_))), "an identifier")?, Spanned(Token::Ident(Ident::Other(ident)), span) => Spanned(ident, span) )) } - fn ask_interface(&mut self) -> Option> { - let Spanned(_, kSp) = - self.get_real(|token| matches!(token, Token::Ident(Ident::Interface)))?; + fn _ask_interface(&mut self) -> Result, ParserError> { + let Spanned(_, kSp) = self.get_real( + |token| matches!(token, Token::Ident(Ident::Interface)), + "`Interface`", + )?; let Spanned(ident, iSp) = self.ask_ident()?; - Some(Spanned::new( + Ok(Spanned::new( ItemInterface { name: ident, functions: vec![], @@ -96,55 +111,98 @@ impl<'a> Parser<'a> { )) } - fn ask_typealias(&mut self) -> Option> { - let Spanned(_, kSp) = - self.get_real(|token| matches!(token, Token::Ident(Ident::Type)))?; + fn ask_alias(&mut self) -> Result, ParserError> { + let Spanned(_, kSp) = self.get_real( + |token| matches!(token, Token::Ident(Ident::Alias)), + "`Alias`", + )?; let Spanned(name, nSp) = self.ask_ident()?; - let Spanned(_, eqSp) = self.get_real(|token| matches!(token, Token::Equals))?; + let Spanned(_, eqSp) = self.get_real(|token| matches!(token, Token::Equals), "`=`")?; let Spanned(referree, rSp) = self.ask_ident()?; - Some(Spanned::new( - ItemType { name, referree }, + Ok(Spanned::new( + ItemAlias { name, referree }, [kSp, nSp, eqSp, rSp, self.semi()?], )) } - fn ask_item(&mut self) -> Option> { - Some(match self.tokens.peek()?.0 { - Token::Ident(Ident::Other(_)) => None?, + fn ask_constant(&mut self) -> Result, ParserError> { + Err(self.unexpected("")) + } + + fn ask_item(&mut self) -> Result, ParserError> { + Ok(match self.tokens.peek()?.0 { + Token::Ident(Ident::Other(_)) => { + Err(self.unexpected("a keyword, not just an identifier"))? + } Token::Ident(keyword) => match keyword { - Ident::Interface => self.ask_interface()?.map(Item::Interface), - Ident::Type => self.ask_typealias()?.map(Item::Type), - _ => None?, + //Ident::Interface => self.ask_interface()?.map(Item::Interface), + Ident::Alias => self.ask_alias()?.map(Item::Type), + Ident::Constant => self.ask_constant()?.map(Item::Constant), + _ => Err(self.unexpected("`Alias` or `Constant`"))?, }, - _ => None?, + _ => Err(self.unexpected("a keyword"))?, }) } - fn ask_use(&mut self) -> Option> { - let Spanned(_, kSp) = - self.get_real(|token| matches!(token, Token::Ident(Ident::Use)))?; + fn ask_use(&mut self) -> Result, ParserError> { + let Spanned(_, kSp) = { + match self.tokens.peek()? { + Spanned(Token::Ident(Ident::Use), _) => Ok(self.tokens.next()?), + _ => Err(ParserError::PleaseStopParsingUse), + } + }?; let Spanned(name, nSp) = self.ask_ident()?; - Some(Spanned::new( + Ok(Spanned::new( UseDecl { module: name }, [kSp, nSp, self.semi()?], )) } - pub fn parse(mut self) -> IDLModule { - IDLModule { - uses: fill_while(|| self.ask_use()), - items: fill_while(|| self.ask_item()), - } + pub fn parse(mut self) -> Result { + Ok(IDLModule { + uses: { + let mut real = vec![]; + loop { + let r = self.ask_use(); + match r { + Ok(Spanned(a, _)) => real.push(a), + Err(ParserError::UnexpectedEOF) => return Err(ParserError::UnexpectedEOF), + Err(ParserError::PleaseStopParsingUse) => break, + Err(unexpected @ ParserError::Unexpected(..)) => return Err(unexpected), + } + } + Ok(real) + }?, + items: fill_while(|| self.ask_item())?, + }) } } -fn fill_while(mut f: impl FnMut() -> Option>) -> Vec { +fn fill_while( + mut f: impl FnMut() -> Result, ParserError>, +) -> Result, ParserError> { let mut real = vec![]; - while let Some(Spanned(t, _)) = f() { - real.push(t); + loop { + match f() { + Ok(Spanned(next, _)) => real.push(next), + Err(unexpected @ ParserError::Unexpected(..)) => return Err(unexpected), + Err(ParserError::UnexpectedEOF) => break, + Err(ParserError::PleaseStopParsingUse) => unreachable!(), + } } - real + Ok(real) +} + +#[derive(thiserror::Error, Debug)] +pub enum ParserError { + // expected, got + #[error("Unexpected `{_1}`, expected {_0}")] + Unexpected(String, Spanned), + #[error("Unexpected end of file")] + UnexpectedEOF, + #[error("please stop.")] + PleaseStopParsingUse, }