diff --git a/Cargo.lock b/Cargo.lock index ab1d19f..e9a3e91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -55,6 +55,26 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "literify" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd552332051e9b3db140d34a371dcc0ed378b72a9227b5273070af58ea34abf4" +dependencies = [ + "litrs", + "proc-macro2", + "quote", +] + +[[package]] +name = "litrs" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f17c3668f3cc1132437cdadc93dab05e52d592f06948d3f64828430c36e4a70" +dependencies = [ + "proc-macro2", +] + [[package]] name = "logos" version = "0.13.0" @@ -93,6 +113,12 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + [[package]] name = "proc-macro2" version = "1.0.67" @@ -123,7 +149,9 @@ version = "0.1.0" dependencies = [ "bumpalo", "lasso", + "literify", "logos", + "paste", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index a59dd2c..5ed8fcb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,8 @@ version = "0.1.0" edition = "2021" [dependencies] -bumpalo = { version = "3", features = ["collections"] } -lasso = "0.7" -logos = "0.13" +bumpalo = { version = "3", features = ["collections"] } +lasso = "0.7" +literify = "0.2" +logos = "0.13" +paste = "1.0" diff --git a/src/main.rs b/src/main.rs index e25511a..a567cc9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ mod syntax; mod utils; +use bumpalo::Bump; use std::io::{stdin, Read}; use utils::default; @@ -10,5 +11,7 @@ fn main() -> Result<(), Box> { let mut buf = default(); stdin().read_to_string(&mut buf)?; + let arena = Bump::new(); + println!("{:?}", syntax::parser::parse(&buf, &arena)); Ok(()) } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 8a80787..7a9c4f3 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -44,15 +44,10 @@ pub enum Expr<'a> { Call(ExprRef<'a>, ExprList<'a>), Binary(Spanned, ExprRef<'a>, ExprRef<'a>), Unary(Spanned, ExprRef<'a>), - Def { - kind: DefKind, - ident: Spanned, - ty: Option>, - init: Option>, - }, Set(ExprRef<'a>, ExprRef<'a>), Loop(ExprList<'a>), Block(ExprList<'a>), + Definition(Definition<'a>), Switch { on: ExprRef<'a>, branches: &'a [(Spanned, ExprRef<'a>)], @@ -62,15 +57,25 @@ pub enum Expr<'a> { branches: &'a [(ExprRef<'a>, ExprRef<'a>)], else_: ExprRef<'a>, }, + Break(Option>), + Return(Option>), + Continue, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Definition<'a> { + Binding { + kind: DefKind, + ident: Spanned, + ty: Option>, + init: Option>, + }, Func { ident: Spanned, params: &'a [(Spanned, Spanned)], ret: Spanned, body: ExprList<'a>, }, - Break(Option>), - Return(Option>), - Continue, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 846ab01..716da41 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,2 +1,3 @@ pub mod ast; +pub mod parser; pub mod token; diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs new file mode 100644 index 0000000..d2fd621 --- /dev/null +++ b/src/syntax/parser.rs @@ -0,0 +1,63 @@ +use bumpalo::Bump; +use logos::Logos; + +use super::{ + ast::{Definition, Spanned}, + token::Token, +}; + +type Lexer<'a> = logos::Lexer<'a, Token>; + +macro_rules! extract { + ( + $self:expr, + $(pat:pat),* $(,)? + ) => { + + }; +} + +struct Parser<'a, 'l> { + arena: &'a Bump, + lexer: Lexer<'l>, +} + +impl<'a, 'l> Parser<'a, 'l> { + /// Poll next token + fn next(&mut self) -> Result { + match self.lexer.next() { + Some(Ok(token)) => Ok(token), + Some(Err(())) => Err(ErrorKind::InvalidToken), + None => Err(ErrorKind::UnexpectedEnd), + } + .map_err(|k| Spanned::new(k, self.lexer.span())) + } + + /// Form an error + #[inline] + fn error(&self, kind: ErrorKind) -> Error { + Spanned::new(kind, self.lexer.span()) + } + + /// Parse everything or DIE! + fn run(self) -> Result<&'a [Definition<'a>]> { + Ok(self.arena.alloc_slice_copy(&[])) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ErrorKind { + InvalidToken, + UnexpectedEnd, +} + +pub type Error = Spanned; +type Result = std::result::Result; + +pub fn parse<'a>(code: &str, arena: &'a Bump) -> Result<&'a [Definition<'a>]> { + Parser { + arena, + lexer: Token::lexer(code), + } + .run() +} diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 5f44839..8ae2f5e 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -6,53 +6,72 @@ pub struct Extras { pub interner: Rodeo, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)] -#[logos(extras = Extras)] -#[logos(skip r"[ \t\n\f]+")] -#[logos(skip r"\\.*")] -#[rustfmt::skip] -pub enum Token { - #[token("(")] LeftParen, - #[token(")")] RightParen, - #[token("{")] LeftCurly, - #[token("}")] RightCurly, - #[token(".")] Dot, - #[token(",")] Comma, - #[token(":")] Colon, - #[token(";")] Semicolon, +macro_rules! token_def { + ( + unit { $($u_name:ident : $($u_tok:literal),* $(,)?;)* } + keyword { $($kw:tt),* $(,)* } + else { $($e_tt:tt)* } + ) => { + literify::literify! (paste::paste! { + #[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)] + #[logos(extras = Extras)] + #[logos(skip r"[ \t\n\f]+")] + #[logos(skip r"\\.*")] + pub enum Token { + $( + $(#[token($u_tok)])* + $u_name, + )* - #[token("←")] //______ - #[token("<-")] LArrow, - #[token("→")] //______ - #[token("->")] RArrow, + $( + #[token(~($kw))] + [<$kw:camel>], + )* - #[token("+")] Plus, - #[token("-")] Minus, - #[token("*")] Star, - #[token("/")] Slash, + $($e_tt)* + } - #[token("=")] Equ, - #[token("≠")] //___ - #[token("/=")] Neq, - #[token("<")] Lt, - #[token(">")] Gt, - #[token("≤")] //____ - #[token("<=")] LtEq, - #[token("≥")] //____ - #[token(">=")] GtEq, + macro_rules! T { + $($( + ($u_tok) => { $crate::syntax::token::Token::$u_name }; + )*)* + } + }); + }; +} - #[token("func")] Func, - #[token("var")] Var, - #[token("const")] Const, - #[token("include")] Include, - #[token("switch")] Switch, - #[token("loop")] Loop, - #[token("return")] Return, - #[token("break")] Break, - #[token("continue")] Continue, - #[token("uninit")] Uninit, - #[token("asm")] Asm, +token_def!( + unit { + LeftParen : "("; + RightParen: ")"; + LeftCurly : "{"; + RightCurly: "}"; + Dot : "."; + Comma : ","; + Colon : ":"; + Semicolon : ";"; + LArrow: "←", "<-"; + RArrow: "→", "->"; + + Plus : "+"; + Minus : "-"; + Star : "*"; + Slash : "/"; + Precent: "%"; + + Equ : "="; + Neq : "≠", "/="; + Lt : "<"; + Gt : ">"; + LtEq : "≤", "<="; + GtEq : "≥", ">="; + } + + keyword { func, var, const, include, switch, loop, + return, break, continue, uninit, asm } + + else { #[regex( r"\p{XID_Start}\p{XID_Continue}*", |l| intern(l, l.slice()), @@ -70,7 +89,10 @@ pub enum Token { "[0-9]+", |l| l.slice().parse::().ok() )] Int(u64), -} + } +); + +pub(crate) use T; fn intern(lexer: &mut Lexer<'_, Token>, s: &str) -> Spur { lexer.extras.interner.get_or_intern(s)