From 47d44dcd0473fb3cb48184f554d11f9c4e267e1f Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 4 Oct 2023 02:38:27 +0200 Subject: [PATCH] Added some syntax --- rustfmt.toml | 1 + src/main.rs | 16 +++-- src/syntax/ast.rs | 5 +- src/syntax/parser.rs | 157 ++++++++++++++++++++++++++++++++++++++++--- src/syntax/token.rs | 34 +++++----- 5 files changed, 183 insertions(+), 30 deletions(-) diff --git a/rustfmt.toml b/rustfmt.toml index 2c4092d..94ccf0a 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,2 +1,3 @@ enum_discrim_align_threshold = 16 struct_field_align_threshold = 16 +imports_granularity = "one" diff --git a/src/main.rs b/src/main.rs index a567cc9..3caf743 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,15 +3,23 @@ mod syntax; mod utils; -use bumpalo::Bump; -use std::io::{stdin, Read}; -use utils::default; +use { + bumpalo::Bump, + std::io::{stdin, Read}, + utils::default, +}; fn main() -> Result<(), Box> { let mut buf = default(); stdin().read_to_string(&mut buf)?; let arena = Bump::new(); - println!("{:?}", syntax::parser::parse(&buf, &arena)); + match syntax::parser::parse(&buf, &arena) { + Ok(ast) => println!("{ast:?}"), + Err(e) => { + eprintln!("[ERROR] {e:?}"); + eprintln!(" Caused at: `{}`", &buf[e.span.start..e.span.end]) + } + } Ok(()) } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 7a9c4f3..b687cfd 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -59,6 +59,7 @@ pub enum Expr<'a> { }, Break(Option>), Return(Option>), + Uninit, Continue, } @@ -68,10 +69,10 @@ pub enum Definition<'a> { kind: DefKind, ident: Spanned, ty: Option>, - init: Option>, + init: ExprRef<'a>, }, Func { - ident: Spanned, + name: Spanned, params: &'a [(Spanned, Spanned)], ret: Spanned, body: ExprList<'a>, diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index d2fd621..644e5cb 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -1,19 +1,43 @@ -use bumpalo::Bump; -use logos::Logos; - -use super::{ - ast::{Definition, Spanned}, - token::Token, +use { + super::{ + ast::{DefKind, Definition, Expr, ExprList, Ident, Spanned, Type}, + token::Token, + }, + crate::syntax::token::T, + bumpalo::{vec, Bump}, + logos::Logos, }; type Lexer<'a> = logos::Lexer<'a, Token>; macro_rules! extract { + ($self:expr, $pat:pat) => { + let $pat = $self.next()? else { + return Err($self.error(ErrorKind::UnexpectedToken)); + }; + }; +} + +macro_rules! let_until { ( $self:expr, - $(pat:pat),* $(,)? + let $bind:pat, + until |$next:pat_param| $cond:expr, + $expr:expr + $(,)? ) => { + loop { + let $next = $self.next()?; + if $cond { + break; + } + let $bind = $self.next()? else { + return Err($self.error(ErrorKind::UnexpectedToken)); + }; + + $expr; + } }; } @@ -39,16 +63,133 @@ impl<'a, 'l> Parser<'a, 'l> { Spanned::new(kind, self.lexer.span()) } + /// Mark with current span + #[inline] + fn spanned(&self, item: T) -> Spanned { + Spanned::new(item, self.lexer.span()) + } + + /// Require a token to be + fn require(&mut self, token: Token) -> Result<()> { + if self.next()? != token { + Err(self.error(ErrorKind::UnexpectedToken)) + } else { + Ok(()) + } + } + /// Parse everything or DIE! - fn run(self) -> Result<&'a [Definition<'a>]> { + fn run(mut self) -> Result<&'a [Definition<'a>]> { + let mut defs = vec![in self.arena]; + loop { + match self.lexer.next() { + Some(Ok(Token::Func)) => { + defs.push(self.func()?); + } + Some(Ok(Token::Const)) => defs.push(self.var_def(DefKind::Const)?), + Some(Ok(Token::Var)) => defs.push(self.var_def(DefKind::Var)?), + Some(Ok(_)) => return Err(self.error(ErrorKind::UnexpectedToken)), + Some(Err(())) => return Err(self.error(ErrorKind::InvalidToken)), + None => return Ok(defs.into_bump_slice()), + } + } + } + + fn ident(&mut self) -> Result> { + extract!(self, Token::Ident(id)); + Ok(self.spanned(id)) + } + + fn ty(&mut self) -> Result> { + extract!(self, Token::Ident(id)); + Ok(self.spanned(Type::Ident(id))) + } + + fn block(&mut self) -> Result> { + self.require(T!["{"])?; + // TODO + self.require(T!["}"])?; + Ok(self.arena.alloc_slice_copy(&[])) } + + fn var_def(&mut self, kind: DefKind) -> Result> { + // [: ] = ; + // ^^^^^^ + + extract!(self, Token::Ident(id)); + let ident = self.spanned(id); + + let ty = match self.next()? { + Token::Colon => { + let r = Some(self.ty()?); + self.require(T!["="])?; + r + } + Token::Equ => None, + _ => return Err(self.error(ErrorKind::UnexpectedToken)), + }; + + self.require(T!["uninit"])?; + self.require(T![";"])?; + + Ok(Definition::Binding { + kind, + ident, + ty, + init: self.arena.alloc(self.spanned(Expr::Uninit)), + }) + } + + fn func(&mut self) -> Result> { + // func ($(: ),*) → { … } + // ^^^^ + + let name = self.ident()?; + + // Parameter list + let mut params = vec![in self.arena]; + + self.require(T!["("])?; + let mut next = self.next()?; + if next != T![")"] { + loop { + let Token::Ident(id) = next else { + return Err(self.error(ErrorKind::UnexpectedToken)); + }; + + let id = self.spanned(id); + self.require(T![":"])?; + params.push((id, self.ty()?)); + + match self.next()? { + Token::RightParen => break, + Token::Comma => (), + _ => return Err(self.error(ErrorKind::UnexpectedToken)), + } + + next = self.next()?; + } + } + + self.require(T!["→"])?; + let ret = self.ty()?; + let body = self.block()?; + + Ok(Definition::Func { + name, + params: params.into_bump_slice(), + ret, + body, + }) + } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ErrorKind { InvalidToken, UnexpectedEnd, + UnexpectedToken, } pub type Error = Spanned; diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 8ae2f5e..28e6caa 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -12,7 +12,7 @@ macro_rules! token_def { keyword { $($kw:tt),* $(,)* } else { $($e_tt:tt)* } ) => { - literify::literify! (paste::paste! { + literify::literify!(paste::paste! { #[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)] #[logos(extras = Extras)] #[logos(skip r"[ \t\n\f]+")] @@ -35,6 +35,8 @@ macro_rules! token_def { $($( ($u_tok) => { $crate::syntax::token::Token::$u_name }; )*)* + + $((~($kw)) => { $crate::syntax::token::Token::[<$kw:camel>] };)* } }); }; @@ -72,23 +74,23 @@ token_def!( return, break, continue, uninit, asm } else { - #[regex( - r"\p{XID_Start}\p{XID_Continue}*", - |l| intern(l, l.slice()), - )] Ident(Spur), + #[regex( + r"\p{XID_Start}\p{XID_Continue}*", + |l| intern(l, l.slice()), + )] Ident(Spur), - #[regex( - "\"[^\"]*\"", - |l| { - let s = l.slice(); - intern(l, &s[1..s.len() - 1]) - }, - )] String(Spur), + #[regex( + "\"[^\"]*\"", + |l| { + let s = l.slice(); + intern(l, &s[1..s.len() - 1]) + }, + )] String(Spur), - #[regex( - "[0-9]+", - |l| l.slice().parse::().ok() - )] Int(u64), + #[regex( + "[0-9]+", + |l| l.slice().parse::().ok() + )] Int(u64), } );