From b3be2a1358718a3671f412d7dd3ad5ab6e7ac9a3 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 4 Oct 2023 18:59:44 +0200 Subject: [PATCH] New parser --- Cargo.lock | 70 ++++++++++ Cargo.toml | 1 + src/main.rs | 11 +- src/syntax/ast.rs | 33 ++--- src/syntax/parser.rs | 325 ++++++++++++++++++------------------------- src/syntax/token.rs | 2 + src/utils.rs | 11 ++ 7 files changed, 237 insertions(+), 216 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e9a3e91..037ec6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,12 +25,31 @@ version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chumsky" +version = "1.0.0-alpha.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc3172a80699de358070dd99f80ea8badc6cdf8ac2417cb5a96e6d81bf5fe06d" +dependencies = [ + "hashbrown", + "stacker", +] + [[package]] name = "fnv" version = "1.0.7" @@ -55,6 +74,12 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "libc" +version = "0.2.148" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" + [[package]] name = "literify" version = "0.2.0" @@ -128,6 +153,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.33" @@ -148,12 +182,26 @@ name = "rhea" version = "0.1.0" dependencies = [ "bumpalo", + "chumsky", "lasso", "literify", "logos", "paste", ] +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + [[package]] name = "syn" version = "2.0.33" @@ -176,3 +224,25 @@ name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index 5ed8fcb..79bd5e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] bumpalo = { version = "3", features = ["collections"] } +chumsky = "1.0.0-alpha" lasso = "0.7" literify = "0.2" logos = "0.13" diff --git a/src/main.rs b/src/main.rs index 3caf743..209aebe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,7 @@ // Rhea +use {logos::Logos, syntax::token::Token}; + mod syntax; mod utils; @@ -14,12 +16,7 @@ fn main() -> Result<(), Box> { stdin().read_to_string(&mut buf)?; let arena = Bump::new(); - match syntax::parser::parse(&buf, &arena) { - Ok(ast) => println!("{ast:?}"), - Err(e) => { - eprintln!("[ERROR] {e:?}"); - eprintln!(" Caused at: `{}`", &buf[e.span.start..e.span.end]) - } - } + syntax::parser::parse_lexer(Token::lexer(&buf), &arena); + Ok(()) } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index b687cfd..8594459 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,32 +1,21 @@ -use lasso::Spur; - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct Span { - pub start: usize, - pub end: usize, -} - -impl From> for Span { - fn from(value: std::ops::Range) -> Self { - Self { - start: value.start, - end: value.end, - } - } -} +use {chumsky::span::SimpleSpan, lasso::Spur}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Spanned { pub item: T, - pub span: Span, + pub span: SimpleSpan, } impl Spanned { #[inline] - pub fn new(item: T, span: impl Into) -> Self { - Self { - item, - span: span.into(), + pub const fn new(item: T, span: SimpleSpan) -> Self { + Self { item, span } + } + + pub fn map(self, mut f: impl FnMut(T) -> U) -> Spanned { + Spanned { + item: f(self.item), + span: self.span, } } } @@ -72,7 +61,7 @@ pub enum Definition<'a> { init: ExprRef<'a>, }, Func { - name: Spanned, + name: Spanned, params: &'a [(Spanned, Spanned)], ret: Spanned, body: ExprList<'a>, diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index 644e5cb..1fc6a1a 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -1,204 +1,155 @@ +use super::ast::{DefKind, Expr, ExprList, Type}; + use { super::{ - ast::{DefKind, Definition, Expr, ExprList, Ident, Spanned, Type}, - token::Token, + ast::{Definition, Ident, SpanExpr, Spanned}, + token::{Token, T}, }, - crate::syntax::token::T, - bumpalo::{vec, Bump}, - logos::Logos, + crate::utils::Pipe, + bumpalo::Bump, + chumsky::{ + extra::Full, + input::{Stream, ValueInput}, + prelude::*, + }, + logos::Lexer, }; -type Lexer<'a> = logos::Lexer<'a, Token>; - -macro_rules! extract { - ($self:expr, $pat:pat) => { - let $pat = $self.next()? else { - return Err($self.error(ErrorKind::UnexpectedToken)); - }; +/// Equivalently-named unit variant mapping +macro_rules! equivmap { + ($src:ident, $target:ident, [$variant0:ident $(, $variant:ident)* $(,)?] $(,)?) => { + just($src::$variant0).to($target::$variant0) + $(.or(just($src::$variant).to($target::$variant)))* }; } -macro_rules! let_until { - ( - $self:expr, - let $bind:pat, - until |$next:pat_param| $cond:expr, - $expr:expr - $(,)? - ) => { - loop { - let $next = $self.next()?; - if $cond { - break; - } - - let $bind = $self.next()? else { - return Err($self.error(ErrorKind::UnexpectedToken)); - }; - - $expr; - } - }; +fn ident<'a, I>() -> impl Parser<'a, I, Spanned, Extra<'a>> + Clone + Copy +where + I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>, +{ + select!(Token::Ident(id) => id).map_with_span(Spanned::new) } -struct Parser<'a, 'l> { +fn ty<'a, I>() -> impl Parser<'a, I, Spanned, Extra<'a>> + Clone + Copy +where + I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>, +{ + ident().map(|i| i.map(Type::Ident)) +} + +fn definition<'a, I>() -> impl Parser<'a, I, Spanned>, Extra<'a>> + Clone +where + I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>, +{ + let ident = ident(); + let ty = ty(); + + let func = just(T!["func"]) + .ignore_then(ident) + .then( + ident + .then_ignore(just(T![":"])) + .then(ty) + .separated_by(just(T![","])) + .allow_trailing() + .pipe(arena_collect) + .delimited_by(just(T!["("]), just(T![")"])), + ) + .then_ignore(just(T!["→"])) + .then(ty) + .then(just([T!["{"], T!["}"]])) + .map_with_state( + |(((name, params), ret), _body), _, state| Definition::Func { + name, + params, + ret, + body: state.arena.alloc_slice_copy(&[]), + }, + ); + + let binding = equivmap!(Token, DefKind, [Const, Var]) + .then(ident) + .then(just(T![":"]).ignore_then(ty).or_not()) + .then( + just(T!["="]).ignore_then( + just(T!["uninit"]) + .to(Expr::Uninit) + .map_with_span(Spanned::new), + ), + ) + .map_with_state( + |(((kind, ident), ty), init), _, state| Definition::Binding { + kind, + ident, + ty, + init: state.arena.alloc(init), + }, + ); + + func.or(binding).map_with_span(Spanned::new) +} + +pub struct State<'a> { + pub arena: &'a Bump, +} + +type Extra<'a> = Full, State<'a>, ()>; +type ParseResult = (); + +pub fn parse_input<'a>( + input: impl ValueInput<'a, Token = Token, Span = SimpleSpan>, arena: &'a Bump, - lexer: Lexer<'l>, +) -> ParseResult { + println!( + "{:?}", + definition().parse_with_state(input, &mut State { arena }) + ); } -impl<'a, 'l> Parser<'a, 'l> { - /// Poll next token - fn next(&mut self) -> Result { - match self.lexer.next() { - Some(Ok(token)) => Ok(token), - Some(Err(())) => Err(ErrorKind::InvalidToken), - None => Err(ErrorKind::UnexpectedEnd), - } - .map_err(|k| Spanned::new(k, self.lexer.span())) - } - - /// Form an error - #[inline] - fn error(&self, kind: ErrorKind) -> Error { - Spanned::new(kind, self.lexer.span()) - } - - /// Mark with current span - #[inline] - fn spanned(&self, item: T) -> Spanned { - Spanned::new(item, self.lexer.span()) - } - - /// Require a token to be - fn require(&mut self, token: Token) -> Result<()> { - if self.next()? != token { - Err(self.error(ErrorKind::UnexpectedToken)) - } else { - Ok(()) - } - } - - /// Parse everything or DIE! - fn run(mut self) -> Result<&'a [Definition<'a>]> { - let mut defs = vec![in self.arena]; - loop { - match self.lexer.next() { - Some(Ok(Token::Func)) => { - defs.push(self.func()?); - } - Some(Ok(Token::Const)) => defs.push(self.var_def(DefKind::Const)?), - Some(Ok(Token::Var)) => defs.push(self.var_def(DefKind::Var)?), - Some(Ok(_)) => return Err(self.error(ErrorKind::UnexpectedToken)), - Some(Err(())) => return Err(self.error(ErrorKind::InvalidToken)), - None => return Ok(defs.into_bump_slice()), - } - } - } - - fn ident(&mut self) -> Result> { - extract!(self, Token::Ident(id)); - Ok(self.spanned(id)) - } - - fn ty(&mut self) -> Result> { - extract!(self, Token::Ident(id)); - Ok(self.spanned(Type::Ident(id))) - } - - fn block(&mut self) -> Result> { - self.require(T!["{"])?; - // TODO - self.require(T!["}"])?; - - Ok(self.arena.alloc_slice_copy(&[])) - } - - fn var_def(&mut self, kind: DefKind) -> Result> { - // [: ] = ; - // ^^^^^^ - - extract!(self, Token::Ident(id)); - let ident = self.spanned(id); - - let ty = match self.next()? { - Token::Colon => { - let r = Some(self.ty()?); - self.require(T!["="])?; - r - } - Token::Equ => None, - _ => return Err(self.error(ErrorKind::UnexpectedToken)), - }; - - self.require(T!["uninit"])?; - self.require(T![";"])?; - - Ok(Definition::Binding { - kind, - ident, - ty, - init: self.arena.alloc(self.spanned(Expr::Uninit)), - }) - } - - fn func(&mut self) -> Result> { - // func ($(: ),*) → { … } - // ^^^^ - - let name = self.ident()?; - - // Parameter list - let mut params = vec![in self.arena]; - - self.require(T!["("])?; - let mut next = self.next()?; - if next != T![")"] { - loop { - let Token::Ident(id) = next else { - return Err(self.error(ErrorKind::UnexpectedToken)); - }; - - let id = self.spanned(id); - self.require(T![":"])?; - params.push((id, self.ty()?)); - - match self.next()? { - Token::RightParen => break, - Token::Comma => (), - _ => return Err(self.error(ErrorKind::UnexpectedToken)), - } - - next = self.next()?; - } - } - - self.require(T!["→"])?; - let ret = self.ty()?; - let body = self.block()?; - - Ok(Definition::Func { - name, - params: params.into_bump_slice(), - ret, - body, - }) - } +pub fn parse_iter( + input: impl Iterator, + eoi: impl Into, + arena: &Bump, +) -> ParseResult { + parse_input(Stream::from_iter(input).spanned(eoi.into()), arena) } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum ErrorKind { - InvalidToken, - UnexpectedEnd, - UnexpectedToken, -} - -pub type Error = Spanned; -type Result = std::result::Result; - -pub fn parse<'a>(code: &str, arena: &'a Bump) -> Result<&'a [Definition<'a>]> { - Parser { +pub fn parse_lexer(input: Lexer, arena: &Bump) -> ParseResult { + let end = input.span().end; + parse_iter( + input + .spanned() + .map(|(token, span)| (token.unwrap_or(Token::Invalid), span.into())), + end..end + 1, arena, - lexer: Token::lexer(code), - } - .run() + ) +} + +fn arena_collect<'a, I, O: 'a>( + parser: impl IterParser<'a, I, O, Extra<'a>> + Clone, +) -> impl Parser<'a, I, &'a [O], Extra<'a>> + Clone +where + I: Input<'a, Span = SimpleSpan, Token = Token>, +{ + empty() + .map_with_state(|_, _, state: &mut State| bumpalo::vec![in state.arena]) + .foldl(parser, |mut v, o| { + v.push(o); + v + }) + .map(bumpalo::collections::Vec::into_bump_slice) +} + +fn arena_box<'a, I, O: 'a>( + parser: impl Parser<'a, I, O, Extra<'a>> + Clone, +) -> impl Parser<'a, I, &'a O, Extra<'a>> + Clone +where + I: Input<'a, Span = SimpleSpan, Token = Token>, +{ + parser.map_with_state(|item, _, state| &*state.arena.alloc(item)) +} + +#[inline] +fn merge_spans(start: SimpleSpan, end: SimpleSpan) -> SimpleSpan { + SimpleSpan::new(start.start, end.end) } diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 28e6caa..5500f44 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -91,6 +91,8 @@ token_def!( "[0-9]+", |l| l.slice().parse::().ok() )] Int(u64), + + Invalid, } ); diff --git a/src/utils.rs b/src/utils.rs index 31a900e..6b4c784 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -2,3 +2,14 @@ pub fn default() -> T { Default::default() } + +pub trait Pipe { + fn pipe(self, mut f: impl FnMut(Self) -> R) -> R + where + Self: Sized, + { + f(self) + } +} + +impl Pipe for T {}