New parser

This commit is contained in:
Erin 2023-10-04 18:59:44 +02:00 committed by ondra05
parent 47d44dcd04
commit b3be2a1358
7 changed files with 237 additions and 216 deletions

70
Cargo.lock generated
View file

@ -25,12 +25,31 @@ version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "1.0.0-alpha.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc3172a80699de358070dd99f80ea8badc6cdf8ac2417cb5a96e6d81bf5fe06d"
dependencies = [
"hashbrown",
"stacker",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -55,6 +74,12 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "libc"
version = "0.2.148"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
[[package]] [[package]]
name = "literify" name = "literify"
version = "0.2.0" version = "0.2.0"
@ -128,6 +153,15 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.33" version = "1.0.33"
@ -148,12 +182,26 @@ name = "rhea"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"bumpalo", "bumpalo",
"chumsky",
"lasso", "lasso",
"literify", "literify",
"logos", "logos",
"paste", "paste",
] ]
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.33" version = "2.0.33"
@ -176,3 +224,25 @@ name = "version_check"
version = "0.9.4" version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View file

@ -5,6 +5,7 @@ edition = "2021"
[dependencies] [dependencies]
bumpalo = { version = "3", features = ["collections"] } bumpalo = { version = "3", features = ["collections"] }
chumsky = "1.0.0-alpha"
lasso = "0.7" lasso = "0.7"
literify = "0.2" literify = "0.2"
logos = "0.13" logos = "0.13"

View file

@ -1,5 +1,7 @@
// Rhea // Rhea
use {logos::Logos, syntax::token::Token};
mod syntax; mod syntax;
mod utils; mod utils;
@ -14,12 +16,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
stdin().read_to_string(&mut buf)?; stdin().read_to_string(&mut buf)?;
let arena = Bump::new(); let arena = Bump::new();
match syntax::parser::parse(&buf, &arena) { syntax::parser::parse_lexer(Token::lexer(&buf), &arena);
Ok(ast) => println!("{ast:?}"),
Err(e) => {
eprintln!("[ERROR] {e:?}");
eprintln!(" Caused at: `{}`", &buf[e.span.start..e.span.end])
}
}
Ok(()) Ok(())
} }

View file

@ -1,32 +1,21 @@
use lasso::Spur; use {chumsky::span::SimpleSpan, lasso::Spur};
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Span {
pub start: usize,
pub end: usize,
}
impl From<std::ops::Range<usize>> for Span {
fn from(value: std::ops::Range<usize>) -> Self {
Self {
start: value.start,
end: value.end,
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Spanned<T> { pub struct Spanned<T> {
pub item: T, pub item: T,
pub span: Span, pub span: SimpleSpan,
} }
impl<T> Spanned<T> { impl<T> Spanned<T> {
#[inline] #[inline]
pub fn new(item: T, span: impl Into<Span>) -> Self { pub const fn new(item: T, span: SimpleSpan) -> Self {
Self { Self { item, span }
item, }
span: span.into(),
pub fn map<U>(self, mut f: impl FnMut(T) -> U) -> Spanned<U> {
Spanned {
item: f(self.item),
span: self.span,
} }
} }
} }
@ -72,7 +61,7 @@ pub enum Definition<'a> {
init: ExprRef<'a>, init: ExprRef<'a>,
}, },
Func { Func {
name: Spanned<Ident>, name: Spanned<Ident>,
params: &'a [(Spanned<Ident>, Spanned<Type>)], params: &'a [(Spanned<Ident>, Spanned<Type>)],
ret: Spanned<Type>, ret: Spanned<Type>,
body: ExprList<'a>, body: ExprList<'a>,

View file

@ -1,204 +1,155 @@
use super::ast::{DefKind, Expr, ExprList, Type};
use { use {
super::{ super::{
ast::{DefKind, Definition, Expr, ExprList, Ident, Spanned, Type}, ast::{Definition, Ident, SpanExpr, Spanned},
token::Token, token::{Token, T},
}, },
crate::syntax::token::T, crate::utils::Pipe,
bumpalo::{vec, Bump}, bumpalo::Bump,
logos::Logos, chumsky::{
extra::Full,
input::{Stream, ValueInput},
prelude::*,
},
logos::Lexer,
}; };
type Lexer<'a> = logos::Lexer<'a, Token>; /// Equivalently-named unit variant mapping
macro_rules! equivmap {
macro_rules! extract { ($src:ident, $target:ident, [$variant0:ident $(, $variant:ident)* $(,)?] $(,)?) => {
($self:expr, $pat:pat) => { just($src::$variant0).to($target::$variant0)
let $pat = $self.next()? else { $(.or(just($src::$variant).to($target::$variant)))*
return Err($self.error(ErrorKind::UnexpectedToken));
};
}; };
} }
macro_rules! let_until { fn ident<'a, I>() -> impl Parser<'a, I, Spanned<Ident>, Extra<'a>> + Clone + Copy
( where
$self:expr, I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>,
let $bind:pat, {
until |$next:pat_param| $cond:expr, select!(Token::Ident(id) => id).map_with_span(Spanned::new)
$expr:expr
$(,)?
) => {
loop {
let $next = $self.next()?;
if $cond {
break;
}
let $bind = $self.next()? else {
return Err($self.error(ErrorKind::UnexpectedToken));
};
$expr;
}
};
} }
struct Parser<'a, 'l> { fn ty<'a, I>() -> impl Parser<'a, I, Spanned<Type>, Extra<'a>> + Clone + Copy
where
I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>,
{
ident().map(|i| i.map(Type::Ident))
}
/// Builds the parser for a single top-level definition.
///
/// Two alternatives are tried in order (`func.or(binding)`):
///
/// * a function: `func <ident> ( <ident> : <ty> , ... ) <arrow> <ty> { }`
/// * a binding: `<Const|Var> <ident> [: <ty>] = uninit`
///
/// The result is wrapped in [`Spanned`] with the span covering the whole
/// definition. Arena allocations go through the `arena` held by the parser
/// state.
fn definition<'a, I>() -> impl Parser<'a, I, Spanned<Definition<'a>>, Extra<'a>> + Clone
where
I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>,
{
// Both sub-parsers are declared `Copy`, so they can be reused in several
// places of the chains below without cloning.
let ident = ident();
let ty = ty();
let func = just(T!["func"])
// Function name.
.ignore_then(ident)
.then(
// Parameter list: `name: type` pairs separated by commas, a trailing
// comma is accepted; the pairs are collected into an arena slice.
ident
.then_ignore(just(T![":"]))
.then(ty)
.separated_by(just(T![","]))
.allow_trailing()
.pipe(arena_collect)
.delimited_by(just(T!["("]), just(T![")"])),
)
// NOTE(review): the token literal below is empty as shown here; presumably
// this is the return-arrow token and its glyph was lost — confirm against
// the token definitions.
.then_ignore(just(T![""]))
// Return type.
.then(ty)
// Body placeholder: only the token pair `{` `}` is accepted for now.
.then(just([T!["{"], T!["}"]]))
.map_with_state(
|(((name, params), ret), _body), _, state| Definition::Func {
name,
params,
ret,
// The matched body tokens are discarded; an empty expression list is stored.
body: state.arena.alloc_slice_copy(&[]),
},
);
// The binding keyword (`Const` / `Var`) is mapped to the equally named
// `DefKind` variant.
let binding = equivmap!(Token, DefKind, [Const, Var])
.then(ident)
// Optional type annotation.
.then(just(T![":"]).ignore_then(ty).or_not())
.then(
// The only initialiser accepted is `uninit`.
// NOTE(review): no terminating `;` token is consumed by this parser.
just(T!["="]).ignore_then(
just(T!["uninit"])
.to(Expr::Uninit)
.map_with_span(Spanned::new),
),
)
.map_with_state(
|(((kind, ident), ty), init), _, state| Definition::Binding {
kind,
ident,
ty,
// The initialiser expression is placed in the arena.
init: state.arena.alloc(init),
},
);
func.or(binding).map_with_span(Spanned::new)
}
/// State handed to the parser through `parse_with_state`; parser callbacks
/// use it to reach the allocator.
pub struct State<'a> {
/// Bump arena in which the parser callbacks allocate their output.
pub arena: &'a Bump,
}
type Extra<'a> = Full<Rich<'a, Token>, State<'a>, ()>;
type ParseResult = ();
pub fn parse_input<'a>(
input: impl ValueInput<'a, Token = Token, Span = SimpleSpan>,
arena: &'a Bump, arena: &'a Bump,
lexer: Lexer<'l>, ) -> ParseResult {
println!(
"{:?}",
definition().parse_with_state(input, &mut State { arena })
);
} }
impl<'a, 'l> Parser<'a, 'l> { pub fn parse_iter(
/// Poll next token input: impl Iterator<Item = (Token, SimpleSpan)>,
fn next(&mut self) -> Result<Token> { eoi: impl Into<SimpleSpan>,
match self.lexer.next() { arena: &Bump,
Some(Ok(token)) => Ok(token), ) -> ParseResult {
Some(Err(())) => Err(ErrorKind::InvalidToken), parse_input(Stream::from_iter(input).spanned(eoi.into()), arena)
None => Err(ErrorKind::UnexpectedEnd),
}
.map_err(|k| Spanned::new(k, self.lexer.span()))
}
/// Form an error
#[inline]
fn error(&self, kind: ErrorKind) -> Error {
Spanned::new(kind, self.lexer.span())
}
/// Mark with current span
#[inline]
fn spanned<T>(&self, item: T) -> Spanned<T> {
Spanned::new(item, self.lexer.span())
}
/// Require a token to be
fn require(&mut self, token: Token) -> Result<()> {
if self.next()? != token {
Err(self.error(ErrorKind::UnexpectedToken))
} else {
Ok(())
}
}
/// Parse everything or DIE!
fn run(mut self) -> Result<&'a [Definition<'a>]> {
let mut defs = vec![in self.arena];
loop {
match self.lexer.next() {
Some(Ok(Token::Func)) => {
defs.push(self.func()?);
}
Some(Ok(Token::Const)) => defs.push(self.var_def(DefKind::Const)?),
Some(Ok(Token::Var)) => defs.push(self.var_def(DefKind::Var)?),
Some(Ok(_)) => return Err(self.error(ErrorKind::UnexpectedToken)),
Some(Err(())) => return Err(self.error(ErrorKind::InvalidToken)),
None => return Ok(defs.into_bump_slice()),
}
}
}
fn ident(&mut self) -> Result<Spanned<Ident>> {
extract!(self, Token::Ident(id));
Ok(self.spanned(id))
}
fn ty(&mut self) -> Result<Spanned<Type>> {
extract!(self, Token::Ident(id));
Ok(self.spanned(Type::Ident(id)))
}
fn block(&mut self) -> Result<ExprList<'a>> {
self.require(T!["{"])?;
// TODO
self.require(T!["}"])?;
Ok(self.arena.alloc_slice_copy(&[]))
}
fn var_def(&mut self, kind: DefKind) -> Result<Definition<'a>> {
// <kind> <ident> [: <ty>] = <expr>;
// ^^^^^^
extract!(self, Token::Ident(id));
let ident = self.spanned(id);
let ty = match self.next()? {
Token::Colon => {
let r = Some(self.ty()?);
self.require(T!["="])?;
r
}
Token::Equ => None,
_ => return Err(self.error(ErrorKind::UnexpectedToken)),
};
self.require(T!["uninit"])?;
self.require(T![";"])?;
Ok(Definition::Binding {
kind,
ident,
ty,
init: self.arena.alloc(self.spanned(Expr::Uninit)),
})
}
fn func(&mut self) -> Result<Definition<'a>> {
// func <ident> ($(<ident>: <ty>),*) → <ty> { … }
// ^^^^
let name = self.ident()?;
// Parameter list
let mut params = vec![in self.arena];
self.require(T!["("])?;
let mut next = self.next()?;
if next != T![")"] {
loop {
let Token::Ident(id) = next else {
return Err(self.error(ErrorKind::UnexpectedToken));
};
let id = self.spanned(id);
self.require(T![":"])?;
params.push((id, self.ty()?));
match self.next()? {
Token::RightParen => break,
Token::Comma => (),
_ => return Err(self.error(ErrorKind::UnexpectedToken)),
}
next = self.next()?;
}
}
self.require(T![""])?;
let ret = self.ty()?;
let body = self.block()?;
Ok(Definition::Func {
name,
params: params.into_bump_slice(),
ret,
body,
})
}
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub fn parse_lexer(input: Lexer<Token>, arena: &Bump) -> ParseResult {
pub enum ErrorKind { let end = input.span().end;
InvalidToken, parse_iter(
UnexpectedEnd, input
UnexpectedToken, .spanned()
} .map(|(token, span)| (token.unwrap_or(Token::Invalid), span.into())),
end..end + 1,
pub type Error = Spanned<ErrorKind>;
type Result<T, E = Error> = std::result::Result<T, E>;
pub fn parse<'a>(code: &str, arena: &'a Bump) -> Result<&'a [Definition<'a>]> {
Parser {
arena, arena,
lexer: Token::lexer(code), )
} }
.run()
/// Turns an iterable parser into a parser that yields all of its items as a
/// slice allocated in the arena of the parser [`State`].
///
/// The items are pushed one by one into a `bumpalo` vector created in
/// `state.arena`, which is then converted into a bump slice.
fn arena_collect<'a, I, O: 'a>(
parser: impl IterParser<'a, I, O, Extra<'a>> + Clone,
) -> impl Parser<'a, I, &'a [O], Extra<'a>> + Clone
where
I: Input<'a, Span = SimpleSpan, Token = Token>,
{
// `empty()` consumes no input; it only serves to create the accumulator
// vector with access to the parser state.
empty()
.map_with_state(|_, _, state: &mut State| bumpalo::vec![in state.arena])
// Fold every item produced by `parser` into the vector.
.foldl(parser, |mut v, o| {
v.push(o);
v
})
.map(bumpalo::collections::Vec::into_bump_slice)
}
/// Wraps `parser` so that its output is allocated in the arena of the parser
/// state and a shared reference to the allocated value is yielded instead.
fn arena_box<'a, I, O: 'a>(
parser: impl Parser<'a, I, O, Extra<'a>> + Clone,
) -> impl Parser<'a, I, &'a O, Extra<'a>> + Clone
where
I: Input<'a, Span = SimpleSpan, Token = Token>,
{
// `alloc` returns a mutable reference; `&*` reborrows it as a shared one.
parser.map_with_state(|item, _, state| &*state.arena.alloc(item))
}
#[inline]
fn merge_spans(start: SimpleSpan, end: SimpleSpan) -> SimpleSpan {
SimpleSpan::new(start.start, end.end)
} }

View file

@ -91,6 +91,8 @@ token_def!(
"[0-9]+", "[0-9]+",
|l| l.slice().parse::<u64>().ok() |l| l.slice().parse::<u64>().ok()
)] Int(u64), )] Int(u64),
Invalid,
} }
); );

View file

@ -2,3 +2,14 @@
pub fn default<T: Default>() -> T { pub fn default<T: Default>() -> T {
Default::default() Default::default()
} }
/// Extension trait that lets any value be passed to a function in
/// method-call position: `value.pipe(f)` is the same as `f(value)`.
///
/// This keeps long method chains readable when one step is a free function.
pub trait Pipe {
    /// Consumes `self`, passes it to `f` and returns whatever `f` returns.
    ///
    /// `f` is invoked exactly once, so only `FnOnce` is required. This also
    /// admits closures that move out of a captured value, while every
    /// `Fn`/`FnMut` callable is still accepted.
    fn pipe<R>(self, f: impl FnOnce(Self) -> R) -> R
    where
        Self: Sized,
    {
        f(self)
    }
}

/// Blanket implementation: every sized type gets `pipe`.
impl<T> Pipe for T {}