New parser

parent 47d44dcd04
commit b3be2a1358

Cargo.lock (generated) | 70
@@ -25,12 +25,31 @@ version = "3.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
 
+[[package]]
+name = "cc"
+version = "1.0.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "chumsky"
+version = "1.0.0-alpha.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc3172a80699de358070dd99f80ea8badc6cdf8ac2417cb5a96e6d81bf5fe06d"
+dependencies = [
+ "hashbrown",
+ "stacker",
+]
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -55,6 +74,12 @@ dependencies = [
  "hashbrown",
 ]
 
+[[package]]
+name = "libc"
+version = "0.2.148"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
+
 [[package]]
 name = "literify"
 version = "0.2.0"
@@ -128,6 +153,15 @@
  "unicode-ident",
 ]
 
+[[package]]
+name = "psm"
+version = "0.1.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
+dependencies = [
+ "cc",
+]
+
 [[package]]
 name = "quote"
 version = "1.0.33"
@@ -148,12 +182,26 @@ name = "rhea"
 version = "0.1.0"
 dependencies = [
  "bumpalo",
+ "chumsky",
  "lasso",
  "literify",
  "logos",
  "paste",
 ]
 
+[[package]]
+name = "stacker"
+version = "0.1.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "libc",
+ "psm",
+ "winapi",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.33"
@@ -176,3 +224,25 @@ name = "version_check"
 version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
Cargo.toml

@@ -5,6 +5,7 @@ edition = "2021"
 
 [dependencies]
 bumpalo = { version = "3", features = ["collections"] }
+chumsky = "1.0.0-alpha"
 lasso = "0.7"
 literify = "0.2"
 logos = "0.13"
src/main.rs | 11

@@ -1,5 +1,7 @@
 // Rhea
 
+use {logos::Logos, syntax::token::Token};
+
 mod syntax;
 mod utils;
 
@@ -14,12 +16,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     stdin().read_to_string(&mut buf)?;
 
     let arena = Bump::new();
-    match syntax::parser::parse(&buf, &arena) {
-        Ok(ast) => println!("{ast:?}"),
-        Err(e) => {
-            eprintln!("[ERROR] {e:?}");
-            eprintln!("  Caused at: `{}`", &buf[e.span.start..e.span.end])
-        }
-    }
+    syntax::parser::parse_lexer(Token::lexer(&buf), &arena);
 
     Ok(())
 }
src/syntax/ast.rs

@@ -1,32 +1,21 @@
-use lasso::Spur;
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub struct Span {
-    pub start: usize,
-    pub end: usize,
-}
-
-impl From<std::ops::Range<usize>> for Span {
-    fn from(value: std::ops::Range<usize>) -> Self {
-        Self {
-            start: value.start,
-            end: value.end,
-        }
-    }
-}
+use {chumsky::span::SimpleSpan, lasso::Spur};
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub struct Spanned<T> {
     pub item: T,
-    pub span: Span,
+    pub span: SimpleSpan,
 }
 
 impl<T> Spanned<T> {
     #[inline]
-    pub fn new(item: T, span: impl Into<Span>) -> Self {
-        Self {
-            item,
-            span: span.into(),
-        }
+    pub const fn new(item: T, span: SimpleSpan) -> Self {
+        Self { item, span }
     }
+
+    pub fn map<U>(self, mut f: impl FnMut(T) -> U) -> Spanned<U> {
+        Spanned {
+            item: f(self.item),
+            span: self.span,
+        }
+    }
 }
@@ -72,7 +61,7 @@ pub enum Definition<'a> {
         init: ExprRef<'a>,
     },
     Func {
-        name: Spanned<Ident>,
+        name: Spanned<Ident>,
         params: &'a [(Spanned<Ident>, Spanned<Type>)],
         ret: Spanned<Type>,
         body: ExprList<'a>,
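With `Span` gone, every AST node now carries chumsky's `SimpleSpan` directly. A minimal, self-contained sketch of how the new `Spanned<T>` behaves follows; the `main` function and the concrete values are illustrative only, and `SimpleSpan::new(start, end)` is the same constructor the commit's `merge_spans` helper uses in parser.rs.

use chumsky::span::SimpleSpan;

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Spanned<T> {
    pub item: T,
    pub span: SimpleSpan,
}

impl<T> Spanned<T> {
    // `const fn`: the span is stored as-is instead of going through `Into<Span>`.
    pub const fn new(item: T, span: SimpleSpan) -> Self {
        Self { item, span }
    }

    // Transform the payload while keeping the original source span.
    pub fn map<U>(self, mut f: impl FnMut(T) -> U) -> Spanned<U> {
        Spanned {
            item: f(self.item),
            span: self.span,
        }
    }
}

fn main() {
    let n = Spanned::new(40_u32, SimpleSpan::new(0, 2));
    let m = n.map(|x| x + 2);
    assert_eq!(m.item, 42);
    assert_eq!((m.span.start, m.span.end), (0, 2)); // span carried through unchanged
}

In the parser these values are produced with `.map_with_span(Spanned::new)`, so spans now come from the token stream rather than from `lexer.span()` as in the old hand-written parser.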
src/syntax/parser.rs

@@ -1,204 +1,155 @@
+use super::ast::{DefKind, Expr, ExprList, Type};
+
 use {
     super::{
-        ast::{DefKind, Definition, Expr, ExprList, Ident, Spanned, Type},
-        token::Token,
+        ast::{Definition, Ident, SpanExpr, Spanned},
+        token::{Token, T},
     },
-    crate::syntax::token::T,
-    bumpalo::{vec, Bump},
-    logos::Logos,
+    crate::utils::Pipe,
+    bumpalo::Bump,
+    chumsky::{
+        extra::Full,
+        input::{Stream, ValueInput},
+        prelude::*,
+    },
+    logos::Lexer,
 };
 
-type Lexer<'a> = logos::Lexer<'a, Token>;
-
-macro_rules! extract {
-    ($self:expr, $pat:pat) => {
-        let $pat = $self.next()? else {
-            return Err($self.error(ErrorKind::UnexpectedToken));
-        };
+/// Equivalently-named unit variant mapping
+macro_rules! equivmap {
+    ($src:ident, $target:ident, [$variant0:ident $(, $variant:ident)* $(,)?] $(,)?) => {
+        just($src::$variant0).to($target::$variant0)
+            $(.or(just($src::$variant).to($target::$variant)))*
     };
 }
 
-macro_rules! let_until {
-    (
-        $self:expr,
-        let $bind:pat,
-        until |$next:pat_param| $cond:expr,
-        $expr:expr
-        $(,)?
-    ) => {
-        loop {
-            let $next = $self.next()?;
-            if $cond {
-                break;
-            }
-
-            let $bind = $self.next()? else {
-                return Err($self.error(ErrorKind::UnexpectedToken));
-            };
-
-            $expr;
-        }
-    };
+fn ident<'a, I>() -> impl Parser<'a, I, Spanned<Ident>, Extra<'a>> + Clone + Copy
+where
+    I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>,
+{
+    select!(Token::Ident(id) => id).map_with_span(Spanned::new)
 }
 
-struct Parser<'a, 'l> {
+fn ty<'a, I>() -> impl Parser<'a, I, Spanned<Type>, Extra<'a>> + Clone + Copy
+where
+    I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>,
+{
+    ident().map(|i| i.map(Type::Ident))
+}
+
+fn definition<'a, I>() -> impl Parser<'a, I, Spanned<Definition<'a>>, Extra<'a>> + Clone
+where
+    I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>,
+{
+    let ident = ident();
+    let ty = ty();
+
+    let func = just(T!["func"])
+        .ignore_then(ident)
+        .then(
+            ident
+                .then_ignore(just(T![":"]))
+                .then(ty)
+                .separated_by(just(T![","]))
+                .allow_trailing()
+                .pipe(arena_collect)
+                .delimited_by(just(T!["("]), just(T![")"])),
+        )
+        .then_ignore(just(T!["→"]))
+        .then(ty)
+        .then(just([T!["{"], T!["}"]]))
+        .map_with_state(
+            |(((name, params), ret), _body), _, state| Definition::Func {
+                name,
+                params,
+                ret,
+                body: state.arena.alloc_slice_copy(&[]),
+            },
+        );
+
+    let binding = equivmap!(Token, DefKind, [Const, Var])
+        .then(ident)
+        .then(just(T![":"]).ignore_then(ty).or_not())
+        .then(
+            just(T!["="]).ignore_then(
+                just(T!["uninit"])
+                    .to(Expr::Uninit)
+                    .map_with_span(Spanned::new),
+            ),
+        )
+        .map_with_state(
+            |(((kind, ident), ty), init), _, state| Definition::Binding {
+                kind,
+                ident,
+                ty,
+                init: state.arena.alloc(init),
+            },
+        );
+
+    func.or(binding).map_with_span(Spanned::new)
+}
+
+pub struct State<'a> {
+    pub arena: &'a Bump,
 }
 
+type Extra<'a> = Full<Rich<'a, Token>, State<'a>, ()>;
+type ParseResult = ();
+
+pub fn parse_input<'a>(
+    input: impl ValueInput<'a, Token = Token, Span = SimpleSpan>,
+    arena: &'a Bump,
-    lexer: Lexer<'l>,
+) -> ParseResult {
+    println!(
+        "{:?}",
+        definition().parse_with_state(input, &mut State { arena })
+    );
+}
+
-impl<'a, 'l> Parser<'a, 'l> {
-    /// Poll next token
-    fn next(&mut self) -> Result<Token> {
-        match self.lexer.next() {
-            Some(Ok(token)) => Ok(token),
-            Some(Err(())) => Err(ErrorKind::InvalidToken),
-            None => Err(ErrorKind::UnexpectedEnd),
-        }
-        .map_err(|k| Spanned::new(k, self.lexer.span()))
-    }
-
-    /// Form an error
-    #[inline]
-    fn error(&self, kind: ErrorKind) -> Error {
-        Spanned::new(kind, self.lexer.span())
-    }
-
-    /// Mark with current span
-    #[inline]
-    fn spanned<T>(&self, item: T) -> Spanned<T> {
-        Spanned::new(item, self.lexer.span())
-    }
-
-    /// Require a token to be
-    fn require(&mut self, token: Token) -> Result<()> {
-        if self.next()? != token {
-            Err(self.error(ErrorKind::UnexpectedToken))
-        } else {
-            Ok(())
-        }
-    }
-
-    /// Parse everything or DIE!
-    fn run(mut self) -> Result<&'a [Definition<'a>]> {
-        let mut defs = vec![in self.arena];
-        loop {
-            match self.lexer.next() {
-                Some(Ok(Token::Func)) => {
-                    defs.push(self.func()?);
-                }
-                Some(Ok(Token::Const)) => defs.push(self.var_def(DefKind::Const)?),
-                Some(Ok(Token::Var)) => defs.push(self.var_def(DefKind::Var)?),
-                Some(Ok(_)) => return Err(self.error(ErrorKind::UnexpectedToken)),
-                Some(Err(())) => return Err(self.error(ErrorKind::InvalidToken)),
-                None => return Ok(defs.into_bump_slice()),
-            }
-        }
-    }
-
-    fn ident(&mut self) -> Result<Spanned<Ident>> {
-        extract!(self, Token::Ident(id));
-        Ok(self.spanned(id))
-    }
-
-    fn ty(&mut self) -> Result<Spanned<Type>> {
-        extract!(self, Token::Ident(id));
-        Ok(self.spanned(Type::Ident(id)))
-    }
-
-    fn block(&mut self) -> Result<ExprList<'a>> {
-        self.require(T!["{"])?;
-        // TODO
-        self.require(T!["}"])?;
-
-        Ok(self.arena.alloc_slice_copy(&[]))
-    }
-
-    fn var_def(&mut self, kind: DefKind) -> Result<Definition<'a>> {
-        // <kind> <ident> [: <ty>] = <expr>;
-        // ^^^^^^
-
-        extract!(self, Token::Ident(id));
-        let ident = self.spanned(id);
-
-        let ty = match self.next()? {
-            Token::Colon => {
-                let r = Some(self.ty()?);
-                self.require(T!["="])?;
-                r
-            }
-            Token::Equ => None,
-            _ => return Err(self.error(ErrorKind::UnexpectedToken)),
-        };
-
-        self.require(T!["uninit"])?;
-        self.require(T![";"])?;
-
-        Ok(Definition::Binding {
-            kind,
-            ident,
-            ty,
-            init: self.arena.alloc(self.spanned(Expr::Uninit)),
-        })
-    }
-
-    fn func(&mut self) -> Result<Definition<'a>> {
-        // func <ident> ($(<ident>: <ty>),*) → <ty> { … }
-        // ^^^^
-
-        let name = self.ident()?;
-
-        // Parameter list
-        let mut params = vec![in self.arena];
-
-        self.require(T!["("])?;
-        let mut next = self.next()?;
-        if next != T![")"] {
-            loop {
-                let Token::Ident(id) = next else {
-                    return Err(self.error(ErrorKind::UnexpectedToken));
-                };
-
-                let id = self.spanned(id);
-                self.require(T![":"])?;
-                params.push((id, self.ty()?));
-
-                match self.next()? {
-                    Token::RightParen => break,
-                    Token::Comma => (),
-                    _ => return Err(self.error(ErrorKind::UnexpectedToken)),
-                }
-
-                next = self.next()?;
-            }
-        }
-
-        self.require(T!["→"])?;
-        let ret = self.ty()?;
-        let body = self.block()?;
-
-        Ok(Definition::Func {
-            name,
-            params: params.into_bump_slice(),
-            ret,
-            body,
-        })
-    }
+pub fn parse_iter(
+    input: impl Iterator<Item = (Token, SimpleSpan)>,
+    eoi: impl Into<SimpleSpan>,
+    arena: &Bump,
+) -> ParseResult {
+    parse_input(Stream::from_iter(input).spanned(eoi.into()), arena)
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub enum ErrorKind {
-    InvalidToken,
-    UnexpectedEnd,
-    UnexpectedToken,
-}
-
-pub type Error = Spanned<ErrorKind>;
-type Result<T, E = Error> = std::result::Result<T, E>;
-
-pub fn parse<'a>(code: &str, arena: &'a Bump) -> Result<&'a [Definition<'a>]> {
-    Parser {
+pub fn parse_lexer(input: Lexer<Token>, arena: &Bump) -> ParseResult {
+    let end = input.span().end;
+    parse_iter(
+        input
+            .spanned()
+            .map(|(token, span)| (token.unwrap_or(Token::Invalid), span.into())),
+        end..end + 1,
         arena,
-        lexer: Token::lexer(code),
-    }
-    .run()
+    )
 }
+
+fn arena_collect<'a, I, O: 'a>(
+    parser: impl IterParser<'a, I, O, Extra<'a>> + Clone,
+) -> impl Parser<'a, I, &'a [O], Extra<'a>> + Clone
+where
+    I: Input<'a, Span = SimpleSpan, Token = Token>,
+{
+    empty()
+        .map_with_state(|_, _, state: &mut State| bumpalo::vec![in state.arena])
+        .foldl(parser, |mut v, o| {
+            v.push(o);
+            v
+        })
+        .map(bumpalo::collections::Vec::into_bump_slice)
+}
+
+fn arena_box<'a, I, O: 'a>(
+    parser: impl Parser<'a, I, O, Extra<'a>> + Clone,
+) -> impl Parser<'a, I, &'a O, Extra<'a>> + Clone
+where
+    I: Input<'a, Span = SimpleSpan, Token = Token>,
+{
+    parser.map_with_state(|item, _, state| &*state.arena.alloc(item))
+}
+
+#[inline]
+fn merge_spans(start: SimpleSpan, end: SimpleSpan) -> SimpleSpan {
+    SimpleSpan::new(start.start, end.end)
+}
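The new pipeline is: logos yields `(Result<Token, ()>, Range<usize>)` pairs, `parse_lexer` folds lex errors into `Token::Invalid` and converts each span, and chumsky consumes the result as a spanned `Stream` with the bumpalo arena threaded through as parser state. Below is a small, self-contained sketch of that same bridge against the chumsky `1.0.0-alpha` / logos `0.13` API this commit pins; the `Tok` enum, the `sum` parser, and the input string are hypothetical stand-ins, and lexer errors are simply dropped here rather than mapped to an `Invalid` token as the commit does.

use {
    bumpalo::Bump,
    chumsky::{
        extra::Full,
        input::{Stream, ValueInput},
        prelude::*,
        span::SimpleSpan,
    },
    logos::Logos,
};

// Toy token type standing in for the crate's `Token` (illustrative only).
#[derive(Logos, Clone, Copy, Debug, PartialEq, Eq)]
#[logos(skip r"[ \t\r\n]+")]
enum Tok {
    #[regex("[0-9]+", |l| l.slice().parse::<u64>().ok())]
    Int(u64),
    #[token("+")]
    Plus,
}

// Parser state carrying the arena, mirroring `State` in parser.rs.
struct State<'a> {
    arena: &'a Bump,
}

type Extra<'a> = Full<Rich<'a, Tok>, State<'a>, ()>;

// `<int> + <int>`, with the sum allocated in the arena the way the real
// parser allocates AST nodes via `state.arena.alloc(...)`.
fn sum<'a, I>() -> impl Parser<'a, I, &'a u64, Extra<'a>>
where
    I: ValueInput<'a, Token = Tok, Span = SimpleSpan>,
{
    let int = select!(Tok::Int(n) => n);
    int.then_ignore(just(Tok::Plus))
        .then(int)
        .map_with_state(|(a, b), _span, state: &mut State| &*state.arena.alloc(a + b))
}

fn main() {
    let src = "40 + 2";
    let eoi = SimpleSpan::new(src.len(), src.len());

    // Bridge logos output into a chumsky stream: keep only successfully lexed
    // tokens and convert the logos `Range<usize>` span into a `SimpleSpan`.
    let tokens = Tok::lexer(src)
        .spanned()
        .filter_map(|(tok, span)| Some((tok.ok()?, SimpleSpan::from(span))));

    let arena = Bump::new();
    let result =
        sum().parse_with_state(Stream::from_iter(tokens).spanned(eoi), &mut State { arena: &arena });
    println!("{result:?}"); // Debug-prints the parse result; on success the output is 42.
}

Keeping the arena in `State` rather than capturing it in each closure is what lets `arena_collect` and `arena_box` allocate slices and nodes with the parser's output lifetime.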
src/syntax/token.rs

@@ -91,6 +91,8 @@ token_def!(
         "[0-9]+",
         |l| l.slice().parse::<u64>().ok()
     )] Int(u64),
+
+    Invalid,
     }
 );
src/utils.rs | 11

@@ -2,3 +2,14 @@
 pub fn default<T: Default>() -> T {
     Default::default()
 }
+
+pub trait Pipe {
+    fn pipe<R>(self, mut f: impl FnMut(Self) -> R) -> R
+    where
+        Self: Sized,
+    {
+        f(self)
+    }
+}
+
+impl<T> Pipe for T {}
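`Pipe` exists so a free function can be slotted into a method chain: `x.pipe(f)` is just `f(x)`, which is what lets the parameter-list parser in parser.rs write `.allow_trailing().pipe(arena_collect)` instead of wrapping the whole combinator chain in a call. A minimal illustration (the numeric example is not from the commit):

// Blanket "pipe" helper, as added in src/utils.rs.
pub trait Pipe {
    fn pipe<R>(self, mut f: impl FnMut(Self) -> R) -> R
    where
        Self: Sized,
    {
        f(self)
    }
}

impl<T> Pipe for T {}

fn main() {
    // Reads left to right, like a method chain: (4 * 10) + 2.
    let n = 4_i32.pipe(|x| x * 10).pipe(|x| x + 2);
    assert_eq!(n, 42);
}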