diff --git a/Cargo.lock b/Cargo.lock index 8621c4b..ab1d19f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,31 +25,12 @@ version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" -[[package]] -name = "cc" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" -dependencies = [ - "libc", -] - [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chumsky" -version = "1.0.0-alpha.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc3172a80699de358070dd99f80ea8badc6cdf8ac2417cb5a96e6d81bf5fe06d" -dependencies = [ - "hashbrown", - "stacker", -] - [[package]] name = "fnv" version = "1.0.7" @@ -74,12 +55,6 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "libc" -version = "0.2.148" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" - [[package]] name = "logos" version = "0.13.0" @@ -127,15 +102,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "psm" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" -dependencies = [ - "cc", -] - [[package]] name = "quote" version = "1.0.33" @@ -156,24 +122,10 @@ name = "rhea" version = "0.1.0" dependencies = [ "bumpalo", - "chumsky", "lasso", "logos", ] -[[package]] -name = "stacker" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "winapi", -] - [[package]] name = "syn" version = "2.0.33" @@ -196,25 +148,3 @@ name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index a11643b..a59dd2c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,5 @@ edition = "2021" [dependencies] bumpalo = { version = "3", features = ["collections"] } -chumsky = "1.0.0-alpha" lasso = "0.7" logos = "0.13" diff --git a/src/main.rs b/src/main.rs index da3537e..113ec36 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,21 +1,13 @@ // Rhea -use bumpalo::Bump; -use logos::Logos; -use std::io::{stdin, Read}; -use utils::default; - -mod syntax; mod utils; +use std::io::{stdin, Read}; +use utils::default; fn main() -> Result<(), Box> { let mut buf = default(); stdin().read_to_string(&mut buf)?; - let lexer = syntax::token::Token::lexer_with_extras(&buf, default()); - let arena = Bump::new(); - syntax::parser::parse_lexer(lexer, &arena); - Ok(()) } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs deleted file mode 100644 index fd54f81..0000000 --- a/src/syntax/ast.rs +++ /dev/null @@ -1,83 +0,0 @@ -use {super::token::IntLit, chumsky::span::SimpleSpan, lasso::Spur}; - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct Spanned { - pub item: T, - pub span: SimpleSpan, -} - -impl Spanned { - #[inline] - pub fn new(item: T, span: SimpleSpan) -> Self { - Self { item, span } - } -} - -pub type SpanExpr<'a> = Spanned>; -pub type ExprRef<'a> = &'a SpanExpr<'a>; -pub type ExprList<'a> = &'a [SpanExpr<'a>]; -pub type Ident = Spur; - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Expr<'a> { - Ident(Ident), - Path(ExprList<'a>), - Literal(Literal), - Call(ExprRef<'a>, ExprList<'a>), - Binary(Spanned, ExprRef<'a>, ExprRef<'a>), - Unary(Spanned, ExprRef<'a>), - BindLocal(Spanned, ExprRef<'a>, Option>), - BindIn( - Spanned, - ExprRef<'a>, - ExprList<'a>, - Option>, - ), - Set(ExprRef<'a>, ExprRef<'a>), - Match(ExprRef<'a>, &'a [(Spanned, SpanExpr<'a>)]), - Func(&'a [(Spanned, Spanned)], Spanned, ExprRef<'a>), - Block(ExprList<'a>), - Unit, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Type { - Ident(Ident), - Unit, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Pattern { - Ident(Ident), - Literal(Literal), - None, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Literal { - String(Spur), - Integer(IntLit), -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum BinaryOperator { - Plus, - Minus, - Star, - Slash, - And, - VLine, - Lt, - Gt, - Equ, - Nequ, - LtEqu, - GtEqu, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum UnaryOperator { - Tilde, - Minus, - Star, -} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs deleted file mode 100644 index 716da41..0000000 --- a/src/syntax/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod ast; -pub mod parser; -pub mod token; diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs deleted file mode 100644 index 3dd8aef..0000000 --- a/src/syntax/parser.rs +++ /dev/null @@ -1,284 +0,0 @@ -use super::ast::Type; - -use { - super::{ - ast::{BinaryOperator, Expr, Literal, Pattern, SpanExpr, Spanned, UnaryOperator}, - token::Token, - }, - crate::utils::Pipe, - bumpalo::Bump, - chumsky::{ - extra::Full, - input::{Stream, ValueInput}, - prelude::*, - }, - logos::Lexer, -}; - -/// Equivalently-named unit variant mapping -macro_rules! equivmap { - ($src:ident, $target:ident, [$variant0:ident $(, $variant:ident)* $(,)?] $(,)?) => { - just($src::$variant0).to($target::$variant0) - $(.or(just($src::$variant).to($target::$variant)))* - }; -} - -fn expr<'a, I>() -> impl Parser<'a, I, SpanExpr<'a>, Extra<'a>> + Clone -where - I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>, -{ - recursive(|expr| { - let ident = select!(Token::Ident(id) => id); - - let literal = select! { - Token::Int(a) => Literal::Integer(a), - Token::String(a) => Literal::String(a) - }; - - let pattern = select! { - Token::Ident(id) => Pattern::Ident(id), - Token::Underscore => Pattern::None, - } - .or(literal.map(Pattern::Literal)) - .map_with_span(Spanned::new); - - let type_ = just([Token::LeftParen, Token::RightParen]) - .to(Type::Unit) - .or(ident.map(Type::Ident)) - .map_with_span(Spanned::new); - - let block = expr - .clone() - .separated_by(just(Token::Semicolon)) - .allow_trailing() - .pipe(arena_collect) - .delimited_by(just(Token::LeftCurly), just(Token::RightCurly)); - - let func = just(Token::Func) - .ignore_then( - pattern - .then_ignore(just(Token::Colon)) - .then(type_) - .separated_by(just(Token::Comma)) - .allow_trailing() - .pipe(arena_collect) - .delimited_by(just(Token::LeftParen), just(Token::RightParen)), - ) - .then_ignore(just(Token::Colon)) - .then(type_) - .then( - just(Token::Equ) - .ignore_then(expr.clone()) - .or(block.clone().map(Expr::Block).map_with_span(Spanned::new)), - ) - .map_with_state(|((params, ret), expr), _, state| { - Expr::Func(params, ret, state.arena.alloc(expr)) - }); - - let atom = literal - .map(Expr::Literal) - .or(just([Token::LeftParen, Token::RightParen]).to(Expr::Unit)) - .or(ident.map(Expr::Ident)) - .or(func) - .map_with_span(Spanned::new) - .or(expr - .clone() - .delimited_by(just(Token::LeftParen), just(Token::RightParen))); - - // (expr1, expr2, …) - let call = atom.clone().foldl_with_state( - expr.clone() - .separated_by(just(Token::Comma)) - .allow_trailing() - .pipe(arena_collect) - .delimited_by(just(Token::LeftParen), just(Token::RightParen)) - .map_with_span(Spanned::new) - .repeated(), - |expr, paramlist, state: &mut State| { - Spanned::new( - Expr::Call(state.arena.alloc(expr), paramlist.item), - merge_spans(expr.span, paramlist.span), - ) - }, - ); - - let path = call - .clone() - .map_with_state(|item, _, state| bumpalo::vec![in state.arena; item]) - .foldl( - just(Token::Dot).ignore_then(call).repeated(), - |mut v, expr| { - v.push(expr); - v - }, - ) - .map(|v| Expr::Path(v.into_bump_slice())) - .map_with_span(Spanned::new); - - /* let unary = equivmap!(Token, UnaryOperator, [Minus, Tilde]) - .map_with_span(Spanned::new) - .repeated() - .foldr_with_state(call, |op, expr, state| { - Spanned::new( - Expr::Unary(op, state.arena.alloc(expr)), - merge_spans(op.span, expr.span), - ) - }); - */ - - let unary = path.foldl_with_state( - just([Token::Dot, Token::Star]) - .to(UnaryOperator::Star) - .or(just(Token::Tilde).to(UnaryOperator::Tilde)) - .map_with_span(Spanned::new) - .repeated(), - |expr, op, state| { - Spanned::new( - Expr::Unary(op, state.arena.alloc(expr)), - merge_spans(expr.span, op.span), - ) - }, - ); - - // OP - let binary = unary.clone().foldl_with_state( - equivmap!( - Token, - BinaryOperator, - [Plus, Minus, Star, Slash, And, VLine, Lt, Gt, Equ, Nequ, LtEqu, GtEqu], - ) - .map_with_span(Spanned::new) - .then(unary) - .repeated(), - |l, (op, r), state: &mut State| { - Spanned::new( - Expr::Binary(op, state.arena.alloc(l), state.arena.alloc(r)), - merge_spans(l.span, r.span), - ) - }, - ); - - let bind = { - let start = pattern.then_ignore(just(Token::Colon)).then(expr.clone()); // := - let else_ = just(Token::Else).ignore_then(block.clone()).or_not(); // else {…} - - // := [else {…}] - let local = start.clone().then(else_.clone()).map_with_state( - |((pat, expr), else_), _, state| { - Expr::BindLocal(pat, &*state.arena.alloc(expr), else_) - }, - ); - - // := {…} else {…} - let in_ = start.then(block.clone()).then(else_).map_with_state( - |(((pat, expr), block), else_), _, state| { - Expr::BindIn(pat, &*state.arena.alloc(expr), block, else_) - }, - ); - - in_.or(local) - }; - - // - let set = atom - .clone() - .then_ignore(just(Token::LArrow)) - .then(expr.clone()) - .map_with_state(|(place, expr), _, state| { - Expr::Set(state.arena.alloc(place), state.arena.alloc(expr)) - }); - - // .match { , … } - let match_ = atom - .clone() - .then_ignore(just([Token::Dot, Token::Match])) - .then( - pattern - .then_ignore(just(Token::RArrow)) - .then(expr) - .separated_by(just(Token::Comma)) - .allow_trailing() - .pipe(arena_collect) - .delimited_by(just(Token::LeftCurly), just(Token::RightCurly)), - ) - .map_with_state(|(expr, branches), _, state| { - Expr::Match(state.arena.alloc(expr), branches) - }); - - bind.or(set) - .or(match_) - .or(block.map(Expr::Block)) - .map_with_span(Spanned::new) - .or(binary) - .or(atom) - }) -} - -pub struct State<'a> { - pub arena: &'a Bump, -} - -type Extra<'a> = Full, State<'a>, ()>; -type ParseResult = (); - -pub fn parse_input<'a>( - input: impl ValueInput<'a, Token = Token, Span = SimpleSpan>, - arena: &'a Bump, -) -> ParseResult { - println!( - "{:?}", - expr() - .separated_by(just(Token::Semicolon)) - .allow_trailing() - .pipe(arena_collect) - .parse_with_state(input, &mut State { arena }) - ); -} - -pub fn parse_iter( - input: impl Iterator, - eoi: impl Into, - arena: &Bump, -) -> ParseResult { - parse_input(Stream::from_iter(input).spanned(eoi.into()), arena) -} - -pub fn parse_lexer(input: Lexer, arena: &Bump) -> ParseResult { - let end = input.span().end; - parse_iter( - input - .spanned() - .map(|(token, span)| (token.unwrap_or(Token::Invalid), span.into())), - end..end + 1, - arena, - ) -} - -fn arena_collect<'a, I, O: 'a>( - parser: impl IterParser<'a, I, O, Extra<'a>> + Clone, -) -> impl Parser<'a, I, &'a [O], Extra<'a>> + Clone -where - I: Input<'a, Span = SimpleSpan, Token = Token>, -{ - empty() - .map_with_state(|_, _, state: &mut State| bumpalo::vec![in state.arena]) - .foldl(parser, |mut v, o| { - v.push(o); - v - }) - .map(bumpalo::collections::Vec::into_bump_slice) -} - -fn arena_box<'a, I, O: 'a>( - parser: impl Parser<'a, I, O, Extra<'a>> + Clone, -) -> impl Parser<'a, I, &'a O, Extra<'a>> + Clone -where - I: Input<'a, Span = SimpleSpan, Token = Token>, -{ - parser.map_with_state(|item, _, state| &*state.arena.alloc(item)) -} - -#[inline] -fn merge_spans(start: SimpleSpan, end: SimpleSpan) -> SimpleSpan { - SimpleSpan::new(start.start, end.end) -} diff --git a/src/syntax/token.rs b/src/syntax/token.rs deleted file mode 100644 index 2651e09..0000000 --- a/src/syntax/token.rs +++ /dev/null @@ -1,112 +0,0 @@ -use lasso::Spur; -use logos::Lexer; - -use {lasso::Rodeo, logos::Logos}; - -#[derive(Default)] -pub struct Lextras { - pub interner: Rodeo, -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum IntLit { - Signed(i64), - Unsigned(u64), -} - -#[derive(Logos, Copy, Clone, Debug, PartialEq, Eq)] -#[logos(extras = Lextras)] -#[logos(skip r"[ \t\n\f]+")] -#[logos(skip r"-- .*")] -#[rustfmt::skip] -pub enum Token { - #[token("(")] LeftParen, - #[token(")")] RightParen, - #[token("{")] LeftCurly, - #[token("}")] RightCurly, - #[token(".")] Dot, - #[token(",")] Comma, - #[token(":")] Colon, - #[token(";")] Semicolon, - #[token("_")] Underscore, - - #[token("←")] //____ - #[token("<-")] LArrow, - #[token("→")] //____ - #[token("->")] RArrow, - - #[token(":>")] Pipe, - - #[token("+")] Plus, - #[token("-")] Minus, - #[token("*")] Star, - #[token("/")] Slash, - #[token("&")] And, - #[token("|")] VLine, - #[token("~")] Tilde, - - #[token("<")] Lt, - #[token(">")] Gt, - #[token("=")] Equ, - #[token("≠") ] //__ - #[token("/=")] Nequ, - #[token("≤") ] //___ - #[token("<=")] LtEqu, - #[token("≥") ] //___, - #[token(">=")] GtEqu, - - #[token("match")] Match, - #[token("else")] Else, - #[token("loop")] Loop, - #[token("const")] Const, - #[token("var")] Var, - #[token("func")] Func, - // Modules aren't real here ondra just variables with imported functions - #[token("module")] Module, - - #[regex( - r"\p{XID_Start}\p{XID_Continue}*", - |l| l.extras.interner.get_or_intern(l.slice()) - )] Ident(Spur), - - #[token("»", better_string)] - #[regex( - "\"[^\"]*\"", - |l| { - let slice = l.slice(); - l.extras.interner.get_or_intern(&slice[1..slice.len() - 1]) - } - )] String(Spur), - - #[regex( - "-?[0-9]+", - |l| { - Some(if let Some(slice) = l.slice().strip_prefix('-') { - IntLit::Signed(slice.parse::().ok()?) - } else { - IntLit::Unsigned(l.slice().parse::().ok()?) - }) - } - )] Int(IntLit), - - Invalid, -} - -// For Evy, with love. -fn better_string(lexer: &mut Lexer) -> Option { - let mut count = 1; - for (ix, chr) in lexer.remainder().char_indices() { - match chr { - '«' => count -= 1, - '»' => count += 1, - _ => (), - } - - if count == 0 { - let slice = &lexer.remainder()[..ix]; - lexer.bump(ix + '«'.len_utf8()); - return Some(lexer.extras.interner.get_or_intern(slice)); - } - } - None -} diff --git a/src/utils.rs b/src/utils.rs index 6b4c784..31a900e 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -2,14 +2,3 @@ pub fn default() -> T { Default::default() } - -pub trait Pipe { - fn pipe(self, mut f: impl FnMut(Self) -> R) -> R - where - Self: Sized, - { - f(self) - } -} - -impl Pipe for T {}