From de94dfdfbad2c22f04f5c2ac7c1e405328de514f Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 13 Sep 2023 22:44:03 +0200 Subject: [PATCH] Paths --- src/syntax/ast.rs | 16 +++- src/syntax/parser.rs | 181 ++++++++++++++++++++++++++++++++++++------- src/syntax/token.rs | 31 +++++++- 3 files changed, 194 insertions(+), 34 deletions(-) diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 45fb1cf..fd54f81 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -26,22 +26,30 @@ pub enum Expr<'a> { Call(ExprRef<'a>, ExprList<'a>), Binary(Spanned, ExprRef<'a>, ExprRef<'a>), Unary(Spanned, ExprRef<'a>), - BindLocal(ExprRef<'a>, Spanned, Option>), + BindLocal(Spanned, ExprRef<'a>, Option>), BindIn( - ExprRef<'a>, Spanned, + ExprRef<'a>, ExprList<'a>, Option>, ), - Match(ExprRef<'a>, &'a [(Spanned, SpanExpr<'a>)]), Set(ExprRef<'a>, ExprRef<'a>), - Func(Ident, &'a [Spanned], ExprRef<'a>), + Match(ExprRef<'a>, &'a [(Spanned, SpanExpr<'a>)]), + Func(&'a [(Spanned, Spanned)], Spanned, ExprRef<'a>), Block(ExprList<'a>), + Unit, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Type { + Ident(Ident), + Unit, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Pattern { Ident(Ident), + Literal(Literal), None, } diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index 78b7b31..efd1e72 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -1,8 +1,8 @@ -use super::ast::{BinaryOperator, Literal, SpanExpr, UnaryOperator}; +use super::ast::Type; use { super::{ - ast::{Expr, Spanned}, + ast::{BinaryOperator, Expr, Literal, Pattern, SpanExpr, Spanned, UnaryOperator}, token::Token, }, crate::utils::Pipe, @@ -28,39 +28,107 @@ where I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>, { recursive(|expr| { - let ident = select!(Token::Ident(id) => Expr::Ident(id)); + let ident = select!(Token::Ident(id) => id); let literal = select! { Token::Int(a) => Literal::Integer(a), Token::String(a) => Literal::String(a) + }; + + let pattern = select! { + Token::Ident(id) => Pattern::Ident(id), + Token::Underscore => Pattern::None, } - .map(Expr::Literal); + .or(literal.map(Pattern::Literal)) + .map_with_span(Spanned::new); - let atom = literal.or(ident).map_with_span(Spanned::new).or(expr - .clone() - .delimited_by(just(Token::LeftParen), just(Token::RightParen))); + let type_ = just([Token::LeftParen, Token::RightParen]) + .to(Type::Unit) + .or(ident.map(Type::Ident)) + .map_with_span(Spanned::new); - let call = atom + let block = expr .clone() - .foldl_with_state( - expr.clone() + .separated_by(just(Token::Semicolon)) + .allow_trailing() + .pipe(arena_collect) + .delimited_by(just(Token::LeftCurly), just(Token::RightCurly)); + + let func = just(Token::Func) + .ignore_then( + pattern + .then_ignore(just(Token::Colon)) + .then(type_) .separated_by(just(Token::Comma)) .allow_trailing() .pipe(arena_collect) - .delimited_by(just(Token::LeftParen), just(Token::RightParen)) - .map_with_span(Spanned::new) - .repeated(), - |expr, paramlist, state: &mut State| { - Spanned::new( - Expr::Call(state.arena.alloc(expr), paramlist.item), - merge_spans(expr.span, paramlist.span), - ) + .delimited_by(just(Token::LeftParen), just(Token::RightParen)), + ) + .then_ignore(just(Token::Colon)) + .then(type_) + .then( + just(Token::Equ) + .ignore_then(expr.clone()) + .or(block.clone().map(Expr::Block).map_with_span(Spanned::new)), + ) + .map_with_state(|((params, ret), expr), _, state| { + Expr::Func(params, ret, state.arena.alloc(expr)) + }); + + let atom = literal + .map(Expr::Literal) + .or(just([Token::LeftParen, Token::RightParen]).to(Expr::Unit)) + .or(ident.map(Expr::Ident)) + .or(func) + .map_with_span(Spanned::new) + .or(expr + .clone() + .delimited_by(just(Token::LeftParen), just(Token::RightParen))); + + // (expr1, expr2, …) + let call = atom.clone().foldl_with_state( + expr.clone() + .separated_by(just(Token::Comma)) + .allow_trailing() + .pipe(arena_collect) + .delimited_by(just(Token::LeftParen), just(Token::RightParen)) + .map_with_span(Spanned::new) + .repeated(), + |expr, paramlist, state: &mut State| { + Spanned::new( + Expr::Call(state.arena.alloc(expr), paramlist.item), + merge_spans(expr.span, paramlist.span), + ) + }, + ); + + let path = call + .clone() + .map_with_state(|item, _, state| bumpalo::vec![in state.arena; item]) + .foldl( + just(Token::Dot).ignore_then(call).repeated(), + |mut v, expr| { + v.push(expr); + v }, ) - .boxed(); + .map(|v| Expr::Path(v.into_bump_slice())) + .map_with_span(Spanned::new); - let unary = call; + /* let unary = equivmap!(Token, UnaryOperator, [Minus, Tilde]) + .map_with_span(Spanned::new) + .repeated() + .foldr_with_state(call, |op, expr, state| { + Spanned::new( + Expr::Unary(op, state.arena.alloc(expr)), + merge_spans(op.span, expr.span), + ) + }); + */ + let unary = path; + + // OP let binary = unary.clone().foldl_with_state( equivmap!( Token, @@ -78,31 +146,92 @@ where }, ); - binary.or(atom) + let bind = { + let start = pattern.then_ignore(just(Token::Colon)).then(expr.clone()); // := + let else_ = just(Token::Else).ignore_then(block.clone()).or_not(); // else {…} + + // := [else {…}] + let local = start.clone().then(else_.clone()).map_with_state( + |((pat, expr), else_), _, state| { + Expr::BindLocal(pat, &*state.arena.alloc(expr), else_) + }, + ); + + // := {…} else {…} + let in_ = start.then(block.clone()).then(else_).map_with_state( + |(((pat, expr), block), else_), _, state| { + Expr::BindIn(pat, &*state.arena.alloc(expr), block, else_) + }, + ); + + in_.or(local) + }; + + // + let set = atom + .clone() + .then_ignore(just(Token::LArrow)) + .then(expr.clone()) + .map_with_state(|(place, expr), _, state| { + Expr::Set(state.arena.alloc(place), state.arena.alloc(expr)) + }); + + // .match { , … } + let match_ = atom + .clone() + .then_ignore(just([Token::Dot, Token::Match])) + .then( + pattern + .then_ignore(just(Token::RArrow)) + .then(expr) + .separated_by(just(Token::Comma)) + .allow_trailing() + .pipe(arena_collect) + .delimited_by(just(Token::LeftCurly), just(Token::RightCurly)), + ) + .map_with_state(|(expr, branches), _, state| { + Expr::Match(state.arena.alloc(expr), branches) + }); + + bind.or(set) + .or(match_) + .or(block.map(Expr::Block)) + .map_with_span(Spanned::new) + .or(binary) + .or(atom) }) } pub struct State<'a> { pub arena: &'a Bump, } -pub type Extra<'a> = Full, State<'a>, ()>; + +type Extra<'a> = Full, State<'a>, ()>; +type ParseResult = (); pub fn parse_input<'a>( input: impl ValueInput<'a, Token = Token, Span = SimpleSpan>, arena: &'a Bump, -) { - println!("{:?}", expr().parse_with_state(input, &mut State { arena })); +) -> ParseResult { + println!( + "{:?}", + expr() + .separated_by(just(Token::Semicolon)) + .allow_trailing() + .pipe(arena_collect) + .parse_with_state(input, &mut State { arena }) + ); } pub fn parse_iter( input: impl Iterator, eoi: impl Into, arena: &Bump, -) { +) -> ParseResult { parse_input(Stream::from_iter(input).spanned(eoi.into()), arena) } -pub fn parse_lexer(input: Lexer, arena: &Bump) { +pub fn parse_lexer(input: Lexer, arena: &Bump) -> ParseResult { let end = input.span().end; parse_iter( input diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 6ce9a45..2651e09 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -1,4 +1,5 @@ use lasso::Spur; +use logos::Lexer; use {lasso::Rodeo, logos::Logos}; @@ -25,12 +26,14 @@ pub enum Token { #[token("}")] RightCurly, #[token(".")] Dot, #[token(",")] Comma, + #[token(":")] Colon, #[token(";")] Semicolon, - #[token(":=")] //__ - #[token("≔") ] Bind, + #[token("_")] Underscore, - #[token("←")] //_ - #[token("<-")] Set, + #[token("←")] //____ + #[token("<-")] LArrow, + #[token("→")] //____ + #[token("->")] RArrow, #[token(":>")] Pipe, @@ -66,6 +69,7 @@ pub enum Token { |l| l.extras.interner.get_or_intern(l.slice()) )] Ident(Spur), + #[token("»", better_string)] #[regex( "\"[^\"]*\"", |l| { @@ -87,3 +91,22 @@ pub enum Token { Invalid, } + +// For Evy, with love. +fn better_string(lexer: &mut Lexer) -> Option { + let mut count = 1; + for (ix, chr) in lexer.remainder().char_indices() { + match chr { + '«' => count -= 1, + '»' => count += 1, + _ => (), + } + + if count == 0 { + let slice = &lexer.remainder()[..ix]; + lexer.bump(ix + '«'.len_utf8()); + return Some(lexer.extras.interner.get_or_intern(slice)); + } + } + None +}