From e6bbbdd5bd797a3210f792ad49cd27eedabf26e8 Mon Sep 17 00:00:00 2001 From: azur Date: Wed, 1 Mar 2023 02:23:45 +0700 Subject: [PATCH] parser again --- b.hlm | 7 +- src/main.rs | 86 ++++++++++--------- src/parse/parse.rs | 206 +++++++++++++++++++++++++++++++++++++++++++++ src/parse/past.rs | 17 ++-- src/trans/ast.rs | 13 ++- src/trans/js.rs | 9 ++ src/trans/low.rs | 31 ++++--- src/trans/ty.rs | 2 + 8 files changed, 304 insertions(+), 67 deletions(-) diff --git a/b.hlm b/b.hlm index 2142a2b..aa8e810 100644 --- a/b.hlm +++ b/b.hlm @@ -1,5 +1,4 @@ -let foo : num = 1 in bar(foo) end +println((\x: num -> x + 35)(34)); +16---1*3/-f(16)+8%-2; -lambda (foo : num) -> unknown = bar(foo) - -let x : t = e1 in e2 end \ No newline at end of file +a(b, c(d(e, f), g(h), i), j(k, l), m); \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 12308f2..9b8cf90 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,54 +2,58 @@ pub mod parse; pub mod trans; -use parse::parse::lex; +use parse::parse::{lex, parse}; +use trans::low::{translate_expr, translate_js}; fn main() { - let input = r#" - println((\x: int -> x + 1)(1)); - "#; + let path = std::env::args().nth(1).expect("No file path provided"); + let src = std::fs::read_to_string(path).expect("Failed to read file"); - let tokens = lex(input.to_owned()); - println!("{:?}", tokens); + let (tokens, lex_errs) = lex(src.to_owned()); - // use parse::past::*; - // use trans::ty::Type; - // use trans::low::*; + let parse_errs = if let Some(tokens) = tokens { + let (ast, parse_errs) = parse(tokens, src.len()); - // let exprs = vec![ - // PExpr::Call(Box::new(PExpr::Sym("println".to_string())), vec![ - // PExpr::Str("Hello, world!".to_string()), - // ]), - // PExpr::Let { - // vars: vec![ - // ("x".to_string(), Type::Num, PExpr::Num(1)), - // ], - // body: Box::new(PExpr::Sym("x".to_string())), - // }, - // PExpr::Let { - // vars: vec![ - // ("x".to_string(), Type::Num, PExpr::Num(34)), - // ("y".to_string(), Type::Num, PExpr::Num(35)), - // ], - // body: Box::new(PExpr::BinaryOp( - // PBinaryOp::Add, - // Box::new(PExpr::Sym("x".to_string())), - // Box::new(PExpr::Sym("y".to_string())), - // )), - // }, - // ]; + if let Some(ast) = ast { + println!(); + println!("\x1b[90m───SOURCE─────────────────────────────────────────\x1b[0m"); + println!("{src}"); + println!("\x1b[90m───PARSE TREE─────────────────────────────────────\x1b[0m"); + for (e, _) in &ast { + println!("{}", { + let e = format!("{:?}", e); + if e.len() > 50 { + format!("{}...", &e[..47]) + } else { + e + } + }); + } + println!("\x1b[90m───INTERNAL AST───────────────────────────────────\x1b[0m"); + let nexprs = ast.into_iter().map(|(e, _)| translate_expr(e)).collect::>(); - // let nexprs = exprs.into_iter().map(translate_expr).collect::>(); + for expr in &nexprs { + println!("{}", expr); + } + println!("\x1b[90m───JS OUTPUT──────────────────────────────────────\x1b[0m"); + let jsexprs = nexprs.into_iter().map(translate_js).collect::>(); - // for expr in &nexprs { - // println!("{}", expr); - // } + for expr in &jsexprs { + println!("{}", expr); + } + println!(); + } - // println!("──────────────────────────────────────────────────"); + parse_errs + } else { + Vec::new() + }; - // let jsexprs = nexprs.into_iter().map(translate_js).collect::>(); - - // for expr in &jsexprs { - // println!("{}", expr); - // } + if !lex_errs.is_empty() || !parse_errs.is_empty() { + lex_errs + .into_iter() + .map(|e| e.map(|c| c.to_string())) + .chain(parse_errs.into_iter().map(|e| e.map(|t| t.to_string()))) + .for_each(|e| println!("{}", e)); + } } diff --git a/src/parse/parse.rs b/src/parse/parse.rs index 4210668..be97e13 100644 --- a/src/parse/parse.rs +++ b/src/parse/parse.rs @@ -1,6 +1,8 @@ #![allow(clippy::type_complexity)] use chumsky::{error, prelude::*, Stream}; use std::fmt::{Display, Formatter, Result as FmtResult}; +use crate::trans::ty::Type; + use super::past::*; #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] @@ -174,6 +176,32 @@ pub fn symbol_parser() -> impl P { .labelled("symbol") } +pub fn type_parser() -> impl P { + recursive(|ty| { + let litty = symbol_parser().map(|s| match s.as_str() { + "num" => Type::Num, + "str" => Type::Str, + "bool" => Type::Bool, + "?" => Type::Unknown, + _ => Type::Sym(s), + }); + + let fun = just(Token::Open(Delim::Paren)) + .ignore_then( + ty.clone() + .separated_by(just(Token::Comma)) + ) + .then_ignore(just(Token::Close(Delim::Paren))) + .then_ignore(just(Token::Arrow)) + .then(ty) + .map(|(args, ret)| Type::Fun(args, Box::new(ret))); + + litty + .or(fun) + .labelled("type") + }) +} + pub fn nested_parser<'a, T: 'a>( parser: impl P + 'a, delim: Delim, @@ -202,3 +230,181 @@ pub fn nested_parser<'a, T: 'a>( )) .boxed() } + +pub fn expr_parser() -> impl P> { + recursive(|expr: Recursive, Simple>| { + let lit = literal_parser().map(PExpr::Lit); + let sym = symbol_parser().map(PExpr::Sym); + + let vec = nested_parser( + expr.clone() + .separated_by(just(Token::Comma)) + .allow_trailing() + .map(Some), + Delim::Brack, + |_| None, + ) + .map(|xs| match xs { + Some(xs) => PExpr::Vec(xs), + None => PExpr::Vec(Vec::new()), + }) + .labelled("vector"); + + // (e) + let paren_expr = just(Token::Open(Delim::Paren)) + .ignore_then(expr.clone()) + .then_ignore(just(Token::Close(Delim::Paren))) + .map(|e| e.0) + .labelled("parenthesized expression"); + + // \[sym : type]* -> expr + let lam = just(Token::Lambda) + .ignore_then( + ( + symbol_parser() + .then_ignore(just(Token::Colon)) + .then(type_parser()) + ) + .repeated() + ) + .then_ignore(just(Token::Arrow)) + .then(expr.clone()) + .map(|(args, body)| PExpr::Lambda { + args, + body: Box::new(body), + }) + .labelled("lambda"); + + let atom = lit + .or(sym) + .or(vec) + .or(paren_expr) + .or(lam) + .map_with_span(|e, s| (e, s)) + .boxed() + .labelled("atom"); + + // e(e*) + let call = atom + .then( + nested_parser( + expr.clone() + .separated_by(just(Token::Comma)) + .allow_trailing() + .map(Some), + Delim::Paren, + |_| None, + ) + .or_not(), + ) + .map_with_span(|(f, args), s| match args { + Some(Some(args)) => (PExpr::Call(Box::new(f), args), s), + Some(None) => (PExpr::Error, s), + None => f, + }); + + // op e + let unary = choice(( + just(Token::Sub).to(PUnaryOp::Neg), + just(Token::Not).to(PUnaryOp::Not), + )) + .map_with_span(|op, s| (op, s)) + .repeated() + .then(call) + .foldr(|op, expr| { + let s = op.1.start()..expr.1.end(); + (PExpr::Unary(op, Box::new(expr)), s) + }) + .boxed(); + + let product = unary + .clone() + .then( + choice(( + just(Token::Mul).to(PBinaryOp::Mul), + just(Token::Div).to(PBinaryOp::Div), + just(Token::Mod).to(PBinaryOp::Mod), + )) + .map_with_span(|op, s| (op, s)) + .then(unary) + .repeated(), + ) + .foldl(|lhs, (op, rhs)| { + let s = lhs.1.start()..rhs.1.end(); + (PExpr::Binary(op, Box::new(lhs), Box::new(rhs)), s) + }) + .boxed(); + + let sum = product + .clone() + .then( + choice(( + just(Token::Add).to(PBinaryOp::Add), + just(Token::Sub).to(PBinaryOp::Sub), + )) + .map_with_span(|op, s| (op, s)) + .then(product) + .repeated(), + ) + .foldl(|lhs, (op, rhs)| { + let s = lhs.1.start()..rhs.1.end(); + (PExpr::Binary(op, Box::new(lhs), Box::new(rhs)), s) + }) + .boxed(); + + let comparison = sum + .clone() + .then( + choice(( + just(Token::Eq).to(PBinaryOp::Eq), + just(Token::Neq).to(PBinaryOp::Neq), + just(Token::Lt).to(PBinaryOp::Lt), + just(Token::Lte).to(PBinaryOp::Lte), + just(Token::Gt).to(PBinaryOp::Gt), + just(Token::Gte).to(PBinaryOp::Gte), + )) + .map_with_span(|op, s| (op, s)) + .then(sum) + .repeated(), + ) + .foldl(|lhs, (op, rhs)| { + let s = lhs.1.start()..rhs.1.end(); + (PExpr::Binary(op, Box::new(lhs), Box::new(rhs)), s) + }) + .boxed(); + + comparison + .clone() + .then( + choice(( + just(Token::And).to(PBinaryOp::And), + just(Token::Or).to(PBinaryOp::Or), + )) + .map_with_span(|op, s| (op, s)) + .then(comparison) + .repeated(), + ) + .foldl(|lhs, (op, rhs)| { + let s = lhs.1.start()..rhs.1.end(); + (PExpr::Binary(op, Box::new(lhs), Box::new(rhs)), s) + }) + .boxed() + }) +} + +pub fn exprs_parser() -> impl P>> { + expr_parser() + .then_ignore(just(Token::Semicolon)) + .repeated() +} + +pub fn parse( + tokens: Vec>, + len: usize, +) -> (Option>>, Vec>) { + let (ast, parse_error) = exprs_parser() + .then_ignore(end()) + .parse_recovery(Stream::from_iter(len..len + 1, tokens.into_iter())); + + (ast, parse_error) +} \ No newline at end of file diff --git a/src/parse/past.rs b/src/parse/past.rs index 4c02d9c..78900f8 100644 --- a/src/parse/past.rs +++ b/src/parse/past.rs @@ -1,6 +1,8 @@ use std::fmt::{Display, Formatter, Result as FmtResult}; use crate::trans::ty::*; +use super::parse::Spanned; + #[derive(Clone, Debug)] pub enum PUnaryOp { Neg, @@ -20,21 +22,22 @@ pub enum PLiteral { Num(i64), Str(String), Bool(bool) } /// Enum to represent a parsed expression #[derive(Clone, Debug)] pub enum PExpr { + Error, + Lit(PLiteral), Sym(String), + Vec(Vec>), - Vec(Vec), + Unary(Spanned, Box>), + Binary(Spanned, Box>, Box>), - UnaryOp(PUnaryOp, Box), - BinaryOp(PBinaryOp, Box, Box), - - Call(Box, Vec), + Call(Box>, Vec>), Lambda { args: Vec<(String, Type)>, - body: Box, + body: Box>, }, Let { vars: Vec<(String, Type, Self)>, body: Box, - } + }, } \ No newline at end of file diff --git a/src/trans/ast.rs b/src/trans/ast.rs index f3485bc..c3b4442 100644 --- a/src/trans/ast.rs +++ b/src/trans/ast.rs @@ -24,6 +24,7 @@ pub enum Literal { pub enum Expr { Lit(Literal), Sym(String), + Vec(Vec), UnaryOp(UnaryOp, Box), BinaryOp(BinaryOp, Box, Box), @@ -44,9 +45,17 @@ impl Display for Expr { Literal::Bool(b) => write!(f, "{}", b), }, Expr::Sym(s) => write!(f, "{}", s), + Expr::Vec(v) => { + write!(f, "[")?; + for (i, e) in v.iter().enumerate() { + if i > 0 { write!(f, " ")?; } + write!(f, "{}", e)?; + } + write!(f, "]") + }, - Expr::UnaryOp(op, e) => write!(f, "({:?} {})", op, e), - Expr::BinaryOp(op, e1, e2) => write!(f, "({:?} {} {})", op, e1, e2), + Expr::UnaryOp(op, e) => write!(f, "({} {})", format!("{:?}", op).to_lowercase(), e), + Expr::BinaryOp(op, e1, e2) => write!(f, "({} {} {})", format!("{:?}", op).to_lowercase(), e1, e2), Expr::Call(c, args) => { write!(f, "({}", c)?; diff --git a/src/trans/js.rs b/src/trans/js.rs index 06c654f..9329cfe 100644 --- a/src/trans/js.rs +++ b/src/trans/js.rs @@ -9,6 +9,7 @@ pub enum JSLiteral { Num(i64), Str(String), Bool(bool) } pub enum JSExpr { Lit(JSLiteral), Sym(String), + Array(Vec), Op(&'static str, Box, Option>), @@ -29,6 +30,14 @@ impl Display for JSExpr { JSLiteral::Bool(b) => write!(f, "{}", b), }, JSExpr::Sym(s) => write!(f, "{}", s), + JSExpr::Array(v) => { + write!(f, "[")?; + for (i, e) in v.iter().enumerate() { + if i > 0 { write!(f, ", ")?; } + write!(f, "{}", e)?; + } + write!(f, "]") + }, JSExpr::Op(op, lhs, rhs) => { match rhs { diff --git a/src/trans/low.rs b/src/trans/low.rs index 224b591..11283ca 100644 --- a/src/trans/low.rs +++ b/src/trans/low.rs @@ -6,18 +6,21 @@ use super::{ pub fn translate_expr(expr: PExpr) -> Expr { match expr { + PExpr::Error => panic!("Error in expression!"), + PExpr::Lit(l) => Expr::Lit(match l { PLiteral::Num(n) => Literal::Num(n), PLiteral::Str(s) => Literal::Str(s), PLiteral::Bool(b) => Literal::Bool(b), }), - PExpr::Sym(s) => Expr::Sym(s), + PExpr::Sym(s) => Expr::Sym(s), + PExpr::Vec(v) => Expr::Vec(v.into_iter().map(|e| translate_expr(e.0)).collect()), - PExpr::UnaryOp(op, e) => Expr::UnaryOp(match op { + PExpr::Unary(op, e) => Expr::UnaryOp(match op.0 { PUnaryOp::Neg => UnaryOp::Neg, PUnaryOp::Not => UnaryOp::Not, - }, Box::new(translate_expr(*e))), - PExpr::BinaryOp(op, e1, e2) => Expr::BinaryOp( + }, Box::new(translate_expr((*e).0))), + PExpr::Binary((op, _), e1, e2) => Expr::BinaryOp( match op { PBinaryOp::Add => BinaryOp::Add, PBinaryOp::Sub => BinaryOp::Sub, @@ -36,33 +39,34 @@ pub fn translate_expr(expr: PExpr) -> Expr { PBinaryOp::And => BinaryOp::And, PBinaryOp::Or => BinaryOp::Or, }, - Box::new(translate_expr(*e1)), - Box::new(translate_expr(*e2)), + Box::new(translate_expr((*e1).0)), + Box::new(translate_expr((*e2).0)), ), PExpr::Call(f, args) => Expr::Call( - Box::new(translate_expr(*f)), - args.into_iter().map(translate_expr).collect(), + Box::new(translate_expr((*f).0)), + args.into_iter().map(|a| translate_expr(a.0)).collect(), ), PExpr::Lambda { args, body } => Expr::Lambda { args, - body: Box::new(translate_expr(*body)), + body: Box::new(translate_expr((*body).0)), }, PExpr::Let { vars, body } => { - let mut expr = *body; // The expression we're building up + let mut expr: Expr = translate_expr(*body); // The expression we're building up for (name, ty, val) in vars.into_iter().rev() { // Reverse so we can build up the lambda // e.g.: let x : t = e1 in e2 end => (lambda (x : t) = e2)(e1) // Build up the lambda - expr = PExpr::Lambda { + expr = Expr::Lambda { args: vec![(name, ty)], body: Box::new(expr), }; // Call the lambda with the value - expr = PExpr::Call(Box::new(expr), vec![val]); + let val = translate_expr(val); + expr = Expr::Call(Box::new(expr), vec![val]); } - translate_expr(expr) + expr } } } @@ -75,6 +79,7 @@ pub fn translate_js(expr: Expr) -> JSExpr { Literal::Bool(b) => JSExpr::Lit(JSLiteral::Bool(b)), }, Expr::Sym(s) => JSExpr::Sym(s), + Expr::Vec(v) => JSExpr::Array(v.into_iter().map(translate_js).collect()), Expr::UnaryOp(op, e) => JSExpr::Op(match op { UnaryOp::Neg => "-", diff --git a/src/trans/ty.rs b/src/trans/ty.rs index 74988da..841de62 100644 --- a/src/trans/ty.rs +++ b/src/trans/ty.rs @@ -3,6 +3,7 @@ use std::fmt::{Display, Formatter, Result as FmtResult}; #[derive(Clone, Debug)] pub enum Type { Num, Str, Bool, + Sym(String), Fun(Vec, Box), Unknown, } @@ -13,6 +14,7 @@ impl Display for Type { Type::Num => write!(f, "num"), Type::Str => write!(f, "str"), Type::Bool => write!(f, "bool"), + Type::Sym(s) => write!(f, "{}", s), Type::Fun(args, ret) => { write!(f, "(")?; for (i, arg) in args.iter().enumerate() {