From fce1760198e221075b890be2b7af609bd3ef148f Mon Sep 17 00:00:00 2001 From: Natapat Samutpong Date: Thu, 17 Feb 2022 12:04:52 +0700 Subject: [PATCH] lexer + call --- example/ex.hyc | 4 +- src/front/parse.rs | 187 ++++++++++++++++++++++++++++++++++++--------- src/main.rs | 14 +++- 3 files changed, 163 insertions(+), 42 deletions(-) diff --git a/example/ex.hyc b/example/ex.hyc index 11f9ee5..80ba9bb 100644 --- a/example/ex.hyc +++ b/example/ex.hyc @@ -1,2 +1,2 @@ -fun add a b = a + b; -let foo = add (1, 2); \ No newline at end of file +fun foo a b = a + b; +let res = foo(34, 35); \ No newline at end of file diff --git a/src/front/parse.rs b/src/front/parse.rs index 0c74b5d..7330752 100644 --- a/src/front/parse.rs +++ b/src/front/parse.rs @@ -1,10 +1,101 @@ use chumsky::prelude::*; +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum Token { + // Types + Int(i64), Float(String), + Boolean(bool), String(String), + Ident(String), + + // Symbols + Operator(String), + Delimiter(char), + Semicolon, + Assign, Colon, + Comma, + + // Keywords + Let, Fun, +} + +pub type Span = std::ops::Range<usize>; +pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = Simple<char>> { + let int = text::int(10) + .map(|s: String| Token::Int(s.parse().unwrap())); + + let float = text::int(10) + .then_ignore(just('.')) + .chain::<char, _, _>(text::digits(10)) + .collect::<String>() + .map(|s: String| Token::Float(s)); + + let string = just('"') + .ignore_then(filter(|c| *c != '\\' && *c != '"').repeated()) + .then_ignore(just('"')) + .collect::<String>() + .map(|s: String| Token::String(s)); + + let operator = choice(( + just("+"), + just("-"), + just("*"), + just("/"), + just("%"), + + just("!"), + just("=="), + just("!="), + just("<"), + just(">"), + just("<="), + just(">="), + )).map(|c| Token::Operator(c.to_string())); + + let delimiter = choice(( + just('('), + just(')'), + )).map(|c| Token::Delimiter(c)); + + let symbol = choice(( + just(';').to(Token::Semicolon), + just('=').to(Token::Assign), + just(':').to(Token::Colon), + 
just(',').to(Token::Comma), + )); + + let keyword = text::ident().map(|s: String| match s.as_str() { + "true" => Token::Boolean(true), + "false" => Token::Boolean(false), + + "let" => Token::Let, + "fun" => Token::Fun, + _ => Token::Ident(s), + }); + + let token = int + .or(float) + .or(string) + .or(operator) + .or(delimiter) + .or(symbol) + .or(keyword) + .recover_with(skip_then_retry_until([])); + + let comment = just("//").then(take_until(just('\n'))).padded(); + + token + .padded_by(comment.repeated()) + .map_with_span(|token, span| (token, span)) + .padded() + .repeated() +} + #[derive(Clone, Debug)] pub enum Expr { - Int(i64), - Float(f64), + Int(i64), Float(f64), + Boolean(bool), String(String), Ident(String), + Unary { op: String, expr: Box<Self> }, Binary { op: String, left: Box<Self>, right: Box<Self> }, @@ -18,45 +109,67 @@ pub enum Expr { body: Box<Self>, }, Call { - name: String, + name: Box<Self>, args: Vec<Self>, }, } -fn expr_parser() -> impl Parser<char, Expr, Error = Simple<char>> { - let ident = text::ident().padded(); +fn expr_parser() -> impl Parser<Token, Expr, Error = Simple<Token>> + Clone { + let ident = filter_map(|span, token| match token { + Token::Ident(s) => Ok(s.clone()), + _ => Err(Simple::expected_input_found(span, Vec::new(), Some(token))), + }).labelled("identifier"); let expr = recursive(|expr| { - let int = text::int(10) - .map(|s: String| Expr::Int(s.parse().unwrap())); - - let float = text::int(10) - .then_ignore(just('.')) - .chain::<char, _, _>(text::digits(10)) - .collect::<String>() - .map(|s: String| Expr::Float(s.parse().unwrap())); + let literal = filter_map(|span, token| match token { + Token::Int(i) => Ok(Expr::Int(i)), + Token::Float(f) => Ok(Expr::Float(f.parse().unwrap())), + Token::Boolean(b) => Ok(Expr::Boolean(b)), + Token::String(s) => Ok(Expr::String(s)), + _ => Err(Simple::expected_input_found(span, Vec::new(), Some(token))), + }).labelled("literal"); - let call = ident - .then(expr.clone() - .separated_by(just(',')) - .allow_trailing() - .delimited_by(just('('), just(')'))) - .map(|(name, args)| Expr::Call { name, args }); + let 
items = expr.clone() + .chain(just(Token::Comma) + .ignore_then(expr.clone()).repeated()) + .then_ignore(just(Token::Comma).or_not()) + .or_not() + .map(|item| item.unwrap_or_else(Vec::new)); - let atom = int - .or(float) - .or(call) + let atom = literal .or(ident.map(Expr::Ident)) - .or(expr.delimited_by(just('('), just(')'))) + .or( + expr.clone() + .delimited_by(just(Token::Delimiter('(')), just(Token::Delimiter(')')))) .labelled("atom"); + + let call = atom + .then( + items + .delimited_by( + just(Token::Delimiter('(')), + just(Token::Delimiter(')'))) + .repeated() + ) + .foldl(|f, args| { + Expr::Call { + name: Box::new(f), + args, + } + }); - let unary = choice((just('-'), just('!'))) + let unary = choice(( + just(Token::Operator("-".to_string())).to("-"), + just(Token::Operator("!".to_string())).to("!"))) .repeated() - .then(atom) + .then(call) .foldr(|op, rhs| Expr::Unary { op: op.to_string(), expr: Box::new(rhs) }).labelled("unary"); let factor = unary.clone() - .then(choice((just('*'), just('/'))) + .then( + choice(( + just(Token::Operator("*".to_string())).to("*"), + just(Token::Operator("/".to_string())).to("/"))) .then(unary) .repeated()) .foldl(|lhs, (op, rhs)| Expr::Binary { @@ -66,7 +179,10 @@ fn expr_parser() -> impl Parser<char, Expr, Error = Simple<char>> { }).labelled("factor"); let term = factor.clone() - .then(choice((just('+'), just('-'))) + .then( + choice(( + just(Token::Operator("+".to_string())).to("+"), + just(Token::Operator("-".to_string())).to("-"))) .then(factor) .repeated()) .foldl(|lhs, (op, rhs)| Expr::Binary { @@ -75,26 +191,26 @@ fn expr_parser() -> impl Parser<char, Expr, Error = Simple<char>> { right: Box::new(rhs) }).labelled("term"); - term.padded() + term }).labelled("expression"); let declare = recursive(|decl| { - let declare_var = text::keyword("let") + let declare_var = just(Token::Let) .ignore_then(ident) - .then_ignore(just('=')) + .then_ignore(just(Token::Assign)) .then(expr.clone()) - .then_ignore(just(';')) + .then_ignore(just(Token::Semicolon)) .map(|(name, rhs)| Expr::Let { 
name, value: Box::new(rhs), }); - let declare_fun = text::keyword("fun") + let declare_fun = just(Token::Fun) .ignore_then(ident) .then(ident.repeated()) - .then_ignore(just('=')) + .then_ignore(just(Token::Assign)) .then(expr.clone()) - .then_ignore(just(';')) + .then_ignore(just(Token::Semicolon)) .map(|((name, args), body)| Expr::Fun { name, args, @@ -104,13 +220,12 @@ fn expr_parser() -> impl Parser<char, Expr, Error = Simple<char>> { declare_var .or(declare_fun) .or(expr) - .padded() }); declare } -pub fn parser() -> impl Parser<char, Vec<Expr>, Error = Simple<char>> { +pub fn parser() -> impl Parser<Token, Vec<Expr>, Error = Simple<Token>> + Clone { expr_parser() .repeated() .then_ignore(end()) diff --git a/src/main.rs b/src/main.rs index 40622a5..b714f95 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use std::fs; -use chumsky::Parser; +use chumsky::{Parser, Stream}; use clap::Parser as ArgParser; /// Arguments handler. @@ -10,15 +10,21 @@ use args::{Args, Options}; /// Front-end of the language. /// Contains lexer, parser and token types. pub mod front; -use front::parse::parser; +use front::parse::{lexer, parser}; fn main() { let args = Args::parse(); match args.options { Options::Compile { input: src, ast: _print_ast } => { let src = fs::read_to_string(src).expect("Failed to read file"); - let tokens = parser().parse_recovery(src.as_str()); - println!("{:?}", tokens); + let (tokens, lex_error) = lexer().parse_recovery(src.as_str()); + let len = src.chars().count(); + let (ast, parse_error) = parser().parse_recovery(Stream::from_iter(len..len + 1, tokens.clone().unwrap().into_iter())); + if parse_error.is_empty() { + println!("{:#?}", ast); + } else { + println!("{:?}", parse_error); + } }, } } \ No newline at end of file