From 91f89d7ef6994410b7e176a3ffdb7218a9a1ecf7 Mon Sep 17 00:00:00 2001 From: Natapat Samutpong Date: Wed, 16 Feb 2022 22:36:33 +0700 Subject: [PATCH] rewrote with chumsky --- Cargo.lock | 103 ++++++++++++--- Cargo.toml | 2 +- README.md | 11 +- example/ex.hyc | 20 +-- src/front/helper.rs | 31 ----- src/front/lex.rs | 157 ---------------------- src/front/mod.rs | 6 +- src/front/model.rs | 162 ----------------------- src/front/parse.rs | 117 +++++++++++++++++ src/front/parser.rs | 307 -------------------------------------------- src/main.rs | 18 +-- 11 files changed, 212 insertions(+), 722 deletions(-) delete mode 100644 src/front/helper.rs delete mode 100644 src/front/lex.rs delete mode 100644 src/front/model.rs create mode 100644 src/front/parse.rs delete mode 100644 src/front/parser.rs diff --git a/Cargo.lock b/Cargo.lock index 9ccfa34..8bbedb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" +dependencies = [ + "const-random", +] + [[package]] name = "atty" version = "0.2.14" @@ -25,6 +34,21 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chumsky" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4" +dependencies = [ + "ahash", +] + [[package]] name = "clap" version = "3.0.14" @@ -55,6 +79,45 @@ dependencies = [ "syn", ] +[[package]] +name = "const-random" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4" +dependencies = [ + "const-random-macro", + "proc-macro-hack", +] + +[[package]] +name = "const-random-macro" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" +dependencies = [ + "getrandom", + "lazy_static", + "proc-macro-hack", + "tiny-keccak", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "getrandom" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "hashbrown" version = "0.11.2" @@ -80,8 +143,8 @@ dependencies = [ name = "hycron" version = "0.1.0" dependencies = [ + "chumsky", "clap", - "nom", ] [[package]] @@ -112,23 +175,6 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "nom" -version = "7.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109" -dependencies = [ - "memchr", - "minimal-lexical", - "version_check", -] - [[package]] name = "os_str_bytes" version = "6.0.0" @@ -162,6 +208,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + [[package]] name = "proc-macro2" version = "1.0.36" @@ -212,6 +264,15 @@ version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80" +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "unicode-xid" version = "0.2.2" @@ -224,6 +285,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index f2faba4..0eff5bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,4 +7,4 @@ edition = "2021" [dependencies] clap = { version = "3.0.14", features = ["derive"] } -nom = "7.1.0" \ No newline at end of file +chumsky = "0.8.0" \ No newline at end of file diff --git a/README.md b/README.md index 2781401..00b68b5 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,7 @@ # Hycron Programming language -``` -func main :: () -> Int = { - let msg :: String = "Hello, World"; - puts(msg); - return 1; -}; -``` - # TODO - Compliation - Optimization -- Use [chumsky](https://github.com/zesterer/chumsky) instead of [nom](https://github.com/Geal/nom) for parsing -- Error reporting (better with chumsky) \ No newline at end of file +- Error reporting \ No newline at end of file diff --git a/example/ex.hyc b/example/ex.hyc index edcb1f1..11f9ee5 100644 --- a/example/ex.hyc +++ b/example/ex.hyc @@ -1,18 +1,2 @@ -import "path/to/library.hyc"; - -// user defined function -func foo :: (a, b) -> Bool = { - return a == b; -}; - -// entry point -func main :: () -> Int = { - // if else in variable definition - let cond_str :: String = if foo(1, 1) { return "t" } else { return "f" }; - - // Infix operator - let n :: Bool = 2 == 2; - // Prefix operator - let m :: Bool = !n; - puts(m); -}; \ No newline at end of file +fun add a b = a + b; +let foo = add (1, 2); \ No newline at end of file diff --git a/src/front/helper.rs b/src/front/helper.rs deleted file mode 100644 index 452371e..0000000 --- a/src/front/helper.rs +++ /dev/null @@ -1,31 +0,0 @@ -use std::str::{self, Utf8Error, FromStr}; - -pub type Bytes = [u8]; - -#[macro_export] -macro_rules! syntax { - ($func_name: ident, $tag_string: literal, $output_token: expr) => { - fn $func_name<'a>(s: &'a Bytes) -> IResult<&Bytes, Token> { - map(tag($tag_string), |_| $output_token)(s) - } - }; -} - -pub fn concat_slice_vec(a: &Bytes, b: Vec) -> Vec { - let mut result = a.to_vec(); - result.extend(&b); - result -} - -pub fn convert_vec_utf8(v: Vec) -> Result { - let slice = v.as_slice(); - str::from_utf8(slice).map(|s| s.to_owned()) -} - -pub fn str_from_bytes(c: &Bytes) -> Result<&str, Utf8Error> { - str::from_utf8(c) -} - -pub fn str_to_from_str(c: &str) -> Result { - FromStr::from_str(c) -} \ No newline at end of file diff --git a/src/front/lex.rs b/src/front/lex.rs deleted file mode 100644 index 4344069..0000000 --- a/src/front/lex.rs +++ /dev/null @@ -1,157 +0,0 @@ -use nom::{ - branch::alt, - bytes::complete::{tag, take, take_until}, - character::complete::{multispace0, alphanumeric1, alpha1, digit1}, - combinator::{map, map_res, recognize}, - IResult, - multi::many0, - sequence::{delimited, pair}, AsBytes, -}; - -use crate::syntax; -use super::{ - model::Token, - helper::{Bytes, convert_vec_utf8, concat_slice_vec, str_from_bytes, str_to_from_str}, -}; - -// Comparison -syntax! { equal_operator , "==", Token::Eq } -syntax! { not_equal_operator , "!=", Token::NEq } -syntax! { less_than_operator , "<" , Token::Lt } -syntax! { greater_than_operator , ">" , Token::Gt } -syntax! { less_than_equal_operator , "<=", Token::Lte } -syntax! { greater_than_equal_operator , ">=", Token::Gte } - -// Arithmetic -syntax! { assign_operator , "=", Token::Assign } -syntax! { add_operator , "+", Token::Plus } -syntax! { subtract_operator , "-", Token::Minus } -syntax! { multiply_operator , "*", Token::Mul } -syntax! { divide_operator , "/", Token::Div } -syntax! { not_operator , "!", Token::Not } - -// Punctuations -syntax! { typehint_punctuation , "::", Token::Typehint } -syntax! { returnhint_punctuation , "->", Token::Return } -syntax! { lparen_punctuation , "(", Token::LParen } -syntax! { rparen_punctuation , ")", Token::RParen } -syntax! { lbrace_punctuation , "{", Token::LBrace } -syntax! { rbrace_punctuation , "}", Token::RBrace } -syntax! { semicolon_punctuation , ";", Token::Semicolon } -syntax! { colon_punctuation , ":", Token::Colon } -syntax! { comma_punctuation , ",", Token::Comma } - -// Operator & Punctuation -fn lex_operator_punctuation(input: &Bytes) -> IResult<&Bytes, Token> { - alt(( - typehint_punctuation, returnhint_punctuation, - lparen_punctuation, rparen_punctuation, - lbrace_punctuation, rbrace_punctuation, - semicolon_punctuation, colon_punctuation, comma_punctuation, - - equal_operator, not_equal_operator, - less_than_operator, greater_than_operator, - less_than_equal_operator, greater_than_equal_operator, - - assign_operator, - add_operator, subtract_operator, multiply_operator, divide_operator, - not_operator, - - ))(input) -} - -// String -fn string_value(input: &Bytes) -> IResult<&Bytes, Vec> { - let (i1, c1) = take(1usize)(input)?; - match c1.as_bytes() { - b"\"" => Ok((input, vec![])), - b"\\" => { - let (i2, c2) = take(1usize)(i1)?; - string_value(i2).map(|(slice, done)| (slice, concat_slice_vec(c2, done))) - } - c => string_value(i1).map(|(slice, done)| (slice, concat_slice_vec(c, done))) - } -} - -fn string(input: &Bytes) -> IResult<&Bytes, String> { - delimited(tag("\""), map_res(string_value, convert_vec_utf8), tag("\""))(input) -} - -fn lex_string(input: &Bytes) -> IResult<&Bytes, Token> { - map(string, |s| Token::String(s))(input) -} - -// Reserved keywords & Identifiers -fn lex_reserved_identifier(input: &Bytes) -> IResult<&Bytes, Token> { - map_res( - recognize(pair( - alt((alpha1, tag("_")) - ), - many0(alt((alphanumeric1, tag("_")))), - )), - |s| { - let c = str_from_bytes(s); - c.map(|syntax| match syntax { - "import" => Token::Import, - "if" => Token::If, - "else" => Token::Else, - "let" => Token::Let, - "func" => Token::Func, - "return" => Token::Return, - "true" => Token::Bool(true), - "false" => Token::Bool(false), - _ => Token::Identifier(syntax.to_string()), - }) - }, - )(input) -} - -// Integers -fn lex_integer(input: &Bytes) -> IResult<&Bytes, Token> { - map( - map_res( - map_res(digit1, str_from_bytes), - str_to_from_str, - ), - Token::Int, - )(input) -} - -// Illegal tokens -fn lex_illegal(input: &Bytes) -> IResult<&Bytes, Token> { - map(take(1usize), |_| Token::Illegal)(input) -} - -fn lex_comment(input: &Bytes) -> IResult<&Bytes, ()> { - let (i1, c1) = take(2usize)(input)?; - if c1.as_bytes() == b"//" { - let (i2, _) = take_until("\n")(i1)?; - let (i3, _) = take(1usize)(i2)?; - let (i4, _) = multispace0(i3)?; - let (i5, _) = lex_comment(i4)?; - Ok((i5, ())) - } else { Ok((input, ())) } -} - -// Tokens -fn lex_token(input: &Bytes) -> IResult<&Bytes, Token> { - let (i1, _) = lex_comment(input)?; - alt(( - lex_operator_punctuation, - lex_reserved_identifier, - lex_string, - lex_integer, - lex_illegal, - ))(i1) -} - -fn lex_tokens(input: &Bytes) -> IResult<&Bytes, Vec> { - many0(delimited(multispace0, lex_token, multispace0))(input) -} - -pub struct Lexer; -impl Lexer { - pub fn lex_tokens(input: &Bytes) -> IResult<&Bytes, Vec> { - lex_tokens(input).map(|(slice, result)| (slice, [&result[..], &vec![Token::EndOfFile][..]].concat())) - } -} \ No newline at end of file diff --git a/src/front/mod.rs b/src/front/mod.rs index 95506e6..329584d 100644 --- a/src/front/mod.rs +++ b/src/front/mod.rs @@ -1,5 +1 @@ -pub mod model; -pub mod helper; - -pub mod lex; -pub mod parser; \ No newline at end of file +pub mod parse; \ No newline at end of file diff --git a/src/front/model.rs b/src/front/model.rs deleted file mode 100644 index 84b3b6c..0000000 --- a/src/front/model.rs +++ /dev/null @@ -1,162 +0,0 @@ -use std::iter::Enumerate; - -use nom::{InputTake, Needed, InputIter, InputLength}; - -#[derive(Clone, Debug, PartialEq)] -pub enum Token { - Illegal, EndOfFile, - - Identifier(String), String(String), - Int(i64), Bool(bool), - - Assign, Typehint, Returnhint, - - Plus, Minus, Mul, Div, Not, - Eq, NEq, Lt, Gt, Lte, Gte, - - LParen, RParen, - LBrace, RBrace, - Semicolon, Colon, Comma, - - If, Else, Let, Func, Return, - Import, -} - -/// Token struct with position information. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Tokens<'a> { - pub tokens: &'a [Token], - pub start: usize, pub end: usize, -} - -impl<'a> Tokens<'a> { - pub fn new(tokens: &'a [Token]) -> Self { - Tokens { tokens, start: 0, end: tokens.len(), } - } -} - -impl<'a> InputTake for Tokens<'a> { - #[inline] - fn take(&self, count: usize) -> Self { - Tokens { - tokens: &self.tokens[0..count], - start: 0, - end: count, - } - } - - #[inline] - fn take_split(&self, count: usize) -> (Self, Self) { - let (prefix, suffix) = self.tokens.split_at(count); - let first = Tokens { - tokens: prefix, - start: 0, - end: prefix.len(), - }; - let second = Tokens { - tokens: suffix, - start: 0, - end: suffix.len(), - }; - (second, first) - } -} - -impl<'a> InputLength for Tokens<'a> { - #[inline] - fn input_len(&self) -> usize { - self.tokens.len() - } -} - -impl<'a> InputIter for Tokens<'a> { - type Item = &'a Token; - type Iter = Enumerate<::std::slice::Iter<'a, Token>>; - type IterElem = ::std::slice::Iter<'a, Token>; - - #[inline] - fn iter_indices(&self) -> Enumerate<::std::slice::Iter<'a, Token>> { - self.tokens.iter().enumerate() - } - - #[inline] - fn iter_elements(&self) -> ::std::slice::Iter<'a, Token> { - self.tokens.iter() - } - - #[inline] - fn position

(&self, predicate: P) -> Option - where P: Fn(Self::Item) -> bool { - self.tokens.iter().position(predicate) - } - - #[inline] - fn slice_index(&self, count: usize) -> Result { - if self.tokens.len() >= count { Ok(count) } - else { Err(Needed::Unknown) } - } -} - -pub type Program = Vec; - -#[derive(Clone, Debug, PartialEq)] -pub enum Stmt { - Import(Literal), - - Let(Ident, Ident, Expr), - Func(Ident, Vec, Ident, Vec), - Call(Ident, Vec), - Return(Expr), -} - -#[derive(Clone, Debug, PartialEq)] -pub enum Expr { - Ident(Ident), Literal(Literal), - Array(Vec), - Prefix(Prefix, Box), - Infix(Infix, Box, Box), - If { - cond: Box, - then: Program, - else_: Option, - }, - Func { - name: Ident, - args: Vec, - body: Program, - }, - Call { - func: Box, - args: Vec, - }, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum Literal { - Int(i64), Bool(bool), String(String), -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Ident(pub String); - -#[derive(Clone, Debug, PartialEq)] -pub enum Prefix { - Plus, Minus, - Not, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum Infix { - Plus, Minus, Mul, Div, - Eq, NEq, Lt, Gt, Lte, Gte, -} - -#[derive(Clone, Debug, PartialEq, PartialOrd)] -pub enum Precedence { - Lowest, - Equals, - LessGreater, - Sum, - Product, - Call, -} \ No newline at end of file diff --git a/src/front/parse.rs b/src/front/parse.rs new file mode 100644 index 0000000..0c74b5d --- /dev/null +++ b/src/front/parse.rs @@ -0,0 +1,117 @@ +use chumsky::prelude::*; + +#[derive(Clone, Debug)] +pub enum Expr { + Int(i64), + Float(f64), + Ident(String), + Unary { op: String, expr: Box }, + Binary { op: String, left: Box, right: Box }, + + Let { + name: String, + value: Box, + }, + Fun { + name: String, + args: Vec, + body: Box, + }, + Call { + name: String, + args: Vec, + }, +} + +fn expr_parser() -> impl Parser> { + let ident = text::ident().padded(); + + let expr = recursive(|expr| { + let int = text::int(10) + .map(|s: String| Expr::Int(s.parse().unwrap())); + + let float = text::int(10) + .then_ignore(just('.')) + .chain::(text::digits(10)) + .collect::() + .map(|s: String| Expr::Float(s.parse().unwrap())); + + let call = ident + .then(expr.clone() + .separated_by(just(',')) + .allow_trailing() + .delimited_by(just('('), just(')'))) + .map(|(name, args)| Expr::Call { name, args }); + + let atom = int + .or(float) + .or(call) + .or(ident.map(Expr::Ident)) + .or(expr.delimited_by(just('('), just(')'))) + .labelled("atom"); + + let unary = choice((just('-'), just('!'))) + .repeated() + .then(atom) + .foldr(|op, rhs| Expr::Unary { op: op.to_string(), expr: Box::new(rhs) }).labelled("unary"); + + let factor = unary.clone() + .then(choice((just('*'), just('/'))) + .then(unary) + .repeated()) + .foldl(|lhs, (op, rhs)| Expr::Binary { + op: op.to_string(), + left: Box::new(lhs), + right: Box::new(rhs) + }).labelled("factor"); + + let term = factor.clone() + .then(choice((just('+'), just('-'))) + .then(factor) + .repeated()) + .foldl(|lhs, (op, rhs)| Expr::Binary { + op: op.to_string(), + left: Box::new(lhs), + right: Box::new(rhs) + }).labelled("term"); + + term.padded() + }).labelled("expression"); + + let declare = recursive(|decl| { + let declare_var = text::keyword("let") + .ignore_then(ident) + .then_ignore(just('=')) + .then(expr.clone()) + .then_ignore(just(';')) + .map(|(name, rhs)| Expr::Let { + name, + value: Box::new(rhs), + }); + + let declare_fun = text::keyword("fun") + .ignore_then(ident) + .then(ident.repeated()) + .then_ignore(just('=')) + .then(expr.clone()) + .then_ignore(just(';')) + .map(|((name, args), body)| Expr::Fun { + name, + args, + body: Box::new(body), + }); + + declare_var + .or(declare_fun) + .or(expr) + .padded() + }); + + declare +} + +pub fn parser() -> impl Parser, Error = Simple> { + expr_parser() + .repeated() + .then_ignore(end()) +} \ No newline at end of file diff --git a/src/front/parser.rs b/src/front/parser.rs deleted file mode 100644 index e8de9f4..0000000 --- a/src/front/parser.rs +++ /dev/null @@ -1,307 +0,0 @@ -use nom::{ - branch::alt, - bytes::complete::take, - combinator::{verify, map, opt}, - Err, - error::{Error, ErrorKind}, - IResult, - multi::many0, - sequence::{terminated, tuple, pair, preceded, delimited}, error_position, -}; - -use super::model::{Token, Tokens, Precedence, Infix, Program, Stmt, Expr, Ident, Literal, Prefix}; - -macro_rules! tag_token ( - ($func_name:ident, $tag: expr) => ( - fn $func_name(tokens: Tokens) -> IResult { - verify(take(1usize), |t: &Tokens| t.tokens[0] == $tag)(tokens) - } - ) -); - -tag_token!(tag_import, Token::Import); - -tag_token!(tag_let, Token::Let); -tag_token!(tag_func, Token::Func); -tag_token!(tag_return, Token::Return); -tag_token!(tag_if, Token::If); -tag_token!(tag_else, Token::Else); - -tag_token!(tag_plus, Token::Plus); -tag_token!(tag_minus, Token::Minus); -tag_token!(tag_not, Token::Not); - -tag_token!(tag_assign, Token::Assign); -tag_token!(tag_typehint, Token::Typehint); -tag_token!(tag_returnhint, Token::Return); -tag_token!(tag_semicolon, Token::Semicolon); -tag_token!(tag_lparen, Token::LParen); -tag_token!(tag_rparen, Token::RParen); -tag_token!(tag_lbrace, Token::LBrace); -tag_token!(tag_rbrace, Token::RBrace); -tag_token!(tag_comma, Token::Comma); -tag_token!(tag_end_of_file, Token::EndOfFile); - -fn infix_operator(token: &Token) -> (Precedence, Option) { - match *token { - Token::Eq => (Precedence::Equals, Some(Infix::Eq)), - Token::NEq => (Precedence::Equals, Some(Infix::NEq)), - Token::Lt => (Precedence::LessGreater, Some(Infix::Lt)), - Token::Gt => (Precedence::LessGreater, Some(Infix::Gt)), - Token::Lte => (Precedence::LessGreater, Some(Infix::Lte)), - Token::Gte => (Precedence::LessGreater, Some(Infix::Gte)), - Token::Plus => (Precedence::Sum, Some(Infix::Plus)), - Token::Minus => (Precedence::Sum, Some(Infix::Minus)), - Token::Mul => (Precedence::Product, Some(Infix::Mul)), - Token::Div => (Precedence::Product, Some(Infix::Div)), - Token::LParen => (Precedence::Call, None), - _ => (Precedence::Lowest, None), - } -} - -fn parse_literal(input: Tokens) -> IResult { - let (i1, t1) = take(1usize)(input)?; - if t1.tokens.is_empty() { Err(Err::Error(Error::new(input, ErrorKind::Tag))) } - else { - match t1.tokens[0].clone() { - Token::Int(i) => Ok((i1, Literal::Int(i))), - Token::String(s) => Ok((i1, Literal::String(s))), - Token::Bool(b) => Ok((i1, Literal::Bool(b))), - _ => Err(Err::Error(Error::new(input, ErrorKind::Tag))), - } - } -} - -fn parse_literal_expr(input: Tokens) -> IResult { - map(parse_literal, Expr::Literal)(input) -} - -fn parse_atom_expr(input: Tokens) -> IResult { - alt(( - parse_literal_expr, - parse_ident_expr, - parse_prefix_expr, - parse_paren_expr, - parse_if_expr, - ))(input) -} - -fn parse_paren_expr(input: Tokens) -> IResult { - delimited(tag_lparen, parse_expr_lowest, tag_rparen)(input) -} - -fn parse_ident(input: Tokens) -> IResult { - let (i1, t1) = take(1usize)(input)?; - if t1.tokens.is_empty() { Err(Err::Error(Error::new(input, ErrorKind::Tag))) } - else { - match t1.tokens[0].clone() { - Token::Identifier(name) => Ok((i1, Ident(name))), - _ => Err(Err::Error(Error::new(input, ErrorKind::Tag))), - } - } -} - -fn parse_ident_expr(input: Tokens) -> IResult { - map(parse_ident, Expr::Ident)(input) -} - -fn parse_params(input: Tokens) -> IResult> { - map( - pair(parse_ident, many0(preceded(tag_comma, parse_ident))), - |(p, ps)| [&vec![p][..], &ps[..]].concat(), - )(input) -} - -fn empty_params(input: Tokens) -> IResult> { Ok((input, vec![])) } - -fn parse_call_expr(input: Tokens, func_handle: Expr) -> IResult { - map( - delimited( - tag_lparen, - parse_exprs, - tag_rparen, - ), - |e| Expr::Call { func: Box::new(func_handle.clone()), args: e }, - )(input) -} - -fn parse_infix_expr(input: Tokens, left: Expr) -> IResult { - let (i1, t1) = take(1usize)(input)?; - if t1.tokens.is_empty() { Err(Err::Error(error_position!(input, ErrorKind::Tag))) } - else { - let next = &t1.tokens[0]; - let (prec, op) = infix_operator(next); - match op { - None => Err(Err::Error(error_position!(input, ErrorKind::Tag))), - Some(op) => { - let (i2, right) = parse_expr_with(i1, prec)?; - Ok((i2, Expr::Infix(op, Box::new(left), Box::new(right)))) - } - } - } -} - -fn parse_prefix_expr(input: Tokens) -> IResult { - let (i1, t1) = alt((tag_plus, tag_minus, tag_not))(input)?; - if t1.tokens.is_empty() { Err(Err::Error(error_position!(input, ErrorKind::Tag))) } - else { - let (i2, e) = parse_atom_expr(i1)?; - match t1.tokens[0].clone() { - Token::Plus => Ok((i2, Expr::Prefix(Prefix::Plus, Box::new(e)))), - Token::Minus => Ok((i2, Expr::Prefix(Prefix::Minus, Box::new(e)))), - Token::Not => Ok((i2, Expr::Prefix(Prefix::Not, Box::new(e)))), - _ => Err(Err::Error(error_position!(input, ErrorKind::Tag))), - } - } -} - -fn parse_expr(input: Tokens, precedence: Precedence, left: Expr) -> IResult { - let (i1, t1) = take(1usize)(input)?; - - if t1.tokens.is_empty() { Ok((i1, left)) } - else { - let p = infix_operator(&t1.tokens[0]); - match p { - (Precedence::Call, _) if precedence < Precedence::Call => { - let (i2, left2) = parse_call_expr(input, left)?; - parse_expr(i2, precedence, left2) - }, - (ref peek, _) if precedence < *peek => { - let (i2, left2) = parse_infix_expr(input, left)?; - parse_expr(i2, precedence, left2) - }, - _ => Ok((input, left)), - } - } -} - -fn parse_if_expr(input: Tokens) -> IResult { - map( - tuple(( - tag_if, - parse_expr_lowest, - parse_block_stmt, - parse_else_expr, - )), - |(_, cond, then, else_)| Expr::If { cond: Box::new(cond), then, else_ }, - )(input) -} - -fn parse_else_expr(input: Tokens) -> IResult> { - opt(preceded(tag_else, parse_block_stmt))(input) -} - -fn parse_comma_exprs(input: Tokens) -> IResult { - preceded(tag_comma, parse_expr_lowest)(input) -} - -fn parse_exprs(input: Tokens) -> IResult> { - map( - pair(parse_expr_lowest, many0(parse_comma_exprs)), - |(first, second)| [&vec![first][..], &second[..]].concat(), - )(input) -} - -fn parse_expr_with(input: Tokens, precedence: Precedence) -> IResult { - let (i1, left) = parse_atom_expr(input)?; - parse_expr(i1, precedence, left) -} - -fn parse_expr_lowest(input: Tokens) -> IResult { - parse_expr_with(input, Precedence::Lowest) -} - -fn parse_return_stmt(input: Tokens) -> IResult { - map( - delimited( - tag_return, - parse_expr_lowest, - opt(tag_semicolon), - ), - Stmt::Return, - )(input) -} - -fn parse_call_stmt(input: Tokens) -> IResult { - map( - tuple(( - parse_ident, - tag_lparen, - parse_exprs, - tag_rparen, - opt(tag_semicolon), - )), - |(ident, _, args, _, _)| Stmt::Call(ident, args), - )(input) -} - -fn parse_block_stmt(input: Tokens) -> IResult { - delimited(tag_lbrace, many0(parse_stmt), tag_rbrace)(input) -} - -fn parse_func_stmt(input: Tokens) -> IResult { - map( - tuple(( - tag_func, - parse_ident, - tag_typehint, - tag_lparen, - alt((parse_params, empty_params)), - tag_rparen, - tag_returnhint, - parse_ident, - tag_assign, - parse_block_stmt, - opt(tag_semicolon), - )), - |(_, ident, _, _, params, _, _, returntype, _, block, _)| Stmt::Func(ident, params, returntype, block), - )(input) -} - -fn parse_let_stmt(input: Tokens) -> IResult { - map( - tuple(( - tag_let, - parse_ident, - tag_typehint, - parse_ident, - tag_assign, - parse_expr_lowest, - opt(tag_semicolon), - )), - |(_, ident, _, typehint, _, expr, _)| Stmt::Let(ident, typehint, expr), - )(input) -} - -fn parse_import(input: Tokens) -> IResult { - map( - tuple(( - tag_import, - parse_literal, - opt(tag_semicolon), - )), - |(_, path, _)| Stmt::Import(path), - )(input) -} - -fn parse_stmt(input: Tokens) -> IResult { - alt(( - parse_import, - parse_let_stmt, - parse_func_stmt, - parse_call_stmt, - parse_return_stmt, - ))(input) -} - -fn parse_program(input: Tokens) -> IResult { - terminated(many0(parse_stmt), tag_end_of_file)(input) -} - -pub struct Parser; - -impl Parser { - pub fn parse(tokens: Tokens) -> IResult { - parse_program(tokens) - } -} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 963701a..40622a5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ use std::fs; +use chumsky::Parser; use clap::Parser as ArgParser; /// Arguments handler. @@ -9,24 +10,15 @@ use args::{Args, Options}; /// Front-end of the language. /// Contains lexer, parser and token types. pub mod front; -use front::{lex::Lexer, parser::Parser, model::Tokens}; +use front::parse::parser; fn main() { let args = Args::parse(); match args.options { Options::Compile { input: src, ast: _print_ast } => { - let bytes: Vec = fs::read(src).unwrap(); - let (_errs_, tokens) = Lexer::lex_tokens(&bytes).unwrap(); - let tokens = Tokens::new(&tokens); - let ast = Parser::parse(tokens); - match ast { - Ok(ast) => { - println!("{:#?}", ast); - } - Err(err) => { - println!("{:#?}", err); - } - } + let src = fs::read_to_string(src).expect("Failed to read file"); + let tokens = parser().parse_recovery(src.as_str()); + println!("{:?}", tokens); }, } } \ No newline at end of file