From 87b3c7717b0dca0e0dde15a1351a4c5063bca66d Mon Sep 17 00:00:00 2001 From: Natapat Samutpong Date: Sun, 6 Mar 2022 22:04:48 +0700 Subject: [PATCH] massive amount of refactor - parser and lexer now have span - split into mini-crates --- Cargo.lock | 38 +++- Cargo.toml | 16 +- crates/lexer/Cargo.toml | 9 + crates/lexer/src/lib.rs | 132 +++++++++++++ crates/main/Cargo.toml | 11 ++ {src => crates/main/src}/args.rs | 0 crates/main/src/main.rs | 44 +++++ {src => crates/main/src}/util.rs | 0 crates/parser/Cargo.toml | 11 ++ crates/parser/src/lib.rs | 231 ++++++++++++++++++++++ example/ex.hyc | 18 +- src/back/js.rs | 91 --------- src/back/mod.rs | 2 - src/front/mod.rs | 1 - src/front/parse.rs | 319 ------------------------------- src/main.rs | 82 -------- src/middle/ir.rs | 90 --------- src/middle/mod.rs | 2 - 18 files changed, 486 insertions(+), 611 deletions(-) create mode 100644 crates/lexer/Cargo.toml create mode 100644 crates/lexer/src/lib.rs create mode 100644 crates/main/Cargo.toml rename {src => crates/main/src}/args.rs (100%) create mode 100644 crates/main/src/main.rs rename {src => crates/main/src}/util.rs (100%) create mode 100644 crates/parser/Cargo.toml create mode 100644 crates/parser/src/lib.rs delete mode 100644 src/back/js.rs delete mode 100644 src/back/mod.rs delete mode 100644 src/front/mod.rs delete mode 100644 src/front/parse.rs delete mode 100644 src/main.rs delete mode 100644 src/middle/ir.rs delete mode 100644 src/middle/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 8bbedb4..f7bcf3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,15 @@ dependencies = [ "const-random", ] +[[package]] +name = "ariadne" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1cb2a2046bea8ce5e875551f5772024882de0b540c7f93dfc5d6cf1ca8b030c" +dependencies = [ + "yansi", +] + [[package]] name = "atty" version = "0.2.14" @@ -109,9 +118,9 @@ checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] name = "getrandom" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" +checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77" dependencies = [ "cfg-if", "libc", @@ -143,8 +152,9 @@ dependencies = [ name = "hycron" version = "0.1.0" dependencies = [ - "chumsky", "clap", + "lexer", + "parser", ] [[package]] @@ -163,6 +173,13 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lexer" +version = "0.1.0" +dependencies = [ + "chumsky", +] + [[package]] name = "libc" version = "0.2.117" @@ -184,6 +201,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "parser" +version = "0.1.0" +dependencies = [ + "ariadne", + "chumsky", + "lexer", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -321,3 +347,9 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "yansi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc79f4a1e39857fc00c3f662cbf2651c771f00e9c15fe2abc341806bd46bd71" diff --git a/Cargo.toml b/Cargo.toml index 0eff5bb..1575ccd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,6 @@ -[package] -name = "hycron" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -clap = { version = "3.0.14", features = ["derive"] } -chumsky = "0.8.0" \ No newline at end of file +[workspace] +members = [ + "crates/main", + "crates/lexer", + "crates/parser", +] \ No newline at end of file diff --git a/crates/lexer/Cargo.toml b/crates/lexer/Cargo.toml new file mode 100644 index 0000000..cdac346 --- /dev/null +++ b/crates/lexer/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "lexer" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +chumsky = "0.8.0" \ No newline at end of file diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs new file mode 100644 index 0000000..b4684cd --- /dev/null +++ b/crates/lexer/src/lib.rs @@ -0,0 +1,132 @@ +use chumsky::prelude::*; + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum Token { + // Keywords + KwLet, KwFun, + KwDo, KwEnd, + KwIf, KwThen, KwElse, + + // Literals + Int(i64), Float(String), Boolean(bool), + String(String), Identifier(String), + + // Operators + Plus, Minus, Multiply, Divide, + Not, Equal, NotEqual, Less, Greater, + + // Symbols & Delimiters + Assign, + Dot, Comma, + Colon, SemiColon, + OpenParen, CloseParen, +} + +impl std::fmt::Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Token::KwLet => write!(f, "let"), + Token::KwFun => write!(f, "fun"), + Token::KwDo => write!(f, "do"), + Token::KwEnd => write!(f, "end"), + Token::KwIf => write!(f, "if"), + Token::KwThen => write!(f, "then"), + Token::KwElse => write!(f, "else"), + + Token::Int(i) => write!(f, "{}", i), + Token::Float(s) => write!(f, "{}", s), + Token::Boolean(b) => write!(f, "{}", b), + Token::String(s) => write!(f, "{}", s), + Token::Identifier(s) => write!(f, "{}", s), + + Token::Plus => write!(f, "+"), + Token::Minus => write!(f, "-"), + Token::Multiply => write!(f, "*"), + Token::Divide => write!(f, "/"), + Token::Not => write!(f, "!"), + Token::Equal => write!(f, "=="), + Token::NotEqual => write!(f, "!="), + Token::Less => write!(f, "<"), + Token::Greater => write!(f, ">"), + + Token::Assign => write!(f, "="), + Token::Dot => write!(f, "."), + Token::Comma => write!(f, ","), + Token::Colon => write!(f, ":"), + Token::SemiColon => write!(f, ";"), + Token::OpenParen => write!(f, "("), + Token::CloseParen => write!(f, ")"), + } + } +} + +pub type Span = std::ops::Range; +pub fn lexer() -> impl Parser, Error = Simple> { + let int = text::int(10) + .map(|s: String| Token::Int(s.parse().unwrap())); + let float = text::int(10) + .chain(just('.')) + .chain::(text::digits(10)) + .collect::() + .map(Token::Float); + + let string = just('"') + .ignore_then(filter(|c| *c != '"').repeated()) + .then_ignore(just('"')) + .collect::() + .map(Token::String); + + let symbol = choice(( + just('+').to(Token::Plus), + just('-').to(Token::Minus), + just('*').to(Token::Multiply), + just('/').to(Token::Divide), + just('!').to(Token::Not), + just("==").to(Token::Equal), + just('<').to(Token::Less), + just('>').to(Token::Greater), + just('=').to(Token::Assign), + just('.').to(Token::Dot), + just(',').to(Token::Comma), + just(':').to(Token::Colon), + just(';').to(Token::SemiColon), + just('(').to(Token::OpenParen), + just(')').to(Token::CloseParen), + )); + + let keyword = text::ident().map(|s: String| match s.as_str() { + "true" => Token::Boolean(true), + "false" => Token::Boolean(false), + + "let" => Token::KwLet, + "fun" => Token::KwFun, + "do" => Token::KwDo, + "end" => Token::KwEnd, + "if" => Token::KwIf, + "then" => Token::KwThen, + "else" => Token::KwElse, + _ => Token::Identifier(s), + }); + + let token = int + .or(float) + .or(string) + .or(symbol) + .or(keyword) + .recover_with(skip_then_retry_until([])); + + let comment = just("--") + .ignore_then(filter(|c| *c != '\n').repeated()) + .then_ignore(just('\n')); + + token + .padded_by(comment.repeated()) + .map_with_span(|token, span| (token, span)) + .padded() + .repeated() +} + +pub fn lex(src: String) -> (Option)>>, Vec>) { + let (tokens, lex_error) = lexer().parse_recovery(src.as_str()); + return (tokens, lex_error); +} \ No newline at end of file diff --git a/crates/main/Cargo.toml b/crates/main/Cargo.toml new file mode 100644 index 0000000..e64d976 --- /dev/null +++ b/crates/main/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "hycron" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "3.0.14", features = ["derive"] } +lexer = { path = "../lexer" } +parser = { path = "../parser" } \ No newline at end of file diff --git a/src/args.rs b/crates/main/src/args.rs similarity index 100% rename from src/args.rs rename to crates/main/src/args.rs diff --git a/crates/main/src/main.rs b/crates/main/src/main.rs new file mode 100644 index 0000000..b748c2a --- /dev/null +++ b/crates/main/src/main.rs @@ -0,0 +1,44 @@ +use std::fs; + +use clap::Parser as ArgParser; +use lexer::lex; +use parser::parse; + +pub mod args; +use args::{Args, Options}; + +pub mod util; +use crate::util::log; + +fn main() { + let args = Args::parse(); + match args.options { + Options::Compile { + input: file_name, + ast: _print_ast, + } => { + // Get file contents. + let src = fs::read_to_string(&file_name).expect("Failed to read file"); + + // Lex the file. + let (tokens, lex_error) = lex(src.clone()); + + if lex_error.is_empty() { + log(0, "Lexing successful."); + + let (ast, parse_error) = parse(tokens.unwrap(), src.chars().count()); + + if parse_error.is_empty() { + println!("{:#?}", ast); + log(0, "Parsing successful."); + } else { + println!("{:#?}", parse_error); + log(2, "Parsing failed."); + } + } else { + println!("{:#?}", lex_error); + log(2, "Lexing failed."); + } + } + } +} diff --git a/src/util.rs b/crates/main/src/util.rs similarity index 100% rename from src/util.rs rename to crates/main/src/util.rs diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml new file mode 100644 index 0000000..5d7b331 --- /dev/null +++ b/crates/parser/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "parser" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +lexer = { path = "../lexer" } +chumsky = "0.8.0" +ariadne = "0.1.5" \ No newline at end of file diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs new file mode 100644 index 0000000..287e85c --- /dev/null +++ b/crates/parser/src/lib.rs @@ -0,0 +1,231 @@ +use chumsky::{prelude::*, Stream}; +use lexer::Token; + +pub type Spanned = (T, std::ops::Range); + +#[derive(Clone, Debug)] +pub enum Expr { + Int(i64), Float(f64), Boolean(bool), + String(String), Identifier(String), + + Unary { op: String, rhs: Box> }, + Binary { lhs: Box>, op: String, rhs: Box> }, + Call { name: Box>, args: Spanned>> }, + + Let { + name: String, + type_hint: String, + value: Box>, + }, + Fun { + name: String, + type_hint: String, + args: Spanned, Spanned)>>, + body: Box> + }, + + If { + cond: Box>, + then: Box>, + else_: Box> + }, + Do { + body: Vec> + }, +} + +fn expr_parser() -> impl Parser>, Error = Simple> + Clone { + let identifier = filter_map(|span, token| match token { + Token::Identifier(s) => Ok((s, span)), + _ => Err(Simple::expected_input_found(span, Vec::new(), Some(token))), + }).labelled("identifier"); + + let literal = filter_map(|span, token| match token { + Token::Int(i) => Ok((Expr::Int(i), span)), + Token::Float(f) => Ok((Expr::Float(f.parse().unwrap()), span)), + Token::Boolean(b) => Ok((Expr::Boolean(b), span)), + Token::String(s) => Ok((Expr::String(s), span)), + _ => Err(Simple::expected_input_found(span, Vec::new(), Some(token))), + }).labelled("literal"); + + let expr = recursive(|expr| { + let args = expr.clone() + .separated_by(just(Token::Comma)) + .allow_trailing(); + + let atom = literal + .or(identifier.map(|(s, span)| (Expr::Identifier(s), span))) + // .or( + // expr.clone() + // .delimited_by(just(Token::OpenParen), just(Token::CloseParen))) + .labelled("atom"); + + let call = atom + .then( + args.clone() + .delimited_by( + just(Token::OpenParen), + just(Token::CloseParen), + ) + .repeated() + ) + .foldl(|name, args| {( + Expr::Call { + name: Box::new(name.clone()), + args: (args, name.1.clone()), + }, + name.1, + )}); + + let unary = choice(( + just(Token::Plus), + just(Token::Minus))) + .repeated() + .then(call) + .foldr(|op, rhs| { + ( + Expr::Unary { + op: op.to_string(), + rhs: Box::new(rhs.clone()), + }, + rhs.1, + ) + }); + + let factor = unary.clone() + .then( + choice(( + just(Token::Multiply), + just(Token::Divide))) + .then(unary) + .repeated()) + .foldl(|lhs, (op, rhs)| { + ( + Expr::Binary { + lhs: Box::new(lhs), + op: op.to_string(), + rhs: Box::new(rhs.clone()), + }, + rhs.1, + ) + }); + + let term = factor.clone() + .then( + choice(( + just(Token::Plus), + just(Token::Minus))) + .then(factor) + .repeated()) + .foldl(|lhs, (op, rhs)| { + ( + Expr::Binary { + lhs: Box::new(lhs), + op: op.to_string(), + rhs: Box::new(rhs.clone()), + }, + rhs.1, + ) + }); + + let compare = term.clone() + .then( + choice(( + just(Token::Less), + just(Token::Greater), + just(Token::Equal), + just(Token::NotEqual))) + .then(term) + .repeated()) + .foldl(|lhs, (op, rhs)| { + ( + Expr::Binary { + lhs: Box::new(lhs), + op: op.to_string(), + rhs: Box::new(rhs.clone()), + }, + rhs.1, + ) + }); + + let let_ = just(Token::KwLet) + .ignore_then(identifier) + .then_ignore(just(Token::Colon)) + .then(identifier) + .then_ignore(just(Token::Assign)) + .then(expr.clone()) + .map(|((name, type_hint), value)| { + ( + Expr::Let { + name: name.0.clone(), + type_hint: type_hint.0, + value: Box::new(value.clone()), + }, + name.1.start..value.1.end, + ) + }); + + let fun = just(Token::KwFun) + .ignore_then(identifier) + .then( + identifier + .then_ignore(just(Token::Colon)) + .then(identifier) + .delimited_by( + just(Token::OpenParen), + just(Token::CloseParen), + ) + .repeated() + ) + .then_ignore(just(Token::Colon)) + .then(identifier) + .then_ignore(just(Token::Assign)) + .then(expr.clone()) + .map(|(((name, args), type_hint), body)| { + ( + Expr::Fun { + name: name.0.clone(), + type_hint: type_hint.0, + args: (args, name.1.clone()), + body: Box::new(body.clone()), + }, + name.1.start..body.1.end, + ) + }); + + let do_block = just(Token::KwDo) + .ignore_then( + expr.clone() + .then_ignore(just(Token::SemiColon)) + .repeated() + ) + .then_ignore(just(Token::KwEnd)) + .map_with_span(|body, span| { + ( + Expr::Do { + body: body.clone(), + }, + span, + ) + }); + + let_ + .or(fun) + .or(do_block) + .or(compare) + }).labelled("expression"); + + expr + .then_ignore(just(Token::SemiColon)) + .repeated() + .then_ignore(end()) +} + +pub fn parse(tokens: Vec<(Token, std::ops::Range)>, len: usize) -> (Option)>>, Vec>) { + let (ast, parse_error) = expr_parser().parse_recovery(Stream::from_iter( + len..len + 1, + tokens.into_iter(), + )); + + return (ast, parse_error) +} \ No newline at end of file diff --git a/example/ex.hyc b/example/ex.hyc index 92078c1..a3a1533 100644 --- a/example/ex.hyc +++ b/example/ex.hyc @@ -1,12 +1,8 @@ -let foo: string = 1; +let fard: Int = 1; +let balls: String = "balls"; -fun bar (baz: int) -> int = baz + 1; -fun qux (quux: int) -> int = do - let corge: int = quux + quux; - bar(corge); -end; - -fun add (lhs: int, rhs: int) -> int = lhs + rhs; -print(add(34, 35)); - -print(qux(5)); \ No newline at end of file +fun add (lhs: Int) (rhs: Int): Int = lhs + rhs; +fun add_2 (lhs: Int) (rhs: Int): Int = do + let a: Int = lhs + rhs; + let b: Int = a + lhs; +end; \ No newline at end of file diff --git a/src/back/js.rs b/src/back/js.rs deleted file mode 100644 index 7e6e9cb..0000000 --- a/src/back/js.rs +++ /dev/null @@ -1,91 +0,0 @@ -use crate::middle::ir::{IR, Value}; - -pub fn gen(irs: Vec) -> String { - let mut output = String::new(); - for ir in irs { - output.push_str(&gen_ir(&ir)); - } - output -} - -fn gen_ir(ir: &IR) -> String { - match ir { - IR::Define { name, type_hint: _, value } => { // type_hint is only used in type_checking i think - let value = gen_ir(value); - format!("const {} = {};", name, value) - }, - IR::Fun { name, return_type_hint: _, args, body } => { - let args = args - .iter() - .map(|(name, _)| format!("{}", name)) - .collect::>() - .join(", "); - - let body = match &**body { - IR::Value { value } => gen_value(value), - IR::Do { body } => { - let mut out = String::new(); - for (i, node) in body.iter().enumerate() { - if i == body.len() - 1 { - out.push_str(format!("return {};", gen_ir(node)).as_str()); - } else { - out.push_str(&gen_ir(node)); - } - } - out - }, - IR::Binary { op, left, right } => { - format!( - "return {} {} {};", - gen_ir(left), - op, - gen_ir(right) - ) - }, - _ => { println!("{:?}", body); todo!() } - }; - - format!( - "const {} = ({}) => {{ {} }};", - name, - args, - body - ) - }, - IR::Call { name, args } => { - match name.as_str() { - "print" => { - let args = gen_ir(&args[0]); - format!("console.log({});", args.trim_end_matches(";")) - }, - _ => { - let args = args - .iter() - .map(|arg| gen_ir(arg)) - .collect::>() - .join(", "); - format!("{}({})", name, args) - }, - } - }, - IR::Value { value } => { - gen_value(value) - }, - IR::Binary { op, left, right } => { - let left = gen_ir(left); - let right = gen_ir(right); - format!("({} {} {});", left, op, right) - }, - _ => { println!("{:?}", ir); todo!() } - } -} - -fn gen_value(value: &Value) -> String { - match value { - Value::Int(i) => format!("{}", i), - Value::Float(f) => format!("{}", f), - Value::Bool(b) => format!("{}", b), - Value::String(s) => format!("\"{}\"", s), - Value::Ident(s) => format!("{}", s), - } -} \ No newline at end of file diff --git a/src/back/mod.rs b/src/back/mod.rs deleted file mode 100644 index cf65c45..0000000 --- a/src/back/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -/// Javascript compiler backend -pub mod js; \ No newline at end of file diff --git a/src/front/mod.rs b/src/front/mod.rs deleted file mode 100644 index 329584d..0000000 --- a/src/front/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod parse; \ No newline at end of file diff --git a/src/front/parse.rs b/src/front/parse.rs deleted file mode 100644 index c3a2f15..0000000 --- a/src/front/parse.rs +++ /dev/null @@ -1,319 +0,0 @@ -use chumsky::prelude::*; - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub enum Token { - // Types - Int(i64), Float(String), - Bool(bool), String(String), - Ident(String), - - // Symbols - Operator(String), - Delimiter(char), - Semicolon, - Assign, Colon, - Comma, Dot, - ReturnHint, - - // Keywords - Import, - Let, Fun, - If, Then, Else, End, - Do, -} - -pub type Span = std::ops::Range; -pub fn lexer() -> impl Parser, Error = Simple> { - let int = text::int(10) - .map(|s: String| Token::Int(s.parse().unwrap())); - - // TODO: this is not working somehow - let float = text::int(10) - .then_ignore(just('.')) - .chain::(text::digits(10)) - .collect::() - .map(|s: String| Token::Float(s)); - - let string = just('"') - .ignore_then(filter(|c| *c != '"').repeated()) - .then_ignore(just('"')) - .collect::() - .map(|s: String| Token::String(s)); - - let symbol = choice(( - just(';').to(Token::Semicolon), - just('=').to(Token::Assign), - just(':').to(Token::Colon), - just(',').to(Token::Comma), - just("->").to(Token::ReturnHint), - just(".").to(Token::Dot), - )); - - let operator = choice(( - just("+"), - just("-"), - just("*"), - just("/"), - just("%"), - - just("!"), - just("=="), - just("!="), - just("<"), - just(">"), - just("<="), - just(">="), - )).map(|c| Token::Operator(c.to_string())); - - let delimiter = choice(( - just('('), - just(')'), - just('{'), - just('}'), - )).map(|c| Token::Delimiter(c)); - - let keyword = text::ident().map(|s: String| match s.as_str() { - "true" => Token::Bool(true), - "false" => Token::Bool(false), - - "import" => Token::Import, - "let" => Token::Let, - "fun" => Token::Fun, - "if" => Token::If, - "then" => Token::Then, - "else" => Token::Else, - "end" => Token::End, - "do" => Token::Do, - _ => Token::Ident(s), - }); - - let token = int - .or(float) - .or(string) - .or(symbol) - .or(operator) - .or(delimiter) - .or(keyword) - .recover_with(skip_then_retry_until([])); - - let comment = just("/*").then(take_until(just("*/"))) - .padded() - .ignored(); - - token - .padded_by(comment.repeated()) - .map_with_span(|token, span| (token, span)) - .padded() - .repeated() -} - -#[derive(Clone, Debug)] -pub enum Expr { - Int(i64), Float(f64), - Bool(bool), String(String), - Ident(String), - - Unary { op: String, expr: Box }, - Binary { op: String, left: Box, right: Box }, - Call { name: Box, args: Vec }, - - Let { - name: String, - type_hint: String, - value: Box, - }, - Fun { - name: String, - type_hint: String, - args: Vec<(String, String)>, - body: Box, - }, - - If { - cond: Box, - then: Box, - else_: Box, - }, - Do { body: Vec }, -} - -fn expr_parser() -> impl Parser> + Clone { - let ident = filter_map(|span, token| match token { - Token::Ident(s) => Ok(s.clone()), - _ => Err(Simple::expected_input_found(span, Vec::new(), Some(token))), - }).labelled("identifier"); - - let literal = filter_map(|span, token| match token { - Token::Int(i) => Ok(Expr::Int(i)), - Token::Float(f) => Ok(Expr::Float(f.parse().unwrap())), - Token::Bool(b) => Ok(Expr::Bool(b)), - Token::String(s) => Ok(Expr::String(s)), - _ => Err(Simple::expected_input_found(span, Vec::new(), Some(token))), - }).labelled("literal"); - - let expr = recursive(|expr| { - let args = expr.clone() - .separated_by(just(Token::Comma)) - .allow_trailing(); - - let atom = literal - .or(ident.map(Expr::Ident)) - .or( - expr.clone() - .delimited_by(just(Token::Delimiter('(')), just(Token::Delimiter(')')))) - .labelled("atom"); - - let call = atom - .then( - args - .delimited_by( - just(Token::Delimiter('(')), - just(Token::Delimiter(')'))) - .repeated() - ) - .foldl(|f, args| { - Expr::Call { - name: Box::new(f), - args, - } - }); - - let unary = choice(( - just(Token::Operator("-".to_string())).to("-"), - just(Token::Operator("!".to_string())).to("!"))) - .repeated() - .then(call) - .foldr(|op, rhs| Expr::Unary { op: op.to_string(), expr: Box::new(rhs) }).labelled("unary"); - - let factor = unary.clone() - .then( - choice(( - just(Token::Operator("*".to_string())).to("*"), - just(Token::Operator("/".to_string())).to("/"))) - .then(unary) - .repeated()) - .foldl(|lhs, (op, rhs)| Expr::Binary { - op: op.to_string(), - left: Box::new(lhs), - right: Box::new(rhs) - }).labelled("factor"); - - let term = factor.clone() - .then( - choice(( - just(Token::Operator("+".to_string())).to("+"), - just(Token::Operator("-".to_string())).to("-"))) - .then(factor) - .repeated()) - .foldl(|lhs, (op, rhs)| Expr::Binary { - op: op.to_string(), - left: Box::new(lhs), - right: Box::new(rhs) - }).labelled("term"); - - let compare = term.clone() - .then( - choice(( - just(Token::Operator("==".to_string())).to("=="), - just(Token::Operator("!=".to_string())).to("!="), - just(Token::Operator("<".to_string())).to("<"), - just(Token::Operator(">".to_string())).to(">"), - just(Token::Operator("<=".to_string())).to("<="), - just(Token::Operator(">=".to_string())).to(">="))) - .then(term) - .repeated()) - .foldl(|lhs, (op, rhs)| Expr::Binary { - op: op.to_string(), - left: Box::new(lhs), - right: Box::new(rhs) - }).labelled("compare"); - - compare - }).labelled("expression"); - - let declare = recursive(|decl| { - let do_block = just(Token::Do) - .ignore_then( - expr.clone() - .or(decl.clone()) - .then_ignore(just(Token::Semicolon)) - .repeated()) - .then_ignore(just(Token::End)) - .map(|body| Expr::Do { body }); - - let declare_var = just(Token::Let) - .ignore_then(ident) - .then_ignore(just(Token::Colon)) - .then(ident) - .then_ignore(just(Token::Assign)) - .then( - do_block.clone() - .or(decl.clone()) - ) - .map(|((name, type_hint), value)| Expr::Let { - name, - type_hint, - value: Box::new(value), - }).labelled("variable"); - - let declare_fun = just(Token::Fun) - .ignore_then(ident) - .then_ignore(just(Token::Delimiter('('))) - .then( - (ident - .then_ignore(just(Token::Colon)) - .then(ident)) - .separated_by(just(Token::Comma)) - .allow_trailing() - ) - .then_ignore(just(Token::Delimiter(')'))) - .then_ignore(just(Token::ReturnHint)) - .then(ident) - .then_ignore(just(Token::Assign)) - .then( - do_block.clone() - .or(decl.clone()) - ) - .map(|(((name, args), type_hint), body)| Expr::Fun { - name, - type_hint, - args, - body: Box::new(body), - }).labelled("function"); - - let if_cond = just(Token::If) - .ignore_then(expr.clone()) - .then_ignore(just(Token::Then)) - .then( - do_block.clone() - .or(decl.clone().then_ignore(just(Token::Semicolon).or_not())) - ) - .then_ignore(just(Token::Else)) - .then( - do_block.clone() - .or(decl.clone().then_ignore(just(Token::Semicolon).or_not())) - ) - .then_ignore(just(Token::End)) - .map(|((cond, then), else_)| Expr::If { - cond: Box::new(cond), - then: Box::new(then), - else_: Box::new(else_), - }).labelled("if"); - - declare_var - .or(declare_fun) - .or(if_cond) - .or(do_block) - .or(expr) - - }).labelled("declare"); - - declare -} - -pub fn parser() -> impl Parser, Error = Simple> + Clone { - expr_parser() - .then_ignore(just(Token::Semicolon)) - .repeated() - .then_ignore(end()) -} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 2760a9a..0000000 --- a/src/main.rs +++ /dev/null @@ -1,82 +0,0 @@ -use std::{ - fs, - io::{self, Write}, - time, -}; - -use chumsky::{Parser, Stream}; -use clap::Parser as ArgParser; - -/// Arguments handler. -pub mod args; -use args::{Args, Options}; - -/// Front-end of the language. -/// Contains lexer, parser and token types. -pub mod front; -use front::parse::{lexer, parser}; - -/// Middle-end of the language. -/// Contains the intermediate representation. -pub mod middle; -use middle::ir; - -/// Back-end of the language. -/// Contains code generator. -pub mod back; - -/// Utility functions. -pub mod util; -use crate::util::log; - -fn main() { - let args = Args::parse(); - match args.options { - Options::Compile { - input: file_name, - ast: _print_ast, - } => { - // Get file contents. - let src = fs::read_to_string(&file_name).expect("Failed to read file"); - - // Lex the file. - let (tokens, lex_error) = lexer().parse_recovery(src.as_str()); - let len = src.chars().count(); - - // Parse the file. - let (ast, parse_error) = parser().parse_recovery(Stream::from_iter( - len..len + 1, - tokens.clone().unwrap().into_iter(), - )); - - if lex_error.is_empty() { - if parse_error.is_empty() { - match ast { - // If there is some AST then generate code. - Some(ast) => { - let start = time::Instant::now(); - - let ir = ir::ast_to_ir(ast); - - let out = back::js::gen(ir); - - let file = fs::File::create("out.js").expect("Failed to create file"); - let mut file = io::BufWriter::new(file); - file.write_all(out.as_bytes()) - .expect("Failed to write file"); - - let all_elapsed = start.elapsed(); - log(0, format!("Done in {}s", all_elapsed.as_secs_f64())); - } - // If there is no AST, then notify the user. - None => println!("no ast :("), - }; - } else { - eprintln!("{:#?}\n(Parser error)", parse_error); - } - } else { - eprintln!("{:#?}\n(Lexer error)", lex_error); - } - } - } -} diff --git a/src/middle/ir.rs b/src/middle/ir.rs deleted file mode 100644 index e50ebe8..0000000 --- a/src/middle/ir.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::front::parse::Expr; - -#[derive(Debug, Clone)] -pub enum TypeHint { - Int, - Float, - Bool, - String, -} - -#[derive(Debug, Clone)] -pub enum Value { - Int(i64), - Float(f64), - Bool(bool), - String(String), - Ident(String), -} - -#[derive(Debug, Clone)] -pub enum IR { - Define { name: String, type_hint: TypeHint, value: Box }, - Fun { name: String, return_type_hint: TypeHint, args: Vec<(String, TypeHint)>, body: Box }, - Call { name: String, args: Vec }, - Do { body: Vec }, - If { cond: Box, body: Box, else_body: Box }, - Value { value: Value }, - Binary { op: String, left: Box, right: Box }, -} - -pub fn ast_to_ir(ast: Vec) -> Vec { - let mut ir = Vec::new(); - for expr in ast { - ir.push(expr_to_ir(&expr)); - } - ir -} - -pub fn expr_to_ir(expr: &Expr) -> IR { - match expr { - Expr::Let { name, type_hint, value } => IR::Define { - name: name.clone(), - type_hint: get_typehint(type_hint), - value: Box::new(expr_to_ir(value)), - }, - Expr::Fun { name, type_hint, args, body } => IR::Fun { - name: name.clone(), - return_type_hint: get_typehint(type_hint), - args: args - .iter() - .map(|(name, type_hint)| (name.to_string(), get_typehint(type_hint))) - .collect::>(), - body: Box::new(expr_to_ir(body)), - }, - Expr::Call { name, args } => IR::Call { - name: match &**name { - Expr::Ident(s) => s.clone(), - _ => panic!("Expected ident in call"), - }, - args: args.iter().map(|arg| expr_to_ir(arg)).collect(), - }, - Expr::Do { body } => IR::Do { - body: body - .iter() - .map(|expr| expr_to_ir(expr)) - .collect::>(), - }, - Expr::Binary { op, left, right } => IR::Binary { - op: op.to_string(), - left: Box::new(expr_to_ir(left)), - right: Box::new(expr_to_ir(right)), - }, - Expr::Int(value) => IR::Value { value: Value::Int(*value) }, - Expr::Float(value) => IR::Value { value: Value::Float(*value) }, - Expr::Bool(value) => IR::Value { value: Value::Bool(*value) }, - Expr::String(value) => IR::Value { value: Value::String(value.clone()) }, - Expr::Ident(name) => IR::Value { value: Value::Ident(name.clone()) }, - _ => { println!("{:?}", expr); todo!() } - } -} - -fn get_typehint(from: &String) -> TypeHint { - match from.as_str() { - "int" => TypeHint::Int, - "float" => TypeHint::Float, - "bool" => TypeHint::Bool, - "string" => TypeHint::String, - _ => panic!("Unsupported type hint: {}", from) - } -} \ No newline at end of file diff --git a/src/middle/mod.rs b/src/middle/mod.rs deleted file mode 100644 index f919949..0000000 --- a/src/middle/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -// The intemediate representation of the AST -pub mod ir; \ No newline at end of file