mirror of
https://github.com/azur1s/bobbylisp.git
synced 2024-10-16 02:37:40 -05:00
rewrote with chumsky
This commit is contained in:
parent
d169670b00
commit
91f89d7ef6
103
Cargo.lock
generated
103
Cargo.lock
generated
|
@ -2,6 +2,15 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
|
||||
dependencies = [
|
||||
"const-random",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
|
@ -25,6 +34,21 @@ version = "1.3.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chumsky"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "3.0.14"
|
||||
|
@ -55,6 +79,45 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const-random"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4"
|
||||
dependencies = [
|
||||
"const-random-macro",
|
||||
"proc-macro-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const-random-macro"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"lazy_static",
|
||||
"proc-macro-hack",
|
||||
"tiny-keccak",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crunchy"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.11.2"
|
||||
|
@ -80,8 +143,8 @@ dependencies = [
|
|||
name = "hycron"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chumsky",
|
||||
"clap",
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -112,23 +175,6 @@ version = "2.4.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"minimal-lexical",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "os_str_bytes"
|
||||
version = "6.0.0"
|
||||
|
@ -162,6 +208,12 @@ dependencies = [
|
|||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-hack"
|
||||
version = "0.5.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.36"
|
||||
|
@ -212,6 +264,15 @@ version = "0.14.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80"
|
||||
|
||||
[[package]]
|
||||
name = "tiny-keccak"
|
||||
version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
|
||||
dependencies = [
|
||||
"crunchy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.2"
|
||||
|
@ -224,6 +285,12 @@ version = "0.9.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.10.2+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
|
|
|
@ -7,4 +7,4 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
clap = { version = "3.0.14", features = ["derive"] }
|
||||
nom = "7.1.0"
|
||||
chumsky = "0.8.0"
|
11
README.md
11
README.md
|
@ -1,16 +1,7 @@
|
|||
# Hycron
|
||||
Programming language
|
||||
|
||||
```
|
||||
func main :: () -> Int = {
|
||||
let msg :: String = "Hello, World";
|
||||
puts(msg);
|
||||
return 1;
|
||||
};
|
||||
```
|
||||
|
||||
# TODO
|
||||
- Compilation
|
||||
- Optimization
|
||||
- Use [chumsky](https://github.com/zesterer/chumsky) instead of [nom](https://github.com/Geal/nom) for parsing
|
||||
- Error reporting (better with chumsky)
|
||||
- Error reporting
|
|
@ -1,18 +1,2 @@
|
|||
import "path/to/library.hyc";
|
||||
|
||||
// user defined function
|
||||
func foo :: (a, b) -> Bool = {
|
||||
return a == b;
|
||||
};
|
||||
|
||||
// entry point
|
||||
func main :: () -> Int = {
|
||||
// if else in variable definition
|
||||
let cond_str :: String = if foo(1, 1) { return "t" } else { return "f" };
|
||||
|
||||
// Infix operator
|
||||
let n :: Bool = 2 == 2;
|
||||
// Prefix operator
|
||||
let m :: Bool = !n;
|
||||
puts(m);
|
||||
};
|
||||
fun add a b = a + b;
|
||||
let foo = add (1, 2);
|
|
@ -1,31 +0,0 @@
|
|||
use std::str::{self, Utf8Error, FromStr};
|
||||
|
||||
pub type Bytes = [u8];
|
||||
|
||||
/// Generates a lexer function named `$func_name` that recognises the literal
/// `$tag_string` at the start of the input and yields `$output_token`.
///
/// The expansion refers to `Bytes`, `IResult`, `Token`, `map` and `tag` by
/// their bare names, so all of them must be in scope where the macro is used.
#[macro_export]
macro_rules! syntax {
    ($func_name:ident, $tag_string:literal, $output_token:expr) => {
        fn $func_name(input: &Bytes) -> IResult<&Bytes, Token> {
            map(tag($tag_string), |_| $output_token)(input)
        }
    };
}
|
||||
|
||||
pub fn concat_slice_vec(a: &Bytes, b: Vec<u8>) -> Vec<u8> {
|
||||
let mut result = a.to_vec();
|
||||
result.extend(&b);
|
||||
result
|
||||
}
|
||||
|
||||
/// Converts an owned byte buffer into a `String`.
///
/// The buffer is validated as UTF-8 and then reused as the string's storage,
/// so no second copy of the bytes is made.
///
/// # Errors
///
/// Returns the `Utf8Error` describing the first invalid sequence when `v` is
/// not valid UTF-8.
pub fn convert_vec_utf8(v: Vec<u8>) -> Result<String, Utf8Error> {
    String::from_utf8(v).map_err(|err| err.utf8_error())
}
|
||||
|
||||
pub fn str_from_bytes(c: &Bytes) -> Result<&str, Utf8Error> {
|
||||
str::from_utf8(c)
|
||||
}
|
||||
|
||||
/// Parses `c` into any type implementing `FromStr`.
///
/// # Errors
///
/// Returns the target type's `FromStr::Err` when `c` cannot be parsed.
pub fn str_to_from_str<F: FromStr>(c: &str) -> Result<F, F::Err> {
    c.parse::<F>()
}
|
157
src/front/lex.rs
157
src/front/lex.rs
|
@ -1,157 +0,0 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::{tag, take, take_until},
|
||||
character::complete::{multispace0, alphanumeric1, alpha1, digit1},
|
||||
combinator::{map, map_res, recognize},
|
||||
IResult,
|
||||
multi::many0,
|
||||
sequence::{delimited, pair}, AsBytes,
|
||||
};
|
||||
|
||||
use crate::syntax;
|
||||
use super::{
|
||||
model::Token,
|
||||
helper::{Bytes, convert_vec_utf8, concat_slice_vec, str_from_bytes, str_to_from_str},
|
||||
};
|
||||
|
||||
// Comparison
|
||||
syntax! { equal_operator , "==", Token::Eq }
|
||||
syntax! { not_equal_operator , "!=", Token::NEq }
|
||||
syntax! { less_than_operator , "<" , Token::Lt }
|
||||
syntax! { greater_than_operator , ">" , Token::Gt }
|
||||
syntax! { less_than_equal_operator , "<=", Token::Lte }
|
||||
syntax! { greater_than_equal_operator , ">=", Token::Gte }
|
||||
|
||||
// Arithmetic
|
||||
syntax! { assign_operator , "=", Token::Assign }
|
||||
syntax! { add_operator , "+", Token::Plus }
|
||||
syntax! { subtract_operator , "-", Token::Minus }
|
||||
syntax! { multiply_operator , "*", Token::Mul }
|
||||
syntax! { divide_operator , "/", Token::Div }
|
||||
syntax! { not_operator , "!", Token::Not }
|
||||
|
||||
// Punctuations
|
||||
syntax! { typehint_punctuation , "::", Token::Typehint }
|
||||
syntax! { returnhint_punctuation , "->", Token::Return }
|
||||
syntax! { lparen_punctuation , "(", Token::LParen }
|
||||
syntax! { rparen_punctuation , ")", Token::RParen }
|
||||
syntax! { lbrace_punctuation , "{", Token::LBrace }
|
||||
syntax! { rbrace_punctuation , "}", Token::RBrace }
|
||||
syntax! { semicolon_punctuation , ";", Token::Semicolon }
|
||||
syntax! { colon_punctuation , ":", Token::Colon }
|
||||
syntax! { comma_punctuation , ",", Token::Comma }
|
||||
|
||||
// Operator & Punctuation
|
||||
fn lex_operator_punctuation(input: &Bytes) -> IResult<&Bytes, Token> {
|
||||
alt((
|
||||
typehint_punctuation, returnhint_punctuation,
|
||||
lparen_punctuation, rparen_punctuation,
|
||||
lbrace_punctuation, rbrace_punctuation,
|
||||
semicolon_punctuation, colon_punctuation, comma_punctuation,
|
||||
|
||||
equal_operator, not_equal_operator,
|
||||
less_than_operator, greater_than_operator,
|
||||
less_than_equal_operator, greater_than_equal_operator,
|
||||
|
||||
assign_operator,
|
||||
add_operator, subtract_operator, multiply_operator, divide_operator,
|
||||
not_operator,
|
||||
|
||||
))(input)
|
||||
}
|
||||
|
||||
// String
|
||||
fn string_value(input: &Bytes) -> IResult<&Bytes, Vec<u8>> {
|
||||
let (i1, c1) = take(1usize)(input)?;
|
||||
match c1.as_bytes() {
|
||||
b"\"" => Ok((input, vec![])),
|
||||
b"\\" => {
|
||||
let (i2, c2) = take(1usize)(i1)?;
|
||||
string_value(i2).map(|(slice, done)| (slice, concat_slice_vec(c2, done)))
|
||||
}
|
||||
c => string_value(i1).map(|(slice, done)| (slice, concat_slice_vec(c, done)))
|
||||
}
|
||||
}
|
||||
|
||||
fn string(input: &Bytes) -> IResult<&Bytes, String> {
|
||||
delimited(tag("\""), map_res(string_value, convert_vec_utf8), tag("\""))(input)
|
||||
}
|
||||
|
||||
fn lex_string(input: &Bytes) -> IResult<&Bytes, Token> {
|
||||
map(string, |s| Token::String(s))(input)
|
||||
}
|
||||
|
||||
// Reserved keywords & Identifiers
|
||||
fn lex_reserved_identifier(input: &Bytes) -> IResult<&Bytes, Token> {
|
||||
map_res(
|
||||
recognize(pair(
|
||||
alt((alpha1, tag("_"))
|
||||
),
|
||||
many0(alt((alphanumeric1, tag("_")))),
|
||||
)),
|
||||
|s| {
|
||||
let c = str_from_bytes(s);
|
||||
c.map(|syntax| match syntax {
|
||||
"import" => Token::Import,
|
||||
"if" => Token::If,
|
||||
"else" => Token::Else,
|
||||
"let" => Token::Let,
|
||||
"func" => Token::Func,
|
||||
"return" => Token::Return,
|
||||
"true" => Token::Bool(true),
|
||||
"false" => Token::Bool(false),
|
||||
_ => Token::Identifier(syntax.to_string()),
|
||||
})
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
// Integers
|
||||
fn lex_integer(input: &Bytes) -> IResult<&Bytes, Token> {
|
||||
map(
|
||||
map_res(
|
||||
map_res(digit1, str_from_bytes),
|
||||
str_to_from_str,
|
||||
),
|
||||
Token::Int,
|
||||
)(input)
|
||||
}
|
||||
|
||||
// Illegal tokens
|
||||
fn lex_illegal(input: &Bytes) -> IResult<&Bytes, Token> {
|
||||
map(take(1usize), |_| Token::Illegal)(input)
|
||||
}
|
||||
|
||||
fn lex_comment(input: &Bytes) -> IResult<&Bytes, ()> {
|
||||
let (i1, c1) = take(2usize)(input)?;
|
||||
if c1.as_bytes() == b"//" {
|
||||
let (i2, _) = take_until("\n")(i1)?;
|
||||
let (i3, _) = take(1usize)(i2)?;
|
||||
let (i4, _) = multispace0(i3)?;
|
||||
let (i5, _) = lex_comment(i4)?;
|
||||
Ok((i5, ()))
|
||||
} else { Ok((input, ())) }
|
||||
}
|
||||
|
||||
// Tokens
|
||||
fn lex_token(input: &Bytes) -> IResult<&Bytes, Token> {
|
||||
let (i1, _) = lex_comment(input)?;
|
||||
alt((
|
||||
lex_operator_punctuation,
|
||||
lex_reserved_identifier,
|
||||
lex_string,
|
||||
lex_integer,
|
||||
lex_illegal,
|
||||
))(i1)
|
||||
}
|
||||
|
||||
fn lex_tokens(input: &Bytes) -> IResult<&Bytes, Vec<Token>> {
|
||||
many0(delimited(multispace0, lex_token, multispace0))(input)
|
||||
}
|
||||
|
||||
pub struct Lexer;
|
||||
impl Lexer {
|
||||
pub fn lex_tokens(input: &Bytes) -> IResult<&Bytes, Vec<Token>> {
|
||||
lex_tokens(input).map(|(slice, result)| (slice, [&result[..], &vec![Token::EndOfFile][..]].concat()))
|
||||
}
|
||||
}
|
|
@ -1,5 +1 @@
|
|||
pub mod model;
|
||||
pub mod helper;
|
||||
|
||||
pub mod lex;
|
||||
pub mod parser;
|
||||
pub mod parse;
|
|
@ -1,162 +0,0 @@
|
|||
use std::iter::Enumerate;
|
||||
|
||||
use nom::{InputTake, Needed, InputIter, InputLength};
|
||||
|
||||
/// A lexical token.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    // Special markers
    Illegal,
    EndOfFile,

    // Names and literals
    Identifier(String),
    String(String),
    Int(i64),
    Bool(bool),

    // Assignment and type annotations
    Assign,
    Typehint,
    Returnhint,

    // Arithmetic and logical operators
    Plus,
    Minus,
    Mul,
    Div,
    Not,

    // Comparison operators
    Eq,
    NEq,
    Lt,
    Gt,
    Lte,
    Gte,

    // Delimiters and separators
    LParen,
    RParen,
    LBrace,
    RBrace,
    Semicolon,
    Colon,
    Comma,

    // Keywords
    If,
    Else,
    Let,
    Func,
    Return,
    Import,
}
|
||||
|
||||
/// Token struct with position information.
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
pub struct Tokens<'a> {
|
||||
pub tokens: &'a [Token],
|
||||
pub start: usize, pub end: usize,
|
||||
}
|
||||
|
||||
impl<'a> Tokens<'a> {
|
||||
pub fn new(tokens: &'a [Token]) -> Self {
|
||||
Tokens { tokens, start: 0, end: tokens.len(), }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> InputTake for Tokens<'a> {
|
||||
#[inline]
|
||||
fn take(&self, count: usize) -> Self {
|
||||
Tokens {
|
||||
tokens: &self.tokens[0..count],
|
||||
start: 0,
|
||||
end: count,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn take_split(&self, count: usize) -> (Self, Self) {
|
||||
let (prefix, suffix) = self.tokens.split_at(count);
|
||||
let first = Tokens {
|
||||
tokens: prefix,
|
||||
start: 0,
|
||||
end: prefix.len(),
|
||||
};
|
||||
let second = Tokens {
|
||||
tokens: suffix,
|
||||
start: 0,
|
||||
end: suffix.len(),
|
||||
};
|
||||
(second, first)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> InputLength for Tokens<'a> {
|
||||
#[inline]
|
||||
fn input_len(&self) -> usize {
|
||||
self.tokens.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> InputIter for Tokens<'a> {
|
||||
type Item = &'a Token;
|
||||
type Iter = Enumerate<::std::slice::Iter<'a, Token>>;
|
||||
type IterElem = ::std::slice::Iter<'a, Token>;
|
||||
|
||||
#[inline]
|
||||
fn iter_indices(&self) -> Enumerate<::std::slice::Iter<'a, Token>> {
|
||||
self.tokens.iter().enumerate()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn iter_elements(&self) -> ::std::slice::Iter<'a, Token> {
|
||||
self.tokens.iter()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn position<P>(&self, predicate: P) -> Option<usize>
|
||||
where P: Fn(Self::Item) -> bool {
|
||||
self.tokens.iter().position(predicate)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn slice_index(&self, count: usize) -> Result<usize, Needed> {
|
||||
if self.tokens.len() >= count { Ok(count) }
|
||||
else { Err(Needed::Unknown) }
|
||||
}
|
||||
}
|
||||
|
||||
pub type Program = Vec<Stmt>;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Stmt {
|
||||
Import(Literal),
|
||||
|
||||
Let(Ident, Ident, Expr),
|
||||
Func(Ident, Vec<Ident>, Ident, Vec<Stmt>),
|
||||
Call(Ident, Vec<Expr>),
|
||||
Return(Expr),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Expr {
|
||||
Ident(Ident), Literal(Literal),
|
||||
Array(Vec<Expr>),
|
||||
Prefix(Prefix, Box<Expr>),
|
||||
Infix(Infix, Box<Expr>, Box<Expr>),
|
||||
If {
|
||||
cond: Box<Expr>,
|
||||
then: Program,
|
||||
else_: Option<Program>,
|
||||
},
|
||||
Func {
|
||||
name: Ident,
|
||||
args: Vec<Ident>,
|
||||
body: Program,
|
||||
},
|
||||
Call {
|
||||
func: Box<Expr>,
|
||||
args: Vec<Expr>,
|
||||
},
|
||||
}
|
||||
|
||||
/// A literal value.
#[derive(Clone, Debug, PartialEq)]
pub enum Literal {
    Int(i64),
    Bool(bool),
    String(String),
}
|
||||
|
||||
/// An identifier, wrapping its name.
#[derive(Clone, Debug, PartialEq)]
pub struct Ident(
    /// The identifier's text.
    pub String,
);
|
||||
|
||||
/// Prefix (unary) operators.
#[derive(Clone, Debug, PartialEq)]
pub enum Prefix {
    Plus,
    Minus,
    Not,
}
|
||||
|
||||
/// Infix (binary) operators.
#[derive(Clone, Debug, PartialEq)]
pub enum Infix {
    // Arithmetic
    Plus,
    Minus,
    Mul,
    Div,
    // Comparison
    Eq,
    NEq,
    Lt,
    Gt,
    Lte,
    Gte,
}
|
||||
|
||||
/// Operator binding strength.
///
/// The variants are declared from weakest to strongest; the derived
/// `PartialOrd` compares them in declaration order.
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub enum Precedence {
    Lowest,
    Equals,
    LessGreater,
    Sum,
    Product,
    Call,
}
|
117
src/front/parse.rs
Normal file
117
src/front/parse.rs
Normal file
|
@ -0,0 +1,117 @@
|
|||
use chumsky::prelude::*;
|
||||
|
||||
/// Syntax tree node produced by the parser in this module.
#[derive(Clone, Debug)]
pub enum Expr {
    /// Integer literal.
    Int(i64),
    /// Floating-point literal.
    Float(f64),
    /// Reference to a name.
    Ident(String),
    /// Prefix operator `op` applied to `expr`.
    Unary {
        op: String,
        expr: Box<Self>,
    },
    /// Infix operator `op` applied to `left` and `right`.
    Binary {
        op: String,
        left: Box<Self>,
        right: Box<Self>,
    },

    /// `let name = value;`
    Let {
        name: String,
        value: Box<Self>,
    },
    /// `fun name args... = body;`
    Fun {
        name: String,
        args: Vec<String>,
        body: Box<Self>,
    },
    /// `name(args, ...)`
    Call {
        name: String,
        args: Vec<Self>,
    },
}
|
||||
|
||||
fn expr_parser() -> impl Parser<char, Expr, Error = Simple<char>> {
|
||||
let ident = text::ident().padded();
|
||||
|
||||
let expr = recursive(|expr| {
|
||||
let int = text::int(10)
|
||||
.map(|s: String| Expr::Int(s.parse().unwrap()));
|
||||
|
||||
let float = text::int(10)
|
||||
.then_ignore(just('.'))
|
||||
.chain::<char, _, _>(text::digits(10))
|
||||
.collect::<String>()
|
||||
.map(|s: String| Expr::Float(s.parse().unwrap()));
|
||||
|
||||
let call = ident
|
||||
.then(expr.clone()
|
||||
.separated_by(just(','))
|
||||
.allow_trailing()
|
||||
.delimited_by(just('('), just(')')))
|
||||
.map(|(name, args)| Expr::Call { name, args });
|
||||
|
||||
let atom = int
|
||||
.or(float)
|
||||
.or(call)
|
||||
.or(ident.map(Expr::Ident))
|
||||
.or(expr.delimited_by(just('('), just(')')))
|
||||
.labelled("atom");
|
||||
|
||||
let unary = choice((just('-'), just('!')))
|
||||
.repeated()
|
||||
.then(atom)
|
||||
.foldr(|op, rhs| Expr::Unary { op: op.to_string(), expr: Box::new(rhs) }).labelled("unary");
|
||||
|
||||
let factor = unary.clone()
|
||||
.then(choice((just('*'), just('/')))
|
||||
.then(unary)
|
||||
.repeated())
|
||||
.foldl(|lhs, (op, rhs)| Expr::Binary {
|
||||
op: op.to_string(),
|
||||
left: Box::new(lhs),
|
||||
right: Box::new(rhs)
|
||||
}).labelled("factor");
|
||||
|
||||
let term = factor.clone()
|
||||
.then(choice((just('+'), just('-')))
|
||||
.then(factor)
|
||||
.repeated())
|
||||
.foldl(|lhs, (op, rhs)| Expr::Binary {
|
||||
op: op.to_string(),
|
||||
left: Box::new(lhs),
|
||||
right: Box::new(rhs)
|
||||
}).labelled("term");
|
||||
|
||||
term.padded()
|
||||
}).labelled("expression");
|
||||
|
||||
let declare = recursive(|decl| {
|
||||
let declare_var = text::keyword("let")
|
||||
.ignore_then(ident)
|
||||
.then_ignore(just('='))
|
||||
.then(expr.clone())
|
||||
.then_ignore(just(';'))
|
||||
.map(|(name, rhs)| Expr::Let {
|
||||
name,
|
||||
value: Box::new(rhs),
|
||||
});
|
||||
|
||||
let declare_fun = text::keyword("fun")
|
||||
.ignore_then(ident)
|
||||
.then(ident.repeated())
|
||||
.then_ignore(just('='))
|
||||
.then(expr.clone())
|
||||
.then_ignore(just(';'))
|
||||
.map(|((name, args), body)| Expr::Fun {
|
||||
name,
|
||||
args,
|
||||
body: Box::new(body),
|
||||
});
|
||||
|
||||
declare_var
|
||||
.or(declare_fun)
|
||||
.or(expr)
|
||||
.padded()
|
||||
});
|
||||
|
||||
declare
|
||||
}
|
||||
|
||||
pub fn parser() -> impl Parser<char, Vec<Expr>, Error = Simple<char>> {
|
||||
expr_parser()
|
||||
.repeated()
|
||||
.then_ignore(end())
|
||||
}
|
|
@ -1,307 +0,0 @@
|
|||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::take,
|
||||
combinator::{verify, map, opt},
|
||||
Err,
|
||||
error::{Error, ErrorKind},
|
||||
IResult,
|
||||
multi::many0,
|
||||
sequence::{terminated, tuple, pair, preceded, delimited}, error_position,
|
||||
};
|
||||
|
||||
use super::model::{Token, Tokens, Precedence, Infix, Program, Stmt, Expr, Ident, Literal, Prefix};
|
||||
|
||||
// Generates a parser named `$func_name` that consumes exactly one token and
// succeeds only when that token equals `$tag`.
macro_rules! tag_token (
    ($func_name:ident, $tag:expr) => (
        fn $func_name(tokens: Tokens) -> IResult<Tokens, Tokens> {
            verify(take(1usize), |taken: &Tokens| taken.tokens.first() == Some(&$tag))(tokens)
        }
    )
);
|
||||
|
||||
tag_token!(tag_import, Token::Import);
|
||||
|
||||
tag_token!(tag_let, Token::Let);
|
||||
tag_token!(tag_func, Token::Func);
|
||||
tag_token!(tag_return, Token::Return);
|
||||
tag_token!(tag_if, Token::If);
|
||||
tag_token!(tag_else, Token::Else);
|
||||
|
||||
tag_token!(tag_plus, Token::Plus);
|
||||
tag_token!(tag_minus, Token::Minus);
|
||||
tag_token!(tag_not, Token::Not);
|
||||
|
||||
tag_token!(tag_assign, Token::Assign);
|
||||
tag_token!(tag_typehint, Token::Typehint);
|
||||
tag_token!(tag_returnhint, Token::Return);
|
||||
tag_token!(tag_semicolon, Token::Semicolon);
|
||||
tag_token!(tag_lparen, Token::LParen);
|
||||
tag_token!(tag_rparen, Token::RParen);
|
||||
tag_token!(tag_lbrace, Token::LBrace);
|
||||
tag_token!(tag_rbrace, Token::RBrace);
|
||||
tag_token!(tag_comma, Token::Comma);
|
||||
tag_token!(tag_end_of_file, Token::EndOfFile);
|
||||
|
||||
fn infix_operator(token: &Token) -> (Precedence, Option<Infix>) {
|
||||
match *token {
|
||||
Token::Eq => (Precedence::Equals, Some(Infix::Eq)),
|
||||
Token::NEq => (Precedence::Equals, Some(Infix::NEq)),
|
||||
Token::Lt => (Precedence::LessGreater, Some(Infix::Lt)),
|
||||
Token::Gt => (Precedence::LessGreater, Some(Infix::Gt)),
|
||||
Token::Lte => (Precedence::LessGreater, Some(Infix::Lte)),
|
||||
Token::Gte => (Precedence::LessGreater, Some(Infix::Gte)),
|
||||
Token::Plus => (Precedence::Sum, Some(Infix::Plus)),
|
||||
Token::Minus => (Precedence::Sum, Some(Infix::Minus)),
|
||||
Token::Mul => (Precedence::Product, Some(Infix::Mul)),
|
||||
Token::Div => (Precedence::Product, Some(Infix::Div)),
|
||||
Token::LParen => (Precedence::Call, None),
|
||||
_ => (Precedence::Lowest, None),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_literal(input: Tokens) -> IResult<Tokens, Literal> {
|
||||
let (i1, t1) = take(1usize)(input)?;
|
||||
if t1.tokens.is_empty() { Err(Err::Error(Error::new(input, ErrorKind::Tag))) }
|
||||
else {
|
||||
match t1.tokens[0].clone() {
|
||||
Token::Int(i) => Ok((i1, Literal::Int(i))),
|
||||
Token::String(s) => Ok((i1, Literal::String(s))),
|
||||
Token::Bool(b) => Ok((i1, Literal::Bool(b))),
|
||||
_ => Err(Err::Error(Error::new(input, ErrorKind::Tag))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_literal_expr(input: Tokens) -> IResult<Tokens, Expr> {
|
||||
map(parse_literal, Expr::Literal)(input)
|
||||
}
|
||||
|
||||
fn parse_atom_expr(input: Tokens) -> IResult<Tokens, Expr> {
|
||||
alt((
|
||||
parse_literal_expr,
|
||||
parse_ident_expr,
|
||||
parse_prefix_expr,
|
||||
parse_paren_expr,
|
||||
parse_if_expr,
|
||||
))(input)
|
||||
}
|
||||
|
||||
fn parse_paren_expr(input: Tokens) -> IResult<Tokens, Expr> {
|
||||
delimited(tag_lparen, parse_expr_lowest, tag_rparen)(input)
|
||||
}
|
||||
|
||||
fn parse_ident(input: Tokens) -> IResult<Tokens, Ident> {
|
||||
let (i1, t1) = take(1usize)(input)?;
|
||||
if t1.tokens.is_empty() { Err(Err::Error(Error::new(input, ErrorKind::Tag))) }
|
||||
else {
|
||||
match t1.tokens[0].clone() {
|
||||
Token::Identifier(name) => Ok((i1, Ident(name))),
|
||||
_ => Err(Err::Error(Error::new(input, ErrorKind::Tag))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_ident_expr(input: Tokens) -> IResult<Tokens, Expr> {
|
||||
map(parse_ident, Expr::Ident)(input)
|
||||
}
|
||||
|
||||
fn parse_params(input: Tokens) -> IResult<Tokens, Vec<Ident>> {
|
||||
map(
|
||||
pair(parse_ident, many0(preceded(tag_comma, parse_ident))),
|
||||
|(p, ps)| [&vec![p][..], &ps[..]].concat(),
|
||||
)(input)
|
||||
}
|
||||
|
||||
/// Always succeeds with an empty parameter list, consuming nothing.
fn empty_params(input: Tokens) -> IResult<Tokens, Vec<Ident>> {
    Ok((input, Vec::new()))
}
|
||||
|
||||
fn parse_call_expr(input: Tokens, func_handle: Expr) -> IResult<Tokens, Expr> {
|
||||
map(
|
||||
delimited(
|
||||
tag_lparen,
|
||||
parse_exprs,
|
||||
tag_rparen,
|
||||
),
|
||||
|e| Expr::Call { func: Box::new(func_handle.clone()), args: e },
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn parse_infix_expr(input: Tokens, left: Expr) -> IResult<Tokens, Expr> {
|
||||
let (i1, t1) = take(1usize)(input)?;
|
||||
if t1.tokens.is_empty() { Err(Err::Error(error_position!(input, ErrorKind::Tag))) }
|
||||
else {
|
||||
let next = &t1.tokens[0];
|
||||
let (prec, op) = infix_operator(next);
|
||||
match op {
|
||||
None => Err(Err::Error(error_position!(input, ErrorKind::Tag))),
|
||||
Some(op) => {
|
||||
let (i2, right) = parse_expr_with(i1, prec)?;
|
||||
Ok((i2, Expr::Infix(op, Box::new(left), Box::new(right))))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_prefix_expr(input: Tokens) -> IResult<Tokens, Expr> {
|
||||
let (i1, t1) = alt((tag_plus, tag_minus, tag_not))(input)?;
|
||||
if t1.tokens.is_empty() { Err(Err::Error(error_position!(input, ErrorKind::Tag))) }
|
||||
else {
|
||||
let (i2, e) = parse_atom_expr(i1)?;
|
||||
match t1.tokens[0].clone() {
|
||||
Token::Plus => Ok((i2, Expr::Prefix(Prefix::Plus, Box::new(e)))),
|
||||
Token::Minus => Ok((i2, Expr::Prefix(Prefix::Minus, Box::new(e)))),
|
||||
Token::Not => Ok((i2, Expr::Prefix(Prefix::Not, Box::new(e)))),
|
||||
_ => Err(Err::Error(error_position!(input, ErrorKind::Tag))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_expr(input: Tokens, precedence: Precedence, left: Expr) -> IResult<Tokens, Expr> {
|
||||
let (i1, t1) = take(1usize)(input)?;
|
||||
|
||||
if t1.tokens.is_empty() { Ok((i1, left)) }
|
||||
else {
|
||||
let p = infix_operator(&t1.tokens[0]);
|
||||
match p {
|
||||
(Precedence::Call, _) if precedence < Precedence::Call => {
|
||||
let (i2, left2) = parse_call_expr(input, left)?;
|
||||
parse_expr(i2, precedence, left2)
|
||||
},
|
||||
(ref peek, _) if precedence < *peek => {
|
||||
let (i2, left2) = parse_infix_expr(input, left)?;
|
||||
parse_expr(i2, precedence, left2)
|
||||
},
|
||||
_ => Ok((input, left)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_if_expr(input: Tokens) -> IResult<Tokens, Expr> {
|
||||
map(
|
||||
tuple((
|
||||
tag_if,
|
||||
parse_expr_lowest,
|
||||
parse_block_stmt,
|
||||
parse_else_expr,
|
||||
)),
|
||||
|(_, cond, then, else_)| Expr::If { cond: Box::new(cond), then, else_ },
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn parse_else_expr(input: Tokens) -> IResult<Tokens, Option<Program>> {
|
||||
opt(preceded(tag_else, parse_block_stmt))(input)
|
||||
}
|
||||
|
||||
fn parse_comma_exprs(input: Tokens) -> IResult<Tokens, Expr> {
|
||||
preceded(tag_comma, parse_expr_lowest)(input)
|
||||
}
|
||||
|
||||
fn parse_exprs(input: Tokens) -> IResult<Tokens, Vec<Expr>> {
|
||||
map(
|
||||
pair(parse_expr_lowest, many0(parse_comma_exprs)),
|
||||
|(first, second)| [&vec![first][..], &second[..]].concat(),
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn parse_expr_with(input: Tokens, precedence: Precedence) -> IResult<Tokens, Expr> {
|
||||
let (i1, left) = parse_atom_expr(input)?;
|
||||
parse_expr(i1, precedence, left)
|
||||
}
|
||||
|
||||
fn parse_expr_lowest(input: Tokens) -> IResult<Tokens, Expr> {
|
||||
parse_expr_with(input, Precedence::Lowest)
|
||||
}
|
||||
|
||||
fn parse_return_stmt(input: Tokens) -> IResult<Tokens, Stmt> {
|
||||
map(
|
||||
delimited(
|
||||
tag_return,
|
||||
parse_expr_lowest,
|
||||
opt(tag_semicolon),
|
||||
),
|
||||
Stmt::Return,
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn parse_call_stmt(input: Tokens) -> IResult<Tokens, Stmt> {
|
||||
map(
|
||||
tuple((
|
||||
parse_ident,
|
||||
tag_lparen,
|
||||
parse_exprs,
|
||||
tag_rparen,
|
||||
opt(tag_semicolon),
|
||||
)),
|
||||
|(ident, _, args, _, _)| Stmt::Call(ident, args),
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn parse_block_stmt(input: Tokens) -> IResult<Tokens, Program> {
|
||||
delimited(tag_lbrace, many0(parse_stmt), tag_rbrace)(input)
|
||||
}
|
||||
|
||||
fn parse_func_stmt(input: Tokens) -> IResult<Tokens, Stmt> {
|
||||
map(
|
||||
tuple((
|
||||
tag_func,
|
||||
parse_ident,
|
||||
tag_typehint,
|
||||
tag_lparen,
|
||||
alt((parse_params, empty_params)),
|
||||
tag_rparen,
|
||||
tag_returnhint,
|
||||
parse_ident,
|
||||
tag_assign,
|
||||
parse_block_stmt,
|
||||
opt(tag_semicolon),
|
||||
)),
|
||||
|(_, ident, _, _, params, _, _, returntype, _, block, _)| Stmt::Func(ident, params, returntype, block),
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn parse_let_stmt(input: Tokens) -> IResult<Tokens, Stmt> {
|
||||
map(
|
||||
tuple((
|
||||
tag_let,
|
||||
parse_ident,
|
||||
tag_typehint,
|
||||
parse_ident,
|
||||
tag_assign,
|
||||
parse_expr_lowest,
|
||||
opt(tag_semicolon),
|
||||
)),
|
||||
|(_, ident, _, typehint, _, expr, _)| Stmt::Let(ident, typehint, expr),
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn parse_import(input: Tokens) -> IResult<Tokens, Stmt> {
|
||||
map(
|
||||
tuple((
|
||||
tag_import,
|
||||
parse_literal,
|
||||
opt(tag_semicolon),
|
||||
)),
|
||||
|(_, path, _)| Stmt::Import(path),
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn parse_stmt(input: Tokens) -> IResult<Tokens, Stmt> {
|
||||
alt((
|
||||
parse_import,
|
||||
parse_let_stmt,
|
||||
parse_func_stmt,
|
||||
parse_call_stmt,
|
||||
parse_return_stmt,
|
||||
))(input)
|
||||
}
|
||||
|
||||
fn parse_program(input: Tokens) -> IResult<Tokens, Program> {
|
||||
terminated(many0(parse_stmt), tag_end_of_file)(input)
|
||||
}
|
||||
|
||||
pub struct Parser;
|
||||
|
||||
impl Parser {
|
||||
pub fn parse(tokens: Tokens) -> IResult<Tokens, Program> {
|
||||
parse_program(tokens)
|
||||
}
|
||||
}
|
18
src/main.rs
18
src/main.rs
|
@ -1,5 +1,6 @@
|
|||
use std::fs;
|
||||
|
||||
use chumsky::Parser;
|
||||
use clap::Parser as ArgParser;
|
||||
|
||||
/// Arguments handler.
|
||||
|
@ -9,24 +10,15 @@ use args::{Args, Options};
|
|||
/// Front-end of the language.
|
||||
/// Contains lexer, parser and token types.
|
||||
pub mod front;
|
||||
use front::{lex::Lexer, parser::Parser, model::Tokens};
|
||||
use front::parse::parser;
|
||||
|
||||
fn main() {
|
||||
let args = Args::parse();
|
||||
match args.options {
|
||||
Options::Compile { input: src, ast: _print_ast } => {
|
||||
let bytes: Vec<u8> = fs::read(src).unwrap();
|
||||
let (_errs_, tokens) = Lexer::lex_tokens(&bytes).unwrap();
|
||||
let tokens = Tokens::new(&tokens);
|
||||
let ast = Parser::parse(tokens);
|
||||
match ast {
|
||||
Ok(ast) => {
|
||||
println!("{:#?}", ast);
|
||||
}
|
||||
Err(err) => {
|
||||
println!("{:#?}", err);
|
||||
}
|
||||
}
|
||||
let src = fs::read_to_string(src).expect("Failed to read file");
|
||||
let tokens = parser().parse_recovery(src.as_str());
|
||||
println!("{:?}", tokens);
|
||||
},
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue