From 850696a7abc8e6c4517821d44a3a111ed9aac110 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 14 Sep 2023 17:35:11 +0200 Subject: [PATCH] Parsing --- Cargo.lock | 151 +++++++------- Cargo.toml | 11 +- assets/examples/array.rhea | 7 + assets/examples/loops.rhea | 13 ++ assets/examples/match.rhea | 9 + assets/examples/rand.rhea | 6 + assets/libraries/math/constants.rhea | 5 +- assets/libraries/math/math.rhea | 25 ++- assets/libraries/rand/rand.rhea | 4 + assets/libraries/std/ecalls.rhea | 20 ++ assets/libraries/std/io.rhea | 7 +- assets/libraries/std/log.rhea | 30 ++- assets/libraries/std/std.rhea | 8 +- assets/libraries/std/terminal.rhea | 20 ++ src/main.rs | 64 ++---- src/memory.rs | 53 ----- src/syntax/ast.rs | 83 ++++++++ src/syntax/mod.rs | 3 + src/syntax/parser.rs | 284 +++++++++++++++++++++++++++ src/syntax/token.rs | 112 +++++++++++ src/utils.rs | 15 ++ 21 files changed, 743 insertions(+), 187 deletions(-) create mode 100644 assets/examples/array.rhea create mode 100644 assets/examples/loops.rhea create mode 100644 assets/examples/match.rhea create mode 100644 assets/examples/rand.rhea create mode 100644 assets/libraries/rand/rand.rhea create mode 100644 assets/libraries/std/ecalls.rhea create mode 100644 assets/libraries/std/terminal.rhea delete mode 100644 src/memory.rs create mode 100644 src/syntax/ast.rs create mode 100644 src/syntax/mod.rs create mode 100644 src/syntax/parser.rs create mode 100644 src/syntax/token.rs create mode 100644 src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index b2eea7e..8621c4b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,24 +13,43 @@ dependencies = [ "version_check", ] -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - [[package]] name = "beef" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" +[[package]] +name = "bumpalo" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chumsky" +version = "1.0.0-alpha.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc3172a80699de358070dd99f80ea8badc6cdf8ac2417cb5a96e6d81bf5fe06d" +dependencies = [ + "hashbrown", + "stacker", +] + [[package]] name = "fnv" version = "1.0.7" @@ -38,31 +57,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] -name = "getrandom" -version = "0.2.10" +name = "hashbrown" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ - "cfg-if", - "libc", - "wasi", + "ahash", ] [[package]] -name = "hashbrown" -version = "0.14.0" +name = "lasso" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2" dependencies = [ - "ahash", - "allocator-api2", + "hashbrown", ] [[package]] name = "libc" -version = "0.2.147" +version = "0.2.148" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" [[package]] name = "logos" @@ -102,21 +118,24 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.33" @@ -126,36 +145,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - [[package]] name = "regex-syntax" version = "0.6.29" @@ -166,16 +155,30 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" name = "rhea" version = "0.1.0" dependencies = [ - "hashbrown", + "bumpalo", + "chumsky", + "lasso", "logos", - "rand", +] + +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", ] [[package]] name = "syn" -version = "2.0.29" +version = "2.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +checksum = "9caece70c63bfba29ec2fed841a09851b14a235c60010fa4de58089b6c025668" dependencies = [ "proc-macro2", "quote", @@ -184,9 +187,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "version_check" @@ -195,7 +198,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +name = "winapi" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index 9f78ace..a11643b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,10 @@ [package] -name = "rhea" +name = "rhea" version = "0.1.0" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] -logos = "*" -hashbrown = "*" -rand = "*" +bumpalo = { version = "3", features = ["collections"] } +chumsky = "1.0.0-alpha" +lasso = "0.7" +logos = "0.13" diff --git a/assets/examples/array.rhea b/assets/examples/array.rhea new file mode 100644 index 0000000..27dea36 --- /dev/null +++ b/assets/examples/array.rhea @@ -0,0 +1,7 @@ +var std = include "std"; +var print = std.print; + +func main(){ + var arr = [123, 456, 789]; + print(arr[1]); +} \ No newline at end of file diff --git a/assets/examples/loops.rhea b/assets/examples/loops.rhea new file mode 100644 index 0000000..ca2167f --- /dev/null +++ b/assets/examples/loops.rhea @@ -0,0 +1,13 @@ +func main(){ + var i = 0; + loop { + match i { + 10 -> break; + } + i = i + 1; + } + loop {} +} + + + diff --git a/assets/examples/match.rhea b/assets/examples/match.rhea new file mode 100644 index 0000000..1062754 --- /dev/null +++ b/assets/examples/match.rhea @@ -0,0 +1,9 @@ +var std = import "std"; +var print = std.print; + +func main() { + match 2 { + 1 -> { print("One") } + 2 -> { print("Two") } + } +} \ No newline at end of file diff --git a/assets/examples/rand.rhea b/assets/examples/rand.rhea new file mode 100644 index 0000000..b9113b2 --- /dev/null +++ b/assets/examples/rand.rhea @@ -0,0 +1,6 @@ +var rand = include "rand"; +var rd64 = rand.random_u64; + +func main(){ + var abc = rd64(); +} \ No newline at end of file diff --git a/assets/libraries/math/constants.rhea b/assets/libraries/math/constants.rhea index 50db5b9..9abab85 100644 --- a/assets/libraries/math/constants.rhea +++ b/assets/libraries/math/constants.rhea @@ -3,4 +3,7 @@ const C = 299_792_458; const pi = 3.141592653589793; -const tau = 6.283185307179586; \ No newline at end of file +const π = pi; + +const tau = 6.283185307179586; +const 𝜏 = tau; \ No newline at end of file diff --git a/assets/libraries/math/math.rhea b/assets/libraries/math/math.rhea index 8e22b41..e1f25ea 100644 --- a/assets/libraries/math/math.rhea +++ b/assets/libraries/math/math.rhea @@ -1 +1,24 @@ -var constants = include "constants"; \ No newline at end of file +var constants = include "constants"; + +func square_root(value){ + match value{ + 0..1 -> return value; + value -> { + var y = x; + var z = (y + (x/y)) / 2; + // NOTE: not finalized syntax + while abs(y - z) >= 0.00001{ + y = z + z = (y + (x/y)) / 2 + } + return z; + } + } +} +var sqrt = square_root; +var √ = square_root; + +func absolute_value(value){ + +} +var abs = absolute_value; \ No newline at end of file diff --git a/assets/libraries/rand/rand.rhea b/assets/libraries/rand/rand.rhea new file mode 100644 index 0000000..a516db1 --- /dev/null +++ b/assets/libraries/rand/rand.rhea @@ -0,0 +1,4 @@ +func random_u64(){ + // TODO: randomness + return 3; +} \ No newline at end of file diff --git a/assets/libraries/std/ecalls.rhea b/assets/libraries/std/ecalls.rhea new file mode 100644 index 0000000..c884f9d --- /dev/null +++ b/assets/libraries/std/ecalls.rhea @@ -0,0 +1,20 @@ + +var (error,warn,info,debug,trace) = io.log.(error,warn,info,debug,trace); + + + +// TODO: define and add in IDL shenanigans for a proper IPC Protocol +func make_ipc_buffer(bounded: bool, length: u64) { + match bounded{ + true -> match length { + 0 -> error("Bound array has length of zero") + } + } + + asm { + li r254, bounded + li r253, length + } + // Return a pointer to a memory address with `length` + return (123, 456); +} \ No newline at end of file diff --git a/assets/libraries/std/io.rhea b/assets/libraries/std/io.rhea index 90f0d10..3dc74b6 100644 --- a/assets/libraries/std/io.rhea +++ b/assets/libraries/std/io.rhea @@ -1 +1,6 @@ -var log = include "log"; \ No newline at end of file +var log = include "log"; + +func print(value) { + // TODO: define an api for output +} + diff --git a/assets/libraries/std/log.rhea b/assets/libraries/std/log.rhea index 3237ccb..1b6a90b 100644 --- a/assets/libraries/std/log.rhea +++ b/assets/libraries/std/log.rhea @@ -1,5 +1,25 @@ -func error(){} -func warn(){} -func info(){} -func debug(){} -func trace(){} \ No newline at end of file +var io = include "io"; +var print = io.print; + +// TODO: include time in the log + + +func error(value){ + print("error " + value) +} + +func warn(value){ + print("warn " + value) +} + +func info(value){ + print("info " + value) +} + +func debug(value){ + print("debug " + value) +} + +func trace(value){ + print("trace " + value) +} \ No newline at end of file diff --git a/assets/libraries/std/std.rhea b/assets/libraries/std/std.rhea index 4b29b34..c38ee45 100644 --- a/assets/libraries/std/std.rhea +++ b/assets/libraries/std/std.rhea @@ -1,2 +1,6 @@ -var io = include "std.io"; -var math = include "math" \ No newline at end of file +var io = include "io"; + +var math = include "math"; +var ecalls = include "ecalls"; + +var print = io.print; \ No newline at end of file diff --git a/assets/libraries/std/terminal.rhea b/assets/libraries/std/terminal.rhea new file mode 100644 index 0000000..53c38d5 --- /dev/null +++ b/assets/libraries/std/terminal.rhea @@ -0,0 +1,20 @@ +var ecalls = include "ecalls"; +var make_ipc_buffer = ecalls.make_ipc_buffer; + +var terminal; +terminal.init = terminal_init; +terminal.write = terminal_write; + +func terminal_init(){ + // setup a buffer with the TextIO protocol + var buffer = make_ipc_buffer(false, 0); + terminal.buffer = buffer; +} + +func terminal_write(value: String){ + // TODO: write value into buffer according to TextIO protocol + + + +} + diff --git a/src/main.rs b/src/main.rs index d80f0d9..da3537e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,61 +1,21 @@ // Rhea +use bumpalo::Bump; use logos::Logos; -use memory::{Variable, VariableHashmap, VariableType}; +use std::io::{stdin, Read}; +use utils::default; -mod memory; +mod syntax; +mod utils; -#[derive(Logos, Debug, PartialEq)] -#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens -enum Token { - #[token(".")] - Period, - #[token("(")] - LeftParen, +fn main() -> Result<(), Box> { + let mut buf = default(); + stdin().read_to_string(&mut buf)?; - #[token("{")] - LeftBrace, + let lexer = syntax::token::Token::lexer_with_extras(&buf, default()); + let arena = Bump::new(); + syntax::parser::parse_lexer(lexer, &arena); - #[token(")")] - RightParen, - - #[token("}")] - RightBrace, - - #[token("include")] - Include, - - #[token("=")] - Equals, - - #[token(";")] - Semicolon, - - #[token("\"")] - Quote, - - // Or regular expressions. - #[regex("[a-zA-Z]+")] - Text, -} - -fn main() { - let mut memmap = Vec::new(); - let mut varmap: VariableHashmap = hashbrown::HashMap::new(); - - let variable_name = "abcd".to_string(); - let variable_type = Some(memory::VariableType::Signed); - let constant = false; - let length = 8; - let abc = Variable::new(&mut memmap, variable_type, constant, length); - - varmap.insert(variable_name, abc); - - let mut lex = Token::lexer(include_str!("../assets/examples/library.rhea")); - for x in lex { - println!("{:?}", x); - } - - //todo compile to hb + Ok(()) } diff --git a/src/memory.rs b/src/memory.rs deleted file mode 100644 index 05793ea..0000000 --- a/src/memory.rs +++ /dev/null @@ -1,53 +0,0 @@ -use std::ops::Range; - -// I do not like std -use hashbrown::HashMap; - -pub type MemoryMap = Vec<(u64, u64)>; - -pub type VariableHashmap = HashMap; - -pub enum VariableType { - Unsigned, - Signed, - String, -} - -pub struct Variable { - vtype: VariableType, - constant: bool, - memory_addr: u64, - length: u64, -} -impl Variable { - pub fn new( - memmap: &mut MemoryMap, - variable_type: Option, - constant: bool, - length: u64, - ) -> Self { - let addr = get_random_addr_and_validate(memmap, length); - //todo type guessing - let vtype = VariableType::String; - Self { - vtype, - constant, - memory_addr: addr, - length, - } - } -} - -use rand::prelude::*; - -pub fn get_random_addr_and_validate(memmap: &mut MemoryMap, length: u64) -> u64 { - let raddr: u64 = rand::random(); - // TODO: validate fr later - // for (x, _) in memmap.iter() { - // if *x == raddr { - // } - // } - - memmap.push((raddr, raddr + length)); - raddr -} diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs new file mode 100644 index 0000000..fd54f81 --- /dev/null +++ b/src/syntax/ast.rs @@ -0,0 +1,83 @@ +use {super::token::IntLit, chumsky::span::SimpleSpan, lasso::Spur}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Spanned { + pub item: T, + pub span: SimpleSpan, +} + +impl Spanned { + #[inline] + pub fn new(item: T, span: SimpleSpan) -> Self { + Self { item, span } + } +} + +pub type SpanExpr<'a> = Spanned>; +pub type ExprRef<'a> = &'a SpanExpr<'a>; +pub type ExprList<'a> = &'a [SpanExpr<'a>]; +pub type Ident = Spur; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Expr<'a> { + Ident(Ident), + Path(ExprList<'a>), + Literal(Literal), + Call(ExprRef<'a>, ExprList<'a>), + Binary(Spanned, ExprRef<'a>, ExprRef<'a>), + Unary(Spanned, ExprRef<'a>), + BindLocal(Spanned, ExprRef<'a>, Option>), + BindIn( + Spanned, + ExprRef<'a>, + ExprList<'a>, + Option>, + ), + Set(ExprRef<'a>, ExprRef<'a>), + Match(ExprRef<'a>, &'a [(Spanned, SpanExpr<'a>)]), + Func(&'a [(Spanned, Spanned)], Spanned, ExprRef<'a>), + Block(ExprList<'a>), + Unit, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Type { + Ident(Ident), + Unit, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Pattern { + Ident(Ident), + Literal(Literal), + None, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Literal { + String(Spur), + Integer(IntLit), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BinaryOperator { + Plus, + Minus, + Star, + Slash, + And, + VLine, + Lt, + Gt, + Equ, + Nequ, + LtEqu, + GtEqu, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum UnaryOperator { + Tilde, + Minus, + Star, +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs new file mode 100644 index 0000000..716da41 --- /dev/null +++ b/src/syntax/mod.rs @@ -0,0 +1,3 @@ +pub mod ast; +pub mod parser; +pub mod token; diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs new file mode 100644 index 0000000..3dd8aef --- /dev/null +++ b/src/syntax/parser.rs @@ -0,0 +1,284 @@ +use super::ast::Type; + +use { + super::{ + ast::{BinaryOperator, Expr, Literal, Pattern, SpanExpr, Spanned, UnaryOperator}, + token::Token, + }, + crate::utils::Pipe, + bumpalo::Bump, + chumsky::{ + extra::Full, + input::{Stream, ValueInput}, + prelude::*, + }, + logos::Lexer, +}; + +/// Equivalently-named unit variant mapping +macro_rules! equivmap { + ($src:ident, $target:ident, [$variant0:ident $(, $variant:ident)* $(,)?] $(,)?) => { + just($src::$variant0).to($target::$variant0) + $(.or(just($src::$variant).to($target::$variant)))* + }; +} + +fn expr<'a, I>() -> impl Parser<'a, I, SpanExpr<'a>, Extra<'a>> + Clone +where + I: Input<'a, Token = Token, Span = SimpleSpan> + ValueInput<'a>, +{ + recursive(|expr| { + let ident = select!(Token::Ident(id) => id); + + let literal = select! { + Token::Int(a) => Literal::Integer(a), + Token::String(a) => Literal::String(a) + }; + + let pattern = select! { + Token::Ident(id) => Pattern::Ident(id), + Token::Underscore => Pattern::None, + } + .or(literal.map(Pattern::Literal)) + .map_with_span(Spanned::new); + + let type_ = just([Token::LeftParen, Token::RightParen]) + .to(Type::Unit) + .or(ident.map(Type::Ident)) + .map_with_span(Spanned::new); + + let block = expr + .clone() + .separated_by(just(Token::Semicolon)) + .allow_trailing() + .pipe(arena_collect) + .delimited_by(just(Token::LeftCurly), just(Token::RightCurly)); + + let func = just(Token::Func) + .ignore_then( + pattern + .then_ignore(just(Token::Colon)) + .then(type_) + .separated_by(just(Token::Comma)) + .allow_trailing() + .pipe(arena_collect) + .delimited_by(just(Token::LeftParen), just(Token::RightParen)), + ) + .then_ignore(just(Token::Colon)) + .then(type_) + .then( + just(Token::Equ) + .ignore_then(expr.clone()) + .or(block.clone().map(Expr::Block).map_with_span(Spanned::new)), + ) + .map_with_state(|((params, ret), expr), _, state| { + Expr::Func(params, ret, state.arena.alloc(expr)) + }); + + let atom = literal + .map(Expr::Literal) + .or(just([Token::LeftParen, Token::RightParen]).to(Expr::Unit)) + .or(ident.map(Expr::Ident)) + .or(func) + .map_with_span(Spanned::new) + .or(expr + .clone() + .delimited_by(just(Token::LeftParen), just(Token::RightParen))); + + // (expr1, expr2, …) + let call = atom.clone().foldl_with_state( + expr.clone() + .separated_by(just(Token::Comma)) + .allow_trailing() + .pipe(arena_collect) + .delimited_by(just(Token::LeftParen), just(Token::RightParen)) + .map_with_span(Spanned::new) + .repeated(), + |expr, paramlist, state: &mut State| { + Spanned::new( + Expr::Call(state.arena.alloc(expr), paramlist.item), + merge_spans(expr.span, paramlist.span), + ) + }, + ); + + let path = call + .clone() + .map_with_state(|item, _, state| bumpalo::vec![in state.arena; item]) + .foldl( + just(Token::Dot).ignore_then(call).repeated(), + |mut v, expr| { + v.push(expr); + v + }, + ) + .map(|v| Expr::Path(v.into_bump_slice())) + .map_with_span(Spanned::new); + + /* let unary = equivmap!(Token, UnaryOperator, [Minus, Tilde]) + .map_with_span(Spanned::new) + .repeated() + .foldr_with_state(call, |op, expr, state| { + Spanned::new( + Expr::Unary(op, state.arena.alloc(expr)), + merge_spans(op.span, expr.span), + ) + }); + */ + + let unary = path.foldl_with_state( + just([Token::Dot, Token::Star]) + .to(UnaryOperator::Star) + .or(just(Token::Tilde).to(UnaryOperator::Tilde)) + .map_with_span(Spanned::new) + .repeated(), + |expr, op, state| { + Spanned::new( + Expr::Unary(op, state.arena.alloc(expr)), + merge_spans(expr.span, op.span), + ) + }, + ); + + // OP + let binary = unary.clone().foldl_with_state( + equivmap!( + Token, + BinaryOperator, + [Plus, Minus, Star, Slash, And, VLine, Lt, Gt, Equ, Nequ, LtEqu, GtEqu], + ) + .map_with_span(Spanned::new) + .then(unary) + .repeated(), + |l, (op, r), state: &mut State| { + Spanned::new( + Expr::Binary(op, state.arena.alloc(l), state.arena.alloc(r)), + merge_spans(l.span, r.span), + ) + }, + ); + + let bind = { + let start = pattern.then_ignore(just(Token::Colon)).then(expr.clone()); // := + let else_ = just(Token::Else).ignore_then(block.clone()).or_not(); // else {…} + + // := [else {…}] + let local = start.clone().then(else_.clone()).map_with_state( + |((pat, expr), else_), _, state| { + Expr::BindLocal(pat, &*state.arena.alloc(expr), else_) + }, + ); + + // := {…} else {…} + let in_ = start.then(block.clone()).then(else_).map_with_state( + |(((pat, expr), block), else_), _, state| { + Expr::BindIn(pat, &*state.arena.alloc(expr), block, else_) + }, + ); + + in_.or(local) + }; + + // + let set = atom + .clone() + .then_ignore(just(Token::LArrow)) + .then(expr.clone()) + .map_with_state(|(place, expr), _, state| { + Expr::Set(state.arena.alloc(place), state.arena.alloc(expr)) + }); + + // .match { , … } + let match_ = atom + .clone() + .then_ignore(just([Token::Dot, Token::Match])) + .then( + pattern + .then_ignore(just(Token::RArrow)) + .then(expr) + .separated_by(just(Token::Comma)) + .allow_trailing() + .pipe(arena_collect) + .delimited_by(just(Token::LeftCurly), just(Token::RightCurly)), + ) + .map_with_state(|(expr, branches), _, state| { + Expr::Match(state.arena.alloc(expr), branches) + }); + + bind.or(set) + .or(match_) + .or(block.map(Expr::Block)) + .map_with_span(Spanned::new) + .or(binary) + .or(atom) + }) +} + +pub struct State<'a> { + pub arena: &'a Bump, +} + +type Extra<'a> = Full, State<'a>, ()>; +type ParseResult = (); + +pub fn parse_input<'a>( + input: impl ValueInput<'a, Token = Token, Span = SimpleSpan>, + arena: &'a Bump, +) -> ParseResult { + println!( + "{:?}", + expr() + .separated_by(just(Token::Semicolon)) + .allow_trailing() + .pipe(arena_collect) + .parse_with_state(input, &mut State { arena }) + ); +} + +pub fn parse_iter( + input: impl Iterator, + eoi: impl Into, + arena: &Bump, +) -> ParseResult { + parse_input(Stream::from_iter(input).spanned(eoi.into()), arena) +} + +pub fn parse_lexer(input: Lexer, arena: &Bump) -> ParseResult { + let end = input.span().end; + parse_iter( + input + .spanned() + .map(|(token, span)| (token.unwrap_or(Token::Invalid), span.into())), + end..end + 1, + arena, + ) +} + +fn arena_collect<'a, I, O: 'a>( + parser: impl IterParser<'a, I, O, Extra<'a>> + Clone, +) -> impl Parser<'a, I, &'a [O], Extra<'a>> + Clone +where + I: Input<'a, Span = SimpleSpan, Token = Token>, +{ + empty() + .map_with_state(|_, _, state: &mut State| bumpalo::vec![in state.arena]) + .foldl(parser, |mut v, o| { + v.push(o); + v + }) + .map(bumpalo::collections::Vec::into_bump_slice) +} + +fn arena_box<'a, I, O: 'a>( + parser: impl Parser<'a, I, O, Extra<'a>> + Clone, +) -> impl Parser<'a, I, &'a O, Extra<'a>> + Clone +where + I: Input<'a, Span = SimpleSpan, Token = Token>, +{ + parser.map_with_state(|item, _, state| &*state.arena.alloc(item)) +} + +#[inline] +fn merge_spans(start: SimpleSpan, end: SimpleSpan) -> SimpleSpan { + SimpleSpan::new(start.start, end.end) +} diff --git a/src/syntax/token.rs b/src/syntax/token.rs new file mode 100644 index 0000000..2651e09 --- /dev/null +++ b/src/syntax/token.rs @@ -0,0 +1,112 @@ +use lasso::Spur; +use logos::Lexer; + +use {lasso::Rodeo, logos::Logos}; + +#[derive(Default)] +pub struct Lextras { + pub interner: Rodeo, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum IntLit { + Signed(i64), + Unsigned(u64), +} + +#[derive(Logos, Copy, Clone, Debug, PartialEq, Eq)] +#[logos(extras = Lextras)] +#[logos(skip r"[ \t\n\f]+")] +#[logos(skip r"-- .*")] +#[rustfmt::skip] +pub enum Token { + #[token("(")] LeftParen, + #[token(")")] RightParen, + #[token("{")] LeftCurly, + #[token("}")] RightCurly, + #[token(".")] Dot, + #[token(",")] Comma, + #[token(":")] Colon, + #[token(";")] Semicolon, + #[token("_")] Underscore, + + #[token("←")] //____ + #[token("<-")] LArrow, + #[token("→")] //____ + #[token("->")] RArrow, + + #[token(":>")] Pipe, + + #[token("+")] Plus, + #[token("-")] Minus, + #[token("*")] Star, + #[token("/")] Slash, + #[token("&")] And, + #[token("|")] VLine, + #[token("~")] Tilde, + + #[token("<")] Lt, + #[token(">")] Gt, + #[token("=")] Equ, + #[token("≠") ] //__ + #[token("/=")] Nequ, + #[token("≤") ] //___ + #[token("<=")] LtEqu, + #[token("≥") ] //___, + #[token(">=")] GtEqu, + + #[token("match")] Match, + #[token("else")] Else, + #[token("loop")] Loop, + #[token("const")] Const, + #[token("var")] Var, + #[token("func")] Func, + // Modules aren't real here ondra just variables with imported functions + #[token("module")] Module, + + #[regex( + r"\p{XID_Start}\p{XID_Continue}*", + |l| l.extras.interner.get_or_intern(l.slice()) + )] Ident(Spur), + + #[token("»", better_string)] + #[regex( + "\"[^\"]*\"", + |l| { + let slice = l.slice(); + l.extras.interner.get_or_intern(&slice[1..slice.len() - 1]) + } + )] String(Spur), + + #[regex( + "-?[0-9]+", + |l| { + Some(if let Some(slice) = l.slice().strip_prefix('-') { + IntLit::Signed(slice.parse::().ok()?) + } else { + IntLit::Unsigned(l.slice().parse::().ok()?) + }) + } + )] Int(IntLit), + + Invalid, +} + +// For Evy, with love. +fn better_string(lexer: &mut Lexer) -> Option { + let mut count = 1; + for (ix, chr) in lexer.remainder().char_indices() { + match chr { + '«' => count -= 1, + '»' => count += 1, + _ => (), + } + + if count == 0 { + let slice = &lexer.remainder()[..ix]; + lexer.bump(ix + '«'.len_utf8()); + return Some(lexer.extras.interner.get_or_intern(slice)); + } + } + None +} diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..6b4c784 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,15 @@ +#[inline(always)] +pub fn default() -> T { + Default::default() +} + +pub trait Pipe { + fn pipe(self, mut f: impl FnMut(Self) -> R) -> R + where + Self: Sized, + { + f(self) + } +} + +impl Pipe for T {}