From d588c6b230cf93a8b53db8a589cc2b0ef9ee5b8b Mon Sep 17 00:00:00 2001 From: Erin Date: Sat, 2 Sep 2023 00:46:48 +0200 Subject: [PATCH] Basic work on syntax framework --- Cargo.lock | 135 ++++++++++++++++++++++++------------------- Cargo.toml | 11 ++-- src/main.rs | 70 ++++++---------------- src/memory.rs | 53 ----------------- src/syntax/ast.rs | 39 +++++++++++++ src/syntax/mod.rs | 3 + src/syntax/parser.rs | 18 ++++++ src/syntax/token.rs | 88 ++++++++++++++++++++++++++++ src/utils.rs | 4 ++ 9 files changed, 251 insertions(+), 170 deletions(-) delete mode 100644 src/memory.rs create mode 100644 src/syntax/ast.rs create mode 100644 src/syntax/mod.rs create mode 100644 src/syntax/parser.rs create mode 100644 src/syntax/token.rs create mode 100644 src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index b2eea7e..ae3bf6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,24 +13,43 @@ dependencies = [ "version_check", ] -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - [[package]] name = "beef" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" +[[package]] +name = "bumpalo" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chumsky" +version = "1.0.0-alpha.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc3172a80699de358070dd99f80ea8badc6cdf8ac2417cb5a96e6d81bf5fe06d" +dependencies = [ + "hashbrown", + "stacker", +] + [[package]] name = "fnv" version = "1.0.7" @@ -38,24 +57,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] -name = "getrandom" -version = "0.2.10" +name = "hashbrown" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ - "cfg-if", - "libc", - "wasi", + "ahash", ] [[package]] -name = "hashbrown" -version = "0.14.0" +name = "lasso" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2" dependencies = [ - "ahash", - "allocator-api2", + "hashbrown", ] [[package]] @@ -102,12 +118,6 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - [[package]] name = "proc-macro2" version = "1.0.66" @@ -117,6 +127,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.33" @@ -126,36 +145,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - [[package]] name = "regex-syntax" version = "0.6.29" @@ -166,9 +155,23 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" name = "rhea" version = "0.1.0" dependencies = [ - "hashbrown", + "bumpalo", + "chumsky", + "lasso", "logos", - "rand", +] + +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", ] [[package]] @@ -195,7 +198,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +name = "winapi" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index 9f78ace..992ec19 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,10 @@ [package] -name = "rhea" +name = "rhea" version = "0.1.0" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] -logos = "*" -hashbrown = "*" -rand = "*" +bumpalo = "3" +chumsky = "1.0.0-alpha" +lasso = "0.7" +logos = "0.13" diff --git a/src/main.rs b/src/main.rs index d80f0d9..e073dd7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,61 +1,25 @@ // Rhea +use bumpalo::Bump; use logos::Logos; -use memory::{Variable, VariableHashmap, VariableType}; +use std::io::{stdin, Read}; +use utils::default; -mod memory; +mod syntax; +mod utils; -#[derive(Logos, Debug, PartialEq)] -#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens -enum Token { - #[token(".")] - Period, +fn main() -> Result<(), Box> { + let mut buf = default(); + stdin().read_to_string(&mut buf)?; - #[token("(")] - LeftParen, + let lexer = syntax::token::Token::lexer_with_extras(&buf, default()); + let arena = Bump::new(); + syntax::parser::parse( + lexer + .spanned() + .map(|(item, span)| (item.unwrap_or(syntax::token::Token::Invalid), span.into())), + &arena, + ); - #[token("{")] - LeftBrace, - - #[token(")")] - RightParen, - - #[token("}")] - RightBrace, - - #[token("include")] - Include, - - #[token("=")] - Equals, - - #[token(";")] - Semicolon, - - #[token("\"")] - Quote, - - // Or regular expressions. - #[regex("[a-zA-Z]+")] - Text, -} - -fn main() { - let mut memmap = Vec::new(); - let mut varmap: VariableHashmap = hashbrown::HashMap::new(); - - let variable_name = "abcd".to_string(); - let variable_type = Some(memory::VariableType::Signed); - let constant = false; - let length = 8; - let abc = Variable::new(&mut memmap, variable_type, constant, length); - - varmap.insert(variable_name, abc); - - let mut lex = Token::lexer(include_str!("../assets/examples/library.rhea")); - for x in lex { - println!("{:?}", x); - } - - //todo compile to hb + Ok(()) } diff --git a/src/memory.rs b/src/memory.rs deleted file mode 100644 index 05793ea..0000000 --- a/src/memory.rs +++ /dev/null @@ -1,53 +0,0 @@ -use std::ops::Range; - -// I do not like std -use hashbrown::HashMap; - -pub type MemoryMap = Vec<(u64, u64)>; - -pub type VariableHashmap = HashMap; - -pub enum VariableType { - Unsigned, - Signed, - String, -} - -pub struct Variable { - vtype: VariableType, - constant: bool, - memory_addr: u64, - length: u64, -} -impl Variable { - pub fn new( - memmap: &mut MemoryMap, - variable_type: Option, - constant: bool, - length: u64, - ) -> Self { - let addr = get_random_addr_and_validate(memmap, length); - //todo type guessing - let vtype = VariableType::String; - Self { - vtype, - constant, - memory_addr: addr, - length, - } - } -} - -use rand::prelude::*; - -pub fn get_random_addr_and_validate(memmap: &mut MemoryMap, length: u64) -> u64 { - let raddr: u64 = rand::random(); - // TODO: validate fr later - // for (x, _) in memmap.iter() { - // if *x == raddr { - // } - // } - - memmap.push((raddr, raddr + length)); - raddr -} diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs new file mode 100644 index 0000000..4e1fa1c --- /dev/null +++ b/src/syntax/ast.rs @@ -0,0 +1,39 @@ +use lasso::Spur; + +use super::token::IntLit; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Expr<'a> { + Binary(BinaryOperator, &'a Self, &'a Self), + Unary(UnaryOperator, &'a Self), + Literal(Literal), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Literal { + String(Spur), + Integer(IntLit), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BinaryOperator { + Plus, + Minus, + Star, + Slash, + And, + Or, + Lt, + Gt, + Equ, + Nequ, + LtEqu, + GtEqu, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum UnaryOperator { + Not, + Neg, + Deref, +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs new file mode 100644 index 0000000..716da41 --- /dev/null +++ b/src/syntax/mod.rs @@ -0,0 +1,3 @@ +pub mod ast; +pub mod parser; +pub mod token; diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs new file mode 100644 index 0000000..92adc53 --- /dev/null +++ b/src/syntax/parser.rs @@ -0,0 +1,18 @@ +use super::{ast::Expr, token::Token}; +use bumpalo::Bump; +use chumsky::{extra::Full, input::Stream, prelude::*}; + +type Item = (Token, SimpleSpan); +type Extra<'s> = Full, State<'s>, ()>; +struct State<'a> { + arena: &'a Bump, +} + +pub fn parse(input: impl Iterator, arena: &Bump) { + expr().parse_with_state(Stream::from_iter(input), &mut State { arena }); +} + +fn expr<'s, 'a, I: Iterator + 's>( +) -> impl Parser<'s, Stream, Expr<'a>, Extra<'s>> { + todo() +} diff --git a/src/syntax/token.rs b/src/syntax/token.rs new file mode 100644 index 0000000..044bb36 --- /dev/null +++ b/src/syntax/token.rs @@ -0,0 +1,88 @@ +use lasso::Spur; + +use {lasso::Rodeo, logos::Logos}; + +#[derive(Default)] +pub struct Lextras { + pub interner: Rodeo, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum IntLit { + Signed(i64), + Unsigned(u64), +} + +#[derive(Logos, Copy, Clone, Debug, PartialEq, Eq)] +#[logos(extras = Lextras)] +#[logos(skip r"[ \t\n\f]+")] +#[logos(skip r"-- .*")] +#[rustfmt::skip] +pub enum Token { + #[token("(")] LeftParen, + #[token(")")] RightParen, + #[token("{")] LeftCurly, + #[token("}")] RightCurly, + #[token(".")] Dot, + #[token(",")] Comma, + #[token(";")] Semicolon, + #[token(":=")] //__ + #[token("≔") ] Bind, + + #[token("←")] //_ + #[token("<-")] Set, + + #[token(":>")] Pipe, + + #[token("+")] Plus, + #[token("-")] Minus, + #[token("*")] Star, + #[token("/")] Slash, + #[token("&")] And, + #[token("|")] Or, + #[token("~")] Tilde, + + #[token("<")] Lt, + #[token(">")] Gt, + #[token("=")] Equ, + #[token("≠") ] //__ + #[token("/=")] Nequ, + #[token("≤") ] //___ + #[token("<=")] LtEqu, + #[token("≥") ] //___, + #[token(">=")] GtEqu, + + #[token("match")] Match, + #[token("else")] Else, + #[token("loop")] Loop, + #[token("const")] Const, + #[token("var")] Var, + #[token("func")] Func, + #[token("module")] Module, + + #[regex( + r"\p{XID_Start}\p{XID_Continue}*", + |l| l.extras.interner.get_or_intern(l.slice()) + )] Ident(Spur), + + #[regex( + "\"[^\"]*\"", + |l| { + let slice = l.slice(); + l.extras.interner.get_or_intern(&slice[1..slice.len() - 1]) + } + )] String(Spur), + + #[regex( + "-?[0-9]+", + |l| { + Some(if let Some(slice) = l.slice().strip_prefix('-') { + IntLit::Signed(slice.parse::().ok()?) + } else { + IntLit::Unsigned(l.slice().parse::().ok()?) + }) + } + )] Int(IntLit), + + Invalid, +} diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..31a900e --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,4 @@ +#[inline(always)] +pub fn default() -> T { + Default::default() +}