From f34df3340b1bb811b97328945366ac9b3ddb352d Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 21 Jul 2022 00:56:00 +0200 Subject: [PATCH] Added parser --- Cargo.lock | 85 ++++++++++++++++++++++++- Cargo.toml | 2 + src/lexer.rs | 138 ++++++++++------------------------------ src/lib.rs | 3 +- src/list.rs | 17 +++++ src/main.rs | 61 +++--------------- src/node.rs | 26 -------- src/parser.rs | 70 ++++++++++++++------ src/value.rs | 16 +++++ web_lisp_src/style.wisp | 2 +- 10 files changed, 212 insertions(+), 208 deletions(-) create mode 100644 src/list.rs delete mode 100644 src/node.rs create mode 100644 src/value.rs diff --git a/Cargo.lock b/Cargo.lock index 95626ac..bb72475 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -18,6 +18,15 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a13739d7177fbd22bb0ed28badfff9f372f8bef46c863db4e1c6248f6b223b6e" +[[package]] +name = "ahash" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" +dependencies = [ + "const-random", +] + [[package]] name = "ahash" version = "0.7.6" @@ -157,6 +166,15 @@ dependencies = [ "libc", ] +[[package]] +name = "chumsky" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4" +dependencies = [ + "ahash 0.3.8", +] + [[package]] name = "clipboard-win" version = "4.4.1" @@ -209,6 +227,28 @@ dependencies = [ "memchr", ] +[[package]] +name = "const-random" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4" +dependencies = [ + "const-random-macro", + "proc-macro-hack", +] + +[[package]] +name = "const-random-macro" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" +dependencies = [ + "getrandom", + "lazy_static", + "proc-macro-hack", + "tiny-keccak", +] + [[package]] name = "core-foundation" version = "0.7.0" @@ -291,6 +331,12 @@ dependencies = [ "objc", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "cty" version = "0.2.2" @@ -380,7 +426,7 @@ version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb095a8b9feb9b7ff8f00b6776dffcef059538a3f4a91238e03c900e9c9ad9a2" dependencies = [ - "ahash", + "ahash 0.7.6", "epaint", "nohash-hasher", "tracing", @@ -431,7 +477,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c29567088888e8ac3e8f61bbb2ddc820207ebb8d69eefde5bcefa06d65e4e89" dependencies = [ "ab_glyph", - "ahash", + "ahash 0.7.6", "atomic_refcell", "bytemuck", "emath", @@ -900,6 +946,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + [[package]] name = "num_enum" version = "0.5.7" @@ -956,6 +1011,15 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" +[[package]] +name = "ordered-float" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2" +dependencies = [ + "num-traits", +] + [[package]] name = "osmesa-sys" version = "0.1.2" @@ -1050,6 +1114,12 @@ dependencies = [ "toml", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + [[package]] name = "proc-macro2" version = "1.0.40" @@ -1206,6 +1276,15 @@ dependencies = [ "syn", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -1477,8 +1556,10 @@ dependencies = [ name = "web-lisp" version = "0.1.0" dependencies = [ + "chumsky", "eframe", "logos", + "ordered-float", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 969de62..c4d6189 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,5 @@ edition = "2021" [dependencies] logos = "*" eframe = "*" +chumsky = "0.8" +ordered-float = "3.0" diff --git a/src/lexer.rs b/src/lexer.rs index 9f94c81..c8da9df 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,123 +1,49 @@ -/// This ID is explicitly picked to prevent excessive bloat -pub type ID = u16; -use std::num::ParseIntError; -pub struct Metadata { - pub title: Option, -} - use logos::{Lexer, Logos}; +use ordered_float::OrderedFloat; -#[derive(Logos, Debug, PartialEq)] -pub enum Token { - // Tokens can be literal strings, of any length. - #[regex(r#"\([a-zA-Z-]+"#, tag_parser)] - Tag(String), +#[derive(Clone, Debug, Hash, Logos, PartialEq, Eq, PartialOrd, Ord)] +pub enum Token<'a> { + // Symbols + #[token("(")] + LeftParen, + + #[token(")")] + RightParen, + + #[token("[")] + LeftBracket, + + #[token("]")] + RightBracket, #[token("'")] Quote, - #[token("(")] - StartParen, - #[token(")")] - EndParen, + // Values + #[regex("\"(\\.|[^\"])*\"", (lex_slice::<1, 1>))] + String(&'a str), - #[regex("\"[a-zA-Z ,.!]+\"", strg)] - Strg(String), + #[regex("[+-]?([0-9]*[.])?[0-9]+", lex_float)] + Number(OrderedFloat), - #[regex(":[a-zA-Z ,.!]+", kwarg_parse)] - Kwarg(Kwarg), + // TODO: Better symbol definition + #[regex(r"\p{XID_Start}[\p{XID_Continue}|-]*")] + Symbol(&'a str), - #[regex("[+-]?[0-9]+", num)] - Num(i64), + #[regex(r":\p{XID_Start}[\p{XID_Continue}|-]*", (lex_slice::<1, 0>))] + Keyword(&'a str), - #[regex("[+-]?[0-9]*[.]?[0-9]+(?:[eE][+-]?[0-9]+)?", priority = 2, callback = float_parse)] - Float(f64), - - #[regex("#[0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F]", hexa)] - HexaDec(Rgb), - - #[error] + #[regex(r";(.|[^\n])*\n", logos::skip)] #[regex(r"[ \t\n\f]+", logos::skip)] + #[error] Error, } -pub fn lex_string(strn: String) -> Vec { - let lex = Token::lexer(&strn); - let mut vec = vec![]; - for token in lex { - vec.push(token); - } - vec +fn lex_slice<'a, const S: usize, const E: usize>(lexer: &mut Lexer<'a, Token<'a>>) -> &'a str { + let slice = lexer.slice(); + &slice[S..slice.len() - E] } -pub fn tag_parser(lex: &mut Lexer) -> Option { - let mut tag = lex.slice().to_string(); - tag.remove(0); - Some(tag) -} - -pub fn strg(lex: &mut Lexer) -> Option { - let mut strg = lex.slice().to_string(); - strg.remove(0); - strg.pop(); - Some(strg) -} - -pub fn float_parse(lex: &mut Lexer) -> f64 { - let num = lex.slice(); - - num.parse::().unwrap() -} - -pub fn num(lex: &mut Lexer) -> i64 { - let num = lex.slice(); - - let num: Result = num.parse::(); - - match num { - Ok(num) => num, - Err(err) => { - unreachable!("{}", err) - } - } -} - -pub fn kwarg_parse(lex: &mut Lexer) -> Kwarg { - let mut strg = lex.slice().to_string(); - - strg.remove(0); - let mut spl = strg.split(" "); - - let arg_name = spl.next().unwrap().to_string(); - - let arg_value = spl.next().unwrap().to_string(); - - Kwarg { - name: arg_name, - value: arg_value, - } -} -#[derive(Debug, PartialEq)] -pub struct Rgb { - pub red: u8, - pub green: u8, - pub blue: u8, -} - -#[derive(Debug, PartialEq)] -pub struct Kwarg { - name: String, - value: String, -} - -pub fn hexa(lex: &mut Lexer) -> Rgb { - let slice = lex.slice(); - - let rgb = Rgb { - red: slice[0..=1].as_bytes()[0], - green: slice[2..=3].as_bytes()[0], - blue: slice[4..=5].as_bytes()[0], - }; - - rgb +fn lex_float<'a>(lexer: &mut Lexer<'a, Token<'a>>) -> Option> { + lexer.slice().parse().ok() } diff --git a/src/lib.rs b/src/lib.rs index a974d33..e86b072 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ pub mod lexer; -pub mod node; pub mod parser; +pub mod value; +pub mod list; diff --git a/src/list.rs b/src/list.rs new file mode 100644 index 0000000..d3c05e5 --- /dev/null +++ b/src/list.rs @@ -0,0 +1,17 @@ +use crate::value::Value; + +/// Single-linked list +#[derive(Debug, Default, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum List { + Cons(Value, Box), + #[default] + Nil, +} + +impl List { + pub fn from_vec(vec: Vec) -> Self { + vec.into_iter() + .rev() + .fold(Self::Nil, |list, next| Self::Cons(next, Box::new(list))) + } +} diff --git a/src/main.rs b/src/main.rs index 8f1134e..cf5180c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,57 +1,14 @@ #![cfg_attr(not(debug_assertions), windows_subsystem = "windows")] -// hide console window on Windows in release -use web_lisp::node::Node; -use web_lisp::{ - lexer::{self, Token}, - parser, -}; +use web_lisp::parser; -fn main() { - let abc = lexer::lex_string(include_str!("../web_lisp_src/hello_world.wisp").to_string()); - let alksjdhfhlkj = parser::parse_vec(abc.as_slice()); - - eframe::run_native( - "Web Lisp Browser", - eframe::NativeOptions::default(), - Box::new(|_cc| Box::new(MyApp::new(abc))), +fn main() -> Result<(), Box> { + println!( + "{:?}", + parser::read(&std::fs::read_to_string( + std::env::args().nth(1).ok_or("no filename provided")?, + )?) ); -} - -use eframe::egui::{self, Label, RichText, TextEdit}; - -struct MyApp { - omnibar: String, - tokens: Vec, - root_node: Node, -} - -impl MyApp { - fn new(tokens: Vec) -> Self { - let mut root_node = Node::default(); - root_node.text = "hi".to_string(); - root_node.size = Some(30.0); - Self { - omnibar: "".to_string(), - tokens, - root_node, - } - } -} - -impl eframe::App for MyApp { - fn update(&mut self, ctx: &egui::Context, _frame: &mut eframe::Frame) { - egui::CentralPanel::default().show(ctx, |ui| { - _frame.set_window_title("title"); - ui.text_edit_singleline(&mut self.omnibar); - ui.separator(); - - let rn = &self.root_node; - let tx = RichText::new(rn.text.clone()) - .size(rn.size.unwrap_or(12.0)) - .underline(); - - ui.add(Label::new(tx)); - }); - } + + Ok(()) } diff --git a/src/node.rs b/src/node.rs deleted file mode 100644 index abf17dc..0000000 --- a/src/node.rs +++ /dev/null @@ -1,26 +0,0 @@ -use std::fmt::Display; - -#[derive(Debug)] -pub struct Node { - pub text: String, - pub size: Option, - pub children: Vec, -} - -impl Display for Node { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:#?}", self)?; - - Ok(()) - } -} - -impl Default for Node { - fn default() -> Self { - Self { - text: Default::default(), - size: Default::default(), - children: Default::default(), - } - } -} diff --git a/src/parser.rs b/src/parser.rs index 929e834..cdc4c99 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,24 +1,54 @@ -use crate::{ - lexer::{Kwarg, Token}, - node::Node, -}; -pub enum ParserError {} +use crate::{lexer::Token, list::List, value::Value}; +use chumsky::{prelude::*, Stream}; +use logos::{Lexer, Logos}; -pub fn parse_vec(mut tokens: &[Token]) -> Node { - let mut nodes: Node = Node::default(); - - for i in 1..tokens.len() - 1 { - let token = &tokens[i]; - match token { - _ => {} - } - } - - println!("{}", nodes); - nodes +/// Parse source string into a value +pub fn read(src: &str) -> Result, Vec>>> { + parser().parse(stream_of_lexer(Token::lexer(src))) } -pub enum TagTypes { - Text, - Unknown, +fn parser<'a>() -> impl Parser, Vec, Error = Simple>> { + recursive(|value| { + let atom = select! { + Token::Symbol("true") => Value::Bool(true), + Token::Symbol("false") => Value::Bool(false), + Token::Symbol("nil") => Value::Nil, + Token::Symbol(s) => Value::Symbol(s.to_owned()), + Token::Keyword(k) => Value::Keyword(k.to_owned()), + Token::String(s) => Value::String(s.to_owned()), + Token::Number(n) => Value::Number(n), + }; + + let list = value + .clone() + .repeated() + .map(List::from_vec) + .map(Box::new) + .map(Value::List) + .delimited_by(just(Token::LeftParen), just(Token::RightParen)); + + let vector = value + .clone() + .repeated() + .map(Value::Vector) + .delimited_by(just(Token::LeftBracket), just(Token::RightBracket)); + + let quote = just(Token::Quote).ignore_then(value).map(|value| { + Value::List(Box::new(List::Cons( + Value::Symbol("quote".to_owned()), + Box::new(List::Cons(value, Box::new(List::Nil))), + ))) + }); + + atom.or(list).or(vector).or(quote) + }) + .repeated() +} + +/// Convert Logos' Lexer into Chumsky'a Stream +fn stream_of_lexer<'a>( + lexer: Lexer<'a, Token<'a>>, +) -> Stream<'_, Token<'_>, logos::Span, logos::SpannedIter<'_, Token<'_>>> { + let len = lexer.source().len(); + Stream::from_iter(len..len + 1, lexer.spanned()) } diff --git a/src/value.rs b/src/value.rs new file mode 100644 index 0000000..a3ae1bb --- /dev/null +++ b/src/value.rs @@ -0,0 +1,16 @@ +use crate::list::List; +use ordered_float::OrderedFloat; + +/// A Wisp value +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum Value { + List(Box), + Vector(Vec), + Symbol(String), + Keyword(String), + Function { args: Vec, body: Box }, + Bool(bool), + Number(OrderedFloat), + String(String), + Nil, +} diff --git a/web_lisp_src/style.wisp b/web_lisp_src/style.wisp index 4546dd5..4a239e2 100644 --- a/web_lisp_src/style.wisp +++ b/web_lisp_src/style.wisp @@ -1,4 +1,4 @@ (wisp - (style :id 1 `(:size 12 )) + (style :id 1 '(:size 12 )) (document (text :style 1 "Hello, World!"))) \ No newline at end of file