From 8d88fc41adcf757684dfc31a4fa91924989a978f Mon Sep 17 00:00:00 2001 From: Natapat Samutpong Date: Mon, 24 Jan 2022 04:49:27 +0700 Subject: [PATCH] fix: fix string being splitted --- README.md | 16 +++++++--------- example/hello.vl | 2 +- src/parser.rs | 12 +++++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 942a696..93df472 100644 --- a/README.md +++ b/README.md @@ -2,30 +2,28 @@ another lisp dialect ```lisp -(fun factorial [x] +(fun factorial (x) (if (<= x 1) 1 (* x (factorial (- x 1))))) -(def times 7) (do - (print (factorial times))) + (print (factorial 7))) ``` Compliation flow: ``` -Input(file) -> Lexer -> Parser -> Interpret - String Token Expr IO - |-> Compile(TODO) - File +Input(file) -> Parser -> Interpret(TODO) + String SExprs IO + |-> Compile(TODO) + File ``` Progress: - [X] Lexer & Parser - [ ] Syntax checker & Type checker -- [X] Interpreter +- [ ] Interpreter - [ ] Compiler Problems: -- Parser only detect the first error. - Parser can't detect `(()))` syntax error. \ No newline at end of file diff --git a/example/hello.vl b/example/hello.vl index f2b66a9..0e5436a 100644 --- a/example/hello.vl +++ b/example/hello.vl @@ -1,2 +1,2 @@ (print "hi") -(print "hello") +(print "Hello, World!") diff --git a/src/parser.rs b/src/parser.rs index 9839710..355f17a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -86,9 +86,11 @@ impl Parser { } pub fn tokenize(str: &str) -> Vec<String> { - str.replace("(", " ( ") - .replace(")", " ) ") - .split_whitespace() - .map(|s| s.to_string()) - .collect() + let regex = Regex::new(r###"[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]+)"###).unwrap(); + let mut res = vec![]; + for cap in regex.captures_iter(str) { + if cap[1].starts_with(";") { continue; } + res.push(String::from(&cap[1])); + } + res } \ No newline at end of file