From c20d8feda42d2bdf0e2a80a898c1c4d4c754815f Mon Sep 17 00:00:00 2001 From: Natapat Samutpong Date: Sun, 23 Jan 2022 04:36:13 +0700 Subject: [PATCH] feat: better error reporting --- Cargo.lock | 113 +++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + README.md | 7 ++- example/hello.vl | 2 +- src/lexer.rs | 14 ++++-- src/main.rs | 27 ++++++----- src/parser.rs | 23 +++++----- src/token.rs | 8 ++++ 8 files changed, 168 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4f8f53c..e105953 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,12 +17,106 @@ version = "1.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84450d0b4a8bd1ba4144ce8ce718fbc5d071358b1e5384bace6536b3d1f2d5b3" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cranelift" +version = "0.80.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ad9e4003896823c4c4fe9e60e584cd763461f94985c789e1703a2466f0a31a3" +dependencies = [ + "cranelift-codegen", + "cranelift-frontend", +] + +[[package]] +name = "cranelift-bforest" +version = "0.80.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9516ba6b2ba47b4cbf63b713f75b432fafa0a0e0464ec8381ec76e6efe931ab3" +dependencies = [ + "cranelift-entity", +] + +[[package]] +name = "cranelift-codegen" +version = "0.80.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "489e5d0081f7edff6be12d71282a8bf387b5df64d5592454b75d662397f2d642" +dependencies = [ + "cranelift-bforest", + "cranelift-codegen-meta", + "cranelift-codegen-shared", + "cranelift-entity", + "log", + "regalloc", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cranelift-codegen-meta" +version = "0.80.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d36ee1140371bb0f69100e734b30400157a4adf7b86148dee8b0a438763ead48" +dependencies = [ + "cranelift-codegen-shared", +] + +[[package]] +name = "cranelift-codegen-shared" +version = "0.80.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "981da52d8f746af1feb96290c83977ff8d41071a7499e991d8abae0d4869f564" + +[[package]] +name = "cranelift-entity" +version = "0.80.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2906740053dd3bcf95ce53df0fd9b5649c68ae4bd9adada92b406f059eae461" + +[[package]] +name = "cranelift-frontend" +version = "0.80.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7cb156de1097f567d46bf57a0cd720a72c3e15e1a2bd8b1041ba2fc894471b7" +dependencies = [ + "cranelift-codegen", + "log", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if", +] + [[package]] name = "memchr" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "regalloc" +version = "0.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d808cff91dfca7b239d40b972ba628add94892b1d9e19a842aedc5cfae8ab1a" +dependencies = [ + "log", + "rustc-hash", + "smallvec", +] + [[package]] name = "regex" version = "1.5.4" @@ -40,10 +134,29 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "target-lexicon" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9bffcddbc2458fa3e6058414599e3c838a022abae82e5c67b4f7f80298d5bff" + [[package]] name = "vl" version = "0.1.0" dependencies = [ "anyhow", + "cranelift", "regex", ] diff --git a/Cargo.toml b/Cargo.toml index 3b357d1..9ce51b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,4 @@ edition = "2021" [dependencies] regex = "1.5.4" anyhow = "1.0.52" +cranelift = "0.80.0" diff --git a/README.md b/README.md index bdb35f7..e1596c0 100644 --- a/README.md +++ b/README.md @@ -14,4 +14,9 @@ another lisp dialect Progress: - [X] Lexer & Parser -- [ ] Compiler \ No newline at end of file +- [ ] Syntax checker & Type checker +- [ ] Compiler + +Problems: +- Parser only detect the first error and quit. +- Parser can't detect `(()))` syntax error. \ No newline at end of file diff --git a/example/hello.vl b/example/hello.vl index 5403a3e..09560db 100644 --- a/example/hello.vl +++ b/example/hello.vl @@ -1,2 +1,2 @@ (print "Hello, World!\n") -(print "Hi!") \ No newline at end of file +(print "Hi\n")( \ No newline at end of file diff --git a/src/lexer.rs b/src/lexer.rs index 2db5aa4..feacd1f 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -3,17 +3,19 @@ use anyhow::{anyhow, Error}; const REGEX : &str = r###"[\s,]*([\[\]{}()]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('",;)]+)"###; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Token { pub value: String, pub span: (usize, usize), + pub line: usize, } impl Token { - pub fn new(value: String, span: (usize, usize)) -> Token { + pub fn new(value: String, span: (usize, usize), line: usize) -> Token { Token { value, span, + line, } } } @@ -31,8 +33,14 @@ pub fn lexer(input: &str) -> Result, Error> { let position = capture.get(0).ok_or(anyhow!("No position found"))?; let span = (position.start(), position.end()); - results.push(Token::new(value, span)); + let line = input[..span.1].matches("\n").count(); + + results.push(Token::new(value, span, line)); } Ok(results) +} + +pub fn here(src: &str, token: &Token) -> String { + format!("{}:{}:{}", src, token.line + 1, token.span.0) } \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index e2e2618..fffd582 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ mod token; mod util; mod lexer; mod parser; +// mod compiler; fn main() { let args = args().nth(1).expect("No input file"); @@ -13,19 +14,21 @@ fn main() { let tokens = lexer::lexer(&src); if tokens.is_err() { - eprintln!("{}", tokens.as_ref().unwrap_err()); - } - - let ast = parser::parse(tokens.unwrap()); - if ast.is_err() { - eprintln!("{:?}", ast.as_ref().unwrap_err()); + eprintln!("{}", tokens.unwrap_err()); + return; } else { - // Everything is in a List(..) so we need to get it out and make it into - // a vector of Expr instead, so we can compile it. - let a = util::unwrap_list_nest(ast.unwrap()); - for e in a.iter() { - println!("{}", e); + // for t in tokens.as_ref().unwrap() { + // println!("{:?}", t); + // } + let ast = parser::parse(tokens.unwrap(), &args); + if ast.is_err() { + eprintln!("{}", ast.as_ref().unwrap_err()); + return; + } else { + // Everything is in a List(..) so we need to get it out and make it into + // a vector of Expr instead, so we can compile it. + let _a = util::unwrap_list_nest(ast.unwrap()); + // compiler::compile(a); } - // TODO: compile to something else.. } } diff --git a/src/parser.rs b/src/parser.rs index db61ffb..a80e62a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,7 +2,7 @@ use regex::Regex; use std::rc::Rc; use crate::{ - lexer::Token, + lexer::{Token, here}, token::{Expr::{self, Null, List, Vector}, Return, Error::{self, ErrorString}}, list, vector, }; @@ -10,13 +10,15 @@ const INT_REGEX: &str = r#"^-?[0-9]+$"#; const STRING_REGEX: &str = r#""(?:\\.|[^\\"])*""#; struct Reader { + src: String, tokens: Vec, position: usize, } impl Reader { - fn new(tokens: Vec) -> Reader { + fn new(tokens: Vec, src: String) -> Reader { Reader { + src, tokens, position: 0, } @@ -53,18 +55,19 @@ fn read_atom(reader: &mut Reader) -> Return { fn read_sequence(reader: &mut Reader, end: &str) -> Return { let mut sequence: Vec = Vec::new(); - reader.next()?; + let _current_token_ = reader.next()?; loop { let token = match reader.peek() { Ok(token) => token, - Err(_) => return Err(ErrorString(format!("Unexpected end of input, found '{}'", end))), + Err(_) => return Err(ErrorString( + format!("{} Unexpected end of input, expected '{}'", here(&reader.src, &reader.tokens[reader.position - 1]), end) + )), }; if token.value == end { break; } sequence.push(read_form(reader)?) } - let _ = reader.next(); - + let _match_token_ = reader.next()?; match end { ")" => Ok(list!(sequence)), "]" => Ok(vector!(sequence)), @@ -75,15 +78,15 @@ fn read_sequence(reader: &mut Reader, end: &str) -> Return { fn read_form(reader: &mut Reader) -> Return { let token = reader.peek()?; match &token.value[..] { - "(" => read_sequence(reader, ")"), ")" => Err(ErrorString("Unexpected ')'".to_string())), - "[" => read_sequence(reader, "]"), + "(" => read_sequence(reader, ")"), "]" => Err(ErrorString("Unexpected ']'".to_string())), + "[" => read_sequence(reader, "]"), _ => read_atom(reader), } } -pub fn parse(tokens: Vec) -> Return { +pub fn parse(tokens: Vec, src: &str) -> Return { if tokens.len() == 0 { return Ok(Null); } - read_form(&mut Reader::new(tokens)) + read_form(&mut Reader::new(tokens, src.to_string())) } \ No newline at end of file diff --git a/src/token.rs b/src/token.rs index 6d1d3eb..804e8ef 100644 --- a/src/token.rs +++ b/src/token.rs @@ -33,6 +33,14 @@ pub enum Error { ErrorString(String), } +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::ErrorString(s) => write!(f, "{}", s), + } + } +} + // pub type Arguments = Vec; pub type Return = Result;