From af2299e78f0cf1fa9510fbe2466024e2819f1f16 Mon Sep 17 00:00:00 2001 From: Natapat Samutpong Date: Sat, 22 Jan 2022 06:43:50 +0700 Subject: [PATCH] feat: init --- .gitignore | 1 + Cargo.lock | 49 ++++++++++++++++++++++++++ Cargo.toml | 10 ++++++ README.md | 17 +++++++++ example/hello.vl | 2 ++ example/s.vl | 8 +++++ src/lexer.rs | 38 +++++++++++++++++++++ src/main.rs | 29 ++++++++++++++++ src/parser.rs | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ src/token.rs | 71 ++++++++++++++++++++++++++++++++++++++ src/util.rs | 49 ++++++++++++++++++++++++++ 11 files changed, 363 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 example/hello.vl create mode 100644 example/s.vl create mode 100644 src/lexer.rs create mode 100644 src/main.rs create mode 100644 src/parser.rs create mode 100644 src/token.rs create mode 100644 src/util.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..4f8f53c --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,49 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84450d0b4a8bd1ba4144ce8ce718fbc5d071358b1e5384bace6536b3d1f2d5b3" + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "vl" +version = "0.1.0" +dependencies = [ + "anyhow", + "regex", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..3b357d1 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "vl" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +regex = "1.5.4" +anyhow = "1.0.52" diff --git a/README.md b/README.md new file mode 100644 index 0000000..bdb35f7 --- /dev/null +++ b/README.md @@ -0,0 +1,17 @@ +# vl +another lisp dialect + +```lisp +(fun factorial [x] + (if (<= x 1) + 1 + (* x (factorial (- x 1))))) + +(def times 7) +(do + (print (factorial times))) +``` + +Progress: +- [X] Lexer & Parser +- [ ] Compiler \ No newline at end of file diff --git a/example/hello.vl b/example/hello.vl new file mode 100644 index 0000000..5403a3e --- /dev/null +++ b/example/hello.vl @@ -0,0 +1,2 @@ +(print "Hello, World!\n") +(print "Hi!") \ No newline at end of file diff --git a/example/s.vl b/example/s.vl new file mode 100644 index 0000000..beb78e0 --- /dev/null +++ b/example/s.vl @@ -0,0 +1,8 @@ +(fun factorial [x] + (if (<= x 1) + 1 + (* x (factorial (- x 1))))) + +(def amount 7) +(do + (print (factorial amount))) \ No newline at end of file diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..2db5aa4 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,38 @@ +use regex::Regex; +use anyhow::{anyhow, Error}; + +const REGEX : &str = r###"[\s,]*([\[\]{}()]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('",;)]+)"###; + +#[derive(Debug)] +pub struct Token { + pub value: String, + pub span: (usize, usize), +} + +impl Token { + pub fn new(value: String, span: (usize, usize)) -> Token { + Token { + value, + span, + } + } +} + +pub fn lexer(input: &str) -> Result, Error> { + let mut results: Vec = Vec::new(); + let regex = Regex::new(REGEX).unwrap(); + + for capture in regex.captures_iter(input) { + if capture[1].starts_with(";") { + continue; + } + + let value = capture[1].to_string(); + let position = capture.get(0).ok_or(anyhow!("No position found"))?; + let span = (position.start(), position.end()); + + results.push(Token::new(value, span)); + } + + Ok(results) +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..9062b55 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,29 @@ +use std::{fs::read_to_string, env::args, path::Path}; + +mod token; +mod util; +mod lexer; +mod parser; + +fn main() { + let args = args().nth(1).expect("No input file"); + + let src = util::cover_paren(read_to_string(&args).unwrap()); + let _file_name = Path::new(&args).file_stem().unwrap().to_str().unwrap(); + + let tokens = lexer::lexer(&src); + if tokens.is_err() { + eprintln!("{}", tokens.as_ref().unwrap_err()); + } + + let ast = parser::parse(tokens.unwrap()); + if ast.is_err() { + eprintln!("{:?}", ast.as_ref().unwrap_err()); + } else { + // Everything is in a List(..) so we need to get it out and make it into + // a vector of Expr instead, so we can compile it. + let a = util::unwrap_list_nest(ast.unwrap()); + println!("{:#?}", a); + // TODO: compile to something else.. + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..db61ffb --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,89 @@ +use regex::Regex; +use std::rc::Rc; + +use crate::{ + lexer::Token, + token::{Expr::{self, Null, List, Vector}, Return, Error::{self, ErrorString}}, list, vector, +}; + +const INT_REGEX: &str = r#"^-?[0-9]+$"#; +const STRING_REGEX: &str = r#""(?:\\.|[^\\"])*""#; + +struct Reader { + tokens: Vec, + position: usize, +} + +impl Reader { + fn new(tokens: Vec) -> Reader { + Reader { + tokens, + position: 0, + } + } + fn next(&mut self) -> Result<&Token, Error> { + self.position += 1; + Ok(self.tokens.get(self.position - 1).ok_or(ErrorString("Underflow".to_string()))?) + } + fn peek(&mut self) -> Result<&Token, Error> { + Ok(self.tokens.get(self.position).ok_or(ErrorString("Underflow".to_string()))?) + } +} + +fn read_atom(reader: &mut Reader) -> Return { + let int_regex = Regex::new(INT_REGEX).unwrap(); + let string_regex = Regex::new(STRING_REGEX).unwrap(); + + let token = reader.next()?; + match &token.value[..] { + "null" => Ok(Expr::Null), + "true" => Ok(Expr::Bool(true)), + "false" => Ok(Expr::Bool(false)), + _ => { + if int_regex.is_match(&token.value) { + Ok(Expr::Number(token.value.parse().unwrap())) + } else if string_regex.is_match(&token.value) { + Ok(Expr::String(token.value[1..token.value.len() - 1].to_string())) + } else { + Ok(Expr::Symbol(token.value.to_string())) + } + } + } +} + +fn read_sequence(reader: &mut Reader, end: &str) -> Return { + let mut sequence: Vec = Vec::new(); + reader.next()?; + loop { + let token = match reader.peek() { + Ok(token) => token, + Err(_) => return Err(ErrorString(format!("Unexpected end of input, found '{}'", end))), + }; + if token.value == end { break; } + sequence.push(read_form(reader)?) + } + + let _ = reader.next(); + + match end { + ")" => Ok(list!(sequence)), + "]" => Ok(vector!(sequence)), + _ => return Err(ErrorString(format!("Unknown sequence end value: '{}'", end))), + } +} + +fn read_form(reader: &mut Reader) -> Return { + let token = reader.peek()?; + match &token.value[..] { + "(" => read_sequence(reader, ")"), + ")" => Err(ErrorString("Unexpected ')'".to_string())), + "[" => read_sequence(reader, "]"), + "]" => Err(ErrorString("Unexpected ']'".to_string())), + _ => read_atom(reader), + } +} + +pub fn parse(tokens: Vec) -> Return { + if tokens.len() == 0 { return Ok(Null); } + read_form(&mut Reader::new(tokens)) +} \ No newline at end of file diff --git a/src/token.rs b/src/token.rs new file mode 100644 index 0000000..fadd65c --- /dev/null +++ b/src/token.rs @@ -0,0 +1,71 @@ +use std::rc::Rc; + +use crate::util::unescape; + +#[derive(Debug, Clone)] +pub enum Expr { + Null, + Bool(bool), + Number(f64), + String(String), + Symbol(String), + List(Rc>, Rc), + Vector(Rc>, Rc), + // Function(fn(Arguments) -> Return, Rc), +} + +impl std::fmt::Display for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Expr::Null => write!(f, "Null"), + Expr::Bool(b) => write!(f, "{}", b), + Expr::Number(n) => write!(f, "{}", n), + Expr::String(s) => write!(f, "{}", unescape(s.to_string())), + Expr::Symbol(s) => write!(f, "{}", s), + Expr::List(l, _) => { + write!(f, "(")?; + for e in l.iter() { + write!(f, "{}", e)?; + } + write!(f, ")") + } + Expr::Vector(l, _) => { + write!(f, "[")?; + for e in l.iter() { + write!(f, "{}", e)?; + } + write!(f, "]") + } + } + } +} + +#[derive(Debug)] +pub enum Error { + ErrorString(String), +} + +// pub type Arguments = Vec; +pub type Return = Result; + +#[macro_export] +macro_rules! list { + ($seq:expr) => {{ + List(Rc::new($seq),Rc::new(Null)) + }}; + [$($args:expr),*] => {{ + let v: Vec = vec![$($args),*]; + List(Rc::new(v),Rc::new(Null)) + }} +} + +#[macro_export] +macro_rules! vector { + ($seq:expr) => {{ + Vector(Rc::new($seq), Rc::new(Null)) + }}; + [$($args:expr),*] => {{ + let v: Vec = vec![$($args),*]; + Vector(Rc::new(v), Rc::new(Null)) + }} +} \ No newline at end of file diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..5ff3433 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,49 @@ +use crate::token::Expr::{self, List}; + +pub fn cover_paren(s: String) -> String { + format!("({})", s) +} + +pub fn unescape(s: String) -> String { + let mut result = String::new(); + let mut i = 0; + while i < s.len() { + if s.chars().nth(i).unwrap() == '\\' { + match s.chars().nth(i + 1).unwrap() { + 'n' => result.push('\n'), + 't' => result.push('\t'), + 'r' => result.push('\r'), + '\\' => result.push('\\'), + '"' => result.push('"'), + _ => result.push(s.chars().nth(i + 1).unwrap()), + } + i += 2; + } else { + result.push(s.chars().nth(i).unwrap()); + i += 1; + } + } + result +} + +pub fn unwrap_list_nest(ast: Expr) -> Vec { + let mut result: Vec = Vec::new(); + + match ast.clone() { + List(l, _) => { + for expr in l.iter() { + + result.push(expr.clone()); + + } + } + _ => { + // This probably will not happen because everything is wrapped + // in list. So it would be impossible that the ast is not a list. + eprintln!("Possibly a bug in the compiler, you shouln't get this messages."); + dbg!(ast); + } + }; + + result +} \ No newline at end of file