1
1
Fork 0
mirror of https://github.com/azur1s/bobbylisp.git synced 2024-10-16 02:37:40 -05:00

refactor: simplify parser

This commit is contained in:
Natapat Samutpong 2022-01-24 04:42:08 +07:00
parent 456f220590
commit aa4fbf3d3b
11 changed files with 95 additions and 581 deletions

209
Cargo.lock generated
View file

@ -17,166 +17,12 @@ version = "1.0.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84450d0b4a8bd1ba4144ce8ce718fbc5d071358b1e5384bace6536b3d1f2d5b3"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cranelift"
version = "0.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ad9e4003896823c4c4fe9e60e584cd763461f94985c789e1703a2466f0a31a3"
dependencies = [
"cranelift-codegen",
"cranelift-frontend",
]
[[package]]
name = "cranelift-bforest"
version = "0.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9516ba6b2ba47b4cbf63b713f75b432fafa0a0e0464ec8381ec76e6efe931ab3"
dependencies = [
"cranelift-entity",
]
[[package]]
name = "cranelift-codegen"
version = "0.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "489e5d0081f7edff6be12d71282a8bf387b5df64d5592454b75d662397f2d642"
dependencies = [
"cranelift-bforest",
"cranelift-codegen-meta",
"cranelift-codegen-shared",
"cranelift-entity",
"log",
"regalloc",
"smallvec",
"target-lexicon",
]
[[package]]
name = "cranelift-codegen-meta"
version = "0.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d36ee1140371bb0f69100e734b30400157a4adf7b86148dee8b0a438763ead48"
dependencies = [
"cranelift-codegen-shared",
]
[[package]]
name = "cranelift-codegen-shared"
version = "0.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "981da52d8f746af1feb96290c83977ff8d41071a7499e991d8abae0d4869f564"
[[package]]
name = "cranelift-entity"
version = "0.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2906740053dd3bcf95ce53df0fd9b5649c68ae4bd9adada92b406f059eae461"
[[package]]
name = "cranelift-frontend"
version = "0.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7cb156de1097f567d46bf57a0cd720a72c3e15e1a2bd8b1041ba2fc894471b7"
dependencies = [
"cranelift-codegen",
"log",
"smallvec",
"target-lexicon",
]
[[package]]
name = "cranelift-jit"
version = "0.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e0f0e20dbcac1e6c3caef955e004598a9a6a5f310e591e2c629ec15c7fa6bfa"
dependencies = [
"anyhow",
"cranelift-codegen",
"cranelift-entity",
"cranelift-module",
"cranelift-native",
"libc",
"log",
"region",
"target-lexicon",
"winapi",
]
[[package]]
name = "cranelift-module"
version = "0.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93460fc789770f2a63163bfb5f2b851635ce29d91526f2e96854bcc4ed53a778"
dependencies = [
"anyhow",
"cranelift-codegen",
]
[[package]]
name = "cranelift-native"
version = "0.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "166028ca0343a6ee7bddac0e70084e142b23f99c701bd6f6ea9123afac1a7a46"
dependencies = [
"cranelift-codegen",
"libc",
"target-lexicon",
]
[[package]]
name = "libc"
version = "0.2.113"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eef78b64d87775463c549fbd80e19249ef436ea3bf1de2a1eb7e717ec7fab1e9"
[[package]]
name = "log"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [
"cfg-if",
]
[[package]]
name = "mach"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa"
dependencies = [
"libc",
]
[[package]]
name = "memchr"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "regalloc"
version = "0.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d808cff91dfca7b239d40b972ba628add94892b1d9e19a842aedc5cfae8ab1a"
dependencies = [
"log",
"rustc-hash",
"smallvec",
]
[[package]]
name = "regex"
version = "1.5.4"
@ -194,65 +40,10 @@ version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "region"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877e54ea2adcd70d80e9179344c97f93ef0dffd6b03e1f4529e6e83ab2fa9ae0"
dependencies = [
"bitflags",
"libc",
"mach",
"winapi",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "smallvec"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
[[package]]
name = "target-lexicon"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9bffcddbc2458fa3e6058414599e3c838a022abae82e5c67b4f7f80298d5bff"
[[package]]
name = "vl"
version = "0.1.0"
dependencies = [
"anyhow",
"cranelift",
"cranelift-jit",
"cranelift-module",
"regex",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View file

@ -8,6 +8,3 @@ edition = "2021"
[dependencies]
regex = "1.5.4"
anyhow = "1.0.52"
cranelift = "0.80.0"
cranelift-module = "0.80.0"
cranelift-jit = "0.80.0"

View file

@ -12,11 +12,20 @@ another lisp dialect
(print (factorial times)))
```
Compilation flow:
```
Input(file) -> Lexer -> Parser -> Interpret
String Token Expr IO
|-> Compile(TODO)
File
```
Progress:
- [X] Lexer & Parser
- [ ] Syntax checker & Type checker
- [X] Interpreter
- [ ] Compiler
Problems:
- Parser only detects the first error and quits.
- Parser only detects the first error.
- Parser can't detect `(()))` syntax error.

View file

@ -1 +1,2 @@
(print "Hello, World")
(print "hi")
(print "hello")

View file

@ -1,4 +1,4 @@
(fun factorial [x]
(fun factorial (x)
(if (<= x 1)
1
(* x (factorial (- x 1)))))

View file

View file

@ -1,46 +0,0 @@
use regex::Regex;
use anyhow::{anyhow, Error};
/// Pattern used by `lexer`; capture group 1 holds the text of one token per match.
///
/// Each match first skips any run of whitespace and commas, then captures one of:
/// - a single bracket character (`[`, `]`, `{`, `}`, `(`, `)`),
/// - a double-quoted string with backslash escapes (the closing quote is optional),
/// - a `;` comment running to the end of the line,
/// - a run of characters that are not whitespace, brackets, quotes, commas or semicolons.
const REGEX : &str = r###"[\s,]*([\[\]{}()]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('",;)]+)"###;
/// One lexeme plus the location information recorded for it.
#[derive(Debug, Clone)]
pub struct Token {
    /// Text of the token.
    pub value: String,
    /// `(start, end)` offsets as supplied by whoever built the token.
    pub span: (usize, usize),
    /// Line index as supplied by whoever built the token.
    pub line: usize,
}

impl Token {
    /// Bundles the three parts into a `Token`; no validation is performed.
    pub fn new(value: String, span: (usize, usize), line: usize) -> Token {
        Self { line, span, value }
    }
}
/// Splits `input` into [`Token`]s by repeatedly matching [`REGEX`].
///
/// Matches whose captured text starts with `;` are comments and produce no token.
/// For every other match the token records:
/// - `value`: the text of capture group 1,
/// - `span`: the byte range of the *whole* match, which includes any leading
///   whitespace or commas the pattern skipped,
/// - `line`: the number of `\n` characters in `input` before the end of the match
///   (so it is zero-based).
///
/// # Errors
/// Returns an error if a match reports no overall position.
///
/// # Panics
/// Panics if [`REGEX`] fails to compile.
pub fn lexer(input: &str) -> Result<Vec<Token>, Error> {
    let regex = Regex::new(REGEX).unwrap();
    let mut results: Vec<Token> = Vec::new();

    // Matches arrive in increasing, non-overlapping order, so the newline count can be
    // carried forward instead of rescanning the whole prefix of `input` for every token.
    let mut line: usize = 0;
    let mut counted_up_to: usize = 0;

    for capture in regex.captures_iter(input) {
        // Build the error lazily; it is only needed on the failure path.
        let position = capture
            .get(0)
            .ok_or_else(|| anyhow!("No position found"))?;
        let span = (position.start(), position.end());

        // Count newlines for comments too, otherwise later tokens would report the wrong line.
        line += input[counted_up_to..span.1].matches('\n').count();
        counted_up_to = span.1;

        if capture[1].starts_with(';') {
            continue;
        }

        results.push(Token::new(capture[1].to_string(), span, line));
    }

    Ok(results)
}
/// Formats a `src:line:offset` location label for `token`.
///
/// `src` is used verbatim as the prefix, the stored line index is shifted to be
/// one-based, and the offset is the start of the token's span.
pub fn here(src: &str, token: &Token) -> String {
    let line_number = token.line + 1;
    let start = token.span.0;
    format!("{}:{}:{}", src, line_number, start)
}

View file

@ -1,39 +1,19 @@
use std::{fs::read_to_string, env::args, path::Path};
mod token;
mod util;
mod lexer;
use util::cover_paren;
mod parser;
mod compiler;
use parser::tokenize;
fn main() {
let args = args().nth(1).expect("No input file");
let src = util::cover_paren(read_to_string(&args).unwrap());
let src = cover_paren(read_to_string(&args).unwrap());
let _file_name = Path::new(&args).file_stem().unwrap().to_str().unwrap();
let tokens = lexer::lexer(&src);
if tokens.is_err() {
eprintln!("{}", tokens.unwrap_err());
return;
} else {
// for t in tokens.as_ref().unwrap() {
// println!("{:?}", t);
// }
let ast = parser::parse(tokens.unwrap(), &args);
if ast.is_err() {
eprintln!("{}", ast.as_ref().unwrap_err());
return;
} else {
let c = parser::translate_expr(ast.unwrap());
match c {
Ok(e) => {
println!("{:#?}", e);
}
Err(e) => {
eprintln!("{}", e);
}
}
}
}
let mut parser = parser::Parser::new(tokenize(&src));
let result = parser.parse();
println!("{:#?}", result);
}

View file

@ -1,222 +1,94 @@
use regex::Regex;
use std::rc::Rc;
use crate::{
lexer::{Token, here},
token::{
Type::{self, *}, Return, Error::{self, ErrorString},
}, list, vector,
};
const INT_REGEX: &str = r#"^-?[0-9]+$"#;
const STRING_REGEX: &str = r#""(?:\\.|[^\\"])*""#;
use crate::parser::Sexpr::*;
#[derive(Debug, Clone)]
pub enum Expr {
Literal(Type),
List(Vec<Expr>),
Vector(Vec<Expr>),
Identifier(String),
Assign(String, Box<Expr>),
Binary(Box<Expr>, BinaryOp, Box<Expr>),
If(Box<Expr>, Vec<Expr>, Vec<Expr>),
While(Box<Expr>, Vec<Expr>),
Call(String, Vec<Expr>),
Function(String, Vec<String>, Vec<Expr>),
NoOperation,
pub enum Sexpr {
Int(i64), Float(f64), Str(String), Boolean(bool),
Symbol(String),
Cons(Box<Sexpr>, Vec<Sexpr>),
Nil,
}
#[derive(Debug, Clone)]
pub enum BinaryOp {
Add, Sub,
Mul, Div, Mod,
Eq, Ne,
Lt, Le, Gt, Ge,
}
pub type ParseResult = Result<Sexpr, String>;
struct Reader {
src: String,
tokens: Vec<Token>,
pub struct Parser {
unparsed: Vec<String>,
position: usize,
}
impl Reader {
fn new(tokens: Vec<Token>, src: String) -> Reader {
Reader {
src,
tokens,
impl Parser {
pub fn new(src: Vec<String>) -> Parser {
Parser {
unparsed: src,
position: 0,
}
}
fn next(&mut self) -> Result<&Token, Error> {
fn peek(&mut self) -> Option<String> {
self.unparsed.get(self.position).cloned()
}
fn next(&mut self) -> Option<String> {
self.position += 1;
Ok(self.tokens.get(self.position - 1).ok_or(ErrorString("Underflow".to_string()))?)
self.unparsed.get(self.position - 1).cloned()
}
fn peek(&mut self) -> Result<&Token, Error> {
Ok(self.tokens.get(self.position).ok_or(ErrorString("Underflow".to_string()))?)
}
}
fn read_atom(reader: &mut Reader) -> Return {
let int_regex = Regex::new(INT_REGEX).unwrap();
let string_regex = Regex::new(STRING_REGEX).unwrap();
let token = reader.next()?;
match &token.value[..] {
"null" => Ok(Type::Null),
"true" => Ok(Type::Bool(true)),
"false" => Ok(Type::Bool(false)),
_ => {
if int_regex.is_match(&token.value) {
Ok(Type::Number(token.value.parse().unwrap()))
} else if string_regex.is_match(&token.value) {
Ok(Type::Str(token.value[1..token.value.len() - 1].to_string()))
} else {
Ok(Type::Symbol(token.value.to_string()))
pub fn parse(&mut self) -> ParseResult {
match self.peek() {
Some(s) => match s.as_str() {
")" => Err(format!("Unexpected ')' at position {}", self.position)),
"'" => { self.next(); Ok(Cons(Box::new(Str("quote".to_string())), vec![self.parse()?])) },
"(" => self.parse_sequence(")"),
_ => self.parse_atom(),
}
None => return Err("Unexpected EOF".to_string()),
}
}
}
fn read_sequence(reader: &mut Reader, end: &str) -> Return {
let mut sequence: Vec<Type> = Vec::new();
let _current_token_ = reader.next()?;
loop {
let token = match reader.peek() {
Ok(token) => token,
Err(_) => return Err(ErrorString(
format!("{} Unexpected end of input, expected '{}'", here(&reader.src, &reader.tokens[reader.position - 1]), end)
)),
};
if token.value == end { break; }
sequence.push(read_form(reader)?)
fn parse_sequence(&mut self, end: &str) -> ParseResult {
self.next();
let car = self.parse()?;
let mut cdr = Vec::new();
loop {
let token = match self.peek() {
Some(token) => token,
None => return Err(format!("Unexpected end of input, expected '{}'", end)),
};
if token == end { break; }
cdr.push(self.parse()?)
}
self.next();
Ok(Sexpr::Cons(Box::new(car), cdr))
}
let _match_token_ = reader.next()?;
match end {
")" => Ok(list!(sequence)),
"]" => Ok(vector!(sequence)),
_ => return Err(ErrorString(format!("Unknown sequence end value: '{}'", end))),
}
}
fn read_form(reader: &mut Reader) -> Return {
let token = reader.peek()?;
match &token.value[..] {
")" => Err(ErrorString("Unexpected ')'".to_string())),
"(" => read_sequence(reader, ")"),
"]" => Err(ErrorString("Unexpected ']'".to_string())),
"[" => read_sequence(reader, "]"),
_ => read_atom(reader),
}
}
pub fn parse(tokens: Vec<Token>, src: &str) -> Return {
if tokens.len() == 0 { return Ok(Null); }
read_form(&mut Reader::new(tokens, src.to_string()))
}
pub fn translate_expr(ast: Type) -> Result<Expr, String> {
let result: Result<Expr, String>;
result = match ast {
Type::Null => Ok(Expr::Literal(Null)),
Type::Bool(b) => Ok(Expr::Literal(Bool(b))),
Type::Number(n) => Ok(Expr::Literal(Number(n))),
Type::Str(s) => Ok(Expr::Literal(Str(s))),
Type::Symbol(s) => Ok(Expr::Identifier(s)),
Type::List(list, _) => {
if list.len() == 0 {
Ok(Expr::NoOperation)
} else {
match &list[0] {
Type::Symbol(s) => {
match s.as_str() {
"def" => {
let value = translate_expr(list[1].clone())?;
Ok(Expr::Assign(s.clone(), Box::new(value)))
}
"if" => {
let cond = translate_expr(list[1].clone())?;
let then = translate_expr(list[2].clone())?;
let else_ = translate_expr(list[3].clone())?;
Ok(Expr::If(Box::new(cond), vec![then], vec![else_]))
}
"while" => {
let cond = translate_expr(list[1].clone())?;
let body = translate_expr(list[2].clone())?;
Ok(Expr::While(Box::new(cond), vec![body]))
}
// (fn [args] body)
"fun" => {
let function_name = match list[1].clone() {
Type::Symbol(s) => s,
_ => return Err(format!("Expected symbol as function name, got: {:?}", list[1]))
};
let args = match list[2].clone() {
Type::Vector(v, _) => {
let mut args: Vec<String> = Vec::new();
for arg in v.iter() {
match arg {
Type::Symbol(s) => {
args.push(s.clone());
}
_ => return Err(format!("Unexpected type in function arguments")),
}
}
args
},
_ => return Err(format!("Expected vector of args, got: {:?}", list[1])),
};
let body = translate_expr(list[3].clone())?;
Ok(Expr::Function(function_name, args, vec![body]))
}
"+" | "-" | "*" | "/" | "%" | "=" | "!=" | "<" | "<=" | ">" | ">=" => {
let left = translate_expr(list[1].clone())?;
let right = translate_expr(list[2].clone())?;
let op = match s.as_str() {
"+" => BinaryOp::Add,
"-" => BinaryOp::Sub,
"*" => BinaryOp::Mul,
"/" => BinaryOp::Div,
"%" => BinaryOp::Mod,
"=" => BinaryOp::Eq,
"!=" => BinaryOp::Ne,
"<" => BinaryOp::Lt,
"<=" => BinaryOp::Le,
">" => BinaryOp::Gt,
">=" => BinaryOp::Ge,
_ => return Err(format!("Unknown binary operator: '{}'", s)),
};
Ok(Expr::Binary(Box::new(left), op, Box::new(right)))
}
_ => {
let mut args: Vec<Expr> = Vec::new();
for arg in list.iter().skip(1) {
args.push(translate_expr(arg.clone())?);
}
Ok(Expr::Call(s.clone(), args))
}
}
},
_ => {
let mut args: Vec<Expr> = Vec::new();
for arg in list.iter() {
args.push(translate_expr(arg.clone())?);
}
Ok(Expr::List(args))
},
fn parse_atom(&mut self) -> ParseResult {
let token = self.next().unwrap();
match token.as_str() {
"null" => Ok(Nil),
"true" => Ok(Boolean(true)),
"false" => Ok(Boolean(false)),
_ => {
if Regex::new(r#"[+-]?([0-9]*[.])?[0-9]+"#).unwrap().is_match(&token) {
Ok(Int(token.parse().unwrap()))
} else if Regex::new(r#"[+-]?([0-9]*[.])?[0-9]+"#).unwrap().is_match(&token) {
Ok(Float(token.parse().unwrap()))
} else if Regex::new(r#""(?:\\.|[^\\"])*""#).unwrap().is_match(&token) {
Ok(Str(token[1..token.len() - 1].to_string()))
} else {
Ok(Symbol(token))
}
}
},
Type::Vector(vector, _) => {
let mut vec: Vec<Expr> = Vec::new();
for item in vector.iter() {
vec.push(translate_expr(item.clone())?);
}
return Ok(Expr::Vector(vec));
}
};
result
}
}
/// Splits source text into the flat token list consumed by the parser.
///
/// - `(` and `)` are always tokens of their own.
/// - A `"` that begins a token starts a string literal. Everything up to and including
///   the matching closing quote is kept as a single token, so whitespace and parentheses
///   inside strings survive. A backslash inside the literal escapes the next character.
///   An unterminated literal runs to the end of the input.
/// - Any other run of non-whitespace characters is one token.
///
/// Whitespace separates tokens and is otherwise discarded.
pub fn tokenize(str: &str) -> Vec<String> {
    let mut tokens: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut chars = str.chars();

    // `while let` rather than `for`: the string-literal branch advances `chars` itself.
    while let Some(c) = chars.next() {
        match c {
            // Only a quote at the start of a token opens a literal; a quote in the middle
            // of a token is treated like any other character.
            '"' if current.is_empty() => {
                current.push(c);
                while let Some(inner) = chars.next() {
                    current.push(inner);
                    match inner {
                        // Keep the escaped character so an escaped quote does not end the literal.
                        '\\' => {
                            if let Some(escaped) = chars.next() {
                                current.push(escaped);
                            }
                        }
                        '"' => break,
                        _ => {}
                    }
                }
                tokens.push(std::mem::take(&mut current));
            }
            '(' | ')' => {
                if !current.is_empty() {
                    tokens.push(std::mem::take(&mut current));
                }
                tokens.push(c.to_string());
            }
            c if c.is_whitespace() => {
                if !current.is_empty() {
                    tokens.push(std::mem::take(&mut current));
                }
            }
            _ => current.push(c),
        }
    }

    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}

View file

@ -1,68 +0,0 @@
use std::rc::Rc;
use crate::util::unescape;
/// A value as read from source text.
///
/// `List` and `Vector` carry a second `Rc<Type>` field that the `list!` / `vector!`
/// macros always fill with `Null` and that `Display` ignores.
/// NOTE(review): presumably reserved for metadata — confirm before relying on it.
#[derive(Debug, Clone)]
pub enum Type {
/// The null value.
Null,
/// A boolean.
Bool(bool),
/// A signed 64-bit integer.
Number(i64),
/// A string; `Display` wraps it in double quotes.
Str(String),
/// A bare identifier.
Symbol(String),
/// A sequence of values; `Display` prints it in parentheses, space-separated.
List(Rc<Vec<Type>>, Rc<Type>),
/// A sequence of values; `Display` prints it in square brackets, comma-separated.
Vector(Rc<Vec<Type>>, Rc<Type>),
// Function(fn(Arguments) -> Return, Rc<Type>),
}
impl std::fmt::Display for Type {
    /// Writes the value in source-like form: scalars as-is, strings in double quotes
    /// (after passing through `unescape`), lists as `(a b c)` and vectors as `[a, b, c]`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        /// Renders each element through its own `Display` impl and joins them with `sep`.
        fn join_items(items: &[Type], sep: &str) -> String {
            items
                .iter()
                .map(|item| item.to_string())
                .collect::<Vec<String>>()
                .join(sep)
        }

        match self {
            Type::Null => write!(f, "Null"),
            Type::Bool(flag) => write!(f, "{}", flag),
            Type::Number(number) => write!(f, "{}", number),
            Type::Str(text) => write!(f, "\"{}\"", unescape(text.to_string())),
            Type::Symbol(name) => write!(f, "{}", name),
            // The second field of both sequence variants is not printed.
            Type::List(items, _) => write!(f, "({})", join_items(items, " ")),
            Type::Vector(items, _) => write!(f, "[{}]", join_items(items, ", ")),
        }
    }
}
/// Error type carried by [`Return`]; currently just a message.
#[derive(Debug)]
pub enum Error {
    /// A free-form, human-readable error message.
    ErrorString(String),
}

impl std::fmt::Display for Error {
    /// Writes the wrapped message with no extra decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The enum has a single variant, so the pattern is irrefutable.
        let Error::ErrorString(message) = self;
        write!(f, "{}", message)
    }
}
// pub type Arguments = Vec<Type>;
/// Shorthand for a result that yields a `Type` on success and an `Error` on failure.
pub type Return = Result<Type, Error>;
/// Builds a `List` value whose second field is `Null`.
///
/// The expansion uses the unqualified names `List`, `Null`, `Rc` (and `Type` in the
/// second arm), so all of them must be in scope at the call site.
///
/// Arms are tried in order: an invocation with exactly one expression always matches
/// the first arm and is treated as the `Vec<Type>` itself, not as a one-element list.
#[macro_export]
macro_rules! list {
// One expression: used directly as the element vector.
($seq:expr) => {{
List(Rc::new($seq),Rc::new(Null))
}};
// Zero, or two or more, comma-separated expressions: collected into a new vector.
[$($args:expr),*] => {{
let v: Vec<Type> = vec![$($args),*];
List(Rc::new(v),Rc::new(Null))
}}
}
/// Builds a `Vector` value whose second field is `Null`.
///
/// The expansion uses the unqualified names `Vector`, `Null`, `Rc` (and `Type` in the
/// second arm), so all of them must be in scope at the call site.
///
/// Arms are tried in order: an invocation with exactly one expression always matches
/// the first arm and is treated as the `Vec<Type>` itself, not as a one-element vector.
#[macro_export]
macro_rules! vector {
// One expression: used directly as the element vector.
($seq:expr) => {{
Vector(Rc::new($seq), Rc::new(Null))
}};
// Zero, or two or more, comma-separated expressions: collected into a new vector.
[$($args:expr),*] => {{
let v: Vec<Type> = vec![$($args),*];
Vector(Rc::new(v), Rc::new(Null))
}}
}

View file

@ -1,25 +1,3 @@
pub fn cover_paren(s: String) -> String {
format!("({})", s)
}
pub fn unescape(s: String) -> String {
let mut result = String::new();
let mut i = 0;
while i < s.len() {
if s.chars().nth(i).unwrap() == '\\' {
match s.chars().nth(i + 1).unwrap() {
'n' => result.push('\n'),
't' => result.push('\t'),
'r' => result.push('\r'),
'\\' => result.push('\\'),
'"' => result.push('"'),
_ => result.push(s.chars().nth(i + 1).unwrap()),
}
i += 2;
} else {
result.push(s.chars().nth(i).unwrap());
i += 1;
}
}
result
format!("(do {})", s)
}