2022-01-21 17:43:50 -06:00
|
|
|
use regex::Regex;
|
2022-01-26 02:35:40 -06:00
|
|
|
use crate::compiler::parser::Sexpr::*;
|
2022-01-21 17:43:50 -06:00
|
|
|
|
2022-01-23 05:49:54 -06:00
|
|
|
/// A parsed S-expression: either an atom or a non-empty list.
#[derive(Debug, Clone)]
pub enum Sexpr {
    /// 64-bit signed integer atom.
    Int(i64),
    /// 64-bit floating-point atom.
    Float(f64),
    /// String atom; the stored text is displayed without surrounding quotes.
    Str(String),
    /// Boolean atom.
    Boolean(bool),
    /// Symbol (identifier) atom.
    Symbol(String),
    /// Non-empty list: the head element followed by the remaining elements.
    Cons(Box<Sexpr>, Vec<Sexpr>),
    /// The nil value, displayed as `nil`.
    Nil,
}

impl std::fmt::Display for Sexpr {
    /// Writes the expression in list notation.
    ///
    /// Atoms are written as their plain value (`Str` and `Symbol` both emit
    /// their raw text), `Nil` is written as `nil`, and a `Cons` is written as
    /// `(head item item ...)` with single spaces between elements.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Sexpr::Int(number) => write!(f, "{}", number),
            Sexpr::Float(number) => write!(f, "{}", number),
            Sexpr::Str(text) | Sexpr::Symbol(text) => f.write_str(text),
            Sexpr::Boolean(flag) => f.write_str(if *flag { "true" } else { "false" }),
            Sexpr::Nil => f.write_str("nil"),
            Sexpr::Cons(head, rest) => {
                f.write_str("(")?;
                write!(f, "{}", head)?;
                rest.iter().try_for_each(|item| write!(f, " {}", item))?;
                f.write_str(")")
            }
        }
    }
}
|
|
|
|
|
2022-01-23 15:42:08 -06:00
|
|
|
/// Outcome of a parse step: the parsed [`Sexpr`] on success, or an error message string on failure.
pub type ParseResult = Result<Sexpr, String>;
|
2022-01-23 05:49:54 -06:00
|
|
|
|
2022-01-23 15:42:08 -06:00
|
|
|
pub struct Parser {
|
|
|
|
unparsed: Vec<String>,
|
2022-01-21 17:43:50 -06:00
|
|
|
position: usize,
|
|
|
|
}
|
|
|
|
|
2022-01-23 15:42:08 -06:00
|
|
|
impl Parser {
|
|
|
|
pub fn new(src: Vec<String>) -> Parser {
|
|
|
|
Parser {
|
|
|
|
unparsed: src,
|
2022-01-21 17:43:50 -06:00
|
|
|
position: 0,
|
|
|
|
}
|
|
|
|
}
|
2022-01-23 15:42:08 -06:00
|
|
|
|
|
|
|
fn peek(&mut self) -> Option<String> {
|
|
|
|
self.unparsed.get(self.position).cloned()
|
2022-01-21 17:43:50 -06:00
|
|
|
}
|
2022-01-23 15:42:08 -06:00
|
|
|
|
|
|
|
fn next(&mut self) -> Option<String> {
|
|
|
|
self.position += 1;
|
|
|
|
self.unparsed.get(self.position - 1).cloned()
|
2022-01-21 17:43:50 -06:00
|
|
|
}
|
2022-01-23 05:49:54 -06:00
|
|
|
|
2022-01-23 15:42:08 -06:00
|
|
|
pub fn parse(&mut self) -> ParseResult {
|
|
|
|
match self.peek() {
|
|
|
|
Some(s) => match s.as_str() {
|
|
|
|
")" => Err(format!("Unexpected ')' at position {}", self.position)),
|
|
|
|
"'" => { self.next(); Ok(Cons(Box::new(Str("quote".to_string())), vec![self.parse()?])) },
|
|
|
|
"(" => self.parse_sequence(")"),
|
|
|
|
_ => self.parse_atom(),
|
2022-01-21 17:43:50 -06:00
|
|
|
}
|
2022-01-23 15:42:08 -06:00
|
|
|
None => return Err("Unexpected EOF".to_string()),
|
2022-01-21 17:43:50 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-23 15:42:08 -06:00
|
|
|
fn parse_sequence(&mut self, end: &str) -> ParseResult {
|
|
|
|
self.next();
|
|
|
|
let car = self.parse()?;
|
2022-01-21 17:43:50 -06:00
|
|
|
|
2022-01-23 15:42:08 -06:00
|
|
|
let mut cdr = Vec::new();
|
|
|
|
|
|
|
|
loop {
|
|
|
|
let token = match self.peek() {
|
|
|
|
Some(token) => token,
|
|
|
|
None => return Err(format!("Unexpected end of input, expected '{}'", end)),
|
|
|
|
};
|
|
|
|
if token == end { break; }
|
|
|
|
cdr.push(self.parse()?)
|
|
|
|
}
|
2022-01-23 05:49:54 -06:00
|
|
|
|
2022-01-23 15:42:08 -06:00
|
|
|
self.next();
|
|
|
|
Ok(Sexpr::Cons(Box::new(car), cdr))
|
|
|
|
}
|
2022-01-23 05:49:54 -06:00
|
|
|
|
2022-01-23 15:42:08 -06:00
|
|
|
fn parse_atom(&mut self) -> ParseResult {
|
|
|
|
let token = self.next().unwrap();
|
|
|
|
match token.as_str() {
|
|
|
|
"null" => Ok(Nil),
|
|
|
|
"true" => Ok(Boolean(true)),
|
|
|
|
"false" => Ok(Boolean(false)),
|
|
|
|
_ => {
|
|
|
|
if Regex::new(r#"[+-]?([0-9]*[.])?[0-9]+"#).unwrap().is_match(&token) {
|
|
|
|
Ok(Int(token.parse().unwrap()))
|
|
|
|
} else if Regex::new(r#"[+-]?([0-9]*[.])?[0-9]+"#).unwrap().is_match(&token) {
|
|
|
|
Ok(Float(token.parse().unwrap()))
|
|
|
|
} else if Regex::new(r#""(?:\\.|[^\\"])*""#).unwrap().is_match(&token) {
|
|
|
|
Ok(Str(token[1..token.len() - 1].to_string()))
|
|
|
|
} else {
|
|
|
|
Ok(Symbol(token))
|
2022-01-23 05:49:54 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-01-23 15:42:08 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn tokenize(str: &str) -> Vec<String> {
|
2022-01-23 15:49:27 -06:00
|
|
|
let regex = Regex::new(r###"[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]+)"###).unwrap();
|
|
|
|
let mut res = vec![];
|
|
|
|
for cap in regex.captures_iter(str) {
|
|
|
|
if cap[1].starts_with(";") { continue; }
|
|
|
|
res.push(String::from(&cap[1]));
|
|
|
|
}
|
|
|
|
res
|
2022-01-21 17:43:50 -06:00
|
|
|
}
|