Merge pull request #10 from erindesu/master

Reimplemented parser
This commit is contained in:
Able 2021-04-27 09:26:29 -05:00 committed by GitHub
commit 345bed5f66
9 changed files with 348 additions and 143 deletions

View file

@ -1,10 +1,3 @@
functio test() {
functio nested() {
var c = false;
}
var a = true;
}
functio another() {
var b = false;
if (true) {
var a = 3;
}

View file

@ -122,3 +122,13 @@ pub fn num2char(number: i32) -> char {
_ => ' ',
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Maps every character of `"AbleScript"` through `char2num` and checks
    /// the resulting number sequence against the expected values.
    #[test]
    fn str_to_base55() {
        let expected: [i32; 10] = [-1, 2, 12, 5, -19, 3, 18, 9, 16, 20];
        let encoded = "AbleScript".chars().map(char2num).collect::<Vec<i32>>();
        assert_eq!(encoded, expected);
    }
}

View file

@ -8,12 +8,7 @@ pub struct Error {
// NOTE(review): this listing is rendered from a diff without +/- markers. The unit
// `SyntaxError` variant and the `impl Error` block appear to be the pre-change text,
// and the three variants after it the post-change text — confirm against the repository.
/// Category of error reported by the parser.
#[derive(Debug, Clone)]
pub enum ErrorKind {
SyntaxError,
}
impl Error {
pub fn panic(&self, span: &str) {
println!("{:?} occured at {:?}", self.kind, self.position);
println!(" {}", &span);
}
/// Syntax error carrying a human-readable message.
SyntaxError(String),
/// The lexer produced no further token where one was needed.
EndOfTokenStream,
/// An identifier token was required but the next token was something else.
InvalidIdentifier,
}

View file

@ -32,7 +32,7 @@ fn main() {
// Parse
let mut parser = Parser::new(&source);
let ast = parser.parse();
let ast = parser.init();
println!("{:#?}", ast);
}
None => {

View file

@ -1,6 +1,27 @@
use crate::variables::Value;
/// Newtype wrapping the `String` name of an identifier.
#[derive(Debug, Clone)]
pub struct Iden(pub String);
/// Node of the abstract syntax tree produced by the parser.
// NOTE(review): rendered from a diff without +/- markers; the three one-line variants
// directly below appear to be the pre-change definitions of the multi-line variants
// that follow them — confirm against the repository.
#[derive(Debug, Clone)]
pub enum Expr {
VariableDeclaration { iden: String, init: Option<String> },
FunctionDeclaration { iden: String, body: Vec<Expr> },
BfFDeclaration { iden: String, code: String },
/// Variable declaration: the variable name and an optional initializer expression.
VariableDeclaration {
iden: String,
init: Option<Box<Expr>>,
},
/// Function declaration: the function name and the expressions forming its body.
FunctionDeclaration {
iden: String,
body: Vec<Expr>,
},
/// `bff` declaration: the name and the body text collected between the braces.
BfFDeclaration {
iden: String,
body: String,
},
/// Conditional: the condition expression and the expressions of the guarded block.
If {
cond: Box<Expr>,
body: Vec<Expr>,
},
/// Literal value (string, integer, boolean or abool).
Literal(Value),
/// `melo` applied to the named identifier.
Melo(Iden),
}

View file

@ -3,14 +3,18 @@ mod utils;
use item::Expr;
use crate::error::{Error, ErrorKind};
use crate::tokens::Token;
use crate::{
error::{Error, ErrorKind},
variables::Value,
};
use crate::{parser::item::Iden, tokens::Token};
use logos::Logos;
/// Parser structure / state machine
pub struct Parser<'a> {
// Token source: a `logos` lexer over the borrowed source text.
lexer: logos::Lexer<'a, Token>,
// Top-level expressions accumulated by `init`.
ast: Vec<Expr>,
}
impl<'a> Parser<'a> {
@ -18,51 +22,80 @@ impl<'a> Parser<'a> {
pub fn new(source: &'a str) -> Self {
Self {
// Tokens are pulled from this lexer with `next()` while parsing.
lexer: Token::lexer(source),
// Nothing has been parsed yet.
ast: Vec::new(),
}
}
/// Start parsing Token Vector into Abstract Syntax Tree
// NOTE(review): rendered from a diff without +/- markers; the `parse` header and the
// three lines after it appear to be the pre-change entry point that `init` replaces —
// confirm against the repository.
pub fn parse(&mut self) -> Vec<Expr> {
let mut ast = vec![];
while let Some(token) = self.lexer.next() {
let expr = match token {
/// Pulls tokens until the lexer is exhausted, parsing one expression per
/// iteration with `parse_expr` and pushing it onto `self.ast`.
///
/// Returns a clone of `self.ast` once the lexer yields `None`, or the first
/// error returned by `parse_expr`.
pub fn init(&mut self) -> Result<Vec<Expr>, Error> {
loop {
let token = self.lexer.next();
// End of input: hand back everything parsed so far.
if token.is_none() {
return Ok(self.ast.clone());
};
let expr = self.parse_expr(token)?;
self.ast.push(expr);
}
}
/// Parses a single expression whose first token is `token`.
///
/// Returns an `EndOfTokenStream` error when `token` is `None`; otherwise
/// dispatches on the token to the matching sub-parser or literal conversion.
// NOTE(review): rendered from a diff without +/- markers; the `Token::RightBrace => return ast`
// arm, the unit-`SyntaxError` fallback, the `match expr { ... }` block and the bare `ast`
// line appear to be leftovers of the pre-change `parse` body — confirm against the repository.
fn parse_expr(&mut self, token: Option<Token>) -> Result<Expr, Error> {
if matches!(token, None) {
return Err(Error {
kind: ErrorKind::EndOfTokenStream,
position: self.lexer.span(),
});
}
// Cannot fail: the `None` case returned above.
let token = token.unwrap();
// Start of the current token's span; used as the start of the error position in the fallback arm.
let start = self.lexer.span().start;
match token {
// Control flow
Token::If => self.if_cond(),
// Declarations
Token::Variable => self.variable_declaration(),
Token::Function => self.function_declaration(),
Token::BfFunction => self.bff_declaration(),
Token::RightBrace => return ast,
_ => Err(Error {
kind: ErrorKind::SyntaxError,
position: 0..0,
}),
};
match expr {
Ok(o) => ast.push(o),
Err(e) => {
e.panic(self.lexer.slice());
break;
}
}
// Literals
Token::String(x) => Ok(Expr::Literal(Value::Str(x))),
Token::Integer(x) => Ok(Expr::Literal(Value::Int(x))),
Token::Boolean(x) => Ok(Expr::Literal(Value::Bool(x))),
Token::Aboolean(x) => Ok(Expr::Literal(Value::Abool(x))),
// Prefix keywords
// Melo - ban variable from next usage (runtime error)
Token::Melo => {
let e = self.require_iden()?;
self.require(Token::Semicolon)?;
Ok(Expr::Melo(Iden(e)))
}
ast
// Any other token cannot start an expression.
_ => Err(Error {
kind: ErrorKind::SyntaxError("Unexpected identifier".to_owned()),
position: start..self.lexer.span().end,
}),
}
}
/// Parse variable declaration
///
/// `var [iden] = [literal];`
fn variable_declaration(&mut self) -> Result<Expr, Error> {
let iden = self.require(Token::Identifier)?;
let iden = self.require_iden()?;
let init = match self.lexer.next() {
Some(Token::Semicolon) => None,
Some(Token::Assignment) => {
let value = self.require(Token::Boolean)?; // TODO: Shouldn't be limited to boolean (pattern match?)
let value = self.lexer.next();
let value = self.parse_expr(value)?;
self.require(Token::Semicolon)?;
Some(value)
Some(Box::new(value))
}
_ => {
return Err(Error {
kind: ErrorKind::SyntaxError,
kind: ErrorKind::SyntaxError("Unexpected token".to_owned()),
position: self.lexer.span(),
})
}
@ -75,12 +108,13 @@ impl<'a> Parser<'a> {
///
/// `functio [iden] ([expr], [expr]) { ... }`
fn function_declaration(&mut self) -> Result<Expr, Error> {
// NOTE(review): rendered from a diff without +/- markers; each pair of `let iden` /
// `let body` lines appears to be the pre-change statement followed by its replacement —
// confirm against the repository.
let iden = self.require(Token::Identifier)?;
let iden = self.require_iden()?;
// The parameter list must currently be empty: `(` immediately followed by `)`.
self.require(Token::LeftParenthesis)?;
// TODO: Arguments
self.require(Token::RightParenthesis)?;
self.require(Token::LeftBrace)?;
let body = self.parse();
// Parse function body
let body = self.parse_body()?;
Ok(Expr::FunctionDeclaration { iden, body })
}
@ -89,10 +123,56 @@ impl<'a> Parser<'a> {
///
/// `bff [iden] { ... }`
fn bff_declaration(&mut self) -> Result<Expr, Error> {
// NOTE(review): rendered from a diff without +/- markers; the first `let iden`, the
// `let code` hack, the following `require(RightBrace)` and the first `Ok(...)` appear to
// be the pre-change body — confirm against the repository.
let iden = self.require(Token::Identifier)?;
let iden = self.require_iden()?;
self.require(Token::LeftBrace)?;
let code = self.require(Token::String)?; // <-- Nasty hack, but works
self.require(Token::RightBrace)?;
Ok(Expr::BfFDeclaration { iden, code })
// Accumulates the source text of every accepted token up to the closing brace.
let mut body = String::new();
loop {
// Running out of tokens before the closing brace is an error.
let token = {
match self.lexer.next() {
Some(t) => t,
None => {
return Err(Error {
kind: ErrorKind::EndOfTokenStream,
position: self.lexer.span(),
})
}
}
};
if token == Token::RightBrace {
break;
}
// Only these eight token kinds are accepted inside the body; anything else is rejected.
body.push_str(match token {
Token::OpGt
| Token::OpLt
| Token::Addition
| Token::Subtract
| Token::FullStop
| Token::Comma
| Token::LeftBracket
| Token::RightBracket => self.lexer.slice(),
// Already handled by the equality check above, so this arm is never taken.
Token::RightBrace => break,
_ => return Err(self.unexpected_token(None)),
});
}
Ok(Expr::BfFDeclaration { iden, body })
}
/// Parse an `if` expression: `( [expr] ) { ... }`, with the `if` keyword already consumed.
///
/// Yields `Expr::If` holding the boxed condition and the parsed block body;
/// any error from the sub-parsers is propagated unchanged.
pub fn if_cond(&mut self) -> Result<Expr, Error> {
    self.require(Token::LeftParenthesis)?;

    // Fetch the token first: the lexer cannot be borrowed while `parse_expr` is being called.
    let first = self.lexer.next();
    let cond = Box::new(self.parse_expr(first)?);

    self.require(Token::RightParenthesis)?;
    self.require(Token::LeftBrace)?;

    self.parse_body().map(|body| Expr::If { cond, body })
}
}

View file

@ -2,7 +2,7 @@ use crate::error::{Error, ErrorKind};
use crate::tokens::Token;
use crate::variables::Abool;
use super::Parser;
use super::{item::Expr, Parser};
pub fn abool2num(abool: Abool) -> i32 {
match abool {
@ -23,13 +23,55 @@ pub fn num2abool(number: i32) -> Abool {
impl<'a> Parser<'a> {
/// Require type of token as next and return its value (sometimes irrelevant)
///
/// Consumes the next token from the lexer. On a match the matched source
/// slice is returned; otherwise the error built by `unexpected_token` is returned.
// NOTE(review): rendered from a diff without +/- markers; the two `if` lines appear to be
// the pre-change condition followed by its replacement — confirm against the repository.
pub(super) fn require(&mut self, with: Token) -> Result<String, Error> {
if self.lexer.next() == Some(with) {
if self.lexer.next() == Some(with.clone()) {
Ok(self.lexer.slice().to_owned())
} else {
Err(self.unexpected_token(Some(with)))
}
}
/// Consumes the next token and returns its name if it is an identifier.
///
/// Any other token, or the end of input, yields an error positioned at the
/// lexer's current span.
// NOTE(review): rendered from a diff without +/- markers; the two `kind:` lines appear to
// be a pre-change line followed by its replacement — confirm against the repository.
pub(super) fn require_iden(&mut self) -> Result<String, Error> {
if let Some(Token::Identifier(id)) = self.lexer.next() {
Ok(id)
} else {
Err(Error {
kind: ErrorKind::SyntaxError,
kind: ErrorKind::InvalidIdentifier,
position: self.lexer.span(),
})
}
}
/// Builds a `SyntaxError` for the token the lexer currently sits on.
///
/// The message quotes the current source slice and the `expected` token
/// (printed with `Debug`, so `None` is shown as such); the position is the
/// lexer's current span.
pub(super) fn unexpected_token(&mut self, expected: Option<Token>) -> Error {
    let message = format!(
        "Unexpected token: `{}` (required: `{:?}`)",
        self.lexer.slice(),
        expected
    );
    let position = self.lexer.span();
    Error {
        kind: ErrorKind::SyntaxError(message),
        position,
    }
}
/// Parses expressions until the closing `}` of a block and returns them in order.
///
/// The closing brace is consumed. Running out of tokens before it is found
/// produces an `EndOfTokenStream` error; errors from `parse_expr` are propagated.
pub(super) fn parse_body(&mut self) -> Result<Vec<Expr>, Error> {
    let mut exprs = Vec::new();
    loop {
        match self.lexer.next() {
            // Input ended while the block was still open.
            None => {
                return Err(Error {
                    kind: ErrorKind::EndOfTokenStream,
                    position: self.lexer.span(),
                })
            }
            // End of the block.
            Some(Token::RightBrace) => return Ok(exprs),
            Some(first) => exprs.push(self.parse_expr(Some(first))?),
        }
    }
}
}

View file

@ -1,72 +1,9 @@
use logos::Logos;
use logos::{Lexer, Logos};
#[derive(Logos, Debug, PartialEq)]
use crate::variables::Abool;
#[derive(Logos, Debug, PartialEq, Clone)]
pub enum Token {
// Literals
/// True, False
#[regex("true|false")]
Boolean,
/// Always, Sometimes, Never
#[regex("always|sometimes|never")]
Aboolean,
/// String
#[regex("\"(\\.|[^\"])*\"")]
String,
/// Integer
#[regex(r"[0-9]+")]
Integer,
/// A C-compliant identifier
#[regex(r"[a-zA-Z_][a-zA-Z_0-9]*")]
Identifier,
#[token("(")]
LeftParenthesis,
#[token(")")]
RightParenthesis,
#[token("[")]
LeftBracket,
#[token("]")]
RightBracket,
#[token("{")]
LeftBrace,
#[token("}")]
RightBrace,
#[token(";")]
Semicolon,
#[regex(r"#.*")]
Comment,
// Operators
#[token("-")]
Subtract,
#[token("+")]
Addition,
#[token("*")]
Multiply,
#[token("/")]
Divide,
#[token("=")]
Assignment,
/// Base52 based character ('a')
#[token("'.*'")]
Char,
#[token("functio")]
Function,
@ -99,7 +36,152 @@ pub enum Token {
#[token("loop")]
Loop,
// Literals
/// True, False
#[regex("true|false", get_bool)]
Boolean(bool),
/// Always, Sometimes, Never
#[regex("always|sometimes|never", get_abool)]
Aboolean(Abool),
/// String
#[regex("\"(\\.|[^\"])*\"", get_string)]
String(String),
/// Integer
#[regex(r"[0-9]+", get_int)]
Integer(i32),
/// A C-compliant identifier
#[regex(r"[a-zA-Z_][a-zA-Z_0-9]*", get_iden)]
Identifier(String),
#[token("(")]
LeftParenthesis,
#[token(")")]
RightParenthesis,
#[token("[")]
LeftBracket,
#[token("]")]
RightBracket,
#[token("{")]
LeftBrace,
#[token("}")]
RightBrace,
#[token(";")]
Semicolon,
#[token(".")]
FullStop,
#[token(",")]
Comma,
#[regex(r"#.*")]
Comment,
// Operators
#[token("-")]
Subtract,
#[token("+")]
Addition,
#[token("*")]
Multiply,
#[token("/")]
Divide,
#[token("=")]
Assignment,
// Logical operators
#[token("<")]
OpLt,
#[token(">")]
OpGt,
#[token("==")]
OpEq,
#[token("!=")]
OpNeq,
/// Base52 based character ('a')
#[token("'.*'")]
Char,
#[regex(r"[ \t\n\f]+", logos::skip)]
#[error]
Error,
}
/// Lexer callback: converts the matched slice into a `bool`.
///
/// Returns `None` when the slice is neither `"true"` nor `"false"`.
fn get_bool(lexer: &mut Lexer<Token>) -> Option<bool> {
    match lexer.slice() {
        "true" => Some(true),
        "false" => Some(false),
        _ => None,
    }
}
/// Lexer callback: converts the matched slice into an `i32`.
///
/// Returns `None` when the text does not parse as an `i32`.
fn get_int(lexer: &mut Lexer<Token>) -> Option<i32> {
    let digits = lexer.slice();
    digits.parse::<i32>().ok()
}
/// Lexer callback: returns the matched slice as an owned `String` with every
/// leading and trailing `"` character removed.
fn get_string(lexer: &mut Lexer<Token>) -> String {
    let quoted = lexer.slice();
    String::from(quoted.trim_matches('"'))
}
/// Lexer callback: maps the keywords `always`, `sometimes` and `never` to the
/// corresponding `Abool` variant; any other text yields `None`.
fn get_abool(lexer: &mut Lexer<Token>) -> Option<Abool> {
    let word = lexer.slice();
    if word == "always" {
        Some(Abool::Always)
    } else if word == "sometimes" {
        Some(Abool::Sometimes)
    } else if word == "never" {
        Some(Abool::Never)
    } else {
        None
    }
}
/// Lexer callback: returns the matched slice as an owned `String`.
fn get_iden(lexer: &mut Lexer<Token>) -> String {
    let name = lexer.slice();
    String::from(name)
}
#[cfg(test)]
mod tests {
    use super::Token;
    use super::Token::*;
    use logos::Logos;

    /// Lexes a small function definition and checks that the produced token
    /// stream matches the expected tokens, in order.
    #[test]
    fn simple_fn() {
        // Shorthand for building identifier tokens.
        let iden = |name: &str| Identifier(name.to_owned());

        let source = "functio test() { var a = 3; if a == 3 { a print } }";
        let expected = vec![
            // functio test() {
            Function,
            iden("test"),
            LeftParenthesis,
            RightParenthesis,
            LeftBrace,
            // var a = 3;
            Variable,
            iden("a"),
            Assignment,
            Integer(3),
            Semicolon,
            // if a == 3 {
            If,
            iden("a"),
            OpEq,
            Integer(3),
            LeftBrace,
            // a print } }
            iden("a"),
            Print,
            RightBrace,
            RightBrace,
        ];

        let actual: Vec<Token> = Token::lexer(source).collect();
        assert_eq!(actual, expected);
    }
}

View file

@ -1,5 +1,4 @@
use rand::Rng;
use std::collections::HashMap;
#[derive(Debug, Clone, PartialEq)]
pub enum Abool {
@ -31,20 +30,3 @@ pub struct Variable {
melo: bool,
value: Value,
}
/// Builds a small map holding two sample variables (`"a"` as a string value,
/// `"b"` as an integer value) and prints every entry as `key: value`.
pub fn test() {
    let mut vars = HashMap::new();
    vars.insert(
        "a",
        Variable {
            melo: false,
            value: Value::Str("1".to_string()),
        },
    );
    vars.insert(
        "b",
        Variable {
            melo: false,
            value: Value::Int(2),
        },
    );

    vars.iter()
        .for_each(|(key, value)| println!("{}: {:?}", key, value));
}