diff --git a/able-script-test/parse_test.able b/able-script-test/parse_test.able index 72786dc..6d7ceca 100644 --- a/able-script-test/parse_test.able +++ b/able-script-test/parse_test.able @@ -1,10 +1,3 @@ -functio test() { - functio nested() { - var c = false; - } - var a = true; -} - -functio another() { - var b = false; +if (true) { + var a = 3; } \ No newline at end of file diff --git a/src/base_55.rs b/src/base_55.rs index 422eb1b..bb9d5ce 100644 --- a/src/base_55.rs +++ b/src/base_55.rs @@ -122,3 +122,13 @@ pub fn num2char(number: i32) -> char { _ => ' ', } } + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn str_to_base55() { + let chrs: Vec = "AbleScript".chars().map(char2num).collect(); + assert_eq!(chrs, &[-1, 2, 12, 5, -19, 3, 18, 9, 16, 20]); + } +} diff --git a/src/error.rs b/src/error.rs index 510aa33..1b3ac84 100644 --- a/src/error.rs +++ b/src/error.rs @@ -8,12 +8,7 @@ pub struct Error { #[derive(Debug, Clone)] pub enum ErrorKind { - SyntaxError, -} - -impl Error { - pub fn panic(&self, span: &str) { - println!("{:?} occured at {:?}", self.kind, self.position); - println!(" {}", &span); - } + SyntaxError(String), + EndOfTokenStream, + InvalidIdentifier, } diff --git a/src/main.rs b/src/main.rs index bf9230a..32d3d3c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,7 +32,7 @@ fn main() { // Parse let mut parser = Parser::new(&source); - let ast = parser.parse(); + let ast = parser.init(); println!("{:#?}", ast); } None => { diff --git a/src/parser/item.rs b/src/parser/item.rs index af6af74..46616dc 100644 --- a/src/parser/item.rs +++ b/src/parser/item.rs @@ -1,6 +1,27 @@ +use crate::variables::Value; + +#[derive(Debug, Clone)] +pub struct Iden(pub String); + #[derive(Debug, Clone)] pub enum Expr { - VariableDeclaration { iden: String, init: Option }, - FunctionDeclaration { iden: String, body: Vec }, - BfFDeclaration { iden: String, code: String }, + VariableDeclaration { + iden: String, + init: Option>, + }, + FunctionDeclaration { + iden: String, + body: Vec, + }, + BfFDeclaration { + iden: String, + body: String, + }, + If { + cond: Box, + body: Vec, + }, + + Literal(Value), + Melo(Iden), } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 33a8409..36a44c0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3,14 +3,18 @@ mod utils; use item::Expr; -use crate::error::{Error, ErrorKind}; -use crate::tokens::Token; +use crate::{ + error::{Error, ErrorKind}, + variables::Value, +}; +use crate::{parser::item::Iden, tokens::Token}; use logos::Logos; /// Parser structure / state machine pub struct Parser<'a> { lexer: logos::Lexer<'a, Token>, + ast: Vec, } impl<'a> Parser<'a> { @@ -18,51 +22,80 @@ impl<'a> Parser<'a> { pub fn new(source: &'a str) -> Self { Self { lexer: Token::lexer(source), + ast: Vec::new(), } } - /// Start parsing Token Vector into Abstract Syntax Tree - pub fn parse(&mut self) -> Vec { - let mut ast = vec![]; - while let Some(token) = self.lexer.next() { - let expr = match token { - Token::Variable => self.variable_declaration(), - Token::Function => self.function_declaration(), - Token::BfFunction => self.bff_declaration(), - Token::RightBrace => return ast, - _ => Err(Error { - kind: ErrorKind::SyntaxError, - position: 0..0, - }), + pub fn init(&mut self) -> Result, Error> { + loop { + let token = self.lexer.next(); + if token.is_none() { + return Ok(self.ast.clone()); }; - match expr { - Ok(o) => ast.push(o), - Err(e) => { - e.panic(self.lexer.slice()); - break; - } - } + + let expr = self.parse_expr(token)?; + self.ast.push(expr); + } + } + + fn parse_expr(&mut self, token: Option) -> Result { + if matches!(token, None) { + return Err(Error { + kind: ErrorKind::EndOfTokenStream, + position: self.lexer.span(), + }); } - ast + let token = token.unwrap(); + let start = self.lexer.span().start; + + match token { + // Control flow + Token::If => self.if_cond(), + + // Declarations + Token::Variable => self.variable_declaration(), + Token::Function => self.function_declaration(), + Token::BfFunction => self.bff_declaration(), + + // Literals + Token::String(x) => Ok(Expr::Literal(Value::Str(x))), + Token::Integer(x) => Ok(Expr::Literal(Value::Int(x))), + Token::Boolean(x) => Ok(Expr::Literal(Value::Bool(x))), + Token::Aboolean(x) => Ok(Expr::Literal(Value::Abool(x))), + + // Prefix keywords + // Melo - ban variable from next usage (runtime error) + Token::Melo => { + let e = self.require_iden()?; + self.require(Token::Semicolon)?; + Ok(Expr::Melo(Iden(e))) + } + + _ => Err(Error { + kind: ErrorKind::SyntaxError("Unexpected identifier".to_owned()), + position: start..self.lexer.span().end, + }), + } } /// Parse variable declaration /// /// `var [iden] = [literal];` fn variable_declaration(&mut self) -> Result { - let iden = self.require(Token::Identifier)?; + let iden = self.require_iden()?; let init = match self.lexer.next() { Some(Token::Semicolon) => None, Some(Token::Assignment) => { - let value = self.require(Token::Boolean)?; // TODO: Shouldn't be limited to boolean (pattern match?) + let value = self.lexer.next(); + let value = self.parse_expr(value)?; self.require(Token::Semicolon)?; - Some(value) + Some(Box::new(value)) } _ => { return Err(Error { - kind: ErrorKind::SyntaxError, + kind: ErrorKind::SyntaxError("Unexpected token".to_owned()), position: self.lexer.span(), }) } @@ -75,12 +108,13 @@ impl<'a> Parser<'a> { /// /// `functio [iden] ([expr], [expr]) { ... } fn function_declaration(&mut self) -> Result { - let iden = self.require(Token::Identifier)?; + let iden = self.require_iden()?; self.require(Token::LeftParenthesis)?; - // TODO: Arguments self.require(Token::RightParenthesis)?; + self.require(Token::LeftBrace)?; - let body = self.parse(); + // Parse function body + let body = self.parse_body()?; Ok(Expr::FunctionDeclaration { iden, body }) } @@ -89,10 +123,56 @@ impl<'a> Parser<'a> { /// /// `bff [iden] { ... }` fn bff_declaration(&mut self) -> Result { - let iden = self.require(Token::Identifier)?; + let iden = self.require_iden()?; self.require(Token::LeftBrace)?; - let code = self.require(Token::String)?; // <-- Nasty hack, but works - self.require(Token::RightBrace)?; - Ok(Expr::BfFDeclaration { iden, code }) + + let mut body = String::new(); + loop { + let token = { + match self.lexer.next() { + Some(t) => t, + None => { + return Err(Error { + kind: ErrorKind::EndOfTokenStream, + position: self.lexer.span(), + }) + } + } + }; + + if token == Token::RightBrace { + break; + } + body.push_str(match token { + Token::OpGt + | Token::OpLt + | Token::Addition + | Token::Subtract + | Token::FullStop + | Token::Comma + | Token::LeftBracket + | Token::RightBracket => self.lexer.slice(), + Token::RightBrace => break, + _ => return Err(self.unexpected_token(None)), + }); + } + Ok(Expr::BfFDeclaration { iden, body }) + } + + /// Parse If-expression + pub fn if_cond(&mut self) -> Result { + self.require(Token::LeftParenthesis)?; + let cond = self.lexer.next(); + let cond = self.parse_expr(cond)?; + self.require(Token::RightParenthesis)?; + + self.require(Token::LeftBrace)?; + + let body = self.parse_body()?; + + Ok(Expr::If { + cond: Box::new(cond), + body, + }) } } diff --git a/src/parser/utils.rs b/src/parser/utils.rs index bae92a3..42c2b63 100644 --- a/src/parser/utils.rs +++ b/src/parser/utils.rs @@ -2,7 +2,7 @@ use crate::error::{Error, ErrorKind}; use crate::tokens::Token; use crate::variables::Abool; -use super::Parser; +use super::{item::Expr, Parser}; pub fn abool2num(abool: Abool) -> i32 { match abool { @@ -23,13 +23,55 @@ pub fn num2abool(number: i32) -> Abool { impl<'a> Parser<'a> { /// Require type of token as next and return it's value (sometimes irrelevant) pub(super) fn require(&mut self, with: Token) -> Result { - if self.lexer.next() == Some(with) { + if self.lexer.next() == Some(with.clone()) { Ok(self.lexer.slice().to_owned()) + } else { + Err(self.unexpected_token(Some(with))) + } + } + + pub(super) fn require_iden(&mut self) -> Result { + if let Some(Token::Identifier(id)) = self.lexer.next() { + Ok(id) } else { Err(Error { - kind: ErrorKind::SyntaxError, + kind: ErrorKind::InvalidIdentifier, position: self.lexer.span(), }) } } + + pub(super) fn unexpected_token(&mut self, expected: Option) -> Error { + Error { + kind: ErrorKind::SyntaxError(format!( + "Unexpected token: `{}` (required: `{:?}`)", + self.lexer.slice(), + expected + )), + position: self.lexer.span(), + } + } + + pub(super) fn parse_body(&mut self) -> Result, Error> { + let mut body = Vec::new(); + loop { + let token = { + match self.lexer.next() { + Some(t) => t, + None => { + return Err(Error { + kind: ErrorKind::EndOfTokenStream, + position: self.lexer.span(), + }) + } + } + }; + + if token == Token::RightBrace { + break; + } + body.push(self.parse_expr(Some(token))?); + } + Ok(body) + } } diff --git a/src/tokens.rs b/src/tokens.rs index ddfe26e..b21c4d5 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,72 +1,9 @@ -use logos::Logos; +use logos::{Lexer, Logos}; -#[derive(Logos, Debug, PartialEq)] +use crate::variables::Abool; + +#[derive(Logos, Debug, PartialEq, Clone)] pub enum Token { - // Literals - /// True, False - #[regex("true|false")] - Boolean, - - /// Always, Sometimes, Never - #[regex("always|sometimes|never")] - Aboolean, - - /// String - #[regex("\"(\\.|[^\"])*\"")] - String, - - /// Integer - #[regex(r"[0-9]+")] - Integer, - - /// A C-complaint identifier - #[regex(r"[a-zA-Z_][a-zA-Z_0-9]*")] - Identifier, - - #[token("(")] - LeftParenthesis, - - #[token(")")] - RightParenthesis, - - #[token("[")] - LeftBracket, - - #[token("]")] - RightBracket, - - #[token("{")] - LeftBrace, - - #[token("}")] - RightBrace, - - #[token(";")] - Semicolon, - - #[regex(r"#.*")] - Comment, - - // Operators - #[token("-")] - Subtract, - - #[token("+")] - Addition, - - #[token("*")] - Multiply, - - #[token("/")] - Divide, - - #[token("=")] - Assignment, - - /// Base52 based character ('a') - #[token("'.*'")] - Char, - #[token("functio")] Function, @@ -99,7 +36,152 @@ pub enum Token { #[token("loop")] Loop, + // Literals + /// True, False + #[regex("true|false", get_bool)] + Boolean(bool), + + /// Always, Sometimes, Never + #[regex("always|sometimes|never", get_abool)] + Aboolean(Abool), + + /// String + #[regex("\"(\\.|[^\"])*\"", get_string)] + String(String), + + /// Integer + #[regex(r"[0-9]+", get_int)] + Integer(i32), + + /// A C-complaint identifier + #[regex(r"[a-zA-Z_][a-zA-Z_0-9]*", get_iden)] + Identifier(String), + + #[token("(")] + LeftParenthesis, + + #[token(")")] + RightParenthesis, + + #[token("[")] + LeftBracket, + + #[token("]")] + RightBracket, + + #[token("{")] + LeftBrace, + + #[token("}")] + RightBrace, + + #[token(";")] + Semicolon, + + #[token(".")] + FullStop, + + #[token(",")] + Comma, + + #[regex(r"#.*")] + Comment, + + // Operators + #[token("-")] + Subtract, + + #[token("+")] + Addition, + + #[token("*")] + Multiply, + + #[token("/")] + Divide, + + #[token("=")] + Assignment, + + // Logical operators + #[token("<")] + OpLt, + + #[token(">")] + OpGt, + + #[token("==")] + OpEq, + + #[token("!=")] + OpNeq, + + /// Base52 based character ('a') + #[token("'.*'")] + Char, + #[regex(r"[ \t\n\f]+", logos::skip)] #[error] Error, } + +fn get_bool(lexer: &mut Lexer) -> Option { + lexer.slice().parse().ok() +} + +fn get_int(lexer: &mut Lexer) -> Option { + lexer.slice().parse().ok() +} + +fn get_string(lexer: &mut Lexer) -> String { + lexer.slice().trim_matches('"').to_owned() +} + +fn get_abool(lexer: &mut Lexer) -> Option { + match lexer.slice() { + "always" => Some(Abool::Always), + "sometimes" => Some(Abool::Sometimes), + "never" => Some(Abool::Never), + _ => None, + } +} + +fn get_iden(lexer: &mut Lexer) -> String { + lexer.slice().to_owned() +} + +#[cfg(test)] +mod tests { + use super::Token; + use super::Token::*; + use logos::Logos; + + #[test] + fn simple_fn() { + let code = "functio test() { var a = 3; if a == 3 { a print } }"; + let expected = &[ + Function, + Identifier("test".to_owned()), + LeftParenthesis, + RightParenthesis, + LeftBrace, + Variable, + Identifier("a".to_owned()), + Assignment, + Integer(3), + Semicolon, + If, + Identifier("a".to_owned()), + OpEq, + Integer(3), + LeftBrace, + Identifier("a".to_owned()), + Print, + RightBrace, + RightBrace, + ]; + let lexer = Token::lexer(code); + let result: Vec = lexer.collect(); + assert_eq!(result, expected); + } +} diff --git a/src/variables.rs b/src/variables.rs index b3020a1..cb476b9 100644 --- a/src/variables.rs +++ b/src/variables.rs @@ -1,5 +1,4 @@ use rand::Rng; -use std::collections::HashMap; #[derive(Debug, Clone, PartialEq)] pub enum Abool { @@ -31,20 +30,3 @@ pub struct Variable { melo: bool, value: Value, } -pub fn test() { - let mut map = HashMap::new(); - let a = Variable { - melo: false, - value: Value::Str("1".to_string()), - }; - let b = Variable { - melo: false, - value: Value::Int(2), - }; - map.insert("a", a); - map.insert("b", b); - - for (key, value) in &map { - println!("{}: {:?}", key, value); - } -}