From cc4ec803c4e4ae0438cd5ed46be2679500f175e6 Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 26 Apr 2021 10:44:42 +0200 Subject: [PATCH 1/6] Starting work on parser improvements - Parser should parse single expressions --- src/error.rs | 3 ++- src/main.rs | 2 +- src/parser/mod.rs | 46 ++++++++++++++++++++++++++++++++++++++++++--- src/parser/utils.rs | 2 +- src/tokens.rs | 19 +++++++++++++++++++ 5 files changed, 66 insertions(+), 6 deletions(-) diff --git a/src/error.rs b/src/error.rs index 510aa33..1ae42e8 100644 --- a/src/error.rs +++ b/src/error.rs @@ -8,7 +8,8 @@ pub struct Error { #[derive(Debug, Clone)] pub enum ErrorKind { - SyntaxError, + SyntaxError(String), + EndOfTokenStream, } impl Error { diff --git a/src/main.rs b/src/main.rs index bf9230a..32d3d3c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,7 +32,7 @@ fn main() { // Parse let mut parser = Parser::new(&source); - let ast = parser.parse(); + let ast = parser.init(); println!("{:#?}", ast); } None => { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 33a8409..d1d432e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11,6 +11,7 @@ use logos::Logos; /// Parser structure / state machine pub struct Parser<'a> { lexer: logos::Lexer<'a, Token>, + ast: Vec, } impl<'a> Parser<'a> { @@ -18,9 +19,33 @@ impl<'a> Parser<'a> { pub fn new(source: &'a str) -> Self { Self { lexer: Token::lexer(source), + ast: Vec::new(), } } + pub fn init(&mut self) -> Result, Error> { + loop { + let token = self.lexer.next(); + if token.is_none() { + return Ok(self.ast.clone()); + }; + let expr = self.parse_expr(&token)?; + self.ast.push(expr); + } + } + + fn parse_expr(&mut self, token: &Option) -> Result { + if matches!(token, None) { + return Err(Error { + kind: ErrorKind::EndOfTokenStream, + position: self.lexer.span(), + }); + } + + Ok(todo!()) + } + + /* /// Start parsing Token Vector into Abstract Syntax Tree pub fn parse(&mut self) -> Vec { let mut ast = vec![]; @@ -46,6 +71,7 @@ impl<'a> Parser<'a> { ast } + */ /// Parse variable declaration /// @@ -62,7 +88,7 @@ impl<'a> Parser<'a> { } _ => { return Err(Error { - kind: ErrorKind::SyntaxError, + kind: ErrorKind::SyntaxError("Unexpected token".to_owned()), position: self.lexer.span(), }) } @@ -80,7 +106,7 @@ impl<'a> Parser<'a> { // TODO: Arguments self.require(Token::RightParenthesis)?; self.require(Token::LeftBrace)?; - let body = self.parse(); + let body = vec![]; Ok(Expr::FunctionDeclaration { iden, body }) } @@ -91,7 +117,21 @@ impl<'a> Parser<'a> { fn bff_declaration(&mut self) -> Result { let iden = self.require(Token::Identifier)?; self.require(Token::LeftBrace)?; - let code = self.require(Token::String)?; // <-- Nasty hack, but works + let mut code = String::new(); + while let Some(token) = self.lexer.next() { + code.push_str(match token { + Token::OpGt + | Token::OpLt + | Token::Addition + | Token::Subtract + | Token::FullStop + | Token::Comma + | Token::LeftBracket + | Token::RightBracket => self.lexer.slice(), + Token::RightBrace => break, + _ => break, + }); + } self.require(Token::RightBrace)?; Ok(Expr::BfFDeclaration { iden, code }) } diff --git a/src/parser/utils.rs b/src/parser/utils.rs index bae92a3..cbf8d8c 100644 --- a/src/parser/utils.rs +++ b/src/parser/utils.rs @@ -27,7 +27,7 @@ impl<'a> Parser<'a> { Ok(self.lexer.slice().to_owned()) } else { Err(Error { - kind: ErrorKind::SyntaxError, + kind: ErrorKind::SyntaxError("Mysterious parse error".to_owned()), position: self.lexer.span(), }) } diff --git a/src/tokens.rs b/src/tokens.rs index ddfe26e..8b458c5 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -44,6 +44,12 @@ pub enum Token { #[token(";")] Semicolon, + #[token(".")] + FullStop, + + #[token(",")] + Comma, + #[regex(r"#.*")] Comment, @@ -63,6 +69,19 @@ pub enum Token { #[token("=")] Assignment, + // Logical operators + #[token("<")] + OpLt, + + #[token(">")] + OpGt, + + #[token("==")] + OpEq, + + #[token("!=")] + OpNeq, + /// Base52 based character ('a') #[token("'.*'")] Char, From 3b8ce34c2b0928931e2a7a09e90462d33a28ae44 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 27 Apr 2021 10:51:39 +0200 Subject: [PATCH 2/6] Continue work on parser, improved lexer - Added literal parsing (improved lexing) - Revised error handling --- able-script-test/parse_test.able | 12 +----- src/parser/item.rs | 18 +++++++-- src/parser/mod.rs | 64 +++++++++++++++----------------- src/parser/utils.rs | 18 ++++++--- src/tokens.rs | 43 ++++++++++++++++----- 5 files changed, 92 insertions(+), 63 deletions(-) diff --git a/able-script-test/parse_test.able b/able-script-test/parse_test.able index 72786dc..bfab6c5 100644 --- a/able-script-test/parse_test.able +++ b/able-script-test/parse_test.able @@ -1,10 +1,2 @@ -functio test() { - functio nested() { - var c = false; - } - var a = true; -} - -functio another() { - var b = false; -} \ No newline at end of file +var a = 3; +var b = 4; \ No newline at end of file diff --git a/src/parser/item.rs b/src/parser/item.rs index af6af74..39e2fbf 100644 --- a/src/parser/item.rs +++ b/src/parser/item.rs @@ -1,6 +1,18 @@ +use crate::variables::Value; + #[derive(Debug, Clone)] pub enum Expr { - VariableDeclaration { iden: String, init: Option }, - FunctionDeclaration { iden: String, body: Vec }, - BfFDeclaration { iden: String, code: String }, + VariableDeclaration { + iden: String, + init: Option>, + }, + FunctionDeclaration { + iden: String, + body: Vec, + }, + BfFDeclaration { + iden: String, + code: String, + }, + Literal(Value), } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d1d432e..5f7b08b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3,8 +3,11 @@ mod utils; use item::Expr; -use crate::error::{Error, ErrorKind}; use crate::tokens::Token; +use crate::{ + error::{Error, ErrorKind}, + variables::Value, +}; use logos::Logos; @@ -29,12 +32,13 @@ impl<'a> Parser<'a> { if token.is_none() { return Ok(self.ast.clone()); }; - let expr = self.parse_expr(&token)?; + + let expr = self.parse_expr(token)?; self.ast.push(expr); } } - fn parse_expr(&mut self, token: &Option) -> Result { + fn parse_expr(&mut self, token: Option) -> Result { if matches!(token, None) { return Err(Error { kind: ErrorKind::EndOfTokenStream, @@ -42,36 +46,23 @@ impl<'a> Parser<'a> { }); } - Ok(todo!()) - } + let token = token.unwrap(); + let start = self.lexer.span().start; - /* - /// Start parsing Token Vector into Abstract Syntax Tree - pub fn parse(&mut self) -> Vec { - let mut ast = vec![]; - while let Some(token) = self.lexer.next() { - let expr = match token { - Token::Variable => self.variable_declaration(), - Token::Function => self.function_declaration(), - Token::BfFunction => self.bff_declaration(), - Token::RightBrace => return ast, - _ => Err(Error { - kind: ErrorKind::SyntaxError, - position: 0..0, - }), - }; - match expr { - Ok(o) => ast.push(o), - Err(e) => { - e.panic(self.lexer.slice()); - break; - } - } + match token { + Token::Variable => self.variable_declaration(), + Token::Function => self.function_declaration(), + Token::BfFunction => self.bff_declaration(), + Token::String(x) => Ok(Expr::Literal(Value::Str(x))), + Token::Integer(x) => Ok(Expr::Literal(Value::Int(x))), + Token::Boolean(x) => Ok(Expr::Literal(Value::Bool(x))), + Token::Aboolean(x) => Ok(Expr::Literal(Value::Abool(x))), + _ => Err(Error { + kind: ErrorKind::SyntaxError("Unexpected identifier".to_owned()), + position: start..self.lexer.span().end, + }), } - - ast } - */ /// Parse variable declaration /// @@ -82,9 +73,10 @@ impl<'a> Parser<'a> { let init = match self.lexer.next() { Some(Token::Semicolon) => None, Some(Token::Assignment) => { - let value = self.require(Token::Boolean)?; // TODO: Shouldn't be limited to boolean (pattern match?) + let value = self.lexer.next(); + let value = self.parse_expr(value)?; // TODO: Shouldn't be limited to boolean (pattern match?) self.require(Token::Semicolon)?; - Some(value) + Some(Box::new(value)) } _ => { return Err(Error { @@ -106,7 +98,9 @@ impl<'a> Parser<'a> { // TODO: Arguments self.require(Token::RightParenthesis)?; self.require(Token::LeftBrace)?; - let body = vec![]; + let expr = self.lexer.next(); + let expr = self.parse_expr(expr); + let body = vec![expr?]; Ok(Expr::FunctionDeclaration { iden, body }) } @@ -115,6 +109,7 @@ impl<'a> Parser<'a> { /// /// `bff [iden] { ... }` fn bff_declaration(&mut self) -> Result { + // TODO: Make it throw error when EOF let iden = self.require(Token::Identifier)?; self.require(Token::LeftBrace)?; let mut code = String::new(); @@ -129,10 +124,9 @@ impl<'a> Parser<'a> { | Token::LeftBracket | Token::RightBracket => self.lexer.slice(), Token::RightBrace => break, - _ => break, + _ => return Err(self.unexpected_token(None)), }); } - self.require(Token::RightBrace)?; Ok(Expr::BfFDeclaration { iden, code }) } } diff --git a/src/parser/utils.rs b/src/parser/utils.rs index cbf8d8c..218b2fa 100644 --- a/src/parser/utils.rs +++ b/src/parser/utils.rs @@ -23,13 +23,21 @@ pub fn num2abool(number: i32) -> Abool { impl<'a> Parser<'a> { /// Require type of token as next and return it's value (sometimes irrelevant) pub(super) fn require(&mut self, with: Token) -> Result { - if self.lexer.next() == Some(with) { + if self.lexer.next() == Some(with.clone()) { Ok(self.lexer.slice().to_owned()) } else { - Err(Error { - kind: ErrorKind::SyntaxError("Mysterious parse error".to_owned()), - position: self.lexer.span(), - }) + Err(self.unexpected_token(Some(with))) + } + } + + pub(super) fn unexpected_token(&mut self, expected: Option) -> Error { + Error { + kind: ErrorKind::SyntaxError(format!( + "Unexpected token: `{}` (required: `{:?}`)", + self.lexer.slice(), + expected + )), + position: self.lexer.span(), } } } diff --git a/src/tokens.rs b/src/tokens.rs index 8b458c5..fcc822a 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,23 +1,25 @@ -use logos::Logos; +use logos::{Lexer, Logos}; -#[derive(Logos, Debug, PartialEq)] +use crate::variables::Abool; + +#[derive(Logos, Debug, PartialEq, Clone)] pub enum Token { // Literals /// True, False - #[regex("true|false")] - Boolean, + #[regex("true|false", get_bool)] + Boolean(bool), /// Always, Sometimes, Never - #[regex("always|sometimes|never")] - Aboolean, + #[regex("always|sometimes|never", get_abool)] + Aboolean(Abool), /// String - #[regex("\"(\\.|[^\"])*\"")] - String, + #[regex("\"(\\.|[^\"])*\"", get_string)] + String(String), /// Integer - #[regex(r"[0-9]+")] - Integer, + #[regex(r"[0-9]+", get_int)] + Integer(i32), /// A C-complaint identifier #[regex(r"[a-zA-Z_][a-zA-Z_0-9]*")] @@ -122,3 +124,24 @@ pub enum Token { #[error] Error, } + +fn get_bool(lexer: &mut Lexer) -> Option { + lexer.slice().parse().ok() +} + +fn get_int(lexer: &mut Lexer) -> Option { + lexer.slice().parse().ok() +} + +fn get_string(lexer: &mut Lexer) -> String { + lexer.slice().to_owned() +} + +fn get_abool(lexer: &mut Lexer) -> Option { + match lexer.slice() { + "always" => Some(Abool::Always), + "sometimes" => Some(Abool::Sometimes), + "never" => Some(Abool::Never), + _ => None, + } +} From d2160a3a4a794d7e3f3bbbfe70cad6a1b956a000 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 27 Apr 2021 11:09:19 +0200 Subject: [PATCH 3/6] Added testing for base55, new identifier lexing --- src/base_55.rs | 9 +++++++++ src/error.rs | 1 + src/parser/mod.rs | 6 +++--- src/parser/utils.rs | 11 +++++++++++ src/tokens.rs | 46 ++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 67 insertions(+), 6 deletions(-) diff --git a/src/base_55.rs b/src/base_55.rs index 422eb1b..2efa1c3 100644 --- a/src/base_55.rs +++ b/src/base_55.rs @@ -122,3 +122,12 @@ pub fn num2char(number: i32) -> char { _ => ' ', } } + +#[cfg(test)] +mod tests { + use super::*; + #[test] fn str_to_base55() { + let chrs: Vec = "AbleScript".chars().map(char2num).collect(); + assert_eq!(chrs, &[-1, 2, 12, 5, -19, 3, 18, 9, 16, 20]); + } +} \ No newline at end of file diff --git a/src/error.rs b/src/error.rs index 1ae42e8..a5188eb 100644 --- a/src/error.rs +++ b/src/error.rs @@ -10,6 +10,7 @@ pub struct Error { pub enum ErrorKind { SyntaxError(String), EndOfTokenStream, + InvalidIdentifier, } impl Error { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5f7b08b..826b25c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -68,7 +68,7 @@ impl<'a> Parser<'a> { /// /// `var [iden] = [literal];` fn variable_declaration(&mut self) -> Result { - let iden = self.require(Token::Identifier)?; + let iden = self.require_iden()?; let init = match self.lexer.next() { Some(Token::Semicolon) => None, @@ -93,7 +93,7 @@ impl<'a> Parser<'a> { /// /// `functio [iden] ([expr], [expr]) { ... } fn function_declaration(&mut self) -> Result { - let iden = self.require(Token::Identifier)?; + let iden = self.require_iden()?; self.require(Token::LeftParenthesis)?; // TODO: Arguments self.require(Token::RightParenthesis)?; @@ -110,7 +110,7 @@ impl<'a> Parser<'a> { /// `bff [iden] { ... }` fn bff_declaration(&mut self) -> Result { // TODO: Make it throw error when EOF - let iden = self.require(Token::Identifier)?; + let iden = self.require_iden()?; self.require(Token::LeftBrace)?; let mut code = String::new(); while let Some(token) = self.lexer.next() { diff --git a/src/parser/utils.rs b/src/parser/utils.rs index 218b2fa..90b6138 100644 --- a/src/parser/utils.rs +++ b/src/parser/utils.rs @@ -30,6 +30,17 @@ impl<'a> Parser<'a> { } } + pub(super) fn require_iden(&mut self) -> Result { + if let Some(Token::Identifier(id)) = self.lexer.next() { + Ok(id) + } else { + Err(Error { + kind: ErrorKind::InvalidIdentifier, + position: self.lexer.span(), + }) + } + } + pub(super) fn unexpected_token(&mut self, expected: Option) -> Error { Error { kind: ErrorKind::SyntaxError(format!( diff --git a/src/tokens.rs b/src/tokens.rs index fcc822a..6ee599a 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -22,8 +22,8 @@ pub enum Token { Integer(i32), /// A C-complaint identifier - #[regex(r"[a-zA-Z_][a-zA-Z_0-9]*")] - Identifier, + #[regex(r"[a-zA-Z_][a-zA-Z_0-9]*", get_iden)] + Identifier(String), #[token("(")] LeftParenthesis, @@ -134,7 +134,7 @@ fn get_int(lexer: &mut Lexer) -> Option { } fn get_string(lexer: &mut Lexer) -> String { - lexer.slice().to_owned() + lexer.slice().trim_matches('"').to_owned() } fn get_abool(lexer: &mut Lexer) -> Option { @@ -145,3 +145,43 @@ fn get_abool(lexer: &mut Lexer) -> Option { _ => None, } } + +fn get_iden(lexer: &mut Lexer) -> String { + lexer.slice().to_owned() +} + +#[cfg(test)] +mod tests { + use super::Token; + use super::Token::*; + use logos::Logos; + + #[test] + fn simple_fn() { + let code = "functio test() { var a = 3; if a == 3 { a print } }"; + let expected = &[ + Function, + Identifier("test".to_owned()), + LeftParenthesis, + RightParenthesis, + LeftBrace, + Variable, + Identifier("a".to_owned()), + Assignment, + Integer(3), + Semicolon, + If, + Identifier("a".to_owned()), + OpEq, + Integer(3), + LeftBrace, + Identifier("a".to_owned()), + Print, + RightBrace, + RightBrace, + ]; + let lexer = Token::lexer(code); + let result: Vec = lexer.collect(); + assert_eq!(result, expected); + } +} \ No newline at end of file From 39a8bf6a54047a91458117e9c60fd3103634ef2b Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 27 Apr 2021 11:49:07 +0200 Subject: [PATCH 4/6] Redone original parser = implemented original features --- able-script-test/parse_test.able | 6 ++++-- src/error.rs | 9 +-------- src/parser/mod.rs | 13 ++++++++----- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/able-script-test/parse_test.able b/able-script-test/parse_test.able index bfab6c5..efe7b0e 100644 --- a/able-script-test/parse_test.able +++ b/able-script-test/parse_test.able @@ -1,2 +1,4 @@ -var a = 3; -var b = 4; \ No newline at end of file +functio test () { + var a = 3; + var b = 4; +} \ No newline at end of file diff --git a/src/error.rs b/src/error.rs index a5188eb..0d725ac 100644 --- a/src/error.rs +++ b/src/error.rs @@ -11,11 +11,4 @@ pub enum ErrorKind { SyntaxError(String), EndOfTokenStream, InvalidIdentifier, -} - -impl Error { - pub fn panic(&self, span: &str) { - println!("{:?} occured at {:?}", self.kind, self.position); - println!(" {}", &span); - } -} +} \ No newline at end of file diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 826b25c..f4970c5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -74,7 +74,7 @@ impl<'a> Parser<'a> { Some(Token::Semicolon) => None, Some(Token::Assignment) => { let value = self.lexer.next(); - let value = self.parse_expr(value)?; // TODO: Shouldn't be limited to boolean (pattern match?) + let value = self.parse_expr(value)?; self.require(Token::Semicolon)?; Some(Box::new(value)) } @@ -95,12 +95,15 @@ impl<'a> Parser<'a> { fn function_declaration(&mut self) -> Result { let iden = self.require_iden()?; self.require(Token::LeftParenthesis)?; - // TODO: Arguments self.require(Token::RightParenthesis)?; + self.require(Token::LeftBrace)?; - let expr = self.lexer.next(); - let expr = self.parse_expr(expr); - let body = vec![expr?]; + // Parse function body + let mut body = Vec::new(); + while let Some(token) = self.lexer.next() { + if token == Token::RightBrace { break } + body.push(self.parse_expr(Some(token))?); + } Ok(Expr::FunctionDeclaration { iden, body }) } From 3f9e6b72cc3d90d961d7d783cf5b2e9af293be2e Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 27 Apr 2021 11:57:11 +0200 Subject: [PATCH 5/6] Made parser to throw error when unexpected EOF --- able-script-test/parse_test.able | 5 +--- src/parser/item.rs | 2 +- src/parser/mod.rs | 43 ++++++++++++++++++++++++++------ src/variables.rs | 20 +-------------- 4 files changed, 39 insertions(+), 31 deletions(-) diff --git a/able-script-test/parse_test.able b/able-script-test/parse_test.able index efe7b0e..9597bd1 100644 --- a/able-script-test/parse_test.able +++ b/able-script-test/parse_test.able @@ -1,4 +1 @@ -functio test () { - var a = 3; - var b = 4; -} \ No newline at end of file +bff a {+++<<>>>[]]]][[]]} \ No newline at end of file diff --git a/src/parser/item.rs b/src/parser/item.rs index 39e2fbf..d5f670f 100644 --- a/src/parser/item.rs +++ b/src/parser/item.rs @@ -12,7 +12,7 @@ pub enum Expr { }, BfFDeclaration { iden: String, - code: String, + body: String, }, Literal(Value), } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f4970c5..57cd844 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -100,8 +100,22 @@ impl<'a> Parser<'a> { self.require(Token::LeftBrace)?; // Parse function body let mut body = Vec::new(); - while let Some(token) = self.lexer.next() { - if token == Token::RightBrace { break } + loop { + let token = { + match self.lexer.next() { + Some(t) => t, + None => { + return Err(Error { + kind: ErrorKind::EndOfTokenStream, + position: self.lexer.span(), + }) + } + } + }; + + if token == Token::RightBrace { + break; + } body.push(self.parse_expr(Some(token))?); } @@ -112,12 +126,27 @@ impl<'a> Parser<'a> { /// /// `bff [iden] { ... }` fn bff_declaration(&mut self) -> Result { - // TODO: Make it throw error when EOF let iden = self.require_iden()?; self.require(Token::LeftBrace)?; - let mut code = String::new(); - while let Some(token) = self.lexer.next() { - code.push_str(match token { + + let mut body = String::new(); + loop { + let token = { + match self.lexer.next() { + Some(t) => t, + None => { + return Err(Error { + kind: ErrorKind::EndOfTokenStream, + position: self.lexer.span(), + }) + } + } + }; + + if token == Token::RightBrace { + break; + } + body.push_str(match token { Token::OpGt | Token::OpLt | Token::Addition @@ -130,6 +159,6 @@ impl<'a> Parser<'a> { _ => return Err(self.unexpected_token(None)), }); } - Ok(Expr::BfFDeclaration { iden, code }) + Ok(Expr::BfFDeclaration { iden, body }) } } diff --git a/src/variables.rs b/src/variables.rs index b3020a1..4bfed39 100644 --- a/src/variables.rs +++ b/src/variables.rs @@ -1,5 +1,4 @@ use rand::Rng; -use std::collections::HashMap; #[derive(Debug, Clone, PartialEq)] pub enum Abool { @@ -30,21 +29,4 @@ pub enum Value { pub struct Variable { melo: bool, value: Value, -} -pub fn test() { - let mut map = HashMap::new(); - let a = Variable { - melo: false, - value: Value::Str("1".to_string()), - }; - let b = Variable { - melo: false, - value: Value::Int(2), - }; - map.insert("a", a); - map.insert("b", b); - - for (key, value) in &map { - println!("{}: {:?}", key, value); - } -} +} \ No newline at end of file From f3779deeb54d77892de70e33132da729c1adac4a Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 27 Apr 2021 13:48:56 +0200 Subject: [PATCH 6/6] Added parsing of conditionals --- able-script-test/parse_test.able | 4 +- src/base_55.rs | 5 ++- src/error.rs | 2 +- src/parser/item.rs | 9 +++++ src/parser/mod.rs | 54 ++++++++++++++++---------- src/parser/utils.rs | 25 +++++++++++- src/tokens.rs | 66 ++++++++++++++++---------------- src/variables.rs | 2 +- 8 files changed, 108 insertions(+), 59 deletions(-) diff --git a/able-script-test/parse_test.able b/able-script-test/parse_test.able index 9597bd1..6d7ceca 100644 --- a/able-script-test/parse_test.able +++ b/able-script-test/parse_test.able @@ -1 +1,3 @@ -bff a {+++<<>>>[]]]][[]]} \ No newline at end of file +if (true) { + var a = 3; +} \ No newline at end of file diff --git a/src/base_55.rs b/src/base_55.rs index 2efa1c3..bb9d5ce 100644 --- a/src/base_55.rs +++ b/src/base_55.rs @@ -126,8 +126,9 @@ pub fn num2char(number: i32) -> char { #[cfg(test)] mod tests { use super::*; - #[test] fn str_to_base55() { + #[test] + fn str_to_base55() { let chrs: Vec = "AbleScript".chars().map(char2num).collect(); assert_eq!(chrs, &[-1, 2, 12, 5, -19, 3, 18, 9, 16, 20]); } -} \ No newline at end of file +} diff --git a/src/error.rs b/src/error.rs index 0d725ac..1b3ac84 100644 --- a/src/error.rs +++ b/src/error.rs @@ -11,4 +11,4 @@ pub enum ErrorKind { SyntaxError(String), EndOfTokenStream, InvalidIdentifier, -} \ No newline at end of file +} diff --git a/src/parser/item.rs b/src/parser/item.rs index d5f670f..46616dc 100644 --- a/src/parser/item.rs +++ b/src/parser/item.rs @@ -1,5 +1,8 @@ use crate::variables::Value; +#[derive(Debug, Clone)] +pub struct Iden(pub String); + #[derive(Debug, Clone)] pub enum Expr { VariableDeclaration { @@ -14,5 +17,11 @@ pub enum Expr { iden: String, body: String, }, + If { + cond: Box, + body: Vec, + }, + Literal(Value), + Melo(Iden), } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 57cd844..36a44c0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3,11 +3,11 @@ mod utils; use item::Expr; -use crate::tokens::Token; use crate::{ error::{Error, ErrorKind}, variables::Value, }; +use crate::{parser::item::Iden, tokens::Token}; use logos::Logos; @@ -50,13 +50,28 @@ impl<'a> Parser<'a> { let start = self.lexer.span().start; match token { + // Control flow + Token::If => self.if_cond(), + + // Declarations Token::Variable => self.variable_declaration(), Token::Function => self.function_declaration(), Token::BfFunction => self.bff_declaration(), + + // Literals Token::String(x) => Ok(Expr::Literal(Value::Str(x))), Token::Integer(x) => Ok(Expr::Literal(Value::Int(x))), Token::Boolean(x) => Ok(Expr::Literal(Value::Bool(x))), Token::Aboolean(x) => Ok(Expr::Literal(Value::Abool(x))), + + // Prefix keywords + // Melo - ban variable from next usage (runtime error) + Token::Melo => { + let e = self.require_iden()?; + self.require(Token::Semicolon)?; + Ok(Expr::Melo(Iden(e))) + } + _ => Err(Error { kind: ErrorKind::SyntaxError("Unexpected identifier".to_owned()), position: start..self.lexer.span().end, @@ -99,25 +114,7 @@ impl<'a> Parser<'a> { self.require(Token::LeftBrace)?; // Parse function body - let mut body = Vec::new(); - loop { - let token = { - match self.lexer.next() { - Some(t) => t, - None => { - return Err(Error { - kind: ErrorKind::EndOfTokenStream, - position: self.lexer.span(), - }) - } - } - }; - - if token == Token::RightBrace { - break; - } - body.push(self.parse_expr(Some(token))?); - } + let body = self.parse_body()?; Ok(Expr::FunctionDeclaration { iden, body }) } @@ -161,4 +158,21 @@ impl<'a> Parser<'a> { } Ok(Expr::BfFDeclaration { iden, body }) } + + /// Parse If-expression + pub fn if_cond(&mut self) -> Result { + self.require(Token::LeftParenthesis)?; + let cond = self.lexer.next(); + let cond = self.parse_expr(cond)?; + self.require(Token::RightParenthesis)?; + + self.require(Token::LeftBrace)?; + + let body = self.parse_body()?; + + Ok(Expr::If { + cond: Box::new(cond), + body, + }) + } } diff --git a/src/parser/utils.rs b/src/parser/utils.rs index 90b6138..42c2b63 100644 --- a/src/parser/utils.rs +++ b/src/parser/utils.rs @@ -2,7 +2,7 @@ use crate::error::{Error, ErrorKind}; use crate::tokens::Token; use crate::variables::Abool; -use super::Parser; +use super::{item::Expr, Parser}; pub fn abool2num(abool: Abool) -> i32 { match abool { @@ -51,4 +51,27 @@ impl<'a> Parser<'a> { position: self.lexer.span(), } } + + pub(super) fn parse_body(&mut self) -> Result, Error> { + let mut body = Vec::new(); + loop { + let token = { + match self.lexer.next() { + Some(t) => t, + None => { + return Err(Error { + kind: ErrorKind::EndOfTokenStream, + position: self.lexer.span(), + }) + } + } + }; + + if token == Token::RightBrace { + break; + } + body.push(self.parse_expr(Some(token))?); + } + Ok(body) + } } diff --git a/src/tokens.rs b/src/tokens.rs index 6ee599a..b21c4d5 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -4,6 +4,38 @@ use crate::variables::Abool; #[derive(Logos, Debug, PartialEq, Clone)] pub enum Token { + #[token("functio")] + Function, + + /// Brain fuck FFI + #[token("bff")] + BfFunction, + + /// Variable bro + #[token("var")] + Variable, + + /// Prints the preceding things + #[token("print")] + Print, + + /// Ban the following variable from ever being used again + #[token("melo")] + Melo, + + #[token("T-Dark")] + TDark, + + // Expressions + #[token("if")] + If, + + #[token("else")] + Else, + + #[token("loop")] + Loop, + // Literals /// True, False #[regex("true|false", get_bool)] @@ -88,38 +120,6 @@ pub enum Token { #[token("'.*'")] Char, - #[token("functio")] - Function, - - /// Brain fuck FFI - #[token("bff")] - BfFunction, - - /// Variable bro - #[token("var")] - Variable, - - /// Prints the preceding things - #[token("print")] - Print, - - /// Ban the following variable from ever being used again - #[token("melo")] - Melo, - - #[token("T-Dark")] - TDark, - - // Expressions - #[token("if")] - If, - - #[token("else")] - Else, - - #[token("loop")] - Loop, - #[regex(r"[ \t\n\f]+", logos::skip)] #[error] Error, @@ -184,4 +184,4 @@ mod tests { let result: Vec = lexer.collect(); assert_eq!(result, expected); } -} \ No newline at end of file +} diff --git a/src/variables.rs b/src/variables.rs index 4bfed39..cb476b9 100644 --- a/src/variables.rs +++ b/src/variables.rs @@ -29,4 +29,4 @@ pub enum Value { pub struct Variable { melo: bool, value: Value, -} \ No newline at end of file +}