From 83c549607cacb9905430686a6664e3137c75a700 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 27 Apr 2021 10:51:39 +0200 Subject: [PATCH] Continue work on parser, improved lexer - Added literal parsing (improved lexing) - Revised error handling --- able-script-test/parse_test.able | 12 +----- src/parser/item.rs | 18 +++++++-- src/parser/mod.rs | 64 +++++++++++++++----------------- src/parser/utils.rs | 18 ++++++--- src/tokens.rs | 43 ++++++++++++++++----- 5 files changed, 92 insertions(+), 63 deletions(-) diff --git a/able-script-test/parse_test.able b/able-script-test/parse_test.able index 72786dc..bfab6c5 100644 --- a/able-script-test/parse_test.able +++ b/able-script-test/parse_test.able @@ -1,10 +1,2 @@ -functio test() { - functio nested() { - var c = false; - } - var a = true; -} - -functio another() { - var b = false; -} \ No newline at end of file +var a = 3; +var b = 4; \ No newline at end of file diff --git a/src/parser/item.rs b/src/parser/item.rs index af6af74..39e2fbf 100644 --- a/src/parser/item.rs +++ b/src/parser/item.rs @@ -1,6 +1,18 @@ +use crate::variables::Value; + #[derive(Debug, Clone)] pub enum Expr { - VariableDeclaration { iden: String, init: Option }, - FunctionDeclaration { iden: String, body: Vec }, - BfFDeclaration { iden: String, code: String }, + VariableDeclaration { + iden: String, + init: Option>, + }, + FunctionDeclaration { + iden: String, + body: Vec, + }, + BfFDeclaration { + iden: String, + code: String, + }, + Literal(Value), } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d1d432e..5f7b08b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3,8 +3,11 @@ mod utils; use item::Expr; -use crate::error::{Error, ErrorKind}; use crate::tokens::Token; +use crate::{ + error::{Error, ErrorKind}, + variables::Value, +}; use logos::Logos; @@ -29,12 +32,13 @@ impl<'a> Parser<'a> { if token.is_none() { return Ok(self.ast.clone()); }; - let expr = self.parse_expr(&token)?; + + let expr = self.parse_expr(token)?; self.ast.push(expr); } } - fn parse_expr(&mut self, token: &Option) -> Result { + fn parse_expr(&mut self, token: Option) -> Result { if matches!(token, None) { return Err(Error { kind: ErrorKind::EndOfTokenStream, @@ -42,36 +46,23 @@ impl<'a> Parser<'a> { }); } - Ok(todo!()) - } + let token = token.unwrap(); + let start = self.lexer.span().start; - /* - /// Start parsing Token Vector into Abstract Syntax Tree - pub fn parse(&mut self) -> Vec { - let mut ast = vec![]; - while let Some(token) = self.lexer.next() { - let expr = match token { - Token::Variable => self.variable_declaration(), - Token::Function => self.function_declaration(), - Token::BfFunction => self.bff_declaration(), - Token::RightBrace => return ast, - _ => Err(Error { - kind: ErrorKind::SyntaxError, - position: 0..0, - }), - }; - match expr { - Ok(o) => ast.push(o), - Err(e) => { - e.panic(self.lexer.slice()); - break; - } - } + match token { + Token::Variable => self.variable_declaration(), + Token::Function => self.function_declaration(), + Token::BfFunction => self.bff_declaration(), + Token::String(x) => Ok(Expr::Literal(Value::Str(x))), + Token::Integer(x) => Ok(Expr::Literal(Value::Int(x))), + Token::Boolean(x) => Ok(Expr::Literal(Value::Bool(x))), + Token::Aboolean(x) => Ok(Expr::Literal(Value::Abool(x))), + _ => Err(Error { + kind: ErrorKind::SyntaxError("Unexpected identifier".to_owned()), + position: start..self.lexer.span().end, + }), } - - ast } - */ /// Parse variable declaration /// @@ -82,9 +73,10 @@ impl<'a> Parser<'a> { let init = match self.lexer.next() { Some(Token::Semicolon) => None, Some(Token::Assignment) => { - let value = self.require(Token::Boolean)?; // TODO: Shouldn't be limited to boolean (pattern match?) + let value = self.lexer.next(); + let value = self.parse_expr(value)?; // TODO: Shouldn't be limited to boolean (pattern match?) self.require(Token::Semicolon)?; - Some(value) + Some(Box::new(value)) } _ => { return Err(Error { @@ -106,7 +98,9 @@ impl<'a> Parser<'a> { // TODO: Arguments self.require(Token::RightParenthesis)?; self.require(Token::LeftBrace)?; - let body = vec![]; + let expr = self.lexer.next(); + let expr = self.parse_expr(expr); + let body = vec![expr?]; Ok(Expr::FunctionDeclaration { iden, body }) } @@ -115,6 +109,7 @@ impl<'a> Parser<'a> { /// /// `bff [iden] { ... }` fn bff_declaration(&mut self) -> Result { + // TODO: Make it throw error when EOF let iden = self.require(Token::Identifier)?; self.require(Token::LeftBrace)?; let mut code = String::new(); @@ -129,10 +124,9 @@ impl<'a> Parser<'a> { | Token::LeftBracket | Token::RightBracket => self.lexer.slice(), Token::RightBrace => break, - _ => break, + _ => return Err(self.unexpected_token(None)), }); } - self.require(Token::RightBrace)?; Ok(Expr::BfFDeclaration { iden, code }) } } diff --git a/src/parser/utils.rs b/src/parser/utils.rs index cbf8d8c..218b2fa 100644 --- a/src/parser/utils.rs +++ b/src/parser/utils.rs @@ -23,13 +23,21 @@ pub fn num2abool(number: i32) -> Abool { impl<'a> Parser<'a> { /// Require type of token as next and return it's value (sometimes irrelevant) pub(super) fn require(&mut self, with: Token) -> Result { - if self.lexer.next() == Some(with) { + if self.lexer.next() == Some(with.clone()) { Ok(self.lexer.slice().to_owned()) } else { - Err(Error { - kind: ErrorKind::SyntaxError("Mysterious parse error".to_owned()), - position: self.lexer.span(), - }) + Err(self.unexpected_token(Some(with))) + } + } + + pub(super) fn unexpected_token(&mut self, expected: Option) -> Error { + Error { + kind: ErrorKind::SyntaxError(format!( + "Unexpected token: `{}` (required: `{:?}`)", + self.lexer.slice(), + expected + )), + position: self.lexer.span(), } } } diff --git a/src/tokens.rs b/src/tokens.rs index 8b458c5..fcc822a 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,23 +1,25 @@ -use logos::Logos; +use logos::{Lexer, Logos}; -#[derive(Logos, Debug, PartialEq)] +use crate::variables::Abool; + +#[derive(Logos, Debug, PartialEq, Clone)] pub enum Token { // Literals /// True, False - #[regex("true|false")] - Boolean, + #[regex("true|false", get_bool)] + Boolean(bool), /// Always, Sometimes, Never - #[regex("always|sometimes|never")] - Aboolean, + #[regex("always|sometimes|never", get_abool)] + Aboolean(Abool), /// String - #[regex("\"(\\.|[^\"])*\"")] - String, + #[regex("\"(\\.|[^\"])*\"", get_string)] + String(String), /// Integer - #[regex(r"[0-9]+")] - Integer, + #[regex(r"[0-9]+", get_int)] + Integer(i32), /// A C-complaint identifier #[regex(r"[a-zA-Z_][a-zA-Z_0-9]*")] @@ -122,3 +124,24 @@ pub enum Token { #[error] Error, } + +fn get_bool(lexer: &mut Lexer) -> Option { + lexer.slice().parse().ok() +} + +fn get_int(lexer: &mut Lexer) -> Option { + lexer.slice().parse().ok() +} + +fn get_string(lexer: &mut Lexer) -> String { + lexer.slice().to_owned() +} + +fn get_abool(lexer: &mut Lexer) -> Option { + match lexer.slice() { + "always" => Some(Abool::Always), + "sometimes" => Some(Abool::Sometimes), + "never" => Some(Abool::Never), + _ => None, + } +}