Removed custom Lexer, reorganised Token definition

- As peeking will not be used during parsing, the custom `PeekableLexer` wrapper was removed
This commit is contained in:
Erin 2021-06-06 20:28:13 +02:00 committed by ondra05
parent 935ba3b791
commit bccf5bc74b

View file

@ -1,69 +1,78 @@
use logos::{Lexer, Logos, Span}; use logos::{Lexer, Logos};
use crate::variables::Abool; use crate::variables::Abool;
/// A wrapper around a `logos` lexer that supports single-token lookahead.
pub struct PeekableLexer<'source> {
    // The underlying logos lexer over the borrowed source text.
    lexer: Lexer<'source, Token>,
    // One-slot lookahead cache. Outer `Option`: whether a token has been
    // peeked; inner `Option`: the peeked token, or `None` at end of input.
    peeked: Option<Option<Token>>,
}
impl<'source> PeekableLexer<'source> {
    /// Creates a peekable lexer over `source` with an empty lookahead cache.
    pub fn lexer(source: &'source str) -> Self {
        Self {
            lexer: Token::lexer(source),
            peeked: None,
        }
    }

    /// Returns a reference to the next() value without advancing the iterator.
    #[inline]
    pub fn peek(&mut self) -> &Option<Token> {
        // `get_or_insert_with` lazily fills the cache on first call and
        // returns a reference to it, replacing the manual `is_none()` check
        // followed by `as_ref().unwrap()` — same behavior, no unwrap.
        self.peeked.get_or_insert_with(|| self.lexer.next())
    }

    /// Get the range for the current token in `Source`.
    #[inline]
    pub fn span(&self) -> Span {
        self.lexer.span()
    }

    /// Get a string slice of the current token.
    #[inline]
    pub fn slice(&self) -> &'source str {
        self.lexer.slice()
    }

    /// Get a slice of remaining source, starting at the end of current token.
    #[inline]
    pub fn remainder(&self) -> &'source str {
        self.lexer.remainder()
    }
}
impl<'source> Iterator for PeekableLexer<'source> {
    type Item = Token;

    /// Advances the iterator and returns the next value.
    ///
    /// Returns [`None`] when iteration is finished. A previously peeked
    /// token (see `peek`) is yielded first and the cache is cleared;
    /// otherwise the next token is pulled straight from the lexer.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // Drain the lookahead cache before touching the underlying lexer.
        if let Some(cached) = self.peeked.take() {
            cached
        } else {
            self.lexer.next()
        }
    }
}
#[derive(Logos, Debug, PartialEq, Clone)] #[derive(Logos, Debug, PartialEq, Clone)]
pub enum Token { pub enum Token {
// Symbols
#[token("(")]
LeftParenthesis,
#[token(")")]
RightParenthesis,
#[token("[")]
LeftBracket,
#[token("]")]
RightBracket,
#[token("{")]
LeftCurly,
#[token("}")]
RightCurly,
#[token(";")]
Semicolon,
#[token(".")]
Dot,
#[token(",")]
Comma,
// Operators
#[token("+")]
Plus,
#[token("-")]
Minus,
#[token("*")]
Star,
#[token("/")]
FwdSlash,
#[token("=")]
Equal,
// Logical operators
#[token("<")]
LessThan,
#[token(">")]
GreaterThan,
#[token("==")]
EqualEqual,
#[token("!=")]
NotEqual,
#[token("&")]
And,
#[token("|")]
Or,
#[token("!|aint")] // also add aint as a not keyword
Not,
// Keywords
#[token("functio")] #[token("functio")]
Function, Functio,
/// Brain fuck FFI /// Brain fuck FFI
#[token("bff")] #[token("bff")]
@ -84,7 +93,7 @@ pub enum Token {
#[token("T-Dark")] #[token("T-Dark")]
TDark, TDark,
// Expressions // Control flow keywords
#[token("if")] #[token("if")]
If, If,
@ -94,17 +103,22 @@ pub enum Token {
#[token("break")] #[token("break")]
Break, Break,
/// HopBack hops on the back of loop - like `continue`
#[token("hopback")] #[token("hopback")]
HopBack, HopBack,
// Literals // Literals
/// True, False /// True, False
#[regex("true|false", get_bool)] #[regex("true|false", get_bool)]
Boolean(bool), Bool(bool),
/// Always, Sometimes, Never /// Always, Sometimes, Never
#[regex("always|sometimes|never", get_abool)] #[regex("always|sometimes|never", get_abool)]
Aboolean(Abool), Abool(Abool),
/// Base52 based character ('a')
#[token("'.*'")]
Char,
/// String /// String
#[regex("\"(\\.|[^\"])*\"", get_string)] #[regex("\"(\\.|[^\"])*\"", get_string)]
@ -118,80 +132,11 @@ pub enum Token {
#[regex(r"[a-zA-Z_][a-zA-Z_0-9]*", get_iden)] #[regex(r"[a-zA-Z_][a-zA-Z_0-9]*", get_iden)]
Identifier(String), Identifier(String),
#[regex("nul")]
Nul,
#[token("(")]
LeftParenthesis,
#[token(")")]
RightParenthesis,
#[token("[")]
LeftBracket,
#[token("]")]
RightBracket,
#[token("{")]
LeftBrace,
#[token("}")]
RightBrace,
#[token(";")]
Semicolon,
#[token(".")]
FullStop,
#[token(",")]
Comma,
#[regex(r"owo.*")] #[regex(r"owo.*")]
Comment, Comment,
// Operators #[regex("nul")]
#[token("-")] Nul,
Subtract,
#[token("+")]
Addition,
#[token("*")]
Multiply,
#[token("/")]
Divide,
#[token("=")]
Assignment,
// Logical operators
#[token("<")]
OpLt,
#[token(">")]
OpGt,
#[token("==")]
OpEq,
#[token("!=")]
OpNeq,
#[token("&")]
LogAnd,
#[token("|")]
LogOr,
#[token("!|aint")] // also add aint as a not keyword
LogNot,
/// Base52 based character ('a')
#[token("'.*'")]
Char,
#[regex(r"[ \t\n\f]+", logos::skip)] #[regex(r"[ \t\n\f]+", logos::skip)]
#[error] #[error]
@ -233,25 +178,25 @@ mod tests {
fn simple_fn() { fn simple_fn() {
let code = "functio test() { var a = 3; if a == 3 { a print } }"; let code = "functio test() { var a = 3; if a == 3 { a print } }";
let expected = &[ let expected = &[
Function, Functio,
Identifier("test".to_owned()), Identifier("test".to_owned()),
LeftParenthesis, LeftParenthesis,
RightParenthesis, RightParenthesis,
LeftBrace, LeftCurly,
Variable, Variable,
Identifier("a".to_owned()), Identifier("a".to_owned()),
Assignment, Equal,
Integer(3), Integer(3),
Semicolon, Semicolon,
If, If,
Identifier("a".to_owned()), Identifier("a".to_owned()),
OpEq, EqualEqual,
Integer(3), Integer(3),
LeftBrace, LeftCurly,
Identifier("a".to_owned()), Identifier("a".to_owned()),
Print, Print,
RightBrace, RightCurly,
RightBrace, RightCurly,
]; ];
let lexer = Token::lexer(code); let lexer = Token::lexer(code);
let result: Vec<Token> = lexer.collect(); let result: Vec<Token> = lexer.collect();