2021-06-06 18:28:13 +00:00
|
|
|
use logos::{Lexer, Logos};
|
2021-04-12 18:20:45 +00:00
|
|
|
|
2022-09-14 19:46:24 +00:00
|
|
|
#[derive(Logos, Debug, PartialEq, Eq, Clone)]
|
2023-05-18 18:33:58 +00:00
|
|
|
#[logos(skip r"[ \t\n\f]+")]
|
|
|
|
#[logos(skip r"owo .*")]
|
2023-01-04 14:30:03 +00:00
|
|
|
#[rustfmt::skip]
|
2021-06-06 18:28:13 +00:00
|
|
|
pub enum Token {
|
|
|
|
// Symbols
|
2023-01-04 14:30:03 +00:00
|
|
|
#[token("(")] LeftParen,
|
|
|
|
#[token(")")] RightParen,
|
|
|
|
#[token("[")] LeftBracket,
|
|
|
|
#[token("]")] RightBracket,
|
|
|
|
#[token("{")] LeftCurly,
|
|
|
|
#[token("}")] RightCurly,
|
|
|
|
#[token(";")] Semicolon,
|
|
|
|
#[token(",")] Comma,
|
2021-06-06 18:28:13 +00:00
|
|
|
|
|
|
|
// Operators
|
2023-01-04 14:30:03 +00:00
|
|
|
#[token("+")] Plus,
|
|
|
|
#[token("-")] Minus,
|
|
|
|
#[token("*")] Star,
|
|
|
|
#[token("/")] FwdSlash,
|
|
|
|
#[token("=:")] Assign,
|
|
|
|
#[token("<=")] Arrow,
|
2021-07-27 09:52:43 +00:00
|
|
|
|
2021-06-06 18:28:13 +00:00
|
|
|
// Logical operators
|
2023-01-04 14:30:03 +00:00
|
|
|
#[token("<")] LessThan,
|
|
|
|
#[token(">")] GreaterThan,
|
|
|
|
#[token("=")] Equals,
|
|
|
|
#[token("ain't")] Aint,
|
2021-06-06 18:28:13 +00:00
|
|
|
|
|
|
|
// Keywords
|
2023-01-04 14:30:03 +00:00
|
|
|
#[token("functio")] Functio,
|
|
|
|
#[token("bff")] Bff,
|
|
|
|
#[token("dim")] Dim,
|
|
|
|
#[token("print")] Print,
|
|
|
|
#[token("read")] Read,
|
|
|
|
#[token("melo")] Melo,
|
|
|
|
#[token("T-Dark")] TDark,
|
2021-04-27 11:48:56 +00:00
|
|
|
|
2021-06-06 18:28:13 +00:00
|
|
|
// Control flow keywords
|
2023-01-04 14:30:03 +00:00
|
|
|
#[token("unless")] Unless,
|
|
|
|
#[token("loop")] Loop,
|
|
|
|
#[token("enough")] Enough,
|
|
|
|
#[token("and again")] AndAgain,
|
|
|
|
#[token("finally")] Finally,
|
|
|
|
#[token("rlyeh")] Rlyeh,
|
2021-04-27 11:48:56 +00:00
|
|
|
|
2023-01-04 14:30:03 +00:00
|
|
|
#[token("rickroll")] Rickroll,
|
2021-06-13 05:11:02 +00:00
|
|
|
|
2021-04-18 20:33:55 +00:00
|
|
|
// Literals
|
2023-01-04 14:30:03 +00:00
|
|
|
#[token("/*", get_string)] String(String),
|
|
|
|
#[regex(r"-?[0-9]+", get_value)] Integer(isize),
|
|
|
|
#[regex(r"\p{XID_Start}", get_value)] Char(char),
|
2022-07-02 10:47:16 +00:00
|
|
|
#[regex(r"\p{XID_Start}[\p{XID_Continue}]+", get_ident)]
|
2022-05-17 17:03:02 +00:00
|
|
|
#[token("and ", |_| "and".to_owned())]
|
2021-04-27 09:09:19 +00:00
|
|
|
Identifier(String),
|
2021-04-11 22:22:06 +00:00
|
|
|
}
|
2021-04-27 08:51:39 +00:00
|
|
|
|
2021-10-04 21:03:23 +00:00
|
|
|
/// Lexer callback that parses the text of the current token into a `T`.
///
/// Returns `None` when the matched text is not a valid `T` according to its
/// `FromStr` implementation.
fn get_value<T>(lexer: &mut Lexer<Token>) -> Option<T>
where
    T: std::str::FromStr,
{
    let matched = lexer.slice();
    matched.parse::<T>().ok()
}
|
|
|
|
|
2022-03-13 12:18:51 +00:00
|
|
|
/// Lexer callback for string literals, invoked once the opening `/*` has
/// been matched.
///
/// Consumes the input up to and including the closing `*/` and returns the
/// decoded contents of the literal.
///
/// Returns `None` when there is no closing `*/` in the remaining input or
/// when the body contains a malformed escape (see [`unescape_string`]).
fn get_string(lexer: &mut Lexer<Token>) -> Option<String> {
    // Consume the body together with the closing delimiter before decoding,
    // so that a malformed escape rejects the literal as a whole instead of
    // leaving `*/` behind to be lexed as stray `*` and `/` tokens.
    let body_len = lexer.remainder().find("*/")?;
    lexer.bump(body_len + "*/".len());

    // The token slice is now `/*` + body + `*/`; strip both delimiters.
    let literal = lexer.slice();
    let body = literal.get("/*".len()..literal.len() - "*/".len())?;
    unescape_string(body)
}

/// Decodes the escape sequences in the body of a string literal.
///
/// An escape is a Unicode scalar value written in base 12 and enclosed in
/// double quotes, e.g. `"720B"`. Text outside of escapes is copied verbatim.
///
/// Returns `None` if an escape is missing its closing quote, is not a valid
/// base-12 number, or does not denote a valid `char`.
fn unescape_string(raw: &str) -> Option<String> {
    // An escape is never shorter than the character it encodes, so the raw
    // length is an upper bound for the decoded length.
    let mut decoded = String::with_capacity(raw.len());
    let mut rest = raw;

    while let Some((plain, after_open)) = rest.split_once('"') {
        // Copy the text preceding the escape.
        decoded.push_str(plain);

        // Everything up to the next quote is the base-12 code point.
        let (digits, after_close) = after_open.split_once('"')?;
        let code_point = u32::from_str_radix(digits, 12).ok()?;
        decoded.push(char::from_u32(code_point)?);

        // Continue behind the closing quote of the escape.
        rest = after_close;
    }

    // Copy whatever follows the last escape.
    decoded.push_str(rest);
    Some(decoded)
}
|
|
|
|
|
2021-10-04 21:00:18 +00:00
|
|
|
/// Lexer callback that returns the text of the current token as an owned
/// `String`.
fn get_ident(lexer: &mut Lexer<Token>) -> String {
    let name = lexer.slice();
    String::from(name)
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::Token;
    use super::Token::*;
    use logos::Logos;

    /// Lexes `source` to completion (panicking on any lexer error) and
    /// asserts that the produced tokens equal `expected`.
    fn assert_lexes_to(source: &str, expected: &[Token]) {
        let tokens: Vec<_> = Token::lexer(source)
            .collect::<Result<_, _>>()
            .unwrap();
        assert_eq!(tokens, expected);
    }

    /// A small function definition exercising keywords, symbols,
    /// identifiers and integer literals.
    #[test]
    fn simple_fn() {
        assert_lexes_to(
            "functio test() { dim var 3; unless (var ain't 3) { var print } }",
            &[
                Functio,
                Identifier("test".to_owned()),
                LeftParen,
                RightParen,
                LeftCurly,
                Dim,
                Identifier("var".to_owned()),
                Integer(3),
                Semicolon,
                Unless,
                LeftParen,
                Identifier("var".to_owned()),
                Aint,
                Integer(3),
                RightParen,
                LeftCurly,
                Identifier("var".to_owned()),
                Print,
                RightCurly,
                RightCurly,
            ],
        );
    }

    /// Quoted base-12 escapes inside a string literal decode to the
    /// characters they denote.
    #[test]
    fn escapes() {
        assert_lexes_to(
            r#"/*»"720B""722B""7195"«*/"#,
            &[Token::String("»にゃぁ«".to_owned())],
        );
    }
}
|