rhea/src/syntax/token.rs

113 lines
2.6 KiB
Rust

use lasso::Spur;
use logos::Lexer;
use {lasso::Rodeo, logos::Logos};
/// Extra state carried by the lexer (wired in through `#[logos(extras = Lextras)]`).
///
/// Token callbacks reach this value through `lexer.extras`.
#[derive(Default)]
pub struct Lextras {
    /// String interner used by the token callbacks to turn identifier and
    /// string-literal text into `Spur` keys.
    pub interner: Rodeo,
}
/// Payload of an integer literal token.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IntLit {
    /// Produced by the lexer for literals written with a leading `-`.
    Signed(i64),
    /// Produced by the lexer for literals written without a sign.
    Unsigned(u64),
}
/// Tokens recognised by the lexer.
///
/// The lexer carries a [`Lextras`] value as its `extras`; the callbacks below
/// use its interner to store identifier and string contents as [`Spur`] keys.
///
/// Runs of spaces, tabs, newlines and form feeds are skipped. So is `-- `
/// (two dashes followed by a space) together with the rest of that line.
#[derive(Logos, Copy, Clone, Debug, PartialEq, Eq)]
#[logos(extras = Lextras)]
#[logos(skip r"[ \t\n\f]+")]
#[logos(skip r"-- .*")]
#[rustfmt::skip]
pub enum Token {
    // Delimiters and punctuation.
    #[token("(")] LeftParen,
    #[token(")")] RightParen,
    #[token("{")] LeftCurly,
    #[token("}")] RightCurly,
    #[token(".")] Dot,
    #[token(",")] Comma,
    #[token(":")] Colon,
    #[token(";")] Semicolon,
    #[token("_")] Underscore,

    // Arrows: each accepts a Unicode spelling and an ASCII digraph.
    // (An empty `#[token("")]` pattern is never what is wanted here.)
    #[token("←")]
    #[token("<-")] LArrow,
    #[token("→")]
    #[token("->")] RArrow,
    #[token(":>")] Pipe,

    // Arithmetic and bitwise operators.
    #[token("+")] Plus,
    #[token("-")] Minus,
    #[token("*")] Star,
    #[token("/")] Slash,
    #[token("&")] And,
    #[token("|")] VLine,
    #[token("~")] Tilde,

    // Comparison operators; the compound ones also accept a Unicode spelling.
    #[token("<")] Lt,
    #[token(">")] Gt,
    #[token("=")] Equ,
    #[token("≠")]
    #[token("/=")] Nequ,
    #[token("≤")]
    #[token("<=")] LtEqu,
    #[token("≥")]
    #[token(">=")] GtEqu,

    // Keywords.
    #[token("match")] Match,
    #[token("else")] Else,
    #[token("loop")] Loop,
    #[token("const")] Const,
    #[token("var")] Var,
    #[token("func")] Func,
    // Modules aren't real here, ondra; they're just variables with imported functions.
    #[token("module")] Module,

    /// Identifier: one `XID_Start` character followed by any number of
    /// `XID_Continue` characters. The text is interned.
    #[regex(
        r"\p{XID_Start}\p{XID_Continue}*",
        |l| l.extras.interner.get_or_intern(l.slice())
    )] Ident(Spur),

    /// String literal, interned without its delimiters.
    ///
    /// Two spellings are accepted: `»…«`, scanned by `better_string`, and
    /// `"…"`, which may contain anything except a double quote.
    #[token("»", better_string)]
    #[regex(
        "\"[^\"]*\"",
        |l| {
            let quoted = l.slice();
            // Drop the opening and closing `"` (one byte each).
            l.extras.interner.get_or_intern(&quoted[1..quoted.len() - 1])
        }
    )] String(Spur),

    /// Decimal integer literal with an optional leading `-`.
    ///
    /// A literal with a leading `-` is parsed, sign included, as an `i64`;
    /// any other literal is parsed as a `u64`. When the digits do not fit,
    /// the callback returns `None` and no `Int` token is produced.
    #[regex(
        "-?[0-9]+",
        |l| {
            let text = l.slice();
            Some(if text.starts_with('-') {
                // Parse with the sign attached: stripping it first would
                // yield the magnitude (`-5` -> 5) and reject `i64::MIN`.
                IntLit::Signed(text.parse::<i64>().ok()?)
            } else {
                IntLit::Unsigned(text.parse::<u64>().ok()?)
            })
        }
    )] Int(IntLit),

    /// Not attached to any pattern, so the lexer never emits it on its own.
    // NOTE(review): presumably callers map lexing errors to this variant — confirm.
    Invalid,
}
// For Evy, with love.
/// Lexer callback for `»…«` strings, invoked once the opening `»` has been matched.
///
/// Scans the remaining input for the `«` that balances the opener, treating
/// every further `»` as one more level of nesting. On success the lexer is
/// advanced past the closing `«` and the text between the outermost
/// delimiters is interned and returned. Returns `None`, without advancing,
/// when the input ends before the delimiters balance.
fn better_string(lexer: &mut Lexer<Token>) -> Option<Spur> {
    const OPEN: char = '»';
    const CLOSE: char = '«';

    let rest = lexer.remainder();

    // One level is already open: the `»` that triggered this callback.
    let mut depth = 1;
    let end = rest.char_indices().find_map(|(offset, ch)| {
        if ch == CLOSE {
            depth -= 1;
        } else if ch == OPEN {
            depth += 1;
        }
        // Depth can only reach zero on a `CLOSE`, so `offset` is its byte index.
        if depth == 0 {
            Some(offset)
        } else {
            None
        }
    })?;

    let contents = &rest[..end];
    // Consume the contents plus the closing delimiter itself.
    lexer.bump(end + CLOSE.len_utf8());
    Some(lexer.extras.interner.get_or_intern(contents))
}