2022-03-06 09:04:48 -06:00
|
|
|
use chumsky::prelude::*;
|
|
|
|
|
|
|
|
/// A single lexical token of the language.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Token {
    // Keywords
    /// `let`
    KwLet,
    /// `fun`
    KwFun,
    /// `do`
    KwDo,
    /// `end`
    KwEnd,
    /// `if`
    KwIf,
    /// `then`
    KwThen,
    /// `else`
    KwElse,
    /// `return`
    KwReturn,

    // Literals
    /// Integer literal.
    Int(i64),
    /// Boolean literal (`true` / `false`).
    Boolean(bool),
    /// String literal; holds the text between the quotes.
    String(String),
    /// Any identifier that is not a keyword or boolean literal.
    Identifier(String),

    // Operators
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulus,
    /// `!`
    Not,
    /// `==`
    Equal,
    /// `!=`
    NotEqual,
    /// `<`
    Less,
    /// `>`
    Greater,
    /// `|>`
    Pipe,

    // Symbols & Delimiters
    /// `=`
    Assign,
    /// `.`
    Dot,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `;`
    SemiColon,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `@`
    At,
    /// `_`
    Hole,
}
|
|
|
|
|
|
|
|
impl std::fmt::Display for Token {
|
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
|
|
match self {
|
|
|
|
Token::KwLet => write!(f, "let"),
|
|
|
|
Token::KwFun => write!(f, "fun"),
|
|
|
|
Token::KwDo => write!(f, "do"),
|
|
|
|
Token::KwEnd => write!(f, "end"),
|
|
|
|
Token::KwIf => write!(f, "if"),
|
|
|
|
Token::KwThen => write!(f, "then"),
|
|
|
|
Token::KwElse => write!(f, "else"),
|
2022-03-06 11:20:18 -06:00
|
|
|
Token::KwReturn => write!(f, "return"),
|
2022-03-06 09:04:48 -06:00
|
|
|
|
|
|
|
Token::Int(i) => write!(f, "{}", i),
|
|
|
|
Token::Boolean(b) => write!(f, "{}", b),
|
|
|
|
Token::String(s) => write!(f, "{}", s),
|
|
|
|
Token::Identifier(s) => write!(f, "{}", s),
|
|
|
|
|
|
|
|
Token::Plus => write!(f, "+"),
|
|
|
|
Token::Minus => write!(f, "-"),
|
|
|
|
Token::Multiply => write!(f, "*"),
|
|
|
|
Token::Divide => write!(f, "/"),
|
2022-03-15 19:36:39 -05:00
|
|
|
Token::Modulus => write!(f, "%"),
|
2022-03-06 09:04:48 -06:00
|
|
|
Token::Not => write!(f, "!"),
|
|
|
|
Token::Equal => write!(f, "=="),
|
|
|
|
Token::NotEqual => write!(f, "!="),
|
|
|
|
Token::Less => write!(f, "<"),
|
|
|
|
Token::Greater => write!(f, ">"),
|
2022-03-12 16:00:42 -06:00
|
|
|
Token::Pipe => write!(f, "|>"),
|
2022-03-11 19:37:47 -06:00
|
|
|
|
2022-03-06 09:04:48 -06:00
|
|
|
Token::Assign => write!(f, "="),
|
|
|
|
Token::Dot => write!(f, "."),
|
|
|
|
Token::Comma => write!(f, ","),
|
|
|
|
Token::Colon => write!(f, ":"),
|
|
|
|
Token::SemiColon => write!(f, ";"),
|
|
|
|
Token::OpenParen => write!(f, "("),
|
|
|
|
Token::CloseParen => write!(f, ")"),
|
2022-03-11 19:37:47 -06:00
|
|
|
Token::At => write!(f, "@"),
|
2022-03-12 23:03:07 -06:00
|
|
|
Token::Hole => write!(f, "_"),
|
2022-03-06 09:04:48 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Source location of a token, expressed as a half-open `start..end` range
/// into the lexer's input.
pub type Span = std::ops::Range<usize>;
|
|
|
|
pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = Simple<char>> {
|
|
|
|
let int = text::int(10)
|
|
|
|
.map(|s: String| Token::Int(s.parse().unwrap()));
|
|
|
|
|
|
|
|
let string = just('"')
|
|
|
|
.ignore_then(filter(|c| *c != '"').repeated())
|
|
|
|
.then_ignore(just('"'))
|
|
|
|
.collect::<String>()
|
|
|
|
.map(Token::String);
|
2022-03-12 23:03:07 -06:00
|
|
|
|
2022-03-06 09:04:48 -06:00
|
|
|
let symbol = choice((
|
|
|
|
just('+').to(Token::Plus),
|
|
|
|
just('-').to(Token::Minus),
|
|
|
|
just('*').to(Token::Multiply),
|
|
|
|
just('/').to(Token::Divide),
|
2022-03-12 16:00:42 -06:00
|
|
|
|
2022-03-06 09:04:48 -06:00
|
|
|
just('!').to(Token::Not),
|
|
|
|
just("==").to(Token::Equal),
|
2022-03-12 16:00:42 -06:00
|
|
|
|
|
|
|
just("|>").to(Token::Pipe),
|
|
|
|
|
2022-03-06 09:04:48 -06:00
|
|
|
just('<').to(Token::Less),
|
|
|
|
just('>').to(Token::Greater),
|
2022-03-12 23:03:07 -06:00
|
|
|
|
2022-03-06 09:04:48 -06:00
|
|
|
just('=').to(Token::Assign),
|
|
|
|
just('.').to(Token::Dot),
|
|
|
|
just(',').to(Token::Comma),
|
|
|
|
just(':').to(Token::Colon),
|
|
|
|
just(';').to(Token::SemiColon),
|
|
|
|
just('(').to(Token::OpenParen),
|
|
|
|
just(')').to(Token::CloseParen),
|
2022-03-11 19:37:47 -06:00
|
|
|
just('@').to(Token::At),
|
2022-03-12 23:03:07 -06:00
|
|
|
just('_').to(Token::Hole),
|
2022-03-06 09:04:48 -06:00
|
|
|
));
|
|
|
|
|
|
|
|
let keyword = text::ident().map(|s: String| match s.as_str() {
|
|
|
|
"true" => Token::Boolean(true),
|
|
|
|
"false" => Token::Boolean(false),
|
|
|
|
|
|
|
|
"let" => Token::KwLet,
|
|
|
|
"fun" => Token::KwFun,
|
|
|
|
"do" => Token::KwDo,
|
|
|
|
"end" => Token::KwEnd,
|
|
|
|
"if" => Token::KwIf,
|
|
|
|
"then" => Token::KwThen,
|
|
|
|
"else" => Token::KwElse,
|
2022-03-06 11:20:18 -06:00
|
|
|
"return" => Token::KwReturn,
|
2022-03-06 09:04:48 -06:00
|
|
|
_ => Token::Identifier(s),
|
|
|
|
});
|
|
|
|
|
|
|
|
let token = int
|
|
|
|
.or(string)
|
|
|
|
.or(symbol)
|
|
|
|
.or(keyword)
|
|
|
|
.recover_with(skip_then_retry_until([]));
|
|
|
|
|
2022-03-12 17:46:50 -06:00
|
|
|
// let comment = just("--").then(take_until(just('\n'))).padded();
|
|
|
|
let comment = just('-')
|
|
|
|
.then_ignore(just('{')
|
|
|
|
.ignore_then(none_of('}').ignored().repeated())
|
|
|
|
.then_ignore(just("}-"))
|
2022-03-12 19:30:13 -06:00
|
|
|
.or(just('-').ignore_then(none_of('\n').ignored().repeated()))
|
2022-03-12 17:46:50 -06:00
|
|
|
)
|
|
|
|
.padded()
|
|
|
|
.ignored()
|
|
|
|
.repeated();
|
2022-03-06 09:04:48 -06:00
|
|
|
|
|
|
|
token
|
2022-03-12 17:46:50 -06:00
|
|
|
.padded_by(comment)
|
2022-03-06 09:04:48 -06:00
|
|
|
.map_with_span(|token, span| (token, span))
|
|
|
|
.padded()
|
|
|
|
.repeated()
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn lex(src: String) -> (Option<Vec<(Token, std::ops::Range<usize>)>>, Vec<Simple<char>>) {
|
|
|
|
let (tokens, lex_error) = lexer().parse_recovery(src.as_str());
|
|
|
|
return (tokens, lex_error);
|
2022-03-06 10:02:54 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A typed `let` binding lexes into the expected tokens and spans with
    /// no errors reported.
    #[test]
    fn lex_let_simple() {
        let source = String::from("let x: Int = 1;");
        let expected = vec![
            (Token::KwLet, 0..3),
            (Token::Identifier("x".to_string()), 4..5),
            (Token::Colon, 5..6),
            (Token::Identifier("Int".to_string()), 7..10),
            (Token::Assign, 11..12),
            (Token::Int(1), 13..14),
            (Token::SemiColon, 14..15),
        ];

        let (tokens, errors) = lex(source);

        assert_eq!(tokens, Some(expected));
        assert_eq!(errors, vec![]);
    }
}
|