More complex tokenizer

This commit is contained in:
Able 2022-12-06 12:41:57 -06:00
parent 1abb705fd2
commit 28a31102ef
3 changed files with 91 additions and 28 deletions

View file

@ -0,0 +1,18 @@
/// A filesystem-style path decomposed into its individual components.
#[derive(Debug)]
pub struct Path {
    /// The path segments, in order. Empty strings appear for leading,
    /// trailing, or doubled separators (matching `str::split` behaviour).
    pub path: Vec<String>,
}
impl Path {
    /// Builds a `Path` by splitting `path` on both `\` and `/`,
    /// so Windows- and Unix-style inputs are handled uniformly.
    pub fn new(path: String) -> Self {
        Path {
            // Idiomatic iterator pipeline replaces the manual push loop.
            path: path
                .split(&['\\', '/'][..])
                .map(str::to_string)
                .collect(),
        }
    }
}

View file

@ -0,0 +1,11 @@
;; Tiny smoke-test module for the tokenizer:
;; $add returns the i32 sum of its two parameters, and the exported
;; "start" function calls it with the constants 5 and 10 (result 15).
(module
(func $add (param $x i32) (param $y i32) (result i32)
;; NOTE(review): `get_local` is the legacy spelling of `local.get` —
;; confirm the target tooling/lexer accepts the old form.
(i32.add (get_local $x) (get_local $y))
)
(func (export "start")
(call $add
(i32.const 5)
(i32.const 10)
)
)
)

View file

@ -1,50 +1,84 @@
use logos::Logos;
use logos::{Lexer, Logos};
use crate::{WasmOperation, WasmType};
#[derive(Logos, Debug, PartialEq)]
pub enum Token {
#[token(".")]
Period,
// Or regular expressions.
#[token("i32")]
Int32,
#[token("(")]
RightParen,
#[token(")")]
LeftParen,
#[token("add")]
AddOp,
#[regex("[a-zA-Z_]+", keyword_parse)]
Keyword(String),
#[token("func")]
FuncIden,
#[token("export")]
Export,
#[token("module")]
Module,
#[regex(r"\$[a-zA-Z]+", function_name_parse)]
FunctionName(String),
#[token("param")]
Param,
#[token("result")]
Result,
#[regex("get_local")]
GetLocal,
#[regex(r"\$[a-zA-Z]+")]
FunctionName,
#[regex("add", operation_parser)]
Operation(WasmOperation),
#[regex(r##""[a-zA-Z]+""##)]
WasmString,
// Logos requires one token variant to handle errors,
// it can be named anything you wish.
#[regex("[0-9]+", number_parse)]
/// A number without a type attatched
Number(u64),
#[regex("[ui][0-9]+", number_type_parse)]
NumberType(WasmType),
#[error]
// We can also use this variant to define whitespace,
// or any other matches we wish to skip.
#[regex(r"[ \t\n\f]+", logos::skip)]
Error,
}
/// Callback for `Token::Number`: parses the matched digit run as a `u64`.
/// Returns `None` (lexing error) if the value overflows `u64`.
fn number_parse(lex: &mut Lexer<Token>) -> Option<u64> {
    lex.slice().parse().ok()
}
fn number_type_parse(lex: &mut Lexer<Token>) -> Option<WasmType> {
let slice = lex.slice();
use WasmType::*;
match slice {
"i32" => Some(I32),
_ => None,
}
}
/// Callback for `Token::Keyword`: accepts only the known wat keywords,
/// returning the matched text as an owned `String`; any other bare
/// identifier yields `None`, so the lexer reports it as an error.
fn keyword_parse(lex: &mut Lexer<Token>) -> Option<String> {
    let slice = lex.slice();
    // Or-patterns replace eight duplicated arms with identical bodies.
    match slice {
        "const" | "module" | "param" | "result" | "call" | "func"
        | "get_local" | "export" => Some(slice.to_string()),
        _ => None,
    }
}
fn operation_parser(lex: &mut Lexer<Token>) -> Option<WasmOperation> {
let slice = lex.slice();
use WasmOperation::*;
match slice {
"add" => Some(Add),
_ => None,
}
}
/// Callback for `Token::FunctionName`: returns the matched `$name`
/// text (dollar sign included) as an owned `String`. Never fails.
fn function_name_parse(lex: &mut Lexer<Token>) -> Option<String> {
    Some(lex.slice().to_owned())
}