More complex tokenizer

pull/1/head
Able 2022-12-06 12:41:57 -06:00
parent 9a46555b0d
commit cd71815158
Signed by: able
GPG Key ID: 0BD8B45C30DCA887
3 changed files with 91 additions and 28 deletions

View File

@ -0,0 +1,18 @@
/// A path split into its individual components.
#[derive(Debug)]
pub struct Path {
    /// Ordered path segments; separators themselves are not stored.
    pub path: Vec<String>,
}

impl Path {
    /// Builds a `Path` by splitting `path` on both `\` and `/` separators.
    ///
    /// Note: leading, trailing, or doubled separators produce empty-string
    /// segments (e.g. `"/a"` -> `["", "a"]`), matching `str::split` semantics.
    pub fn new(path: String) -> Self {
        // Idiomatic iterator pipeline instead of a manual push-loop.
        Path {
            path: path
                .split(&['\\', '/'][..])
                .map(String::from)
                .collect(),
        }
    }
}

View File

@ -0,0 +1,11 @@
;; Minimal WAT fixture used to exercise the tokenizer:
;; $add sums its two i32 params; the exported "start" function
;; calls it with the constants 5 and 10.
;; NOTE(review): `get_local` is the legacy spelling of `local.get` —
;; presumably kept because the tokenizer recognises this form; confirm.
(module
(func $add (param $x i32) (param $y i32) (result i32)
(i32.add (get_local $x) (get_local $y))
)
(func (export "start")
(call $add
(i32.const 5)
(i32.const 10)
)
)
)

View File

@ -1,50 +1,84 @@
use logos::Logos; use logos::{Lexer, Logos};
use crate::{WasmOperation, WasmType};
#[derive(Logos, Debug, PartialEq)] #[derive(Logos, Debug, PartialEq)]
pub enum Token { pub enum Token {
#[token(".")] #[token(".")]
Period, Period,
// Or regular expressions.
#[token("i32")]
Int32,
#[token("(")] #[token("(")]
RightParen, RightParen,
#[token(")")] #[token(")")]
LeftParen, LeftParen,
#[token("add")] #[regex("[a-zA-Z_]+", keyword_parse)]
AddOp, Keyword(String),
#[token("func")] #[regex(r"\$[a-zA-Z]+", function_name_parse)]
FuncIden, FunctionName(String),
#[token("export")]
Export,
#[token("module")]
Module,
#[token("param")] #[regex("add", operation_parser)]
Param, Operation(WasmOperation),
#[token("result")]
Result,
#[regex("get_local")]
GetLocal,
#[regex(r"\$[a-zA-Z]+")]
FunctionName,
#[regex(r##""[a-zA-Z]+""##)] #[regex(r##""[a-zA-Z]+""##)]
WasmString, WasmString,
// Logos requires one token variant to handle errors, #[regex("[0-9]+", number_parse)]
// it can be named anything you wish. /// A number without a type attatched
Number(u64),
#[regex("[ui][0-9]+", number_type_parse)]
NumberType(WasmType),
#[error] #[error]
// We can also use this variant to define whitespace,
// or any other matches we wish to skip.
#[regex(r"[ \t\n\f]+", logos::skip)] #[regex(r"[ \t\n\f]+", logos::skip)]
Error, Error,
} }
/// Callback for `Token::Number`: parses the matched digit run as a `u64`.
///
/// Returns `None` (and thus an error token) if the value overflows `u64`.
fn number_parse(lex: &mut Lexer<Token>) -> Option<u64> {
    lex.slice().parse::<u64>().ok()
}
fn number_type_parse(lex: &mut Lexer<Token>) -> Option<WasmType> {
let slice = lex.slice();
use WasmType::*;
match slice {
"i32" => Some(I32),
_ => None,
}
}
/// Callback for `Token::Keyword`: accepts only the known WAT keywords.
///
/// The `[a-zA-Z_]+` regex over-matches, so this filter returns `None`
/// for any other identifier, turning it into an error token.
fn keyword_parse(lex: &mut Lexer<Token>) -> Option<String> {
    let slice = lex.slice();
    match slice {
        // Idiom: the eight identical arms collapsed into one `|` pattern.
        "const" | "module" | "param" | "result" | "call" | "func"
        | "get_local" | "export" => Some(slice.to_string()),
        _ => None,
    }
}
fn operation_parser(lex: &mut Lexer<Token>) -> Option<WasmOperation> {
let slice = lex.slice();
use WasmOperation::*;
match slice {
"add" => Some(Add),
_ => None,
}
}
/// Callback for `Token::FunctionName`: keeps the matched text —
/// including the leading `$` — as an owned `String`.
fn function_name_parse(lex: &mut Lexer<Token>) -> Option<String> {
    Some(lex.slice().to_owned())
}