From cd7181515820dd2ee04ce8ed7ee25c1742ec882a Mon Sep 17 00:00:00 2001
From: Able
Date: Tue, 6 Dec 2022 12:41:57 -0600
Subject: [PATCH] More complex tokenizer

---
 libraries/relib/src/lib.rs             | 18 ++++++
 programs/wat2wasm/assets/complex.wat   | 11 ++++
 programs/wat2wasm/src/tokenizer/mod.rs | 90 ++++++++++++++++++--------
 3 files changed, 91 insertions(+), 28 deletions(-)
 create mode 100644 libraries/relib/src/lib.rs
 create mode 100644 programs/wat2wasm/assets/complex.wat

diff --git a/libraries/relib/src/lib.rs b/libraries/relib/src/lib.rs
new file mode 100644
index 0000000..6b0ad7d
--- /dev/null
+++ b/libraries/relib/src/lib.rs
@@ -0,0 +1,18 @@
+#[derive(Debug)]
+pub struct Path {
+    pub path: Vec<String>,
+}
+
+impl Path {
+    pub fn new(path: String) -> Self {
+        let mut path_vec_string = vec![];
+
+        for part in path.split(&['\\', '/'][..]) {
+            path_vec_string.push(part.to_string());
+        }
+
+        Path {
+            path: path_vec_string,
+        }
+    }
+}
diff --git a/programs/wat2wasm/assets/complex.wat b/programs/wat2wasm/assets/complex.wat
new file mode 100644
index 0000000..e22b040
--- /dev/null
+++ b/programs/wat2wasm/assets/complex.wat
@@ -0,0 +1,11 @@
+(module
+    (func $add (param $x i32) (param $y i32) (result i32)
+        (i32.add (get_local $x) (get_local $y))
+    )
+    (func (export "start")
+        (call $add
+            (i32.const 5)
+            (i32.const 10)
+        )
+    )
+)
diff --git a/programs/wat2wasm/src/tokenizer/mod.rs b/programs/wat2wasm/src/tokenizer/mod.rs
index 36fee4c..679e00a 100644
--- a/programs/wat2wasm/src/tokenizer/mod.rs
+++ b/programs/wat2wasm/src/tokenizer/mod.rs
@@ -1,50 +1,84 @@
-use logos::Logos;
+use logos::{Lexer, Logos};
+
+use crate::{WasmOperation, WasmType};
 
 #[derive(Logos, Debug, PartialEq)]
 pub enum Token {
     #[token(".")]
     Period,
 
-    // Or regular expressions.
-    #[token("i32")]
-    Int32,
-
     #[token("(")]
     RightParen,
 
     #[token(")")]
     LeftParen,
 
-    #[token("add")]
-    AddOp,
+    #[regex("[a-zA-Z_]+", keyword_parse)]
+    Keyword(String),
 
-    #[token("func")]
-    FuncIden,
-    #[token("export")]
-    Export,
-    #[token("module")]
-    Module,
+    #[regex(r"\$[a-zA-Z]+", function_name_parse)]
+    FunctionName(String),
 
-    #[token("param")]
-    Param,
-
-    #[token("result")]
-    Result,
-
-    #[regex("get_local")]
-    GetLocal,
-
-    #[regex(r"\$[a-zA-Z]+")]
-    FunctionName,
+    #[regex("add", operation_parser)]
+    Operation(WasmOperation),
 
     #[regex(r##""[a-zA-Z]+""##)]
     WasmString,
 
-    // Logos requires one token variant to handle errors,
-    // it can be named anything you wish.
+    #[regex("[0-9]+", number_parse)]
+    /// A number without a type attached
+    Number(u64),
+
+    #[regex("[ui][0-9]+", number_type_parse)]
+    NumberType(WasmType),
+
     #[error]
-    // We can also use this variant to define whitespace,
-    // or any other matches we wish to skip.
     #[regex(r"[ \t\n\f]+", logos::skip)]
     Error,
 }
+
+fn number_parse(lex: &mut Lexer<Token>) -> Option<u64> {
+    let slice = lex.slice();
+    let n: u64 = slice.parse().ok()?;
+    Some(n)
+}
+
+fn number_type_parse(lex: &mut Lexer<Token>) -> Option<WasmType> {
+    let slice = lex.slice();
+    use WasmType::*;
+    match slice {
+        "i32" => Some(I32),
+        _ => None,
+    }
+}
+
+fn keyword_parse(lex: &mut Lexer<Token>) -> Option<String> {
+    let slice = lex.slice();
+    match slice {
+        "const" => Some(slice.to_string()),
+        "module" => Some(slice.to_string()),
+        "param" => Some(slice.to_string()),
+        "result" => Some(slice.to_string()),
+        "call" => Some(slice.to_string()),
+        "func" => Some(slice.to_string()),
+        "get_local" => Some(slice.to_string()),
+        "export" => Some(slice.to_string()),
+
+        _ => None,
+    }
+}
+
+fn operation_parser(lex: &mut Lexer<Token>) -> Option<WasmOperation> {
+    let slice = lex.slice();
+    use WasmOperation::*;
+    match slice {
+        "add" => Some(Add),
+        _ => None,
+    }
+}
+
+fn function_name_parse(lex: &mut Lexer<Token>) -> Option<String> {
+    let slice = lex.slice();
+
+    Some(slice.to_string())
+}