From 9a46555b0d4a1bcd7a1d203e9b05a092de5d00cd Mon Sep 17 00:00:00 2001
From: Able
Date: Mon, 5 Dec 2022 23:01:40 -0600
Subject: [PATCH] tokenize a simple wasm program

---
 Cargo.lock                             | 48 +++++++++++++++++++
 Cargo.toml                             |  1 +
 programs/wat2wasm/Cargo.toml           |  9 ++++
 programs/wat2wasm/assets/add.wat       |  8 ++++
 programs/wat2wasm/src/main.rs          | 39 +++++++++++++++
 programs/wat2wasm/src/tokenizer/mod.rs | 66 ++++++++++++++++++++++++++
 6 files changed, 171 insertions(+)
 create mode 100644 programs/wat2wasm/Cargo.toml
 create mode 100644 programs/wat2wasm/assets/add.wat
 create mode 100644 programs/wat2wasm/src/main.rs
 create mode 100644 programs/wat2wasm/src/tokenizer/mod.rs

diff --git a/Cargo.lock b/Cargo.lock
index 2a42ba4..6bbf535 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -47,6 +47,12 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
 name = "basic_driver"
 version = "0.1.0"
 
+[[package]]
+name = "beef"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -89,6 +95,12 @@ dependencies = [
 name = "derelict_microarchitecture"
 version = "0.1.0"
 
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
 [[package]]
 name = "fs_extra"
 version = "1.2.0"
@@ -159,6 +171,29 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "logos"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf8b031682c67a8e3d5446840f9573eb7fe26efe7ec8d195c9ac4c0647c502f1"
+dependencies = [
+ "logos-derive",
+]
+
+[[package]]
+name = "logos-derive"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1d849148dbaf9661a6151d1ca82b13bb4c4c128146a88d05253b38d4e2f496c"
+dependencies = [
+ "beef",
+ "fnv",
+ "proc-macro2",
+ "quote",
+ "regex-syntax",
+ "syn",
+]
+
 [[package]]
 name = "messaging"
 version = "0.1.0"
@@ -221,6 +256,12 @@ dependencies = [
  "versioning",
 ]
 
+[[package]]
+name = "regex-syntax"
+version = "0.6.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
+
 [[package]]
 name = "ron"
 version = "0.7.1"
@@ -386,3 +427,10 @@ name = "wasi"
 version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
+[[package]]
+name = "wat2wasm"
+version = "0.1.0"
+dependencies = [
+ "logos",
+]
diff --git a/Cargo.toml b/Cargo.toml
index d3c197f..1fc1d5c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,4 +28,5 @@ members = [
     "programs/shell",
     "programs/table_view",
     "programs/undelete",
+    "programs/wat2wasm",
 ]
diff --git a/programs/wat2wasm/Cargo.toml b/programs/wat2wasm/Cargo.toml
new file mode 100644
index 0000000..3a454ee
--- /dev/null
+++ b/programs/wat2wasm/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "wat2wasm"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+logos = "0.12"
diff --git a/programs/wat2wasm/assets/add.wat b/programs/wat2wasm/assets/add.wat
new file mode 100644
index 0000000..343143f
--- /dev/null
+++ b/programs/wat2wasm/assets/add.wat
@@ -0,0 +1,8 @@
+(module
+  (func $add (param $x i32) (param $y i32) (result i32)
+    get_local $x
+    get_local $y
+    i32.add
+  )
+  (export "add" (func $add))
+)
diff --git a/programs/wat2wasm/src/main.rs b/programs/wat2wasm/src/main.rs
new file mode 100644
index 0000000..14e9ba5
--- /dev/null
+++ b/programs/wat2wasm/src/main.rs
@@ -0,0 +1,39 @@
+//! Entry point for `wat2wasm`: tokenizes a WebAssembly text file and prints each token.
+
+pub mod tokenizer;
+
+use std::fs;
+
+use logos::Logos;
+
+use crate::tokenizer::Token;
+
+/// Reads the sample `add.wat` asset and prints every token the lexer yields.
+///
+/// # Panics
+///
+/// Panics if the asset cannot be read from the current working directory.
+fn main() {
+    // Read the contents of the file into a string
+    let contents =
+        fs::read_to_string("programs/wat2wasm/assets/add.wat").expect("Failed to read file");
+
+    // The lexer is an iterator; the loop consumes it and yields one token per step.
+    for token in Token::lexer(&contents) {
+        println!("{:?}", token);
+    }
+}
+
+/// WebAssembly numeric value types.
+#[non_exhaustive]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum WasmType {
+    /// 32-bit integer.
+    I32,
+    /// 64-bit integer.
+    I64,
+    /// 32-bit float.
+    F32,
+    /// 64-bit float.
+    F64,
+}
diff --git a/programs/wat2wasm/src/tokenizer/mod.rs b/programs/wat2wasm/src/tokenizer/mod.rs
new file mode 100644
index 0000000..36fee4c
--- /dev/null
+++ b/programs/wat2wasm/src/tokenizer/mod.rs
@@ -0,0 +1,66 @@
+//! Lexer for the subset of the WebAssembly text format used by `assets/add.wat`.
+
+use logos::Logos;
+
+/// A single lexical token of the WebAssembly text format.
+#[derive(Logos, Debug, PartialEq)]
+pub enum Token {
+    /// The `.` separating a type from its operation, as in `i32.add`.
+    #[token(".")]
+    Period,
+
+    /// The `i32` value type.
+    #[token("i32")]
+    Int32,
+
+    /// An opening parenthesis, `(`.
+    #[token("(")]
+    LeftParen,
+
+    /// A closing parenthesis, `)`.
+    #[token(")")]
+    RightParen,
+
+    /// The `add` operation name.
+    #[token("add")]
+    AddOp,
+
+    /// The `func` keyword.
+    #[token("func")]
+    FuncIden,
+    /// The `export` keyword.
+    #[token("export")]
+    Export,
+    /// The `module` keyword.
+    #[token("module")]
+    Module,
+
+    /// The `param` keyword.
+    #[token("param")]
+    Param,
+
+    /// The `result` keyword.
+    #[token("result")]
+    Result,
+
+    /// The `get_local` instruction.
+    #[token("get_local")]
+    GetLocal,
+
+    /// A `$`-prefixed alphabetic identifier such as `$add` or `$x`.
+    #[regex(r"\$[a-zA-Z]+")]
+    FunctionName,
+
+    /// A double-quoted alphabetic string such as `"add"`.
+    #[regex(r##""[a-zA-Z]+""##)]
+    WasmString,
+
+    /// Input that matches no other token.
+    // Logos requires one token variant to handle errors,
+    // it can be named anything you wish.
+    #[error]
+    // We can also use this variant to define whitespace,
+    // or any other matches we wish to skip.
+    #[regex(r"[ \t\r\n\f]+", logos::skip)]
+    Error,
+}