From 2194e2726fca41c889a62252ac9bc9a275333bfd Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 12 Apr 2021 20:20:45 +0200 Subject: [PATCH] Added tokenization - Added tokenization - Modified `Token` definition to make it compatible with Logos - And also obeyed our paperclip overlord and changed all names to be compliant with Rust conventions --- Cargo.lock | 84 ++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/main.rs | 11 ++++- src/parser.rs | 20 ++++---- src/tokens.rs | 123 ++++++++++++++++++++++++++++++++++++++++---------- 5 files changed, 203 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d5bb1039..3fcf7f88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7,6 +7,7 @@ name = "able-script" version = "0.1.0" dependencies = [ "clap", + "logos", ] [[package]] @@ -29,6 +30,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "beef" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6736e2428df2ca2848d846c43e88745121a6654696e349ce0054a420815a7409" + [[package]] name = "bitflags" version = "1.2.1" @@ -50,6 +57,12 @@ dependencies = [ "vec_map", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "hermit-abi" version = "0.1.18" @@ -65,12 +78,71 @@ version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41" +[[package]] +name = "logos" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "427e2abca5be13136da9afdbf874e6b34ad9001dd70f2b103b083a85daa7b345" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-derive" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56a7d287fd2ac3f75b11f19a1c8a874a7d55744bd91f7a1b3e7cf87d4343c36d" 
+dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-syntax", + "syn", + "utf8-ranges", +] + +[[package]] +name = "proc-macro2" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-syntax" +version = "0.6.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548" + [[package]] name = "strsim" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +[[package]] +name = "syn" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48fe99c6bd8b1cc636890bcc071842de909d902c81ac7dab53ba33c421ab8ffb" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "textwrap" version = "0.11.0" @@ -86,6 +158,18 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "utf8-ranges" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" + [[package]] name = "vec_map" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index 43cd114a..a792b2ec 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,4 @@ edition = "2018" [dependencies] clap="*" +logos = "0.12" \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 87b856c2..814bebbe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,8 @@ mod base_55; mod parser; pub mod tokens; +use logos::Logos; + fn main() { let matches = App::new("AbleScript") .version(env!("CARGO_PKG_VERSION")) @@ -21,7 +23,14 @@ fn main() { .get_matches(); match matches.value_of("file") { Some(file_path) => { - // Start parsing that file + // Read file + let source = std::fs::read_to_string(file_path).unwrap(); + + // Print token type: `value` + let mut lex = tokens::Token::lexer(&source); + while let Some(token) = lex.next() { + println!("{:?}: `{}`", token, lex.slice()); + } } None => { println!("hi"); diff --git a/src/parser.rs b/src/parser.rs index f2efb064..56fa18f3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,17 +1,17 @@ -use crate::tokens::{ABOOL, TOKENS}; +use crate::tokens::{Abool, Token}; -pub fn abool2num(abool: ABOOL) -> i32 { +pub fn abool2num(abool: Abool) -> i32 { match abool { - ABOOL::NEVER => -1, - ABOOL::SOMETIMES => 0, - ABOOL::ALWAYS => 1, + Abool::Never => -1, + Abool::Sometimes => 0, + Abool::Always => 1, } } -pub fn num2abool(number: i32) -> ABOOL { +pub fn num2abool(number: i32) -> Abool { match number { - -1 => ABOOL::NEVER, - 0 => ABOOL::SOMETIMES, - 1 => ABOOL::ALWAYS, - _ => ABOOL::SOMETIMES, + -1 => Abool::Never, + 0 => Abool::Sometimes, + 1 => Abool::Always, + _ => Abool::Sometimes, } } diff --git a/src/tokens.rs b/src/tokens.rs index 8eec8fb3..bf1e315d 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,27 +1,100 @@ -pub enum TOKENS { - LEFT_PARENTHESIS, // ( - RIGHT_PARENTHESIS, // ) - LEFT_BRACKET, // [ - RIGHT_BRACKET, // ] - LEFT_BRACE, // { - RIGHT_BRACE, // } - COMMENT { value: String }, // # - SUBTRACT, // - - ADDITION, // + - MULTIPLY, // * - DIVIDE, // / - CHAR, // Base52 based character - FUNCTION, // functio - BF_FUNCTION { 
name: String, functio: String }, // Brain fuck FFI - VARIABLE, // Variable bro - BOOLEAN { state: bool }, // True, False - ABOOLEAN { state: ABOOL }, // Always, Sometimes, Never - PRINT, // Prints the preceding things - MELO, // Ban the following variable from ever being used again - T_DARK, +use logos::Logos; + +#[derive(Logos, Debug, PartialEq)] +pub enum Token { + /// A C-compliant identifier + #[regex(r"[a-zA-Z_][a-zA-Z_0-9]*")] + Identifier, + + #[token("(")] + LeftParenthesis, + + #[token(")")] + RightParenthesis, + + #[token("[")] + LeftBracket, + + #[token("]")] + RightBracket, + + #[token("{")] + LeftBrace, + + #[token("}")] + RightBrace, + + #[token(";")] + Semicolon, + + #[regex(r"#.*")] + Comment, + + #[token("-")] + Subtract, + + #[token("+")] + Addition, + + #[token("*")] + Multiply, + + #[token("/")] + Divide, + + #[token("=")] + Assignment, + + /// Base52 based character ('a') + #[regex(r"'[^'\n]*'")] + Char, + + #[token("functio")] + Function, + + /// Brain fuck FFI + #[token("bff")] + BfFunction, + + /// Variable bro + #[token("var")] + Variable, + + /// True, False + #[regex("true|false")] + Boolean, + + /// Always, Sometimes, Never + #[regex("always|sometimes|never")] + Aboolean, + + /// String literal; a backslash escapes the following character + #[regex(r#""(\\.|[^"\\])*""#)] + String, + + /// Integer + #[regex(r"[0-9]+")] + Integer, + + /// Prints the preceding things + #[token("print")] + Print, + + /// Ban the following variable from ever being used again + #[token("melo")] + Melo, + + #[token("T-Dark")] + TDark, + + #[regex(r"[ \t\r\n\f]+", logos::skip)] + #[error] + Error, } -pub enum ABOOL { - NEVER = -1, - SOMETIMES = 0, - ALWAYS = 1, + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Abool { + Never = -1, + Sometimes = 0, + Always = 1, }