Added tokenization

- Added tokenization
- Modified the `Token` definition to make it compatible with Logos
- And also obeyed our paperclip overlord and changed all names to be compliant with Rust conventions
pull/11/head
ondra05 2021-04-12 20:20:45 +02:00
parent a39b480b62
commit 3911ccbc82
5 changed files with 203 additions and 36 deletions

84
Cargo.lock generated
View File

@ -7,6 +7,7 @@ name = "able-script"
version = "0.1.0"
dependencies = [
"clap",
"logos",
]
[[package]]
@ -29,6 +30,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "beef"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6736e2428df2ca2848d846c43e88745121a6654696e349ce0054a420815a7409"
[[package]]
name = "bitflags"
version = "1.2.1"
@ -50,6 +57,12 @@ dependencies = [
"vec_map",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "hermit-abi"
version = "0.1.18"
@ -65,12 +78,71 @@ version = "0.2.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
[[package]]
name = "logos"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "427e2abca5be13136da9afdbf874e6b34ad9001dd70f2b103b083a85daa7b345"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-derive"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56a7d287fd2ac3f75b11f19a1c8a874a7d55744bd91f7a1b3e7cf87d4343c36d"
dependencies = [
"beef",
"fnv",
"proc-macro2",
"quote",
"regex-syntax",
"syn",
"utf8-ranges",
]
[[package]]
name = "proc-macro2"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex-syntax"
version = "0.6.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548"
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "syn"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48fe99c6bd8b1cc636890bcc071842de909d902c81ac7dab53ba33c421ab8ffb"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "textwrap"
version = "0.11.0"
@ -86,6 +158,18 @@ version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]]
name = "unicode-xid"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
[[package]]
name = "utf8-ranges"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba"
[[package]]
name = "vec_map"
version = "0.8.2"

View File

@ -8,3 +8,4 @@ edition = "2018"
[dependencies]
clap="*"
logos = "0.12"

View File

@ -5,6 +5,8 @@ mod base_55;
mod parser;
pub mod tokens;
use logos::Logos;
fn main() {
let matches = App::new("AbleScript")
.version(env!("CARGO_PKG_VERSION"))
@ -21,7 +23,14 @@ fn main() {
.get_matches();
match matches.value_of("file") {
Some(file_path) => {
// Start parsing that file
// Read file
let source = std::fs::read_to_string(file_path).unwrap();
// Print token type: `value`
let mut lex = tokens::Token::lexer(&source);
while let Some(token) = lex.next() {
println!("{:?}: `{}`", token, lex.slice());
}
}
None => {
println!("hi");

View File

@ -1,17 +1,17 @@
use crate::tokens::{ABOOL, TOKENS};
use crate::tokens::{Abool, Token};
pub fn abool2num(abool: ABOOL) -> i32 {
pub fn abool2num(abool: Abool) -> i32 {
match abool {
ABOOL::NEVER => -1,
ABOOL::SOMETIMES => 0,
ABOOL::ALWAYS => 1,
Abool::Never => -1,
Abool::Sometimes => 0,
Abool::Always => 1,
}
}
pub fn num2abool(number: i32) -> ABOOL {
pub fn num2abool(number: i32) -> Abool {
match number {
-1 => ABOOL::NEVER,
0 => ABOOL::SOMETIMES,
1 => ABOOL::ALWAYS,
_ => ABOOL::SOMETIMES,
-1 => Abool::Never,
0 => Abool::Sometimes,
1 => Abool::Always,
_ => Abool::Sometimes,
}
}

View File

@ -1,27 +1,100 @@
pub enum TOKENS {
LEFT_PARENTHESIS, // (
RIGHT_PARENTHESIS, // )
LEFT_BRACKET, // [
RIGHT_BRACKET, // ]
LEFT_BRACE, // {
RIGHT_BRACE, // }
COMMENT { value: String }, // #
SUBTRACT, // -
ADDITION, // +
MULTIPLY, // *
DIVIDE, // /
CHAR, // Base52 based character
FUNCTION, // functio
BF_FUNCTION { name: String, functio: String }, // Brain fuck FFI
VARIABLE, // Variable bro
BOOLEAN { state: bool }, // True, False
ABOOLEAN { state: ABOOL }, // Always, Sometimes, Never
PRINT, // Prints the preceding things
MELO, // Ban the following variable from ever being used again
T_DARK,
use logos::Logos;
/// Lexical tokens of the language.
///
/// The lexer is generated by the `Logos` derive: every variant carries the
/// literal (`#[token]`) or regular expression (`#[regex]`) that produces it.
/// Variants hold no payload; the matched text is obtained from the lexer
/// (e.g. `lex.slice()`).
#[derive(Logos, Debug, PartialEq)]
pub enum Token {
    /// A C-compliant identifier
    #[regex(r"[a-zA-Z_][a-zA-Z_0-9]*")]
    Identifier,

    #[token("(")]
    LeftParenthesis,

    #[token(")")]
    RightParenthesis,

    #[token("[")]
    LeftBracket,

    #[token("]")]
    RightBracket,

    #[token("{")]
    LeftBrace,

    #[token("}")]
    RightBrace,

    #[token(";")]
    Semicolon,

    /// Line comment: `#` up to the end of the line
    #[regex(r"#.*")]
    Comment,

    #[token("-")]
    Subtract,

    #[token("+")]
    Addition,

    #[token("*")]
    Multiply,

    #[token("/")]
    Divide,

    #[token("=")]
    Assignment,

    /// Base52 based character ('a')
    // Must be `#[regex]`: `#[token]` matches its argument literally, so the
    // previous `#[token("'.*'")]` only ever matched the five characters `'.*'`.
    // `[^'\n]*` (rather than a greedy `.*`) keeps two literals on one line,
    // such as `'a' + 'b'`, from being swallowed as a single token.
    #[regex(r"'[^'\n]*'")]
    Char,

    #[token("functio")]
    Function,

    /// Brain fuck FFI
    #[token("bff")]
    BfFunction,

    /// Variable bro
    #[token("var")]
    Variable,

    /// True, False
    #[regex("true|false")]
    Boolean,

    /// Always, Sometimes, Never
    #[regex("always|sometimes|never")]
    Aboolean,

    /// String
    // A string body is a sequence of either a backslash followed by any
    // character (an escape such as `\"`) or any character that is neither a
    // quote nor a backslash. The earlier pattern used `\.` (a literal dot), so
    // an escaped quote terminated the string prematurely.
    #[regex(r#""(\\.|[^"\\])*""#)]
    String,

    /// Integer
    #[regex(r"[0-9]+")]
    Integer,

    /// Prints the preceding things
    #[token("print")]
    Print,

    /// Ban the following variable from ever being used again
    #[token("melo")]
    Melo,

    #[token("T-Dark")]
    TDark,

    /// Anything the lexer could not match. Whitespace (including `\r`, so
    /// CRLF files do not produce spurious errors) is skipped here.
    #[regex(r"[ \t\r\n\f]+", logos::skip)]
    #[error]
    Error,
}
pub enum ABOOL {
NEVER = -1,
SOMETIMES = 0,
ALWAYS = 1,
/// Three-state boolean with the states never, sometimes and always.
///
/// Each variant has an explicit integer discriminant (-1, 0, 1), so a value
/// can be cast with `as i32`. The type is a plain fieldless enum and is
/// therefore `Copy`: passing it by value does not consume the original.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Abool {
    /// Discriminant `-1`.
    Never = -1,
    /// Discriminant `0`.
    Sometimes = 0,
    /// Discriminant `1`.
    Always = 1,
}