From 503888bceebdfa31c3a20c80930b27af6fd8817a Mon Sep 17 00:00:00 2001 From: Goren Barak Date: Fri, 24 Nov 2023 10:59:30 -0500 Subject: [PATCH] Started working on lexer. Temporarily using an external dependency. --- Cargo.lock | 88 +++++++++++++++++++++++++ Cargo.toml | 1 + src/codegen/fasm.rs | 2 +- src/lex/mod.rs | 1 - src/lex/parse.rs | 156 -------------------------------------------- src/lex/tok.rs | 130 +++++++++++++++--------------------- src/main.rs | 63 +++++++++--------- 7 files changed, 177 insertions(+), 264 deletions(-) delete mode 100644 src/lex/parse.rs diff --git a/Cargo.lock b/Cargo.lock index 03db218..fca05d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,94 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "logos" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c000ca4d908ff18ac99b93a062cb8958d331c3220719c52e77cb19cc6ac5d2c1" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc487311295e0002e452025d6b580b77bb17286de87b57138f3b5db711cded68" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-syntax", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbfc0d229f1f42d790440136d941afd806bc9e949e2bcb8faa813b0f00d1267e" +dependencies = [ + "logos-codegen", +] + +[[package]] +name = "proc-macro2" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "skylang" version = "0.1.0" +dependencies = [ + "logos", +] + +[[package]] +name = "syn" +version = "2.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/Cargo.toml b/Cargo.toml index 4635933..a40e8ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,4 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +logos = "0.13.0" diff --git a/src/codegen/fasm.rs b/src/codegen/fasm.rs index d9f9c2c..d840aca 100644 --- a/src/codegen/fasm.rs +++ b/src/codegen/fasm.rs @@ -153,7 +153,7 @@ pub fn fasm_codegen(exprs: &Vec, not_a_function: bool) -> String { // Return something from a function. Expr::Return(e) => { // Do the operation that should later be returned. - asm_start.push_str(fasm_codegen!(fun: &e)); + asm_start.push_str(fasm_codegen!(fun: &e).as_str()); // Move the return value to rbp + 8. asm_start.push_str("mov [rbp + 8], rax"); // 8(%rbp) ← return_value diff --git a/src/lex/mod.rs b/src/lex/mod.rs index 41d230f..2f5793d 100644 --- a/src/lex/mod.rs +++ b/src/lex/mod.rs @@ -1,2 +1 @@ pub mod tok; -pub mod parse; diff --git a/src/lex/parse.rs b/src/lex/parse.rs deleted file mode 100644 index 55e2468..0000000 --- a/src/lex/parse.rs +++ /dev/null @@ -1,156 +0,0 @@ -#![allow(unused)] - -use super::tok::*; - - -pub fn match_single_char<'a>(word: &'a str) -> Option> { - macro_rules! tok { - ($tt:expr) => { - Some(Token::new($tt, word)) - }; - }; - - let tok = match word { - ";" => tok!(Semicolon), - "=" => tok!(Equal), - "(" => tok!(LeftParen), - ")" => tok!(RightParen), - "{" => tok!(LeftBrace), - "}" => tok!(RightBrace), - "," => tok!(Comma), - "." => tok!(Dot), - "-" => tok!(Minus), - "+" => tok!(Plus), - "/" => tok!(Slash), - "*" => tok!(Star), - "%" => tok!(Percent), - "!" => tok!(Bang), - ":" => tok!(Colon), - "<" => tok!(Less), - ">" => tok!(Greater), - - _ => None - }; - - tok -} - -pub fn match_keyword<'a>(word: &'a str) -> Option> { - macro_rules! tok { - ($tt:expr) => { - Some(Token::new($tt, word)) - }; - }; - - let tok = match word { - "fn" => tok!(Fn), - "let" => tok!(Let), - "if" => tok!(If), - "else" => tok!(Else), - "while" => tok!(While), - "elif" => tok!(Elif), - "return" => tok!(Return), - "for" => tok!(For), - "in" => tok!(In), - "break" => tok!(Break), - "continue" => tok!(Continue), - "true" => tok!(True), - "false" => tok!(False), - - _ => None - }; - - tok -} - -pub fn match_two_char<'a>(word: &'a str) -> Option> { - macro_rules! tok { - ($tt:expr) => { - Some(Token::new($tt, word)) - }; - }; - - let tok = match word { - "==" => tok!(EqualEqual), - "!=" => tok!(BangEqual), - "<=" => tok!(LessEqual), - ">=" => tok!(GreaterEqual), - - _ => None - }; - - tok -} - -pub fn match_string_literal<'a>(word: &'a str) -> Option> { - macro_rules! tok { - ($tt:expr) => { - Some(Token::new($tt, word)) - }; - }; - - - let mut chars = word.chars(); - - if word.starts_with("\"") { - chars.next(); - while let Some(char) = chars.next() { - if char == '\"' { - return tok!(String); - } - } - } - if word.starts_with("\'") { - while let Some(char) = chars.next() { - if char == '\'' { - return tok!(String); - } - } - } - - None -} - -pub fn match_int_literal<'a>(word: &'a str) -> Option> { - macro_rules! tok { - ($tt:expr) => { - Some(Token::new($tt, word)) - }; - }; - - let mut chars = word.chars(); - let mut tok = None; - while let Some(char) = chars.next() { - if char.is_digit(10) { - tok = tok!(Number); - } else { - return None; - } - } - - tok -} - -pub fn match_identifier<'a>(word: &'a str) -> Option> { - macro_rules! tok { - ($tt:expr) => { - Some(Token::new($tt, word)) - }; - }; - - let mut chars = word.chars().peekable(); - let mut tok: Option> = None; - if chars.peek().unwrap_or(&'❌').is_ascii_alphabetic() { - while let Some(char) = chars.next() { - if char.is_ascii() && match_single_char(char.to_string().as_str()).is_none() { - tok = tok!(Identifier); - } else { - return None; - } - } - } else { - return None; - } - - tok -} diff --git a/src/lex/tok.rs b/src/lex/tok.rs index c25f120..38267f1 100644 --- a/src/lex/tok.rs +++ b/src/lex/tok.rs @@ -1,128 +1,106 @@ #![allow(unused)] +use logos::Logos; +use logos::Lexer; +use core::iter::Peekable; + pub use TokenType::*; -use super::parse::*; -#[derive(Debug)] -pub struct Token<'a> { - tt: TokenType, - word: &'a str, -} - -#[derive(Debug)] +#[derive(Debug, Logos)] +#[logos(skip r"[ \t\n\f]+")] pub enum TokenType { - EOF, - // SINGLE CHARACTER TOKENS + #[token(";")] Semicolon, // ; + #[token("=")] Equal, // = + #[token("(")] LeftParen, // ( + #[token(")")] RightParen, // ) + #[token("{")] LeftBrace, // { + #[token("}")] RightBrace, // } + #[token(",")] Comma, // , + #[token(".")] Dot, // . + #[token("-")] Minus, // - + #[token("+")] Plus, // + + #[token("/")] Slash, // / + #[token("*")] Star, // * + #[token("%")] Percent, // % + #[token("!")] Bang, // ! + #[token(":")] Colon, // : + #[token("<")] Less, // < + #[token(">")] Greater, // > + #[token("|")] + Pipe, // | // KEYWORDS - Fn, // fn + #[token("fnaf")] + Fnaf, // fnaf + #[token("let")] Let, // let + #[token("if")] If, // if + #[token("else")] Else, // else + #[token("while")] While, // while + #[token("elif")] Elif, // elif + #[token("return")] Return, // return + #[token("for")] For, // for + #[token("in")] In, // in + #[token("break")] Break, // break + #[token("continue")] Continue, // continue // TWO CHARACTER TOKENS + #[token("==")] EqualEqual, // == + #[token("!=")] BangEqual, // != + #[token("<=")] LessEqual, // <= + #[token(">=")] GreaterEqual, // >= // LITERALS + #[regex("(\"[^\".+]\")|('[^'.+]')")] String, // A string literal. + #[regex("[0-9]+")] Number, // An integer. + #[regex(r#"[^[0-9]^"^-^[ \t\n\f]^\.^=^(^)^{^}.]+[^"^-^=^\..^[ \t\n\f]^(^)^{^}]*"#)] Identifier, // An identifier. + #[token("true")] True, // true + #[token("false")] False, // false - Null, // None - - // ERROR - Error, // A syntax error. + #[token("none")] + Null, // none } -#[derive(Debug)] -pub struct Lexer<'a> { - source: &'a str, - tokens: Vec>, - current: usize, - after: &'a str -} - -impl<'a> Lexer<'a> { - pub fn new() -> Self { - Lexer { - source: "", - tokens: Vec::new(), - current: 0, - after: "" - } - } -} - -impl<'a> std::iter::Iterator for Lexer<'a> { - type Item = Option; - - fn next(&mut self) -> Option { - unimplemented!("Iterating over lexer is not implemented."); - } -} - -impl<'a> From<&'a str> for Lexer<'a> { - fn from(value: &'a str) -> Self { - Lexer { - source: value, - tokens: Vec::new(), - current: 0, - after: value - } - } -} - -impl<'a> From<&'a std::string::String> for Lexer<'a> { - fn from(value: &'a std::string::String) -> Self { - Lexer { - source: value.as_str(), - tokens: Vec::new(), - current: 0, - after: value.as_str() - } - } -} - -impl<'a> Token<'a> { - pub fn new(tt: TokenType, word: &'a str) -> Self { - Token { - tt, - word - } +pub fn lex_str(this: &str) -> Vec { + let mut buf = Vec::new(); + let mut lexer = TokenType::lexer(this); + while let Some(Ok(token)) = lexer.next() { + buf.push(token); } - pub fn empty() -> Self { - Token { - tt: EOF, - word: "" - } - } + buf } diff --git a/src/main.rs b/src/main.rs index 41153fd..8443c19 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,43 +1,46 @@ #![allow(warnings)] pub mod lex; +use crate::lex::tok::*; pub mod codegen; use crate::codegen::fasm::*; use crate::parse::ast::*; pub mod parse; fn main() { - let fc = fasm_codegen!( - vec![ - Expr::VarDefinition(VarDefinition {name: "goren", value: Value::Number(10)}), - Expr::MathExpr(Math { - left: &Value::Var(VarReference { name: "goren"}), - right: &Value::Number(17), - operator: MathOperator::OP_MULT - } - ), - Expr::FunDefinition(FunDefinition { - name: "adder", contents: vec![ - Expr::MathExpr( - Math { - left: &Value::Param(ParamReference {param_number: 0}), - right: &Value::Param(ParamReference {param_number: 1}), - operator: MathOperator::OP_ADD - } - ) - ] - }), + // let fc = fasm_codegen!( + // vec![ + // Expr::VarDefinition(VarDefinition {name: "goren", value: Value::Number(10)}), + // Expr::MathExpr(Math { + // left: &Value::Var(VarReference { name: "goren"}), + // right: &Value::Number(17), + // operator: MathOperator::OP_MULT + // } + // ), + // Expr::FunDefinition(FunDefinition { + // name: "adder", contents: vec![ + // Expr::MathExpr( + // Math { + // left: &Value::Param(ParamReference {param_number: 0}), + // right: &Value::Param(ParamReference {param_number: 1}), + // operator: MathOperator::OP_ADD + // } + // ) + // ] + // }), - Expr::FunCall( - FunCall { - name: "adder", - params: vec![Value::Var(VarReference {name: "goren"}), Value::Number(6)] - } - ), + // Expr::FunCall( + // FunCall { + // name: "adder", + // params: vec![Value::Var(VarReference {name: "goren"}), Value::Number(6)] + // } + // ), - Expr::Breakpoint - ] - ); + // Expr::Breakpoint + // ] + // ); - println!("{}", fc); + + // println!("{}", fc); + println!("{:?}", lex_str("fnaf main() {}")); }