Started working on lexer. Temporarily using an external dependency.

This commit is contained in:
Goren Barak 2023-11-24 10:59:30 -05:00
parent 2f942d3371
commit 503888bcee
7 changed files with 177 additions and 264 deletions

88
Cargo.lock generated
View file

@ -2,6 +2,94 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "beef"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "logos"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c000ca4d908ff18ac99b93a062cb8958d331c3220719c52e77cb19cc6ac5d2c1"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-codegen"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc487311295e0002e452025d6b580b77bb17286de87b57138f3b5db711cded68"
dependencies = [
"beef",
"fnv",
"proc-macro2",
"quote",
"regex-syntax",
"syn",
]
[[package]]
name = "logos-derive"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbfc0d229f1f42d790440136d941afd806bc9e949e2bcb8faa813b0f00d1267e"
dependencies = [
"logos-codegen",
]
[[package]]
name = "proc-macro2"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]] [[package]]
name = "skylang" name = "skylang"
version = "0.1.0" version = "0.1.0"
dependencies = [
"logos",
]
[[package]]
name = "syn"
version = "2.0.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

View file

@ -6,3 +6,4 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
logos = "0.13.0"

View file

@ -153,7 +153,7 @@ pub fn fasm_codegen(exprs: &Vec<Expr>, not_a_function: bool) -> String {
// Return something from a function. // Return something from a function.
Expr::Return(e) => { Expr::Return(e) => {
// Do the operation that should later be returned. // Do the operation that should later be returned.
asm_start.push_str(fasm_codegen!(fun: &e)); asm_start.push_str(fasm_codegen!(fun: &e).as_str());
// Move the return value to rbp + 8. // Move the return value to rbp + 8.
asm_start.push_str("mov [rbp + 8], rax"); asm_start.push_str("mov [rbp + 8], rax");
// 8(%rbp) ← return_value // 8(%rbp) ← return_value

View file

@ -1,2 +1 @@
pub mod tok; pub mod tok;
pub mod parse;

View file

@ -1,156 +0,0 @@
#![allow(unused)]
use super::tok::*;
pub fn match_single_char<'a>(word: &'a str) -> Option<Token<'a>> {
macro_rules! tok {
($tt:expr) => {
Some(Token::new($tt, word))
};
};
let tok = match word {
";" => tok!(Semicolon),
"=" => tok!(Equal),
"(" => tok!(LeftParen),
")" => tok!(RightParen),
"{" => tok!(LeftBrace),
"}" => tok!(RightBrace),
"," => tok!(Comma),
"." => tok!(Dot),
"-" => tok!(Minus),
"+" => tok!(Plus),
"/" => tok!(Slash),
"*" => tok!(Star),
"%" => tok!(Percent),
"!" => tok!(Bang),
":" => tok!(Colon),
"<" => tok!(Less),
">" => tok!(Greater),
_ => None
};
tok
}
pub fn match_keyword<'a>(word: &'a str) -> Option<Token<'a>> {
macro_rules! tok {
($tt:expr) => {
Some(Token::new($tt, word))
};
};
let tok = match word {
"fn" => tok!(Fn),
"let" => tok!(Let),
"if" => tok!(If),
"else" => tok!(Else),
"while" => tok!(While),
"elif" => tok!(Elif),
"return" => tok!(Return),
"for" => tok!(For),
"in" => tok!(In),
"break" => tok!(Break),
"continue" => tok!(Continue),
"true" => tok!(True),
"false" => tok!(False),
_ => None
};
tok
}
pub fn match_two_char<'a>(word: &'a str) -> Option<Token<'a>> {
macro_rules! tok {
($tt:expr) => {
Some(Token::new($tt, word))
};
};
let tok = match word {
"==" => tok!(EqualEqual),
"!=" => tok!(BangEqual),
"<=" => tok!(LessEqual),
">=" => tok!(GreaterEqual),
_ => None
};
tok
}
pub fn match_string_literal<'a>(word: &'a str) -> Option<Token<'a>> {
macro_rules! tok {
($tt:expr) => {
Some(Token::new($tt, word))
};
};
let mut chars = word.chars();
if word.starts_with("\"") {
chars.next();
while let Some(char) = chars.next() {
if char == '\"' {
return tok!(String);
}
}
}
if word.starts_with("\'") {
while let Some(char) = chars.next() {
if char == '\'' {
return tok!(String);
}
}
}
None
}
pub fn match_int_literal<'a>(word: &'a str) -> Option<Token<'a>> {
macro_rules! tok {
($tt:expr) => {
Some(Token::new($tt, word))
};
};
let mut chars = word.chars();
let mut tok = None;
while let Some(char) = chars.next() {
if char.is_digit(10) {
tok = tok!(Number);
} else {
return None;
}
}
tok
}
pub fn match_identifier<'a>(word: &'a str) -> Option<Token<'a>> {
macro_rules! tok {
($tt:expr) => {
Some(Token::new($tt, word))
};
};
let mut chars = word.chars().peekable();
let mut tok: Option<Token<'a>> = None;
if chars.peek().unwrap_or(&'❌').is_ascii_alphabetic() {
while let Some(char) = chars.next() {
if char.is_ascii() && match_single_char(char.to_string().as_str()).is_none() {
tok = tok!(Identifier);
} else {
return None;
}
}
} else {
return None;
}
tok
}

View file

@ -1,128 +1,106 @@
#![allow(unused)] #![allow(unused)]
use logos::Logos;
use logos::Lexer;
use core::iter::Peekable;
pub use TokenType::*; pub use TokenType::*;
use super::parse::*;
#[derive(Debug)] #[derive(Debug, Logos)]
pub struct Token<'a> { #[logos(skip r"[ \t\n\f]+")]
tt: TokenType,
word: &'a str,
}
#[derive(Debug)]
pub enum TokenType { pub enum TokenType {
EOF,
// SINGLE CHARACTER TOKENS // SINGLE CHARACTER TOKENS
#[token(";")]
Semicolon, // ; Semicolon, // ;
#[token("=")]
Equal, // = Equal, // =
#[token("(")]
LeftParen, // ( LeftParen, // (
#[token(")")]
RightParen, // ) RightParen, // )
#[token("{")]
LeftBrace, // { LeftBrace, // {
#[token("}")]
RightBrace, // } RightBrace, // }
#[token(",")]
Comma, // , Comma, // ,
#[token(".")]
Dot, // . Dot, // .
#[token("-")]
Minus, // - Minus, // -
#[token("+")]
Plus, // + Plus, // +
#[token("/")]
Slash, // / Slash, // /
#[token("*")]
Star, // * Star, // *
#[token("%")]
Percent, // % Percent, // %
#[token("!")]
Bang, // ! Bang, // !
#[token(":")]
Colon, // : Colon, // :
#[token("<")]
Less, // < Less, // <
#[token(">")]
Greater, // > Greater, // >
#[token("|")]
Pipe, // |
// KEYWORDS // KEYWORDS
Fn, // fn #[token("fnaf")]
Fnaf, // fnaf
#[token("let")]
Let, // let Let, // let
#[token("if")]
If, // if If, // if
#[token("else")]
Else, // else Else, // else
#[token("while")]
While, // while While, // while
#[token("elif")]
Elif, // elif Elif, // elif
#[token("return")]
Return, // return Return, // return
#[token("for")]
For, // for For, // for
#[token("in")]
In, // in In, // in
#[token("break")]
Break, // break Break, // break
#[token("continue")]
Continue, // continue Continue, // continue
// TWO CHARACTER TOKENS // TWO CHARACTER TOKENS
#[token("==")]
EqualEqual, // == EqualEqual, // ==
#[token("!=")]
BangEqual, // != BangEqual, // !=
#[token("<=")]
LessEqual, // <= LessEqual, // <=
#[token(">=")]
GreaterEqual, // >= GreaterEqual, // >=
// LITERALS // LITERALS
#[regex("(\"[^\".+]\")|('[^'.+]')")]
String, // A string literal. String, // A string literal.
#[regex("[0-9]+")]
Number, // An integer. Number, // An integer.
#[regex(r#"[^[0-9]^"^-^[ \t\n\f]^\.^=^(^)^{^}.]+[^"^-^=^\..^[ \t\n\f]^(^)^{^}]*"#)]
Identifier, // An identifier. Identifier, // An identifier.
#[token("true")]
True, // true True, // true
#[token("false")]
False, // false False, // false
Null, // None #[token("none")]
Null, // none
// ERROR
Error, // A syntax error.
} }
#[derive(Debug)] pub fn lex_str(this: &str) -> Vec<TokenType> {
pub struct Lexer<'a> { let mut buf = Vec::new();
source: &'a str, let mut lexer = TokenType::lexer(this);
tokens: Vec<Token<'a>>, while let Some(Ok(token)) = lexer.next() {
current: usize, buf.push(token);
after: &'a str }
}
buf
impl<'a> Lexer<'a> {
pub fn new() -> Self {
Lexer {
source: "",
tokens: Vec::new(),
current: 0,
after: ""
}
}
}
impl<'a> std::iter::Iterator for Lexer<'a> {
type Item = Option<char>;
fn next(&mut self) -> Option<Self::Item> {
unimplemented!("Iterating over lexer is not implemented.");
}
}
impl<'a> From<&'a str> for Lexer<'a> {
fn from(value: &'a str) -> Self {
Lexer {
source: value,
tokens: Vec::new(),
current: 0,
after: value
}
}
}
impl<'a> From<&'a std::string::String> for Lexer<'a> {
fn from(value: &'a std::string::String) -> Self {
Lexer {
source: value.as_str(),
tokens: Vec::new(),
current: 0,
after: value.as_str()
}
}
}
impl<'a> Token<'a> {
pub fn new(tt: TokenType, word: &'a str) -> Self {
Token {
tt,
word
}
}
pub fn empty() -> Self {
Token {
tt: EOF,
word: ""
}
}
} }

View file

@ -1,43 +1,46 @@
#![allow(warnings)] #![allow(warnings)]
pub mod lex; pub mod lex;
use crate::lex::tok::*;
pub mod codegen; pub mod codegen;
use crate::codegen::fasm::*; use crate::codegen::fasm::*;
use crate::parse::ast::*; use crate::parse::ast::*;
pub mod parse; pub mod parse;
fn main() { fn main() {
let fc = fasm_codegen!( // let fc = fasm_codegen!(
vec![ // vec![
Expr::VarDefinition(VarDefinition {name: "goren", value: Value::Number(10)}), // Expr::VarDefinition(VarDefinition {name: "goren", value: Value::Number(10)}),
Expr::MathExpr(Math { // Expr::MathExpr(Math {
left: &Value::Var(VarReference { name: "goren"}), // left: &Value::Var(VarReference { name: "goren"}),
right: &Value::Number(17), // right: &Value::Number(17),
operator: MathOperator::OP_MULT // operator: MathOperator::OP_MULT
} // }
), // ),
Expr::FunDefinition(FunDefinition { // Expr::FunDefinition(FunDefinition {
name: "adder", contents: vec![ // name: "adder", contents: vec![
Expr::MathExpr( // Expr::MathExpr(
Math { // Math {
left: &Value::Param(ParamReference {param_number: 0}), // left: &Value::Param(ParamReference {param_number: 0}),
right: &Value::Param(ParamReference {param_number: 1}), // right: &Value::Param(ParamReference {param_number: 1}),
operator: MathOperator::OP_ADD // operator: MathOperator::OP_ADD
} // }
) // )
] // ]
}), // }),
Expr::FunCall( // Expr::FunCall(
FunCall { // FunCall {
name: "adder", // name: "adder",
params: vec![Value::Var(VarReference {name: "goren"}), Value::Number(6)] // params: vec![Value::Var(VarReference {name: "goren"}), Value::Number(6)]
} // }
), // ),
Expr::Breakpoint // Expr::Breakpoint
] // ]
); // );
println!("{}", fc);
// println!("{}", fc);
println!("{:?}", lex_str("fnaf main() {}"));
} }