Started working on lexer. Temporarily using an external dependency.
This commit is contained in:
parent
2f942d3371
commit
503888bcee
88
Cargo.lock
generated
88
Cargo.lock
generated
|
@ -2,6 +2,94 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "beef"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "logos"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c000ca4d908ff18ac99b93a062cb8958d331c3220719c52e77cb19cc6ac5d2c1"
|
||||
dependencies = [
|
||||
"logos-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-codegen"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc487311295e0002e452025d6b580b77bb17286de87b57138f3b5db711cded68"
|
||||
dependencies = [
|
||||
"beef",
|
||||
"fnv",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex-syntax",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-derive"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbfc0d229f1f42d790440136d941afd806bc9e949e2bcb8faa813b0f00d1267e"
|
||||
dependencies = [
|
||||
"logos-codegen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
|
||||
|
||||
[[package]]
|
||||
name = "skylang"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"logos",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.39"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
|
|
@ -6,3 +6,4 @@ edition = "2021"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
logos = "0.13.0"
|
||||
|
|
|
@ -153,7 +153,7 @@ pub fn fasm_codegen(exprs: &Vec<Expr>, not_a_function: bool) -> String {
|
|||
// Return something from a function.
|
||||
Expr::Return(e) => {
|
||||
// Do the operation that should later be returned.
|
||||
asm_start.push_str(fasm_codegen!(fun: &e));
|
||||
asm_start.push_str(fasm_codegen!(fun: &e).as_str());
|
||||
// Move the return value to rbp + 8.
|
||||
asm_start.push_str("mov [rbp + 8], rax");
|
||||
// 8(%rbp) ← return_value
|
||||
|
|
|
@ -1,2 +1 @@
|
|||
pub mod tok;
|
||||
pub mod parse;
|
||||
|
|
156
src/lex/parse.rs
156
src/lex/parse.rs
|
@ -1,156 +0,0 @@
|
|||
#![allow(unused)]
|
||||
|
||||
use super::tok::*;
|
||||
|
||||
|
||||
pub fn match_single_char<'a>(word: &'a str) -> Option<Token<'a>> {
|
||||
macro_rules! tok {
|
||||
($tt:expr) => {
|
||||
Some(Token::new($tt, word))
|
||||
};
|
||||
};
|
||||
|
||||
let tok = match word {
|
||||
";" => tok!(Semicolon),
|
||||
"=" => tok!(Equal),
|
||||
"(" => tok!(LeftParen),
|
||||
")" => tok!(RightParen),
|
||||
"{" => tok!(LeftBrace),
|
||||
"}" => tok!(RightBrace),
|
||||
"," => tok!(Comma),
|
||||
"." => tok!(Dot),
|
||||
"-" => tok!(Minus),
|
||||
"+" => tok!(Plus),
|
||||
"/" => tok!(Slash),
|
||||
"*" => tok!(Star),
|
||||
"%" => tok!(Percent),
|
||||
"!" => tok!(Bang),
|
||||
":" => tok!(Colon),
|
||||
"<" => tok!(Less),
|
||||
">" => tok!(Greater),
|
||||
|
||||
_ => None
|
||||
};
|
||||
|
||||
tok
|
||||
}
|
||||
|
||||
pub fn match_keyword<'a>(word: &'a str) -> Option<Token<'a>> {
|
||||
macro_rules! tok {
|
||||
($tt:expr) => {
|
||||
Some(Token::new($tt, word))
|
||||
};
|
||||
};
|
||||
|
||||
let tok = match word {
|
||||
"fn" => tok!(Fn),
|
||||
"let" => tok!(Let),
|
||||
"if" => tok!(If),
|
||||
"else" => tok!(Else),
|
||||
"while" => tok!(While),
|
||||
"elif" => tok!(Elif),
|
||||
"return" => tok!(Return),
|
||||
"for" => tok!(For),
|
||||
"in" => tok!(In),
|
||||
"break" => tok!(Break),
|
||||
"continue" => tok!(Continue),
|
||||
"true" => tok!(True),
|
||||
"false" => tok!(False),
|
||||
|
||||
_ => None
|
||||
};
|
||||
|
||||
tok
|
||||
}
|
||||
|
||||
pub fn match_two_char<'a>(word: &'a str) -> Option<Token<'a>> {
|
||||
macro_rules! tok {
|
||||
($tt:expr) => {
|
||||
Some(Token::new($tt, word))
|
||||
};
|
||||
};
|
||||
|
||||
let tok = match word {
|
||||
"==" => tok!(EqualEqual),
|
||||
"!=" => tok!(BangEqual),
|
||||
"<=" => tok!(LessEqual),
|
||||
">=" => tok!(GreaterEqual),
|
||||
|
||||
_ => None
|
||||
};
|
||||
|
||||
tok
|
||||
}
|
||||
|
||||
pub fn match_string_literal<'a>(word: &'a str) -> Option<Token<'a>> {
|
||||
macro_rules! tok {
|
||||
($tt:expr) => {
|
||||
Some(Token::new($tt, word))
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
let mut chars = word.chars();
|
||||
|
||||
if word.starts_with("\"") {
|
||||
chars.next();
|
||||
while let Some(char) = chars.next() {
|
||||
if char == '\"' {
|
||||
return tok!(String);
|
||||
}
|
||||
}
|
||||
}
|
||||
if word.starts_with("\'") {
|
||||
while let Some(char) = chars.next() {
|
||||
if char == '\'' {
|
||||
return tok!(String);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn match_int_literal<'a>(word: &'a str) -> Option<Token<'a>> {
|
||||
macro_rules! tok {
|
||||
($tt:expr) => {
|
||||
Some(Token::new($tt, word))
|
||||
};
|
||||
};
|
||||
|
||||
let mut chars = word.chars();
|
||||
let mut tok = None;
|
||||
while let Some(char) = chars.next() {
|
||||
if char.is_digit(10) {
|
||||
tok = tok!(Number);
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
tok
|
||||
}
|
||||
|
||||
pub fn match_identifier<'a>(word: &'a str) -> Option<Token<'a>> {
|
||||
macro_rules! tok {
|
||||
($tt:expr) => {
|
||||
Some(Token::new($tt, word))
|
||||
};
|
||||
};
|
||||
|
||||
let mut chars = word.chars().peekable();
|
||||
let mut tok: Option<Token<'a>> = None;
|
||||
if chars.peek().unwrap_or(&'❌').is_ascii_alphabetic() {
|
||||
while let Some(char) = chars.next() {
|
||||
if char.is_ascii() && match_single_char(char.to_string().as_str()).is_none() {
|
||||
tok = tok!(Identifier);
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
|
||||
tok
|
||||
}
|
130
src/lex/tok.rs
130
src/lex/tok.rs
|
@ -1,128 +1,106 @@
|
|||
#![allow(unused)]
|
||||
use logos::Logos;
|
||||
use logos::Lexer;
|
||||
use core::iter::Peekable;
|
||||
|
||||
pub use TokenType::*;
|
||||
use super::parse::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Token<'a> {
|
||||
tt: TokenType,
|
||||
word: &'a str,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Logos)]
|
||||
#[logos(skip r"[ \t\n\f]+")]
|
||||
pub enum TokenType {
|
||||
EOF,
|
||||
|
||||
// SINGLE CHARACTER TOKENS
|
||||
#[token(";")]
|
||||
Semicolon, // ;
|
||||
#[token("=")]
|
||||
Equal, // =
|
||||
#[token("(")]
|
||||
LeftParen, // (
|
||||
#[token(")")]
|
||||
RightParen, // )
|
||||
#[token("{")]
|
||||
LeftBrace, // {
|
||||
#[token("}")]
|
||||
RightBrace, // }
|
||||
#[token(",")]
|
||||
Comma, // ,
|
||||
#[token(".")]
|
||||
Dot, // .
|
||||
#[token("-")]
|
||||
Minus, // -
|
||||
#[token("+")]
|
||||
Plus, // +
|
||||
#[token("/")]
|
||||
Slash, // /
|
||||
#[token("*")]
|
||||
Star, // *
|
||||
#[token("%")]
|
||||
Percent, // %
|
||||
#[token("!")]
|
||||
Bang, // !
|
||||
#[token(":")]
|
||||
Colon, // :
|
||||
#[token("<")]
|
||||
Less, // <
|
||||
#[token(">")]
|
||||
Greater, // >
|
||||
#[token("|")]
|
||||
Pipe, // |
|
||||
|
||||
// KEYWORDS
|
||||
Fn, // fn
|
||||
#[token("fnaf")]
|
||||
Fnaf, // fnaf
|
||||
#[token("let")]
|
||||
Let, // let
|
||||
#[token("if")]
|
||||
If, // if
|
||||
#[token("else")]
|
||||
Else, // else
|
||||
#[token("while")]
|
||||
While, // while
|
||||
#[token("elif")]
|
||||
Elif, // elif
|
||||
#[token("return")]
|
||||
Return, // return
|
||||
#[token("for")]
|
||||
For, // for
|
||||
#[token("in")]
|
||||
In, // in
|
||||
#[token("break")]
|
||||
Break, // break
|
||||
#[token("continue")]
|
||||
Continue, // continue
|
||||
|
||||
// TWO CHARACTER TOKENS
|
||||
#[token("==")]
|
||||
EqualEqual, // ==
|
||||
#[token("!=")]
|
||||
BangEqual, // !=
|
||||
#[token("<=")]
|
||||
LessEqual, // <=
|
||||
#[token(">=")]
|
||||
GreaterEqual, // >=
|
||||
|
||||
// LITERALS
|
||||
#[regex("(\"[^\".+]\")|('[^'.+]')")]
|
||||
String, // A string literal.
|
||||
#[regex("[0-9]+")]
|
||||
Number, // An integer.
|
||||
#[regex(r#"[^[0-9]^"^-^[ \t\n\f]^\.^=^(^)^{^}.]+[^"^-^=^\..^[ \t\n\f]^(^)^{^}]*"#)]
|
||||
Identifier, // An identifier.
|
||||
#[token("true")]
|
||||
True, // true
|
||||
#[token("false")]
|
||||
False, // false
|
||||
Null, // None
|
||||
|
||||
// ERROR
|
||||
Error, // A syntax error.
|
||||
#[token("none")]
|
||||
Null, // none
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Lexer<'a> {
|
||||
source: &'a str,
|
||||
tokens: Vec<Token<'a>>,
|
||||
current: usize,
|
||||
after: &'a str
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new() -> Self {
|
||||
Lexer {
|
||||
source: "",
|
||||
tokens: Vec::new(),
|
||||
current: 0,
|
||||
after: ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::iter::Iterator for Lexer<'a> {
|
||||
type Item = Option<char>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
unimplemented!("Iterating over lexer is not implemented.");
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a str> for Lexer<'a> {
|
||||
fn from(value: &'a str) -> Self {
|
||||
Lexer {
|
||||
source: value,
|
||||
tokens: Vec::new(),
|
||||
current: 0,
|
||||
after: value
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a std::string::String> for Lexer<'a> {
|
||||
fn from(value: &'a std::string::String) -> Self {
|
||||
Lexer {
|
||||
source: value.as_str(),
|
||||
tokens: Vec::new(),
|
||||
current: 0,
|
||||
after: value.as_str()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Token<'a> {
|
||||
pub fn new(tt: TokenType, word: &'a str) -> Self {
|
||||
Token {
|
||||
tt,
|
||||
word
|
||||
}
|
||||
pub fn lex_str(this: &str) -> Vec<TokenType> {
|
||||
let mut buf = Vec::new();
|
||||
let mut lexer = TokenType::lexer(this);
|
||||
while let Some(Ok(token)) = lexer.next() {
|
||||
buf.push(token);
|
||||
}
|
||||
|
||||
pub fn empty() -> Self {
|
||||
Token {
|
||||
tt: EOF,
|
||||
word: ""
|
||||
}
|
||||
}
|
||||
buf
|
||||
}
|
||||
|
|
63
src/main.rs
63
src/main.rs
|
@ -1,43 +1,46 @@
|
|||
#![allow(warnings)]
|
||||
|
||||
pub mod lex;
|
||||
use crate::lex::tok::*;
|
||||
pub mod codegen;
|
||||
use crate::codegen::fasm::*;
|
||||
use crate::parse::ast::*;
|
||||
pub mod parse;
|
||||
|
||||
fn main() {
|
||||
let fc = fasm_codegen!(
|
||||
vec![
|
||||
Expr::VarDefinition(VarDefinition {name: "goren", value: Value::Number(10)}),
|
||||
Expr::MathExpr(Math {
|
||||
left: &Value::Var(VarReference { name: "goren"}),
|
||||
right: &Value::Number(17),
|
||||
operator: MathOperator::OP_MULT
|
||||
}
|
||||
),
|
||||
Expr::FunDefinition(FunDefinition {
|
||||
name: "adder", contents: vec![
|
||||
Expr::MathExpr(
|
||||
Math {
|
||||
left: &Value::Param(ParamReference {param_number: 0}),
|
||||
right: &Value::Param(ParamReference {param_number: 1}),
|
||||
operator: MathOperator::OP_ADD
|
||||
}
|
||||
)
|
||||
]
|
||||
}),
|
||||
// let fc = fasm_codegen!(
|
||||
// vec![
|
||||
// Expr::VarDefinition(VarDefinition {name: "goren", value: Value::Number(10)}),
|
||||
// Expr::MathExpr(Math {
|
||||
// left: &Value::Var(VarReference { name: "goren"}),
|
||||
// right: &Value::Number(17),
|
||||
// operator: MathOperator::OP_MULT
|
||||
// }
|
||||
// ),
|
||||
// Expr::FunDefinition(FunDefinition {
|
||||
// name: "adder", contents: vec![
|
||||
// Expr::MathExpr(
|
||||
// Math {
|
||||
// left: &Value::Param(ParamReference {param_number: 0}),
|
||||
// right: &Value::Param(ParamReference {param_number: 1}),
|
||||
// operator: MathOperator::OP_ADD
|
||||
// }
|
||||
// )
|
||||
// ]
|
||||
// }),
|
||||
|
||||
Expr::FunCall(
|
||||
FunCall {
|
||||
name: "adder",
|
||||
params: vec![Value::Var(VarReference {name: "goren"}), Value::Number(6)]
|
||||
}
|
||||
),
|
||||
// Expr::FunCall(
|
||||
// FunCall {
|
||||
// name: "adder",
|
||||
// params: vec![Value::Var(VarReference {name: "goren"}), Value::Number(6)]
|
||||
// }
|
||||
// ),
|
||||
|
||||
Expr::Breakpoint
|
||||
]
|
||||
);
|
||||
// Expr::Breakpoint
|
||||
// ]
|
||||
// );
|
||||
|
||||
println!("{}", fc);
|
||||
|
||||
// println!("{}", fc);
|
||||
println!("{:?}", lex_str("fnaf main() {}"));
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue