This commit is contained in:
Erin 2023-10-03 23:28:57 +02:00 committed by ondra05
parent 1190ac5bce
commit 840c75f891
7 changed files with 179 additions and 55 deletions

28
Cargo.lock generated
View file

@ -55,6 +55,26 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "literify"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd552332051e9b3db140d34a371dcc0ed378b72a9227b5273070af58ea34abf4"
dependencies = [
"litrs",
"proc-macro2",
"quote",
]
[[package]]
name = "litrs"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f17c3668f3cc1132437cdadc93dab05e52d592f06948d3f64828430c36e4a70"
dependencies = [
"proc-macro2",
]
[[package]] [[package]]
name = "logos" name = "logos"
version = "0.13.0" version = "0.13.0"
@ -93,6 +113,12 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "paste"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.67" version = "1.0.67"
@ -123,7 +149,9 @@ version = "0.1.0"
dependencies = [ dependencies = [
"bumpalo", "bumpalo",
"lasso", "lasso",
"literify",
"logos", "logos",
"paste",
] ]
[[package]] [[package]]

View file

@ -6,4 +6,6 @@ edition = "2021"
[dependencies] [dependencies]
bumpalo = { version = "3", features = ["collections"] } bumpalo = { version = "3", features = ["collections"] }
lasso = "0.7" lasso = "0.7"
literify = "0.2"
logos = "0.13" logos = "0.13"
paste = "1.0"

View file

@ -3,6 +3,7 @@
mod syntax; mod syntax;
mod utils; mod utils;
use bumpalo::Bump;
use std::io::{stdin, Read}; use std::io::{stdin, Read};
use utils::default; use utils::default;
@ -10,5 +11,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut buf = default(); let mut buf = default();
stdin().read_to_string(&mut buf)?; stdin().read_to_string(&mut buf)?;
let arena = Bump::new();
println!("{:?}", syntax::parser::parse(&buf, &arena));
Ok(()) Ok(())
} }

View file

@ -44,15 +44,10 @@ pub enum Expr<'a> {
Call(ExprRef<'a>, ExprList<'a>), Call(ExprRef<'a>, ExprList<'a>),
Binary(Spanned<BinaryOp>, ExprRef<'a>, ExprRef<'a>), Binary(Spanned<BinaryOp>, ExprRef<'a>, ExprRef<'a>),
Unary(Spanned<UnaryOp>, ExprRef<'a>), Unary(Spanned<UnaryOp>, ExprRef<'a>),
Def {
kind: DefKind,
ident: Spanned<Ident>,
ty: Option<Spanned<Type>>,
init: Option<ExprRef<'a>>,
},
Set(ExprRef<'a>, ExprRef<'a>), Set(ExprRef<'a>, ExprRef<'a>),
Loop(ExprList<'a>), Loop(ExprList<'a>),
Block(ExprList<'a>), Block(ExprList<'a>),
Definition(Definition<'a>),
Switch { Switch {
on: ExprRef<'a>, on: ExprRef<'a>,
branches: &'a [(Spanned<Switcher>, ExprRef<'a>)], branches: &'a [(Spanned<Switcher>, ExprRef<'a>)],
@ -62,15 +57,25 @@ pub enum Expr<'a> {
branches: &'a [(ExprRef<'a>, ExprRef<'a>)], branches: &'a [(ExprRef<'a>, ExprRef<'a>)],
else_: ExprRef<'a>, else_: ExprRef<'a>,
}, },
Break(Option<ExprRef<'a>>),
Return(Option<ExprRef<'a>>),
Continue,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Definition<'a> {
Binding {
kind: DefKind,
ident: Spanned<Ident>,
ty: Option<Spanned<Type>>,
init: Option<ExprRef<'a>>,
},
Func { Func {
ident: Spanned<Ident>, ident: Spanned<Ident>,
params: &'a [(Spanned<Ident>, Spanned<Type>)], params: &'a [(Spanned<Ident>, Spanned<Type>)],
ret: Spanned<Type>, ret: Spanned<Type>,
body: ExprList<'a>, body: ExprList<'a>,
}, },
Break(Option<ExprRef<'a>>),
Return(Option<ExprRef<'a>>),
Continue,
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]

View file

@ -1,2 +1,3 @@
pub mod ast; pub mod ast;
pub mod parser;
pub mod token; pub mod token;

63
src/syntax/parser.rs Normal file
View file

@ -0,0 +1,63 @@
use bumpalo::Bump;
use logos::Logos;
use super::{
ast::{Definition, Spanned},
token::Token,
};
/// Shorthand for the `logos` lexer specialised to this crate's [`Token`] type.
type Lexer<'a> = logos::Lexer<'a, Token>;
/// Helper macro taking a parser expression followed by a comma-separated list
/// of patterns (a trailing comma is accepted).
///
/// The pattern list is bound to the `$pat` metavariable so that real patterns
/// such as `Some(_)` are accepted; without the leading `$` the matcher would
/// only accept the literal tokens `pat:pat`.
///
/// NOTE(review): the expansion is empty, so invoking the macro currently
/// produces no code — presumably the body is still to be written.
macro_rules! extract {
    (
        $self:expr,
        $($pat:pat),* $(,)?
    ) => {
    };
}
/// Parser state: the arena that results are allocated in and the token source.
///
/// `'a` is the lifetime of the arena borrow (and of everything allocated in
/// it); `'l` is the lifetime parameter of the lexer.
struct Parser<'a, 'l> {
/// Bump arena used for allocating the parser's output.
arena: &'a Bump,
/// Lexer that tokens are pulled from.
lexer: Lexer<'l>,
}
impl<'a, 'l> Parser<'a, 'l> {
    /// Pull the next token out of the lexer.
    ///
    /// Fails with [`ErrorKind::InvalidToken`] when the lexer reports an error
    /// and with [`ErrorKind::UnexpectedEnd`] when the lexer has nothing left
    /// to yield. Either error carries the lexer's current span.
    fn next(&mut self) -> Result<Token> {
        let kind = match self.lexer.next() {
            Some(Ok(token)) => return Ok(token),
            Some(Err(())) => ErrorKind::InvalidToken,
            None => ErrorKind::UnexpectedEnd,
        };
        Err(self.error(kind))
    }

    /// Attach the lexer's current span to `kind`, producing an [`Error`].
    #[inline]
    fn error(&self, kind: ErrorKind) -> Error {
        let span = self.lexer.span();
        Spanned::new(kind, span)
    }

    /// Run the parser to completion, consuming it.
    ///
    /// At present this only allocates an empty slice of definitions in the
    /// arena and returns it.
    fn run(self) -> Result<&'a [Definition<'a>]> {
        let definitions: &'a [Definition<'a>] =
            self.arena.alloc_slice_copy::<Definition<'a>>(&[]);
        Ok(definitions)
    }
}
/// The kinds of failure the parser can report.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
/// The lexer returned an error instead of a token.
InvalidToken,
/// The lexer ran out of tokens while another one was requested.
UnexpectedEnd,
}
/// A parse error: an [`ErrorKind`] wrapped in `Spanned`.
pub type Error = Spanned<ErrorKind>;
/// Module-local `Result` whose error type defaults to [`Error`].
type Result<T, E = Error> = std::result::Result<T, E>;
/// Parse `code`, returning a slice of definitions allocated in `arena`.
///
/// Builds a lexer over `code`, hands it to a fresh `Parser` together with the
/// arena, and runs that parser to completion.
pub fn parse<'a>(code: &str, arena: &'a Bump) -> Result<&'a [Definition<'a>]> {
    let lexer = Token::lexer(code);
    let parser = Parser { arena, lexer };
    parser.run()
}

View file

@ -6,53 +6,72 @@ pub struct Extras {
pub interner: Rodeo, pub interner: Rodeo,
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)] macro_rules! token_def {
#[logos(extras = Extras)] (
#[logos(skip r"[ \t\n\f]+")] unit { $($u_name:ident : $($u_tok:literal),* $(,)?;)* }
#[logos(skip r"\\.*")] keyword { $($kw:tt),* $(,)* }
#[rustfmt::skip] else { $($e_tt:tt)* }
pub enum Token { ) => {
#[token("(")] LeftParen, literify::literify! (paste::paste! {
#[token(")")] RightParen, #[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
#[token("{")] LeftCurly, #[logos(extras = Extras)]
#[token("}")] RightCurly, #[logos(skip r"[ \t\n\f]+")]
#[token(".")] Dot, #[logos(skip r"\\.*")]
#[token(",")] Comma, pub enum Token {
#[token(":")] Colon, $(
#[token(";")] Semicolon, $(#[token($u_tok)])*
$u_name,
)*
#[token("")] //______ $(
#[token("<-")] LArrow, #[token(~($kw))]
#[token("")] //______ [<$kw:camel>],
#[token("->")] RArrow, )*
#[token("+")] Plus, $($e_tt)*
#[token("-")] Minus, }
#[token("*")] Star,
#[token("/")] Slash,
#[token("=")] Equ, macro_rules! T {
#[token("")] //___ $($(
#[token("/=")] Neq, ($u_tok) => { $crate::syntax::token::Token::$u_name };
#[token("<")] Lt, )*)*
#[token(">")] Gt, }
#[token("")] //____ });
#[token("<=")] LtEq, };
#[token("")] //____ }
#[token(">=")] GtEq,
#[token("func")] Func, token_def!(
#[token("var")] Var, unit {
#[token("const")] Const, LeftParen : "(";
#[token("include")] Include, RightParen: ")";
#[token("switch")] Switch, LeftCurly : "{";
#[token("loop")] Loop, RightCurly: "}";
#[token("return")] Return, Dot : ".";
#[token("break")] Break, Comma : ",";
#[token("continue")] Continue, Colon : ":";
#[token("uninit")] Uninit, Semicolon : ";";
#[token("asm")] Asm,
LArrow: "", "<-";
RArrow: "", "->";
Plus : "+";
Minus : "-";
Star : "*";
Slash : "/";
Precent: "%";
Equ : "=";
Neq : "", "/=";
Lt : "<";
Gt : ">";
LtEq : "", "<=";
GtEq : "", ">=";
}
keyword { func, var, const, include, switch, loop,
return, break, continue, uninit, asm }
else {
#[regex( #[regex(
r"\p{XID_Start}\p{XID_Continue}*", r"\p{XID_Start}\p{XID_Continue}*",
|l| intern(l, l.slice()), |l| intern(l, l.slice()),
@ -70,7 +89,10 @@ pub enum Token {
"[0-9]+", "[0-9]+",
|l| l.slice().parse::<u64>().ok() |l| l.slice().parse::<u64>().ok()
)] Int(u64), )] Int(u64),
} }
);
pub(crate) use T;
fn intern(lexer: &mut Lexer<'_, Token>, s: &str) -> Spur { fn intern(lexer: &mut Lexer<'_, Token>, s: &str) -> Spur {
lexer.extras.interner.get_or_intern(s) lexer.extras.interner.get_or_intern(s)