holey-bytes/hblang/src/parser.rs

1222 lines
37 KiB
Rust
Raw Normal View History

2024-05-17 12:53:59 -05:00
use std::{
cell::{Cell, UnsafeCell},
2024-05-20 07:11:58 -05:00
io,
2024-05-19 11:20:42 -05:00
ops::{Deref, Not},
2024-05-17 12:53:59 -05:00
ptr::NonNull,
2024-05-20 07:11:58 -05:00
sync::atomic::AtomicUsize,
2024-05-17 12:53:59 -05:00
};
2024-05-09 16:41:59 -05:00
2024-05-12 04:52:58 -05:00
use crate::{
2024-06-23 02:09:33 -05:00
codegen,
2024-05-12 04:52:58 -05:00
ident::{self, Ident},
2024-05-19 11:20:42 -05:00
lexer::{Lexer, LineMap, Token, TokenKind},
2024-06-01 13:30:07 -05:00
log,
2024-05-12 04:52:58 -05:00
};
2024-05-12 16:19:45 -05:00
pub type Pos = u32;
pub type IdentFlags = u32;
2024-05-17 12:53:59 -05:00
pub type Symbols = Vec<Symbol>;
pub type FileId = u32;
2024-05-19 11:20:42 -05:00
pub type Loader<'a> = &'a (dyn Fn(&str, &str) -> io::Result<FileId> + 'a);
2024-05-20 07:11:58 -05:00
pub mod idfl {
use super::*;
2024-05-17 12:53:59 -05:00
2024-05-20 07:11:58 -05:00
macro_rules! flags {
($($name:ident,)*) => {
$(pub const $name: IdentFlags = 1 << (std::mem::size_of::<IdentFlags>() * 8 - 1 - ${index(0)});)*
pub const ALL: IdentFlags = 0 $(| $name)*;
};
2024-05-17 12:53:59 -05:00
}
2024-05-20 07:11:58 -05:00
flags! {
MUTABLE,
REFERENCED,
2024-06-01 13:30:07 -05:00
COMPTIME,
2024-05-17 12:53:59 -05:00
}
2024-06-01 13:30:07 -05:00
pub fn index(i: IdentFlags) -> u16 {
(i & !ALL) as _
2024-05-17 12:53:59 -05:00
}
}
2024-05-19 11:20:42 -05:00
pub fn no_loader(_: &str, _: &str) -> io::Result<FileId> {
Err(io::ErrorKind::NotFound.into())
2024-05-17 12:53:59 -05:00
}
2024-05-19 11:20:42 -05:00
#[derive(Debug)]
2024-05-17 12:53:59 -05:00
pub struct Symbol {
pub name: Ident,
pub flags: IdentFlags,
}
2024-06-01 13:30:07 -05:00
#[derive(Clone, Copy)]
2024-05-17 12:53:59 -05:00
struct ScopeIdent {
2024-05-12 04:52:58 -05:00
ident: Ident,
declared: bool,
2024-05-17 12:53:59 -05:00
flags: IdentFlags,
2024-05-12 04:52:58 -05:00
}
2024-05-09 16:41:59 -05:00
pub struct Parser<'a, 'b> {
2024-06-25 14:41:12 -05:00
path: &'b str,
loader: Loader<'b>,
lexer: Lexer<'b>,
arena: &'b Arena<'a>,
token: Token,
symbols: &'b mut Symbols,
ns_bound: usize,
trailing_sep: bool,
idents: Vec<ScopeIdent>,
captured: Vec<Ident>,
2024-05-09 16:41:59 -05:00
}
impl<'a, 'b> Parser<'a, 'b> {
2024-05-17 12:53:59 -05:00
pub fn new(arena: &'b Arena<'a>, symbols: &'b mut Symbols, loader: Loader<'b>) -> Self {
2024-05-13 02:38:33 -05:00
let mut lexer = Lexer::new("");
Self {
2024-05-17 12:53:59 -05:00
loader,
token: lexer.next(),
lexer,
2024-05-13 02:38:33 -05:00
path: "",
arena,
2024-05-17 12:53:59 -05:00
symbols,
2024-05-20 07:11:58 -05:00
ns_bound: 0,
2024-06-25 14:41:12 -05:00
trailing_sep: false,
2024-06-01 13:30:07 -05:00
idents: Vec::new(),
2024-05-20 07:11:58 -05:00
captured: Vec::new(),
}
2024-05-09 16:41:59 -05:00
}
2024-05-17 12:53:59 -05:00
pub fn file(&mut self, input: &'b str, path: &'b str) -> &'a [Expr<'a>] {
2024-05-13 02:38:33 -05:00
self.path = path;
self.lexer = Lexer::new(input);
self.token = self.lexer.next();
let f = self.collect_list(TokenKind::Semi, TokenKind::Eof, Self::expr);
2024-05-17 12:53:59 -05:00
2024-05-12 04:52:58 -05:00
self.pop_scope(0);
let has_undeclared = !self.idents.is_empty();
for id in self.idents.drain(..) {
let (line, col) = self.lexer.line_col(ident::pos(id.ident));
eprintln!(
"{}:{}:{} => undeclared identifier: {}",
2024-05-12 17:02:32 -05:00
self.path,
2024-05-12 04:52:58 -05:00
line,
col,
self.lexer.slice(ident::range(id.ident))
);
}
if has_undeclared {
2024-05-17 12:53:59 -05:00
// TODO: we need error recovery
2024-05-12 04:52:58 -05:00
unreachable!();
}
f
2024-05-09 16:41:59 -05:00
}
/// Consumes the current token and returns it, pulling the following one from the lexer.
fn next(&mut self) -> Token {
    let upcoming = self.lexer.next();
    std::mem::replace(&mut self.token, upcoming)
}
/// Parses a full expression and moves it into the arena.
fn ptr_expr(&mut self) -> &'a Expr<'a> {
    let parsed = self.expr();
    self.arena.alloc(parsed)
}
/// Parses one expression: a unit expression followed by any binary operators.
fn expr(&mut self) -> Expr<'a> {
    let head = self.unit_expr();
    self.bin_expr(head, 0)
}
2024-05-15 03:37:39 -05:00
fn bin_expr(&mut self, mut fold: Expr<'a>, min_prec: u8) -> Expr<'a> {
2024-05-10 15:54:12 -05:00
loop {
let Some(prec) = self.token.kind.precedence() else {
break;
};
2024-05-13 06:36:29 -05:00
if prec <= min_prec {
2024-05-10 15:54:12 -05:00
break;
}
2024-06-24 10:26:00 -05:00
let checkpoint = self.token.start;
2024-05-10 15:54:12 -05:00
let op = self.next().kind;
2024-06-01 13:30:07 -05:00
2024-06-24 10:26:00 -05:00
let op_ass = op.assign_op().map(|op| {
// this abomination reparses the left side, so that the desubaring adheres to the
// parser invariants.
let source = self.lexer.slice(0..checkpoint as usize);
let prev_lexer =
std::mem::replace(&mut self.lexer, Lexer::restore(source, fold.pos()));
let prev_token = std::mem::replace(&mut self.token, self.lexer.next());
let clone = self.expr();
self.lexer = prev_lexer;
self.token = prev_token;
(op, clone)
});
2024-05-10 15:54:12 -05:00
let right = self.unit_expr();
let right = self.bin_expr(right, prec);
2024-06-01 13:30:07 -05:00
let right = self.arena.alloc(right);
let left = self.arena.alloc(fold);
2024-05-15 03:37:39 -05:00
2024-06-24 10:26:00 -05:00
if let Some((op, clone)) = op_ass {
2024-05-20 07:11:58 -05:00
self.flag_idents(*left, idfl::MUTABLE);
2024-06-24 10:26:00 -05:00
let right = Expr::BinOp {
left: self.arena.alloc(clone),
op,
right,
};
2024-05-15 03:37:39 -05:00
fold = Expr::BinOp {
left,
op: TokenKind::Assign,
right: self.arena.alloc(right),
};
} else {
fold = Expr::BinOp { left, right, op };
if op == TokenKind::Assign {
2024-05-20 07:11:58 -05:00
self.flag_idents(*left, idfl::MUTABLE);
}
2024-05-15 03:37:39 -05:00
}
2024-05-10 15:54:12 -05:00
}
2024-05-15 03:37:39 -05:00
fold
2024-05-10 15:54:12 -05:00
}
2024-06-01 13:30:07 -05:00
fn resolve_ident(&mut self, token: Token, decl: bool) -> (Ident, u16) {
2024-06-24 10:26:00 -05:00
let is_ct = token.kind == TokenKind::CtIdent;
2024-05-12 04:52:58 -05:00
let name = self.lexer.slice(token.range());
2024-06-23 02:09:33 -05:00
if let Some(builtin) = codegen::ty::from_str(name) {
2024-05-17 12:53:59 -05:00
return (builtin, 0);
2024-05-12 04:52:58 -05:00
}
2024-05-20 07:11:58 -05:00
let (i, id) = match self
2024-05-12 04:52:58 -05:00
.idents
.iter_mut()
2024-05-20 07:11:58 -05:00
.enumerate()
.rfind(|(_, elem)| self.lexer.slice(ident::range(elem.ident)) == name)
2024-05-12 04:52:58 -05:00
{
2024-05-20 07:11:58 -05:00
Some((_, elem)) if decl && elem.declared => {
2024-05-12 04:52:58 -05:00
self.report(format_args!("redeclaration of identifier: {name}"))
}
2024-05-20 07:11:58 -05:00
Some((i, elem)) => {
2024-05-17 12:53:59 -05:00
elem.flags += 1;
2024-05-20 07:11:58 -05:00
(i, elem)
}
2024-05-12 04:52:58 -05:00
None => {
let id = ident::new(token.start, name.len() as _);
self.idents.push(ScopeIdent {
2024-05-17 12:53:59 -05:00
ident: id,
2024-05-12 04:52:58 -05:00
declared: false,
2024-05-17 12:53:59 -05:00
flags: 0,
2024-05-12 04:52:58 -05:00
});
2024-05-20 07:11:58 -05:00
(self.idents.len() - 1, self.idents.last_mut().unwrap())
2024-05-12 04:52:58 -05:00
}
};
id.declared |= decl;
2024-06-01 13:30:07 -05:00
id.flags |= idfl::COMPTIME * is_ct as u32;
if id.declared && self.ns_bound > i {
id.flags |= idfl::COMPTIME;
2024-05-20 07:11:58 -05:00
self.captured.push(id.ident);
}
2024-05-12 04:52:58 -05:00
2024-05-20 07:11:58 -05:00
(id.ident, idfl::index(id.flags))
2024-05-17 12:53:59 -05:00
}
/// Copies the source text covered by `range` into the arena.
fn move_str(&mut self, range: Token) -> &'a str {
    let text = self.lexer.slice(range.range());
    self.arena.alloc_str(text)
}
2024-05-10 15:54:12 -05:00
fn unit_expr(&mut self) -> Expr<'a> {
2024-05-11 15:22:08 -05:00
use {Expr as E, TokenKind as T};
2024-05-12 04:52:58 -05:00
let frame = self.idents.len();
2024-05-09 16:41:59 -05:00
let token = self.next();
2024-05-20 07:11:58 -05:00
let prev_boundary = self.ns_bound;
let prev_captured = self.captured.len();
2024-05-11 09:04:13 -05:00
let mut expr = match token.kind {
2024-06-25 11:39:59 -05:00
T::Directive if self.lexer.slice(token.range()) == "use" => {
2024-05-19 11:20:42 -05:00
self.expect_advance(TokenKind::LParen);
let str = self.expect_advance(TokenKind::String);
self.expect_advance(TokenKind::RParen);
let path = self.lexer.slice(str.range()).trim_matches('"');
E::Mod {
pos: token.start,
path: self.arena.alloc_str(path),
id: match (self.loader)(path, self.path) {
Ok(id) => id,
Err(e) => self.report(format_args!("error loading dependency: {e:#?}")),
},
}
}
2024-06-25 11:39:59 -05:00
T::Directive => E::Directive {
2024-05-14 16:07:32 -05:00
pos: token.start,
2024-05-17 12:53:59 -05:00
name: self.move_str(token),
2024-05-14 16:07:32 -05:00
args: {
self.expect_advance(T::LParen);
self.collect_list(T::Comma, T::RParen, Self::expr)
},
},
2024-05-12 13:10:50 -05:00
T::True => E::Bool {
pos: token.start,
value: true,
},
2024-05-12 05:16:40 -05:00
T::Struct => E::Struct {
2024-05-20 07:11:58 -05:00
fields: {
self.ns_bound = self.idents.len();
2024-05-12 05:16:40 -05:00
self.expect_advance(T::LBrace);
self.collect_list(T::Comma, T::RBrace, |s| {
let name = s.expect_advance(T::Ident);
s.expect_advance(T::Colon);
2024-06-01 13:30:07 -05:00
(s.move_str(name), s.expr())
2024-05-12 05:16:40 -05:00
})
},
2024-05-20 07:11:58 -05:00
captured: {
self.ns_bound = prev_boundary;
self.captured[prev_captured..].sort_unstable();
let preserved = self.captured[prev_captured..].partition_dedup().0.len();
self.captured.truncate(prev_captured + preserved);
self.arena.alloc_slice(&self.captured[prev_captured..])
},
pos: {
if self.ns_bound == 0 {
// we might save some memory
self.captured.clear();
}
token.start
},
2024-05-12 05:16:40 -05:00
},
2024-06-01 13:30:07 -05:00
T::Ident | T::CtIdent => {
2024-05-17 12:53:59 -05:00
let (id, index) = self.resolve_ident(token, self.token.kind == T::Decl);
let name = self.move_str(token);
2024-06-24 10:26:00 -05:00
E::Ident {
pos: token.start,
name,
id,
index,
}
2024-05-12 04:52:58 -05:00
}
2024-05-11 15:22:08 -05:00
T::If => E::If {
pos: token.start,
cond: self.ptr_expr(),
then: self.ptr_expr(),
else_: self.advance_if(T::Else).then(|| self.ptr_expr()),
},
T::Loop => E::Loop {
pos: token.start,
2024-05-11 11:16:27 -05:00
body: self.ptr_expr(),
},
2024-05-11 15:22:08 -05:00
T::Break => E::Break { pos: token.start },
T::Continue => E::Continue { pos: token.start },
T::Return => E::Return {
pos: token.start,
val: (self.token.kind != T::Semi).then(|| self.ptr_expr()),
2024-05-09 16:41:59 -05:00
},
2024-05-11 15:22:08 -05:00
T::Fn => E::Closure {
pos: token.start,
args: {
self.expect_advance(T::LParen);
self.collect_list(T::Comma, T::RParen, |s| {
2024-06-01 13:30:07 -05:00
let name = s.advance_ident();
2024-05-17 12:53:59 -05:00
let (id, index) = s.resolve_ident(name, true);
2024-05-11 15:22:08 -05:00
s.expect_advance(T::Colon);
2024-05-12 04:52:58 -05:00
Arg {
2024-05-17 12:53:59 -05:00
name: s.move_str(name),
2024-05-12 04:52:58 -05:00
id,
2024-05-17 12:53:59 -05:00
index,
2024-05-12 04:52:58 -05:00
ty: s.expr(),
}
2024-05-11 09:04:13 -05:00
})
2024-05-11 15:22:08 -05:00
},
ret: {
self.expect_advance(T::Colon);
self.ptr_expr()
},
body: self.ptr_expr(),
},
2024-05-20 07:11:58 -05:00
T::Band | T::Mul | T::Xor => E::UnOp {
2024-05-12 04:52:58 -05:00
pos: token.start,
op: token.kind,
2024-05-17 12:53:59 -05:00
val: {
2024-06-01 13:30:07 -05:00
let expr = if token.kind == T::Xor {
let expr = self.expr();
self.arena.alloc(expr)
} else {
self.ptr_unit_expr()
};
2024-05-19 11:20:42 -05:00
if token.kind == T::Band {
2024-05-20 07:11:58 -05:00
self.flag_idents(*expr, idfl::REFERENCED);
2024-05-19 11:20:42 -05:00
}
2024-05-17 12:53:59 -05:00
expr
},
2024-05-12 04:52:58 -05:00
},
2024-05-11 15:22:08 -05:00
T::LBrace => E::Block {
pos: token.start,
stmts: self.collect_list(T::Semi, T::RBrace, Self::expr),
2024-05-09 16:41:59 -05:00
},
2024-05-11 15:22:08 -05:00
T::Number => E::Number {
pos: token.start,
2024-05-12 04:52:58 -05:00
value: match self.lexer.slice(token.range()).parse() {
2024-05-09 16:41:59 -05:00
Ok(value) => value,
Err(e) => self.report(format_args!("invalid number: {e}")),
},
},
2024-05-11 15:22:08 -05:00
T::LParen => {
2024-05-10 15:54:12 -05:00
let expr = self.expr();
2024-05-11 15:22:08 -05:00
self.expect_advance(T::RParen);
2024-05-10 15:54:12 -05:00
expr
}
2024-06-25 12:55:25 -05:00
T::Comment => Expr::Comment {
pos: token.start,
literal: self.move_str(token),
},
tok => self.report(format_args!("unexpected token: {tok:?}")),
2024-05-09 16:41:59 -05:00
};
2024-05-11 09:04:13 -05:00
loop {
2024-05-14 05:17:39 -05:00
let token = self.token;
if matches!(token.kind, T::LParen | T::Ctor | T::Dot | T::Tupl) {
2024-05-12 05:16:40 -05:00
self.next();
}
2024-05-14 05:17:39 -05:00
expr = match token.kind {
2024-05-12 05:16:40 -05:00
T::LParen => Expr::Call {
func: self.arena.alloc(expr),
2024-05-17 12:53:59 -05:00
args: self.collect_list(T::Comma, T::RParen, Self::expr),
2024-06-25 14:41:12 -05:00
trailing_comma: std::mem::take(&mut self.trailing_sep),
2024-05-12 05:16:40 -05:00
},
T::Ctor => E::Ctor {
2024-06-25 14:51:41 -05:00
pos: token.start,
ty: Some(self.arena.alloc(expr)),
2024-05-12 05:16:40 -05:00
fields: self.collect_list(T::Comma, T::RBrace, |s| {
let name = s.expect_advance(T::Ident);
s.expect_advance(T::Colon);
let val = s.expr();
2024-05-17 12:53:59 -05:00
(Some(s.move_str(name)), val)
2024-05-12 05:16:40 -05:00
}),
2024-06-25 14:51:41 -05:00
trailing_comma: std::mem::take(&mut self.trailing_sep),
2024-05-12 05:16:40 -05:00
},
2024-05-14 05:17:39 -05:00
T::Tupl => E::Ctor {
2024-06-25 14:51:41 -05:00
pos: token.start,
ty: Some(self.arena.alloc(expr)),
2024-05-14 05:17:39 -05:00
fields: self.collect_list(T::Comma, T::RParen, |s| (None, s.expr())),
2024-06-25 14:51:41 -05:00
trailing_comma: std::mem::take(&mut self.trailing_sep),
2024-05-14 05:17:39 -05:00
},
2024-05-12 05:16:40 -05:00
T::Dot => E::Field {
2024-05-12 06:13:36 -05:00
target: self.arena.alloc(expr),
field: {
2024-05-12 05:16:40 -05:00
let token = self.expect_advance(T::Ident);
2024-05-17 12:53:59 -05:00
self.move_str(token)
2024-05-12 05:16:40 -05:00
},
},
2024-05-11 09:04:13 -05:00
_ => break,
}
}
2024-05-12 04:52:58 -05:00
if matches!(token.kind, T::Return) {
self.expect_advance(T::Semi);
}
if matches!(token.kind, T::Loop | T::LBrace | T::Fn) {
self.pop_scope(frame);
}
2024-05-09 16:41:59 -05:00
expr
}
2024-06-01 13:30:07 -05:00
/// Consumes the current token if it is an identifier (plain or compile-time) and
/// reports an error otherwise.
fn advance_ident(&mut self) -> Token {
    match self.token.kind {
        TokenKind::Ident | TokenKind::CtIdent => self.next(),
        other => self.report(format_args!("expected identifier, found {:?}", other)),
    }
}
2024-05-12 04:52:58 -05:00
fn pop_scope(&mut self, frame: usize) {
let mut undeclared_count = frame;
for i in frame..self.idents.len() {
2024-06-01 13:30:07 -05:00
if !&self.idents[i].declared {
2024-05-12 04:52:58 -05:00
self.idents.swap(i, undeclared_count);
undeclared_count += 1;
}
}
2024-05-17 12:53:59 -05:00
self.idents
.drain(undeclared_count..)
.map(|ident| Symbol {
name: ident.ident,
flags: ident.flags,
})
.collect_into(self.symbols);
2024-05-12 04:52:58 -05:00
}
/// Parses a unit expression and moves it into the arena.
fn ptr_unit_expr(&mut self) -> &'a Expr<'a> {
    let unit = self.unit_expr();
    self.arena.alloc(unit)
}
2024-05-11 15:22:08 -05:00
/// Parses elements with `f` until the `end` token is consumed and returns them as an
/// arena slice. Each element may be followed by `delim`.
///
/// After the call `trailing_sep` tells whether the last element was followed by
/// `delim`. It is reset up front so that an empty list reports `false` instead of
/// inheriting whatever an earlier, unrelated list left behind (for example a block
/// statement terminated by `;` right before a call with no arguments).
fn collect_list<T: Copy>(
    &mut self,
    delim: TokenKind,
    end: TokenKind,
    mut f: impl FnMut(&mut Self) -> T,
) -> &'a [T] {
    self.trailing_sep = false;
    self.collect(|s| {
        s.advance_if(end).not().then(|| {
            let val = f(s);
            s.trailing_sep = s.advance_if(delim);
            val
        })
    })
}
/// Calls `f` until it yields `None` and stores the produced values in the arena.
fn collect<T: Copy>(&mut self, mut f: impl FnMut(&mut Self) -> Option<T>) -> &'a [T] {
    let mut items = Vec::new();
    while let Some(item) = f(self) {
        items.push(item);
    }
    self.arena.alloc_slice(&items)
}
2024-05-09 16:41:59 -05:00
/// Consumes the current token when it is of `kind`; returns whether it did.
fn advance_if(&mut self, kind: TokenKind) -> bool {
    let matched = self.token.kind == kind;
    if matched {
        self.next();
    }
    matched
}
2024-05-11 09:04:13 -05:00
/// Consumes and returns the current token, reporting an error if it is not of `kind`.
fn expect_advance(&mut self, kind: TokenKind) -> Token {
    let found = self.token.kind;
    if found == kind {
        return self.next();
    }
    self.report(format_args!("expected {:?}, found {:?}", kind, found))
}
2024-06-01 13:30:07 -05:00
#[track_caller]
2024-05-09 16:41:59 -05:00
fn report(&self, msg: impl std::fmt::Display) -> ! {
2024-06-01 13:30:07 -05:00
self.report_pos(self.token.start, msg)
}
/// Prints `path:line:col => msg` to stderr for the given position and then panics.
#[track_caller]
fn report_pos(&self, pos: Pos, msg: impl std::fmt::Display) -> ! {
    let path = self.path;
    let (line, col) = self.lexer.line_col(pos);
    eprintln!("{}:{}:{} => {}", path, line, col, msg);
    unreachable!();
}
2024-05-17 12:53:59 -05:00
/// Sets `flags` on the identifier at the root of `e`.
///
/// Field accesses are followed down to their target; any other expression kind is
/// ignored.
fn flag_idents(&mut self, e: Expr<'a>, flags: IdentFlags) {
    let mut cursor = e;
    loop {
        match cursor {
            Expr::Field { target, .. } => cursor = *target,
            Expr::Ident { id, .. } => {
                find_ident(&mut self.idents, id).flags |= flags;
                return;
            }
            _ => return,
        }
    }
}
2024-05-09 16:41:59 -05:00
}
2024-05-17 12:53:59 -05:00
/// Looks `id` up in `idents` by binary search and returns the matching entry.
///
/// # Panics
/// Panics when no entry with that identifier exists.
fn find_ident(idents: &mut [ScopeIdent], id: Ident) -> &mut ScopeIdent {
    let slot = idents
        .binary_search_by_key(&id, |candidate| candidate.ident)
        .unwrap();
    &mut idents[slot]
}
/// Looks `id` up in `symbols` by binary search on the symbol name.
///
/// # Panics
/// Panics when `symbols` holds no symbol with that name.
pub fn find_symbol(symbols: &[Symbol], id: Ident) -> &Symbol {
    let slot = symbols
        .binary_search_by_key(&id, |candidate| candidate.name)
        .unwrap();
    &symbols[slot]
}
2024-05-12 04:52:58 -05:00
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Arg<'a> {
2024-05-17 12:53:59 -05:00
pub name: &'a str,
pub id: Ident,
2024-06-01 13:30:07 -05:00
pub index: u16,
2024-05-17 12:53:59 -05:00
pub ty: Expr<'a>,
2024-05-12 04:52:58 -05:00
}
2024-06-01 13:30:07 -05:00
// Generates the expression enum together with two helpers:
//
// * `pos`, which takes the position from the FIRST field of each variant (so the first
//   field has to be either a `Pos` or a reference to another expression);
// * `used_bytes`, the number of leading bytes of the enum value that the active variant
//   actually occupies.
//
// Attributes written on the enum or on its variants are accepted but not re-emitted.
macro_rules! generate_expr {
    ($(#[$meta:meta])* $vis:vis enum $name:ident<$lt:lifetime> {$(
        $(#[$field_meta:meta])*
        $variant:ident {
            $($field:ident: $ty:ty,)*
        },
    )*}) => {
        #[derive(Debug, Clone, Copy, PartialEq, Eq)]
        $vis enum $name<$lt> {$(
            $variant {
                $($field: $ty,)*
            },
        )*}

        impl<$lt> $name<$lt> {
            pub fn pos(&self) -> Pos {
                #[allow(unused_variables)]
                match self {
                    $(Self::$variant { $($field),* } => generate_expr!(@first $(($field),)*).posi(),)*
                }
            }

            pub fn used_bytes(&self) -> usize {
                match self {$(
                    Self::$variant { $($field,)* } => {
                        // (offset from the start of the enum, size) for every field; the
                        // field with the greatest offset determines where the data ends.
                        #[allow(clippy::size_of_ref)]
                        let fields = [$(($field as *const _ as usize - self as *const _ as usize, std::mem::size_of_val($field)),)*];
                        let (last, size) = fields.iter().copied().max().unwrap();
                        last + size
                    },
                )*}
            }
        }
    };

    (@first ($($first:tt)*), $($rest:tt)*) => { $($first)* };
    (@last ($($ign:tt)*), $($rest:tt)*) => { $($rest)* };
    (@last ($($last:tt)*),) => { $($last)* };
}
// it would be real nice if we could use relative pointers and still pattern match easily
//
// NOTE: the first field of every variant is what `Expr::pos` reads, keep it first.
generate_expr! {
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum Expr<'a> {
        // Source comment, kept so that the formatter can print it back.
        Comment {
            pos: Pos,
            literal: &'a str,
        },
        Break {
            pos: Pos,
        },
        Continue {
            pos: Pos,
        },
        // `fn(args): ret body`
        Closure {
            pos: Pos,
            args: &'a [Arg<'a>],
            ret: &'a Self,
            body: &'a Self,
        },
        // `func(args)`
        Call {
            func: &'a Self,
            args: &'a [Self],
            trailing_comma: bool,
        },
        Return {
            pos: Pos,
            val: Option<&'a Self>,
        },
        Ident {
            pos: Pos,
            id: Ident,
            name: &'a str,
            index: u16,
        },
        // `{ stmts }`
        Block {
            pos: Pos,
            stmts: &'a [Self],
        },
        Number {
            pos: Pos,
            value: u64,
        },
        BinOp {
            left: &'a Self,
            op: TokenKind,
            right: &'a Self,
        },
        If {
            pos: Pos,
            cond: &'a Self,
            then: &'a Self,
            else_: Option<&'a Self>,
        },
        Loop {
            pos: Pos,
            body: &'a Self,
        },
        UnOp {
            pos: Pos,
            op: TokenKind,
            val: &'a Self,
        },
        // `struct { name: ty, ... }` plus the outer identifiers it refers to.
        Struct {
            pos: Pos,
            fields: &'a [(&'a str, Self)],
            captured: &'a [Ident],
        },
        // `ty.{ name: value, ... }` or `ty.( value, ... )`; tuple fields have no name.
        Ctor {
            pos: Pos,
            ty: Option<&'a Self>,
            fields: &'a [(Option<&'a str>, Self)],
            trailing_comma: bool,
        },
        // `target.field`
        Field {
            target: &'a Self,
            field: &'a str,
        },
        Bool {
            pos: Pos,
            value: bool,
        },
        // `@name(args)`
        Directive {
            pos: Pos,
            name: &'a str,
            args: &'a [Self],
        },
        // `@use("path")`, already resolved to a file id by the loader.
        Mod {
            pos: Pos,
            id: FileId,
            path: &'a str,
        },
    }
}
trait Poser {
2024-06-24 10:26:00 -05:00
fn posi(self) -> Pos;
2024-05-09 16:41:59 -05:00
}
2024-06-01 13:30:07 -05:00
impl Poser for Pos {
2024-06-24 10:26:00 -05:00
fn posi(self) -> Pos {
self
2024-05-12 16:19:45 -05:00
}
}
2024-06-01 13:30:07 -05:00
impl<'a> Poser for &Expr<'a> {
2024-06-24 10:26:00 -05:00
fn posi(self) -> Pos {
2024-06-01 13:30:07 -05:00
self.pos()
}
}
2024-06-25 14:41:12 -05:00
thread_local! {
    // Source text the `Display` impl of `Expr` consults to preserve blank lines. Stored
    // as a raw pointer, so it must never outlive the `with_fmt_source` call that set it.
    static FMT_SOURCE: Cell<*const str> = const { Cell::new("") };
}

/// Runs `f` with `source` installed as the text the expression formatter looks at.
///
/// The previously installed source is put back when `f` finishes, including when it
/// unwinds, so the thread-local never keeps pointing at `source` after this call and
/// nested invocations do not clobber each other.
pub fn with_fmt_source<T>(source: &str, f: impl FnOnce() -> T) -> T {
    // Restores the saved pointer on drop, which also runs during a panic.
    struct Restore(*const str);

    impl Drop for Restore {
        fn drop(&mut self) {
            FMT_SOURCE.with(|s| s.set(self.0));
        }
    }

    let _restore = Restore(FMT_SOURCE.with(|s| s.replace(source)));
    f()
}
impl<'a> std::fmt::Display for Expr<'a> {
2024-05-09 16:41:59 -05:00
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
thread_local! {
2024-05-19 11:20:42 -05:00
static INDENT: Cell<usize> = const { Cell::new(0) };
2024-05-09 16:41:59 -05:00
}
2024-05-19 11:20:42 -05:00
fn fmt_list<T>(
2024-05-14 16:07:32 -05:00
f: &mut std::fmt::Formatter,
end: &str,
2024-05-19 11:20:42 -05:00
list: &[T],
2024-05-14 16:07:32 -05:00
fmt: impl Fn(&T, &mut std::fmt::Formatter) -> std::fmt::Result,
) -> std::fmt::Result {
let first = &mut true;
for expr in list {
if !std::mem::take(first) {
write!(f, ", ")?;
}
fmt(expr, f)?;
}
write!(f, "{end}")
}
2024-06-25 14:41:12 -05:00
fn fmt_trailing_list<T>(
f: &mut std::fmt::Formatter,
end: &str,
list: &[T],
fmt: impl Fn(&T, &mut std::fmt::Formatter) -> std::fmt::Result,
) -> std::fmt::Result {
writeln!(f)?;
INDENT.with(|i| i.set(i.get() + 1));
let res = (|| {
for stmt in list {
for _ in 0..INDENT.with(|i| i.get()) {
write!(f, "\t")?;
}
fmt(stmt, f)?;
writeln!(f, ",")?;
}
Ok(())
})();
INDENT.with(|i| i.set(i.get() - 1));
for _ in 0..INDENT.with(|i| i.get()) {
write!(f, "\t")?;
}
write!(f, "{end}")?;
res
}
macro_rules! impl_parenter {
($($name:ident => $pat:pat,)*) => {
$(
struct $name<'a>(&'a Expr<'a>);
impl<'a> std::fmt::Display for $name<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if matches!(self.0, $pat) {
write!(f, "({})", self.0)
} else {
write!(f, "{}", self.0)
}
}
}
)*
};
}
impl_parenter! {
Unary => Expr::BinOp { .. },
Postfix => Expr::UnOp { .. } | Expr::BinOp { .. },
Consecutive => Expr::UnOp { .. },
}
2024-06-25 14:41:12 -05:00
{
let source = unsafe { &*FMT_SOURCE.with(|s| s.get()) };
let pos = self.pos();
if let Some(before) = source.get(..pos as usize) {
let trailing_whitespace = &before[before.trim_end().len()..];
let ncount = trailing_whitespace.chars().filter(|&c| c == '\n').count();
if ncount > 1 {
writeln!(f)?;
}
}
}
2024-05-10 15:54:12 -05:00
match *self {
2024-06-25 14:41:12 -05:00
Self::Comment { literal, .. } => write!(f, "{}", literal.trim_end()),
2024-05-19 11:20:42 -05:00
Self::Mod { path, .. } => write!(f, "@mod(\"{path}\")"),
Self::Field { target, field } => write!(f, "{}.{field}", Postfix(target)),
2024-05-14 16:07:32 -05:00
Self::Directive { name, args, .. } => {
write!(f, "@{name}(")?;
fmt_list(f, ")", args, std::fmt::Display::fmt)
}
2024-05-12 05:16:40 -05:00
Self::Struct { fields, .. } => {
write!(f, "struct {{")?;
2024-05-14 16:07:32 -05:00
fmt_list(f, "}", fields, |(name, val), f| write!(f, "{name}: {val}",))
2024-05-12 05:16:40 -05:00
}
2024-06-25 14:51:41 -05:00
Self::Ctor {
ty,
fields,
trailing_comma,
..
} => {
2024-05-14 05:17:39 -05:00
let (left, rith) = if fields.iter().any(|(name, _)| name.is_some()) {
2024-06-25 14:51:41 -05:00
('{', "}")
2024-05-14 05:17:39 -05:00
} else {
2024-06-25 14:51:41 -05:00
('(', ")")
2024-05-14 05:17:39 -05:00
};
if let Some(ty) = ty {
write!(f, "{}", Unary(ty))?;
2024-05-14 05:17:39 -05:00
}
2024-05-14 16:07:32 -05:00
write!(f, ".{left}")?;
2024-06-25 14:51:41 -05:00
let fmt_field = |(name, val): &_, f: &mut std::fmt::Formatter| {
2024-05-14 05:17:39 -05:00
if let Some(name) = name {
write!(f, "{name}: ")?;
}
2024-06-25 14:51:41 -05:00
write!(f, "{val}")
};
if trailing_comma {
fmt_trailing_list(f, rith, fields, fmt_field)
} else {
fmt_list(f, rith, fields, fmt_field)
2024-05-12 05:16:40 -05:00
}
}
Self::UnOp { op, val, .. } => write!(f, "{op}{}", Unary(val)),
2024-05-11 15:22:08 -05:00
Self::Break { .. } => write!(f, "break;"),
Self::Continue { .. } => write!(f, "continue;"),
Self::If {
cond, then, else_, ..
} => {
write!(f, "if {cond} {}", Consecutive(then))?;
2024-05-11 11:16:27 -05:00
if let Some(else_) = else_ {
2024-05-14 16:07:32 -05:00
write!(f, " else {else_}")?;
2024-05-11 11:16:27 -05:00
}
Ok(())
}
2024-05-14 16:07:32 -05:00
Self::Loop { body, .. } => write!(f, "loop {body}"),
2024-05-11 15:22:08 -05:00
Self::Closure {
ret, body, args, ..
} => {
2024-05-14 16:07:32 -05:00
write!(f, "fn(")?;
fmt_list(f, "", args, |arg, f| write!(f, "{}: {}", arg.name, arg.ty))?;
write!(f, "): {ret} {body}")
2024-05-11 09:04:13 -05:00
}
2024-06-25 14:41:12 -05:00
Self::Call {
func,
args,
trailing_comma,
} => {
write!(f, "{}(", Postfix(func))?;
2024-06-25 14:41:12 -05:00
if trailing_comma {
fmt_trailing_list(f, ")", args, std::fmt::Display::fmt)
} else {
fmt_list(f, ")", args, std::fmt::Display::fmt)
}
2024-05-11 09:04:13 -05:00
}
2024-05-14 16:07:32 -05:00
Self::Return { val: Some(val), .. } => write!(f, "return {val};"),
2024-05-11 15:22:08 -05:00
Self::Return { val: None, .. } => write!(f, "return;"),
2024-05-14 16:07:32 -05:00
Self::Ident { name, .. } => write!(f, "{name}"),
2024-05-11 15:22:08 -05:00
Self::Block { stmts, .. } => {
2024-06-25 14:41:12 -05:00
write!(f, "{{")?;
fmt_trailing_list(f, "}", stmts, std::fmt::Display::fmt)
2024-05-09 16:41:59 -05:00
}
2024-05-14 16:07:32 -05:00
Self::Number { value, .. } => write!(f, "{value}"),
Self::Bool { value, .. } => write!(f, "{value}"),
2024-05-10 15:54:12 -05:00
Self::BinOp { left, right, op } => {
let display_branch = |f: &mut std::fmt::Formatter, expr: &Self| {
if let Self::BinOp { op: lop, .. } = expr
&& op.precedence() > lop.precedence()
{
2024-05-14 16:07:32 -05:00
write!(f, "({expr})")
2024-05-10 15:54:12 -05:00
} else {
2024-05-14 16:07:32 -05:00
write!(f, "{expr}")
2024-05-10 15:54:12 -05:00
}
};
display_branch(f, left)?;
2024-05-14 16:07:32 -05:00
write!(f, " {op} ")?;
2024-05-10 15:54:12 -05:00
display_branch(f, right)
}
2024-05-09 16:41:59 -05:00
}
}
}
2024-05-17 12:53:59 -05:00
#[repr(C)]
2024-05-19 11:20:42 -05:00
pub struct AstInner<T: ?Sized> {
2024-05-17 12:53:59 -05:00
ref_count: AtomicUsize,
mem: ArenaChunk,
exprs: *const [Expr<'static>],
2024-05-19 11:20:42 -05:00
pub path: Box<str>,
pub nlines: LineMap,
pub symbols: T,
2024-05-17 12:53:59 -05:00
}
impl AstInner<[Symbol]> {
fn layout(syms: usize) -> std::alloc::Layout {
std::alloc::Layout::new::<AstInner<()>>()
.extend(std::alloc::Layout::array::<Symbol>(syms).unwrap())
.unwrap()
.0
}
fn new(content: &str, path: &str, loader: Loader) -> NonNull<Self> {
let arena = Arena::default();
let mut syms = Vec::new();
let mut parser = Parser::new(&arena, &mut syms, loader);
let exprs = parser.file(content, path) as *const [Expr<'static>];
syms.sort_unstable_by_key(|s| s.name);
let layout = Self::layout(syms.len());
unsafe {
2024-05-19 11:20:42 -05:00
let ptr = std::alloc::alloc(layout);
let inner: *mut Self = std::ptr::from_raw_parts_mut(ptr as *mut _, syms.len());
std::ptr::write(
inner as *mut AstInner<()>,
AstInner {
ref_count: AtomicUsize::new(1),
mem: arena.chunk.into_inner(),
exprs,
path: path.into(),
nlines: LineMap::new(content),
symbols: (),
},
);
2024-05-17 12:53:59 -05:00
std::ptr::addr_of_mut!((*inner).symbols)
.as_mut_ptr()
.copy_from_nonoverlapping(syms.as_ptr(), syms.len());
2024-05-19 11:20:42 -05:00
2024-05-17 12:53:59 -05:00
NonNull::new_unchecked(inner)
}
}
}
/// Reference-counted handle to a parsed file.
///
/// Equality and hashing are derived from the pointer, so two handles compare equal only
/// when they refer to the same allocation.
#[derive(PartialEq, Eq, Hash)]
pub struct Ast(NonNull<AstInner<[Symbol]>>);
impl Ast {
pub fn new(path: &str, content: &str, loader: Loader) -> Self {
Self(AstInner::new(content, path, loader))
}
pub fn exprs(&self) -> &[Expr] {
unsafe { &*self.inner().exprs }
}
2024-05-19 11:20:42 -05:00
fn inner(&self) -> &AstInner<[Symbol]> {
unsafe { self.0.as_ref() }
2024-05-17 12:53:59 -05:00
}
2024-05-20 07:11:58 -05:00
pub fn find_decl(&self, id: Result<Ident, &str>) -> Option<(&Expr, Ident)> {
2024-05-19 11:20:42 -05:00
self.exprs().iter().find_map(|expr| match expr {
Expr::BinOp {
left: &Expr::Ident { id: iden, name, .. },
op: TokenKind::Decl,
..
2024-05-20 07:11:58 -05:00
} if Ok(iden) == id || Err(name) == id => Some((expr, iden)),
2024-05-19 11:20:42 -05:00
_ => None,
})
2024-05-17 12:53:59 -05:00
}
2024-05-19 11:20:42 -05:00
}
2024-05-17 12:53:59 -05:00
2024-06-01 13:30:07 -05:00
/// Prints every top-level expression followed by an empty line.
impl std::fmt::Display for Ast {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.exprs()
            .iter()
            .try_for_each(|expr| writeln!(f, "{expr}\n"))
    }
}
2024-05-19 11:20:42 -05:00
/// The default tree is the result of parsing an empty file with an empty path.
impl Default for Ast {
    fn default() -> Self {
        Self::new("", "", &no_loader)
    }
}
/// Lifetime-erased pointer to an expression; turn it back into a reference with
/// `ExprRef::get`, which checks it against the arena of a given [`Ast`].
// `repr(packed)` lowers the alignment to 1, so the field must be copied out before use.
#[derive(Clone, Copy)]
#[repr(packed)]
pub struct ExprRef(NonNull<Expr<'static>>);
impl ExprRef {
pub fn new(expr: &Expr) -> Self {
Self(NonNull::from(expr).cast())
}
pub fn get<'a>(&self, from: &'a Ast) -> Option<&'a Expr<'a>> {
2024-06-21 16:07:32 -05:00
ArenaChunk::contains(from.mem.base, self.0.as_ptr() as _).then_some(())?;
2024-05-19 11:20:42 -05:00
// SAFETY: the pointer is or was a valid reference in the past, if it points within one of
// arenas regions, it muts be walid, since arena does not give invalid pointers to its
// allocations
2024-06-21 16:07:32 -05:00
Some(unsafe { { self.0 }.as_ref() })
2024-05-17 12:53:59 -05:00
}
}
// `Ast` wraps a raw pointer and therefore needs explicit opt-in; the reference count it
// shares between handles is an `AtomicUsize`.
// NOTE(review): soundness also depends on every field of `AstInner` (notably `LineMap`)
// being safe to share across threads — confirm.
unsafe impl Send for Ast {}
unsafe impl Sync for Ast {}
/// Cloning only bumps the reference count; both handles share one allocation.
impl Clone for Ast {
    fn clone(&self) -> Self {
        use std::sync::atomic::Ordering;

        let shared = unsafe { self.0.as_ref() };
        shared.ref_count.fetch_add(1, Ordering::Relaxed);
        Self(self.0)
    }
}
/// Releases one reference; the last handle destroys the contents and frees the
/// allocation.
impl Drop for Ast {
    fn drop(&mut self) {
        use std::sync::atomic::{fence, Ordering};

        let inner = unsafe { self.0.as_ref() };

        // `Release` publishes everything this handle did with the tree to whichever
        // thread ends up freeing it.
        if inner.ref_count.fetch_sub(1, Ordering::Release) != 1 {
            return;
        }
        // Pairs with the `Release` decrements of the other handles, so all of their
        // accesses happen-before the destruction below.
        fence(Ordering::Acquire);

        // Compute the layout while the value is still alive.
        let layout = AstInner::layout(inner.symbols.len());
        unsafe {
            std::ptr::drop_in_place(self.0.as_ptr());
            std::alloc::dealloc(self.0.as_ptr() as *mut u8, layout);
        }
    }
}
2024-05-19 11:20:42 -05:00
/// Gives read access to the fields of the shared [`AstInner`].
impl Deref for Ast {
    type Target = AstInner<[Symbol]>;

    fn deref(&self) -> &Self::Target {
        unsafe { self.0.as_ref() }
    }
}
#[derive(Default)]
pub struct Arena<'a> {
2024-05-17 12:53:59 -05:00
chunk: UnsafeCell<ArenaChunk>,
ph: std::marker::PhantomData<&'a ()>,
}
impl<'a> Arena<'a> {
pub fn alloc_str(&self, token: &str) -> &'a str {
let ptr = self.alloc_slice(token.as_bytes());
2024-06-01 13:30:07 -05:00
unsafe { std::str::from_utf8_unchecked(ptr) }
}
2024-06-01 13:30:07 -05:00
pub fn alloc(&self, expr: Expr<'a>) -> &'a Expr<'a> {
let align = std::mem::align_of::<Expr<'a>>();
let size = expr.used_bytes();
let layout = unsafe { std::alloc::Layout::from_size_align_unchecked(size, align) };
let ptr = self.alloc_low(layout);
2024-06-01 13:30:07 -05:00
unsafe {
ptr.cast::<u64>()
.copy_from_nonoverlapping(NonNull::from(&expr).cast(), size / 8)
};
unsafe { ptr.cast::<Expr<'a>>().as_ref() }
}
2024-06-01 13:30:07 -05:00
pub fn alloc_slice<T: Copy>(&self, slice: &[T]) -> &'a [T] {
2024-05-19 11:20:42 -05:00
if slice.is_empty() || std::mem::size_of::<T>() == 0 {
return &mut [];
}
let layout = std::alloc::Layout::array::<T>(slice.len()).unwrap();
let ptr = self.alloc_low(layout);
unsafe {
ptr.as_ptr()
.cast::<T>()
.copy_from_nonoverlapping(slice.as_ptr(), slice.len())
};
2024-06-01 13:30:07 -05:00
unsafe { std::slice::from_raw_parts(ptr.as_ptr() as _, slice.len()) }
}
fn alloc_low(&self, layout: std::alloc::Layout) -> NonNull<u8> {
assert!(layout.align() <= ArenaChunk::ALIGN);
assert!(layout.size() <= ArenaChunk::CHUNK_SIZE);
2024-05-17 12:53:59 -05:00
let chunk = unsafe { &mut *self.chunk.get() };
2024-05-17 12:53:59 -05:00
if let Some(ptr) = chunk.alloc(layout) {
return ptr;
}
2024-05-19 11:20:42 -05:00
unsafe {
std::ptr::write(chunk, ArenaChunk::new(chunk.base));
}
2024-05-17 12:53:59 -05:00
chunk.alloc(layout).unwrap()
}
}
/// Head of a singly linked list of fixed-size memory chunks.
///
/// Memory is handed out from `end` downwards towards `base`. The last pointer-sized
/// word of every chunk stores the base address of the next (older) chunk.
struct ArenaChunk {
    base: *mut u8,
    end: *mut u8,
}

impl Default for ArenaChunk {
    /// An empty list: no chunk is allocated and every `alloc` fails.
    fn default() -> Self {
        Self {
            base: std::ptr::null_mut(),
            end: std::ptr::null_mut(),
        }
    }
}

impl ArenaChunk {
    const CHUNK_SIZE: usize = 1 << 16;
    const ALIGN: usize = std::mem::align_of::<Self>();
    // Offset of the `next` link inside a chunk.
    const NEXT_OFFSET: usize = Self::CHUNK_SIZE - std::mem::size_of::<*mut u8>();
    const LAYOUT: std::alloc::Layout =
        unsafe { std::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) };

    /// Allocates a fresh chunk whose link points at `next`.
    fn new(next: *mut u8) -> Self {
        let base = unsafe { std::alloc::alloc(Self::LAYOUT) };
        if base.is_null() {
            std::alloc::handle_alloc_error(Self::LAYOUT);
        }
        let end = unsafe { base.add(Self::NEXT_OFFSET) };
        Self::set_next(base, next);
        Self { base, end }
    }

    /// Stores `next` in the link slot of the chunk starting at `curr`.
    fn set_next(curr: *mut u8, next: *mut u8) {
        unsafe { std::ptr::write(curr.add(Self::NEXT_OFFSET) as *mut *mut u8, next) };
    }

    /// Reads the link slot of the chunk starting at `curr`.
    fn next(curr: *mut u8) -> *mut u8 {
        unsafe { std::ptr::read(curr.add(Self::NEXT_OFFSET) as *const *mut u8) }
    }

    /// Carves `layout` out of the free space of the current chunk.
    ///
    /// Returns `None` when no chunk has been allocated yet or the request does not fit.
    /// The returned pointer is aligned to `layout.align()` even when `layout.size()` is
    /// not a multiple of it.
    fn alloc(&mut self, layout: std::alloc::Layout) -> Option<NonNull<u8>> {
        if self.base.is_null() {
            return None;
        }

        let base = self.base as usize;
        let end = self.end as usize;
        // Move down by the size first and align afterwards; aligning before the
        // subtraction would only work for sizes that are multiples of the alignment.
        let start = end.checked_sub(layout.size())? & !(layout.align() - 1);
        if start < base {
            return None;
        }

        // SAFETY: `base <= start <= end`, so the result stays inside the chunk.
        self.end = unsafe { self.end.sub(end - start) };
        NonNull::new(self.end)
    }

    /// Tells whether `arg` points into the chunk starting at `base` or into any chunk
    /// linked behind it.
    fn contains(base: *mut u8, arg: *mut u8) -> bool {
        let mut current = base;
        while !current.is_null() {
            let upper = unsafe { current.add(Self::CHUNK_SIZE) };
            if current <= arg && arg < upper {
                return true;
            }
            current = Self::next(current);
        }
        false
    }
}
2024-05-17 12:53:59 -05:00
impl Drop for ArenaChunk {
fn drop(&mut self) {
2024-06-01 13:30:07 -05:00
log::inf!(
"dropping chunk of size: {}",
(Self::LAYOUT.size() - (self.end as usize - self.base as usize))
* !self.end.is_null() as usize
);
2024-05-17 12:53:59 -05:00
let mut current = self.base;
while !current.is_null() {
let next = Self::next(current);
unsafe { std::alloc::dealloc(current, Self::LAYOUT) };
current = next;
2024-06-01 13:30:07 -05:00
log::dbg!("deallocating full chunk");
2024-05-17 12:53:59 -05:00
}
}
}
2024-06-25 14:41:12 -05:00
#[cfg(test)]
mod test {
    /// Formats `input` and checks that the result is identical to `input`.
    ///
    /// Both texts are written to `formatter_<ident>.expected` / `.actual` in the working
    /// directory and compared with the external `diff` tool, so a failure shows a
    /// readable diff. The files are removed again before the assertion.
    fn format(ident: &str, input: &str) {
        use std::{fmt::Write, fs, process::Command};

        let ast = super::Ast::new(ident, input, &super::no_loader);

        let mut output = String::new();
        super::with_fmt_source(input, || {
            for expr in ast.exprs() {
                writeln!(output, "{expr}").unwrap();
            }
        });

        let expected_file = format!("formatter_{ident}.expected");
        let actual_file = format!("formatter_{ident}.actual");
        fs::write(&expected_file, input).unwrap();
        fs::write(&actual_file, output).unwrap();

        let status = Command::new("diff")
            .arg("-u")
            .arg("--color")
            .arg(&expected_file)
            .arg(&actual_file)
            .status()
            .unwrap();

        fs::remove_file(&expected_file).unwrap();
        fs::remove_file(&actual_file).unwrap();

        assert!(status.success(), "test failed");
    }

    // Declares one `#[test]` per `name => source;` pair.
    macro_rules! test {
        ($($name:ident => $input:expr;)*) => {$(
            #[test]
            fn $name() {
                format(stringify!($name), $input);
            }
        )*};
    }

    test! {
        comments => "// comment\n// comment\n\n// comment\n\n\
            /* comment */\n/* comment */\n\n/* comment */\n";
        some_ordinary_code => "loft := fn(): int return loft(1, 2, 3);\n";
        some_arg_per_line_code => "loft := fn(): int return loft(\
            \n\t1,\n\t2,\n\t3,\n);\n";
        some_ordinary_struct => "loft := fn(): int return loft.{a: 1, b: 2};\n";
        some_ordinary_fild_per_lin_struct => "loft := fn(): int return loft.{\
            \n\ta: 1,\n\tb: 2,\n};\n";
    }
}