holey-bytes/hblang/src/parser.rs

859 lines
26 KiB
Rust
Raw Normal View History

2024-05-11 09:04:13 -05:00
use std::{cell::Cell, ops::Not, ptr::NonNull};
2024-05-09 16:41:59 -05:00
2024-05-12 04:52:58 -05:00
use crate::{
codegen::bt,
ident::{self, Ident},
lexer::{Lexer, Token, TokenKind},
};
2024-05-12 16:19:45 -05:00
/// Position of a token in the source, as reported by the lexer (`Token::start`).
pub type Pos = u32;

/// Bookkeeping word shared by every use of one identifier: the two top bits are the
/// [`MUTABLE`] and [`REFERENCED`] flags, the remaining low bits hold a use counter.
pub type IdentFlags = u32;

/// Top bit of [`IdentFlags`]; set by `Expr::mark_mut` when the identifier is assigned to.
pub const MUTABLE: IdentFlags = 1 << (IdentFlags::BITS - 1);

/// Second-highest bit of [`IdentFlags`]; set while the parser is inside the operand of `&`.
pub const REFERENCED: IdentFlags = 1 << (IdentFlags::BITS - 2);

/// Strips the flag bits from `flag`, leaving only the counter stored in the low bits.
pub fn ident_flag_index(flag: IdentFlags) -> u32 {
    let flag_bits = MUTABLE | REFERENCED;
    flag & !flag_bits
}
2024-05-12 04:52:58 -05:00
/// One entry of the parser's identifier stack.
struct ScopeIdent<'a> {
    /// Handle built from the position and length of the first token that used the name.
    ident: Ident,
    /// Becomes `true` once a declaration of the name has been parsed.
    declared: bool,
    /// Flag/counter cell shared with every `Expr::Ident` that resolved to this entry.
    last: &'a Cell<IdentFlags>,
}
2024-05-09 16:41:59 -05:00
/// Recursive-descent parser that builds [`Expr`] trees inside an [`Arena`].
pub struct Parser<'a, 'b> {
    /// Path printed as the prefix of every diagnostic.
    path: &'a str,
    /// Token source; also used to slice identifier/number text out of the input.
    lexer: Lexer<'a>,
    /// Allocator for AST nodes and slices.
    arena: &'b Arena<'a>,
    /// Lookahead token: the next token that has not been consumed yet.
    token: Token,
    /// Stack of identifiers seen so far; entries are trimmed by `pop_scope`.
    idents: Vec<ScopeIdent<'a>>,
    /// `true` while the operand of a `&` expression is being parsed.
    referening: bool,
}
impl<'a, 'b> Parser<'a, 'b> {
2024-05-13 02:38:33 -05:00
pub fn new(arena: &'b Arena<'a>) -> Self {
let mut lexer = Lexer::new("");
2024-05-09 16:41:59 -05:00
let token = lexer.next();
Self {
lexer,
token,
2024-05-13 02:38:33 -05:00
path: "",
arena,
2024-05-12 04:52:58 -05:00
idents: Vec::new(),
referening: false,
}
2024-05-09 16:41:59 -05:00
}
2024-05-13 02:38:33 -05:00
pub fn file(&mut self, input: &'a str, path: &'a str) -> &'a [Expr<'a>] {
self.path = path;
self.lexer = Lexer::new(input);
self.token = self.lexer.next();
2024-05-12 04:52:58 -05:00
let f = self.collect(|s| (s.token.kind != TokenKind::Eof).then(|| s.expr()));
self.pop_scope(0);
let has_undeclared = !self.idents.is_empty();
for id in self.idents.drain(..) {
let (line, col) = self.lexer.line_col(ident::pos(id.ident));
eprintln!(
"{}:{}:{} => undeclared identifier: {}",
2024-05-12 17:02:32 -05:00
self.path,
2024-05-12 04:52:58 -05:00
line,
col,
self.lexer.slice(ident::range(id.ident))
);
}
if has_undeclared {
unreachable!();
}
f
2024-05-09 16:41:59 -05:00
}
/// Consumes the lookahead token: returns it and loads the following token from the lexer.
fn next(&mut self) -> Token {
    let upcoming = self.lexer.next();
    std::mem::replace(&mut self.token, upcoming)
}
/// Parses a full expression and moves it into the arena.
fn ptr_expr(&mut self) -> &'a Expr<'a> {
    let parsed = self.expr();
    self.arena.alloc(parsed)
}
/// Parses a full expression: one unit expression followed by any binary operators.
fn expr(&mut self) -> Expr<'a> {
    let first_operand = self.unit_expr();
    // Minimum precedence 0 accepts every operator that has a precedence.
    self.bin_expr(first_operand, 0)
}
2024-05-15 03:37:39 -05:00
fn bin_expr(&mut self, mut fold: Expr<'a>, min_prec: u8) -> Expr<'a> {
2024-05-10 15:54:12 -05:00
loop {
let Some(prec) = self.token.kind.precedence() else {
break;
};
2024-05-13 06:36:29 -05:00
if prec <= min_prec {
2024-05-10 15:54:12 -05:00
break;
}
let op = self.next().kind;
let right = self.unit_expr();
let right = self.bin_expr(right, prec);
2024-05-15 03:37:39 -05:00
let right = &*self.arena.alloc(right);
let left = &*self.arena.alloc(fold);
if let Some(op) = op.assign_op() {
fold.mark_mut();
2024-05-15 03:37:39 -05:00
let right = Expr::BinOp { left, op, right };
fold = Expr::BinOp {
left,
op: TokenKind::Assign,
right: self.arena.alloc(right),
};
} else {
fold = Expr::BinOp { left, right, op };
if op == TokenKind::Assign {
fold.mark_mut();
}
2024-05-15 03:37:39 -05:00
}
2024-05-10 15:54:12 -05:00
}
2024-05-15 03:37:39 -05:00
fold
2024-05-10 15:54:12 -05:00
}
2024-05-12 04:52:58 -05:00
fn try_resolve_builtin(name: &str) -> Option<Ident> {
// FIXME: we actually do this the second time in the codegen
Some(match name {
2024-05-13 06:36:29 -05:00
"int" | "i64" => bt::INT,
"i8" => bt::I8,
"i16" => bt::I16,
"i32" => bt::I32,
"u8" => bt::U8,
"u16" => bt::U16,
"uint" | "u32" => bt::U32,
2024-05-12 04:52:58 -05:00
"bool" => bt::BOOL,
2024-05-12 13:10:50 -05:00
"void" => bt::VOID,
"never" => bt::NEVER,
2024-05-12 04:52:58 -05:00
_ => return None,
})
}
fn resolve_ident(&mut self, token: Token, decl: bool) -> (Ident, Option<&'a Cell<IdentFlags>>) {
2024-05-12 04:52:58 -05:00
let name = self.lexer.slice(token.range());
if let Some(builtin) = Self::try_resolve_builtin(name) {
return (builtin, None);
}
let id = match self
.idents
.iter_mut()
.rfind(|elem| self.lexer.slice(ident::range(elem.ident)) == name)
{
Some(elem) if decl && elem.declared => {
self.report(format_args!("redeclaration of identifier: {name}"))
}
Some(elem) => {
elem.last.set(elem.last.get() + 1);
elem
}
2024-05-12 04:52:58 -05:00
None => {
let last = self.arena.alloc(Cell::new(0));
2024-05-12 04:52:58 -05:00
let id = ident::new(token.start, name.len() as _);
self.idents.push(ScopeIdent {
ident: id,
declared: false,
last,
});
self.idents.last_mut().unwrap()
}
};
id.declared |= decl;
id.last
.set(id.last.get() | (REFERENCED * self.referening as u32));
2024-05-12 04:52:58 -05:00
(id.ident, Some(id.last))
2024-05-12 04:52:58 -05:00
}
2024-05-10 15:54:12 -05:00
fn unit_expr(&mut self) -> Expr<'a> {
2024-05-11 15:22:08 -05:00
use {Expr as E, TokenKind as T};
2024-05-12 04:52:58 -05:00
let frame = self.idents.len();
2024-05-09 16:41:59 -05:00
let token = self.next();
2024-05-11 09:04:13 -05:00
let mut expr = match token.kind {
2024-05-14 16:07:32 -05:00
T::Driective => E::Directive {
pos: token.start,
name: self.lexer.slice(token.range()),
args: {
self.expect_advance(T::LParen);
self.collect_list(T::Comma, T::RParen, Self::expr)
},
},
2024-05-12 13:10:50 -05:00
T::True => E::Bool {
pos: token.start,
value: true,
},
2024-05-12 05:16:40 -05:00
T::Struct => E::Struct {
pos: token.start,
fields: {
self.expect_advance(T::LBrace);
self.collect_list(T::Comma, T::RBrace, |s| {
let name = s.expect_advance(T::Ident);
s.expect_advance(T::Colon);
let ty = s.expr();
(s.lexer.slice(name.range()), ty)
})
},
},
2024-05-12 04:52:58 -05:00
T::Ident => {
let (id, last) = self.resolve_ident(token, self.token.kind == T::Decl);
E::Ident {
name: self.lexer.slice(token.range()),
id,
last,
index: last.map_or(0, |l| ident_flag_index(l.get())),
}
2024-05-12 04:52:58 -05:00
}
2024-05-11 15:22:08 -05:00
T::If => E::If {
pos: token.start,
cond: self.ptr_expr(),
then: self.ptr_expr(),
else_: self.advance_if(T::Else).then(|| self.ptr_expr()),
},
T::Loop => E::Loop {
pos: token.start,
2024-05-11 11:16:27 -05:00
body: self.ptr_expr(),
},
2024-05-11 15:22:08 -05:00
T::Break => E::Break { pos: token.start },
T::Continue => E::Continue { pos: token.start },
T::Return => E::Return {
pos: token.start,
val: (self.token.kind != T::Semi).then(|| self.ptr_expr()),
2024-05-09 16:41:59 -05:00
},
2024-05-11 15:22:08 -05:00
T::Fn => E::Closure {
pos: token.start,
args: {
self.expect_advance(T::LParen);
self.collect_list(T::Comma, T::RParen, |s| {
let name = s.expect_advance(T::Ident);
2024-05-12 04:52:58 -05:00
let (id, last) = s.resolve_ident(name, true);
2024-05-11 15:22:08 -05:00
s.expect_advance(T::Colon);
2024-05-12 04:52:58 -05:00
Arg {
name: s.lexer.slice(name.range()),
id,
last,
ty: s.expr(),
}
2024-05-11 09:04:13 -05:00
})
2024-05-11 15:22:08 -05:00
},
ret: {
self.expect_advance(T::Colon);
self.ptr_expr()
},
body: self.ptr_expr(),
},
2024-05-15 03:37:39 -05:00
T::Band | T::Mul => E::UnOp {
2024-05-12 04:52:58 -05:00
pos: token.start,
op: token.kind,
val: match token.kind {
T::Band => self.referenced(Self::ptr_unit_expr),
_ => self.ptr_unit_expr(),
},
2024-05-12 04:52:58 -05:00
},
2024-05-11 15:22:08 -05:00
T::LBrace => E::Block {
pos: token.start,
stmts: self.collect_list(T::Semi, T::RBrace, Self::expr),
2024-05-09 16:41:59 -05:00
},
2024-05-11 15:22:08 -05:00
T::Number => E::Number {
pos: token.start,
2024-05-12 04:52:58 -05:00
value: match self.lexer.slice(token.range()).parse() {
2024-05-09 16:41:59 -05:00
Ok(value) => value,
Err(e) => self.report(format_args!("invalid number: {e}")),
},
},
2024-05-11 15:22:08 -05:00
T::LParen => {
2024-05-10 15:54:12 -05:00
let expr = self.expr();
2024-05-11 15:22:08 -05:00
self.expect_advance(T::RParen);
2024-05-10 15:54:12 -05:00
expr
}
tok => self.report(format_args!("unexpected token: {tok:?}")),
2024-05-09 16:41:59 -05:00
};
2024-05-11 09:04:13 -05:00
loop {
2024-05-14 05:17:39 -05:00
let token = self.token;
if matches!(token.kind, T::LParen | T::Ctor | T::Dot | T::Tupl) {
2024-05-12 05:16:40 -05:00
self.next();
}
2024-05-14 05:17:39 -05:00
expr = match token.kind {
2024-05-12 05:16:40 -05:00
T::LParen => Expr::Call {
func: self.arena.alloc(expr),
args: self
.calcel_ref()
.collect_list(T::Comma, T::RParen, Self::expr),
2024-05-12 05:16:40 -05:00
},
T::Ctor => E::Ctor {
2024-05-14 05:17:39 -05:00
pos: token.start,
ty: Some(self.arena.alloc(expr)),
2024-05-12 05:16:40 -05:00
fields: self.collect_list(T::Comma, T::RBrace, |s| {
let name = s.expect_advance(T::Ident);
s.expect_advance(T::Colon);
let val = s.expr();
2024-05-14 05:17:39 -05:00
(Some(s.lexer.slice(name.range())), val)
2024-05-12 05:16:40 -05:00
}),
},
2024-05-14 05:17:39 -05:00
T::Tupl => E::Ctor {
pos: token.start,
ty: Some(self.arena.alloc(expr)),
fields: self.collect_list(T::Comma, T::RParen, |s| (None, s.expr())),
},
2024-05-12 05:16:40 -05:00
T::Dot => E::Field {
2024-05-12 06:13:36 -05:00
target: self.arena.alloc(expr),
field: {
2024-05-12 05:16:40 -05:00
let token = self.expect_advance(T::Ident);
self.lexer.slice(token.range())
},
},
2024-05-11 09:04:13 -05:00
_ => break,
}
}
2024-05-12 04:52:58 -05:00
if matches!(token.kind, T::Return) {
self.expect_advance(T::Semi);
}
if matches!(token.kind, T::Loop | T::LBrace | T::Fn) {
self.pop_scope(frame);
}
2024-05-09 16:41:59 -05:00
expr
}
/// Runs `f` with the `referening` flag set, so identifiers resolved inside it are
/// flagged `REFERENCED`. Nested use is reported as an error.
fn referenced<T>(&mut self, f: impl Fn(&mut Self) -> T) -> T {
    if self.referening {
        self.report("cannot take reference of reference, (souwy)");
    }

    self.referening = true;
    let result = f(self);
    self.referening = false;
    result
}
2024-05-12 04:52:58 -05:00
fn pop_scope(&mut self, frame: usize) {
let mut undeclared_count = frame;
for i in frame..self.idents.len() {
if !self.idents[i].declared {
self.idents.swap(i, undeclared_count);
undeclared_count += 1;
}
}
self.idents.drain(undeclared_count..);
2024-05-12 04:52:58 -05:00
}
/// Parses a single unit expression and moves it into the arena.
fn ptr_unit_expr(&mut self) -> &'a Expr<'a> {
    let operand = self.unit_expr();
    self.arena.alloc(operand)
}
2024-05-11 15:22:08 -05:00
/// Parses items with `f` until the `end` token is consumed.
///
/// After each item one `delim` token is consumed if present, so the delimiter is
/// optional, including after the last item.
fn collect_list<T: Copy>(
    &mut self,
    delim: TokenKind,
    end: TokenKind,
    mut f: impl FnMut(&mut Self) -> T,
) -> &'a [T] {
    self.collect(|s| {
        if s.advance_if(end).not() {
            let item = f(s);
            s.advance_if(delim);
            Some(item)
        } else {
            None
        }
    })
}
/// Calls `f` repeatedly until it returns `None` and copies the produced items into the
/// arena as one slice.
fn collect<T: Copy>(&mut self, mut f: impl FnMut(&mut Self) -> Option<T>) -> &'a [T] {
    let mut items = Vec::new();
    while let Some(item) = f(self) {
        items.push(item);
    }
    self.arena.alloc_slice(&items)
}
2024-05-09 16:41:59 -05:00
/// Consumes the lookahead token if it has the given `kind`; returns whether it did.
fn advance_if(&mut self, kind: TokenKind) -> bool {
    let matched = self.token.kind == kind;
    if matched {
        self.next();
    }
    matched
}
2024-05-11 09:04:13 -05:00
/// Consumes and returns the lookahead token, reporting an error first if it is not of
/// the given `kind`.
fn expect_advance(&mut self, kind: TokenKind) -> Token {
    let found = self.token.kind;
    if found != kind {
        self.report(format_args!(
            "expected {:?}, found {:?}",
            kind, found
        ));
    }
    self.next()
}
fn report(&self, msg: impl std::fmt::Display) -> ! {
let (line, col) = self.lexer.line_col(self.token.start);
2024-05-12 17:02:32 -05:00
eprintln!("{}:{}:{} => {}", self.path, line, col, msg);
2024-05-09 16:41:59 -05:00
unreachable!();
}
/// Clears the `referening` flag and hands the parser back so the call can be chained.
fn calcel_ref(&mut self) -> &mut Self {
    self.referening = false;
    self
}
2024-05-09 16:41:59 -05:00
}
2024-05-12 04:52:58 -05:00
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Arg<'a> {
pub name: &'a str,
pub id: Ident,
pub last: Option<&'a Cell<IdentFlags>>,
2024-05-12 04:52:58 -05:00
pub ty: Expr<'a>,
}
2024-05-10 14:33:42 -05:00
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Expr<'a> {
2024-05-11 15:22:08 -05:00
Break {
2024-05-12 16:19:45 -05:00
pos: Pos,
2024-05-11 15:22:08 -05:00
},
Continue {
2024-05-12 16:19:45 -05:00
pos: Pos,
},
Closure {
2024-05-12 16:19:45 -05:00
pos: Pos,
2024-05-12 04:52:58 -05:00
args: &'a [Arg<'a>],
2024-05-12 05:16:40 -05:00
ret: &'a Self,
body: &'a Self,
},
2024-05-11 09:04:13 -05:00
Call {
2024-05-12 05:16:40 -05:00
func: &'a Self,
args: &'a [Self],
2024-05-11 09:04:13 -05:00
},
Return {
2024-05-12 16:19:45 -05:00
pos: Pos,
2024-05-12 05:16:40 -05:00
val: Option<&'a Self>,
},
Ident {
name: &'a str,
id: Ident,
index: u32,
last: Option<&'a Cell<IdentFlags>>,
},
Block {
2024-05-12 16:19:45 -05:00
pos: Pos,
2024-05-12 05:16:40 -05:00
stmts: &'a [Self],
},
Number {
2024-05-12 16:19:45 -05:00
pos: Pos,
value: u64,
},
2024-05-10 15:54:12 -05:00
BinOp {
2024-05-12 05:16:40 -05:00
left: &'a Self,
2024-05-10 15:54:12 -05:00
op: TokenKind,
2024-05-12 05:16:40 -05:00
right: &'a Self,
},
If {
2024-05-12 16:19:45 -05:00
pos: Pos,
2024-05-12 05:16:40 -05:00
cond: &'a Self,
then: &'a Self,
else_: Option<&'a Self>,
2024-05-11 11:16:27 -05:00
},
Loop {
2024-05-12 16:19:45 -05:00
pos: Pos,
2024-05-12 05:16:40 -05:00
body: &'a Self,
2024-05-10 15:54:12 -05:00
},
2024-05-12 04:52:58 -05:00
UnOp {
2024-05-12 16:19:45 -05:00
pos: Pos,
2024-05-12 04:52:58 -05:00
op: TokenKind,
2024-05-12 05:16:40 -05:00
val: &'a Self,
},
Struct {
2024-05-12 16:19:45 -05:00
pos: Pos,
2024-05-12 05:16:40 -05:00
fields: &'a [(&'a str, Self)],
},
Ctor {
2024-05-14 05:17:39 -05:00
pos: Pos,
ty: Option<&'a Self>,
fields: &'a [(Option<&'a str>, Self)],
2024-05-12 05:16:40 -05:00
},
Field {
2024-05-12 06:13:36 -05:00
target: &'a Self,
field: &'a str,
2024-05-12 04:52:58 -05:00
},
2024-05-12 13:10:50 -05:00
Bool {
2024-05-12 16:19:45 -05:00
pos: Pos,
2024-05-12 13:10:50 -05:00
value: bool,
},
2024-05-14 16:07:32 -05:00
Directive {
pos: u32,
name: &'a str,
args: &'a [Self],
},
2024-05-09 16:41:59 -05:00
}
2024-05-12 16:19:45 -05:00
impl<'a> Expr<'a> {
pub fn pos(&self) -> Pos {
match self {
Self::Call { func, .. } => func.pos(),
Self::Ident { id, .. } => ident::pos(*id),
2024-05-14 05:17:39 -05:00
Self::Break { pos }
2024-05-14 16:07:32 -05:00
| Self::Directive { pos, .. }
2024-05-14 05:17:39 -05:00
| Self::Continue { pos }
| Self::Closure { pos, .. }
| Self::Block { pos, .. }
| Self::Number { pos, .. }
| Self::Return { pos, .. }
| Self::If { pos, .. }
| Self::Loop { pos, .. }
| Self::UnOp { pos, .. }
| Self::Struct { pos, .. }
| Self::Ctor { pos, .. }
| Self::Bool { pos, .. } => *pos,
2024-05-12 16:19:45 -05:00
Self::BinOp { left, .. } => left.pos(),
Self::Field { target, .. } => target.pos(),
}
}
fn mark_mut(&self) {
match self {
Self::Ident { last, .. } => _ = last.map(|l| l.set(l.get() | MUTABLE)),
Self::Field { target, .. } => target.mark_mut(),
_ => {}
}
}
2024-05-12 16:19:45 -05:00
}
impl<'a> std::fmt::Display for Expr<'a> {
2024-05-09 16:41:59 -05:00
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
thread_local! {
static INDENT: Cell<usize> = Cell::new(0);
}
2024-05-14 16:07:32 -05:00
fn fmt_list<'a, T>(
f: &mut std::fmt::Formatter,
end: &str,
list: &'a [T],
fmt: impl Fn(&T, &mut std::fmt::Formatter) -> std::fmt::Result,
) -> std::fmt::Result {
let first = &mut true;
for expr in list {
if !std::mem::take(first) {
write!(f, ", ")?;
}
fmt(expr, f)?;
}
write!(f, "{end}")
}
macro_rules! impl_parenter {
($($name:ident => $pat:pat,)*) => {
$(
struct $name<'a>(&'a Expr<'a>);
impl<'a> std::fmt::Display for $name<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if matches!(self.0, $pat) {
write!(f, "({})", self.0)
} else {
write!(f, "{}", self.0)
}
}
}
)*
};
}
impl_parenter! {
Unary => Expr::BinOp { .. },
Postfix => Expr::UnOp { .. } | Expr::BinOp { .. },
Consecutive => Expr::UnOp { .. },
}
2024-05-10 15:54:12 -05:00
match *self {
Self::Field { target, field } => {
write!(f, "{}.{field}", Postfix(target))
}
2024-05-14 16:07:32 -05:00
Self::Directive { name, args, .. } => {
write!(f, "@{name}(")?;
fmt_list(f, ")", args, std::fmt::Display::fmt)
}
2024-05-12 05:16:40 -05:00
Self::Struct { fields, .. } => {
write!(f, "struct {{")?;
2024-05-14 16:07:32 -05:00
fmt_list(f, "}", fields, |(name, val), f| write!(f, "{name}: {val}",))
2024-05-12 05:16:40 -05:00
}
2024-05-14 05:17:39 -05:00
Self::Ctor { ty, fields, .. } => {
let (left, rith) = if fields.iter().any(|(name, _)| name.is_some()) {
('{', '}')
} else {
('(', ')')
};
if let Some(ty) = ty {
write!(f, "{}", Unary(ty))?;
2024-05-14 05:17:39 -05:00
}
2024-05-14 16:07:32 -05:00
write!(f, ".{left}")?;
2024-05-12 05:16:40 -05:00
let first = &mut true;
for (name, val) in fields {
if !std::mem::take(first) {
write!(f, ", ")?;
}
2024-05-14 05:17:39 -05:00
if let Some(name) = name {
write!(f, "{name}: ")?;
}
write!(f, "{val}")?;
2024-05-12 05:16:40 -05:00
}
2024-05-14 05:17:39 -05:00
write!(f, "{rith}")
2024-05-12 05:16:40 -05:00
}
Self::UnOp { op, val, .. } => write!(f, "{op}{}", Unary(val)),
2024-05-11 15:22:08 -05:00
Self::Break { .. } => write!(f, "break;"),
Self::Continue { .. } => write!(f, "continue;"),
Self::If {
cond, then, else_, ..
} => {
write!(f, "if {cond} {}", Consecutive(then))?;
2024-05-11 11:16:27 -05:00
if let Some(else_) = else_ {
2024-05-14 16:07:32 -05:00
write!(f, " else {else_}")?;
2024-05-11 11:16:27 -05:00
}
Ok(())
}
2024-05-14 16:07:32 -05:00
Self::Loop { body, .. } => write!(f, "loop {body}"),
2024-05-11 15:22:08 -05:00
Self::Closure {
ret, body, args, ..
} => {
2024-05-14 16:07:32 -05:00
write!(f, "fn(")?;
fmt_list(f, "", args, |arg, f| write!(f, "{}: {}", arg.name, arg.ty))?;
write!(f, "): {ret} {body}")
2024-05-11 09:04:13 -05:00
}
Self::Call { func, args } => {
write!(f, "{}(", Postfix(func))?;
2024-05-14 16:07:32 -05:00
fmt_list(f, ")", args, std::fmt::Display::fmt)
2024-05-11 09:04:13 -05:00
}
2024-05-14 16:07:32 -05:00
Self::Return { val: Some(val), .. } => write!(f, "return {val};"),
2024-05-11 15:22:08 -05:00
Self::Return { val: None, .. } => write!(f, "return;"),
2024-05-14 16:07:32 -05:00
Self::Ident { name, .. } => write!(f, "{name}"),
2024-05-11 15:22:08 -05:00
Self::Block { stmts, .. } => {
2024-05-09 16:41:59 -05:00
writeln!(f, "{{")?;
INDENT.with(|i| i.set(i.get() + 1));
let res = (|| {
2024-05-10 15:54:12 -05:00
for stmt in stmts {
2024-05-09 16:41:59 -05:00
for _ in 0..INDENT.with(|i| i.get()) {
write!(f, " ")?;
}
2024-05-14 16:07:32 -05:00
writeln!(f, "{stmt}")?;
2024-05-09 16:41:59 -05:00
}
Ok(())
})();
2024-05-09 16:41:59 -05:00
INDENT.with(|i| i.set(i.get() - 1));
write!(f, "}}")?;
res
}
2024-05-14 16:07:32 -05:00
Self::Number { value, .. } => write!(f, "{value}"),
Self::Bool { value, .. } => write!(f, "{value}"),
2024-05-10 15:54:12 -05:00
Self::BinOp { left, right, op } => {
let display_branch = |f: &mut std::fmt::Formatter, expr: &Self| {
if let Self::BinOp { op: lop, .. } = expr
&& op.precedence() > lop.precedence()
{
2024-05-14 16:07:32 -05:00
write!(f, "({expr})")
2024-05-10 15:54:12 -05:00
} else {
2024-05-14 16:07:32 -05:00
write!(f, "{expr}")
2024-05-10 15:54:12 -05:00
}
};
display_branch(f, left)?;
2024-05-14 16:07:32 -05:00
write!(f, " {op} ")?;
2024-05-10 15:54:12 -05:00
display_branch(f, right)
}
2024-05-09 16:41:59 -05:00
}
}
}
/// Bump allocator handing out references that live for `'a`.
///
/// Memory comes from fixed-size chunks ([`ArenaChunk::CHUNK_SIZE`] bytes each) kept in a
/// doubly linked list: `next` leads to the chunk allocated before (older), `prev` to the
/// chunk allocated after (newer). Values are never dropped individually; all chunks are
/// freed when the arena is dropped. A single allocation must fit into one chunk.
#[derive(Default)]
pub struct Arena<'a> {
    /// Chunk that allocations are currently served from (null base before first use).
    chunk: Cell<ArenaChunk>,
    ph: std::marker::PhantomData<&'a ()>,
}

impl<'a> Arena<'a> {
    /// Copies `token` into the arena and returns the copy.
    pub fn alloc_str(&self, token: &str) -> &'a str {
        let ptr = self.alloc_slice(token.as_bytes());
        // SAFETY: the bytes were copied verbatim from a valid `&str`.
        unsafe { std::str::from_utf8_unchecked_mut(ptr) }
    }

    /// Moves `value` into the arena. Its destructor will never run.
    ///
    /// # Panics
    /// Panics if `T` is over-aligned or larger than the usable part of one chunk.
    pub fn alloc<T>(&self, value: T) -> &'a mut T {
        let layout = std::alloc::Layout::new::<T>();
        let ptr = self.alloc_low(layout).as_ptr().cast::<T>();
        // SAFETY: `alloc_low` returned a non-null pointer to unused arena memory that is
        // large enough and suitably aligned for `layout`.
        unsafe {
            ptr.write(value);
            &mut *ptr
        }
    }

    /// Copies `slice` into the arena and returns the copy.
    ///
    /// # Panics
    /// Panics if the slice is over-aligned or larger than the usable part of one chunk.
    pub fn alloc_slice<T: Copy>(&self, slice: &[T]) -> &'a mut [T] {
        let layout = std::alloc::Layout::array::<T>(slice.len()).unwrap();
        let ptr = self.alloc_low(layout).as_ptr().cast::<T>();
        // SAFETY: `ptr` is non-null, aligned for `T` and points at `slice.len()` unused
        // elements inside the arena, which cannot overlap the borrowed `slice`.
        unsafe {
            ptr.copy_from_nonoverlapping(slice.as_ptr(), slice.len());
            std::slice::from_raw_parts_mut(ptr, slice.len())
        }
    }

    /// Rewinds the arena to its oldest chunk so every chunk allocated so far is reused
    /// by later allocations. No memory is returned to the system.
    pub fn clear(&mut self) {
        let chunk = self.chunk.get_mut();
        if chunk.base.is_null() {
            return;
        }
        // `alloc_low` advances through `prev` (newer) links, so start over at the
        // oldest chunk, found by following `next` to the end of the chain.
        loop {
            let older = ArenaChunk::next(chunk.base);
            if older.is_null() {
                break;
            }
            chunk.base = older;
        }
        // SAFETY: `PREV_OFFSET` lies inside the chunk allocation.
        chunk.end = unsafe { chunk.base.add(ArenaChunk::PREV_OFFSET) };
    }

    /// Runs `f` on a copy of the current chunk descriptor and stores the result back.
    fn with_chunk<R>(&self, f: impl FnOnce(&mut ArenaChunk) -> R) -> R {
        let mut chunk = self.chunk.get();
        let r = f(&mut chunk);
        self.chunk.set(chunk);
        r
    }

    /// Reserves memory for `layout`, moving on to a retained newer chunk or to a fresh
    /// chunk when the current one is full.
    fn alloc_low(&self, layout: std::alloc::Layout) -> NonNull<u8> {
        assert!(
            layout.align() <= ArenaChunk::ALIGN,
            "allocation is over-aligned for the arena"
        );
        // The last two pointer-sized words of a chunk hold its links.
        assert!(
            layout.size() <= ArenaChunk::PREV_OFFSET,
            "allocation does not fit into one arena chunk"
        );

        self.with_chunk(|chunk| {
            if let Some(ptr) = chunk.alloc(layout) {
                return ptr;
            }

            *chunk = match ArenaChunk::reset(ArenaChunk::prev(chunk.base)) {
                // A newer chunk survived a `clear`: reuse it.
                Some(newer) => newer,
                None => ArenaChunk::new(chunk.base),
            };

            // A fresh or reset chunk always has room, thanks to the size assert above.
            chunk.alloc(layout).unwrap()
        })
    }
}

impl<'a> Drop for Arena<'a> {
    fn drop(&mut self) {
        use ArenaChunk as AC;

        let current = self.chunk.get().base;

        // Chunks newer than the current one (present after `clear`) hang off `prev`.
        let mut newer = AC::prev(current);
        while !newer.is_null() {
            let following = AC::prev(newer);
            // SAFETY: every chunk was allocated with `AC::LAYOUT` and is freed once.
            unsafe { std::alloc::dealloc(newer, AC::LAYOUT) };
            newer = following;
        }

        // The current chunk and everything older hang off `next`.
        let mut older = current;
        while !older.is_null() {
            let following = AC::next(older);
            // SAFETY: as above; the link is read before the chunk is freed.
            unsafe { std::alloc::dealloc(older, AC::LAYOUT) };
            older = following;
        }
    }
}

/// Descriptor of one chunk: allocations grow downwards from `end` towards `base`.
#[derive(Clone, Copy)]
struct ArenaChunk {
    /// Start of the chunk allocation (null when no chunk exists yet).
    base: *mut u8,
    /// Lowest address handed out so far; everything in `base..end` is free.
    end: *mut u8,
}

impl Default for ArenaChunk {
    fn default() -> Self {
        Self {
            base: std::ptr::null_mut(),
            end: std::ptr::null_mut(),
        }
    }
}

impl ArenaChunk {
    const CHUNK_SIZE: usize = 1 << 16;
    const ALIGN: usize = std::mem::align_of::<Self>();
    /// Offset of the link to the older chunk (last pointer-sized word of the chunk).
    const NEXT_OFFSET: usize = Self::CHUNK_SIZE - std::mem::size_of::<*mut u8>();
    /// Offset of the link to the newer chunk; also the usable capacity of a chunk.
    const PREV_OFFSET: usize = Self::NEXT_OFFSET - std::mem::size_of::<*mut u8>();
    const LAYOUT: std::alloc::Layout =
        unsafe { std::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) };

    /// Allocates a fresh chunk and links it in as the newest one, in front of `next`
    /// (which may be null).
    fn new(next: *mut u8) -> Self {
        // SAFETY: `LAYOUT` has a non-zero size.
        let base = unsafe { std::alloc::alloc(Self::LAYOUT) };
        if base.is_null() {
            std::alloc::handle_alloc_error(Self::LAYOUT);
        }
        // SAFETY: `PREV_OFFSET` lies inside the allocation.
        let end = unsafe { base.add(Self::PREV_OFFSET) };
        if !next.is_null() {
            Self::set_prev(next, base);
        }
        Self::set_next(base, next);
        Self::set_prev(base, std::ptr::null_mut());
        Self { base, end }
    }

    fn set_next(curr: *mut u8, next: *mut u8) {
        // SAFETY: callers pass the base of a live chunk; the offset is in bounds and
        // pointer-aligned.
        unsafe { std::ptr::write(curr.add(Self::NEXT_OFFSET) as *mut _, next) };
    }

    fn set_prev(curr: *mut u8, prev: *mut u8) {
        // SAFETY: see `set_next`.
        unsafe { std::ptr::write(curr.add(Self::PREV_OFFSET) as *mut _, prev) };
    }

    /// Link to the older chunk. `curr` must be the base of a live chunk.
    fn next(curr: *mut u8) -> *mut u8 {
        // SAFETY: see `set_next`.
        unsafe { std::ptr::read(curr.add(Self::NEXT_OFFSET) as *mut _) }
    }

    /// Link to the newer chunk, or null when `curr` itself is null.
    fn prev(curr: *mut u8) -> *mut u8 {
        if curr.is_null() {
            return std::ptr::null_mut();
        }
        // SAFETY: see `set_next`.
        unsafe { std::ptr::read(curr.add(Self::PREV_OFFSET) as *mut _) }
    }

    /// Descriptor for reusing the existing chunk at `prev` from scratch, or `None` when
    /// `prev` is null.
    fn reset(prev: *mut u8) -> Option<Self> {
        if prev.is_null() {
            return None;
        }
        Some(Self {
            base: prev,
            // Stop below the link words so reuse never overwrites the chain.
            end: unsafe { prev.add(Self::PREV_OFFSET) },
        })
    }

    /// Carves `layout` out of the free space, or returns `None` when it does not fit
    /// (always the case while no chunk has been allocated yet).
    fn alloc(&mut self, layout: std::alloc::Layout) -> Option<NonNull<u8>> {
        // Without a chunk even a zero-sized request must fail; otherwise a null pointer
        // would be handed out.
        if self.base.is_null() {
            return None;
        }
        // Round `end` down to the requested alignment.
        let padding = self.end as usize - (self.end as usize & !(layout.align() - 1));
        let size = layout.size() + padding;
        if size > self.end as usize - self.base as usize {
            return None;
        }
        // SAFETY: `size` does not exceed the free space, so the result stays inside the
        // chunk.
        unsafe { self.end = self.end.sub(size) };
        NonNull::new(self.end)
    }
}
2024-05-09 16:41:59 -05:00
#[cfg(test)]
mod tests {
    /// Parses `input` and appends the pretty-printed top-level expressions to `output`,
    /// one per line.
    fn parse(input: &'static str, output: &mut String) {
        use std::fmt::Write;

        let mut arena = super::Arena::default();
        let exprs = super::Parser::new(&arena).file(input, "test");
        for expr in exprs {
            writeln!(output, "{}", expr).unwrap();
        }
        arena.clear();
    }

    crate::run_tests! { parse:
        example => include_str!("../examples/main_fn.hb");
        arithmetic => include_str!("../examples/arithmetic.hb");
    }
}