holey-bytes/hblang/src/parser.rs
2024-09-30 19:27:00 +02:00

1579 lines
49 KiB
Rust

use {
crate::{
ident::{self, Ident},
lexer::{self, Lexer, Token, TokenKind},
},
alloc::{boxed::Box, string::String, vec::Vec},
core::{
cell::UnsafeCell,
fmt::{self, Write},
ops::{Deref, Not},
ptr::NonNull,
sync::atomic::AtomicUsize,
},
};
/// Byte offset into the source text of a file (used to slice the source when reporting).
pub type Pos = u32;
/// Bit set built from the constants in [`idfl`].
pub type IdentFlags = u32;
/// Flat list of symbols produced while parsing a file.
pub type Symbols = Vec<Symbol>;
/// Identifier of a file, as returned by a [`Loader`].
pub type FileId = u32;
// NOTE(review): `IdentIndex` is not referenced in this part of the file.
pub type IdentIndex = u16;
/// Error value produced by a [`Loader`].
pub type LoaderError = String;
/// Callback resolving an `@use` import. The parser calls it with the imported path
/// first and the path of the file being parsed second.
pub type Loader<'a> = &'a (dyn Fn(&str, &str) -> Result<FileId, LoaderError> + 'a);
/// Flag constants stored in [`IdentFlags`].
pub mod idfl {
use super::*;
// Declares one constant per listed name. The n-th name (starting at 0) gets the
// n-th bit counted down from the most significant bit of `IdentFlags`; `ALL` is
// the union of every declared flag.
macro_rules! flags {
($($name:ident,)*) => {
$(pub const $name: IdentFlags = 1 << (core::mem::size_of::<IdentFlags>() * 8 - 1 - ${index(0)});)*
pub const ALL: IdentFlags = 0 $(| $name)*;
};
}
flags! {
MUTABLE,
REFERENCED,
COMPTIME,
}
}
pub fn no_loader(_: &str, _: &str) -> Result<FileId, LoaderError> {
Err(String::new())
}
/// An identifier together with the flags collected for it; emitted when its scope is popped.
#[derive(Debug)]
pub struct Symbol {
/// The identifier this entry describes.
pub name: Ident,
/// Combination of [`idfl`] flags.
pub flags: IdentFlags,
}
/// Entry of the parser's identifier stack.
#[derive(Clone, Copy)]
struct ScopeIdent {
// identifier created at the first occurrence of the name
ident: Ident,
// set once a declaration (`:=` or a function argument) has been seen
declared: bool,
// `idfl` flags accumulated so far
flags: IdentFlags,
}
/// Parser state. `'a` is the lifetime of the arena-allocated AST, `'b` the
/// lifetime of the borrowed inputs (source, loader, arena, symbol list).
pub struct Parser<'a, 'b> {
// path of the file being parsed; passed to the loader and to error reports
path: &'b str,
// callback used to resolve `@use` imports
loader: Loader<'b>,
lexer: Lexer<'b>,
// backing storage for every produced expression, slice and string
arena: &'b Arena<'a>,
// current lookahead token
token: Token,
// receives the symbols of every scope that gets popped
symbols: &'b mut Symbols,
// length of `idents` recorded when a struct body is entered; declared
// identifiers below this index are recorded as captured when referenced
ns_bound: usize,
// whether the last element of the most recently parsed list was followed by a separator
trailing_sep: bool,
// set by the `packed` keyword and consumed by the following `struct`
packed: bool,
// stack of identifiers that are currently in scope or still undeclared
idents: Vec<ScopeIdent>,
// identifiers captured by the struct bodies currently being parsed
captured: Vec<Ident>,
}
impl<'a, 'b> Parser<'a, 'b> {
/// Creates a parser over an empty input; call [`Parser::file`] to parse real source.
pub fn new(arena: &'b Arena<'a>, symbols: &'b mut Symbols, loader: Loader<'b>) -> Self {
    let mut lexer = Lexer::new("");
    // prime the lookahead before the lexer is moved into the struct
    let token = lexer.next();
    Self {
        path: "",
        loader,
        lexer,
        arena,
        token,
        symbols,
        ns_bound: 0,
        trailing_sep: false,
        packed: false,
        idents: Vec::new(),
        captured: Vec::new(),
    }
}
/// Parses `input` as a whole file and returns its top-level expressions.
///
/// After parsing, the outermost scope is popped and every identifier that is
/// still undeclared is reported; any such report is logged and then aborts.
pub fn file(&mut self, input: &'b str, path: &'b str) -> &'a [Expr<'a>] {
    self.path = path;
    self.lexer = Lexer::new(input);
    self.token = self.lexer.next();

    let exprs =
        self.collect_list(TokenKind::Semi, TokenKind::Eof, |parser| parser.expr_low(true));
    self.pop_scope(0);

    // whatever survived `pop_scope` was referenced but never declared
    let mut errors = String::new();
    for leftover in self.idents.drain(..) {
        let name = self.lexer.slice(ident::range(leftover.ident));
        report_to(
            self.lexer.source(),
            self.path,
            ident::pos(leftover.ident),
            format_args!("undeclared identifier: {}", name),
            &mut errors,
        );
    }

    if errors.is_empty() {
        return exprs;
    }
    // TODO: we need error recovery
    log::error!("{errors}");
    unreachable!();
}
/// Returns the current lookahead token and replaces it with the next lexed one.
fn next(&mut self) -> Token {
    let upcoming = self.lexer.next();
    core::mem::replace(&mut self.token, upcoming)
}
/// Parses a full expression and moves it into the arena.
fn ptr_expr(&mut self) -> &'a Expr<'a> {
    let parsed = self.expr();
    self.arena.alloc(parsed)
}
/// Parses one expression: a unit expression followed by any binary operators.
/// `top_level` is forwarded to declaration handling.
fn expr_low(&mut self, top_level: bool) -> Expr<'a> {
    let lhs = self.unit_expr();
    self.bin_expr(lhs, 0, top_level)
}
/// Parses an expression that is not at the top level of the file.
fn expr(&mut self) -> Expr<'a> {
self.expr_low(false)
}
/// Folds binary operators onto `fold` for as long as the next operator has a
/// precedence greater than `min_prec`.
///
/// A declaration operator marks the identifiers on the left side as declared.
/// Compound assignments (`a op= b`) are rewritten to `a = a op b`, and the
/// left side of every assignment is flagged as mutable.
fn bin_expr(&mut self, mut fold: Expr<'a>, min_prec: u8, top_level: bool) -> Expr<'a> {
loop {
let Some(prec) = self.token.kind.precedence() else {
break;
};
// leave operators that do not bind tighter to the caller
if prec <= min_prec {
break;
}
// start of the operator token; everything before it is the left side
let checkpoint = self.token.start;
let op = self.next().kind;
if op == TokenKind::Decl {
self.declare_rec(&fold, top_level);
}
let op_ass = op.ass_op().map(|op| {
// this abomination reparses the left side, so that the desugaring adheres to the
// parser invariants.
let source = self.lexer.slice(0..checkpoint as usize);
let prev_lexer =
core::mem::replace(&mut self.lexer, Lexer::restore(source, fold.pos()));
let prev_token = core::mem::replace(&mut self.token, self.lexer.next());
let clone = self.expr();
// put the real lexer state back before parsing the right side
self.lexer = prev_lexer;
self.token = prev_token;
(op, clone)
});
let right = self.unit_expr();
let right = self.bin_expr(right, prec, false);
let right = self.arena.alloc(right);
let left = self.arena.alloc(fold);
if let Some((op, clone)) = op_ass {
self.flag_idents(*left, idfl::MUTABLE);
// `left op= right` becomes `left = clone op right`
let right = Expr::BinOp { left: self.arena.alloc(clone), op, right };
fold = Expr::BinOp { left, op: TokenKind::Assign, right: self.arena.alloc(right) };
} else {
fold = Expr::BinOp { left, right, op };
if op == TokenKind::Assign {
self.flag_idents(*left, idfl::MUTABLE);
}
}
}
fold
}
/// Declares every identifier bound by the pattern `expr`: a plain identifier, or a
/// constructor whose field values are themselves patterns. Anything else is reported.
fn declare_rec(&mut self, expr: &Expr, top_level: bool) {
    match *expr {
        Expr::Ident { pos, id, is_first, .. } => {
            let valid_order = is_first || top_level;
            self.declare(pos, id, valid_order);
        }
        Expr::Ctor { fields, .. } => {
            fields.iter().for_each(|field| self.declare_rec(&field.value, top_level));
        }
        _ => self.report(expr.pos(), "cant declare this shit (yet)"),
    }
}
/// Marks `id` as declared.
///
/// Reports an error when `valid_order` is false (out-of-order declaration) or when
/// the identifier was already declared. Panics if `id` is not on the identifier stack.
fn declare(&mut self, pos: Pos, id: Ident, valid_order: bool) {
    if !valid_order {
        let name = self.lexer.slice(ident::range(id));
        self.report(pos, format_args!("out of order declaration not allowed: {}", name));
    }

    let index = self.idents.binary_search_by_key(&id, |s| s.ident).expect("fck up");
    let was_declared = core::mem::replace(&mut self.idents[index].declared, true);
    if was_declared {
        let name = self.lexer.slice(ident::range(id));
        self.report(pos, format_args!("redeclaration of identifier: {}", name));
    }
}
/// Resolves the identifier spelled by `token`.
///
/// Returns the identifier and whether this is the first time the name was seen
/// (a new, undeclared stack entry was pushed). Builtin type names are returned
/// directly and never count as first occurrences.
fn resolve_ident(&mut self, token: Token) -> (Ident, bool) {
let is_ct = token.kind == TokenKind::CtIdent;
let name = self.lexer.slice(token.range());
if let Some(builtin) = crate::ty::from_str(name) {
return (builtin, false);
}
// search from the innermost entry so the most recent identifier with this spelling wins
let (i, id, bl) = match self
.idents
.iter_mut()
.enumerate()
.rfind(|(_, elem)| self.lexer.slice(ident::range(elem.ident)) == name)
{
Some((i, elem)) => (i, elem, false),
None => {
let id = ident::new(token.start, name.len() as _);
self.idents.push(ScopeIdent { ident: id, declared: false, flags: 0 });
(self.idents.len() - 1, self.idents.last_mut().unwrap(), true)
}
};
// multiplying by the bool sets the flag only for compile-time identifiers
id.flags |= idfl::COMPTIME * is_ct as u32;
// a declared identifier below the struct boundary is captured by the struct being parsed
if id.declared && self.ns_bound > i {
id.flags |= idfl::COMPTIME;
self.captured.push(id.ident);
}
(id.ident, bl)
}
/// Copies the source text covered by the token into the arena.
fn move_str(&mut self, range: Token) -> &'a str {
    let text = self.lexer.slice(range.range());
    self.arena.alloc_str(text)
}
/// Parses a unit expression: one prefix form selected by the current token,
/// followed by any number of postfix forms (call, constructor, tuple, index,
/// field access).
///
/// Many arms rely on struct-literal fields being evaluated in the order they
/// are written, because the field initializers advance the parser.
fn unit_expr(&mut self) -> Expr<'a> {
use {Expr as E, TokenKind as T};
// identifiers pushed from here on belong to this expression's scope (see the end)
let frame = self.idents.len();
let token @ Token { start: pos, .. } = self.next();
// saved so the struct arm can restore the boundary and slice its own captures
let prev_boundary = self.ns_bound;
let prev_captured = self.captured.len();
let mut expr = match token.kind {
T::Ct => E::Ct { pos, value: self.ptr_expr() },
T::Directive if self.lexer.slice(token.range()) == "use" => {
self.expect_advance(TokenKind::LParen);
let str = self.expect_advance(TokenKind::DQuote);
self.expect_advance(TokenKind::RParen);
let path = self.lexer.slice(str.range()).trim_matches('"');
E::Mod {
pos,
path: self.arena.alloc_str(path),
// the loader gets the imported path and the path of the current file
id: match (self.loader)(path, self.path) {
Ok(id) => id,
Err(e) => {
self.report(str.start, format_args!("error loading dependency: {e:#}"))
}
},
}
}
T::Directive => E::Directive {
pos: pos - 1, // need to undo the directive shift
name: self.move_str(token),
args: {
self.expect_advance(T::LParen);
self.collect_list(T::Comma, T::RParen, Self::expr)
},
},
T::True => E::Bool { pos, value: true },
T::False => E::Bool { pos, value: false },
T::Idk => E::Idk { pos },
T::DQuote => E::String { pos, literal: self.move_str(token) },
T::Packed => {
self.packed = true;
let expr = self.unit_expr();
// the struct arm takes the flag; if it is still set, no struct consumed it
if self.packed {
self.report(
expr.pos(),
"this can not be packed \
(unlike your mom that gets packed every day by me)",
);
}
expr
}
T::Struct => E::Struct {
packed: core::mem::take(&mut self.packed),
fields: {
// identifiers declared before this point count as captures inside the body
self.ns_bound = self.idents.len();
self.expect_advance(T::LBrace);
self.collect_list(T::Comma, T::RBrace, |s| {
let tok = s.token;
if s.advance_if(T::Comment) {
CommentOr::Comment { literal: s.move_str(tok), pos: tok.start }
} else {
let name = s.expect_advance(T::Ident);
s.expect_advance(T::Colon);
CommentOr::Or(StructField {
pos: name.start,
name: s.move_str(name),
ty: s.expr(),
})
}
})
},
captured: {
self.ns_bound = prev_boundary;
// sort and deduplicate only the captures recorded by this struct
self.captured[prev_captured..].sort_unstable();
let preserved = self.captured[prev_captured..].partition_dedup().0.len();
self.captured.truncate(prev_captured + preserved);
self.arena.alloc_slice(&self.captured[prev_captured..])
},
pos: {
if self.ns_bound == 0 {
// we might save some memory
self.captured.clear();
}
pos
},
trailing_comma: core::mem::take(&mut self.trailing_sep),
},
T::Ident | T::CtIdent => {
let (id, is_first) = self.resolve_ident(token);
let name = self.move_str(token);
E::Ident { pos, is_ct: token.kind == T::CtIdent, name, id, is_first }
}
T::If => E::If {
pos,
cond: self.ptr_expr(),
then: self.ptr_expr(),
else_: self.advance_if(T::Else).then(|| self.ptr_expr()),
},
T::Loop => E::Loop { pos, body: self.ptr_expr() },
T::Break => E::Break { pos },
T::Continue => E::Continue { pos },
T::Return => E::Return {
pos,
// no value is parsed when the next token closes or separates the statement
val: (!matches!(
self.token.kind,
T::Semi | T::RBrace | T::RBrack | T::RParen | T::Comma
))
.then(|| self.ptr_expr()),
},
T::Fn => E::Closure {
pos,
args: {
self.expect_advance(T::LParen);
self.collect_list(T::Comma, T::RParen, |s| {
let name = s.advance_ident();
let (id, _) = s.resolve_ident(name);
// arguments are declared right away
s.declare(name.start, id, true);
s.expect_advance(T::Colon);
Arg {
pos: name.start,
name: s.move_str(name),
is_ct: name.kind == T::CtIdent,
id,
ty: s.expr(),
}
})
},
ret: {
self.expect_advance(T::Colon);
self.ptr_expr()
},
body: self.ptr_expr(),
},
T::Ctor => self.ctor(pos, None),
T::Tupl => self.tupl(pos, None),
T::LBrack => E::Slice {
item: self.ptr_unit_expr(),
size: self.advance_if(T::Semi).then(|| self.ptr_expr()),
pos: {
self.expect_advance(T::RBrack);
pos
},
},
T::Band | T::Mul | T::Xor | T::Sub => E::UnOp {
pos,
op: token.kind,
val: {
let expr = self.ptr_unit_expr();
// taking a reference marks the operand's identifier as referenced
if token.kind == T::Band {
self.flag_idents(*expr, idfl::REFERENCED);
}
expr
},
},
T::LBrace => E::Block { pos, stmts: self.collect_list(T::Semi, T::RBrace, Self::expr) },
T::Number => {
let slice = self.lexer.slice(token.range());
// the two-character prefix selects the radix and is removed before parsing
let (slice, radix) = match &slice.get(0..2) {
Some("0x") => (slice.trim_start_matches("0x"), Radix::Hex),
Some("0b") => (slice.trim_start_matches("0b"), Radix::Binary),
Some("0o") => (slice.trim_start_matches("0o"), Radix::Octal),
_ => (slice, Radix::Decimal),
};
E::Number {
pos,
// parsed as unsigned, then reinterpreted as `i64`
value: match u64::from_str_radix(slice, radix as u32) {
Ok(value) => value,
Err(e) => self.report(token.start, format_args!("invalid number: {e}")),
} as i64,
radix,
}
}
T::LParen => {
let expr = self.expr();
self.expect_advance(T::RParen);
expr
}
T::Comment => Expr::Comment { pos, literal: self.move_str(token) },
tok => self.report(token.start, format_args!("unexpected token: {tok:?}")),
};
// postfix forms: keep wrapping `expr` while a postfix token follows
loop {
let token = self.token;
if matches!(token.kind, T::LParen | T::Ctor | T::Dot | T::Tupl | T::LBrack) {
self.next();
}
expr = match token.kind {
T::LParen => Expr::Call {
func: self.arena.alloc(expr),
args: self.collect_list(T::Comma, T::RParen, Self::expr),
trailing_comma: core::mem::take(&mut self.trailing_sep),
},
T::Ctor => self.ctor(token.start, Some(expr)),
T::Tupl => self.tupl(token.start, Some(expr)),
T::LBrack => E::Index {
base: self.arena.alloc(expr),
index: {
let index = self.expr();
self.expect_advance(T::RBrack);
self.arena.alloc(index)
},
},
T::Dot => E::Field {
target: self.arena.alloc(expr),
pos: token.start,
name: {
let token = self.expect_advance(T::Ident);
self.move_str(token)
},
},
_ => break,
}
}
// loops, blocks and functions open a scope; `token` here is the prefix token again
if matches!(token.kind, T::Loop | T::LBrace | T::Fn) {
self.pop_scope(frame);
}
expr
}
/// Parses the elements of a tuple constructor up to the closing `)`; the opening
/// token has already been consumed. `ty` is the optional type expression in front.
fn tupl(&mut self, pos: Pos, ty: Option<Expr<'a>>) -> Expr<'a> {
    let ty = ty.map(|ty| self.arena.alloc(ty));
    let fields = self.collect_list(TokenKind::Comma, TokenKind::RParen, Self::expr);
    let trailing_comma = core::mem::take(&mut self.trailing_sep);
    Expr::Tupl { pos, ty, fields, trailing_comma }
}
/// Parses the fields of a struct constructor up to the closing `}`; the opening
/// token has already been consumed. A field written without `: value` is shorthand
/// for an identifier with the same name as the field.
fn ctor(&mut self, pos: Pos, ty: Option<Expr<'a>>) -> Expr<'a> {
    let ty = ty.map(|ty| self.arena.alloc(ty));
    let fields = self.collect_list(TokenKind::Comma, TokenKind::RBrace, |parser| {
        let name_tok = parser.advance_ident();
        let name = parser.move_str(name_tok);
        let value = if parser.advance_if(TokenKind::Colon) {
            parser.expr()
        } else {
            let (id, is_first) = parser.resolve_ident(name_tok);
            Expr::Ident { pos: name_tok.start, is_ct: false, id, name, is_first }
        };
        CtorField { pos: name_tok.start, name, value }
    });
    let trailing_comma = core::mem::take(&mut self.trailing_sep);
    Expr::Ctor { pos, ty, fields, trailing_comma }
}
/// Consumes and returns the current token if it is a (compile-time) identifier;
/// reports an error otherwise.
fn advance_ident(&mut self) -> Token {
    let kind = self.token.kind;
    if !matches!(kind, TokenKind::Ident | TokenKind::CtIdent) {
        self.report(self.token.start, format_args!("expected identifier, found {:?}", kind));
    }
    self.next()
}
/// Closes the scope that starts at stack index `frame`.
///
/// Undeclared identifiers are moved to the front of the frame and stay on the
/// stack; the declared ones are removed and appended to the symbol list.
fn pop_scope(&mut self, frame: usize) {
    // everything in `frame..keep_end` is undeclared and survives the pop
    let mut keep_end = frame;
    for cursor in frame..self.idents.len() {
        if self.idents[cursor].declared {
            continue;
        }
        self.idents.swap(cursor, keep_end);
        keep_end += 1;
    }

    self.idents
        .drain(keep_end..)
        .map(|scoped| Symbol { name: scoped.ident, flags: scoped.flags })
        .collect_into(self.symbols);
}
/// Parses a unit expression and moves it into the arena.
fn ptr_unit_expr(&mut self) -> &'a Expr<'a> {
    let unit = self.unit_expr();
    self.arena.alloc(unit)
}
/// Parses elements with `f` until the `end` token is consumed.
///
/// After each element an optional `delim` is consumed; whether it was present
/// for the most recent element is recorded in `trailing_sep`.
fn collect_list<T: Copy>(
    &mut self,
    delim: TokenKind,
    end: TokenKind,
    mut f: impl FnMut(&mut Self) -> T,
) -> &'a [T] {
    self.collect(|parser| {
        let finished = parser.advance_if(end);
        finished.not().then(|| {
            let item = f(parser);
            parser.trailing_sep = parser.advance_if(delim);
            item
        })
    })
}
/// Calls `f` until it returns `None` and stores the produced items in the arena.
fn collect<T: Copy>(&mut self, mut f: impl FnMut(&mut Self) -> Option<T>) -> &'a [T] {
    // TODO: avoid this allocation
    let mut items = Vec::new();
    while let Some(item) = f(self) {
        items.push(item);
    }
    self.arena.alloc_slice(&items)
}
/// Consumes the current token if it has the given kind; returns whether it did.
fn advance_if(&mut self, kind: TokenKind) -> bool {
    let matched = self.token.kind == kind;
    if matched {
        self.next();
    }
    matched
}
/// Consumes and returns the current token, reporting an error if it does not
/// have the expected kind.
fn expect_advance(&mut self, kind: TokenKind) -> Token {
    let found = self.token.kind;
    if found == kind {
        return self.next();
    }
    self.report(self.token.start, format_args!("expected {:?}, found {:?}", kind, found))
}
/// Formats `msg` as a diagnostic pointing at `pos` in the current file, logs it
/// and never returns.
// NOTE(review): the function ends in `unreachable!()`, so every report panics
// after logging; `Parser::file` carries a TODO about error recovery.
#[track_caller]
fn report(&self, pos: Pos, msg: impl fmt::Display) -> ! {
let mut str = String::new();
report_to(self.lexer.source(), self.path, pos, msg, &mut str);
log::error!("{str}");
unreachable!();
}
/// Adds `flags` to the identifier at the root of `e`, looking through any chain
/// of field accesses. Other expression kinds are ignored.
fn flag_idents(&mut self, e: Expr<'a>, flags: IdentFlags) {
    // peel `a.b.c` down to `a`
    let mut root = e;
    while let Expr::Field { target, .. } = root {
        root = *target;
    }
    if let Expr::Ident { id, .. } = root {
        find_ident(&mut self.idents, id).flags |= flags;
    }
}
}
/// Looks up `id` in `idents` by binary search on the identifier.
/// Panics if it is not present.
fn find_ident(idents: &mut [ScopeIdent], id: Ident) -> &mut ScopeIdent {
    let index = idents.binary_search_by_key(&id, |si| si.ident).unwrap();
    &mut idents[index]
}
/// Looks up the symbol named `id` by binary search on the symbol name.
/// Panics if it is not present.
pub fn find_symbol(symbols: &[Symbol], id: Ident) -> &Symbol {
    let index = symbols.binary_search_by_key(&id, |s| s.name).unwrap();
    &symbols[index]
}
/// A single function argument: `name: ty`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Arg<'a> {
/// Position of the argument name.
pub pos: u32,
pub name: &'a str,
/// Identifier resolved for the argument name.
pub id: Ident,
/// Whether the name was lexed as a compile-time identifier.
pub is_ct: bool,
pub ty: Expr<'a>,
}
impl Poser for Arg<'_> {
fn posi(&self) -> Pos {
self.pos
}
}
// Declares the expression enum and derives two helpers from its field lists:
// - `pos()` returns `posi()` of the FIRST field of the active variant, so the
//   first field of every variant must implement `Poser`;
// - `used_bytes()` returns the end offset (offset + size) of the field that lies
//   furthest from the start of the value, i.e. how many leading bytes the active
//   variant occupies.
macro_rules! generate_expr {
($(#[$meta:meta])* $vis:vis enum $name:ident<$lt:lifetime> {$(
$(#[$field_meta:meta])*
$variant:ident {
$($field:ident: $ty:ty,)*
},
)*}) => {
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
$vis enum $name<$lt> {$(
$(#[$field_meta])*
$variant {
$($field: $ty,)*
},
)*}
impl<$lt> $name<$lt> {
pub fn pos(&self) -> Pos {
#[allow(unused_variables)]
match self {
$(Self::$variant { $($field),* } => generate_expr!(@first $(($field),)*).posi(),)*
}
}
pub fn used_bytes(&self) -> usize {
match self {$(
Self::$variant { $($field,)* } => {
// (offset of the field inside `self`, size of the field) for every field
#[allow(clippy::size_of_ref)]
let fields = [$(($field as *const _ as usize - self as *const _ as usize, core::mem::size_of_val($field)),)*];
let (last, size) = fields.iter().copied().max().unwrap();
last + size
},
)*}
}
}
};
// NOTE(review): `@filed_names` and `@last` are not invoked by the expansion above.
(@filed_names $variant:ident $ident1:ident) => { Self::$variant { $ident1: a } };
// expands to the first parenthesized group
(@first ($($first:tt)*), $($rest:tt)*) => { $($first)* };
(@last ($($ign:tt)*), $($rest:tt)*) => { $($rest)* };
(@last ($($last:tt)*),) => { $($last)* };
}
/// Base of a number literal. The discriminant is the numeric radix, so the value
/// can be cast to `u32` and handed to `from_str_radix`.
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Radix {
Hex = 16,
Octal = 8,
Binary = 2,
Decimal = 10,
}
generate_expr! {
/// `LIST(start, sep, end, elem) => start { elem sep } [elem] end`
/// `OP` := grep for `#define OP:`
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Expr<'a> {
/// `'ct' Expr`
Ct {
pos: Pos,
value: &'a Self,
},
/// `'"([^"]|\\")"'`
String {
pos: Pos,
literal: &'a str,
},
/// `'//[^\n]' | '/*' { '([^/*]|*/)*' | Comment } '*/'`
Comment {
pos: Pos,
literal: &'a str,
},
/// `'break'`
Break {
pos: Pos,
},
/// `'continue'`
Continue {
pos: Pos,
},
/// `'fn' LIST('(', ',', ')', Ident ':' Expr) ':' Expr Expr`
Closure {
pos: Pos,
args: &'a [Arg<'a>],
ret: &'a Self,
body: &'a Self,
},
/// `Expr LIST('(', ',', ')', Expr)`
Call {
func: &'a Self,
args: &'a [Self],
trailing_comma: bool,
},
/// `'return' [Expr]`
Return {
pos: Pos,
val: Option<&'a Self>,
},
/// note: ':unicode:' is any utf-8 character except ascii
/// `'[a-zA-Z_:unicode:][a-zA-Z0-9_:unicode:]*'`
Ident {
pos: Pos,
is_ct: bool,
is_first: bool,
id: Ident,
name: &'a str,
},
/// `LIST('{', [';'], '}', Expr)`
Block {
pos: Pos,
stmts: &'a [Self],
},
/// `'0x[0-9a-fA-F]+' | '0o[0-7]+' | '[0-9]+' | '0b[01]+'`
Number {
pos: Pos,
value: i64,
radix: Radix,
},
/// note: precedence defined in `OP` applies
/// `Expr OP Expr`
BinOp {
left: &'a Self,
op: TokenKind,
right: &'a Self,
},
/// `'if' Expr Expr [else Expr]`
If {
pos: Pos,
cond: &'a Self,
then: &'a Self,
else_: Option<&'a Self>,
},
/// `'loop' Expr`
Loop {
pos: Pos,
body: &'a Self,
},
/// `('&' | '*' | '^' | '-') Expr`
UnOp {
pos: Pos,
op: TokenKind,
val: &'a Self,
},
/// `['packed'] 'struct' LIST('{', ',', '}', Ident ':' Expr)`
Struct {
pos: Pos,
fields: &'a [CommentOr<'a, StructField<'a>>],
captured: &'a [Ident],
trailing_comma: bool,
packed: bool,
},
/// `[Expr] LIST('.{', ',', '}', Ident [':' Expr])`
Ctor {
pos: Pos,
ty: Option<&'a Self>,
fields: &'a [CtorField<'a>],
trailing_comma: bool,
},
/// `[Expr] LIST('.(', ',', ')', Expr)`
Tupl {
pos: Pos,
ty: Option<&'a Self>,
fields: &'a [Self],
trailing_comma: bool,
},
/// `'[' Expr [';' Expr] ']'`
Slice {
pos: Pos,
size: Option<&'a Self>,
item: &'a Self,
},
/// `Expr '[' Expr ']'`
Index {
base: &'a Self,
index: &'a Self,
},
/// `Expr '.' Ident`
Field {
target: &'a Self,
pos: Pos,
name: &'a str,
},
/// `'true' | 'false'`
Bool {
pos: Pos,
value: bool,
},
/// `'idk'`
Idk {
pos: Pos,
},
/// `'@' Ident LIST('(', ',', ')', Expr)`
Directive {
pos: u32,
name: &'a str,
args: &'a [Self],
},
/// `'@use' '(' String ')'`
Mod {
pos: Pos,
id: FileId,
path: &'a str,
},
}
}
impl<'a> Expr<'a> {
    /// Treats `self` as a declaration pattern and returns the identifier it binds
    /// that matches `iden` (by id for `Ok`, by name for `Err`), if any.
    pub fn declares(&self, iden: Result<Ident, &str>) -> Option<Ident> {
        match *self {
            Self::Ident { id, name, .. } => {
                let matches = iden == Ok(id) || iden == Err(name);
                matches.then_some(id)
            }
            Self::Ctor { fields, .. } => {
                fields.iter().find_map(|field| field.value.declares(iden))
            }
            _ => None,
        }
    }

    /// Returns whether any identifier bound by this pattern carries the
    /// compile-time flag in `symbols`.
    pub fn has_ct(&self, symbols: &[Symbol]) -> bool {
        match *self {
            Self::Ident { id, .. } => {
                let symbol = find_symbol(symbols, id);
                symbol.flags & idfl::COMPTIME != 0
            }
            Self::Ctor { fields, .. } => fields.iter().any(|field| field.value.has_ct(symbols)),
            _ => false,
        }
    }

    /// Searches the pattern for `ident` and calls `with_final` with the expression
    /// that reaches it from `target` (a chain of field accesses built on the way
    /// down). Returns the unused callback in `Err` when the identifier is absent.
    pub fn find_pattern_path<F: FnOnce(&Expr)>(
        &self,
        ident: Ident,
        target: &Expr,
        mut with_final: F,
    ) -> Result<(), F> {
        match *self {
            Self::Ident { id, .. } if id == ident => {
                with_final(target);
                Ok(())
            }
            Self::Ctor { fields, .. } => {
                for &CtorField { name, value, pos } in fields {
                    let projected = Expr::Field { pos, target, name };
                    match value.find_pattern_path(ident, &projected, with_final) {
                        Ok(()) => return Ok(()),
                        // not in this field: take the callback back and keep looking
                        Err(unused) => with_final = unused,
                    }
                }
                Err(with_final)
            }
            _ => Err(with_final),
        }
    }
}
/// A `name: ty` field of a struct declaration.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct StructField<'a> {
/// Position of the field name.
pub pos: Pos,
pub name: &'a str,
pub ty: Expr<'a>,
}
impl Poser for StructField<'_> {
fn posi(&self) -> Pos {
self.pos
}
}
/// A `name: value` field of a struct constructor; for the shorthand form the
/// value is an identifier with the same name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CtorField<'a> {
/// Position of the field name.
pub pos: Pos,
pub name: &'a str,
pub value: Expr<'a>,
}
impl Poser for CtorField<'_> {
fn posi(&self) -> Pos {
self.pos
}
}
/// Anything that can report its position in the source.
trait Poser {
fn posi(&self) -> Pos;
}
// a bare position is its own position
impl Poser for Pos {
fn posi(&self) -> Pos {
*self
}
}
impl<'a> Poser for Expr<'a> {
fn posi(&self) -> Pos {
self.pos()
}
}
// a comment reports its own position, anything else delegates to the wrapped value
impl<'a, T: Poser> Poser for CommentOr<'a, T> {
fn posi(&self) -> Pos {
match self {
CommentOr::Or(expr) => expr.posi(),
CommentOr::Comment { pos, .. } => *pos,
}
}
}
/// Either a real list element or a comment that appeared in its place.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum CommentOr<'a, T> {
    Or(T),
    Comment { literal: &'a str, pos: Pos },
}

impl<'a, T: Copy> CommentOr<'a, T> {
    /// Returns the wrapped element, or `None` for a comment.
    pub fn or(&self) -> Option<T> {
        if let CommentOr::Or(value) = *self {
            Some(value)
        } else {
            None
        }
    }
}
/// Pretty-printer for parsed expressions.
pub struct Formatter<'a> {
// original source text; consulted to preserve blank lines and line breaks
source: &'a str,
// current indentation level, in tabs
depth: usize,
// scratch buffer reused when comparing two rendered expressions
disp_buff: String,
}
impl<'a> Formatter<'a> {
/// Creates a formatter for expressions parsed from `source`.
pub fn new(source: &'a str) -> Self {
    Self { source, depth: 0, disp_buff: String::new() }
}
/// Formats `list` like [`Self::fmt_list_low`], with every element followed by a separator.
fn fmt_list<T: Poser>(
    &mut self,
    f: &mut String,
    trailing: bool,
    end: &str,
    sep: &str,
    list: &[T],
    fmt: impl Fn(&mut Self, &T, &mut String) -> fmt::Result,
) -> fmt::Result {
    self.fmt_list_low(f, trailing, end, sep, list, |s, v, f| fmt(s, v, f).map(|()| true))
}
/// Writes `list` followed by `end`.
///
/// Without `trailing` the elements are written on one line, joined by `sep` and a
/// space. With `trailing` every element goes on its own line, indented one level
/// deeper. `fmt` returns whether the element it wrote should be followed by a
/// separator.
fn fmt_list_low<T: Poser>(
&mut self,
f: &mut String,
trailing: bool,
end: &str,
sep: &str,
list: &[T],
fmt: impl Fn(&mut Self, &T, &mut String) -> Result<bool, fmt::Error>,
) -> fmt::Result {
if !trailing {
let mut first = true;
for expr in list {
// `take` resets the flag, so only an element that asked for a separator re-arms it
if !core::mem::take(&mut first) {
write!(f, "{sep} ")?;
}
first = !fmt(self, expr, f)?;
}
return write!(f, "{end}");
}
writeln!(f)?;
self.depth += 1;
// run the loop in a closure so the depth is restored even when writing fails
let res = (|| {
for (i, stmt) in list.iter().enumerate() {
for _ in 0..self.depth {
write!(f, "\t")?;
}
let add_sep = fmt(self, stmt, f)?;
if add_sep {
write!(f, "{sep}")?;
}
// look at the source of the next element
if let Some(expr) = list.get(i + 1)
&& let Some(rest) = self.source.get(expr.posi() as usize..)
{
// the next element starts with an operator-like token: separate explicitly
if insert_needed_semicolon(rest) {
write!(f, ";")?;
}
// more than one newline before the next element: keep one blank line
if preserve_newlines(&self.source[..expr.posi() as usize]) > 1 {
writeln!(f)?;
}
}
if add_sep {
writeln!(f)?;
}
}
Ok(())
})();
self.depth -= 1;
for _ in 0..self.depth {
write!(f, "\t")?;
}
write!(f, "{end}")?;
res
}
/// Formats `expr`, wrapping it in parentheses when `cond(expr)` holds.
fn fmt_paren(
    &mut self,
    expr: &Expr,
    f: &mut String,
    cond: impl FnOnce(&Expr) -> bool,
) -> fmt::Result {
    if !cond(expr) {
        return self.fmt(expr, f);
    }
    write!(f, "(")?;
    self.fmt(expr, f)?;
    write!(f, ")")
}
/// Appends the formatted source form of `expr` to `f`.
pub fn fmt(&mut self, expr: &Expr, f: &mut String) -> fmt::Result {
// defines one closure per name that tests an expression against the given pattern
macro_rules! impl_parenter {
($($name:ident => $pat:pat,)*) => {
$(
let $name = |e: &Expr| matches!(e, $pat);
)*
};
}
// predicates deciding when a sub-expression is wrapped in parentheses
impl_parenter! {
unary => Expr::BinOp { .. },
postfix => Expr::UnOp { .. } | Expr::BinOp { .. },
consecutive => Expr::UnOp { .. },
}
match *expr {
Expr::Ct { value, .. } => {
write!(f, "$: ")?;
self.fmt(value, f)
}
Expr::String { literal, .. } => write!(f, "{literal}"),
Expr::Comment { literal, .. } => write!(f, "{}", literal.trim_end()),
Expr::Mod { path, .. } => write!(f, "@use(\"{path}\")"),
Expr::Field { target, name: field, .. } => {
self.fmt_paren(target, f, postfix)?;
write!(f, ".{field}")
}
Expr::Directive { name, args, .. } => {
write!(f, "@{name}(")?;
self.fmt_list(f, false, ")", ",", args, Self::fmt)
}
Expr::Struct { fields, trailing_comma, packed, .. } => {
if packed {
write!(f, "packed ")?;
}
write!(f, "struct {{")?;
self.fmt_list_low(f, trailing_comma, "}", ",", fields, |s, field, f| {
match field {
CommentOr::Or(StructField { name, ty, .. }) => {
write!(f, "{name}: ")?;
s.fmt(ty, f)?
}
CommentOr::Comment { literal, .. } => write!(f, "{literal}")?,
}
// comments are not followed by a separator
Ok(field.or().is_some())
})
}
Expr::Ctor { ty, fields, trailing_comma, .. } => {
if let Some(ty) = ty {
self.fmt_paren(ty, f, unary)?;
}
write!(f, ".{{")?;
self.fmt_list(
f,
trailing_comma,
"}",
",",
fields,
|s: &mut Self, CtorField { name, value, .. }: &_, f| {
// a field whose value is an identifier of the same name is written in shorthand
if matches!(value, Expr::Ident { name: n, .. } if name == n) {
write!(f, "{name}")
} else {
write!(f, "{name}: ")?;
s.fmt(value, f)
}
},
)
}
Expr::Tupl { ty, fields, trailing_comma, .. } => {
if let Some(ty) = ty {
self.fmt_paren(ty, f, unary)?;
}
write!(f, ".(")?;
self.fmt_list(f, trailing_comma, ")", ",", fields, Self::fmt)
}
Expr::Slice { item, size, .. } => {
write!(f, "[")?;
self.fmt(item, f)?;
if let Some(size) = size {
write!(f, "; ")?;
self.fmt(size, f)?;
}
write!(f, "]")
}
Expr::Index { base, index } => {
self.fmt(base, f)?;
write!(f, "[")?;
self.fmt(index, f)?;
write!(f, "]")
}
Expr::UnOp { op, val, .. } => {
write!(f, "{op}")?;
self.fmt_paren(val, f, unary)
}
Expr::Break { .. } => write!(f, "break"),
Expr::Continue { .. } => write!(f, "continue"),
Expr::If { cond, then, else_, .. } => {
write!(f, "if ")?;
self.fmt(cond, f)?;
write!(f, " ")?;
self.fmt_paren(then, f, consecutive)?;
if let Some(e) = else_ {
write!(f, " else ")?;
self.fmt(e, f)?;
}
Ok(())
}
Expr::Loop { body, .. } => {
write!(f, "loop ")?;
self.fmt(body, f)
}
Expr::Closure { ret, body, args, .. } => {
write!(f, "fn(")?;
self.fmt_list(f, false, "", ",", args, |s, arg, f| {
if arg.is_ct {
write!(f, "$")?;
}
write!(f, "{}: ", arg.name)?;
s.fmt(&arg.ty, f)
})?;
write!(f, "): ")?;
self.fmt(ret, f)?;
write!(f, " ")?;
self.fmt_paren(body, f, consecutive)?;
Ok(())
}
Expr::Call { func, args, trailing_comma } => {
self.fmt_paren(func, f, postfix)?;
write!(f, "(")?;
self.fmt_list(f, trailing_comma, ")", ",", args, Self::fmt)
}
Expr::Return { val: Some(val), .. } => {
write!(f, "return ")?;
self.fmt(val, f)
}
Expr::Return { val: None, .. } => write!(f, "return"),
Expr::Ident { name, is_ct: true, .. } => write!(f, "${name}"),
Expr::Ident { name, is_ct: false, .. } => write!(f, "{name}"),
Expr::Block { stmts, .. } => {
write!(f, "{{")?;
self.fmt_list(f, true, "}", "", stmts, Self::fmt)
}
Expr::Number { value, radix, .. } => match radix {
Radix::Decimal => write!(f, "{value}"),
Radix::Hex => write!(f, "{value:#X}"),
Radix::Octal => write!(f, "{value:#o}"),
Radix::Binary => write!(f, "{value:#b}"),
},
Expr::Bool { value, .. } => write!(f, "{value}"),
Expr::Idk { .. } => write!(f, "idk"),
// `a = a op b` whose two `a`s render identically is printed back as `a op= b`
Expr::BinOp {
left,
op: TokenKind::Assign,
right: Expr::BinOp { left: lleft, op, right },
} if {
// render both left sides into the scratch buffer and compare the text
let mut b = core::mem::take(&mut self.disp_buff);
self.fmt(lleft, &mut b)?;
let len = b.len();
self.fmt(left, &mut b)?;
let (lleft, left) = b.split_at(len);
let res = lleft == left;
b.clear();
self.disp_buff = b;
res
} =>
{
self.fmt(left, f)?;
write!(f, " {op}= ")?;
self.fmt(right, f)
}
Expr::BinOp { right, op, left } => {
// an operand that is a lower-precedence binary operation needs parentheses
let pec_miss = |e: &Expr| {
matches!(
e, Expr::BinOp { op: lop, .. } if op.precedence() > lop.precedence()
)
};
self.fmt_paren(left, f, pec_miss)?;
if let Some(mut prev) = self.source.get(..right.pos() as usize) {
// find the source text that ends right before the operator token
prev = prev.trim_end();
let estimate_bound =
prev.rfind(|c: char| c.is_ascii_whitespace()).map_or(prev.len(), |i| i + 1);
let exact_bound = lexer::Lexer::new(&prev[estimate_bound..]).last().start;
prev = &prev[..exact_bound as usize + estimate_bound];
// the operator started a new line in the source: keep the break
if preserve_newlines(prev) > 0 {
writeln!(f)?;
for _ in 0..self.depth + 1 {
write!(f, "\t")?;
}
write!(f, "{op} ")?;
} else {
write!(f, " {op} ")?;
}
} else {
write!(f, " {op} ")?;
}
self.fmt_paren(right, f, pec_miss)
}
}
}
}
/// Counts the newline characters in the trailing whitespace of `source`.
pub fn preserve_newlines(source: &str) -> usize {
    let content_len = source.trim_end().len();
    let trailing = &source[content_len..];
    trailing.bytes().filter(|&byte| byte == b'\n').count()
}
/// Returns whether `source` starts with a token that would otherwise continue the
/// previous expression: anything with an operator precedence, or the start of a
/// constructor or tuple.
pub fn insert_needed_semicolon(source: &str) -> bool {
    let first = lexer::Lexer::new(source).next().kind;
    if first.precedence().is_some() {
        return true;
    }
    matches!(first, TokenKind::Ctor | TokenKind::Tupl)
}
/// Heap payload shared by all clones of an [`Ast`]. `T` is `[Symbol]` in the
/// finished value and `()` while only the header is being written.
#[repr(C)]
pub struct AstInner<T: ?Sized> {
// number of live `Ast` handles
ref_count: AtomicUsize,
// arena memory the expressions were allocated from
mem: ArenaChunk,
// top-level expressions of the file, as returned by the parser
exprs: *const [Expr<'static>],
pub path: Box<str>,
/// Source text of the file.
pub file: Box<str>,
/// Symbols collected by the parser, sorted by name.
pub symbols: T,
}
impl AstInner<[Symbol]> {
/// Layout of the header followed by `syms` symbols.
fn layout(syms: usize) -> core::alloc::Layout {
core::alloc::Layout::new::<AstInner<()>>()
.extend(core::alloc::Layout::array::<Symbol>(syms).unwrap())
.unwrap()
.0
}
/// Parses `content` and builds the shared payload in one allocation, with a
/// reference count of one.
fn new(content: String, path: &str, loader: Loader) -> NonNull<Self> {
let arena = Arena::default();
let mut syms = Vec::new();
let mut parser = Parser::new(&arena, &mut syms, loader);
let exprs = parser.file(&content, path) as *const [Expr<'static>];
// sorted so that `find_symbol` can binary-search by name
syms.sort_unstable_by_key(|s| s.name);
let layout = Self::layout(syms.len());
unsafe {
// NOTE(review): the result of `alloc` is not checked for null before it is written to
let ptr = alloc::alloc::alloc(layout);
let inner: *mut Self = core::ptr::from_raw_parts_mut(ptr as *mut _, syms.len());
// write the header through the `()`-tailed view of the same allocation
core::ptr::write(inner as *mut AstInner<()>, AstInner {
ref_count: AtomicUsize::new(1),
mem: arena.chunk.into_inner(),
exprs,
path: path.into(),
file: content.into(),
symbols: (),
});
// then copy the symbols into the tail
core::ptr::addr_of_mut!((*inner).symbols)
.as_mut_ptr()
.copy_from_nonoverlapping(syms.as_ptr(), syms.len());
NonNull::new_unchecked(inner)
}
}
/// Writes a diagnostic for `pos` in this file to `out`.
pub fn report_to(&self, pos: Pos, msg: impl fmt::Display, out: &mut impl fmt::Write) {
report_to(&self.file, &self.path, pos, msg, out);
}
}
/// Writes a diagnostic for byte offset `pos` of `file` to `out`.
///
/// The output is three lines: `path:line:col: msg`, the offending source line
/// with tabs expanded to four spaces, and a `^` under the reported column.
/// Write errors are ignored.
pub fn report_to(
    file: &str,
    path: &str,
    pos: Pos,
    msg: impl fmt::Display,
    out: &mut impl fmt::Write,
) {
    let (line, mut col) = lexer::line_col(file.as_bytes(), pos);
    #[cfg(feature = "std")]
    let disp = crate::fs::display_rel_path(path);
    #[cfg(not(feature = "std"))]
    let disp = path;
    _ = writeln!(out, "{}:{}:{}: {}", disp, line, col, msg);

    // a position past the end is treated as end of file
    let offset = (pos as usize).min(file.len());
    let line_start = file[..offset].rfind('\n').map_or(0, |i| i + 1);
    // `find` is relative to `offset`; only add it back when a newline was found,
    // otherwise the line simply runs to the end of the file
    let line_end = file[offset..].find('\n').map_or(file.len(), |i| i + offset);
    let line = &file[line_start..line_end];

    // each tab in front of the position is printed as four columns instead of one
    col += file[line_start..offset].matches('\t').count() * 3;
    _ = writeln!(out, "{}", line.replace("\t", "    "));
    _ = writeln!(out, "{}^", " ".repeat(col.saturating_sub(1)));
}
/// Reference-counted handle to a parsed file. Equality and hashing use the
/// pointer, not the contents.
#[derive(PartialEq, Eq, Hash)]
pub struct Ast(NonNull<AstInner<[Symbol]>>);

impl Ast {
    /// Parses `content` as the file at `path`, resolving imports through `loader`.
    pub fn new(path: &str, content: String, loader: Loader) -> Self {
        let inner = AstInner::new(content, path, loader);
        Self(inner)
    }

    /// Top-level expressions of the file.
    pub fn exprs(&self) -> &[Expr] {
        let raw = self.inner().exprs;
        unsafe { &*raw }
    }

    fn inner(&self) -> &AstInner<[Symbol]> {
        unsafe { self.0.as_ref() }
    }

    /// Finds the top-level declaration that binds `id` (an identifier or a name)
    /// and returns the declaring expression together with the bound identifier.
    pub fn find_decl(&self, id: Result<Ident, &str>) -> Option<(&Expr, Ident)> {
        self.exprs().iter().find_map(|expr| {
            let Expr::BinOp { left, op: TokenKind::Decl, .. } = expr else {
                return None;
            };
            let found = left.declares(id)?;
            Some((expr, found))
        })
    }

    /// Source text of `ident`.
    pub fn ident_str(&self, ident: Ident) -> &str {
        let span = ident::range(ident);
        &self.file[span]
    }
}

impl Default for Ast {
    /// An AST of the empty file with an empty path and a loader that always fails.
    fn default() -> Self {
        Self::new("", String::new(), &no_loader)
    }
}
/// Unchecked pointer to an expression; it is validated against an [`Ast`] when
/// it is turned back into a reference.
#[derive(Clone, Copy)]
#[repr(packed)]
pub struct ExprRef(NonNull<Expr<'static>>);
impl ExprRef {
/// Remembers the address of `expr` without keeping a borrow.
pub fn new(expr: &Expr) -> Self {
Self(NonNull::from(expr).cast())
}
/// Returns the expression if the stored address lies inside one of the arena
/// chunks of `from`, `None` otherwise.
pub fn get<'a>(&self, from: &'a Ast) -> Option<&'a Expr<'a>> {
ArenaChunk::contains(from.mem.base, self.0.as_ptr() as _).then_some(())?;
// SAFETY: the pointer is or was a valid reference in the past, if it points within one of
// the arena's regions, it must be valid, since the arena does not give invalid pointers
// to its allocations
Some(unsafe { { self.0 }.as_ref() })
}
/// A reference that does not point at any expression.
pub fn dangling() -> Self {
Self(NonNull::dangling())
}
}
impl Default for ExprRef {
fn default() -> Self {
Self::dangling()
}
}
// NOTE(review): these impls assert that the shared payload may be used from several
// threads; the visible code only ever reads it through `&` and counts handles
// atomically — confirm that no other code mutates it.
unsafe impl Send for Ast {}
unsafe impl Sync for Ast {}
impl Clone for Ast {
    /// Creates another handle to the same payload by bumping the reference count.
    fn clone(&self) -> Self {
        let shared = unsafe { self.0.as_ref() };
        shared.ref_count.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
        Self(self.0)
    }
}
impl Drop for Ast {
    /// Releases this handle; the last handle destroys the payload and frees it.
    fn drop(&mut self) {
        let inner = unsafe { self.0.as_ref() };
        // `Release` publishes this handle's accesses to whichever thread ends up
        // freeing the payload.
        if inner.ref_count.fetch_sub(1, core::sync::atomic::Ordering::Release) != 1 {
            return;
        }
        // Pairs with the `Release` decrements of the other handles, so all their
        // accesses happen before the payload is destroyed.
        core::sync::atomic::fence(core::sync::atomic::Ordering::Acquire);

        // The layout depends on the symbol count, so read it before the drop.
        let layout = AstInner::layout(inner.symbols.len());
        // SAFETY: the count reached zero, so this is the only remaining handle.
        // The payload was fully initialized by `AstInner::new` and allocated with
        // the same layout. Dropping it in place frees the arena chunks and the
        // `path`/`file` boxes, which would otherwise leak.
        unsafe {
            core::ptr::drop_in_place(self.0.as_ptr());
            alloc::alloc::dealloc(self.0.as_ptr() as _, layout);
        }
    }
}
// gives direct access to the public fields of the payload (`path`, `file`, `symbols`)
impl Deref for Ast {
type Target = AstInner<[Symbol]>;
fn deref(&self) -> &Self::Target {
self.inner()
}
}
/// Bump allocator handing out `'a` references; nothing is freed until the
/// underlying chunk list is dropped.
#[derive(Default)]
pub struct Arena<'a> {
// current chunk; mutated through `&self`, hence the `UnsafeCell`
chunk: UnsafeCell<ArenaChunk>,
ph: core::marker::PhantomData<&'a ()>,
}
impl<'a> Arena<'a> {
    /// Copies `token` into the arena.
    pub fn alloc_str(&self, token: &str) -> &'a str {
        let ptr = self.alloc_slice(token.as_bytes());
        // SAFETY: the bytes were copied verbatim from a valid `&str`.
        unsafe { core::str::from_utf8_unchecked(ptr) }
    }

    /// Moves `expr` into the arena, reserving only the bytes the active variant
    /// uses (see `Expr::used_bytes`).
    pub fn alloc(&self, expr: Expr<'a>) -> &'a Expr<'a> {
        let align = core::mem::align_of::<Expr<'a>>();
        let size = expr.used_bytes();
        // SAFETY: `align` is the alignment of a real type, hence a non-zero power of two.
        let layout = unsafe { core::alloc::Layout::from_size_align_unchecked(size, align) };
        let ptr = self.alloc_low(layout);
        // Copy exactly `size` bytes. Copying `size / 8` eight-byte words would drop
        // the tail whenever the used size is not a multiple of eight.
        // SAFETY: `ptr` points to `size` freshly reserved bytes and `expr` is a live
        // local of at least `size` bytes; the two cannot overlap.
        unsafe { ptr.copy_from_nonoverlapping(NonNull::from(&expr).cast::<u8>(), size) };
        // SAFETY: the reserved region is aligned for `Expr` and holds every byte the
        // active variant reads.
        unsafe { ptr.cast::<Expr<'a>>().as_ref() }
    }

    /// Copies `slice` into the arena. Empty slices and zero-sized element types
    /// do not allocate.
    pub fn alloc_slice<T: Copy>(&self, slice: &[T]) -> &'a [T] {
        if slice.is_empty() || core::mem::size_of::<T>() == 0 {
            return &mut [];
        }
        let layout = core::alloc::Layout::array::<T>(slice.len()).unwrap();
        let ptr = self.alloc_low(layout);
        // SAFETY: `ptr` is aligned for `T` and has room for `slice.len()` elements.
        unsafe { ptr.as_ptr().cast::<T>().copy_from_nonoverlapping(slice.as_ptr(), slice.len()) };
        // SAFETY: the region was just initialized from `slice`.
        unsafe { core::slice::from_raw_parts(ptr.as_ptr() as _, slice.len()) }
    }

    /// Reserves memory for `layout`, starting a new chunk when the current one is
    /// full. Panics if the layout is larger or more aligned than a chunk supports.
    fn alloc_low(&self, layout: core::alloc::Layout) -> NonNull<u8> {
        assert!(layout.align() <= ArenaChunk::ALIGN);
        assert!(layout.size() <= ArenaChunk::CHUNK_SIZE);
        // SAFETY: the reference does not outlive this call.
        // NOTE(review): nothing visible here prevents `Arena` from being shared
        // across threads — confirm that it never is.
        let chunk = unsafe { &mut *self.chunk.get() };
        if let Some(ptr) = chunk.alloc(layout) {
            return ptr;
        }
        // Overwrite without dropping: the new chunk links to the old base, so the
        // old memory stays alive and is freed together with the list.
        unsafe {
            core::ptr::write(chunk, ArenaChunk::new(chunk.base));
        }
        chunk.alloc(layout).unwrap()
    }
}
/// Head of a singly linked list of fixed-size memory chunks.
///
/// Allocations are carved from the top of the chunk downwards. The last
/// pointer-sized slot of every chunk stores the base of the next chunk in the list.
struct ArenaChunk {
    // start of the current chunk, null when nothing was allocated yet
    base: *mut u8,
    // lowest address handed out so far; allocations grow down towards `base`
    end: *mut u8,
}

impl Default for ArenaChunk {
    /// An empty list: no memory, every allocation request fails.
    fn default() -> Self {
        Self { base: core::ptr::null_mut(), end: core::ptr::null_mut() }
    }
}

impl ArenaChunk {
    const ALIGN: usize = core::mem::align_of::<Self>();
    const CHUNK_SIZE: usize = 1 << 16;
    const LAYOUT: core::alloc::Layout =
        unsafe { core::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) };
    // offset of the slot holding the link to the next chunk
    const NEXT_OFFSET: usize = Self::CHUNK_SIZE - core::mem::size_of::<*mut u8>();

    /// Allocates a fresh chunk that links to `next` (which may be null).
    /// Aborts through the allocation error handler when out of memory.
    fn new(next: *mut u8) -> Self {
        // SAFETY: `LAYOUT` has a non-zero size.
        let base = unsafe { alloc::alloc::alloc(Self::LAYOUT) };
        if base.is_null() {
            // writing the link through a null base would be undefined behavior
            alloc::alloc::handle_alloc_error(Self::LAYOUT);
        }
        // SAFETY: `NEXT_OFFSET` lies inside the chunk that was just allocated.
        let end = unsafe { base.add(Self::NEXT_OFFSET) };
        Self::set_next(base, next);
        Self { base, end }
    }

    /// Stores `next` in the link slot of the chunk starting at `curr`.
    fn set_next(curr: *mut u8, next: *mut u8) {
        unsafe { core::ptr::write(curr.add(Self::NEXT_OFFSET) as *mut _, next) };
    }

    /// Reads the link slot of the chunk starting at `curr`.
    fn next(curr: *mut u8) -> *mut u8 {
        unsafe { core::ptr::read(curr.add(Self::NEXT_OFFSET) as *mut _) }
    }

    /// Reserves `layout` in the current chunk, or returns `None` if it does not fit.
    fn alloc(&mut self, layout: core::alloc::Layout) -> Option<NonNull<u8>> {
        // bytes to skip so the start of the allocation is aligned after moving down
        let padding = self.end as usize - (self.end as usize & !(layout.align() - 1));
        let size = layout.size() + padding;
        if size > self.end as usize - self.base as usize {
            return None;
        }
        unsafe { self.end = self.end.sub(size) };
        unsafe { Some(NonNull::new_unchecked(self.end)) }
    }

    /// Returns whether `arg` points into the chunk at `base` or any chunk linked from it.
    fn contains(base: *mut u8, arg: *mut u8) -> bool {
        !base.is_null()
            && ((unsafe { base.add(Self::CHUNK_SIZE) } > arg && base <= arg)
                || Self::contains(Self::next(base), arg))
    }
}

impl Drop for ArenaChunk {
    /// Frees every chunk in the list.
    fn drop(&mut self) {
        //log::inf!(
        // "dropping chunk of size: {}",
        // (Self::LAYOUT.size() - (self.end as usize - self.base as usize))
        // * !self.end.is_null() as usize
        //);
        let mut current = self.base;
        while !current.is_null() {
            // read the link before the chunk holding it is released
            let next = Self::next(current);
            unsafe { alloc::alloc::dealloc(current, Self::LAYOUT) };
            current = next;
            //log::dbg!("deallocating full chunk");
        }
    }
}
/// Formatter round-trip tests: every input is expected to be formatted to itself.
#[cfg(test)]
pub mod test {
use {alloc::borrow::ToOwned, std::string::String};
/// Parses and formats `input`, then compares the result with `input` by running
/// the external `diff` tool on two temporary files named after `ident`.
/// Panics when the two differ.
pub fn format(ident: &str, input: &str) {
// the loader accepts every import and maps it to file id 0
let ast = super::Ast::new(ident, input.to_owned(), &|_, _| Ok(0));
let mut output = String::new();
crate::fs::format_to(&ast, input, &mut output).unwrap();
let input_path = format!("formatter_{ident}.expected");
let output_path = format!("formatter_{ident}.actual");
std::fs::write(&input_path, input).unwrap();
std::fs::write(&output_path, output).unwrap();
let success = std::process::Command::new("diff")
.arg("-u")
.arg("--color")
.arg(&input_path)
.arg(&output_path)
.status()
.unwrap()
.success();
// clean up before asserting so a failure leaves no files behind
std::fs::remove_file(&input_path).unwrap();
std::fs::remove_file(&output_path).unwrap();
assert!(success, "test failed");
}
// generates one `#[test]` per `name => input;` pair
macro_rules! test {
($($name:ident => $input:expr;)*) => {$(
#[test]
fn $name() {
format(stringify!($name), $input);
}
)*};
}
test! {
comments => "// comment\n// comment\n\n// comment\n\n\
/* comment */\n/* comment */\n\n/* comment */";
some_ordinary_code => "loft := fn(): int return loft(1, 2, 3)";
some_arg_per_line_code => "loft := fn(): int return loft(\
\n\t1,\n\t2,\n\t3,\n)";
some_ordinary_struct => "loft := fn(): int return loft.{a: 1, b: 2}";
some_ordinary_fild_per_lin_struct => "loft := fn(): int return loft.{\
\n\ta: 1,\n\tb: 2,\n}";
code_block => "loft := fn(): int {\n\tloft()\n\treturn 1\n}";
}
}