holey-bytes/lang/src/parser.rs

1667 lines
50 KiB
Rust
Raw Normal View History

2024-07-08 00:22:53 -05:00
use {
crate::{
2024-10-04 14:44:29 -05:00
fmt::Formatter,
2024-09-01 14:15:29 -05:00
lexer::{self, Lexer, Token, TokenKind},
2024-11-08 03:25:34 -06:00
ty::{Global, Module},
utils::Ent as _,
2024-10-27 07:57:00 -05:00
Ident,
2024-07-08 00:22:53 -05:00
},
2024-09-30 12:09:17 -05:00
alloc::{boxed::Box, string::String, vec::Vec},
core::{
2024-10-10 06:04:17 -05:00
alloc::Layout,
cell::{RefCell, UnsafeCell},
2024-10-01 08:28:18 -05:00
fmt::{self},
intrinsics::unlikely,
2024-10-01 08:28:18 -05:00
marker::PhantomData,
ops::Deref,
2024-07-08 00:22:53 -05:00
ptr::NonNull,
sync::atomic::AtomicUsize,
},
2024-05-12 04:52:58 -05:00
};
2024-05-12 16:19:45 -05:00
pub type Pos = u32;
pub type IdentFlags = u32;
2024-07-08 00:22:53 -05:00
pub type IdentIndex = u16;
2024-09-30 12:09:17 -05:00
pub type LoaderError = String;
2024-11-08 03:25:34 -06:00
pub type Loader<'a> = &'a mut (dyn FnMut(&str, &str, FileKind) -> Result<usize, LoaderError> + 'a);
2024-10-13 08:22:16 -05:00
/// What a [`Loader`] is asked to load.
#[derive(Debug, PartialEq, Eq)]
pub enum FileKind {
    /// A source module, requested by `@use`.
    Module,
    /// A file embedded verbatim, requested by `@embed`.
    Embed,
}
/// Helper for the `cond.then(|| parse()).trans()?` idiom used by the parser.
///
/// Applied to an `Option<Option<T>>` it swaps the meaning of the two `None`s:
/// * `None` (nothing was attempted) becomes `Some(None)`,
/// * `Some(None)` (an attempt failed) becomes `None`, so `?` propagates the failure,
/// * `Some(Some(v))` is passed through untouched.
trait Trans {
    fn trans(self) -> Self;
}

impl<T> Trans for Option<Option<T>> {
    fn trans(self) -> Self {
        match self {
            // Nothing was attempted: report "successfully absent".
            None => Some(None),
            // Something was attempted: keep a success, collapse a failure to `None`.
            Some(attempt) => attempt.map(Some),
        }
    }
}
2024-10-10 06:04:17 -05:00
/// Multiplier that scales with the pointer width: `7` per 4 bytes of `usize`, plus one.
///
/// NOTE(review): judging by the name this estimates AST size from source size —
/// confirm against the users of this constant.
pub const SOURCE_TO_AST_FACTOR: usize = {
    let words_per_usize = core::mem::size_of::<usize>() / 4;
    7 * words_per_usize + 1
};
2024-05-20 07:11:58 -05:00
pub mod idfl {
use super::*;
2024-05-17 12:53:59 -05:00
2024-05-20 07:11:58 -05:00
macro_rules! flags {
($($name:ident,)*) => {
2024-09-30 12:09:17 -05:00
$(pub const $name: IdentFlags = 1 << (core::mem::size_of::<IdentFlags>() * 8 - 1 - ${index(0)});)*
2024-05-20 07:11:58 -05:00
pub const ALL: IdentFlags = 0 $(| $name)*;
};
2024-05-17 12:53:59 -05:00
}
2024-05-20 07:11:58 -05:00
flags! {
MUTABLE,
REFERENCED,
2024-06-01 13:30:07 -05:00
COMPTIME,
2024-05-17 12:53:59 -05:00
}
}
2024-11-08 03:25:34 -06:00
/// A [`Loader`]-compatible function that loads nothing: every request succeeds with id `0`.
pub fn no_loader(_: &str, _: &str, _: FileKind) -> Result<usize, LoaderError> {
    Result::Ok(0)
}
2024-05-19 11:20:42 -05:00
#[derive(Debug)]
2024-05-17 12:53:59 -05:00
pub struct Symbol {
2024-07-08 00:22:53 -05:00
pub name: Ident,
2024-05-17 12:53:59 -05:00
pub flags: IdentFlags,
}
2024-06-01 13:30:07 -05:00
#[derive(Clone, Copy)]
2024-05-17 12:53:59 -05:00
struct ScopeIdent {
2024-07-08 00:22:53 -05:00
ident: Ident,
2024-05-12 04:52:58 -05:00
declared: bool,
ordered: bool,
2024-11-17 11:15:58 -06:00
used: bool,
2024-07-08 00:22:53 -05:00
flags: IdentFlags,
2024-05-12 04:52:58 -05:00
}
2024-05-09 16:41:59 -05:00
pub struct Parser<'a, 'b> {
2024-07-08 00:22:53 -05:00
path: &'b str,
loader: Loader<'b>,
2024-10-01 08:28:18 -05:00
lexer: Lexer<'a>,
2024-10-10 01:35:17 -05:00
arena: &'a Arena,
ctx: &'b mut Ctx,
2024-07-08 00:22:53 -05:00
token: Token,
ns_bound: usize,
2024-06-25 14:41:12 -05:00
trailing_sep: bool,
2024-09-22 11:17:30 -05:00
packed: bool,
2024-05-09 16:41:59 -05:00
}
impl<'a, 'b> Parser<'a, 'b> {
2024-10-10 01:35:17 -05:00
pub fn parse(
ctx: &'b mut Ctx,
2024-10-10 01:35:17 -05:00
input: &'a str,
path: &'b str,
2024-10-01 08:28:18 -05:00
loader: Loader<'b>,
2024-10-10 01:35:17 -05:00
arena: &'a Arena,
) -> &'a [Expr<'a>] {
let mut lexer = Lexer::new(input);
Self {
2024-05-17 12:53:59 -05:00
loader,
2024-10-12 06:07:49 -05:00
token: lexer.eat(),
lexer,
2024-10-10 01:35:17 -05:00
path,
ctx,
arena,
2024-05-20 07:11:58 -05:00
ns_bound: 0,
2024-06-25 14:41:12 -05:00
trailing_sep: false,
2024-09-22 11:17:30 -05:00
packed: false,
}
2024-10-10 01:35:17 -05:00
.file()
2024-05-09 16:41:59 -05:00
}
2024-10-10 01:35:17 -05:00
fn file(&mut self) -> &'a [Expr<'a>] {
2024-07-08 00:22:53 -05:00
let f = self.collect_list(TokenKind::Semi, TokenKind::Eof, |s| s.expr_low(true));
2024-05-17 12:53:59 -05:00
2024-05-12 04:52:58 -05:00
self.pop_scope(0);
2024-10-10 01:35:17 -05:00
if !self.ctx.idents.is_empty() {
2024-05-17 12:53:59 -05:00
// TODO: we need error recovery
let mut idents = core::mem::take(&mut self.ctx.idents);
for id in idents.drain(..) {
self.report(
2024-10-27 07:57:00 -05:00
id.ident.pos(),
format_args!("undeclared identifier: {}", self.lexer.slice(id.ident.range())),
);
}
self.ctx.idents = idents;
2024-05-12 04:52:58 -05:00
}
f
2024-05-09 16:41:59 -05:00
}
/// Returns the current lookahead token and refills the lookahead from the lexer.
fn next(&mut self) -> Token {
    let upcoming = self.lexer.eat();
    core::mem::replace(&mut self.token, upcoming)
}
/// Parses a full expression and moves it into the arena.
fn ptr_expr(&mut self) -> Option<&'a Expr<'a>> {
    let expr = self.expr()?;
    Some(self.arena.alloc(expr))
}
/// Parses a unit expression followed by any binary operators.
///
/// `top_level` is forwarded to `bin_expr`, where it influences how declarations are
/// registered.
fn expr_low(&mut self, top_level: bool) -> Option<Expr<'a>> {
    self.unit_expr().and_then(|left| self.bin_expr(left, 0, top_level))
}
/// Parses an expression that is not at the top level of a file or struct body.
fn expr(&mut self) -> Option<Expr<'a>> {
    let top_level = false;
    self.expr_low(top_level)
}
fn bin_expr(&mut self, mut fold: Expr<'a>, min_prec: u8, top_level: bool) -> Option<Expr<'a>> {
2024-05-10 15:54:12 -05:00
loop {
2024-10-27 10:07:46 -05:00
let Some(prec) = self.token.kind.precedence() else { break };
2024-05-10 15:54:12 -05:00
2024-05-13 06:36:29 -05:00
if prec <= min_prec {
2024-05-10 15:54:12 -05:00
break;
}
let Token { kind: op, start: pos, .. } = self.next();
2024-06-01 13:30:07 -05:00
2024-07-08 00:22:53 -05:00
if op == TokenKind::Decl {
self.declare_rec(&fold, top_level);
}
let right = self.unit_expr()?;
let right = self.bin_expr(right, prec, false)?;
2024-06-01 13:30:07 -05:00
let right = self.arena.alloc(right);
let left = self.arena.alloc(fold);
2024-05-15 03:37:39 -05:00
2024-10-10 01:35:17 -05:00
if let Some(op) = op.ass_op() {
2024-05-20 07:11:58 -05:00
self.flag_idents(*left, idfl::MUTABLE);
let right = Expr::BinOp { left: self.arena.alloc(fold), pos, op, right };
fold = Expr::BinOp {
left,
pos,
op: TokenKind::Assign,
right: self.arena.alloc(right),
};
2024-05-15 03:37:39 -05:00
} else {
fold = Expr::BinOp { left, right, pos, op };
if op == TokenKind::Assign {
2024-05-20 07:11:58 -05:00
self.flag_idents(*left, idfl::MUTABLE);
}
2024-05-15 03:37:39 -05:00
}
2024-05-10 15:54:12 -05:00
}
Some(fold)
2024-05-10 15:54:12 -05:00
}
2024-07-08 00:22:53 -05:00
/// Declares every identifier found in the pattern `expr`.
///
/// Plain identifiers are declared directly, constructor patterns are walked field by
/// field, and anything else is reported as an unsupported declaration target.
fn declare_rec(&mut self, expr: &Expr, top_level: bool) {
    match *expr {
        Expr::Ident { pos, id, is_first, .. } => {
            // Top-level declarations are unordered and may appear after their first use.
            let ordered = !top_level;
            let valid_order = is_first || top_level;
            self.declare(pos, id, ordered, valid_order)
        }
        Expr::Ctor { fields, .. } => {
            fields.iter().for_each(|field| self.declare_rec(&field.value, top_level))
        }
        _ => {
            self.report(expr.pos(), "cant declare this shit (yet)");
        }
    }
}
/// Marks the identifier `id` as declared and records whether it is `ordered`.
///
/// Reports an error when `valid_order` is false, when `id` is not tracked in
/// `ctx.idents`, or when it has been declared before. Only the first problem does
/// not stop the declaration from being recorded.
fn declare(&mut self, pos: Pos, id: Ident, ordered: bool, valid_order: bool) {
    if !valid_order {
        self.report(
            pos,
            format_args!(
                "out of order declaration not allowed: {}",
                self.lexer.slice(id.range())
            ),
        );
    }

    let index = match self.ctx.idents.binary_search_by_key(&id, |s| s.ident) {
        Ok(index) => index,
        Err(_) => {
            self.report(pos, "the identifier is rezerved for a builtin (proably)");
            return;
        }
    };

    let already_declared = core::mem::replace(&mut self.ctx.idents[index].declared, true);
    if already_declared {
        self.report(
            pos,
            format_args!("redeclaration of identifier: {}", self.lexer.slice(id.range())),
        );
        return;
    }

    self.ctx.idents[index].ordered = ordered;
}
2024-09-20 09:37:51 -05:00
fn resolve_ident(&mut self, token: Token) -> (Ident, bool) {
2024-06-24 10:26:00 -05:00
let is_ct = token.kind == TokenKind::CtIdent;
2024-05-12 04:52:58 -05:00
let name = self.lexer.slice(token.range());
if let Some(builtin) = crate::ty::from_str(name) {
2024-10-27 07:57:00 -05:00
return (Ident::builtin(builtin), false);
2024-05-12 04:52:58 -05:00
}
2024-09-20 09:37:51 -05:00
let (i, id, bl) = match self
2024-10-10 01:35:17 -05:00
.ctx
2024-05-12 04:52:58 -05:00
.idents
.iter_mut()
2024-05-20 07:11:58 -05:00
.enumerate()
2024-10-27 07:57:00 -05:00
.rfind(|(_, elem)| self.lexer.slice(elem.ident.range()) == name)
2024-05-12 04:52:58 -05:00
{
Some((i, elem)) => {
elem.used = true;
(i, elem, false)
}
2024-05-12 04:52:58 -05:00
None => {
2024-10-27 07:57:00 -05:00
let ident = match Ident::new(token.start, name.len() as _) {
None => {
self.report(token.start, "identifier can at most have 64 characters");
2024-10-27 07:57:00 -05:00
Ident::new(token.start, 63).unwrap()
}
Some(id) => id,
};
2024-10-10 01:35:17 -05:00
self.ctx.idents.push(ScopeIdent {
ident,
declared: false,
2024-11-17 11:15:58 -06:00
used: false,
ordered: false,
flags: 0,
});
2024-10-10 01:35:17 -05:00
(self.ctx.idents.len() - 1, self.ctx.idents.last_mut().unwrap(), true)
2024-05-12 04:52:58 -05:00
}
};
2024-06-01 13:30:07 -05:00
id.flags |= idfl::COMPTIME * is_ct as u32;
if id.declared && id.ordered && self.ns_bound > i {
2024-06-01 13:30:07 -05:00
id.flags |= idfl::COMPTIME;
2024-10-10 01:35:17 -05:00
self.ctx.captured.push(id.ident);
2024-05-20 07:11:58 -05:00
}
2024-05-12 04:52:58 -05:00
2024-09-20 09:37:51 -05:00
(id.ident, bl)
2024-05-17 12:53:59 -05:00
}
2024-10-01 08:28:18 -05:00
/// Returns the source text covered by the token `range`.
fn tok_str(&mut self, range: Token) -> &'a str {
    let span = range.range();
    self.lexer.slice(span)
}
fn unit_expr(&mut self) -> Option<Expr<'a>> {
2024-05-11 15:22:08 -05:00
use {Expr as E, TokenKind as T};
if matches!(
self.token.kind,
T::RParen | T::RBrace | T::RBrack | T::Comma | T::Semi | T::Else
) {
self.report(self.token.start, "expected expression")?;
}
2024-10-10 01:35:17 -05:00
let frame = self.ctx.idents.len();
let token @ Token { start: pos, .. } = self.next();
2024-05-20 07:11:58 -05:00
let prev_boundary = self.ns_bound;
2024-10-10 01:35:17 -05:00
let prev_captured = self.ctx.captured.len();
2024-11-24 07:47:38 -06:00
let mut must_trail = false;
2024-05-11 09:04:13 -05:00
let mut expr = match token.kind {
T::Ct => E::Ct { pos, value: self.ptr_expr()? },
2024-11-24 07:47:38 -06:00
T::Slf => E::Slf { pos },
2024-06-25 11:39:59 -05:00
T::Directive if self.lexer.slice(token.range()) == "use" => {
self.expect_advance(TokenKind::LParen)?;
let str = self.expect_advance(TokenKind::DQuote)?;
self.expect_advance(TokenKind::RParen)?;
2024-10-10 01:35:17 -05:00
let path = self.lexer.slice(str.range());
let path = &path[1..path.len() - 1];
2024-05-19 11:20:42 -05:00
E::Mod {
pos,
2024-10-10 01:35:17 -05:00
path,
2024-10-13 08:22:16 -05:00
id: match (self.loader)(path, self.path, FileKind::Module) {
2024-11-08 03:25:34 -06:00
Ok(id) => Module::new(id),
2024-09-18 02:47:52 -05:00
Err(e) => {
self.report(str.start, format_args!("error loading dependency: {e:#}"))?
2024-09-18 02:47:52 -05:00
}
2024-05-19 11:20:42 -05:00
},
}
}
2024-10-13 08:22:16 -05:00
T::Directive if self.lexer.slice(token.range()) == "embed" => {
self.expect_advance(TokenKind::LParen)?;
let str = self.expect_advance(TokenKind::DQuote)?;
self.expect_advance(TokenKind::RParen)?;
2024-10-13 08:22:16 -05:00
let path = self.lexer.slice(str.range());
let path = &path[1..path.len() - 1];
E::Embed {
pos,
path,
id: match (self.loader)(path, self.path, FileKind::Embed) {
2024-11-08 03:25:34 -06:00
Ok(id) => Global::new(id),
Err(e) => self.report(
str.start,
format_args!("error loading embedded file: {e:#}"),
)?,
2024-10-13 08:22:16 -05:00
},
}
}
2024-06-25 11:39:59 -05:00
T::Directive => E::Directive {
2024-09-15 13:14:56 -05:00
pos: pos - 1, // need to undo the directive shift
2024-10-01 08:28:18 -05:00
name: self.tok_str(token),
2024-05-14 16:07:32 -05:00
args: {
self.expect_advance(T::LParen)?;
2024-05-14 16:07:32 -05:00
self.collect_list(T::Comma, T::RParen, Self::expr)
},
},
T::True => E::Bool { pos, value: true },
T::False => E::Bool { pos, value: false },
2024-10-27 13:55:11 -05:00
T::Null => E::Null { pos },
T::Idk => E::Idk { pos },
2024-11-03 03:15:03 -06:00
T::Die => E::Die { pos },
2024-10-01 08:28:18 -05:00
T::DQuote => E::String { pos, literal: self.tok_str(token) },
2024-09-22 11:17:30 -05:00
T::Packed => {
self.packed = true;
let expr = self.unit_expr()?;
2024-09-22 11:17:30 -05:00
if self.packed {
self.report(
expr.pos(),
"this can not be packed \
(unlike your mom that gets packed every day by me)",
);
}
expr
}
2024-05-12 05:16:40 -05:00
T::Struct => E::Struct {
2024-09-30 12:09:17 -05:00
packed: core::mem::take(&mut self.packed),
2024-07-08 00:22:53 -05:00
fields: {
2024-10-10 01:35:17 -05:00
self.ns_bound = self.ctx.idents.len();
self.expect_advance(T::LBrace)?;
2024-05-12 05:16:40 -05:00
self.collect_list(T::Comma, T::RBrace, |s| {
let tok = s.token;
Some(if s.advance_if(T::Comment) {
2024-10-01 08:28:18 -05:00
CommentOr::Comment { literal: s.tok_str(tok), pos: tok.start }
2024-11-24 07:47:38 -06:00
} else if s.lexer.taste().kind == T::Colon {
let name = s.expect_advance(T::Ident)?;
s.expect_advance(T::Colon)?;
2024-11-24 07:47:38 -06:00
CommentOr::Or(Ok(StructField {
pos: name.start,
2024-10-01 08:28:18 -05:00
name: s.tok_str(name),
ty: s.expr()?,
2024-11-24 07:47:38 -06:00
}))
} else {
must_trail = true;
CommentOr::Or(Err(
s.collect_list_low(T::Semi, T::RBrace, true, |s| s.expr_low(true))
))
})
2024-05-12 05:16:40 -05:00
})
},
2024-05-20 07:11:58 -05:00
captured: {
self.ns_bound = prev_boundary;
2024-10-12 06:07:49 -05:00
let captured = &mut self.ctx.captured[prev_captured..];
crate::quad_sort(captured, core::cmp::Ord::cmp);
let preserved = captured.partition_dedup().0.len();
2024-10-10 01:35:17 -05:00
self.ctx.captured.truncate(prev_captured + preserved);
self.arena.alloc_slice(&self.ctx.captured[prev_captured..])
2024-05-20 07:11:58 -05:00
},
2024-07-08 00:22:53 -05:00
pos: {
2024-05-20 07:11:58 -05:00
if self.ns_bound == 0 {
// we might save some memory
2024-10-10 01:35:17 -05:00
self.ctx.captured.clear();
2024-05-20 07:11:58 -05:00
}
pos
2024-05-20 07:11:58 -05:00
},
2024-11-24 07:47:38 -06:00
trailing_comma: core::mem::take(&mut self.trailing_sep) || must_trail,
2024-05-12 05:16:40 -05:00
},
2024-11-17 09:25:39 -06:00
T::Enum => E::Enum {
pos,
variants: {
self.expect_advance(T::LBrace)?;
self.collect_list(T::Comma, T::RBrace, |s| {
let tok = s.token;
Some(if s.advance_if(T::Comment) {
CommentOr::Comment { literal: s.tok_str(tok), pos: tok.start }
} else {
let name = s.expect_advance(T::Ident)?;
CommentOr::Or(EnumField { pos: name.start, name: s.tok_str(name) })
})
})
},
trailing_comma: core::mem::take(&mut self.trailing_sep),
},
2024-06-01 13:30:07 -05:00
T::Ident | T::CtIdent => {
2024-09-20 09:37:51 -05:00
let (id, is_first) = self.resolve_ident(token);
E::Ident {
pos: pos - (token.kind == T::CtIdent) as Pos,
is_ct: token.kind == T::CtIdent,
id,
is_first,
}
2024-05-12 04:52:58 -05:00
}
2024-11-03 15:27:37 -06:00
T::Under => E::Wildcard { pos },
2024-05-11 15:22:08 -05:00
T::If => E::If {
pos,
cond: self.ptr_expr()?,
then: self.ptr_expr()?,
else_: self.advance_if(T::Else).then(|| self.ptr_expr()).trans()?,
2024-05-11 15:22:08 -05:00
},
2024-11-17 09:25:39 -06:00
T::Match => E::Match {
pos,
value: self.ptr_expr()?,
branches: {
self.expect_advance(T::LBrace)?;
self.collect_list(T::Comma, T::RBrace, |s| {
Some(MatchBranch {
pat: s.expr()?,
pos: s.expect_advance(T::TArrow)?.start,
body: s.expr()?,
})
})
},
},
T::Loop => E::Loop { pos, body: self.ptr_expr()? },
T::Break => E::Break { pos },
T::Continue => E::Continue { pos },
2024-05-11 15:22:08 -05:00
T::Return => E::Return {
pos,
2024-07-19 06:44:35 -05:00
val: (!matches!(
self.token.kind,
T::Semi | T::RBrace | T::RBrack | T::RParen | T::Comma
))
.then(|| self.ptr_expr())
.trans()?,
2024-05-09 16:41:59 -05:00
},
2024-05-11 15:22:08 -05:00
T::Fn => E::Closure {
pos,
2024-05-11 15:22:08 -05:00
args: {
self.expect_advance(T::LParen)?;
2024-05-11 15:22:08 -05:00
self.collect_list(T::Comma, T::RParen, |s| {
let name = s.advance_ident()?;
2024-09-20 09:37:51 -05:00
let (id, _) = s.resolve_ident(name);
s.declare(name.start, id, true, true);
s.expect_advance(T::Colon)?;
Some(Arg {
pos: name.start,
2024-10-01 08:28:18 -05:00
name: s.tok_str(name),
2024-07-19 14:04:22 -05:00
is_ct: name.kind == T::CtIdent,
id,
ty: s.expr()?,
})
2024-05-11 09:04:13 -05:00
})
2024-05-11 15:22:08 -05:00
},
2024-07-08 00:22:53 -05:00
ret: {
self.expect_advance(T::Colon)?;
self.ptr_expr()?
2024-05-11 15:22:08 -05:00
},
body: self.ptr_expr()?,
2024-05-11 15:22:08 -05:00
},
T::Ctor => self.ctor(pos, None),
T::Tupl => self.tupl(pos, None),
2024-07-08 11:08:58 -05:00
T::LBrack => E::Slice {
item: self.ptr_unit_expr()?,
size: self.advance_if(T::Semi).then(|| self.ptr_expr()).trans()?,
2024-07-08 11:08:58 -05:00
pos: {
self.expect_advance(T::RBrack)?;
pos
2024-07-08 11:08:58 -05:00
},
},
2024-11-17 09:25:39 -06:00
T::Band | T::Mul | T::Xor | T::Sub | T::Que | T::Not | T::Dot => E::UnOp {
pos,
2024-07-08 00:22:53 -05:00
op: token.kind,
2024-05-17 12:53:59 -05:00
val: {
2024-11-17 09:25:39 -06:00
let prev_ident_stack = self.ctx.idents.len();
let expr = self.ptr_unit_expr()?;
2024-05-19 11:20:42 -05:00
if token.kind == T::Band {
2024-05-20 07:11:58 -05:00
self.flag_idents(*expr, idfl::REFERENCED);
2024-05-19 11:20:42 -05:00
}
2024-11-17 09:25:39 -06:00
if token.kind == T::Dot {
self.ctx.idents.truncate(prev_ident_stack);
}
2024-05-17 12:53:59 -05:00
expr
},
2024-05-12 04:52:58 -05:00
},
T::LBrace => E::Block { pos, stmts: self.collect_list(T::Semi, T::RBrace, Self::expr) },
T::Number => {
let slice = self.lexer.slice(token.range());
let (slice, radix) = match &slice.get(0..2) {
2024-10-10 01:35:17 -05:00
Some("0x") => (&slice[2..], Radix::Hex),
Some("0b") => (&slice[2..], Radix::Binary),
Some("0o") => (&slice[2..], Radix::Octal),
_ => (slice, Radix::Decimal),
};
E::Number {
pos,
2024-09-04 10:13:43 -05:00
value: match u64::from_str_radix(slice, radix as u32) {
2024-09-01 12:42:04 -05:00
Ok(value) => value,
Err(e) => self.report(token.start, format_args!("invalid number: {e}"))?,
2024-09-04 10:13:43 -05:00
} as i64,
radix,
}
}
2024-10-29 07:36:12 -05:00
T::Float => E::Float {
pos,
value: match <f64 as core::str::FromStr>::from_str(self.lexer.slice(token.range()))
{
Ok(f) => f.to_bits(),
Err(e) => self.report(token.start, format_args!("invalid float: {e}"))?,
},
},
2024-05-11 15:22:08 -05:00
T::LParen => {
let expr = self.expr()?;
self.expect_advance(T::RParen)?;
2024-05-10 15:54:12 -05:00
expr
}
2024-10-01 08:28:18 -05:00
T::Comment => Expr::Comment { pos, literal: self.tok_str(token) },
tok => self.report(token.start, format_args!("unexpected token: {tok}"))?,
2024-05-09 16:41:59 -05:00
};
2024-05-11 09:04:13 -05:00
loop {
2024-05-14 05:17:39 -05:00
let token = self.token;
2024-07-08 11:08:58 -05:00
if matches!(token.kind, T::LParen | T::Ctor | T::Dot | T::Tupl | T::LBrack) {
2024-05-12 05:16:40 -05:00
self.next();
}
2024-05-14 05:17:39 -05:00
expr = match token.kind {
2024-05-12 05:16:40 -05:00
T::LParen => Expr::Call {
func: self.arena.alloc(expr),
2024-05-17 12:53:59 -05:00
args: self.collect_list(T::Comma, T::RParen, Self::expr),
2024-09-30 12:09:17 -05:00
trailing_comma: core::mem::take(&mut self.trailing_sep),
2024-05-12 05:16:40 -05:00
},
2024-07-07 12:16:15 -05:00
T::Ctor => self.ctor(token.start, Some(expr)),
T::Tupl => self.tupl(token.start, Some(expr)),
2024-07-08 11:08:58 -05:00
T::LBrack => E::Index {
base: self.arena.alloc(expr),
index: {
let index = self.expr()?;
self.expect_advance(T::RBrack)?;
2024-07-08 11:08:58 -05:00
self.arena.alloc(index)
},
},
2024-05-12 05:16:40 -05:00
T::Dot => E::Field {
2024-05-12 06:13:36 -05:00
target: self.arena.alloc(expr),
2024-09-30 12:09:17 -05:00
pos: token.start,
2024-07-08 00:22:53 -05:00
name: {
let token = self.expect_advance(T::Ident)?;
2024-10-01 08:28:18 -05:00
self.tok_str(token)
2024-05-12 05:16:40 -05:00
},
},
2024-05-11 09:04:13 -05:00
_ => break,
}
}
2024-11-24 07:47:38 -06:00
if matches!(token.kind, T::Loop | T::LBrace | T::Fn | T::Struct) {
2024-05-12 04:52:58 -05:00
self.pop_scope(frame);
}
2024-05-09 16:41:59 -05:00
Some(expr)
2024-05-09 16:41:59 -05:00
}
2024-07-07 12:16:15 -05:00
/// Parses the remainder of a tuple constructor (the opening token is already consumed):
/// a `,`-separated list of expressions closed by `)`. `ty` is the optional type
/// expression that preceded the constructor.
fn tupl(&mut self, pos: Pos, ty: Option<Expr<'a>>) -> Expr<'a> {
    Expr::Tupl {
        pos,
        ty: ty.map(|type_expr| self.arena.alloc(type_expr)),
        fields: self.collect_list(TokenKind::Comma, TokenKind::RParen, |s| s.expr()),
        trailing_comma: core::mem::take(&mut self.trailing_sep),
    }
}
fn ctor(&mut self, pos: Pos, ty: Option<Expr<'a>>) -> Expr<'a> {
Expr::Ctor {
pos,
ty: ty.map(|ty| self.arena.alloc(ty)),
fields: self.collect_list(TokenKind::Comma, TokenKind::RBrace, |s| {
let name_tok = s.advance_ident()?;
2024-10-01 08:28:18 -05:00
let name = s.tok_str(name_tok);
Some(CtorField {
2024-07-08 00:22:53 -05:00
pos: name_tok.start,
name,
value: if s.advance_if(TokenKind::Colon) {
s.expr()?
2024-07-08 00:22:53 -05:00
} else {
2024-09-20 09:37:51 -05:00
let (id, is_first) = s.resolve_ident(name_tok);
Expr::Ident { pos: name_tok.start, is_ct: false, id, is_first }
2024-07-08 00:22:53 -05:00
},
})
2024-07-07 12:16:15 -05:00
}),
2024-09-30 12:09:17 -05:00
trailing_comma: core::mem::take(&mut self.trailing_sep),
2024-07-07 12:16:15 -05:00
}
}
/// Consumes the next token and returns it if it is an identifier (plain or comptime).
///
/// Otherwise an error is reported at the position of the offending token — the same
/// convention `expect_advance` uses — and `None` is returned.
fn advance_ident(&mut self) -> Option<Token> {
    let next = self.next();
    if matches!(next.kind, TokenKind::Ident | TokenKind::CtIdent) {
        return Some(next);
    }

    // `self.token` is already the token *after* the offending one, so point at `next`.
    self.report(next.start, format_args!("expected identifier, found {}", next.kind))?
}
2024-05-12 04:52:58 -05:00
fn pop_scope(&mut self, frame: usize) {
let mut undeclared_count = frame;
2024-10-10 01:35:17 -05:00
for i in frame..self.ctx.idents.len() {
if !&self.ctx.idents[i].declared {
self.ctx.idents.swap(i, undeclared_count);
2024-05-12 04:52:58 -05:00
undeclared_count += 1;
2024-11-17 11:15:58 -06:00
} else if !self.ctx.idents[i].used {
self.warn(self.ctx.idents[i].ident.pos(), "unused identifier");
2024-05-12 04:52:58 -05:00
}
}
2024-10-10 01:35:17 -05:00
self.ctx
.idents
2024-05-17 12:53:59 -05:00
.drain(undeclared_count..)
2024-07-08 00:22:53 -05:00
.map(|ident| Symbol { name: ident.ident, flags: ident.flags })
2024-10-10 01:35:17 -05:00
.collect_into(&mut self.ctx.symbols);
2024-05-12 04:52:58 -05:00
}
/// Parses a single operand (see `unit_expr`) and moves it into the arena.
fn ptr_unit_expr(&mut self) -> Option<&'a Expr<'a>> {
    let unit = self.unit_expr()?;
    Some(self.arena.alloc(unit))
}
2024-05-11 15:22:08 -05:00
fn collect_list<T: Copy>(
&mut self,
delim: TokenKind,
end: TokenKind,
2024-11-24 07:47:38 -06:00
f: impl FnMut(&mut Self) -> Option<T>,
) -> &'a [T] {
self.collect_list_low(delim, end, false, f)
}
fn collect_list_low<T: Copy>(
&mut self,
delim: TokenKind,
end: TokenKind,
keep_end: bool,
mut f: impl FnMut(&mut Self) -> Option<T>,
2024-05-11 15:22:08 -05:00
) -> &'a [T] {
2024-10-14 15:04:18 -05:00
let mut trailing_sep = false;
2024-10-10 01:35:17 -05:00
let mut view = self.ctx.stack.view();
2024-11-24 07:47:38 -06:00
'o: while (keep_end && self.token.kind != end) || (!keep_end && !self.advance_if(end)) {
let val = match f(self) {
Some(val) => val,
None => {
let mut paren = None::<TokenKind>;
let mut depth = 0;
loop {
let tok = self.next();
if tok.kind == TokenKind::Eof {
break 'o;
}
if let Some(par) = paren {
if par == tok.kind {
depth += 1;
} else if tok.kind.closing() == par.closing() {
depth -= 1;
if depth == 0 {
paren = None;
}
}
} else if tok.kind == delim {
continue 'o;
} else if tok.kind == end {
break 'o;
} else if tok.kind.closing().is_some() && paren.is_none() {
paren = Some(tok.kind);
depth = 1;
}
}
}
};
2024-10-14 15:04:18 -05:00
trailing_sep = self.advance_if(delim);
2024-10-10 01:35:17 -05:00
unsafe { self.ctx.stack.push(&mut view, val) };
2024-10-01 08:28:18 -05:00
}
2024-10-14 15:04:18 -05:00
self.trailing_sep = trailing_sep;
2024-10-10 01:35:17 -05:00
self.arena.alloc_slice(unsafe { self.ctx.stack.finalize(view) })
}
2024-05-09 16:41:59 -05:00
/// Consumes the current token if it is of the given `kind`; returns whether it did.
fn advance_if(&mut self, kind: TokenKind) -> bool {
    let matched = self.token.kind == kind;
    if matched {
        self.next();
    }
    matched
}
/// Consumes the next token and returns it if it is of the given `kind`.
///
/// Otherwise an error is reported at that token and `None` is returned.
#[must_use]
fn expect_advance(&mut self, kind: TokenKind) -> Option<Token> {
    let next = self.next();
    if next.kind == kind {
        return Some(next);
    }

    self.report(next.start, format_args!("expected {}, found {}", kind, next.kind))?
}
2024-11-17 11:15:58 -06:00
#[track_caller]
fn warn(&mut self, pos: Pos, msg: impl fmt::Display) {
if log::log_enabled!(log::Level::Error) {
use core::fmt::Write;
writeln!(
self.ctx.warnings.get_mut(),
"(W) {}",
Report::new(self.lexer.source(), self.path, pos, msg)
)
.unwrap();
}
}
2024-06-01 13:30:07 -05:00
/// Appends an error report for `pos` to `ctx.errors` (when logging is enabled at
/// `Error` level) and always returns `None`.
///
/// The `Option<!>` return type lets callers write `self.report(..)?` to record the
/// error and bail out of the current parsing function in one step.
#[track_caller]
fn report(&mut self, pos: Pos, msg: impl fmt::Display) -> Option<!> {
    use core::fmt::Write;

    if log::log_enabled!(log::Level::Error) {
        let report = Report::new(self.lexer.source(), self.path, pos, msg);
        writeln!(self.ctx.errors.get_mut(), "{}", report).unwrap();
    }

    None
}
2024-05-17 12:53:59 -05:00
fn flag_idents(&mut self, e: Expr<'a>, flags: IdentFlags) {
match e {
2024-10-10 01:35:17 -05:00
Expr::Ident { id, .. } => find_ident(&mut self.ctx.idents, id).flags |= flags,
2024-05-17 12:53:59 -05:00
Expr::Field { target, .. } => self.flag_idents(*target, flags),
_ => {}
}
}
2024-05-09 16:41:59 -05:00
}
2024-05-17 12:53:59 -05:00
/// Looks up the entry for `id` by binary search.
///
/// # Panics
/// Panics if `id` is not present in `idents`.
fn find_ident(idents: &mut [ScopeIdent], id: Ident) -> &mut ScopeIdent {
    let index = idents.binary_search_by_key(&id, |si| si.ident).unwrap();
    &mut idents[index]
}
/// Looks up the symbol named `id` by binary search.
///
/// # Panics
/// Panics if `id` is not present in `symbols`.
pub fn find_symbol(symbols: &[Symbol], id: Ident) -> &Symbol {
    // TODO: we can turn this to direct index
    let index = symbols.binary_search_by_key(&id, |s| s.name).unwrap();
    &symbols[index]
}
2024-05-12 04:52:58 -05:00
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Arg<'a> {
pub pos: u32,
2024-07-08 00:22:53 -05:00
pub name: &'a str,
pub id: Ident,
2024-07-19 14:04:22 -05:00
pub is_ct: bool,
2024-07-08 00:22:53 -05:00
pub ty: Expr<'a>,
2024-05-12 04:52:58 -05:00
}
/// An argument is positioned at its name.
impl Poser for Arg<'_> {
    fn posi(&self) -> Pos {
        let Self { pos, .. } = *self;
        pos
    }
}
2024-06-01 13:30:07 -05:00
/// Declares an enum whose variants all have named fields and derives two helpers:
///
/// * `used_bytes` — for the active variant, the largest `offset_of(field) +
///   size_of(field)` over its fields, i.e. how many leading bytes of the value the
///   variant's fields occupy.
/// * `pos` — calls `.posi()` on the *first declared field* of the active variant, so
///   the order of fields inside a variant is significant.
macro_rules! generate_expr {
    ($(#[$enum_attr:meta])* $vis:vis enum $name:ident<$lt:lifetime> {$(
        $(#[$variant_attr:meta])*
        $variant:ident {
            $($field:ident: $field_ty:ty,)*
        },
    )*}) => {
        $(#[$enum_attr])*
        $vis enum $name<$lt> {$(
            $(#[$variant_attr])*
            $variant {
                $($field: $field_ty,)*
            },
        )*}

        impl<$lt> $name<$lt> {
            pub fn used_bytes(&self) -> usize {
                match self {
                    $(Self::$variant { $($field),* } => {
                        0 $(.max($field as *const _ as usize - self as *const _ as usize
                            + core::mem::size_of::<$field_ty>()))*
                    })*
                }
            }

            pub fn pos(&self) -> Pos {
                #[expect(unused_variables)]
                match self {
                    $(Self::$variant { $($field),* } => generate_expr!(@first $(($field),)*).posi(),)*
                }
            }
        }
    };

    (@filed_names $variant:ident $ident1:ident) => { Self::$variant { $ident1: a } };
    // Expands to the first parenthesised group of a comma separated list.
    (@first ($($first:tt)*), $($rest:tt)*) => { $($first)* };
    (@last ($($ign:tt)*), $($rest:tt)*) => { $($rest)* };
    (@last ($($last:tt)*),) => { $($last)* };
}
2024-10-10 06:04:17 -05:00
/// Base a number literal was written in; the discriminant is the numeric base itself,
/// so `radix as u32` can be fed straight to `from_str_radix`.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Radix {
    /// `0b` prefix.
    Binary = 2,
    /// `0o` prefix.
    Octal = 8,
    /// No prefix.
    Decimal = 10,
    /// `0x` prefix.
    Hex = 16,
}
2024-06-01 13:30:07 -05:00
generate_expr! {
    /// Node of the abstract syntax tree.
    ///
    /// Grammar notation used in the variant docs:
    /// `LIST(start, sep, end, elem) => start { elem sep } [elem] end`
    /// `OP := grep for '#define OP:'`
    ///
    /// NOTE: `pos()` reads the first field of each variant, so field order matters.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum Expr<'a> {
        /// `'ct' Expr`
        Ct {
            pos: Pos,
            value: &'a Self,
        },
        /// `'Self'`
        Slf {
            pos: Pos,
        },
        /// `'"([^"]|\\")"'`
        String {
            pos: Pos,
            literal: &'a str,
        },
        /// `'//[^\n]' | '/*' { '([^/*]|*/)*' | Comment } '*/'`
        Comment {
            pos: Pos,
            literal: &'a str,
        },
        /// `'break'`
        Break {
            pos: Pos,
        },
        /// `'continue'`
        Continue {
            pos: Pos,
        },
        /// `'fn' LIST('(', ',', ')', Ident ':' Expr) ':' Expr Expr`
        Closure {
            pos: Pos,
            args: &'a [Arg<'a>],
            ret: &'a Self,
            body: &'a Self,
        },
        /// `Expr LIST('(', ',', ')', Expr)`
        Call {
            func: &'a Self,
            args: &'a [Self],
            trailing_comma: bool,
        },
        /// `'return' [Expr]`
        Return {
            pos: Pos,
            val: Option<&'a Self>,
        },
        /// Wildcard, produced for the `TokenKind::Under` token.
        Wildcard {
            pos: Pos,
        },
        /// note: ':unicode:' is any utf-8 character except ascii
        /// `'[a-zA-Z_:unicode:][a-zA-Z0-9_:unicode:]*'`
        Ident {
            pos: Pos,
            is_ct: bool,
            is_first: bool,
            id: Ident,
            //name: &'a str,
        },
        /// `LIST('{', [';'], '}', Expr)`
        Block {
            pos: Pos,
            stmts: &'a [Self],
        },
        /// `'0x[0-9a-fA-F]+' | '0o[0-7]+' | '[0-9]+' | '0b[01]+'`
        Number {
            pos: Pos,
            value: i64,
            radix: Radix,
        },
        /// `'[0-9]+.[0-9]*'`
        ///
        /// `value` holds the bit pattern of the parsed `f64`.
        Float {
            pos: Pos,
            value: u64,
        },
        /// note: precedence defined in `OP` applies
        /// `Expr OP Expr`
        BinOp {
            left: &'a Self,
            pos: Pos,
            op: TokenKind,
            right: &'a Self,
        },
        /// `'if' Expr Expr [else Expr]`
        If {
            pos: Pos,
            cond: &'a Self,
            then: &'a Self,
            else_: Option<&'a Self>,
        },
        /// `'match' Expr LIST('{', ',', '}', Expr TArrow Expr)`
        /// where `TArrow` is the `TokenKind::TArrow` token
        Match {
            pos: Pos,
            value: &'a Self,
            branches: &'a [MatchBranch<'a>],
        },
        /// `'loop' Expr`
        Loop {
            pos: Pos,
            body: &'a Self,
        },
        /// `('&' | '*' | '^' | ...) Expr`
        ///
        /// The parser also accepts the `Sub`, `Que`, `Not` and `Dot` tokens as prefix
        /// operators.
        UnOp {
            pos: Pos,
            op: TokenKind,
            val: &'a Self,
        },
        /// `'struct' LIST('{', ',', '}', Ident ':' Expr)`
        ///
        /// Besides `name: Type` fields (`Ok`) and comments, an entry can be a run of
        /// `;`-separated top-level expressions (`Err`).
        Struct {
            pos: Pos,
            fields: &'a [CommentOr<'a, Result<StructField<'a>, &'a[Self]>>],
            captured: &'a [Ident],
            trailing_comma: bool,
            packed: bool,
        },
        /// `'enum' LIST('{', ',', '}', Ident)`
        Enum {
            pos: Pos,
            variants: &'a [CommentOr<'a, EnumField<'a>>],
            trailing_comma: bool,
        },
        /// `[Expr] LIST('.{', ',', '}', Ident [':' Expr])`
        Ctor {
            pos: Pos,
            ty: Option<&'a Self>,
            fields: &'a [CtorField<'a>],
            trailing_comma: bool,
        },
        /// `[Expr] LIST('.(', ',', ')', Expr)`
        Tupl {
            pos: Pos,
            ty: Option<&'a Self>,
            fields: &'a [Self],
            trailing_comma: bool,
        },
        /// `'[' Expr [';' Expr] ']'`
        Slice {
            pos: Pos,
            size: Option<&'a Self>,
            item: &'a Self,
        },
        /// `Expr '[' Expr ']'`
        Index {
            base: &'a Self,
            index: &'a Self,
        },
        /// `Expr '.' Ident`
        Field {
            target: &'a Self,
            // we put it second place because its the pos of '.'
            pos: Pos,
            name: &'a str,
        },
        /// `'true' | 'false'`
        Bool {
            pos: Pos,
            value: bool,
        },
        /// `'null'`
        Null {
            pos: Pos,
        },
        /// `'idk'`
        Idk {
            pos: Pos,
        },
        /// `'die'`
        Die {
            pos: Pos,
        },
        /// `'@' Ident List('(', ',', ')', Expr)`
        Directive {
            pos: Pos,
            name: &'a str,
            args: &'a [Self],
        },
        /// `'@use' '(' String ')'`
        Mod {
            pos: Pos,
            id: Module,
            path: &'a str,
        },
        /// `'@embed' '(' String ')'`
        Embed {
            pos: Pos,
            id: Global,
            path: &'a str,
        },
    }
}
2024-10-12 08:04:58 -05:00
impl Expr<'_> {
pub fn declares(&self, iden: Result<Ident, &str>, source: &str) -> Option<Ident> {
2024-07-08 00:22:53 -05:00
match *self {
2024-10-27 07:57:00 -05:00
Self::Ident { id, .. } if iden == Ok(id) || iden == Err(&source[id.range()]) => {
Some(id)
}
Self::Ctor { fields, .. } => fields.iter().find_map(|f| f.value.declares(iden, source)),
2024-07-08 00:22:53 -05:00
_ => None,
}
}
pub fn has_ct(&self, symbols: &[Symbol]) -> bool {
match *self {
Self::Ident { id, .. } => find_symbol(symbols, id).flags & idfl::COMPTIME != 0,
Self::Ctor { fields, .. } => fields.iter().any(|f| f.value.has_ct(symbols)),
_ => false,
}
}
2024-11-08 03:25:34 -06:00
pub fn find_pattern_path<T, F: FnOnce(&Expr, bool) -> T>(
2024-07-08 00:22:53 -05:00
&self,
ident: Ident,
target: &Expr,
mut with_final: F,
) -> Result<T, F> {
2024-07-08 00:22:53 -05:00
match *self {
2024-11-08 03:25:34 -06:00
Self::Ident { id, is_ct, .. } if id == ident => Ok(with_final(target, is_ct)),
2024-07-08 00:22:53 -05:00
Self::Ctor { fields, .. } => {
2024-09-30 12:09:17 -05:00
for &CtorField { name, value, pos } in fields {
match value.find_pattern_path(
ident,
&Expr::Field { pos, target, name },
with_final,
) {
Ok(value) => return Ok(value),
2024-07-08 00:22:53 -05:00
Err(e) => with_final = e,
}
}
Err(with_final)
}
_ => Err(with_final),
}
}
}
2024-11-17 09:25:39 -06:00
/// A single entry of a `branches` list: a pattern, a recorded position and a body.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MatchBranch<'a> {
    /// Pattern expression of the branch.
    pub pat: Expr<'a>,
    /// Position stored alongside the branch.
    pub pos: Pos,
    /// Expression evaluated for this branch.
    pub body: Expr<'a>,
}

impl Poser for MatchBranch<'_> {
    /// The branch is located by its pattern, not by the separately stored `pos` field.
    fn posi(&self) -> Pos {
        let pattern = &self.pat;
        pattern.pos()
    }
}
/// One named entry of an `Expr::Enum` variant list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EnumField<'a> {
    /// Source position of the entry.
    pub pos: Pos,
    /// Name of the entry.
    pub name: &'a str,
}

impl Poser for EnumField<'_> {
    /// Returns the stored position.
    fn posi(&self) -> Pos {
        let Self { pos, .. } = *self;
        pos
    }
}
/// One `name: type` entry of an `Expr::Struct` field list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StructField<'a> {
    /// Source position of the field.
    pub pos: Pos,
    /// Name of the field.
    pub name: &'a str,
    /// Expression describing the field's type.
    pub ty: Expr<'a>,
}

impl Poser for StructField<'_> {
    /// Returns the stored position.
    fn posi(&self) -> Pos {
        let Self { pos, .. } = *self;
        pos
    }
}
2024-07-08 00:22:53 -05:00
/// One `name [: value]` entry of an `Expr::Ctor` field list.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct CtorField<'a> {
    /// Source position of the field.
    pub pos: Pos,
    /// Name of the field.
    pub name: &'a str,
    /// Expression supplying the field's value.
    pub value: Expr<'a>,
}

impl Poser for CtorField<'_> {
    /// Returns the stored position.
    fn posi(&self) -> Pos {
        let Self { pos, .. } = *self;
        pos
    }
}
2024-10-04 14:44:29 -05:00
pub trait Poser {
fn posi(&self) -> Pos;
2024-05-09 16:41:59 -05:00
}
2024-11-24 07:47:38 -06:00
impl<O: Poser, E: Poser> Poser for Result<O, E> {
    /// Forwards to whichever side of the `Result` is present.
    fn posi(&self) -> Pos {
        match self {
            Ok(value) => value.posi(),
            Err(other) => other.posi(),
        }
    }
}
impl<T: Poser> Poser for &[T] {
    /// A slice is located by its first element.
    ///
    /// # Panics
    /// Panics when the slice is empty, because the first element is indexed directly.
    fn posi(&self) -> Pos {
        let head = &self[0];
        head.posi()
    }
}
2024-06-01 13:30:07 -05:00
impl Poser for Pos {
    /// A bare position is its own position.
    fn posi(&self) -> Pos {
        let &at = self;
        at
    }
}
2024-10-12 08:04:58 -05:00
impl Poser for Expr<'_> {
    /// Delegates to the inherent `pos` accessor of the expression.
    fn posi(&self) -> Pos {
        self.pos()
    }
}
2024-10-12 08:04:58 -05:00
impl<T: Poser> Poser for CommentOr<'_, T> {
    /// A comment reports its own stored position; a wrapped item reports the item's position.
    fn posi(&self) -> Pos {
        match *self {
            CommentOr::Comment { pos, .. } => pos,
            CommentOr::Or(ref item) => item.posi(),
        }
    }
}
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum CommentOr<'a, T> {
Or(T),
Comment { literal: &'a str, pos: Pos },
}
2024-10-12 08:04:58 -05:00
impl<T: Copy> CommentOr<'_, T> {
pub fn or(&self) -> Option<T> {
match *self {
CommentOr::Or(v) => Some(v),
CommentOr::Comment { .. } => None,
}
}
}
2024-10-01 08:28:18 -05:00
pub struct Display<'a> {
source: &'a str,
expr: &'a Expr<'a>,
}
2024-10-04 14:44:29 -05:00
2024-10-01 08:28:18 -05:00
impl<'a> Display<'a> {
pub fn new(source: &'a str, expr: &'a Expr<'a>) -> Self {
Self { source, expr }
}
}
impl core::fmt::Display for Display<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
Formatter::new(self.source).fmt(self.expr, f)
}
}
2024-10-10 01:35:17 -05:00
#[derive(Default)]
pub struct Ctx {
pub errors: RefCell<String>,
2024-11-17 11:15:58 -06:00
pub warnings: RefCell<String>,
symbols: Vec<Symbol>,
2024-10-10 01:35:17 -05:00
stack: StackAlloc,
idents: Vec<ScopeIdent>,
captured: Vec<Ident>,
}
impl Ctx {
pub fn clear(&mut self) {
self.errors.get_mut().clear();
debug_assert_eq!(self.symbols.len(), 0);
debug_assert_eq!(self.stack.len, 0);
debug_assert_eq!(self.idents.len(), 0);
debug_assert_eq!(self.captured.len(), 0);
}
}
2024-05-17 12:53:59 -05:00
#[repr(C)]
2024-05-19 11:20:42 -05:00
pub struct AstInner<T: ?Sized> {
2024-05-17 12:53:59 -05:00
ref_count: AtomicUsize,
2024-10-10 06:04:17 -05:00
pub mem: ArenaChunk,
2024-07-08 00:22:53 -05:00
exprs: *const [Expr<'static>],
2024-05-19 11:20:42 -05:00
2024-07-08 00:22:53 -05:00
pub path: Box<str>,
2024-09-01 14:15:29 -05:00
pub file: Box<str>,
2024-05-19 11:20:42 -05:00
pub symbols: T,
2024-05-17 12:53:59 -05:00
}
impl AstInner<[Symbol]> {
2024-09-30 12:09:17 -05:00
fn layout(syms: usize) -> core::alloc::Layout {
core::alloc::Layout::new::<AstInner<()>>()
.extend(core::alloc::Layout::array::<Symbol>(syms).unwrap())
2024-05-17 12:53:59 -05:00
.unwrap()
.0
}
fn new(file: Box<str>, path: Box<str>, ctx: &mut Ctx, loader: Loader) -> NonNull<Self> {
let arena = Arena::with_capacity(
SOURCE_TO_AST_FACTOR * file.bytes().filter(|b| !b.is_ascii_whitespace()).count(),
);
2024-10-01 08:28:18 -05:00
let exprs =
unsafe { core::mem::transmute(Parser::parse(ctx, &file, &path, loader, &arena)) };
2024-05-17 12:53:59 -05:00
2024-10-12 06:07:49 -05:00
crate::quad_sort(&mut ctx.symbols, |a, b| a.name.cmp(&b.name));
2024-05-17 12:53:59 -05:00
2024-10-10 01:35:17 -05:00
let layout = Self::layout(ctx.symbols.len());
2024-05-17 12:53:59 -05:00
unsafe {
2024-09-30 12:09:17 -05:00
let ptr = alloc::alloc::alloc(layout);
2024-10-10 01:35:17 -05:00
let inner: *mut Self = core::ptr::from_raw_parts_mut(ptr as *mut _, ctx.symbols.len());
2024-05-19 11:20:42 -05:00
2024-09-30 12:09:17 -05:00
core::ptr::write(inner as *mut AstInner<()>, AstInner {
2024-07-08 00:22:53 -05:00
ref_count: AtomicUsize::new(1),
mem: arena.chunk.into_inner(),
exprs,
path,
2024-10-01 08:28:18 -05:00
file,
2024-07-08 00:22:53 -05:00
symbols: (),
});
2024-09-30 12:09:17 -05:00
core::ptr::addr_of_mut!((*inner).symbols)
2024-05-17 12:53:59 -05:00
.as_mut_ptr()
2024-10-10 01:35:17 -05:00
.copy_from_nonoverlapping(ctx.symbols.as_ptr(), ctx.symbols.len());
ctx.symbols.clear();
2024-05-19 11:20:42 -05:00
2024-05-17 12:53:59 -05:00
NonNull::new_unchecked(inner)
}
}
2024-09-13 08:12:20 -05:00
2024-10-10 12:01:12 -05:00
pub fn report<D>(&self, pos: Pos, msg: D) -> Report<D> {
Report::new(&self.file, &self.path, pos, msg)
2024-09-13 08:12:20 -05:00
}
2024-05-17 12:53:59 -05:00
}
2024-10-10 12:01:12 -05:00
/// A diagnostic message bound to a position in a source file; rendered through
/// `core::fmt::Display`.
pub struct Report<'a, D> {
    /// Content of the file the report refers to.
    file: &'a str,
    /// Path of that file.
    path: &'a str,
    /// Position the report points at.
    pos: Pos,
    /// Message to show.
    msg: D,
}

impl<'a, D> Report<'a, D> {
    /// Bundles the pieces of a diagnostic without formatting anything yet.
    pub fn new(file: &'a str, path: &'a str, pos: Pos, msg: D) -> Self {
        Report { file, path, pos, msg }
    }
}

impl<D: core::fmt::Display> core::fmt::Display for Report<'_, D> {
    /// Writes the report via `report_to`; always returns `Ok` because `report_to` does not
    /// surface write errors.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message: &dyn fmt::Display = &self.msg;
        report_to(self.file, self.path, self.pos, message, f);
        Ok(())
    }
}
2024-10-12 06:07:49 -05:00
/// Writes a diagnostic for `msg` at byte offset `pos` of `file` into `out`.
///
/// The output is a `path:line:col: msg` header, the offending source line and a `^` marker
/// line pointing at the column. Leading tabs of the source line are echoed as
/// `TAB_WIDTH` spaces and the marker column is widened by the same amount, so the caret
/// stays under the reported character. Write errors are deliberately ignored.
fn report_to(file: &str, path: &str, pos: Pos, msg: &dyn fmt::Display, out: &mut impl fmt::Write) {
    // Columns a leading tab occupies in the echoed line. The caret adjustment below adds
    // `TAB_WIDTH - 1` per tab, which only lines up if the tab is echoed this wide.
    const TAB_WIDTH: usize = 4;

    let (line, mut col) = lexer::line_col(file.as_bytes(), pos);
    #[cfg(feature = "std")]
    let disp = crate::fs::display_rel_path(path);
    #[cfg(not(feature = "std"))]
    let disp = path;
    _ = writeln!(out, "{}:{}:{}: {}", disp, line, col, msg);

    // Cut out the line that contains `pos`.
    let at = pos as usize;
    let line_start = file[..at].rfind('\n').map_or(0, |i| i + 1);
    let line_end = file[at..].find('\n').map_or(file.len(), |i| i + at);
    let line = &file[line_start..line_end];

    let leading_tabs =
        line.chars().take_while(|c| c.is_whitespace()).filter(|&c| c == '\t').count();
    col += leading_tabs * (TAB_WIDTH - 1);

    let mut has_non_whitespace = false;
    for char in line.chars() {
        if char == '\t' && !has_non_whitespace {
            for _ in 0..TAB_WIDTH {
                _ = out.write_char(' ');
            }
        } else {
            _ = out.write_char(char);
        }
        has_non_whitespace |= !char.is_whitespace();
    }
    _ = out.write_char('\n');

    // `saturating_sub` keeps a zero column from underflowing the padding count.
    for _ in 0..col.saturating_sub(1) {
        _ = out.write_char(' ');
    }
    _ = out.write_str("^\n");
}
2024-05-17 12:53:59 -05:00
#[derive(PartialEq, Eq, Hash)]
pub struct Ast(NonNull<AstInner<[Symbol]>>);
impl Ast {
pub fn new(
path: impl Into<Box<str>>,
content: impl Into<Box<str>>,
ctx: &mut Ctx,
loader: Loader,
) -> Self {
Self(AstInner::new(content.into(), path.into(), ctx, loader))
2024-05-17 12:53:59 -05:00
}
pub fn exprs(&self) -> &[Expr] {
unsafe { &*self.inner().exprs }
}
2024-05-19 11:20:42 -05:00
fn inner(&self) -> &AstInner<[Symbol]> {
unsafe { self.0.as_ref() }
2024-05-17 12:53:59 -05:00
}
2024-05-20 07:11:58 -05:00
pub fn find_decl(&self, id: Result<Ident, &str>) -> Option<(&Expr, Ident)> {
2024-11-24 07:47:38 -06:00
find_decl(self.exprs(), &self.file, id)
2024-05-17 12:53:59 -05:00
}
2024-09-03 10:51:28 -05:00
pub fn ident_str(&self, ident: Ident) -> &str {
2024-10-27 07:57:00 -05:00
&self.file[ident.range()]
2024-09-03 10:51:28 -05:00
}
2024-05-19 11:20:42 -05:00
}
2024-05-17 12:53:59 -05:00
2024-11-24 07:47:38 -06:00
/// Scans `exprs` for the first declaration (`BinOp` with `TokenKind::Decl`) whose left side
/// declares `id`, returning that expression and the matched identifier.
///
/// `file` is the source text used to compare names when `id` is given as `Err(name)`.
pub fn find_decl<'a>(
    exprs: &'a [Expr<'a>],
    file: &str,
    id: Result<Ident, &str>,
) -> Option<(&'a Expr<'a>, Ident)> {
    for expr in exprs {
        let Expr::BinOp { left, op: TokenKind::Decl, .. } = expr else {
            continue;
        };
        if let Some(found) = left.declares(id, file) {
            return Some((expr, found));
        }
    }
    None
}
2024-05-19 11:20:42 -05:00
impl Default for Ast {
fn default() -> Self {
Self(AstInner::new("".into(), "".into(), &mut Ctx::default(), &mut no_loader))
2024-05-19 11:20:42 -05:00
}
}
2024-11-08 03:25:34 -06:00
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
2024-05-19 11:20:42 -05:00
#[repr(packed)]
pub struct ExprRef(NonNull<Expr<'static>>);
impl ExprRef {
pub fn new(expr: &Expr) -> Self {
Self(NonNull::from(expr).cast())
}
pub fn get<'a>(&self, from: &'a Ast) -> &'a Expr<'a> {
assert!(from.mem.contains(self.0.as_ptr() as _));
2024-05-19 11:20:42 -05:00
// SAFETY: the pointer is or was a valid reference in the past, if it points within one of
// arenas regions, it muts be walid, since arena does not give invalid pointers to its
// allocations
unsafe { { self.0 }.as_ref() }
2024-05-17 12:53:59 -05:00
}
2024-09-04 09:54:34 -05:00
pub fn dangling() -> Self {
Self(NonNull::dangling())
}
}
impl Default for ExprRef {
fn default() -> Self {
Self::dangling()
}
2024-05-17 12:53:59 -05:00
}
// `Ast` wraps a raw `NonNull` pointer, which is neither `Send` nor `Sync` by itself, so both
// traits are asserted manually. The shared counter is an `AtomicUsize`.
// NOTE(review): soundness also relies on nothing reachable through `AstInner` being mutated
// through a shared handle, and on the final decrement synchronising with earlier uses; confirm.
unsafe impl Send for Ast {}
unsafe impl Sync for Ast {}
impl Clone for Ast {
fn clone(&self) -> Self {
2024-09-30 12:09:17 -05:00
unsafe { self.0.as_ref() }.ref_count.fetch_add(1, core::sync::atomic::Ordering::Relaxed);
2024-05-17 12:53:59 -05:00
Self(self.0)
}
}
impl Drop for Ast {
fn drop(&mut self) {
let inner = unsafe { self.0.as_ref() };
2024-09-30 12:09:17 -05:00
if inner.ref_count.fetch_sub(1, core::sync::atomic::Ordering::Relaxed) == 1 {
2024-10-01 08:28:18 -05:00
let inner = unsafe { self.0.as_mut() };
let len = inner.symbols.len();
unsafe { core::ptr::drop_in_place(inner) };
let layout = AstInner::layout(len);
2024-05-17 12:53:59 -05:00
unsafe {
2024-09-30 12:09:17 -05:00
alloc::alloc::dealloc(self.0.as_ptr() as _, layout);
2024-05-17 12:53:59 -05:00
}
}
}
}
2024-05-19 11:20:42 -05:00
impl Deref for Ast {
type Target = AstInner<[Symbol]>;
fn deref(&self) -> &Self::Target {
self.inner()
}
}
2024-10-10 01:35:17 -05:00
/// Bookkeeping for one region of a [`StackAlloc`] that is currently being filled with `T`s.
struct StackAllocView<T> {
    /// Length of the allocator before the view was opened; restored by `finalize`.
    prev: usize,
    /// Aligned offset at which the view's elements start.
    base: usize,
    /// Ties the view to its element type without storing one.
    _ph: PhantomData<T>,
}
2024-10-10 01:35:17 -05:00
/// Growable byte buffer used as a stack of temporary, typed element runs.
struct StackAlloc {
    /// Start of the buffer; null until the first push allocates it.
    data: *mut u8,
    /// Number of bytes currently in use.
    len: usize,
    /// Number of bytes allocated.
    cap: usize,
}
impl StackAlloc {
const MAX_ALIGN: usize = 16;
fn view<T: Copy>(&mut self) -> StackAllocView<T> {
let prev = self.len;
let align = core::mem::align_of::<T>();
assert!(align <= Self::MAX_ALIGN);
self.len = (self.len + align - 1) & !(align - 1);
StackAllocView { base: self.len, prev, _ph: PhantomData }
}
unsafe fn push<T: Copy>(&mut self, _view: &mut StackAllocView<T>, value: T) {
if unlikely(self.len + core::mem::size_of::<T>() > self.cap) {
let next_cap = self.cap.max(16 * 32).max(core::mem::size_of::<T>()) * 2;
2024-10-01 08:28:18 -05:00
if self.cap == 0 {
let layout =
core::alloc::Layout::from_size_align_unchecked(next_cap, Self::MAX_ALIGN);
self.data = alloc::alloc::alloc(layout);
} else {
let old_layout =
core::alloc::Layout::from_size_align_unchecked(self.cap, Self::MAX_ALIGN);
self.data = alloc::alloc::realloc(self.data, old_layout, next_cap);
}
self.cap = next_cap;
}
let dst = self.data.add(self.len) as *mut T;
2024-10-12 08:04:58 -05:00
debug_assert!(dst.is_aligned());
2024-10-01 08:28:18 -05:00
self.len += core::mem::size_of::<T>();
core::ptr::write(dst, value);
}
unsafe fn finalize<T: Copy>(&mut self, view: StackAllocView<T>) -> &[T] {
if unlikely(self.cap == 0) {
return &[];
}
let slice = core::slice::from_ptr_range(
self.data.add(view.base) as *const T..self.data.add(self.len) as *const T,
);
self.len = view.prev;
slice
}
}
impl Default for StackAlloc {
fn default() -> Self {
Self { data: core::ptr::null_mut(), len: 0, cap: 0 }
}
}
impl Drop for StackAlloc {
    /// Frees the backing buffer, if one was ever allocated.
    fn drop(&mut self) {
        // With `cap == 0` no push ever allocated and `data` is still null; passing a null
        // pointer to `dealloc` violates its contract, so there is nothing to free.
        if self.cap == 0 {
            return;
        }
        // SAFETY: a non-zero `cap` means `data` came from `alloc`/`realloc` with exactly this
        // size and `MAX_ALIGN` alignment.
        let layout =
            unsafe { core::alloc::Layout::from_size_align_unchecked(self.cap, Self::MAX_ALIGN) };
        unsafe { alloc::alloc::dealloc(self.data, layout) };
    }
}
#[derive(Default)]
2024-10-10 01:35:17 -05:00
pub struct Arena {
2024-05-17 12:53:59 -05:00
chunk: UnsafeCell<ArenaChunk>,
}
2024-10-10 01:35:17 -05:00
impl Arena {
2024-10-10 06:04:17 -05:00
pub fn with_capacity(cap: usize) -> Arena {
Self { chunk: UnsafeCell::new(ArenaChunk::new(cap, ArenaChunk::default())) }
}
2024-10-10 01:35:17 -05:00
pub fn alloc<'a>(&'a self, expr: Expr<'a>) -> &'a Expr<'a> {
2024-10-10 06:04:17 -05:00
let layout = core::alloc::Layout::from_size_align(
expr.used_bytes(),
core::mem::align_of::<Expr<'a>>(),
)
.unwrap();
let ptr = self.alloc_low(layout);
2024-06-01 13:30:07 -05:00
unsafe {
ptr.cast::<u8>().copy_from_nonoverlapping(NonNull::from(&expr).cast(), layout.size())
2024-06-01 13:30:07 -05:00
};
unsafe { ptr.cast::<Expr<'a>>().as_ref() }
}
2024-10-10 01:35:17 -05:00
pub fn alloc_slice<'a, T: Copy>(&'a self, slice: &[T]) -> &'a [T] {
2024-09-30 12:09:17 -05:00
if slice.is_empty() || core::mem::size_of::<T>() == 0 {
2024-05-19 11:20:42 -05:00
return &mut [];
}
2024-09-30 12:09:17 -05:00
let layout = core::alloc::Layout::array::<T>(slice.len()).unwrap();
let ptr = self.alloc_low(layout);
2024-07-08 00:22:53 -05:00
unsafe { ptr.as_ptr().cast::<T>().copy_from_nonoverlapping(slice.as_ptr(), slice.len()) };
2024-09-30 12:09:17 -05:00
unsafe { core::slice::from_raw_parts(ptr.as_ptr() as _, slice.len()) }
}
2024-09-30 12:09:17 -05:00
fn alloc_low(&self, layout: core::alloc::Layout) -> NonNull<u8> {
2024-05-17 12:53:59 -05:00
let chunk = unsafe { &mut *self.chunk.get() };
2024-05-17 12:53:59 -05:00
if let Some(ptr) = chunk.alloc(layout) {
return ptr;
}
const EXPANSION_ALLOC: usize = 1024 * 4 - core::mem::size_of::<ArenaChunk>();
if layout.size() > EXPANSION_ALLOC {
let next_ptr = chunk.next_ptr();
if next_ptr.is_null() {
unsafe {
core::ptr::write(
chunk,
ArenaChunk::new(
layout.size() + layout.align() - 1 + core::mem::size_of::<ArenaChunk>(),
Default::default(),
),
);
}
} else {
unsafe {
let chunk = ArenaChunk::new(
layout.size() + layout.align() - 1 + core::mem::size_of::<ArenaChunk>(),
core::ptr::read(next_ptr),
);
let alloc = chunk.base.add(chunk.base.align_offset(layout.align()));
core::ptr::write(next_ptr, chunk);
return NonNull::new_unchecked(alloc);
}
}
} else {
unsafe {
core::ptr::write(chunk, ArenaChunk::new(EXPANSION_ALLOC, core::ptr::read(chunk)));
}
}
2024-05-17 12:53:59 -05:00
chunk.alloc(layout).unwrap()
}
pub fn clear(&mut self) {
let size = self.chunk.get_mut().size();
if self.chunk.get_mut().next().is_some() {
self.chunk = ArenaChunk::new(size + 1024, Default::default()).into();
} else {
self.chunk.get_mut().reset();
}
}
}
2024-10-10 06:04:17 -05:00
/// One link of the arena's chunk chain.
///
/// An allocated chunk owns a block that starts with a header holding the next chunk by
/// value, followed by `size` usable bytes. `base` points just past the header and `end` is
/// the bump pointer, moving downwards from `base + size` towards `base`.
pub struct ArenaChunk {
    /// First usable byte (directly after the header).
    base: *mut u8,
    /// Bump pointer; everything in `end..base + size` is handed out.
    end: *mut u8,
    /// Number of usable bytes after the header.
    size: usize,
}

impl Default for ArenaChunk {
    /// Creates the sentinel that terminates a chain. It owns no memory: `base` is the
    /// integer `size_of::<Self>()`, so stepping back one header yields a null `next_ptr`.
    fn default() -> Self {
        Self {
            base: core::mem::size_of::<Self>() as _,
            end: core::mem::size_of::<Self>() as _,
            size: 0,
        }
    }
}

impl ArenaChunk {
    /// Layout of a header followed by `size` payload bytes.
    fn layout(size: usize) -> Layout {
        Layout::new::<Self>().extend(Layout::array::<u8>(size).unwrap()).unwrap().0
    }

    /// Allocates a chunk with `size` usable bytes and stores `next` in its header.
    ///
    /// `end` is derived from the advanced `base`, so the whole payload is usable and `end`
    /// can never start out below `base`, however small `size` is. Aborts through
    /// `handle_alloc_error` if the allocation fails.
    fn new(size: usize, next: Self) -> Self {
        let layout = Self::layout(size);
        let header = unsafe { alloc::alloc::alloc(layout) };
        if header.is_null() {
            alloc::alloc::handle_alloc_error(layout);
        }
        unsafe { core::ptr::write(header.cast::<Self>(), next) };
        let base = unsafe { header.add(core::mem::size_of::<Self>()) };
        let end = unsafe { base.add(size) };
        Self { base, end, size }
    }

    /// Bumps `end` down far enough to fit `layout`, or returns `None` when the remaining
    /// space is too small.
    fn alloc(&mut self, layout: core::alloc::Layout) -> Option<NonNull<u8>> {
        // Bytes skipped so that the returned pointer is aligned once `end` moves down.
        let padding = self.end as usize - (self.end as usize & !(layout.align() - 1));
        let size = layout.size() + padding;
        if size > self.end as usize - self.base as usize {
            return None;
        }
        unsafe { self.end = self.end.sub(size) };
        debug_assert!(self.end >= self.base, "{:?} {:?}", self.end, self.base);
        unsafe { Some(NonNull::new_unchecked(self.end)) }
    }

    /// Returns the next chunk of the chain, or `None` for the sentinel.
    fn next(&self) -> Option<&Self> {
        unsafe { self.next_ptr().as_ref() }
    }

    /// Address of the header that stores the next chunk; null for the sentinel.
    fn next_ptr(&self) -> *mut Self {
        // The sentinel's `base` is not a real allocation, so use wrapping arithmetic.
        self.base.cast::<Self>().wrapping_sub(1)
    }

    /// Reports whether `arg` lies inside the payload of this chunk or of any later chunk.
    fn contains(&self, arg: *mut u8) -> bool {
        (self.base <= arg && self.base.wrapping_add(self.size) > arg)
            || self.next().is_some_and(|s| s.contains(arg))
    }

    /// Total number of bytes handed out by this chunk and all chunks after it.
    pub fn size(&self) -> usize {
        self.base as usize + self.size - self.end as usize + self.next().map_or(0, Self::size)
    }

    /// Marks the whole payload of this chunk as free again.
    fn reset(&mut self) {
        self.end = self.base.wrapping_add(self.size);
    }
}

impl Drop for ArenaChunk {
    /// Drops the rest of the chain, then frees this chunk's block.
    fn drop(&mut self) {
        // Only the sentinel has no backing block. Checking the header address rather than
        // `size` also frees chunks that were created with zero capacity.
        let header = self.next_ptr();
        if header.is_null() {
            return;
        }
        // Move the next chunk out of the header so its own `Drop` runs.
        _ = self.next().map(|r| unsafe { core::ptr::read(r) });
        unsafe { alloc::alloc::dealloc(header.cast::<u8>(), Self::layout(self.size)) }
    }
}