From 98862edd58be781068f37221328265256c9c60be Mon Sep 17 00:00:00 2001 From: mlokr Date: Sat, 1 Jun 2024 20:30:07 +0200 Subject: [PATCH] other stuff --- after-ops.txt | 46 +++++ befor-ops.txt | 46 +++++ hblang/examples/generic_types.hb | 2 +- hblang/src/codegen.rs | 91 ++++++-- hblang/src/lexer.rs | 33 +-- hblang/src/lib.rs | 25 +-- hblang/src/main.rs | 28 +-- hblang/src/parser.rs | 345 +++++++++++++++++++------------ 8 files changed, 416 insertions(+), 200 deletions(-) create mode 100644 after-ops.txt create mode 100644 befor-ops.txt diff --git a/after-ops.txt b/after-ops.txt new file mode 100644 index 00000000..43e18535 --- /dev/null +++ b/after-ops.txt @@ -0,0 +1,46 @@ +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 544 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 3200 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1032 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 224 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 3240 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1144 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1352 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1400 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1128 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1632 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1528 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 2496 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 2440 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 600 +Dbg: deallocating full chunk +test parser::tests::arithmetic ... Dbg: dropping chunk of size: 544 +Dbg: deallocating full chunk +test parser::tests::example ... Dbg: dropping chunk of size: 224 +Dbg: deallocating full chunk diff --git a/befor-ops.txt b/befor-ops.txt new file mode 100644 index 00000000..b1f2f2dd --- /dev/null +++ b/befor-ops.txt @@ -0,0 +1,46 @@ +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 936 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 4040 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1112 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 296 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 4328 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1464 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1616 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1864 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 1504 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 2160 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 2000 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 3048 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 2960 +Dbg: deallocating full chunk +Dbg: dropping chunk of size: 0 +Dbg: dropping chunk of size: 848 +Dbg: deallocating full chunk +test parser::tests::arithmetic ... Dbg: dropping chunk of size: 936 +Dbg: deallocating full chunk +test parser::tests::example ... Dbg: dropping chunk of size: 296 +Dbg: deallocating full chunk diff --git a/hblang/examples/generic_types.hb b/hblang/examples/generic_types.hb index fa1e2d05..feaf7e02 100644 --- a/hblang/examples/generic_types.hb +++ b/hblang/examples/generic_types.hb @@ -1,4 +1,4 @@ -Vec := fn(Elem: type): type { +Vec := fn($Elem: type): type { return struct { data: ^Elem, len: uint, diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index fdc86633..b6103997 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -7,7 +7,7 @@ use hbvm::Vm; use crate::{ ident::{self, Ident}, - parser::{idfl, ExprRef}, + parser::{idfl, ExprRef, FileId, Pos}, HashMap, }; @@ -33,7 +33,7 @@ fn align_up(value: u64, align: u64) -> u64 { } struct ItemId { - file: parser::FileId, + file: FileId, expr: parser::ExprRef, id: u32, } @@ -299,6 +299,7 @@ type_kind! { Pointer, Func, Global, + Module, } } @@ -508,6 +509,7 @@ impl<'a> std::fmt::Display for TypeDisplay<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use TypeKind as TK; match TK::from_ty(self.ty) { + TK::Module(idx) => write!(f, "module{}", idx), TK::Builtin(ty) => write!(f, "{}", bt::to_str(ty)), TK::Pointer(ty) => { write!(f, "^{}", self.rety(self.codegen.pointers[ty as usize])) @@ -561,13 +563,13 @@ struct Linked { #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] struct SymKey { id: Ident, - file: parser::FileId, + file: FileId, } #[derive(Default)] pub struct Codegen { cf: parser::Ast, - cf_id: parser::FileId, + cf_id: FileId, ret: Type, ret_reg: Option, @@ -634,7 +636,7 @@ impl Codegen { pub fn generate(&mut self) { self.lazy_init(); - self.find_and_declare(0, Err("main")); + self.find_and_declare(0, 0, Err("main")); self.code.prelude(); self.complete_call_graph(); } @@ -750,7 +752,7 @@ impl Codegen { TypeDisplay::new(self, ty) } - fn unwrap_struct(&self, ty: Type, pos: parser::Pos, context: impl std::fmt::Display) -> Type { + fn unwrap_struct(&self, ty: Type, pos: Pos, context: impl std::fmt::Display) -> Type { match TypeKind::from_ty(ty) { TypeKind::Struct(idx) => idx, _ => self.report( @@ -760,8 +762,7 @@ impl Codegen { } } - fn offset_of(&self, pos: parser::Pos, ty: Type, field: Result<&str, usize>) -> (u64, Type) { - let idx = self.unwrap_struct(ty, pos, "field access"); + fn offset_of(&self, pos: Pos, idx: u32, field: Result<&str, usize>) -> (u64, Type) { let record = &self.structs[idx as usize]; let mut offset = 0; for (i, &(ref name, ty)) in record.fields.iter().enumerate() { @@ -897,7 +898,11 @@ impl Codegen { match value.loc { Loc::RegRef(reg) | Loc::Reg(LinReg(reg, ..)) => self.vm.read_reg(reg).0 as _, - _ => unreachable!(), + Loc::Deref(LinReg(reg, ..), .., off) | Loc::DerefRef(reg, .., off) => { + let ptr = unsafe { (self.vm.read_reg(reg).0 as *const u8).add(off as _) }; + unsafe { std::ptr::read(ptr as *const Type) } + } + v => unreachable!("{v:?}"), } } @@ -992,6 +997,7 @@ impl Codegen { use instrs as i; let value = match *expr { + E::Mod { id, .. } => Some(Value::ty(TypeKind::Module(id).encode())), E::Struct { fields, captured, .. } => { @@ -1010,7 +1016,7 @@ impl Codegen { .map(|&id| E::Ident { id, name: "booodab", - index: u32::MAX, + index: u16::MAX, }) .map(|expr| self.expr(&expr)) .collect::>>()?; @@ -1217,7 +1223,7 @@ impl Codegen { } for (i, (name, field)) in fields.iter().enumerate() { - let (offset, ty) = self.offset_of(field.pos(), ty, name.ok_or(i)); + let (offset, ty) = self.offset_of(field.pos(), stuct, name.ok_or(i)); let loc = loc.offset_ref(offset); self.expr_ctx(field, Ctx::Dest(Value { ty, loc }))?; } @@ -1225,6 +1231,7 @@ impl Codegen { return Some(Value { ty, loc }); } E::Field { target, field } => { + let checkpoint = self.code.code.len(); let mut tal = self.expr(target)?; if let TypeKind::Pointer(ty) = TypeKind::from_ty(tal.ty) { tal.ty = self.pointers[ty as usize]; @@ -1237,9 +1244,28 @@ impl Codegen { } }; } - let (offset, ty) = self.offset_of(target.pos(), tal.ty, Ok(field)); - let loc = tal.loc.offset(offset); - Some(Value { ty, loc }) + + match TypeKind::from_ty(tal.ty) { + TypeKind::Struct(idx) => { + let (offset, ty) = self.offset_of(target.pos(), idx, Ok(field)); + let loc = tal.loc.offset(offset); + Some(Value { ty, loc }) + } + TypeKind::Builtin(bt::TYPE) => { + self.code.code.truncate(checkpoint); + match TypeKind::from_ty(self.ty(target)) { + TypeKind::Module(idx) => Some(Value::ty( + self.find_and_declare(target.pos(), idx, Err(field)) + .encode(), + )), + _ => todo!(), + } + } + smh => self.report( + target.pos(), + format_args!("the field operation is not supported: {smh:?}"), + ), + } } E::UnOp { op: T::Band, @@ -1370,10 +1396,13 @@ impl Codegen { } E::Ident { id, .. } => match self .symbols - .get(&SymKey { id, file: 0 }) + .get(&SymKey { + id, + file: self.cf_id, + }) .copied() .map(TypeKind::from_ty) - .unwrap_or_else(|| self.find_and_declare(0, Ok(id))) + .unwrap_or_else(|| self.find_and_declare(ident::pos(id), self.cf_id, Ok(id))) { TypeKind::Global(id) => self.handle_global(id), tk => Some(Value::ty(tk.encode())), @@ -1637,7 +1666,7 @@ impl Codegen { match ctx { Ctx::Dest(dest) => { - _ = self.assert_ty(expr.pos(), dest.ty, value.ty); + _ = self.assert_ty(expr.pos(), value.ty, dest.ty); self.assign(dest.ty, dest.loc, value.loc)?; Some(Value { ty: dest.ty, @@ -1805,6 +1834,11 @@ impl Codegen { match size { 0 => {} + ..=8 if let Loc::Imm(imm) = left + && let Loc::RegRef(reg) = right => + { + self.code.encode(instrs::li64(reg, imm)) + } ..=8 => { let lhs = self.loc_to_reg(left, size); match right { @@ -1860,9 +1894,21 @@ impl Codegen { } } - fn find_and_declare(&mut self, file: parser::FileId, name: Result) -> TypeKind { + fn find_and_declare(&mut self, pos: Pos, file: FileId, name: Result) -> TypeKind { let f = self.files[file as usize].clone(); - let (expr, id) = f.find_decl(name).expect("TODO: error"); + let Some((expr, id)) = f.find_decl(name) else { + self.report( + pos, + match name { + Ok(_) => format!("undefined indentifier"), + Err("main") => { + format!("compilation root is missing main function: {f}") + } + Err(name) => todo!("somehow we did not handle: {name:?}"), + }, + ); + }; + let sym = match expr { E::BinOp { left: &E::Ident { .. }, @@ -2126,7 +2172,7 @@ impl Codegen { } #[must_use] - fn assert_ty(&self, pos: parser::Pos, ty: Type, expected: Type) -> Type { + fn assert_ty(&self, pos: Pos, ty: Type, expected: Type) -> Type { if let Some(res) = bt::try_upcast(ty, expected) { res } else { @@ -2136,7 +2182,7 @@ impl Codegen { } } - fn report(&self, pos: parser::Pos, msg: impl std::fmt::Display) -> ! { + fn report(&self, pos: Pos, msg: impl std::fmt::Display) -> ! { let (line, col) = self.cf.nlines.line_col(pos); println!("{}:{}:{}: {}", self.cf.path, line, col, msg); unreachable!(); @@ -2316,7 +2362,7 @@ impl hbvm::mem::Memory for LoggedMem { #[cfg(test)] mod tests { - use crate::codegen::LoggedMem; + use crate::{codegen::LoggedMem, log}; use super::parser; @@ -2355,6 +2401,7 @@ mod tests { writeln!(output, "code size: {}", out.len()).unwrap(); writeln!(output, "ret: {:?}", vm.read_reg(1).0).unwrap(); writeln!(output, "status: {:?}", stat).unwrap(); + log::inf!("input lenght: {}", input.len()); } crate::run_tests! { generate: diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs index b6aaf4a9..75c89360 100644 --- a/hblang/src/lexer.rs +++ b/hblang/src/lexer.rs @@ -86,6 +86,7 @@ macro_rules! gen_token_kind { gen_token_kind! { pub enum TokenKind { #[patterns] + CtIdent, Ident, Number, Eof, @@ -188,6 +189,12 @@ impl<'a> Lexer<'a> { }; }; + let advance_ident = |s: &mut Self| { + while let Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_') = s.peek() { + s.advance(); + } + }; + let kind = match c { b'\n' | b'\r' | b'\t' | b' ' => continue, b'0'..=b'9' => { @@ -196,18 +203,20 @@ impl<'a> Lexer<'a> { } T::Number } - c @ (b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'@') => { - while let Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_') = self.peek() { - self.advance(); - } - - if c == b'@' { - start += 1; - T::Driective - } else { - let ident = &self.bytes[start as usize..self.pos as usize]; - T::from_ident(ident) - } + b'@' => { + start += 1; + advance_ident(self); + T::Driective + } + b'$' => { + start += 1; + advance_ident(self); + T::CtIdent + } + b'a'..=b'z' | b'A'..=b'Z' | b'_' => { + advance_ident(self); + let ident = &self.bytes[start as usize..self.pos as usize]; + T::from_ident(ident) } b'"' => { while let Some(c) = self.advance() { diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index 351428f3..0abc4db7 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(vec_pop_if)] #![feature(if_let_guard)] #![feature(slice_partition_dedup)] #![feature(noop_waker)] @@ -137,7 +138,7 @@ impl TaskQueueInner { } } -pub fn parse_all(threads: usize) -> io::Result> { +pub fn parse_all(threads: usize, root: &str) -> io::Result> { const GIT_DEPS_DIR: &str = "git-deps"; enum ImportPath<'a> { @@ -198,20 +199,15 @@ pub fn parse_all(threads: usize) -> io::Result> { impl<'a> ImportPath<'a> { fn resolve(&self, from: &str) -> Result { - match self { - Self::Root { path } => Ok(Path::new(path).to_owned()), - Self::Rel { path } => { - let path = PathBuf::from_iter([from, path]); - match path.canonicalize() { - Ok(path) => Ok(path), - Err(e) => Err(CantLoadFile(path, e)), - } - } + let path = match self { + Self::Root { path } => PathBuf::from(path), + Self::Rel { path } => PathBuf::from_iter([from, path]), Self::Git { path, link, .. } => { let link = preprocess_git(link); - Ok(PathBuf::from_iter([GIT_DEPS_DIR, link, path])) + PathBuf::from_iter([GIT_DEPS_DIR, link, path]) } - } + }; + path.canonicalize().map_err(|e| CantLoadFile(path, e)) } } @@ -348,6 +344,7 @@ pub fn parse_all(threads: usize) -> io::Result> { }; let execute_task = |(_, path, command): Task, buffer: &mut Vec| { + log::dbg!("{path:?}"); if let Some(mut command) = command { let output = command.output()?; if !output.status.success() { @@ -384,6 +381,10 @@ pub fn parse_all(threads: usize) -> io::Result> { } }; + let path = Path::new(root).canonicalize()?; + seen.lock().unwrap().insert(path.clone(), 0); + tasks.push((0, path, None)); + std::thread::scope(|s| (0..threads).for_each(|_| _ = s.spawn(thread))); ast.into_inner() diff --git a/hblang/src/main.rs b/hblang/src/main.rs index e0177a69..f679776c 100644 --- a/hblang/src/main.rs +++ b/hblang/src/main.rs @@ -1,24 +1,12 @@ -use std::io; +fn main() -> std::io::Result<()> { + let root = std::env::args() + .nth(1) + .unwrap_or_else(|| "main.hb".to_string()); -use hblang::{codegen, parser}; + let parsed = hblang::parse_all(1, &root)?; + let mut codegen = hblang::codegen::Codegen::default(); + codegen.files = parsed; -fn main() -> io::Result<()> { - if std::env::args().len() == 1 { - eprintln!("Usage: hblang ..."); - eprintln!(" 1. compiled binary will be printed to stdout"); - eprintln!(" 2. order of files matters"); - std::process::exit(1); - } - - let files = std::env::args() - .skip(1) - .map(|path| std::fs::read_to_string(&path).map(|src| (path, src))) - .collect::>>()?; - - let mut codegen = codegen::Codegen::default(); - for (path, content) in files.iter() { - codegen.files = vec![parser::Ast::new(&path, &content, &parser::no_loader)]; - codegen.generate(); - } + codegen.generate(); codegen.dump(&mut std::io::stdout()) } diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs index 629caf51..09cba22e 100644 --- a/hblang/src/parser.rs +++ b/hblang/src/parser.rs @@ -10,6 +10,7 @@ use crate::{ codegen::bt, ident::{self, Ident}, lexer::{Lexer, LineMap, Token, TokenKind}, + log, }; pub type Pos = u32; @@ -31,11 +32,11 @@ pub mod idfl { flags! { MUTABLE, REFERENCED, - CAPTURED, + COMPTIME, } - pub fn index(i: IdentFlags) -> u32 { - i & !ALL + pub fn index(i: IdentFlags) -> u16 { + (i & !ALL) as _ } } @@ -49,6 +50,7 @@ pub struct Symbol { pub flags: IdentFlags, } +#[derive(Clone, Copy)] struct ScopeIdent { ident: Ident, declared: bool, @@ -61,9 +63,9 @@ pub struct Parser<'a, 'b> { lexer: Lexer<'b>, arena: &'b Arena<'a>, token: Token, - idents: Vec, symbols: &'b mut Symbols, ns_bound: usize, + idents: Vec, captured: Vec, } @@ -76,9 +78,9 @@ impl<'a, 'b> Parser<'a, 'b> { lexer, path: "", arena, - idents: Vec::new(), symbols, ns_bound: 0, + idents: Vec::new(), captured: Vec::new(), } } @@ -135,10 +137,11 @@ impl<'a, 'b> Parser<'a, 'b> { } let op = self.next().kind; + let right = self.unit_expr(); let right = self.bin_expr(right, prec); - let right = &*self.arena.alloc(right); - let left = &*self.arena.alloc(fold); + let right = self.arena.alloc(right); + let left = self.arena.alloc(fold); if let Some(op) = op.assign_op() { self.flag_idents(*left, idfl::MUTABLE); @@ -159,7 +162,8 @@ impl<'a, 'b> Parser<'a, 'b> { fold } - fn resolve_ident(&mut self, token: Token, decl: bool) -> (Ident, u32) { + fn resolve_ident(&mut self, token: Token, decl: bool) -> (Ident, u16) { + let is_ct = self.token.kind == TokenKind::CtIdent; let name = self.lexer.slice(token.range()); if let Some(builtin) = bt::from_str(name) { @@ -191,8 +195,9 @@ impl<'a, 'b> Parser<'a, 'b> { }; id.declared |= decl; - if self.ns_bound > i && id.declared { - id.flags |= idfl::CAPTURED; + id.flags |= idfl::COMPTIME * is_ct as u32; + if id.declared && self.ns_bound > i { + id.flags |= idfl::COMPTIME; self.captured.push(id.ident); } @@ -244,8 +249,7 @@ impl<'a, 'b> Parser<'a, 'b> { self.collect_list(T::Comma, T::RBrace, |s| { let name = s.expect_advance(T::Ident); s.expect_advance(T::Colon); - let ty = s.expr(); - (s.move_str(name), ty) + (s.move_str(name), s.expr()) }) }, captured: { @@ -263,7 +267,7 @@ impl<'a, 'b> Parser<'a, 'b> { token.start }, }, - T::Ident => { + T::Ident | T::CtIdent => { let (id, index) = self.resolve_ident(token, self.token.kind == T::Decl); let name = self.move_str(token); E::Ident { name, id, index } @@ -289,7 +293,7 @@ impl<'a, 'b> Parser<'a, 'b> { args: { self.expect_advance(T::LParen); self.collect_list(T::Comma, T::RParen, |s| { - let name = s.expect_advance(T::Ident); + let name = s.advance_ident(); let (id, index) = s.resolve_ident(name, true); s.expect_advance(T::Colon); Arg { @@ -310,7 +314,12 @@ impl<'a, 'b> Parser<'a, 'b> { pos: token.start, op: token.kind, val: { - let expr = self.ptr_unit_expr(); + let expr = if token.kind == T::Xor { + let expr = self.expr(); + self.arena.alloc(expr) + } else { + self.ptr_unit_expr() + }; if token.kind == T::Band { self.flag_idents(*expr, idfl::REFERENCED); } @@ -384,10 +393,21 @@ impl<'a, 'b> Parser<'a, 'b> { expr } + fn advance_ident(&mut self) -> Token { + if matches!(self.token.kind, TokenKind::Ident | TokenKind::CtIdent) { + self.next() + } else { + self.report(format_args!( + "expected identifier, found {:?}", + self.token.kind + )) + } + } + fn pop_scope(&mut self, frame: usize) { let mut undeclared_count = frame; for i in frame..self.idents.len() { - if !self.idents[i].declared { + if !&self.idents[i].declared { self.idents.swap(i, undeclared_count); undeclared_count += 1; } @@ -445,8 +465,14 @@ impl<'a, 'b> Parser<'a, 'b> { self.next() } + #[track_caller] fn report(&self, msg: impl std::fmt::Display) -> ! { - let (line, col) = self.lexer.line_col(self.token.start); + self.report_pos(self.token.start, msg) + } + + #[track_caller] + fn report_pos(&self, pos: Pos, msg: impl std::fmt::Display) -> ! { + let (line, col) = self.lexer.line_col(pos); eprintln!("{}:{}:{} => {}", self.path, line, col, msg); unreachable!(); } @@ -478,120 +504,157 @@ pub fn find_symbol(symbols: &[Symbol], id: Ident) -> &Symbol { pub struct Arg<'a> { pub name: &'a str, pub id: Ident, - pub index: u32, + pub index: u16, pub ty: Expr<'a>, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Expr<'a> { - Break { - pos: Pos, - }, - Continue { - pos: Pos, - }, - Closure { - pos: Pos, - args: &'a [Arg<'a>], - ret: &'a Self, - body: &'a Self, - }, - Call { - func: &'a Self, - args: &'a [Self], - }, - Return { - pos: Pos, - val: Option<&'a Self>, - }, - Ident { - name: &'a str, - id: Ident, - index: u32, - }, - Block { - pos: Pos, - stmts: &'a [Self], - }, - Number { - pos: Pos, - value: u64, - }, - BinOp { - left: &'a Self, - op: TokenKind, - right: &'a Self, - }, - If { - pos: Pos, - cond: &'a Self, - then: &'a Self, - else_: Option<&'a Self>, - }, - Loop { - pos: Pos, - body: &'a Self, - }, - UnOp { - pos: Pos, - op: TokenKind, - val: &'a Self, - }, - Struct { - pos: Pos, - fields: &'a [(&'a str, Self)], - captured: &'a [Ident], - }, - Ctor { - pos: Pos, - ty: Option<&'a Self>, - fields: &'a [(Option<&'a str>, Self)], - }, - Field { - target: &'a Self, - field: &'a str, - }, - Bool { - pos: Pos, - value: bool, - }, - Directive { - pos: u32, - name: &'a str, - args: &'a [Self], - }, - Mod { - pos: Pos, - id: FileId, - path: &'a str, - }, +macro_rules! generate_expr { + ($(#[$meta:meta])* $vis:vis enum $name:ident<$lt:lifetime> {$( + $(#[$field_meta:meta])* + $variant:ident { + $($field:ident: $ty:ty,)* + }, + )*}) => { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + $vis enum $name<$lt> {$( + $variant { + $($field: $ty,)* + }, + )*} + + impl<$lt> $name<$lt> { + pub fn pos(&self) -> Pos { + #[allow(unused_variables)] + match self { + $(Self::$variant { $($field),* } => generate_expr!(@first $(($field),)*).posi(self),)* + } + } + + pub fn used_bytes(&self) -> usize { + match self {$( + Self::$variant { $($field,)* } => { + let fields = [$(($field as *const _ as usize - self as *const _ as usize, std::mem::size_of_val($field)),)*]; + let (last, size) = fields.iter().copied().max().unwrap(); + last + size + }, + )*} + } + } + }; + + (@first ($($first:tt)*), $($rest:tt)*) => { $($first)* }; + (@last ($($ign:tt)*), $($rest:tt)*) => { $($rest)* }; + (@last ($($last:tt)*),) => { $($last)* }; } -impl<'a> Expr<'a> { - pub fn pos(&self) -> Pos { - match self { - Self::Call { func, .. } => func.pos(), - Self::Ident { id, .. } => ident::pos(*id), - Self::Break { pos } - | Self::Mod { pos, .. } - | Self::Directive { pos, .. } - | Self::Continue { pos } - | Self::Closure { pos, .. } - | Self::Block { pos, .. } - | Self::Number { pos, .. } - | Self::Return { pos, .. } - | Self::If { pos, .. } - | Self::Loop { pos, .. } - | Self::UnOp { pos, .. } - | Self::Struct { pos, .. } - | Self::Ctor { pos, .. } - | Self::Bool { pos, .. } => *pos, - Self::BinOp { left, .. } => left.pos(), - Self::Field { target, .. } => target.pos(), +// it would be real nice if we could use relative pointers and still pattern match easily +generate_expr! { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + pub enum Expr<'a> { + Break { + pos: Pos, + }, + Continue { + pos: Pos, + }, + Closure { + pos: Pos, + args: &'a [Arg<'a>], + ret: &'a Self, + body: &'a Self, + }, + Call { + func: &'a Self, + args: &'a [Self], + }, + Return { + pos: Pos, + val: Option<&'a Self>, + }, + Ident { + id: Ident, + name: &'a str, + index: u16, + }, + Block { + pos: Pos, + stmts: &'a [Self], + }, + Number { + pos: Pos, + value: u64, + }, + BinOp { + left: &'a Self, + op: TokenKind, + right: &'a Self, + }, + If { + pos: Pos, + cond: &'a Self, + then: &'a Self, + else_: Option<&'a Self>, + }, + Loop { + pos: Pos, + body: &'a Self, + }, + UnOp { + pos: Pos, + op: TokenKind, + val: &'a Self, + }, + Struct { + pos: Pos, + fields: &'a [(&'a str, Self)], + captured: &'a [Ident], + }, + Ctor { + pos: Pos, + ty: Option<&'a Self>, + fields: &'a [(Option<&'a str>, Self)], + }, + Field { + target: &'a Self, + field: &'a str, + }, + Bool { + pos: Pos, + value: bool, + }, + Directive { + pos: u32, + name: &'a str, + args: &'a [Self], + }, + Mod { + pos: Pos, + id: FileId, + path: &'a str, + }, + } +} + +trait Poser { + fn posi(self, expr: &Expr) -> Pos; +} + +impl Poser for Pos { + fn posi(self, expr: &Expr) -> Pos { + if matches!(expr, Expr::Ident { .. }) { + ident::pos(self) + } else { + self } } } +impl<'a> Poser for &Expr<'a> { + fn posi(self, _: &Expr) -> Pos { + self.pos() + } +} + impl<'a> std::fmt::Display for Expr<'a> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { thread_local! { @@ -817,6 +880,15 @@ impl Ast { } } +impl std::fmt::Display for Ast { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for expr in self.exprs() { + writeln!(f, "{expr}\n")?; + } + Ok(()) + } +} + impl Default for Ast { fn default() -> Self { Self(AstInner::new("", "", &no_loader)) @@ -888,21 +960,22 @@ pub struct Arena<'a> { impl<'a> Arena<'a> { pub fn alloc_str(&self, token: &str) -> &'a str { let ptr = self.alloc_slice(token.as_bytes()); - unsafe { std::str::from_utf8_unchecked_mut(ptr) } + unsafe { std::str::from_utf8_unchecked(ptr) } } - pub fn alloc(&self, value: T) -> &'a mut T { - if std::mem::size_of::() == 0 { - return unsafe { NonNull::dangling().as_mut() }; - } - - let layout = std::alloc::Layout::new::(); + pub fn alloc(&self, expr: Expr<'a>) -> &'a Expr<'a> { + let align = std::mem::align_of::>(); + let size = expr.used_bytes(); + let layout = unsafe { std::alloc::Layout::from_size_align_unchecked(size, align) }; let ptr = self.alloc_low(layout); - unsafe { ptr.cast::().write(value) }; - unsafe { ptr.cast::().as_mut() } + unsafe { + ptr.cast::() + .copy_from_nonoverlapping(NonNull::from(&expr).cast(), size / 8) + }; + unsafe { ptr.cast::>().as_ref() } } - pub fn alloc_slice(&self, slice: &[T]) -> &'a mut [T] { + pub fn alloc_slice(&self, slice: &[T]) -> &'a [T] { if slice.is_empty() || std::mem::size_of::() == 0 { return &mut []; } @@ -914,7 +987,7 @@ impl<'a> Arena<'a> { .cast::() .copy_from_nonoverlapping(slice.as_ptr(), slice.len()) }; - unsafe { std::slice::from_raw_parts_mut(ptr.as_ptr() as _, slice.len()) } + unsafe { std::slice::from_raw_parts(ptr.as_ptr() as _, slice.len()) } } fn alloc_low(&self, layout: std::alloc::Layout) -> NonNull { @@ -990,11 +1063,17 @@ impl ArenaChunk { impl Drop for ArenaChunk { fn drop(&mut self) { + log::inf!( + "dropping chunk of size: {}", + (Self::LAYOUT.size() - (self.end as usize - self.base as usize)) + * !self.end.is_null() as usize + ); let mut current = self.base; while !current.is_null() { let next = Self::next(current); unsafe { std::alloc::dealloc(current, Self::LAYOUT) }; current = next; + log::dbg!("deallocating full chunk"); } } }