From c1b00b6d6bb2b6a61d892d9a2dde06968711e7c4 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Mon, 30 Sep 2024 19:09:17 +0200 Subject: [PATCH] making nostd compat work --- Cargo.lock | 1 + hbbytecode/Cargo.toml | 1 + hbbytecode/build.rs | 2 +- hbbytecode/src/lib.rs | 74 ++++-- hblang/Cargo.toml | 5 + hblang/README.md | 11 +- hblang/src/codegen.rs | 207 +++++++++------ hblang/src/fs.rs | 286 ++++++++++++++++++++ hblang/src/lexer.rs | 20 +- hblang/src/lib.rs | 604 ++++++++++++++++-------------------------- hblang/src/main.rs | 11 +- hblang/src/parser.rs | 515 ++++++++++++++++++----------------- hblang/src/son.rs | 454 ++++++++++++++++++------------- hblang/src/vc.rs | 51 ++-- 14 files changed, 1300 insertions(+), 942 deletions(-) create mode 100644 hblang/src/fs.rs diff --git a/Cargo.lock b/Cargo.lock index 8224e83..854127f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -123,6 +123,7 @@ name = "hblang" version = "0.1.0" dependencies = [ "env_logger", + "hashbrown", "hbbytecode", "hbvm", "regalloc2", diff --git a/hbbytecode/Cargo.toml b/hbbytecode/Cargo.toml index a542869..95c0c1e 100644 --- a/hbbytecode/Cargo.toml +++ b/hbbytecode/Cargo.toml @@ -7,3 +7,4 @@ edition = "2018" default = ["disasm"] std = [] disasm = ["std"] + diff --git a/hbbytecode/build.rs b/hbbytecode/build.rs index 9b6ee59..dd41617 100644 --- a/hbbytecode/build.rs +++ b/hbbytecode/build.rs @@ -119,7 +119,7 @@ fn gen_instrs(generated: &mut String) -> Result<(), Box> "/// This assumes the instruction byte is still at the beginning of the buffer" )?; writeln!(generated, "#[cfg(feature = \"disasm\")]")?; - writeln!(generated, "pub fn parse_args(bytes: &mut &[u8], kind: {instr}, buf: &mut std::vec::Vec<{oper}>) -> Option<()> {{")?; + writeln!(generated, "pub fn parse_args(bytes: &mut &[u8], kind: {instr}, buf: &mut alloc::vec::Vec<{oper}>) -> Option<()> {{")?; writeln!(generated, " match kind {{")?; let mut instrs = instructions().collect::>(); instrs.sort_unstable_by_key(|&[.., ty, _]| ty); diff --git a/hbbytecode/src/lib.rs b/hbbytecode/src/lib.rs index cc4f1a3..9609a99 100644 --- a/hbbytecode/src/lib.rs +++ b/hbbytecode/src/lib.rs @@ -1,7 +1,7 @@ #![no_std] -#[cfg(feature = "std")] -extern crate std; +#[cfg(feature = "disasm")] +extern crate alloc; pub use crate::instrs::*; use core::convert::TryFrom; @@ -83,31 +83,72 @@ pub enum DisasmItem { } #[cfg(feature = "disasm")] -pub fn disasm( +#[derive(Debug)] +pub enum DisasmError<'a> { + InvalidInstruction(u8), + InstructionOutOfBounds(&'a str), + FmtFailed(core::fmt::Error), + HasOutOfBoundsJumps, + HasDirectInstructionCycles, +} + +#[cfg(feature = "disasm")] +impl From for DisasmError<'_> { + fn from(value: core::fmt::Error) -> Self { + Self::FmtFailed(value) + } +} + +#[cfg(feature = "disasm")] +impl core::fmt::Display for DisasmError<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match *self { + DisasmError::InvalidInstruction(b) => write!(f, "invalid instruction opcode: {b}"), + DisasmError::InstructionOutOfBounds(name) => { + write!(f, "instruction would go out of bounds of {name} symbol") + } + DisasmError::FmtFailed(error) => write!(f, "fmt failed: {error}"), + DisasmError::HasOutOfBoundsJumps => write!( + f, + "the code contained jumps that dont got neither to a \ + valid symbol or local insturction" + ), + DisasmError::HasDirectInstructionCycles => { + writeln!(f, "found instruction that jumps to itself") + } + } + } +} + +#[cfg(feature = "disasm")] +impl core::error::Error for DisasmError<'_> {} + +#[cfg(feature = "disasm")] +pub fn disasm<'a>( binary: &mut &[u8], - functions: &std::collections::BTreeMap, - out: &mut impl std::io::Write, + functions: &alloc::collections::BTreeMap, + out: &mut alloc::string::String, mut eca_handler: impl FnMut(&mut &[u8]), -) -> std::io::Result<()> { +) -> Result<(), DisasmError<'a>> { use { self::instrs::Instr, - std::{ - collections::{hash_map::Entry, HashMap}, - convert::TryInto, + alloc::{ + collections::btree_map::{BTreeMap, Entry}, vec::Vec, }, + core::{convert::TryInto, fmt::Write}, }; - fn instr_from_byte(b: u8) -> std::io::Result { - b.try_into().map_err(|_| std::io::ErrorKind::InvalidData.into()) + fn instr_from_byte(b: u8) -> Result> { + b.try_into().map_err(DisasmError::InvalidInstruction) } - let mut labels = HashMap::::default(); + let mut labels = BTreeMap::::default(); let mut buf = Vec::::new(); let mut has_cycle = false; let mut has_oob = false; - '_offset_pass: for (&off, &(_name, len, kind)) in functions.iter() { + '_offset_pass: for (&off, &(name, len, kind)) in functions.iter() { if matches!(kind, DisasmItem::Global) { continue; } @@ -123,7 +164,8 @@ pub fn disasm( break; } let Ok(inst) = instr_from_byte(byte) else { break }; - instrs::parse_args(binary, inst, &mut buf).ok_or(std::io::ErrorKind::OutOfMemory)?; + instrs::parse_args(binary, inst, &mut buf) + .ok_or(DisasmError::InstructionOutOfBounds(name))?; for op in buf.drain(..) { let rel = match op { @@ -242,11 +284,11 @@ pub fn disasm( } if has_oob { - return Err(std::io::ErrorKind::InvalidInput.into()); + return Err(DisasmError::HasOutOfBoundsJumps); } if has_cycle { - return Err(std::io::ErrorKind::TimedOut.into()); + return Err(DisasmError::HasDirectInstructionCycles); } Ok(()) diff --git a/hblang/Cargo.toml b/hblang/Cargo.toml index 681f712..c0b996d 100644 --- a/hblang/Cargo.toml +++ b/hblang/Cargo.toml @@ -8,9 +8,14 @@ name = "hbc" path = "src/main.rs" [dependencies] +hashbrown = { version = "0.14.5", default-features = false } hbbytecode = { version = "0.1.0", path = "../hbbytecode" } hbvm = { path = "../hbvm", features = ["nightly"] } regalloc2 = { git = "https://github.com/jakubDoka/regalloc2", branch = "reuse-allocations", features = [] } [dev-dependencies] env_logger = "0.11.5" + +[features] +default = ["std"] +std = [] diff --git a/hblang/README.md b/hblang/README.md index 681727d..c4ec734 100644 --- a/hblang/README.md +++ b/hblang/README.md @@ -169,11 +169,12 @@ main := fn(): int { } finst := Ty2.{ty: .{a: 4, b: 1}, c: 3} - inst := odher_pass(finst) - if inst.c == 3 { - return pass(&inst.ty) - } - return 0 + //inst := odher_pass(finst) + //if finst.c == 3 { + //return finst.ty.a - finst.ty.b + return pass(&finst.ty) + //} + //return 0 } pass := fn(t: ^Ty): int { diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index 88c905b..4ea7125 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -11,8 +11,8 @@ use { ty, Field, Func, Global, LoggedMem, OffsetIter, ParamAlloc, Reloc, Sig, Struct, SymKey, TypedReloc, Types, }, - core::panic, - std::fmt::Display, + alloc::{borrow::ToOwned, boxed::Box, string::String, vec::Vec}, + core::{fmt::Display, panic}, }; type Offset = u32; @@ -21,7 +21,8 @@ type ArrayLen = u32; fn load_value(ptr: *const u8, size: u32) -> u64 { let mut dst = [0u8; 8]; - dst[..size as usize].copy_from_slice(unsafe { std::slice::from_raw_parts(ptr, size as usize) }); + dst[..size as usize] + .copy_from_slice(unsafe { core::slice::from_raw_parts(ptr, size as usize) }); u64::from_ne_bytes(dst) } @@ -36,7 +37,8 @@ fn ensure_loaded(value: CtValue, derefed: bool, size: u32) -> u64 { mod stack { use { super::{Offset, Size}, - std::num::NonZeroU32, + alloc::vec::Vec, + core::num::NonZeroU32, }; impl crate::Reloc { @@ -79,7 +81,17 @@ mod stack { impl Drop for Id { fn drop(&mut self) { - if !std::thread::panicking() && !self.is_ref() { + let is_panicking = { + #[cfg(feature = "std")] + { + std::thread::panicking() + } + #[cfg(not(feature = "std"))] + { + false + } + }; + if !is_panicking && !self.is_ref() { unreachable!("stack id leaked: {:?}", self.0); } } @@ -110,7 +122,7 @@ mod stack { } pub fn free(&mut self, id: Id) { - std::mem::forget(id); + core::mem::forget(id); //if id.is_ref() {} //let meta = &mut self.meta[id.index()]; //meta.rc -= 1; @@ -151,6 +163,8 @@ mod stack { } mod reg { + use alloc::vec::Vec; + pub const STACK_PTR: Reg = 254; pub const ZERO: Reg = 0; pub const RET: Reg = 1; @@ -158,9 +172,9 @@ mod reg { type Reg = u8; - #[cfg(debug_assertions)] + #[cfg(feature = "std")] type Bt = std::backtrace::Backtrace; - #[cfg(not(debug_assertions))] + #[cfg(not(feature = "std"))] type Bt = (); #[derive(Default, Debug)] @@ -196,12 +210,20 @@ mod reg { } } - #[cfg(debug_assertions)] + #[cfg(feature = "std")] impl Drop for Id { fn drop(&mut self) { - if !std::thread::panicking() - && let Some(bt) = self.1.take() - { + let is_panicking = { + #[cfg(feature = "std")] + { + std::thread::panicking() + } + #[cfg(not(feature = "std"))] + { + false + } + }; + if !is_panicking && let Some(bt) = self.1.take() { unreachable!("reg id leaked: {:?} {bt}", self.0); } } @@ -225,9 +247,9 @@ mod reg { self.max_used = self.max_used.max(reg); Id( reg, - #[cfg(debug_assertions)] + #[cfg(feature = "std")] Some(std::backtrace::Backtrace::capture()), - #[cfg(not(debug_assertions))] + #[cfg(not(feature = "std"))] Some(()), ) } @@ -235,7 +257,7 @@ mod reg { pub fn free(&mut self, reg: Id) { if reg.1.is_some() { self.free.push(reg.0); - std::mem::forget(reg); + core::mem::forget(reg); } } @@ -256,15 +278,15 @@ impl Value { } fn void() -> Self { - Self { ty: ty::VOID.into(), loc: Loc::ct(0) } + Self { ty: ty::Id::VOID, loc: Loc::ct(0) } } fn imm(value: u64) -> Self { - Self { ty: ty::UINT.into(), loc: Loc::ct(value) } + Self { ty: ty::Id::UINT, loc: Loc::ct(value) } } fn ty(ty: ty::Id) -> Self { - Self { ty: ty::TYPE.into(), loc: Loc::ct(ty.repr() as u64) } + Self { ty: ty::Id::TYPE, loc: Loc::ct(ty.repr() as u64) } } } @@ -365,7 +387,7 @@ impl Loc { return None; } - Some(std::mem::replace(self, self.as_ref())) + Some(core::mem::replace(self, self.as_ref())) } fn is_ref(&self) -> bool { @@ -376,7 +398,7 @@ impl Loc { match *self { Self::Ct { derefed: false, value } => Some(ty::Id::from(value.0)), Self::Ct { derefed: true, value } => { - Some(unsafe { std::ptr::read(value.0 as *const u8 as _) }) + Some(unsafe { core::ptr::read(value.0 as *const u8 as _) }) } Self::Rt { .. } => None, } @@ -655,7 +677,7 @@ mod trap { impl $name { $vis fn size(&self) -> usize { 1 + match self { - $(Self::$variant(_) => std::mem::size_of::<$variant>(),)* + $(Self::$variant(_) => core::mem::size_of::<$variant>(),)* } } } @@ -684,7 +706,7 @@ mod trap { impl Trap { pub fn as_slice(&self) -> &[u8] { - unsafe { std::slice::from_raw_parts(self as *const _ as _, self.size()) } + unsafe { core::slice::from_raw_parts(self as *const _ as _, self.size()) } } } } @@ -714,15 +736,21 @@ impl Codegen { fn build_struct( &mut self, - explicit_alignment: Option, + explicit_alignment: Option, fields: &[CommentOr], ) -> ty::Struct { - let fields = fields - .iter() - .filter_map(CommentOr::or) - .map(|sf| Field { name: sf.name.into(), ty: self.ty(&sf.ty) }) - .collect(); - self.tys.structs.push(Struct { name: 0, file: 0, fields, explicit_alignment }); + let prev_tmp = self.tys.fields_tmp.len(); + for sf in fields.iter().filter_map(CommentOr::or) { + let f = Field { name: self.tys.field_names.intern(sf.name), ty: self.ty(&sf.ty) }; + self.tys.fields_tmp.push(f); + } + self.tys.structs.push(Struct { + field_start: self.tys.fields.len() as _, + explicit_alignment, + ..Default::default() + }); + self.tys.fields.extend(self.tys.fields_tmp.drain(prev_tmp..)); + self.tys.structs.len() as u32 - 1 } @@ -785,7 +813,7 @@ impl Codegen { base_val.loc = self.make_loc_owned(base_val.loc, base_val.ty); } let index_val = self.expr(index)?; - _ = self.assert_ty(index.pos(), index_val.ty, ty::INT.into(), "subsctipt"); + _ = self.assert_ty(index.pos(), index_val.ty, ty::Id::INT, "subsctipt"); if let ty::Kind::Ptr(ty) = base_val.ty.expand() { base_val.ty = self.tys.ptrs[ty as usize].base; @@ -860,12 +888,12 @@ impl Codegen { let ret_reloc_base = self.ci.ret_relocs.len(); let loc = self.alloc_ret(sig.ret, ctx, true); - let prev_ret_reg = std::mem::replace(&mut self.ci.inline_ret_loc, loc); + let prev_ret_reg = core::mem::replace(&mut self.ci.inline_ret_loc, loc); let fuc = &self.tys.funcs[func as usize]; - let prev_file = std::mem::replace(&mut self.ci.file, fuc.file); - let prev_ret = std::mem::replace(&mut self.ci.ret, Some(sig.ret)); + let prev_file = core::mem::replace(&mut self.ci.file, fuc.file); + let prev_ret = core::mem::replace(&mut self.ci.ret, Some(sig.ret)); self.expr(body); - let loc = std::mem::replace(&mut self.ci.inline_ret_loc, prev_ret_reg); + let loc = core::mem::replace(&mut self.ci.inline_ret_loc, prev_ret_reg); self.ci.file = prev_file; self.ci.ret = prev_ret; @@ -987,9 +1015,7 @@ impl Codegen { ctx.ty = Some(ty); return self.expr_ctx(val, ctx); } - E::Bool { value, .. } => { - Some(Value { ty: ty::BOOL.into(), loc: Loc::ct(value as u64) }) - } + E::Bool { value, .. } => Some(Value { ty: ty::Id::BOOL, loc: Loc::ct(value as u64) }), E::Idk { pos } => { let Some(ty) = ctx.ty else { self.report( @@ -1025,13 +1051,13 @@ impl Codegen { self.report(pos, "string literal must end with null byte (for now)"); } - let report = |bytes: &std::str::Bytes, message| { + let report = |bytes: &core::str::Bytes, message| { self.report(pos + (literal.len() - bytes.len()) as u32 - 1, message) }; let mut str = Vec::::with_capacity(literal.len()); - let decode_braces = |str: &mut Vec, bytes: &mut std::str::Bytes| { + let decode_braces = |str: &mut Vec, bytes: &mut core::str::Bytes| { while let Some(b) = bytes.next() && b != b'}' { @@ -1095,7 +1121,7 @@ impl Codegen { ); }; for &CtorField { pos, name, ref value, .. } in fields { - let Some((offset, ty)) = self.tys.offset_of(stru, name) else { + let Some((offset, ty)) = OffsetIter::offset_of(&self.tys, stru, name) else { self.report(pos, format_args!("field not found: {name:?}")); }; let loc = loc.as_ref().offset(offset); @@ -1115,7 +1141,7 @@ impl Codegen { match ty.expand() { ty::Kind::Struct(stru) => { - let mut oiter = OffsetIter::new(stru); + let mut oiter = OffsetIter::new(stru, &self.tys); for field in fields { let (ty, offset) = oiter.next_ty(&self.tys).unwrap(); let loc = loc.as_ref().offset(offset); @@ -1151,7 +1177,7 @@ impl Codegen { return Some(Value { ty, loc }); } } - E::Field { target, name: field } => { + E::Field { target, name: field, pos } => { let checkpoint = self.ci.snap(); let mut tal = self.expr(target)?; @@ -1162,8 +1188,9 @@ impl Codegen { match tal.ty.expand() { ty::Kind::Struct(idx) => { - let Some((offset, ty)) = self.tys.offset_of(idx, field) else { - self.report(target.pos(), format_args!("field not found: {field:?}")); + let Some((offset, ty)) = OffsetIter::offset_of(&self.tys, idx, field) + else { + self.report(pos, format_args!("field not found: {field:?}")); }; Some(Value { ty, loc: tal.loc.offset(offset) }) } @@ -1237,7 +1264,7 @@ impl Codegen { }; *drfd = false; - let offset = std::mem::take(offset) as _; + let offset = core::mem::take(offset) as _; if reg.is_ref() { let new_reg = self.ci.regs.allocate(); self.stack_offset(new_reg.get(), reg.get(), stack.as_ref(), offset); @@ -1247,7 +1274,7 @@ impl Codegen { } // FIXME: we might be able to track this but it will be pain - std::mem::forget(stack.take()); + core::mem::forget(stack.take()); Some(Value { ty: self.tys.make_ptr(val.ty), loc: val.loc }) } @@ -1278,7 +1305,7 @@ impl Codegen { self.assign_pattern(left, value) } E::Call { func: fast, args, .. } => { - log::trc!("call {fast}"); + log::trc!("call {}", self.ast_display(fast)); let func_ty = self.ty(fast); let ty::Kind::Func(mut func) = func_ty.expand() else { self.report(fast.pos(), "can't call this, maybe in the future"); @@ -1403,7 +1430,7 @@ impl Codegen { } E::Number { value, pos, .. } => Some(Value { ty: { - let ty = ctx.ty.map(ty::Id::strip_pointer).unwrap_or(ty::INT.into()); + let ty = ctx.ty.map(ty::Id::strip_pointer).unwrap_or(ty::Id::INT); if !ty.is_integer() && !ty.is_pointer() { self.report( pos, @@ -1438,7 +1465,7 @@ impl Codegen { self.ci.regs.free(left_reg); self.ci.regs.free(right_reg); if swapped { - std::mem::swap(&mut then, &mut else_); + core::mem::swap(&mut then, &mut else_); } } else { let cond = self.expr_ctx(cond, Ctx::default().with_ty(ty::BOOL))?; @@ -1497,7 +1524,7 @@ impl Codegen { debug_assert!(off > 0); } - let mut vars = std::mem::take(&mut self.ci.vars); + let mut vars = core::mem::take(&mut self.ci.vars); for var in vars.drain(loopa.var_count as usize..) { self.ci.free_loc(var.value.loc); } @@ -1535,7 +1562,7 @@ impl Codegen { let jump = self.ci.code.len() as i64 - jump_offset as i64; write_reloc(&mut self.ci.code, jump_offset, jump, 2); - Some(Value { ty: ty::BOOL.into(), loc: Loc::reg(lhs) }) + Some(Value { ty: ty::Id::BOOL, loc: Loc::reg(lhs) }) } E::BinOp { left, op, right } if op != T::Decl => 'ops: { let left = self.expr_ctx(left, Ctx { @@ -1587,7 +1614,7 @@ impl Codegen { if derefed { let mut dst = [0u8; 8]; dst[..size as usize].copy_from_slice(unsafe { - std::slice::from_raw_parts(imm as _, rsize as usize) + core::slice::from_raw_parts(imm as _, rsize as usize) }); imm = u64::from_ne_bytes(dst); } @@ -1673,7 +1700,12 @@ impl Codegen { }?; if let Some(ty) = ctx.ty { - _ = self.assert_ty(expr.pos(), value.ty, ty, format_args!("'{expr}'")); + _ = self.assert_ty( + expr.pos(), + value.ty, + ty, + format_args!("'{}'", self.ast_display(expr)), + ); } Some(match ctx.loc { @@ -1713,7 +1745,7 @@ impl Codegen { } else { debug_assert_eq!( ty, - ty::TYPE.into(), + ty::Id::TYPE, "TODO: we dont support anything except type generics" ); let arg = self.expr_ctx(arg, Ctx::default().with_ty(ty))?; @@ -1773,10 +1805,10 @@ impl Codegen { ..Default::default() }; ci.regs.init(); - std::mem::swap(&mut self.ci, &mut ci); + core::mem::swap(&mut self.ci, &mut ci); let value = self.expr(expr).unwrap(); self.ci.free_loc(value.loc); - std::mem::swap(&mut self.ci, &mut ci); + core::mem::swap(&mut self.ci, &mut ci); value.ty } @@ -1861,7 +1893,7 @@ impl Codegen { }; for &CtorField { pos, name, ref value } in fields { - let Some((offset, ty)) = self.tys.offset_of(idx, name) else { + let Some((offset, ty)) = OffsetIter::offset_of(&self.tys, idx, name) else { self.report(pos, format_args!("field not found: {name:?}")); }; let loc = self.ci.dup_loc(&right.loc).offset(offset); @@ -1889,7 +1921,7 @@ impl Codegen { match ty.expand() { ty::Kind::Struct(stru) => { - let field_count = self.tys.structs[stru as usize].fields.len(); + let field_count = self.tys.struct_field_range(stru).len(); if field_count != field_len { self.report( pos, @@ -1936,7 +1968,7 @@ impl Codegen { .or_else(|| right.take_owned()) .unwrap_or_else(|| Loc::stack(self.ci.stack.allocate(self.tys.size_of(ty)))); - let mut oiter = OffsetIter::new(stuct); + let mut oiter = OffsetIter::new(stuct, &self.tys); while let Some((ty, offset)) = oiter.next_ty(&self.tys) { let ctx = Ctx::from(Value { ty, loc: loc.as_ref().offset(offset) }); let left = left.as_ref().offset(offset); @@ -2041,7 +2073,7 @@ impl Codegen { ret: Some(sig.ret), ..self.pool.cis.pop().unwrap_or_default() }; - let prev_ci = std::mem::replace(&mut self.ci, repl); + let prev_ci = core::mem::replace(&mut self.ci, repl); self.ci.regs.init(); let Expr::BinOp { @@ -2050,7 +2082,7 @@ impl Codegen { right: &Expr::Closure { body, args, .. }, } = expr else { - unreachable!("{expr}") + unreachable!("{}", self.ast_display(expr)) }; self.ci.emit_prelude(); @@ -2085,10 +2117,10 @@ impl Codegen { self.ci.finalize(); self.ci.emit(jala(ZERO, RET_ADDR, 0)); - self.ci.regs.free(std::mem::take(&mut self.ci.ret_reg)); + self.ci.regs.free(core::mem::take(&mut self.ci.ret_reg)); self.tys.funcs[id as usize].code.append(&mut self.ci.code); self.tys.funcs[id as usize].relocs.append(&mut self.ci.relocs); - self.pool.cis.push(std::mem::replace(&mut self.ci, prev_ci)); + self.pool.cis.push(core::mem::replace(&mut self.ci, prev_ci)); self.ct.vm.write_reg(reg::STACK_PTR, ct_stack_base); } @@ -2335,12 +2367,12 @@ impl Codegen { let mut values = self.ct.vm.read_reg(2).0 as *const u8; for &id in captured { - let ty: ty::Id = unsafe { std::ptr::read_unaligned(values.cast()) }; + let ty: ty::Id = unsafe { core::ptr::read_unaligned(values.cast()) }; unsafe { values = values.add(4) }; let size = self.tys.size_of(ty) as usize; let mut imm = [0u8; 8]; assert!(size <= imm.len(), "TODO"); - unsafe { std::ptr::copy_nonoverlapping(values, imm.as_mut_ptr(), size) }; + unsafe { core::ptr::copy_nonoverlapping(values, imm.as_mut_ptr(), size) }; self.ci.vars.push(Variable { id, value: Value::new(ty, Loc::ct(u64::from_ne_bytes(imm))), @@ -2381,7 +2413,7 @@ impl Codegen { let Some((expr, ident)) = f.find_decl(name) else { match name { Ok(_) => self.report(pos, format_args!("undefined indentifier: {lit_name}")), - Err("main") => self.report(pos, format_args!("missing main function: {f}")), + Err("main") => self.report(pos, format_args!("missing main function")), Err(name) => self.report(pos, format_args!("undefined indentifier: {name}")), } }; @@ -2399,7 +2431,7 @@ impl Codegen { return existing.expand(); } - let prev_file = std::mem::replace(&mut self.ci.file, file); + let prev_file = core::mem::replace(&mut self.ci.file, file); let sym = match expr { Expr::BinOp { left: &Expr::Ident { .. }, @@ -2509,17 +2541,17 @@ impl Codegen { ) -> Result { log::trc!("eval"); - let mut prev_ci = std::mem::replace(&mut self.ci, ci); + let mut prev_ci = core::mem::replace(&mut self.ci, ci); self.ci.task_base = self.tasks.len(); self.ci.regs.init(); let ret = compile(self, &mut prev_ci); - let mut rr = std::mem::take(&mut self.ci.ret_reg); + let mut rr = core::mem::take(&mut self.ci.ret_reg); let is_on_stack = !rr.is_ref(); if !rr.is_ref() { self.ci.emit(instrs::cp(1, rr.get())); let rref = rr.as_ref(); - self.ci.regs.free(std::mem::replace(&mut rr, rref)); + self.ci.regs.free(core::mem::replace(&mut rr, rref)); } if ret.is_ok() { @@ -2539,20 +2571,20 @@ impl Codegen { self.tys.dump_reachable(last_fn as _, &mut self.ct.code); let entry = &mut self.ct.code[self.tys.funcs[last_fn].offset as usize] as *mut _ as _; - let prev_pc = std::mem::replace(&mut self.ct.vm.pc, hbvm::mem::Address::new(entry)) + let prev_pc = core::mem::replace(&mut self.ct.vm.pc, hbvm::mem::Address::new(entry)) - self.ct.code.as_ptr() as usize; #[cfg(debug_assertions)] { - let mut vc = Vec::::new(); + let mut vc = String::new(); if let Err(e) = self.tys.disasm(&self.ct.code, &self.files, &mut vc, |bts| { if let Some(trap) = Self::read_trap(bts.as_ptr() as _) { bts.take(..trap.size() + 1).unwrap(); } }) { - panic!("{e} {}", String::from_utf8(vc).unwrap()); + panic!("{e} {}", vc); } else { - log::trc!("{}", String::from_utf8(vc).unwrap()); + log::trc!("{}", vc); } } @@ -2562,14 +2594,14 @@ impl Codegen { self.tys.funcs.pop().unwrap(); } - self.pool.cis.push(std::mem::replace(&mut self.ci, prev_ci)); + self.pool.cis.push(core::mem::replace(&mut self.ci, prev_ci)); log::trc!("eval-end"); ret } - pub fn disasm(&mut self, output: &mut impl std::io::Write) -> std::io::Result<()> { + pub fn disasm(&mut self, output: &mut String) -> Result<(), DisasmError> { let mut bin = Vec::new(); self.assemble(&mut bin); self.tys.disasm(&bin, &self.files, output, |_| {}) @@ -2590,6 +2622,12 @@ impl Codegen { ty::Display::new(&self.tys, &self.files, ty) } + fn ast_display(&self, ast: &Expr) -> String { + let mut s = String::new(); + parser::Formatter::new(&self.cfile().file).fmt(ast, &mut s).unwrap(); + s + } + #[must_use] #[track_caller] fn assert_ty(&self, pos: Pos, ty: ty::Id, expected: ty::Id, hint: impl Display) -> ty::Id { @@ -2609,14 +2647,14 @@ impl Codegen { } } - fn report_log(&self, pos: Pos, msg: impl std::fmt::Display) { + fn report_log(&self, pos: Pos, msg: impl core::fmt::Display) { let mut out = String::new(); self.cfile().report_to(pos, msg, &mut out); - eprintln!("{out}"); + crate::log::eprintln!("{out}"); } #[track_caller] - fn report(&self, pos: Pos, msg: impl std::fmt::Display) -> ! { + fn report(&self, pos: Pos, msg: impl core::fmt::Display) -> ! { self.report_log(pos, msg); unreachable!(); } @@ -2627,9 +2665,10 @@ impl Codegen { ast.pos(), format_args!( "compiler does not (yet) know how to handle ({hint}):\n\ - {ast:}\n\ + {:}\n\ info for weak people:\n\ - {ast:#?}" + {ast:#?}", + self.ast_display(ast) ), ) } @@ -2671,6 +2710,8 @@ impl Codegen { #[cfg(test)] mod tests { + use alloc::{string::String, vec::Vec}; + fn generate(ident: &'static str, input: &'static str, output: &mut String) { let mut codegen = super::Codegen { files: crate::test_parse_files(ident, input), ..Default::default() }; @@ -2679,9 +2720,7 @@ mod tests { let mut out = Vec::new(); codegen.assemble(&mut out); - let mut buf = Vec::::new(); - let err = codegen.tys.disasm(&out, &codegen.files, &mut buf, |_| {}); - output.push_str(String::from_utf8(buf).unwrap().as_str()); + let err = codegen.tys.disasm(&out, &codegen.files, output, |_| {}); if err.is_err() { return; } diff --git a/hblang/src/fs.rs b/hblang/src/fs.rs new file mode 100644 index 0000000..7f2f48e --- /dev/null +++ b/hblang/src/fs.rs @@ -0,0 +1,286 @@ +use { + crate::{ + codegen, + parser::{self, Ast}, + }, + alloc::{string::String, vec::Vec}, + core::fmt::Write, + hashbrown::hash_map, + std::{ + collections::VecDeque, + ffi::OsStr, + io, + path::{Path, PathBuf}, + string::ToString, + sync::Mutex, + }, +}; + +#[derive(Default)] +pub struct Options { + pub fmt: bool, + pub fmt_stdout: bool, + pub dump_asm: bool, + pub extra_threads: usize, +} + +pub fn format_to(ast: &parser::Ast, source: &str, out: &mut String) -> core::fmt::Result { + for (i, expr) in ast.exprs().iter().enumerate() { + parser::Formatter::new(&ast.file).fmt(expr, out)?; + if let Some(expr) = ast.exprs().get(i + 1) + && let Some(rest) = source.get(expr.pos() as usize..) + { + if parser::insert_needed_semicolon(rest) { + write!(out, ";")?; + } + if parser::preserve_newlines(&source[..expr.pos() as usize]) > 1 { + writeln!(out)?; + } + } + + if i + 1 != ast.exprs().len() { + writeln!(out)?; + } + } + + Ok(()) +} + +pub fn run_compiler(root_file: &str, options: Options, out: &mut Vec) -> std::io::Result<()> { + let parsed = parse_from_fs(options.extra_threads, root_file)?; + + fn format_ast(ast: parser::Ast) -> std::io::Result<()> { + let mut output = String::new(); + let source = std::fs::read_to_string(&*ast.path)?; + format_to(&ast, &source, &mut output).unwrap(); + std::fs::write(&*ast.path, output)?; + Ok(()) + } + + if options.fmt { + for parsed in parsed { + format_ast(parsed)?; + } + } else if options.fmt_stdout { + let ast = parsed.into_iter().next().unwrap(); + let source = std::fs::read_to_string(&*ast.path)?; + format_to(&ast, &source, unsafe { std::mem::transmute::<&mut Vec, &mut String>(out) }) + .unwrap(); + } else { + let mut codegen = codegen::Codegen::default(); + codegen.files = parsed; + + codegen.generate(); + if options.dump_asm { + codegen + .disasm(unsafe { std::mem::transmute::<&mut Vec, &mut String>(out) }) + .map_err(|e| io::Error::other(e.to_string()))?; + } else { + codegen.assemble(out); + } + } + + Ok(()) +} + +struct TaskQueue { + inner: Mutex>, +} + +impl TaskQueue { + fn new(max_waiters: usize) -> Self { + Self { inner: Mutex::new(TaskQueueInner::new(max_waiters)) } + } + + pub fn push(&self, message: T) { + self.extend([message]); + } + + pub fn extend(&self, messages: impl IntoIterator) { + self.inner.lock().unwrap().push(messages); + } + + pub fn pop(&self) -> Option { + TaskQueueInner::pop(&self.inner) + } +} + +enum TaskSlot { + Waiting, + Delivered(T), + Closed, +} + +struct TaskQueueInner { + max_waiters: usize, + messages: VecDeque, + parked: VecDeque<(*mut TaskSlot, std::thread::Thread)>, +} + +unsafe impl Send for TaskQueueInner {} +unsafe impl Sync for TaskQueueInner {} + +impl TaskQueueInner { + fn new(max_waiters: usize) -> Self { + Self { max_waiters, messages: Default::default(), parked: Default::default() } + } + + fn push(&mut self, messages: impl IntoIterator) { + for msg in messages { + if let Some((dest, thread)) = self.parked.pop_front() { + unsafe { *dest = TaskSlot::Delivered(msg) }; + thread.unpark(); + } else { + self.messages.push_back(msg); + } + } + } + + fn pop(s: &Mutex) -> Option { + let mut res = TaskSlot::Waiting; + { + let mut s = s.lock().unwrap(); + if let Some(msg) = s.messages.pop_front() { + return Some(msg); + } + + if s.max_waiters == s.parked.len() + 1 { + for (dest, thread) in s.parked.drain(..) { + unsafe { *dest = TaskSlot::Closed }; + thread.unpark(); + } + return None; + } + + s.parked.push_back((&mut res, std::thread::current())); + } + + loop { + std::thread::park(); + + let _s = s.lock().unwrap(); + match core::mem::replace(&mut res, TaskSlot::Waiting) { + TaskSlot::Delivered(msg) => return Some(msg), + TaskSlot::Closed => return None, + TaskSlot::Waiting => {} + } + } + } +} + +pub fn parse_from_fs(extra_threads: usize, root: &str) -> io::Result> { + fn resolve(path: &str, from: &str) -> Result { + let path = match Path::new(from).parent() { + Some(parent) => PathBuf::from_iter([parent, Path::new(path)]), + None => PathBuf::from(path), + }; + + path.canonicalize().map_err(|source| CantLoadFile { path, source }) + } + + #[derive(Debug)] + struct CantLoadFile { + path: PathBuf, + source: io::Error, + } + + impl core::fmt::Display for CantLoadFile { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "can't load file: {}", display_rel_path(&self.path),) + } + } + + impl core::error::Error for CantLoadFile { + fn source(&self) -> Option<&(dyn core::error::Error + 'static)> { + Some(&self.source) + } + } + + impl From for io::Error { + fn from(e: CantLoadFile) -> Self { + io::Error::new(io::ErrorKind::InvalidData, e) + } + } + + type Task = (u32, PathBuf); + + let seen = Mutex::new(crate::HashMap::::default()); + let tasks = TaskQueue::::new(extra_threads + 1); + let ast = Mutex::new(Vec::>::new()); + + let loader = |path: &str, from: &str| { + if path.starts_with("rel:") { + return Err(io::Error::new( + io::ErrorKind::Other, + "`rel:` prefix was removed and is now equivalent to no prefix (remove it)" + .to_string(), + )); + } + + let physiscal_path = resolve(path, from)?; + + let id = { + let mut seen = seen.lock().unwrap(); + let len = seen.len(); + match seen.entry(physiscal_path.clone()) { + hash_map::Entry::Occupied(entry) => { + return Ok(*entry.get()); + } + hash_map::Entry::Vacant(entry) => { + entry.insert(len as _); + len as u32 + } + } + }; + + if !physiscal_path.exists() { + return Err(io::Error::new( + io::ErrorKind::NotFound, + format!("can't find file: {}", display_rel_path(&physiscal_path)), + )); + } + + tasks.push((id, physiscal_path)); + Ok(id) + }; + + let execute_task = |(_, path): Task| { + let path = path.to_str().ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("path contains invalid characters: {}", display_rel_path(&path)), + ) + })?; + Ok(Ast::new(path, std::fs::read_to_string(path)?, &|path, from| { + loader(path, from).map_err(|e| e.to_string()) + })) + }; + + let thread = || { + while let Some(task @ (indx, ..)) = tasks.pop() { + let res = execute_task(task); + let mut ast = ast.lock().unwrap(); + let len = ast.len().max(indx as usize + 1); + ast.resize_with(len, || Err(io::ErrorKind::InvalidData.into())); + ast[indx as usize] = res; + } + }; + + let path = Path::new(root).canonicalize()?; + seen.lock().unwrap().insert(path.clone(), 0); + tasks.push((0, path)); + + if extra_threads == 0 { + thread(); + } else { + std::thread::scope(|s| (0..extra_threads + 1).for_each(|_| _ = s.spawn(thread))); + } + + ast.into_inner().unwrap().into_iter().collect::>>() +} + +pub fn display_rel_path(path: &(impl AsRef + ?Sized)) -> std::path::Display { + static CWD: std::sync::LazyLock = + std::sync::LazyLock::new(|| std::env::current_dir().unwrap_or_default()); + std::path::Path::new(path).strip_prefix(&*CWD).unwrap_or(std::path::Path::new(path)).display() +} diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs index a8a419a..3b2ab8c 100644 --- a/hblang/src/lexer.rs +++ b/hblang/src/lexer.rs @@ -19,7 +19,7 @@ pub struct Token { } impl Token { - pub fn range(&self) -> std::ops::Range { + pub fn range(&self) -> core::ops::Range { self.start as usize..self.end as usize } } @@ -44,8 +44,8 @@ macro_rules! gen_token_kind { )* } ) => { - impl std::fmt::Display for $name { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + impl core::fmt::Display for $name { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { f.write_str(self.name()) } } @@ -59,7 +59,7 @@ macro_rules! gen_token_kind { $( Self::$punkt => stringify!($punkt_lit), )* $($( Self::$op => $op_lit, $(Self::$assign => concat!($op_lit, "="),)?)*)* - _ => unsafe { std::str::from_utf8_unchecked(std::slice::from_ref(&sf)) }, + _ => unsafe { core::str::from_utf8_unchecked(core::slice::from_ref(&sf)) }, } } @@ -256,7 +256,7 @@ impl TokenKind { if ascii_mask(b"|+-*/%^&79") & (1u128 << id) == 0 { return None; } - Some(unsafe { std::mem::transmute::(id) }) + Some(unsafe { core::mem::transmute::(id) }) } pub fn is_comutative(self) -> bool { @@ -375,15 +375,15 @@ impl<'a> Lexer<'a> { } pub fn source(&self) -> &'a str { - unsafe { std::str::from_utf8_unchecked(self.bytes) } + unsafe { core::str::from_utf8_unchecked(self.bytes) } } - pub fn slice(&self, tok: std::ops::Range) -> &'a str { - unsafe { std::str::from_utf8_unchecked(&self.bytes[tok]) } + pub fn slice(&self, tok: core::ops::Range) -> &'a str { + unsafe { core::str::from_utf8_unchecked(&self.bytes[tok]) } } fn peek(&self) -> Option { - if std::intrinsics::unlikely(self.pos >= self.bytes.len() as u32) { + if core::intrinsics::unlikely(self.pos >= self.bytes.len() as u32) { None } else { Some(unsafe { *self.bytes.get_unchecked(self.pos as usize) }) @@ -423,7 +423,7 @@ impl<'a> Lexer<'a> { } }; - let identity = |s: u8| unsafe { std::mem::transmute::(s) }; + let identity = |s: u8| unsafe { core::mem::transmute::(s) }; let kind = match c { ..=b' ' => continue, diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index 854248b..e3c18dd 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -17,10 +17,15 @@ slice_take, map_try_insert, extract_if, - ptr_internals + ptr_internals, + iter_intersperse )] +#![warn(clippy::dbg_macro)] #![allow(stable_features, internal_features)] +#![no_std] +#[cfg(feature = "std")] +pub use fs::*; use { self::{ ident::Ident, @@ -29,19 +34,18 @@ use { son::reg, ty::ArrayLen, }, + alloc::{collections::BTreeMap, string::String, vec::Vec}, + core::{cell::Cell, fmt::Display, hash::BuildHasher, ops::Range}, + hashbrown::hash_map, hbbytecode as instrs, - parser::Ast, - std::{ - collections::{hash_map, BTreeMap, VecDeque}, - fmt::Display, - io, - ops::Range, - path::{Path, PathBuf}, - rc::Rc, - sync::Mutex, - }, }; +#[macro_use] +extern crate alloc; + +#[cfg(any(feature = "std", test))] +extern crate std; + #[cfg(test)] const README: &str = include_str!("../README.md"); @@ -50,12 +54,14 @@ macro_rules! run_tests { ($runner:path: $($name:ident;)*) => {$( #[test] fn $name() { - $crate::run_test(std::any::type_name_of_val(&$name), stringify!($name), $crate::README, $runner); + $crate::run_test(core::any::type_name_of_val(&$name), stringify!($name), $crate::README, $runner); } )*}; } pub mod codegen; +#[cfg(any(feature = "std", test))] +pub mod fs; pub mod parser; pub mod son; @@ -104,7 +110,7 @@ mod ident { ((pos + 1) << LEN_BITS) | len } - pub fn range(ident: u32) -> std::ops::Range { + pub fn range(ident: u32) -> core::ops::Range { let (len, pos) = (len(ident) as usize, pos(ident) as usize); pos..pos + len } @@ -140,10 +146,24 @@ mod log { } }; + macro_rules! eprintln { + ($($tt:tt)*) => { + #[cfg(test)] + { + //std::eprintln!($($tt)*) + format_args!($($tt)*) + } + #[cfg(not(test))] + { + format_args!($($tt)*) + } + }; + } + macro_rules! log { ($level:expr, $fmt:literal $($expr:tt)*) => { if $level <= $crate::log::LOG_LEVEL { - eprintln!("{:?}: {}", $level, format_args!($fmt $($expr)*)); + $crate::log::eprintln!("{:?}: {}", $level, format_args!($fmt $($expr)*)); } }; @@ -161,7 +181,7 @@ mod log { macro_rules! trc { ($($arg:tt)*) => { $crate::log::log!($crate::log::Level::Trc, $($arg)*) }; } #[allow(unused_imports)] - pub(crate) use {dbg, err, inf, log, trc, wrn}; + pub(crate) use {dbg, eprintln, err, inf, log, trc, wrn}; } mod ty { @@ -171,7 +191,7 @@ mod ty { lexer::TokenKind, parser::{self}, }, - std::{num::NonZeroU32, ops::Range}, + core::{num::NonZeroU32, ops::Range}, }; pub type ArrayLen = u32; @@ -317,6 +337,11 @@ mod ty { *(stringify!($name).as_ptr() as *const [u8; stringify!($name).len()]) });)* } + #[allow(dead_code)] + impl Id { + $(pub const $name: Self = Kind::Builtin($name).compress();)* + } + pub fn from_str(name: &str) -> Option { match name.as_bytes() { $(__lc_names::$name => Some($name),)* @@ -326,7 +351,7 @@ mod ty { pub fn to_str(ty: Builtin) -> &'static str { match ty { - $($name => unsafe { std::str::from_utf8_unchecked(__lc_names::$name) },)* + $($name => unsafe { core::str::from_utf8_unchecked(__lc_names::$name) },)* v => unreachable!("invalid type: {}", v), } } @@ -360,7 +385,7 @@ mod ty { impl $name { const FLAG_BITS: u32 = (${count($variant)} as u32).next_power_of_two().ilog2(); - const FLAG_OFFSET: u32 = std::mem::size_of::() as u32 * 8 - Self::FLAG_BITS; + const FLAG_OFFSET: u32 = core::mem::size_of::() as u32 * 8 - Self::FLAG_BITS; const INDEX_MASK: u32 = (1 << (32 - Self::FLAG_BITS)) - 1; $vis fn from_ty(ty: Id) -> Self { @@ -422,8 +447,8 @@ mod ty { } } - impl<'a> std::fmt::Display for Display<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + impl<'a> core::fmt::Display for Display<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use Kind as TK; match TK::from_ty(self.ty) { TK::Module(idx) => write!(f, "module{}", idx), @@ -435,7 +460,8 @@ mod ty { let record = &self.tys.structs[idx as usize]; if ident::is_null(record.name) { write!(f, "[{idx}]{{")?; - for (i, &super::Field { ref name, ty }) in record.fields.iter().enumerate() + for (i, &super::Field { ref name, ty }) in + self.tys.struct_fields(idx).iter().enumerate() { if i != 0 { write!(f, ", ")?; @@ -572,15 +598,18 @@ impl Reloc { } struct Field { - name: Rc, + name: Ident, ty: ty::Id, } +#[derive(Default)] struct Struct { name: Ident, file: FileId, - explicit_alignment: Option, - fields: Rc<[Field]>, + size: Cell, + align: Cell, + explicit_alignment: Option, + field_start: u32, } struct Ptr { @@ -618,6 +647,77 @@ struct AbleOsExecutableHeader { metadata_length: u64, } +struct IdentEntry { + hash: u32, + ident: Ident, +} + +impl core::hash::Hash for IdentEntry { + fn hash(&self, state: &mut H) { + state.write_u64((self.hash as u64) << 32); + } +} + +#[derive(Default)] +struct IdentityHasher(u64); + +impl core::hash::Hasher for IdentityHasher { + fn finish(&self) -> u64 { + self.0 + } + + fn write(&mut self, _: &[u8]) { + unimplemented!() + } + + fn write_u64(&mut self, i: u64) { + self.0 = i; + } +} + +#[derive(Default)] +struct IdentInterner { + lookup: hashbrown::HashMap>, + strings: String, +} + +impl IdentInterner { + fn intern(&mut self, ident: &str) -> Ident { + let hash = FnvBuildHasher::default().hash_one(ident) & 0xFFFFFFFF00000000; + match self.lookup.raw_entry_mut().from_hash( + hash, + |k| unsafe { self.strings.get_unchecked(ident::range(k.ident)) } == ident, + ) { + hash_map::RawEntryMut::Occupied(o) => o.get_key_value().0.ident, + hash_map::RawEntryMut::Vacant(v) => { + let id = ident::new(self.strings.len() as _, ident.len() as _); + self.strings.push_str(ident); + v.insert_hashed_nocheck( + hash, + IdentEntry { hash: (hash >> 32) as _, ident: id }, + (), + ); + id + } + } + } + + fn ident_str(&self, ident: Ident) -> &str { + &self.strings[ident::range(ident)] + } + + fn project(&self, ident: &str) -> Option { + let hash = FnvBuildHasher::default().hash_one(ident) & 0xFFFFFFFF00000000; + self.lookup + .raw_entry() + .from_hash( + hash, + |k| unsafe { self.strings.get_unchecked(ident::range(k.ident)) } == ident, + ) + .map(|(k, _)| k.ident) + } +} + #[derive(Default)] struct Types { syms: HashMap, @@ -626,13 +726,29 @@ struct Types { args: Vec, globals: Vec, structs: Vec, + fields: Vec, + fields_tmp: Vec, + field_names: IdentInterner, ptrs: Vec, arrays: Vec, } -const HEADER_SIZE: usize = std::mem::size_of::(); +const HEADER_SIZE: usize = core::mem::size_of::(); impl Types { + fn struct_field_range(&self, strct: ty::Struct) -> Range { + let start = self.structs[strct as usize].field_start as usize; + let end = self + .structs + .get(strct as usize + 1) + .map_or(self.fields.len(), |s| s.field_start as usize); + start..end + } + + fn struct_fields(&self, strct: ty::Struct) -> &[Field] { + &self.fields[self.struct_field_range(strct)] + } + /// returns none if comptime eval is required fn ty(&mut self, file: FileId, expr: &Expr, files: &[parser::Ast]) -> Option { Some(match *expr { @@ -658,19 +774,22 @@ impl Types { return Some(ty); } - let fields = fields - .iter() - .filter_map(CommentOr::or) - .map(|sf| { - Some(Field { name: sf.name.into(), ty: self.ty(file, &sf.ty, files)? }) - }) - .collect::>()?; + let prev_tmp = self.fields_tmp.len(); + for field in fields.iter().filter_map(CommentOr::or) { + let Some(ty) = self.ty(file, &field.ty, files) else { + self.fields_tmp.truncate(prev_tmp); + return None; + }; + self.fields_tmp.push(Field { name: self.field_names.intern(field.name), ty }); + } + self.structs.push(Struct { - name: 0, file, - fields, + field_start: self.fields.len() as _, explicit_alignment: packed.then_some(1), + ..Default::default() }); + self.fields.extend(self.fields_tmp.drain(prev_tmp..)); let ty = ty::Kind::Struct(self.structs.len() as u32 - 1).compress(); self.syms.insert(sym, ty); @@ -759,13 +878,13 @@ impl Types { } } - pub fn disasm( - &self, + pub fn disasm<'a>( + &'a self, mut sluce: &[u8], - files: &[parser::Ast], - output: &mut impl std::io::Write, + files: &'a [parser::Ast], + output: &mut String, eca_handler: impl FnMut(&mut &[u8]), - ) -> std::io::Result<()> { + ) -> Result<(), hbbytecode::DisasmError<'a>> { use instrs::DisasmItem; let functions = self .funcs @@ -787,7 +906,7 @@ impl Types { }) .chain(self.globals.iter().filter(|g| task::is_done(g.offset)).map(|g| { let name = if g.file == u32::MAX { - std::str::from_utf8(&g.data).unwrap() + core::str::from_utf8(&g.data).unwrap() } else { let file = &files[g.file as usize]; file.ident_str(g.name) @@ -802,13 +921,6 @@ impl Types { ParamAlloc(2 + (9..=16).contains(&self.size_of(ret.into())) as u8..12) } - fn offset_of(&self, idx: ty::Struct, field: &str) -> Option<(Offset, ty::Id)> { - OffsetIter::new(idx) - .into_iter(self) - .find(|(f, _)| f.name.as_ref() == field) - .map(|(f, off)| (off, f.ty)) - } - fn make_ptr(&mut self, base: ty::Id) -> ty::Id { ty::Kind::Ptr(self.make_ptr_low(base)).compress() } @@ -867,8 +979,13 @@ impl Types { } } ty::Kind::Struct(stru) => { - let mut oiter = OffsetIter::new(stru); + if self.structs[stru as usize].size.get() != 0 { + return self.structs[stru as usize].size.get(); + } + + let mut oiter = OffsetIter::new(stru, self); while oiter.next(self).is_some() {} + self.structs[stru as usize].size.set(oiter.offset); oiter.offset } ty => unimplemented!("size_of: {:?}", ty), @@ -878,14 +995,21 @@ impl Types { fn align_of(&self, ty: ty::Id) -> Size { match ty.expand() { ty::Kind::Struct(stru) => { - self.structs[stru as usize].explicit_alignment.unwrap_or_else(|| { - self.structs[stru as usize] - .fields - .iter() - .map(|&Field { ty, .. }| self.align_of(ty)) - .max() - .unwrap_or(1) - }) + if self.structs[stru as usize].align.get() != 0 { + return self.structs[stru as usize].align.get() as _; + } + let align = self.structs[stru as usize].explicit_alignment.map_or_else( + || { + self.struct_fields(stru) + .iter() + .map(|&Field { ty, .. }| self.align_of(ty)) + .max() + .unwrap_or(1) + }, + |a| a as _, + ); + self.structs[stru as usize].align.set(align.try_into().unwrap()); + align } ty::Kind::Slice(arr) => { let arr = &self.arrays[arr as usize]; @@ -904,30 +1028,39 @@ impl Types { _ => None, } } + + fn find_struct_field(&self, s: ty::Struct, name: &str) -> Option { + let name = self.field_names.project(name)?; + self.struct_fields(s).iter().position(|f| f.name == name) + } } struct OffsetIter { strct: ty::Struct, offset: Offset, - index: usize, -} - -fn align_up(value: Size, align: Size) -> Size { - (value + align - 1) & !(align - 1) + fields: Range, } impl OffsetIter { - fn new(strct: ty::Struct) -> Self { - Self { strct, offset: 0, index: 0 } + fn new(strct: ty::Struct, tys: &Types) -> Self { + Self { strct, offset: 0, fields: tys.struct_field_range(strct) } + } + + fn offset_of(tys: &Types, idx: ty::Struct, field: &str) -> Option<(Offset, ty::Id)> { + let field_id = tys.field_names.project(field)?; + OffsetIter::new(idx, tys) + .into_iter(tys) + .find(|(f, _)| f.name == field_id) + .map(|(f, off)| (off, f.ty)) } fn next<'a>(&mut self, tys: &'a Types) -> Option<(&'a Field, Offset)> { let stru = &tys.structs[self.strct as usize]; - let field = stru.fields.get(self.index)?; - self.index += 1; - let align = stru.explicit_alignment.unwrap_or_else(|| tys.align_of(field.ty)); + let field = &tys.fields[self.fields.next()?]; + + let align = stru.explicit_alignment.map_or_else(|| tys.align_of(field.ty), |a| a as u32); + self.offset = (self.offset + align - 1) & !(align - 1); - self.offset = align_up(self.offset, align); let off = self.offset; self.offset += tys.size_of(field.ty); Some((field, off)) @@ -939,210 +1072,17 @@ impl OffsetIter { } fn into_iter(mut self, tys: &Types) -> impl Iterator { - std::iter::from_fn(move || self.next(tys)) + core::iter::from_fn(move || self.next(tys)) } } -struct TaskQueue { - inner: Mutex>, -} - -impl TaskQueue { - fn new(max_waiters: usize) -> Self { - Self { inner: Mutex::new(TaskQueueInner::new(max_waiters)) } - } - - pub fn push(&self, message: T) { - self.extend([message]); - } - - pub fn extend(&self, messages: impl IntoIterator) { - self.inner.lock().unwrap().push(messages); - } - - pub fn pop(&self) -> Option { - TaskQueueInner::pop(&self.inner) - } -} - -enum TaskSlot { - Waiting, - Delivered(T), - Closed, -} - -struct TaskQueueInner { - max_waiters: usize, - messages: VecDeque, - parked: VecDeque<(*mut TaskSlot, std::thread::Thread)>, -} - -unsafe impl Send for TaskQueueInner {} -unsafe impl Sync for TaskQueueInner {} - -impl TaskQueueInner { - fn new(max_waiters: usize) -> Self { - Self { max_waiters, messages: Default::default(), parked: Default::default() } - } - - fn push(&mut self, messages: impl IntoIterator) { - for msg in messages { - if let Some((dest, thread)) = self.parked.pop_front() { - unsafe { *dest = TaskSlot::Delivered(msg) }; - thread.unpark(); - } else { - self.messages.push_back(msg); - } - } - } - - fn pop(s: &Mutex) -> Option { - let mut res = TaskSlot::Waiting; - { - let mut s = s.lock().unwrap(); - if let Some(msg) = s.messages.pop_front() { - return Some(msg); - } - - if s.max_waiters == s.parked.len() + 1 { - for (dest, thread) in s.parked.drain(..) { - unsafe { *dest = TaskSlot::Closed }; - thread.unpark(); - } - return None; - } - - s.parked.push_back((&mut res, std::thread::current())); - } - - loop { - std::thread::park(); - - let _s = s.lock().unwrap(); - match std::mem::replace(&mut res, TaskSlot::Waiting) { - TaskSlot::Delivered(msg) => return Some(msg), - TaskSlot::Closed => return None, - TaskSlot::Waiting => {} - } - } - } -} - -pub fn parse_from_fs(extra_threads: usize, root: &str) -> io::Result> { - fn resolve(path: &str, from: &str) -> Result { - let path = match Path::new(from).parent() { - Some(parent) => PathBuf::from_iter([parent, Path::new(path)]), - None => PathBuf::from(path), - }; - - path.canonicalize().map_err(|source| CantLoadFile { path, source }) - } - - #[derive(Debug)] - struct CantLoadFile { - path: PathBuf, - source: io::Error, - } - - impl std::fmt::Display for CantLoadFile { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "can't load file: {}", parser::display_rel_path(&self.path),) - } - } - - impl std::error::Error for CantLoadFile { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - Some(&self.source) - } - } - - impl From for io::Error { - fn from(e: CantLoadFile) -> Self { - io::Error::new(io::ErrorKind::InvalidData, e) - } - } - - type Task = (u32, PathBuf); - - let seen = Mutex::new(HashMap::::default()); - let tasks = TaskQueue::::new(extra_threads + 1); - let ast = Mutex::new(Vec::>::new()); - - let loader = |path: &str, from: &str| { - if path.starts_with("rel:") { - return Err(io::Error::new( - io::ErrorKind::Other, - "`rel:` prefix was removed and is now equivalent to no prefix (remove it)" - .to_string(), - )); - } - - let physiscal_path = resolve(path, from)?; - - let id = { - let mut seen = seen.lock().unwrap(); - let len = seen.len(); - match seen.entry(physiscal_path.clone()) { - hash_map::Entry::Occupied(entry) => { - return Ok(*entry.get()); - } - hash_map::Entry::Vacant(entry) => { - entry.insert(len as _); - len as u32 - } - } - }; - - if !physiscal_path.exists() { - return Err(io::Error::new( - io::ErrorKind::NotFound, - format!("can't find file: {}", parser::display_rel_path(&physiscal_path)), - )); - } - - tasks.push((id, physiscal_path)); - Ok(id) - }; - - let execute_task = |(_, path): Task| { - let path = path.to_str().ok_or_else(|| { - io::Error::new( - io::ErrorKind::InvalidData, - format!("path contains invalid characters: {}", parser::display_rel_path(&path)), - ) - })?; - Ok(Ast::new(path, std::fs::read_to_string(path)?, &loader)) - }; - - let thread = || { - while let Some(task @ (indx, ..)) = tasks.pop() { - let res = execute_task(task); - let mut ast = ast.lock().unwrap(); - let len = ast.len().max(indx as usize + 1); - ast.resize_with(len, || Err(io::ErrorKind::InvalidData.into())); - ast[indx as usize] = res; - } - }; - - let path = Path::new(root).canonicalize()?; - seen.lock().unwrap().insert(path.clone(), 0); - tasks.push((0, path)); - - if extra_threads == 0 { - thread(); - } else { - std::thread::scope(|s| (0..extra_threads + 1).for_each(|_| _ = s.spawn(thread))); - } - - ast.into_inner().unwrap().into_iter().collect::>>() -} - -type HashMap = std::collections::HashMap>; -type _HashSet = std::collections::HashSet>; +type HashMap = hashbrown::HashMap; +type _HashSet = hashbrown::HashSet; +type FnvBuildHasher = core::hash::BuildHasherDefault; struct FnvHasher(u64); -impl std::hash::Hasher for FnvHasher { +impl core::hash::Hasher for FnvHasher { fn finish(&self) -> u64 { self.0 } @@ -1170,7 +1110,7 @@ pub fn run_test( input: &'static str, test: fn(&'static str, &'static str, &mut String), ) { - use std::{io::Write, path::PathBuf}; + use std::{io::Write, path::PathBuf, string::ToString}; let filter = std::env::var("PT_FILTER").unwrap_or_default(); if !filter.is_empty() && !name.contains(&filter) { @@ -1183,7 +1123,7 @@ pub fn run_test( impl Drop for DumpOut<'_> { fn drop(&mut self) { if std::thread::panicking() { - println!("{}", self.0); + std::println!("{}", self.0); } } } @@ -1234,6 +1174,8 @@ pub fn run_test( #[cfg(test)] fn test_parse_files(ident: &'static str, input: &'static str) -> Vec { + use std::{borrow::ToOwned, string::ToString}; + fn find_block(mut input: &'static str, test_name: &'static str) -> &'static str { const CASE_PREFIX: &str = "#### "; const CASE_SUFFIX: &str = "\n```hb"; @@ -1277,7 +1219,7 @@ fn test_parse_files(ident: &'static str, input: &'static str) -> Vec Vec writeln!(output, "ev: Ecall").unwrap(), // compatibility with a test 69 => { let [size, align] = [vm.read_reg(3).0 as usize, vm.read_reg(4).0 as usize]; - let layout = std::alloc::Layout::from_size_align(size, align).unwrap(); - let ptr = unsafe { std::alloc::alloc(layout) }; + let layout = core::alloc::Layout::from_size_align(size, align).unwrap(); + let ptr = unsafe { alloc::alloc::alloc(layout) }; vm.write_reg(1, ptr as u64); } 96 => { @@ -1319,8 +1261,8 @@ fn test_run_vm(out: &[u8], output: &mut String) { vm.read_reg(5).0 as usize, ]; - let layout = std::alloc::Layout::from_size_align(size, align).unwrap(); - unsafe { std::alloc::dealloc(ptr as *mut u8, layout) }; + let layout = core::alloc::Layout::from_size_align(size, align).unwrap(); + unsafe { alloc::alloc::dealloc(ptr as *mut u8, layout) }; } 3 => vm.write_reg(1, 42), unknown => unreachable!("unknown ecall: {unknown:?}"), @@ -1339,81 +1281,6 @@ fn test_run_vm(out: &[u8], output: &mut String) { writeln!(output, "status: {:?}", stat).unwrap(); } -#[derive(Default)] -pub struct Options { - pub fmt: bool, - pub fmt_stdout: bool, - pub dump_asm: bool, - pub extra_threads: usize, -} - -fn format_to( - ast: &parser::Ast, - source: &str, - out: &mut impl std::io::Write, -) -> std::io::Result<()> { - parser::with_fmt_source(source, || { - for (i, expr) in ast.exprs().iter().enumerate() { - write!(out, "{expr}")?; - if let Some(expr) = ast.exprs().get(i + 1) - && let Some(rest) = source.get(expr.pos() as usize..) - { - if parser::insert_needed_semicolon(rest) { - write!(out, ";")?; - } - if parser::preserve_newlines(&source[..expr.pos() as usize]) > 1 { - writeln!(out)?; - } - } - - if i + 1 != ast.exprs().len() { - writeln!(out)?; - } - } - std::io::Result::Ok(()) - }) -} - -pub fn run_compiler( - root_file: &str, - options: Options, - out: &mut impl std::io::Write, -) -> io::Result<()> { - let parsed = parse_from_fs(options.extra_threads, root_file)?; - - fn format_ast(ast: parser::Ast) -> std::io::Result<()> { - let mut output = Vec::new(); - let source = std::fs::read_to_string(&*ast.path)?; - format_to(&ast, &source, &mut output)?; - std::fs::write(&*ast.path, output)?; - Ok(()) - } - - if options.fmt { - for parsed in parsed { - format_ast(parsed)?; - } - } else if options.fmt_stdout { - let ast = parsed.into_iter().next().unwrap(); - let source = std::fs::read_to_string(&*ast.path)?; - format_to(&ast, &source, out)?; - } else { - let mut codegen = codegen::Codegen::default(); - codegen.files = parsed; - - codegen.generate(); - if options.dump_asm { - codegen.disasm(out)?; - } else { - let mut buf = Vec::new(); - codegen.assemble(&mut buf); - out.write_all(&buf)?; - } - } - - Ok(()) -} - #[derive(Default)] pub struct LoggedMem { pub mem: hbvm::mem::HostMemory, @@ -1424,10 +1291,10 @@ pub struct LoggedMem { impl LoggedMem { unsafe fn display_instr(&mut self, instr: hbbytecode::Instr, addr: hbvm::mem::Address) { - let novm: *const hbvm::Vm = std::ptr::null(); - let offset = std::ptr::addr_of!((*novm).memory) as usize; + let novm: *const hbvm::Vm = core::ptr::null(); + let offset = core::ptr::addr_of!((*novm).memory) as usize; let regs = unsafe { - &*std::ptr::addr_of!( + &*core::ptr::addr_of!( (*(((self as *mut _ as *mut u8).sub(offset)) as *const hbvm::Vm)) .registers ) @@ -1435,9 +1302,9 @@ impl LoggedMem { let mut bytes = core::slice::from_raw_parts( (addr.get() - 1) as *const u8, - std::mem::size_of::() + 1, + core::mem::size_of::() + 1, ); - use std::fmt::Write; + use core::fmt::Write; hbbytecode::parse_args(&mut bytes, instr, &mut self.op_buf).unwrap(); debug_assert!(bytes.is_empty()); self.disp_buf.clear(); @@ -1482,7 +1349,7 @@ impl hbvm::mem::Memory for LoggedMem { unsafe fn prog_read(&mut self, addr: hbvm::mem::Address) -> T { if log::LOG_LEVEL == log::Level::Trc { - if std::any::TypeId::of::() == std::any::TypeId::of::() { + if core::any::TypeId::of::() == core::any::TypeId::of::() { if let Some(instr) = self.prev_instr { self.display_instr::<()>(instr, addr); } @@ -1500,37 +1367,10 @@ impl hbvm::mem::Memory for LoggedMem { struct AsHex<'a>(&'a [u8]); impl Display for AsHex<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { for &b in self.0 { write!(f, "{b:02x}")?; } Ok(()) } } - -#[cfg(test)] -mod test { - use std::sync::Arc; - - #[test] - fn task_queue_sanity() { - let queue = Arc::new(super::TaskQueue::new(1000)); - - let threads = (0..10) - .map(|_| { - let queue = queue.clone(); - std::thread::spawn(move || { - for _ in 0..100 { - queue.extend([queue.pop().unwrap()]); - } - }) - }) - .collect::>(); - - queue.extend(0..5); - - for t in threads { - t.join().unwrap(); - } - } -} diff --git a/hblang/src/main.rs b/hblang/src/main.rs index 17f719f..3303417 100644 --- a/hblang/src/main.rs +++ b/hblang/src/main.rs @@ -1,6 +1,7 @@ -use std::num::NonZeroUsize; - +#[cfg(feature = "std")] fn main() -> std::io::Result<()> { + use std::{io::Write, num::NonZeroUsize}; + let args = std::env::args().collect::>(); let args = args.iter().map(String::as_str).collect::>(); @@ -10,6 +11,7 @@ fn main() -> std::io::Result<()> { return Err(std::io::ErrorKind::Other.into()); } + let mut out = Vec::new(); hblang::run_compiler( args.iter().filter(|a| !a.starts_with('-')).nth(1).copied().unwrap_or("main.hb"), hblang::Options { @@ -23,6 +25,7 @@ fn main() -> std::io::Result<()> { .map_or(1, NonZeroUsize::get) - 1, }, - &mut std::io::stdout(), - ) + &mut out, + )?; + std::io::stdout().write_all(&out) } diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs index 960d115..bcf3046 100644 --- a/hblang/src/parser.rs +++ b/hblang/src/parser.rs @@ -3,12 +3,11 @@ use { ident::{self, Ident}, lexer::{self, Lexer, Token, TokenKind}, }, - std::{ - cell::{Cell, UnsafeCell}, - ffi::OsStr, - fmt, io, + alloc::{boxed::Box, string::String, vec::Vec}, + core::{ + cell::UnsafeCell, + fmt::{self, Write}, ops::{Deref, Not}, - path::PathBuf, ptr::NonNull, sync::atomic::AtomicUsize, }, @@ -19,14 +18,15 @@ pub type IdentFlags = u32; pub type Symbols = Vec; pub type FileId = u32; pub type IdentIndex = u16; -pub type Loader<'a> = &'a (dyn Fn(&str, &str) -> io::Result + 'a); +pub type LoaderError = String; +pub type Loader<'a> = &'a (dyn Fn(&str, &str) -> Result + 'a); pub mod idfl { use super::*; macro_rules! flags { ($($name:ident,)*) => { - $(pub const $name: IdentFlags = 1 << (std::mem::size_of::() * 8 - 1 - ${index(0)});)* + $(pub const $name: IdentFlags = 1 << (core::mem::size_of::() * 8 - 1 - ${index(0)});)* pub const ALL: IdentFlags = 0 $(| $name)*; }; } @@ -38,8 +38,8 @@ pub mod idfl { } } -pub fn no_loader(_: &str, _: &str) -> io::Result { - Err(io::ErrorKind::NotFound.into()) +pub fn no_loader(_: &str, _: &str) -> Result { + Err(String::new()) } #[derive(Debug)] @@ -108,7 +108,7 @@ impl<'a, 'b> Parser<'a, 'b> { if !errors.is_empty() { // TODO: we need error recovery - eprintln!("{errors}"); + crate::log::eprintln!("{errors}"); unreachable!(); } @@ -116,7 +116,7 @@ impl<'a, 'b> Parser<'a, 'b> { } fn next(&mut self) -> Token { - std::mem::replace(&mut self.token, self.lexer.next()) + core::mem::replace(&mut self.token, self.lexer.next()) } fn ptr_expr(&mut self) -> &'a Expr<'a> { @@ -154,8 +154,8 @@ impl<'a, 'b> Parser<'a, 'b> { // parser invariants. let source = self.lexer.slice(0..checkpoint as usize); let prev_lexer = - std::mem::replace(&mut self.lexer, Lexer::restore(source, fold.pos())); - let prev_token = std::mem::replace(&mut self.token, self.lexer.next()); + core::mem::replace(&mut self.lexer, Lexer::restore(source, fold.pos())); + let prev_token = core::mem::replace(&mut self.token, self.lexer.next()); let clone = self.expr(); self.lexer = prev_lexer; self.token = prev_token; @@ -208,7 +208,7 @@ impl<'a, 'b> Parser<'a, 'b> { } let index = self.idents.binary_search_by_key(&id, |s| s.ident).expect("fck up"); - if std::mem::replace(&mut self.idents[index].declared, true) { + if core::mem::replace(&mut self.idents[index].declared, true) { self.report( pos, format_args!("redeclaration of identifier: {}", self.lexer.slice(ident::range(id))), @@ -301,7 +301,7 @@ impl<'a, 'b> Parser<'a, 'b> { expr } T::Struct => E::Struct { - packed: std::mem::take(&mut self.packed), + packed: core::mem::take(&mut self.packed), fields: { self.ns_bound = self.idents.len(); self.expect_advance(T::LBrace); @@ -334,7 +334,7 @@ impl<'a, 'b> Parser<'a, 'b> { } pos }, - trailing_comma: std::mem::take(&mut self.trailing_sep), + trailing_comma: core::mem::take(&mut self.trailing_sep), }, T::Ident | T::CtIdent => { let (id, is_first) = self.resolve_ident(token); @@ -440,7 +440,7 @@ impl<'a, 'b> Parser<'a, 'b> { T::LParen => Expr::Call { func: self.arena.alloc(expr), args: self.collect_list(T::Comma, T::RParen, Self::expr), - trailing_comma: std::mem::take(&mut self.trailing_sep), + trailing_comma: core::mem::take(&mut self.trailing_sep), }, T::Ctor => self.ctor(token.start, Some(expr)), T::Tupl => self.tupl(token.start, Some(expr)), @@ -454,6 +454,7 @@ impl<'a, 'b> Parser<'a, 'b> { }, T::Dot => E::Field { target: self.arena.alloc(expr), + pos: token.start, name: { let token = self.expect_advance(T::Ident); self.move_str(token) @@ -475,7 +476,7 @@ impl<'a, 'b> Parser<'a, 'b> { pos, ty: ty.map(|ty| self.arena.alloc(ty)), fields: self.collect_list(TokenKind::Comma, TokenKind::RParen, Self::expr), - trailing_comma: std::mem::take(&mut self.trailing_sep), + trailing_comma: core::mem::take(&mut self.trailing_sep), } } @@ -497,7 +498,7 @@ impl<'a, 'b> Parser<'a, 'b> { }, } }), - trailing_comma: std::mem::take(&mut self.trailing_sep), + trailing_comma: core::mem::take(&mut self.trailing_sep), } } @@ -548,7 +549,7 @@ impl<'a, 'b> Parser<'a, 'b> { fn collect(&mut self, mut f: impl FnMut(&mut Self) -> Option) -> &'a [T] { // TODO: avoid this allocation - let vec = std::iter::from_fn(|| f(self)).collect::>(); + let vec = core::iter::from_fn(|| f(self)).collect::>(); self.arena.alloc_slice(&vec) } @@ -575,7 +576,7 @@ impl<'a, 'b> Parser<'a, 'b> { fn report(&self, pos: Pos, msg: impl fmt::Display) -> ! { let mut str = String::new(); report_to(self.lexer.source(), self.path, pos, msg, &mut str); - eprintln!("{str}"); + crate::log::eprintln!("{str}"); unreachable!(); } @@ -639,7 +640,7 @@ macro_rules! generate_expr { match self {$( Self::$variant { $($field,)* } => { #[allow(clippy::size_of_ref)] - let fields = [$(($field as *const _ as usize - self as *const _ as usize, std::mem::size_of_val($field)),)*]; + let fields = [$(($field as *const _ as usize - self as *const _ as usize, core::mem::size_of_val($field)),)*]; let (last, size) = fields.iter().copied().max().unwrap(); last + size }, @@ -791,6 +792,7 @@ generate_expr! { /// `Expr '.' Ident` Field { target: &'a Self, + pos: Pos, name: &'a str, }, /// `'true' | 'false'` @@ -846,9 +848,12 @@ impl<'a> Expr<'a> { Ok(()) } Self::Ctor { fields, .. } => { - for CtorField { name, value, .. } in fields { - match value.find_pattern_path(ident, &Expr::Field { target, name }, with_final) - { + for &CtorField { name, value, pos } in fields { + match value.find_pattern_path( + ident, + &Expr::Field { pos, target, name }, + with_final, + ) { Ok(()) => return Ok(()), Err(e) => with_final = e, } @@ -926,243 +931,283 @@ impl<'a, T: Copy> CommentOr<'a, T> { } } -thread_local! { - static FMT_SOURCE: Cell<*const str> = const { Cell::new("") }; +pub struct Formatter<'a> { + source: &'a str, + depth: usize, + disp_buff: String, } -pub fn with_fmt_source(source: &str, f: impl FnOnce() -> T) -> T { - FMT_SOURCE.with(|s| s.set(source)); - let r = f(); - FMT_SOURCE.with(|s| s.set("")); - r -} +impl<'a> Formatter<'a> { + pub fn new(source: &'a str) -> Self { + Self { source, depth: 0, disp_buff: Default::default() } + } -impl<'a> fmt::Display for Expr<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - thread_local! { - static INDENT: Cell = const { Cell::new(0) }; - static DISPLAY_BUFFER: Cell = const { Cell::new(String::new()) }; - } + fn fmt_list( + &mut self, + f: &mut String, + trailing: bool, + end: &str, + sep: &str, + list: &[T], + fmt: impl Fn(&mut Self, &T, &mut String) -> fmt::Result, + ) -> fmt::Result { + self.fmt_list_low(f, trailing, end, sep, list, |s, v, f| { + fmt(s, v, f)?; + Ok(true) + }) + } - fn fmt_list( - f: &mut fmt::Formatter, - trailing: bool, - end: &str, - sep: &str, - list: &[T], - fmt: impl Fn(&T, &mut fmt::Formatter) -> fmt::Result, - ) -> fmt::Result { - fmt_list_low(f, trailing, end, sep, list, |v, f| { - fmt(v, f)?; - Ok(true) - }) - } - - fn fmt_list_low( - f: &mut fmt::Formatter, - trailing: bool, - end: &str, - sep: &str, - list: &[T], - fmt: impl Fn(&T, &mut fmt::Formatter) -> Result, - ) -> fmt::Result { - if !trailing { - let mut first = true; - for expr in list { - if !std::mem::take(&mut first) { - write!(f, "{sep} ")?; - } - first = !fmt(expr, f)?; + fn fmt_list_low( + &mut self, + f: &mut String, + trailing: bool, + end: &str, + sep: &str, + list: &[T], + fmt: impl Fn(&mut Self, &T, &mut String) -> Result, + ) -> fmt::Result { + if !trailing { + let mut first = true; + for expr in list { + if !core::mem::take(&mut first) { + write!(f, "{sep} ")?; } - return write!(f, "{end}"); + first = !fmt(self, expr, f)?; } + return write!(f, "{end}"); + } - writeln!(f)?; - INDENT.with(|i| i.set(i.get() + 1)); - let res = (|| { - for (i, stmt) in list.iter().enumerate() { - for _ in 0..INDENT.with(|i| i.get()) { - write!(f, "\t")?; + writeln!(f)?; + self.depth += 1; + let res = (|| { + for (i, stmt) in list.iter().enumerate() { + for _ in 0..self.depth { + write!(f, "\t")?; + } + let add_sep = fmt(self, stmt, f)?; + if add_sep { + write!(f, "{sep}")?; + } + if let Some(expr) = list.get(i + 1) + && let Some(rest) = self.source.get(expr.posi() as usize..) + { + if insert_needed_semicolon(rest) { + write!(f, ";")?; } - let add_sep = fmt(stmt, f)?; - if add_sep { - write!(f, "{sep}")?; - } - let source: &str = unsafe { &*FMT_SOURCE.with(|s| s.get()) }; - if let Some(expr) = list.get(i + 1) - && let Some(rest) = source.get(expr.posi() as usize..) - { - if insert_needed_semicolon(rest) { - write!(f, ";")?; - } - if preserve_newlines(&source[..expr.posi() as usize]) > 1 { - writeln!(f)?; - } - } - if add_sep { + if preserve_newlines(&self.source[..expr.posi() as usize]) > 1 { writeln!(f)?; } } - Ok(()) - })(); - INDENT.with(|i| i.set(i.get() - 1)); - - for _ in 0..INDENT.with(|i| i.get()) { - write!(f, "\t")?; + if add_sep { + writeln!(f)?; + } } - write!(f, "{end}")?; - res - } + Ok(()) + })(); + self.depth -= 1; + for _ in 0..self.depth { + write!(f, "\t")?; + } + write!(f, "{end}")?; + res + } + + fn fmt_paren( + &mut self, + expr: &Expr, + f: &mut String, + cond: impl FnOnce(&Expr) -> bool, + ) -> fmt::Result { + if cond(expr) { + write!(f, "(")?; + self.fmt(expr, f)?; + write!(f, ")") + } else { + self.fmt(expr, f) + } + } + + pub fn fmt(&mut self, expr: &Expr, f: &mut String) -> fmt::Result { macro_rules! impl_parenter { ($($name:ident => $pat:pat,)*) => { $( - struct $name<'a>(&'a Expr<'a>); - - impl<'a> std::fmt::Display for $name<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - if matches!(self.0, $pat) { - write!(f, "({})", self.0) - } else { - write!(f, "{}", self.0) - } - } - } + let $name = |e: &Expr| matches!(e, $pat); )* }; } impl_parenter! { - Unary => Expr::BinOp { .. }, - Postfix => Expr::UnOp { .. } | Expr::BinOp { .. }, - Consecutive => Expr::UnOp { .. }, + unary => Expr::BinOp { .. }, + postfix => Expr::UnOp { .. } | Expr::BinOp { .. }, + consecutive => Expr::UnOp { .. }, } - match *self { - Self::Ct { value, .. } => write!(f, "$: {}", value), - Self::String { literal, .. } => write!(f, "{}", literal), - Self::Comment { literal, .. } => write!(f, "{}", literal.trim_end()), - Self::Mod { path, .. } => write!(f, "@use(\"{path}\")"), - Self::Field { target, name: field } => write!(f, "{}.{field}", Postfix(target)), - Self::Directive { name, args, .. } => { - write!(f, "@{name}(")?; - fmt_list(f, false, ")", ",", args, fmt::Display::fmt) + match *expr { + Expr::Ct { value, .. } => { + write!(f, "$: ")?; + self.fmt(value, f) } - Self::Struct { fields, trailing_comma, packed, .. } => { + Expr::String { literal, .. } => write!(f, "{literal}"), + Expr::Comment { literal, .. } => write!(f, "{}", literal.trim_end()), + Expr::Mod { path, .. } => write!(f, "@use(\"{path}\")"), + Expr::Field { target, name: field, .. } => { + self.fmt_paren(target, f, postfix)?; + write!(f, ".{field}") + } + Expr::Directive { name, args, .. } => { + write!(f, "@{name}(")?; + self.fmt_list(f, false, ")", ",", args, Self::fmt) + } + Expr::Struct { fields, trailing_comma, packed, .. } => { if packed { write!(f, "packed ")?; } write!(f, "struct {{")?; - fmt_list_low(f, trailing_comma, "}", ",", fields, |field, f| { + self.fmt_list_low(f, trailing_comma, "}", ",", fields, |s, field, f| { match field { - CommentOr::Or(StructField { name, ty, .. }) => write!(f, "{name}: {ty}")?, + CommentOr::Or(StructField { name, ty, .. }) => { + write!(f, "{name}: ")?; + s.fmt(ty, f)? + } CommentOr::Comment { literal, .. } => write!(f, "{literal}")?, } Ok(field.or().is_some()) }) } - Self::Ctor { ty, fields, trailing_comma, .. } => { + Expr::Ctor { ty, fields, trailing_comma, .. } => { if let Some(ty) = ty { - write!(f, "{}", Unary(ty))?; + self.fmt_paren(ty, f, unary)?; } write!(f, ".{{")?; - let fmt_field = |CtorField { name, value, .. }: &_, f: &mut fmt::Formatter| { - if matches!(value, Expr::Ident { name: n, .. } if name == n) { - write!(f, "{name}") - } else { - write!(f, "{name}: {value}") - } - }; - fmt_list(f, trailing_comma, "}", ",", fields, fmt_field) + self.fmt_list( + f, + trailing_comma, + "}", + ",", + fields, + |s: &mut Self, CtorField { name, value, .. }: &_, f| { + if matches!(value, Expr::Ident { name: n, .. } if name == n) { + write!(f, "{name}") + } else { + write!(f, "{name}: ")?; + s.fmt(value, f) + } + }, + ) } - Self::Tupl { ty, fields, trailing_comma, .. } => { + Expr::Tupl { ty, fields, trailing_comma, .. } => { if let Some(ty) = ty { - write!(f, "{}", Unary(ty))?; + self.fmt_paren(ty, f, unary)?; } write!(f, ".(")?; - fmt_list(f, trailing_comma, ")", ",", fields, fmt::Display::fmt) + self.fmt_list(f, trailing_comma, ")", ",", fields, Self::fmt) } - Self::Slice { item, size, .. } => match size { - Some(size) => write!(f, "[{item}; {size}]"), - None => write!(f, "[{item}]"), - }, - Self::Index { base, index } => write!(f, "{base}[{index}]"), - Self::UnOp { op, val, .. } => write!(f, "{op}{}", Unary(val)), - Self::Break { .. } => write!(f, "break"), - Self::Continue { .. } => write!(f, "continue"), - Self::If { cond, then, else_, .. } => { - write!(f, "if {cond} {}", Consecutive(then))?; - if let Some(else_) = else_ { - write!(f, " else {else_}")?; + Expr::Slice { item, size, .. } => { + write!(f, "[")?; + self.fmt(item, f)?; + if let Some(size) = size { + write!(f, "; ")?; + self.fmt(size, f)?; + } + write!(f, "]") + } + Expr::Index { base, index } => { + self.fmt(base, f)?; + write!(f, "[")?; + self.fmt(index, f)?; + write!(f, "]") + } + Expr::UnOp { op, val, .. } => { + write!(f, "{op}")?; + self.fmt_paren(val, f, unary) + } + Expr::Break { .. } => write!(f, "break"), + Expr::Continue { .. } => write!(f, "continue"), + Expr::If { cond, then, else_, .. } => { + write!(f, "if ")?; + self.fmt(cond, f)?; + write!(f, " ")?; + self.fmt_paren(then, f, consecutive)?; + if let Some(e) = else_ { + write!(f, " else ")?; + self.fmt(e, f)?; } Ok(()) } - Self::Loop { body, .. } => write!(f, "loop {body}"), - Self::Closure { ret, body, args, .. } => { + Expr::Loop { body, .. } => { + write!(f, "loop ")?; + self.fmt(body, f) + } + Expr::Closure { ret, body, args, .. } => { write!(f, "fn(")?; - fmt_list(f, false, "", ",", args, |arg, f| { + self.fmt_list(f, false, "", ",", args, |s, arg, f| { if arg.is_ct { write!(f, "$")?; } - write!(f, "{}: {}", arg.name, arg.ty) + write!(f, "{}: ", arg.name)?; + s.fmt(&arg.ty, f) })?; - write!(f, "): {ret} {body}")?; + write!(f, "): ")?; + self.fmt(ret, f)?; + write!(f, " ")?; + self.fmt_paren(body, f, consecutive)?; Ok(()) } - Self::Call { func, args, trailing_comma } => { - write!(f, "{}(", Postfix(func))?; - fmt_list(f, trailing_comma, ")", ",", args, fmt::Display::fmt) + Expr::Call { func, args, trailing_comma } => { + self.fmt_paren(func, f, postfix)?; + write!(f, "(")?; + self.fmt_list(f, trailing_comma, ")", ",", args, Self::fmt) } - Self::Return { val: Some(val), .. } => write!(f, "return {val}"), - Self::Return { val: None, .. } => write!(f, "return"), - Self::Ident { name, is_ct: true, .. } => write!(f, "${name}"), - Self::Ident { name, is_ct: false, .. } => write!(f, "{name}"), - Self::Block { stmts, .. } => { + Expr::Return { val: Some(val), .. } => { + write!(f, "return ")?; + self.fmt(val, f) + } + Expr::Return { val: None, .. } => write!(f, "return"), + Expr::Ident { name, is_ct: true, .. } => write!(f, "${name}"), + Expr::Ident { name, is_ct: false, .. } => write!(f, "{name}"), + Expr::Block { stmts, .. } => { write!(f, "{{")?; - fmt_list(f, true, "}", "", stmts, fmt::Display::fmt) + self.fmt_list(f, true, "}", "", stmts, Self::fmt) } - Self::Number { value, radix, .. } => match radix { + Expr::Number { value, radix, .. } => match radix { Radix::Decimal => write!(f, "{value}"), Radix::Hex => write!(f, "{value:#X}"), Radix::Octal => write!(f, "{value:#o}"), Radix::Binary => write!(f, "{value:#b}"), }, - Self::Bool { value, .. } => write!(f, "{value}"), - Self::Idk { .. } => write!(f, "idk"), - Self::BinOp { + Expr::Bool { value, .. } => write!(f, "{value}"), + Expr::Idk { .. } => write!(f, "idk"), + Expr::BinOp { left, op: TokenKind::Assign, - right: Self::BinOp { left: lleft, op, right }, - } if DISPLAY_BUFFER.with(|cb| { - use std::fmt::Write; - let mut b = cb.take(); - write!(b, "{lleft}").unwrap(); + right: Expr::BinOp { left: lleft, op, right }, + } if { + let mut b = core::mem::take(&mut self.disp_buff); + self.fmt(lleft, &mut b)?; let len = b.len(); - write!(b, "{left}").unwrap(); + self.fmt(left, &mut b)?; let (lleft, left) = b.split_at(len); let res = lleft == left; b.clear(); - cb.set(b); + self.disp_buff = b; res - }) => + } => { - write!(f, "{left} {op}= {right}") + self.fmt(left, f)?; + write!(f, " {op}= ")?; + self.fmt(right, f) } - Self::BinOp { right, op, left } => { - let display_branch = |f: &mut fmt::Formatter, expr: &Self| { - if let Self::BinOp { op: lop, .. } = expr - && op.precedence() > lop.precedence() - { - write!(f, "({expr})") - } else { - write!(f, "{expr}") - } + Expr::BinOp { right, op, left } => { + let pec_miss = |e: &Expr| { + matches!( + e, Expr::BinOp { op: lop, .. } if op.precedence() > lop.precedence() + ) }; - let source: &str = unsafe { &*FMT_SOURCE.with(|s| s.get()) }; - display_branch(f, left)?; - if let Some(mut prev) = source.get(..right.pos() as usize) { + self.fmt_paren(left, f, pec_miss)?; + if let Some(mut prev) = self.source.get(..right.pos() as usize) { prev = prev.trim_end(); let estimate_bound = prev.rfind(|c: char| c.is_ascii_whitespace()).map_or(prev.len(), |i| i + 1); @@ -1170,7 +1215,7 @@ impl<'a> fmt::Display for Expr<'a> { prev = &prev[..exact_bound as usize + estimate_bound]; if preserve_newlines(prev) > 0 { writeln!(f)?; - for _ in 0..INDENT.with(|i| i.get()) + 1 { + for _ in 0..self.depth + 1 { write!(f, "\t")?; } write!(f, "{op} ")?; @@ -1180,7 +1225,7 @@ impl<'a> fmt::Display for Expr<'a> { } else { write!(f, " {op} ")?; } - display_branch(f, right) + self.fmt_paren(right, f, pec_miss) } } } @@ -1207,9 +1252,9 @@ pub struct AstInner { } impl AstInner<[Symbol]> { - fn layout(syms: usize) -> std::alloc::Layout { - std::alloc::Layout::new::>() - .extend(std::alloc::Layout::array::(syms).unwrap()) + fn layout(syms: usize) -> core::alloc::Layout { + core::alloc::Layout::new::>() + .extend(core::alloc::Layout::array::(syms).unwrap()) .unwrap() .0 } @@ -1225,10 +1270,10 @@ impl AstInner<[Symbol]> { let layout = Self::layout(syms.len()); unsafe { - let ptr = std::alloc::alloc(layout); - let inner: *mut Self = std::ptr::from_raw_parts_mut(ptr as *mut _, syms.len()); + let ptr = alloc::alloc::alloc(layout); + let inner: *mut Self = core::ptr::from_raw_parts_mut(ptr as *mut _, syms.len()); - std::ptr::write(inner as *mut AstInner<()>, AstInner { + core::ptr::write(inner as *mut AstInner<()>, AstInner { ref_count: AtomicUsize::new(1), mem: arena.chunk.into_inner(), exprs, @@ -1236,7 +1281,7 @@ impl AstInner<[Symbol]> { file: content.into(), symbols: (), }); - std::ptr::addr_of_mut!((*inner).symbols) + core::ptr::addr_of_mut!((*inner).symbols) .as_mut_ptr() .copy_from_nonoverlapping(syms.as_ptr(), syms.len()); @@ -1249,12 +1294,6 @@ impl AstInner<[Symbol]> { } } -pub fn display_rel_path(path: &(impl AsRef + ?Sized)) -> std::path::Display { - static CWD: std::sync::LazyLock = - std::sync::LazyLock::new(|| std::env::current_dir().unwrap_or_default()); - std::path::Path::new(path).strip_prefix(&*CWD).unwrap_or(std::path::Path::new(path)).display() -} - pub fn report_to( file: &str, path: &str, @@ -1263,7 +1302,11 @@ pub fn report_to( out: &mut impl fmt::Write, ) { let (line, mut col) = lexer::line_col(file.as_bytes(), pos); - _ = writeln!(out, "{}:{}:{}: {}", display_rel_path(path), line, col, msg); + #[cfg(feature = "std")] + let disp = crate::fs::display_rel_path(path); + #[cfg(not(feature = "std"))] + let disp = path; + _ = writeln!(out, "{}:{}:{}: {}", disp, line, col, msg); let line = &file[file[..pos as usize].rfind('\n').map_or(0, |i| i + 1) ..file[pos as usize..].find('\n').unwrap_or(file.len()) + pos as usize]; @@ -1271,6 +1314,7 @@ pub fn report_to( _ = writeln!(out, "{}", line.replace("\t", " ")); _ = writeln!(out, "{}^", " ".repeat(col - 1)); + todo!() } #[derive(PartialEq, Eq, Hash)] @@ -1301,15 +1345,6 @@ impl Ast { } } -impl fmt::Display for Ast { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for expr in self.exprs() { - writeln!(f, "{expr}\n")?; - } - Ok(()) - } -} - impl Default for Ast { fn default() -> Self { Self(AstInner::new(String::new(), "", &no_loader)) @@ -1349,7 +1384,7 @@ unsafe impl Sync for Ast {} impl Clone for Ast { fn clone(&self) -> Self { - unsafe { self.0.as_ref() }.ref_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + unsafe { self.0.as_ref() }.ref_count.fetch_add(1, core::sync::atomic::Ordering::Relaxed); Self(self.0) } } @@ -1357,10 +1392,10 @@ impl Clone for Ast { impl Drop for Ast { fn drop(&mut self) { let inner = unsafe { self.0.as_ref() }; - if inner.ref_count.fetch_sub(1, std::sync::atomic::Ordering::Relaxed) == 1 { + if inner.ref_count.fetch_sub(1, core::sync::atomic::Ordering::Relaxed) == 1 { let layout = AstInner::layout(inner.symbols.len()); unsafe { - std::alloc::dealloc(self.0.as_ptr() as _, layout); + alloc::alloc::dealloc(self.0.as_ptr() as _, layout); } } } @@ -1377,19 +1412,19 @@ impl Deref for Ast { #[derive(Default)] pub struct Arena<'a> { chunk: UnsafeCell, - ph: std::marker::PhantomData<&'a ()>, + ph: core::marker::PhantomData<&'a ()>, } impl<'a> Arena<'a> { pub fn alloc_str(&self, token: &str) -> &'a str { let ptr = self.alloc_slice(token.as_bytes()); - unsafe { std::str::from_utf8_unchecked(ptr) } + unsafe { core::str::from_utf8_unchecked(ptr) } } pub fn alloc(&self, expr: Expr<'a>) -> &'a Expr<'a> { - let align = std::mem::align_of::>(); + let align = core::mem::align_of::>(); let size = expr.used_bytes(); - let layout = unsafe { std::alloc::Layout::from_size_align_unchecked(size, align) }; + let layout = unsafe { core::alloc::Layout::from_size_align_unchecked(size, align) }; let ptr = self.alloc_low(layout); unsafe { ptr.cast::().copy_from_nonoverlapping(NonNull::from(&expr).cast(), size / 8) @@ -1398,17 +1433,17 @@ impl<'a> Arena<'a> { } pub fn alloc_slice(&self, slice: &[T]) -> &'a [T] { - if slice.is_empty() || std::mem::size_of::() == 0 { + if slice.is_empty() || core::mem::size_of::() == 0 { return &mut []; } - let layout = std::alloc::Layout::array::(slice.len()).unwrap(); + let layout = core::alloc::Layout::array::(slice.len()).unwrap(); let ptr = self.alloc_low(layout); unsafe { ptr.as_ptr().cast::().copy_from_nonoverlapping(slice.as_ptr(), slice.len()) }; - unsafe { std::slice::from_raw_parts(ptr.as_ptr() as _, slice.len()) } + unsafe { core::slice::from_raw_parts(ptr.as_ptr() as _, slice.len()) } } - fn alloc_low(&self, layout: std::alloc::Layout) -> NonNull { + fn alloc_low(&self, layout: core::alloc::Layout) -> NonNull { assert!(layout.align() <= ArenaChunk::ALIGN); assert!(layout.size() <= ArenaChunk::CHUNK_SIZE); @@ -1419,7 +1454,7 @@ impl<'a> Arena<'a> { } unsafe { - std::ptr::write(chunk, ArenaChunk::new(chunk.base)); + core::ptr::write(chunk, ArenaChunk::new(chunk.base)); } chunk.alloc(layout).unwrap() @@ -1433,33 +1468,33 @@ struct ArenaChunk { impl Default for ArenaChunk { fn default() -> Self { - Self { base: std::ptr::null_mut(), end: std::ptr::null_mut() } + Self { base: core::ptr::null_mut(), end: core::ptr::null_mut() } } } impl ArenaChunk { - const ALIGN: usize = std::mem::align_of::(); + const ALIGN: usize = core::mem::align_of::(); const CHUNK_SIZE: usize = 1 << 16; - const LAYOUT: std::alloc::Layout = - unsafe { std::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) }; - const NEXT_OFFSET: usize = Self::CHUNK_SIZE - std::mem::size_of::<*mut u8>(); + const LAYOUT: core::alloc::Layout = + unsafe { core::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) }; + const NEXT_OFFSET: usize = Self::CHUNK_SIZE - core::mem::size_of::<*mut u8>(); fn new(next: *mut u8) -> Self { - let base = unsafe { std::alloc::alloc(Self::LAYOUT) }; + let base = unsafe { alloc::alloc::alloc(Self::LAYOUT) }; let end = unsafe { base.add(Self::NEXT_OFFSET) }; Self::set_next(base, next); Self { base, end } } fn set_next(curr: *mut u8, next: *mut u8) { - unsafe { std::ptr::write(curr.add(Self::NEXT_OFFSET) as *mut _, next) }; + unsafe { core::ptr::write(curr.add(Self::NEXT_OFFSET) as *mut _, next) }; } fn next(curr: *mut u8) -> *mut u8 { - unsafe { std::ptr::read(curr.add(Self::NEXT_OFFSET) as *mut _) } + unsafe { core::ptr::read(curr.add(Self::NEXT_OFFSET) as *mut _) } } - fn alloc(&mut self, layout: std::alloc::Layout) -> Option> { + fn alloc(&mut self, layout: core::alloc::Layout) -> Option> { let padding = self.end as usize - (self.end as usize & !(layout.align() - 1)); let size = layout.size() + padding; if size > self.end as usize - self.base as usize { @@ -1486,7 +1521,7 @@ impl Drop for ArenaChunk { let mut current = self.base; while !current.is_null() { let next = Self::next(current); - unsafe { std::alloc::dealloc(current, Self::LAYOUT) }; + unsafe { alloc::alloc::dealloc(current, Self::LAYOUT) }; current = next; //log::dbg!("deallocating full chunk"); } @@ -1495,10 +1530,12 @@ impl Drop for ArenaChunk { #[cfg(test)] pub mod test { + use {alloc::borrow::ToOwned, std::string::String}; + pub fn format(ident: &str, input: &str) { let ast = super::Ast::new(ident, input.to_owned(), &|_, _| Ok(0)); - let mut output = Vec::new(); - crate::format_to(&ast, input, &mut output).unwrap(); + let mut output = String::new(); + crate::fs::format_to(&ast, input, &mut output).unwrap(); let input_path = format!("formatter_{ident}.expected"); let output_path = format!("formatter_{ident}.actual"); diff --git a/hblang/src/son.rs b/hblang/src/son.rs index dc4ddff..2b3ac23 100644 --- a/hblang/src/son.rs +++ b/hblang/src/son.rs @@ -12,19 +12,20 @@ use { task, ty::{self}, vc::{BitSet, Vc}, - Func, HashMap, Offset, Reloc, Sig, Size, SymKey, TypedReloc, Types, + Func, HashMap, IdentityHasher, Offset, OffsetIter, Reloc, Sig, SymKey, TypedReloc, Types, }, - core::{fmt, format_args as fa}, - regalloc2::VReg, - std::{ + alloc::{borrow::ToOwned, string::String, vec::Vec}, + core::{ assert_matches::debug_assert_matches, cell::RefCell, - collections::hash_map, convert::identity, - fmt::{Debug, Display, Write}, + fmt::{self, Debug, Display, Write}, + format_args as fa, hash::{Hash as _, Hasher}, mem, ops, }, + hashbrown::hash_map, + regalloc2::VReg, }; const VOID: Nid = 0; @@ -48,34 +49,13 @@ struct LookupEntry { hash: u64, } -#[derive(Default)] -struct IdentityHash(u64); - -impl std::hash::Hasher for IdentityHash { - fn finish(&self) -> u64 { - self.0 - } - - fn write(&mut self, _: &[u8]) { - unimplemented!() - } - - fn write_u64(&mut self, i: u64) { - self.0 = i; - } -} - -impl std::hash::Hash for LookupEntry { +impl core::hash::Hash for LookupEntry { fn hash(&self, state: &mut H) { state.write_u64(self.hash); } } -type Lookup = std::collections::hash_map::HashMap< - LookupEntry, - (), - std::hash::BuildHasherDefault, ->; +type Lookup = hashbrown::HashMap>; struct Nodes { values: Vec>, @@ -96,6 +76,16 @@ impl Default for Nodes { } impl Nodes { + fn trace_mem(&self, mut op: Nid) -> Nid { + loop { + op = match self[op].kind { + Kind::Stre { .. } => self[op].inputs[2], + Kind::Ptr { .. } | Kind::Load { .. } => self[op].inputs[1], + _ => break op, + }; + } + } + fn remove_low(&mut self, id: Nid) -> Node { let value = mem::replace(&mut self.values[id as usize], Err(self.free)).unwrap(); self.free = id; @@ -119,7 +109,7 @@ impl Nodes { let (raw_entry, hash) = Self::find_node(&mut self.lookup, &self.values, &node); let entry = match raw_entry { - hash_map::RawEntryMut::Occupied(mut o) => return o.get_key_value().0.nid, + hash_map::RawEntryMut::Occupied(o) => return o.get_key_value().0.nid, hash_map::RawEntryMut::Vacant(v) => v, }; @@ -149,7 +139,7 @@ impl Nodes { values: &[Result], node: &Node, ) -> ( - hash_map::RawEntryMut<'a, LookupEntry, (), std::hash::BuildHasherDefault>, + hash_map::RawEntryMut<'a, LookupEntry, (), core::hash::BuildHasherDefault>, u64, ) { let mut hasher = crate::FnvHasher::default(); @@ -258,7 +248,7 @@ impl Nodes { // this is more general the pushing constants to left to help deduplicate expressions more let mut changed = false; if op.is_comutative() && self[lhs].key() < self[rhs].key() { - std::mem::swap(&mut lhs, &mut rhs); + core::mem::swap(&mut lhs, &mut rhs); changed = true; } @@ -341,21 +331,22 @@ impl Nodes { return Some(self[target].inputs[1]); } } - K::Stre => { + K::Stre { offset } => { let parent = self[target].inputs[2]; - if self[parent].kind == K::Stre && self[parent].outputs.len() == 1 { + + if self[parent].kind == (K::Stre { offset }) && self[parent].outputs.len() == 1 { return Some(self.modify_input(parent, 1, self[target].inputs[1])); } } - K::Load => { + K::Load { offset } => { let parent = self[target].inputs[1]; - if self[parent].kind == K::Stre && self[parent].offset != u32::MAX { + if self[parent].kind == (K::Stre { offset }) && self[parent].offset != u32::MAX { debug_assert_eq!(self[target].ty, self[parent].ty, "TODO"); return Some(self[parent].inputs[1]); } - if self[parent].kind == K::Load && self[parent].offset != u32::MAX { + if self[parent].kind == (K::Load { offset }) && self[parent].offset != u32::MAX { return Some(parent); } } @@ -396,7 +387,7 @@ impl Nodes { self.values[target as usize].as_ref().unwrap(), ); match entry { - hash_map::RawEntryMut::Occupied(mut other) => { + hash_map::RawEntryMut::Occupied(other) => { let rpl = other.get_key_value().0.nid; self[target].inputs[inp_index] = prev; self.replace(target, rpl); @@ -424,8 +415,8 @@ impl Nodes { self.values.iter().enumerate().filter_map(|(i, s)| Some((i as _, s.as_ref().ok()?))) } - fn graphviz_low(&self, out: &mut String) -> std::fmt::Result { - use std::fmt::Write; + fn graphviz_low(&self, out: &mut String) -> core::fmt::Result { + use core::fmt::Write; for (i, node) in self.iter() { let color = if self.is_cfg(i) { "yellow" } else { "white" }; @@ -445,7 +436,7 @@ impl Nodes { } #[allow(clippy::format_in_format_args)] - fn basic_blocks_instr(&mut self, out: &mut String, node: Nid) -> std::fmt::Result { + fn basic_blocks_instr(&mut self, out: &mut String, node: Nid) -> core::fmt::Result { if self[node].kind != Kind::Loop && self[node].kind != Kind::Region { write!(out, " {node:>2}-c{:>2}: ", self[node].ralloc_backref)?; } @@ -468,8 +459,8 @@ impl Nodes { Kind::Then => write!(out, "ctrl: {:<5}", "then"), Kind::Else => write!(out, "ctrl: {:<5}", "else"), Kind::Stck => write!(out, "stck: "), - Kind::Load => write!(out, "load"), - Kind::Stre => write!(out, "stre"), + Kind::Load { offset } => write!(out, "load: {offset:<5}"), + Kind::Stre { offset } => write!(out, "stre: {offset:<5}"), _ => unreachable!(), }?; @@ -485,7 +476,7 @@ impl Nodes { Ok(()) } - fn basic_blocks_low(&mut self, out: &mut String, mut node: Nid) -> std::fmt::Result { + fn basic_blocks_low(&mut self, out: &mut String, mut node: Nid) -> core::fmt::Result { let iter = |nodes: &Nodes, node| nodes[node].outputs.clone().into_iter().rev(); while self.visited.set(node) { match self[node].kind { @@ -558,7 +549,7 @@ impl Nodes { let mut print_ret = true; for o in iter(self, node) { if self[o].inputs[0] == node - && (self[node].outputs[0] != o || std::mem::take(&mut print_ret)) + && (self[node].outputs[0] != o || core::mem::take(&mut print_ret)) { self.basic_blocks_instr(out, o)?; } @@ -761,10 +752,15 @@ impl ops::IndexMut for Nodes { pub enum Kind { #[default] Start, + // [ctrl] + Entry, + Mem, // [terms...] End, // [ctrl, cond] If, + Then, + Else, // [lhs, rhs] Region, // [entry, back] @@ -780,11 +776,6 @@ pub enum Kind { Arg { index: u32, }, - // [ctrl] - Entry, - Mem, - Then, - Else, // [ctrl, oper] UnOp { op: lexer::TokenKind, @@ -799,11 +790,18 @@ pub enum Kind { }, // [ctrl] Stck, - // [ctrl, memory] - Load, + Ptr { + offset: Offset, + }, // [ctrl, memory] - Stre, + Load { + offset: Offset, + }, + // [ctrl, value, memory] + Stre { + offset: Offset, + }, } impl Kind { @@ -834,7 +832,7 @@ impl Kind { } impl fmt::Display for Kind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { Kind::CInt { value } => write!(f, "#{value}"), Kind::Entry => write!(f, "ctrl[entry]"), @@ -915,7 +913,6 @@ struct ItemCtx { call_count: u16, filled: Vec, - stack_size: Size, loops: Vec, vars: Vec, memories: Vec, @@ -1002,16 +999,10 @@ impl Codegen { mut region: Nid, offset: Offset, kind: Kind, - ty: ty::Id, + mut ty: ty::Id, mut inps: Vc, ) -> Nid { - loop { - match self.ci.nodes[region].kind { - Kind::Arg { .. } | Kind::Stck => break, - Kind::Stre => region = self.ci.nodes[region].inputs[2], - k => unreachable!("{k:?}"), - } - } + region = self.ci.nodes.trace_mem(region); let size = self.tys.size_of(ty); let insert_start = self @@ -1036,15 +1027,19 @@ impl Codegen { inps.push(region); } + if matches!(kind, Kind::Ptr { .. }) { + ty = self.tys.make_ptr(ty); + } + let (new_op, peeped) = self.ci.nodes.new_node_low(ty, kind, inps); - if !peeped { + if !peeped && !matches!(kind, Kind::Ptr { .. }) { for mk in &self.ci.memories[insert_start..insert_end] { self.ci.nodes.unlock(mk.node); } self.ci.memories.splice( insert_start..insert_end, - std::iter::once(MemKey { node: new_op, region, offset }), + core::iter::once(MemKey { node: new_op, region, offset }), ); self.ci.nodes.lock(new_op); } @@ -1052,11 +1047,15 @@ impl Codegen { } fn store_mem(&mut self, region: Nid, offset: Offset, value: Nid) -> Nid { - self.mem_op(region, offset, Kind::Stre, self.tof(value), [VOID, value].into()) + self.mem_op(region, offset, Kind::Stre { offset }, self.tof(value), [VOID, value].into()) } fn load_mem(&mut self, region: Nid, offset: Offset, ty: ty::Id) -> Nid { - self.mem_op(region, offset, Kind::Load, ty, [VOID].into()) + self.mem_op(region, offset, Kind::Load { offset }, ty, [VOID].into()) + } + + fn ptr_mem(&mut self, region: Nid, offset: Offset, ty: ty::Id) -> Nid { + self.mem_op(region, offset, Kind::Ptr { offset }, ty, [VOID].into()) } pub fn generate(&mut self) { @@ -1078,8 +1077,8 @@ impl Codegen { } fn expr_ctx(&mut self, expr: &Expr, ctx: Ctx) -> Option { - let msg = "i know nothing about this name gal which is vired \ - because we parsed succesfully"; + let msg = "i know nothing about this name, gal, which is vired \ + because we parsed succesfully"; // ordered by complexity of the expression match *expr { Expr::Comment { .. } => Some(VOID), @@ -1098,7 +1097,7 @@ impl Codegen { Some(self.ci.vars[index].value) } Expr::Number { value, .. } => Some(self.ci.nodes.new_node( - ctx.ty.filter(|ty| ty.is_integer() || ty.is_pointer()).unwrap_or(ty::INT.into()), + ctx.ty.filter(|ty| ty.is_integer() || ty.is_pointer()).unwrap_or(ty::Id::INT), Kind::CInt { value }, [VOID], )), @@ -1126,21 +1125,55 @@ impl Codegen { None } + Expr::Field { target, name, pos } => { + let vtarget = self.expr(target)?; + let tty = self.tof(vtarget); + + let ty::Kind::Struct(s) = self.tys.base_of(tty).unwrap_or(tty).expand() else { + self.report( + pos, + fa!( + "the '{}' is not a struct, or pointer to one, \ + but accessing fields is only possible on structs", + self.ty_display(tty) + ), + ); + return Some(NEVER); + }; + + let Some((ty, offset)) = OffsetIter::offset_of(&self.tys, s, name) else { + let field_list = self + .tys + .struct_fields(s) + .iter() + .map(|f| self.tys.field_names.ident_str(f.name)) + .intersperse("', '") + .collect::(); + self.report( + pos, + fa!( + "the '{}' does not have this field, \ + but it does have '{field_list}'", + self.ty_display(tty) + ), + ); + return Some(NEVER); + }; + + Some(self.load_mem(vtarget, ty, offset)) + } Expr::UnOp { op: TokenKind::Band, val, .. } => { let ctx = Ctx { ty: ctx.ty.and_then(|ty| self.tys.base_of(ty)) }; let mut val = self.expr_ctx(val, ctx)?; let ty = self.tof(val); if !matches!(self.ci.nodes[val].kind, Kind::Stck) { - let ptr = self.tys.make_ptr(ty); - let stck = self.ci.nodes.new_node_nop(ptr, Kind::Stck, [VOID, MEM]); - self.ci.nodes[stck].offset = self.ci.stack_size; - self.ci.stack_size += self.tys.size_of(ty); + let stck = self.ci.nodes.new_node_nop(ty, Kind::Stck, [VOID, MEM]); self.store_mem(stck, 0, val); val = stck; } - Some(val) + Some(self.ptr_mem(val, 0, ty)) } Expr::UnOp { op: TokenKind::Mul, val, pos } => { let ctx = Ctx { ty: ctx.ty.map(|ty| self.tys.make_ptr(ty)) }; @@ -1176,7 +1209,7 @@ impl Codegen { return Some(NEVER); }; - let prev = std::mem::replace(&mut var.value, value); + let prev = core::mem::replace(&mut var.value, value); self.ci.nodes.unlock_remove(prev); Some(VOID) } @@ -1192,7 +1225,7 @@ impl Codegen { pos, fa!("the '{}' can not be dereferneced", self.ty_display(self.tof(val))), ); - ty::NEVER.into() + ty::Id::NEVER }); let value = self.expr_ctx(right, Ctx::default().with_ty(base))?; _ = self.assert_ty(right.pos(), self.tof(value), base, true, "stored value"); @@ -1271,6 +1304,7 @@ impl Codegen { fa!("argument {}", carg.name), ); if ty.is_pointer() { + value = self.ci.nodes.trace_mem(value); value = self .ci .memories @@ -1308,7 +1342,49 @@ impl Codegen { return Some(NEVER); }; - todo!() + // TODO: dont allocate + let mut offs = OffsetIter::new(s, &self.tys) + .into_iter(&self.tys) + .map(|(f, o)| (f.ty, o)) + .collect::>(); + let mem = self.ci.nodes.new_node(sty, Kind::Stck, [VOID, MEM]); + for field in fields { + let Some(index) = self.tys.find_struct_field(s, field.name) else { + self.report( + field.pos, + fa!("struct '{}' does not have this field", self.ty_display(sty)), + ); + continue; + }; + + let (ty, offset) = + core::mem::replace(&mut offs[index], (ty::Id::UNDECLARED, field.pos)); + + if ty == ty::Id::UNDECLARED { + self.report(field.pos, "the struct field is already initialized"); + self.report(offset, "previous initialization is here"); + continue; + } + + let value = self.expr_ctx(&field.value, Ctx::default().with_ty(ty))?; + self.store_mem(mem, offset, value); + } + + let field_list = self + .tys + .struct_fields(s) + .iter() + .zip(offs) + .filter(|&(_, (ty, _))| ty != ty::Id::UNDECLARED) + .map(|(f, _)| self.tys.field_names.ident_str(f.name)) + .intersperse(", ") + .collect::(); + + if !field_list.is_empty() { + self.report(pos, fa!("the struct initializer is missing {field_list}")); + } + + Some(mem) } Expr::Block { stmts, .. } => { let base = self.ci.vars.len(); @@ -1320,7 +1396,7 @@ impl Codegen { _ = self.assert_ty( stmt.pos(), self.tof(id), - ty::VOID.into(), + ty::Id::VOID, true, "statement", ); @@ -1342,7 +1418,7 @@ impl Codegen { self.ci.loops.push(Loop { node: self.ci.ctrl, ctrl: [Nid::MAX; 2], - ctrl_scope: std::array::from_fn(|_| vec![]), + ctrl_scope: core::array::from_fn(|_| vec![]), scope: self.ci.vars.clone(), }); @@ -1386,7 +1462,7 @@ impl Codegen { self.ci.nodes.lock(self.ci.ctrl); - std::mem::swap(&mut self.ci.vars, &mut bres); + core::mem::swap(&mut self.ci.vars, &mut bres); for ((dest_var, mut scope_var), loop_var) in self.ci.vars.iter_mut().zip(scope).zip(bres) @@ -1466,7 +1542,7 @@ impl Codegen { self.ci.ctrl = self.ci.nodes.new_node(ty::VOID, Kind::Then, [if_node]); let lcntrl = self.expr(then).map_or(Nid::MAX, |_| self.ci.ctrl); - let mut then_scope = std::mem::replace(&mut self.ci.vars, else_scope); + let mut then_scope = core::mem::replace(&mut self.ci.vars, else_scope); self.ci.ctrl = self.ci.nodes.new_node(ty::VOID, Kind::Else, [if_node]); let rcntrl = if let Some(else_) = else_ { self.expr(else_).map_or(Nid::MAX, |_| self.ci.ctrl) @@ -1495,7 +1571,7 @@ impl Codegen { self.ci.ctrl = self.ci.nodes.new_node(ty::VOID, Kind::Region, [lcntrl, rcntrl]); - else_scope = std::mem::take(&mut self.ci.vars); + else_scope = core::mem::take(&mut self.ci.vars); Self::merge_scopes( &mut self.ci.nodes, @@ -1531,7 +1607,7 @@ impl Codegen { } } else { let reg = self.ci.nodes.new_node(ty::VOID, Kind::Region, [self.ci.ctrl, loob.ctrl[id]]); - let mut scope = std::mem::take(&mut loob.ctrl_scope[id]); + let mut scope = core::mem::take(&mut loob.ctrl_scope[id]); Self::merge_scopes( &mut self.ci.nodes, @@ -1608,16 +1684,18 @@ impl Codegen { ret: Some(sig.ret), ..self.pool.cis.pop().unwrap_or_default() }; - let prev_ci = std::mem::replace(&mut self.ci, repl); + let prev_ci = core::mem::replace(&mut self.ci, repl); let start = self.ci.nodes.new_node(ty::VOID, Kind::Start, []); debug_assert_eq!(start, VOID); let end = self.ci.nodes.new_node(ty::NEVER, Kind::End, []); debug_assert_eq!(end, NEVER); + self.ci.nodes.lock(end); self.ci.ctrl = self.ci.nodes.new_node(ty::VOID, Kind::Entry, [VOID]); debug_assert_eq!(self.ci.ctrl, ENTRY); let mem = self.ci.nodes.new_node(ty::VOID, Kind::Mem, [VOID]); debug_assert_eq!(mem, MEM); + self.ci.nodes.lock(mem); let Expr::BinOp { left: Expr::Ident { .. }, @@ -1625,7 +1703,7 @@ impl Codegen { right: &Expr::Closure { body, args, .. }, } = expr else { - unreachable!("{expr}") + unreachable!("{}", self.ast_display(expr)) }; let mut sig_args = sig.args.range(); @@ -1644,14 +1722,32 @@ impl Codegen { self.report(body.pos(), "expected all paths in the fucntion to return"); } + self.ci.nodes.unlock(end); + for mem in self.ci.memories.drain(..) { + if self.ci.nodes[mem.region].kind == Kind::Stck + && self.ci.nodes[mem.node] + .outputs + .iter() + .all(|&n| self.ci.nodes[n].kind == Kind::Return) + { + let outs = core::mem::take(&mut self.ci.nodes[mem.node].outputs); + for out in outs { + let index = + self.ci.nodes[out].inputs.iter().rposition(|&o| o == mem.node).unwrap(); + self.ci.nodes[out].inputs.swap_remove(index); + } + } self.ci.nodes.unlock_remove(mem.node); } + self.ci.nodes.unlock(mem); + for var in self.ci.vars.drain(..) { self.ci.nodes.unlock(var.value); } if self.errors.borrow().is_empty() { + self.ci.nodes.graphviz(); self.gcm(); #[cfg(debug_assertions)] @@ -1665,7 +1761,20 @@ impl Codegen { } //self.ci.nodes.basic_blocks(); - //self.ci.nodes.graphviz(); + self.ci.nodes.graphviz(); + + let mut stack_size = 0; + '_compute_stack: { + let mems = core::mem::take(&mut self.ci.nodes[MEM].outputs); + for &stck in mems.iter() { + stack_size += self.tys.size_of(self.ci.nodes[stck].ty); + self.ci.nodes[stck].offset = stack_size; + } + for &stck in mems.iter() { + self.ci.nodes[stck].offset = stack_size - self.ci.nodes[stck].offset; + } + self.ci.nodes[mem].outputs = mems; + } self.ci.vars = orig_vars; self.ci.nodes.visited.clear(self.ci.nodes.values.len()); @@ -1693,8 +1802,8 @@ impl Codegen { let mut stripped_prelude_size = 0; '_close_function: { let pushed = - (saved as i64 + (std::mem::take(&mut self.ci.call_count) != 0) as i64) * 8; - let stack = std::mem::take(&mut self.ci.stack_size) as i64; + (saved as i64 + (core::mem::take(&mut self.ci.call_count) != 0) as i64) * 8; + let stack = stack_size as i64; match (pushed, stack) { (0, 0) => { @@ -1729,15 +1838,15 @@ impl Codegen { self.tys.funcs[id as usize].relocs.append(&mut self.ci.relocs); self.ci.nodes.clear(); self.ci.filled.clear(); - self.pool.cis.push(std::mem::replace(&mut self.ci, prev_ci)); + self.pool.cis.push(core::mem::replace(&mut self.ci, prev_ci)); } fn emit_body(&mut self, sig: Sig) -> usize { - let mut nodes = std::mem::take(&mut self.ci.nodes); + let mut nodes = core::mem::take(&mut self.ci.nodes); let func = Function::new(&mut nodes, &self.tys, sig); if self.ci.call_count != 0 { - std::mem::swap( + core::mem::swap( &mut self.ralloc.env.preferred_regs_by_class, &mut self.ralloc.env.non_preferred_regs_by_class, ); @@ -1752,7 +1861,7 @@ impl Codegen { .unwrap_or_else(|err| panic!("{err}")); if self.ci.call_count != 0 { - std::mem::swap( + core::mem::swap( &mut self.ralloc.env.preferred_regs_by_class, &mut self.ralloc.env.non_preferred_regs_by_class, ); @@ -1850,58 +1959,37 @@ impl Codegen { self.ci.emit(instrs::jal(reg::RET_ADDR, reg::ZERO, 0)); } Kind::Stck => { - let size = self.tys.size_of(self.tys.base_of(node.ty).unwrap()); let base = reg::STACK_PTR; - let offset = self.ci.stack_size - func.nodes[nid].offset - size; + let offset = func.nodes[nid].offset; self.ci.emit(instrs::addi64(atr(allocs[0]), base, offset as _)); } - Kind::Load => { - let mut region = node.inputs[1]; - let offset = 0; + Kind::Ptr { offset } => { + let region = func.nodes.trace_mem(node.inputs[1]); + let base = reg::STACK_PTR; + let offset = func.nodes[region].offset + offset; + self.ci.emit(instrs::addi64(atr(allocs[0]), base, offset as _)); + } + Kind::Load { offset } => { + let region = func.nodes.trace_mem(node.inputs[1]); let size = self.tys.size_of(node.ty); debug_assert_eq!(size, 8, "TODO"); - let (base, offset) = loop { - match func.nodes[region].kind { - Kind::Stck => { - break ( - reg::STACK_PTR, - self.ci.stack_size - func.nodes[region].offset + offset - - size, - ) - } - Kind::Stre => region = func.nodes[region].inputs[2], - Kind::Load => region = func.nodes[region].inputs[1], - k => unreachable!("{k:?}"), - }; + let (base, offset) = match func.nodes[region].kind { + Kind::Stck => (reg::STACK_PTR, func.nodes[region].offset + offset), + k => unreachable!("{k:?}"), }; - let &[dst] = allocs else { unreachable!() }; self.ci.emit(instrs::ld(atr(dst), base, offset as _, size as _)); } - Kind::Stre => { - let mut region = node.inputs[2]; - let offset = 0; + Kind::Stre { offset } => { + let region = func.nodes.trace_mem(node.inputs[2]); let size = self.tys.size_of(node.ty); debug_assert_eq!(size, 8, "TODO"); - let (base, offset, src) = loop { - match func.nodes[region].kind { - Kind::Stck => { - break ( - reg::STACK_PTR, - self.ci.stack_size - func.nodes[region].offset + offset - - size, - allocs[0], - ); - } - Kind::Arg { .. } => { - break (atr(allocs[0]), 0, allocs[1]); - } - Kind::Stre => region = func.nodes[region].inputs[2], - Kind::Load => region = func.nodes[region].inputs[1], - k => unreachable!("{k:?}"), - }; + let nd = &func.nodes[region]; + let (base, offset, src) = match nd.kind { + Kind::Stck => (reg::STACK_PTR, nd.offset + offset, allocs[0]), + Kind::Arg { .. } => (atr(allocs[0]), offset, allocs[1]), + k => unreachable!("{k:?}"), }; - self.ci.emit(instrs::st(atr(src), base, offset as _, size as _)); } _ => unreachable!(), @@ -1920,7 +2008,7 @@ impl Codegen { } self.report_unhandled_ast(expr, "type"); - ty::NEVER.into() + ty::Id::NEVER } fn find_or_declare( @@ -1965,7 +2053,7 @@ impl Codegen { return existing.expand(); } - let prev_file = std::mem::replace(&mut self.ci.file, file); + let prev_file = core::mem::replace(&mut self.ci.file, file); let sym = match expr { Expr::BinOp { left: Expr::Ident { .. }, @@ -2022,6 +2110,12 @@ impl Codegen { ty::Display::new(&self.tys, &self.files, ty) } + fn ast_display(&self, ast: &Expr) -> String { + let mut s = String::new(); + parser::Formatter::new(&self.cfile().file).fmt(ast, &mut s).unwrap(); + s + } + #[must_use] #[track_caller] fn assert_ty( @@ -2040,28 +2134,33 @@ impl Codegen { let ty = self.ty_display(ty); let expected = self.ty_display(expected); self.report(pos, fa!("expected {hint} to be of type {expected}, got {ty}")); - ty::NEVER.into() + ty::Id::NEVER } } - fn report_log(&self, pos: Pos, msg: impl std::fmt::Display) { + fn report_log(&self, pos: Pos, msg: impl core::fmt::Display) { let mut buf = self.errors.borrow_mut(); self.report_log_to(pos, msg, &mut *buf); } - fn report_log_to(&self, pos: Pos, msg: impl std::fmt::Display, out: &mut impl std::fmt::Write) { + fn report_log_to( + &self, + pos: Pos, + msg: impl core::fmt::Display, + out: &mut impl core::fmt::Write, + ) { self.cfile().report_to(pos, msg, out); } #[track_caller] - fn assert_report(&self, cond: bool, pos: Pos, msg: impl std::fmt::Display) { + fn assert_report(&self, cond: bool, pos: Pos, msg: impl core::fmt::Display) { if !cond { self.report(pos, msg); } } #[track_caller] - fn report(&self, pos: Pos, msg: impl std::fmt::Display) { + fn report(&self, pos: Pos, msg: impl core::fmt::Display) { self.report_log(pos, msg); } @@ -2069,10 +2168,13 @@ impl Codegen { fn report_unhandled_ast(&self, ast: &Expr, hint: &str) { self.report( ast.pos(), - fa!("compiler does not (yet) know how to handle ({hint}):\n\ - {ast:}\n\ + fa!( + "compiler does not (yet) know how to handle ({hint}):\n\ + {:}\n\ info for weak people:\n\ - {ast:#?}"), + {ast:#?}", + self.ast_display(ast) + ), ); } @@ -2095,8 +2197,7 @@ impl Codegen { fn fatal_report(&self, pos: Pos, msg: impl Display) -> ! { self.report(pos, msg); - eprintln!("{}", self.errors.borrow()); - std::process::exit(1); + panic!("{}", self.errors.borrow()); } fn gcm(&mut self) { @@ -2109,10 +2210,10 @@ impl Codegen { fn get_load_type(&self, val: Nid) -> Option { Some(match self.ci.nodes[val].kind { - Kind::Stre | Kind::Load => self.ci.nodes[val].ty, - Kind::Stck | Kind::Arg { .. } => { - self.tys.base_of(self.ci.nodes[val].ty).expect("stack has pointer type, laways") + Kind::Stre { .. } | Kind::Load { .. } | Kind::Stck | Kind::Arg { .. } => { + self.ci.nodes[val].ty } + Kind::Ptr { .. } => self.tys.base_of(self.ci.nodes[val].ty).unwrap(), _ => return None, }) } @@ -2145,7 +2246,7 @@ struct Function<'a> { } impl Debug for Function<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { for (i, block) in self.blocks.iter().enumerate() { writeln!(f, "sb{i}{:?}-{:?}:", block.params, block.preds)?; @@ -2261,7 +2362,7 @@ impl<'a> Function<'a> { && let Some((_, swapped)) = op.cond_op(node.ty.is_signed()) { if swapped { - std::mem::swap(&mut then, &mut else_); + core::mem::swap(&mut then, &mut else_); } let &[_, lhs, rhs] = self.nodes[cond].inputs.as_slice() else { unreachable!() }; let ops = vec![self.urg(lhs), self.urg(rhs)]; @@ -2389,8 +2490,8 @@ impl<'a> Function<'a> { let ty = self.tys.args[ti]; loop { match self.nodes[i].kind { - Kind::Stre => i = self.nodes[i].inputs[2], - Kind::Load => i = self.nodes[i].inputs[1], + Kind::Stre { .. } => i = self.nodes[i].inputs[2], + Kind::Load { .. } => i = self.nodes[i].inputs[1], _ => break, } } @@ -2419,31 +2520,29 @@ impl<'a> Function<'a> { let ops = vec![self.drg(nid)]; self.add_instr(nid, ops); } - Kind::Load => { - let mut region = node.inputs[1]; - let ops = loop { - match self.nodes[region].kind { - Kind::Stck => break vec![self.drg(nid)], - Kind::Stre => region = self.nodes[region].inputs[2], - Kind::Load => region = self.nodes[region].inputs[1], - k => unreachable!("{k:?}"), - }; + Kind::Ptr { .. } => { + let region = self.nodes.trace_mem(node.inputs[1]); + let ops = match self.nodes[region].kind { + Kind::Stck => vec![self.drg(nid)], + k => unreachable!("{k:?}"), }; - self.add_instr(nid, ops); } - Kind::Stre => { - let mut region = node.inputs[2]; - let ops = loop { - match self.nodes[region].kind { - Kind::Stck => break vec![self.urg(node.inputs[1])], - Kind::Arg { .. } => break vec![self.urg(region), self.urg(node.inputs[1])], - Kind::Stre => region = self.nodes[region].inputs[2], - Kind::Load => region = self.nodes[region].inputs[1], - k => unreachable!("{k:?}"), - }; + Kind::Load { .. } => { + let region = self.nodes.trace_mem(node.inputs[1]); + let ops = match self.nodes[region].kind { + Kind::Stck => vec![self.drg(nid)], + k => unreachable!("{k:?}"), + }; + self.add_instr(nid, ops); + } + Kind::Stre { .. } => { + let region = self.nodes.trace_mem(node.inputs[2]); + let ops = match self.nodes[region].kind { + Kind::Stck => vec![self.urg(node.inputs[1])], + Kind::Arg { .. } => vec![self.urg(region), self.urg(node.inputs[1])], + k => unreachable!("{k:?}"), }; - self.add_instr(nid, ops); } } @@ -2759,7 +2858,10 @@ fn common_dom(mut a: Nid, mut b: Nid, nodes: &mut Nodes) -> Nid { #[cfg(test)] mod tests { - use std::fmt::Write; + use { + alloc::{string::String, vec::Vec}, + core::fmt::Write, + }; fn generate(ident: &'static str, input: &'static str, output: &mut String) { _ = env_logger::builder().is_test(true).try_init(); @@ -2780,9 +2882,7 @@ mod tests { let mut out = Vec::new(); codegen.tys.assemble(&mut out); - let mut buf = Vec::::new(); - let err = codegen.tys.disasm(&out, &codegen.files, &mut buf, |_| {}); - output.push_str(String::from_utf8(buf).unwrap().as_str()); + let err = codegen.tys.disasm(&out, &codegen.files, output, |_| {}); if let Err(e) = err { writeln!(output, "!!! asm is invalid: {e}").unwrap(); return; diff --git a/hblang/src/vc.rs b/hblang/src/vc.rs index 13c0ad4..94892ca 100644 --- a/hblang/src/vc.rs +++ b/hblang/src/vc.rs @@ -1,8 +1,11 @@ -use std::{ - fmt::Debug, - mem::MaybeUninit, - ops::{Deref, DerefMut, Not}, - ptr::Unique, +use { + alloc::vec::Vec, + core::{ + fmt::Debug, + mem::MaybeUninit, + ops::{Deref, DerefMut, Not}, + ptr::Unique, + }, }; type Nid = u16; @@ -22,7 +25,7 @@ impl Default for Vc { } impl Debug for Vc { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.as_slice().fmt(f) } } @@ -32,15 +35,15 @@ impl Vc { unsafe { self.inline.cap <= INLINE_ELEMS as Nid } } - fn layout(&self) -> Option { + fn layout(&self) -> Option { unsafe { - self.is_inline() - .not() - .then(|| std::alloc::Layout::array::(self.alloced.cap as _).unwrap_unchecked()) + self.is_inline().not().then(|| { + core::alloc::Layout::array::(self.alloced.cap as _).unwrap_unchecked() + }) } } - fn len(&self) -> usize { + pub fn len(&self) -> usize { unsafe { if self.is_inline() { self.inline.cap as _ @@ -79,11 +82,11 @@ impl Vc { } pub fn as_slice(&self) -> &[Nid] { - unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) } + unsafe { core::slice::from_raw_parts(self.as_ptr(), self.len()) } } fn as_slice_mut(&mut self) -> &mut [Nid] { - unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr(), self.len()) } + unsafe { core::slice::from_raw_parts_mut(self.as_mut_ptr(), self.len()) } } pub fn push(&mut self, value: Nid) { @@ -93,10 +96,10 @@ impl Vc { unsafe { self.alloced.cap *= 2; self.alloced.base = Unique::new_unchecked( - std::alloc::realloc( + alloc::alloc::realloc( self.alloced.base.as_ptr().cast(), layout, - self.alloced.cap as usize * std::mem::size_of::(), + self.alloced.cap as usize * core::mem::size_of::(), ) .cast(), ); @@ -105,7 +108,7 @@ impl Vc { unsafe { let mut allcd = Self::alloc((self.inline.cap + 1).next_power_of_two() as _, self.len()); - std::ptr::copy_nonoverlapping(self.as_ptr(), allcd.as_mut_ptr(), self.len()); + core::ptr::copy_nonoverlapping(self.as_ptr(), allcd.as_mut_ptr(), self.len()); *self = allcd; } } @@ -118,8 +121,8 @@ impl Vc { unsafe fn alloc(cap: usize, len: usize) -> Self { debug_assert!(cap > INLINE_ELEMS); - let layout = unsafe { std::alloc::Layout::array::(cap).unwrap_unchecked() }; - let alloc = unsafe { std::alloc::alloc(layout) }; + let layout = unsafe { core::alloc::Layout::array::(cap).unwrap_unchecked() }; + let alloc = unsafe { alloc::alloc::alloc(layout) }; unsafe { Vc { alloced: AllocedVc { @@ -147,7 +150,7 @@ impl Drop for Vc { fn drop(&mut self) { if let Some(layout) = self.layout() { unsafe { - std::alloc::dealloc(self.alloced.base.as_ptr().cast(), layout); + alloc::alloc::dealloc(self.alloced.base.as_ptr().cast(), layout); } } } @@ -182,7 +185,7 @@ impl Iterator for VcIntoIter { return None; } - let ret = unsafe { std::ptr::read(self.vc.as_slice().get_unchecked(self.start)) }; + let ret = unsafe { core::ptr::read(self.vc.as_slice().get_unchecked(self.start)) }; self.start += 1; Some(ret) } @@ -200,7 +203,7 @@ impl DoubleEndedIterator for VcIntoIter { } self.end -= 1; - Some(unsafe { std::ptr::read(self.vc.as_slice().get_unchecked(self.end)) }) + Some(unsafe { core::ptr::read(self.vc.as_slice().get_unchecked(self.end)) }) } } @@ -217,14 +220,14 @@ impl<'a> From<&'a [Nid]> for Vc { if value.len() <= INLINE_ELEMS { let mut dflt = Self::default(); unsafe { - std::ptr::copy_nonoverlapping(value.as_ptr(), dflt.as_mut_ptr(), value.len()) + core::ptr::copy_nonoverlapping(value.as_ptr(), dflt.as_mut_ptr(), value.len()) }; dflt.inline.cap = value.len() as _; dflt } else { let mut allcd = unsafe { Self::alloc(value.len(), value.len()) }; unsafe { - std::ptr::copy_nonoverlapping(value.as_ptr(), allcd.as_mut_ptr(), value.len()) + core::ptr::copy_nonoverlapping(value.as_ptr(), allcd.as_mut_ptr(), value.len()) }; allcd } @@ -266,7 +269,7 @@ pub struct BitSet { } impl BitSet { - const ELEM_SIZE: usize = std::mem::size_of::() * 8; + const ELEM_SIZE: usize = core::mem::size_of::() * 8; pub fn clear(&mut self, bit_size: usize) { let new_len = (bit_size + Self::ELEM_SIZE - 1) / Self::ELEM_SIZE;