From 894f73ca35199f524dff6160c57c0916169fbaf6 Mon Sep 17 00:00:00 2001 From: mlokr Date: Wed, 4 Sep 2024 16:54:34 +0200 Subject: [PATCH] adding more type checking --- hblang/README.md | 14 +- hblang/build.rs | 4 +- hblang/src/codegen.rs | 49 +- hblang/src/lib.rs | 6 +- hblang/src/parser.rs | 10 + hblang/src/son.rs | 1062 ++++++++++------- .../son_tests_const_folding_with_arg.txt | 5 + 7 files changed, 702 insertions(+), 448 deletions(-) diff --git a/hblang/README.md b/hblang/README.md index 8a279d3..dc85d6b 100644 --- a/hblang/README.md +++ b/hblang/README.md @@ -695,7 +695,7 @@ example := fn(): void { loop { random_x := @inline(random.integer, 0, 1024) random_y := random.integer(0, 768) - a := @inline(screenidx, random_x, random_y) + a := @inline(screenidx, random_x) break } return @@ -725,3 +725,15 @@ integer := fn(min: int, max: int): int { return rng } ``` + +#### some_generic_code +```hb +some_func := fn($Elem: type): void { + return +} + +main := fn(): void { + some_func(0) + return +} +``` diff --git a/hblang/build.rs b/hblang/build.rs index 468e3d9..1598082 100644 --- a/hblang/build.rs +++ b/hblang/build.rs @@ -110,7 +110,7 @@ fn gen_instrs() -> Result<(), Box> { generated.pop(); writeln!(generated, " => {{")?; if iter_args(ty).count() != 0 { - writeln!(generated, " let data = unsafe {{ std::ptr::read(bytes.take(..std::mem::size_of::<{ty}>())?.as_ptr() as *const {ty}) }};")?; + writeln!(generated, " let data = crate::decode::<{ty}>(bytes)?;")?; writeln!( generated, " buf.extend([{}]);", @@ -119,7 +119,7 @@ fn gen_instrs() -> Result<(), Box> { ) )?; } else { - writeln!(generated, " bytes.take(..std::mem::size_of::<{ty}>())?;")?; + writeln!(generated, " crate::decode::<{ty}>(bytes)?;")?; } writeln!(generated, " }}")?; diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index ca2d330..34e6d47 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -8,7 +8,7 @@ use { parser::{self, find_symbol, idfl, CtorField, Expr, ExprRef, FileId, Pos}, HashMap, }, - std::{ops::Range, rc::Rc, u32}, + std::{fmt::Display, ops::Range, rc::Rc}, }; type Offset = u32; @@ -1438,7 +1438,7 @@ impl Codegen { let mut base_val = self.expr(base)?; base_val.loc = self.make_loc_owned(base_val.loc, base_val.ty); let index_val = self.expr(index)?; - _ = self.assert_ty(index.pos(), index_val.ty, ty::INT.into()); + _ = self.assert_ty(index.pos(), index_val.ty, ty::INT.into(), "subsctipt"); if let ty::Kind::Ptr(ty) = base_val.ty.expand() { base_val.ty = self.tys.ptrs[ty as usize].base; @@ -1501,6 +1501,8 @@ impl Codegen { let scope = self.ci.vars.len(); let sig = self.compute_signature(&mut func, func_ast.pos(), args)?; + self.assert_arg_count(expr.pos(), args.len(), cargs.len(), "inline function call"); + if scope == self.ci.vars.len() { for ((arg, ty), carg) in args.iter().zip(sig.args.view(&self.tys.args).to_owned()).zip(cargs) @@ -1881,7 +1883,10 @@ impl Codegen { let mut values = Vec::with_capacity(args.len()); let mut sig_args = sig.args.range(); let mut should_momize = !args.is_empty() && sig.ret == ty::Id::from(ty::TYPE); - for (arg, carg) in args.iter().zip(cargs) { + + self.assert_arg_count(expr.pos(), args.len(), cargs.len(), "function call"); + + for (i, (arg, carg)) in args.iter().zip(cargs).enumerate() { let ty = self.tys.args[sig_args.next().unwrap()]; let sym = parser::find_symbol(&ast.symbols, carg.id); if sym.flags & idfl::COMPTIME != 0 { @@ -1891,6 +1896,7 @@ impl Codegen { // TODO: pass the arg as dest let varg = self.expr_ctx(arg, Ctx::default().with_ty(ty))?; + _ = self.assert_ty(arg.pos(), varg.ty, ty, format_args!("argument({i})")); self.pass_arg(&varg, &mut parama); values.push(varg.loc); should_momize = false; @@ -1966,7 +1972,7 @@ impl Codegen { match self.ci.ret { None => self.ci.ret = Some(ty), - Some(ret) => _ = self.assert_ty(pos, ty, ret), + Some(ret) => _ = self.assert_ty(pos, ty, ret, "return type"), } self.ci.ret_relocs.push(Reloc::new(self.local_offset(), 1, 4)); @@ -1979,8 +1985,21 @@ impl Codegen { } Some(Value::void()) } - E::Number { value, .. } => Some(Value { - ty: ctx.ty.map(ty::Id::strip_pointer).unwrap_or(ty::INT.into()), + E::Number { value, pos, .. } => Some(Value { + ty: { + let ty = ctx.ty.map(ty::Id::strip_pointer).unwrap_or(ty::INT.into()); + if !ty.is_integer() && !ty.is_pointer() { + self.report( + pos, + format_args!( + "this integer was inferred to be '{}' \ + which does not make sense", + self.ty_display(ty) + ), + ); + } + ty + }, loc: Loc::ct(value as u64), }), E::If { cond, then, else_, .. } => { @@ -2090,7 +2109,7 @@ impl Codegen { if let ty::Kind::Struct(_) = left.ty.expand() { let right = self.expr_ctx(right, Ctx::default().with_ty(left.ty))?; - _ = self.assert_ty(expr.pos(), left.ty, right.ty); + _ = self.assert_ty(expr.pos(), right.ty, left.ty, "right struct operand"); return self.struct_op(op, left.ty, ctx, left.loc, right.loc); } @@ -2100,7 +2119,7 @@ impl Codegen { let right = self.expr_ctx(right, Ctx::default().with_ty(left.ty))?; let rsize = self.tys.size_of(right.ty); - let ty = self.assert_ty(expr.pos(), left.ty, right.ty); + let ty = self.assert_ty(expr.pos(), right.ty, left.ty, "right sclalar operand"); let size = self.tys.size_of(ty); let signed = ty.is_signed(); @@ -2181,7 +2200,7 @@ impl Codegen { }?; if let Some(ty) = ctx.ty { - _ = self.assert_ty(expr.pos(), value.ty, ty); + _ = self.assert_ty(expr.pos(), value.ty, ty, "a thing"); } Some(match ctx.loc { @@ -3300,13 +3319,20 @@ impl Codegen { #[must_use] #[track_caller] - fn assert_ty(&self, pos: Pos, ty: ty::Id, expected: ty::Id) -> ty::Id { + fn assert_ty(&self, pos: Pos, ty: ty::Id, expected: ty::Id, hint: impl Display) -> ty::Id { if let Some(res) = ty.try_upcast(expected) { res } else { let ty = self.ty_display(ty); let expected = self.ty_display(expected); - self.report(pos, format_args!("expected {expected}, got {ty}")); + self.report(pos, format_args!("expected {hint} of type {expected}, got {ty}")); + } + } + + fn assert_arg_count(&self, pos: Pos, got: usize, expected: usize, hint: impl Display) { + if got != expected { + let s = if expected != 1 { "s" } else { "" }; + self.report(pos, format_args!("{hint} expected {expected} argument{s}, got {got}")) } } @@ -3523,5 +3549,6 @@ mod tests { comptime_function_from_another_file => README; inline => README; inline_test => README; + some_generic_code => README; } } diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index edf4f9b..8cef29b 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -21,7 +21,6 @@ use { parser::Ast, std::{ collections::{hash_map, VecDeque}, - default, io::{self, Read}, path::{Path, PathBuf}, sync::Mutex, @@ -131,6 +130,11 @@ unsafe fn encode(instr: T) -> (usize, [u8; instrs::MAX_SIZE]) { (std::mem::size_of::(), buf) } +#[inline] +fn decode(binary: &mut &[u8]) -> Option { + unsafe { Some(std::ptr::read(binary.take(..std::mem::size_of::())?.as_ptr() as *const T)) } +} + #[cfg(test)] #[derive(Clone, Copy)] enum DisasmItem { diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs index 04995f4..69c8012 100644 --- a/hblang/src/parser.rs +++ b/hblang/src/parser.rs @@ -1220,6 +1220,16 @@ impl ExprRef { // allocations Some(unsafe { { self.0 }.as_ref() }) } + + pub fn dangling() -> Self { + Self(NonNull::dangling()) + } +} + +impl Default for ExprRef { + fn default() -> Self { + Self::dangling() + } } unsafe impl Send for Ast {} diff --git a/hblang/src/son.rs b/hblang/src/son.rs index 601e1a5..60b40ce 100644 --- a/hblang/src/son.rs +++ b/hblang/src/son.rs @@ -2,9 +2,14 @@ use { crate::{ ident::{self, Ident}, + instrs, lexer::{self, TokenKind}, log, - parser::{self, idfl, Expr, ExprRef, FileId, Pos}, + parser::{ + self, + idfl::{self, index}, + Expr, ExprRef, FileId, Pos, + }, HashMap, }, core::fmt, @@ -12,425 +17,86 @@ use { mem, ops::{self, Range}, rc::Rc, + usize, }, }; type Nid = u32; const NILL: u32 = u32::MAX; -pub struct Nodes { - values: Vec, - free: u32, - lookup: HashMap<(Kind, [Nid; MAX_INPUTS]), Nid>, -} +mod reg { + pub const STACK_PTR: Reg = 254; + pub const ZERO: Reg = 0; + pub const RET: Reg = 1; + pub const RET_ADDR: Reg = 31; -impl Default for Nodes { - fn default() -> Self { - Self { values: Default::default(), free: u32::MAX, lookup: Default::default() } - } -} + type Reg = u8; -impl Nodes { - pub fn add(&mut self, value: Node) -> u32 { - if self.free == u32::MAX { - self.free = self.values.len() as _; - self.values.push(PoolSlot::Next(u32::MAX)); + #[derive(Default, Debug, PartialEq, Eq)] + pub struct Id(Reg, bool); + + impl Id { + pub const RET: Self = Id(RET, false); + + pub fn get(&self) -> Reg { + self.0 } - let free = self.free; - self.free = match mem::replace(&mut self.values[free as usize], PoolSlot::Value(value)) { - PoolSlot::Value(_) => unreachable!(), - PoolSlot::Next(free) => free, - }; - free - } - - pub fn remove_low(&mut self, id: u32) -> Node { - let value = match mem::replace(&mut self.values[id as usize], PoolSlot::Next(self.free)) { - PoolSlot::Value(value) => value, - PoolSlot::Next(_) => unreachable!(), - }; - self.free = id; - value - } - - pub fn clear(&mut self) { - self.values.clear(); - self.free = u32::MAX; - } - - fn new_node( - &mut self, - ty: impl Into, - kind: Kind, - inps: [Nid; SIZE], - ) -> Nid { - let mut inputs = [NILL; MAX_INPUTS]; - inputs[..inps.len()].copy_from_slice(&inps); - - if let Some(&id) = self.lookup.get(&(kind, inputs)) { - debug_assert_eq!(self[id].kind, kind); - debug_assert_eq!(self[id].inputs, inputs); - return id; + pub fn as_ref(&self) -> Self { + Self(self.0, false) } - let id = self.add(Node { - inputs, - kind, - depth: u32::MAX, - lock_rc: 0, - ty: ty.into(), - outputs: vec![], - }); - - let prev = self.lookup.insert((kind, inputs), id); - debug_assert_eq!(prev, None); - - self.add_deps(id, &inps); - if let Some(opt) = self.peephole(id) { - debug_assert_ne!(opt, id); - self.lock(opt); - self.remove(id); - self.unlock(opt); - opt - } else { - id + pub fn is_ref(&self) -> bool { + !self.1 } } - fn lock(&mut self, target: Nid) { - self[target].lock_rc += 1; - } - - fn unlock(&mut self, target: Nid) { - self[target].lock_rc -= 1; - } - - fn remove(&mut self, target: Nid) { - if !self[target].is_dangling() { - return; + impl From for Id { + fn from(value: u8) -> Self { + Self(value, false) } - for i in 0..self[target].inputs().len() { - let inp = self[target].inputs[i]; - let index = self[inp].outputs.iter().position(|&p| p == target).unwrap(); - self[inp].outputs.swap_remove(index); - self.remove(inp); - } - let res = self.lookup.remove(&(self[target].kind, self[target].inputs)); - debug_assert_eq!(res, Some(target)); - self.remove_low(target); } - fn peephole(&mut self, target: Nid) -> Option { - match self[target].kind { - Kind::Start => {} - Kind::End => {} - Kind::BinOp { op } => return self.peephole_binop(target, op), - Kind::Return => {} - Kind::Tuple { index } => {} - Kind::ConstInt { value } => {} + impl Drop for Id { + fn drop(&mut self) { + if !std::thread::panicking() && self.1 { + unreachable!("reg id leaked: {:?}", self.0); + } } - None } - fn peephole_binop(&mut self, target: Nid, op: TokenKind) -> Option { - use TokenKind as T; - let [mut lhs, mut rhs, ..] = self[target].inputs; + #[derive(Default, PartialEq, Eq)] + pub struct Alloc { + free: Vec, + max_used: Reg, + } - if lhs == rhs { - match op { - T::Sub => { - return Some(self.new_node(self[target].ty, Kind::ConstInt { value: 0 }, [])); - } - T::Add => { - let rhs = self.new_node(self[target].ty, Kind::ConstInt { value: 2 }, []); - return Some( - self.new_node(self[target].ty, Kind::BinOp { op: T::Mul }, [lhs, rhs]), - ); - } - _ => {} + impl Alloc { + pub fn init(&mut self) { + self.free.clear(); + self.free.extend((32..=253).rev()); + self.max_used = RET_ADDR; + } + + pub fn allocate(&mut self) -> Id { + let reg = self.free.pop().expect("TODO: we need to spill"); + self.max_used = self.max_used.max(reg); + Id(reg, true) + } + + pub fn free(&mut self, reg: Id) { + if reg.1 { + self.free.push(reg.0); + std::mem::forget(reg); } } - if let (Kind::ConstInt { value: a }, Kind::ConstInt { value: b }) = - (self[lhs].kind, self[rhs].kind) - { - return Some(self.new_node( - self[target].ty, - Kind::ConstInt { value: op.apply(a, b) }, - [], - )); - } - - let mut changed = false; - if op.is_comutative() && self[lhs].kind < self[rhs].kind { - std::mem::swap(&mut lhs, &mut rhs); - changed = true; - } - - if let Kind::ConstInt { value } = self[rhs].kind { - match (op, value) { - (T::Add | T::Sub | T::Shl, 0) | (T::Mul | T::Div, 1) => return Some(lhs), - (T::Mul, 0) => return Some(rhs), - _ => {} - } - } - - if op.is_comutative() && self[lhs].kind == (Kind::BinOp { op }) { - if let Kind::ConstInt { value: a } = self[self[lhs].inputs[1]].kind - && let Kind::ConstInt { value: b } = self[rhs].kind - { - let new_rhs = - self.new_node(self[target].ty, Kind::ConstInt { value: op.apply(a, b) }, []); - return Some(self.new_node(self[target].ty, Kind::BinOp { op }, [ - self[lhs].inputs[0], - new_rhs, - ])); - } - - if self.is_const(self[lhs].inputs[1]) { - let new_lhs = - self.new_node(self[target].ty, Kind::BinOp { op }, [self[lhs].inputs[0], rhs]); - return Some(self.new_node(self[target].ty, Kind::BinOp { op }, [ - new_lhs, - self[lhs].inputs[1], - ])); - } - } - - if op == T::Add - && self[lhs].kind == (Kind::BinOp { op: T::Mul }) - && self[lhs].inputs[0] == rhs - && let Kind::ConstInt { value } = self[self[lhs].inputs[1]].kind - { - let new_rhs = self.new_node(self[target].ty, Kind::ConstInt { value: value + 1 }, []); - return Some( - self.new_node(self[target].ty, Kind::BinOp { op: T::Mul }, [rhs, new_rhs]), - ); - } - - if op == T::Sub && self[lhs].kind == (Kind::BinOp { op }) { - // (a - b) - c => a - (b + c) - let [a, b, ..] = self[lhs].inputs; - let c = rhs; - let new_rhs = self.new_node(self[target].ty, Kind::BinOp { op: T::Add }, [b, c]); - return Some(self.new_node(self[target].ty, Kind::BinOp { op }, [a, new_rhs])); - } - - if changed { - return Some(self.new_node(self[target].ty, self[target].kind, [lhs, rhs])); - } - - None - } - - fn is_const(&self, id: Nid) -> bool { - matches!(self[id].kind, Kind::ConstInt { .. }) - } - - fn replace(&mut self, target: Nid, with: Nid) { - //for i in 0..self[target].inputs().len() { - // let inp = self[target].inputs[i]; - // let index = self[inp].outputs.iter().position(|&p| p == target).unwrap(); - // self[inp].outputs[index] = with; - //} - - for i in 0..self[target].outputs.len() { - let out = self[target].outputs[i]; - let index = self[out].inputs().iter().position(|&p| p == target).unwrap(); - let rpl = self.modify_input(out, index, with); - self[with].outputs.push(rpl); - } - - self.remove_low(target); - } - - fn modify_input(&mut self, target: Nid, inp_index: usize, with: Nid) -> Nid { - let out = self.lookup.remove(&(self[target].kind, self[target].inputs)); - debug_assert!(out == Some(target)); - debug_assert_ne!(self[target].inputs[inp_index], with); - - self[target].inputs[inp_index] = with; - if let Err(other) = self.lookup.try_insert((self[target].kind, self[target].inputs), target) - { - let rpl = *other.entry.get(); - self.replace(target, rpl); - return rpl; - } - - target - } - - fn add_deps(&mut self, id: Nid, deps: &[Nid]) { - for &d in deps { - debug_assert_ne!(d, id); - self[d].outputs.push(id); - } - } - - fn unlock_free(&mut self, id: Nid) { - self[id].lock_rc -= 1; - if self[id].is_dangling() { - self.remove_low(id); - } - } - - fn fmt(&self, f: &mut fmt::Formatter, node: Nid, rcs: &mut [usize]) -> fmt::Result { - let mut is_ready = || { - if rcs[node as usize] == 0 { - return false; - } - rcs[node as usize] = rcs[node as usize].saturating_sub(1); - rcs[node as usize] == 0 - }; - - match self[node].kind { - Kind::BinOp { op } => { - write!(f, "(")?; - self.fmt(f, self[node].inputs[0], rcs)?; - write!(f, " {op} ")?; - self.fmt(f, self[node].inputs[1], rcs)?; - write!(f, ")")?; - } - Kind::Return => { - write!(f, "{}: return [{:?}] ", node, self[node].inputs[0])?; - self.fmt(f, self[node].inputs[1], rcs)?; - writeln!(f)?; - self.fmt(f, self[node].inputs[2], rcs)?; - } - Kind::ConstInt { value } => write!(f, "{}", value)?, - Kind::End => { - if is_ready() { - writeln!(f, "{}: {:?}", node, self[node].kind)?; - } - } - Kind::Tuple { index } => { - if index != 0 && self[self[node].inputs[0]].kind == Kind::Start { - write!(f, "{:?}.{}", self[self[node].inputs[0]].kind, index)?; - } else if is_ready() { - writeln!(f, "{}: {:?}", node, self[node].kind)?; - for &o in &self[node].outputs { - if self.is_cfg(o) { - self.fmt(f, o, rcs)?; - } - } - } - } - Kind::Start => 'b: { - if !is_ready() { - break 'b; - } - - writeln!(f, "{}: {:?}", node, self[node].kind)?; - - for &o in &self[node].outputs { - self.fmt(f, o, rcs)?; - } - } - } - - Ok(()) - } - - fn is_cfg(&self, o: Nid) -> bool { - matches!(self[o].kind, Kind::Start | Kind::End | Kind::Return | Kind::Tuple { .. }) - } -} - -impl ops::Index for Nodes { - type Output = Node; - - fn index(&self, index: u32) -> &Self::Output { - match &self.values[index as usize] { - PoolSlot::Value(value) => value, - PoolSlot::Next(_) => unreachable!(), + pub fn pushed_size(&self) -> usize { + ((self.max_used as usize).saturating_sub(RET_ADDR as usize) + 1) * 8 } } } -impl ops::IndexMut for Nodes { - fn index_mut(&mut self, index: u32) -> &mut Self::Output { - match &mut self.values[index as usize] { - PoolSlot::Value(value) => value, - PoolSlot::Next(_) => unreachable!(), - } - } -} - -#[derive(Debug)] -enum PoolSlot { - Value(Node), - Next(u32), -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[repr(u8)] -pub enum Kind { - Start, - End, - Return, - ConstInt { value: i64 }, - Tuple { index: u32 }, - BinOp { op: lexer::TokenKind }, -} - -impl Kind { - fn disc(&self) -> u8 { - unsafe { *(self as *const _ as *const u8) } - } -} - -const MAX_INPUTS: usize = 3; - -#[derive(Debug)] -pub struct Node { - pub inputs: [Nid; MAX_INPUTS], - pub kind: Kind, - pub depth: u32, - pub lock_rc: u32, - pub ty: ty::Id, - pub outputs: Vec, -} - -impl Node { - fn is_dangling(&self) -> bool { - self.outputs.len() + self.lock_rc as usize == 0 - } - - fn inputs(&self) -> &[Nid] { - let len = self.inputs.iter().position(|&n| n == NILL).unwrap_or(MAX_INPUTS); - &self.inputs[..len] - } - - fn inputs_mut(&mut self) -> &mut [Nid] { - let len = self.inputs.iter().position(|&n| n == NILL).unwrap_or(MAX_INPUTS); - &mut self.inputs[..len] - } -} - -impl fmt::Display for Nodes { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.fmt( - f, - 0, - &mut self - .values - .iter() - .map(|s| match s { - PoolSlot::Value(Node { kind: Kind::Start, .. }) => 1, - PoolSlot::Value(Node { kind: Kind::End, ref outputs, .. }) => outputs.len(), - PoolSlot::Value(val) => val.inputs().len(), - PoolSlot::Next(_) => 0, - }) - .collect::>(), - ) - } -} - -type Offset = u32; -type Size = u32; -type ArrayLen = u32; - mod ty { use { crate::{ @@ -747,11 +413,426 @@ mod ty { } } -#[derive(Clone, Copy, Debug)] +struct Nodes { + values: Vec, + free: u32, + lookup: HashMap<(Kind, [Nid; MAX_INPUTS]), Nid>, +} + +impl Default for Nodes { + fn default() -> Self { + Self { values: Default::default(), free: u32::MAX, lookup: Default::default() } + } +} + +impl Nodes { + fn add(&mut self, value: Node) -> u32 { + if self.free == u32::MAX { + self.free = self.values.len() as _; + self.values.push(PoolSlot::Next(u32::MAX)); + } + + let free = self.free; + self.free = match mem::replace(&mut self.values[free as usize], PoolSlot::Value(value)) { + PoolSlot::Value(_) => unreachable!(), + PoolSlot::Next(free) => free, + }; + free + } + + fn remove_low(&mut self, id: u32) -> Node { + let value = match mem::replace(&mut self.values[id as usize], PoolSlot::Next(self.free)) { + PoolSlot::Value(value) => value, + PoolSlot::Next(_) => unreachable!(), + }; + self.free = id; + value + } + + fn clear(&mut self) { + self.values.clear(); + self.free = u32::MAX; + } + + fn new_node( + &mut self, + ty: impl Into, + kind: Kind, + inps: [Nid; SIZE], + ) -> Nid { + let mut inputs = [NILL; MAX_INPUTS]; + inputs[..inps.len()].copy_from_slice(&inps); + + if let Some(&id) = self.lookup.get(&(kind, inputs)) { + debug_assert_eq!(self[id].kind, kind); + debug_assert_eq!(self[id].inputs, inputs); + return id; + } + + let id = self.add(Node { + inputs, + kind, + loc: Default::default(), + depth: u32::MAX, + lock_rc: 0, + ty: ty.into(), + outputs: vec![], + }); + + let prev = self.lookup.insert((kind, inputs), id); + debug_assert_eq!(prev, None); + + self.add_deps(id, &inps); + if let Some(opt) = self.peephole(id) { + debug_assert_ne!(opt, id); + self.lock(opt); + self.remove(id); + self.unlock(opt); + opt + } else { + id + } + } + + fn lock(&mut self, target: Nid) { + self[target].lock_rc += 1; + } + + fn unlock(&mut self, target: Nid) { + self[target].lock_rc -= 1; + } + + fn remove(&mut self, target: Nid) { + if !self[target].is_dangling() { + return; + } + for i in 0..self[target].inputs().len() { + let inp = self[target].inputs[i]; + let index = self[inp].outputs.iter().position(|&p| p == target).unwrap(); + self[inp].outputs.swap_remove(index); + self.remove(inp); + } + let res = self.lookup.remove(&(self[target].kind, self[target].inputs)); + debug_assert_eq!(res, Some(target)); + self.remove_low(target); + } + + fn peephole(&mut self, target: Nid) -> Option { + match self[target].kind { + Kind::Start => {} + Kind::End => {} + Kind::BinOp { op } => return self.peephole_binop(target, op), + Kind::Return => {} + Kind::Tuple { .. } => {} + Kind::ConstInt { .. } => {} + } + None + } + + fn peephole_binop(&mut self, target: Nid, op: TokenKind) -> Option { + use TokenKind as T; + let [mut lhs, mut rhs, ..] = self[target].inputs; + + if lhs == rhs { + match op { + T::Sub => { + return Some(self.new_node(self[target].ty, Kind::ConstInt { value: 0 }, [])); + } + T::Add => { + let rhs = self.new_node(self[target].ty, Kind::ConstInt { value: 2 }, []); + return Some( + self.new_node(self[target].ty, Kind::BinOp { op: T::Mul }, [lhs, rhs]), + ); + } + _ => {} + } + } + + if let (Kind::ConstInt { value: a }, Kind::ConstInt { value: b }) = + (self[lhs].kind, self[rhs].kind) + { + return Some(self.new_node( + self[target].ty, + Kind::ConstInt { value: op.apply(a, b) }, + [], + )); + } + + let mut changed = false; + if op.is_comutative() && self[lhs].kind < self[rhs].kind { + std::mem::swap(&mut lhs, &mut rhs); + changed = true; + } + + if let Kind::ConstInt { value } = self[rhs].kind { + match (op, value) { + (T::Add | T::Sub | T::Shl, 0) | (T::Mul | T::Div, 1) => return Some(lhs), + (T::Mul, 0) => return Some(rhs), + _ => {} + } + } + + if op.is_comutative() && self[lhs].kind == (Kind::BinOp { op }) { + if let Kind::ConstInt { value: a } = self[self[lhs].inputs[1]].kind + && let Kind::ConstInt { value: b } = self[rhs].kind + { + let new_rhs = + self.new_node(self[target].ty, Kind::ConstInt { value: op.apply(a, b) }, []); + return Some(self.new_node(self[target].ty, Kind::BinOp { op }, [ + self[lhs].inputs[0], + new_rhs, + ])); + } + + if self.is_const(self[lhs].inputs[1]) { + let new_lhs = + self.new_node(self[target].ty, Kind::BinOp { op }, [self[lhs].inputs[0], rhs]); + return Some(self.new_node(self[target].ty, Kind::BinOp { op }, [ + new_lhs, + self[lhs].inputs[1], + ])); + } + } + + if op == T::Add + && self[lhs].kind == (Kind::BinOp { op: T::Mul }) + && self[lhs].inputs[0] == rhs + && let Kind::ConstInt { value } = self[self[lhs].inputs[1]].kind + { + let new_rhs = self.new_node(self[target].ty, Kind::ConstInt { value: value + 1 }, []); + return Some( + self.new_node(self[target].ty, Kind::BinOp { op: T::Mul }, [rhs, new_rhs]), + ); + } + + if op == T::Sub && self[lhs].kind == (Kind::BinOp { op }) { + // (a - b) - c => a - (b + c) + let [a, b, ..] = self[lhs].inputs; + let c = rhs; + let new_rhs = self.new_node(self[target].ty, Kind::BinOp { op: T::Add }, [b, c]); + return Some(self.new_node(self[target].ty, Kind::BinOp { op }, [a, new_rhs])); + } + + if changed { + return Some(self.new_node(self[target].ty, self[target].kind, [lhs, rhs])); + } + + None + } + + fn is_const(&self, id: Nid) -> bool { + matches!(self[id].kind, Kind::ConstInt { .. }) + } + + fn replace(&mut self, target: Nid, with: Nid) { + //for i in 0..self[target].inputs().len() { + // let inp = self[target].inputs[i]; + // let index = self[inp].outputs.iter().position(|&p| p == target).unwrap(); + // self[inp].outputs[index] = with; + //} + + for i in 0..self[target].outputs.len() { + let out = self[target].outputs[i]; + let index = self[out].inputs().iter().position(|&p| p == target).unwrap(); + let rpl = self.modify_input(out, index, with); + self[with].outputs.push(rpl); + } + + self.remove_low(target); + } + + fn modify_input(&mut self, target: Nid, inp_index: usize, with: Nid) -> Nid { + let out = self.lookup.remove(&(self[target].kind, self[target].inputs)); + debug_assert!(out == Some(target)); + debug_assert_ne!(self[target].inputs[inp_index], with); + + self[target].inputs[inp_index] = with; + if let Err(other) = self.lookup.try_insert((self[target].kind, self[target].inputs), target) + { + let rpl = *other.entry.get(); + self.replace(target, rpl); + return rpl; + } + + target + } + + fn add_deps(&mut self, id: Nid, deps: &[Nid]) { + for &d in deps { + debug_assert_ne!(d, id); + self[d].outputs.push(id); + } + } + + fn unlock_free(&mut self, id: Nid) { + self[id].lock_rc -= 1; + if self[id].is_dangling() { + self.remove_low(id); + } + } + + fn fmt(&self, f: &mut fmt::Formatter, node: Nid, rcs: &mut [usize]) -> fmt::Result { + let mut is_ready = || { + if rcs[node as usize] == 0 { + return false; + } + rcs[node as usize] = rcs[node as usize].saturating_sub(1); + rcs[node as usize] == 0 + }; + + match self[node].kind { + Kind::BinOp { op } => { + write!(f, "(")?; + self.fmt(f, self[node].inputs[0], rcs)?; + write!(f, " {op} ")?; + self.fmt(f, self[node].inputs[1], rcs)?; + write!(f, ")")?; + } + Kind::Return => { + write!(f, "{}: return [{:?}] ", node, self[node].inputs[0])?; + self.fmt(f, self[node].inputs[1], rcs)?; + writeln!(f)?; + self.fmt(f, self[node].inputs[2], rcs)?; + } + Kind::ConstInt { value } => write!(f, "{}", value)?, + Kind::End => { + if is_ready() { + writeln!(f, "{}: {:?}", node, self[node].kind)?; + } + } + Kind::Tuple { index } => { + if index != 0 && self[self[node].inputs[0]].kind == Kind::Start { + write!(f, "{:?}.{}", self[self[node].inputs[0]].kind, index)?; + } else if is_ready() { + writeln!(f, "{}: {:?}", node, self[node].kind)?; + for &o in &self[node].outputs { + if self.is_cfg(o) { + self.fmt(f, o, rcs)?; + } + } + } + } + Kind::Start => 'b: { + if !is_ready() { + break 'b; + } + + writeln!(f, "{}: {:?}", node, self[node].kind)?; + + for &o in &self[node].outputs { + self.fmt(f, o, rcs)?; + } + } + } + + Ok(()) + } + + fn is_cfg(&self, o: Nid) -> bool { + matches!(self[o].kind, Kind::Start | Kind::End | Kind::Return | Kind::Tuple { .. }) + } +} + +impl ops::Index for Nodes { + type Output = Node; + + fn index(&self, index: u32) -> &Self::Output { + match &self.values[index as usize] { + PoolSlot::Value(value) => value, + PoolSlot::Next(_) => unreachable!(), + } + } +} + +impl ops::IndexMut for Nodes { + fn index_mut(&mut self, index: u32) -> &mut Self::Output { + match &mut self.values[index as usize] { + PoolSlot::Value(value) => value, + PoolSlot::Next(_) => unreachable!(), + } + } +} + +#[derive(Debug)] +enum PoolSlot { + Value(Node), + Next(u32), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(u8)] +pub enum Kind { + Start, + End, + Return, + ConstInt { value: i64 }, + Tuple { index: u32 }, + BinOp { op: lexer::TokenKind }, +} + +impl Kind { + fn disc(&self) -> u8 { + unsafe { *(self as *const _ as *const u8) } + } +} + +const MAX_INPUTS: usize = 3; + +#[derive(Debug)] +struct Node { + inputs: [Nid; MAX_INPUTS], + kind: Kind, + loc: Loc, + depth: u32, + lock_rc: u32, + ty: ty::Id, + outputs: Vec, +} + +impl Node { + fn is_dangling(&self) -> bool { + self.outputs.len() + self.lock_rc as usize == 0 + } + + fn inputs(&self) -> &[Nid] { + let len = self.inputs.iter().position(|&n| n == NILL).unwrap_or(MAX_INPUTS); + &self.inputs[..len] + } + + fn inputs_mut(&mut self) -> &mut [Nid] { + let len = self.inputs.iter().position(|&n| n == NILL).unwrap_or(MAX_INPUTS); + &mut self.inputs[..len] + } +} + +impl fmt::Display for Nodes { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.fmt( + f, + 0, + &mut self + .values + .iter() + .map(|s| match s { + PoolSlot::Value(Node { kind: Kind::Start, .. }) => 1, + PoolSlot::Value(Node { kind: Kind::End, ref outputs, .. }) => outputs.len(), + PoolSlot::Value(val) => val.inputs().len(), + PoolSlot::Next(_) => 0, + }) + .collect::>(), + ) + } +} + +type Offset = u32; +type Size = u32; +type ArrayLen = u32; + +#[derive(Debug)] struct Loop { var_count: u32, offset: u32, - reloc_base: u32, + break_relocs: Vec, } struct Variable { @@ -762,21 +843,33 @@ struct Variable { #[derive(Default)] struct ItemCtx { file: FileId, - id: ty::Kind, + id: ty::Id, ret: Option, + + task_base: usize, + + nodes: Nodes, start: Nid, end: Nid, cfg: Nid, - task_base: usize, - snap: Snapshot, - - nodes: Nodes, loops: Vec, vars: Vec, + regs: reg::Alloc, + ret_relocs: Vec, + relocs: Vec, + code: Vec, } -impl ItemCtx {} +impl ItemCtx { + fn emit(&mut self, instr: (usize, [u8; instrs::MAX_SIZE])) { + emit(&mut self.code, instr); + } +} + +fn emit(out: &mut Vec, (len, instr): (usize, [u8; instrs::MAX_SIZE])) { + out.extend_from_slice(&instr[..len]); +} fn write_reloc(doce: &mut [u8], offset: usize, value: i64, size: u16) { let value = value.to_ne_bytes(); @@ -801,17 +894,52 @@ struct Sig { ret: ty::Id, } -#[derive(Clone, Copy)] struct Func { file: FileId, expr: ExprRef, sig: Option, offset: Offset, + // TODO: change to indices into common vec + relocs: Vec, + code: Vec, +} + +impl Default for Func { + fn default() -> Self { + Self { + file: u32::MAX, + expr: Default::default(), + sig: None, + offset: u32::MAX, + relocs: Default::default(), + code: Default::default(), + } + } +} + +struct TypedReloc { + target: ty::Id, + reloc: Reloc, } struct Global { - offset: Offset, ty: ty::Id, + offset: Offset, + data: Vec, +} + +impl Default for Global { + fn default() -> Self { + Self { ty: Default::default(), offset: u32::MAX, data: Default::default() } + } +} + +// TODO: make into bit struct (width: u2, sub_offset: u3, offset: u27) +#[derive(Clone, Copy, Debug)] +struct Reloc { + offset: Offset, + sub_offset: u8, + width: u8, } struct Field { @@ -989,33 +1117,6 @@ struct FTask { id: ty::Func, } -#[derive(Default, Clone, Copy, PartialEq, Eq, Debug)] -pub struct Snapshot { - code: usize, - string_data: usize, - funcs: usize, - globals: usize, - strings: usize, -} - -impl Snapshot { - fn _sub(&mut self, other: &Self) { - self.code -= other.code; - self.string_data -= other.string_data; - self.funcs -= other.funcs; - self.globals -= other.globals; - self.strings -= other.strings; - } - - fn _add(&mut self, other: &Self) { - self.code += other.code; - self.string_data += other.string_data; - self.funcs += other.funcs; - self.globals += other.globals; - self.strings += other.strings; - } -} - #[derive(Default, Debug)] struct Ctx { ty: Option, @@ -1027,6 +1128,22 @@ impl Ctx { } } +#[derive(Debug, Default)] +struct Loc { + reg: reg::Id, +} + +#[derive(Default, Debug)] +struct GenCtx { + loc: Option, +} + +impl GenCtx { + pub fn with_loc(self, loc: impl Into) -> Self { + Self { loc: Some(loc.into()) } + } +} + #[derive(Default)] struct Pool { cis: Vec, @@ -1049,6 +1166,8 @@ impl Codegen { self.complete_call_graph_low(); } + pub fn dump_reachable(&mut self, from: ty::Func, to: &mut Vec) {} + fn make_func_reachable(&mut self, func: ty::Func) { let fuc = &mut self.tys.funcs[func as usize]; if fuc.offset == u32::MAX { @@ -1162,7 +1281,7 @@ impl Codegen { } fn handle_task(&mut self, FTask { file, id }: FTask) { - let func = self.tys.funcs[id as usize]; + let func = &self.tys.funcs[id as usize]; debug_assert!(func.file == file); let sig = func.sig.unwrap(); let ast = self.files[file as usize].clone(); @@ -1170,7 +1289,7 @@ impl Codegen { let repl = ItemCtx { file, - id: ty::Kind::Func(id), + id: ty::Kind::Func(id).compress(), ret: Some(sig.ret), ..self.pool.cis.pop().unwrap_or_default() }; @@ -1203,11 +1322,88 @@ impl Codegen { self.report(body.pos(), "expected all paths in the fucntion to return"); } - for var in self.ci.vars.drain(..) { + for var in self.ci.vars.iter() { self.ci.nodes.unlock(var.value); } - //self.pool.cis.push(std::mem::replace(&mut self.ci, prev_ci)); + '_open_function: { + self.ci.emit(instrs::addi64(reg::STACK_PTR, reg::STACK_PTR, 0)); + self.ci.emit(instrs::st(reg::RET_ADDR, reg::STACK_PTR, 0, 0)); + } + + self.ci.regs.init(); + + let mut params = self.tys.parama(sig.ret); + for var in self.ci.vars.drain(..) { + match self.tys.size_of(self.ci.nodes[var.value].ty) { + 0 => {} + 1..=8 => { + let reg = self.ci.regs.allocate(); + emit(&mut self.ci.code, instrs::cp(reg.get(), params.next())); + self.ci.nodes[var.value].loc = Loc { reg }; + } + s => todo!("{s}"), + } + } + + self.emit_control(self.ci.nodes[self.ci.start].outputs[0]); + + '_close_function: { + let pushed = self.ci.regs.pushed_size() as i64; + let stack = 0; + + write_reloc(&mut self.ci.code, 3, -(pushed + stack), 8); + write_reloc(&mut self.ci.code, 3 + 8 + 3, stack, 8); + write_reloc(&mut self.ci.code, 3 + 8 + 3 + 8, pushed, 2); + + self.ci.emit(instrs::ld(reg::RET_ADDR, reg::STACK_PTR, stack as _, pushed as _)); + self.ci.emit(instrs::addi64(reg::STACK_PTR, reg::STACK_PTR, (pushed + stack) as _)); + } + + self.tys.funcs[id as usize].code.append(&mut self.ci.code); + self.tys.funcs[id as usize].relocs.append(&mut self.ci.relocs); + self.pool.cis.push(std::mem::replace(&mut self.ci, prev_ci)); + } + + fn emit_control(&mut self, ctrl: Nid) { + match self.ci.nodes[ctrl].kind { + Kind::Start => unreachable!(), + Kind::End => unreachable!(), + Kind::Return => { + let ret_loc = match self.tys.size_of(self.ci.ret.expect("TODO")) { + 0 => Loc::default(), + 1..=8 => Loc { reg: 1u8.into() }, + s => todo!("{s}"), + }; + + self.emit_expr(self.ci.nodes[ctrl].inputs[1], GenCtx::default().with_loc(ret_loc)); + } + Kind::ConstInt { value } => unreachable!(), + Kind::Tuple { index } => { + debug_assert!(index == 0); + self.emit_control(self.ci.nodes[ctrl].outputs[0]); + } + Kind::BinOp { op } => unreachable!(), + } + } + + fn emit_expr(&mut self, expr: Nid, ctx: GenCtx) { + match self.ci.nodes[expr].kind { + Kind::Start => unreachable!(), + Kind::End => unreachable!(), + Kind::Return => unreachable!(), + Kind::ConstInt { value } => { + if let Some(loc) = ctx.loc { + self.ci.emit(instrs::li64(loc.reg.get(), value as _)); + } else { + let reg = self.ci.regs.allocate(); + self.ci.emit(instrs::li64(reg.get(), value as _)); + self.ci.nodes[expr].loc = Loc { reg }; + } + } + Kind::Tuple { index } => unreachable!(), + Kind::BinOp { op } => unreachable!(), + } } // TODO: sometimes its better to do this in bulk @@ -1287,7 +1483,7 @@ impl Codegen { debug_assert!(refr.get(&f).is_some()); refr }, - offset: u32::MAX, + ..Default::default() }; let id = self.tys.funcs.len() as _; diff --git a/hblang/tests/son_tests_const_folding_with_arg.txt b/hblang/tests/son_tests_const_folding_with_arg.txt index e69de29..d94af81 100644 --- a/hblang/tests/son_tests_const_folding_with_arg.txt +++ b/hblang/tests/son_tests_const_folding_with_arg.txt @@ -0,0 +1,5 @@ +0: Start +2: Tuple { index: 0 } +7: return [2] 0 +1: End +Start.1 \ No newline at end of file