From f1e715e9bdcf3ef3221bc847b57442747dceb1d1 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Tue, 12 Nov 2024 19:02:29 +0100 Subject: [PATCH] refactoring truncation --- lang/README.md | 15 ++ lang/src/lexer.rs | 6 +- lang/src/lib.rs | 6 +- lang/src/son.rs | 136 +++++++++--------- lang/src/son/hbvm.rs | 11 +- lang/src/son/hbvm/their_regalloc.rs | 23 ++- lang/tests/son_tests_c_strings.txt | 13 +- lang/tests/son_tests_different_types.txt | 37 +++-- lang/tests/son_tests_inline.txt | 21 +-- lang/tests/son_tests_nullable_structure.txt | 45 +++--- .../son_tests_wrong_dead_code_elimination.txt | 31 ++++ 11 files changed, 206 insertions(+), 138 deletions(-) create mode 100644 lang/tests/son_tests_wrong_dead_code_elimination.txt diff --git a/lang/README.md b/lang/README.md index f4732ff..3f565c6 100644 --- a/lang/README.md +++ b/lang/README.md @@ -268,6 +268,21 @@ main := fn(): uint { } ``` +#### wrong_dead_code_elimination +```hb +Color := struct {b: u8} +main := fn(): void { + color := Color.(0) + n := @as(u8, 1) + loop { + if color.b == 255 | color.b == 0 { + n = -n + } + color.b += n + } +} +``` + #### struct_operators ```hb Point := struct { diff --git a/lang/src/lexer.rs b/lang/src/lexer.rs index 2cacd5a..4a7c9bb 100644 --- a/lang/src/lexer.rs +++ b/lang/src/lexer.rs @@ -266,10 +266,8 @@ impl TokenKind { Self::Not => (value == 0) as _, Self::Float if float => value, Self::Float => (value as f64).to_bits() as _, - Self::Number => { - debug_assert!(float); - f64::from_bits(value as _) as _ - } + Self::Number if float => f64::from_bits(value as _) as _, + Self::Number => value, s => todo!("{s}"), } } diff --git a/lang/src/lib.rs b/lang/src/lib.rs index bd6120a..42fae22 100644 --- a/lang/src/lib.rs +++ b/lang/src/lib.rs @@ -385,7 +385,7 @@ pub mod ty { } impl Id { - pub const DEFAULT_INT: Self = Self::UINT; + pub const DINT: Self = Self::UINT; pub fn bin_ret(self, op: TokenKind) -> Id { use TokenKind as T; @@ -787,7 +787,7 @@ pub struct Sig { ret: ty::Id, } -#[derive(Default)] +#[derive(Default, Clone, Copy)] struct Func { file: Module, name: Ident, @@ -798,7 +798,7 @@ struct Func { comp_state: [CompState; 2], } -#[derive(Default, PartialEq, Eq)] +#[derive(Default, PartialEq, Eq, Clone, Copy)] enum CompState { #[default] Dead, diff --git a/lang/src/son.rs b/lang/src/son.rs index 9b4ae37..edf24a1 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -231,7 +231,7 @@ impl Nodes { let mut deepest = self[node].inputs[0]; for &inp in self[node].inputs[1..].iter() { if self.idepth(inp) > self.idepth(deepest) { - if matches!(self[inp].kind, Kind::Call { .. }) { + if self[inp].kind.is_call() { deepest = inp; } else { debug_assert!(!self.is_cfg(inp)); @@ -250,8 +250,7 @@ impl Nodes { self[current].outputs.remove(index); self[node].inputs[0] = deepest; debug_assert!( - !self[deepest].outputs.contains(&node) - || matches!(self[deepest].kind, Kind::Call { .. }), + !self[deepest].outputs.contains(&node) || self[deepest].kind.is_call(), "{node} {:?} {deepest} {:?}", self[node], self[deepest] @@ -937,9 +936,12 @@ impl Nodes { let &[_, oper] = self[target].inputs.as_slice() else { unreachable!() }; let ty = self[target].ty; - let is_float = self[oper].ty.is_float(); + if matches!(op, TokenKind::Number | TokenKind::Float) && ty == self[oper].ty { + return Some(oper); + } if let K::CInt { value } = self[oper].kind { + let is_float = self[oper].ty.is_float(); return Some(self.new_const(ty, op.apply_unop(value, is_float))); } } @@ -1089,7 +1091,7 @@ impl Nodes { mem::swap(&mut a, &mut b); } - if matches!(self[a].kind, Kind::Call { .. }) + if self[a].kind.is_call() && self[a].inputs.last() == Some(&target) && self[b].kind == Kind::Load && let &[store] = self[b].outputs.as_slice() @@ -1851,6 +1853,14 @@ pub enum Kind { } impl Kind { + fn is_call(&self) -> bool { + matches!(self, Kind::Call { .. }) + } + + fn is_eca(&self) -> bool { + matches!(self, Kind::Call { func: ty::Func::ECA, .. }) + } + fn is_pinned(&self) -> bool { self.is_cfg() || matches!(self, Self::Phi | Self::Arg | Self::Mem | Self::Loops) } @@ -2580,24 +2590,12 @@ impl<'a> Codegen<'a> { { Some(self.ci.nodes.new_const_lit(ty, (value as f64).to_bits() as i64)) } - Expr::Number { value, .. } => Some( - self.ci.nodes.new_const_lit( - ctx.ty - .map(|ty| self.tys.inner_of(ty).unwrap_or(ty)) - .filter(|ty| ty.is_integer()) - .unwrap_or(ty::Id::DEFAULT_INT), - value, - ), - ), - Expr::Float { value, .. } => Some( - self.ci.nodes.new_const_lit( - ctx.ty - .map(|ty| self.tys.inner_of(ty).unwrap_or(ty)) - .filter(|ty| ty.is_float()) - .unwrap_or(ty::Id::F32), - value as i64, - ), - ), + Expr::Number { value, .. } => { + self.gen_inferred_const(ctx, ty::Id::DINT, value, ty::Id::is_integer) + } + Expr::Float { value, .. } => { + self.gen_inferred_const(ctx, ty::Id::F32, value as i64, ty::Id::is_float) + } Expr::Ident { id, .. } if let Some(index) = self.ci.scope.vars.iter().rposition(|v| v.id == id) => { @@ -2937,13 +2935,25 @@ impl<'a> Codegen<'a> { self.strip_var(&mut rhs); self.implicit_unwrap(right.pos(), &mut rhs); let (ty, aclass) = self.binop_ty(pos, &mut lhs, &mut rhs, op); + let fty = ty.bin_ret(op); + if fty == ty::Id::BOOL { + if lhs.ty.is_float() { + } else { + self.ci.nodes.lock(rhs.id); + let lty = lhs.ty.extend(); + if lty != lhs.ty { + self.extend(&mut lhs, lty); + } + self.ci.nodes.unlock(rhs.id); + let rty = rhs.ty.extend(); + if rty != rhs.ty { + self.extend(&mut rhs, rty); + } + } + } let inps = [VOID, lhs.id, rhs.id]; - let bop = self.ci.nodes.new_node_lit( - ty.bin_ret(op), - Kind::BinOp { op }, - inps, - self.tys, - ); + let bop = + self.ci.nodes.new_node_lit(fty, Kind::BinOp { op }, inps, self.tys); self.ci.nodes.pass_aclass(aclass, bop.id); Some(bop) } @@ -2988,8 +2998,8 @@ impl<'a> Codegen<'a> { }; let elem = self.tys.ins.slices[s].elem; - let mut idx = self.expr_ctx(index, Ctx::default().with_ty(ty::Id::DEFAULT_INT))?; - self.assert_ty(index.pos(), &mut idx, ty::Id::DEFAULT_INT, "subscript"); + let mut idx = self.expr_ctx(index, Ctx::default().with_ty(ty::Id::DINT))?; + self.assert_ty(index.pos(), &mut idx, ty::Id::DINT, "subscript"); let size = self.ci.nodes.new_const(ty::Id::INT, self.tys.size_of(elem)); let inps = [VOID, idx.id, size]; let offset = self.ci.nodes.new_node( @@ -3018,27 +3028,12 @@ impl<'a> Codegen<'a> { } Expr::Directive { name: "sizeof", args: [ty], .. } => { let ty = self.ty(ty); - Some( - self.ci.nodes.new_const_lit( - ctx.ty - .map(|ty| self.tys.inner_of(ty).unwrap_or(ty)) - .filter(|ty| ty.is_integer()) - .unwrap_or(ty::Id::DEFAULT_INT), - self.tys.size_of(ty), - ), - ) + self.gen_inferred_const(ctx, ty::Id::DINT, self.tys.size_of(ty), ty::Id::is_integer) } Expr::Directive { name: "alignof", args: [ty], .. } => { let ty = self.ty(ty); - Some( - self.ci.nodes.new_const_lit( - ctx.ty - .map(|ty| self.tys.inner_of(ty).unwrap_or(ty)) - .filter(|ty| ty.is_integer()) - .unwrap_or(ty::Id::DEFAULT_INT), - self.tys.align_of(ty), - ), - ) + let align = self.tys.align_of(ty); + self.gen_inferred_const(ctx, ty::Id::DINT, align, ty::Id::is_integer) } Expr::Directive { name: "bitcast", args: [val], pos } => { let mut val = self.raw_expr(val)?; @@ -3744,6 +3739,24 @@ impl<'a> Codegen<'a> { } } + fn gen_inferred_const( + &mut self, + ctx: Ctx, + fallback: ty::Id, + value: impl Into, + filter: impl Fn(ty::Id) -> bool, + ) -> Option { + Some( + self.ci.nodes.new_const_lit( + ctx.ty + .map(|ty| self.tys.inner_of(ty).unwrap_or(ty)) + .filter(|&ty| filter(ty)) + .unwrap_or(fallback), + value, + ), + ) + } + fn gen_call(&mut self, func: &Expr, args: &[Expr], inline: bool) -> Option { let ty = self.ty(func); let ty::Kind::Func(mut fu) = ty.expand() else { @@ -3851,10 +3864,8 @@ impl<'a> Codegen<'a> { let (v, ctrl, scope) = mem::replace(&mut self.ci.inline_ret, prev_inline_ret)?; if is_inline && ctrl.get() != prev_ctrl - && (!matches!(self.ci.nodes[ctrl.get()].kind, Kind::Call { - func: ty::Func::ECA, - .. - }) || self.ci.nodes[ctrl.get()].inputs[0] != prev_ctrl) + && (!self.ci.nodes[ctrl.get()].kind.is_eca() + || self.ci.nodes[ctrl.get()].inputs[0] != prev_ctrl) { self.report(body.pos(), "function is makred inline but it contains controlflow"); } @@ -4067,17 +4078,10 @@ impl<'a> Codegen<'a> { let sym = SymKey::FuncInst(*func, args); let ct = |ins: &mut crate::TypeIns| { - let fuc = &ins.funcs[*func]; + let fuc = ins.funcs[*func]; + debug_assert!(fuc.comp_state.iter().all(|&s| s == CompState::default())); ins.funcs - .push(Func { - file: fuc.file, - name: fuc.name, - base: Some(*func), - sig: Some(Sig { args, ret }), - expr: fuc.expr, - is_inline: fuc.is_inline, - ..Default::default() - }) + .push(Func { base: Some(*func), sig: Some(Sig { args, ret }), ..fuc }) .into() }; let ty::Kind::Func(f) = @@ -4628,10 +4632,9 @@ impl<'a> Codegen<'a> { fn extend(&mut self, value: &mut Value, to: ty::Id) { self.strip_ptr(value); - let mask = self.ci.nodes.new_const(to, (1i64 << (self.tys.size_of(value.ty) * 8)) - 1); - let inps = [VOID, value.id, mask]; + let inps = [VOID, value.id]; *value = - self.ci.nodes.new_node_lit(to, Kind::BinOp { op: TokenKind::Band }, inps, self.tys); + self.ci.nodes.new_node_lit(to, Kind::UnOp { op: TokenKind::Number }, inps, self.tys); value.ty = to; } @@ -4810,6 +4813,7 @@ mod tests { fb_driver; // Purely Testing Examples; + wrong_dead_code_elimination; memory_swap; very_nested_loops; generic_type_mishap; diff --git a/lang/src/son/hbvm.rs b/lang/src/son/hbvm.rs index 937e74f..2a5bf5b 100644 --- a/lang/src/son/hbvm.rs +++ b/lang/src/son/hbvm.rs @@ -564,7 +564,8 @@ impl TokenKind { } fn unop(&self, dst: ty::Id, src: ty::Id) -> Option EncodedInstr> { - let src_idx = src.simple_size().unwrap().ilog2() as usize; + let src_idx = + src.simple_size().unwrap_or_else(|| panic!("{:?}", src.expand())).ilog2() as usize; Some(match self { Self::Sub => [ |a, b| sub8(a, reg::ZERO, b), @@ -583,6 +584,14 @@ impl TokenKind { Self::Number if src.is_float() && dst.is_integer() => { [|a, b| instrs::fti32(a, b, 1), |a, b| instrs::fti64(a, b, 1)][src_idx - 2] } + Self::Number if src.is_signed() && dst.is_integer() => { + [instrs::sxt8, instrs::sxt16, instrs::sxt32][src_idx] + } + Self::Number if (src.is_unsigned() || src == ty::Id::BOOL) && dst.is_integer() => [ + |a, b| instrs::andi(a, b, 0xff), + |a, b| instrs::andi(a, b, 0xffff), + |a, b| instrs::andi(a, b, 0xffffffff), + ][src_idx], Self::Float if dst.is_float() && src.is_float() => { [instrs::fc32t64, |a, b| instrs::fc64t32(a, b, 1)][src_idx - 2] } diff --git a/lang/src/son/hbvm/their_regalloc.rs b/lang/src/son/hbvm/their_regalloc.rs index e1a3756..d0f1ca8 100644 --- a/lang/src/son/hbvm/their_regalloc.rs +++ b/lang/src/son/hbvm/their_regalloc.rs @@ -241,8 +241,23 @@ impl HbvmBackend { }), Kind::UnOp { op } => { let op = op - .unop(node.ty, fuc.nodes[node.inputs[1]].ty) - .expect("TODO: unary operator not supported"); + .unop( + node.ty, + tys.inner_of(fuc.nodes[node.inputs[1]].ty) + .unwrap_or(fuc.nodes[node.inputs[1]].ty), + ) + .unwrap_or_else(|| { + panic!( + "TODO: unary operator not supported: {op} {} {}", + ty::Display::new(tys, files, node.ty), + ty::Display::new( + tys, + files, + tys.inner_of(fuc.nodes[node.inputs[1]].ty) + .unwrap_or(fuc.nodes[node.inputs[1]].ty) + ) + ) + }); let &[dst, oper] = allocs else { unreachable!() }; self.emit(op(atr(dst), atr(oper))); } @@ -264,8 +279,8 @@ impl HbvmBackend { } else if let Some(against) = op.cmp_against() { let op_ty = fuc.nodes[rh].ty; - self.emit(extend(fuc.nodes[lh].ty, fuc.nodes[lh].ty.extend(), 1, 1)); - self.emit(extend(fuc.nodes[rh].ty, fuc.nodes[rh].ty.extend(), 2, 2)); + //self.emit(extend(fuc.nodes[lh].ty, fuc.nodes[lh].ty.extend(), 1, 1)); + //self.emit(extend(fuc.nodes[rh].ty, fuc.nodes[rh].ty.extend(), 2, 2)); let &[dst, lhs, rhs] = allocs else { unreachable!() }; if op_ty.is_float() && matches!(op, TokenKind::Le | TokenKind::Ge) { diff --git a/lang/tests/son_tests_c_strings.txt b/lang/tests/son_tests_c_strings.txt index 0aa31af..be908d8 100644 --- a/lang/tests/son_tests_c_strings.txt +++ b/lang/tests/son_tests_c_strings.txt @@ -11,17 +11,16 @@ main: ADDI64 r254, r254, 16d JALA r0, r31, 0a str_len: - LI8 r6, 0b - LI64 r1, 0d - 2: LD r8, r2, 0a, 1h - ANDI r8, r8, 255d - ANDI r6, r6, 255d - JNE r8, r6, :0 + LI64 r3, 0d + CP r1, r3 + 2: LD r7, r2, 0a, 1h + ANDI r9, r7, 255d + JNE r9, r3, :0 JMP :1 0: ADDI64 r2, r2, 1d ADDI64 r1, r1, 1d JMP :2 1: JALA r0, r31, 0a -code size: 216 +code size: 205 ret: 16 status: Ok(()) diff --git a/lang/tests/son_tests_different_types.txt b/lang/tests/son_tests_different_types.txt index 289bd70..bad3ba8 100644 --- a/lang/tests/son_tests_different_types.txt +++ b/lang/tests/son_tests_different_types.txt @@ -1,30 +1,29 @@ main: ADDI64 r254, r254, -12d - LI8 r1, 255b - ST r1, r254, 0a, 1h - LI8 r4, 0b - ST r4, r254, 1a, 1h - ST r4, r254, 2a, 1h - ST r1, r254, 3a, 1h - LI32 r9, 0w - ST r9, r254, 4a, 4h - LD r4, r254, 4a, 4h - LI32 r1, 2w - ST r1, r254, 8a, 4h - LD r5, r254, 8a, 4h - ANDI r5, r5, 4294967295d - ANDI r1, r1, 4294967295d - JEQ r5, r1, :0 LI64 r1, 0d + LI8 r2, 255b + ST r2, r254, 0a, 1h + LI8 r5, 0b + ST r5, r254, 1a, 1h + ST r5, r254, 2a, 1h + ST r2, r254, 3a, 1h + LI32 r10, 0w + ST r10, r254, 4a, 4h + LD r8, r254, 4a, 4h + LI32 r2, 2w + ST r2, r254, 8a, 4h + LD r5, r254, 8a, 4h + LI64 r9, 2d + ANDI r10, r5, 4294967295d + JEQ r10, r9, :0 JMP :1 - 0: ANDI r4, r4, 4294967295d - ANDI r9, r9, 4294967295d - JEQ r4, r9, :2 + 0: ANDI r2, r8, 4294967295d + JEQ r2, r1, :2 LI64 r1, 64d JMP :1 2: LI64 r1, 512d 1: ADDI64 r254, r254, 12d JALA r0, r31, 0a -code size: 257 +code size: 245 ret: 512 status: Ok(()) diff --git a/lang/tests/son_tests_inline.txt b/lang/tests/son_tests_inline.txt index f144cb0..841e96d 100644 --- a/lang/tests/son_tests_inline.txt +++ b/lang/tests/son_tests_inline.txt @@ -1,20 +1,21 @@ main: LI64 r2, 8d ECA - LI64 r10, 6d + LI64 r11, 6d LRA r6, r0, :gb LI64 r9, 0d - LD r11, r6, 0a, 8h - CMPU r12, r11, r9 + LD r10, r6, 0a, 8h + CMPU r12, r10, r9 CMPUI r12, r12, 0d - ORI r2, r12, 0d - ANDI r2, r2, 255d - JNE r2, r0, :0 - CP r7, r10 + ANDI r2, r12, 255d + OR r4, r2, r9 + ANDI r4, r4, 255d + JNE r4, r0, :0 + CP r9, r11 JMP :1 - 0: LI64 r7, 1d - 1: SUB64 r1, r7, r10 + 0: LI64 r9, 1d + 1: SUB64 r1, r9, r11 JALA r0, r31, 0a -code size: 142 +code size: 146 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_nullable_structure.txt b/lang/tests/son_tests_nullable_structure.txt index 5599901..7003636 100644 --- a/lang/tests/son_tests_nullable_structure.txt +++ b/lang/tests/son_tests_nullable_structure.txt @@ -1,6 +1,6 @@ main: - ADDI64 r254, r254, -122d - ST r31, r254, 26a, 96h + ADDI64 r254, r254, -138d + ST r31, r254, 26a, 112h JAL r31, r0, :returner_fn CP r32, r1 ADDI64 r1, r254, 2d @@ -9,31 +9,28 @@ main: JAL r31, r0, :returner_cn ST r1, r254, 0a, 2h LI8 r34, 0b - LI8 r35, 0b - LD r36, r254, 2a, 1h CP r1, r32 - ANDI r1, r1, 255d - ANDI r34, r34, 255d - CMPU r37, r1, r34 - CMPUI r37, r37, 0d - ANDI r36, r36, 255d - ANDI r35, r35, 255d - CMPU r38, r36, r35 - CMPUI r38, r38, 0d - LD r39, r254, 0a, 1h - AND r40, r38, r37 - ANDI r39, r39, 255d - ANDI r35, r35, 255d - CMPU r41, r39, r35 - CMPUI r41, r41, 0d - AND r42, r41, r40 - ANDI r42, r42, 255d - JNE r42, r0, :0 + CMPU r35, r1, r34 + CMPUI r35, r35, 0d + LI8 r36, 0b + LD r37, r254, 2a, 1h + ANDI r38, r35, 255d + CMPU r32, r37, r36 + CMPUI r32, r32, 0d + AND r39, r32, r38 + LD r40, r254, 0a, 1h + ANDI r41, r39, 255d + CMPU r42, r40, r36 + CMPUI r42, r42, 0d + AND r43, r42, r41 + ANDI r44, r43, 255d + ANDI r44, r44, 255d + JNE r44, r0, :0 LI64 r1, 0d JMP :1 0: LI64 r1, 1d - 1: LD r31, r254, 26a, 96h - ADDI64 r254, r254, 122d + 1: LD r31, r254, 26a, 112h + ADDI64 r254, r254, 138d JALA r0, r31, 0a returner_bn: ADDI64 r254, r254, -24d @@ -60,6 +57,6 @@ returner_fn: LD r1, r254, 0a, 0h ORI r1, r1, 128d JALA r0, r31, 0a -code size: 546 +code size: 513 ret: 1 status: Ok(()) diff --git a/lang/tests/son_tests_wrong_dead_code_elimination.txt b/lang/tests/son_tests_wrong_dead_code_elimination.txt new file mode 100644 index 0000000..be8973d --- /dev/null +++ b/lang/tests/son_tests_wrong_dead_code_elimination.txt @@ -0,0 +1,31 @@ +main: + ADDI64 r254, r254, -1d + LI64 r7, 0d + LI64 r5, 255d + LI8 r4, 1b + LI8 r6, 0b + ST r6, r254, 0a, 1h + 2: LD r9, r254, 0a, 1h + AND r12, r9, r5 + CMPU r2, r12, r5 + CMPUI r2, r2, 0d + NOT r2, r2 + CMPU r6, r12, r7 + CMPUI r6, r6, 0d + NOT r6, r6 + AND r8, r2, r5 + AND r10, r6, r5 + OR r11, r10, r8 + ANDI r11, r11, 255d + JNE r11, r0, :0 + JMP :1 + 0: SUB8 r4, r0, r4 + 1: ADD8 r8, r9, r4 + ST r8, r254, 0a, 1h + JMP :2 + ADDI64 r254, r254, 1d + JALA r0, r31, 0a +timed out +code size: 192 +ret: 0 +status: Ok(())