From cb88edea1f1d91e7db72f7bd03aa9a4ba0aa0ff3 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Thu, 24 Oct 2024 15:39:38 +0200 Subject: [PATCH] fixing overoptimization of load -> store --- lang/README.md | 17 ++++- lang/src/son.rs | 70 +++++++++---------- lang/src/vc.rs | 9 ++- lang/tests/son_tests_inline.txt | 18 ++--- .../son_tests_small_struct_assignment.txt | 0 lang/tests/son_tests_small_struct_bitcast.txt | 40 +++++------ 6 files changed, 84 insertions(+), 70 deletions(-) create mode 100644 lang/tests/son_tests_small_struct_assignment.txt diff --git a/lang/README.md b/lang/README.md index 9b87762..9486377 100644 --- a/lang/README.md +++ b/lang/README.md @@ -544,10 +544,23 @@ main := fn(): int { ```hb Color := struct {r: u8, g: u8, b: u8, a: u8} white := Color.(255, 255, 255, 255) -u32_to_color := fn(v: u32): Color return @bitcast(v) +u32_to_color := fn(v: u32): Color return @as(Color, @bitcast(u32_to_u32(@bitcast(v)))) u32_to_u32 := fn(v: u32): u32 return v main := fn(): int { - return u32_to_color(@bitcast(white)).r + @as(Color, @bitcast(u32_to_u32(@bitcast(Color.{r: 1, g: 1, b: 1, a: 1})))).g + return u32_to_color(@bitcast(white)).r +} +``` + +#### small_struct_assignment +```hb + +Color := struct {r: u8, g: u8, b: u8, a: u8} +white := Color.(255, 255, 255, 255) +black := Color.(0, 0, 0, 0) +main := fn(): int { + f := black + f = white + return f.a } ``` diff --git a/lang/src/son.rs b/lang/src/son.rs index 8fd1179..b7ed8ab 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -39,16 +39,6 @@ type Nid = u16; type Lookup = crate::ctx_map::CtxMap; -trait StoreId: Sized { - fn to_store(self) -> Option; -} - -impl StoreId for Nid { - fn to_store(self) -> Option { - (self != MEM).then_some(self) - } -} - impl crate::ctx_map::CtxEntry for Nid { type Ctx = [Result]; type Key<'a> = (Kind, &'a [Nid], ty::Id); @@ -457,6 +447,7 @@ impl Nodes { K::Stre => { if self[target].inputs[2] != VOID && self[target].inputs.len() == 4 + && self[self[target].inputs[1]].kind != Kind::Load && self[self[target].inputs[3]].kind == Kind::Stre && self[self[target].inputs[3]].lock_rc == 0 && self[self[target].inputs[3]].inputs[2] == self[target].inputs[2] @@ -872,7 +863,7 @@ impl Nodes { #[allow(dead_code)] fn eliminate_stack_temporaries(&mut self) { 'o: for stack in self[MEM].outputs.clone() { - if self[stack].kind != Kind::Stck { + if self.values[stack as usize].is_err() || self[stack].kind != Kind::Stck { continue; } let mut full_read_into = None; @@ -1032,7 +1023,7 @@ pub enum Kind { impl Kind { fn is_pinned(&self) -> bool { - self.is_cfg() || matches!(self, Self::Phi | Self::Mem | Self::Arg) + self.is_cfg() || matches!(self, Self::Phi | Self::Arg | Self::Mem) } fn is_cfg(&self) -> bool { @@ -1629,7 +1620,7 @@ impl ItemCtx { if !matches!(self.nodes[stck].kind, Kind::Stck | Kind::Arg) { debug_assert_matches!( self.nodes[stck].kind, - Kind::Phi | Kind::Return | Kind::Load + Kind::Phi | Kind::Return | Kind::Load | Kind::Call { .. } | Kind::Stre ); continue; } @@ -2045,11 +2036,8 @@ impl<'a> Codegen<'a> { ); debug_assert!(self.ci.nodes[region].kind != Kind::Stre); - let mut vc = Vc::from([VOID, value, region]); self.ci.nodes.load_loop_store(&mut self.ci.scope.store, &mut self.ci.loops); - if let Some(str) = self.ci.scope.store.value().to_store() { - vc.push(str); - } + let mut vc = Vc::from([VOID, value, region, self.ci.scope.store.value()]); for load in self.ci.scope.loads.drain(..) { if load == value { self.ci.nodes.unlock(load); @@ -2080,11 +2068,8 @@ impl<'a> Codegen<'a> { self.ty_display(self.ci.nodes[region].ty) ); debug_assert!(self.ci.nodes[region].kind != Kind::Stre); - let mut vc = Vc::from([VOID, region]); self.ci.nodes.load_loop_store(&mut self.ci.scope.store, &mut self.ci.loops); - if let Some(str) = self.ci.scope.store.value().to_store() { - vc.push(str); - } + let vc = [VOID, region, self.ci.scope.store.value()]; let load = self.ci.nodes.new_node(ty, Kind::Load, vc); self.ci.nodes.lock(load); self.ci.scope.loads.push(load); @@ -2206,9 +2191,7 @@ impl<'a> Codegen<'a> { debug_assert_ne!(self.ci.ctrl, VOID); let mut inps = Vc::from([self.ci.ctrl, value.id]); self.ci.nodes.load_loop_store(&mut self.ci.scope.store, &mut self.ci.loops); - if let Some(str) = self.ci.scope.store.value().to_store() { - inps.push(str); - } + inps.push(self.ci.scope.store.value()); self.ci.ctrl = self.ci.nodes.new_node(ty::Id::VOID, Kind::Return, inps); @@ -2570,9 +2553,7 @@ impl<'a> Codegen<'a> { self.ci.nodes.unlock(n); } - if let Some(str) = self.ci.scope.store.value().to_store() { - inps.push(str); - } + inps.push(self.ci.scope.store.value()); self.ci.scope.loads.retain(|&load| { if inps.contains(&load) { return true; @@ -2658,9 +2639,7 @@ impl<'a> Codegen<'a> { } if has_ptr_arg { - if let Some(str) = self.ci.scope.store.value().to_store() { - inps.push(str); - } + inps.push(self.ci.scope.store.value()); self.ci.scope.loads.retain(|&load| { if inps.contains(&load) { return true; @@ -4243,7 +4222,7 @@ fn push_up(nodes: &mut Nodes) { return; } - for i in 0..nodes[node].inputs.len() { + for i in 1..nodes[node].inputs.len() { let inp = nodes[node].inputs[i]; if !nodes[inp].kind.is_pinned() { push_up_impl(inp, nodes); @@ -4298,6 +4277,20 @@ fn push_up(nodes: &mut Nodes) { } } } + + debug_assert_eq!( + nodes + .iter() + .map(|(n, _)| n) + .filter(|&n| !nodes.visited.get(n) && !matches!(nodes[n].kind, Kind::Arg | Kind::Mem)) + .collect::>(), + vec![], + "{:?}", + nodes + .iter() + .filter(|&(n, nod)| !nodes.visited.get(n) && !matches!(nod.kind, Kind::Arg | Kind::Mem)) + .collect::>() + ); } fn push_down(nodes: &mut Nodes, node: Nid) { @@ -4312,6 +4305,7 @@ fn push_down(nodes: &mut Nodes, node: Nid) { } fn better(nodes: &mut Nodes, is: Nid, then: Nid) -> bool { + debug_assert_ne!(idepth(nodes, is), idepth(nodes, then), "{is} {then}"); loop_depth(is, nodes) < loop_depth(then, nodes) || idepth(nodes, is) > idepth(nodes, then) || nodes[then].kind == Kind::If @@ -4321,6 +4315,12 @@ fn push_down(nodes: &mut Nodes, node: Nid) { return; } + for usage in nodes[node].outputs.clone() { + if is_forward_edge(usage, node, nodes) && nodes[node].kind == Kind::Stre { + push_down(nodes, usage); + } + } + for usage in nodes[node].outputs.clone() { if is_forward_edge(usage, node, nodes) { push_down(nodes, usage); @@ -4342,14 +4342,11 @@ fn push_down(nodes: &mut Nodes, node: Nid) { debug_assert!(nodes.dominates(nodes[node].inputs[0], min)); let mut cursor = min; - loop { + while cursor != nodes[node].inputs[0] { + cursor = idom(nodes, cursor); if better(nodes, cursor, min) { min = cursor; } - if cursor == nodes[node].inputs[0] { - break; - } - cursor = idom(nodes, cursor); } if nodes[min].kind.ends_basic_block() { @@ -4468,6 +4465,7 @@ mod tests { // Purely Testing Examples; returning_global_struct; small_struct_bitcast; + small_struct_assignment; wide_ret; comptime_min_reg_leak; different_types; diff --git a/lang/src/vc.rs b/lang/src/vc.rs index a973d9e..cacf136 100644 --- a/lang/src/vc.rs +++ b/lang/src/vc.rs @@ -287,7 +287,6 @@ impl BitSet { self.data.resize(new_len, 0); } - #[track_caller] pub fn set(&mut self, idx: Nid) -> bool { let idx = idx as usize; let data_idx = idx / Self::ELEM_SIZE; @@ -296,4 +295,12 @@ impl BitSet { self.data[data_idx] |= 1 << sub_idx; prev == 0 } + + pub fn get(&self, idx: Nid) -> bool { + let idx = idx as usize; + let data_idx = idx / Self::ELEM_SIZE; + let sub_idx = idx % Self::ELEM_SIZE; + let prev = self.data[data_idx] & (1 << sub_idx); + prev != 0 + } } diff --git a/lang/tests/son_tests_inline.txt b/lang/tests/son_tests_inline.txt index 7cc5619..cbd5556 100644 --- a/lang/tests/son_tests_inline.txt +++ b/lang/tests/son_tests_inline.txt @@ -1,17 +1,17 @@ main: LI64 r5, 0d LRA r4, r0, :gb - LD r6, r4, 0a, 8h - CMPS r9, r6, r5 - CMPUI r9, r9, 0d - ORI r11, r9, 0d - LI64 r1, 6d - ANDI r11, r11, 255d - JNE r11, r0, :0 - CP r5, r1 + LI64 r10, 6d + LD r7, r4, 0a, 8h + CMPS r11, r7, r5 + CMPUI r11, r11, 0d + ORI r12, r11, 0d + ANDI r12, r12, 255d + JNE r12, r0, :0 + CP r5, r10 JMP :1 0: LI64 r5, 1d - 1: SUB64 r1, r5, r1 + 1: SUB64 r1, r5, r10 JALA r0, r31, 0a code size: 131 ret: 0 diff --git a/lang/tests/son_tests_small_struct_assignment.txt b/lang/tests/son_tests_small_struct_assignment.txt new file mode 100644 index 0000000..e69de29 diff --git a/lang/tests/son_tests_small_struct_bitcast.txt b/lang/tests/son_tests_small_struct_bitcast.txt index 942c78a..7ecaf98 100644 --- a/lang/tests/son_tests_small_struct_bitcast.txt +++ b/lang/tests/son_tests_small_struct_bitcast.txt @@ -1,33 +1,29 @@ main: - ADDI64 r254, r254, -24d - ST r31, r254, 8a, 16h + ADDI64 r254, r254, -12d + ST r31, r254, 4a, 8h LRA r1, r0, :white - LD r32, r1, 0a, 4h - ADDI64 r5, r254, 4d - CP r2, r32 + ADDI64 r4, r254, 0d + LD r2, r1, 0a, 4h JAL r31, r0, :u32_to_color - ST r1, r254, 4a, 4h - CP r2, r32 - JAL r31, r0, :u32_to_u32 - ADDI64 r12, r254, 0d - LD r11, r254, 4a, 1h ST r1, r254, 0a, 4h - LD r4, r254, 1a, 1h - ADD8 r7, r4, r11 - ANDI r1, r7, 255d - LD r31, r254, 8a, 16h - ADDI64 r254, r254, 24d + LD r9, r254, 0a, 1h + ANDI r1, r9, 255d + LD r31, r254, 4a, 8h + ADDI64 r254, r254, 12d JALA r0, r31, 0a u32_to_color: - ADDI64 r254, r254, -4d - ADDI64 r3, r254, 0d - ST r2, r254, 0a, 4h - LD r1, r3, 0a, 4h - ADDI64 r254, r254, 4d + ADDI64 r254, r254, -12d + ST r31, r254, 4a, 8h + JAL r31, r0, :u32_to_u32 + ADDI64 r5, r254, 0d + ST r1, r254, 0a, 4h + LD r1, r5, 0a, 4h + LD r31, r254, 4a, 8h + ADDI64 r254, r254, 12d JALA r0, r31, 0a u32_to_u32: CP r1, r2 JALA r0, r31, 0a -code size: 284 -ret: 254 +code size: 263 +ret: 255 status: Ok(())