From 97eb985a02b38631835946764f1f5bd05946f7e9 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Tue, 29 Oct 2024 09:04:49 +0100 Subject: [PATCH] removing specific opts from a function and adding them to the general peepholes --- lang/src/son.rs | 156 ++++-------------- .../son_tests_aliasing_overoptimization.txt | 16 ++ lang/tests/son_tests_arrays.txt | 25 +-- lang/tests/son_tests_conditional_stores.txt | 11 +- lang/tests/son_tests_directives.txt | 16 +- lang/tests/son_tests_inline_test.txt | 29 ++-- lang/tests/son_tests_struct_operators.txt | 116 ++++++------- lang/tests/son_tests_structs.txt | 32 ++-- lang/tests/son_tests_wide_ret.txt | 42 +++-- 9 files changed, 174 insertions(+), 269 deletions(-) diff --git a/lang/src/son.rs b/lang/src/son.rs index 1ff2d0f..48fb5b6 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -621,11 +621,8 @@ impl Nodes { } fn iter_peeps(&mut self, mut fuel: usize, stack: &mut Vec) { - debug_assert!(!self.complete); debug_assert!(stack.is_empty()); - self.complete = true; - self.iter() .filter_map(|(id, node)| node.kind.is_peeped().then_some(id)) .collect_into(stack); @@ -635,6 +632,11 @@ impl Nodes { && let Some(node) = stack.pop() { fuel -= 1; + + if self[node].outputs.is_empty() { + self.push_adjacent_nodes(node, stack); + } + if self.unlock_remove(node) { continue; } @@ -656,11 +658,18 @@ impl Nodes { fn push_adjacent_nodes(&mut self, of: Nid, stack: &mut Vec) { let prev_len = stack.len(); - for &i in self[of].outputs.iter().chain(self[of].inputs.iter()) { - if self[i].kind.is_peeped() && self[i].lock_rc == 0 { + for &i in self[of] + .outputs + .iter() + .chain(self[of].inputs.iter()) + .chain(self[of].peep_triggers.iter()) + { + if self.values[i as usize].is_ok() && self[i].kind.is_peeped() && self[i].lock_rc == 0 { stack.push(i); } } + + self[of].peep_triggers = Vc::default(); stack.iter().skip(prev_len).for_each(|&n| self.lock(n)); } @@ -877,6 +886,11 @@ impl Nodes { let mut cursor = n; let class = 
self.aclass_index(self[cursor].inputs[2]); + if self[class.1].kind != Kind::Stck { + new_inps.push(n); + continue; + } + cursor = self[cursor].inputs[3]; while cursor != MEM { if self.aclass_index(self[cursor].inputs[2]) != class @@ -927,9 +941,15 @@ impl Nodes { }; 'eliminate: { - break 'eliminate; + if self[target].outputs.is_empty() { + break 'eliminate; + } + if self[value].kind != Kind::Load || self[value].outputs.as_slice() != [target] { + for &ele in self[value].outputs.clone().iter().filter(|&&n| n != target) { + self[ele].peep_triggers.push(target); + } break 'eliminate; } @@ -1038,7 +1058,7 @@ impl Nodes { return Some(store); } - return Some(self.modify_input(store, 1, self[target].inputs[1])); + return Some(self.modify_input(store, 1, value)); } } K::Load => { @@ -1383,117 +1403,6 @@ impl Nodes { dominated = self.idom(dominated); } } - - fn eliminate_stack_temporaries(&mut self) { - 'o: for stack in self[MEM].outputs.clone() { - if self.values[stack as usize].is_err() || self[stack].kind != Kind::Stck { - continue; - } - let mut full_read_into = None; - let mut unidentifed = Vc::default(); - for &o in self[stack].outputs.iter() { - match self[o].kind { - Kind::Load - if self[o].ty == self[stack].ty - && self[o].outputs.iter().all(|&n| self[n].kind == Kind::Stre) - && let mut full_stores = self[o].outputs.iter().filter(|&&n| { - self[n].kind == Kind::Stre && self[n].inputs[1] == o - }) - && let Some(&n) = full_stores.next() - && full_stores.next().is_none() => - { - if full_read_into.replace((n, self[o].inputs[2])).is_some() { - continue 'o; - } - } - _ => unidentifed.push(o), - } - } - - let Some((dst, last_store)) = full_read_into else { continue }; - - let mut saved = Vc::default(); - let mut cursor = last_store; - let mut first_store = last_store; - while cursor != MEM && self[cursor].kind == Kind::Stre { - let mut contact_point = cursor; - let mut region = self[cursor].inputs[2]; - if let Kind::BinOp { op } = self[region].kind { - 
debug_assert_matches!(op, TokenKind::Add | TokenKind::Sub); - contact_point = region; - region = self[region].inputs[1] - } - - if region != stack { - break; - } - let Some(index) = unidentifed.iter().position(|&n| n == contact_point) else { - continue 'o; - }; - unidentifed.remove(index); - saved.push(contact_point); - first_store = cursor; - cursor = *self[cursor].inputs.get(3).unwrap_or(&MEM); - - if unidentifed.is_empty() { - break; - } - } - - let region = self[dst].inputs[2]; - // TODO: this can be an offset already due to previous peeps so handle that - if let &[mcall] = unidentifed.as_slice() - && matches!(self[mcall].kind, Kind::Call { .. }) - && self[mcall].inputs.last() == Some(&stack) - { - self.modify_input(mcall, self[mcall].inputs.len() - 1, region); - - self.replace(dst, last_store); - } else { - debug_assert_matches!( - self[last_store].kind, - Kind::Stre | Kind::Mem, - "{:?}", - self[last_store] - ); - debug_assert_matches!( - self[first_store].kind, - Kind::Stre | Kind::Mem, - "{:?}", - self[first_store] - ); - - if !unidentifed.is_empty() { - continue; - } - - // FIXME: when the loads and stores become parallel we will need to get saved - // differently - let mut prev_store = self[dst].inputs[3]; - for mut oper in saved.into_iter().rev() { - let mut region = region; - if let Kind::BinOp { op } = self[oper].kind { - debug_assert_eq!(self[oper].outputs.len(), 1); - debug_assert_eq!(self[self[oper].outputs[0]].kind, Kind::Stre); - region = self.new_node(self[oper].ty, Kind::BinOp { op }, [ - VOID, - region, - self[oper].inputs[2], - ]); - oper = self[oper].outputs[0]; - } - - let mut inps = self[oper].inputs.clone(); - debug_assert_eq!(inps.len(), 4); - inps[2] = region; - inps[3] = prev_store; - prev_store = self.new_node(self[oper].ty, Kind::Stre, inps); - } - - self.replace(dst, prev_store); - } - } - } } impl ops::Index for Nodes { @@ -1614,6 +1523,7 @@ pub struct Node { kind: Kind, inputs: Vc, outputs: Vc, + peep_triggers: Vc, ty: ty::Id, 
offset: Offset, ralloc_backref: RallocBRef, @@ -1894,7 +1804,6 @@ impl ItemCtx { self.scope.clear(&mut self.nodes); mem::take(&mut self.ctrl).soft_remove(&mut self.nodes); - self.nodes.eliminate_stack_temporaries(); self.nodes.iter_peeps(1000, stack); self.nodes.unlock(MEM); @@ -2199,11 +2108,10 @@ impl<'a> Codegen<'a> { vc.push(load); } } - let store = self.ci.nodes.new_node_nop(ty, Kind::Stre, vc); - aclass.last_store.set(store, &mut self.ci.nodes); - let opted = self.ci.nodes.late_peephole(store).unwrap_or(store); - aclass.last_store.set_remove(opted, &mut self.ci.nodes); - opted + mem::take(&mut aclass.last_store).soft_remove(&mut self.ci.nodes); + let store = self.ci.nodes.new_node(ty, Kind::Stre, vc); + aclass.last_store = StrongRef::new(store, &mut self.ci.nodes); + store } fn load_mem(&mut self, region: Nid, ty: ty::Id) -> Nid { diff --git a/lang/tests/son_tests_aliasing_overoptimization.txt b/lang/tests/son_tests_aliasing_overoptimization.txt index e69de29..3261cd4 100644 --- a/lang/tests/son_tests_aliasing_overoptimization.txt +++ b/lang/tests/son_tests_aliasing_overoptimization.txt @@ -0,0 +1,16 @@ +main: + ADDI64 r254, r254, -24d + ADDI64 r2, r254, 16d + ST r2, r254, 0a, 8h + LI64 r5, 0d + LI64 r4, 2d + ST r5, r254, 8a, 8h + ST r4, r254, 16a, 8h + LD r10, r254, 0a, 8h + ST r5, r10, 0a, 8h + LD r1, r254, 16a, 8h + ADDI64 r254, r254, 24d + JALA r0, r31, 0a +code size: 150 +ret: 0 +status: Ok(()) diff --git a/lang/tests/son_tests_arrays.txt b/lang/tests/son_tests_arrays.txt index c42ff35..eaad767 100644 --- a/lang/tests/son_tests_arrays.txt +++ b/lang/tests/son_tests_arrays.txt @@ -1,24 +1,17 @@ main: - ADDI64 r254, r254, -44d - ST r31, r254, 28a, 16h + ADDI64 r254, r254, -40d + ST r31, r254, 24a, 16h LI64 r32, 1d ADDI64 r2, r254, 0d ST r32, r254, 0a, 8h - LI64 r8, 2d - ST r8, r254, 8a, 8h - LI64 r11, 4d - ST r11, r254, 16a, 8h + LI64 r5, 2d + ST r5, r254, 8a, 8h + LI64 r8, 4d + ST r8, r254, 16a, 8h JAL r31, r0, :pass - LI8 r10, 0b - ST r10, r254, 24a, 1h 
- ST r10, r254, 25a, 1h - LI16 r12, 511h - ST r12, r254, 26a, 1h - LI16 r4, 1h - ST r4, r254, 27a, 1h ADD64 r1, r1, r32 - LD r31, r254, 28a, 16h - ADDI64 r254, r254, 44d + LD r31, r254, 24a, 16h + ADDI64 r254, r254, 40d JALA r0, r31, 0a pass: LD r4, r2, 8a, 8h @@ -29,6 +22,6 @@ pass: LD r1, r10, 0a, 8h ADD64 r1, r1, r9 JALA r0, r31, 0a -code size: 294 +code size: 231 ret: 8 status: Ok(()) diff --git a/lang/tests/son_tests_conditional_stores.txt b/lang/tests/son_tests_conditional_stores.txt index 6f44a63..b7fc3d2 100644 --- a/lang/tests/son_tests_conditional_stores.txt +++ b/lang/tests/son_tests_conditional_stores.txt @@ -2,8 +2,8 @@ cond: LI64 r1, 0d JALA r0, r31, 0a main: - ADDI64 r254, r254, -16d - ST r31, r254, 8a, 8h + ADDI64 r254, r254, -8d + ST r31, r254, 0a, 8h JAL r31, r0, :cond LI64 r5, 0d CP r7, r5 @@ -12,10 +12,9 @@ main: CP r1, r5 JMP :1 0: LI64 r1, 2d - 1: ST r1, r254, 0a, 8h - LD r31, r254, 8a, 8h - ADDI64 r254, r254, 16d + 1: LD r31, r254, 0a, 8h + ADDI64 r254, r254, 8d JALA r0, r31, 0a -code size: 147 +code size: 134 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_directives.txt b/lang/tests/son_tests_directives.txt index 1f97eda..be46acd 100644 --- a/lang/tests/son_tests_directives.txt +++ b/lang/tests/son_tests_directives.txt @@ -1,23 +1,19 @@ main: - ADDI64 r254, r254, -31d + ADDI64 r254, r254, -16d LI64 r1, 10d - ADDI64 r4, r254, 15d - ST r1, r254, 15a, 8h + ADDI64 r4, r254, 0d + ST r1, r254, 0a, 8h LI64 r7, 20d - ST r7, r254, 23a, 8h + ST r7, r254, 8a, 8h LI64 r6, 6d LI64 r5, 5d LI64 r2, 1d LD r3, r4, 0a, 16h ECA - LRA r5, r0, :arbitrary text - - ADDI64 r7, r254, 0d - BMC r5, r7, 15h LI64 r1, 0d - ADDI64 r254, r254, 31d + ADDI64 r254, r254, 16d JALA r0, r31, 0a ev: Ecall -code size: 190 +code size: 152 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_inline_test.txt b/lang/tests/son_tests_inline_test.txt index 50cc7a6..3ed6ff4 100644 --- a/lang/tests/son_tests_inline_test.txt +++ b/lang/tests/son_tests_inline_test.txt @@ -22,22 
+22,19 @@ scalar_values: LI64 r1, 0d JALA r0, r31, 0a structs: - ADDI64 r254, r254, -48d - LI64 r3, 5d - ST r3, r254, 0a, 8h - ST r3, r254, 8a, 8h - LD r7, r254, 0a, 8h - ADDI64 r9, r7, 15d - ST r9, r254, 24a, 8h - LI64 r8, 20d - ST r8, r254, 32a, 8h - LI64 r9, 0d - LD r3, r254, 24a, 8h - ST r8, r254, 16a, 8h - ST r9, r254, 40a, 8h - SUB64 r1, r3, r8 - ADDI64 r254, r254, 48d + ADDI64 r254, r254, -32d + LI64 r2, 5d + ST r2, r254, 16a, 8h + ST r2, r254, 24a, 8h + LD r6, r254, 16a, 8h + ADDI64 r8, r6, 15d + ST r8, r254, 0a, 8h + LI64 r7, 20d + ST r7, r254, 8a, 8h + LD r1, r254, 0a, 8h + SUB64 r1, r1, r7 + ADDI64 r254, r254, 32d JALA r0, r31, 0a -code size: 346 +code size: 310 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_struct_operators.txt b/lang/tests/son_tests_struct_operators.txt index 6e549a6..10c4aa0 100644 --- a/lang/tests/son_tests_struct_operators.txt +++ b/lang/tests/son_tests_struct_operators.txt @@ -2,31 +2,31 @@ main: ADDI64 r254, r254, -152d LI8 r1, 0b LI8 r3, 1b - ST r1, r254, 148a, 1h - ST r3, r254, 144a, 1h - ST r1, r254, 149a, 1h - ST r3, r254, 145a, 1h - ST r1, r254, 150a, 1h - ST r3, r254, 146a, 1h - ST r1, r254, 151a, 1h - ST r3, r254, 147a, 1h - LD r1, r254, 148a, 1h - LD r4, r254, 144a, 1h + ST r1, r254, 132a, 1h + ST r3, r254, 128a, 1h + ST r1, r254, 133a, 1h + ST r3, r254, 129a, 1h + ST r1, r254, 134a, 1h + ST r3, r254, 130a, 1h + ST r1, r254, 135a, 1h + ST r3, r254, 131a, 1h + LD r1, r254, 132a, 1h + LD r4, r254, 128a, 1h ADD8 r5, r4, r1 - LD r8, r254, 145a, 1h - LD r9, r254, 149a, 1h - ST r5, r254, 148a, 1h + LD r8, r254, 129a, 1h + LD r9, r254, 133a, 1h + ST r5, r254, 132a, 1h ADD8 r12, r9, r8 - LD r4, r254, 146a, 1h - LD r5, r254, 150a, 1h - ST r12, r254, 149a, 1h + LD r4, r254, 130a, 1h + LD r5, r254, 134a, 1h + ST r12, r254, 133a, 1h ADD8 r7, r5, r4 - ST r7, r254, 150a, 1h - ST r3, r254, 151a, 1h - LD r12, r254, 149a, 1h - LD r1, r254, 150a, 1h + ST r7, r254, 134a, 1h + ST r3, r254, 135a, 1h + LD r12, r254, 133a, 1h + LD r1, 
r254, 134a, 1h ADD8 r4, r1, r12 - LD r5, r254, 148a, 1h + LD r5, r254, 132a, 1h ADD8 r7, r5, r4 LI8 r9, 4b ADD8 r1, r7, r3 @@ -36,61 +36,61 @@ main: LI64 r1, 1008d JMP :1 0: LI64 r6, 1d - ADDI64 r5, r254, 112d - ST r6, r254, 112a, 8h + ADDI64 r5, r254, 80d + ST r6, r254, 80a, 8h LI64 r9, 2d - ST r9, r254, 120a, 8h + ST r9, r254, 88a, 8h LI64 r2, 3d - ADDI64 r1, r254, 96d - ST r2, r254, 64a, 8h + ADDI64 r1, r254, 64d + ST r2, r254, 48a, 8h LI64 r6, 4d LI64 r2, 0d BMC r5, r1, 16h - ST r6, r254, 72a, 8h - ST r2, r254, 80a, 8h - LD r11, r254, 96a, 8h - LD r1, r254, 64a, 8h - ST r2, r254, 88a, 8h + ST r6, r254, 56a, 8h + ST r2, r254, 0a, 8h + LD r11, r254, 64a, 8h + LD r1, r254, 48a, 8h + ST r2, r254, 8a, 8h ADD64 r4, r1, r11 - LD r7, r254, 104a, 8h - LD r2, r254, 80a, 8h - ST r4, r254, 32a, 8h + LD r7, r254, 72a, 8h + LD r2, r254, 0a, 8h + ST r4, r254, 96a, 8h ADD64 r12, r7, r6 SUB64 r3, r2, r1 - ADDI64 r8, r254, 0d - ST r12, r254, 40a, 8h + ADDI64 r8, r254, 16d + ST r12, r254, 104a, 8h SUB64 r2, r1, r11 - ST r3, r254, 0a, 8h + ST r3, r254, 16a, 8h LI64 r9, -4d - ST r2, r254, 48a, 8h + ST r2, r254, 112a, 8h SUB64 r7, r6, r7 - ST r9, r254, 8a, 8h + ST r9, r254, 24a, 8h ADDI64 r8, r8, 16d - ST r7, r254, 56a, 8h + ST r7, r254, 120a, 8h BMC r5, r8, 16h - LD r6, r254, 32a, 8h - LD r8, r254, 0a, 8h - ADD64 r9, r8, r6 - LD r11, r254, 8a, 8h - LD r1, r254, 40a, 8h - ST r9, r254, 0a, 8h - ADD64 r4, r1, r11 + LD r6, r254, 96a, 8h LD r8, r254, 16a, 8h - LD r9, r254, 48a, 8h - ST r4, r254, 8a, 8h + ADD64 r9, r8, r6 + LD r11, r254, 24a, 8h + LD r1, r254, 104a, 8h + ST r9, r254, 16a, 8h + ADD64 r4, r1, r11 + LD r8, r254, 32a, 8h + LD r9, r254, 112a, 8h + ST r4, r254, 24a, 8h ADD64 r12, r9, r8 - LD r2, r254, 24a, 8h - ST r12, r254, 16a, 8h + LD r2, r254, 40a, 8h + ST r12, r254, 32a, 8h ADD64 r12, r2, r7 - ST r12, r254, 24a, 8h - LD r7, r254, 0a, 8h - LD r9, r254, 16a, 8h + ST r12, r254, 40a, 8h + LD r7, r254, 16a, 8h + LD r9, r254, 32a, 8h ADD64 r11, r9, r7 - LD r1, r254, 8a, 8h - ST 
r11, r254, 128a, 8h + LD r1, r254, 24a, 8h + ST r11, r254, 136a, 8h ADD64 r6, r1, r12 - ST r6, r254, 136a, 8h - LD r7, r254, 128a, 8h + ST r6, r254, 144a, 8h + LD r7, r254, 136a, 8h ADD64 r1, r7, r6 1: ADDI64 r254, r254, 152d JALA r0, r31, 0a diff --git a/lang/tests/son_tests_structs.txt b/lang/tests/son_tests_structs.txt index 87d2a20..baa5202 100644 --- a/lang/tests/son_tests_structs.txt +++ b/lang/tests/son_tests_structs.txt @@ -1,25 +1,23 @@ main: - ADDI64 r254, r254, -80d - ST r31, r254, 48a, 32h - LI64 r2, 4d - ADDI64 r32, r254, 32d - ST r2, r254, 32a, 8h - LI64 r33, 3d - ST r33, r254, 40a, 8h - ADDI64 r34, r254, 0d - LD r3, r32, 0a, 16h + ADDI64 r254, r254, -56d + ST r31, r254, 32a, 24h + LI64 r3, 4d + ADDI64 r2, r254, 16d + ST r3, r254, 16a, 8h + LI64 r32, 3d + ST r32, r254, 24a, 8h + ADDI64 r33, r254, 0d + LD r3, r2, 0a, 16h JAL r31, r0, :odher_pass ST r1, r254, 0a, 16h - ADDI64 r11, r254, 16d - BMC r32, r11, 16h - LD r4, r254, 8a, 8h - JNE r4, r33, :0 - CP r2, r34 + LD r2, r254, 8a, 8h + JNE r2, r32, :0 + CP r2, r33 JAL r31, r0, :pass JMP :1 0: LI64 r1, 0d - 1: LD r31, r254, 48a, 32h - ADDI64 r254, r254, 80d + 1: LD r31, r254, 32a, 24h + ADDI64 r254, r254, 56d JALA r0, r31, 0a odher_pass: ADDI64 r254, r254, -16d @@ -31,6 +29,6 @@ odher_pass: pass: LD r1, r2, 0a, 8h JALA r0, r31, 0a -code size: 321 +code size: 305 ret: 4 status: Ok(()) diff --git a/lang/tests/son_tests_wide_ret.txt b/lang/tests/son_tests_wide_ret.txt index e9639bf..7950daa 100644 --- a/lang/tests/son_tests_wide_ret.txt +++ b/lang/tests/son_tests_wide_ret.txt @@ -14,32 +14,30 @@ main: ADDI64 r254, r254, 24d JALA r0, r31, 0a maina: - ADDI64 r254, r254, -44d - ST r31, r254, 36a, 8h - ADDI64 r6, r254, 32d + ADDI64 r254, r254, -36d + ST r31, r254, 28a, 8h + ADDI64 r5, r254, 24d JAL r31, r0, :small_struct - ST r1, r254, 32a, 4h + ST r1, r254, 24a, 4h LI8 r11, 0b - ADDI64 r10, r254, 24d - ST r11, r254, 24a, 1h - ST r11, r254, 25a, 1h - ST r11, r254, 26a, 1h + ADDI64 r10, r254, 0d + ST r11, r254, 
0a, 1h + ST r11, r254, 1a, 1h + ST r11, r254, 2a, 1h LI8 r4, 3b - ST r4, r254, 27a, 1h + ST r4, r254, 3a, 1h LI8 r7, 1b - ST r7, r254, 28a, 1h - ST r11, r254, 29a, 1h - ST r11, r254, 30a, 1h - ST r11, r254, 31a, 1h - ADDI64 r2, r254, 0d - BMC r10, r2, 8h - ADDI64 r5, r2, 8d - ADDI64 r4, r254, 16d - BMC r10, r5, 8h + ST r7, r254, 4a, 1h + ST r11, r254, 5a, 1h + ST r11, r254, 6a, 1h + ST r11, r254, 7a, 1h + ADDI64 r1, r254, 8d + BMC r10, r1, 8h + ADDI64 r4, r1, 8d BMC r10, r4, 8h - LD r1, r2, 0a, 16h - LD r31, r254, 36a, 8h - ADDI64 r254, r254, 44d + LD r1, r1, 0a, 16h + LD r31, r254, 28a, 8h + ADDI64 r254, r254, 36d JALA r0, r31, 0a small_struct: ADDI64 r254, r254, -4d @@ -50,6 +48,6 @@ small_struct: LD r1, r3, 0a, 4h ADDI64 r254, r254, 4d JALA r0, r31, 0a -code size: 514 +code size: 498 ret: 2 status: Ok(())