making better peepholes and fixing overoptimization on memory swaps

This commit is contained in:
Jakub Doka 2024-11-12 12:20:08 +01:00
parent f079daa42d
commit f1deab11c9
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
5 changed files with 112 additions and 63 deletions

View file

@ -1088,7 +1088,7 @@ main := fn(): uint {
i += 1 i += 1
} }
return arr[0].u return arr[2].u
} }
``` ```

View file

@ -107,13 +107,19 @@ macro_rules! inference {
#[derive(Clone)] #[derive(Clone)]
pub struct Nodes { pub struct Nodes {
values: Vec<Result<Node, (Nid, debug::Trace)>>, values: Vec<Result<Node, (Nid, debug::Trace)>>,
queued_peeps: Vec<Nid>,
free: Nid, free: Nid,
lookup: Lookup, lookup: Lookup,
} }
impl Default for Nodes { impl Default for Nodes {
fn default() -> Self { fn default() -> Self {
Self { values: Default::default(), free: Nid::MAX, lookup: Default::default() } Self {
values: Default::default(),
queued_peeps: Default::default(),
free: Nid::MAX,
lookup: Default::default(),
}
} }
} }
@ -673,6 +679,7 @@ impl Nodes {
let id = self.new_node_nop(ty, kind, inps); let id = self.new_node_nop(ty, kind, inps);
if let Some(opt) = self.peephole(id, tys) { if let Some(opt) = self.peephole(id, tys) {
debug_assert_ne!(opt, id); debug_assert_ne!(opt, id);
self.queued_peeps.clear();
self.lock(opt); self.lock(opt);
self.remove(id); self.remove(id);
self.unlock(opt); self.unlock(opt);
@ -731,6 +738,7 @@ impl Nodes {
fn late_peephole(&mut self, target: Nid, tys: &Types) -> Option<Nid> { fn late_peephole(&mut self, target: Nid, tys: &Types) -> Option<Nid> {
if let Some(id) = self.peephole(target, tys) { if let Some(id) = self.peephole(target, tys) {
self.queued_peeps.clear();
self.replace(target, id); self.replace(target, id);
return None; return None;
} }
@ -759,6 +767,11 @@ impl Nodes {
} }
if let Some(new) = self.peephole(node, tys) { if let Some(new) = self.peephole(node, tys) {
let plen = stack.len();
stack.append(&mut self.queued_peeps);
for &p in &stack[plen..] {
self.lock(p);
}
self.replace(node, new); self.replace(node, new);
self.push_adjacent_nodes(new, stack); self.push_adjacent_nodes(new, stack);
} }
@ -1043,18 +1056,14 @@ impl Nodes {
K::Phi => { K::Phi => {
let &[ctrl, lhs, rhs] = self[target].inputs.as_slice() else { unreachable!() }; let &[ctrl, lhs, rhs] = self[target].inputs.as_slice() else { unreachable!() };
if rhs == target { if rhs == target || lhs == rhs {
return Some(lhs);
}
if lhs == rhs {
return Some(lhs); return Some(lhs);
} }
if self[lhs].kind == Kind::Stre if self[lhs].kind == Kind::Stre
&& self[rhs].kind == Kind::Stre && self[rhs].kind == Kind::Stre
&& self[lhs].ty == self[rhs].ty && self[lhs].ty == self[rhs].ty
&& !matches!(self[lhs].ty.expand(), ty::Kind::Struct(_)) && self[lhs].ty.loc(tys) == Loc::Reg
&& self[lhs].inputs[2] == self[rhs].inputs[2] && self[lhs].inputs[2] == self[rhs].inputs[2]
&& self[lhs].inputs[3] == self[rhs].inputs[3] && self[lhs].inputs[3] == self[rhs].inputs[3]
{ {
@ -1098,11 +1107,18 @@ impl Nodes {
} }
} }
K::Stre => { K::Stre => {
if target == 79 {
std::dbg!(std::backtrace::Backtrace::capture());
}
let &[_, value, region, store, ..] = self[target].inputs.as_slice() else { let &[_, value, region, store, ..] = self[target].inputs.as_slice() else {
unreachable!() unreachable!()
}; };
if self[value].kind == Kind::Load && self[value].inputs[1] == region { if self[value].kind == Kind::Load && self[value].inputs[1] == region {
if target == 79 {
std::dbg!(1);
}
return Some(store); return Some(store);
} }
@ -1121,6 +1137,9 @@ impl Nodes {
'eliminate: { 'eliminate: {
if self[target].outputs.is_empty() { if self[target].outputs.is_empty() {
if target == 79 {
std::dbg!(2);
}
break 'eliminate; break 'eliminate;
} }
@ -1130,6 +1149,9 @@ impl Nodes {
for &ele in self[value].outputs.clone().iter().filter(|&&n| n != target) { for &ele in self[value].outputs.clone().iter().filter(|&&n| n != target) {
self[ele].peep_triggers.push(target); self[ele].peep_triggers.push(target);
} }
if target == 79 {
std::dbg!(3);
}
break 'eliminate; break 'eliminate;
} }
@ -1138,6 +1160,9 @@ impl Nodes {
}; };
if self[stack].ty != self[value].ty || self[stack].kind != Kind::Stck { if self[stack].ty != self[value].ty || self[stack].kind != Kind::Stck {
if target == 79 {
std::dbg!(3);
}
break 'eliminate; break 'eliminate;
} }
@ -1162,8 +1187,22 @@ impl Nodes {
} }
let Some(index) = unidentifed.iter().position(|&n| n == contact_point) let Some(index) = unidentifed.iter().position(|&n| n == contact_point)
else { else {
if target == 79 {
std::dbg!(5);
}
break 'eliminate; break 'eliminate;
}; };
if self[self[cursor].inputs[1]].kind == Kind::Load
&& self[value].outputs.iter().any(|&n| {
self.aclass_index(self[self[cursor].inputs[1]].inputs[1]).0
== self.aclass_index(self[n].inputs[2]).0
})
{
if target == 79 {
std::dbg!(6);
}
break 'eliminate;
}
unidentifed.remove(index); unidentifed.remove(index);
saved.push(contact_point); saved.push(contact_point);
first_store = cursor; first_store = cursor;
@ -1175,6 +1214,9 @@ impl Nodes {
} }
if !unidentifed.is_empty() { if !unidentifed.is_empty() {
if target == 79 {
std::dbg!(7);
}
break 'eliminate; break 'eliminate;
} }
@ -1214,7 +1256,8 @@ impl Nodes {
debug_assert_eq!(inps.len(), 4); debug_assert_eq!(inps.len(), 4);
inps[2] = region; inps[2] = region;
inps[3] = prev_store; inps[3] = prev_store;
prev_store = self.new_node(self[oper].ty, Kind::Stre, inps, tys); prev_store = self.new_node_nop(self[oper].ty, Kind::Stre, inps);
self.queued_peeps.push(prev_store);
} }
return Some(prev_store); return Some(prev_store);
@ -1279,6 +1322,15 @@ impl Nodes {
if self[cursor].inputs[0] == ctrl if self[cursor].inputs[0] == ctrl
&& self[cursor].inputs[2] == region && self[cursor].inputs[2] == region
&& self[cursor].ty == self[target].ty && self[cursor].ty == self[target].ty
&& (self[self[cursor].inputs[1]].kind != Kind::Load
|| (!self[target].outputs.is_empty()
&& self[target].outputs.iter().all(|&n| {
self[n].kind != Kind::Stre
|| self
.aclass_index(self[self[cursor].inputs[1]].inputs[1])
.0
!= self.aclass_index(self[n].inputs[2]).0
})))
{ {
return Some(self[cursor].inputs[1]); return Some(self[cursor].inputs[1]);
} }

View file

@ -1,23 +1,21 @@
main: main:
ADDI64 r254, r254, -112d ADDI64 r254, r254, -88d
ST r31, r254, 40a, 72h ST r31, r254, 24a, 64h
LI64 r32, 4d ADDI64 r32, r254, 0d
ADDI64 r33, r254, 24d LI64 r33, 1d
ADDI64 r34, r254, 0d ST r33, r254, 16a, 8h
ST r32, r254, 24a, 8h LI64 r34, 4d
LI64 r35, 1d ST r34, r254, 0a, 8h
ST r35, r254, 32a, 8h ST r33, r254, 8a, 8h
ST r35, r254, 16a, 8h
BMC r33, r34, 16h
JAL r31, r0, :opaque JAL r31, r0, :opaque
ST r1, r254, 0a, 16h ST r1, r254, 0a, 16h
LD r36, r254, 8a, 8h LD r35, r254, 8a, 8h
LD r37, r254, 16a, 8h LD r36, r254, 16a, 8h
ADD64 r38, r37, r36 ADD64 r37, r36, r35
LD r39, r254, 0a, 8h LD r38, r254, 0a, 8h
SUB64 r1, r39, r38 SUB64 r1, r38, r37
LD r31, r254, 40a, 72h LD r31, r254, 24a, 64h
ADDI64 r254, r254, 112d ADDI64 r254, r254, 88d
JALA r0, r31, 0a JALA r0, r31, 0a
opaque: opaque:
ADDI64 r254, r254, -16d ADDI64 r254, r254, -16d
@ -29,6 +27,6 @@ opaque:
LD r1, r2, 0a, 16h LD r1, r2, 0a, 16h
ADDI64 r254, r254, 16d ADDI64 r254, r254, 16d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 323 code size: 307
ret: 0 ret: 0
status: Ok(()) status: Ok(())

View file

@ -49,22 +49,19 @@ optional:
ADDI64 r254, r254, 16d ADDI64 r254, r254, 16d
JALA r0, r31, 0a JALA r0, r31, 0a
optionala: optionala:
ADDI64 r254, r254, -56d ADDI64 r254, r254, -48d
ADDI64 r7, r254, 0d ADDI64 r5, r254, 0d
LI64 r8, 1d ADDI64 r4, r254, 16d
ADDI64 r9, r254, 16d ST r5, r254, 16a, 8h
ADDI64 r6, r254, 24d LI64 r9, 1d
ST r8, r254, 0a, 8h
ST r9, r254, 24a, 8h ST r9, r254, 24a, 8h
ADDI64 r3, r6, 8d ADDI64 r12, r254, 8d
BMC r7, r3, 8h ST r12, r254, 32a, 8h
ADDI64 r4, r254, 8d LI64 r11, 0d
ST r4, r254, 40a, 8h ST r11, r254, 40a, 8h
LI64 r2, 0d BMC r4, r1, 32h
ST r2, r254, 48a, 8h ADDI64 r254, r254, 48d
BMC r6, r1, 32h
ADDI64 r254, r254, 56d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 604 code size: 577
ret: 100 ret: 100
status: Ok(()) status: Ok(())

View file

@ -1,5 +1,5 @@
main: main:
ADDI64 r254, r254, -32d ADDI64 r254, r254, -40d
LI64 r7, 1d LI64 r7, 1d
LI64 r6, 4d LI64 r6, 4d
LI64 r4, 0d LI64 r4, 0d
@ -7,36 +7,38 @@ main:
CP r8, r4 CP r8, r4
6: JNE r8, r6, :0 6: JNE r8, r6, :0
LI64 r6, 2d LI64 r6, 2d
ADDI64 r3, r254, 32d
CP r8, r4 CP r8, r4
4: LD r1, r254, 0a, 8h 4: LD r1, r254, 16a, 8h
JNE r8, r7, :1 JNE r8, r7, :1
JMP :2 JMP :2
1: MUL64 r11, r8, r6 1: MUL64 r12, r8, r6
ADD64 r8, r8, r7 ADD64 r8, r8, r7
SUB64 r9, r6, r8 SUB64 r9, r6, r8
MUL64 r9, r9, r6 MUL64 r9, r9, r6
CP r2, r4 CP r11, r4
5: JNE r2, r6, :3 5: JNE r11, r6, :3
JMP :4 JMP :4
3: ADD64 r10, r2, r7 3: ADD64 r10, r11, r7
ADD64 r12, r9, r2 ADD64 r1, r12, r11
MULI64 r12, r12, 8d ADD64 r2, r9, r11
ADD64 r1, r11, r2
ADD64 r12, r5, r12
MULI64 r1, r1, 8d MULI64 r1, r1, 8d
MULI64 r11, r2, 8d
ADD64 r1, r5, r1 ADD64 r1, r5, r1
BMC r12, r1, 8h ADD64 r11, r5, r11
BMC r1, r12, 8h BMC r1, r3, 8h
CP r2, r10 BMC r11, r1, 8h
BMC r3, r11, 8h
CP r11, r10
JMP :5 JMP :5
0: ADD64 r12, r8, r7 0: ADD64 r2, r8, r7
MULI64 r10, r8, 8d MULI64 r12, r8, 8d
ADD64 r1, r5, r10 ADD64 r3, r5, r12
ST r8, r1, 0a, 8h ST r8, r3, 0a, 8h
CP r8, r12 CP r8, r2
JMP :6 JMP :6
2: ADDI64 r254, r254, 32d 2: ADDI64 r254, r254, 40d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 255 code size: 271
ret: 2 ret: 0
status: Ok(()) status: Ok(())