Remove the return-value temporary optimization (sadly)

This commit is contained in:
Jakub Doka 2024-10-29 17:03:00 +01:00
parent da7cd5926c
commit 7448339605
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
8 changed files with 166 additions and 111 deletions

View file

@ -1181,3 +1181,18 @@ clobber := fn(): void {
var = 0 var = 0
} }
``` ```
#### overwrite_aliasing_overoptimization
```hb
// Two-field record of ints; `opaque` returns one of these by value.
Foo := struct {a: int, b: int}
// Holds a nested `Foo` in field `f` next to a separate int field `b`.
Bar := struct {f: Foo, b: int}
// Entry point of the `overwrite_aliasing_overoptimization` example.
// Builds a `Bar`, replaces its whole `f` field with the struct returned by
// `opaque`, then combines the fields of the new `f` with the untouched `b`.
// With `opaque` returning `.(3, 2)` the result is 3 - 2 - 1 = 0.
main := fn(): int {
// initial contents: b = 1, f = .(4, 1)
value := Bar.{b: 1, f: .(4, 1)}
// overwrite the nested struct with the call's return value; the reads below
// must observe this new value rather than the initial `.(4, 1)`
value.f = opaque()
return value.f.a - value.f.b - value.b
}
// Returns the fixed `Foo` value `.(3, 2)`; `main` stores it into `value.f`.
opaque := fn(): Foo {
return .(3, 2)
}

View file

@ -1002,57 +1002,47 @@ impl Nodes {
} }
} }
// TODO: this can be an offset already due to previous peeps so handle that debug_assert_matches!(
if let &[mcall] = unidentifed.as_slice() self[last_store].kind,
&& matches!(self[mcall].kind, Kind::Call { .. }) Kind::Stre | Kind::Mem,
&& self[mcall].inputs.last() == Some(&stack) "{:?}",
{ self[last_store]
self.modify_input(mcall, self[mcall].inputs.len() - 1, region); );
debug_assert_matches!(
self[first_store].kind,
Kind::Stre | Kind::Mem,
"{:?}",
self[first_store]
);
return Some(last_store); if !unidentifed.is_empty() {
} else { break 'eliminate;
debug_assert_matches!(
self[last_store].kind,
Kind::Stre | Kind::Mem,
"{:?}",
self[last_store]
);
debug_assert_matches!(
self[first_store].kind,
Kind::Stre | Kind::Mem,
"{:?}",
self[first_store]
);
if !unidentifed.is_empty() {
break 'eliminate;
}
// FIXME: when the loads and stores become parallel we will need to get saved
// differently
let mut prev_store = store;
for mut oper in saved.into_iter().rev() {
let mut region = region;
if let Kind::BinOp { op } = self[oper].kind {
debug_assert_eq!(self[oper].outputs.len(), 1);
debug_assert_eq!(self[self[oper].outputs[0]].kind, Kind::Stre);
region = self.new_node(self[oper].ty, Kind::BinOp { op }, [
VOID,
region,
self[oper].inputs[2],
]);
oper = self[oper].outputs[0];
}
let mut inps = self[oper].inputs.clone();
debug_assert_eq!(inps.len(), 4);
inps[2] = region;
inps[3] = prev_store;
prev_store = self.new_node(self[oper].ty, Kind::Stre, inps);
}
return Some(prev_store);
} }
// FIXME: when the loads and stores become parallel we will need to get saved
// differently
let mut prev_store = store;
for mut oper in saved.into_iter().rev() {
let mut region = region;
if let Kind::BinOp { op } = self[oper].kind {
debug_assert_eq!(self[oper].outputs.len(), 1);
debug_assert_eq!(self[self[oper].outputs[0]].kind, Kind::Stre);
region = self.new_node(self[oper].ty, Kind::BinOp { op }, [
VOID,
region,
self[oper].inputs[2],
]);
oper = self[oper].outputs[0];
}
let mut inps = self[oper].inputs.clone();
debug_assert_eq!(inps.len(), 4);
inps[2] = region;
inps[3] = prev_store;
prev_store = self.new_node(self[oper].ty, Kind::Stre, inps);
}
return Some(prev_store);
} }
if value != VOID if value != VOID
@ -4080,5 +4070,6 @@ mod tests {
infinite_loop_after_peephole; infinite_loop_after_peephole;
aliasing_overoptimization; aliasing_overoptimization;
global_aliasing_overptimization; global_aliasing_overptimization;
overwrite_aliasing_overoptimization;
} }
} }

View file

@ -985,14 +985,17 @@ impl<'a> Function<'a> {
if self.nodes[n].ty.loc(self.tys) == Loc::Reg) if self.nodes[n].ty.loc(self.tys) == Loc::Reg)
|| matches!(self.nodes[n].kind, Kind::Stre || matches!(self.nodes[n].kind, Kind::Stre
if self.nodes[n].ty.loc(self.tys) == Loc::Reg if self.nodes[n].ty.loc(self.tys) == Loc::Reg
&& self.nodes[n].inputs[1] != nid) && self.nodes[n].inputs[1] != nid)
|| matches!(self.nodes[n].kind, Kind::BinOp { op: TokenKind::Add } || matches!(self.nodes[n].kind, Kind::BinOp { op: TokenKind::Add }
if self.nodes.is_const(self.nodes[n].inputs[2]) if self.nodes.is_const(self.nodes[n].inputs[2])
&& self.nodes[n] && self.nodes[n]
.outputs .outputs
.iter() .iter()
.all(|&n| matches!(self.nodes[n].kind, Kind::Stre | Kind::Load .all(|&n| matches!(self.nodes[n].kind, Kind::Load
if self.nodes[n].ty.loc(self.tys) == Loc::Reg))) if self.nodes[n].ty.loc(self.tys) == Loc::Reg)
|| matches!(self.nodes[n].kind, Kind::Stre
if self.nodes[n].ty.loc(self.tys) == Loc::Reg
&& self.nodes[n].inputs[1] != nid)))
}) => self.nodes.lock(nid), }) => self.nodes.lock(nid),
Kind::Stck if self.tys.size_of(node.ty) == 0 => self.nodes.lock(nid), Kind::Stck if self.tys.size_of(node.ty) == 0 => self.nodes.lock(nid),
Kind::Stck => { Kind::Stck => {

View file

@ -1,17 +1,20 @@
deinit: deinit:
ADDI64 r254, r254, -16d ADDI64 r254, r254, -48d
ST r31, r254, 0a, 16h ST r31, r254, 24a, 24h
CP r32, r2
LD r5, r2, 16a, 8h LD r5, r2, 16a, 8h
CP r32, r2
LI64 r4, 8d LI64 r4, 8d
MUL64 r3, r5, r4 MUL64 r3, r5, r4
CP r5, r32 CP r5, r32
LD r2, r5, 0a, 8h LD r2, r5, 0a, 8h
JAL r31, r0, :free JAL r31, r0, :free
CP r1, r32 ADDI64 r33, r254, 0d
CP r1, r33
JAL r31, r0, :new JAL r31, r0, :new
LD r31, r254, 0a, 16h CP r2, r32
ADDI64 r254, r254, 16d BMC r33, r2, 24h
LD r31, r254, 24a, 24h
ADDI64 r254, r254, 48d
JALA r0, r31, 0a JALA r0, r31, 0a
free: free:
CP r10, r2 CP r10, r2
@ -23,21 +26,23 @@ free:
ECA ECA
JALA r0, r31, 0a JALA r0, r31, 0a
main: main:
ADDI64 r254, r254, -48d ADDI64 r254, r254, -80d
ST r31, r254, 24a, 24h ST r31, r254, 48a, 32h
ADDI64 r32, r254, 0d ADDI64 r32, r254, 24d
CP r1, r32 CP r1, r32
JAL r31, r0, :new JAL r31, r0, :new
ADDI64 r33, r254, 0d
BMC r32, r33, 24h
LI64 r3, 69d LI64 r3, 69d
CP r2, r32 CP r2, r33
JAL r31, r0, :push JAL r31, r0, :push
LD r9, r254, 0a, 8h LD r12, r254, 0a, 8h
LD r33, r9, 0a, 8h LD r34, r12, 0a, 8h
CP r2, r32 CP r2, r33
JAL r31, r0, :deinit JAL r31, r0, :deinit
CP r1, r33 CP r1, r34
LD r31, r254, 24a, 24h LD r31, r254, 48a, 32h
ADDI64 r254, r254, 48d ADDI64 r254, r254, 80d
JALA r0, r31, 0a JALA r0, r31, 0a
malloc: malloc:
CP r9, r2 CP r9, r2
@ -121,6 +126,6 @@ push:
4: LD r31, r254, 0a, 72h 4: LD r31, r254, 0a, 72h
ADDI64 r254, r254, 72d ADDI64 r254, r254, 72d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 945 code size: 980
ret: 69 ret: 69
status: Ok(()) status: Ok(())

View file

@ -0,0 +1,36 @@
main:
ADDI64 r254, r254, -72d
ST r31, r254, 56a, 16h
ADDI64 r32, r254, 0d
JAL r31, r0, :opaque
ST r1, r254, 0a, 16h
LI64 r6, 4d
ADDI64 r5, r254, 40d
ADDI64 r8, r254, 16d
ST r6, r254, 40a, 8h
LI64 r7, 1d
ST r7, r254, 48a, 8h
ST r7, r254, 32a, 8h
BMC r5, r8, 16h
BMC r32, r8, 16h
LD r7, r254, 24a, 8h
LD r9, r254, 32a, 8h
ADD64 r11, r9, r7
LD r9, r254, 16a, 8h
SUB64 r1, r9, r11
LD r31, r254, 56a, 16h
ADDI64 r254, r254, 72d
JALA r0, r31, 0a
opaque:
ADDI64 r254, r254, -16d
LI64 r3, 3d
ADDI64 r2, r254, 0d
ST r3, r254, 0a, 8h
LI64 r6, 2d
ST r6, r254, 8a, 8h
LD r1, r2, 0a, 16h
ADDI64 r254, r254, 16d
JALA r0, r31, 0a
code size: 339
ret: 0
status: Ok(())

View file

@ -1,27 +1,29 @@
main: main:
ADDI64 r254, r254, -12d ADDI64 r254, r254, -24d
ST r31, r254, 4a, 8h ST r31, r254, 8a, 16h
ADDI64 r2, r254, 0d ADDI64 r32, r254, 4d
JAL r31, r0, :random_color JAL r31, r0, :random_color
ST r1, r254, 0a, 4h ST r1, r254, 4a, 4h
LD r5, r254, 0a, 1h ADDI64 r5, r254, 0d
LD r8, r254, 1a, 1h BMC r32, r5, 4h
LD r12, r254, 2a, 1h LD r8, r254, 0a, 1h
ANDI r9, r5, 255d LD r11, r254, 1a, 1h
ANDI r1, r8, 255d LD r3, r254, 2a, 1h
LD r6, r254, 3a, 1h ANDI r12, r8, 255d
ANDI r5, r12, 255d ANDI r4, r11, 255d
ADD64 r4, r1, r9 LD r9, r254, 3a, 1h
ANDI r10, r6, 255d ANDI r8, r3, 255d
ADD64 r9, r4, r5 ADD64 r7, r4, r12
ADD64 r1, r9, r10 ANDI r1, r9, 255d
LD r31, r254, 4a, 8h ADD64 r12, r7, r8
ADDI64 r254, r254, 12d ADD64 r1, r12, r1
LD r31, r254, 8a, 16h
ADDI64 r254, r254, 24d
JALA r0, r31, 0a JALA r0, r31, 0a
random_color: random_color:
LRA r1, r0, :white LRA r1, r0, :white
LD r1, r1, 0a, 4h LD r1, r1, 0a, 4h
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 241 code size: 257
ret: 1020 ret: 1020
status: Ok(()) status: Ok(())

View file

@ -1,23 +1,24 @@
main: main:
ADDI64 r254, r254, -56d ADDI64 r254, r254, -72d
ST r31, r254, 32a, 24h ST r31, r254, 48a, 24h
LI64 r3, 4d LI64 r3, 4d
ADDI64 r2, r254, 16d ADDI64 r2, r254, 32d
ST r3, r254, 16a, 8h ST r3, r254, 32a, 8h
LI64 r32, 3d LI64 r32, 3d
ST r32, r254, 24a, 8h ST r32, r254, 40a, 8h
ADDI64 r33, r254, 0d ADDI64 r33, r254, 16d
LD r3, r2, 0a, 16h LD r3, r2, 0a, 16h
JAL r31, r0, :odher_pass JAL r31, r0, :odher_pass
ST r1, r254, 0a, 16h ST r1, r254, 16a, 16h
LD r2, r254, 8a, 8h ADDI64 r2, r254, 0d
JNE r2, r32, :0 BMC r33, r2, 16h
CP r2, r33 LD r4, r254, 8a, 8h
JNE r4, r32, :0
JAL r31, r0, :pass JAL r31, r0, :pass
JMP :1 JMP :1
0: LI64 r1, 0d 0: LI64 r1, 0d
1: LD r31, r254, 32a, 24h 1: LD r31, r254, 48a, 24h
ADDI64 r254, r254, 56d ADDI64 r254, r254, 72d
JALA r0, r31, 0a JALA r0, r31, 0a
odher_pass: odher_pass:
ADDI64 r254, r254, -16d ADDI64 r254, r254, -16d
@ -29,6 +30,6 @@ odher_pass:
pass: pass:
LD r1, r2, 0a, 8h LD r1, r2, 0a, 8h
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 305 code size: 318
ret: 4 ret: 4
status: Ok(()) status: Ok(())

View file

@ -1,17 +1,19 @@
main: main:
ADDI64 r254, r254, -24d ADDI64 r254, r254, -48d
ST r31, r254, 16a, 8h ST r31, r254, 32a, 16h
ADDI64 r3, r254, 0d ADDI64 r32, r254, 16d
LI64 r4, 0d LI64 r4, 0d
CP r3, r4 CP r3, r4
JAL r31, r0, :maina JAL r31, r0, :maina
ST r1, r254, 0a, 16h ST r1, r254, 16a, 16h
LD r8, r254, 12a, 1h ADDI64 r7, r254, 0d
LD r9, r254, 3a, 1h BMC r32, r7, 16h
SUB8 r11, r9, r8 LD r11, r254, 12a, 1h
ANDI r1, r11, 255d LD r12, r254, 3a, 1h
LD r31, r254, 16a, 8h SUB8 r2, r12, r11
ADDI64 r254, r254, 24d ANDI r1, r2, 255d
LD r31, r254, 32a, 16h
ADDI64 r254, r254, 48d
JALA r0, r31, 0a JALA r0, r31, 0a
maina: maina:
ADDI64 r254, r254, -36d ADDI64 r254, r254, -36d
@ -48,6 +50,6 @@ small_struct:
LD r1, r3, 0a, 4h LD r1, r3, 0a, 4h
ADDI64 r254, r254, 4d ADDI64 r254, r254, 4d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 498 code size: 514
ret: 2 ret: 2
status: Ok(()) status: Ok(())