Fix stack allocation over-optimization

This commit is contained in:
Jakub Doka 2024-10-24 19:57:36 +02:00
parent 648bd24d0d
commit 897e121eeb
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
6 changed files with 233 additions and 153 deletions

View file

@ -574,7 +574,36 @@ set := fn($Expr: type, src: ^Expr, dest: ^Expr, count: uint): u32 {
main := fn(): int { main := fn(): int {
return set(int, &0, &0, 1024) return set(int, &0, &0, 1024)
} }
```
#### string_flip
```hb
U := struct {u: int}
main := fn(): int {
arr := @as([U; 2 * 2], idk)
i := 0
loop if i == 2 * 2 break else {
arr[i] = .(i)
i += 1
}
i = 0
loop if i == 2 / 2 break else {
j := 0
loop if j == 2 break else {
a := i * 2 + j
b := (2 - i - 1) * 2 + j
tmp := arr[a]
arr[a] = arr[b]
arr[b] = tmp
j += 1
}
i += 1
}
return arr[0].u
}
``` ```
#### wide_ret #### wide_ret

View file

@ -902,8 +902,7 @@ impl Nodes {
} }
if region != stack { if region != stack {
cursor = *self[cursor].inputs.get(3).unwrap_or(&MEM); break;
continue;
} }
let Some(index) = unidentifed.iter().position(|&n| n == contact_point) else { let Some(index) = unidentifed.iter().position(|&n| n == contact_point) else {
continue 'o; continue 'o;
@ -4467,6 +4466,7 @@ mod tests {
small_struct_bitcast; small_struct_bitcast;
small_struct_assignment; small_struct_assignment;
intcast_store; intcast_store;
string_flip;
wide_ret; wide_ret;
comptime_min_reg_leak; comptime_min_reg_leak;
different_types; different_types;

View file

@ -1,46 +1,48 @@
main: main:
ADDI64 r254, r254, -12d ADDI64 r254, r254, -24d
LI64 r5, 2d LI64 r6, 2d
LI64 r4, 0d LI64 r5, 0d
LI64 r6, 0d LI64 r8, 0d
LI64 r7, 255d LI64 r9, 255d
ADDI64 r9, r254, 0d ADDI64 r10, r254, 0d
ST r7, r254, 0a, 1h ADDI64 r7, r254, 12d
ST r6, r254, 1a, 1h ST r9, r254, 12a, 1h
ST r6, r254, 2a, 1h ST r8, r254, 13a, 1h
ST r7, r254, 3a, 1h ST r8, r254, 14a, 1h
ST r4, r254, 4a, 4h ST r9, r254, 15a, 1h
ST r5, r254, 8a, 4h ST r5, r254, 16a, 4h
LD r6, r254, 8a, 4h ST r6, r254, 20a, 4h
BMC r7, r10, 12h
LD r8, r254, 8a, 4h
ANDI r8, r8, 4294967295d
ANDI r6, r6, 4294967295d ANDI r6, r6, 4294967295d
ANDI r5, r5, 4294967295d JEQ r8, r6, :0
JEQ r6, r5, :0
LI64 r1, 0d LI64 r1, 0d
JMP :1 JMP :1
0: LD r11, r254, 4a, 4h 0: LD r2, r254, 4a, 4h
ANDI r11, r11, 4294967295d ANDI r2, r2, 4294967295d
ANDI r4, r4, 4294967295d ANDI r5, r5, 4294967295d
JEQ r11, r4, :2 JEQ r2, r5, :2
LI64 r1, 64d LI64 r1, 64d
JMP :1 JMP :1
2: LD r5, r254, 3a, 1h 2: LD r8, r254, 3a, 1h
ANDI r7, r5, 255d ANDI r10, r8, 255d
LD r4, r254, 2a, 1h LD r7, r254, 2a, 1h
ANDI r6, r4, 255d ANDI r9, r7, 255d
LD r3, r254, 1a, 1h LD r6, r254, 1a, 1h
ANDI r5, r3, 255d ANDI r8, r6, 255d
LD r1, r254, 0a, 1h LD r4, r254, 0a, 1h
ANDI r4, r1, 255d ANDI r7, r4, 255d
LD r2, r254, 8a, 4h LD r5, r254, 8a, 4h
LD r3, r254, 4a, 4h LD r6, r254, 4a, 4h
ADD32 r8, r2, r3 ADD32 r11, r5, r6
ADD32 r9, r8, r4 ADD32 r12, r11, r7
ADD32 r1, r9, r5 ADD32 r4, r12, r8
ADD32 r5, r1, r6 ADD32 r8, r4, r9
ADD32 r9, r5, r7 ADD32 r12, r8, r10
ANDI r1, r9, 4294967295d ANDI r1, r12, 4294967295d
1: ADDI64 r254, r254, 12d 1: ADDI64 r254, r254, 24d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 433 code size: 449
ret: 512 ret: 512
status: Ok(()) status: Ok(())

View file

@ -0,0 +1,45 @@
main:
ADDI64 r254, r254, -40d
LI64 r6, 4d
LI64 r8, 1d
LI64 r4, 0d
ADDI64 r5, r254, 0d
CP r9, r4
6: JNE r9, r6, :0
LI64 r6, 2d
CP r7, r4
4: JNE r7, r8, :1
LD r1, r254, 0a, 8h
JMP :2
1: CP r10, r4
5: ADD64 r9, r7, r8
JNE r10, r6, :3
CP r7, r9
JMP :4
3: ADDI64 r12, r254, 32d
MUL64 r11, r7, r6
SUB64 r1, r6, r9
MUL64 r1, r1, r6
ADD64 r9, r10, r8
ADD64 r1, r1, r10
MULI64 r1, r1, 8d
ADD64 r1, r1, r5
ADD64 r2, r11, r10
MULI64 r10, r2, 8d
ADD64 r10, r10, r5
BMC r10, r12, 8h
BMC r1, r10, 8h
BMC r12, r1, 8h
CP r10, r9
JMP :5
0: ADD64 r2, r9, r8
MULI64 r12, r9, 8d
ADD64 r3, r12, r5
ST r9, r3, 0a, 8h
CP r9, r2
JMP :6
2: ADDI64 r254, r254, 40d
JALA r0, r31, 0a
code size: 274
ret: 2
status: Ok(())

View file

@ -1,41 +1,41 @@
main: main:
ADDI64 r254, r254, -136d ADDI64 r254, r254, -168d
LI64 r2, 4d LI64 r2, 4d
LI64 r9, 1d LI64 r9, 1d
LI64 r6, 3d LI64 r6, 3d
LI64 r7, 2d LI64 r7, 2d
LI64 r8, 1d LI64 r8, 1d
LI64 r10, 0d LI64 r10, 0d
ADDI64 r12, r254, 112d ADDI64 r12, r254, 144d
ADDI64 r12, r254, 116d ADDI64 r12, r254, 148d
ST r10, r254, 116a, 1h ST r10, r254, 148a, 1h
ST r10, r254, 117a, 1h ST r10, r254, 149a, 1h
ST r10, r254, 118a, 1h ST r10, r254, 150a, 1h
ST r10, r254, 119a, 1h ST r10, r254, 151a, 1h
ST r9, r254, 112a, 1h ST r9, r254, 144a, 1h
ST r9, r254, 113a, 1h ST r9, r254, 145a, 1h
ST r9, r254, 114a, 1h ST r9, r254, 146a, 1h
ST r9, r254, 115a, 1h ST r9, r254, 147a, 1h
LD r1, r254, 112a, 1h LD r1, r254, 144a, 1h
LD r3, r254, 116a, 1h LD r3, r254, 148a, 1h
ADD8 r4, r1, r3 ADD8 r4, r1, r3
ST r4, r254, 116a, 1h ST r4, r254, 148a, 1h
LD r9, r254, 113a, 1h LD r9, r254, 145a, 1h
LD r10, r254, 117a, 1h LD r10, r254, 149a, 1h
ADD8 r11, r10, r9 ADD8 r11, r10, r9
ST r11, r254, 117a, 1h ST r11, r254, 149a, 1h
LD r3, r254, 114a, 1h LD r3, r254, 146a, 1h
LD r4, r254, 118a, 1h LD r4, r254, 150a, 1h
ADD8 r9, r4, r3 ADD8 r9, r4, r3
ST r9, r254, 118a, 1h ST r9, r254, 150a, 1h
LD r10, r254, 115a, 1h LD r10, r254, 147a, 1h
LD r11, r254, 119a, 1h LD r11, r254, 151a, 1h
ADD8 r1, r11, r10 ADD8 r1, r11, r10
ST r1, r254, 119a, 1h ST r1, r254, 151a, 1h
LD r9, r254, 119a, 1h LD r9, r254, 151a, 1h
LD r5, r254, 118a, 1h LD r5, r254, 150a, 1h
LD r10, r254, 116a, 1h LD r10, r254, 148a, 1h
LD r11, r254, 117a, 1h LD r11, r254, 149a, 1h
ADD8 r11, r5, r11 ADD8 r11, r5, r11
ADD8 r3, r10, r11 ADD8 r3, r10, r11
ADD8 r5, r9, r3 ADD8 r5, r9, r3
@ -44,75 +44,77 @@ main:
JEQ r5, r2, :0 JEQ r5, r2, :0
LI64 r1, 1008d LI64 r1, 1008d
JMP :1 JMP :1
0: LI64 r3, 0d 0: LI64 r4, 0d
LI64 r1, 4d LI64 r1, 4d
ADDI64 r4, r254, 0d ADDI64 r9, r254, 0d
ADDI64 r2, r254, 16d ADDI64 r5, r254, 32d
ADDI64 r10, r2, 16d ADDI64 r5, r254, 48d
ADDI64 r4, r254, 48d ADDI64 r12, r5, 16d
ADDI64 r9, r254, 64d ADDI64 r5, r254, 80d
ADDI64 r11, r254, 96d ADDI64 r2, r254, 96d
ST r8, r254, 48a, 8h ADDI64 r11, r254, 112d
ST r7, r254, 56a, 8h
ST r6, r254, 0a, 8h
ST r1, r254, 8a, 8h
LD r2, r254, 48a, 8h
LD r5, r254, 0a, 8h
ADD64 r6, r5, r2
ST r6, r254, 64a, 8h
LD r11, r254, 8a, 8h
LD r12, r254, 56a, 8h
ADD64 r1, r11, r12
ST r1, r254, 72a, 8h
LD r5, r254, 0a, 8h
LD r6, r254, 48a, 8h
SUB64 r8, r5, r6
ST r8, r254, 80a, 8h ST r8, r254, 80a, 8h
LD r12, r254, 56a, 8h ST r7, r254, 88a, 8h
LD r1, r254, 8a, 8h ST r6, r254, 32a, 8h
SUB64 r5, r1, r12 ST r1, r254, 40a, 8h
ST r5, r254, 88a, 8h LD r6, r254, 80a, 8h
ST r3, r254, 96a, 8h LD r7, r254, 32a, 8h
ST r3, r254, 104a, 8h ADD64 r8, r7, r6
LD r11, r254, 96a, 8h ST r8, r254, 0a, 8h
LD r1, r254, 0a, 8h LD r1, r254, 40a, 8h
SUB64 r3, r11, r1 LD r2, r254, 88a, 8h
ST r3, r254, 16a, 8h ADD64 r3, r1, r2
LD r7, r254, 8a, 8h ST r3, r254, 8a, 8h
LD r8, r254, 104a, 8h LD r6, r254, 32a, 8h
SUB64 r11, r8, r7 LD r8, r254, 80a, 8h
ST r11, r254, 24a, 8h SUB64 r10, r6, r8
BMC r4, r10, 16h ST r10, r254, 16a, 8h
LD r3, r254, 16a, 8h
LD r5, r254, 64a, 8h
ADD64 r7, r3, r5
ST r7, r254, 16a, 8h
LD r11, r254, 72a, 8h
LD r12, r254, 24a, 8h
ADD64 r2, r11, r12
ST r2, r254, 24a, 8h
LD r7, r254, 80a, 8h
LD r8, r254, 32a, 8h
ADD64 r10, r7, r8
ST r10, r254, 32a, 8h
LD r2, r254, 88a, 8h LD r2, r254, 88a, 8h
LD r3, r254, 40a, 8h LD r3, r254, 40a, 8h
SUB64 r6, r3, r2
ST r6, r254, 24a, 8h
BMC r9, r11, 32h
ST r4, r254, 96a, 8h
ST r4, r254, 104a, 8h
LD r2, r254, 96a, 8h
LD r4, r254, 32a, 8h
SUB64 r6, r2, r4
ST r6, r254, 48a, 8h
LD r10, r254, 40a, 8h
LD r11, r254, 104a, 8h
SUB64 r1, r11, r10
ST r1, r254, 56a, 8h
BMC r5, r12, 16h
LD r6, r254, 48a, 8h
LD r8, r254, 112a, 8h
ADD64 r10, r6, r8
ST r10, r254, 48a, 8h
LD r2, r254, 120a, 8h
LD r3, r254, 56a, 8h
ADD64 r5, r2, r3 ADD64 r5, r2, r3
ST r5, r254, 40a, 8h ST r5, r254, 56a, 8h
LD r8, r254, 16a, 8h LD r10, r254, 128a, 8h
LD r10, r254, 32a, 8h LD r11, r254, 64a, 8h
ADD64 r12, r10, r8 ADD64 r1, r10, r11
ADDI64 r5, r254, 120d ST r1, r254, 64a, 8h
ST r12, r254, 120a, 8h LD r5, r254, 136a, 8h
LD r5, r254, 40a, 8h LD r6, r254, 72a, 8h
LD r6, r254, 24a, 8h
ADD64 r8, r5, r6 ADD64 r8, r5, r6
ST r8, r254, 128a, 8h ST r8, r254, 72a, 8h
LD r12, r254, 120a, 8h LD r11, r254, 48a, 8h
LD r2, r254, 128a, 8h LD r1, r254, 64a, 8h
ADD64 r1, r2, r12 ADD64 r3, r1, r11
1: ADDI64 r254, r254, 136d ADDI64 r8, r254, 152d
ST r3, r254, 152a, 8h
LD r8, r254, 72a, 8h
LD r9, r254, 56a, 8h
ADD64 r11, r8, r9
ST r11, r254, 160a, 8h
LD r3, r254, 152a, 8h
LD r5, r254, 160a, 8h
ADD64 r1, r5, r3
1: ADDI64 r254, r254, 168d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 1231 code size: 1247
ret: 10 ret: 10
status: Ok(()) status: Ok(())

View file

@ -16,32 +16,34 @@ main:
ADDI64 r254, r254, 48d ADDI64 r254, r254, 48d
JALA r0, r31, 0a JALA r0, r31, 0a
maina: maina:
ADDI64 r254, r254, -48d ADDI64 r254, r254, -56d
ST r31, r254, 32a, 16h ST r31, r254, 40a, 16h
ADDI64 r32, r254, 28d ADDI64 r32, r254, 36d
JAL r31, r0, :small_struct JAL r31, r0, :small_struct
ST r1, r254, 28a, 4h ST r1, r254, 36a, 4h
LI64 r2, 1d LI64 r2, 1d
LI64 r4, 3d LI64 r3, 3d
LI64 r1, 0d LI64 r1, 0d
ADDI64 r5, r254, 0d ADDI64 r6, r254, 0d
ADDI64 r7, r5, 8d ADDI64 r8, r6, 8d
ADDI64 r3, r254, 16d ADDI64 r7, r254, 16d
ADDI64 r6, r254, 24d ADDI64 r4, r254, 24d
BMC r32, r6, 4h ADDI64 r5, r254, 32d
ST r1, r254, 16a, 1h BMC r32, r5, 4h
ST r1, r254, 17a, 1h ST r1, r254, 24a, 1h
ST r1, r254, 18a, 1h ST r1, r254, 25a, 1h
ST r4, r254, 19a, 1h ST r1, r254, 26a, 1h
ST r2, r254, 20a, 1h ST r3, r254, 27a, 1h
ST r1, r254, 21a, 1h ST r2, r254, 28a, 1h
ST r1, r254, 22a, 1h ST r1, r254, 29a, 1h
ST r1, r254, 23a, 1h ST r1, r254, 30a, 1h
BMC r3, r7, 8h ST r1, r254, 31a, 1h
BMC r3, r5, 8h BMC r4, r7, 8h
LD r1, r5, 0a, 16h BMC r4, r6, 8h
LD r31, r254, 32a, 16h BMC r7, r8, 8h
ADDI64 r254, r254, 48d LD r1, r6, 0a, 16h
LD r31, r254, 40a, 16h
ADDI64 r254, r254, 56d
JALA r0, r31, 0a JALA r0, r31, 0a
small_struct: small_struct:
ADDI64 r254, r254, -4d ADDI64 r254, r254, -4d
@ -52,6 +54,6 @@ small_struct:
LD r1, r3, 0a, 4h LD r1, r3, 0a, 4h
ADDI64 r254, r254, 4d ADDI64 r254, r254, 4d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 557 code size: 573
ret: 2 ret: 2
status: Ok(()) status: Ok(())