fixing stack alloc overoptimization

This commit is contained in:
Jakub Doka 2024-10-24 19:57:36 +02:00
parent 648bd24d0d
commit 897e121eeb
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
6 changed files with 233 additions and 153 deletions

View file

@ -574,7 +574,36 @@ set := fn($Expr: type, src: ^Expr, dest: ^Expr, count: uint): u32 {
main := fn(): int {
return set(int, &0, &0, 1024)
}
```
#### string_flip
```hb
// Struct with a single int field; used as the array element type below.
U := struct {u: int}
// Fills a 4-element array with 0..3, then swaps its first half with its second
// half (viewed as a 2x2 grid: row 0 <-> row 1) and returns the first element's `u`.
// NOTE(review): despite the example name, no strings are involved here.
main := fn(): int {
// presumably `idk` leaves the storage unspecified; every slot is written below
arr := @as([U; 2 * 2], idk)
i := 0
// initialize: arr[i].u = i for each of the 2 * 2 slots
loop if i == 2 * 2 break else {
arr[i] = .(i)
i += 1
}
i = 0
// i walks the first half of the rows (2 / 2 == 1, so only row 0)
loop if i == 2 / 2 break else {
j := 0
// j walks the 2 columns of the current row
loop if j == 2 break else {
// a indexes (row i, column j); b indexes the mirrored row (2 - i - 1), same column
a := i * 2 + j
b := (2 - i - 1) * 2 + j
// swap arr[a] and arr[b] through a temporary
tmp := arr[a]
arr[a] = arr[b]
arr[b] = tmp
j += 1
}
i += 1
}
// after the swaps arr holds u-values [2, 3, 0, 1], so this returns 2
return arr[0].u
}
```
#### wide_ret

View file

@ -902,8 +902,7 @@ impl Nodes {
}
if region != stack {
cursor = *self[cursor].inputs.get(3).unwrap_or(&MEM);
continue;
break;
}
let Some(index) = unidentifed.iter().position(|&n| n == contact_point) else {
continue 'o;
@ -4467,6 +4466,7 @@ mod tests {
small_struct_bitcast;
small_struct_assignment;
intcast_store;
string_flip;
wide_ret;
comptime_min_reg_leak;
different_types;

View file

@ -1,46 +1,48 @@
main:
ADDI64 r254, r254, -12d
LI64 r5, 2d
LI64 r4, 0d
LI64 r6, 0d
LI64 r7, 255d
ADDI64 r9, r254, 0d
ST r7, r254, 0a, 1h
ST r6, r254, 1a, 1h
ST r6, r254, 2a, 1h
ST r7, r254, 3a, 1h
ST r4, r254, 4a, 4h
ST r5, r254, 8a, 4h
LD r6, r254, 8a, 4h
ADDI64 r254, r254, -24d
LI64 r6, 2d
LI64 r5, 0d
LI64 r8, 0d
LI64 r9, 255d
ADDI64 r10, r254, 0d
ADDI64 r7, r254, 12d
ST r9, r254, 12a, 1h
ST r8, r254, 13a, 1h
ST r8, r254, 14a, 1h
ST r9, r254, 15a, 1h
ST r5, r254, 16a, 4h
ST r6, r254, 20a, 4h
BMC r7, r10, 12h
LD r8, r254, 8a, 4h
ANDI r8, r8, 4294967295d
ANDI r6, r6, 4294967295d
ANDI r5, r5, 4294967295d
JEQ r6, r5, :0
JEQ r8, r6, :0
LI64 r1, 0d
JMP :1
0: LD r11, r254, 4a, 4h
ANDI r11, r11, 4294967295d
ANDI r4, r4, 4294967295d
JEQ r11, r4, :2
0: LD r2, r254, 4a, 4h
ANDI r2, r2, 4294967295d
ANDI r5, r5, 4294967295d
JEQ r2, r5, :2
LI64 r1, 64d
JMP :1
2: LD r5, r254, 3a, 1h
ANDI r7, r5, 255d
LD r4, r254, 2a, 1h
ANDI r6, r4, 255d
LD r3, r254, 1a, 1h
ANDI r5, r3, 255d
LD r1, r254, 0a, 1h
ANDI r4, r1, 255d
LD r2, r254, 8a, 4h
LD r3, r254, 4a, 4h
ADD32 r8, r2, r3
ADD32 r9, r8, r4
ADD32 r1, r9, r5
ADD32 r5, r1, r6
ADD32 r9, r5, r7
ANDI r1, r9, 4294967295d
1: ADDI64 r254, r254, 12d
2: LD r8, r254, 3a, 1h
ANDI r10, r8, 255d
LD r7, r254, 2a, 1h
ANDI r9, r7, 255d
LD r6, r254, 1a, 1h
ANDI r8, r6, 255d
LD r4, r254, 0a, 1h
ANDI r7, r4, 255d
LD r5, r254, 8a, 4h
LD r6, r254, 4a, 4h
ADD32 r11, r5, r6
ADD32 r12, r11, r7
ADD32 r4, r12, r8
ADD32 r8, r4, r9
ADD32 r12, r8, r10
ANDI r1, r12, 4294967295d
1: ADDI64 r254, r254, 24d
JALA r0, r31, 0a
code size: 433
code size: 449
ret: 512
status: Ok(())

View file

@ -0,0 +1,45 @@
main:
ADDI64 r254, r254, -40d
LI64 r6, 4d
LI64 r8, 1d
LI64 r4, 0d
ADDI64 r5, r254, 0d
CP r9, r4
6: JNE r9, r6, :0
LI64 r6, 2d
CP r7, r4
4: JNE r7, r8, :1
LD r1, r254, 0a, 8h
JMP :2
1: CP r10, r4
5: ADD64 r9, r7, r8
JNE r10, r6, :3
CP r7, r9
JMP :4
3: ADDI64 r12, r254, 32d
MUL64 r11, r7, r6
SUB64 r1, r6, r9
MUL64 r1, r1, r6
ADD64 r9, r10, r8
ADD64 r1, r1, r10
MULI64 r1, r1, 8d
ADD64 r1, r1, r5
ADD64 r2, r11, r10
MULI64 r10, r2, 8d
ADD64 r10, r10, r5
BMC r10, r12, 8h
BMC r1, r10, 8h
BMC r12, r1, 8h
CP r10, r9
JMP :5
0: ADD64 r2, r9, r8
MULI64 r12, r9, 8d
ADD64 r3, r12, r5
ST r9, r3, 0a, 8h
CP r9, r2
JMP :6
2: ADDI64 r254, r254, 40d
JALA r0, r31, 0a
code size: 274
ret: 2
status: Ok(())

View file

@ -1,41 +1,41 @@
main:
ADDI64 r254, r254, -136d
ADDI64 r254, r254, -168d
LI64 r2, 4d
LI64 r9, 1d
LI64 r6, 3d
LI64 r7, 2d
LI64 r8, 1d
LI64 r10, 0d
ADDI64 r12, r254, 112d
ADDI64 r12, r254, 116d
ST r10, r254, 116a, 1h
ST r10, r254, 117a, 1h
ST r10, r254, 118a, 1h
ST r10, r254, 119a, 1h
ST r9, r254, 112a, 1h
ST r9, r254, 113a, 1h
ST r9, r254, 114a, 1h
ST r9, r254, 115a, 1h
LD r1, r254, 112a, 1h
LD r3, r254, 116a, 1h
ADDI64 r12, r254, 144d
ADDI64 r12, r254, 148d
ST r10, r254, 148a, 1h
ST r10, r254, 149a, 1h
ST r10, r254, 150a, 1h
ST r10, r254, 151a, 1h
ST r9, r254, 144a, 1h
ST r9, r254, 145a, 1h
ST r9, r254, 146a, 1h
ST r9, r254, 147a, 1h
LD r1, r254, 144a, 1h
LD r3, r254, 148a, 1h
ADD8 r4, r1, r3
ST r4, r254, 116a, 1h
LD r9, r254, 113a, 1h
LD r10, r254, 117a, 1h
ST r4, r254, 148a, 1h
LD r9, r254, 145a, 1h
LD r10, r254, 149a, 1h
ADD8 r11, r10, r9
ST r11, r254, 117a, 1h
LD r3, r254, 114a, 1h
LD r4, r254, 118a, 1h
ST r11, r254, 149a, 1h
LD r3, r254, 146a, 1h
LD r4, r254, 150a, 1h
ADD8 r9, r4, r3
ST r9, r254, 118a, 1h
LD r10, r254, 115a, 1h
LD r11, r254, 119a, 1h
ST r9, r254, 150a, 1h
LD r10, r254, 147a, 1h
LD r11, r254, 151a, 1h
ADD8 r1, r11, r10
ST r1, r254, 119a, 1h
LD r9, r254, 119a, 1h
LD r5, r254, 118a, 1h
LD r10, r254, 116a, 1h
LD r11, r254, 117a, 1h
ST r1, r254, 151a, 1h
LD r9, r254, 151a, 1h
LD r5, r254, 150a, 1h
LD r10, r254, 148a, 1h
LD r11, r254, 149a, 1h
ADD8 r11, r5, r11
ADD8 r3, r10, r11
ADD8 r5, r9, r3
@ -44,75 +44,77 @@ main:
JEQ r5, r2, :0
LI64 r1, 1008d
JMP :1
0: LI64 r3, 0d
0: LI64 r4, 0d
LI64 r1, 4d
ADDI64 r4, r254, 0d
ADDI64 r2, r254, 16d
ADDI64 r10, r2, 16d
ADDI64 r4, r254, 48d
ADDI64 r9, r254, 64d
ADDI64 r11, r254, 96d
ST r8, r254, 48a, 8h
ST r7, r254, 56a, 8h
ST r6, r254, 0a, 8h
ST r1, r254, 8a, 8h
LD r2, r254, 48a, 8h
LD r5, r254, 0a, 8h
ADD64 r6, r5, r2
ST r6, r254, 64a, 8h
LD r11, r254, 8a, 8h
LD r12, r254, 56a, 8h
ADD64 r1, r11, r12
ST r1, r254, 72a, 8h
LD r5, r254, 0a, 8h
LD r6, r254, 48a, 8h
SUB64 r8, r5, r6
ADDI64 r9, r254, 0d
ADDI64 r5, r254, 32d
ADDI64 r5, r254, 48d
ADDI64 r12, r5, 16d
ADDI64 r5, r254, 80d
ADDI64 r2, r254, 96d
ADDI64 r11, r254, 112d
ST r8, r254, 80a, 8h
LD r12, r254, 56a, 8h
LD r1, r254, 8a, 8h
SUB64 r5, r1, r12
ST r5, r254, 88a, 8h
ST r3, r254, 96a, 8h
ST r3, r254, 104a, 8h
LD r11, r254, 96a, 8h
LD r1, r254, 0a, 8h
SUB64 r3, r11, r1
ST r3, r254, 16a, 8h
LD r7, r254, 8a, 8h
LD r8, r254, 104a, 8h
SUB64 r11, r8, r7
ST r11, r254, 24a, 8h
BMC r4, r10, 16h
LD r3, r254, 16a, 8h
LD r5, r254, 64a, 8h
ADD64 r7, r3, r5
ST r7, r254, 16a, 8h
LD r11, r254, 72a, 8h
LD r12, r254, 24a, 8h
ADD64 r2, r11, r12
ST r2, r254, 24a, 8h
LD r7, r254, 80a, 8h
LD r8, r254, 32a, 8h
ADD64 r10, r7, r8
ST r10, r254, 32a, 8h
ST r7, r254, 88a, 8h
ST r6, r254, 32a, 8h
ST r1, r254, 40a, 8h
LD r6, r254, 80a, 8h
LD r7, r254, 32a, 8h
ADD64 r8, r7, r6
ST r8, r254, 0a, 8h
LD r1, r254, 40a, 8h
LD r2, r254, 88a, 8h
ADD64 r3, r1, r2
ST r3, r254, 8a, 8h
LD r6, r254, 32a, 8h
LD r8, r254, 80a, 8h
SUB64 r10, r6, r8
ST r10, r254, 16a, 8h
LD r2, r254, 88a, 8h
LD r3, r254, 40a, 8h
SUB64 r6, r3, r2
ST r6, r254, 24a, 8h
BMC r9, r11, 32h
ST r4, r254, 96a, 8h
ST r4, r254, 104a, 8h
LD r2, r254, 96a, 8h
LD r4, r254, 32a, 8h
SUB64 r6, r2, r4
ST r6, r254, 48a, 8h
LD r10, r254, 40a, 8h
LD r11, r254, 104a, 8h
SUB64 r1, r11, r10
ST r1, r254, 56a, 8h
BMC r5, r12, 16h
LD r6, r254, 48a, 8h
LD r8, r254, 112a, 8h
ADD64 r10, r6, r8
ST r10, r254, 48a, 8h
LD r2, r254, 120a, 8h
LD r3, r254, 56a, 8h
ADD64 r5, r2, r3
ST r5, r254, 40a, 8h
LD r8, r254, 16a, 8h
LD r10, r254, 32a, 8h
ADD64 r12, r10, r8
ADDI64 r5, r254, 120d
ST r12, r254, 120a, 8h
LD r5, r254, 40a, 8h
LD r6, r254, 24a, 8h
ST r5, r254, 56a, 8h
LD r10, r254, 128a, 8h
LD r11, r254, 64a, 8h
ADD64 r1, r10, r11
ST r1, r254, 64a, 8h
LD r5, r254, 136a, 8h
LD r6, r254, 72a, 8h
ADD64 r8, r5, r6
ST r8, r254, 128a, 8h
LD r12, r254, 120a, 8h
LD r2, r254, 128a, 8h
ADD64 r1, r2, r12
1: ADDI64 r254, r254, 136d
ST r8, r254, 72a, 8h
LD r11, r254, 48a, 8h
LD r1, r254, 64a, 8h
ADD64 r3, r1, r11
ADDI64 r8, r254, 152d
ST r3, r254, 152a, 8h
LD r8, r254, 72a, 8h
LD r9, r254, 56a, 8h
ADD64 r11, r8, r9
ST r11, r254, 160a, 8h
LD r3, r254, 152a, 8h
LD r5, r254, 160a, 8h
ADD64 r1, r5, r3
1: ADDI64 r254, r254, 168d
JALA r0, r31, 0a
code size: 1231
code size: 1247
ret: 10
status: Ok(())

View file

@ -16,32 +16,34 @@ main:
ADDI64 r254, r254, 48d
JALA r0, r31, 0a
maina:
ADDI64 r254, r254, -48d
ST r31, r254, 32a, 16h
ADDI64 r32, r254, 28d
ADDI64 r254, r254, -56d
ST r31, r254, 40a, 16h
ADDI64 r32, r254, 36d
JAL r31, r0, :small_struct
ST r1, r254, 28a, 4h
ST r1, r254, 36a, 4h
LI64 r2, 1d
LI64 r4, 3d
LI64 r3, 3d
LI64 r1, 0d
ADDI64 r5, r254, 0d
ADDI64 r7, r5, 8d
ADDI64 r3, r254, 16d
ADDI64 r6, r254, 24d
BMC r32, r6, 4h
ST r1, r254, 16a, 1h
ST r1, r254, 17a, 1h
ST r1, r254, 18a, 1h
ST r4, r254, 19a, 1h
ST r2, r254, 20a, 1h
ST r1, r254, 21a, 1h
ST r1, r254, 22a, 1h
ST r1, r254, 23a, 1h
BMC r3, r7, 8h
BMC r3, r5, 8h
LD r1, r5, 0a, 16h
LD r31, r254, 32a, 16h
ADDI64 r254, r254, 48d
ADDI64 r6, r254, 0d
ADDI64 r8, r6, 8d
ADDI64 r7, r254, 16d
ADDI64 r4, r254, 24d
ADDI64 r5, r254, 32d
BMC r32, r5, 4h
ST r1, r254, 24a, 1h
ST r1, r254, 25a, 1h
ST r1, r254, 26a, 1h
ST r3, r254, 27a, 1h
ST r2, r254, 28a, 1h
ST r1, r254, 29a, 1h
ST r1, r254, 30a, 1h
ST r1, r254, 31a, 1h
BMC r4, r7, 8h
BMC r4, r6, 8h
BMC r7, r8, 8h
LD r1, r6, 0a, 16h
LD r31, r254, 40a, 16h
ADDI64 r254, r254, 56d
JALA r0, r31, 0a
small_struct:
ADDI64 r254, r254, -4d
@ -52,6 +54,6 @@ small_struct:
LD r1, r3, 0a, 4h
ADDI64 r254, r254, 4d
JALA r0, r31, 0a
code size: 557
code size: 573
ret: 2
status: Ok(())