From 897e121eeb6fb20afff606bb6826736ee801f60c Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Thu, 24 Oct 2024 19:57:36 +0200 Subject: [PATCH] fixing stack alloc overoptimization --- lang/README.md | 29 ++++ lang/src/son.rs | 4 +- lang/tests/son_tests_different_types.txt | 76 ++++----- lang/tests/son_tests_string_flip.txt | 45 ++++++ lang/tests/son_tests_struct_operators.txt | 182 +++++++++++----------- lang/tests/son_tests_wide_ret.txt | 50 +++--- 6 files changed, 233 insertions(+), 153 deletions(-) create mode 100644 lang/tests/son_tests_string_flip.txt diff --git a/lang/README.md b/lang/README.md index 28053c4..68f0230 100644 --- a/lang/README.md +++ b/lang/README.md @@ -574,7 +574,36 @@ set := fn($Expr: type, src: ^Expr, dest: ^Expr, count: uint): u32 { main := fn(): int { return set(int, &0, &0, 1024) } +``` +#### string_flip +```hb +U := struct {u: int} +main := fn(): int { + arr := @as([U; 2 * 2], idk) + + i := 0 + loop if i == 2 * 2 break else { + arr[i] = .(i) + i += 1 + } + + i = 0 + loop if i == 2 / 2 break else { + j := 0 + loop if j == 2 break else { + a := i * 2 + j + b := (2 - i - 1) * 2 + j + tmp := arr[a] + arr[a] = arr[b] + arr[b] = tmp + j += 1 + } + i += 1 + } + + return arr[0].u +} ``` #### wide_ret diff --git a/lang/src/son.rs b/lang/src/son.rs index 953154f..c10a41e 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -902,8 +902,7 @@ impl Nodes { } if region != stack { - cursor = *self[cursor].inputs.get(3).unwrap_or(&MEM); - continue; + break; } let Some(index) = unidentifed.iter().position(|&n| n == contact_point) else { continue 'o; @@ -4467,6 +4466,7 @@ mod tests { small_struct_bitcast; small_struct_assignment; intcast_store; + string_flip; wide_ret; comptime_min_reg_leak; different_types; diff --git a/lang/tests/son_tests_different_types.txt b/lang/tests/son_tests_different_types.txt index 4be6576..45dfca4 100644 --- a/lang/tests/son_tests_different_types.txt +++ b/lang/tests/son_tests_different_types.txt @@ -1,46 +1,48 @@ main: - ADDI64 r254, r254, -12d - LI64 r5, 2d - LI64 r4, 0d - LI64 r6, 0d - LI64 r7, 255d - ADDI64 r9, r254, 0d - ST r7, r254, 0a, 1h - ST r6, r254, 1a, 1h - ST r6, r254, 2a, 1h - ST r7, r254, 3a, 1h - ST r4, r254, 4a, 4h - ST r5, r254, 8a, 4h - LD r6, r254, 8a, 4h + ADDI64 r254, r254, -24d + LI64 r6, 2d + LI64 r5, 0d + LI64 r8, 0d + LI64 r9, 255d + ADDI64 r10, r254, 0d + ADDI64 r7, r254, 12d + ST r9, r254, 12a, 1h + ST r8, r254, 13a, 1h + ST r8, r254, 14a, 1h + ST r9, r254, 15a, 1h + ST r5, r254, 16a, 4h + ST r6, r254, 20a, 4h + BMC r7, r10, 12h + LD r8, r254, 8a, 4h + ANDI r8, r8, 4294967295d ANDI r6, r6, 4294967295d - ANDI r5, r5, 4294967295d - JEQ r6, r5, :0 + JEQ r8, r6, :0 LI64 r1, 0d JMP :1 - 0: LD r11, r254, 4a, 4h - ANDI r11, r11, 4294967295d - ANDI r4, r4, 4294967295d - JEQ r11, r4, :2 + 0: LD r2, r254, 4a, 4h + ANDI r2, r2, 4294967295d + ANDI r5, r5, 4294967295d + JEQ r2, r5, :2 LI64 r1, 64d JMP :1 - 2: LD r5, r254, 3a, 1h - ANDI r7, r5, 255d - LD r4, r254, 2a, 1h - ANDI r6, r4, 255d - LD r3, r254, 1a, 1h - ANDI r5, r3, 255d - LD r1, r254, 0a, 1h - ANDI r4, r1, 255d - LD r2, r254, 8a, 4h - LD r3, r254, 4a, 4h - ADD32 r8, r2, r3 - ADD32 r9, r8, r4 - ADD32 r1, r9, r5 - ADD32 r5, r1, r6 - ADD32 r9, r5, r7 - ANDI r1, r9, 4294967295d - 1: ADDI64 r254, r254, 12d + 2: LD r8, r254, 3a, 1h + ANDI r10, r8, 255d + LD r7, r254, 2a, 1h + ANDI r9, r7, 255d + LD r6, r254, 1a, 1h + ANDI r8, r6, 255d + LD r4, r254, 0a, 1h + ANDI r7, r4, 255d + LD r5, r254, 8a, 4h + LD r6, r254, 4a, 4h + ADD32 r11, r5, r6 + ADD32 r12, r11, r7 + ADD32 r4, r12, r8 + ADD32 r8, r4, r9 + ADD32 r12, r8, r10 + ANDI r1, r12, 4294967295d + 1: ADDI64 r254, r254, 24d JALA r0, r31, 0a -code size: 433 +code size: 449 ret: 512 status: Ok(()) diff --git a/lang/tests/son_tests_string_flip.txt b/lang/tests/son_tests_string_flip.txt new file mode 100644 index 0000000..34fba88 --- /dev/null +++ b/lang/tests/son_tests_string_flip.txt @@ -0,0 +1,45 @@ +main: + ADDI64 r254, r254, -40d + LI64 r6, 4d + LI64 r8, 1d + LI64 r4, 0d + ADDI64 r5, r254, 0d + CP r9, r4 + 6: JNE r9, r6, :0 + LI64 r6, 2d + CP r7, r4 + 4: JNE r7, r8, :1 + LD r1, r254, 0a, 8h + JMP :2 + 1: CP r10, r4 + 5: ADD64 r9, r7, r8 + JNE r10, r6, :3 + CP r7, r9 + JMP :4 + 3: ADDI64 r12, r254, 32d + MUL64 r11, r7, r6 + SUB64 r1, r6, r9 + MUL64 r1, r1, r6 + ADD64 r9, r10, r8 + ADD64 r1, r1, r10 + MULI64 r1, r1, 8d + ADD64 r1, r1, r5 + ADD64 r2, r11, r10 + MULI64 r10, r2, 8d + ADD64 r10, r10, r5 + BMC r10, r12, 8h + BMC r1, r10, 8h + BMC r12, r1, 8h + CP r10, r9 + JMP :5 + 0: ADD64 r2, r9, r8 + MULI64 r12, r9, 8d + ADD64 r3, r12, r5 + ST r9, r3, 0a, 8h + CP r9, r2 + JMP :6 + 2: ADDI64 r254, r254, 40d + JALA r0, r31, 0a +code size: 274 +ret: 2 +status: Ok(()) diff --git a/lang/tests/son_tests_struct_operators.txt b/lang/tests/son_tests_struct_operators.txt index b311e0b..b47caf9 100644 --- a/lang/tests/son_tests_struct_operators.txt +++ b/lang/tests/son_tests_struct_operators.txt @@ -1,41 +1,41 @@ main: - ADDI64 r254, r254, -136d + ADDI64 r254, r254, -168d LI64 r2, 4d LI64 r9, 1d LI64 r6, 3d LI64 r7, 2d LI64 r8, 1d LI64 r10, 0d - ADDI64 r12, r254, 112d - ADDI64 r12, r254, 116d - ST r10, r254, 116a, 1h - ST r10, r254, 117a, 1h - ST r10, r254, 118a, 1h - ST r10, r254, 119a, 1h - ST r9, r254, 112a, 1h - ST r9, r254, 113a, 1h - ST r9, r254, 114a, 1h - ST r9, r254, 115a, 1h - LD r1, r254, 112a, 1h - LD r3, r254, 116a, 1h + ADDI64 r12, r254, 144d + ADDI64 r12, r254, 148d + ST r10, r254, 148a, 1h + ST r10, r254, 149a, 1h + ST r10, r254, 150a, 1h + ST r10, r254, 151a, 1h + ST r9, r254, 144a, 1h + ST r9, r254, 145a, 1h + ST r9, r254, 146a, 1h + ST r9, r254, 147a, 1h + LD r1, r254, 144a, 1h + LD r3, r254, 148a, 1h ADD8 r4, r1, r3 - ST r4, r254, 116a, 1h - LD r9, r254, 113a, 1h - LD r10, r254, 117a, 1h + ST r4, r254, 148a, 1h + LD r9, r254, 145a, 1h + LD r10, r254, 149a, 1h ADD8 r11, r10, r9 - ST r11, r254, 117a, 1h - LD r3, r254, 114a, 1h - LD r4, r254, 118a, 1h + ST r11, r254, 149a, 1h + LD r3, r254, 146a, 1h + LD r4, r254, 150a, 1h ADD8 r9, r4, r3 - ST r9, r254, 118a, 1h - LD r10, r254, 115a, 1h - LD r11, r254, 119a, 1h + ST r9, r254, 150a, 1h + LD r10, r254, 147a, 1h + LD r11, r254, 151a, 1h ADD8 r1, r11, r10 - ST r1, r254, 119a, 1h - LD r9, r254, 119a, 1h - LD r5, r254, 118a, 1h - LD r10, r254, 116a, 1h - LD r11, r254, 117a, 1h + ST r1, r254, 151a, 1h + LD r9, r254, 151a, 1h + LD r5, r254, 150a, 1h + LD r10, r254, 148a, 1h + LD r11, r254, 149a, 1h ADD8 r11, r5, r11 ADD8 r3, r10, r11 ADD8 r5, r9, r3 @@ -44,75 +44,77 @@ main: JEQ r5, r2, :0 LI64 r1, 1008d JMP :1 - 0: LI64 r3, 0d + 0: LI64 r4, 0d LI64 r1, 4d - ADDI64 r4, r254, 0d - ADDI64 r2, r254, 16d - ADDI64 r10, r2, 16d - ADDI64 r4, r254, 48d - ADDI64 r9, r254, 64d - ADDI64 r11, r254, 96d - ST r8, r254, 48a, 8h - ST r7, r254, 56a, 8h - ST r6, r254, 0a, 8h - ST r1, r254, 8a, 8h - LD r2, r254, 48a, 8h - LD r5, r254, 0a, 8h - ADD64 r6, r5, r2 - ST r6, r254, 64a, 8h - LD r11, r254, 8a, 8h - LD r12, r254, 56a, 8h - ADD64 r1, r11, r12 - ST r1, r254, 72a, 8h - LD r5, r254, 0a, 8h - LD r6, r254, 48a, 8h - SUB64 r8, r5, r6 + ADDI64 r9, r254, 0d + ADDI64 r5, r254, 32d + ADDI64 r5, r254, 48d + ADDI64 r12, r5, 16d + ADDI64 r5, r254, 80d + ADDI64 r2, r254, 96d + ADDI64 r11, r254, 112d ST r8, r254, 80a, 8h - LD r12, r254, 56a, 8h - LD r1, r254, 8a, 8h - SUB64 r5, r1, r12 - ST r5, r254, 88a, 8h - ST r3, r254, 96a, 8h - ST r3, r254, 104a, 8h - LD r11, r254, 96a, 8h - LD r1, r254, 0a, 8h - SUB64 r3, r11, r1 - ST r3, r254, 16a, 8h - LD r7, r254, 8a, 8h - LD r8, r254, 104a, 8h - SUB64 r11, r8, r7 - ST r11, r254, 24a, 8h - BMC r4, r10, 16h - LD r3, r254, 16a, 8h - LD r5, r254, 64a, 8h - ADD64 r7, r3, r5 - ST r7, r254, 16a, 8h - LD r11, r254, 72a, 8h - LD r12, r254, 24a, 8h - ADD64 r2, r11, r12 - ST r2, r254, 24a, 8h - LD r7, r254, 80a, 8h - LD r8, r254, 32a, 8h - ADD64 r10, r7, r8 - ST r10, r254, 32a, 8h + ST r7, r254, 88a, 8h + ST r6, r254, 32a, 8h + ST r1, r254, 40a, 8h + LD r6, r254, 80a, 8h + LD r7, r254, 32a, 8h + ADD64 r8, r7, r6 + ST r8, r254, 0a, 8h + LD r1, r254, 40a, 8h + LD r2, r254, 88a, 8h + ADD64 r3, r1, r2 + ST r3, r254, 8a, 8h + LD r6, r254, 32a, 8h + LD r8, r254, 80a, 8h + SUB64 r10, r6, r8 + ST r10, r254, 16a, 8h LD r2, r254, 88a, 8h LD r3, r254, 40a, 8h + SUB64 r6, r3, r2 + ST r6, r254, 24a, 8h + BMC r9, r11, 32h + ST r4, r254, 96a, 8h + ST r4, r254, 104a, 8h + LD r2, r254, 96a, 8h + LD r4, r254, 32a, 8h + SUB64 r6, r2, r4 + ST r6, r254, 48a, 8h + LD r10, r254, 40a, 8h + LD r11, r254, 104a, 8h + SUB64 r1, r11, r10 + ST r1, r254, 56a, 8h + BMC r5, r12, 16h + LD r6, r254, 48a, 8h + LD r8, r254, 112a, 8h + ADD64 r10, r6, r8 + ST r10, r254, 48a, 8h + LD r2, r254, 120a, 8h + LD r3, r254, 56a, 8h ADD64 r5, r2, r3 - ST r5, r254, 40a, 8h - LD r8, r254, 16a, 8h - LD r10, r254, 32a, 8h - ADD64 r12, r10, r8 - ADDI64 r5, r254, 120d - ST r12, r254, 120a, 8h - LD r5, r254, 40a, 8h - LD r6, r254, 24a, 8h + ST r5, r254, 56a, 8h + LD r10, r254, 128a, 8h + LD r11, r254, 64a, 8h + ADD64 r1, r10, r11 + ST r1, r254, 64a, 8h + LD r5, r254, 136a, 8h + LD r6, r254, 72a, 8h ADD64 r8, r5, r6 - ST r8, r254, 128a, 8h - LD r12, r254, 120a, 8h - LD r2, r254, 128a, 8h - ADD64 r1, r2, r12 - 1: ADDI64 r254, r254, 136d + ST r8, r254, 72a, 8h + LD r11, r254, 48a, 8h + LD r1, r254, 64a, 8h + ADD64 r3, r1, r11 + ADDI64 r8, r254, 152d + ST r3, r254, 152a, 8h + LD r8, r254, 72a, 8h + LD r9, r254, 56a, 8h + ADD64 r11, r8, r9 + ST r11, r254, 160a, 8h + LD r3, r254, 152a, 8h + LD r5, r254, 160a, 8h + ADD64 r1, r5, r3 + 1: ADDI64 r254, r254, 168d JALA r0, r31, 0a -code size: 1231 +code size: 1247 ret: 10 status: Ok(()) diff --git a/lang/tests/son_tests_wide_ret.txt b/lang/tests/son_tests_wide_ret.txt index 31815c9..55cd32a 100644 --- a/lang/tests/son_tests_wide_ret.txt +++ b/lang/tests/son_tests_wide_ret.txt @@ -16,32 +16,34 @@ main: ADDI64 r254, r254, 48d JALA r0, r31, 0a maina: - ADDI64 r254, r254, -48d - ST r31, r254, 32a, 16h - ADDI64 r32, r254, 28d + ADDI64 r254, r254, -56d + ST r31, r254, 40a, 16h + ADDI64 r32, r254, 36d JAL r31, r0, :small_struct - ST r1, r254, 28a, 4h + ST r1, r254, 36a, 4h LI64 r2, 1d - LI64 r4, 3d + LI64 r3, 3d LI64 r1, 0d - ADDI64 r5, r254, 0d - ADDI64 r7, r5, 8d - ADDI64 r3, r254, 16d - ADDI64 r6, r254, 24d - BMC r32, r6, 4h - ST r1, r254, 16a, 1h - ST r1, r254, 17a, 1h - ST r1, r254, 18a, 1h - ST r4, r254, 19a, 1h - ST r2, r254, 20a, 1h - ST r1, r254, 21a, 1h - ST r1, r254, 22a, 1h - ST r1, r254, 23a, 1h - BMC r3, r7, 8h - BMC r3, r5, 8h - LD r1, r5, 0a, 16h - LD r31, r254, 32a, 16h - ADDI64 r254, r254, 48d + ADDI64 r6, r254, 0d + ADDI64 r8, r6, 8d + ADDI64 r7, r254, 16d + ADDI64 r4, r254, 24d + ADDI64 r5, r254, 32d + BMC r32, r5, 4h + ST r1, r254, 24a, 1h + ST r1, r254, 25a, 1h + ST r1, r254, 26a, 1h + ST r3, r254, 27a, 1h + ST r2, r254, 28a, 1h + ST r1, r254, 29a, 1h + ST r1, r254, 30a, 1h + ST r1, r254, 31a, 1h + BMC r4, r7, 8h + BMC r4, r6, 8h + BMC r7, r8, 8h + LD r1, r6, 0a, 16h + LD r31, r254, 40a, 16h + ADDI64 r254, r254, 56d JALA r0, r31, 0a small_struct: ADDI64 r254, r254, -4d @@ -52,6 +54,6 @@ small_struct: LD r1, r3, 0a, 4h ADDI64 r254, r254, 4d JALA r0, r31, 0a -code size: 557 +code size: 573 ret: 2 status: Ok(())