From cfd3eac0a832b61abe57a047aeb14ff8290dd8e6 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Thu, 19 Dec 2024 11:00:19 +0100 Subject: [PATCH] making the instruction scheduling smarter the instructions that are depended on only by phis are pushed to the end of the block, which usually saves copy instructions Signed-off-by: Jakub Doka --- lang/src/nodes.rs | 12 +- ..._tests_different_function_destinations.txt | 7 +- .../son_tests_exhaustive_loop_testing.txt | 4 +- lang/tests/son_tests_generic_types.txt | 132 +++++++++--------- lang/tests/son_tests_idk.txt | 21 ++- lang/tests/son_tests_inlining_issues.txt | 4 +- lang/tests/son_tests_request_page.txt | 2 +- .../son_tests_sort_something_viredly.txt | 2 +- lang/tests/son_tests_string_flip.txt | 53 ++++--- .../tests/son_tests_tests_ptr_to_ptr_copy.txt | 32 ++--- ...s_triggering_store_in_divergent_branch.txt | 4 +- 11 files changed, 132 insertions(+), 141 deletions(-) diff --git a/lang/src/nodes.rs b/lang/src/nodes.rs index 16d9835..afd5be1 100644 --- a/lang/src/nodes.rs +++ b/lang/src/nodes.rs @@ -295,9 +295,7 @@ impl Nodes { continue; } let mut cursor = buf.len(); - for &o in outputs.iter().filter(|&&n| n == o) { - buf.push(o); - } + buf.push(o); while let Some(&n) = buf.get(cursor) { for &i in &self[n].inputs[1..] 
{ if fromc == self[i].inputs.first() @@ -308,15 +306,17 @@ impl Nodes { }) && seen.set(i) { - for &o in outputs.iter().filter(|&&n| n == i) { - buf.push(o); - } + buf.push(i); } } cursor += 1; } } + buf[1..].sort_by_key(|&n| { + self[n].has_no_value() || !self[n].outputs.iter().all(|&o| self[o].kind == Kind::Phi) + }); + debug_assert_eq!( outputs.iter().filter(|&&n| !seen.get(n)).copied().collect::>(), vec![], diff --git a/lang/tests/son_tests_different_function_destinations.txt b/lang/tests/son_tests_different_function_destinations.txt index d86d005..169ad91 100644 --- a/lang/tests/son_tests_different_function_destinations.txt +++ b/lang/tests/son_tests_different_function_destinations.txt @@ -48,10 +48,9 @@ main: JMP :1 6: CP r1, r0 JMP :1 - 5: ADDI64 r34, r32, 16d - ST r0, r32, 0a, 8h + 5: ST r0, r32, 0a, 8h ST r0, r32, 8a, 8h - CP r32, r34 + ADDI64 r32, r32, 16d JMP :7 3: JAL r31, r0, :new_stru ST r1, r32, 0a, 16h @@ -67,6 +66,6 @@ new_stru: LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 658 +code size: 655 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_exhaustive_loop_testing.txt b/lang/tests/son_tests_exhaustive_loop_testing.txt index 2e9d153..dee5838 100644 --- a/lang/tests/son_tests_exhaustive_loop_testing.txt +++ b/lang/tests/son_tests_exhaustive_loop_testing.txt @@ -1,10 +1,10 @@ continue_and_state_change: CP r13, r2 - CP r15, r0 LI64 r16, 3d - LI64 r14, 4d LI64 r17, 2d LI64 r18, 10d + CP r15, r0 + LI64 r14, 4d 6: JLTU r13, r18, :0 JMP :1 0: JNE r13, r17, :2 diff --git a/lang/tests/son_tests_generic_types.txt b/lang/tests/son_tests_generic_types.txt index 9f1181a..072fb29 100644 --- a/lang/tests/son_tests_generic_types.txt +++ b/lang/tests/son_tests_generic_types.txt @@ -112,51 +112,49 @@ new: push: ADDI64 r254, r254, -80d ST r31, r254, 0a, 80h - CP r38, r2 - CP r39, r3 - LI64 r37, 1d - LD r33, r38, 8a, 8h - LD r32, r38, 16a, 8h + CP r36, r2 + CP r37, r3 + LI64 r35, 1d + LD r33, r36, 8a, 8h + LD r32, r36, 16a, 8h JNE r32, 
r33, :0 JNE r32, r0, :1 - CP r32, r37 + CP r32, r35 JMP :2 1: MULI64 r32, r32, 2d 2: CP r2, r32 - CP r3, r37 + CP r3, r35 JAL r31, r0, :malloc - ST r32, r38, 16a, 8h - CP r35, r1 - JNE r35, r0, :3 + ST r32, r36, 16a, 8h + CP r34, r1 + JNE r34, r0, :3 CP r1, r0 JMP :4 - 3: LD r32, r38, 0a, 8h - ADD64 r40, r33, r32 - CP r34, r35 - 7: LD r33, r38, 0a, 8h - LD r36, r38, 8a, 8h - JNE r40, r32, :5 - JEQ r36, r0, :6 - CP r2, r33 - CP r3, r36 - CP r4, r37 + 3: LD r32, r36, 0a, 8h + ADD64 r38, r33, r32 + CP r33, r34 + 7: LD r39, r36, 0a, 8h + LD r40, r36, 8a, 8h + JNE r38, r32, :5 + JEQ r40, r0, :6 + CP r2, r39 + CP r3, r40 + CP r4, r35 JAL r31, r0, :free JMP :6 - 6: ST r35, r38, 0a, 8h + 6: ST r34, r36, 0a, 8h JMP :0 - 5: ADDI64 r36, r34, 1d - ADDI64 r33, r32, 1d - LD r32, r32, 0a, 1h - ST r32, r34, 0a, 1h - CP r32, r33 - CP r34, r36 - JMP :7 - 0: LD r32, r38, 8a, 8h - LD r33, r38, 0a, 8h - ADD64 r33, r32, r33 + 5: LD r39, r32, 0a, 1h ST r39, r33, 0a, 1h - ADD64 r32, r32, r37 - ST r32, r38, 8a, 8h + ADDI64 r33, r33, 1d + ADDI64 r32, r32, 1d + JMP :7 + 0: LD r32, r36, 8a, 8h + LD r33, r36, 0a, 8h + ADD64 r33, r32, r33 + ST r37, r33, 0a, 1h + ADD64 r32, r32, r35 + ST r32, r36, 8a, 8h CP r1, r33 4: LD r31, r254, 0a, 80h ADDI64 r254, r254, 80d @@ -164,60 +162,58 @@ push: push: ADDI64 r254, r254, -88d ST r31, r254, 0a, 88h - CP r38, r2 - CP r39, r3 - LI64 r37, 1d - LD r33, r38, 8a, 8h - LD r32, r38, 16a, 8h + CP r36, r2 + CP r37, r3 + LI64 r35, 1d + LD r33, r36, 8a, 8h + LD r32, r36, 16a, 8h JNE r32, r33, :0 JNE r32, r0, :1 - CP r32, r37 + CP r32, r35 JMP :2 1: MULI64 r32, r32, 2d - 2: LI64 r40, 8d - MUL64 r34, r32, r40 + 2: LI64 r38, 8d + MUL64 r34, r32, r38 CP r2, r34 - CP r3, r40 + CP r3, r38 JAL r31, r0, :malloc - ST r32, r38, 16a, 8h - CP r35, r1 - JNE r35, r0, :3 + ST r32, r36, 16a, 8h + CP r34, r1 + JNE r34, r0, :3 CP r1, r0 JMP :4 3: MULI64 r33, r33, 8d - LD r32, r38, 0a, 8h - ADD64 r41, r32, r33 - CP r34, r35 - 7: LD r33, r38, 0a, 8h - LD r36, r38, 8a, 8h - JNE r41, 
r32, :5 - JEQ r36, r0, :6 - MUL64 r32, r36, r40 - CP r2, r33 + LD r32, r36, 0a, 8h + ADD64 r39, r32, r33 + CP r33, r34 + 7: LD r40, r36, 0a, 8h + LD r41, r36, 8a, 8h + JNE r39, r32, :5 + JEQ r41, r0, :6 + MUL64 r32, r41, r38 + CP r2, r40 CP r3, r32 - CP r4, r40 + CP r4, r38 JAL r31, r0, :free JMP :6 - 6: ST r35, r38, 0a, 8h + 6: ST r34, r36, 0a, 8h JMP :0 - 5: ADDI64 r36, r34, 8d - ADDI64 r33, r32, 8d - LD r32, r32, 0a, 8h - ST r32, r34, 0a, 8h - CP r32, r33 - CP r34, r36 + 5: LD r40, r32, 0a, 8h + ST r40, r33, 0a, 8h + ADDI64 r33, r33, 8d + ADDI64 r32, r32, 8d JMP :7 - 0: LD r32, r38, 8a, 8h + 0: LD r32, r36, 8a, 8h MULI64 r33, r32, 8d - LD r34, r38, 0a, 8h + LD r34, r36, 0a, 8h ADD64 r33, r34, r33 - ST r39, r33, 0a, 8h - ADD64 r32, r32, r37 - ST r32, r38, 8a, 8h + ST r37, r33, 0a, 8h + ADD64 r32, r32, r35 + ST r32, r36, 8a, 8h CP r1, r33 4: LD r31, r254, 0a, 88h ADDI64 r254, r254, 88d JALA r0, r31, 0a -code size: 1635 +code size: 1623 ret: 69 status: Ok(()) diff --git a/lang/tests/son_tests_idk.txt b/lang/tests/son_tests_idk.txt index 0f7074b..6a19e1f 100644 --- a/lang/tests/son_tests_idk.txt +++ b/lang/tests/son_tests_idk.txt @@ -1,21 +1,20 @@ main: ADDI64 r254, r254, -128d - ADDI64 r15, r254, 0d - LI8 r16, 69b - LI64 r17, 128d + ADDI64 r14, r254, 0d + LI8 r15, 69b + LI64 r16, 128d CP r13, r0 - 2: LD r14, r254, 42a, 1h - JLTU r13, r17, :0 - ANDI r13, r14, 255d + 2: LD r17, r254, 42a, 1h + JLTU r13, r16, :0 + ANDI r13, r17, 255d CP r1, r13 JMP :1 - 0: ADDI64 r14, r13, 1d - ADD64 r13, r15, r13 - ST r16, r13, 0a, 1h - CP r13, r14 + 0: ADD64 r17, r14, r13 + ST r15, r17, 0a, 1h + ADDI64 r13, r13, 1d JMP :2 1: ADDI64 r254, r254, 128d JALA r0, r31, 0a -code size: 141 +code size: 138 ret: 69 status: Ok(()) diff --git a/lang/tests/son_tests_inlining_issues.txt b/lang/tests/son_tests_inlining_issues.txt index f842f6c..aec4d8a 100644 --- a/lang/tests/son_tests_inlining_issues.txt +++ b/lang/tests/son_tests_inlining_issues.txt @@ -59,9 +59,9 @@ put_filled_rect: LD r14, r14, 
0a, 8h ADD64 r26, r14, r26 LD r28, r15, 0a, 8h + MUL64 r15, r27, r25 + ADD64 r14, r14, r15 ADD64 r15, r28, r26 - MUL64 r25, r27, r25 - ADD64 r14, r14, r25 ADD64 r14, r28, r14 3: JGTU r13, r20, :0 JNE r13, r20, :1 diff --git a/lang/tests/son_tests_request_page.txt b/lang/tests/son_tests_request_page.txt index b4fe810..2b7f0ae 100644 --- a/lang/tests/son_tests_request_page.txt +++ b/lang/tests/son_tests_request_page.txt @@ -12,8 +12,8 @@ create_back_buffer: 0: LI8 r34, 255b CP r2, r34 JAL r31, r0, :request_page - SUB64 r32, r32, r33 CP r35, r1 + SUB64 r32, r32, r33 5: JGTS r32, r0, :2 CP r1, r35 JMP :1 diff --git a/lang/tests/son_tests_sort_something_viredly.txt b/lang/tests/son_tests_sort_something_viredly.txt index 0a1320e..de32183 100644 --- a/lang/tests/son_tests_sort_something_viredly.txt +++ b/lang/tests/son_tests_sort_something_viredly.txt @@ -11,9 +11,9 @@ main: JALA r0, r31, 0a sqrt: CP r14, r2 + CP r17, r0 LI64 r16, 15d LI64 r15, 32768d - CP r17, r0 CP r13, r17 3: JNE r15, r17, :0 CP r1, r13 diff --git a/lang/tests/son_tests_string_flip.txt b/lang/tests/son_tests_string_flip.txt index d737d87..f56d560 100644 --- a/lang/tests/son_tests_string_flip.txt +++ b/lang/tests/son_tests_string_flip.txt @@ -1,45 +1,44 @@ main: ADDI64 r254, r254, -40d - LI64 r17, 1d + LI64 r16, 1d LI64 r15, 4d + ADDI64 r17, r254, 0d CP r14, r0 - ADDI64 r18, r254, 0d CP r13, r14 6: JNE r13, r15, :0 - ADDI64 r19, r254, 32d - LI64 r20, 2d + ADDI64 r18, r254, 32d + LI64 r19, 2d CP r13, r14 4: LD r15, r254, 16a, 8h - JNE r13, r17, :1 + JNE r13, r16, :1 CP r1, r15 JMP :2 - 1: ADD64 r16, r13, r17 - SUB64 r15, r20, r16 - MUL64 r21, r15, r20 - MUL64 r22, r13, r20 + 1: ADD64 r15, r13, r16 + SUB64 r20, r19, r15 + MUL64 r20, r20, r19 + MUL64 r21, r13, r19 CP r13, r14 - 5: JNE r13, r20, :3 - CP r13, r16 - JMP :4 - 3: ADD64 r15, r13, r17 - ADD64 r23, r22, r13 - ADD64 r13, r21, r13 - MULI64 r23, r23, 8d - MULI64 r13, r13, 8d - ADD64 r23, r18, r23 - ADD64 r13, r18, r13 - BMC r23, r19, 8h - BMC r13, 
r23, 8h - BMC r19, r13, 8h + 5: JNE r13, r19, :3 CP r13, r15 + JMP :4 + 3: ADD64 r22, r21, r13 + ADD64 r23, r20, r13 + MULI64 r22, r22, 8d + MULI64 r23, r23, 8d + ADD64 r22, r17, r22 + ADD64 r23, r17, r23 + BMC r22, r18, 8h + BMC r23, r22, 8h + BMC r18, r23, 8h + ADD64 r13, r13, r16 JMP :5 - 0: MULI64 r16, r13, 8d - ADD64 r16, r18, r16 - ST r13, r16, 0a, 8h - ADD64 r13, r13, r17 + 0: MULI64 r18, r13, 8d + ADD64 r18, r17, r18 + ST r13, r18, 0a, 8h + ADD64 r13, r13, r16 JMP :6 2: ADDI64 r254, r254, 40d JALA r0, r31, 0a -code size: 267 +code size: 264 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt b/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt index 13510ef..ba6ed71 100644 --- a/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt +++ b/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt @@ -1,28 +1,26 @@ main: ADDI64 r254, r254, -10240d - LI8 r15, 64b - LI64 r16, 1024d + LI8 r14, 64b + LI64 r15, 1024d + ADDI64 r16, r254, 0d CP r13, r0 - ADDI64 r17, r254, 0d - 4: JLTU r13, r16, :0 - ADDI64 r13, r17, 1024d - ADDI64 r15, r17, 10240d - 3: LD r14, r254, 2048a, 1h - JLTU r13, r15, :1 - ANDI r13, r14, 255d + 4: JLTU r13, r15, :0 + ADDI64 r14, r16, 10240d + ADDI64 r13, r16, 1024d + 3: LD r15, r254, 2048a, 1h + JLTU r13, r14, :1 + ANDI r13, r15, 255d CP r1, r13 JMP :2 - 1: ADDI64 r14, r13, 1024d - BMC r17, r13, 1024h - CP r13, r14 + 1: BMC r16, r13, 1024h + ADDI64 r13, r13, 1024d JMP :3 - 0: ADDI64 r14, r13, 1d - ADD64 r13, r17, r13 - ST r15, r13, 0a, 1h - CP r13, r14 + 0: ADD64 r17, r16, r13 + ST r14, r17, 0a, 1h + ADDI64 r13, r13, 1d JMP :4 2: ADDI64 r254, r254, 10240d JALA r0, r31, 0a -code size: 192 +code size: 186 ret: 64 status: Ok(()) diff --git a/lang/tests/son_tests_triggering_store_in_divergent_branch.txt b/lang/tests/son_tests_triggering_store_in_divergent_branch.txt index 89b8321..317aa00 100644 --- a/lang/tests/son_tests_triggering_store_in_divergent_branch.txt +++ b/lang/tests/son_tests_triggering_store_in_divergent_branch.txt @@ -1,10 
+1,10 @@ main: ADDI64 r254, r254, -64d ST r31, r254, 0a, 64h - CP r34, r0 LI64 r37, 65536d - LI8 r35, 1b CP r36, r0 + CP r34, r0 + LI8 r35, 1b CP r32, r36 7: JAL r31, r0, :opaque CP r33, r1