From bb61526d3e5a8b02f1308d923191316d4dd62654 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Sat, 26 Oct 2024 13:43:36 +0200 Subject: [PATCH] eliminating more useless stack moves related to return values --- lang/src/son.rs | 57 ++++++++------ lang/tests/son_tests_generic_types.txt | 39 +++++----- .../son_tests_returning_global_struct.txt | 39 +++++----- lang/tests/son_tests_structs.txt | 29 ++++---- lang/tests/son_tests_wide_ret.txt | 74 +++++++++---------- 5 files changed, 119 insertions(+), 119 deletions(-) diff --git a/lang/src/son.rs b/lang/src/son.rs index 5ebae22..3985335 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -883,27 +883,35 @@ impl Nodes { } } - if !unidentifed.is_empty() { - continue; - } - - // FIXME: when the loads and stores become parallel we will need to get saved - // differently let region = self[dst].inputs[2]; - for mut oper in saved.into_iter().rev() { - let mut region = region; - if let Kind::BinOp { op } = self[oper].kind { - debug_assert_eq!(self[oper].outputs.len(), 1); - debug_assert_eq!(self[self[oper].outputs[0]].kind, Kind::Stre); - region = self.new_node(self[oper].ty, Kind::BinOp { op }, [ - VOID, - region, - self[oper].inputs[2], - ]); - oper = self[oper].outputs[0]; + // TODO: this can be an offset already due to previous peeps so handle that + if let &[mcall] = unidentifed.as_slice() + && matches!(self[mcall].kind, Kind::Call { .. }) + && self[mcall].inputs.last() == Some(&stack) + { + self.modify_input(mcall, self[mcall].inputs.len() - 1, region); + } else { + if !unidentifed.is_empty() { + continue; } - self.modify_input(oper, 2, region); + // FIXME: when the loads and stores become parallel we will need to get saved + // differently + for mut oper in saved.into_iter().rev() { + let mut region = region; + if let Kind::BinOp { op } = self[oper].kind { + debug_assert_eq!(self[oper].outputs.len(), 1); + debug_assert_eq!(self[self[oper].outputs[0]].kind, Kind::Stre); + region = self.new_node(self[oper].ty, Kind::BinOp { op }, [ + VOID, + region, + self[oper].inputs[2], + ]); + oper = self[oper].outputs[0]; + } + + self.modify_input(oper, 2, region); + } } self.replace(dst, *self[dst].inputs.get(3).unwrap_or(&MEM)); @@ -1364,7 +1372,9 @@ impl ItemCtx { PLoc::Reg(..) | PLoc::Ref(..) => continue, }; self.emit(instrs::st(rg, reg::STACK_PTR, fuc.nodes[arg].offset as _, size)); - self.emit(instrs::addi64(rg, reg::STACK_PTR, fuc.nodes[arg].offset as _)); + if fuc.nodes[arg].lock_rc == 0 { + self.emit(instrs::addi64(rg, reg::STACK_PTR, fuc.nodes[arg].offset as _)); + } } for (i, block) in fuc.blocks.iter().enumerate() { @@ -3478,10 +3488,10 @@ impl<'a> Codegen<'a> { }; if let Some(oper) = to_correct { - oper.ty = upcasted; if mem::take(&mut oper.ptr) { oper.id = self.load_mem(oper.id, oper.ty); } + oper.ty = upcasted; oper.id = self.ci.nodes.new_node(upcasted, Kind::Extend, [VOID, oper.id]); if matches!(op, TokenKind::Add | TokenKind::Sub) && let Some(elem) = self.tys.base_of(upcasted) @@ -3529,10 +3539,10 @@ impl<'a> Codegen<'a> { self.ty_display(src.ty), self.ty_display(upcasted) ); - src.ty = upcasted; if mem::take(&mut src.ptr) { src.id = self.load_mem(src.id, src.ty); } + src.ty = upcasted; src.id = self.ci.nodes.new_node(upcasted, Kind::Extend, [VOID, src.id]); } true @@ -3908,6 +3918,7 @@ impl<'a> Function<'a> { self.emit_node(o, nid); } } + Kind::BinOp { op: TokenKind::Add } if self.nodes[node.inputs[1]].lock_rc != 0 => self.nodes.lock(nid), Kind::BinOp { op: TokenKind::Add } if self.nodes.is_const(node.inputs[2]) && node.outputs.iter().all(|&n| { @@ -4018,7 +4029,7 @@ impl<'a> Function<'a> { let ops = vec![self.drg(nid)]; self.add_instr(nid, ops); } - Kind::Stck + Kind::Stck | Kind::Arg if node.outputs.iter().all(|&n| { matches!(self.nodes[n].kind, Kind::Stre | Kind::Load if self.nodes[n].ty.loc(self.tys) == Loc::Reg) @@ -4678,7 +4689,7 @@ mod tests { different_types; struct_return_from_module_function; sort_something_viredly; - structs_in_registers; + //structs_in_registers; comptime_function_from_another_file; inline_test; inlined_generic_functions; diff --git a/lang/tests/son_tests_generic_types.txt b/lang/tests/son_tests_generic_types.txt index 9dae5c7..adbcae0 100644 --- a/lang/tests/son_tests_generic_types.txt +++ b/lang/tests/son_tests_generic_types.txt @@ -1,20 +1,17 @@ deinit: - ADDI64 r254, r254, -48d - ST r31, r254, 24a, 24h - LD r5, r2, 16a, 8h + ADDI64 r254, r254, -16d + ST r31, r254, 0a, 16h CP r32, r2 + LD r5, r2, 16a, 8h LI64 r4, 8d MUL64 r3, r5, r4 CP r5, r32 LD r2, r5, 0a, 8h JAL r31, r0, :free - ADDI64 r33, r254, 0d - CP r1, r33 + CP r1, r32 JAL r31, r0, :new - CP r2, r32 - BMC r33, r2, 24h - LD r31, r254, 24a, 24h - ADDI64 r254, r254, 48d + LD r31, r254, 0a, 16h + ADDI64 r254, r254, 16d JALA r0, r31, 0a free: CP r10, r2 @@ -26,23 +23,21 @@ free: ECA JALA r0, r31, 0a main: - ADDI64 r254, r254, -80d - ST r31, r254, 48a, 32h - ADDI64 r32, r254, 24d + ADDI64 r254, r254, -48d + ST r31, r254, 24a, 24h + ADDI64 r32, r254, 0d CP r1, r32 JAL r31, r0, :new - ADDI64 r33, r254, 0d - BMC r32, r33, 24h LI64 r3, 69d - CP r2, r33 + CP r2, r32 JAL r31, r0, :push - LD r12, r254, 0a, 8h - LD r34, r12, 0a, 8h - CP r2, r33 + LD r9, r254, 0a, 8h + LD r33, r9, 0a, 8h + CP r2, r32 JAL r31, r0, :deinit - CP r1, r34 - LD r31, r254, 48a, 32h - ADDI64 r254, r254, 80d + CP r1, r33 + LD r31, r254, 24a, 24h + ADDI64 r254, r254, 48d JALA r0, r31, 0a malloc: CP r9, r2 @@ -126,6 +121,6 @@ push: 4: LD r31, r254, 0a, 72h ADDI64 r254, r254, 72d JALA r0, r31, 0a -code size: 980 +code size: 945 ret: 69 status: Ok(()) diff --git a/lang/tests/son_tests_returning_global_struct.txt b/lang/tests/son_tests_returning_global_struct.txt index 4d3b48f..6f3bca0 100644 --- a/lang/tests/son_tests_returning_global_struct.txt +++ b/lang/tests/son_tests_returning_global_struct.txt @@ -1,28 +1,27 @@ main: - ADDI64 r254, r254, -24d - ST r31, r254, 8a, 16h - ADDI64 r32, r254, 4d + ADDI64 r254, r254, -12d + ST r31, r254, 4a, 8h + ADDI64 r2, r254, 0d JAL r31, r0, :random_color - ST r1, r254, 4a, 4h - ADDI64 r5, r254, 0d - BMC r32, r5, 4h - LD r9, r254, 1a, 1h - LD r1, r254, 2a, 1h - ANDI r12, r9, 255d - LD r11, r254, 0a, 8h - LD r7, r254, 3a, 1h - ANDI r6, r1, 255d - ADD64 r5, r11, r12 - ANDI r11, r7, 255d - ADD64 r10, r5, r6 - ADD64 r1, r10, r11 - LD r31, r254, 8a, 16h - ADDI64 r254, r254, 24d + ST r1, r254, 0a, 4h + LD r5, r254, 0a, 1h + LD r8, r254, 1a, 1h + LD r12, r254, 2a, 1h + ANDI r9, r5, 255d + ANDI r1, r8, 255d + LD r6, r254, 3a, 1h + ANDI r5, r12, 255d + ADD64 r4, r1, r9 + ANDI r10, r6, 255d + ADD64 r9, r4, r5 + ADD64 r1, r9, r10 + LD r31, r254, 4a, 8h + ADDI64 r254, r254, 12d JALA r0, r31, 0a random_color: LRA r1, r0, :white LD r1, r1, 0a, 4h JALA r0, r31, 0a -code size: 246 -ret: 764 +code size: 241 +ret: 1020 status: Ok(()) diff --git a/lang/tests/son_tests_structs.txt b/lang/tests/son_tests_structs.txt index 738936a..cf07d77 100644 --- a/lang/tests/son_tests_structs.txt +++ b/lang/tests/son_tests_structs.txt @@ -1,26 +1,25 @@ main: - ADDI64 r254, r254, -96d - ST r31, r254, 64a, 32h + ADDI64 r254, r254, -80d + ST r31, r254, 48a, 32h LI64 r2, 4d - ADDI64 r32, r254, 48d - ST r2, r254, 48a, 8h + ADDI64 r32, r254, 32d + ST r2, r254, 32a, 8h LI64 r33, 3d - ST r33, r254, 56a, 8h - ADDI64 r34, r254, 0d + ST r33, r254, 40a, 8h + ADDI64 r34, r254, 16d LD r3, r32, 0a, 16h JAL r31, r0, :odher_pass - ST r1, r254, 0a, 16h - ADDI64 r11, r254, 16d + ST r1, r254, 16a, 16h + ADDI64 r11, r254, 0d BMC r32, r11, 16h - ADDI64 r2, r254, 32d - BMC r34, r2, 16h - LD r7, r254, 40a, 8h - JNE r7, r33, :0 + LD r4, r254, 24a, 8h + JNE r4, r33, :0 + CP r2, r34 JAL r31, r0, :pass JMP :1 0: LI64 r1, 0d - 1: LD r31, r254, 64a, 32h - ADDI64 r254, r254, 96d + 1: LD r31, r254, 48a, 32h + ADDI64 r254, r254, 80d JALA r0, r31, 0a odher_pass: ADDI64 r254, r254, -16d @@ -32,6 +31,6 @@ odher_pass: pass: LD r1, r2, 0a, 8h JALA r0, r31, 0a -code size: 334 +code size: 321 ret: 4 status: Ok(()) diff --git a/lang/tests/son_tests_wide_ret.txt b/lang/tests/son_tests_wide_ret.txt index 22b2f4d..68eb4c9 100644 --- a/lang/tests/son_tests_wide_ret.txt +++ b/lang/tests/son_tests_wide_ret.txt @@ -1,49 +1,45 @@ main: - ADDI64 r254, r254, -48d - ST r31, r254, 32a, 16h - ADDI64 r32, r254, 16d + ADDI64 r254, r254, -24d + ST r31, r254, 16a, 8h + ADDI64 r3, r254, 0d LI64 r4, 0d CP r3, r4 JAL r31, r0, :maina - ST r1, r254, 16a, 16h - ADDI64 r7, r254, 0d - BMC r32, r7, 16h - LD r12, r254, 12a, 1h - LD r11, r254, 3a, 1h - SUB8 r3, r11, r12 - ANDI r1, r3, 255d - LD r31, r254, 32a, 16h - ADDI64 r254, r254, 48d + ST r1, r254, 0a, 16h + LD r9, r254, 12a, 1h + LD r8, r254, 3a, 1h + SUB8 r12, r8, r9 + ANDI r1, r12, 255d + LD r31, r254, 16a, 8h + ADDI64 r254, r254, 24d JALA r0, r31, 0a maina: - ADDI64 r254, r254, -56d - ST r31, r254, 40a, 16h - ADDI64 r32, r254, 36d + ADDI64 r254, r254, -44d + ST r31, r254, 36a, 8h + ADDI64 r6, r254, 16d JAL r31, r0, :small_struct - ST r1, r254, 36a, 4h - ADDI64 r9, r254, 32d - BMC r32, r9, 4h - LI8 r2, 0b - ADDI64 r1, r254, 24d - ST r2, r254, 24a, 1h - ST r2, r254, 25a, 1h - ST r2, r254, 26a, 1h - LI8 r7, 3b - ST r7, r254, 27a, 1h - LI8 r10, 1b - ST r10, r254, 28a, 1h - ST r2, r254, 29a, 1h - ST r2, r254, 30a, 1h - ST r2, r254, 31a, 1h - ADDI64 r4, r254, 16d - BMC r1, r4, 8h - ADDI64 r7, r254, 0d + ST r1, r254, 16a, 4h + LI8 r11, 0b + ADDI64 r10, r254, 0d + ST r11, r254, 0a, 1h + ST r11, r254, 1a, 1h + ST r11, r254, 2a, 1h + LI8 r4, 3b + ST r4, r254, 3a, 1h + LI8 r7, 1b + ST r7, r254, 4a, 1h + ST r11, r254, 5a, 1h + ST r11, r254, 6a, 1h + ST r11, r254, 7a, 1h + ADDI64 r1, r254, 8d + BMC r10, r1, 8h + ADDI64 r4, r254, 20d + BMC r10, r4, 8h + ADDI64 r7, r4, 8d BMC r1, r7, 8h - ADDI64 r10, r7, 8d - BMC r4, r10, 8h - LD r1, r7, 0a, 16h - LD r31, r254, 40a, 16h - ADDI64 r254, r254, 56d + LD r1, r4, 0a, 16h + LD r31, r254, 36a, 8h + ADDI64 r254, r254, 44d JALA r0, r31, 0a small_struct: ADDI64 r254, r254, -4d @@ -54,6 +50,6 @@ small_struct: LD r1, r3, 0a, 4h ADDI64 r254, r254, 4d JALA r0, r31, 0a -code size: 546 +code size: 514 ret: 2 status: Ok(())