From 5a7a01ca028b2aabdb8fe61a23bf6c9fa7e747e1 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Sun, 15 Dec 2024 17:57:22 +0100 Subject: [PATCH] adding the stack offset elision for return values as well Signed-off-by: Jakub Doka --- lang/src/backend/hbvm.rs | 5 ++- lang/src/backend/hbvm/regalloc.rs | 36 +++++++++---------- ..._tests_different_function_destinations.txt | 5 ++- .../son_tests_null_check_in_the_loop.txt | 5 ++- ...ests_null_check_returning_small_global.txt | 28 ++++++--------- lang/tests/son_tests_nullable_structure.txt | 5 ++- lang/tests/son_tests_nullable_types.txt | 8 ++--- ...ts_overwrite_aliasing_overoptimization.txt | 5 ++- .../son_tests_returning_optional_issues.txt | 9 +++-- .../son_tests_scheduling_block_did_dirty.txt | 13 ++++--- lang/tests/son_tests_small_struct_bitcast.txt | 13 ++++--- ...son_tests_storing_into_nullable_struct.txt | 5 ++- ...sts_struct_return_from_module_function.txt | 5 ++- lang/tests/son_tests_wide_ret.txt | 28 +++++++-------- 14 files changed, 74 insertions(+), 96 deletions(-) diff --git a/lang/src/backend/hbvm.rs b/lang/src/backend/hbvm.rs index 616b4560b..970785cf3 100644 --- a/lang/src/backend/hbvm.rs +++ b/lang/src/backend/hbvm.rs @@ -392,11 +392,10 @@ impl Nodes { op.cond_op(self[self[cnd].inputs[1]].ty) } - fn strip_offset(&self, region: Nid, ty: ty::Id, tys: &Types) -> (Nid, Offset) { + fn strip_offset(&self, region: Nid) -> (Nid, Offset) { if matches!(self[region].kind, Kind::BinOp { op: TokenKind::Add | TokenKind::Sub }) && self.is_locked(region) && let Kind::CInt { value } = self[self[region].inputs[2]].kind - && ty.loc(tys) == Loc::Reg { (self[region].inputs[1], value as _) } else { @@ -442,7 +441,7 @@ impl Nodes { // this means the struct is actually loaded into a register so no BMC needed || (node.kind == Kind::Load && !matches!(tys.parama(node.ty).0, Some(PLoc::Ref(..))) - && node.outputs.iter().all(|&o| self[o].kind.is_call()))) + && node.outputs.iter().all(|&o| matches!(self[o].kind, Kind::Call { .. } | Kind::Return { .. })))) } } diff --git a/lang/src/backend/hbvm/regalloc.rs b/lang/src/backend/hbvm/regalloc.rs index 290d9193f..0a5723e3b 100644 --- a/lang/src/backend/hbvm/regalloc.rs +++ b/lang/src/backend/hbvm/regalloc.rs @@ -4,14 +4,14 @@ use { reg::{self, Reg}, HbvmBackend, Nid, Nodes, PLoc, Reloc, TypedReloc, }, - lexer::{Token, TokenKind}, + lexer::TokenKind, parser, quad_sort, son::{Kind, ARG_START, MEM, VOID}, ty::{self, Arg, Loc, Module, Offset, Sig, Types}, utils::{BitSet, EntSlice}, }, alloc::{borrow::ToOwned, vec::Vec}, - core::{assert_matches::debug_assert_matches, mem, ops::Range, usize}, + core::{assert_matches::debug_assert_matches, mem, ops::Range}, hbbytecode::{self as instrs}, }; @@ -136,7 +136,7 @@ impl HbvmBackend { let offset_atr = |pallc: Nid, offsets: &[Offset]| { let allc = strip_load(pallc); if nodes.is_locked(allc) { - let (region, offset) = nodes.strip_offset(allc, ty::Id::VOID, tys); + let (region, offset) = nodes.strip_offset(allc); match nodes[region].kind { Kind::Stck => { return ( @@ -268,12 +268,12 @@ impl HbvmBackend { match retl { None => {} Some(PLoc::Reg(r, size)) if sig.ret.loc(tys) == Loc::Stack => { - // TODO: handle the stack load - self.emit(instrs::ld(r, atr(ret), 0, size)) + let (src, offset) = offset_atr(ret, &self.offsets); + self.emit(instrs::ld(r, src, offset, size)) } Some(PLoc::WideReg(r, size)) => { - // TODO: handle the stack load - self.emit(instrs::ld(r, atr(ret), 0, size)) + let (src, offset) = offset_atr(ret, &self.offsets); + self.emit(instrs::ld(r, src, offset, size)) } Some(PLoc::Reg(r, _)) => { alloc_buf.push(atr(ret)); @@ -419,6 +419,13 @@ impl HbvmBackend { { self.emit(instrs::st(r, atr(*node.inputs.last().unwrap()), 0, size)); } + + match ret { + Some(PLoc::WideReg(..)) => {} + Some(PLoc::Reg(..)) if node.ty.loc(tys) == Loc::Stack => {} + Some(PLoc::Reg(r, ..)) => self.emit_cp(atr(nid), r), + None | Some(PLoc::Ref(..)) => {} + } } Kind::Global { global } => { let reloc = Reloc::new(self.code.len(), 3, 4); @@ -431,7 +438,7 @@ impl HbvmBackend { self.emit(instrs::addi64(atr(nid), base, offset as _)); } Kind::Load => { - let (region, offset) = nodes.strip_offset(node.inputs[1], node.ty, tys); + let (region, offset) = nodes.strip_offset(node.inputs[1]); let size = tys.size_of(node.ty); if node.ty.loc(tys) != Loc::Stack { let (base, offset) = match nodes[region].kind { @@ -445,7 +452,7 @@ impl HbvmBackend { } Kind::Stre => { debug_assert_ne!(node.inputs[1], VOID); - let (region, offset) = nodes.strip_offset(node.inputs[2], node.ty, tys); + let (region, offset) = nodes.strip_offset(node.inputs[2]); let size = u16::try_from(tys.size_of(node.ty)).expect("TODO"); let (base, offset, src) = match nodes[region].kind { Kind::Stck if node.ty.loc(tys) == Loc::Reg => ( @@ -474,16 +481,6 @@ impl HbvmBackend { | Kind::Phi | Kind::Join) => unreachable!("{e:?}"), } - if let Kind::Call { .. } = node.kind { - let (ret, ..) = tys.parama(node.ty); - - match ret { - Some(PLoc::WideReg(..)) => {} - Some(PLoc::Reg(..)) if node.ty.loc(tys) == Loc::Stack => {} - Some(PLoc::Reg(r, ..)) => self.emit_cp(atr(nid), r), - None | Some(PLoc::Ref(..)) => {} - } - } } } @@ -502,6 +499,7 @@ impl HbvmBackend { vec![], "{bundle_count}" ); + ( if tail { bundle_count.saturating_sub(reg::RET_ADDR as _) diff --git a/lang/tests/son_tests_different_function_destinations.txt b/lang/tests/son_tests_different_function_destinations.txt index 1fdcb8374..a15a2c05b 100644 --- a/lang/tests/son_tests_different_function_destinations.txt +++ b/lang/tests/son_tests_different_function_destinations.txt @@ -66,12 +66,11 @@ main: JALA r0, r31, 0a new_stru: ADDI64 r254, r254, -16d - ADDI64 r13, r254, 0d ST r0, r254, 0a, 8h ST r0, r254, 8a, 8h - LD r1, r13, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 684 +code size: 673 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_null_check_in_the_loop.txt b/lang/tests/son_tests_null_check_in_the_loop.txt index 93b6fbb60..6247293ba 100644 --- a/lang/tests/son_tests_null_check_in_the_loop.txt +++ b/lang/tests/son_tests_null_check_in_the_loop.txt @@ -17,16 +17,15 @@ main: return_fn: ADDI64 r254, r254, -6d LI8 r13, 1b - ADDI64 r14, r254, 0d ST r13, r254, 0a, 1h ST r0, r254, 1a, 1h ST r0, r254, 2a, 1h ST r0, r254, 3a, 1h ST r0, r254, 4a, 1h ST r0, r254, 5a, 1h - LD r1, r14, 0a, 6h + LD r1, r254, 0a, 6h ADDI64 r254, r254, 6d JALA r0, r31, 0a -code size: 288 +code size: 277 ret: 1 status: Ok(()) diff --git a/lang/tests/son_tests_null_check_returning_small_global.txt b/lang/tests/son_tests_null_check_returning_small_global.txt index deca828eb..5a7630ef8 100644 --- a/lang/tests/son_tests_null_check_returning_small_global.txt +++ b/lang/tests/son_tests_null_check_returning_small_global.txt @@ -9,31 +9,27 @@ foo: LD r33, r254, 64a, 1h ANDI r33, r33, 255d JNE r33, r0, :0 - ADDI64 r32, r254, 48d ST r0, r254, 48a, 1h - LD r1, r32, 0a, 16h + LD r1, r254, 48a, 16h JMP :1 0: LI8 r33, 1b LI64 r34, 4d LD r32, r254, 72a, 8h JNE r32, r34, :2 - ADDI64 r32, r254, 32d ST r33, r254, 32a, 1h - LI64 r33, 2d - ST r33, r254, 40a, 8h - LD r1, r32, 0a, 16h + LI64 r32, 2d + ST r32, r254, 40a, 8h + LD r1, r254, 32a, 16h JMP :1 2: LRA r34, r0, :magic LD r34, r34, 0a, 8h JNE r34, r32, :3 - ADDI64 r32, r254, 16d ST r33, r254, 16a, 1h ST r0, r254, 24a, 8h - LD r1, r32, 0a, 16h + LD r1, r254, 16a, 16h JMP :1 - 3: ADDI64 r32, r254, 0d - ST r0, r254, 0a, 1h - LD r1, r32, 0a, 16h + 3: ST r0, r254, 0a, 1h + LD r1, r254, 0a, 16h 1: LD r31, r254, 80a, 32h ADDI64 r254, r254, 112d JALA r0, r31, 0a @@ -46,14 +42,12 @@ get: LD r14, r14, 0a, 8h JNE r14, r13, :0 LI8 r13, 1b - ADDI64 r15, r254, 16d ST r13, r254, 16a, 1h ST r14, r254, 24a, 8h - LD r1, r15, 0a, 16h + LD r1, r254, 16a, 16h JMP :1 - 0: ADDI64 r13, r254, 0d - ST r0, r254, 0a, 1h - LD r1, r13, 0a, 16h + 0: ST r0, r254, 0a, 1h + LD r1, r254, 0a, 16h 1: ADDI64 r254, r254, 32d JALA r0, r31, 0a main: @@ -73,6 +67,6 @@ main: 1: LD r31, r254, 16a, 24h ADDI64 r254, r254, 40d JALA r0, r31, 0a -code size: 739 +code size: 673 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_nullable_structure.txt b/lang/tests/son_tests_nullable_structure.txt index a75507824..ca9d6bba2 100644 --- a/lang/tests/son_tests_nullable_structure.txt +++ b/lang/tests/son_tests_nullable_structure.txt @@ -42,10 +42,9 @@ returner_bn: returner_cn: ADDI64 r254, r254, -2d LI8 r13, 1b - ADDI64 r14, r254, 0d ST r13, r254, 0a, 1h ST r0, r254, 1a, 1h - LD r1, r14, 0a, 2h + LD r1, r254, 0a, 2h ADDI64 r254, r254, 2d JALA r0, r31, 0a returner_fn: @@ -53,6 +52,6 @@ returner_fn: ORI r13, r13, 128d CP r1, r13 JALA r0, r31, 0a -code size: 463 +code size: 452 ret: 1 status: Ok(()) diff --git a/lang/tests/son_tests_nullable_types.txt b/lang/tests/son_tests_nullable_types.txt index 5dbf749b5..a473c59a6 100644 --- a/lang/tests/son_tests_nullable_types.txt +++ b/lang/tests/son_tests_nullable_types.txt @@ -117,17 +117,15 @@ new_bar: new_foo: ADDI64 r254, r254, -24d ADDI64 r13, r254, 0d - ADDI64 r14, r254, 8d ST r13, r254, 8a, 8h ST r0, r254, 16a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 8a, 16h ADDI64 r254, r254, 24d JALA r0, r31, 0a no_foo: ADDI64 r254, r254, -16d - ADDI64 r13, r254, 0d ST r0, r254, 0a, 8h - LD r1, r13, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a use_foo: @@ -136,6 +134,6 @@ use_foo: ADDI64 r2, r254, 0d ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 1114 +code size: 1092 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt b/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt index 5d53583af..48188fb53 100644 --- a/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt +++ b/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt @@ -21,13 +21,12 @@ main: opaque: ADDI64 r254, r254, -16d LI64 r13, 3d - ADDI64 r14, r254, 0d ST r13, r254, 0a, 8h LI64 r13, 2d ST r13, r254, 8a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 310 +code size: 299 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_returning_optional_issues.txt b/lang/tests/son_tests_returning_optional_issues.txt index 4e99b5f01..778314e82 100644 --- a/lang/tests/son_tests_returning_optional_issues.txt +++ b/lang/tests/son_tests_returning_optional_issues.txt @@ -1,12 +1,11 @@ get_format: ADDI64 r254, r254, -16d LI8 r13, 1b - ADDI64 r14, r254, 0d - LRA r15, r0, :bmp + LRA r14, r0, :bmp ST r13, r254, 0a, 1h - LD r13, r15, 0a, 8h + LD r13, r14, 0a, 8h ST r13, r254, 8a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a main: @@ -26,6 +25,6 @@ main: 1: LD r31, r254, 16a, 24h ADDI64 r254, r254, 40d JALA r0, r31, 0a -code size: 275 +code size: 264 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_scheduling_block_did_dirty.txt b/lang/tests/son_tests_scheduling_block_did_dirty.txt index 2b2e7fd61..a145004ba 100644 --- a/lang/tests/son_tests_scheduling_block_did_dirty.txt +++ b/lang/tests/son_tests_scheduling_block_did_dirty.txt @@ -1,16 +1,15 @@ constructor: - ADDI64 r254, r254, -40d - ST r31, r254, 16a, 24h + ADDI64 r254, r254, -32d + ST r31, r254, 16a, 16h CP r32, r3 CP r2, r32 JAL r31, r0, :opaque CP r32, r1 - ADDI64 r33, r254, 0d ST r32, r254, 0a, 8h ST r32, r254, 8a, 8h - LD r1, r33, 0a, 16h - LD r31, r254, 16a, 24h - ADDI64 r254, r254, 40d + LD r1, r254, 0a, 16h + LD r31, r254, 16a, 16h + ADDI64 r254, r254, 32d JALA r0, r31, 0a main: ADDI64 r254, r254, -32d @@ -30,6 +29,6 @@ opaque: ANDI r13, r13, 255d CP r1, r13 JALA r0, r31, 0a -code size: 279 +code size: 268 ret: 255 status: Ok(()) diff --git a/lang/tests/son_tests_small_struct_bitcast.txt b/lang/tests/son_tests_small_struct_bitcast.txt index c93440981..c26bc3d25 100644 --- a/lang/tests/son_tests_small_struct_bitcast.txt +++ b/lang/tests/son_tests_small_struct_bitcast.txt @@ -14,22 +14,21 @@ main: ADDI64 r254, r254, 28d JALA r0, r31, 0a u32_to_color: - ADDI64 r254, r254, -28d - ST r31, r254, 4a, 24h + ADDI64 r254, r254, -20d + ST r31, r254, 4a, 16h CP r32, r2 CP r2, r32 JAL r31, r0, :u32_to_u32 CP r32, r1 - ADDI64 r33, r254, 0d ST r32, r254, 0a, 4h - LD r1, r33, 0a, 4h - LD r31, r254, 4a, 24h - ADDI64 r254, r254, 28d + LD r1, r254, 0a, 4h + LD r31, r254, 4a, 16h + ADDI64 r254, r254, 20d JALA r0, r31, 0a u32_to_u32: CP r13, r2 CP r1, r13 JALA r0, r31, 0a -code size: 281 +code size: 270 ret: 255 status: Ok(()) diff --git a/lang/tests/son_tests_storing_into_nullable_struct.txt b/lang/tests/son_tests_storing_into_nullable_struct.txt index 9e4dacf7b..938ee472c 100644 --- a/lang/tests/son_tests_storing_into_nullable_struct.txt +++ b/lang/tests/son_tests_storing_into_nullable_struct.txt @@ -43,11 +43,10 @@ main: optional: ADDI64 r254, r254, -16d LI8 r13, 1b - ADDI64 r14, r254, 0d ST r13, r254, 0a, 1h LI64 r13, 10d ST r13, r254, 8a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a optionala: @@ -66,6 +65,6 @@ optionala: BMC r15, r17, 32h ADDI64 r254, r254, 48d JALA r0, r31, 0a -code size: 567 +code size: 556 ret: 100 status: Ok(()) diff --git a/lang/tests/son_tests_struct_return_from_module_function.txt b/lang/tests/son_tests_struct_return_from_module_function.txt index b5687ac79..f0608aa9a 100644 --- a/lang/tests/son_tests_struct_return_from_module_function.txt +++ b/lang/tests/son_tests_struct_return_from_module_function.txt @@ -1,12 +1,11 @@ foo: ADDI64 r254, r254, -16d LI64 r13, 3d - ADDI64 r14, r254, 0d ST r13, r254, 0a, 8h LI32 r13, 2w ST r13, r254, 8a, 4h ST r13, r254, 12a, 4h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a main: @@ -34,6 +33,6 @@ main: LD r31, r254, 48a, 40h ADDI64 r254, r254, 88d JALA r0, r31, 0a -code size: 358 +code size: 347 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_wide_ret.txt b/lang/tests/son_tests_wide_ret.txt index 9c8cbec5f..5861be5d3 100644 --- a/lang/tests/son_tests_wide_ret.txt +++ b/lang/tests/son_tests_wide_ret.txt @@ -15,42 +15,40 @@ main: ADDI64 r254, r254, 48d JALA r0, r31, 0a maina: - ADDI64 r254, r254, -60d - ST r31, r254, 20a, 40h + ADDI64 r254, r254, -52d + ST r31, r254, 20a, 32h ADDI64 r32, r254, 16d JAL r31, r0, :small_struct ST r1, r32, 0a, 4h - ADDI64 r33, r254, 0d ST r0, r254, 0a, 1h ST r0, r254, 1a, 1h ST r0, r254, 2a, 1h - LI8 r34, 3b - ST r34, r254, 3a, 1h - LI8 r35, 1b - ST r35, r254, 4a, 1h + LI8 r33, 3b + ST r33, r254, 3a, 1h + LI8 r34, 1b + ST r34, r254, 4a, 1h ST r0, r254, 5a, 1h ST r0, r254, 6a, 1h ST r0, r254, 7a, 1h ST r0, r254, 8a, 1h ST r0, r254, 9a, 1h ST r0, r254, 10a, 1h - ST r34, r254, 11a, 1h - ST r35, r254, 12a, 1h + ST r33, r254, 11a, 1h + ST r34, r254, 12a, 1h ST r0, r254, 13a, 1h ST r0, r254, 14a, 1h ST r0, r254, 15a, 1h - LD r1, r33, 0a, 16h - LD r31, r254, 20a, 40h - ADDI64 r254, r254, 60d + LD r1, r254, 0a, 16h + LD r31, r254, 20a, 32h + ADDI64 r254, r254, 52d JALA r0, r31, 0a small_struct: ADDI64 r254, r254, -4d - ADDI64 r13, r254, 0d ST r0, r254, 0a, 2h ST r0, r254, 2a, 2h - LD r1, r13, 0a, 4h + LD r1, r254, 0a, 4h ADDI64 r254, r254, 4d JALA r0, r31, 0a -code size: 559 +code size: 537 ret: 2 status: Ok(())