From 5a7a01ca028b2aabdb8fe61a23bf6c9fa7e747e1 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Sun, 15 Dec 2024 17:57:22 +0100 Subject: [PATCH] adding the stack offset elision for return values as well Signed-off-by: Jakub Doka --- lang/src/backend/hbvm.rs | 5 ++- lang/src/backend/hbvm/regalloc.rs | 36 +++++++++---------- ..._tests_different_function_destinations.txt | 5 ++- .../son_tests_null_check_in_the_loop.txt | 5 ++- ...ests_null_check_returning_small_global.txt | 28 ++++++--------- lang/tests/son_tests_nullable_structure.txt | 5 ++- lang/tests/son_tests_nullable_types.txt | 8 ++--- ...ts_overwrite_aliasing_overoptimization.txt | 5 ++- .../son_tests_returning_optional_issues.txt | 9 +++-- .../son_tests_scheduling_block_did_dirty.txt | 13 ++++--- lang/tests/son_tests_small_struct_bitcast.txt | 13 ++++--- ...son_tests_storing_into_nullable_struct.txt | 5 ++- ...sts_struct_return_from_module_function.txt | 5 ++- lang/tests/son_tests_wide_ret.txt | 28 +++++++-------- 14 files changed, 74 insertions(+), 96 deletions(-) diff --git a/lang/src/backend/hbvm.rs b/lang/src/backend/hbvm.rs index 616b4560..970785cf 100644 --- a/lang/src/backend/hbvm.rs +++ b/lang/src/backend/hbvm.rs @@ -392,11 +392,10 @@ impl Nodes { op.cond_op(self[self[cnd].inputs[1]].ty) } - fn strip_offset(&self, region: Nid, ty: ty::Id, tys: &Types) -> (Nid, Offset) { + fn strip_offset(&self, region: Nid) -> (Nid, Offset) { if matches!(self[region].kind, Kind::BinOp { op: TokenKind::Add | TokenKind::Sub }) && self.is_locked(region) && let Kind::CInt { value } = self[self[region].inputs[2]].kind - && ty.loc(tys) == Loc::Reg { (self[region].inputs[1], value as _) } else { @@ -442,7 +441,7 @@ impl Nodes { // this means the struct is actually loaded into a register so no BMC needed || (node.kind == Kind::Load && !matches!(tys.parama(node.ty).0, Some(PLoc::Ref(..))) - && node.outputs.iter().all(|&o| self[o].kind.is_call()))) + && node.outputs.iter().all(|&o| matches!(self[o].kind, Kind::Call { .. } | Kind::Return { .. })))) } } diff --git a/lang/src/backend/hbvm/regalloc.rs b/lang/src/backend/hbvm/regalloc.rs index 290d9193..0a5723e3 100644 --- a/lang/src/backend/hbvm/regalloc.rs +++ b/lang/src/backend/hbvm/regalloc.rs @@ -4,14 +4,14 @@ use { reg::{self, Reg}, HbvmBackend, Nid, Nodes, PLoc, Reloc, TypedReloc, }, - lexer::{Token, TokenKind}, + lexer::TokenKind, parser, quad_sort, son::{Kind, ARG_START, MEM, VOID}, ty::{self, Arg, Loc, Module, Offset, Sig, Types}, utils::{BitSet, EntSlice}, }, alloc::{borrow::ToOwned, vec::Vec}, - core::{assert_matches::debug_assert_matches, mem, ops::Range, usize}, + core::{assert_matches::debug_assert_matches, mem, ops::Range}, hbbytecode::{self as instrs}, }; @@ -136,7 +136,7 @@ impl HbvmBackend { let offset_atr = |pallc: Nid, offsets: &[Offset]| { let allc = strip_load(pallc); if nodes.is_locked(allc) { - let (region, offset) = nodes.strip_offset(allc, ty::Id::VOID, tys); + let (region, offset) = nodes.strip_offset(allc); match nodes[region].kind { Kind::Stck => { return ( @@ -268,12 +268,12 @@ impl HbvmBackend { match retl { None => {} Some(PLoc::Reg(r, size)) if sig.ret.loc(tys) == Loc::Stack => { - // TODO: handle the stack load - self.emit(instrs::ld(r, atr(ret), 0, size)) + let (src, offset) = offset_atr(ret, &self.offsets); + self.emit(instrs::ld(r, src, offset, size)) } Some(PLoc::WideReg(r, size)) => { - // TODO: handle the stack load - self.emit(instrs::ld(r, atr(ret), 0, size)) + let (src, offset) = offset_atr(ret, &self.offsets); + self.emit(instrs::ld(r, src, offset, size)) } Some(PLoc::Reg(r, _)) => { alloc_buf.push(atr(ret)); @@ -419,6 +419,13 @@ impl HbvmBackend { { self.emit(instrs::st(r, atr(*node.inputs.last().unwrap()), 0, size)); } + + match ret { + Some(PLoc::WideReg(..)) => {} + Some(PLoc::Reg(..)) if node.ty.loc(tys) == Loc::Stack => {} + Some(PLoc::Reg(r, ..)) => self.emit_cp(atr(nid), r), + None | Some(PLoc::Ref(..)) => {} + } } Kind::Global { global } => { let reloc = Reloc::new(self.code.len(), 3, 4); @@ -431,7 +438,7 @@ impl HbvmBackend { self.emit(instrs::addi64(atr(nid), base, offset as _)); } Kind::Load => { - let (region, offset) = nodes.strip_offset(node.inputs[1], node.ty, tys); + let (region, offset) = nodes.strip_offset(node.inputs[1]); let size = tys.size_of(node.ty); if node.ty.loc(tys) != Loc::Stack { let (base, offset) = match nodes[region].kind { @@ -445,7 +452,7 @@ impl HbvmBackend { } Kind::Stre => { debug_assert_ne!(node.inputs[1], VOID); - let (region, offset) = nodes.strip_offset(node.inputs[2], node.ty, tys); + let (region, offset) = nodes.strip_offset(node.inputs[2]); let size = u16::try_from(tys.size_of(node.ty)).expect("TODO"); let (base, offset, src) = match nodes[region].kind { Kind::Stck if node.ty.loc(tys) == Loc::Reg => ( @@ -474,16 +481,6 @@ impl HbvmBackend { | Kind::Phi | Kind::Join) => unreachable!("{e:?}"), } - if let Kind::Call { .. } = node.kind { - let (ret, ..) = tys.parama(node.ty); - - match ret { - Some(PLoc::WideReg(..)) => {} - Some(PLoc::Reg(..)) if node.ty.loc(tys) == Loc::Stack => {} - Some(PLoc::Reg(r, ..)) => self.emit_cp(atr(nid), r), - None | Some(PLoc::Ref(..)) => {} - } - } } } @@ -502,6 +499,7 @@ impl HbvmBackend { vec![], "{bundle_count}" ); + ( if tail { bundle_count.saturating_sub(reg::RET_ADDR as _) diff --git a/lang/tests/son_tests_different_function_destinations.txt b/lang/tests/son_tests_different_function_destinations.txt index 1fdcb837..a15a2c05 100644 --- a/lang/tests/son_tests_different_function_destinations.txt +++ b/lang/tests/son_tests_different_function_destinations.txt @@ -66,12 +66,11 @@ main: JALA r0, r31, 0a new_stru: ADDI64 r254, r254, -16d - ADDI64 r13, r254, 0d ST r0, r254, 0a, 8h ST r0, r254, 8a, 8h - LD r1, r13, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 684 +code size: 673 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_null_check_in_the_loop.txt b/lang/tests/son_tests_null_check_in_the_loop.txt index 93b6fbb6..6247293b 100644 --- a/lang/tests/son_tests_null_check_in_the_loop.txt +++ b/lang/tests/son_tests_null_check_in_the_loop.txt @@ -17,16 +17,15 @@ main: return_fn: ADDI64 r254, r254, -6d LI8 r13, 1b - ADDI64 r14, r254, 0d ST r13, r254, 0a, 1h ST r0, r254, 1a, 1h ST r0, r254, 2a, 1h ST r0, r254, 3a, 1h ST r0, r254, 4a, 1h ST r0, r254, 5a, 1h - LD r1, r14, 0a, 6h + LD r1, r254, 0a, 6h ADDI64 r254, r254, 6d JALA r0, r31, 0a -code size: 288 +code size: 277 ret: 1 status: Ok(()) diff --git a/lang/tests/son_tests_null_check_returning_small_global.txt b/lang/tests/son_tests_null_check_returning_small_global.txt index deca828e..5a7630ef 100644 --- a/lang/tests/son_tests_null_check_returning_small_global.txt +++ b/lang/tests/son_tests_null_check_returning_small_global.txt @@ -9,31 +9,27 @@ foo: LD r33, r254, 64a, 1h ANDI r33, r33, 255d JNE r33, r0, :0 - ADDI64 r32, r254, 48d ST r0, r254, 48a, 1h - LD r1, r32, 0a, 16h + LD r1, r254, 48a, 16h JMP :1 0: LI8 r33, 1b LI64 r34, 4d LD r32, r254, 72a, 8h JNE r32, r34, :2 - ADDI64 r32, r254, 32d ST r33, r254, 32a, 1h - LI64 r33, 2d - ST r33, r254, 40a, 8h - LD r1, r32, 0a, 16h + LI64 r32, 2d + ST r32, r254, 40a, 8h + LD r1, r254, 32a, 16h JMP :1 2: LRA r34, r0, :magic LD r34, r34, 0a, 8h JNE r34, r32, :3 - ADDI64 r32, r254, 16d ST r33, r254, 16a, 1h ST r0, r254, 24a, 8h - LD r1, r32, 0a, 16h + LD r1, r254, 16a, 16h JMP :1 - 3: ADDI64 r32, r254, 0d - ST r0, r254, 0a, 1h - LD r1, r32, 0a, 16h + 3: ST r0, r254, 0a, 1h + LD r1, r254, 0a, 16h 1: LD r31, r254, 80a, 32h ADDI64 r254, r254, 112d JALA r0, r31, 0a @@ -46,14 +42,12 @@ get: LD r14, r14, 0a, 8h JNE r14, r13, :0 LI8 r13, 1b - ADDI64 r15, r254, 16d ST r13, r254, 16a, 1h ST r14, r254, 24a, 8h - LD r1, r15, 0a, 16h + LD r1, r254, 16a, 16h JMP :1 - 0: ADDI64 r13, r254, 0d - ST r0, r254, 0a, 1h - LD r1, r13, 0a, 16h + 0: ST r0, r254, 0a, 1h + LD r1, r254, 0a, 16h 1: ADDI64 r254, r254, 32d JALA r0, r31, 0a main: @@ -73,6 +67,6 @@ main: 1: LD r31, r254, 16a, 24h ADDI64 r254, r254, 40d JALA r0, r31, 0a -code size: 739 +code size: 673 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_nullable_structure.txt b/lang/tests/son_tests_nullable_structure.txt index a7550782..ca9d6bba 100644 --- a/lang/tests/son_tests_nullable_structure.txt +++ b/lang/tests/son_tests_nullable_structure.txt @@ -42,10 +42,9 @@ returner_bn: returner_cn: ADDI64 r254, r254, -2d LI8 r13, 1b - ADDI64 r14, r254, 0d ST r13, r254, 0a, 1h ST r0, r254, 1a, 1h - LD r1, r14, 0a, 2h + LD r1, r254, 0a, 2h ADDI64 r254, r254, 2d JALA r0, r31, 0a returner_fn: @@ -53,6 +52,6 @@ returner_fn: ORI r13, r13, 128d CP r1, r13 JALA r0, r31, 0a -code size: 463 +code size: 452 ret: 1 status: Ok(()) diff --git a/lang/tests/son_tests_nullable_types.txt b/lang/tests/son_tests_nullable_types.txt index 5dbf749b..a473c59a 100644 --- a/lang/tests/son_tests_nullable_types.txt +++ b/lang/tests/son_tests_nullable_types.txt @@ -117,17 +117,15 @@ new_bar: new_foo: ADDI64 r254, r254, -24d ADDI64 r13, r254, 0d - ADDI64 r14, r254, 8d ST r13, r254, 8a, 8h ST r0, r254, 16a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 8a, 16h ADDI64 r254, r254, 24d JALA r0, r31, 0a no_foo: ADDI64 r254, r254, -16d - ADDI64 r13, r254, 0d ST r0, r254, 0a, 8h - LD r1, r13, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a use_foo: @@ -136,6 +134,6 @@ use_foo: ADDI64 r2, r254, 0d ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 1114 +code size: 1092 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt b/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt index 5d53583a..48188fb5 100644 --- a/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt +++ b/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt @@ -21,13 +21,12 @@ main: opaque: ADDI64 r254, r254, -16d LI64 r13, 3d - ADDI64 r14, r254, 0d ST r13, r254, 0a, 8h LI64 r13, 2d ST r13, r254, 8a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 310 +code size: 299 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_returning_optional_issues.txt b/lang/tests/son_tests_returning_optional_issues.txt index 4e99b5f0..778314e8 100644 --- a/lang/tests/son_tests_returning_optional_issues.txt +++ b/lang/tests/son_tests_returning_optional_issues.txt @@ -1,12 +1,11 @@ get_format: ADDI64 r254, r254, -16d LI8 r13, 1b - ADDI64 r14, r254, 0d - LRA r15, r0, :bmp + LRA r14, r0, :bmp ST r13, r254, 0a, 1h - LD r13, r15, 0a, 8h + LD r13, r14, 0a, 8h ST r13, r254, 8a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a main: @@ -26,6 +25,6 @@ main: 1: LD r31, r254, 16a, 24h ADDI64 r254, r254, 40d JALA r0, r31, 0a -code size: 275 +code size: 264 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_scheduling_block_did_dirty.txt b/lang/tests/son_tests_scheduling_block_did_dirty.txt index 2b2e7fd6..a145004b 100644 --- a/lang/tests/son_tests_scheduling_block_did_dirty.txt +++ b/lang/tests/son_tests_scheduling_block_did_dirty.txt @@ -1,16 +1,15 @@ constructor: - ADDI64 r254, r254, -40d - ST r31, r254, 16a, 24h + ADDI64 r254, r254, -32d + ST r31, r254, 16a, 16h CP r32, r3 CP r2, r32 JAL r31, r0, :opaque CP r32, r1 - ADDI64 r33, r254, 0d ST r32, r254, 0a, 8h ST r32, r254, 8a, 8h - LD r1, r33, 0a, 16h - LD r31, r254, 16a, 24h - ADDI64 r254, r254, 40d + LD r1, r254, 0a, 16h + LD r31, r254, 16a, 16h + ADDI64 r254, r254, 32d JALA r0, r31, 0a main: ADDI64 r254, r254, -32d @@ -30,6 +29,6 @@ opaque: ANDI r13, r13, 255d CP r1, r13 JALA r0, r31, 0a -code size: 279 +code size: 268 ret: 255 status: Ok(()) diff --git a/lang/tests/son_tests_small_struct_bitcast.txt b/lang/tests/son_tests_small_struct_bitcast.txt index c9344098..c26bc3d2 100644 --- a/lang/tests/son_tests_small_struct_bitcast.txt +++ b/lang/tests/son_tests_small_struct_bitcast.txt @@ -14,22 +14,21 @@ main: ADDI64 r254, r254, 28d JALA r0, r31, 0a u32_to_color: - ADDI64 r254, r254, -28d - ST r31, r254, 4a, 24h + ADDI64 r254, r254, -20d + ST r31, r254, 4a, 16h CP r32, r2 CP r2, r32 JAL r31, r0, :u32_to_u32 CP r32, r1 - ADDI64 r33, r254, 0d ST r32, r254, 0a, 4h - LD r1, r33, 0a, 4h - LD r31, r254, 4a, 24h - ADDI64 r254, r254, 28d + LD r1, r254, 0a, 4h + LD r31, r254, 4a, 16h + ADDI64 r254, r254, 20d JALA r0, r31, 0a u32_to_u32: CP r13, r2 CP r1, r13 JALA r0, r31, 0a -code size: 281 +code size: 270 ret: 255 status: Ok(()) diff --git a/lang/tests/son_tests_storing_into_nullable_struct.txt b/lang/tests/son_tests_storing_into_nullable_struct.txt index 9e4dacf7..938ee472 100644 --- a/lang/tests/son_tests_storing_into_nullable_struct.txt +++ b/lang/tests/son_tests_storing_into_nullable_struct.txt @@ -43,11 +43,10 @@ main: optional: ADDI64 r254, r254, -16d LI8 r13, 1b - ADDI64 r14, r254, 0d ST r13, r254, 0a, 1h LI64 r13, 10d ST r13, r254, 8a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a optionala: @@ -66,6 +65,6 @@ optionala: BMC r15, r17, 32h ADDI64 r254, r254, 48d JALA r0, r31, 0a -code size: 567 +code size: 556 ret: 100 status: Ok(()) diff --git a/lang/tests/son_tests_struct_return_from_module_function.txt b/lang/tests/son_tests_struct_return_from_module_function.txt index b5687ac7..f0608aa9 100644 --- a/lang/tests/son_tests_struct_return_from_module_function.txt +++ b/lang/tests/son_tests_struct_return_from_module_function.txt @@ -1,12 +1,11 @@ foo: ADDI64 r254, r254, -16d LI64 r13, 3d - ADDI64 r14, r254, 0d ST r13, r254, 0a, 8h LI32 r13, 2w ST r13, r254, 8a, 4h ST r13, r254, 12a, 4h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a main: @@ -34,6 +33,6 @@ main: LD r31, r254, 48a, 40h ADDI64 r254, r254, 88d JALA r0, r31, 0a -code size: 358 +code size: 347 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_wide_ret.txt b/lang/tests/son_tests_wide_ret.txt index 9c8cbec5..5861be5d 100644 --- a/lang/tests/son_tests_wide_ret.txt +++ b/lang/tests/son_tests_wide_ret.txt @@ -15,42 +15,40 @@ main: ADDI64 r254, r254, 48d JALA r0, r31, 0a maina: - ADDI64 r254, r254, -60d - ST r31, r254, 20a, 40h + ADDI64 r254, r254, -52d + ST r31, r254, 20a, 32h ADDI64 r32, r254, 16d JAL r31, r0, :small_struct ST r1, r32, 0a, 4h - ADDI64 r33, r254, 0d ST r0, r254, 0a, 1h ST r0, r254, 1a, 1h ST r0, r254, 2a, 1h - LI8 r34, 3b - ST r34, r254, 3a, 1h - LI8 r35, 1b - ST r35, r254, 4a, 1h + LI8 r33, 3b + ST r33, r254, 3a, 1h + LI8 r34, 1b + ST r34, r254, 4a, 1h ST r0, r254, 5a, 1h ST r0, r254, 6a, 1h ST r0, r254, 7a, 1h ST r0, r254, 8a, 1h ST r0, r254, 9a, 1h ST r0, r254, 10a, 1h - ST r34, r254, 11a, 1h - ST r35, r254, 12a, 1h + ST r33, r254, 11a, 1h + ST r34, r254, 12a, 1h ST r0, r254, 13a, 1h ST r0, r254, 14a, 1h ST r0, r254, 15a, 1h - LD r1, r33, 0a, 16h - LD r31, r254, 20a, 40h - ADDI64 r254, r254, 60d + LD r1, r254, 0a, 16h + LD r31, r254, 20a, 32h + ADDI64 r254, r254, 52d JALA r0, r31, 0a small_struct: ADDI64 r254, r254, -4d - ADDI64 r13, r254, 0d ST r0, r254, 0a, 2h ST r0, r254, 2a, 2h - LD r1, r13, 0a, 4h + LD r1, r254, 0a, 4h ADDI64 r254, r254, 4d JALA r0, r31, 0a -code size: 559 +code size: 537 ret: 2 status: Ok(())