From 5a7a01ca028b2aabdb8fe61a23bf6c9fa7e747e1 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Sun, 15 Dec 2024 17:57:22 +0100 Subject: [PATCH] adding the stack offset elision for return values as well Signed-off-by: Jakub Doka --- lang/src/backend/hbvm.rs | 5 ++- lang/src/backend/hbvm/regalloc.rs | 36 +++++++++---------- ..._tests_different_function_destinations.txt | 5 ++- .../son_tests_null_check_in_the_loop.txt | 5 ++- ...ests_null_check_returning_small_global.txt | 28 ++++++--------- lang/tests/son_tests_nullable_structure.txt | 5 ++- lang/tests/son_tests_nullable_types.txt | 8 ++--- ...ts_overwrite_aliasing_overoptimization.txt | 5 ++- .../son_tests_returning_optional_issues.txt | 9 +++-- .../son_tests_scheduling_block_did_dirty.txt | 13 ++++--- lang/tests/son_tests_small_struct_bitcast.txt | 13 ++++--- ...son_tests_storing_into_nullable_struct.txt | 5 ++- ...sts_struct_return_from_module_function.txt | 5 ++- lang/tests/son_tests_wide_ret.txt | 28 +++++++-------- 14 files changed, 74 insertions(+), 96 deletions(-) diff --git a/lang/src/backend/hbvm.rs b/lang/src/backend/hbvm.rs index 616b456..970785c 100644 --- a/lang/src/backend/hbvm.rs +++ b/lang/src/backend/hbvm.rs @@ -392,11 +392,10 @@ impl Nodes { op.cond_op(self[self[cnd].inputs[1]].ty) } - fn strip_offset(&self, region: Nid, ty: ty::Id, tys: &Types) -> (Nid, Offset) { + fn strip_offset(&self, region: Nid) -> (Nid, Offset) { if matches!(self[region].kind, Kind::BinOp { op: TokenKind::Add | TokenKind::Sub }) && self.is_locked(region) && let Kind::CInt { value } = self[self[region].inputs[2]].kind - && ty.loc(tys) == Loc::Reg { (self[region].inputs[1], value as _) } else { @@ -442,7 +441,7 @@ impl Nodes { // this means the struct is actually loaded into a register so no BMC needed || (node.kind == Kind::Load && !matches!(tys.parama(node.ty).0, Some(PLoc::Ref(..))) - && node.outputs.iter().all(|&o| self[o].kind.is_call()))) + && node.outputs.iter().all(|&o| matches!(self[o].kind, Kind::Call { .. } | Kind::Return { .. })))) } } diff --git a/lang/src/backend/hbvm/regalloc.rs b/lang/src/backend/hbvm/regalloc.rs index 290d919..0a5723e 100644 --- a/lang/src/backend/hbvm/regalloc.rs +++ b/lang/src/backend/hbvm/regalloc.rs @@ -4,14 +4,14 @@ use { reg::{self, Reg}, HbvmBackend, Nid, Nodes, PLoc, Reloc, TypedReloc, }, - lexer::{Token, TokenKind}, + lexer::TokenKind, parser, quad_sort, son::{Kind, ARG_START, MEM, VOID}, ty::{self, Arg, Loc, Module, Offset, Sig, Types}, utils::{BitSet, EntSlice}, }, alloc::{borrow::ToOwned, vec::Vec}, - core::{assert_matches::debug_assert_matches, mem, ops::Range, usize}, + core::{assert_matches::debug_assert_matches, mem, ops::Range}, hbbytecode::{self as instrs}, }; @@ -136,7 +136,7 @@ impl HbvmBackend { let offset_atr = |pallc: Nid, offsets: &[Offset]| { let allc = strip_load(pallc); if nodes.is_locked(allc) { - let (region, offset) = nodes.strip_offset(allc, ty::Id::VOID, tys); + let (region, offset) = nodes.strip_offset(allc); match nodes[region].kind { Kind::Stck => { return ( @@ -268,12 +268,12 @@ impl HbvmBackend { match retl { None => {} Some(PLoc::Reg(r, size)) if sig.ret.loc(tys) == Loc::Stack => { - // TODO: handle the stack load - self.emit(instrs::ld(r, atr(ret), 0, size)) + let (src, offset) = offset_atr(ret, &self.offsets); + self.emit(instrs::ld(r, src, offset, size)) } Some(PLoc::WideReg(r, size)) => { - // TODO: handle the stack load - self.emit(instrs::ld(r, atr(ret), 0, size)) + let (src, offset) = offset_atr(ret, &self.offsets); + self.emit(instrs::ld(r, src, offset, size)) } Some(PLoc::Reg(r, _)) => { alloc_buf.push(atr(ret)); @@ -419,6 +419,13 @@ impl HbvmBackend { { self.emit(instrs::st(r, atr(*node.inputs.last().unwrap()), 0, size)); } + + match ret { + Some(PLoc::WideReg(..)) => {} + Some(PLoc::Reg(..)) if node.ty.loc(tys) == Loc::Stack => {} + Some(PLoc::Reg(r, ..)) => self.emit_cp(atr(nid), r), + None | Some(PLoc::Ref(..)) => {} + } } Kind::Global { global } => { let reloc = Reloc::new(self.code.len(), 3, 4); @@ -431,7 +438,7 @@ impl HbvmBackend { self.emit(instrs::addi64(atr(nid), base, offset as _)); } Kind::Load => { - let (region, offset) = nodes.strip_offset(node.inputs[1], node.ty, tys); + let (region, offset) = nodes.strip_offset(node.inputs[1]); let size = tys.size_of(node.ty); if node.ty.loc(tys) != Loc::Stack { let (base, offset) = match nodes[region].kind { @@ -445,7 +452,7 @@ impl HbvmBackend { } Kind::Stre => { debug_assert_ne!(node.inputs[1], VOID); - let (region, offset) = nodes.strip_offset(node.inputs[2], node.ty, tys); + let (region, offset) = nodes.strip_offset(node.inputs[2]); let size = u16::try_from(tys.size_of(node.ty)).expect("TODO"); let (base, offset, src) = match nodes[region].kind { Kind::Stck if node.ty.loc(tys) == Loc::Reg => ( @@ -474,16 +481,6 @@ impl HbvmBackend { | Kind::Phi | Kind::Join) => unreachable!("{e:?}"), } - if let Kind::Call { .. } = node.kind { - let (ret, ..) = tys.parama(node.ty); - - match ret { - Some(PLoc::WideReg(..)) => {} - Some(PLoc::Reg(..)) if node.ty.loc(tys) == Loc::Stack => {} - Some(PLoc::Reg(r, ..)) => self.emit_cp(atr(nid), r), - None | Some(PLoc::Ref(..)) => {} - } - } } } @@ -502,6 +499,7 @@ impl HbvmBackend { vec![], "{bundle_count}" ); + ( if tail { bundle_count.saturating_sub(reg::RET_ADDR as _) diff --git a/lang/tests/son_tests_different_function_destinations.txt b/lang/tests/son_tests_different_function_destinations.txt index 1fdcb83..a15a2c0 100644 --- a/lang/tests/son_tests_different_function_destinations.txt +++ b/lang/tests/son_tests_different_function_destinations.txt @@ -66,12 +66,11 @@ main: JALA r0, r31, 0a new_stru: ADDI64 r254, r254, -16d - ADDI64 r13, r254, 0d ST r0, r254, 0a, 8h ST r0, r254, 8a, 8h - LD r1, r13, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 684 +code size: 673 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_null_check_in_the_loop.txt b/lang/tests/son_tests_null_check_in_the_loop.txt index 93b6fbb..6247293 100644 --- a/lang/tests/son_tests_null_check_in_the_loop.txt +++ b/lang/tests/son_tests_null_check_in_the_loop.txt @@ -17,16 +17,15 @@ main: return_fn: ADDI64 r254, r254, -6d LI8 r13, 1b - ADDI64 r14, r254, 0d ST r13, r254, 0a, 1h ST r0, r254, 1a, 1h ST r0, r254, 2a, 1h ST r0, r254, 3a, 1h ST r0, r254, 4a, 1h ST r0, r254, 5a, 1h - LD r1, r14, 0a, 6h + LD r1, r254, 0a, 6h ADDI64 r254, r254, 6d JALA r0, r31, 0a -code size: 288 +code size: 277 ret: 1 status: Ok(()) diff --git a/lang/tests/son_tests_null_check_returning_small_global.txt b/lang/tests/son_tests_null_check_returning_small_global.txt index deca828..5a7630e 100644 --- a/lang/tests/son_tests_null_check_returning_small_global.txt +++ b/lang/tests/son_tests_null_check_returning_small_global.txt @@ -9,31 +9,27 @@ foo: LD r33, r254, 64a, 1h ANDI r33, r33, 255d JNE r33, r0, :0 - ADDI64 r32, r254, 48d ST r0, r254, 48a, 1h - LD r1, r32, 0a, 16h + LD r1, r254, 48a, 16h JMP :1 0: LI8 r33, 1b LI64 r34, 4d LD r32, r254, 72a, 8h JNE r32, r34, :2 - ADDI64 r32, r254, 32d ST r33, r254, 32a, 1h - LI64 r33, 2d - ST r33, r254, 40a, 8h - LD r1, r32, 0a, 16h + LI64 r32, 2d + ST r32, r254, 40a, 8h + LD r1, r254, 32a, 16h JMP :1 2: LRA r34, r0, :magic LD r34, r34, 0a, 8h JNE r34, r32, :3 - ADDI64 r32, r254, 16d ST r33, r254, 16a, 1h ST r0, r254, 24a, 8h - LD r1, r32, 0a, 16h + LD r1, r254, 16a, 16h JMP :1 - 3: ADDI64 r32, r254, 0d - ST r0, r254, 0a, 1h - LD r1, r32, 0a, 16h + 3: ST r0, r254, 0a, 1h + LD r1, r254, 0a, 16h 1: LD r31, r254, 80a, 32h ADDI64 r254, r254, 112d JALA r0, r31, 0a @@ -46,14 +42,12 @@ get: LD r14, r14, 0a, 8h JNE r14, r13, :0 LI8 r13, 1b - ADDI64 r15, r254, 16d ST r13, r254, 16a, 1h ST r14, r254, 24a, 8h - LD r1, r15, 0a, 16h + LD r1, r254, 16a, 16h JMP :1 - 0: ADDI64 r13, r254, 0d - ST r0, r254, 0a, 1h - LD r1, r13, 0a, 16h + 0: ST r0, r254, 0a, 1h + LD r1, r254, 0a, 16h 1: ADDI64 r254, r254, 32d JALA r0, r31, 0a main: @@ -73,6 +67,6 @@ main: 1: LD r31, r254, 16a, 24h ADDI64 r254, r254, 40d JALA r0, r31, 0a -code size: 739 +code size: 673 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_nullable_structure.txt b/lang/tests/son_tests_nullable_structure.txt index a755078..ca9d6bb 100644 --- a/lang/tests/son_tests_nullable_structure.txt +++ b/lang/tests/son_tests_nullable_structure.txt @@ -42,10 +42,9 @@ returner_bn: returner_cn: ADDI64 r254, r254, -2d LI8 r13, 1b - ADDI64 r14, r254, 0d ST r13, r254, 0a, 1h ST r0, r254, 1a, 1h - LD r1, r14, 0a, 2h + LD r1, r254, 0a, 2h ADDI64 r254, r254, 2d JALA r0, r31, 0a returner_fn: @@ -53,6 +52,6 @@ returner_fn: ORI r13, r13, 128d CP r1, r13 JALA r0, r31, 0a -code size: 463 +code size: 452 ret: 1 status: Ok(()) diff --git a/lang/tests/son_tests_nullable_types.txt b/lang/tests/son_tests_nullable_types.txt index 5dbf749..a473c59 100644 --- a/lang/tests/son_tests_nullable_types.txt +++ b/lang/tests/son_tests_nullable_types.txt @@ -117,17 +117,15 @@ new_bar: new_foo: ADDI64 r254, r254, -24d ADDI64 r13, r254, 0d - ADDI64 r14, r254, 8d ST r13, r254, 8a, 8h ST r0, r254, 16a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 8a, 16h ADDI64 r254, r254, 24d JALA r0, r31, 0a no_foo: ADDI64 r254, r254, -16d - ADDI64 r13, r254, 0d ST r0, r254, 0a, 8h - LD r1, r13, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a use_foo: @@ -136,6 +134,6 @@ use_foo: ADDI64 r2, r254, 0d ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 1114 +code size: 1092 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt b/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt index 5d53583..48188fb 100644 --- a/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt +++ b/lang/tests/son_tests_overwrite_aliasing_overoptimization.txt @@ -21,13 +21,12 @@ main: opaque: ADDI64 r254, r254, -16d LI64 r13, 3d - ADDI64 r14, r254, 0d ST r13, r254, 0a, 8h LI64 r13, 2d ST r13, r254, 8a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 310 +code size: 299 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_returning_optional_issues.txt b/lang/tests/son_tests_returning_optional_issues.txt index 4e99b5f..778314e 100644 --- a/lang/tests/son_tests_returning_optional_issues.txt +++ b/lang/tests/son_tests_returning_optional_issues.txt @@ -1,12 +1,11 @@ get_format: ADDI64 r254, r254, -16d LI8 r13, 1b - ADDI64 r14, r254, 0d - LRA r15, r0, :bmp + LRA r14, r0, :bmp ST r13, r254, 0a, 1h - LD r13, r15, 0a, 8h + LD r13, r14, 0a, 8h ST r13, r254, 8a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a main: @@ -26,6 +25,6 @@ main: 1: LD r31, r254, 16a, 24h ADDI64 r254, r254, 40d JALA r0, r31, 0a -code size: 275 +code size: 264 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_scheduling_block_did_dirty.txt b/lang/tests/son_tests_scheduling_block_did_dirty.txt index 2b2e7fd..a145004 100644 --- a/lang/tests/son_tests_scheduling_block_did_dirty.txt +++ b/lang/tests/son_tests_scheduling_block_did_dirty.txt @@ -1,16 +1,15 @@ constructor: - ADDI64 r254, r254, -40d - ST r31, r254, 16a, 24h + ADDI64 r254, r254, -32d + ST r31, r254, 16a, 16h CP r32, r3 CP r2, r32 JAL r31, r0, :opaque CP r32, r1 - ADDI64 r33, r254, 0d ST r32, r254, 0a, 8h ST r32, r254, 8a, 8h - LD r1, r33, 0a, 16h - LD r31, r254, 16a, 24h - ADDI64 r254, r254, 40d + LD r1, r254, 0a, 16h + LD r31, r254, 16a, 16h + ADDI64 r254, r254, 32d JALA r0, r31, 0a main: ADDI64 r254, r254, -32d @@ -30,6 +29,6 @@ opaque: ANDI r13, r13, 255d CP r1, r13 JALA r0, r31, 0a -code size: 279 +code size: 268 ret: 255 status: Ok(()) diff --git a/lang/tests/son_tests_small_struct_bitcast.txt b/lang/tests/son_tests_small_struct_bitcast.txt index c934409..c26bc3d 100644 --- a/lang/tests/son_tests_small_struct_bitcast.txt +++ b/lang/tests/son_tests_small_struct_bitcast.txt @@ -14,22 +14,21 @@ main: ADDI64 r254, r254, 28d JALA r0, r31, 0a u32_to_color: - ADDI64 r254, r254, -28d - ST r31, r254, 4a, 24h + ADDI64 r254, r254, -20d + ST r31, r254, 4a, 16h CP r32, r2 CP r2, r32 JAL r31, r0, :u32_to_u32 CP r32, r1 - ADDI64 r33, r254, 0d ST r32, r254, 0a, 4h - LD r1, r33, 0a, 4h - LD r31, r254, 4a, 24h - ADDI64 r254, r254, 28d + LD r1, r254, 0a, 4h + LD r31, r254, 4a, 16h + ADDI64 r254, r254, 20d JALA r0, r31, 0a u32_to_u32: CP r13, r2 CP r1, r13 JALA r0, r31, 0a -code size: 281 +code size: 270 ret: 255 status: Ok(()) diff --git a/lang/tests/son_tests_storing_into_nullable_struct.txt b/lang/tests/son_tests_storing_into_nullable_struct.txt index 9e4dacf..938ee47 100644 --- a/lang/tests/son_tests_storing_into_nullable_struct.txt +++ b/lang/tests/son_tests_storing_into_nullable_struct.txt @@ -43,11 +43,10 @@ main: optional: ADDI64 r254, r254, -16d LI8 r13, 1b - ADDI64 r14, r254, 0d ST r13, r254, 0a, 1h LI64 r13, 10d ST r13, r254, 8a, 8h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a optionala: @@ -66,6 +65,6 @@ optionala: BMC r15, r17, 32h ADDI64 r254, r254, 48d JALA r0, r31, 0a -code size: 567 +code size: 556 ret: 100 status: Ok(()) diff --git a/lang/tests/son_tests_struct_return_from_module_function.txt b/lang/tests/son_tests_struct_return_from_module_function.txt index b5687ac..f0608aa 100644 --- a/lang/tests/son_tests_struct_return_from_module_function.txt +++ b/lang/tests/son_tests_struct_return_from_module_function.txt @@ -1,12 +1,11 @@ foo: ADDI64 r254, r254, -16d LI64 r13, 3d - ADDI64 r14, r254, 0d ST r13, r254, 0a, 8h LI32 r13, 2w ST r13, r254, 8a, 4h ST r13, r254, 12a, 4h - LD r1, r14, 0a, 16h + LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a main: @@ -34,6 +33,6 @@ main: LD r31, r254, 48a, 40h ADDI64 r254, r254, 88d JALA r0, r31, 0a -code size: 358 +code size: 347 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_wide_ret.txt b/lang/tests/son_tests_wide_ret.txt index 9c8cbec..5861be5 100644 --- a/lang/tests/son_tests_wide_ret.txt +++ b/lang/tests/son_tests_wide_ret.txt @@ -15,42 +15,40 @@ main: ADDI64 r254, r254, 48d JALA r0, r31, 0a maina: - ADDI64 r254, r254, -60d - ST r31, r254, 20a, 40h + ADDI64 r254, r254, -52d + ST r31, r254, 20a, 32h ADDI64 r32, r254, 16d JAL r31, r0, :small_struct ST r1, r32, 0a, 4h - ADDI64 r33, r254, 0d ST r0, r254, 0a, 1h ST r0, r254, 1a, 1h ST r0, r254, 2a, 1h - LI8 r34, 3b - ST r34, r254, 3a, 1h - LI8 r35, 1b - ST r35, r254, 4a, 1h + LI8 r33, 3b + ST r33, r254, 3a, 1h + LI8 r34, 1b + ST r34, r254, 4a, 1h ST r0, r254, 5a, 1h ST r0, r254, 6a, 1h ST r0, r254, 7a, 1h ST r0, r254, 8a, 1h ST r0, r254, 9a, 1h ST r0, r254, 10a, 1h - ST r34, r254, 11a, 1h - ST r35, r254, 12a, 1h + ST r33, r254, 11a, 1h + ST r34, r254, 12a, 1h ST r0, r254, 13a, 1h ST r0, r254, 14a, 1h ST r0, r254, 15a, 1h - LD r1, r33, 0a, 16h - LD r31, r254, 20a, 40h - ADDI64 r254, r254, 60d + LD r1, r254, 0a, 16h + LD r31, r254, 20a, 32h + ADDI64 r254, r254, 52d JALA r0, r31, 0a small_struct: ADDI64 r254, r254, -4d - ADDI64 r13, r254, 0d ST r0, r254, 0a, 2h ST r0, r254, 2a, 2h - LD r1, r13, 0a, 4h + LD r1, r254, 0a, 4h ADDI64 r254, r254, 4d JALA r0, r31, 0a -code size: 559 +code size: 537 ret: 2 status: Ok(())