//! Implementation of a standard Riscv64 ABI.

use {
    alloc::{boxed::Box, vec::Vec},
    cranelift_codegen::{
        ir::{self, condcodes::IntCC, types::*, ExternalName, LibCall, Signature},
        isa::{
            self,
            riscv64::{inst::*, settings::Flags as RiscvFlags},
            unwind::UnwindInst,
            CallConv,
        },
        machinst::*,
        settings, CodegenError, CodegenResult,
    },
    regalloc2::{MachineEnv, PReg, PRegSet},
    smallvec::{smallvec, SmallVec},
    std::sync::OnceLock,
};

/// Support for the Riscv64 ABI from the callee side (within a function body).
pub(crate) type Riscv64Callee = Callee<Riscv64MachineDeps>;

/// Support for the Riscv64 ABI from the caller side (at a callsite).
pub(crate) type Riscv64ABICallSite = CallSite<Riscv64MachineDeps>;

/// This is the limit for the size of argument and return-value areas on the
/// stack. We place a reasonable limit here to avoid integer overflow issues
/// with 32-bit arithmetic: for now, 128 MB.
static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

/// Riscv64-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub struct Riscv64MachineDeps;

impl IsaFlags for RiscvFlags {}

impl RiscvFlags {
    pub(crate) fn min_vec_reg_size(&self) -> u64 {
        let entries = [
            (self.has_zvl65536b(), 65536),
            (self.has_zvl32768b(), 32768),
            (self.has_zvl16384b(), 16384),
            (self.has_zvl8192b(), 8192),
            (self.has_zvl4096b(), 4096),
            (self.has_zvl2048b(), 2048),
            (self.has_zvl1024b(), 1024),
            (self.has_zvl512b(), 512),
            (self.has_zvl256b(), 256),
            // In order to claim the Application Profile V extension, a minimum
            // register size of 128 is required. i.e. V implies Zvl128b.
            (self.has_v(), 128),
            (self.has_zvl128b(), 128),
            (self.has_zvl64b(), 64),
            (self.has_zvl32b(), 32),
        ];

        for (has_flag, size) in entries.into_iter() {
            if !has_flag {
                continue;
            }
            // Due to a limitation in regalloc2, we can't support types
            // larger than 1024 bytes. So limit that here.
            return std::cmp::min(size, 1024);
        }

        0
    }
}
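// A quick reference for the conventions encoded below (per the standard
// RISC-V LP64 calling convention): integer arguments are passed in
// x10..=x17 (a0..a7) and float arguments in f10..=f17 (fa0..fa7); return
// values use x10..=x11 (a0..a1) and f10..=f11 (fa0..fa1). Anything that
// doesn't fit goes on the stack, and the stack pointer stays 16-byte
// aligned.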
impl ABIMachineSpec for Riscv64MachineDeps {
    type F = RiscvFlags;
    type I = Inst;

    fn word_bits() -> u32 {
        64
    }

    /// Return required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    fn compute_arg_locs(
        call_conv: isa::CallConv,
        _flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        assert_ne!(
            call_conv,
            isa::CallConv::Winch,
            "riscv64 does not support the 'winch' calling convention yet"
        );

        // All registers that can be used as parameters or rets.
        // Both start and end are inclusive.
        let (x_start, x_end, f_start, f_end) = match args_or_rets {
            ArgsOrRets::Args => (10, 17, 10, 17),
            ArgsOrRets::Rets => (10, 11, 10, 11),
        };
        let mut next_x_reg = x_start;
        let mut next_f_reg = f_start;
        // Stack space.
        let mut next_stack: u32 = 0;

        for param in params {
            if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {
                panic!(
                    "StructArgument parameters are not supported on riscv64. \
                     Use regular pointer arguments instead."
                );
            }

            // Find the regclass(es) of the register(s) used to store a value
            // of this type.
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
            let mut slots = ABIArgSlotVec::new();
            for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
                let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int {
                    let x = Some(x_reg(next_x_reg));
                    next_x_reg += 1;
                    x
                } else if (next_f_reg <= f_end) && *rc == RegClass::Float {
                    let x = Some(f_reg(next_f_reg));
                    next_f_reg += 1;
                    x
                } else {
                    None
                };
                if let Some(reg) = next_reg {
                    slots.push(ABIArgSlot::Reg {
                        reg: reg.to_real_reg().unwrap(),
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                } else {
                    // Compute the size of this slot; the final 16-byte stack
                    // alignment happens separately, after all args.
                    let size = reg_ty.bits() / 8;
                    let size = std::cmp::max(size, 8);

                    // Align.
                    debug_assert!(size.is_power_of_two());
                    next_stack = align_to(next_stack, size);
                    slots.push(ABIArgSlot::Stack {
                        offset: next_stack as i64,
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                    next_stack += size;
                }
            }
            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });
        }
        let pos: Option<usize> = if add_ret_area_ptr {
            assert!(ArgsOrRets::Args == args_or_rets);
            if next_x_reg <= x_end {
                let arg = ABIArg::reg(
                    x_reg(next_x_reg).to_real_reg().unwrap(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                );
                args.push_non_formal(arg);
            } else {
                let arg = ABIArg::stack(
                    next_stack as i64,
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                );
                args.push_non_formal(arg);
                next_stack += 8;
            }
            Some(args.args().len() - 1)
        } else {
            None
        };

        next_stack = align_to(next_stack, Self::stack_align(call_conv));

        // To avoid overflow issues, limit the arg/return size to something
        // reasonable -- here, 128 MB.
        if next_stack > STACK_ARG_RET_SIZE_LIMIT {
            return Err(CodegenError::ImplLimitExceeded);
        }

        Ok((next_stack, pos))
    }
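    // As a worked example (illustrative only): for a signature like
    // `fn(i64, f64, i64) -> i64`, the scheme above assigns a0 (x10), fa0
    // (f10), and a1 (x11) to the arguments and a0 to the return value. A
    // ninth integer argument, having exhausted a0..a7, would instead get an
    // 8-byte-aligned slot on the stack.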
    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
        Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Inst {
        assert!(from_bits < to_bits);
        Inst::Extend {
            rd: to_reg,
            rn: from_reg,
            signed,
            from_bits,
            to_bits,
        }
    }

    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }

    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }

    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }

    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        spilltmp_reg()
    }

    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Inst> {
        let mut insts = SmallInstVec::new();
        if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) {
            insts.push(Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd: into_reg,
                rs: from_reg,
                imm12,
            });
        } else {
            insts.extend(Inst::load_constant_u32(
                writable_spilltmp_reg2(),
                imm as u64,
            ));
            insts.push(Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd: into_reg,
                rs1: spilltmp_reg2(),
                rs2: from_reg,
            });
        }
        insts
    }

    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();
        insts.push(Inst::TrapIf {
            cc: IntCC::UnsignedLessThan,
            rs1: stack_reg(),
            rs2: limit_reg,
            trap_code: ir::TrapCode::StackOverflow,
        });
        insts
    }

    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {
        Inst::LoadAddr {
            rd: into_reg,
            mem: mem.into(),
        }
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
        let mem = AMode::RegOffset(base, offset as i64);
        Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
        let mem = AMode::RegOffset(base, offset as i64);
        Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
    }

    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();
        if amount == 0 {
            return insts;
        }
        if let Some(imm) = Imm12::maybe_from_i64(amount as i64) {
            insts.push(Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd: writable_stack_reg(),
                rs: stack_reg(),
                imm12: imm,
            })
        } else {
            let tmp = writable_spilltmp_reg();
            insts.extend(Inst::load_constant_u64(tmp, amount as i64 as u64));
            insts.push(Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd: writable_stack_reg(),
                rs1: stack_reg(),
                rs2: tmp.to_reg(),
            });
        }
        insts
    }
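    // Note on the two-path pattern in `gen_add_imm` and `gen_sp_reg_adjust`
    // above: `addi` only accepts a 12-bit signed immediate (-2048..=2047), so
    // adjustments outside that range must first materialize the constant in a
    // temporary. For example, a stack adjustment of -16 fits in a single
    // `addi`, while -4096 is out of range and takes the
    // constant-load-plus-`add` sequence instead.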
    fn gen_prologue_frame_setup(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _isa_flags: &RiscvFlags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();

        if frame_layout.setup_area_size > 0 {
            // add  sp,sp,-16  ;; alloc stack space for fp.
            // sd   ra,8(sp)   ;; save ra.
            // sd   fp,0(sp)   ;; store old fp.
            // mv   fp,sp      ;; set fp to sp.
            insts.extend(Self::gen_sp_reg_adjust(-16));
            insts.push(Inst::gen_store(
                AMode::SPOffset(8),
                link_reg(),
                I64,
                MemFlags::trusted(),
            ));
            insts.push(Inst::gen_store(
                AMode::SPOffset(0),
                fp_reg(),
                I64,
                MemFlags::trusted(),
            ));

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::PushFrameRegs {
                        offset_upward_to_caller_sp: frame_layout.setup_area_size,
                    },
                });
            }
            insts.push(Inst::Mov {
                rd: writable_fp_reg(),
                rm: stack_reg(),
                ty: I64,
            });
        }

        insts
    }

    /// Reverse of `gen_prologue_frame_setup`.
    fn gen_epilogue_frame_restore(
        call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &RiscvFlags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();

        if frame_layout.setup_area_size > 0 {
            insts.push(Inst::gen_load(
                writable_link_reg(),
                AMode::SPOffset(8),
                I64,
                MemFlags::trusted(),
            ));
            insts.push(Inst::gen_load(
                writable_fp_reg(),
                AMode::SPOffset(0),
                I64,
                MemFlags::trusted(),
            ));
            insts.extend(Self::gen_sp_reg_adjust(16));
        }

        if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(
                frame_layout.tail_args_size.try_into().unwrap(),
            ));
        }

        insts
    }

    fn gen_return(
        _call_conv: isa::CallConv,
        _isa_flags: &RiscvFlags,
        _frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        smallvec![Inst::Ret {}]
    }

    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
        insts.extend(Inst::load_constant_u32(writable_a0(), frame_size as u64));
        let mut info =
            CallInfo::empty(ExternalName::LibCall(LibCall::Probestack), CallConv::SystemV);
        info.uses.push(CallArgPair {
            vreg: a0(),
            preg: a0(),
        });
        insts.push(Inst::Call {
            info: Box::new(info),
        });
    }

    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let mut insts = SmallVec::new();
        let setup_frame = frame_layout.setup_area_size > 0;

        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement SP by the amount of additional incoming argument space
            // we need.
            insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));

            if setup_frame {
                // Write the lr position on the stack again, as it hasn't
                // changed since it was pushed in `gen_prologue_frame_setup`.
                insts.push(Inst::gen_store(
                    AMode::SPOffset(8),
                    link_reg(),
                    I64,
                    MemFlags::trusted(),
                ));
                insts.push(Inst::gen_load(
                    writable_fp_reg(),
                    AMode::SPOffset(i64::from(incoming_args_diff)),
                    I64,
                    MemFlags::trusted(),
                ));
                insts.push(Inst::gen_store(
                    AMode::SPOffset(0),
                    fp_reg(),
                    I64,
                    MemFlags::trusted(),
                ));

                // Finally, sync the frame pointer with SP.
                insts.push(Inst::gen_move(writable_fp_reg(), stack_reg(), I64));
            }
        }

        if flags.unwind_info() && setup_frame {
            // The *unwind* frame (but not the actual frame) starts at the
            // clobbers, just below the saved FP/LR pair.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // Adjust the stack pointer downward for clobbers, the function fixed
        // frame (spillslots and storage slots), and outgoing arguments.
        let stack_size = frame_layout.clobber_size
            + frame_layout.fixed_frame_storage_size
            + frame_layout.outgoing_args_size;

        // Store each clobbered register in order at offsets from SP, placing
        // them above the fixed frame slots.
        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32)));

            let mut cur_offset = 8;
            for reg in &frame_layout.clobbered_callee_saves {
                let r_reg = reg.to_reg();
                let ty = match r_reg.class() {
                    RegClass::Int => I64,
                    RegClass::Float => F64,
                    RegClass::Vector => unimplemented!("Vector Clobber Saves"),
                };
                insts.push(Inst::gen_store(
                    AMode::SPOffset((stack_size - cur_offset) as i64),
                    Reg::from(reg.to_reg()),
                    ty,
                    MemFlags::trusted(),
                ));
                if flags.unwind_info() {
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::SaveReg {
                            clobber_offset: frame_layout.clobber_size - cur_offset,
                            reg: r_reg,
                        },
                    });
                }
                cur_offset += 8;
            }
        }

        insts
    }
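    // A sketch of the frame that `gen_prologue_frame_setup` and
    // `gen_clobber_save` build together (higher addresses at the top):
    //
    //   |   incoming / tail-call args    |
    //   +--------------------------------+ <- caller SP
    //   |      return address (ra)       |
    //   |     caller's frame pointer     | <- FP
    //   +--------------------------------+
    //   |    clobbered callee-saves      |
    //   | fixed frame (spillslots, etc.) |
    //   |        outgoing args           | <- SP
    //   +--------------------------------+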
    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let mut insts = SmallVec::new();
        let stack_size = frame_layout.clobber_size
            + frame_layout.fixed_frame_storage_size
            + frame_layout.outgoing_args_size;

        let mut cur_offset = 8;
        for reg in &frame_layout.clobbered_callee_saves {
            let rreg = reg.to_reg();
            let ty = match rreg.class() {
                RegClass::Int => I64,
                RegClass::Float => F64,
                RegClass::Vector => unimplemented!("Vector Clobber Restores"),
            };
            insts.push(Inst::gen_load(
                reg.map(Reg::from),
                AMode::SPOffset(i64::from(stack_size - cur_offset)),
                ty,
                MemFlags::trusted(),
            ));
            cur_offset += 8;
        }

        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
        }

        insts
    }

    fn gen_call(dest: &CallDest, tmp: Writable<Reg>, info: CallInfo<()>) -> SmallVec<[Self::I; 2]> {
        let mut insts = SmallVec::new();
        match &dest {
            &CallDest::ExtName(ref name, RelocDistance::Near) => {
                let info = Box::new(info.map(|()| name.clone()));
                insts.push(Inst::Call { info })
            }
            &CallDest::ExtName(ref name, RelocDistance::Far) => {
                insts.push(Inst::LoadExtName {
                    rd: tmp,
                    name: Box::new(name.clone()),
                    offset: 0,
                });
                let info = Box::new(info.map(|()| tmp.to_reg()));
                insts.push(Inst::CallInd { info });
            }
            &CallDest::Reg(reg) => {
                let info = Box::new(info.map(|()| *reg));
                insts.push(Inst::CallInd { info });
            }
        }
        insts
    }

    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        let arg0 = Writable::from_reg(x_reg(10));
        let arg1 = Writable::from_reg(x_reg(11));
        let arg2 = Writable::from_reg(x_reg(12));
        let tmp = alloc_tmp(Self::word_type());
        insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter());
        insts.push(Inst::Call {
            info: Box::new(CallInfo {
                dest: ExternalName::LibCall(LibCall::Memcpy),
                uses: smallvec![
                    CallArgPair {
                        vreg: dst,
                        preg: arg0.to_reg()
                    },
                    CallArgPair {
                        vreg: src,
                        preg: arg1.to_reg()
                    },
                    CallArgPair {
                        vreg: tmp.to_reg(),
                        preg: arg2.to_reg()
                    }
                ],
                defs: smallvec![],
                clobbers: Self::get_regs_clobbered_by_call(call_conv),
                caller_conv: call_conv,
                callee_conv: call_conv,
                callee_pop_size: 0,
            }),
        });
        insts
    }

    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        _target_vector_bytes: u32,
        isa_flags: &RiscvFlags,
    ) -> u32 {
        // We allocate in terms of 8-byte slots.
        match rc {
            RegClass::Int => 1,
            RegClass::Float => 1,
            RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32,
        }
    }

    fn get_machine_env(_flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
        MACHINE_ENV.get_or_init(create_reg_environment)
    }

    fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet {
        DEFAULT_CLOBBERS
    }

    fn compute_frame_layout(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
        is_leaf: bool,
        incoming_args_size: u32,
        tail_args_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        let mut regs: Vec<Writable<RealReg>> = regs
            .iter()
            .cloned()
            .filter(|r| DEFAULT_CALLEE_SAVES.contains(r.to_reg().into()))
            .collect();

        regs.sort_unstable();

        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute linkage frame size.
        let setup_area_size = if flags.preserve_frame_pointers()
            || !is_leaf
            // The function arguments that are passed on the stack are
            // addressed relative to the Frame Pointer.
            || incoming_args_size > 0
            || clobber_size > 0
            || fixed_frame_storage_size > 0
        {
            16 // FP, LR
        } else {
            0
        };

        // Return FrameLayout structure.
        FrameLayout {
            incoming_args_size,
            tail_args_size,
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
        }
    }
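    // For instance (illustrative numbers): with a 4 KiB guard size, a 12 KiB
    // frame needs three probes and is unrolled, while a 16 KiB frame needs
    // four, which exceeds PROBE_MAX_UNROLL below and so emits the
    // `StackProbeLoop` pseudo-instruction instead.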
    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // Unroll at most n consecutive probes, before falling back to using a loop.
        const PROBE_MAX_UNROLL: u32 = 3;

        // Number of probes that we need to perform.
        let probe_count = align_to(frame_size, guard_size) / guard_size;

        // Must be a caller-saved register that is not an argument.
        let tmp = Writable::from_reg(x_reg(28)); // t3

        if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, tmp, guard_size, probe_count)
        } else {
            insts.push(Inst::StackProbeLoop {
                guard_size,
                probe_count,
                tmp,
            });
        }
    }
}

impl Riscv64ABICallSite {
    pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
        let new_stack_arg_size =
            u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap();

        ctx.abi_mut().accumulate_tail_args_size(new_stack_arg_size);

        // Put all arguments in registers and stack slots (within that newly
        // allocated stack space).
        self.emit_args(ctx, args);
        self.emit_stack_ret_arg_for_tail_call(ctx);

        let dest = self.dest().clone();
        let uses = self.take_uses();

        match dest {
            CallDest::ExtName(name, RelocDistance::Near) => {
                let info = Box::new(ReturnCallInfo {
                    dest: name,
                    uses,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnCall { info });
            }
            CallDest::ExtName(name, RelocDistance::Far) => {
                let callee = ctx.alloc_tmp(ir::types::I64).only_reg().unwrap();
                ctx.emit(Inst::LoadExtName {
                    rd: callee,
                    name: Box::new(name),
                    offset: 0,
                });
                let info = Box::new(ReturnCallInfo {
                    dest: callee.to_reg(),
                    uses,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnCallInd { info });
            }
            CallDest::Reg(callee) => {
                let info = Box::new(ReturnCallInfo {
                    dest: callee,
                    uses,
                    new_stack_arg_size,
                });
                ctx.emit(Inst::ReturnCallInd { info });
            }
        }
    }
}

// NOTE: no V regs are callee-saved.
const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty()
    // X Regs
    .with(px_reg(2))
    .with(px_reg(8))
    .with(px_reg(9))
    .with(px_reg(18))
    .with(px_reg(19))
    .with(px_reg(20))
    .with(px_reg(21))
    .with(px_reg(22))
    .with(px_reg(23))
    .with(px_reg(24))
    .with(px_reg(25))
    .with(px_reg(26))
    .with(px_reg(27))
    // F Regs
    .with(pf_reg(8))
    .with(pf_reg(18))
    .with(pf_reg(19))
    .with(pf_reg(20))
    .with(pf_reg(21))
    .with(pf_reg(22))
    .with(pf_reg(23))
    .with(pf_reg(24))
    .with(pf_reg(25))
    .with(pf_reg(26))
    .with(pf_reg(27));

fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
    let mut clobbered_size = 0;
    for reg in clobbers {
        match reg.to_reg().class() {
            RegClass::Int => {
                clobbered_size += 8;
            }
            RegClass::Float => {
                clobbered_size += 8;
            }
            RegClass::Vector => unimplemented!("Vector Size Clobbered"),
        }
    }
    align_to(clobbered_size, 16)
}
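// For example, saving three callee-saved registers takes 24 bytes of clobber
// space, which the `align_to` above rounds up to 32 so that the region below
// it stays 16-byte aligned.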
const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty()
    // X Regs
    .with(px_reg(1))
    .with(px_reg(5))
    .with(px_reg(6))
    .with(px_reg(7))
    .with(px_reg(10))
    .with(px_reg(11))
    .with(px_reg(12))
    .with(px_reg(13))
    .with(px_reg(14))
    .with(px_reg(15))
    .with(px_reg(16))
    .with(px_reg(17))
    .with(px_reg(28))
    .with(px_reg(29))
    .with(px_reg(30))
    .with(px_reg(31))
    // F Regs
    .with(pf_reg(0))
    .with(pf_reg(1))
    .with(pf_reg(2))
    .with(pf_reg(3))
    .with(pf_reg(4))
    .with(pf_reg(5))
    .with(pf_reg(6))
    .with(pf_reg(7))
    .with(pf_reg(9))
    .with(pf_reg(10))
    .with(pf_reg(11))
    .with(pf_reg(12))
    .with(pf_reg(13))
    .with(pf_reg(14))
    .with(pf_reg(15))
    .with(pf_reg(16))
    .with(pf_reg(17))
    .with(pf_reg(28))
    .with(pf_reg(29))
    .with(pf_reg(30))
    .with(pf_reg(31))
    // V Regs - All vector regs get clobbered
    .with(pv_reg(0))
    .with(pv_reg(1))
    .with(pv_reg(2))
    .with(pv_reg(3))
    .with(pv_reg(4))
    .with(pv_reg(5))
    .with(pv_reg(6))
    .with(pv_reg(7))
    .with(pv_reg(8))
    .with(pv_reg(9))
    .with(pv_reg(10))
    .with(pv_reg(11))
    .with(pv_reg(12))
    .with(pv_reg(13))
    .with(pv_reg(14))
    .with(pv_reg(15))
    .with(pv_reg(16))
    .with(pv_reg(17))
    .with(pv_reg(18))
    .with(pv_reg(19))
    .with(pv_reg(20))
    .with(pv_reg(21))
    .with(pv_reg(22))
    .with(pv_reg(23))
    .with(pv_reg(24))
    .with(pv_reg(25))
    .with(pv_reg(26))
    .with(pv_reg(27))
    .with(pv_reg(28))
    .with(pv_reg(29))
    .with(pv_reg(30))
    .with(pv_reg(31));

fn create_reg_environment() -> MachineEnv {
    // Some C Extension instructions can only use a subset of the registers:
    // x8-x15, f8-f15, and v8-v15. We should prefer to use those since they
    // allow us to emit C instructions more often.
    //
    // In general the order of preference is:
    //   1. Compressible Caller Saved registers.
    //   2. Non-Compressible Caller Saved registers.
    //   3. Compressible Callee Saved registers.
    //   4. Non-Compressible Callee Saved registers.
    let preferred_regs_by_class: [Vec<PReg>; 3] = {
        let x_registers: Vec<PReg> = (10..=15).map(px_reg).collect();
        let f_registers: Vec<PReg> = (10..=15).map(pf_reg).collect();
        let v_registers: Vec<PReg> = (8..=15).map(pv_reg).collect();

        [x_registers, f_registers, v_registers]
    };

    let non_preferred_regs_by_class: [Vec<PReg>; 3] = {
        // x0 - x4 are special registers, so we don't want to use them.
        // Omit x30 and x31 since they are the spilltmp registers.

        // Start with the Non-Compressible Caller Saved registers.
        let x_registers: Vec<PReg> = (5..=7)
            .chain(16..=17)
            .chain(28..=29)
            // The first Callee Saved register is x9 since it's Compressible.
            // Omit x8 since it's the frame pointer.
            .chain(9..=9)
            // The rest of the Callee Saved registers are Non-Compressible.
            .chain(18..=27)
            .map(px_reg)
            .collect();

        // Prefer Caller Saved registers.
        let f_registers: Vec<PReg> = (0..=7)
            .chain(16..=17)
            .chain(28..=31)
            // Once those are exhausted, we should prefer f8 and f9 since they
            // are callee saved, but compressible.
            .chain(8..=9)
            .chain(18..=27)
            .map(pf_reg)
            .collect();

        let v_registers = (0..=7).chain(16..=31).map(pv_reg).collect();

        [x_registers, f_registers, v_registers]
    };

    MachineEnv {
        preferred_regs_by_class,
        non_preferred_regs_by_class,
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    }
}

impl Riscv64MachineDeps {
    fn gen_probestack_unroll(
        insts: &mut SmallInstVec<Inst>,
        tmp: Writable<Reg>,
        guard_size: u32,
        probe_count: u32,
    ) {
        // When manually unrolling, adjust the stack pointer and then write a
        // zero to the stack at that offset.
        //
        // We do this because valgrind expects us to never write beyond the
        // stack pointer and associated redzone.
        // See: https://github.com/bytecodealliance/wasmtime/issues/7454

        // Store the adjust amount in a register upfront, so we don't have to
        // reload it for each probe. It's worth loading this as a negative and
        // using an `add` instruction since we have compressed versions of
        // `add` but not the `sub` instruction.
        insts.extend(Inst::load_constant_u64(tmp, (-(guard_size as i64)) as u64));

        for _ in 0..probe_count {
            insts.push(Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd: writable_stack_reg(),
                rs1: stack_reg(),
                rs2: tmp.to_reg(),
            });

            insts.push(Inst::gen_store(
                AMode::SPOffset(0),
                zero_reg(),
                I32,
                MemFlags::trusted(),
            ));
        }

        // Restore the stack pointer to its original value.
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }
}
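// A small sanity check (a sketch, not part of the upstream test suite): every
// *preferred* register handed to regalloc2 should be caller-saved, so the
// allocator's first choices never force a save in `gen_clobber_save`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn preferred_regs_are_caller_saved() {
        let env = create_reg_environment();
        for &preg in env.preferred_regs_by_class.iter().flatten() {
            assert!(!DEFAULT_CALLEE_SAVES.contains(preg));
        }
    }
}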