//! Implementation of a standard Riscv64 ABI.

use {
    crate::inst::*,
    alloc::{boxed::Box, vec::Vec},
    cranelift_codegen::{
        ir::{self, types::*, LibCall, Signature},
        isa::{self, unwind::UnwindInst, CallConv},
        machinst::*,
        settings::{self, Flags as RiscvFlags},
        CodegenError, CodegenResult,
    },
    regalloc2::{MachineEnv, PReg, PRegSet},
    smallvec::{smallvec, SmallVec},
    std::sync::OnceLock,
};

/// Support for the Riscv64 ABI from the callee side (within a function body).
pub(crate) type Riscv64Callee = Callee<Riscv64MachineDeps>;

/// Support for the Riscv64 ABI from the caller side (at a callsite).
pub(crate) type Riscv64ABICallSite = CallSite<Riscv64MachineDeps>;

/// This is the limit for the size of argument and return-value areas on the
/// stack. We place a reasonable limit here to avoid integer overflow issues
/// with 32-bit arithmetic: for now, 128 MB.
static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

/// Riscv64-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub struct Riscv64MachineDeps;

impl IsaFlags for RiscvFlags {}

impl RiscvFlags {
    pub(crate) fn min_vec_reg_size(&self) -> u64 {
        let entries = [
            (self.has_zvl65536b(), 65536),
            (self.has_zvl32768b(), 32768),
            (self.has_zvl16384b(), 16384),
            (self.has_zvl8192b(), 8192),
            (self.has_zvl4096b(), 4096),
            (self.has_zvl2048b(), 2048),
            (self.has_zvl1024b(), 1024),
            (self.has_zvl512b(), 512),
            (self.has_zvl256b(), 256),
            // In order to claim the Application Profile V extension, a minimum
            // register size of 128 is required. i.e. V implies Zvl128b.
            (self.has_v(), 128),
            (self.has_zvl128b(), 128),
            (self.has_zvl64b(), 64),
            (self.has_zvl32b(), 32),
        ];

        for (has_flag, size) in entries.into_iter() {
            if !has_flag {
                continue;
            }

            // Due to a limitation in regalloc2, we can't support types
            // larger than 1024 bytes. So limit that here.
            return std::cmp::min(size, 1024);
        }

        return 0;
    }
}
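
// For illustration (not exhaustive): with only `has_zvl512b` set, the table
// above yields 512; with only `has_v` set it yields 128 (V implies Zvl128b);
// with `has_zvl4096b` set the result is capped to 1024 by the regalloc2
// limit; and with no vector flags at all it returns 0.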

impl ABIMachineSpec for Riscv64MachineDeps {
    type F = RiscvFlags;
    type I = Inst;

    fn word_bits() -> u32 {
        64
    }

    /// Return required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    fn compute_arg_locs(
        call_conv: isa::CallConv,
        _flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        assert_ne!(
            call_conv,
            isa::CallConv::Winch,
            "riscv64 does not support the 'winch' calling convention yet"
        );

        // All registers that can be used as parameters or rets.
        // Both start and end are inclusive.
        let (x_start, x_end, f_start, f_end) = match args_or_rets {
            ArgsOrRets::Args => (10, 17, 10, 17),
            ArgsOrRets::Rets => (10, 11, 10, 11),
        };
        let mut next_x_reg = x_start;
        let mut next_f_reg = f_start;
        // Stack space.
        let mut next_stack: u32 = 0;

        for param in params {
            if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {
                panic!(
                    "StructArgument parameters are not supported on riscv64. \
                     Use regular pointer arguments instead."
                );
            }

            // Find regclass(es) of the register(s) used to store a value of this type.
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
            let mut slots = ABIArgSlotVec::new();
            for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
                let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int {
                    let x = Some(x_reg(next_x_reg));
                    next_x_reg += 1;
                    x
                } else if (next_f_reg <= f_end) && *rc == RegClass::Float {
                    let x = Some(f_reg(next_f_reg));
                    next_f_reg += 1;
                    x
                } else {
                    None
                };
                if let Some(reg) = next_reg {
                    slots.push(ABIArgSlot::Reg {
                        reg: reg.to_real_reg().unwrap(),
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                } else {
                    // Compute the stack slot size here; the final 16-byte stack
                    // alignment happens separately after all args.
                    let size = reg_ty.bits() / 8;
                    let size = std::cmp::max(size, 8);
                    // Align.
                    debug_assert!(size.is_power_of_two());
                    next_stack = align_to(next_stack, size);
                    slots.push(ABIArgSlot::Stack {
                        offset: next_stack as i64,
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                    next_stack += size;
                }
            }
            args.push(ABIArg::Slots { slots, purpose: param.purpose });
        }
        let pos: Option<usize> = if add_ret_area_ptr {
            assert!(ArgsOrRets::Args == args_or_rets);
            if next_x_reg <= x_end {
                let arg = ABIArg::reg(
                    x_reg(next_x_reg).to_real_reg().unwrap(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                );
                args.push_non_formal(arg);
            } else {
                let arg = ABIArg::stack(
                    next_stack as i64,
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                );
                args.push_non_formal(arg);
                next_stack += 8;
            }
            Some(args.args().len() - 1)
        } else {
            None
        };

        next_stack = align_to(next_stack, Self::stack_align(call_conv));

        // To avoid overflow issues, limit the arg/return size to something
        // reasonable -- here, 128 MB.
        if next_stack > STACK_ARG_RET_SIZE_LIMIT {
            return Err(CodegenError::ImplLimitExceeded);
        }

        Ok((next_stack, pos))
    }
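
    // Worked example (illustrative, assuming plain `Normal` params): for a
    // signature taking `(i64, f64, i64)`, the loop above assigns x10 (a0) and
    // x11 (a1) to the two integer params and f10 (fa0) to the float param. A
    // ninth integer param, once x10..=x17 are exhausted, would instead get an
    // 8-byte stack slot at the current `next_stack` offset.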

    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
        Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Inst {
        assert!(from_bits < to_bits);
        Inst::Extend { rd: to_reg, rn: from_reg, signed, from_bits, to_bits }
    }

    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }

    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }

    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }

    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        spilltmp_reg()
    }

    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Inst> {
        let mut insts = SmallInstVec::new();
        if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) {
            insts.push(Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd: into_reg,
                rs: from_reg,
                imm12,
            });
        } else {
            insts.extend(Inst::load_constant_u32(writable_spilltmp_reg2(), imm as u64));
            insts.push(Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd: into_reg,
                rs1: spilltmp_reg2(),
                rs2: from_reg,
            });
        }
        insts
    }
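
    // Note (illustrative): `Imm12` holds a 12-bit sign-extended immediate, so
    // the single-`addi` fast path above applies for `imm <= 2047`; larger
    // values are first materialized into the second spill-temp register and
    // then added with a register-register `add`.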

    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();
        insts.push(Inst::TrapIf {
            cc: IntCC::UnsignedLessThan,
            rs1: stack_reg(),
            rs2: limit_reg,
            trap_code: ir::TrapCode::StackOverflow,
        });
        insts
    }

    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {
        Inst::LoadAddr { rd: into_reg, mem: mem.into() }
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
        let mem = AMode::RegOffset(base, offset as i64);
        Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
        let mem = AMode::RegOffset(base, offset as i64);
        Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
    }

    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();

        if amount == 0 {
            return insts;
        }

        if let Some(imm) = Imm12::maybe_from_i64(amount as i64) {
            insts.push(Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd: writable_stack_reg(),
                rs: stack_reg(),
                imm12: imm,
            })
        } else {
            let tmp = writable_spilltmp_reg();
            insts.extend(Inst::load_constant_u64(tmp, amount as i64 as u64));
            insts.push(Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd: writable_stack_reg(),
                rs1: stack_reg(),
                rs2: tmp.to_reg(),
            });
        }

        insts
    }

    fn gen_prologue_frame_setup(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _isa_flags: &RiscvFlags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();

        if frame_layout.setup_area_size > 0 {
            // add sp,sp,-16    ;; alloc stack space for fp.
            // sd ra,8(sp)      ;; save ra.
            // sd fp,0(sp)      ;; store old fp.
            // mv fp,sp         ;; set fp to sp.
            insts.extend(Self::gen_sp_reg_adjust(-16));
            insts.push(Inst::gen_store(AMode::SPOffset(8), link_reg(), I64, MemFlags::trusted()));
            insts.push(Inst::gen_store(AMode::SPOffset(0), fp_reg(), I64, MemFlags::trusted()));

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::PushFrameRegs {
                        offset_upward_to_caller_sp: frame_layout.setup_area_size,
                    },
                });
            }
            insts.push(Inst::Mov { rd: writable_fp_reg(), rm: stack_reg(), ty: I64 });
        }

        insts
    }
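
    // After the setup sequence above (when `setup_area_size > 0`), the frame
    // looks like this, with fp == sp (an illustrative sketch):
    //
    //   fp + 8 : saved ra
    //   fp + 0 : caller's fp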

    /// The reverse of `gen_prologue_frame_setup`.
    fn gen_epilogue_frame_restore(
        call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &RiscvFlags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();

        if frame_layout.setup_area_size > 0 {
            insts.push(Inst::gen_load(
                writable_link_reg(),
                AMode::SPOffset(8),
                I64,
                MemFlags::trusted(),
            ));
            insts.push(Inst::gen_load(
                writable_fp_reg(),
                AMode::SPOffset(0),
                I64,
                MemFlags::trusted(),
            ));
            insts.extend(Self::gen_sp_reg_adjust(16));
        }

        if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(frame_layout.tail_args_size.try_into().unwrap()));
        }

        insts
    }

    fn gen_return(
        _call_conv: isa::CallConv,
        _isa_flags: &RiscvFlags,
        _frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        smallvec![Inst::Ret {}]
    }

    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
        insts.extend(Inst::load_constant_u32(writable_a0(), frame_size as u64));
        let mut info =
            CallInfo::empty(ExternalName::LibCall(LibCall::Probestack), CallConv::SystemV);
        info.uses.push(CallArgPair { vreg: a0(), preg: a0() });
        insts.push(Inst::Call { info: Box::new(info) });
    }

    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let mut insts = SmallVec::new();
        let setup_frame = frame_layout.setup_area_size > 0;

        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement SP by the amount of additional incoming argument space
            // we need.
            insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));

            if setup_frame {
                // Write the lr position on the stack again, as it hasn't changed
                // since it was pushed in `gen_prologue_frame_setup`.
                insts.push(Inst::gen_store(
                    AMode::SPOffset(8),
                    link_reg(),
                    I64,
                    MemFlags::trusted(),
                ));
                insts.push(Inst::gen_load(
                    writable_fp_reg(),
                    AMode::SPOffset(i64::from(incoming_args_diff)),
                    I64,
                    MemFlags::trusted(),
                ));
                insts.push(Inst::gen_store(AMode::SPOffset(0), fp_reg(), I64, MemFlags::trusted()));

                // Finally, sync the frame pointer with SP.
                insts.push(Inst::gen_move(writable_fp_reg(), stack_reg(), I64));
            }
        }

        if flags.unwind_info() && setup_frame {
            // The *unwind* frame (but not the actual frame) starts at the
            // clobbers, just below the saved FP/LR pair.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // Adjust the stack pointer downward for clobbers, the function fixed
        // frame (spillslots and storage slots), and outgoing arguments.
        let stack_size = frame_layout.clobber_size
            + frame_layout.fixed_frame_storage_size
            + frame_layout.outgoing_args_size;

        // Store each clobbered register in order at offsets from SP,
        // placing them above the fixed frame slots.
        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32)));

            let mut cur_offset = 8;
            for reg in &frame_layout.clobbered_callee_saves {
                let r_reg = reg.to_reg();
                let ty = match r_reg.class() {
                    RegClass::Int => I64,
                    RegClass::Float => F64,
                    RegClass::Vector => unimplemented!("Vector Clobber Saves"),
                };
                insts.push(Inst::gen_store(
                    AMode::SPOffset((stack_size - cur_offset) as i64),
                    Reg::from(reg.to_reg()),
                    ty,
                    MemFlags::trusted(),
                ));

                if flags.unwind_info() {
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::SaveReg {
                            clobber_offset: frame_layout.clobber_size - cur_offset,
                            reg: r_reg,
                        },
                    });
                }

                cur_offset += 8
            }
        }
        insts
    }
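
    // Illustrative stack layout right after `gen_clobber_save`, where
    // stack_size = clobber_size + fixed_frame_storage_size + outgoing_args_size:
    //
    //   sp + stack_size - 8   : first clobbered callee-save
    //   sp + stack_size - 16  : second clobbered callee-save
    //   ...
    //   sp + outgoing_args_size .. : fixed frame (spillslots, storage slots)
    //   sp + 0 ..                  : outgoing argument area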

    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let mut insts = SmallVec::new();

        let stack_size = frame_layout.clobber_size
            + frame_layout.fixed_frame_storage_size
            + frame_layout.outgoing_args_size;

        let mut cur_offset = 8;
        for reg in &frame_layout.clobbered_callee_saves {
            let rreg = reg.to_reg();
            let ty = match rreg.class() {
                RegClass::Int => I64,
                RegClass::Float => F64,
                RegClass::Vector => unimplemented!("Vector Clobber Restores"),
            };
            insts.push(Inst::gen_load(
                reg.map(Reg::from),
                AMode::SPOffset(i64::from(stack_size - cur_offset)),
                ty,
                MemFlags::trusted(),
            ));
            cur_offset += 8
        }

        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
        }

        insts
    }

    fn gen_call(dest: &CallDest, tmp: Writable<Reg>, info: CallInfo<()>) -> SmallVec<[Self::I; 2]> {
        let mut insts = SmallVec::new();
        match &dest {
            &CallDest::ExtName(ref name, RelocDistance::Near) => {
                let info = Box::new(info.map(|()| name.clone()));
                insts.push(Inst::Call { info })
            }
            &CallDest::ExtName(ref name, RelocDistance::Far) => {
                insts.push(Inst::LoadExtName { rd: tmp, name: Box::new(name.clone()), offset: 0 });
                let info = Box::new(info.map(|()| tmp.to_reg()));
                insts.push(Inst::CallInd { info });
            }
            &CallDest::Reg(reg) => {
                let info = Box::new(info.map(|()| *reg));
                insts.push(Inst::CallInd { info });
            }
        }
        insts
    }

    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        let arg0 = Writable::from_reg(x_reg(10));
        let arg1 = Writable::from_reg(x_reg(11));
        let arg2 = Writable::from_reg(x_reg(12));
        let tmp = alloc_tmp(Self::word_type());
        insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter());
        insts.push(Inst::Call {
            info: Box::new(CallInfo {
                dest: ExternalName::LibCall(LibCall::Memcpy),
                uses: smallvec![
                    CallArgPair { vreg: dst, preg: arg0.to_reg() },
                    CallArgPair { vreg: src, preg: arg1.to_reg() },
                    CallArgPair { vreg: tmp.to_reg(), preg: arg2.to_reg() }
                ],
                defs: smallvec![],
                clobbers: Self::get_regs_clobbered_by_call(call_conv),
                caller_conv: call_conv,
                callee_conv: call_conv,
                callee_pop_size: 0,
            }),
        });
        insts
    }
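
    // Note: x10, x11, and x12 above are the a0, a1, and a2 argument registers,
    // so this emits, in effect, `memcpy(dst, src, size)` under the platform
    // calling convention.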

    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        _target_vector_bytes: u32,
        isa_flags: &RiscvFlags,
    ) -> u32 {
        // We allocate in terms of 8-byte slots.
        match rc {
            RegClass::Int => 1,
            RegClass::Float => 1,
            RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32,
        }
    }

    fn get_machine_env(_flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
        MACHINE_ENV.get_or_init(create_reg_enviroment)
    }

    fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet {
        DEFAULT_CLOBBERS
    }

    fn compute_frame_layout(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
        is_leaf: bool,
        incoming_args_size: u32,
        tail_args_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        let mut regs: Vec<Writable<RealReg>> = regs
            .iter()
            .cloned()
            .filter(|r| DEFAULT_CALLEE_SAVES.contains(r.to_reg().into()))
            .collect();

        regs.sort_unstable();

        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute linkage frame size.
        let setup_area_size = if flags.preserve_frame_pointers()
            || !is_leaf
            // The function arguments that are passed on the stack are addressed
            // relative to the Frame Pointer.
            || incoming_args_size > 0
            || clobber_size > 0
            || fixed_frame_storage_size > 0
        {
            16 // FP, LR
        } else {
            0
        };

        // Return FrameLayout structure.
        FrameLayout {
            incoming_args_size,
            tail_args_size,
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
        }
    }

    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // Unroll at most n consecutive probes, before falling back to using a loop.
        const PROBE_MAX_UNROLL: u32 = 3;
        // Number of probes that we need to perform.
        let probe_count = align_to(frame_size, guard_size) / guard_size;

        // Must be a caller-saved register that is not an argument.
        let tmp = Writable::from_reg(x_reg(28)); // t3

        if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, tmp, guard_size, probe_count)
        } else {
            insts.push(Inst::StackProbeLoop { guard_size, probe_count, tmp });
        }
    }
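
    // Example (illustrative): with a 4 KiB guard page, a 12 KiB frame needs
    // `align_to(12288, 4096) / 4096 == 3` probes and is unrolled inline, while
    // a 16 KiB frame needs 4 probes and falls back to `Inst::StackProbeLoop`.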
}

impl Riscv64ABICallSite {
    pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
        let new_stack_arg_size =
            u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap();

        ctx.abi_mut().accumulate_tail_args_size(new_stack_arg_size);

        // Put all arguments in registers and stack slots (within that newly
        // allocated stack space).
        self.emit_args(ctx, args);
        self.emit_stack_ret_arg_for_tail_call(ctx);

        let dest = self.dest().clone();
        let uses = self.take_uses();

        match dest {
            CallDest::ExtName(name, RelocDistance::Near) => {
                let info = Box::new(ReturnCallInfo { dest: name, uses, new_stack_arg_size });
                ctx.emit(Inst::ReturnCall { info });
            }
            CallDest::ExtName(name, RelocDistance::Far) => {
                let callee = ctx.alloc_tmp(ir::types::I64).only_reg().unwrap();
                ctx.emit(Inst::LoadExtName { rd: callee, name: Box::new(name), offset: 0 });
                let info =
                    Box::new(ReturnCallInfo { dest: callee.to_reg(), uses, new_stack_arg_size });
                ctx.emit(Inst::ReturnCallInd { info });
            }
            CallDest::Reg(callee) => {
                let info = Box::new(ReturnCallInfo { dest: callee, uses, new_stack_arg_size });
                ctx.emit(Inst::ReturnCallInd { info });
            }
        }
    }
}

// NOTE: no V regs are callee save.
const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty()
    // X Regs
    .with(px_reg(2))
    .with(px_reg(8))
    .with(px_reg(9))
    .with(px_reg(18))
    .with(px_reg(19))
    .with(px_reg(20))
    .with(px_reg(21))
    .with(px_reg(22))
    .with(px_reg(23))
    .with(px_reg(24))
    .with(px_reg(25))
    .with(px_reg(26))
    .with(px_reg(27))
    // F Regs
    .with(pf_reg(8))
    .with(pf_reg(18))
    .with(pf_reg(19))
    .with(pf_reg(20))
    .with(pf_reg(21))
    .with(pf_reg(22))
    .with(pf_reg(23))
    .with(pf_reg(24))
    .with(pf_reg(25))
    .with(pf_reg(26))
    .with(pf_reg(27));
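
// For reference (standard RISC-V register names, not used by the code above):
// x2 is sp, x8 and x9 are s0 (fp) and s1, x18..=x27 are s2..=s11; on the
// floating-point side, f8 is fs0 and f18..=f27 are fs2..=fs11.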

fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
    let mut clobbered_size = 0;
    for reg in clobbers {
        match reg.to_reg().class() {
            RegClass::Int => {
                clobbered_size += 8;
            }
            RegClass::Float => {
                clobbered_size += 8;
            }
            RegClass::Vector => unimplemented!("Vector Size Clobbered"),
        }
    }
    align_to(clobbered_size, 16)
}

const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty()
    .with(px_reg(1))
    .with(px_reg(5))
    .with(px_reg(6))
    .with(px_reg(7))
    .with(px_reg(10))
    .with(px_reg(11))
    .with(px_reg(12))
    .with(px_reg(13))
    .with(px_reg(14))
    .with(px_reg(15))
    .with(px_reg(16))
    .with(px_reg(17))
    .with(px_reg(28))
    .with(px_reg(29))
    .with(px_reg(30))
    .with(px_reg(31))
    // F Regs
    .with(pf_reg(0))
    .with(pf_reg(1))
    .with(pf_reg(2))
    .with(pf_reg(3))
    .with(pf_reg(4))
    .with(pf_reg(5))
    .with(pf_reg(6))
    .with(pf_reg(7))
    .with(pf_reg(9))
    .with(pf_reg(10))
    .with(pf_reg(11))
    .with(pf_reg(12))
    .with(pf_reg(13))
    .with(pf_reg(14))
    .with(pf_reg(15))
    .with(pf_reg(16))
    .with(pf_reg(17))
    .with(pf_reg(28))
    .with(pf_reg(29))
    .with(pf_reg(30))
    .with(pf_reg(31))
    // V Regs - All vector regs get clobbered
    .with(pv_reg(0))
    .with(pv_reg(1))
    .with(pv_reg(2))
    .with(pv_reg(3))
    .with(pv_reg(4))
    .with(pv_reg(5))
    .with(pv_reg(6))
    .with(pv_reg(7))
    .with(pv_reg(8))
    .with(pv_reg(9))
    .with(pv_reg(10))
    .with(pv_reg(11))
    .with(pv_reg(12))
    .with(pv_reg(13))
    .with(pv_reg(14))
    .with(pv_reg(15))
    .with(pv_reg(16))
    .with(pv_reg(17))
    .with(pv_reg(18))
    .with(pv_reg(19))
    .with(pv_reg(20))
    .with(pv_reg(21))
    .with(pv_reg(22))
    .with(pv_reg(23))
    .with(pv_reg(24))
    .with(pv_reg(25))
    .with(pv_reg(26))
    .with(pv_reg(27))
    .with(pv_reg(28))
    .with(pv_reg(29))
    .with(pv_reg(30))
    .with(pv_reg(31));

fn create_reg_enviroment() -> MachineEnv {
    // Some C Extension instructions can only use a subset of the registers:
    // x8 - x15, f8 - f15, and v8 - v15. We prefer those, since they allow us
    // to emit C instructions more often.
    //
    // In general the order of preference is:
    //   1. Compressible Caller Saved registers.
    //   2. Non-Compressible Caller Saved registers.
    //   3. Compressible Callee Saved registers.
    //   4. Non-Compressible Callee Saved registers.

    let preferred_regs_by_class: [Vec<PReg>; 3] = {
        let x_registers: Vec<PReg> = (10..=15).map(px_reg).collect();
        let f_registers: Vec<PReg> = (10..=15).map(pf_reg).collect();
        let v_registers: Vec<PReg> = (8..=15).map(pv_reg).collect();

        [x_registers, f_registers, v_registers]
    };

    let non_preferred_regs_by_class: [Vec<PReg>; 3] = {
        // x0 - x4 are special registers, so we don't want to use them.
        // Omit x30 and x31 since they are the spilltmp registers.

        // Start with the Non-Compressible Caller Saved registers.
        let x_registers: Vec<PReg> = (5..=7)
            .chain(16..=17)
            .chain(28..=29)
            // The first Callee Saved register is x9 since it's Compressible.
            // Omit x8 since it's the frame pointer.
            .chain(9..=9)
            // The rest of the Callee Saved registers are Non-Compressible.
            .chain(18..=27)
            .map(px_reg)
            .collect();

        // Prefer Caller Saved registers.
        let f_registers: Vec<PReg> = (0..=7)
            .chain(16..=17)
            .chain(28..=31)
            // Once those are exhausted, we should prefer f8 and f9 since they
            // are callee saved, but compressible.
            .chain(8..=9)
            .chain(18..=27)
            .map(pf_reg)
            .collect();

        let v_registers = (0..=7).chain(16..=31).map(pv_reg).collect();

        [x_registers, f_registers, v_registers]
    };

    MachineEnv {
        preferred_regs_by_class,
        non_preferred_regs_by_class,
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    }
}

impl Riscv64MachineDeps {
    fn gen_probestack_unroll(
        insts: &mut SmallInstVec<Inst>,
        tmp: Writable<Reg>,
        guard_size: u32,
        probe_count: u32,
    ) {
        // When manually unrolling, adjust the stack pointer and then write a
        // zero to the stack at that offset.
        //
        // We do this because valgrind expects us to never write beyond the
        // stack pointer and associated redzone.
        // See: https://github.com/bytecodealliance/wasmtime/issues/7454

        // Store the adjust amount in a register upfront, so we don't have to
        // reload it for each probe. It's worth loading this as a negative and
        // using an `add` instruction since we have compressed versions of
        // `add` but not the `sub` instruction.
        insts.extend(Inst::load_constant_u64(tmp, (-(guard_size as i64)) as u64));

        for _ in 0..probe_count {
            insts.push(Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd: writable_stack_reg(),
                rs1: stack_reg(),
                rs2: tmp.to_reg(),
            });

            insts.push(Inst::gen_store(AMode::SPOffset(0), zero_reg(), I32, MemFlags::trusted()));
        }

        // Restore the stack pointer to its original value.
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }
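
    // Illustrative shape of the unrolled sequence for guard_size = 4096 and
    // probe_count = 2 (the constant load may expand to more than one
    // instruction, and the final +8192 restore does not fit in a 12-bit
    // immediate, so it also goes through a temporary):
    //
    //   li   t3, -4096
    //   add  sp, sp, t3
    //   sw   zero, 0(sp)
    //   add  sp, sp, t3
    //   sw   zero, 0(sp)
    //   <restore sp by +8192 via the spilltmp register>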
}