diff --git a/Cargo.lock b/Cargo.lock index 3fb8300..49297bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,131 +2,16 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "arbitrary" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" - -[[package]] -name = "bumpalo" -version = "3.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "cranelift-bforest" -version = "0.113.0" -source = "git+https://github.com/jakubDoka/wasmtime.git#9831bdd139ec6ad205707db5a3125778e6f4f16c" -dependencies = [ - "cranelift-entity", -] - -[[package]] -name = "cranelift-bitset" -version = "0.113.0" -source = "git+https://github.com/jakubDoka/wasmtime.git#9831bdd139ec6ad205707db5a3125778e6f4f16c" - -[[package]] -name = "cranelift-codegen" -version = "0.113.0" -source = "git+https://github.com/jakubDoka/wasmtime.git#9831bdd139ec6ad205707db5a3125778e6f4f16c" -dependencies = [ - "bumpalo", - "cranelift-bforest", - "cranelift-bitset", - "cranelift-codegen-meta", - "cranelift-codegen-shared", - "cranelift-control", - "cranelift-entity", - "cranelift-isle", - "hashbrown", - "log", - "regalloc2 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-hash", - "smallvec", - "target-lexicon", -] - -[[package]] -name = "cranelift-codegen-meta" -version = "0.113.0" -source = "git+https://github.com/jakubDoka/wasmtime.git#9831bdd139ec6ad205707db5a3125778e6f4f16c" -dependencies = [ - "cranelift-codegen-shared", -] - -[[package]] -name = "cranelift-codegen-shared" -version = "0.113.0" -source = "git+https://github.com/jakubDoka/wasmtime.git#9831bdd139ec6ad205707db5a3125778e6f4f16c" - -[[package]] -name = "cranelift-control" -version = "0.113.0" -source = "git+https://github.com/jakubDoka/wasmtime.git#9831bdd139ec6ad205707db5a3125778e6f4f16c" -dependencies = [ - "arbitrary", -] - -[[package]] -name = "cranelift-entity" -version = "0.113.0" -source = "git+https://github.com/jakubDoka/wasmtime.git#9831bdd139ec6ad205707db5a3125778e6f4f16c" -dependencies = [ - "cranelift-bitset", -] - -[[package]] -name = "cranelift-isle" -version = "0.113.0" -source = "git+https://github.com/jakubDoka/wasmtime.git#9831bdd139ec6ad205707db5a3125778e6f4f16c" - [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", -] [[package]] name = "hbbytecode" version = "0.1.0" -[[package]] -name = "hbcb" -version = "0.1.0" -dependencies = [ - "cranelift-codegen", - "cranelift-codegen-meta", - "cranelift-control", - "cranelift-isle", - "log", - "regalloc2 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec", - "target-lexicon", -] - [[package]] name = "hbjit" version = "0.1.0" @@ -137,7 +22,7 @@ version = "0.1.0" dependencies = [ "hbbytecode", "hbvm", - "regalloc2 0.10.2 (git+https://github.com/jakubDoka/regalloc2)", + "regalloc2", ] [[package]] @@ -161,12 +46,6 @@ version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" -[[package]] -name = "log" -version = "0.4.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" - [[package]] name = "memmap2" version = "0.9.5" @@ -176,43 +55,6 @@ dependencies = [ "libc", ] -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - -[[package]] -name = "proc-macro2" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "regalloc2" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12908dbeb234370af84d0579b9f68258a0f67e201412dd9a2814e6f45b2fc0f0" -dependencies = [ - "hashbrown", - "log", - "rustc-hash", - "slice-group-by", - "smallvec", -] - [[package]] name = "regalloc2" version = "0.10.2" @@ -229,67 +71,12 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" -[[package]] -name = "slice-group-by" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" - [[package]] name = "smallvec" version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -[[package]] -name = "syn" -version = "2.0.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "target-lexicon" -version = "0.12.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" - -[[package]] -name = "unicode-ident" -version = "1.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" - -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - [[package]] name = "xtask" version = "0.1.0" - -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] diff --git a/Cargo.toml b/Cargo.toml index fbc1d6d..acb9024 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = ["hbbytecode", "hbvm", "hbxrt", "xtask", "hblang", "hbjit", "hbcb"] +members = ["hbbytecode", "hbvm", "hbxrt", "xtask", "hblang", "hbjit"] [profile.release] strip = true diff --git a/hbcb/Cargo.toml b/hbcb/Cargo.toml deleted file mode 100644 index e631671..0000000 --- a/hbcb/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "hbcb" -version = "0.1.0" -edition = "2021" - -[dependencies] -cranelift-codegen = { git = "https://github.com/jakubDoka/wasmtime.git", default-features = false, features = ["std"] } -cranelift-control = { git = "https://github.com/jakubDoka/wasmtime.git", default-features = false } -log = "0.4.22" -regalloc2 = "0.10.2" -smallvec = "1.13.2" -target-lexicon = "0.12.16" - -[features] -default = ["isle-errors"] -unwind = [] -isle-errors = [] - -[build-dependencies] -cranelift-codegen-meta = { git = "https://github.com/jakubDoka/wasmtime.git" } -cranelift-isle = { git = "https://github.com/jakubDoka/wasmtime.git" } diff --git a/hbcb/build.rs b/hbcb/build.rs deleted file mode 100644 index d68e525..0000000 --- a/hbcb/build.rs +++ /dev/null @@ -1,216 +0,0 @@ -use { - cranelift_codegen_meta::{self as meta, isle::IsleCompilations}, - cranelift_isle::error::Errors, - meta::isle::IsleCompilation, - std::{env, io::Read, process, time::Instant}, -}; - -fn main() { - let start_time = Instant::now(); - - let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set"); - let out_dir = std::path::Path::new(&out_dir); - let isas = &[meta::isa::Isa::Riscv64]; - - let cur_dir = env::current_dir().expect("Can't access current working directory"); - let crate_dir = cur_dir.as_path(); - - println!("cargo:rerun-if-changed=build.rs"); - - let explicit_isle_dir = &crate_dir.join("isle_generated_code"); - let isle_dir = &out_dir; - - { - if explicit_isle_dir.is_dir() { - eprintln!(concat!( - "Error: directory isle_generated_code/ exists but is only used when\n", - "`--feature isle-in-source-tree` is specified. To prevent confusion,\n", - "this build script requires the directory to be removed when reverting\n", - "to the usual generated code in target/. Please delete the directory and\n", - "re-run this build.\n", - )); - std::process::exit(1); - } - } - - if let Err(err) = meta::generate(isas, out_dir, isle_dir) { - eprintln!("Error: {err}"); - process::exit(1); - } - - if &std::env::var("SKIP_ISLE").unwrap_or("0".to_string()) != "1" { - if let Err(err) = build_isle(crate_dir, isle_dir) { - eprintln!("Error: {err}"); - process::exit(1); - } - } - - if env::var("CRANELIFT_VERBOSE").is_ok() { - for isa in isas { - println!("cargo:warning=Includes support for {} ISA", isa); - } - println!("cargo:warning=Build step took {:?}.", Instant::now() - start_time); - println!("cargo:warning=Generated files are in {}", out_dir.display()); - } - - let pkg_version = env::var("CARGO_PKG_VERSION").unwrap(); - let mut cmd = std::process::Command::new("git"); - cmd.arg("rev-parse") - .arg("HEAD") - .stdout(std::process::Stdio::piped()) - .current_dir(env::var("CARGO_MANIFEST_DIR").unwrap()); - let version = if let Ok(mut child) = cmd.spawn() { - let mut git_rev = String::new(); - child.stdout.as_mut().unwrap().read_to_string(&mut git_rev).unwrap(); - let status = child.wait().unwrap(); - if status.success() { - let git_rev = git_rev.trim().chars().take(9).collect::(); - format!("{pkg_version}-{git_rev}") - } else { - // not a git repo - pkg_version - } - } else { - // git not available - pkg_version - }; - std::fs::write( - std::path::Path::new(&out_dir).join("version.rs"), - format!( - "/// Version number of this crate. \n\ - pub const VERSION: &str = \"{version}\";" - ), - ) - .unwrap(); -} - -fn make_isle_source_path_relative( - cur_dir: &std::path::Path, - filename: &std::path::Path, -) -> std::path::PathBuf { - if let Ok(suffix) = filename.strip_prefix(cur_dir) { - suffix.to_path_buf() - } else { - filename.to_path_buf() - } -} - -fn build_isle( - crate_dir: &std::path::Path, - isle_dir: &std::path::Path, -) -> Result<(), Box> { - let cur_dir = std::env::current_dir()?; - let codegen_crate_dir = &make_isle_source_path_relative(&cur_dir, crate_dir); - let gen_dir = &make_isle_source_path_relative(&cur_dir, isle_dir); - - let clif_lower_isle = gen_dir.join("clif_lower.isle"); - let prelude_isle = codegen_crate_dir.join("src").join("prelude.isle"); - let prelude_lower_isle = codegen_crate_dir.join("src").join("prelude_lower.isle"); - - let src_isa_risc_v = codegen_crate_dir.join("src"); - - let isle_compilations = IsleCompilations { - items: vec![IsleCompilation { - output: gen_dir.join("isle_riscv64.rs"), - inputs: vec![ - prelude_isle.clone(), - prelude_lower_isle.clone(), - src_isa_risc_v.join("inst.isle"), - src_isa_risc_v.join("inst_vector.isle"), - src_isa_risc_v.join("lower.isle"), - ], - untracked_inputs: vec![clif_lower_isle.clone()], - }], - }; - - let mut had_error = false; - for compilation in &isle_compilations.items { - for file in &compilation.inputs { - println!("cargo:rerun-if-changed={}", file.display()); - } - - if let Err(e) = run_compilation(compilation) { - had_error = true; - eprintln!("Error building ISLE files:"); - eprintln!("{e:?}"); - #[cfg(not(feature = "isle-errors"))] - { - eprintln!("To see a more detailed error report, run: "); - eprintln!(); - eprintln!(" $ cargo check -p cranelift-codegen --features isle-errors"); - eprintln!(); - } - } - } - - if had_error { - std::process::exit(1); - } - - println!("cargo:rustc-env=ISLE_DIR={}", isle_dir.to_str().unwrap()); - - Ok(()) -} - -/// Build ISLE DSL source text into generated Rust code. -/// -/// NB: This must happen *after* the `cranelift-codegen-meta` functions, since -/// it consumes files generated by them. -fn run_compilation(compilation: &IsleCompilation) -> Result<(), Errors> { - use cranelift_isle as isle; - - eprintln!("Rebuilding {}", compilation.output.display()); - - let code = { - let file_paths = compilation.inputs.iter().chain(compilation.untracked_inputs.iter()); - - let options = isle::codegen::CodegenOptions { - // Because we include!() the generated ISLE source, we cannot - // put the global pragmas (`#![allow(...)]`) in the ISLE - // source itself; we have to put them in the source that - // include!()s it. (See - // https://github.com/rust-lang/rust/issues/47995.) - exclude_global_allow_pragmas: true, - }; - - isle::compile::from_files(file_paths, &options)? - }; - - let code = rustfmt(&code).unwrap_or_else(|e| { - println!("cargo:warning=Failed to run `rustfmt` on ISLE-generated code: {e:?}"); - code - }); - - eprintln!("Writing ISLE-generated Rust code to {}", compilation.output.display()); - std::fs::write(&compilation.output, code) - .map_err(|e| Errors::from_io(e, "failed writing output"))?; - - Ok(()) -} - -fn rustfmt(code: &str) -> std::io::Result { - use std::io::Write; - - let mut rustfmt = std::process::Command::new("rustfmt") - .stdin(std::process::Stdio::piped()) - .stdout(std::process::Stdio::piped()) - .spawn()?; - - let mut stdin = rustfmt.stdin.take().unwrap(); - stdin.write_all(code.as_bytes())?; - drop(stdin); - - let mut stdout = rustfmt.stdout.take().unwrap(); - let mut data = vec![]; - stdout.read_to_end(&mut data)?; - - let status = rustfmt.wait()?; - if !status.success() { - return Err(std::io::Error::new( - std::io::ErrorKind::Other, - format!("`rustfmt` exited with status {status}"), - )); - } - - Ok(String::from_utf8(data).expect("rustfmt always writs utf-8 to stdout")) -} diff --git a/hbcb/src/abi.rs b/hbcb/src/abi.rs deleted file mode 100644 index 4b75885..0000000 --- a/hbcb/src/abi.rs +++ /dev/null @@ -1,906 +0,0 @@ -//! Implementation of a standard Riscv64 ABI. - -use { - crate::{ - inst::*, - settings::{self, Flags as RiscvFlags}, - }, - alloc::{boxed::Box, vec::Vec}, - cranelift_codegen::{ - ir::{self, types::*, LibCall, Signature}, - isa::{self, unwind::UnwindInst, CallConv}, - machinst::*, - CodegenError, CodegenResult, - }, - regalloc2::{MachineEnv, PReg, PRegSet}, - smallvec::{smallvec, SmallVec}, - std::sync::OnceLock, -}; - -/// Support for the Riscv64 ABI from the callee side (within a function body). -pub(crate) type Riscv64Callee = Callee; - -/// Support for the Riscv64 ABI from the caller side (at a callsite). -pub(crate) type Riscv64ABICallSite = CallSite; - -/// Riscv64-specific ABI behavior. This struct just serves as an implementation -/// point for the trait; it is never actually instantiated. -pub struct Riscv64MachineDeps; - -impl IsaFlags for RiscvFlags {} - -impl RiscvFlags { - pub(crate) fn min_vec_reg_size(&self) -> u64 { - let entries = [ - (self.has_zvl65536b(), 65536), - (self.has_zvl32768b(), 32768), - (self.has_zvl16384b(), 16384), - (self.has_zvl8192b(), 8192), - (self.has_zvl4096b(), 4096), - (self.has_zvl2048b(), 2048), - (self.has_zvl1024b(), 1024), - (self.has_zvl512b(), 512), - (self.has_zvl256b(), 256), - // In order to claim the Application Profile V extension, a minimum - // register size of 128 is required. i.e. V implies Zvl128b. - (self.has_v(), 128), - (self.has_zvl128b(), 128), - (self.has_zvl64b(), 64), - (self.has_zvl32b(), 32), - ]; - - for (has_flag, size) in entries.into_iter() { - if !has_flag { - continue; - } - - // Due to a limitation in regalloc2, we can't support types - // larger than 1024 bytes. So limit that here. - return std::cmp::min(size, 1024); - } - - return 0; - } -} - -impl ABIMachineSpec for Riscv64MachineDeps { - type F = RiscvFlags; - type I = Inst; - - /// This is the limit for the size of argument and return-value areas on the - /// stack. We place a reasonable limit here to avoid integer overflow issues - /// with 32-bit arithmetic: for now, 128 MB. - const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024; - - fn word_bits() -> u32 { - 64 - } - - /// Return required stack alignment in bytes. - fn stack_align(_call_conv: isa::CallConv) -> u32 { - 16 - } - - fn compute_arg_locs( - call_conv: isa::CallConv, - _flags: &settings::Flags, - params: &[ir::AbiParam], - args_or_rets: ArgsOrRets, - add_ret_area_ptr: bool, - mut args: ArgsAccumulator, - ) -> CodegenResult<(u32, Option)> { - assert_ne!( - call_conv, - isa::CallConv::Winch, - "riscv64 does not support the 'winch' calling convention yet" - ); - - // All registers that can be used as parameters or rets. - // both start and end are included. - let (x_start, x_end, f_start, f_end) = match args_or_rets { - ArgsOrRets::Args => (10, 17, 10, 17), - ArgsOrRets::Rets => (10, 11, 10, 11), - }; - let mut next_x_reg = x_start; - let mut next_f_reg = f_start; - // Stack space. - let mut next_stack: u32 = 0; - - for param in params { - if let ir::ArgumentPurpose::StructArgument(_) = param.purpose { - panic!( - "StructArgument parameters are not supported on riscv64. \ - Use regular pointer arguments instead." - ); - } - - // Find regclass(es) of the register(s) used to store a value of this type. - let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?; - let mut slots = ABIArgSlotVec::new(); - for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) { - let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int { - let x = Some(x_reg(next_x_reg)); - next_x_reg += 1; - x - } else if (next_f_reg <= f_end) && *rc == RegClass::Float { - let x = Some(f_reg(next_f_reg)); - next_f_reg += 1; - x - } else { - None - }; - if let Some(reg) = next_reg { - slots.push(ABIArgSlot::Reg { - reg: reg.to_real_reg().unwrap(), - ty: *reg_ty, - extension: param.extension, - }); - } else { - // Compute size and 16-byte stack alignment happens - // separately after all args. - let size = reg_ty.bits() / 8; - let size = std::cmp::max(size, 8); - // Align. - debug_assert!(size.is_power_of_two()); - next_stack = align_to(next_stack, size); - slots.push(ABIArgSlot::Stack { - offset: next_stack as i64, - ty: *reg_ty, - extension: param.extension, - }); - next_stack += size; - } - } - args.push(ABIArg::Slots { slots, purpose: param.purpose }); - } - let pos: Option = if add_ret_area_ptr { - assert!(ArgsOrRets::Args == args_or_rets); - if next_x_reg <= x_end { - let arg = ABIArg::reg( - x_reg(next_x_reg).to_real_reg().unwrap(), - I64, - ir::ArgumentExtension::None, - ir::ArgumentPurpose::Normal, - ); - args.push_non_formal(arg); - } else { - let arg = ABIArg::stack( - next_stack as i64, - I64, - ir::ArgumentExtension::None, - ir::ArgumentPurpose::Normal, - ); - args.push_non_formal(arg); - next_stack += 8; - } - Some(args.args().len() - 1) - } else { - None - }; - - next_stack = align_to(next_stack, Self::stack_align(call_conv)); - - // To avoid overflow issues, limit the arg/return size to something - // reasonable -- here, 128 MB. - if next_stack > STACK_ARG_RET_SIZE_LIMIT { - return Err(CodegenError::ImplLimitExceeded); - } - - Ok((next_stack, pos)) - } - - fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { - Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()) - } - - fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst { - Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted()) - } - - fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { - Inst::gen_move(to_reg, from_reg, ty) - } - - fn gen_extend( - to_reg: Writable, - from_reg: Reg, - signed: bool, - from_bits: u8, - to_bits: u8, - ) -> Inst { - assert!(from_bits < to_bits); - Inst::Extend { rd: to_reg, rn: from_reg, signed, from_bits, to_bits } - } - - fn get_ext_mode( - _call_conv: isa::CallConv, - specified: ir::ArgumentExtension, - ) -> ir::ArgumentExtension { - specified - } - - fn gen_args(args: Vec) -> Inst { - Inst::Args { args } - } - - fn gen_rets(rets: Vec) -> Inst { - Inst::Rets { rets } - } - - fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg { - spilltmp_reg() - } - - fn gen_add_imm( - _call_conv: isa::CallConv, - into_reg: Writable, - from_reg: Reg, - imm: u32, - ) -> SmallInstVec { - let mut insts = SmallInstVec::new(); - if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) { - insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: into_reg, - rs: from_reg, - imm12, - }); - } else { - insts.extend(Inst::load_constant_u32(writable_spilltmp_reg2(), imm as u64)); - insts.push(Inst::AluRRR { - alu_op: AluOPRRR::Add, - rd: into_reg, - rs1: spilltmp_reg2(), - rs2: from_reg, - }); - } - insts - } - - fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec { - let mut insts = SmallVec::new(); - insts.push(Inst::TrapIf { - cc: IntCC::UnsignedLessThan, - rs1: stack_reg(), - rs2: limit_reg, - trap_code: ir::TrapCode::StackOverflow, - }); - insts - } - - fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable) -> Inst { - Inst::LoadAddr { rd: into_reg, mem: mem.into() } - } - - fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Inst { - let mem = AMode::RegOffset(base, offset as i64); - Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()) - } - - fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { - let mem = AMode::RegOffset(base, offset as i64); - Inst::gen_store(mem, from_reg, ty, MemFlags::trusted()) - } - - fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec { - let mut insts = SmallVec::new(); - - if amount == 0 { - return insts; - } - - if let Some(imm) = Imm12::maybe_from_i64(amount as i64) { - insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: writable_stack_reg(), - rs: stack_reg(), - imm12: imm, - }) - } else { - let tmp = writable_spilltmp_reg(); - insts.extend(Inst::load_constant_u64(tmp, amount as i64 as u64)); - insts.push(Inst::AluRRR { - alu_op: AluOPRRR::Add, - rd: writable_stack_reg(), - rs1: stack_reg(), - rs2: tmp.to_reg(), - }); - } - - insts - } - - fn gen_prologue_frame_setup( - _call_conv: isa::CallConv, - flags: &settings::Flags, - _isa_flags: &RiscvFlags, - frame_layout: &FrameLayout, - ) -> SmallInstVec { - let mut insts = SmallVec::new(); - - if frame_layout.setup_area_size > 0 { - // add sp,sp,-16 ;; alloc stack space for fp. - // sd ra,8(sp) ;; save ra. - // sd fp,0(sp) ;; store old fp. - // mv fp,sp ;; set fp to sp. - insts.extend(Self::gen_sp_reg_adjust(-16)); - insts.push(Inst::gen_store(AMode::SPOffset(8), link_reg(), I64, MemFlags::trusted())); - insts.push(Inst::gen_store(AMode::SPOffset(0), fp_reg(), I64, MemFlags::trusted())); - - if flags.unwind_info() { - insts.push(Inst::Unwind { - inst: UnwindInst::PushFrameRegs { - offset_upward_to_caller_sp: frame_layout.setup_area_size, - }, - }); - } - insts.push(Inst::Mov { rd: writable_fp_reg(), rm: stack_reg(), ty: I64 }); - } - - insts - } - - /// reverse of gen_prologue_frame_setup. - fn gen_epilogue_frame_restore( - call_conv: isa::CallConv, - _flags: &settings::Flags, - _isa_flags: &RiscvFlags, - frame_layout: &FrameLayout, - ) -> SmallInstVec { - let mut insts = SmallVec::new(); - - if frame_layout.setup_area_size > 0 { - insts.push(Inst::gen_load( - writable_link_reg(), - AMode::SPOffset(8), - I64, - MemFlags::trusted(), - )); - insts.push(Inst::gen_load( - writable_fp_reg(), - AMode::SPOffset(0), - I64, - MemFlags::trusted(), - )); - insts.extend(Self::gen_sp_reg_adjust(16)); - } - - if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 { - insts.extend(Self::gen_sp_reg_adjust(frame_layout.tail_args_size.try_into().unwrap())); - } - - insts - } - - fn gen_return( - _call_conv: isa::CallConv, - _isa_flags: &RiscvFlags, - _frame_layout: &FrameLayout, - ) -> SmallInstVec { - smallvec![Inst::Ret {}] - } - - fn gen_probestack(insts: &mut SmallInstVec, frame_size: u32) { - insts.extend(Inst::load_constant_u32(writable_a0(), frame_size as u64)); - let mut info = - CallInfo::empty(ExternalName::LibCall(LibCall::Probestack), CallConv::SystemV); - info.uses.push(CallArgPair { vreg: a0(), preg: a0() }); - insts.push(Inst::Call { info: Box::new(info) }); - } - - fn gen_clobber_save( - _call_conv: isa::CallConv, - flags: &settings::Flags, - frame_layout: &FrameLayout, - ) -> SmallVec<[Inst; 16]> { - let mut insts = SmallVec::new(); - let setup_frame = frame_layout.setup_area_size > 0; - - let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size; - if incoming_args_diff > 0 { - // Decrement SP by the amount of additional incoming argument space we need - insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32))); - - if setup_frame { - // Write the lr position on the stack again, as it hasn't changed since it was - // pushed in `gen_prologue_frame_setup` - insts.push(Inst::gen_store( - AMode::SPOffset(8), - link_reg(), - I64, - MemFlags::trusted(), - )); - insts.push(Inst::gen_load( - writable_fp_reg(), - AMode::SPOffset(i64::from(incoming_args_diff)), - I64, - MemFlags::trusted(), - )); - insts.push(Inst::gen_store(AMode::SPOffset(0), fp_reg(), I64, MemFlags::trusted())); - - // Finally, sync the frame pointer with SP - insts.push(Inst::gen_move(writable_fp_reg(), stack_reg(), I64)); - } - } - - if flags.unwind_info() && setup_frame { - // The *unwind* frame (but not the actual frame) starts at the - // clobbers, just below the saved FP/LR pair. - insts.push(Inst::Unwind { - inst: UnwindInst::DefineNewFrame { - offset_downward_to_clobbers: frame_layout.clobber_size, - offset_upward_to_caller_sp: frame_layout.setup_area_size, - }, - }); - } - - // Adjust the stack pointer downward for clobbers, the function fixed - // frame (spillslots and storage slots), and outgoing arguments. - let stack_size = frame_layout.clobber_size - + frame_layout.fixed_frame_storage_size - + frame_layout.outgoing_args_size; - - // Store each clobbered register in order at offsets from SP, - // placing them above the fixed frame slots. - if stack_size > 0 { - insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32))); - - let mut cur_offset = 8; - for reg in &frame_layout.clobbered_callee_saves { - let r_reg = reg.to_reg(); - let ty = match r_reg.class() { - RegClass::Int => I64, - RegClass::Float => F64, - RegClass::Vector => unimplemented!("Vector Clobber Saves"), - }; - insts.push(Inst::gen_store( - AMode::SPOffset((stack_size - cur_offset) as i64), - Reg::from(reg.to_reg()), - ty, - MemFlags::trusted(), - )); - - if flags.unwind_info() { - insts.push(Inst::Unwind { - inst: UnwindInst::SaveReg { - clobber_offset: frame_layout.clobber_size - cur_offset, - reg: r_reg, - }, - }); - } - - cur_offset += 8 - } - } - insts - } - - fn gen_clobber_restore( - _call_conv: isa::CallConv, - _flags: &settings::Flags, - frame_layout: &FrameLayout, - ) -> SmallVec<[Inst; 16]> { - let mut insts = SmallVec::new(); - - let stack_size = frame_layout.clobber_size - + frame_layout.fixed_frame_storage_size - + frame_layout.outgoing_args_size; - - let mut cur_offset = 8; - for reg in &frame_layout.clobbered_callee_saves { - let rreg = reg.to_reg(); - let ty = match rreg.class() { - RegClass::Int => I64, - RegClass::Float => F64, - RegClass::Vector => unimplemented!("Vector Clobber Restores"), - }; - insts.push(Inst::gen_load( - reg.map(Reg::from), - AMode::SPOffset(i64::from(stack_size - cur_offset)), - ty, - MemFlags::trusted(), - )); - cur_offset += 8 - } - - if stack_size > 0 { - insts.extend(Self::gen_sp_reg_adjust(stack_size as i32)); - } - - insts - } - - fn gen_call(dest: &CallDest, tmp: Writable, info: CallInfo<()>) -> SmallVec<[Self::I; 2]> { - let mut insts = SmallVec::new(); - match &dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => { - let info = Box::new(info.map(|()| name.clone())); - insts.push(Inst::Call { info }) - } - &CallDest::ExtName(ref name, RelocDistance::Far) => { - insts.push(Inst::LoadExtName { rd: tmp, name: Box::new(name.clone()), offset: 0 }); - let info = Box::new(info.map(|()| tmp.to_reg())); - insts.push(Inst::CallInd { info }); - } - &CallDest::Reg(reg) => { - let info = Box::new(info.map(|()| *reg)); - insts.push(Inst::CallInd { info }); - } - } - insts - } - - fn gen_memcpy Writable>( - call_conv: isa::CallConv, - dst: Reg, - src: Reg, - size: usize, - mut alloc_tmp: F, - ) -> SmallVec<[Self::I; 8]> { - let mut insts = SmallVec::new(); - let arg0 = Writable::from_reg(x_reg(10)); - let arg1 = Writable::from_reg(x_reg(11)); - let arg2 = Writable::from_reg(x_reg(12)); - let tmp = alloc_tmp(Self::word_type()); - insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter()); - insts.push(Inst::Call { - info: Box::new(CallInfo { - dest: ExternalName::LibCall(LibCall::Memcpy), - uses: smallvec![ - CallArgPair { vreg: dst, preg: arg0.to_reg() }, - CallArgPair { vreg: src, preg: arg1.to_reg() }, - CallArgPair { vreg: tmp.to_reg(), preg: arg2.to_reg() } - ], - defs: smallvec![], - clobbers: Self::get_regs_clobbered_by_call(call_conv), - caller_conv: call_conv, - callee_conv: call_conv, - callee_pop_size: 0, - }), - }); - insts - } - - fn get_number_of_spillslots_for_value( - rc: RegClass, - _target_vector_bytes: u32, - isa_flags: &RiscvFlags, - ) -> u32 { - // We allocate in terms of 8-byte slots. - match rc { - RegClass::Int => 1, - RegClass::Float => 1, - RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32, - } - } - - fn get_machine_env(_flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv { - static MACHINE_ENV: OnceLock = OnceLock::new(); - MACHINE_ENV.get_or_init(create_reg_enviroment) - } - - fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet { - DEFAULT_CLOBBERS - } - - fn compute_frame_layout( - _call_conv: isa::CallConv, - flags: &settings::Flags, - _sig: &Signature, - regs: &[Writable], - is_leaf: bool, - incoming_args_size: u32, - tail_args_size: u32, - fixed_frame_storage_size: u32, - outgoing_args_size: u32, - ) -> FrameLayout { - let mut regs: Vec> = regs - .iter() - .cloned() - .filter(|r| DEFAULT_CALLEE_SAVES.contains(r.to_reg().into())) - .collect(); - - regs.sort_unstable(); - - // Compute clobber size. - let clobber_size = compute_clobber_size(®s); - - // Compute linkage frame size. - let setup_area_size = if flags.preserve_frame_pointers() - || !is_leaf - // The function arguments that are passed on the stack are addressed - // relative to the Frame Pointer. - || incoming_args_size > 0 - || clobber_size > 0 - || fixed_frame_storage_size > 0 - { - 16 // FP, LR - } else { - 0 - }; - - // Return FrameLayout structure. - FrameLayout { - incoming_args_size, - tail_args_size, - setup_area_size, - clobber_size, - fixed_frame_storage_size, - outgoing_args_size, - clobbered_callee_saves: regs, - } - } - - fn gen_inline_probestack( - insts: &mut SmallInstVec, - _call_conv: isa::CallConv, - frame_size: u32, - guard_size: u32, - ) { - // Unroll at most n consecutive probes, before falling back to using a loop - const PROBE_MAX_UNROLL: u32 = 3; - // Number of probes that we need to perform - let probe_count = align_to(frame_size, guard_size) / guard_size; - - // Must be a caller-saved register that is not an argument. - let tmp = Writable::from_reg(x_reg(28)); // t3 - - if probe_count <= PROBE_MAX_UNROLL { - Self::gen_probestack_unroll(insts, tmp, guard_size, probe_count) - } else { - insts.push(Inst::StackProbeLoop { guard_size, probe_count, tmp }); - } - } -} - -pub trait EmitReturnCall { - fn emit_return_call(mut self, ctx: &mut Lower, args: isle::ValueSlice); -} - -impl EmitReturnCall for Riscv64ABICallSite { - fn emit_return_call(mut self, ctx: &mut Lower, args: isle::ValueSlice) { - let new_stack_arg_size = - u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap(); - - ctx.abi_mut().accumulate_tail_args_size(new_stack_arg_size); - - // Put all arguments in registers and stack slots (within that newly - // allocated stack space). - self.emit_args(ctx, args); - self.emit_stack_ret_arg_for_tail_call(ctx); - - let dest = self.dest().clone(); - let uses = self.take_uses(); - - match dest { - CallDest::ExtName(name, RelocDistance::Near) => { - let info = Box::new(ReturnCallInfo { dest: name, uses, new_stack_arg_size }); - ctx.emit(Inst::ReturnCall { info }); - } - CallDest::ExtName(name, RelocDistance::Far) => { - let callee = ctx.alloc_tmp(ir::types::I64).only_reg().unwrap(); - ctx.emit(Inst::LoadExtName { rd: callee, name: Box::new(name), offset: 0 }); - let info = - Box::new(ReturnCallInfo { dest: callee.to_reg(), uses, new_stack_arg_size }); - ctx.emit(Inst::ReturnCallInd { info }); - } - CallDest::Reg(callee) => { - let info = Box::new(ReturnCallInfo { dest: callee, uses, new_stack_arg_size }); - ctx.emit(Inst::ReturnCallInd { info }); - } - } - } -} - -// NOTE: no V regs are callee save. -const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty() - // X Regs - .with(px_reg(2)) - .with(px_reg(8)) - .with(px_reg(9)) - .with(px_reg(18)) - .with(px_reg(19)) - .with(px_reg(20)) - .with(px_reg(21)) - .with(px_reg(22)) - .with(px_reg(23)) - .with(px_reg(24)) - .with(px_reg(25)) - .with(px_reg(26)) - .with(px_reg(27)) - // F Regs - .with(pf_reg(8)) - .with(pf_reg(18)) - .with(pf_reg(19)) - .with(pf_reg(20)) - .with(pf_reg(21)) - .with(pf_reg(22)) - .with(pf_reg(23)) - .with(pf_reg(24)) - .with(pf_reg(25)) - .with(pf_reg(26)) - .with(pf_reg(27)); - -fn compute_clobber_size(clobbers: &[Writable]) -> u32 { - let mut clobbered_size = 0; - for reg in clobbers { - match reg.to_reg().class() { - RegClass::Int => { - clobbered_size += 8; - } - RegClass::Float => { - clobbered_size += 8; - } - RegClass::Vector => unimplemented!("Vector Size Clobbered"), - } - } - align_to(clobbered_size, 16) -} - -const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty() - .with(px_reg(1)) - .with(px_reg(5)) - .with(px_reg(6)) - .with(px_reg(7)) - .with(px_reg(10)) - .with(px_reg(11)) - .with(px_reg(12)) - .with(px_reg(13)) - .with(px_reg(14)) - .with(px_reg(15)) - .with(px_reg(16)) - .with(px_reg(17)) - .with(px_reg(28)) - .with(px_reg(29)) - .with(px_reg(30)) - .with(px_reg(31)) - // F Regs - .with(pf_reg(0)) - .with(pf_reg(1)) - .with(pf_reg(2)) - .with(pf_reg(3)) - .with(pf_reg(4)) - .with(pf_reg(5)) - .with(pf_reg(6)) - .with(pf_reg(7)) - .with(pf_reg(9)) - .with(pf_reg(10)) - .with(pf_reg(11)) - .with(pf_reg(12)) - .with(pf_reg(13)) - .with(pf_reg(14)) - .with(pf_reg(15)) - .with(pf_reg(16)) - .with(pf_reg(17)) - .with(pf_reg(28)) - .with(pf_reg(29)) - .with(pf_reg(30)) - .with(pf_reg(31)) - // V Regs - All vector regs get clobbered - .with(pv_reg(0)) - .with(pv_reg(1)) - .with(pv_reg(2)) - .with(pv_reg(3)) - .with(pv_reg(4)) - .with(pv_reg(5)) - .with(pv_reg(6)) - .with(pv_reg(7)) - .with(pv_reg(8)) - .with(pv_reg(9)) - .with(pv_reg(10)) - .with(pv_reg(11)) - .with(pv_reg(12)) - .with(pv_reg(13)) - .with(pv_reg(14)) - .with(pv_reg(15)) - .with(pv_reg(16)) - .with(pv_reg(17)) - .with(pv_reg(18)) - .with(pv_reg(19)) - .with(pv_reg(20)) - .with(pv_reg(21)) - .with(pv_reg(22)) - .with(pv_reg(23)) - .with(pv_reg(24)) - .with(pv_reg(25)) - .with(pv_reg(26)) - .with(pv_reg(27)) - .with(pv_reg(28)) - .with(pv_reg(29)) - .with(pv_reg(30)) - .with(pv_reg(31)); - -fn create_reg_enviroment() -> MachineEnv { - // Some C Extension instructions can only use a subset of the registers. - // x8 - x15, f8 - f15, v8 - v15 so we should prefer to use those since - // they allow us to emit C instructions more often. - // - // In general the order of preference is: - // 1. Compressible Caller Saved registers. - // 2. Non-Compressible Caller Saved registers. - // 3. Compressible Callee Saved registers. - // 4. Non-Compressible Callee Saved registers. - - let preferred_regs_by_class: [Vec; 3] = { - let x_registers: Vec = (10..=15).map(px_reg).collect(); - let f_registers: Vec = (10..=15).map(pf_reg).collect(); - let v_registers: Vec = (8..=15).map(pv_reg).collect(); - - [x_registers, f_registers, v_registers] - }; - - let non_preferred_regs_by_class: [Vec; 3] = { - // x0 - x4 are special registers, so we don't want to use them. - // Omit x30 and x31 since they are the spilltmp registers. - - // Start with the Non-Compressible Caller Saved registers. - let x_registers: Vec = (5..=7) - .chain(16..=17) - .chain(28..=29) - // The first Callee Saved register is x9 since its Compressible - // Omit x8 since it's the frame pointer. - .chain(9..=9) - // The rest of the Callee Saved registers are Non-Compressible - .chain(18..=27) - .map(px_reg) - .collect(); - - // Prefer Caller Saved registers. - let f_registers: Vec = (0..=7) - .chain(16..=17) - .chain(28..=31) - // Once those are exhausted, we should prefer f8 and f9 since they are - // callee saved, but compressible. - .chain(8..=9) - .chain(18..=27) - .map(pf_reg) - .collect(); - - let v_registers = (0..=7).chain(16..=31).map(pv_reg).collect(); - - [x_registers, f_registers, v_registers] - }; - - MachineEnv { - preferred_regs_by_class, - non_preferred_regs_by_class, - fixed_stack_slots: vec![], - scratch_by_class: [None, None, None], - } -} - -impl Riscv64MachineDeps { - fn gen_probestack_unroll( - insts: &mut SmallInstVec, - tmp: Writable, - guard_size: u32, - probe_count: u32, - ) { - // When manually unrolling adjust the stack pointer and then write a zero - // to the stack at that offset. - // - // We do this because valgrind expects us to never write beyond the stack - // pointer and associated redzone. - // See: https://github.com/bytecodealliance/wasmtime/issues/7454 - - // Store the adjust amount in a register upfront, so we don't have to - // reload it for each probe. It's worth loading this as a negative and - // using an `add` instruction since we have compressed versions of `add` - // but not the `sub` instruction. - insts.extend(Inst::load_constant_u64(tmp, (-(guard_size as i64)) as u64)); - - for _ in 0..probe_count { - insts.push(Inst::AluRRR { - alu_op: AluOPRRR::Add, - rd: writable_stack_reg(), - rs1: stack_reg(), - rs2: tmp.to_reg(), - }); - - insts.push(Inst::gen_store(AMode::SPOffset(0), zero_reg(), I32, MemFlags::trusted())); - } - - // Restore the stack pointer to its original value - insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32)); - } -} diff --git a/hbcb/src/inst.isle b/hbcb/src/inst.isle deleted file mode 100644 index f6e4570..0000000 --- a/hbcb/src/inst.isle +++ /dev/null @@ -1,3128 +0,0 @@ -;; Instruction formats. -(type MInst - (enum - ;; A no-op of zero size. - (Nop0) - (Nop4) - - ;; load immediate - (Lui - (rd WritableReg) - (imm Imm20)) - - (LoadInlineConst - (rd WritableReg) - (ty Type) - (imm u64)) - - (Auipc - (rd WritableReg) - (imm Imm20)) - - (Fli - (ty Type) - (imm FliConstant) - (rd WritableReg)) - - ;; An ALU operation with one register sources and a register destination. - (FpuRR - (alu_op FpuOPRR) - (width FpuOPWidth) - (frm FRM) - (rd WritableReg) - (rs Reg)) - - - ;; An ALU operation with two register sources and a register destination. - (AluRRR - (alu_op AluOPRRR) - (rd WritableReg) - (rs1 Reg) - (rs2 Reg)) - - ;; An ALU operation with two register sources and a register destination. - (FpuRRR - (alu_op FpuOPRRR) - (width FpuOPWidth) - (frm FRM) - (rd WritableReg) - (rs1 Reg) - (rs2 Reg)) - - ;; An ALU operation with three register sources and a register destination. - (FpuRRRR - (alu_op FpuOPRRRR) - (width FpuOPWidth) - (frm FRM) - (rd WritableReg) - (rs1 Reg) - (rs2 Reg) - (rs3 Reg)) - - ;; An ALU operation with a register source and an immediate-12 source, and a register - ;; destination. - (AluRRImm12 - (alu_op AluOPRRI) - (rd WritableReg) - (rs Reg) - (imm12 Imm12)) - - ;; A CSR Reading or Writing instruction with a register source and a register destination. - (CsrReg - (op CsrRegOP) - (rd WritableReg) - (rs Reg) - (csr CSR)) - - ;; A CSR Writing instruction with an immediate source and a register destination. - (CsrImm - (op CsrImmOP) - (rd WritableReg) - (imm UImm5) - (csr CSR)) - - ;; An load - (Load - (rd WritableReg) - (op LoadOP) - (flags MemFlags) - (from AMode)) - ;; An Store - (Store - (to AMode) - (op StoreOP) - (flags MemFlags) - (src Reg)) - - ;; A pseudo-instruction that captures register arguments in vregs. - (Args - (args VecArgPair)) - - ;; A pseudo-instruction that moves vregs to return registers. - (Rets - (rets VecRetPair)) - - (Ret) - - (Extend - (rd WritableReg) - (rn Reg) - (signed bool) - (from_bits u8) - (to_bits u8)) - - (Call (info BoxCallInfo)) - - ;; A machine indirect-call instruction. - (CallInd (info BoxCallIndInfo)) - - ;; A direct return-call macro instruction. - (ReturnCall (info BoxReturnCallInfo)) - - ;; An indirect return-call macro instruction. - (ReturnCallInd (info BoxReturnCallIndInfo)) - - ;; Emits a trap with the given trap code if the comparison succeeds - (TrapIf - (rs1 Reg) - (rs2 Reg) - (cc IntCC) - (trap_code TrapCode)) - - (Jal - ;; (rd WritableReg) don't use - (label MachLabel)) - - (CondBr - (taken CondBrTarget) - (not_taken CondBrTarget) - (kind IntegerCompare)) - - ;; Load an inline symbol reference. - (LoadExtName - (rd WritableReg) - (name BoxExternalName) - (offset i64)) - - ;; Load a TLS symbol address - (ElfTlsGetAddr - (rd WritableReg) - (name BoxExternalName)) - - ;; Load address referenced by `mem` into `rd`. - (LoadAddr - (rd WritableReg) - (mem AMode)) - - ;; A MOV instruction. These are encoded as OrR's (AluRRR form) but we - ;; keep them separate at the `Inst` level for better pretty-printing - ;; and faster `is_move()` logic. - (Mov - (rd WritableReg) - (rm Reg) - (ty Type)) - - ;; A MOV instruction, but where the source register is a non-allocatable - ;; PReg. It's important that the register be non-allocatable, as regalloc2 - ;; will not see it as used. - (MovFromPReg - (rd WritableReg) - (rm PReg)) - - (Fence - (pred FenceReq) - (succ FenceReq)) - - (EBreak) - - ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at - ;; runtime. - (Udf - (trap_code TrapCode)) - ;; a jump and link register operation - (Jalr - ;;Plain unconditional jumps (assembler pseudo-op J) are encoded as a JAL with rd=x0. - (rd WritableReg) - (base Reg) - (offset Imm12)) - - ;; atomic operations. - (Atomic - (op AtomicOP) - (rd WritableReg) - (addr Reg) - (src Reg) - (amo AMO)) - ;; an atomic store - (AtomicStore - (src Reg) - (ty Type) - (p Reg)) - ;; an atomic load. - (AtomicLoad - (rd WritableReg) - (ty Type) - (p Reg)) - - ;; an atomic nand need using loop to implement. - (AtomicRmwLoop - (offset Reg) - (op AtomicRmwOp) - (dst WritableReg) - (ty Type) - (p Reg) - (x Reg) - (t0 WritableReg)) - - ;; select x or y base on condition - (Select - (dst WritableValueRegs) - (condition IntegerCompare) - (x ValueRegs) - (y ValueRegs)) - - (BrTable - (index Reg) - (tmp1 WritableReg) - (tmp2 WritableReg) - (targets VecMachLabel)) - - ;; atomic compare and set operation - (AtomicCas - (offset Reg) - (t0 WritableReg) - (dst WritableReg) - (e Reg) - (addr Reg) - (v Reg) - (ty Type)) - - (RawData (data VecU8)) - - ;; An unwind pseudo-instruction. - (Unwind - (inst UnwindInst)) - - ;; A dummy use, useful to keep a value alive. - (DummyUse - (reg Reg)) - - ;; popcnt if target doesn't support extension B - ;; use iteration to implement. - (Popcnt - (sum WritableReg) - (step WritableReg) - (tmp WritableReg) - (rs Reg) - (ty Type)) - - ;;; counting leading or trailing zeros. - (Cltz - ;; leading or trailing. - (leading bool) - (sum WritableReg) - (step WritableReg) - (tmp WritableReg) - (rs Reg) - (ty Type)) - - (Brev8 - (rs Reg) - (ty Type) - (step WritableReg) - (tmp WritableReg) - (tmp2 WritableReg) - (rd WritableReg)) - (StackProbeLoop - (guard_size u32) - (probe_count u32) - (tmp WritableReg)) - - (VecAluRRRR - (op VecAluOpRRRR) - (vd WritableReg) - (vd_src Reg) - (vs2 Reg) - (vs1 Reg) - (mask VecOpMasking) - (vstate VState)) - - (VecAluRRRImm5 - (op VecAluOpRRRImm5) - (vd WritableReg) - (vd_src Reg) - (vs2 Reg) - (imm Imm5) - (mask VecOpMasking) - (vstate VState)) - - (VecAluRRR - (op VecAluOpRRR) - (vd WritableReg) - (vs2 Reg) - (vs1 Reg) - (mask VecOpMasking) - (vstate VState)) - - (VecAluRRImm5 - (op VecAluOpRRImm5) - (vd WritableReg) - (vs2 Reg) - (imm Imm5) - (mask VecOpMasking) - (vstate VState)) - - (VecAluRR - (op VecAluOpRR) - (vd WritableReg) - (vs Reg) - (mask VecOpMasking) - (vstate VState)) - - (VecAluRImm5 - (op VecAluOpRImm5) - (vd WritableReg) - (imm Imm5) - (mask VecOpMasking) - (vstate VState)) - - (VecSetState - (rd WritableReg) - (vstate VState)) - - (VecLoad - (eew VecElementWidth) - (to WritableReg) - (from VecAMode) - (flags MemFlags) - (mask VecOpMasking) - (vstate VState)) - - (VecStore - (eew VecElementWidth) - (to VecAMode) - (from Reg) - (flags MemFlags) - (mask VecOpMasking) - (vstate VState)) -)) - -(type AtomicOP (enum - (LrW) - (ScW) - (AmoswapW) - (AmoaddW) - (AmoxorW) - (AmoandW) - (AmoorW) - (AmominW) - (AmomaxW) - (AmominuW) - (AmomaxuW) - (LrD) - (ScD) - (AmoswapD) - (AmoaddD) - (AmoxorD) - (AmoandD) - (AmoorD) - (AmominD) - (AmomaxD) - (AmominuD) - (AmomaxuD) -)) - -(type FpuOPRRRR (enum - (Fmadd) - (Fmsub) - (Fnmsub) - (Fnmadd) -)) - -(type FClassResult (enum - ;;0 rs1 is −∞. - (NegInfinite) - ;; 1 rs1 is a negative normal number. - (NegNormal) - ;; 2 rs1 is a negative subnormal number. - (NegSubNormal) - ;; 3 rs1 is −0. - (NegZero) - ;; 4 rs1 is +0. - (PosZero) - ;; 5 rs1 is a positive subnormal number. - (PosSubNormal) - ;; 6 rs1 is a positive normal number. - (PosNormal) - ;; 7 rs1 is +∞. - (PosInfinite) - ;; 8 rs1 is a signaling NaN. - (SNaN) - ;; 9 rs1 is a quiet NaN. - (QNaN) -)) - -(type FliConstant (primitive FliConstant)) - -(type FpuOPWidth (enum - (S) - (D) - (H) - (Q) -)) - -(decl pure fpu_op_width_from_ty (Type) FpuOPWidth) -(extern constructor fpu_op_width_from_ty fpu_op_width_from_ty) -(convert Type FpuOPWidth fpu_op_width_from_ty) - -(type FpuOPRR (enum - (Fsqrt) ;; fsqrt.{fmt} - (Fclass) ;; fclass.{fmt} - (FcvtWFmt) ;; fcvt.w.{fmt} - (FcvtWuFmt) ;; fcvt.wu.{fmt} - (FcvtLFmt) ;; fcvt.l.{fmt} - (FcvtLuFmt) ;; fcvt.lu.{fmt} - (FcvtFmtW) ;; fcvt.{fmt}.w - (FcvtFmtWu) ;; fcvt.{fmt}.wu - (FcvtFmtL) ;; fcvt.{fmt}.l - (FcvtFmtLu) ;; fcvt.{fmt}.lu - (FmvXFmt) ;; fmv.x.{fmt} - (FmvFmtX) ;; fmv.{fmt}.x - (FcvtSD) ;; fcvt.s.d - (FcvtDS) ;; fcvt.d.s - - ;; Zfa Extension - (Fround) ;; fround.{fmt} -)) - -(type LoadOP (enum - (Lb) - (Lh) - (Lw) - (Lbu) - (Lhu) - (Lwu) - (Ld) - (Flh) - (Flw) - (Fld) -)) - -(type StoreOP (enum - (Sb) - (Sh) - (Sw) - (Sd) - (Fsh) - (Fsw) - (Fsd) -)) - -(type AluOPRRR (enum - ;; base set - (Add) - (Sub) - (Sll) - (Slt) - (SltU) - (Sgt) - (Sgtu) - (Xor) - (Srl) - (Sra) - (Or) - (And) - - ;; RV64I Base Instruction Set (in addition to RV32I) - (Addw) - (Subw) - (Sllw) - (Srlw) - (Sraw) - - - ;;RV32M Standard Extension - (Mul) - (Mulh) - (Mulhsu) - (Mulhu) - (Div) - (DivU) - (Rem) - (RemU) - - ;; RV64M Standard Extension (in addition to RV32M) - (Mulw) - (Divw) - (Divuw) - (Remw) - (Remuw) - - ;; Zba: Address Generation Instructions - (Adduw) - (Sh1add) - (Sh1adduw) - (Sh2add) - (Sh2adduw) - (Sh3add) - (Sh3adduw) - - ;; Zbb: Bit Manipulation Instructions - (Andn) - (Orn) - (Xnor) - (Max) - (Maxu) - (Min) - (Minu) - (Rol) - (Rolw) - (Ror) - (Rorw) - - ;; Zbs: Single-bit instructions - (Bclr) - (Bext) - (Binv) - (Bset) - - ;; Zbc: Carry-less multiplication - (Clmul) - (Clmulh) - (Clmulr) - - ;; Zbkb: Bit-manipulation for Cryptography - (Pack) - (Packw) - (Packh) - - ;; ZiCond: Integer Conditional Operations - (CzeroEqz) - (CzeroNez) -)) - - -(type FpuOPRRR (enum - (Fadd) - (Fsub) - (Fmul) - (Fdiv) - (Fsgnj) - (Fsgnjn) - (Fsgnjx) - (Fmin) - (Fmax) - (Feq) - (Flt) - (Fle) - - ;; Zfa Extension - (Fminm) - (Fmaxm) -)) - - - -(type AluOPRRI (enum - ;; Base ISA - (Addi) - (Slti) - (SltiU) - (Xori) - (Ori) - (Andi) - (Slli) - (Srli) - (Srai) - (Addiw) - (Slliw) - (SrliW) - (Sraiw) - - ;; Zba: Address Generation Instructions - (SlliUw) - - ;; Zbb: Bit Manipulation Instructions - (Clz) - (Clzw) - (Ctz) - (Ctzw) - (Cpop) - (Cpopw) - (Sextb) - (Sexth) - (Zexth) - (Rori) - (Roriw) - (Rev8) - (Brev8) - (Orcb) - - ;; Zbs: Single-bit instructions - (Bclri) - (Bexti) - (Binvi) - (Bseti) -)) - -(type COpcodeSpace (enum - (C0) - (C1) - (C2) -)) - -;; Opcodes for the CR compressed instruction format -(type CrOp (enum - (CMv) - (CAdd) - (CJr) - (CJalr) - ;; c.ebreak technically isn't a CR format instruction, but it's encoding - ;; lines up with this format. - (CEbreak) -)) - -;; Opcodes for the CA compressed instruction format -(type CaOp (enum - (CAnd) - (COr) - (CXor) - (CSub) - (CAddw) - (CSubw) - (CMul) -)) - -;; Opcodes for the CJ compressed instruction format -(type CjOp (enum - (CJ) -)) - -;; Opcodes for the CI compressed instruction format -(type CiOp (enum - (CAddi) - (CAddiw) - (CAddi16sp) - (CSlli) - (CLi) - (CLui) - (CLwsp) - (CLdsp) - (CFldsp) -)) - -;; Opcodes for the CIW compressed instruction format -(type CiwOp (enum - (CAddi4spn) -)) - -;; Opcodes for the CB compressed instruction format -(type CbOp (enum - (CSrli) - (CSrai) - (CAndi) -)) - -;; Opcodes for the CSS compressed instruction format -(type CssOp (enum - (CSwsp) - (CSdsp) - (CFsdsp) -)) - -;; Opcodes for the CS compressed instruction format -(type CsOp (enum - (CSw) - (CSd) - (CFsd) -)) - -;; Opcodes for the CL compressed instruction format -(type ClOp (enum - (CLw) - (CLd) - (CFld) -)) - -;; Opcodes for the CSZN compressed instruction format -(type CsznOp (enum - (CNot) - (CZextb) - (CZexth) - (CZextw) - (CSextb) - (CSexth) -)) - -;; This is a mix of all Zcb memory addressing instructions -;; -;; Technically they are split across 4 different formats. -;; But they are all very similar, so we just group them all together. -(type ZcbMemOp (enum - (CLbu) - (CLhu) - (CLh) - (CSb) - (CSh) -)) - - -(type CsrRegOP (enum - ;; Atomic Read/Write CSR - (CsrRW) - ;; Atomic Read and Set Bits in CSR - (CsrRS) - ;; Atomic Read and Clear Bits in CSR - (CsrRC) -)) - -(type CsrImmOP (enum - ;; Atomic Read/Write CSR (Immediate Source) - (CsrRWI) - ;; Atomic Read and Set Bits in CSR (Immediate Source) - (CsrRSI) - ;; Atomic Read and Clear Bits in CSR (Immediate Source) - (CsrRCI) -)) - -;; Enum of the known CSR registers -(type CSR (enum - ;; Floating-Point Dynamic Rounding Mode - (Frm) -)) - - -(type FRM (enum - ;; Round to Nearest, ties to Even - (RNE) - ;; Round towards Zero - (RTZ) - ;; Round Down (towards −∞) - (RDN) - ;; Round Up (towards +∞) - (RUP) - ;; Round to Nearest, ties to Max Magnitude - (RMM) - ;; In instruction’s rm field, selects dynamic rounding mode; - ;;In Rounding Mode register, Invalid. - (Fcsr) -)) - -(decl pure frm_bits (FRM) UImm5) -(extern constructor frm_bits frm_bits) -(convert FRM UImm5 frm_bits) - -(type FFlagsException (enum - ;; Invalid Operation - (NV) - ;; Divide by Zero - (DZ) - ;; Overflow - (OF) - ;; Underflow - (UF) - ;; Inexact - (NX) -)) - -;;;; input output read write -;;;; SI SO SR SW -;;;; PI PO PR PW -;;;; lowest four bit are used. -(type FenceReq (primitive u8)) - -(type BoxCallInfo (primitive BoxCallInfo)) -(type BoxCallIndInfo (primitive BoxCallIndInfo)) -(type BoxReturnCallInfo (primitive BoxReturnCallInfo)) -(type BoxReturnCallIndInfo (primitive BoxReturnCallIndInfo)) -(type IntegerCompare (primitive IntegerCompare)) -(type AMode (primitive AMode)) -(type OptionReg (primitive OptionReg)) -(type OptionImm12 (primitive OptionImm12)) -(type OptionUimm5 (primitive OptionUimm5)) -(type Imm12 (primitive Imm12)) -(type UImm5 (primitive UImm5)) -(type Imm5 (primitive Imm5)) -(type Imm20 (primitive Imm20)) -(type Imm3 (primitive Imm3)) -(type CondBrTarget (primitive CondBrTarget)) -(type VecU8 (primitive VecU8)) -(type AMO (primitive AMO)) -(type VecMachLabel extern (enum)) - - -;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(type XReg (primitive XReg)) -(type WritableXReg (primitive WritableXReg)) -(type FReg (primitive FReg)) -(type WritableFReg (primitive WritableFReg)) -(type VReg (primitive VReg)) -(type WritableVReg (primitive WritableVReg)) - -;; Construct a new `XReg` from a `Reg`. -;; -;; Asserts that the register has a Integer RegClass. -(decl xreg_new (Reg) XReg) -(extern constructor xreg_new xreg_new) -(convert Reg XReg xreg_new) - -;; Construct a new `WritableXReg` from a `WritableReg`. -;; -;; Asserts that the register has a Integer RegClass. -(decl writable_xreg_new (WritableReg) WritableXReg) -(extern constructor writable_xreg_new writable_xreg_new) -(convert WritableReg WritableXReg writable_xreg_new) - -;; Put a value into a XReg. -;; -;; Asserts that the value goes into a XReg. -(decl put_in_xreg (Value) XReg) -(rule (put_in_xreg val) (xreg_new (put_in_reg val))) -(convert Value XReg put_in_xreg) - -;; Construct an `InstOutput` out of a single XReg register. -(decl output_xreg (XReg) InstOutput) -(rule (output_xreg x) (output_reg x)) -(convert XReg InstOutput output_xreg) - -;; Convert a `WritableXReg` to an `XReg`. -(decl pure writable_xreg_to_xreg (WritableXReg) XReg) -(extern constructor writable_xreg_to_xreg writable_xreg_to_xreg) -(convert WritableXReg XReg writable_xreg_to_xreg) - -;; Convert a `WritableXReg` to an `WritableReg`. -(decl pure writable_xreg_to_writable_reg (WritableXReg) WritableReg) -(extern constructor writable_xreg_to_writable_reg writable_xreg_to_writable_reg) -(convert WritableXReg WritableReg writable_xreg_to_writable_reg) - -;; Convert a `WritableXReg` to an `Reg`. -(decl pure writable_xreg_to_reg (WritableXReg) Reg) -(rule (writable_xreg_to_reg x) (writable_xreg_to_writable_reg x)) -(convert WritableXReg Reg writable_xreg_to_reg) - -;; Convert an `XReg` to a `Reg`. -(decl pure xreg_to_reg (XReg) Reg) -(extern constructor xreg_to_reg xreg_to_reg) -(convert XReg Reg xreg_to_reg) - -;; Convert a `XReg` to a `ValueRegs`. -(decl xreg_to_value_regs (XReg) ValueRegs) -(rule (xreg_to_value_regs x) (value_reg x)) -(convert XReg ValueRegs xreg_to_reg) - -;; Convert a `WritableXReg` to a `ValueRegs`. -(decl writable_xreg_to_value_regs (WritableXReg) ValueRegs) -(rule (writable_xreg_to_value_regs x) (value_reg x)) -(convert WritableXReg ValueRegs writable_xreg_to_value_regs) - -;; Allocates a new `WritableXReg`. -(decl temp_writable_xreg () WritableXReg) -(rule (temp_writable_xreg) (temp_writable_reg $I64)) - - -;; Construct a new `FReg` from a `Reg`. -;; -;; Asserts that the register has a Float RegClass. -(decl freg_new (Reg) FReg) -(extern constructor freg_new freg_new) -(convert Reg FReg freg_new) - -;; Construct a new `WritableFReg` from a `WritableReg`. -;; -;; Asserts that the register has a Float RegClass. -(decl writable_freg_new (WritableReg) WritableFReg) -(extern constructor writable_freg_new writable_freg_new) -(convert WritableReg WritableFReg writable_freg_new) - -;; Put a value into a FReg. -;; -;; Asserts that the value goes into a FReg. -(decl put_in_freg (Value) FReg) -(rule (put_in_freg val) (freg_new (put_in_reg val))) -(convert Value FReg put_in_freg) - -;; Construct an `InstOutput` out of a single FReg register. -(decl output_freg (FReg) InstOutput) -(rule (output_freg x) (output_reg x)) -(convert FReg InstOutput output_freg) - -;; Convert a `WritableFReg` to an `FReg`. -(decl pure writable_freg_to_freg (WritableFReg) FReg) -(extern constructor writable_freg_to_freg writable_freg_to_freg) -(convert WritableFReg FReg writable_freg_to_freg) - -;; Convert a `WritableFReg` to an `WritableReg`. -(decl pure writable_freg_to_writable_reg (WritableFReg) WritableReg) -(extern constructor writable_freg_to_writable_reg writable_freg_to_writable_reg) -(convert WritableFReg WritableReg writable_freg_to_writable_reg) - -;; Convert a `WritableFReg` to an `Reg`. -(decl pure writable_freg_to_reg (WritableFReg) Reg) -(rule (writable_freg_to_reg x) (writable_freg_to_writable_reg x)) -(convert WritableFReg Reg writable_freg_to_reg) - -;; Convert an `FReg` to a `Reg`. -(decl pure freg_to_reg (FReg) Reg) -(extern constructor freg_to_reg freg_to_reg) -(convert FReg Reg freg_to_reg) - -;; Convert a `FReg` to a `ValueRegs`. -(decl freg_to_value_regs (FReg) ValueRegs) -(rule (freg_to_value_regs x) (value_reg x)) -(convert FReg ValueRegs xreg_to_reg) - -;; Convert a `WritableFReg` to a `ValueRegs`. -(decl writable_freg_to_value_regs (WritableFReg) ValueRegs) -(rule (writable_freg_to_value_regs x) (value_reg x)) -(convert WritableFReg ValueRegs writable_freg_to_value_regs) - -;; Allocates a new `WritableFReg`. -(decl temp_writable_freg () WritableFReg) -(rule (temp_writable_freg) (temp_writable_reg $F64)) - - - -;; Construct a new `VReg` from a `Reg`. -;; -;; Asserts that the register has a Vector RegClass. -(decl vreg_new (Reg) VReg) -(extern constructor vreg_new vreg_new) -(convert Reg VReg vreg_new) - -;; Construct a new `WritableVReg` from a `WritableReg`. -;; -;; Asserts that the register has a Vector RegClass. -(decl writable_vreg_new (WritableReg) WritableVReg) -(extern constructor writable_vreg_new writable_vreg_new) -(convert WritableReg WritableVReg writable_vreg_new) - -;; Put a value into a VReg. -;; -;; Asserts that the value goes into a VReg. -(decl put_in_vreg (Value) VReg) -(rule (put_in_vreg val) (vreg_new (put_in_reg val))) -(convert Value VReg put_in_vreg) - -;; Construct an `InstOutput` out of a single VReg register. -(decl output_vreg (VReg) InstOutput) -(rule (output_vreg x) (output_reg x)) -(convert VReg InstOutput output_vreg) - -;; Convert a `WritableVReg` to an `VReg`. -(decl pure writable_vreg_to_vreg (WritableVReg) VReg) -(extern constructor writable_vreg_to_vreg writable_vreg_to_vreg) -(convert WritableVReg VReg writable_vreg_to_vreg) - -;; Convert a `WritableVReg` to an `WritableReg`. -(decl pure writable_vreg_to_writable_reg (WritableVReg) WritableReg) -(extern constructor writable_vreg_to_writable_reg writable_vreg_to_writable_reg) -(convert WritableVReg WritableReg writable_vreg_to_writable_reg) - -;; Convert a `WritableVReg` to an `Reg`. -(decl pure writable_vreg_to_reg (WritableVReg) Reg) -(rule (writable_vreg_to_reg x) (writable_vreg_to_writable_reg x)) -(convert WritableVReg Reg writable_vreg_to_reg) - -;; Convert an `VReg` to a `Reg`. -(decl pure vreg_to_reg (VReg) Reg) -(extern constructor vreg_to_reg vreg_to_reg) -(convert VReg Reg vreg_to_reg) - -;; Convert a `VReg` to a `ValueRegs`. -(decl vreg_to_value_regs (VReg) ValueRegs) -(rule (vreg_to_value_regs x) (value_reg x)) -(convert VReg ValueRegs xreg_to_reg) - -;; Convert a `WritableVReg` to a `ValueRegs`. -(decl writable_vreg_to_value_regs (WritableVReg) ValueRegs) -(rule (writable_vreg_to_value_regs x) (value_reg x)) -(convert WritableVReg ValueRegs writable_vreg_to_value_regs) - -;; Allocates a new `WritableVReg`. -(decl temp_writable_vreg () WritableVReg) -(rule (temp_writable_vreg) (temp_writable_reg $I8X16)) - - -;; Converters - -(convert u8 i32 u8_as_i32) -(decl u8_as_i32 (u8) i32) -(extern constructor u8_as_i32 u8_as_i32) - -;; ISA Extension helpers - -(decl pure has_m () bool) -(extern constructor has_m has_m) - -(decl pure has_v () bool) -(extern constructor has_v has_v) - -(decl pure has_zfa () bool) -(extern constructor has_zfa has_zfa) - -(decl pure has_zfh () bool) -(extern constructor has_zfh has_zfh) - -(decl pure has_zbkb () bool) -(extern constructor has_zbkb has_zbkb) - -(decl pure has_zba () bool) -(extern constructor has_zba has_zba) - -(decl pure has_zbb () bool) -(extern constructor has_zbb has_zbb) - -(decl pure has_zbc () bool) -(extern constructor has_zbc has_zbc) - -(decl pure has_zbs () bool) -(extern constructor has_zbs has_zbs) - -(decl pure has_zicond () bool) -(extern constructor has_zicond has_zicond) - - -;;;; Type Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Helper that matches any supported type. This extractor checks the ISA flags -;; to determine if the type is supported. -(decl ty_supported (Type) Type) -(extern extractor ty_supported ty_supported) - -;; Helper that matches any scalar floating point type -(decl ty_supported_float (Type) Type) -(extern extractor ty_supported_float ty_supported_float) - -;; Helper that matches any supported vector type -(decl ty_supported_vec (Type) Type) -(extern extractor ty_supported_vec ty_supported_vec) - - -;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; RV32I Base Integer Instruction Set - -;; Helper for emitting the `add` instruction. -;; rd ← rs1 + rs2 -(decl rv_add (XReg XReg) XReg) -(rule (rv_add rs1 rs2) - (alu_rrr (AluOPRRR.Add) rs1 rs2)) - -;; Helper for emitting the `addi` ("Add Immediate") instruction. -;; rd ← rs1 + sext(imm) -(decl rv_addi (XReg Imm12) XReg) -(rule (rv_addi rs1 imm) - (alu_rr_imm12 (AluOPRRI.Addi) rs1 imm)) - -;; Helper for emitting the `sub` instruction. -;; rd ← rs1 - rs2 -(decl rv_sub (XReg XReg) XReg) -(rule (rv_sub rs1 rs2) - (alu_rrr (AluOPRRR.Sub) rs1 rs2)) - -;; Helper for emitting the `neg` instruction. -;; This instruction is a mnemonic for `sub rd, zero, rs1`. -(decl rv_neg (XReg) XReg) -(rule (rv_neg rs1) - (alu_rrr (AluOPRRR.Sub) (zero_reg) rs1)) - -;; Helper for emitting the `sll` ("Shift Left Logical") instruction. -;; rd ← rs1 << rs2 -(decl rv_sll (XReg XReg) XReg) -(rule (rv_sll rs1 rs2) - (alu_rrr (AluOPRRR.Sll) rs1 rs2)) - -;; Helper for emitting the `slli` ("Shift Left Logical Immediate") instruction. -;; rd ← rs1 << uext(imm) -(decl rv_slli (XReg Imm12) XReg) -(rule (rv_slli rs1 imm) - (alu_rr_imm12 (AluOPRRI.Slli) rs1 imm)) - -;; Helper for emitting the `srl` ("Shift Right Logical") instruction. -;; rd ← rs1 >> rs2 -(decl rv_srl (XReg XReg) XReg) -(rule (rv_srl rs1 rs2) - (alu_rrr (AluOPRRR.Srl) rs1 rs2)) - -;; Helper for emitting the `srli` ("Shift Right Logical Immediate") instruction. -;; rd ← rs1 >> uext(imm) -(decl rv_srli (XReg Imm12) XReg) -(rule (rv_srli rs1 imm) - (alu_rr_imm12 (AluOPRRI.Srli) rs1 imm)) - -;; Helper for emitting the `sra` ("Shift Right Arithmetic") instruction. -;; rd ← rs1 >> rs2 -(decl rv_sra (XReg XReg) XReg) -(rule (rv_sra rs1 rs2) - (alu_rrr (AluOPRRR.Sra) rs1 rs2)) - -;; Helper for emitting the `srai` ("Shift Right Arithmetic Immediate") instruction. -;; rd ← rs1 >> uext(imm) -(decl rv_srai (XReg Imm12) XReg) -(rule (rv_srai rs1 imm) - (alu_rr_imm12 (AluOPRRI.Srai) rs1 imm)) - -;; Helper for emitting the `or` instruction. -;; rd ← rs1 ∨ rs2 -(decl rv_or (XReg XReg) XReg) -(rule (rv_or rs1 rs2) - (alu_rrr (AluOPRRR.Or) rs1 rs2)) - -;; Helper for emitting the `ori` ("Or Immediate") instruction. -;; rd ← rs1 ∨ uext(imm) -(decl rv_ori (XReg Imm12) XReg) -(rule (rv_ori rs1 imm) - (alu_rr_imm12 (AluOPRRI.Ori) rs1 imm)) - -;; Helper for emitting the `xor` instruction. -;; rd ← rs1 ⊕ rs2 -(decl rv_xor (XReg XReg) XReg) -(rule (rv_xor rs1 rs2) - (alu_rrr (AluOPRRR.Xor) rs1 rs2)) - -;; Helper for emitting the `xori` ("Exclusive Or Immediate") instruction. -;; rd ← rs1 ⊕ uext(imm) -(decl rv_xori (XReg Imm12) XReg) -(rule (rv_xori rs1 imm) - (alu_rr_imm12 (AluOPRRI.Xori) rs1 imm)) - -;; Helper for emitting the `not` instruction. -;; This instruction is a mnemonic for `xori rd, rs1, -1`. -(decl rv_not (XReg) XReg) -(rule (rv_not rs1) - (rv_xori rs1 (imm12_const -1))) - -;; Helper for emitting the `and` instruction. -;; rd ← rs1 ∧ rs2 -(decl rv_and (XReg XReg) XReg) -(rule (rv_and rs1 rs2) - (alu_rrr (AluOPRRR.And) rs1 rs2)) - -;; Helper for emitting the `andi` ("And Immediate") instruction. -;; rd ← rs1 ∧ uext(imm) -(decl rv_andi (XReg Imm12) XReg) -(rule (rv_andi rs1 imm) - (alu_rr_imm12 (AluOPRRI.Andi) rs1 imm)) - -;; Helper for emitting the `slt` ("Set Less Than") instruction. -;; rd ← rs1 < rs2 -(decl rv_slt (XReg XReg) XReg) -(rule (rv_slt rs1 rs2) - (alu_rrr (AluOPRRR.Slt) rs1 rs2)) - -;; Helper for emitting the `sltu` ("Set Less Than Unsigned") instruction. -;; rd ← rs1 < rs2 -(decl rv_sltu (XReg XReg) XReg) -(rule (rv_sltu rs1 rs2) - (alu_rrr (AluOPRRR.SltU) rs1 rs2)) - -;; Helper for emitting the `snez` instruction. -;; This instruction is a mnemonic for `sltu rd, zero, rs`. -(decl rv_snez (XReg) XReg) -(rule (rv_snez rs1) - (rv_sltu (zero_reg) rs1)) - -;; Helper for emitting the `slti` ("Set Less Than Immediate") instruction. -;; rd ← rs1 < imm -(decl rv_slti (XReg Imm12) XReg) -(rule (rv_slti rs1 imm) - (alu_rr_imm12 (AluOPRRI.Slti) rs1 imm)) - -;; Helper for emitting the `sltiu` ("Set Less Than Immediate Unsigned") instruction. -;; rd ← rs1 < imm -(decl rv_sltiu (XReg Imm12) XReg) -(rule (rv_sltiu rs1 imm) - (alu_rr_imm12 (AluOPRRI.SltiU) rs1 imm)) - -;; Helper for emitting the `seqz` instruction. -;; This instruction is a mnemonic for `sltiu rd, rs, 1`. -(decl rv_seqz (XReg) XReg) -(rule (rv_seqz rs1) - (rv_sltiu rs1 (imm12_const 1))) - - -;; RV64I Base Integer Instruction Set -;; Unlike RV32I instructions these are only present in the 64bit ISA - -;; Helper for emitting the `addw` ("Add Word") instruction. -;; rd ← sext32(rs1) + sext32(rs2) -(decl rv_addw (XReg XReg) XReg) -(rule (rv_addw rs1 rs2) - (alu_rrr (AluOPRRR.Addw) rs1 rs2)) - -;; Helper for emitting the `addiw` ("Add Word Immediate") instruction. -;; rd ← sext32(rs1) + imm -(decl rv_addiw (XReg Imm12) XReg) -(rule (rv_addiw rs1 imm) - (alu_rr_imm12 (AluOPRRI.Addiw) rs1 imm)) - -;; Helper for emitting the `sext.w` ("Sign Extend Word") instruction. -;; This instruction is a mnemonic for `addiw rd, rs, zero`. -(decl rv_sextw (XReg) XReg) -(rule (rv_sextw rs1) - (rv_addiw rs1 (imm12_const 0))) - -;; Helper for emitting the `subw` ("Subtract Word") instruction. -;; rd ← sext32(rs1) - sext32(rs2) -(decl rv_subw (XReg XReg) XReg) -(rule (rv_subw rs1 rs2) - (alu_rrr (AluOPRRR.Subw) rs1 rs2)) - -;; Helper for emitting the `sllw` ("Shift Left Logical Word") instruction. -;; rd ← sext32(uext32(rs1) << rs2) -(decl rv_sllw (XReg XReg) XReg) -(rule (rv_sllw rs1 rs2) - (alu_rrr (AluOPRRR.Sllw) rs1 rs2)) - -;; Helper for emitting the `slliw` ("Shift Left Logical Immediate Word") instruction. -;; rd ← sext32(uext32(rs1) << imm) -(decl rv_slliw (XReg Imm12) XReg) -(rule (rv_slliw rs1 imm) - (alu_rr_imm12 (AluOPRRI.Slliw) rs1 imm)) - -;; Helper for emitting the `srlw` ("Shift Right Logical Word") instruction. -;; rd ← sext32(uext32(rs1) >> rs2) -(decl rv_srlw (XReg XReg) XReg) -(rule (rv_srlw rs1 rs2) - (alu_rrr (AluOPRRR.Srlw) rs1 rs2)) - -;; Helper for emitting the `srliw` ("Shift Right Logical Immediate Word") instruction. -;; rd ← sext32(uext32(rs1) >> imm) -(decl rv_srliw (XReg Imm12) XReg) -(rule (rv_srliw rs1 imm) - (alu_rr_imm12 (AluOPRRI.SrliW) rs1 imm)) - -;; Helper for emitting the `sraw` ("Shift Right Arithmetic Word") instruction. -;; rd ← sext32(rs1 >> rs2) -(decl rv_sraw (XReg XReg) XReg) -(rule (rv_sraw rs1 rs2) - (alu_rrr (AluOPRRR.Sraw) rs1 rs2)) - -;; Helper for emitting the `sraiw` ("Shift Right Arithmetic Immediate Word") instruction. -;; rd ← sext32(rs1 >> imm) -(decl rv_sraiw (XReg Imm12) XReg) -(rule (rv_sraiw rs1 imm) - (alu_rr_imm12 (AluOPRRI.Sraiw) rs1 imm)) - - -;; RV32M Extension -;; TODO: Enable these instructions only when we have the M extension - -;; Helper for emitting the `mul` instruction. -;; rd ← rs1 × rs2 -(decl rv_mul (XReg XReg) XReg) -(rule (rv_mul rs1 rs2) - (alu_rrr (AluOPRRR.Mul) rs1 rs2)) - -;; Helper for emitting the `mulh` ("Multiply High Signed Signed") instruction. -;; rd ← (sext(rs1) × sext(rs2)) » xlen -(decl rv_mulh (XReg XReg) XReg) -(rule (rv_mulh rs1 rs2) - (alu_rrr (AluOPRRR.Mulh) rs1 rs2)) - -;; Helper for emitting the `mulhu` ("Multiply High Unsigned Unsigned") instruction. -;; rd ← (uext(rs1) × uext(rs2)) » xlen -(decl rv_mulhu (XReg XReg) XReg) -(rule (rv_mulhu rs1 rs2) - (alu_rrr (AluOPRRR.Mulhu) rs1 rs2)) - -;; Helper for emitting the `div` instruction. -;; rd ← rs1 ÷ rs2 -(decl rv_div (XReg XReg) XReg) -(rule (rv_div rs1 rs2) - (alu_rrr (AluOPRRR.Div) rs1 rs2)) - -;; Helper for emitting the `divu` ("Divide Unsigned") instruction. -;; rd ← rs1 ÷ rs2 -(decl rv_divu (XReg XReg) XReg) -(rule (rv_divu rs1 rs2) - (alu_rrr (AluOPRRR.DivU) rs1 rs2)) - -;; Helper for emitting the `rem` instruction. -;; rd ← rs1 mod rs2 -(decl rv_rem (XReg XReg) XReg) -(rule (rv_rem rs1 rs2) - (alu_rrr (AluOPRRR.Rem) rs1 rs2)) - -;; Helper for emitting the `remu` ("Remainder Unsigned") instruction. -;; rd ← rs1 mod rs2 -(decl rv_remu (XReg XReg) XReg) -(rule (rv_remu rs1 rs2) - (alu_rrr (AluOPRRR.RemU) rs1 rs2)) - -;; RV64M Extension -;; TODO: Enable these instructions only when we have the M extension - -;; Helper for emitting the `mulw` ("Multiply Word") instruction. -;; rd ← uext32(rs1) × uext32(rs2) -(decl rv_mulw (XReg XReg) XReg) -(rule (rv_mulw rs1 rs2) - (alu_rrr (AluOPRRR.Mulw) rs1 rs2)) - -;; Helper for emitting the `divw` ("Divide Word") instruction. -;; rd ← sext32(rs1) ÷ sext32(rs2) -(decl rv_divw (XReg XReg) XReg) -(rule (rv_divw rs1 rs2) - (alu_rrr (AluOPRRR.Divw) rs1 rs2)) - -;; Helper for emitting the `divuw` ("Divide Unsigned Word") instruction. -;; rd ← uext32(rs1) ÷ uext32(rs2) -(decl rv_divuw (XReg XReg) XReg) -(rule (rv_divuw rs1 rs2) - (alu_rrr (AluOPRRR.Divuw) rs1 rs2)) - -;; Helper for emitting the `remw` ("Remainder Word") instruction. -;; rd ← sext32(rs1) mod sext32(rs2) -(decl rv_remw (XReg XReg) XReg) -(rule (rv_remw rs1 rs2) - (alu_rrr (AluOPRRR.Remw) rs1 rs2)) - -;; Helper for emitting the `remuw` ("Remainder Unsigned Word") instruction. -;; rd ← uext32(rs1) mod uext32(rs2) -(decl rv_remuw (XReg XReg) XReg) -(rule (rv_remuw rs1 rs2) - (alu_rrr (AluOPRRR.Remuw) rs1 rs2)) - - -;; F and D Extensions -;; TODO: Enable these instructions only when we have the F or D extensions - -;; Helper for emitting the `fadd` instruction. -(decl rv_fadd (Type FRM FReg FReg) FReg) -(rule (rv_fadd ty frm rs1 rs2) (fpu_rrr (FpuOPRRR.Fadd) ty frm rs1 rs2)) - -;; Helper for emitting the `fsub` instruction. -(decl rv_fsub (Type FRM FReg FReg) FReg) -(rule (rv_fsub ty frm rs1 rs2) (fpu_rrr (FpuOPRRR.Fsub) ty frm rs1 rs2)) - -;; Helper for emitting the `fmul` instruction. -(decl rv_fmul (Type FRM FReg FReg) FReg) -(rule (rv_fmul ty frm rs1 rs2) (fpu_rrr (FpuOPRRR.Fmul) ty frm rs1 rs2)) - -;; Helper for emitting the `fdiv` instruction. -(decl rv_fdiv (Type FRM FReg FReg) FReg) -(rule (rv_fdiv ty frm rs1 rs2) (fpu_rrr (FpuOPRRR.Fdiv) ty frm rs1 rs2)) - -;; Helper for emitting the `fsqrt` instruction. -(decl rv_fsqrt (Type FRM FReg) FReg) -(rule (rv_fsqrt ty frm rs1) (fpu_rr (FpuOPRR.Fsqrt) ty frm rs1)) - -;; Helper for emitting the `fmadd` instruction. -(decl rv_fmadd (Type FRM FReg FReg FReg) FReg) -(rule (rv_fmadd ty frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.Fmadd) ty frm rs1 rs2 rs3)) - -;; Helper for emitting the `fmsub` instruction. -(decl rv_fmsub (Type FRM FReg FReg FReg) FReg) -(rule (rv_fmsub ty frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.Fmsub) ty frm rs1 rs2 rs3)) - -;; Helper for emitting the `fnmadd` instruction. -(decl rv_fnmadd (Type FRM FReg FReg FReg) FReg) -(rule (rv_fnmadd ty frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.Fnmadd) ty frm rs1 rs2 rs3)) - -;; Helper for emitting the `fnmsub` instruction. -(decl rv_fnmsub (Type FRM FReg FReg FReg) FReg) -(rule (rv_fnmsub ty frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.Fnmsub) ty frm rs1 rs2 rs3)) - -;; Helper for emitting the `fmv.x.h` instruction. -(decl rv_fmvxh (FReg) XReg) -(rule (rv_fmvxh r) (fpu_rr_int (FpuOPRR.FmvXFmt) $F16 (FRM.RNE) r)) - -;; Helper for emitting the `fmv.x.w` instruction. -(decl rv_fmvxw (FReg) XReg) -(rule (rv_fmvxw r) (fpu_rr_int (FpuOPRR.FmvXFmt) $F32 (FRM.RNE) r)) - -;; Helper for emitting the `fmv.x.d` instruction. -(decl rv_fmvxd (FReg) XReg) -(rule (rv_fmvxd r) (fpu_rr_int (FpuOPRR.FmvXFmt) $F64 (FRM.RNE) r)) - -;; Helper for emitting the `fmv.h.x` instruction. -(decl rv_fmvhx (XReg) FReg) -(rule (rv_fmvhx r) (fpu_rr (FpuOPRR.FmvFmtX) $F16 (FRM.RNE) r)) - -;; Helper for emitting the `fmv.w.x` instruction. -(decl rv_fmvwx (XReg) FReg) -(rule (rv_fmvwx r) (fpu_rr (FpuOPRR.FmvFmtX) $F32 (FRM.RNE) r)) - -;; Helper for emitting the `fmv.d.x` instruction. -(decl rv_fmvdx (XReg) FReg) -(rule (rv_fmvdx r) (fpu_rr (FpuOPRR.FmvFmtX) $F64 (FRM.RNE) r)) - -;; Helper for emitting the `fcvt.d.s` ("Float Convert Double to Single") instruction. -(decl rv_fcvtds (FReg) FReg) -(rule (rv_fcvtds rs1) (fpu_rr (FpuOPRR.FcvtDS) $F64 (FRM.RNE) rs1)) - -;; Helper for emitting the `fcvt.s.d` ("Float Convert Single to Double") instruction. -(decl rv_fcvtsd (FRM FReg) FReg) -(rule (rv_fcvtsd frm rs1) (fpu_rr (FpuOPRR.FcvtSD) $F32 frm rs1)) - -;; Helper for emitting the `fcvt.s.w` instruction. -(decl rv_fcvtsw (FRM XReg) FReg) -(rule (rv_fcvtsw frm rs1) (fpu_rr (FpuOPRR.FcvtFmtW) $F32 frm rs1)) - -;; Helper for emitting the `fcvt.s.wu` instruction. -(decl rv_fcvtswu (FRM XReg) FReg) -(rule (rv_fcvtswu frm rs1) (fpu_rr (FpuOPRR.FcvtFmtWu) $F32 frm rs1)) - -;; Helper for emitting the `fcvt.d.w` instruction. -(decl rv_fcvtdw (XReg) FReg) -(rule (rv_fcvtdw rs1) (fpu_rr (FpuOPRR.FcvtFmtW) $F64 (FRM.RNE) rs1)) - -;; Helper for emitting the `fcvt.d.wu` instruction. -(decl rv_fcvtdwu (XReg) FReg) -(rule (rv_fcvtdwu rs1) (fpu_rr (FpuOPRR.FcvtFmtWu) $F64 (FRM.RNE) rs1)) - -;; Helper for emitting the `fcvt.s.l` instruction. -(decl rv_fcvtsl (FRM XReg) FReg) -(rule (rv_fcvtsl frm rs1) (fpu_rr (FpuOPRR.FcvtFmtL) $F32 frm rs1)) - -;; Helper for emitting the `fcvt.s.lu` instruction. -(decl rv_fcvtslu (FRM XReg) FReg) -(rule (rv_fcvtslu frm rs1) (fpu_rr (FpuOPRR.FcvtFmtLu) $F32 frm rs1)) - -;; Helper for emitting the `fcvt.d.l` instruction. -(decl rv_fcvtdl (FRM XReg) FReg) -(rule (rv_fcvtdl frm rs1) (fpu_rr (FpuOPRR.FcvtFmtL) $F64 frm rs1)) - -;; Helper for emitting the `fcvt.d.lu` instruction. -(decl rv_fcvtdlu (FRM XReg) FReg) -(rule (rv_fcvtdlu frm rs1) (fpu_rr (FpuOPRR.FcvtFmtLu) $F64 frm rs1)) - -;; Helper for emitting the `fcvt.w.s` instruction. -(decl rv_fcvtws (FRM FReg) XReg) -(rule (rv_fcvtws frm rs1) (fpu_rr_int (FpuOPRR.FcvtWFmt) $F32 frm rs1)) - -;; Helper for emitting the `fcvt.l.s` instruction. -(decl rv_fcvtls (FRM FReg) XReg) -(rule (rv_fcvtls frm rs1) (fpu_rr_int (FpuOPRR.FcvtLFmt) $F32 frm rs1)) - -;; Helper for emitting the `fcvt.wu.s` instruction. -(decl rv_fcvtwus (FRM FReg) XReg) -(rule (rv_fcvtwus frm rs1) (fpu_rr_int (FpuOPRR.FcvtWuFmt) $F32 frm rs1)) - -;; Helper for emitting the `fcvt.lu.s` instruction. -(decl rv_fcvtlus (FRM FReg) XReg) -(rule (rv_fcvtlus frm rs1) (fpu_rr_int (FpuOPRR.FcvtLuFmt) $F32 frm rs1)) - -;; Helper for emitting the `fcvt.w.d` instruction. -(decl rv_fcvtwd (FRM FReg) XReg) -(rule (rv_fcvtwd frm rs1) (fpu_rr_int (FpuOPRR.FcvtWFmt) $F64 frm rs1)) - -;; Helper for emitting the `fcvt.l.d` instruction. -(decl rv_fcvtld (FRM FReg) XReg) -(rule (rv_fcvtld frm rs1) (fpu_rr_int (FpuOPRR.FcvtLFmt) $F64 frm rs1)) - -;; Helper for emitting the `fcvt.wu.d` instruction. -(decl rv_fcvtwud (FRM FReg) XReg) -(rule (rv_fcvtwud frm rs1) (fpu_rr_int (FpuOPRR.FcvtWuFmt) $F64 frm rs1)) - -;; Helper for emitting the `fcvt.lu.d` instruction. -(decl rv_fcvtlud (FRM FReg) XReg) -(rule (rv_fcvtlud frm rs1) (fpu_rr_int (FpuOPRR.FcvtLuFmt) $F64 frm rs1)) - -;; Helper for emitting the `fcvt.w.*` instructions. -(decl rv_fcvtw (Type FRM FReg) XReg) -(rule (rv_fcvtw $F32 frm rs1) (rv_fcvtws frm rs1)) -(rule (rv_fcvtw $F64 frm rs1) (rv_fcvtwd frm rs1)) - -;; Helper for emitting the `fcvt.l.*` instructions. -(decl rv_fcvtl (Type FRM FReg) XReg) -(rule (rv_fcvtl $F32 frm rs1) (rv_fcvtls frm rs1)) -(rule (rv_fcvtl $F64 frm rs1) (rv_fcvtld frm rs1)) - -;; Helper for emitting the `fcvt.wu.*` instructions. -(decl rv_fcvtwu (Type FRM FReg) XReg) -(rule (rv_fcvtwu $F32 frm rs1) (rv_fcvtwus frm rs1)) -(rule (rv_fcvtwu $F64 frm rs1) (rv_fcvtwud frm rs1)) - -;; Helper for emitting the `fcvt.lu.*` instructions. -(decl rv_fcvtlu (Type FRM FReg) XReg) -(rule (rv_fcvtlu $F32 frm rs1) (rv_fcvtlus frm rs1)) -(rule (rv_fcvtlu $F64 frm rs1) (rv_fcvtlud frm rs1)) - -;; Helper for emitting the `fsgnj` ("Floating Point Sign Injection") instruction. -;; The output of this instruction is `rs1` with the sign bit from `rs2` -;; This implements the `copysign` operation -(decl rv_fsgnj (Type FReg FReg) FReg) -(rule (rv_fsgnj ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fsgnj) ty (FRM.RNE) rs1 rs2)) - -;; Helper for emitting the `fsgnjn` ("Floating Point Sign Injection Negated") instruction. -;; The output of this instruction is `rs1` with the negated sign bit from `rs2` -;; When `rs1 == rs2` this implements the `neg` operation -(decl rv_fsgnjn (Type FReg FReg) FReg) -(rule (rv_fsgnjn ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fsgnjn) ty (FRM.RTZ) rs1 rs2)) - -;; Helper for emitting the `fneg` ("Floating Point Negate") instruction. -;; This instruction is a mnemonic for `fsgnjn rd, rs1, rs1` -(decl rv_fneg (Type FReg) FReg) -(rule (rv_fneg ty rs1) (rv_fsgnjn ty rs1 rs1)) - -;; Helper for emitting the `fsgnjx` ("Floating Point Sign Injection Exclusive") instruction. -;; The output of this instruction is `rs1` with the XOR of the sign bits from `rs1` and `rs2`. -;; When `rs1 == rs2` this implements `fabs` -(decl rv_fsgnjx (Type FReg FReg) FReg) -(rule (rv_fsgnjx ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fsgnjx) ty (FRM.RDN) rs1 rs2)) - -;; Helper for emitting the `fabs` ("Floating Point Absolute") instruction. -;; This instruction is a mnemonic for `fsgnjx rd, rs1, rs1` -(decl rv_fabs (Type FReg) FReg) -(rule (rv_fabs ty rs1) (rv_fsgnjx ty rs1 rs1)) - -;; Helper for emitting the `feq` ("Float Equal") instruction. -(decl rv_feq (Type FReg FReg) XReg) -(rule (rv_feq ty rs1 rs2) (fpu_rrr_int (FpuOPRRR.Feq) ty (FRM.RDN) rs1 rs2)) - -;; Helper for emitting the `flt` ("Float Less Than") instruction. -(decl rv_flt (Type FReg FReg) XReg) -(rule (rv_flt ty rs1 rs2) (fpu_rrr_int (FpuOPRRR.Flt) ty (FRM.RTZ) rs1 rs2)) - -;; Helper for emitting the `fle` ("Float Less Than or Equal") instruction. -(decl rv_fle (Type FReg FReg) XReg) -(rule (rv_fle ty rs1 rs2) (fpu_rrr_int (FpuOPRRR.Fle) ty (FRM.RNE) rs1 rs2)) - -;; Helper for emitting the `fgt` ("Float Greater Than") instruction. -;; Note: The arguments are reversed -(decl rv_fgt (Type FReg FReg) XReg) -(rule (rv_fgt ty rs1 rs2) (rv_flt ty rs2 rs1)) - -;; Helper for emitting the `fge` ("Float Greater Than or Equal") instruction. -;; Note: The arguments are reversed -(decl rv_fge (Type FReg FReg) XReg) -(rule (rv_fge ty rs1 rs2) (rv_fle ty rs2 rs1)) - -;; Helper for emitting the `fmin` instruction. -(decl rv_fmin (Type FReg FReg) FReg) -(rule (rv_fmin ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fmin) ty (FRM.RNE) rs1 rs2)) - -;; Helper for emitting the `fmax` instruction. -(decl rv_fmax (Type FReg FReg) FReg) -(rule (rv_fmax ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fmax) ty (FRM.RTZ) rs1 rs2)) - -;; `Zfa` Extension Instructions - -;; Helper for emitting the `fminm` instruction. -(decl rv_fminm (Type FReg FReg) FReg) -(rule (rv_fminm ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fminm) ty (FRM.RDN) rs1 rs2)) - -;; Helper for emitting the `fmaxm` instruction. -(decl rv_fmaxm (Type FReg FReg) FReg) -(rule (rv_fmaxm ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fmaxm) ty (FRM.RUP) rs1 rs2)) - -;; Helper for emitting the `fround` instruction. -(decl rv_fround (Type FRM FReg) FReg) -(rule (rv_fround ty frm rs) (fpu_rr (FpuOPRR.Fround) ty frm rs)) - -;; Helper for emitting the `fli` instruction. -(decl rv_fli (Type FliConstant) FReg) -(rule (rv_fli ty imm) - (let ((dst WritableFReg (temp_writable_freg)) - (_ Unit (emit (MInst.Fli ty - imm - dst)))) - dst)) - -;; `Zba` Extension Instructions - -;; Helper for emitting the `adduw` ("Add Unsigned Word") instruction. -;; rd ← uext32(rs1) + uext32(rs2) -(decl rv_adduw (XReg XReg) XReg) -(rule (rv_adduw rs1 rs2) - (alu_rrr (AluOPRRR.Adduw) rs1 rs2)) - -;; Helper for emitting the `zext.w` ("Zero Extend Word") instruction. -;; This instruction is a mnemonic for `adduw rd, rs1, zero`. -;; rd ← uext32(rs1) -(decl rv_zextw (XReg) XReg) -(rule (rv_zextw rs1) - (rv_adduw rs1 (zero_reg))) - -;; Helper for emitting the `slli.uw` ("Shift Left Logical Immediate Unsigned Word") instruction. -;; rd ← uext32(rs1) << imm -(decl rv_slliuw (XReg Imm12) XReg) -(rule (rv_slliuw rs1 imm) - (alu_rr_imm12 (AluOPRRI.SlliUw) rs1 imm)) - - -;; `Zbb` Extension Instructions - -;; Helper for emitting the `andn` ("And Negated") instruction. -;; rd ← rs1 ∧ ~(rs2) -(decl rv_andn (XReg XReg) XReg) -(rule (rv_andn rs1 rs2) - (if-let $true (has_zbb)) - (alu_rrr (AluOPRRR.Andn) rs1 rs2)) -(rule (rv_andn rs1 rs2) - (if-let $false (has_zbb)) - (rv_and rs1 (rv_not rs2))) - -;; Helper for emitting the `orn` ("Or Negated") instruction. -;; rd ← rs1 ∨ ~(rs2) -(decl rv_orn (XReg XReg) XReg) -(rule (rv_orn rs1 rs2) - (alu_rrr (AluOPRRR.Orn) rs1 rs2)) - -;; Helper for emitting the `xnor` ("Exclusive NOR") instruction. -;; rd ← ~(rs1 ^ rs2) -(decl rv_xnor (XReg XReg) XReg) -(rule (rv_xnor rs1 rs2) - (alu_rrr (AluOPRRR.Xnor) rs1 rs2)) - -;; Helper for emitting the `clz` ("Count Leading Zero Bits") instruction. -(decl rv_clz (XReg) XReg) -(rule (rv_clz rs1) - (alu_rr_funct12 (AluOPRRI.Clz) rs1)) - -;; Helper for emitting the `clzw` ("Count Leading Zero Bits in Word") instruction. -(decl rv_clzw (XReg) XReg) -(rule (rv_clzw rs1) - (alu_rr_funct12 (AluOPRRI.Clzw) rs1)) - -;; Helper for emitting the `ctz` ("Count Trailing Zero Bits") instruction. -(decl rv_ctz (XReg) XReg) -(rule (rv_ctz rs1) - (alu_rr_funct12 (AluOPRRI.Ctz) rs1)) - -;; Helper for emitting the `ctzw` ("Count Trailing Zero Bits in Word") instruction. -(decl rv_ctzw (XReg) XReg) -(rule (rv_ctzw rs1) - (alu_rr_funct12 (AluOPRRI.Ctzw) rs1)) - -;; Helper for emitting the `cpop` ("Count Population") instruction. -(decl rv_cpop (XReg) XReg) -(rule (rv_cpop rs1) - (alu_rr_funct12 (AluOPRRI.Cpop) rs1)) - -;; Helper for emitting the `cpopw` ("Count Population") instruction. -(decl rv_cpopw (XReg) XReg) -(rule (rv_cpopw rs1) - (alu_rr_funct12 (AluOPRRI.Cpopw) rs1)) - -;; Helper for emitting the `max` instruction. -(decl rv_max (XReg XReg) XReg) -(rule (rv_max rs1 rs2) - (alu_rrr (AluOPRRR.Max) rs1 rs2)) - -;; Helper for emitting the `maxu` instruction. -(decl rv_maxu (XReg XReg) XReg) -(rule (rv_maxu rs1 rs2) - (alu_rrr (AluOPRRR.Maxu) rs1 rs2)) - -;; Helper for emitting the `min` instruction. -(decl rv_min (XReg XReg) XReg) -(rule (rv_min rs1 rs2) - (alu_rrr (AluOPRRR.Min) rs1 rs2)) - -;; Helper for emitting the `minu` instruction. -(decl rv_minu (XReg XReg) XReg) -(rule (rv_minu rs1 rs2) - (alu_rrr (AluOPRRR.Minu) rs1 rs2)) - -;; Helper for emitting the `sext.b` instruction. -(decl rv_sextb (XReg) XReg) -(rule (rv_sextb rs1) - (alu_rr_imm12 (AluOPRRI.Sextb) rs1 (imm12_const 0))) - -;; Helper for emitting the `sext.h` instruction. -(decl rv_sexth (XReg) XReg) -(rule (rv_sexth rs1) - (alu_rr_imm12 (AluOPRRI.Sexth) rs1 (imm12_const 0))) - -;; Helper for emitting the `zext.h` instruction. -(decl rv_zexth (XReg) XReg) -(rule (rv_zexth rs1) - (alu_rr_imm12 (AluOPRRI.Zexth) rs1 (imm12_const 0))) - -;; Helper for emitting the `rol` ("Rotate Left") instruction. -(decl rv_rol (XReg XReg) XReg) -(rule (rv_rol rs1 rs2) - (alu_rrr (AluOPRRR.Rol) rs1 rs2)) - -;; Helper for emitting the `rolw` ("Rotate Left Word") instruction. -(decl rv_rolw (XReg XReg) XReg) -(rule (rv_rolw rs1 rs2) - (alu_rrr (AluOPRRR.Rolw) rs1 rs2)) - -;; Helper for emitting the `ror` ("Rotate Right") instruction. -(decl rv_ror (XReg XReg) XReg) -(rule (rv_ror rs1 rs2) - (alu_rrr (AluOPRRR.Ror) rs1 rs2)) - -;; Helper for emitting the `rorw` ("Rotate Right Word") instruction. -(decl rv_rorw (XReg XReg) XReg) -(rule (rv_rorw rs1 rs2) - (alu_rrr (AluOPRRR.Rorw) rs1 rs2)) - -;; Helper for emitting the `rori` ("Rotate Right") instruction. -(decl rv_rori (XReg Imm12) XReg) -(rule (rv_rori rs1 rs2) - (alu_rr_imm12 (AluOPRRI.Rori) rs1 rs2)) - -;; Helper for emitting the `roriw` ("Rotate Right Word") instruction. -(decl rv_roriw (XReg Imm12) XReg) -(rule (rv_roriw rs1 rs2) - (alu_rr_imm12 (AluOPRRI.Roriw) rs1 rs2)) - -;; Helper for emitting the `rev8` ("Byte Reverse") instruction. -(decl rv_rev8 (XReg) XReg) -(rule (rv_rev8 rs1) - (alu_rr_funct12 (AluOPRRI.Rev8) rs1)) - -;; Helper for emitting the `brev8` ("Bit Reverse Inside Bytes") instruction. -;; TODO: This instruction is mentioned in some older versions of the -;; spec, but has since disappeared, we should follow up on this. -;; It probably was renamed to `rev.b` which seems to be the closest match. -(decl rv_brev8 (XReg) XReg) -(rule (rv_brev8 rs1) - (alu_rr_funct12 (AluOPRRI.Brev8) rs1)) - -;; `Zbs` Extension Instructions - -(decl rv_bclr (XReg XReg) XReg) -(rule (rv_bclr rs1 rs2) - (alu_rrr (AluOPRRR.Bclr) rs1 rs2)) - -(decl rv_bclri (XReg Imm12) XReg) -(rule (rv_bclri rs1 imm) - (alu_rr_imm12 (AluOPRRI.Bclri) rs1 imm)) - -(decl rv_bext (XReg XReg) XReg) -(rule (rv_bext rs1 rs2) - (alu_rrr (AluOPRRR.Bext) rs1 rs2)) - -(decl rv_bexti (XReg Imm12) XReg) -(rule (rv_bexti rs1 imm) - (alu_rr_imm12 (AluOPRRI.Bexti) rs1 imm)) - -(decl rv_binv (XReg XReg) XReg) -(rule (rv_binv rs1 rs2) - (alu_rrr (AluOPRRR.Binv) rs1 rs2)) - -(decl rv_binvi (XReg Imm12) XReg) -(rule (rv_binvi rs1 imm) - (alu_rr_imm12 (AluOPRRI.Binvi) rs1 imm)) - -(decl rv_bset (XReg XReg) XReg) -(rule (rv_bset rs1 rs2) - (alu_rrr (AluOPRRR.Bset) rs1 rs2)) - -;; Helper for emitting the `bseti` ("Single-Bit Set Immediate") instruction. -(decl rv_bseti (XReg Imm12) XReg) -(rule (rv_bseti rs1 imm) - (alu_rr_imm12 (AluOPRRI.Bseti) rs1 imm)) - -;; `Zbkb` Extension Instructions - -;; Helper for emitting the `pack` ("Pack low halves of registers") instruction. -(decl rv_pack (XReg XReg) XReg) -(rule (rv_pack rs1 rs2) - (alu_rrr (AluOPRRR.Pack) rs1 rs2)) - -;; Helper for emitting the `packw` ("Pack low 16-bits of registers") instruction. -(decl rv_packw (XReg XReg) XReg) -(rule (rv_packw rs1 rs2) - (alu_rrr (AluOPRRR.Packw) rs1 rs2)) - -;; `ZiCond` Extension Instructions - -;; Helper for emitting the `czero.eqz` ("Conditional zero, if condition is equal to zero") instruction. -;; RS1 is the data source -;; RS2 is the condition -;; -;; rd = (rs2 == 0) ? 0 : rs1 -(decl rv_czero_eqz (XReg XReg) XReg) -(rule (rv_czero_eqz rs1 rs2) - (alu_rrr (AluOPRRR.CzeroEqz) rs1 rs2)) - -;; Helper for emitting the `czero.nez` ("Conditional zero, if condition is nonzero") instruction. -;; RS1 is the data source -;; RS2 is the condition -;; -;; rd = (rs2 != 0) ? 0 : rs1 -(decl rv_czero_nez (XReg XReg) XReg) -(rule (rv_czero_nez rs1 rs2) - (alu_rrr (AluOPRRR.CzeroNez) rs1 rs2)) - - -;; `Zicsr` Extension Instructions - -;; Helper for emitting the `csrrwi` instruction. -(decl rv_csrrwi (CSR UImm5) XReg) -(rule (rv_csrrwi csr imm) - (csr_imm (CsrImmOP.CsrRWI) csr imm)) - -;; This is a special case of `csrrwi` when the CSR is the `frm` CSR. -(decl rv_fsrmi (FRM) XReg) -(rule (rv_fsrmi frm) (rv_csrrwi (CSR.Frm) frm)) - - -;; Helper for emitting the `csrw` instruction. This is a special case of -;; `csrrw` where the destination register is always `x0`. -(decl rv_csrw (CSR XReg) Unit) -(rule (rv_csrw csr rs) - (csr_reg_dst_zero (CsrRegOP.CsrRW) csr rs)) - -;; This is a special case of `csrw` when the CSR is the `frm` CSR. -(decl rv_fsrm (XReg) Unit) -(rule (rv_fsrm rs) (rv_csrw (CSR.Frm) rs)) - - - - - - -;; Helper for generating a FliConstant from a u64 constant -(decl pure partial fli_constant_from_u64 (Type u64) FliConstant) -(extern constructor fli_constant_from_u64 fli_constant_from_u64) - -;; Helper for generating a FliConstant from a u64 negated constant -(decl pure partial fli_constant_from_negated_u64 (Type u64) FliConstant) -(extern constructor fli_constant_from_negated_u64 fli_constant_from_negated_u64) - -;; Helper for generating a i64 from a pair of Imm20 and Imm12 constants -(decl i64_generate_imm (Imm20 Imm12) i64) -(extern extractor i64_generate_imm i64_generate_imm) - -;; Helper for generating a i64 from a shift of a Imm20 constant with LUI -(decl i64_shift_for_lui (u64 Imm12) i64) -(extern extractor i64_shift_for_lui i64_shift_for_lui) - -;; Helper for generating a i64 from a shift of a Imm20 constant -(decl i64_shift (i64 Imm12) i64) -(extern extractor i64_shift i64_shift) - -;; Immediate Loading rules -;; TODO: Loading the zero reg directly causes a bunch of regalloc errors, we should look into it. -;; TODO: Load floats using `fld` instead of `ld` -(decl imm (Type u64) Reg) - -;; Special-case 0.0 for floats to use the `(zero_reg)` directly. -;; See #7162 for why this doesn't fall out of the rules below. -(rule 9 (imm (ty_supported_float $F16) 0) (gen_bitcast (zero_reg) $I16 $F16)) -(rule 9 (imm (ty_supported_float $F32) 0) (gen_bitcast (zero_reg) $I32 $F32)) -(rule 9 (imm (ty_supported_float $F64) 0) (gen_bitcast (zero_reg) $I64 $F64)) - -;; If Zfa is enabled, we can load certain constants with the `fli` instruction. -(rule 8 (imm (ty_supported_float (ty_32_or_64 ty)) imm) - (if-let $true (has_zfa)) - (if-let const (fli_constant_from_u64 ty imm)) - (rv_fli ty const)) - -;; It is beneficial to load the negated constant with `fli` and then negate it -;; in a register. -;; -;; For f64's this saves one instruction, and for f32's it avoids -;; having to allocate an integer register, reducing integer register pressure. -(rule 7 (imm (ty_supported_float (ty_32_or_64 ty)) imm) - (if-let $true (has_zfa)) - (if-let const (fli_constant_from_negated_u64 ty imm)) - (rv_fneg ty (rv_fli ty const))) - -;; Otherwise floats get loaded as integers and then moved into an F register. -(rule 6 (imm (ty_supported_float $F16) c) (gen_bitcast (imm $I16 c) $I16 $F16)) -(rule 6 (imm (ty_supported_float $F32) c) (gen_bitcast (imm $I32 c) $I32 $F32)) -(rule 6 (imm (ty_supported_float $F64) c) (gen_bitcast (imm $I64 c) $I64 $F64)) - -;; Try to match just an imm12 -(rule 4 (imm (ty_int ty) c) - (if-let (i64_generate_imm (imm20_is_zero) imm12) (i64_sextend_u64 ty c)) - (rv_addi (zero_reg) imm12)) - -;; We can also try to load using a single LUI. -;; LUI takes a 20 bit immediate, places it on bits 13 to 32 of the register. -;; In RV64 this value is then sign extended to 64bits. -(rule 3 (imm (ty_int ty) c) - (if-let (i64_generate_imm imm20 (imm12_is_zero)) (i64_sextend_u64 ty c)) - (rv_lui imm20)) - -;; We can combo addi + lui to represent all 32-bit immediates -;; And some 64-bit immediates as well. -(rule 2 (imm (ty_int ty) c) - (if-let (i64_generate_imm imm20 imm12) (i64_sextend_u64 ty c)) - (rv_addi (rv_lui imm20) imm12)) - -;; If the non-zero bits of the immediate fit in 20 bits, we can use LUI + shift -(rule 1 (imm (ty_int ty) c) - (if-let (i64_shift_for_lui (imm20_from_u64 base) shift) (i64_sextend_u64 ty c)) - (rv_slli (rv_lui base) shift)) - -;; Combine one of the above rules with a shift-left if possible, This chops off -;; all trailing zeros from the input constant and then attempts if the resulting -;; constant can itself use one of the above rules via the `i64_generate_imm` -;; matcher. This will then recurse on the above rules to materialize a smaller -;; constant which is then shifted left to create the desired constant. -(rule 0 (imm (ty_int ty) c) - (if-let (i64_shift c_shifted shift) (i64_sextend_u64 ty c)) ;; constant to make - (if-let (i64_generate_imm _ _) c_shifted) ;; can the smaller constant be made? - (rv_slli (imm ty (i64_as_u64 c_shifted)) shift)) - -;; Otherwise we fall back to loading the immediate from the constant pool. -(rule -1 (imm (ty_int ty) c) - (gen_load - (gen_const_amode (emit_u64_le_const c)) - (LoadOP.Ld) - (mem_flags_trusted))) - -;; Imm12 Rules - -(decl pure imm12_zero () Imm12) -(rule (imm12_zero) (imm12_const 0)) - -(decl pure imm12_const (i32) Imm12) -(extern constructor imm12_const imm12_const) - -(decl load_imm12 (i32) Reg) -(rule - (load_imm12 x) - (rv_addi (zero_reg) (imm12_const x))) - -;; for load immediate -(decl imm_from_bits (u64) Imm12) -(extern constructor imm_from_bits imm_from_bits) - -(decl imm_from_neg_bits (i64) Imm12) -(extern constructor imm_from_neg_bits imm_from_neg_bits) - -(decl imm12_const_add (i32 i32) Imm12) -(extern constructor imm12_const_add imm12_const_add) - -;; Performs a fallible add of the `Imm12` value and the 32-bit value provided. -(decl pure partial imm12_add (Imm12 i32) Imm12) -(extern constructor imm12_add imm12_add) - -(decl imm12_and (Imm12 u64) Imm12) -(extern constructor imm12_and imm12_and) - -;; Imm12 Extractors - -;; Helper to go directly from a `Value`, when it's an `iconst`, to an `Imm12`. -(decl imm12_from_value (Imm12) Value) -(extractor (imm12_from_value n) (i64_from_iconst (imm12_from_i64 n))) - -;; Conceptually the same as `imm12_from_value`, but tries negating the constant -;; value (first sign-extending to handle narrow widths). -(decl pure partial imm12_from_negated_value (Value) Imm12) -(rule - (imm12_from_negated_value (has_type ty (iconst n))) - (if-let (imm12_from_u64 imm) (i64_as_u64 (i64_neg (i64_sextend_imm64 ty n)))) - imm) - -(decl imm12_from_u64 (Imm12) u64) -(extern extractor imm12_from_u64 imm12_from_u64) - -(decl imm12_from_i64 (Imm12) i64) -(extern extractor imm12_from_i64 imm12_from_i64) - -(decl pure partial u64_to_imm12 (u64) Imm12) -(rule (u64_to_imm12 (imm12_from_u64 n)) n) - -(decl pure imm12_is_zero () Imm12) -(extern extractor imm12_is_zero imm12_is_zero) - -;; Imm20 - -;; Extractor that matches if a Imm20 is zero -(decl pure imm20_is_zero () Imm20) -(extern extractor imm20_is_zero imm20_is_zero) - -(decl imm20_from_u64 (Imm20) u64) -(extern extractor imm20_from_u64 imm20_from_u64) - -(decl imm20_from_i64 (Imm20) i64) -(extern extractor imm20_from_i64 imm20_from_i64) - - -;; Imm5 Extractors - -(decl imm5_from_u64 (Imm5) u64) -(extern extractor imm5_from_u64 imm5_from_u64) - -(decl imm5_from_i64 (Imm5) i64) -(extern extractor imm5_from_i64 imm5_from_i64) - -;; Construct a Imm5 from an i8 -(decl pure partial i8_to_imm5 (i8) Imm5) -(extern constructor i8_to_imm5 i8_to_imm5) - -;; Helper to go directly from a `Value` to an `Imm5`. -(decl imm5_from_value (Imm5) Value) -(extractor (imm5_from_value n) (i64_from_iconst (imm5_from_i64 n))) - -;; Like imm5_from_value, but first negates the `Value`. -(decl pure partial imm5_from_negated_value (Value) Imm5) -(rule (imm5_from_negated_value (has_type ty (iconst n))) - (if-let (imm5_from_i64 imm) (i64_neg (i64_sextend_imm64 ty n))) - imm) - -;; Constructor that matches a `Value` equivalent to a replicated Imm5 on all lanes. -(decl pure partial replicated_imm5 (Value) Imm5) -(rule (replicated_imm5 (splat (imm5_from_value n))) n) -(rule (replicated_imm5 (vconst (u128_from_constant n128))) - (if-let (u128_replicated_u64 n64) n128) - (if-let (u64_replicated_u32 n32) n64) - (if-let (u32_replicated_u16 n16) n32) - (if-let (u16_replicated_u8 n8) n16) - (if-let n (i8_to_imm5 (u8_as_i8 n8))) - n) - -;; Like replicated_imm5, but first negates the `Value`. -(decl pure partial negated_replicated_imm5 (Value) Imm5) -(rule (negated_replicated_imm5 (splat n)) - (if-let imm5 (imm5_from_negated_value n)) - imm5) -(rule (negated_replicated_imm5 (vconst (u128_from_constant n128))) - (if-let (u128_replicated_u64 n64) n128) - (if-let (u64_replicated_u32 n32) n64) - (if-let (u32_replicated_u16 n16) n32) - (if-let (u16_replicated_u8 n8) n16) - (if-let n (i8_to_imm5 (i8_neg (u8_as_i8 n8)))) - n) - -;; UImm5 Helpers - -;; Constructor that matches a `Value` equivalent to a replicated UImm5 on all lanes. -(decl pure partial replicated_uimm5 (Value) UImm5) -(rule (replicated_uimm5 (splat (uimm5_from_value n))) n) -(rule 1 (replicated_uimm5 (vconst (u128_from_constant n128))) - (if-let (u128_replicated_u64 n64) n128) - (if-let (u64_replicated_u32 n32) n64) - (if-let (u32_replicated_u16 n16) n32) - (if-let (u16_replicated_u8 n8) n16) - (if-let (uimm5_from_u8 n) n8) - n) - -;; Helper to go directly from a `Value`, when it's an `iconst`, to an `UImm5`. -(decl uimm5_from_value (UImm5) Value) -(extractor (uimm5_from_value n) - (iconst (u64_from_imm64 (uimm5_from_u64 n)))) - -;; Extract a `UImm5` from an `u8`. -(decl pure partial uimm5_from_u8 (UImm5) u8) -(extern extractor uimm5_from_u8 uimm5_from_u8) - -;; Extract a `UImm5` from an `u64`. -(decl pure partial uimm5_from_u64 (UImm5) u64) -(extern extractor uimm5_from_u64 uimm5_from_u64) - -;; Convert a `u64` into an `UImm5` -(decl pure partial u64_to_uimm5 (u64) UImm5) -(rule (u64_to_uimm5 (uimm5_from_u64 n)) n) - -(decl uimm5_bitcast_to_imm5 (UImm5) Imm5) -(extern constructor uimm5_bitcast_to_imm5 uimm5_bitcast_to_imm5) - -;; Float Helpers - -;; Returns the bitpattern of the Canonical NaN for the given type. -(decl pure canonical_nan_u64 (Type) u64) -(rule (canonical_nan_u64 $F32) 0x7fc00000) -(rule (canonical_nan_u64 $F64) 0x7ff8000000000000) - -;; Helper for emitting `MInst.FpuRR` instructions. -(decl fpu_rr (FpuOPRR Type FRM Reg) FReg) -(rule (fpu_rr op ty frm src) - (let ((dst WritableFReg (temp_writable_freg)) - (_ Unit (emit (MInst.FpuRR op ty frm dst src)))) - dst)) - -;; Similar to fpu_rr but with an integer destination register -(decl fpu_rr_int (FpuOPRR Type FRM Reg) XReg) -(rule (fpu_rr_int op ty frm src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.FpuRR op ty frm dst src)))) - dst)) - -;; Helper for emitting `MInst.AluRRR` instructions. -(decl alu_rrr (AluOPRRR Reg Reg) Reg) -(rule (alu_rrr op src1 src2) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.AluRRR op dst src1 src2)))) - dst)) - -;; Helper for emitting `MInst.FpuRRR` instructions. -(decl fpu_rrr (FpuOPRRR Type FRM Reg Reg) FReg) -(rule (fpu_rrr op ty frm src1 src2) - (let ((dst WritableFReg (temp_writable_freg)) - (_ Unit (emit (MInst.FpuRRR op ty frm dst src1 src2)))) - dst)) - -;; Similar to fpu_rrr but with an integer destination register -(decl fpu_rrr_int (FpuOPRRR Type FRM Reg Reg) XReg) -(rule (fpu_rrr_int op ty frm src1 src2) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.FpuRRR op ty frm dst src1 src2)))) - dst)) - -;; Helper for emitting `MInst.FpuRRRR` instructions. -(decl fpu_rrrr (FpuOPRRRR Type FRM Reg Reg Reg) FReg) -(rule (fpu_rrrr op ty frm src1 src2 src3) - (let ((dst WritableFReg (temp_writable_freg)) - (_ Unit (emit (MInst.FpuRRRR op ty frm dst src1 src2 src3)))) - dst)) - - -;; Helper for emitting `MInst.AluRRImm12` instructions. -(decl alu_rr_imm12 (AluOPRRI Reg Imm12) Reg) -(rule (alu_rr_imm12 op src imm) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.AluRRImm12 op dst src imm)))) - dst)) - -;; some instruction use imm12 as funct12. -;; so we don't need the imm12 parameter. -(decl alu_rr_funct12 (AluOPRRI Reg) Reg) -(rule (alu_rr_funct12 op src) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero))))) - dst)) - -;; Helper for emitting the `Lui` instruction. -;; TODO: This should be something like `emit_u_type`. And should share the -;; `MInst` with `auipc` since these instructions share the U-Type format. -(decl rv_lui (Imm20) XReg) -(rule (rv_lui imm) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Lui dst imm)))) - dst)) - -;; Helper for emitting `MInst.CsrImm` instructions. -(decl csr_imm (CsrImmOP CSR UImm5) XReg) -(rule (csr_imm op csr imm) - (let ((dst WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.CsrImm op dst imm csr)))) - dst)) - -;; Helper for emitting a `MInst.CsrReg` instruction that writes the result to x0. -(decl csr_reg_dst_zero (CsrRegOP CSR XReg) Unit) -(rule (csr_reg_dst_zero op csr rs) - (emit (MInst.CsrReg op (writable_zero_reg) rs csr))) - - - -(decl select_addi (Type) AluOPRRI) -(rule 1 (select_addi (fits_in_32 ty)) (AluOPRRI.Addiw)) -(rule (select_addi (fits_in_64 ty)) (AluOPRRI.Addi)) - - -(decl gen_andi (XReg u64) XReg) -(rule 1 (gen_andi x (imm12_from_u64 y)) - (rv_andi x y)) - -(rule 0 (gen_andi x y) - (rv_and x (imm $I64 y))) - - -(decl gen_or (Type ValueRegs ValueRegs) ValueRegs) -(rule 1 (gen_or $I128 x y) - (value_regs - (rv_or (value_regs_get x 0) (value_regs_get y 0)) - (rv_or (value_regs_get x 1) (value_regs_get y 1)))) - -(rule 0 (gen_or (fits_in_64 _) x y) - (rv_or (value_regs_get x 0) (value_regs_get y 0))) - - -(decl lower_ctz (Type Reg) Reg) -(rule (lower_ctz ty x) - (gen_cltz $false x ty)) - -(rule 1 (lower_ctz (fits_in_16 ty) x) - (if-let $true (has_zbb)) - (let ((tmp Reg (gen_bseti x (ty_bits ty)))) - (rv_ctzw tmp))) - -(rule 2 (lower_ctz $I32 x) - (if-let $true (has_zbb)) - (rv_ctzw x)) - -(rule 2 (lower_ctz $I64 x) - (if-let $true (has_zbb)) - (rv_ctz x)) - -;; Count leading zeros from a i128 bit value. -;; We count both halves separately and conditionally add them if it makes sense. - -(decl gen_cltz (bool XReg Type) XReg) -(rule (gen_cltz leading rs ty) - (let ((tmp WritableXReg (temp_writable_xreg)) - (step WritableXReg (temp_writable_xreg)) - (sum WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Cltz leading sum step tmp rs ty)))) - sum)) - -;; Performs a zero extension of the given value -(decl zext (Value) XReg) - -;; In the most generic case, we shift left and then shift right. -(rule 0 (zext val @ (value_type (fits_in_32 ty))) - (let ((shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits ty))))) - (rv_srli (rv_slli val shift) shift))) - -;; If we are zero extending a U8 we can use a `andi` instruction. -(rule 1 (zext val @ (value_type $I8)) - (rv_andi val (imm12_const 0xff))) - -;; No point in trying to use `packh` here to zero extend 8 bit values -;; since we can just use `andi` instead which is part of the base ISA. - -;; If we have the `zbkb` extension `packw` can be used to zero extend 16 bit values -(rule 1 (zext val @ (value_type $I16)) - (if-let $true (has_zbkb)) - (rv_packw val (zero_reg))) - -;; If we have the `zbkb` extension `pack` can be used to zero extend 32 bit registers -(rule 1 (zext val @ (value_type $I32)) - (if-let $true (has_zbkb)) - (rv_pack val (zero_reg))) - -;; If we have the `zbb` extension we can use the dedicated `zext.h` instruction. -(rule 2 (zext val @ (value_type $I16)) - (if-let $true (has_zbb)) - (rv_zexth val)) - -;; With `zba` we have a `zext.w` instruction -(rule 2 (zext val @ (value_type $I32)) - (if-let $true (has_zba)) - (rv_zextw val)) - -;; Ignore sign extensions for values whose representation is already the full -;; register width. -(rule 3 (zext val) - (if (val_already_extended (ExtendOp.Zero) val)) - val) - -;; Performs a signed extension of the given value -(decl sext (Value) XReg) - -;; Same base case as `zext`, shift left-then-right. -(rule 0 (sext val @ (value_type (fits_in_32 ty))) - (let ((shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits ty))))) - (rv_srai (rv_slli val shift) shift))) - -;; If we have the `zbb` extension we can use the dedicated `sext.b` instruction. -(rule 1 (sext val @ (value_type $I8)) - (if-let $true (has_zbb)) - (rv_sextb val)) - -;; If we have the `zbb` extension we can use the dedicated `sext.h` instruction. -(rule 1 (sext val @ (value_type $I16)) - (if-let $true (has_zbb)) - (rv_sexth val)) - -;; When signed extending from 32 to 64 bits we can use a -;; `addiw val 0`. Also known as a `sext.w` -(rule 1 (sext val @ (value_type $I32)) - (rv_sextw val)) - -;; Ignore sign extensions for values whose representation is already the full -;; register width. -(rule 2 (sext val) - (if (val_already_extended (ExtendOp.Signed) val)) - val) - -;; Helper matcher for when a value's representation is already sign or zero -;; extended to the full 64-bit register representation. This is used by `zext` -;; and `sext` above to skip the extension instruction entirely in some -;; circumstances. -(decl pure partial val_already_extended (ExtendOp Value) bool) -(rule 0 (val_already_extended _ v @ (value_type $I64)) $true) - -;; When extending our backend always extends to the full register width, so -;; there's no need to extend-an-extend. -(rule 1 (val_already_extended (ExtendOp.Zero) (uextend _)) $true) -(rule 1 (val_already_extended (ExtendOp.Signed) (sextend _)) $true) - -;; The result of `icmp`/`fcmp` is zero or one, meaning that it's already sign -;; extended to the full register width. -(rule 1 (val_already_extended _ (icmp _ _ _)) $true) -(rule 1 (val_already_extended _ (fcmp _ _ _)) $true) - -;; The lowering for these operations always sign-extend their results due to the -;; use of the `*w` instructions in RV64I. Note that this requires that the -;; extension is from 32 to 64, 16/8-bit operations are explicitly excluded here. -;; There are no native instructions for the 16/8 bit operations so they must -;; fall through to actual sign extension above. -(rule 1 (val_already_extended (ExtendOp.Signed) (has_type $I32 (ishl _ _))) $true) -(rule 1 (val_already_extended (ExtendOp.Signed) (has_type $I32 (ushr _ _))) $true) -(rule 1 (val_already_extended (ExtendOp.Signed) (has_type $I32 (sshr _ _))) $true) -(rule 1 (val_already_extended (ExtendOp.Signed) (has_type $I32 (iadd _ _))) $true) -(rule 1 (val_already_extended (ExtendOp.Signed) (has_type $I32 (isub _ _))) $true) - -(type ExtendOp - (enum - (Zero) - (Signed))) - -(decl lower_b128_binary (AluOPRRR ValueRegs ValueRegs) ValueRegs) -(rule - (lower_b128_binary op a b) - (let - ( ;; low part. - (low XReg (alu_rrr op (value_regs_get a 0) (value_regs_get b 0))) - ;; high part. - (high XReg (alu_rrr op (value_regs_get a 1) (value_regs_get b 1)))) - (value_regs low high))) - -(decl lower_smlhi (Type XReg XReg) XReg) -(rule 1 - (lower_smlhi $I64 rs1 rs2) - (rv_mulh rs1 rs2)) - -(rule - (lower_smlhi ty rs1 rs2) - (let - ((tmp XReg (rv_mul rs1 rs2))) - (rv_srli tmp (imm12_const (ty_bits ty))))) - -;;;; construct shift amount.rotl on i128 will use shift to implement. So can call this function. -;;;; this will return shift amount and (ty_bits - "shift amount") -;;;; if ty_bits is greater than 64 like i128, then shmat will fallback to 64.because We are 64 bit platform. -(decl gen_shamt (Type XReg) ValueRegs) -(extern constructor gen_shamt gen_shamt) - -;; bseti: Set a single bit in a register, indexed by a constant. -(decl gen_bseti (Reg u64) Reg) -(rule (gen_bseti val bit) - (if-let $false (has_zbs)) - (if-let $false (u64_le bit 12)) - (let ((const XReg (imm $I64 (u64_shl 1 bit)))) - (rv_or val const))) - -(rule (gen_bseti val bit) - (if-let $false (has_zbs)) - (if-let $true (u64_le bit 12)) - (rv_ori val (imm12_const (u64_as_i32 (u64_shl 1 bit))))) - -(rule (gen_bseti val bit) - (if-let $true (has_zbs)) - (rv_bseti val (imm12_const (u64_as_i32 bit)))) - - -(decl gen_popcnt (XReg) Reg) -(rule (gen_popcnt rs) - (let - ((tmp WritableXReg (temp_writable_xreg)) - (step WritableXReg (temp_writable_xreg)) - (sum WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Popcnt sum step tmp rs $I64)))) - (writable_reg_to_reg sum))) - -;; Generates a AMode that points to a register plus an offset. -(decl gen_reg_offset_amode (Reg i64) AMode) -(extern constructor gen_reg_offset_amode gen_reg_offset_amode) - -;; Generates a AMode that an offset from the stack pointer. -(decl gen_sp_offset_amode (i64) AMode) -(extern constructor gen_sp_offset_amode gen_sp_offset_amode) - -;; Generates a AMode that an offset from the frame pointer. -(decl gen_fp_offset_amode (i64) AMode) -(extern constructor gen_fp_offset_amode gen_fp_offset_amode) - -;; Generates an AMode that points to a stack slot + offset. -(decl gen_stack_slot_amode (StackSlot i64) AMode) -(extern constructor gen_stack_slot_amode gen_stack_slot_amode) - -;; Generates a AMode that points to a constant in the constant pool. -(decl gen_const_amode (VCodeConstant) AMode) -(extern constructor gen_const_amode gen_const_amode) - - - -;; Tries to match a Value + Offset into an AMode -(decl amode (Value i32) AMode) -(rule 0 (amode addr offset) (amode_inner addr offset)) - -;; If we are adding a constant offset with an iadd we can instead make that -;; offset part of the amode offset. -;; -;; We can't recurse into `amode` again since that could cause stack overflows. -;; See: https://github.com/bytecodealliance/wasmtime/pull/6968 -(rule 1 (amode (iadd addr (i32_from_iconst y)) offset) - (if-let new_offset (s32_add_fallible y offset)) - (amode_inner addr new_offset)) -(rule 2 (amode (iadd (i32_from_iconst x) addr) offset) - (if-let new_offset (s32_add_fallible x offset)) - (amode_inner addr new_offset)) - - -;; These are the normal rules for generating an AMode. -(decl amode_inner (Value i32) AMode) - -;; In the simplest case we just lower into a Reg+Offset -(rule 0 (amode_inner r @ (value_type (ty_addr64 _)) offset) - (gen_reg_offset_amode r offset)) - -;; If the value is a `get_frame_pointer`, we can just use the offset from that. -(rule 1 (amode_inner (get_frame_pointer) offset) - (gen_fp_offset_amode offset)) - -;; If the value is a `get_stack_pointer`, we can just use the offset from that. -(rule 1 (amode_inner (get_stack_pointer) offset) - (gen_sp_offset_amode offset)) - -;; Similarly if the value is a `stack_addr` we can also turn that into an sp offset. -(rule 1 (amode_inner (stack_addr ss ss_offset) amode_offset) - (if-let combined_offset (s32_add_fallible ss_offset amode_offset)) - (gen_stack_slot_amode ss combined_offset)) - - -;; Helpers for sinkable loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; RISC-V doesen't really have sinkable loads. But the regular load instructions -;; sign / zero extend their results to 64 bits. So we can pretend they are -;; an extend instruction with a sinkable load. This allows us to have better -;; lowerings on these cases. - -;; Extract a sinkable instruction from a value operand. -(decl sinkable_inst (Inst) Value) -(extern extractor sinkable_inst sinkable_inst) - -;; Matches a sinkable load. -(decl sinkable_load (Inst Type MemFlags Value Offset32) Value) -(extractor (sinkable_load inst ty flags addr offset) - (and - (load flags addr offset) - (sinkable_inst (has_type ty inst)))) - -;; Returns a canonical type for a LoadOP. We only return I64 or F64. -(decl load_op_reg_type (LoadOP) Type) -(rule 1 (load_op_reg_type (LoadOP.Fld)) $F64) -(rule 1 (load_op_reg_type (LoadOP.Flw)) $F64) -(rule 1 (load_op_reg_type (LoadOP.Flh)) $F64) -(rule 0 (load_op_reg_type _) $I64) - -;; Helper constructor to build a load instruction. -(decl gen_load (AMode LoadOP MemFlags) Reg) -(rule (gen_load amode op flags) - (let ((dst WritableReg (temp_writable_reg (load_op_reg_type op))) - (_ Unit (emit (MInst.Load dst op flags amode)))) - dst)) - -;; Similar to `gen_load` but marks `Inst` as sunk at the current point. -;; -;; This is only useful for load op's that perform some additional computation -;; such as extending the loaded value. -(decl gen_sunk_load (Inst AMode LoadOP MemFlags) Reg) -(rule (gen_sunk_load inst amode op flags) - (let ((_ Unit (sink_inst inst))) - (gen_load amode op flags))) - - -;; Helper constructor to build a store instruction. -;; -;; This helper contains a special-case for zero constants stored to memory to -;; directly store the `zero` register to memory. See #7162 for some discussion -;; on why this doesn't just fall out. -(decl gen_store (AMode MemFlags Value) InstOutput) -(rule 1 (gen_store amode flags val @ (value_type ty)) - (if-let (u64_from_iconst 0) val) - (rv_store amode (store_op ty) flags (zero_reg))) -(rule 0 (gen_store amode flags val @ (value_type ty)) - (rv_store amode (store_op ty) flags val)) - -;; Emit a raw instruction to store a register into memory. -;; -;; Note that the `src` operand must have the correct type for the `op` -;; specified. -(decl rv_store (AMode StoreOP MemFlags Reg) InstOutput) -(rule (rv_store amode op flags src) - (side_effect (SideEffectNoResult.Inst (MInst.Store amode op flags src)))) - - - - -(decl valid_atomic_transaction (Type) Type) -(extern extractor valid_atomic_transaction valid_atomic_transaction) - -;;helper function. -;;construct an atomic instruction. -(decl gen_atomic (AtomicOP Reg Reg AMO) Reg) -(rule - (gen_atomic op addr src amo) - (let - ((tmp WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Atomic op tmp addr src amo)))) - tmp)) - -;; helper function -(decl get_atomic_rmw_op (Type AtomicRmwOp) AtomicOP) -(rule - (get_atomic_rmw_op $I32 (AtomicRmwOp.Add)) - (AtomicOP.AmoaddW)) -(rule - (get_atomic_rmw_op $I64 (AtomicRmwOp.Add)) - (AtomicOP.AmoaddD)) - -(rule - (get_atomic_rmw_op $I32 (AtomicRmwOp.And)) - (AtomicOP.AmoandW)) - -(rule - (get_atomic_rmw_op $I64 (AtomicRmwOp.And)) - (AtomicOP.AmoandD)) - -(rule - (get_atomic_rmw_op $I32 (AtomicRmwOp.Or)) - (AtomicOP.AmoorW)) - -(rule - (get_atomic_rmw_op $I64 (AtomicRmwOp.Or)) - (AtomicOP.AmoorD)) - -(rule - (get_atomic_rmw_op $I32 (AtomicRmwOp.Smax)) - (AtomicOP.AmomaxW)) - -(rule - (get_atomic_rmw_op $I64 (AtomicRmwOp.Smax)) - (AtomicOP.AmomaxD)) - -(rule - (get_atomic_rmw_op $I32 (AtomicRmwOp.Smin)) - (AtomicOP.AmominW)) - -(rule - (get_atomic_rmw_op $I64 (AtomicRmwOp.Smin)) - (AtomicOP.AmominD)) - -(rule - (get_atomic_rmw_op $I32 (AtomicRmwOp.Umax)) - (AtomicOP.AmomaxuW) -) - -(rule - (get_atomic_rmw_op $I64 (AtomicRmwOp.Umax)) - (AtomicOP.AmomaxuD)) - -(rule - (get_atomic_rmw_op $I32 (AtomicRmwOp.Umin)) - (AtomicOP.AmominuW)) - -(rule - (get_atomic_rmw_op $I64 (AtomicRmwOp.Umin)) - (AtomicOP.AmominuD)) - -(rule - (get_atomic_rmw_op $I32 (AtomicRmwOp.Xchg)) - (AtomicOP.AmoswapW)) - -(rule - (get_atomic_rmw_op $I64 (AtomicRmwOp.Xchg)) - (AtomicOP.AmoswapD)) - -(rule - (get_atomic_rmw_op $I32 (AtomicRmwOp.Xor)) - (AtomicOP.AmoxorW)) - -(rule - (get_atomic_rmw_op $I64 (AtomicRmwOp.Xor)) - (AtomicOP.AmoxorD)) - -(decl atomic_amo () AMO) -(extern constructor atomic_amo atomic_amo) - - -(decl gen_atomic_load (Reg Type) Reg) -(rule - (gen_atomic_load p ty) - (let - ((tmp WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.AtomicLoad tmp ty p)))) - (writable_reg_to_reg tmp))) - -;;; -(decl gen_atomic_store (Reg Type Reg) InstOutput) -(rule - (gen_atomic_store p ty src) - (side_effect (SideEffectNoResult.Inst (MInst.AtomicStore src ty p))) -) - - -;; Rounds a FReg by converting the value into an integer and back with a specified -;; float rounding mode. -(decl float_round_fcvt (Type FRM FReg) FReg) -(rule (float_round_fcvt $F32 frm rs) (rv_fcvtsw frm (rv_fcvtws frm rs))) -(rule (float_round_fcvt $F64 frm rs) (rv_fcvtdl frm (rv_fcvtld frm rs))) - -(decl gen_float_round (FRM FReg Type) FReg) -(rule 0 (gen_float_round frm rs ty) - (let (;; if rs is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits - ;; in mantissa, the result is the same as src, check for these cases first. - (max FReg (imm ty (float_int_max ty))) - (abs FReg (rv_fabs ty rs)) - (exact XReg (rv_flt ty abs max)) - - ;; Manually round the value using the fcvt instructions - ;; to move the value to an integer register and back. - (fcvt FReg (float_round_fcvt ty frm rs)) - ;; Restore the sign bit from the initial value. - (rounded FReg (rv_fsgnj ty fcvt rs)) - - ;; We want to return a arithmetic nan if the input is a canonical nan. - ;; Convert them by adding 0.0 to the input. - (float_zero FReg (gen_bitcast (zero_reg) (float_int_of_same_size ty) ty)) - (corrected_nan FReg (rv_fadd ty (FRM.RNE) rs float_zero))) - - ;; Check if the value cannot be rounded exactly and return the source input if so - (gen_select_freg (cmp_eqz exact) corrected_nan rounded))) - -;; With Zfa we can use the dedicated `fround` instruction. -(rule 1 (gen_float_round frm rs ty) - (if-let $true (has_zfa)) - (rv_fround ty frm rs)) - - - -(decl gen_stack_addr (StackSlot Offset32) Reg) -(extern constructor gen_stack_addr gen_stack_addr) - -(decl gen_select_xreg (IntegerCompare XReg XReg) XReg) - -(rule 6 (gen_select_xreg (int_compare_decompose cc x y) x y) - (if-let (IntCC.UnsignedLessThan) (intcc_without_eq cc)) - (if-let $true (has_zbb)) - (rv_minu x y)) - -(rule 6 (gen_select_xreg (int_compare_decompose cc x y) x y) - (if-let (IntCC.SignedLessThan) (intcc_without_eq cc)) - (if-let $true (has_zbb)) - (rv_min x y)) - -(rule 6 (gen_select_xreg (int_compare_decompose cc x y) x y) - (if-let (IntCC.UnsignedGreaterThan) (intcc_without_eq cc)) - (if-let $true (has_zbb)) - (rv_maxu x y)) - -(rule 6 (gen_select_xreg (int_compare_decompose cc x y) x y) - (if-let (IntCC.SignedGreaterThan) (intcc_without_eq cc)) - (if-let $true (has_zbb)) - (rv_max x y)) - -;; Rotate Zero Reg to the right. This allows us to write fewer rules -;; below when matching the zero register -;; -;; Additionally prevent this rule from recursing infinitely by only -;; matching when one of the inputs is the zero register, but not both. - -(rule 5 (gen_select_xreg (int_compare_decompose cc a @ (zero_reg) b @ (non_zero_reg)) x y) - (if-let $true (has_zicond)) - (gen_select_xreg (int_compare (intcc_swap_args cc) b a) x y)) - -(rule 4 (gen_select_xreg c @ (int_compare_decompose cc a b) x @ (zero_reg) y @ (non_zero_reg)) - (if-let $true (has_zicond)) - (gen_select_xreg (int_compare (intcc_complement cc) a b) y x)) - -(rule 3 (gen_select_xreg (int_compare_decompose (IntCC.Equal) c (zero_reg)) x (zero_reg)) - (if-let $true (has_zicond)) - (rv_czero_nez x c)) - -(rule 3 (gen_select_xreg (int_compare_decompose (IntCC.NotEqual) c (zero_reg)) x (zero_reg)) - (if-let $true (has_zicond)) - (rv_czero_eqz x c)) - -(rule 2 (gen_select_xreg (int_compare_decompose (IntCC.Equal) c (zero_reg)) x y) - (if-let $true (has_zicond)) - (rv_or - (rv_czero_nez x c) - (rv_czero_eqz y c))) - -(rule 2 (gen_select_xreg (int_compare_decompose (IntCC.NotEqual) c (zero_reg)) x y) - (if-let $true (has_zicond)) - (rv_or - (rv_czero_eqz x c) - (rv_czero_nez y c))) - -;; It is still beneficial to emit the full compare instruction, and then the 3 instruction -;; select using zicond, so do that here as a last resort. -(rule 1 (gen_select_xreg compare x y) - (if-let $true (has_zicond)) - (gen_select_xreg (cmp_nez (lower_int_compare compare)) x y)) - -;; In the base case we emit a conditional branch and a few moves. - -(rule 0 (gen_select_xreg c x y) - (let - ((dst WritableReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Select dst c x y)))) - (writable_reg_to_reg dst))) - - -(decl gen_select_vreg (IntegerCompare VReg VReg) VReg) -(rule (gen_select_vreg c x y) - (let - ((dst WritableReg (temp_writable_vreg)) - (_ Unit (emit (MInst.Select dst c (vreg_to_reg x) (vreg_to_reg y))))) - (writable_reg_to_reg dst))) -(decl gen_select_freg (IntegerCompare FReg FReg) FReg) -(rule (gen_select_freg c x y) - (let - ((dst WritableReg (temp_writable_freg)) - (_ Unit (emit (MInst.Select dst c (freg_to_reg x) (freg_to_reg y))))) - (writable_reg_to_reg dst))) -(decl gen_select_regs (IntegerCompare ValueRegs ValueRegs) ValueRegs) -(rule (gen_select_regs c x y) - (let - ((dst1 WritableReg (temp_writable_xreg)) - (dst2 WritableReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Select (writable_value_regs dst1 dst2) c x y)))) - (value_regs dst1 dst2))) - -(decl udf (TrapCode) InstOutput) -(rule - (udf code) - (side_effect (SideEffectNoResult.Inst (MInst.Udf code)))) - -(decl load_op (Type) LoadOP) -(extern constructor load_op load_op) - -(decl store_op (Type) StoreOP) -(extern constructor store_op store_op) - - -;;;; load extern name -(decl load_ext_name (ExternalName i64) Reg) -(extern constructor load_ext_name load_ext_name) - -(decl elf_tls_get_addr (ExternalName) Reg) -(rule (elf_tls_get_addr name) - (let ((dst WritableReg (temp_writable_reg $I64)) - (_ Unit (emit (MInst.ElfTlsGetAddr dst name)))) - dst)) - -;;; some float binary operation -;;; 1. need move into x register. -;;; 2. do the operation. -;;; 3. move back. -(decl lower_float_binary (AluOPRRR FReg FReg Type) FReg) -(rule - (lower_float_binary op rs1 rs2 ty) - (let ((x_rs1 XReg (move_f_to_x rs1 ty)) - (x_rs2 XReg (move_f_to_x rs2 ty)) - (tmp XReg (alu_rrr op x_rs1 x_rs2))) - (move_x_to_f tmp (float_int_of_same_size ty)))) - - -(decl i128_sub (ValueRegs ValueRegs) ValueRegs) -(rule - (i128_sub x y ) - (let - (;; low part. - (low XReg (rv_sub (value_regs_get x 0) (value_regs_get y 0))) - ;; compute borrow. - (borrow XReg (rv_sltu (value_regs_get x 0) low)) - ;; - (high_tmp XReg (rv_sub (value_regs_get x 1) (value_regs_get y 1))) - ;; - (high XReg (rv_sub high_tmp borrow))) - (value_regs low high))) - -;; Consume a CmpResult, producing a branch on its result. -(decl cond_br (IntegerCompare CondBrTarget CondBrTarget) SideEffectNoResult) -(rule (cond_br cmp then else) - (SideEffectNoResult.Inst - (MInst.CondBr then else cmp))) - -;; Helper for emitting the `j` mnemonic, an unconditional jump to label. -(decl rv_j (MachLabel) SideEffectNoResult) -(rule (rv_j label) - (SideEffectNoResult.Inst (MInst.Jal label))) - -;; Construct an IntegerCompare value. -(decl int_compare (IntCC XReg XReg) IntegerCompare) -(extern constructor int_compare int_compare) - -;; Extract the components of an `IntegerCompare` -(decl int_compare_decompose (IntCC XReg XReg) IntegerCompare) -(extern extractor infallible int_compare_decompose int_compare_decompose) - -(decl label_to_br_target (MachLabel) CondBrTarget) -(extern constructor label_to_br_target label_to_br_target) -(convert MachLabel CondBrTarget label_to_br_target) - -(decl cmp_eqz (XReg) IntegerCompare) -(rule (cmp_eqz r) (int_compare (IntCC.Equal) r (zero_reg))) - -(decl cmp_nez (XReg) IntegerCompare) -(rule (cmp_nez r) (int_compare (IntCC.NotEqual) r (zero_reg))) - -(decl cmp_eq (XReg XReg) IntegerCompare) -(rule (cmp_eq rs1 rs2) (int_compare (IntCC.Equal) rs1 rs2)) - -(decl cmp_ne (XReg XReg) IntegerCompare) -(rule (cmp_ne rs1 rs2) (int_compare (IntCC.NotEqual) rs1 rs2)) - -(decl cmp_lt (XReg XReg) IntegerCompare) -(rule (cmp_lt rs1 rs2) (int_compare (IntCC.SignedLessThan) rs1 rs2)) - -(decl cmp_ltz (XReg) IntegerCompare) -(rule (cmp_ltz rs) (int_compare (IntCC.SignedLessThan) rs (zero_reg))) - -(decl cmp_gt (XReg XReg) IntegerCompare) -(rule (cmp_gt rs1 rs2) (int_compare (IntCC.SignedGreaterThan) rs1 rs2)) - -(decl cmp_ge (XReg XReg) IntegerCompare) -(rule (cmp_ge rs1 rs2) (int_compare (IntCC.SignedGreaterThanOrEqual) rs1 rs2)) - -(decl cmp_le (XReg XReg) IntegerCompare) -(rule (cmp_le rs1 rs2) (int_compare (IntCC.SignedLessThanOrEqual) rs1 rs2)) - -(decl cmp_gtu (XReg XReg) IntegerCompare) -(rule (cmp_gtu rs1 rs2) (int_compare (IntCC.UnsignedGreaterThan) rs1 rs2)) - -(decl cmp_geu (XReg XReg) IntegerCompare) -(rule (cmp_geu rs1 rs2) (int_compare (IntCC.UnsignedGreaterThanOrEqual) rs1 rs2)) - -(decl cmp_ltu (XReg XReg) IntegerCompare) -(rule (cmp_ltu rs1 rs2) (int_compare (IntCC.UnsignedLessThan) rs1 rs2)) - -(decl cmp_leu (XReg XReg) IntegerCompare) -(rule (cmp_leu rs1 rs2) (int_compare (IntCC.UnsignedLessThanOrEqual) rs1 rs2)) - -;; Helper to generate an `IntegerCompare` which represents the "truthy" value of -;; the input provided. -;; -;; This is used in `Select` and `brif` for example to generate conditional -;; branches. The returned comparison, when taken, represents that `Value` is -;; nonzero. When not taken the input `Value` is zero. -(decl is_nonzero_cmp (Value) IntegerCompare) - -;; Base case - convert to a "truthy" value and compare it against zero. -;; -;; Note that non-64-bit types need to be extended since the upper bits from -;; Cranelift's point of view are undefined. Favor a zero extension for 8-bit -;; types because that's a single `andi` instruction, but favor sign-extension -;; for 16 and 32-bit types because many RISC-V which operate on the low 32-bits. -;; Additionally the base 64-bit ISA has a single instruction for sign-extending -;; from 32 to 64-bits which makes that a bit cheaper if used. -;; of registers sign-extend the results. -(rule 0 (is_nonzero_cmp val @ (value_type (fits_in_64 _))) - (cmp_nez (sext val))) -(rule 1 (is_nonzero_cmp val @ (value_type $I8)) - (cmp_nez (zext val))) -(rule 1 (is_nonzero_cmp val @ (value_type $I128)) - (cmp_nez (rv_or (value_regs_get val 0) (value_regs_get val 1)))) - -;; If the input value is itself an `icmp` or `fcmp` we can avoid generating the -;; result of the comparison and instead move the comparison directly into the -;; `IntegerCompare` that's returned. -(rule 2 (is_nonzero_cmp (maybe_uextend (icmp cc a b @ (value_type (fits_in_64 _))))) - (icmp_to_int_compare cc a b)) -(rule 2 (is_nonzero_cmp (maybe_uextend (fcmp cc a @ (value_type ty) b))) - (fcmp_to_float_compare cc ty a b)) - -;; Creates an `IntegerCompare` from an `icmp` node's parts. This will extend -;; values as necessary to their full register width to perform the -;; comparison. The returned `IntegerCompare` is suitable to use in conditional -;; branches for example. -;; -;; Note that this should ideally only be used when the `IntegerCompare` returned -;; is fed into a branch. If `IntegerCompare` is materialized this will miss out -;; on optimizations to compare against constants using some native instructions. -(decl icmp_to_int_compare (IntCC Value Value) IntegerCompare) -(rule 0 (icmp_to_int_compare cc a b @ (value_type (fits_in_64 in_ty))) - (int_compare cc (put_value_in_reg_for_icmp cc a) (put_value_in_reg_for_icmp cc b))) -(rule 1 (icmp_to_int_compare cc a b @ (value_type $I128)) - (cmp_nez (lower_icmp_i128 cc a b))) - -;; Places a `Value` into a full register width to prepare for a comparison -;; using `IntCC`. -;; -;; This is largely a glorified means of choosing sign-extension or -;; zero-extension for the `Value` input. -(decl put_value_in_reg_for_icmp (IntCC Value) XReg) - -;; Base cases, use the `cc` to determine whether to zero or sign extend. -(rule 0 (put_value_in_reg_for_icmp cc val) - (zext val)) -(rule 1 (put_value_in_reg_for_icmp cc val) - (if (signed_cond_code cc)) - (sext val)) - -;; For equality and inequality favor sign extension since it's generally -;; easier to perform sign extension on RV64 via native instructions. For 8-bit -;; types though use zero-extension since that's a single instruction `and`. -(rule 2 (put_value_in_reg_for_icmp (IntCC.Equal) val @ (value_type (fits_in_64 _))) - (sext val)) -(rule 2 (put_value_in_reg_for_icmp (IntCC.NotEqual) val @ (value_type (fits_in_64 _))) - (sext val)) -(rule 3 (put_value_in_reg_for_icmp (IntCC.Equal) val @ (value_type $I8)) - (zext val)) -(rule 3 (put_value_in_reg_for_icmp (IntCC.NotEqual) val @ (value_type $I8)) - (zext val)) - -;; As a special case use `x0` directly if a constant is 0. -(rule 4 (put_value_in_reg_for_icmp _ (i64_from_iconst 0)) - (zero_reg)) - - -(decl partial lower_branch (Inst MachLabelSlice) Unit) -(rule (lower_branch (jump _) (single_target label)) - (emit_side_effect (rv_j label))) - -(rule (lower_branch (brif v _ _) (two_targets then else)) - (emit_side_effect (cond_br (is_nonzero_cmp v) then else))) - -(decl lower_br_table (Reg MachLabelSlice) Unit) -(extern constructor lower_br_table lower_br_table) - -(rule (lower_branch (br_table index _) targets) - (lower_br_table index targets)) - -(decl load_ra () Reg) -(extern constructor load_ra load_ra) - - -;; Generates a bitcast instruction. -;; Args are: src, src_ty, dst_ty -(decl gen_bitcast (Reg Type Type) Reg) - -;; To support FP16 vfmv.* we need to check for the `zvfh` isa flag, which we currently don't -;; support, so restrict the floating point types to 32/64 bits. -(rule 5 (gen_bitcast r (ty_supported_float (ty_32_or_64 src_ty)) (ty_supported_vec _)) (rv_vfmv_sf r src_ty)) -(rule 4 (gen_bitcast r (ty_supported_vec _) (ty_supported_float (ty_32_or_64 dst_ty))) (rv_vfmv_fs r dst_ty)) - -(rule 3 (gen_bitcast r (ty_int_ref_scalar_64 src_ty) (ty_supported_vec _)) (rv_vmv_sx r src_ty)) -(rule 2 (gen_bitcast r (ty_supported_vec _) (ty_int_ref_scalar_64 dst_ty)) (rv_vmv_xs r dst_ty)) -(rule 1 (gen_bitcast r $F16 $I16) (rv_fmvxh r)) -(rule 1 (gen_bitcast r $F32 $I32) (rv_fmvxw r)) -(rule 1 (gen_bitcast r $F64 $I64) (rv_fmvxd r)) -(rule 1 (gen_bitcast r $I16 $F16) (rv_fmvhx r)) -(rule 1 (gen_bitcast r $I32 $F32) (rv_fmvwx r)) -(rule 1 (gen_bitcast r $I64 $F64) (rv_fmvdx r)) -(rule (gen_bitcast r _ _) r) - -(decl move_f_to_x (FReg Type) XReg) -(rule (move_f_to_x r $F32) (gen_bitcast r $F32 $I32)) -(rule (move_f_to_x r $F64) (gen_bitcast r $F64 $I64)) - -(decl move_x_to_f (XReg Type) FReg) -(rule (move_x_to_f r $I32) (gen_bitcast r $I32 $F32)) -(rule (move_x_to_f r $I64) (gen_bitcast r $I64 $F64)) - -(decl float_int_of_same_size (Type) Type) -(rule (float_int_of_same_size $F32) $I32) -(rule (float_int_of_same_size $F64) $I64) - - -(decl gen_brev8 (Reg Type) Reg) -(rule 1 - (gen_brev8 rs _) - (if-let $true (has_zbkb)) - (rv_brev8 rs)) -(rule - (gen_brev8 rs ty) - (if-let $false (has_zbkb)) - (let - ((tmp WritableXReg (temp_writable_xreg)) - (tmp2 WritableXReg (temp_writable_xreg)) - (step WritableXReg (temp_writable_xreg)) - (rd WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.Brev8 rs ty step tmp tmp2 rd)))) - (writable_reg_to_reg rd))) - -;; Negates x -;; Equivalent to 0 - x -(decl neg (Type ValueRegs) ValueRegs) -(rule 1 (neg (fits_in_64 (ty_int ty)) val) - (value_reg - (rv_neg (value_regs_get val 0)))) - -(rule 2 (neg $I128 val) - (i128_sub (value_regs_zero) val)) - - -;; Builds an instruction sequence that traps if the comparison succeeds. -(decl gen_trapif (IntCC XReg XReg TrapCode) InstOutput) -(rule (gen_trapif cc a b trap_code) - (side_effect (SideEffectNoResult.Inst (MInst.TrapIf a b cc trap_code)))) - -;; Builds an instruction sequence that traps if the input is non-zero. -(decl gen_trapnz (XReg TrapCode) InstOutput) -(rule (gen_trapnz test trap_code) - (gen_trapif (IntCC.NotEqual) test (zero_reg) trap_code)) - -;; Builds an instruction sequence that traps if the input is zero. -(decl gen_trapz (XReg TrapCode) InstOutput) -(rule (gen_trapz test trap_code) - (gen_trapif (IntCC.Equal) test (zero_reg) trap_code)) - -;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) -(extern constructor gen_call gen_call) - -(decl gen_call_indirect (SigRef Value ValueSlice) InstOutput) -(extern constructor gen_call_indirect gen_call_indirect) - -;;; this is trying to imitate aarch64 `madd` instruction. -(decl madd (XReg XReg XReg) XReg) -(rule - (madd n m a) - (let - ((t XReg (rv_mul n m))) - (rv_add t a))) - -;;;; Helpers for bmask ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Generates either 0 if `Value` is zero or -1 otherwise. -(decl gen_bmask (Value) XReg) - -;; Base cases: use `snez` after a sign extension to ensure that the entire -;; register is defined. For i128 we test both the upper and lower half. -(rule 0 (gen_bmask val @ (value_type (fits_in_64 _))) - (let ((non_zero XReg (rv_snez (sext val)))) - (rv_neg non_zero))) -(rule 1 (gen_bmask val @ (value_type $I128)) - (let ((non_zero XReg (rv_snez (rv_or (value_regs_get val 0) (value_regs_get val 1))))) - (rv_neg non_zero))) - -;; If the input value is an `icmp` or an `fcmp` directly then the `snez` can -;; be omitted because the result of the icmp or fcmp is a 0 or 1 directly. This -;; means we can go straight to the `neg` instruction to produce the final -;; result. -(rule 2 (gen_bmask val @ (maybe_uextend (icmp _ _ _))) (rv_neg val)) -(rule 2 (gen_bmask val @ (maybe_uextend (fcmp _ _ _))) (rv_neg val)) - -(decl lower_bmask (Value Type) ValueRegs) -(rule 0 (lower_bmask val (fits_in_64 _)) - (value_reg (gen_bmask val))) -(rule 1 (lower_bmask val $I128) - (let ((bits XReg (gen_bmask val))) - (value_regs bits bits))) - -;;;; Helpers for physical registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl gen_mov_from_preg (PReg) Reg) - -(rule - (gen_mov_from_preg rm) - (let ((rd WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.MovFromPReg rd rm)))) - rd)) - -(decl fp_reg () PReg) -(extern constructor fp_reg fp_reg) - -(decl sp_reg () PReg) -(extern constructor sp_reg sp_reg) - -;; Extractor that matches all registers, except the zero register -(decl non_zero_reg () XReg) -(extern extractor non_zero_reg is_non_zero_reg) - -;; Helper for creating the zero register. -(decl zero_reg () XReg) -(extern constructor zero_reg zero_reg) -(extern extractor zero_reg is_zero_reg) - -(decl value_regs_zero () ValueRegs) -(rule (value_regs_zero) - (value_regs (imm $I64 0) (imm $I64 0))) - -(decl writable_zero_reg () WritableReg) -(extern constructor writable_zero_reg writable_zero_reg) - - -;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(type FloatCompare (enum - ;; The comparison succeeded if `r` is one - (One (r XReg)) - ;; The comparison succeeded if `r` is zero - (Zero (r XReg)) -)) - -(decl float_compare_invert (FloatCompare) FloatCompare) -(rule (float_compare_invert (FloatCompare.One r)) (FloatCompare.Zero r)) -(rule (float_compare_invert (FloatCompare.Zero r)) (FloatCompare.One r)) - -(decl float_to_int_compare (FloatCompare) IntegerCompare) -(rule (float_to_int_compare (FloatCompare.One r)) (cmp_nez r)) -(rule (float_to_int_compare (FloatCompare.Zero r)) (cmp_eqz r)) -(convert FloatCompare IntegerCompare float_to_int_compare) - -;; Compare two floating point numbers and return a zero/non-zero result. -(decl fcmp_to_float_compare (FloatCC Type FReg FReg) FloatCompare) - -;; Direct codegen for unordered comparisons is not that efficient, so invert -;; the comparison to get an ordered comparison and generate that. Then invert -;; the result to produce the final fcmp result. -(rule 0 (fcmp_to_float_compare cc ty a b) - (if-let $true (floatcc_unordered cc)) - (float_compare_invert (fcmp_to_float_compare (floatcc_complement cc) ty a b))) - -;; a is not nan && b is not nan -(rule 1 (fcmp_to_float_compare (FloatCC.Ordered) ty a b) - (FloatCompare.One (rv_and (is_not_nan ty a) (is_not_nan ty b)))) - -(decl is_not_nan (Type FReg) XReg) -(rule (is_not_nan ty a) (rv_feq ty a a)) - -;; a == b -(rule 1 (fcmp_to_float_compare (FloatCC.Equal) ty a b) - (FloatCompare.One (rv_feq ty a b))) - -;; a != b -;; == !(a == b) -(rule 1 (fcmp_to_float_compare (FloatCC.NotEqual) ty a b) - (FloatCompare.Zero (rv_feq ty a b))) - -;; a < b || a > b -(rule 1 (fcmp_to_float_compare (FloatCC.OrderedNotEqual) ty a b) - (FloatCompare.One (rv_or (rv_flt ty a b) (rv_fgt ty a b)))) - -;; a < b -(rule 1 (fcmp_to_float_compare (FloatCC.LessThan) ty a b) - (FloatCompare.One (rv_flt ty a b))) - -;; a <= b -(rule 1 (fcmp_to_float_compare (FloatCC.LessThanOrEqual) ty a b) - (FloatCompare.One (rv_fle ty a b))) - -;; a > b -(rule 1 (fcmp_to_float_compare (FloatCC.GreaterThan) ty a b) - (FloatCompare.One (rv_fgt ty a b))) - -;; a >= b -(rule 1 (fcmp_to_float_compare (FloatCC.GreaterThanOrEqual) ty a b) - (FloatCompare.One (rv_fge ty a b))) diff --git a/hbcb/src/inst/args.rs b/hbcb/src/inst/args.rs deleted file mode 100644 index 8fa412d..0000000 --- a/hbcb/src/inst/args.rs +++ /dev/null @@ -1,1929 +0,0 @@ -//! Riscv64 ISA definitions: instruction arguments. - -use { - super::*, - crate::{ - ir::condcodes::CondCode, - lower::isle::generated_code::{ - COpcodeSpace, CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, - ZcbMemOp, - }, - }, - cranelift_codegen::machinst::isle::WritableReg, - std::fmt::Result, -}; - -/// A macro for defining a newtype of `Reg` that enforces some invariant about -/// the wrapped `Reg` (such as that it is of a particular register class). -macro_rules! newtype_of_reg { - ( - $newtype_reg:ident, - $newtype_writable_reg:ident, - |$check_reg:ident| $check:expr - ) => { - /// A newtype wrapper around `Reg`. - #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] - pub struct $newtype_reg(Reg); - - impl PartialEq for $newtype_reg { - fn eq(&self, other: &Reg) -> bool { - self.0 == *other - } - } - - impl From<$newtype_reg> for Reg { - fn from(r: $newtype_reg) -> Self { - r.0 - } - } - - impl $newtype_reg { - /// Create this newtype from the given register, or return `None` if the register - /// is not a valid instance of this newtype. - pub fn new($check_reg: Reg) -> Option { - if $check { - Some(Self($check_reg)) - } else { - None - } - } - - /// Get this newtype's underlying `Reg`. - pub fn to_reg(self) -> Reg { - self.0 - } - } - - // Convenience impl so that people working with this newtype can use it - // "just like" a plain `Reg`. - // - // NB: We cannot implement `DerefMut` because that would let people do - // nasty stuff like `*my_xreg.deref_mut() = some_freg`, breaking the - // invariants that `XReg` provides. - impl std::ops::Deref for $newtype_reg { - type Target = Reg; - - fn deref(&self) -> &Reg { - &self.0 - } - } - - /// Writable Reg. - pub type $newtype_writable_reg = Writable<$newtype_reg>; - }; -} - -// Newtypes for registers classes. -newtype_of_reg!(XReg, WritableXReg, |reg| reg.class() == RegClass::Int); -newtype_of_reg!(FReg, WritableFReg, |reg| reg.class() == RegClass::Float); -newtype_of_reg!(VReg, WritableVReg, |reg| reg.class() == RegClass::Vector); - -/// An addressing mode specified for a load/store operation. -#[derive(Clone, Debug, Copy)] -pub enum AMode { - /// Arbitrary offset from a register. Converted to generation of large - /// offsets with multiple instructions as necessary during code emission. - RegOffset(Reg, i64), - /// Offset from the stack pointer. - SPOffset(i64), - - /// Offset from the frame pointer. - FPOffset(i64), - - /// Offset into the slot area of the stack, which lies just above the - /// outgoing argument area that's setup by the function prologue. - /// At emission time, this is converted to `SPOffset` with a fixup added to - /// the offset constant. The fixup is a running value that is tracked as - /// emission iterates through instructions in linear order, and can be - /// adjusted up and down with [Inst::VirtualSPOffsetAdj]. - /// - /// The standard ABI is in charge of handling this (by emitting the - /// adjustment meta-instructions). See the diagram in the documentation - /// for [crate::isa::aarch64::abi](the ABI module) for more details. - SlotOffset(i64), - - /// Offset into the argument area. - IncomingArg(i64), - - /// A reference to a constant which is placed outside of the function's - /// body, typically at the end. - Const(VCodeConstant), - - /// A reference to a label. - Label(MachLabel), -} - -impl AMode { - /// Add the registers referenced by this AMode to `collector`. - pub(crate) fn get_operands(&mut self, collector: &mut impl OperandVisitor) { - match self { - AMode::RegOffset(reg, ..) => collector.reg_use(reg), - // Registers used in these modes aren't allocatable. - AMode::SPOffset(..) - | AMode::FPOffset(..) - | AMode::SlotOffset(..) - | AMode::IncomingArg(..) - | AMode::Const(..) - | AMode::Label(..) => {} - } - } - - pub(crate) fn get_base_register(&self) -> Option { - match self { - &AMode::RegOffset(reg, ..) => Some(reg), - &AMode::SPOffset(..) => Some(stack_reg()), - &AMode::FPOffset(..) => Some(fp_reg()), - &AMode::SlotOffset(..) => Some(stack_reg()), - &AMode::IncomingArg(..) => Some(stack_reg()), - &AMode::Const(..) | AMode::Label(..) => None, - } - } - - pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { - match self { - &AMode::SlotOffset(offset) => { - offset + i64::from(state.frame_layout().outgoing_args_size) - } - - // Compute the offset into the incoming argument area relative to SP - &AMode::IncomingArg(offset) => { - let frame_layout = state.frame_layout(); - let sp_offset = frame_layout.tail_args_size - + frame_layout.setup_area_size - + frame_layout.clobber_size - + frame_layout.fixed_frame_storage_size - + frame_layout.outgoing_args_size; - i64::from(sp_offset) - offset - } - - &AMode::RegOffset(_, offset) => offset, - &AMode::SPOffset(offset) => offset, - &AMode::FPOffset(offset) => offset, - &AMode::Const(_) | &AMode::Label(_) => 0, - } - } - - /// Retrieve a MachLabel that corresponds to this addressing mode, if it exists. - pub(crate) fn get_label_with_sink(&self, sink: &mut MachBuffer) -> Option { - match self { - &AMode::Const(addr) => Some(sink.get_label_for_constant(addr)), - &AMode::Label(label) => Some(label), - &AMode::RegOffset(..) - | &AMode::SPOffset(..) - | &AMode::FPOffset(..) - | &AMode::IncomingArg(..) - | &AMode::SlotOffset(..) => None, - } - } -} - -impl Display for AMode { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - match self { - &AMode::RegOffset(r, offset, ..) => { - write!(f, "{}({})", offset, reg_name(r)) - } - &AMode::SPOffset(offset, ..) => { - write!(f, "{offset}(sp)") - } - &AMode::SlotOffset(offset, ..) => { - write!(f, "{offset}(slot)") - } - &AMode::IncomingArg(offset) => { - write!(f, "-{offset}(incoming_arg)") - } - &AMode::FPOffset(offset, ..) => { - write!(f, "{offset}(fp)") - } - &AMode::Const(addr, ..) => { - write!(f, "[const({})]", addr.as_u32()) - } - &AMode::Label(label) => { - write!(f, "[label{}]", label.as_u32()) - } - } - } -} - -impl Into for StackAMode { - fn into(self) -> AMode { - match self { - StackAMode::IncomingArg(offset, stack_args_size) => { - AMode::IncomingArg(i64::from(stack_args_size) - offset) - } - StackAMode::OutgoingArg(offset) => AMode::SPOffset(offset), - StackAMode::Slot(offset) => AMode::SlotOffset(offset), - } - } -} - -/// risc-v always take two register to compare -#[derive(Clone, Copy, Debug)] -pub struct IntegerCompare { - pub(crate) kind: IntCC, - pub(crate) rs1: Reg, - pub(crate) rs2: Reg, -} - -pub(crate) enum BranchFunct3 { - // == - Eq, - // != - Ne, - // signed < - Lt, - // signed >= - Ge, - // unsigned < - Ltu, - // unsigned >= - Geu, -} - -impl BranchFunct3 { - pub(crate) fn funct3(self) -> u32 { - match self { - BranchFunct3::Eq => 0b000, - BranchFunct3::Ne => 0b001, - BranchFunct3::Lt => 0b100, - BranchFunct3::Ge => 0b101, - BranchFunct3::Ltu => 0b110, - BranchFunct3::Geu => 0b111, - } - } -} - -impl IntegerCompare { - pub(crate) fn op_code(self) -> u32 { - 0b1100011 - } - - // funct3 and if need inverse the register - pub(crate) fn funct3(&self) -> (BranchFunct3, bool) { - match self.kind { - IntCC::Equal => (BranchFunct3::Eq, false), - IntCC::NotEqual => (BranchFunct3::Ne, false), - IntCC::SignedLessThan => (BranchFunct3::Lt, false), - IntCC::SignedGreaterThanOrEqual => (BranchFunct3::Ge, false), - - IntCC::SignedGreaterThan => (BranchFunct3::Lt, true), - IntCC::SignedLessThanOrEqual => (BranchFunct3::Ge, true), - - IntCC::UnsignedLessThan => (BranchFunct3::Ltu, false), - IntCC::UnsignedGreaterThanOrEqual => (BranchFunct3::Geu, false), - - IntCC::UnsignedGreaterThan => (BranchFunct3::Ltu, true), - IntCC::UnsignedLessThanOrEqual => (BranchFunct3::Geu, true), - } - } - - #[inline] - pub(crate) fn op_name(&self) -> &'static str { - match self.kind { - IntCC::Equal => "beq", - IntCC::NotEqual => "bne", - IntCC::SignedLessThan => "blt", - IntCC::SignedGreaterThanOrEqual => "bge", - IntCC::SignedGreaterThan => "bgt", - IntCC::SignedLessThanOrEqual => "ble", - IntCC::UnsignedLessThan => "bltu", - IntCC::UnsignedGreaterThanOrEqual => "bgeu", - IntCC::UnsignedGreaterThan => "bgtu", - IntCC::UnsignedLessThanOrEqual => "bleu", - } - } - - pub(crate) fn emit(self) -> u32 { - let (funct3, reverse) = self.funct3(); - let (rs1, rs2) = if reverse { (self.rs2, self.rs1) } else { (self.rs1, self.rs2) }; - - self.op_code() - | funct3.funct3() << 12 - | reg_to_gpr_num(rs1) << 15 - | reg_to_gpr_num(rs2) << 20 - } - - pub(crate) fn inverse(self) -> Self { - Self { kind: self.kind.complement(), ..self } - } - - pub(crate) fn regs(&self) -> [Reg; 2] { - [self.rs1, self.rs2] - } -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub struct FliConstant(u8); - -impl FliConstant { - pub(crate) fn new(value: u8) -> Self { - debug_assert!(value <= 31, "Invalid FliConstant: {value}"); - Self(value) - } - - pub(crate) fn maybe_from_u64(ty: Type, imm: u64) -> Option { - // Convert the value into an F64, this allows us to represent - // values from both f32 and f64 in the same value. - let value = match ty { - F32 => f32::from_bits(imm as u32) as f64, - F64 => f64::from_bits(imm), - _ => unimplemented!(), - }; - - Some(match (ty, value) { - (_, f) if f == -1.0 => Self::new(0), - - // Since f64 can represent all f32 values, f32::min_positive won't be - // the same as f64::min_positive, so we need to check for both indepenendtly - (F32, f) if f == (f32::MIN_POSITIVE as f64) => Self::new(1), - (F64, f) if f == f64::MIN_POSITIVE => Self::new(1), - - (_, f) if f == 2.0f64.powi(-16) => Self::new(2), - (_, f) if f == 2.0f64.powi(-15) => Self::new(3), - (_, f) if f == 2.0f64.powi(-8) => Self::new(4), - (_, f) if f == 2.0f64.powi(-7) => Self::new(5), - (_, f) if f == 0.0625 => Self::new(6), - (_, f) if f == 0.125 => Self::new(7), - (_, f) if f == 0.25 => Self::new(8), - (_, f) if f == 0.3125 => Self::new(9), - (_, f) if f == 0.375 => Self::new(10), - (_, f) if f == 0.4375 => Self::new(11), - (_, f) if f == 0.5 => Self::new(12), - (_, f) if f == 0.625 => Self::new(13), - (_, f) if f == 0.75 => Self::new(14), - (_, f) if f == 0.875 => Self::new(15), - (_, f) if f == 1.0 => Self::new(16), - (_, f) if f == 1.25 => Self::new(17), - (_, f) if f == 1.5 => Self::new(18), - (_, f) if f == 1.75 => Self::new(19), - (_, f) if f == 2.0 => Self::new(20), - (_, f) if f == 2.5 => Self::new(21), - (_, f) if f == 3.0 => Self::new(22), - (_, f) if f == 4.0 => Self::new(23), - (_, f) if f == 8.0 => Self::new(24), - (_, f) if f == 16.0 => Self::new(25), - (_, f) if f == 128.0 => Self::new(26), - (_, f) if f == 256.0 => Self::new(27), - (_, f) if f == 32768.0 => Self::new(28), - (_, f) if f == 65536.0 => Self::new(29), - (_, f) if f == f64::INFINITY => Self::new(30), - - // NaN's are not guaranteed to preserve the sign / payload bits, so we need to check - // the original bits directly. - (F32, f) if f.is_nan() && imm == 0x7fc0_0000 => Self::new(31), // Canonical NaN - (F64, f) if f.is_nan() && imm == 0x7ff8_0000_0000_0000 => Self::new(31), // Canonical NaN - _ => return None, - }) - } - - pub(crate) fn format(self) -> &'static str { - // The preferred assembly syntax for entries 1, 30, and 31 is min, inf, and nan, respectively. - // For entries 0 through 29 (including entry 1), the assembler will accept decimal constants - // in C-like syntax. - match self.0 { - 0 => "-1.0", - 1 => "min", - 2 => "2^-16", - 3 => "2^-15", - 4 => "2^-8", - 5 => "2^-7", - 6 => "0.0625", - 7 => "0.125", - 8 => "0.25", - 9 => "0.3125", - 10 => "0.375", - 11 => "0.4375", - 12 => "0.5", - 13 => "0.625", - 14 => "0.75", - 15 => "0.875", - 16 => "1.0", - 17 => "1.25", - 18 => "1.5", - 19 => "1.75", - 20 => "2.0", - 21 => "2.5", - 22 => "3.0", - 23 => "4.0", - 24 => "8.0", - 25 => "16.0", - 26 => "128.0", - 27 => "256.0", - 28 => "32768.0", - 29 => "65536.0", - 30 => "inf", - 31 => "nan", - _ => panic!("Invalid FliConstant"), - } - } - - pub(crate) fn bits(self) -> u8 { - self.0 - } -} - -impl FpuOPRRRR { - pub(crate) fn op_name(self, width: FpuOPWidth) -> String { - match self { - Self::Fmadd => format!("fmadd.{width}"), - Self::Fmsub => format!("fmsub.{width}"), - Self::Fnmsub => format!("fnmsub.{width}"), - Self::Fnmadd => format!("fnmadd.{width}"), - } - } - - pub(crate) fn opcode(self) -> u32 { - match self { - Self::Fmadd => 0b1000011, - Self::Fmsub => 0b1000111, - Self::Fnmsub => 0b1001011, - Self::Fnmadd => 0b1001111, - } - } -} - -impl FpuOPRR { - pub(crate) fn op_name(self, width: FpuOPWidth) -> String { - let fmv_width = match width { - FpuOPWidth::H => "h", - FpuOPWidth::S => "w", - FpuOPWidth::D => "d", - FpuOPWidth::Q => "q", - }; - match self { - Self::Fsqrt => format!("fsqrt.{width}"), - Self::Fround => format!("fround.{width}"), - Self::Fclass => format!("fclass.{width}"), - Self::FcvtWFmt => format!("fcvt.w.{width}"), - Self::FcvtWuFmt => format!("fcvt.wu.{width}"), - Self::FcvtLFmt => format!("fcvt.l.{width}"), - Self::FcvtLuFmt => format!("fcvt.lu.{width}"), - Self::FcvtFmtW => format!("fcvt.{width}.w"), - Self::FcvtFmtWu => format!("fcvt.{width}.wu"), - Self::FcvtFmtL => format!("fcvt.{width}.l"), - Self::FcvtFmtLu => format!("fcvt.{width}.lu"), - - // fmv instructions deviate from the normal encoding and instead - // encode the width as "w" instead of "s". The ISA manual gives this rationale: - // - // Instructions FMV.S.X and FMV.X.S were renamed to FMV.W.X and FMV.X.W respectively - // to be more consistent with their semantics, which did not change. The old names will continue - // to be supported in the tools. - Self::FmvXFmt => format!("fmv.x.{fmv_width}"), - Self::FmvFmtX => format!("fmv.{fmv_width}.x"), - - Self::FcvtSD => "fcvt.s.d".to_string(), - Self::FcvtDS => "fcvt.d.s".to_string(), - } - } - - pub(crate) fn is_convert_to_int(self) -> bool { - match self { - Self::FcvtWFmt | Self::FcvtWuFmt | Self::FcvtLFmt | Self::FcvtLuFmt => true, - _ => false, - } - } - - pub(crate) fn has_frm(self) -> bool { - match self { - FpuOPRR::FmvXFmt | FpuOPRR::FmvFmtX | FpuOPRR::Fclass => false, - _ => true, - } - } - - pub(crate) fn opcode(self) -> u32 { - // OP-FP Major opcode - 0b1010011 - } - - pub(crate) fn rs2(self) -> u32 { - match self { - Self::Fsqrt => 0b00000, - Self::Fround => 0b00100, - Self::Fclass => 0b00000, - Self::FcvtWFmt => 0b00000, - Self::FcvtWuFmt => 0b00001, - Self::FcvtLFmt => 0b00010, - Self::FcvtLuFmt => 0b00011, - Self::FcvtFmtW => 0b00000, - Self::FcvtFmtWu => 0b00001, - Self::FcvtFmtL => 0b00010, - Self::FcvtFmtLu => 0b00011, - Self::FmvXFmt => 0b00000, - Self::FmvFmtX => 0b00000, - Self::FcvtSD => 0b00001, - Self::FcvtDS => 0b00000, - } - } - - pub(crate) fn funct5(self) -> u32 { - match self { - Self::Fsqrt => 0b01011, - Self::Fround => 0b01000, - Self::Fclass => 0b11100, - Self::FcvtWFmt => 0b11000, - Self::FcvtWuFmt => 0b11000, - Self::FcvtLFmt => 0b11000, - Self::FcvtLuFmt => 0b11000, - Self::FcvtFmtW => 0b11010, - Self::FcvtFmtWu => 0b11010, - Self::FcvtFmtL => 0b11010, - Self::FcvtFmtLu => 0b11010, - Self::FmvXFmt => 0b11100, - Self::FmvFmtX => 0b11110, - Self::FcvtSD => 0b01000, - Self::FcvtDS => 0b01000, - } - } - - pub(crate) fn funct7(self, width: FpuOPWidth) -> u32 { - (self.funct5() << 2) | width.as_u32() - } -} - -impl FpuOPRRR { - pub(crate) fn op_name(self, width: FpuOPWidth) -> String { - match self { - Self::Fadd => format!("fadd.{width}"), - Self::Fsub => format!("fsub.{width}"), - Self::Fmul => format!("fmul.{width}"), - Self::Fdiv => format!("fdiv.{width}"), - Self::Fsgnj => format!("fsgnj.{width}"), - Self::Fsgnjn => format!("fsgnjn.{width}"), - Self::Fsgnjx => format!("fsgnjx.{width}"), - Self::Fmin => format!("fmin.{width}"), - Self::Fmax => format!("fmax.{width}"), - Self::Feq => format!("feq.{width}"), - Self::Flt => format!("flt.{width}"), - Self::Fle => format!("fle.{width}"), - Self::Fminm => format!("fminm.{width}"), - Self::Fmaxm => format!("fmaxm.{width}"), - } - } - - pub(crate) fn opcode(self) -> u32 { - // OP-FP Major opcode - 0b1010011 - } - - pub(crate) const fn funct5(self) -> u32 { - match self { - Self::Fadd => 0b00000, - Self::Fsub => 0b00001, - Self::Fmul => 0b00010, - Self::Fdiv => 0b00011, - Self::Fsgnj => 0b00100, - Self::Fsgnjn => 0b00100, - Self::Fsgnjx => 0b00100, - Self::Fmin => 0b00101, - Self::Fmax => 0b00101, - Self::Feq => 0b10100, - Self::Flt => 0b10100, - Self::Fle => 0b10100, - Self::Fminm => 0b00101, - Self::Fmaxm => 0b00101, - } - } - - pub(crate) fn funct7(self, width: FpuOPWidth) -> u32 { - (self.funct5() << 2) | width.as_u32() - } - - pub(crate) fn has_frm(self) -> bool { - match self { - FpuOPRRR::Fsgnj - | FpuOPRRR::Fsgnjn - | FpuOPRRR::Fsgnjx - | FpuOPRRR::Fmin - | FpuOPRRR::Fmax - | FpuOPRRR::Feq - | FpuOPRRR::Flt - | FpuOPRRR::Fle => false, - _ => true, - } - } -} - -impl Display for FpuOPWidth { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", match self { - FpuOPWidth::H => "h", - FpuOPWidth::S => "s", - FpuOPWidth::D => "d", - FpuOPWidth::Q => "q", - }) - } -} - -impl TryFrom for FpuOPWidth { - type Error = &'static str; - - fn try_from(value: Type) -> std::result::Result { - match value { - F16 => Ok(FpuOPWidth::H), - F32 => Ok(FpuOPWidth::S), - F64 => Ok(FpuOPWidth::D), - F128 => Ok(FpuOPWidth::Q), - _ => Err("Invalid type for FpuOPWidth"), - } - } -} - -impl FpuOPWidth { - pub(crate) fn as_u32(&self) -> u32 { - match self { - FpuOPWidth::S => 0b00, - FpuOPWidth::D => 0b01, - FpuOPWidth::H => 0b10, - FpuOPWidth::Q => 0b11, - } - } -} - -impl AluOPRRR { - pub(crate) const fn op_name(self) -> &'static str { - match self { - Self::Add => "add", - Self::Sub => "sub", - Self::Sll => "sll", - Self::Slt => "slt", - Self::Sgt => "sgt", - Self::SltU => "sltu", - Self::Sgtu => "sgtu", - Self::Xor => "xor", - Self::Srl => "srl", - Self::Sra => "sra", - Self::Or => "or", - Self::And => "and", - Self::Addw => "addw", - Self::Subw => "subw", - Self::Sllw => "sllw", - Self::Srlw => "srlw", - Self::Sraw => "sraw", - Self::Mul => "mul", - Self::Mulh => "mulh", - Self::Mulhsu => "mulhsu", - Self::Mulhu => "mulhu", - Self::Div => "div", - Self::DivU => "divu", - Self::Rem => "rem", - Self::RemU => "remu", - Self::Mulw => "mulw", - Self::Divw => "divw", - Self::Divuw => "divuw", - Self::Remw => "remw", - Self::Remuw => "remuw", - Self::Adduw => "add.uw", - Self::Andn => "andn", - Self::Bclr => "bclr", - Self::Bext => "bext", - Self::Binv => "binv", - Self::Bset => "bset", - Self::Clmul => "clmul", - Self::Clmulh => "clmulh", - Self::Clmulr => "clmulr", - Self::Max => "max", - Self::Maxu => "maxu", - Self::Min => "min", - Self::Minu => "minu", - Self::Orn => "orn", - Self::Rol => "rol", - Self::Rolw => "rolw", - Self::Ror => "ror", - Self::Rorw => "rorw", - Self::Sh1add => "sh1add", - Self::Sh1adduw => "sh1add.uw", - Self::Sh2add => "sh2add", - Self::Sh2adduw => "sh2add.uw", - Self::Sh3add => "sh3add", - Self::Sh3adduw => "sh3add.uw", - Self::Xnor => "xnor", - Self::Pack => "pack", - Self::Packw => "packw", - Self::Packh => "packh", - Self::CzeroEqz => "czero.eqz", - Self::CzeroNez => "czero.nez", - } - } - - pub fn funct3(self) -> u32 { - match self { - AluOPRRR::Add => 0b000, - AluOPRRR::Sll => 0b001, - AluOPRRR::Slt => 0b010, - AluOPRRR::Sgt => 0b010, - AluOPRRR::SltU => 0b011, - AluOPRRR::Sgtu => 0b011, - AluOPRRR::Xor => 0b100, - AluOPRRR::Srl => 0b101, - AluOPRRR::Sra => 0b101, - AluOPRRR::Or => 0b110, - AluOPRRR::And => 0b111, - AluOPRRR::Sub => 0b000, - - AluOPRRR::Addw => 0b000, - AluOPRRR::Subw => 0b000, - AluOPRRR::Sllw => 0b001, - AluOPRRR::Srlw => 0b101, - AluOPRRR::Sraw => 0b101, - - AluOPRRR::Mul => 0b000, - AluOPRRR::Mulh => 0b001, - AluOPRRR::Mulhsu => 0b010, - AluOPRRR::Mulhu => 0b011, - AluOPRRR::Div => 0b100, - AluOPRRR::DivU => 0b101, - AluOPRRR::Rem => 0b110, - AluOPRRR::RemU => 0b111, - - AluOPRRR::Mulw => 0b000, - AluOPRRR::Divw => 0b100, - AluOPRRR::Divuw => 0b101, - AluOPRRR::Remw => 0b110, - AluOPRRR::Remuw => 0b111, - - // Zbb - AluOPRRR::Adduw => 0b000, - AluOPRRR::Andn => 0b111, - AluOPRRR::Bclr => 0b001, - AluOPRRR::Bext => 0b101, - AluOPRRR::Binv => 0b001, - AluOPRRR::Bset => 0b001, - AluOPRRR::Clmul => 0b001, - AluOPRRR::Clmulh => 0b011, - AluOPRRR::Clmulr => 0b010, - AluOPRRR::Max => 0b110, - AluOPRRR::Maxu => 0b111, - AluOPRRR::Min => 0b100, - AluOPRRR::Minu => 0b101, - AluOPRRR::Orn => 0b110, - AluOPRRR::Rol => 0b001, - AluOPRRR::Rolw => 0b001, - AluOPRRR::Ror => 0b101, - AluOPRRR::Rorw => 0b101, - AluOPRRR::Sh1add => 0b010, - AluOPRRR::Sh1adduw => 0b010, - AluOPRRR::Sh2add => 0b100, - AluOPRRR::Sh2adduw => 0b100, - AluOPRRR::Sh3add => 0b110, - AluOPRRR::Sh3adduw => 0b110, - AluOPRRR::Xnor => 0b100, - - // Zbkb - AluOPRRR::Pack => 0b100, - AluOPRRR::Packw => 0b100, - AluOPRRR::Packh => 0b111, - - // ZiCond - AluOPRRR::CzeroEqz => 0b101, - AluOPRRR::CzeroNez => 0b111, - } - } - - pub fn op_code(self) -> u32 { - match self { - AluOPRRR::Add - | AluOPRRR::Sub - | AluOPRRR::Sll - | AluOPRRR::Slt - | AluOPRRR::Sgt - | AluOPRRR::SltU - | AluOPRRR::Sgtu - | AluOPRRR::Xor - | AluOPRRR::Srl - | AluOPRRR::Sra - | AluOPRRR::Or - | AluOPRRR::And - | AluOPRRR::Pack - | AluOPRRR::Packh => 0b0110011, - - AluOPRRR::Addw - | AluOPRRR::Subw - | AluOPRRR::Sllw - | AluOPRRR::Srlw - | AluOPRRR::Sraw - | AluOPRRR::Packw => 0b0111011, - - AluOPRRR::Mul - | AluOPRRR::Mulh - | AluOPRRR::Mulhsu - | AluOPRRR::Mulhu - | AluOPRRR::Div - | AluOPRRR::DivU - | AluOPRRR::Rem - | AluOPRRR::RemU => 0b0110011, - - AluOPRRR::Mulw - | AluOPRRR::Divw - | AluOPRRR::Divuw - | AluOPRRR::Remw - | AluOPRRR::Remuw => 0b0111011, - - AluOPRRR::Adduw => 0b0111011, - AluOPRRR::Andn - | AluOPRRR::Bclr - | AluOPRRR::Bext - | AluOPRRR::Binv - | AluOPRRR::Bset - | AluOPRRR::Clmul - | AluOPRRR::Clmulh - | AluOPRRR::Clmulr - | AluOPRRR::Max - | AluOPRRR::Maxu - | AluOPRRR::Min - | AluOPRRR::Minu - | AluOPRRR::Orn - | AluOPRRR::Rol - | AluOPRRR::Ror - | AluOPRRR::Sh1add - | AluOPRRR::Sh2add - | AluOPRRR::Sh3add - | AluOPRRR::Xnor - | AluOPRRR::CzeroEqz - | AluOPRRR::CzeroNez => 0b0110011, - - AluOPRRR::Rolw - | AluOPRRR::Rorw - | AluOPRRR::Sh2adduw - | AluOPRRR::Sh3adduw - | AluOPRRR::Sh1adduw => 0b0111011, - } - } - - pub const fn funct7(self) -> u32 { - match self { - AluOPRRR::Add => 0b0000000, - AluOPRRR::Sub => 0b0100000, - AluOPRRR::Sll => 0b0000000, - AluOPRRR::Slt => 0b0000000, - AluOPRRR::Sgt => 0b0000000, - AluOPRRR::SltU => 0b0000000, - AluOPRRR::Sgtu => 0b0000000, - - AluOPRRR::Xor => 0b0000000, - AluOPRRR::Srl => 0b0000000, - AluOPRRR::Sra => 0b0100000, - AluOPRRR::Or => 0b0000000, - AluOPRRR::And => 0b0000000, - - AluOPRRR::Addw => 0b0000000, - AluOPRRR::Subw => 0b0100000, - AluOPRRR::Sllw => 0b0000000, - AluOPRRR::Srlw => 0b0000000, - AluOPRRR::Sraw => 0b0100000, - - AluOPRRR::Mul => 0b0000001, - AluOPRRR::Mulh => 0b0000001, - AluOPRRR::Mulhsu => 0b0000001, - AluOPRRR::Mulhu => 0b0000001, - AluOPRRR::Div => 0b0000001, - AluOPRRR::DivU => 0b0000001, - AluOPRRR::Rem => 0b0000001, - AluOPRRR::RemU => 0b0000001, - - AluOPRRR::Mulw => 0b0000001, - AluOPRRR::Divw => 0b0000001, - AluOPRRR::Divuw => 0b0000001, - AluOPRRR::Remw => 0b0000001, - AluOPRRR::Remuw => 0b0000001, - AluOPRRR::Adduw => 0b0000100, - AluOPRRR::Andn => 0b0100000, - AluOPRRR::Bclr => 0b0100100, - AluOPRRR::Bext => 0b0100100, - AluOPRRR::Binv => 0b0110100, - AluOPRRR::Bset => 0b0010100, - AluOPRRR::Clmul => 0b0000101, - AluOPRRR::Clmulh => 0b0000101, - AluOPRRR::Clmulr => 0b0000101, - AluOPRRR::Max => 0b0000101, - AluOPRRR::Maxu => 0b0000101, - AluOPRRR::Min => 0b0000101, - AluOPRRR::Minu => 0b0000101, - AluOPRRR::Orn => 0b0100000, - AluOPRRR::Rol => 0b0110000, - AluOPRRR::Rolw => 0b0110000, - AluOPRRR::Ror => 0b0110000, - AluOPRRR::Rorw => 0b0110000, - AluOPRRR::Sh1add => 0b0010000, - AluOPRRR::Sh1adduw => 0b0010000, - AluOPRRR::Sh2add => 0b0010000, - AluOPRRR::Sh2adduw => 0b0010000, - AluOPRRR::Sh3add => 0b0010000, - AluOPRRR::Sh3adduw => 0b0010000, - AluOPRRR::Xnor => 0b0100000, - - // Zbkb - AluOPRRR::Pack => 0b0000100, - AluOPRRR::Packw => 0b0000100, - AluOPRRR::Packh => 0b0000100, - - // ZiCond - AluOPRRR::CzeroEqz => 0b0000111, - AluOPRRR::CzeroNez => 0b0000111, - } - } - - pub(crate) fn reverse_rs(self) -> bool { - // special case. - // sgt and sgtu is not defined in isa. - // emit should reverse rs1 and rs2. - self == AluOPRRR::Sgt || self == AluOPRRR::Sgtu - } -} - -impl AluOPRRI { - pub(crate) fn option_funct6(self) -> Option { - let x: Option = match self { - Self::Slli => Some(0b00_0000), - Self::Srli => Some(0b00_0000), - Self::Srai => Some(0b01_0000), - Self::Bclri => Some(0b010010), - Self::Bexti => Some(0b010010), - Self::Binvi => Some(0b011010), - Self::Bseti => Some(0b001010), - Self::Rori => Some(0b011000), - Self::SlliUw => Some(0b000010), - _ => None, - }; - x - } - - pub(crate) fn option_funct7(self) -> Option { - let x = match self { - Self::Slliw => Some(0b000_0000), - Self::SrliW => Some(0b000_0000), - Self::Sraiw => Some(0b010_0000), - Self::Roriw => Some(0b0110000), - _ => None, - }; - x - } - - pub(crate) fn imm12(self, imm12: Imm12) -> u32 { - let x = imm12.bits(); - if let Some(func) = self.option_funct6() { - func << 6 | (x & 0b11_1111) - } else if let Some(func) = self.option_funct7() { - func << 5 | (x & 0b1_1111) - } else if let Some(func) = self.option_funct12() { - func - } else { - x - } - } - - pub(crate) fn option_funct12(self) -> Option { - match self { - Self::Clz => Some(0b011000000000), - Self::Clzw => Some(0b011000000000), - Self::Cpop => Some(0b011000000010), - Self::Cpopw => Some(0b011000000010), - Self::Ctz => Some(0b011000000001), - Self::Ctzw => Some(0b011000000001), - Self::Rev8 => Some(0b011010111000), - Self::Sextb => Some(0b011000000100), - Self::Sexth => Some(0b011000000101), - Self::Zexth => Some(0b000010000000), - Self::Orcb => Some(0b001010000111), - Self::Brev8 => Some(0b0110_1000_0111), - _ => None, - } - } - - pub(crate) fn op_name(self) -> &'static str { - match self { - Self::Addi => "addi", - Self::Slti => "slti", - Self::SltiU => "sltiu", - Self::Xori => "xori", - Self::Ori => "ori", - Self::Andi => "andi", - Self::Slli => "slli", - Self::Srli => "srli", - Self::Srai => "srai", - Self::Addiw => "addiw", - Self::Slliw => "slliw", - Self::SrliW => "srliw", - Self::Sraiw => "sraiw", - Self::Bclri => "bclri", - Self::Bexti => "bexti", - Self::Binvi => "binvi", - Self::Bseti => "bseti", - Self::Rori => "rori", - Self::Roriw => "roriw", - Self::SlliUw => "slli.uw", - Self::Clz => "clz", - Self::Clzw => "clzw", - Self::Cpop => "cpop", - Self::Cpopw => "cpopw", - Self::Ctz => "ctz", - Self::Ctzw => "ctzw", - Self::Rev8 => "rev8", - Self::Sextb => "sext.b", - Self::Sexth => "sext.h", - Self::Zexth => "zext.h", - Self::Orcb => "orc.b", - Self::Brev8 => "brev8", - } - } - - pub fn funct3(self) -> u32 { - match self { - AluOPRRI::Addi => 0b000, - AluOPRRI::Slti => 0b010, - AluOPRRI::SltiU => 0b011, - AluOPRRI::Xori => 0b100, - AluOPRRI::Ori => 0b110, - AluOPRRI::Andi => 0b111, - AluOPRRI::Slli => 0b001, - AluOPRRI::Srli => 0b101, - AluOPRRI::Srai => 0b101, - AluOPRRI::Addiw => 0b000, - AluOPRRI::Slliw => 0b001, - AluOPRRI::SrliW => 0b101, - AluOPRRI::Sraiw => 0b101, - AluOPRRI::Bclri => 0b001, - AluOPRRI::Bexti => 0b101, - AluOPRRI::Binvi => 0b001, - AluOPRRI::Bseti => 0b001, - AluOPRRI::Rori => 0b101, - AluOPRRI::Roriw => 0b101, - AluOPRRI::SlliUw => 0b001, - AluOPRRI::Clz => 0b001, - AluOPRRI::Clzw => 0b001, - AluOPRRI::Cpop => 0b001, - AluOPRRI::Cpopw => 0b001, - AluOPRRI::Ctz => 0b001, - AluOPRRI::Ctzw => 0b001, - AluOPRRI::Rev8 => 0b101, - AluOPRRI::Sextb => 0b001, - AluOPRRI::Sexth => 0b001, - AluOPRRI::Zexth => 0b100, - AluOPRRI::Orcb => 0b101, - AluOPRRI::Brev8 => 0b101, - } - } - - pub fn op_code(self) -> u32 { - match self { - AluOPRRI::Addi - | AluOPRRI::Slti - | AluOPRRI::SltiU - | AluOPRRI::Xori - | AluOPRRI::Ori - | AluOPRRI::Andi - | AluOPRRI::Slli - | AluOPRRI::Srli - | AluOPRRI::Srai - | AluOPRRI::Bclri - | AluOPRRI::Bexti - | AluOPRRI::Binvi - | AluOPRRI::Bseti - | AluOPRRI::Rori - | AluOPRRI::Clz - | AluOPRRI::Cpop - | AluOPRRI::Ctz - | AluOPRRI::Rev8 - | AluOPRRI::Sextb - | AluOPRRI::Sexth - | AluOPRRI::Orcb - | AluOPRRI::Brev8 => 0b0010011, - - AluOPRRI::Addiw - | AluOPRRI::Slliw - | AluOPRRI::SrliW - | AluOPRRI::Sraiw - | AluOPRRI::Roriw - | AluOPRRI::SlliUw - | AluOPRRI::Clzw - | AluOPRRI::Cpopw - | AluOPRRI::Ctzw => 0b0011011, - AluOPRRI::Zexth => 0b0111011, - } - } -} - -impl Default for FRM { - fn default() -> Self { - Self::Fcsr - } -} - -/// float rounding mode. -impl FRM { - pub(crate) fn to_static_str(self) -> &'static str { - match self { - FRM::RNE => "rne", - FRM::RTZ => "rtz", - FRM::RDN => "rdn", - FRM::RUP => "rup", - FRM::RMM => "rmm", - FRM::Fcsr => "fcsr", - } - } - - #[inline] - pub(crate) fn bits(self) -> u8 { - match self { - FRM::RNE => 0b000, - FRM::RTZ => 0b001, - FRM::RDN => 0b010, - FRM::RUP => 0b011, - FRM::RMM => 0b100, - FRM::Fcsr => 0b111, - } - } - - pub(crate) fn as_u32(self) -> u32 { - self.bits() as u32 - } -} - -impl FFlagsException { - #[inline] - #[allow(dead_code)] - pub(crate) fn mask(self) -> u32 { - match self { - FFlagsException::NV => 1 << 4, - FFlagsException::DZ => 1 << 3, - FFlagsException::OF => 1 << 2, - FFlagsException::UF => 1 << 1, - FFlagsException::NX => 1 << 0, - } - } -} - -impl LoadOP { - pub(crate) fn op_name(self) -> &'static str { - match self { - Self::Lb => "lb", - Self::Lh => "lh", - Self::Lw => "lw", - Self::Lbu => "lbu", - Self::Lhu => "lhu", - Self::Lwu => "lwu", - Self::Ld => "ld", - Self::Flh => "flh", - Self::Flw => "flw", - Self::Fld => "fld", - } - } - - pub(crate) fn from_type(ty: Type) -> Self { - match ty { - F16 => Self::Flh, - F32 => Self::Flw, - F64 => Self::Fld, - I8 => Self::Lb, - I16 => Self::Lh, - I32 => Self::Lw, - I64 => Self::Ld, - _ => unreachable!(), - } - } - - pub(crate) fn size(&self) -> i64 { - match self { - Self::Lb | Self::Lbu => 1, - Self::Lh | Self::Lhu | Self::Flh => 2, - Self::Lw | Self::Lwu | Self::Flw => 4, - Self::Ld | Self::Fld => 8, - } - } - - pub(crate) fn op_code(self) -> u32 { - match self { - Self::Lb | Self::Lh | Self::Lw | Self::Lbu | Self::Lhu | Self::Lwu | Self::Ld => { - 0b0000011 - } - Self::Flh | Self::Flw | Self::Fld => 0b0000111, - } - } - - pub(crate) fn funct3(self) -> u32 { - match self { - Self::Lb => 0b000, - Self::Lh => 0b001, - Self::Lw => 0b010, - Self::Lwu => 0b110, - Self::Lbu => 0b100, - Self::Lhu => 0b101, - Self::Ld => 0b011, - Self::Flh => 0b001, - Self::Flw => 0b010, - Self::Fld => 0b011, - } - } -} - -impl StoreOP { - pub(crate) fn op_name(self) -> &'static str { - match self { - Self::Sb => "sb", - Self::Sh => "sh", - Self::Sw => "sw", - Self::Sd => "sd", - Self::Fsh => "fsh", - Self::Fsw => "fsw", - Self::Fsd => "fsd", - } - } - - pub(crate) fn from_type(ty: Type) -> Self { - match ty { - F16 => Self::Fsh, - F32 => Self::Fsw, - F64 => Self::Fsd, - I8 => Self::Sb, - I16 => Self::Sh, - I32 => Self::Sw, - I64 => Self::Sd, - _ => unreachable!(), - } - } - - pub(crate) fn size(&self) -> i64 { - match self { - Self::Sb => 1, - Self::Sh | Self::Fsh => 2, - Self::Sw | Self::Fsw => 4, - Self::Sd | Self::Fsd => 8, - } - } - - pub(crate) fn op_code(self) -> u32 { - match self { - Self::Sb | Self::Sh | Self::Sw | Self::Sd => 0b0100011, - Self::Fsh | Self::Fsw | Self::Fsd => 0b0100111, - } - } - - pub(crate) fn funct3(self) -> u32 { - match self { - Self::Sb => 0b000, - Self::Sh => 0b001, - Self::Sw => 0b010, - Self::Sd => 0b011, - Self::Fsh => 0b001, - Self::Fsw => 0b010, - Self::Fsd => 0b011, - } - } -} - -#[allow(dead_code)] -impl FClassResult { - pub(crate) const fn bit(self) -> u32 { - match self { - FClassResult::NegInfinite => 1 << 0, - FClassResult::NegNormal => 1 << 1, - FClassResult::NegSubNormal => 1 << 2, - FClassResult::NegZero => 1 << 3, - FClassResult::PosZero => 1 << 4, - FClassResult::PosSubNormal => 1 << 5, - FClassResult::PosNormal => 1 << 6, - FClassResult::PosInfinite => 1 << 7, - FClassResult::SNaN => 1 << 8, - FClassResult::QNaN => 1 << 9, - } - } - - #[inline] - pub(crate) const fn is_nan_bits() -> u32 { - Self::SNaN.bit() | Self::QNaN.bit() - } - - #[inline] - pub(crate) fn is_zero_bits() -> u32 { - Self::NegZero.bit() | Self::PosZero.bit() - } - - #[inline] - pub(crate) fn is_infinite_bits() -> u32 { - Self::PosInfinite.bit() | Self::NegInfinite.bit() - } -} - -impl AtomicOP { - #[inline] - pub(crate) fn is_load(self) -> bool { - match self { - Self::LrW | Self::LrD => true, - _ => false, - } - } - - #[inline] - pub(crate) fn op_name(self, amo: AMO) -> String { - let s = match self { - Self::LrW => "lr.w", - Self::ScW => "sc.w", - - Self::AmoswapW => "amoswap.w", - Self::AmoaddW => "amoadd.w", - Self::AmoxorW => "amoxor.w", - Self::AmoandW => "amoand.w", - Self::AmoorW => "amoor.w", - Self::AmominW => "amomin.w", - Self::AmomaxW => "amomax.w", - Self::AmominuW => "amominu.w", - Self::AmomaxuW => "amomaxu.w", - Self::LrD => "lr.d", - Self::ScD => "sc.d", - Self::AmoswapD => "amoswap.d", - Self::AmoaddD => "amoadd.d", - Self::AmoxorD => "amoxor.d", - Self::AmoandD => "amoand.d", - Self::AmoorD => "amoor.d", - Self::AmominD => "amomin.d", - Self::AmomaxD => "amomax.d", - Self::AmominuD => "amominu.d", - Self::AmomaxuD => "amomaxu.d", - }; - format!("{}{}", s, amo.to_static_str()) - } - - #[inline] - pub(crate) fn op_code(self) -> u32 { - 0b0101111 - } - - #[inline] - pub(crate) fn funct7(self, amo: AMO) -> u32 { - self.funct5() << 2 | amo.as_u32() & 0b11 - } - - pub(crate) fn funct3(self) -> u32 { - match self { - AtomicOP::LrW - | AtomicOP::ScW - | AtomicOP::AmoswapW - | AtomicOP::AmoaddW - | AtomicOP::AmoxorW - | AtomicOP::AmoandW - | AtomicOP::AmoorW - | AtomicOP::AmominW - | AtomicOP::AmomaxW - | AtomicOP::AmominuW - | AtomicOP::AmomaxuW => 0b010, - AtomicOP::LrD - | AtomicOP::ScD - | AtomicOP::AmoswapD - | AtomicOP::AmoaddD - | AtomicOP::AmoxorD - | AtomicOP::AmoandD - | AtomicOP::AmoorD - | AtomicOP::AmominD - | AtomicOP::AmomaxD - | AtomicOP::AmominuD - | AtomicOP::AmomaxuD => 0b011, - } - } - - pub(crate) fn funct5(self) -> u32 { - match self { - AtomicOP::LrW => 0b00010, - AtomicOP::ScW => 0b00011, - AtomicOP::AmoswapW => 0b00001, - AtomicOP::AmoaddW => 0b00000, - AtomicOP::AmoxorW => 0b00100, - AtomicOP::AmoandW => 0b01100, - AtomicOP::AmoorW => 0b01000, - AtomicOP::AmominW => 0b10000, - AtomicOP::AmomaxW => 0b10100, - AtomicOP::AmominuW => 0b11000, - AtomicOP::AmomaxuW => 0b11100, - AtomicOP::LrD => 0b00010, - AtomicOP::ScD => 0b00011, - AtomicOP::AmoswapD => 0b00001, - AtomicOP::AmoaddD => 0b00000, - AtomicOP::AmoxorD => 0b00100, - AtomicOP::AmoandD => 0b01100, - AtomicOP::AmoorD => 0b01000, - AtomicOP::AmominD => 0b10000, - AtomicOP::AmomaxD => 0b10100, - AtomicOP::AmominuD => 0b11000, - AtomicOP::AmomaxuD => 0b11100, - } - } - - pub(crate) fn load_op(t: Type) -> Self { - if t.bits() <= 32 { - Self::LrW - } else { - Self::LrD - } - } - - pub(crate) fn store_op(t: Type) -> Self { - if t.bits() <= 32 { - Self::ScW - } else { - Self::ScD - } - } - - /// extract - pub(crate) fn extract(rd: WritableReg, offset: Reg, rs: Reg, ty: Type) -> SmallInstVec { - let mut insts = SmallInstVec::new(); - insts.push(Inst::AluRRR { alu_op: AluOPRRR::Srl, rd, rs1: rs, rs2: offset }); - // - insts.push(Inst::Extend { - rd, - rn: rd.to_reg(), - signed: false, - from_bits: ty.bits() as u8, - to_bits: 64, - }); - insts - } - - /// like extract but sign extend the value. - /// suitable for smax,etc. - pub(crate) fn extract_sext( - rd: WritableReg, - offset: Reg, - rs: Reg, - ty: Type, - ) -> SmallInstVec { - let mut insts = SmallInstVec::new(); - insts.push(Inst::AluRRR { alu_op: AluOPRRR::Srl, rd, rs1: rs, rs2: offset }); - // - insts.push(Inst::Extend { - rd, - rn: rd.to_reg(), - signed: true, - from_bits: ty.bits() as u8, - to_bits: 64, - }); - insts - } - - pub(crate) fn unset( - rd: WritableReg, - tmp: WritableReg, - offset: Reg, - ty: Type, - ) -> SmallInstVec { - assert!(rd != tmp); - let mut insts = SmallInstVec::new(); - insts.extend(Inst::load_int_mask(tmp, ty)); - insts.push(Inst::AluRRR { alu_op: AluOPRRR::Sll, rd: tmp, rs1: tmp.to_reg(), rs2: offset }); - insts.push(Inst::construct_bit_not(tmp, tmp.to_reg())); - insts.push(Inst::AluRRR { alu_op: AluOPRRR::And, rd, rs1: rd.to_reg(), rs2: tmp.to_reg() }); - insts - } - - pub(crate) fn set( - rd: WritableReg, - tmp: WritableReg, - offset: Reg, - rs: Reg, - ty: Type, - ) -> SmallInstVec { - assert!(rd != tmp); - let mut insts = SmallInstVec::new(); - // make rs into tmp. - insts.push(Inst::Extend { - rd: tmp, - rn: rs, - signed: false, - from_bits: ty.bits() as u8, - to_bits: 64, - }); - insts.push(Inst::AluRRR { alu_op: AluOPRRR::Sll, rd: tmp, rs1: tmp.to_reg(), rs2: offset }); - insts.push(Inst::AluRRR { alu_op: AluOPRRR::Or, rd, rs1: rd.to_reg(), rs2: tmp.to_reg() }); - insts - } - - /// Merge reset part of rs into rd. - /// Call this function must make sure that other part of value is already in rd. - pub(crate) fn merge( - rd: WritableReg, - tmp: WritableReg, - offset: Reg, - rs: Reg, - ty: Type, - ) -> SmallInstVec { - let mut insts = Self::unset(rd, tmp, offset, ty); - insts.extend(Self::set(rd, tmp, offset, rs, ty)); - insts - } -} - -///Atomic Memory ordering. -#[derive(Copy, Clone, Debug)] -pub enum AMO { - Relax = 0b00, - Release = 0b01, - Aquire = 0b10, - SeqCst = 0b11, -} - -impl AMO { - pub(crate) fn to_static_str(self) -> &'static str { - match self { - AMO::Relax => "", - AMO::Release => ".rl", - AMO::Aquire => ".aq", - AMO::SeqCst => ".aqrl", - } - } - - pub(crate) fn as_u32(self) -> u32 { - self as u32 - } -} - -impl Inst { - /// fence request bits. - pub(crate) const FENCE_REQ_I: u8 = 1 << 3; - pub(crate) const FENCE_REQ_O: u8 = 1 << 2; - pub(crate) const FENCE_REQ_R: u8 = 1 << 1; - pub(crate) const FENCE_REQ_W: u8 = 1 << 0; - - pub(crate) fn fence_req_to_string(x: u8) -> String { - let mut s = String::default(); - if x & Self::FENCE_REQ_I != 0 { - s.push_str("i"); - } - if x & Self::FENCE_REQ_O != 0 { - s.push_str("o"); - } - if x & Self::FENCE_REQ_R != 0 { - s.push_str("r"); - } - if x & Self::FENCE_REQ_W != 0 { - s.push_str("w"); - } - s - } -} - -pub(crate) fn f32_cvt_to_int_bounds(signed: bool, out_bits: u32) -> (f32, f32) { - match (signed, out_bits) { - (true, 8) => (i8::min_value() as f32 - 1., i8::max_value() as f32 + 1.), - (true, 16) => (i16::min_value() as f32 - 1., i16::max_value() as f32 + 1.), - (true, 32) => (-2147483904.0, 2147483648.0), - (true, 64) => (-9223373136366403584.0, 9223372036854775808.0), - (false, 8) => (-1., u8::max_value() as f32 + 1.), - (false, 16) => (-1., u16::max_value() as f32 + 1.), - (false, 32) => (-1., 4294967296.0), - (false, 64) => (-1., 18446744073709551616.0), - _ => unreachable!(), - } -} - -pub(crate) fn f64_cvt_to_int_bounds(signed: bool, out_bits: u32) -> (f64, f64) { - match (signed, out_bits) { - (true, 8) => (i8::min_value() as f64 - 1., i8::max_value() as f64 + 1.), - (true, 16) => (i16::min_value() as f64 - 1., i16::max_value() as f64 + 1.), - (true, 32) => (-2147483649.0, 2147483648.0), - (true, 64) => (-9223372036854777856.0, 9223372036854775808.0), - (false, 8) => (-1., u8::max_value() as f64 + 1.), - (false, 16) => (-1., u16::max_value() as f64 + 1.), - (false, 32) => (-1., 4294967296.0), - (false, 64) => (-1., 18446744073709551616.0), - _ => unreachable!(), - } -} - -impl CsrRegOP { - pub(crate) fn funct3(self) -> u32 { - match self { - CsrRegOP::CsrRW => 0b001, - CsrRegOP::CsrRS => 0b010, - CsrRegOP::CsrRC => 0b011, - } - } - - pub(crate) fn opcode(self) -> u32 { - 0b1110011 - } - - pub(crate) fn name(self) -> &'static str { - match self { - CsrRegOP::CsrRW => "csrrw", - CsrRegOP::CsrRS => "csrrs", - CsrRegOP::CsrRC => "csrrc", - } - } -} - -impl Display for CsrRegOP { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.name()) - } -} - -impl CsrImmOP { - pub(crate) fn funct3(self) -> u32 { - match self { - CsrImmOP::CsrRWI => 0b101, - CsrImmOP::CsrRSI => 0b110, - CsrImmOP::CsrRCI => 0b111, - } - } - - pub(crate) fn opcode(self) -> u32 { - 0b1110011 - } - - pub(crate) fn name(self) -> &'static str { - match self { - CsrImmOP::CsrRWI => "csrrwi", - CsrImmOP::CsrRSI => "csrrsi", - CsrImmOP::CsrRCI => "csrrci", - } - } -} - -impl Display for CsrImmOP { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.name()) - } -} - -impl CSR { - pub(crate) fn bits(self) -> Imm12 { - Imm12::from_i16(match self { - CSR::Frm => 0x0002, - }) - } - - pub(crate) fn name(self) -> &'static str { - match self { - CSR::Frm => "frm", - } - } -} - -impl Display for CSR { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.name()) - } -} - -impl COpcodeSpace { - pub fn bits(&self) -> u32 { - match self { - COpcodeSpace::C0 => 0b00, - COpcodeSpace::C1 => 0b01, - COpcodeSpace::C2 => 0b10, - } - } -} - -impl CrOp { - pub fn funct4(&self) -> u32 { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - // `c.jr` has the same op/funct4 as C.MV, but RS2 is 0, which is illegal for mv. - CrOp::CMv | CrOp::CJr => 0b1000, - CrOp::CAdd | CrOp::CJalr | CrOp::CEbreak => 0b1001, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - CrOp::CMv | CrOp::CAdd | CrOp::CJr | CrOp::CJalr | CrOp::CEbreak => COpcodeSpace::C2, - } - } -} - -impl CaOp { - pub fn funct2(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CaOp::CAnd => 0b11, - CaOp::COr => 0b10, - CaOp::CXor => 0b01, - CaOp::CSub => 0b00, - CaOp::CAddw => 0b01, - CaOp::CSubw => 0b00, - CaOp::CMul => 0b10, - } - } - - pub fn funct6(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CaOp::CAnd | CaOp::COr | CaOp::CXor | CaOp::CSub => 0b100_011, - CaOp::CSubw | CaOp::CAddw | CaOp::CMul => 0b100_111, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - CaOp::CAnd - | CaOp::COr - | CaOp::CXor - | CaOp::CSub - | CaOp::CAddw - | CaOp::CSubw - | CaOp::CMul => COpcodeSpace::C1, - } - } -} - -impl CjOp { - pub fn funct3(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CjOp::CJ => 0b101, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - CjOp::CJ => COpcodeSpace::C1, - } - } -} - -impl CiOp { - pub fn funct3(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CiOp::CAddi | CiOp::CSlli => 0b000, - CiOp::CAddiw | CiOp::CFldsp => 0b001, - CiOp::CLi | CiOp::CLwsp => 0b010, - CiOp::CAddi16sp | CiOp::CLui | CiOp::CLdsp => 0b011, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - CiOp::CAddi | CiOp::CAddiw | CiOp::CAddi16sp | CiOp::CLi | CiOp::CLui => { - COpcodeSpace::C1 - } - CiOp::CSlli | CiOp::CLwsp | CiOp::CLdsp | CiOp::CFldsp => COpcodeSpace::C2, - } - } -} - -impl CiwOp { - pub fn funct3(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CiwOp::CAddi4spn => 0b000, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - CiwOp::CAddi4spn => COpcodeSpace::C0, - } - } -} - -impl CbOp { - pub fn funct3(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CbOp::CSrli | CbOp::CSrai | CbOp::CAndi => 0b100, - } - } - - pub fn funct2(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CbOp::CSrli => 0b00, - CbOp::CSrai => 0b01, - CbOp::CAndi => 0b10, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - CbOp::CSrli | CbOp::CSrai | CbOp::CAndi => COpcodeSpace::C1, - } - } -} - -impl CssOp { - pub fn funct3(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CssOp::CFsdsp => 0b101, - CssOp::CSwsp => 0b110, - CssOp::CSdsp => 0b111, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - CssOp::CSwsp | CssOp::CSdsp | CssOp::CFsdsp => COpcodeSpace::C2, - } - } -} - -impl CsOp { - pub fn funct3(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CsOp::CFsd => 0b101, - CsOp::CSw => 0b110, - CsOp::CSd => 0b111, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - CsOp::CSw | CsOp::CSd | CsOp::CFsd => COpcodeSpace::C0, - } - } -} - -impl ClOp { - pub fn funct3(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - ClOp::CFld => 0b001, - ClOp::CLw => 0b010, - ClOp::CLd => 0b011, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - ClOp::CLw | ClOp::CLd | ClOp::CFld => COpcodeSpace::C0, - } - } -} - -impl CsznOp { - pub fn funct6(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CsznOp::CNot - | CsznOp::CZextw - | CsznOp::CZextb - | CsznOp::CZexth - | CsznOp::CSextb - | CsznOp::CSexth => 0b100_111, - } - } - - pub fn funct5(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - CsznOp::CNot => 0b11_101, - CsznOp::CZextb => 0b11_000, - CsznOp::CZexth => 0b11_010, - CsznOp::CZextw => 0b11_100, - CsznOp::CSextb => 0b11_001, - CsznOp::CSexth => 0b11_011, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - CsznOp::CNot - | CsznOp::CZextb - | CsznOp::CZexth - | CsznOp::CZextw - | CsznOp::CSextb - | CsznOp::CSexth => COpcodeSpace::C1, - } - } -} - -impl ZcbMemOp { - pub fn funct6(&self) -> u32 { - // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes - match self { - ZcbMemOp::CLbu => 0b100_000, - // These two opcodes are differentiated in the imm field of the instruction. - ZcbMemOp::CLhu | ZcbMemOp::CLh => 0b100_001, - ZcbMemOp::CSb => 0b100_010, - ZcbMemOp::CSh => 0b100_011, - } - } - - pub fn imm_bits(&self) -> u8 { - match self { - ZcbMemOp::CLhu | ZcbMemOp::CLh | ZcbMemOp::CSh => 1, - ZcbMemOp::CLbu | ZcbMemOp::CSb => 2, - } - } - - pub fn op(&self) -> COpcodeSpace { - // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap - match self { - ZcbMemOp::CLbu | ZcbMemOp::CLhu | ZcbMemOp::CLh | ZcbMemOp::CSb | ZcbMemOp::CSh => { - COpcodeSpace::C0 - } - } - } -} diff --git a/hbcb/src/inst/emit.rs b/hbcb/src/inst/emit.rs deleted file mode 100644 index 96e21a1..0000000 --- a/hbcb/src/inst/emit.rs +++ /dev/null @@ -1,2685 +0,0 @@ -//! Riscv64 ISA: binary code emission. - -use crate::ir::{self, LibCall, TrapCode}; -use crate::inst::*; -use crate::lower::isle::generated_code::{ - CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp, -}; -use cranelift_control::ControlPlane; - -pub struct EmitInfo { - shared_flag: settings::Flags, - isa_flags: super::super::riscv_settings::Flags, -} - -impl EmitInfo { - pub(crate) fn new( - shared_flag: settings::Flags, - isa_flags: super::super::riscv_settings::Flags, - ) -> Self { - Self { - shared_flag, - isa_flags, - } - } -} - -pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 { - u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() -} - -pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 { - let real_reg = m.to_real_reg().unwrap().hw_enc(); - debug_assert!(real_reg >= 8 && real_reg < 16); - let compressed_reg = real_reg - 8; - u32::try_from(compressed_reg).unwrap() -} - -#[derive(Clone, Debug, PartialEq, Default)] -pub enum EmitVState { - #[default] - Unknown, - Known(VState), -} - -/// State carried between emissions of a sequence of instructions. -#[derive(Default, Clone, Debug)] -pub struct EmitState { - /// The user stack map for the upcoming instruction, as provided to - /// `pre_safepoint()`. - user_stack_map: Option, - - /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and - /// optimized away at compiletime. See [cranelift_control]. - ctrl_plane: ControlPlane, - - /// Vector State - /// Controls the current state of the vector unit at the emission point. - vstate: EmitVState, - - frame_layout: FrameLayout, -} - -impl EmitState { - fn take_stack_map(&mut self) -> Option { - self.user_stack_map.take() - } -} - -impl MachInstEmitState for EmitState { - fn new( - abi: &Callee, - ctrl_plane: ControlPlane, - ) -> Self { - EmitState { - user_stack_map: None, - ctrl_plane, - vstate: EmitVState::Unknown, - frame_layout: abi.frame_layout().clone(), - } - } - - fn pre_safepoint(&mut self, user_stack_map: Option) { - self.user_stack_map = user_stack_map; - } - - fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { - &mut self.ctrl_plane - } - - fn take_ctrl_plane(self) -> ControlPlane { - self.ctrl_plane - } - - fn on_new_block(&mut self) { - // Reset the vector state. - self.vstate = EmitVState::Unknown; - } - - fn frame_layout(&self) -> &FrameLayout { - &self.frame_layout - } -} - -impl Inst { - /// Load int mask. - /// If ty is int then 0xff in rd. - pub(crate) fn load_int_mask(rd: Writable, ty: Type) -> SmallInstVec { - let mut insts = SmallInstVec::new(); - assert!(ty.is_int() && ty.bits() <= 64); - match ty { - I64 => { - insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1))); - } - I32 | I16 => { - insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1))); - insts.push(Inst::Extend { - rd: rd, - rn: rd.to_reg(), - signed: false, - from_bits: ty.bits() as u8, - to_bits: 64, - }); - } - I8 => { - insts.push(Inst::load_imm12(rd, Imm12::from_i16(255))); - } - _ => unreachable!("ty:{:?}", ty), - } - insts - } - /// inverse all bit - pub(crate) fn construct_bit_not(rd: Writable, rs: Reg) -> Inst { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Xori, - rd, - rs, - imm12: Imm12::from_i16(-1), - } - } - - /// Returns Some(VState) if this instruction is expecting a specific vector state - /// before emission. - fn expected_vstate(&self) -> Option<&VState> { - match self { - Inst::Nop0 - | Inst::Nop4 - | Inst::BrTable { .. } - | Inst::Auipc { .. } - | Inst::Fli { .. } - | Inst::Lui { .. } - | Inst::LoadInlineConst { .. } - | Inst::AluRRR { .. } - | Inst::FpuRRR { .. } - | Inst::AluRRImm12 { .. } - | Inst::CsrReg { .. } - | Inst::CsrImm { .. } - | Inst::Load { .. } - | Inst::Store { .. } - | Inst::Args { .. } - | Inst::Rets { .. } - | Inst::Ret { .. } - | Inst::Extend { .. } - | Inst::Call { .. } - | Inst::CallInd { .. } - | Inst::ReturnCall { .. } - | Inst::ReturnCallInd { .. } - | Inst::Jal { .. } - | Inst::CondBr { .. } - | Inst::LoadExtName { .. } - | Inst::ElfTlsGetAddr { .. } - | Inst::LoadAddr { .. } - | Inst::Mov { .. } - | Inst::MovFromPReg { .. } - | Inst::Fence { .. } - | Inst::EBreak - | Inst::Udf { .. } - | Inst::FpuRR { .. } - | Inst::FpuRRRR { .. } - | Inst::Jalr { .. } - | Inst::Atomic { .. } - | Inst::Select { .. } - | Inst::AtomicCas { .. } - | Inst::RawData { .. } - | Inst::AtomicStore { .. } - | Inst::AtomicLoad { .. } - | Inst::AtomicRmwLoop { .. } - | Inst::TrapIf { .. } - | Inst::Unwind { .. } - | Inst::DummyUse { .. } - | Inst::Popcnt { .. } - | Inst::Cltz { .. } - | Inst::Brev8 { .. } - | Inst::StackProbeLoop { .. } => None, - - // VecSetState does not expect any vstate, rather it updates it. - Inst::VecSetState { .. } => None, - - // `vmv` instructions copy a set of registers and ignore vstate. - Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None, - - Inst::VecAluRR { vstate, .. } | - Inst::VecAluRRR { vstate, .. } | - Inst::VecAluRRRR { vstate, .. } | - Inst::VecAluRImm5 { vstate, .. } | - Inst::VecAluRRImm5 { vstate, .. } | - Inst::VecAluRRRImm5 { vstate, .. } | - // TODO: Unit-stride loads and stores only need the AVL to be correct, not - // the full vtype. A future optimization could be to decouple these two when - // updating vstate. This would allow us to avoid emitting a VecSetState in - // some cases. - Inst::VecLoad { vstate, .. } - | Inst::VecStore { vstate, .. } => Some(vstate), - } - } -} - -impl MachInstEmit for Inst { - type State = EmitState; - type Info = EmitInfo; - - fn emit(&self, sink: &mut MachBuffer, emit_info: &Self::Info, state: &mut EmitState) { - // Check if we need to update the vector state before emitting this instruction - if let Some(expected) = self.expected_vstate() { - if state.vstate != EmitVState::Known(*expected) { - // Update the vector state. - Inst::VecSetState { - rd: writable_zero_reg(), - vstate: *expected, - } - .emit(sink, emit_info, state); - } - } - - // N.B.: we *must* not exceed the "worst-case size" used to compute - // where to insert islands, except when islands are explicitly triggered - // (with an `EmitIsland`). We check this in debug builds. This is `mut` - // to allow disabling the check for `JTSequence`, which is always - // emitted following an `EmitIsland`. - let mut start_off = sink.cur_offset(); - - // First try to emit this as a compressed instruction - let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off); - if res.is_none() { - // If we can't lets emit it as a normal instruction - self.emit_uncompressed(sink, emit_info, state, &mut start_off); - } - - // We exclude br_table and return call from these checks since they emit - // their own islands, and thus are allowed to exceed the worst case size. - if !matches!( - self, - Inst::BrTable { .. } | Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. } - ) { - let end_off = sink.cur_offset(); - assert!( - (end_off - start_off) <= Inst::worst_case_size(), - "Inst:{:?} length:{} worst_case_size:{}", - self, - end_off - start_off, - Inst::worst_case_size() - ); - } - } - - fn pretty_print_inst(&self, state: &mut Self::State) -> String { - self.print_with_state(state) - } -} - -impl Inst { - /// Tries to emit an instruction as compressed, if we can't return false. - fn try_emit_compressed( - &self, - sink: &mut MachBuffer, - emit_info: &EmitInfo, - state: &mut EmitState, - start_off: &mut u32, - ) -> Option<()> { - let has_m = emit_info.isa_flags.has_m(); - let has_zba = emit_info.isa_flags.has_zba(); - let has_zbb = emit_info.isa_flags.has_zbb(); - let has_zca = emit_info.isa_flags.has_zca(); - let has_zcb = emit_info.isa_flags.has_zcb(); - let has_zcd = emit_info.isa_flags.has_zcd(); - - // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc..) require Zca - // to be enabled, so check it early. - if !has_zca { - return None; - } - - fn reg_is_compressible(r: Reg) -> bool { - r.to_real_reg() - .map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16) - .unwrap_or(false) - } - - match *self { - // C.ADD - Inst::AluRRR { - alu_op: AluOPRRR::Add, - rd, - rs1, - rs2, - } if (rd.to_reg() == rs1 || rd.to_reg() == rs2) - && rs1 != zero_reg() - && rs2 != zero_reg() => - { - // Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can - // also swap rs1 with rs2 and we get an equivalent instruction. i.e we - // can also compress `add rd, rs, rd` into `c.add rd, rs`. - let src = if rd.to_reg() == rs1 { rs2 } else { rs1 }; - - sink.put2(encode_cr_type(CrOp::CAdd, rd, src)); - } - - // C.MV - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi | AluOPRRI::Ori, - rd, - rs, - imm12, - } if rd.to_reg() != rs - && rd.to_reg() != zero_reg() - && rs != zero_reg() - && imm12.as_i16() == 0 => - { - sink.put2(encode_cr_type(CrOp::CMv, rd, rs)); - } - - // CA Ops - Inst::AluRRR { - alu_op: - alu_op @ (AluOPRRR::And - | AluOPRRR::Or - | AluOPRRR::Xor - | AluOPRRR::Addw - | AluOPRRR::Mul), - rd, - rs1, - rs2, - } if (rd.to_reg() == rs1 || rd.to_reg() == rs2) - && reg_is_compressible(rs1) - && reg_is_compressible(rs2) => - { - let op = match alu_op { - AluOPRRR::And => CaOp::CAnd, - AluOPRRR::Or => CaOp::COr, - AluOPRRR::Xor => CaOp::CXor, - AluOPRRR::Addw => CaOp::CAddw, - AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul, - _ => return None, - }; - // The canonical expansion for these instruction has `rd == rs1`, but - // these are all commutative operations, so we can swap the operands. - let src = if rd.to_reg() == rs1 { rs2 } else { rs1 }; - - sink.put2(encode_ca_type(op, rd, src)); - } - - // The sub instructions are non commutative, so we can't swap the operands. - Inst::AluRRR { - alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw), - rd, - rs1, - rs2, - } if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => { - let op = match alu_op { - AluOPRRR::Sub => CaOp::CSub, - AluOPRRR::Subw => CaOp::CSubw, - _ => return None, - }; - sink.put2(encode_ca_type(op, rd, rs2)); - } - - // c.j - // - // We don't have a separate JAL as that is only available in RV32C - Inst::Jal { label } => { - sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump); - sink.add_uncond_branch(*start_off, *start_off + 2, label); - sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO)); - } - - // c.jr - Inst::Jalr { rd, base, offset } - if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 => - { - sink.put2(encode_cr2_type(CrOp::CJr, base)); - } - - // c.jalr - Inst::Jalr { rd, base, offset } - if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 => - { - sink.put2(encode_cr2_type(CrOp::CJalr, base)); - } - - // c.ebreak - Inst::EBreak => { - sink.put2(encode_cr_type( - CrOp::CEbreak, - writable_zero_reg(), - zero_reg(), - )); - } - - // c.unimp - Inst::Udf { trap_code } => { - sink.add_trap(trap_code); - sink.put2(0x0000); - } - // c.addi16sp - // - // c.addi16sp shares the opcode with c.lui, but has a destination field of x2. - // c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2), - // where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used - // to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp - // is only valid when nzimm≠0; the code point with nzimm=0 is reserved. - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd, - rs, - imm12, - } if rd.to_reg() == rs - && rs == stack_reg() - && imm12.as_i16() != 0 - && (imm12.as_i16() % 16) == 0 - && Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() => - { - let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap(); - sink.put2(encode_c_addi16sp(imm6)); - } - - // c.addi4spn - // - // c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero - // immediate, scaled by 4, to the stack pointer, x2, and writes the result to - // rd. This instruction is used to generate pointers to stack-allocated variables - // and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0; - // the code points with nzuimm=0 are reserved. - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd, - rs, - imm12, - } if reg_is_compressible(rd.to_reg()) - && rs == stack_reg() - && imm12.as_i16() != 0 - && (imm12.as_i16() % 4) == 0 - && u8::try_from(imm12.as_i16() / 4).is_ok() => - { - let imm = u8::try_from(imm12.as_i16() / 4).unwrap(); - sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm)); - } - - // c.li - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd, - rs, - imm12, - } if rd.to_reg() != zero_reg() && rs == zero_reg() => { - let imm6 = Imm6::maybe_from_imm12(imm12)?; - sink.put2(encode_ci_type(CiOp::CLi, rd, imm6)); - } - - // c.addi - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd, - rs, - imm12, - } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => { - let imm6 = Imm6::maybe_from_imm12(imm12)?; - sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6)); - } - - // c.addiw - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addiw, - rd, - rs, - imm12, - } if rd.to_reg() == rs && rs != zero_reg() => { - let imm6 = Imm6::maybe_from_imm12(imm12)?; - sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6)); - } - - // c.lui - // - // c.lui loads the non-zero 6-bit immediate field into bits 17–12 - // of the destination register, clears the bottom 12 bits, and - // sign-extends bit 17 into all higher bits of the destination. - Inst::Lui { rd, imm: imm20 } - if rd.to_reg() != zero_reg() - && rd.to_reg() != stack_reg() - && imm20.as_i32() != 0 => - { - // Check that the top bits are sign extended - let imm = imm20.as_i32() << 14 >> 14; - if imm != imm20.as_i32() { - return None; - } - let imm6 = Imm6::maybe_from_i32(imm)?; - sink.put2(encode_ci_type(CiOp::CLui, rd, imm6)); - } - - // c.slli - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd, - rs, - imm12, - } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => { - // The shift amount is unsigned, but we encode it as signed. - let shift = imm12.as_i16() & 0x3f; - let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap(); - sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6)); - } - - // c.srli / c.srai - Inst::AluRRImm12 { - alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai), - rd, - rs, - imm12, - } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => { - let op = match op { - AluOPRRI::Srli => CbOp::CSrli, - AluOPRRI::Srai => CbOp::CSrai, - _ => unreachable!(), - }; - - // The shift amount is unsigned, but we encode it as signed. - let shift = imm12.as_i16() & 0x3f; - let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap(); - sink.put2(encode_cb_type(op, rd, imm6)); - } - - // c.zextb - // - // This is an alias for `andi rd, rd, 0xff` - Inst::AluRRImm12 { - alu_op: AluOPRRI::Andi, - rd, - rs, - imm12, - } if has_zcb - && rd.to_reg() == rs - && reg_is_compressible(rs) - && imm12.as_i16() == 0xff => - { - sink.put2(encode_cszn_type(CsznOp::CZextb, rd)); - } - - // c.andi - Inst::AluRRImm12 { - alu_op: AluOPRRI::Andi, - rd, - rs, - imm12, - } if rd.to_reg() == rs && reg_is_compressible(rs) => { - let imm6 = Imm6::maybe_from_imm12(imm12)?; - sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6)); - } - - // Stack Based Loads - Inst::Load { - rd, - op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld), - from, - flags, - } if from.get_base_register() == Some(stack_reg()) - && (from.get_offset_with_state(state) % op.size()) == 0 => - { - // We encode the offset in multiples of the load size. - let offset = from.get_offset_with_state(state); - let imm6 = u8::try_from(offset / op.size()) - .ok() - .and_then(Uimm6::maybe_from_u8)?; - - // Some additional constraints on these instructions. - // - // Integer loads are not allowed to target x0, but floating point loads - // are, since f0 is not a special register. - // - // Floating point loads are not included in the base Zca extension - // but in a separate Zcd extension. Both of these are part of the C Extension. - let rd_is_zero = rd.to_reg() == zero_reg(); - let op = match op { - LoadOP::Lw if !rd_is_zero => CiOp::CLwsp, - LoadOP::Ld if !rd_is_zero => CiOp::CLdsp, - LoadOP::Fld if has_zcd => CiOp::CFldsp, - _ => return None, - }; - - if let Some(trap_code) = flags.trap_code() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(trap_code); - } - sink.put2(encode_ci_sp_load(op, rd, imm6)); - } - - // Regular Loads - Inst::Load { - rd, - op: - op - @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh), - from, - flags, - } if reg_is_compressible(rd.to_reg()) - && from - .get_base_register() - .map(reg_is_compressible) - .unwrap_or(false) - && (from.get_offset_with_state(state) % op.size()) == 0 => - { - let base = from.get_base_register().unwrap(); - - // We encode the offset in multiples of the store size. - let offset = from.get_offset_with_state(state); - let offset = u8::try_from(offset / op.size()).ok()?; - - // We mix two different formats here. - // - // c.lw / c.ld / c.fld instructions are available in the standard Zca - // extension using the CL format. - // - // c.lbu / c.lhu / c.lh are only available in the Zcb extension and - // are also encoded differently. Technically they each have a different - // format, but they are similar enough that we can group them. - let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh); - let encoded = if is_zcb_load { - if !has_zcb { - return None; - } - - let op = match op { - LoadOP::Lbu => ZcbMemOp::CLbu, - LoadOP::Lhu => ZcbMemOp::CLhu, - LoadOP::Lh => ZcbMemOp::CLh, - _ => unreachable!(), - }; - - // Byte stores & loads have 2 bits of immediate offset. Halfword stores - // and loads only have 1 bit. - let imm2 = Uimm2::maybe_from_u8(offset)?; - if (offset & !((1 << op.imm_bits()) - 1)) != 0 { - return None; - } - - encode_zcbmem_load(op, rd, base, imm2) - } else { - // Floating point loads are not included in the base Zca extension - // but in a separate Zcd extension. Both of these are part of the C Extension. - let op = match op { - LoadOP::Lw => ClOp::CLw, - LoadOP::Ld => ClOp::CLd, - LoadOP::Fld if has_zcd => ClOp::CFld, - _ => return None, - }; - let imm5 = Uimm5::maybe_from_u8(offset)?; - - encode_cl_type(op, rd, base, imm5) - }; - - if let Some(trap_code) = flags.trap_code() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(trap_code); - } - sink.put2(encoded); - } - - // Stack Based Stores - Inst::Store { - src, - op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd), - to, - flags, - } if to.get_base_register() == Some(stack_reg()) - && (to.get_offset_with_state(state) % op.size()) == 0 => - { - // We encode the offset in multiples of the store size. - let offset = to.get_offset_with_state(state); - let imm6 = u8::try_from(offset / op.size()) - .ok() - .and_then(Uimm6::maybe_from_u8)?; - - // Floating point stores are not included in the base Zca extension - // but in a separate Zcd extension. Both of these are part of the C Extension. - let op = match op { - StoreOP::Sw => CssOp::CSwsp, - StoreOP::Sd => CssOp::CSdsp, - StoreOP::Fsd if has_zcd => CssOp::CFsdsp, - _ => return None, - }; - - if let Some(trap_code) = flags.trap_code() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(trap_code); - } - sink.put2(encode_css_type(op, src, imm6)); - } - - // Regular Stores - Inst::Store { - src, - op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb), - to, - flags, - } if reg_is_compressible(src) - && to - .get_base_register() - .map(reg_is_compressible) - .unwrap_or(false) - && (to.get_offset_with_state(state) % op.size()) == 0 => - { - let base = to.get_base_register().unwrap(); - - // We encode the offset in multiples of the store size. - let offset = to.get_offset_with_state(state); - let offset = u8::try_from(offset / op.size()).ok()?; - - // We mix two different formats here. - // - // c.sw / c.sd / c.fsd instructions are available in the standard Zca - // extension using the CL format. - // - // c.sb / c.sh are only available in the Zcb extension and are also - // encoded differently. - let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb); - let encoded = if is_zcb_store { - if !has_zcb { - return None; - } - - let op = match op { - StoreOP::Sh => ZcbMemOp::CSh, - StoreOP::Sb => ZcbMemOp::CSb, - _ => unreachable!(), - }; - - // Byte stores & loads have 2 bits of immediate offset. Halfword stores - // and loads only have 1 bit. - let imm2 = Uimm2::maybe_from_u8(offset)?; - if (offset & !((1 << op.imm_bits()) - 1)) != 0 { - return None; - } - - encode_zcbmem_store(op, src, base, imm2) - } else { - // Floating point stores are not included in the base Zca extension - // but in a separate Zcd extension. Both of these are part of the C Extension. - let op = match op { - StoreOP::Sw => CsOp::CSw, - StoreOP::Sd => CsOp::CSd, - StoreOP::Fsd if has_zcd => CsOp::CFsd, - _ => return None, - }; - let imm5 = Uimm5::maybe_from_u8(offset)?; - - encode_cs_type(op, src, base, imm5) - }; - - if let Some(trap_code) = flags.trap_code() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(trap_code); - } - sink.put2(encoded); - } - - // c.not - // - // This is an alias for `xori rd, rd, -1` - Inst::AluRRImm12 { - alu_op: AluOPRRI::Xori, - rd, - rs, - imm12, - } if has_zcb - && rd.to_reg() == rs - && reg_is_compressible(rs) - && imm12.as_i16() == -1 => - { - sink.put2(encode_cszn_type(CsznOp::CNot, rd)); - } - - // c.sext.b / c.sext.h / c.zext.h - // - // These are all the extend instructions present in `Zcb`, they - // also require `Zbb` since they aren't available in the base ISA. - Inst::AluRRImm12 { - alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth), - rd, - rs, - imm12, - } if has_zcb - && has_zbb - && rd.to_reg() == rs - && reg_is_compressible(rs) - && imm12.as_i16() == 0 => - { - let op = match alu_op { - AluOPRRI::Sextb => CsznOp::CSextb, - AluOPRRI::Sexth => CsznOp::CSexth, - AluOPRRI::Zexth => CsznOp::CZexth, - _ => unreachable!(), - }; - sink.put2(encode_cszn_type(op, rd)); - } - - // c.zext.w - // - // This is an alias for `add.uw rd, rd, zero` - Inst::AluRRR { - alu_op: AluOPRRR::Adduw, - rd, - rs1, - rs2, - } if has_zcb - && has_zba - && rd.to_reg() == rs1 - && reg_is_compressible(rs1) - && rs2 == zero_reg() => - { - sink.put2(encode_cszn_type(CsznOp::CZextw, rd)); - } - - _ => return None, - } - - return Some(()); - } - - fn emit_uncompressed( - &self, - sink: &mut MachBuffer, - emit_info: &EmitInfo, - state: &mut EmitState, - start_off: &mut u32, - ) { - match self { - &Inst::Nop0 => { - // do nothing - } - // Addi x0, x0, 0 - &Inst::Nop4 => { - let x = Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: Writable::from_reg(zero_reg()), - rs: zero_reg(), - imm12: Imm12::ZERO, - }; - x.emit(sink, emit_info, state) - } - &Inst::RawData { ref data } => { - // Right now we only put a u32 or u64 in this instruction. - // It is not very long, no need to check if need `emit_island`. - // If data is very long , this is a bug because RawData is typically - // use to load some data and rely on some position in the code stream. - // and we may exceed `Inst::worst_case_size`. - // for more information see https://github.com/bytecodealliance/wasmtime/pull/5612. - sink.put_data(&data[..]); - } - &Inst::Lui { rd, ref imm } => { - let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12); - sink.put4(x); - } - &Inst::Fli { rd, ty, imm } => { - sink.put4(encode_fli(ty, imm, rd)); - } - &Inst::LoadInlineConst { rd, ty, imm } => { - let data = &imm.to_le_bytes()[..ty.bytes() as usize]; - - let label_data: MachLabel = sink.get_label(); - let label_end: MachLabel = sink.get_label(); - - // Load into rd - Inst::Load { - rd, - op: LoadOP::from_type(ty), - flags: MemFlags::new(), - from: AMode::Label(label_data), - } - .emit(sink, emit_info, state); - - // Jump over the inline pool - Inst::gen_jump(label_end).emit(sink, emit_info, state); - - // Emit the inline data - sink.bind_label(label_data, &mut state.ctrl_plane); - Inst::RawData { data: data.into() }.emit(sink, emit_info, state); - - sink.bind_label(label_end, &mut state.ctrl_plane); - } - &Inst::FpuRR { - alu_op, - width, - frm, - rd, - rs, - } => { - if alu_op.is_convert_to_int() { - sink.add_trap(TrapCode::BadConversionToInteger); - } - sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs)); - } - &Inst::FpuRRRR { - alu_op, - rd, - rs1, - rs2, - rs3, - frm, - width, - } => { - sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3)); - } - &Inst::FpuRRR { - alu_op, - width, - frm, - rd, - rs1, - rs2, - } => { - sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2)); - } - &Inst::Unwind { ref inst } => { - sink.add_unwind(inst.clone()); - } - &Inst::DummyUse { .. } => { - // This has already been handled by Inst::allocate. - } - &Inst::AluRRR { - alu_op, - rd, - rs1, - rs2, - } => { - let (rs1, rs2) = if alu_op.reverse_rs() { - (rs2, rs1) - } else { - (rs1, rs2) - }; - - sink.put4(encode_r_type( - alu_op.op_code(), - rd, - alu_op.funct3(), - rs1, - rs2, - alu_op.funct7(), - )); - } - &Inst::AluRRImm12 { - alu_op, - rd, - rs, - imm12, - } => { - let x = alu_op.op_code() - | reg_to_gpr_num(rd.to_reg()) << 7 - | alu_op.funct3() << 12 - | reg_to_gpr_num(rs) << 15 - | alu_op.imm12(imm12) << 20; - sink.put4(x); - } - &Inst::CsrReg { op, rd, rs, csr } => { - sink.put4(encode_csr_reg(op, rd, rs, csr)); - } - &Inst::CsrImm { op, rd, csr, imm } => { - sink.put4(encode_csr_imm(op, rd, csr, imm)); - } - &Inst::Load { - rd, - op, - from, - flags, - } => { - let base = from.get_base_register(); - let offset = from.get_offset_with_state(state); - let offset_imm12 = Imm12::maybe_from_i64(offset); - let label = from.get_label_with_sink(sink); - - let (addr, imm12) = match (base, offset_imm12, label) { - // When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it. - (Some(base), Some(imm12), None) => (base, imm12), - - // Otherwise, if the offset does not fit into a imm12, we need to materialize it into a - // register and load from that. - (Some(_), None, None) => { - let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state); - (tmp.to_reg(), Imm12::ZERO) - } - - // If the AMode contains a label we can emit an internal relocation that gets - // resolved with the correct address later. - (None, Some(imm), Some(label)) => { - debug_assert_eq!(imm.as_i16(), 0); - - // Get the current PC. - sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20); - Inst::Auipc { - rd, - imm: Imm20::ZERO, - } - .emit_uncompressed(sink, emit_info, state, start_off); - - // Emit a relocation for the load. This patches the offset into the instruction. - sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I); - - // Imm12 here is meaningless since it's going to get replaced. - (rd.to_reg(), Imm12::ZERO) - } - - // These cases are impossible with the current AModes that we have. We either - // always have a register, or always have a label. Never both, and never neither. - (None, None, None) - | (None, Some(_), None) - | (Some(_), None, Some(_)) - | (Some(_), Some(_), Some(_)) - | (None, None, Some(_)) => { - unreachable!("Invalid load address") - } - }; - - if let Some(trap_code) = flags.trap_code() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(trap_code); - } - - sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); - } - &Inst::Store { op, src, flags, to } => { - let base = to.get_base_register(); - let offset = to.get_offset_with_state(state); - let offset_imm12 = Imm12::maybe_from_i64(offset); - - let (addr, imm12) = match (base, offset_imm12) { - // If the offset fits into an imm12 we can directly encode it. - (Some(base), Some(imm12)) => (base, imm12), - // Otherwise load the address it into a reg and load from it. - _ => { - let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state); - (tmp.to_reg(), Imm12::ZERO) - } - }; - - if let Some(trap_code) = flags.trap_code() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(trap_code); - } - - sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12)); - } - &Inst::Args { .. } | &Inst::Rets { .. } => { - // Nothing: this is a pseudoinstruction that serves - // only to constrain registers at a certain point. - } - &Inst::Ret {} => { - // RISC-V does not have a dedicated ret instruction, instead we emit the equivalent - // `jalr x0, x1, 0` that jumps to the return address. - Inst::Jalr { - rd: writable_zero_reg(), - base: link_reg(), - offset: Imm12::ZERO, - } - .emit(sink, emit_info, state); - } - - &Inst::Extend { - rd, - rn, - signed, - from_bits, - to_bits: _to_bits, - } => { - let mut insts = SmallInstVec::new(); - let shift_bits = (64 - from_bits) as i16; - let is_u8 = || from_bits == 8 && signed == false; - if is_u8() { - // special for u8. - insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Andi, - rd, - rs: rn, - imm12: Imm12::from_i16(255), - }); - } else { - insts.push(Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd, - rs: rn, - imm12: Imm12::from_i16(shift_bits), - }); - insts.push(Inst::AluRRImm12 { - alu_op: if signed { - AluOPRRI::Srai - } else { - AluOPRRI::Srli - }, - rd, - rs: rd.to_reg(), - imm12: Imm12::from_i16(shift_bits), - }); - } - insts - .into_iter() - .for_each(|i| i.emit(sink, emit_info, state)); - } - - &Inst::Call { ref info } => { - sink.add_call_site(); - sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0); - - Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0) - .into_iter() - .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off)); - - if let Some(s) = state.take_stack_map() { - let offset = sink.cur_offset(); - sink.push_user_stack_map(state, offset, s); - } - - let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap(); - if callee_pop_size > 0 { - for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) { - inst.emit(sink, emit_info, state); - } - } - } - &Inst::CallInd { ref info } => { - Inst::Jalr { - rd: writable_link_reg(), - base: info.dest, - offset: Imm12::ZERO, - } - .emit(sink, emit_info, state); - - if let Some(s) = state.take_stack_map() { - let offset = sink.cur_offset(); - sink.push_user_stack_map(state, offset, s); - } - - sink.add_call_site(); - - let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap(); - if callee_pop_size > 0 { - for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) { - inst.emit(sink, emit_info, state); - } - } - } - - &Inst::ReturnCall { ref info } => { - emit_return_call_common_sequence(sink, emit_info, state, info); - - sink.add_call_site(); - sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0); - Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0) - .into_iter() - .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off)); - } - - &Inst::ReturnCallInd { ref info } => { - emit_return_call_common_sequence(sink, emit_info, state, &info); - - Inst::Jalr { - rd: writable_zero_reg(), - base: info.dest, - offset: Imm12::ZERO, - } - .emit(sink, emit_info, state); - } - &Inst::Jal { label } => { - sink.use_label_at_offset(*start_off, label, LabelUse::Jal20); - sink.add_uncond_branch(*start_off, *start_off + 4, label); - sink.put4(0b1101111); - } - &Inst::CondBr { - taken, - not_taken, - kind, - } => { - match taken { - CondBrTarget::Label(label) => { - let code = kind.emit(); - let code_inverse = kind.inverse().emit().to_le_bytes(); - sink.use_label_at_offset(*start_off, label, LabelUse::B12); - sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse); - sink.put4(code); - } - CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"), - } - - match not_taken { - CondBrTarget::Label(label) => { - Inst::gen_jump(label).emit(sink, emit_info, state) - } - CondBrTarget::Fallthrough => {} - }; - } - - &Inst::Mov { rd, rm, ty } => { - debug_assert_eq!(rd.to_reg().class(), rm.class()); - if rd.to_reg() == rm { - return; - } - - match rm.class() { - RegClass::Int => Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: rd, - rs: rm, - imm12: Imm12::ZERO, - }, - RegClass::Float => Inst::FpuRRR { - alu_op: FpuOPRRR::Fsgnj, - width: FpuOPWidth::try_from(ty).unwrap(), - frm: FRM::RNE, - rd: rd, - rs1: rm, - rs2: rm, - }, - RegClass::Vector => Inst::VecAluRRImm5 { - op: VecAluOpRRImm5::VmvrV, - vd: rd, - vs2: rm, - // Imm 0 means copy 1 register. - imm: Imm5::maybe_from_i8(0).unwrap(), - mask: VecOpMasking::Disabled, - // Vstate for this instruction is ignored. - vstate: VState::from_type(ty), - }, - } - .emit(sink, emit_info, state); - } - - &Inst::MovFromPReg { rd, rm } => { - Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state); - } - - &Inst::BrTable { - index, - tmp1, - tmp2, - ref targets, - } => { - let ext_index = writable_spilltmp_reg(); - - let label_compute_target = sink.get_label(); - - // The default target is passed in as the 0th element of `targets` - // separate it here for clarity. - let default_target = targets[0]; - let targets = &targets[1..]; - - // We are going to potentially emit a large amount of instructions, so ensure that we emit an island - // now if we need one. - // - // The worse case PC calculations are 12 instructions. And each entry in the jump table is 2 instructions. - // Check if we need to emit a jump table here to support that jump. - let inst_count = 12 + (targets.len() * 2); - let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32; - if sink.island_needed(distance) { - let jump_around_label = sink.get_label(); - Inst::gen_jump(jump_around_label).emit(sink, emit_info, state); - sink.emit_island(distance + 4, &mut state.ctrl_plane); - sink.bind_label(jump_around_label, &mut state.ctrl_plane); - } - - // We emit a bounds check on the index, if the index is larger than the number of - // jump table entries, we jump to the default block. Otherwise we compute a jump - // offset by multiplying the index by 8 (the size of each entry) and then jump to - // that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially. - // - // Build the following sequence: - // - // extend_index: - // zext.w ext_index, index - // bounds_check: - // li tmp, n_labels - // bltu ext_index, tmp, compute_target - // jump_to_default_block: - // auipc pc, 0 - // jalr zero, pc, default_block - // compute_target: - // auipc pc, 0 - // slli tmp, ext_index, 3 - // add pc, pc, tmp - // jalr zero, pc, 0x10 - // jump_table: - // ; This repeats for each entry in the jumptable - // auipc pc, 0 - // jalr zero, pc, block_target - - // Extend the index to 64 bits. - // - // This prevents us branching on the top 32 bits of the index, which - // are undefined. - Inst::Extend { - rd: ext_index, - rn: index, - signed: false, - from_bits: 32, - to_bits: 64, - } - .emit(sink, emit_info, state); - - // Bounds check. - // - // Check if the index passed in is larger than the number of jumptable - // entries that we have. If it is, we fallthrough to a jump into the - // default block. - Inst::load_constant_u32(tmp2, targets.len() as u64) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - Inst::CondBr { - taken: CondBrTarget::Label(label_compute_target), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::UnsignedLessThan, - rs1: ext_index.to_reg(), - rs2: tmp2.to_reg(), - }, - } - .emit(sink, emit_info, state); - - sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32); - Inst::construct_auipc_and_jalr(None, tmp2, 0) - .iter() - .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off)); - - // Compute the jump table offset. - // We need to emit a PC relative offset, - sink.bind_label(label_compute_target, &mut state.ctrl_plane); - - // Get the current PC. - Inst::Auipc { - rd: tmp1, - imm: Imm20::ZERO, - } - .emit_uncompressed(sink, emit_info, state, start_off); - - // These instructions must be emitted as uncompressed since we - // are manually computing the offset from the PC. - - // Multiply the index by 8, since that is the size in - // bytes of each jump table entry - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp2, - rs: ext_index.to_reg(), - imm12: Imm12::from_i16(3), - } - .emit_uncompressed(sink, emit_info, state, start_off); - - // Calculate the base of the jump, PC + the offset from above. - Inst::AluRRR { - alu_op: AluOPRRR::Add, - rd: tmp1, - rs1: tmp1.to_reg(), - rs2: tmp2.to_reg(), - } - .emit_uncompressed(sink, emit_info, state, start_off); - - // Jump to the middle of the jump table. - // We add a 16 byte offset here, since we used 4 instructions - // since the AUIPC that was used to get the PC. - Inst::Jalr { - rd: writable_zero_reg(), - base: tmp1.to_reg(), - offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16), - } - .emit_uncompressed(sink, emit_info, state, start_off); - - // Emit the jump table. - // - // Each entry is a auipc + jalr to the target block. We also start with a island - // if necessary. - - // Emit the jumps back to back - for target in targets.iter() { - sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32); - - Inst::construct_auipc_and_jalr(None, tmp2, 0) - .iter() - .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off)); - } - - // We've just emitted an island that is safe up to *here*. - // Mark it as such so that we don't needlessly emit additional islands. - *start_off = sink.cur_offset(); - } - - &Inst::Atomic { - op, - rd, - addr, - src, - amo, - } => { - // TODO: get flags from original CLIF atomic instruction - let flags = MemFlags::new(); - if let Some(trap_code) = flags.trap_code() { - sink.add_trap(trap_code); - } - let x = op.op_code() - | reg_to_gpr_num(rd.to_reg()) << 7 - | op.funct3() << 12 - | reg_to_gpr_num(addr) << 15 - | reg_to_gpr_num(src) << 20 - | op.funct7(amo) << 25; - - sink.put4(x); - } - &Inst::Fence { pred, succ } => { - let x = 0b0001111 - | 0b00000 << 7 - | 0b000 << 12 - | 0b00000 << 15 - | (succ as u32) << 20 - | (pred as u32) << 24; - - sink.put4(x); - } - &Inst::Auipc { rd, imm } => { - sink.put4(enc_auipc(rd, imm)); - } - - &Inst::LoadAddr { rd, mem } => { - let base = mem.get_base_register(); - let offset = mem.get_offset_with_state(state); - let offset_imm12 = Imm12::maybe_from_i64(offset); - - match (mem, base, offset_imm12) { - (_, Some(rs), Some(imm12)) => { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd, - rs, - imm12, - } - .emit(sink, emit_info, state); - } - (_, Some(rs), None) => { - let mut insts = Inst::load_constant_u64(rd, offset as u64); - insts.push(Inst::AluRRR { - alu_op: AluOPRRR::Add, - rd, - rs1: rd.to_reg(), - rs2: rs, - }); - insts - .into_iter() - .for_each(|inst| inst.emit(sink, emit_info, state)); - } - (AMode::Const(addr), None, _) => { - // Get an address label for the constant and recurse. - let label = sink.get_label_for_constant(addr); - Inst::LoadAddr { - rd, - mem: AMode::Label(label), - } - .emit(sink, emit_info, state); - } - (AMode::Label(label), None, _) => { - // Get the current PC. - sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20); - let inst = Inst::Auipc { - rd, - imm: Imm20::ZERO, - }; - inst.emit_uncompressed(sink, emit_info, state, start_off); - - // Emit an add to the address with a relocation. - // This later gets patched up with the correct offset. - sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd, - rs: rd.to_reg(), - imm12: Imm12::ZERO, - } - .emit_uncompressed(sink, emit_info, state, start_off); - } - (amode, _, _) => { - unimplemented!("LoadAddr: {:?}", amode); - } - } - } - - &Inst::Select { - ref dst, - condition, - ref x, - ref y, - } => { - // The general form for this select is the following: - // - // mv rd, x - // b{cond} rcond, label_end - // mv rd, y - // label_end: - // ... etc - // - // This is built on the assumption that moves are cheap, but branches and jumps - // are not. So with this format we always avoid one jump instruction at the expense - // of an unconditional move. - // - // We also perform another optimization here. If the destination register is the same - // as one of the input registers, we can avoid emitting the first unconditional move - // and emit just the branch and the second move. - // - // To make sure that this happens as often as possible, we also try to invert the - // condition, so that if either of the input registers are the same as the destination - // we avoid that move. - - let label_end = sink.get_label(); - - let xregs = x.regs(); - let yregs = y.regs(); - let dstregs: Vec = dst.regs().into_iter().map(|r| r.to_reg()).collect(); - let condregs = condition.regs(); - - // We are going to write to the destination register before evaluating - // the condition, so we need to make sure that the destination register - // is not one of the condition registers. - // - // This should never happen, since hopefully the regalloc constraints - // for this register are set up correctly. - debug_assert_ne!(dstregs, condregs); - - // Check if we can invert the condition and avoid moving the y registers into - // the destination. This allows us to only emit the branch and one of the moves. - let (uncond_move, cond_move, condition) = if yregs == dstregs { - (yregs, xregs, condition.inverse()) - } else { - (xregs, yregs, condition) - }; - - // Unconditionally move one of the values to the destination register. - // - // These moves may not end up being emitted if the source and - // destination registers are the same. That logic is built into - // the emit function for `Inst::Mov`. - for i in gen_moves(dst.regs(), uncond_move) { - i.emit(sink, emit_info, state); - } - - // If the condition passes we skip over the conditional move - Inst::CondBr { - taken: CondBrTarget::Label(label_end), - not_taken: CondBrTarget::Fallthrough, - kind: condition, - } - .emit(sink, emit_info, state); - - // Move the conditional value to the destination register. - for i in gen_moves(dst.regs(), cond_move) { - i.emit(sink, emit_info, state); - } - - sink.bind_label(label_end, &mut state.ctrl_plane); - } - &Inst::Jalr { rd, base, offset } => { - sink.put4(enc_jalr(rd, base, offset)); - } - &Inst::EBreak => { - sink.put4(0x00100073); - } - &Inst::AtomicCas { - offset, - t0, - dst, - e, - addr, - v, - ty, - } => { - // # addr holds address of memory location - // # e holds expected value - // # v holds desired value - // # dst holds return value - // cas: - // lr.w dst, (addr) # Load original value. - // bne dst, e, fail # Doesn’t match, so fail. - // sc.w t0, v, (addr) # Try to update. - // bnez t0 , cas # if store not ok,retry. - // fail: - let fail_label = sink.get_label(); - let cas_lebel = sink.get_label(); - sink.bind_label(cas_lebel, &mut state.ctrl_plane); - Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: dst, - addr, - src: zero_reg(), - amo: AMO::SeqCst, - } - .emit(sink, emit_info, state); - if ty.bits() < 32 { - AtomicOP::extract(dst, offset, dst.to_reg(), ty) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - } else if ty.bits() == 32 { - Inst::Extend { - rd: dst, - rn: dst.to_reg(), - signed: false, - from_bits: 32, - to_bits: 64, - } - .emit(sink, emit_info, state); - } - Inst::CondBr { - taken: CondBrTarget::Label(fail_label), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: e, - rs2: dst.to_reg(), - }, - } - .emit(sink, emit_info, state); - let store_value = if ty.bits() < 32 { - // reload value to t0. - Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: t0, - addr, - src: zero_reg(), - amo: AMO::SeqCst, - } - .emit(sink, emit_info, state); - // set reset part. - AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - t0.to_reg() - } else { - v - }; - Inst::Atomic { - op: AtomicOP::store_op(ty), - rd: t0, - addr, - src: store_value, - amo: AMO::SeqCst, - } - .emit(sink, emit_info, state); - // check is our value stored. - Inst::CondBr { - taken: CondBrTarget::Label(cas_lebel), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: t0.to_reg(), - rs2: zero_reg(), - }, - } - .emit(sink, emit_info, state); - sink.bind_label(fail_label, &mut state.ctrl_plane); - } - &Inst::AtomicRmwLoop { - offset, - op, - dst, - ty, - p, - x, - t0, - } => { - let retry = sink.get_label(); - sink.bind_label(retry, &mut state.ctrl_plane); - // load old value. - Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: dst, - addr: p, - src: zero_reg(), - amo: AMO::SeqCst, - } - .emit(sink, emit_info, state); - // - - let store_value: Reg = match op { - crate::ir::AtomicRmwOp::Add - | crate::ir::AtomicRmwOp::Sub - | crate::ir::AtomicRmwOp::And - | crate::ir::AtomicRmwOp::Or - | crate::ir::AtomicRmwOp::Xor => { - AtomicOP::extract(dst, offset, dst.to_reg(), ty) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - Inst::AluRRR { - alu_op: match op { - crate::ir::AtomicRmwOp::Add => AluOPRRR::Add, - crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub, - crate::ir::AtomicRmwOp::And => AluOPRRR::And, - crate::ir::AtomicRmwOp::Or => AluOPRRR::Or, - crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor, - _ => unreachable!(), - }, - rd: t0, - rs1: dst.to_reg(), - rs2: x, - } - .emit(sink, emit_info, state); - Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: writable_spilltmp_reg2(), - addr: p, - src: zero_reg(), - amo: AMO::SeqCst, - } - .emit(sink, emit_info, state); - AtomicOP::merge( - writable_spilltmp_reg2(), - writable_spilltmp_reg(), - offset, - t0.to_reg(), - ty, - ) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - spilltmp_reg2() - } - crate::ir::AtomicRmwOp::Nand => { - if ty.bits() < 32 { - AtomicOP::extract(dst, offset, dst.to_reg(), ty) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - } - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: t0, - rs1: x, - rs2: dst.to_reg(), - } - .emit(sink, emit_info, state); - Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state); - if ty.bits() < 32 { - Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: writable_spilltmp_reg2(), - addr: p, - src: zero_reg(), - amo: AMO::SeqCst, - } - .emit(sink, emit_info, state); - AtomicOP::merge( - writable_spilltmp_reg2(), - writable_spilltmp_reg(), - offset, - t0.to_reg(), - ty, - ) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - spilltmp_reg2() - } else { - t0.to_reg() - } - } - - crate::ir::AtomicRmwOp::Umin - | crate::ir::AtomicRmwOp::Umax - | crate::ir::AtomicRmwOp::Smin - | crate::ir::AtomicRmwOp::Smax => { - let label_select_dst = sink.get_label(); - let label_select_done = sink.get_label(); - if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax - { - AtomicOP::extract(dst, offset, dst.to_reg(), ty) - } else { - AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty) - } - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - - Inst::CondBr { - taken: CondBrTarget::Label(label_select_dst), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: match op { - crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan, - crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan, - crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan, - crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan, - _ => unreachable!(), - }, - rs1: dst.to_reg(), - rs2: x, - }, - } - .emit(sink, emit_info, state); - // here we select x. - Inst::gen_move(t0, x, I64).emit(sink, emit_info, state); - Inst::gen_jump(label_select_done).emit(sink, emit_info, state); - sink.bind_label(label_select_dst, &mut state.ctrl_plane); - Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state); - sink.bind_label(label_select_done, &mut state.ctrl_plane); - Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: writable_spilltmp_reg2(), - addr: p, - src: zero_reg(), - amo: AMO::SeqCst, - } - .emit(sink, emit_info, state); - AtomicOP::merge( - writable_spilltmp_reg2(), - writable_spilltmp_reg(), - offset, - t0.to_reg(), - ty, - ) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - spilltmp_reg2() - } - crate::ir::AtomicRmwOp::Xchg => { - AtomicOP::extract(dst, offset, dst.to_reg(), ty) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - Inst::Atomic { - op: AtomicOP::load_op(ty), - rd: writable_spilltmp_reg2(), - addr: p, - src: zero_reg(), - amo: AMO::SeqCst, - } - .emit(sink, emit_info, state); - AtomicOP::merge( - writable_spilltmp_reg2(), - writable_spilltmp_reg(), - offset, - x, - ty, - ) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - spilltmp_reg2() - } - }; - - Inst::Atomic { - op: AtomicOP::store_op(ty), - rd: t0, - addr: p, - src: store_value, - amo: AMO::SeqCst, - } - .emit(sink, emit_info, state); - - // if store is not ok,retry. - Inst::CondBr { - taken: CondBrTarget::Label(retry), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: t0.to_reg(), - rs2: zero_reg(), - }, - } - .emit(sink, emit_info, state); - } - - &Inst::LoadExtName { - rd, - ref name, - offset, - } => { - if emit_info.shared_flag.is_pic() { - // Load a PC-relative address into a register. - // RISC-V does this slightly differently from other arches. We emit a relocation - // with a label, instead of the symbol itself. - // - // See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses - // - // Emit the following code: - // label: - // auipc rd, 0 # R_RISCV_GOT_HI20 (symbol_name) - // ld rd, rd, 0 # R_RISCV_PCREL_LO12_I (label) - - // Create the label that is going to be published to the final binary object. - let auipc_label = sink.get_label(); - sink.bind_label(auipc_label, &mut state.ctrl_plane); - - // Get the current PC. - sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0); - Inst::Auipc { - rd: rd, - imm: Imm20::from_i32(0), - } - .emit_uncompressed(sink, emit_info, state, start_off); - - // The `ld` here, points to the `auipc` label instead of directly to the symbol. - sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0); - Inst::Load { - rd, - op: LoadOP::Ld, - flags: MemFlags::trusted(), - from: AMode::RegOffset(rd.to_reg(), 0), - } - .emit_uncompressed(sink, emit_info, state, start_off); - } else { - // In the non PIC sequence we relocate the absolute address into - // a prealocatted space, load it into a register and jump over it. - // - // Emit the following code: - // ld rd, label_data - // j label_end - // label_data: - // <8 byte space> # ABS8 - // label_end: - - let label_data = sink.get_label(); - let label_end = sink.get_label(); - - // Load the value from a label - Inst::Load { - rd, - op: LoadOP::Ld, - flags: MemFlags::trusted(), - from: AMode::Label(label_data), - } - .emit(sink, emit_info, state); - - // Jump over the data - Inst::gen_jump(label_end).emit(sink, emit_info, state); - - sink.bind_label(label_data, &mut state.ctrl_plane); - sink.add_reloc(Reloc::Abs8, name.as_ref(), offset); - sink.put8(0); - - sink.bind_label(label_end, &mut state.ctrl_plane); - } - } - - &Inst::ElfTlsGetAddr { rd, ref name } => { - // RISC-V's TLS GD model is slightly different from other arches. - // - // We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits - // of the address relative to the GOT entry. This relocation points to - // the symbol as usual. - // - // However when loading the bottom 12bits of the address, we need to - // use a label that points to the previous AUIPC instruction. - // - // label: - // auipc a0,0 # R_RISCV_TLS_GD_HI20 (symbol) - // addi a0,a0,0 # R_RISCV_PCREL_LO12_I (label) - // - // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic - - // Create the label that is going to be published to the final binary object. - let auipc_label = sink.get_label(); - sink.bind_label(auipc_label, &mut state.ctrl_plane); - - // Get the current PC. - sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0); - Inst::Auipc { - rd: rd, - imm: Imm20::from_i32(0), - } - .emit_uncompressed(sink, emit_info, state, start_off); - - // The `addi` here, points to the `auipc` label instead of directly to the symbol. - sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: rd, - rs: rd.to_reg(), - imm12: Imm12::from_i16(0), - } - .emit_uncompressed(sink, emit_info, state, start_off); - - Inst::Call { - info: Box::new(CallInfo::empty( - ExternalName::LibCall(LibCall::ElfTlsGetAddr), - CallConv::SystemV, - )), - } - .emit_uncompressed(sink, emit_info, state, start_off); - } - - &Inst::TrapIf { - rs1, - rs2, - cc, - trap_code, - } => { - let label_end = sink.get_label(); - let cond = IntegerCompare { kind: cc, rs1, rs2 }; - - // Jump over the trap if we the condition is false. - Inst::CondBr { - taken: CondBrTarget::Label(label_end), - not_taken: CondBrTarget::Fallthrough, - kind: cond.inverse(), - } - .emit(sink, emit_info, state); - Inst::Udf { trap_code }.emit(sink, emit_info, state); - - sink.bind_label(label_end, &mut state.ctrl_plane); - } - &Inst::Udf { trap_code } => { - sink.add_trap(trap_code); - sink.put_data(Inst::TRAP_OPCODE); - } - &Inst::AtomicLoad { rd, ty, p } => { - // emit the fence. - Inst::Fence { - pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, - succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, - } - .emit(sink, emit_info, state); - // load. - Inst::Load { - rd: rd, - op: LoadOP::from_type(ty), - flags: MemFlags::new(), - from: AMode::RegOffset(p, 0), - } - .emit(sink, emit_info, state); - Inst::Fence { - pred: Inst::FENCE_REQ_R, - succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, - } - .emit(sink, emit_info, state); - } - &Inst::AtomicStore { src, ty, p } => { - Inst::Fence { - pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, - succ: Inst::FENCE_REQ_W, - } - .emit(sink, emit_info, state); - Inst::Store { - to: AMode::RegOffset(p, 0), - op: StoreOP::from_type(ty), - flags: MemFlags::new(), - src, - } - .emit(sink, emit_info, state); - } - - &Inst::Popcnt { - sum, - tmp, - step, - rs, - ty, - } => { - // load 0 to sum , init. - Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state); - // load - Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)) - .emit(sink, emit_info, state); - // - Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_i16((ty.bits() - 1) as i16), - } - .emit(sink, emit_info, state); - let label_done = sink.get_label(); - let label_loop = sink.get_label(); - sink.bind_label(label_loop, &mut state.ctrl_plane); - Inst::CondBr { - taken: CondBrTarget::Label(label_done), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::SignedLessThanOrEqual, - rs1: step.to_reg(), - rs2: zero_reg(), - }, - } - .emit(sink, emit_info, state); - // test and add sum. - { - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: writable_spilltmp_reg2(), - rs1: tmp.to_reg(), - rs2: rs, - } - .emit(sink, emit_info, state); - let label_over = sink.get_label(); - Inst::CondBr { - taken: CondBrTarget::Label(label_over), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: zero_reg(), - rs2: spilltmp_reg2(), - }, - } - .emit(sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: sum, - rs: sum.to_reg(), - imm12: Imm12::ONE, - } - .emit(sink, emit_info, state); - sink.bind_label(label_over, &mut state.ctrl_plane); - } - // set step and tmp. - { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: step, - rs: step.to_reg(), - imm12: Imm12::from_i16(-1), - } - .emit(sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::ONE, - } - .emit(sink, emit_info, state); - Inst::gen_jump(label_loop).emit(sink, emit_info, state); - } - sink.bind_label(label_done, &mut state.ctrl_plane); - } - &Inst::Cltz { - sum, - tmp, - step, - rs, - leading, - ty, - } => { - // load 0 to sum , init. - Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state); - // load - Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)) - .emit(sink, emit_info, state); - // - Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state); - if leading { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_i16((ty.bits() - 1) as i16), - } - .emit(sink, emit_info, state); - } - let label_done = sink.get_label(); - let label_loop = sink.get_label(); - sink.bind_label(label_loop, &mut state.ctrl_plane); - Inst::CondBr { - taken: CondBrTarget::Label(label_done), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::SignedLessThanOrEqual, - rs1: step.to_reg(), - rs2: zero_reg(), - }, - } - .emit(sink, emit_info, state); - // test and add sum. - { - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: writable_spilltmp_reg2(), - rs1: tmp.to_reg(), - rs2: rs, - } - .emit(sink, emit_info, state); - Inst::CondBr { - taken: CondBrTarget::Label(label_done), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: zero_reg(), - rs2: spilltmp_reg2(), - }, - } - .emit(sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: sum, - rs: sum.to_reg(), - imm12: Imm12::ONE, - } - .emit(sink, emit_info, state); - } - // set step and tmp. - { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: step, - rs: step.to_reg(), - imm12: Imm12::from_i16(-1), - } - .emit(sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: if leading { - AluOPRRI::Srli - } else { - AluOPRRI::Slli - }, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::ONE, - } - .emit(sink, emit_info, state); - Inst::gen_jump(label_loop).emit(sink, emit_info, state); - } - sink.bind_label(label_done, &mut state.ctrl_plane); - } - &Inst::Brev8 { - rs, - ty, - step, - tmp, - tmp2, - rd, - } => { - Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state); - Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)) - .emit(sink, emit_info, state); - // - Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::from_i16((ty.bits() - 1) as i16), - } - .emit(sink, emit_info, state); - Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp2, - rs: tmp2.to_reg(), - imm12: Imm12::from_i16((ty.bits() - 8) as i16), - } - .emit(sink, emit_info, state); - - let label_done = sink.get_label(); - let label_loop = sink.get_label(); - sink.bind_label(label_loop, &mut state.ctrl_plane); - Inst::CondBr { - taken: CondBrTarget::Label(label_done), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::SignedLessThanOrEqual, - rs1: step.to_reg(), - rs2: zero_reg(), - }, - } - .emit(sink, emit_info, state); - // test and set bit. - { - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: writable_spilltmp_reg2(), - rs1: tmp.to_reg(), - rs2: rs, - } - .emit(sink, emit_info, state); - let label_over = sink.get_label(); - Inst::CondBr { - taken: CondBrTarget::Label(label_over), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::Equal, - rs1: zero_reg(), - rs2: spilltmp_reg2(), - }, - } - .emit(sink, emit_info, state); - Inst::AluRRR { - alu_op: AluOPRRR::Or, - rd: rd, - rs1: rd.to_reg(), - rs2: tmp2.to_reg(), - } - .emit(sink, emit_info, state); - sink.bind_label(label_over, &mut state.ctrl_plane); - } - // set step and tmp. - { - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: step, - rs: step.to_reg(), - imm12: Imm12::from_i16(-1), - } - .emit(sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: tmp, - rs: tmp.to_reg(), - imm12: Imm12::ONE, - } - .emit(sink, emit_info, state); - { - // reset tmp2 - // if (step %=8 == 0) then tmp2 = tmp2 >> 15 - // if (step %=8 != 0) then tmp2 = tmp2 << 1 - let label_over = sink.get_label(); - let label_sll_1 = sink.get_label(); - Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8)) - .emit(sink, emit_info, state); - Inst::AluRRR { - alu_op: AluOPRRR::Rem, - rd: writable_spilltmp_reg2(), - rs1: step.to_reg(), - rs2: spilltmp_reg2(), - } - .emit(sink, emit_info, state); - Inst::CondBr { - taken: CondBrTarget::Label(label_sll_1), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::NotEqual, - rs1: spilltmp_reg2(), - rs2: zero_reg(), - }, - } - .emit(sink, emit_info, state); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: tmp2, - rs: tmp2.to_reg(), - imm12: Imm12::from_i16(15), - } - .emit(sink, emit_info, state); - Inst::gen_jump(label_over).emit(sink, emit_info, state); - sink.bind_label(label_sll_1, &mut state.ctrl_plane); - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: tmp2, - rs: tmp2.to_reg(), - imm12: Imm12::ONE, - } - .emit(sink, emit_info, state); - sink.bind_label(label_over, &mut state.ctrl_plane); - } - Inst::gen_jump(label_loop).emit(sink, emit_info, state); - } - sink.bind_label(label_done, &mut state.ctrl_plane); - } - &Inst::StackProbeLoop { - guard_size, - probe_count, - tmp: guard_size_tmp, - } => { - let step = writable_spilltmp_reg(); - Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64)) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - Inst::load_constant_u64(guard_size_tmp, guard_size as u64) - .iter() - .for_each(|i| i.emit(sink, emit_info, state)); - - let loop_start = sink.get_label(); - let label_done = sink.get_label(); - sink.bind_label(loop_start, &mut state.ctrl_plane); - Inst::CondBr { - taken: CondBrTarget::Label(label_done), - not_taken: CondBrTarget::Fallthrough, - kind: IntegerCompare { - kind: IntCC::UnsignedLessThanOrEqual, - rs1: step.to_reg(), - rs2: guard_size_tmp.to_reg(), - }, - } - .emit(sink, emit_info, state); - // compute address. - Inst::AluRRR { - alu_op: AluOPRRR::Sub, - rd: writable_spilltmp_reg2(), - rs1: stack_reg(), - rs2: step.to_reg(), - } - .emit(sink, emit_info, state); - Inst::Store { - to: AMode::RegOffset(spilltmp_reg2(), 0), - op: StoreOP::Sb, - flags: MemFlags::new(), - src: zero_reg(), - } - .emit(sink, emit_info, state); - // reset step. - Inst::AluRRR { - alu_op: AluOPRRR::Sub, - rd: step, - rs1: step.to_reg(), - rs2: guard_size_tmp.to_reg(), - } - .emit(sink, emit_info, state); - Inst::gen_jump(loop_start).emit(sink, emit_info, state); - sink.bind_label(label_done, &mut state.ctrl_plane); - } - &Inst::VecAluRRRImm5 { - op, - vd, - vd_src, - imm, - vs2, - ref mask, - .. - } => { - debug_assert_eq!(vd.to_reg(), vd_src); - - sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask)); - } - &Inst::VecAluRRRR { - op, - vd, - vd_src, - vs1, - vs2, - ref mask, - .. - } => { - debug_assert_eq!(vd.to_reg(), vd_src); - - sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask)); - } - &Inst::VecAluRRR { - op, - vd, - vs1, - vs2, - ref mask, - .. - } => { - sink.put4(encode_valu(op, vd, vs1, vs2, *mask)); - } - &Inst::VecAluRRImm5 { - op, - vd, - imm, - vs2, - ref mask, - .. - } => { - sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask)); - } - &Inst::VecAluRR { - op, - vd, - vs, - ref mask, - .. - } => { - sink.put4(encode_valu_rr(op, vd, vs, *mask)); - } - &Inst::VecAluRImm5 { - op, - vd, - imm, - ref mask, - .. - } => { - sink.put4(encode_valu_r_imm(op, vd, imm, *mask)); - } - &Inst::VecSetState { rd, ref vstate } => { - sink.put4(encode_vcfg_imm( - 0x57, - rd.to_reg(), - vstate.avl.unwrap_static(), - &vstate.vtype, - )); - - // Update the current vector emit state. - state.vstate = EmitVState::Known(*vstate); - } - - &Inst::VecLoad { - eew, - to, - ref from, - ref mask, - flags, - .. - } => { - // Vector Loads don't support immediate offsets, so we need to load it into a register. - let addr = match from { - VecAMode::UnitStride { base } => { - let base_reg = base.get_base_register(); - let offset = base.get_offset_with_state(state); - - // Reg+0 Offset can be directly encoded - if let (Some(base_reg), 0) = (base_reg, offset) { - base_reg - } else { - // Otherwise load the address it into a reg and load from it. - let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { - rd: tmp, - mem: *base, - } - .emit(sink, emit_info, state); - tmp.to_reg() - } - } - }; - - if let Some(trap_code) = flags.trap_code() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(trap_code); - } - - sink.put4(encode_vmem_load( - 0x07, - to.to_reg(), - eew, - addr, - from.lumop(), - *mask, - from.mop(), - from.nf(), - )); - } - - &Inst::VecStore { - eew, - ref to, - from, - ref mask, - flags, - .. - } => { - // Vector Stores don't support immediate offsets, so we need to load it into a register. - let addr = match to { - VecAMode::UnitStride { base } => { - let base_reg = base.get_base_register(); - let offset = base.get_offset_with_state(state); - - // Reg+0 Offset can be directly encoded - if let (Some(base_reg), 0) = (base_reg, offset) { - base_reg - } else { - // Otherwise load the address it into a reg and load from it. - let tmp = writable_spilltmp_reg(); - Inst::LoadAddr { - rd: tmp, - mem: *base, - } - .emit(sink, emit_info, state); - tmp.to_reg() - } - } - }; - - if let Some(trap_code) = flags.trap_code() { - // Register the offset at which the actual load instruction starts. - sink.add_trap(trap_code); - } - - sink.put4(encode_vmem_store( - 0x27, - from, - eew, - addr, - to.sumop(), - *mask, - to.mop(), - to.nf(), - )); - } - }; - } -} - -fn emit_return_call_common_sequence( - sink: &mut MachBuffer, - emit_info: &EmitInfo, - state: &mut EmitState, - info: &ReturnCallInfo, -) { - // The return call sequence can potentially emit a lot of instructions (up to 634 bytes!) - // So lets emit an island here if we need it. - // - // It is difficult to calculate exactly how many instructions are going to be emitted, so - // we calculate it by emitting it into a disposable buffer, and then checking how many instructions - // were actually emitted. - let mut buffer = MachBuffer::new(); - let mut fake_emit_state = state.clone(); - - return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info); - - // Finalize the buffer and get the number of bytes emitted. - let buffer = buffer.finish(&Default::default(), &mut Default::default()); - let length = buffer.data().len() as u32; - - // And now emit the island inline with this instruction. - if sink.island_needed(length) { - let jump_around_label = sink.get_label(); - Inst::gen_jump(jump_around_label).emit(sink, emit_info, state); - sink.emit_island(length + 4, &mut state.ctrl_plane); - sink.bind_label(jump_around_label, &mut state.ctrl_plane); - } - - // Now that we're done, emit the *actual* return sequence. - return_call_emit_impl(sink, emit_info, state, info); -} - -/// This should not be called directly, Instead prefer to call [emit_return_call_common_sequence]. -fn return_call_emit_impl( - sink: &mut MachBuffer, - emit_info: &EmitInfo, - state: &mut EmitState, - info: &ReturnCallInfo, -) { - let sp_to_fp_offset = { - let frame_layout = state.frame_layout(); - i64::from( - frame_layout.clobber_size - + frame_layout.fixed_frame_storage_size - + frame_layout.outgoing_args_size, - ) - }; - - let mut clobber_offset = sp_to_fp_offset - 8; - for reg in state.frame_layout().clobbered_callee_saves.clone() { - let rreg = reg.to_reg(); - let ty = match rreg.class() { - RegClass::Int => I64, - RegClass::Float => F64, - RegClass::Vector => unimplemented!("Vector Clobber Restores"), - }; - - Inst::gen_load( - reg.map(Reg::from), - AMode::SPOffset(clobber_offset), - ty, - MemFlags::trusted(), - ) - .emit(sink, emit_info, state); - - clobber_offset -= 8 - } - - // Restore the link register and frame pointer - let setup_area_size = i64::from(state.frame_layout().setup_area_size); - if setup_area_size > 0 { - Inst::gen_load( - writable_link_reg(), - AMode::SPOffset(sp_to_fp_offset + 8), - I64, - MemFlags::trusted(), - ) - .emit(sink, emit_info, state); - - Inst::gen_load( - writable_fp_reg(), - AMode::SPOffset(sp_to_fp_offset), - I64, - MemFlags::trusted(), - ) - .emit(sink, emit_info, state); - } - - // If we over-allocated the incoming args area in the prologue, resize down to what the callee - // is expecting. - let incoming_args_diff = - i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size); - - // Increment SP all at once - let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff; - if sp_increment > 0 { - for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) { - inst.emit(sink, emit_info, state); - } - } -} diff --git a/hbcb/src/inst/emit_tests.rs b/hbcb/src/inst/emit_tests.rs deleted file mode 100644 index 668e170..0000000 --- a/hbcb/src/inst/emit_tests.rs +++ /dev/null @@ -1,2277 +0,0 @@ -#[allow(unused)] -use crate::ir::LibCall; -use crate::inst::*; -use crate::lower::isle::generated_code::FpuOPWidth; -use std::borrow::Cow; - -fn fa7() -> Reg { - f_reg(17) -} - -#[test] -fn test_riscv64_binemit() { - struct TestUnit { - inst: Inst, - assembly: &'static str, - code: TestEncoding, - } - - struct TestEncoding(Cow<'static, str>); - - impl From<&'static str> for TestEncoding { - fn from(value: &'static str) -> Self { - Self(value.into()) - } - } - - impl From for TestEncoding { - fn from(value: u32) -> Self { - let value = value.swap_bytes(); - let value = format!("{value:08X}"); - Self(value.into()) - } - } - - impl TestUnit { - fn new(inst: Inst, assembly: &'static str, code: impl Into) -> Self { - let code = code.into(); - Self { - inst, - assembly, - code, - } - } - } - - let mut insns = Vec::::with_capacity(500); - - insns.push(TestUnit::new(Inst::Ret {}, "ret", 0x00008067)); - - insns.push(TestUnit::new( - Inst::Mov { - rd: writable_fa0(), - rm: fa1(), - ty: F32, - }, - "fmv.s fa0,fa1", - 0x20b58553, - )); - - insns.push(TestUnit::new( - Inst::Mov { - rd: writable_fa0(), - rm: fa1(), - ty: F64, - }, - "fmv.d fa0,fa1", - 0x22b58553, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Brev8, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "brev8 a1,a0", - 0x68755593, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Rev8, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "rev8 a1,a0", - 0x6b855593, - )); - - // - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Bclri, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "bclri a1,a0,5", - 0x48551593, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Bexti, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "bexti a1,a0,5", - 0x48555593, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Binvi, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "binvi a1,a0,5", - 0x68551593, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Bseti, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "bseti a1,a0,5", - 0x28551593, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Rori, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "rori a1,a0,5", - 0x60555593, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Roriw, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "roriw a1,a0,5", - 0x6055559b, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::SlliUw, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "slli.uw a1,a0,5", - 0x855159b, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Clz, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "clz a1,a0", - 0x60051593, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Clzw, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "clzw a1,a0", - 0x6005159b, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Cpop, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "cpop a1,a0", - 0x60251593, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Cpopw, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "cpopw a1,a0", - 0x6025159b, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Ctz, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "ctz a1,a0", - 0x60151593, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Ctzw, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "ctzw a1,a0", - 0x6015159b, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Sextb, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "sext.b a1,a0", - 0x60451593, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Sexth, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "sext.h a1,a0", - 0x60551593, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Zexth, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "zext.h a1,a0", - 0x80545bb, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Orcb, - rd: writable_a1(), - rs: a0(), - imm12: Imm12::ZERO, - }, - "orc.b a1,a0", - 0x28755593, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Adduw, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "zext.w a1,a0", - 0x80505bb, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Adduw, - rd: writable_a1(), - rs1: a0(), - rs2: a1(), - }, - "add.uw a1,a0,a1", - 0x08b505bb, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Andn, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "andn a1,a0,zero", - 0x400575b3, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Bclr, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "bclr a1,a0,zero", - 0x480515b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Bext, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "bext a1,a0,zero", - 0x480555b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Binv, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "binv a1,a0,zero", - 0x680515b3, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Bset, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "bset a1,a0,zero", - 0x280515b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Clmul, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "clmul a1,a0,zero", - 0xa0515b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Clmulh, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "clmulh a1,a0,zero", - 0xa0535b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Clmulr, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "clmulr a1,a0,zero", - 0xa0525b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Max, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "max a1,a0,zero", - 0xa0565b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Maxu, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "maxu a1,a0,zero", - 0xa0575b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Min, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "min a1,a0,zero", - 0xa0545b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Minu, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "minu a1,a0,zero", - 0xa0555b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Orn, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "orn a1,a0,zero", - 0x400565b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Rol, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "rol a1,a0,zero", - 0x600515b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Rolw, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "rolw a1,a0,zero", - 0x600515bb, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Ror, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "ror a1,a0,zero", - 0x600555b3, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Rorw, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "rorw a1,a0,zero", - 0x600555bb, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sh1add, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "sh1add a1,a0,zero", - 0x200525b3, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sh1adduw, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "sh1add.uw a1,a0,zero", - 0x200525bb, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sh2add, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "sh2add a1,a0,zero", - 0x200545b3, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sh2adduw, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "sh2add.uw a1,a0,zero", - 0x200545bb, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sh3add, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "sh3add a1,a0,zero", - 0x200565b3, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sh3adduw, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "sh3add.uw a1,a0,zero", - 0x200565bb, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Xnor, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "xnor a1,a0,zero", - 0x400545b3, - )); - - // Zbkb - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Pack, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "pack a1,a0,zero", - 0x080545b3, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Packw, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "packw a1,a0,zero", - 0x080545bb, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Packh, - rd: writable_a1(), - rs1: a0(), - rs2: zero_reg(), - }, - "packh a1,a0,zero", - 0x080575b3, - )); - - // - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Add, - rd: writable_fp_reg(), - rs1: fp_reg(), - rs2: zero_reg(), - }, - "add fp,fp,zero", - 0x40433, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: writable_fp_reg(), - rs: stack_reg(), - imm12: Imm12::maybe_from_u64(100).unwrap(), - }, - "addi fp,sp,100", - 0x6410413, - )); - insns.push(TestUnit::new( - Inst::Lui { - rd: writable_zero_reg(), - imm: Imm20::from_i32(120), - }, - "lui zero,120", - 0x78037, - )); - insns.push(TestUnit::new( - Inst::Auipc { - rd: writable_zero_reg(), - imm: Imm20::from_i32(120), - }, - "auipc zero,120", - 0x78017, - )); - - insns.push(TestUnit::new( - Inst::Jalr { - rd: writable_a0(), - base: a0(), - offset: Imm12::from_i16(100), - }, - "jalr a0,100(a0)", - 0x6450567, - )); - - insns.push(TestUnit::new( - Inst::Load { - rd: writable_a0(), - op: LoadOP::Lb, - flags: MemFlags::new(), - from: AMode::RegOffset(a1(), 100), - }, - "lb a0,100(a1)", - 0x6458503, - )); - insns.push(TestUnit::new( - Inst::Load { - rd: writable_a0(), - op: LoadOP::Lh, - flags: MemFlags::new(), - from: AMode::RegOffset(a1(), 100), - }, - "lh a0,100(a1)", - 0x6459503, - )); - - insns.push(TestUnit::new( - Inst::Load { - rd: writable_a0(), - op: LoadOP::Lw, - flags: MemFlags::new(), - from: AMode::RegOffset(a1(), 100), - }, - "lw a0,100(a1)", - 0x645a503, - )); - - insns.push(TestUnit::new( - Inst::Load { - rd: writable_a0(), - op: LoadOP::Ld, - flags: MemFlags::new(), - from: AMode::RegOffset(a1(), 100), - }, - "ld a0,100(a1)", - 0x645b503, - )); - insns.push(TestUnit::new( - Inst::Load { - rd: Writable::from_reg(fa0()), - op: LoadOP::Flw, - flags: MemFlags::new(), - from: AMode::RegOffset(a1(), 100), - }, - "flw fa0,100(a1)", - 0x645a507, - )); - - insns.push(TestUnit::new( - Inst::Load { - rd: Writable::from_reg(fa0()), - op: LoadOP::Fld, - flags: MemFlags::new(), - from: AMode::RegOffset(a1(), 100), - }, - "fld fa0,100(a1)", - 0x645b507, - )); - insns.push(TestUnit::new( - Inst::Store { - to: AMode::SPOffset(100), - op: StoreOP::Sb, - flags: MemFlags::new(), - src: a0(), - }, - "sb a0,100(sp)", - 0x6a10223, - )); - insns.push(TestUnit::new( - Inst::Store { - to: AMode::SPOffset(100), - op: StoreOP::Sh, - flags: MemFlags::new(), - src: a0(), - }, - "sh a0,100(sp)", - 0x6a11223, - )); - insns.push(TestUnit::new( - Inst::Store { - to: AMode::SPOffset(100), - op: StoreOP::Sw, - flags: MemFlags::new(), - src: a0(), - }, - "sw a0,100(sp)", - 0x6a12223, - )); - insns.push(TestUnit::new( - Inst::Store { - to: AMode::SPOffset(100), - op: StoreOP::Sd, - flags: MemFlags::new(), - src: a0(), - }, - "sd a0,100(sp)", - 0x6a13223, - )); - insns.push(TestUnit::new( - Inst::Store { - to: AMode::SPOffset(100), - op: StoreOP::Fsw, - flags: MemFlags::new(), - src: fa0(), - }, - "fsw fa0,100(sp)", - 0x6a12227, - )); - insns.push(TestUnit::new( - Inst::Store { - to: AMode::SPOffset(100), - op: StoreOP::Fsd, - flags: MemFlags::new(), - src: fa0(), - }, - "fsd fa0,100(sp)", - 0x6a13227, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addi, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(100), - }, - "addi a0,a0,100", - 0x6450513, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slti, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(100), - }, - "slti a0,a0,100", - 0x6452513, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::SltiU, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(100), - }, - "sltiu a0,a0,100", - 0x6453513, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Xori, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(100), - }, - "xori a0,a0,100", - 0x6454513, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Andi, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(100), - }, - "andi a0,a0,100", - 0x6457513, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slli, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "slli a0,a0,5", - 0x551513, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srli, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "srli a0,a0,5", - 0x555513, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Srai, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "srai a0,a0,5", - 0x40555513, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Addiw, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(120), - }, - "addiw a0,a0,120", - 0x785051b, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Slliw, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "slliw a0,a0,5", - 0x55151b, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::SrliW, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "srliw a0,a0,5", - 0x55551b, - )); - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Sraiw, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "sraiw a0,a0,5", - 0x4055551b, - )); - - insns.push(TestUnit::new( - Inst::AluRRImm12 { - alu_op: AluOPRRI::Sraiw, - rd: writable_a0(), - rs: a0(), - imm12: Imm12::from_i16(5), - }, - "sraiw a0,a0,5", - 0x4055551b, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Add, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "add a0,a0,a1", - 0xb50533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sub, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "sub a0,a0,a1", - 0x40b50533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sll, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "sll a0,a0,a1", - 0xb51533, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Slt, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "slt a0,a0,a1", - 0xb52533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::SltU, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "sltu a0,a0,a1", - 0xb53533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Xor, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "xor a0,a0,a1", - 0xb54533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Srl, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "srl a0,a0,a1", - 0xb55533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sra, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "sra a0,a0,a1", - 0x40b55533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Or, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "or a0,a0,a1", - 0xb56533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::And, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "and a0,a0,a1", - 0xb57533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Addw, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "addw a0,a0,a1", - 0xb5053b, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Subw, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "subw a0,a0,a1", - 0x40b5053b, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sllw, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "sllw a0,a0,a1", - 0xb5153b, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Srlw, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "srlw a0,a0,a1", - 0xb5553b, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Sraw, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "sraw a0,a0,a1", - 0x40b5553b, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Mul, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "mul a0,a0,a1", - 0x2b50533, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Mulh, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "mulh a0,a0,a1", - 0x2b51533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Mulhsu, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "mulhsu a0,a0,a1", - 0x2b52533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Mulhu, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "mulhu a0,a0,a1", - 0x2b53533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Div, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "div a0,a0,a1", - 0x2b54533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::DivU, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "divu a0,a0,a1", - 0x2b55533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Rem, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "rem a0,a0,a1", - 0x2b56533, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::RemU, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "remu a0,a0,a1", - 0x2b57533, - )); - - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Mulw, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "mulw a0,a0,a1", - 0x2b5053b, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Divw, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "divw a0,a0,a1", - 0x2b5453b, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Remw, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "remw a0,a0,a1", - 0x2b5653b, - )); - insns.push(TestUnit::new( - Inst::AluRRR { - alu_op: AluOPRRR::Remuw, - rd: writable_a0(), - rs1: a0(), - rs2: a1(), - }, - "remuw a0,a0,a1", - 0x2b5753b, - )); - - // - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RNE, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Fadd, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fadd.s fa0,fa0,fa1,rne", - 0xb50553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RTZ, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Fsub, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fsub.s fa0,fa0,fa1,rtz", - 0x8b51553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RUP, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Fmul, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fmul.s fa0,fa0,fa1,rup", - 0x10b53553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Fdiv, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fdiv.s fa0,fa0,fa1,fcsr", - 0x18b57553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RNE, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Fsgnj, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fsgnj.s fa0,fa0,fa1", - 0x20b50553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RTZ, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Fsgnjn, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fsgnjn.s fa0,fa0,fa1", - 0x20b51553, - )); - - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RDN, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Fsgnjx, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fsgnjx.s fa0,fa0,fa1", - 0x20b52553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RNE, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Fmin, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fmin.s fa0,fa0,fa1", - 0x28b50553, - )); - - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RTZ, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Fmax, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fmax.s fa0,fa0,fa1", - 0x28b51553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RDN, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Feq, - rd: writable_a0(), - rs1: fa0(), - rs2: fa1(), - }, - "feq.s a0,fa0,fa1", - 0xa0b52553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RTZ, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Flt, - rd: writable_a0(), - rs1: fa0(), - rs2: fa1(), - }, - "flt.s a0,fa0,fa1", - 0xa0b51553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RNE, - width: FpuOPWidth::S, - alu_op: FpuOPRRR::Fle, - rd: writable_a0(), - rs1: fa0(), - rs2: fa1(), - }, - "fle.s a0,fa0,fa1", - 0xa0b50553, - )); - - // - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Fadd, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fadd.d fa0,fa0,fa1,fcsr", - 0x2b57553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Fsub, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fsub.d fa0,fa0,fa1,fcsr", - 0xab57553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Fmul, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fmul.d fa0,fa0,fa1,fcsr", - 0x12b57553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Fdiv, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fdiv.d fa0,fa0,fa1,fcsr", - 0x1ab57553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RNE, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Fsgnj, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fsgnj.d fa0,fa0,fa1", - 0x22b50553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RTZ, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Fsgnjn, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fsgnjn.d fa0,fa0,fa1", - 0x22b51553, - )); - - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RDN, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Fsgnjx, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fsgnjx.d fa0,fa0,fa1", - 0x22b52553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RNE, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Fmin, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fmin.d fa0,fa0,fa1", - 0x2ab50553, - )); - - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RTZ, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Fmax, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - }, - "fmax.d fa0,fa0,fa1", - 0x2ab51553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RDN, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Feq, - rd: writable_a0(), - rs1: fa0(), - rs2: fa1(), - }, - "feq.d a0,fa0,fa1", - 0xa2b52553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RTZ, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Flt, - rd: writable_a0(), - rs1: fa0(), - rs2: fa1(), - }, - "flt.d a0,fa0,fa1", - 0xa2b51553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - frm: FRM::RNE, - width: FpuOPWidth::D, - alu_op: FpuOPRRR::Fle, - rd: writable_a0(), - rs1: fa0(), - rs2: fa1(), - }, - "fle.d a0,fa0,fa1", - 0xa2b50553, - )); - - // - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::RNE, - width: FpuOPWidth::S, - alu_op: FpuOPRR::Fsqrt, - rd: writable_fa0(), - rs: fa1(), - }, - "fsqrt.s fa0,fa1,rne", - 0x58058553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FcvtWFmt, - rd: writable_a0(), - rs: fa1(), - }, - "fcvt.w.s a0,fa1,fcsr", - 0xc005f553, - )); - - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FcvtWuFmt, - rd: writable_a0(), - rs: fa1(), - }, - "fcvt.wu.s a0,fa1,fcsr", - 0xc015f553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::RNE, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FmvXFmt, - rd: writable_a0(), - rs: fa1(), - }, - "fmv.x.w a0,fa1", - 0xe0058553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::RTZ, - width: FpuOPWidth::S, - alu_op: FpuOPRR::Fclass, - rd: writable_a0(), - rs: fa1(), - }, - "fclass.s a0,fa1", - 0xe0059553, - )); - - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FcvtFmtW, - rd: writable_fa0(), - rs: a0(), - }, - "fcvt.s.w fa0,a0,fcsr", - 0xd0057553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FcvtFmtWu, - rd: writable_fa0(), - rs: a0(), - }, - "fcvt.s.wu fa0,a0,fcsr", - 0xd0157553, - )); - - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::RNE, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FmvFmtX, - rd: writable_fa0(), - rs: a0(), - }, - "fmv.w.x fa0,a0", - 0xf0050553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FcvtLFmt, - rd: writable_a0(), - rs: fa0(), - }, - "fcvt.l.s a0,fa0,fcsr", - 0xc0257553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FcvtLuFmt, - rd: writable_a0(), - rs: fa0(), - }, - "fcvt.lu.s a0,fa0,fcsr", - 0xc0357553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FcvtFmtL, - rd: writable_fa0(), - rs: a0(), - }, - "fcvt.s.l fa0,a0,fcsr", - 0xd0257553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FcvtFmtLu, - rd: writable_fa0(), - rs: a0(), - }, - "fcvt.s.lu fa0,a0,fcsr", - 0xd0357553, - )); - - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRR::Fsqrt, - rd: writable_fa0(), - rs: fa1(), - }, - "fsqrt.d fa0,fa1,fcsr", - 0x5a05f553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRR::FcvtWFmt, - rd: writable_a0(), - rs: fa1(), - }, - "fcvt.w.d a0,fa1,fcsr", - 0xc205f553, - )); - - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRR::FcvtWuFmt, - rd: writable_a0(), - rs: fa1(), - }, - "fcvt.wu.d a0,fa1,fcsr", - 0xc215f553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::RNE, - width: FpuOPWidth::D, - alu_op: FpuOPRR::FmvXFmt, - rd: writable_a0(), - rs: fa1(), - }, - "fmv.x.d a0,fa1", - 0xe2058553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::RTZ, - width: FpuOPWidth::D, - alu_op: FpuOPRR::Fclass, - rd: writable_a0(), - rs: fa1(), - }, - "fclass.d a0,fa1", - 0xe2059553, - )); - - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRR::FcvtSD, - rd: writable_fa0(), - rs: fa0(), - }, - "fcvt.s.d fa0,fa0,fcsr", - 0x40157553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::RNE, - width: FpuOPWidth::D, - alu_op: FpuOPRR::FcvtFmtWu, - rd: writable_fa0(), - rs: a0(), - }, - "fcvt.d.wu fa0,a0,rne", - 0xd2150553, - )); - - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::RNE, - width: FpuOPWidth::D, - alu_op: FpuOPRR::FmvFmtX, - rd: writable_fa0(), - rs: a0(), - }, - "fmv.d.x fa0,a0", - 0xf2050553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRR::FcvtLFmt, - rd: writable_a0(), - rs: fa0(), - }, - "fcvt.l.d a0,fa0,fcsr", - 0xc2257553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRR::FcvtLuFmt, - rd: writable_a0(), - rs: fa0(), - }, - "fcvt.lu.d a0,fa0,fcsr", - 0xc2357553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRR::FcvtFmtL, - rd: writable_fa0(), - rs: a0(), - }, - "fcvt.d.l fa0,a0,fcsr", - 0xd2257553, - )); - insns.push(TestUnit::new( - Inst::FpuRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRR::FcvtFmtLu, - rd: writable_fa0(), - rs: a0(), - }, - "fcvt.d.lu fa0,a0,fcsr", - 0xd2357553, - )); - ////////////////////// - - insns.push(TestUnit::new( - Inst::FpuRRRR { - frm: FRM::RNE, - width: FpuOPWidth::S, - alu_op: FpuOPRRRR::Fmadd, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - rs3: fa7(), - }, - "fmadd.s fa0,fa0,fa1,fa7,rne", - 0x88b50543, - )); - insns.push(TestUnit::new( - Inst::FpuRRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRRRR::Fmsub, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - rs3: fa7(), - }, - "fmsub.s fa0,fa0,fa1,fa7,fcsr", - 0x88b57547, - )); - insns.push(TestUnit::new( - Inst::FpuRRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRRRR::Fnmsub, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - rs3: fa7(), - }, - "fnmsub.s fa0,fa0,fa1,fa7,fcsr", - 0x88b5754b, - )); - insns.push(TestUnit::new( - Inst::FpuRRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::S, - alu_op: FpuOPRRRR::Fnmadd, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - rs3: fa7(), - }, - "fnmadd.s fa0,fa0,fa1,fa7,fcsr", - 0x88b5754f, - )); - - insns.push(TestUnit::new( - Inst::FpuRRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRRRR::Fmadd, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - rs3: fa7(), - }, - "fmadd.d fa0,fa0,fa1,fa7,fcsr", - 0x8ab57543, - )); - insns.push(TestUnit::new( - Inst::FpuRRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRRRR::Fmsub, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - rs3: fa7(), - }, - "fmsub.d fa0,fa0,fa1,fa7,fcsr", - 0x8ab57547, - )); - insns.push(TestUnit::new( - Inst::FpuRRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRRRR::Fnmsub, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - rs3: fa7(), - }, - "fnmsub.d fa0,fa0,fa1,fa7,fcsr", - 0x8ab5754b, - )); - insns.push(TestUnit::new( - Inst::FpuRRRR { - frm: FRM::Fcsr, - width: FpuOPWidth::D, - alu_op: FpuOPRRRR::Fnmadd, - rd: writable_fa0(), - rs1: fa0(), - rs2: fa1(), - rs3: fa7(), - }, - "fnmadd.d fa0,fa0,fa1,fa7,fcsr", - 0x8ab5754f, - )); - - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::LrW, - rd: writable_a0(), - addr: a1(), - src: zero_reg(), - amo: AMO::Relax, - }, - "lr.w a0,(a1)", - 0x1005a52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::ScW, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Release, - }, - "sc.w.rl a0,a2,(a1)", - 0x1ac5a52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmoswapW, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Aquire, - }, - "amoswap.w.aq a0,a2,(a1)", - 0xcc5a52f, - )); - - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmoaddW, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::SeqCst, - }, - "amoadd.w.aqrl a0,a2,(a1)", - 0x6c5a52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmoxorW, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amoxor.w a0,a2,(a1)", - 0x20c5a52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmoandW, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amoand.w a0,a2,(a1)", - 0x60c5a52f, - )); - - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmoorW, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amoor.w a0,a2,(a1)", - 0x40c5a52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmominW, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amomin.w a0,a2,(a1)", - 0x80c5a52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmomaxW, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amomax.w a0,a2,(a1)", - 0xa0c5a52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmominuW, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amominu.w a0,a2,(a1)", - 0xc0c5a52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmomaxuW, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amomaxu.w a0,a2,(a1)", - 0xe0c5a52f, - )); - - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::LrD, - rd: writable_a0(), - addr: a1(), - src: zero_reg(), - amo: AMO::Relax, - }, - "lr.d a0,(a1)", - 0x1005b52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::ScD, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "sc.d a0,a2,(a1)", - 0x18c5b52f, - )); - - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmoswapD, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amoswap.d a0,a2,(a1)", - 0x8c5b52f, - )); - - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmoaddD, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amoadd.d a0,a2,(a1)", - 0xc5b52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmoxorD, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amoxor.d a0,a2,(a1)", - 0x20c5b52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmoandD, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amoand.d a0,a2,(a1)", - 0x60c5b52f, - )); - - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmoorD, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amoor.d a0,a2,(a1)", - 0x40c5b52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmominD, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amomin.d a0,a2,(a1)", - 0x80c5b52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmomaxD, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amomax.d a0,a2,(a1)", - 0xa0c5b52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmominuD, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amominu.d a0,a2,(a1)", - 0xc0c5b52f, - )); - insns.push(TestUnit::new( - Inst::Atomic { - op: AtomicOP::AmomaxuD, - rd: writable_a0(), - addr: a1(), - src: a2(), - amo: AMO::Relax, - }, - "amomaxu.d a0,a2,(a1)", - 0xe0c5b52f, - )); - - ///////// - insns.push(TestUnit::new( - Inst::Fence { - pred: 1, - succ: 1 << 1, - }, - "fence w,r", - 0x120000f, - )); - insns.push(TestUnit::new(Inst::EBreak {}, "ebreak", 0x100073)); - - insns.push(TestUnit::new( - Inst::FpuRRR { - alu_op: FpuOPRRR::Fsgnj, - width: FpuOPWidth::S, - frm: FRM::RNE, - rd: writable_fa0(), - rs1: fa1(), - rs2: fa1(), - }, - "fmv.s fa0,fa1", - 0x20b58553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - alu_op: FpuOPRRR::Fsgnj, - width: FpuOPWidth::D, - frm: FRM::RNE, - rd: writable_fa0(), - rs1: fa1(), - rs2: fa1(), - }, - "fmv.d fa0,fa1", - 0x22b58553, - )); - - insns.push(TestUnit::new( - Inst::FpuRRR { - alu_op: FpuOPRRR::Fsgnjn, - width: FpuOPWidth::S, - frm: FRM::RTZ, - rd: writable_fa0(), - rs1: fa1(), - rs2: fa1(), - }, - "fneg.s fa0,fa1", - 0x20b59553, - )); - insns.push(TestUnit::new( - Inst::FpuRRR { - alu_op: FpuOPRRR::Fsgnjn, - width: FpuOPWidth::D, - frm: FRM::RTZ, - rd: writable_fa0(), - rs1: fa1(), - rs2: fa1(), - }, - "fneg.d fa0,fa1", - 0x22b59553, - )); - - insns.push(TestUnit::new( - Inst::Fli { - ty: F32, - rd: writable_fa0(), - imm: FliConstant::new(0), - }, - "fli.s fa0,-1.0", - 0xf0100553, - )); - - insns.push(TestUnit::new( - Inst::Fli { - ty: F64, - rd: writable_fa0(), - imm: FliConstant::new(13), - }, - "fli.d fa0,0.625", - 0xf2168553, - )); - - let (flags, isa_flags) = make_test_flags(); - let emit_info = EmitInfo::new(flags, isa_flags); - - for unit in insns.iter() { - println!("Riscv64: {:?}, {}", unit.inst, unit.assembly); - // Check the printed text is as expected. - let actual_printing = unit.inst.print_with_state(&mut EmitState::default()); - assert_eq!(unit.assembly, actual_printing); - let mut buffer = MachBuffer::new(); - unit.inst - .emit(&mut buffer, &emit_info, &mut Default::default()); - let buffer = buffer.finish(&Default::default(), &mut Default::default()); - let actual_encoding = buffer.stringify_code_bytes(); - - assert_eq!(actual_encoding, unit.code.0); - } -} - -fn make_test_flags() -> (settings::Flags, super::super::riscv_settings::Flags) { - let b = settings::builder(); - let flags = settings::Flags::new(b.clone()); - let b2 = super::super::riscv_settings::builder(); - let isa_flags = super::super::riscv_settings::Flags::new(&flags, &b2); - (flags, isa_flags) -} - -#[test] -fn riscv64_worst_case_instruction_size() { - let (flags, isa_flags) = make_test_flags(); - let emit_info = EmitInfo::new(flags, isa_flags); - - // These are all candidate instructions with potential to generate a lot of bytes. - let mut candidates: Vec = vec![]; - - candidates.push(Inst::Popcnt { - sum: writable_a0(), - tmp: writable_a0(), - step: writable_a0(), - rs: a0(), - ty: I64, - }); - - candidates.push(Inst::Cltz { - sum: writable_a0(), - tmp: writable_a0(), - step: writable_a0(), - rs: a0(), - leading: true, - ty: I64, - }); - - candidates.push(Inst::Brev8 { - rd: writable_a0(), - tmp: writable_a0(), - step: writable_a0(), - tmp2: writable_a0(), - rs: a0(), - ty: I64, - }); - - candidates.push(Inst::AtomicCas { - offset: a0(), - t0: writable_a0(), - dst: writable_a0(), - e: a0(), - addr: a0(), - v: a0(), - ty: I64, - }); - - candidates.push(Inst::AtomicCas { - offset: a0(), - t0: writable_a0(), - dst: writable_a0(), - e: a0(), - addr: a0(), - v: a0(), - ty: I16, - }); - - candidates.extend( - crate::ir::AtomicRmwOp::all() - .iter() - .map(|op| Inst::AtomicRmwLoop { - op: *op, - offset: a0(), - dst: writable_a1(), - ty: I16, - p: a1(), - x: a2(), - t0: writable_a0(), - }), - ); - - // Return Call Indirect and BrTable are the largest instructions possible. However they - // emit their own island, so we don't account them here. - - let mut max: (u32, MInst) = (0, Inst::Nop0); - for i in candidates { - let mut buffer = MachBuffer::new(); - let mut emit_state = Default::default(); - i.emit(&mut buffer, &emit_info, &mut emit_state); - let buffer = buffer.finish(&Default::default(), &mut Default::default()); - let length = buffer.data().len() as u32; - if length > max.0 { - let length = buffer.data().len() as u32; - max = (length, i.clone()); - } - println!("insn:{i:?} length: {length}"); - } - println!("calculate max size is {} , inst is {:?}", max.0, max.1); - assert!(max.0 <= Inst::worst_case_size()); -} diff --git a/hbcb/src/inst/encode.rs b/hbcb/src/inst/encode.rs deleted file mode 100644 index 2e14cfb..0000000 --- a/hbcb/src/inst/encode.rs +++ /dev/null @@ -1,709 +0,0 @@ -//! Contains the RISC-V instruction encoding logic. -//! -//! These formats are specified in the RISC-V specification in section 2.2. -//! See: -//! -//! Some instructions especially in extensions have slight variations from -//! the base RISC-V specification. - -use { - super::*, - crate::lower::isle::generated_code::{ - COpcodeSpace, CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, - VecAluOpRImm5, VecAluOpRR, VecAluOpRRRImm5, VecAluOpRRRR, VecOpCategory, ZcbMemOp, - }, - cranelift_codegen::machinst::isle::WritableReg, -}; - -fn unsigned_field_width(value: u32, width: u8) -> u32 { - debug_assert_eq!(value & (!0 << width), 0); - value -} - -/// Layout: -/// 0-------6-7-------11-12------14-15------19-20------24-25-------31 -/// | Opcode | rd | funct3 | rs1 | rs2 | funct7 | -fn encode_r_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, rs2: u32, funct7: u32) -> u32 { - let mut bits = 0; - bits |= unsigned_field_width(opcode, 7); - bits |= unsigned_field_width(rd, 5) << 7; - bits |= unsigned_field_width(funct3, 3) << 12; - bits |= unsigned_field_width(rs1, 5) << 15; - bits |= unsigned_field_width(rs2, 5) << 20; - bits |= unsigned_field_width(funct7, 7) << 25; - bits -} - -/// Encode an R-type instruction. -pub fn encode_r_type( - opcode: u32, - rd: WritableReg, - funct3: u32, - rs1: Reg, - rs2: Reg, - funct7: u32, -) -> u32 { - encode_r_type_bits( - opcode, - reg_to_gpr_num(rd.to_reg()), - funct3, - reg_to_gpr_num(rs1), - reg_to_gpr_num(rs2), - funct7, - ) -} - -/// Layout: -/// 0-------6-7-------11-12------14-15------19-20------------------31 -/// | Opcode | rd | width | rs1 | Offset[11:0] | -fn encode_i_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, offset: u32) -> u32 { - let mut bits = 0; - bits |= unsigned_field_width(opcode, 7); - bits |= unsigned_field_width(rd, 5) << 7; - bits |= unsigned_field_width(funct3, 3) << 12; - bits |= unsigned_field_width(rs1, 5) << 15; - bits |= unsigned_field_width(offset, 12) << 20; - bits -} - -/// Encode an I-type instruction. -pub fn encode_i_type(opcode: u32, rd: WritableReg, width: u32, rs1: Reg, offset: Imm12) -> u32 { - encode_i_type_bits( - opcode, - reg_to_gpr_num(rd.to_reg()), - width, - reg_to_gpr_num(rs1), - offset.bits(), - ) -} - -/// Encode an S-type instruction. -/// -/// Layout: -/// 0-------6-7-------11-12------14-15------19-20---24-25-------------31 -/// | Opcode | imm[4:0] | width | base | src | imm[11:5] | -pub fn encode_s_type(opcode: u32, width: u32, base: Reg, src: Reg, offset: Imm12) -> u32 { - let mut bits = 0; - bits |= unsigned_field_width(opcode, 7); - bits |= (offset.bits() & 0b11111) << 7; - bits |= unsigned_field_width(width, 3) << 12; - bits |= reg_to_gpr_num(base) << 15; - bits |= reg_to_gpr_num(src) << 20; - bits |= unsigned_field_width(offset.bits() >> 5, 7) << 25; - bits -} - -/// Encodes a Vector ALU instruction. -/// -/// Fields: -/// - opcode (7 bits) -/// - vd (5 bits) -/// - funct3 (3 bits) -/// - vs1 (5 bits) -/// - vs2 (5 bits) -/// - vm (1 bit) -/// - funct6 (6 bits) -/// -/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc -pub fn encode_valu( - op: VecAluOpRRR, - vd: WritableReg, - vs1: Reg, - vs2: Reg, - masking: VecOpMasking, -) -> u32 { - let funct7 = (op.funct6() << 1) | masking.encode(); - encode_r_type_bits( - op.opcode(), - reg_to_gpr_num(vd.to_reg()), - op.funct3(), - reg_to_gpr_num(vs1), - reg_to_gpr_num(vs2), - funct7, - ) -} - -/// Encodes a Vector ALU+Imm instruction. -/// This is just a Vector ALU instruction with an immediate in the VS1 field. -/// -/// Fields: -/// - opcode (7 bits) -/// - vd (5 bits) -/// - funct3 (3 bits) -/// - imm (5 bits) -/// - vs2 (5 bits) -/// - vm (1 bit) -/// - funct6 (6 bits) -/// -/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc -pub fn encode_valu_rr_imm( - op: VecAluOpRRImm5, - vd: WritableReg, - imm: Imm5, - vs2: Reg, - masking: VecOpMasking, -) -> u32 { - let funct7 = (op.funct6() << 1) | masking.encode(); - let imm = imm.bits() as u32; - encode_r_type_bits( - op.opcode(), - reg_to_gpr_num(vd.to_reg()), - op.funct3(), - imm, - reg_to_gpr_num(vs2), - funct7, - ) -} - -pub fn encode_valu_rrrr( - op: VecAluOpRRRR, - vd: WritableReg, - vs2: Reg, - vs1: Reg, - masking: VecOpMasking, -) -> u32 { - let funct7 = (op.funct6() << 1) | masking.encode(); - encode_r_type_bits( - op.opcode(), - reg_to_gpr_num(vd.to_reg()), - op.funct3(), - reg_to_gpr_num(vs1), - reg_to_gpr_num(vs2), - funct7, - ) -} - -pub fn encode_valu_rrr_imm( - op: VecAluOpRRRImm5, - vd: WritableReg, - imm: Imm5, - vs2: Reg, - masking: VecOpMasking, -) -> u32 { - let funct7 = (op.funct6() << 1) | masking.encode(); - let imm = imm.bits() as u32; - encode_r_type_bits( - op.opcode(), - reg_to_gpr_num(vd.to_reg()), - op.funct3(), - imm, - reg_to_gpr_num(vs2), - funct7, - ) -} - -pub fn encode_valu_rr(op: VecAluOpRR, vd: WritableReg, vs: Reg, masking: VecOpMasking) -> u32 { - let funct7 = (op.funct6() << 1) | masking.encode(); - - let (vs1, vs2) = if op.vs_is_vs2_encoded() { - (op.aux_encoding(), reg_to_gpr_num(vs)) - } else { - (reg_to_gpr_num(vs), op.aux_encoding()) - }; - - encode_r_type_bits(op.opcode(), reg_to_gpr_num(vd.to_reg()), op.funct3(), vs1, vs2, funct7) -} - -pub fn encode_valu_r_imm( - op: VecAluOpRImm5, - vd: WritableReg, - imm: Imm5, - masking: VecOpMasking, -) -> u32 { - let funct7 = (op.funct6() << 1) | masking.encode(); - - // This is true for this opcode, not sure if there are any other ones. - debug_assert_eq!(op, VecAluOpRImm5::VmvVI); - let vs1 = imm.bits() as u32; - let vs2 = op.aux_encoding(); - - encode_r_type_bits(op.opcode(), reg_to_gpr_num(vd.to_reg()), op.funct3(), vs1, vs2, funct7) -} - -/// Encodes a Vector CFG Imm instruction. -/// -/// See: https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc -// TODO: Check if this is any of the known instruction types in the spec. -pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 { - let mut bits = 0; - bits |= unsigned_field_width(opcode, 7); - bits |= reg_to_gpr_num(rd) << 7; - bits |= VecOpCategory::OPCFG.encode() << 12; - bits |= unsigned_field_width(imm.bits(), 5) << 15; - bits |= unsigned_field_width(vtype.encode(), 10) << 20; - bits |= 0b11 << 30; - bits -} - -/// Encodes a Vector Mem Unit Stride Load instruction. -/// -/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc -/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP -pub fn encode_vmem_load( - opcode: u32, - vd: Reg, - width: VecElementWidth, - rs1: Reg, - lumop: u32, - masking: VecOpMasking, - mop: u32, - nf: u32, -) -> u32 { - // Width is encoded differently to avoid a clash with the FP load/store sizes. - let width = match width { - VecElementWidth::E8 => 0b000, - VecElementWidth::E16 => 0b101, - VecElementWidth::E32 => 0b110, - VecElementWidth::E64 => 0b111, - }; - - let mut bits = 0; - bits |= unsigned_field_width(opcode, 7); - bits |= reg_to_gpr_num(vd) << 7; - bits |= width << 12; - bits |= reg_to_gpr_num(rs1) << 15; - bits |= unsigned_field_width(lumop, 5) << 20; - bits |= masking.encode() << 25; - bits |= unsigned_field_width(mop, 2) << 26; - - // The mew bit (inst[28]) when set is expected to be used to encode expanded - // memory sizes of 128 bits and above, but these encodings are currently reserved. - bits |= 0b0 << 28; - - bits |= unsigned_field_width(nf, 3) << 29; - bits -} - -/// Encodes a Vector Mem Unit Stride Load instruction. -/// -/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc -/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP -pub fn encode_vmem_store( - opcode: u32, - vs3: Reg, - width: VecElementWidth, - rs1: Reg, - sumop: u32, - masking: VecOpMasking, - mop: u32, - nf: u32, -) -> u32 { - // This is pretty much the same as the load instruction, just - // with different names on the fields. - encode_vmem_load(opcode, vs3, width, rs1, sumop, masking, mop, nf) -} - -// The CSR Reg instruction is really just an I type instruction with the CSR in -// the immediate field. -pub fn encode_csr_reg(op: CsrRegOP, rd: WritableReg, rs: Reg, csr: CSR) -> u32 { - encode_i_type(op.opcode(), rd, op.funct3(), rs, csr.bits()) -} - -// The CSR Imm instruction is an I type instruction with the CSR in -// the immediate field and the value to be set in the `rs1` field. -pub fn encode_csr_imm(op: CsrImmOP, rd: WritableReg, csr: CSR, imm: UImm5) -> u32 { - encode_i_type_bits( - op.opcode(), - reg_to_gpr_num(rd.to_reg()), - op.funct3(), - imm.bits(), - csr.bits().bits(), - ) -} - -// Encode a CR type instruction. -// -// 0--1-2-----6-7-------11-12-------15 -// |op | rs2 | rd/rs1 | funct4 | -pub fn encode_cr_type(op: CrOp, rd: WritableReg, rs2: Reg) -> u16 { - let mut bits = 0; - bits |= unsigned_field_width(op.op().bits(), 2); - bits |= reg_to_gpr_num(rs2) << 2; - bits |= reg_to_gpr_num(rd.to_reg()) << 7; - bits |= unsigned_field_width(op.funct4(), 4) << 12; - bits.try_into().unwrap() -} - -// This isn't technically a instruction format that exists. It's just a CR type -// where the source is rs1, rs2 is zero. rs1 is never written to. -// -// Used for C.JR and C.JALR -pub fn encode_cr2_type(op: CrOp, rs1: Reg) -> u16 { - encode_cr_type(op, WritableReg::from_reg(rs1), zero_reg()) -} - -// Encode a CA type instruction. -// -// 0--1-2-----4-5--------6-7--------9-10------15 -// |op | rs2 | funct2 | rd/rs1 | funct6 | -pub fn encode_ca_type(op: CaOp, rd: WritableReg, rs2: Reg) -> u16 { - let mut bits = 0; - bits |= unsigned_field_width(op.op().bits(), 2); - bits |= reg_to_compressed_gpr_num(rs2) << 2; - bits |= unsigned_field_width(op.funct2(), 2) << 5; - bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 7; - bits |= unsigned_field_width(op.funct6(), 6) << 10; - bits.try_into().unwrap() -} - -// Encode a CJ type instruction. -// -// The imm field is a 11 bit signed immediate that is shifted left by 1. -// -// 0--1-2-----12-13--------15 -// |op | imm | funct3 | -pub fn encode_cj_type(op: CjOp, imm: Imm12) -> u16 { - let imm = imm.bits(); - debug_assert!(imm & 1 == 0); - - // The offset bits are in rather weird positions. - // [11|4|9:8|10|6|7|3:1|5] - let mut imm_field = 0; - imm_field |= ((imm >> 11) & 1) << 10; - imm_field |= ((imm >> 4) & 1) << 9; - imm_field |= ((imm >> 8) & 3) << 7; - imm_field |= ((imm >> 10) & 1) << 6; - imm_field |= ((imm >> 6) & 1) << 5; - imm_field |= ((imm >> 7) & 1) << 4; - imm_field |= ((imm >> 1) & 7) << 1; - imm_field |= ((imm >> 5) & 1) << 0; - - let mut bits = 0; - bits |= unsigned_field_width(op.op().bits(), 2); - bits |= unsigned_field_width(imm_field, 11) << 2; - bits |= unsigned_field_width(op.funct3(), 3) << 13; - bits.try_into().unwrap() -} - -// Encode a CI type instruction. -// -// The imm field is a 6 bit signed immediate. -// -// 0--1-2-------6-7-------11-12-----12-13-----15 -// |op | imm[4:0] | src | imm[5] | funct3 | -pub fn encode_ci_type(op: CiOp, rd: WritableReg, imm: Imm6) -> u16 { - let imm = imm.bits(); - - let mut bits = 0; - bits |= unsigned_field_width(op.op().bits(), 2); - bits |= unsigned_field_width((imm & 0x1f) as u32, 5) << 2; - bits |= reg_to_gpr_num(rd.to_reg()) << 7; - bits |= unsigned_field_width(((imm >> 5) & 1) as u32, 1) << 12; - bits |= unsigned_field_width(op.funct3(), 3) << 13; - bits.try_into().unwrap() -} - -// Stack-Pointer relative loads are regular CI instructions, but, the immediate -// is zero extended, and with a slightly different immediate field encoding. -pub fn encode_ci_sp_load(op: CiOp, rd: WritableReg, imm: Uimm6) -> u16 { - let imm = imm.bits(); - - // These are the spec encoded offsets. - // LWSP: [5|4:2|7:6] - // LDSP: [5|4:3|8:6] - // FLDSP: [5|4:3|8:6] - // - // We don't receive the entire offset in `imm`, just a multiple of the load-size. - - // Number of bits in the lowest position of imm. 3 for lwsp, 2 for {f,}ldsp. - let low_bits = match op { - CiOp::CLwsp => 3, // [4:2] - CiOp::CLdsp | CiOp::CFldsp => 2, // [4:3] - _ => unreachable!(), - }; - let high_bits = 6 - 1 - low_bits; - let mut enc_imm = 0; - - // Encode [7:6] at the bottom of imm - enc_imm |= imm >> (6 - high_bits); - - // Next place [4:2] in the middle - enc_imm |= (imm & ((1 << low_bits) - 1)) << high_bits; - - // Finally place [5] at the top - enc_imm |= ((imm >> low_bits) & 1) << 5; - - let enc_imm = Imm6::maybe_from_i16((enc_imm as i16) << 10 >> 10).unwrap(); - - encode_ci_type(op, rd, enc_imm) -} - -/// c.addi16sp is a regular CI op, but the immediate field is encoded in a weird way -pub fn encode_c_addi16sp(imm: Imm6) -> u16 { - let imm = imm.bits(); - - // [6|1|3|5:4|2] - let mut enc_imm = 0; - enc_imm |= ((imm >> 5) & 1) << 5; - enc_imm |= ((imm >> 0) & 1) << 4; - enc_imm |= ((imm >> 2) & 1) << 3; - enc_imm |= ((imm >> 3) & 3) << 1; - enc_imm |= ((imm >> 1) & 1) << 0; - let enc_imm = Imm6::maybe_from_i16((enc_imm as i16) << 10 >> 10).unwrap(); - - encode_ci_type(CiOp::CAddi16sp, writable_stack_reg(), enc_imm) -} - -// Encode a CIW type instruction. -// -// 0--1-2------4-5------12-13--------15 -// |op | rd | imm | funct3 | -pub fn encode_ciw_type(op: CiwOp, rd: WritableReg, imm: u8) -> u16 { - // [3:2|7:4|0|1] - let mut imm_field = 0; - imm_field |= ((imm >> 1) & 1) << 0; - imm_field |= ((imm >> 0) & 1) << 1; - imm_field |= ((imm >> 4) & 15) << 2; - imm_field |= ((imm >> 2) & 3) << 6; - - let mut bits = 0; - bits |= unsigned_field_width(op.op().bits(), 2); - bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 2; - bits |= unsigned_field_width(imm_field as u32, 8) << 5; - bits |= unsigned_field_width(op.funct3(), 3) << 13; - bits.try_into().unwrap() -} - -// Encode a CB type instruction. -// -// The imm field is a 6 bit signed immediate. -// -// 0--1-2-------6-7-------9-10-------11-12-------13--------15 -// |op | imm[4:0] | dst | funct2 | imm[5] | funct3 | -pub fn encode_cb_type(op: CbOp, rd: WritableReg, imm: Imm6) -> u16 { - let imm = imm.bits(); - - let mut bits = 0; - bits |= unsigned_field_width(op.op().bits(), 2); - bits |= unsigned_field_width((imm & 0x1f) as u32, 5) << 2; - bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 7; - bits |= unsigned_field_width(op.funct2(), 2) << 10; - bits |= unsigned_field_width(((imm >> 5) & 1) as u32, 1) << 12; - bits |= unsigned_field_width(op.funct3(), 3) << 13; - bits.try_into().unwrap() -} - -// Encode a CSS type instruction. -// -// The imm field is a 6 bit unsigned immediate. -// -// 0--1-2-------6-7--------12-13-------15 -// |op | src | imm | funct3 | -pub fn encode_css_type(op: CssOp, src: Reg, imm: Uimm6) -> u16 { - let imm = imm.bits(); - - // These are the spec encoded offsets. - // c.swsp: [5:2|7:6] - // c.sdsp: [5:3|8:6] - // c.fsdsp: [5:3|8:6] - // - // We don't receive the entire offset in `imm`, just a multiple of the load-size. - - // Number of bits in the lowest position of imm. 4 for c.swsp, 3 for c.{f,}sdsp. - let low_bits = match op { - CssOp::CSwsp => 4, // [5:2] - CssOp::CSdsp | CssOp::CFsdsp => 3, // [5:3] - }; - let high_bits = 6 - low_bits; - - let mut enc_imm = 0; - enc_imm |= (imm & ((1 << low_bits) - 1)) << high_bits; - enc_imm |= imm >> low_bits; - - let mut bits = 0; - bits |= unsigned_field_width(op.op().bits(), 2); - bits |= reg_to_gpr_num(src) << 2; - bits |= unsigned_field_width(enc_imm as u32, 6) << 7; - bits |= unsigned_field_width(op.funct3(), 3) << 13; - bits.try_into().unwrap() -} - -// Encode a CS type instruction. -// -// The imm field is a 5 bit unsigned immediate. -// -// 0--1-2-----4-5----------6-7---------9-10----------12-13-----15 -// |op | src | imm(2-bit) | base | imm(3-bit) | funct3 | -pub fn encode_cs_type(op: CsOp, src: Reg, base: Reg, imm: Uimm5) -> u16 { - let size = match op { - CsOp::CFsd | CsOp::CSd => 8, - CsOp::CSw => 4, - }; - - encode_cs_cl_type_bits(op.op(), op.funct3(), size, src, base, imm) -} - -// Encode a CL type instruction. -// -// The imm field is a 5 bit unsigned immediate. -// -// 0--1-2------4-5----------6-7---------9-10----------12-13-----15 -// |op | dest | imm(2-bit) | base | imm(3-bit) | funct3 | -pub fn encode_cl_type(op: ClOp, dest: WritableReg, base: Reg, imm: Uimm5) -> u16 { - let size = match op { - ClOp::CFld | ClOp::CLd => 8, - ClOp::CLw => 4, - }; - - encode_cs_cl_type_bits(op.op(), op.funct3(), size, dest.to_reg(), base, imm) -} - -// CL and CS type instructions have the same physical layout. -// -// 0--1-2----------4-5----------6-7---------9-10----------12-13-----15 -// |op | dest/src | imm(2-bit) | base | imm(3-bit) | funct3 | -fn encode_cs_cl_type_bits( - op: COpcodeSpace, - funct3: u32, - size: u32, - dest_src: Reg, - base: Reg, - imm: Uimm5, -) -> u16 { - let imm = imm.bits(); - - // c.sw / c.lw: [2|6] - // c.sd / c.ld: [7:6] - // c.fsd / c.fld: [7:6] - // - // We differentiate these based on the operation size - let imm2 = match size { - 4 => ((imm >> 4) & 1) | ((imm & 1) << 1), - 8 => (imm >> 3) & 0b11, - _ => unreachable!(), - }; - - // [5:3] on all opcodes - let imm3 = match size { - 4 => (imm >> 1) & 0b111, - 8 => (imm >> 0) & 0b111, - _ => unreachable!(), - }; - - let mut bits = 0; - bits |= unsigned_field_width(op.bits(), 2); - bits |= reg_to_compressed_gpr_num(dest_src) << 2; - bits |= unsigned_field_width(imm2 as u32, 2) << 5; - bits |= reg_to_compressed_gpr_num(base) << 7; - bits |= unsigned_field_width(imm3 as u32, 3) << 10; - bits |= unsigned_field_width(funct3, 3) << 13; - bits.try_into().unwrap() -} - -// Encode a CSZN type instruction. -// -// This is an additional encoding format that is introduced in the Zcb extension. -// -// 0--1-2---------6-7--------9-10------15 -// |op | funct5 | rd/rs1 | funct6 | -pub fn encode_cszn_type(op: CsznOp, rd: WritableReg) -> u16 { - let mut bits = 0; - bits |= unsigned_field_width(op.op().bits(), 2); - bits |= unsigned_field_width(op.funct5(), 5) << 2; - bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 7; - bits |= unsigned_field_width(op.funct6(), 6) << 10; - bits.try_into().unwrap() -} - -// Encodes the various memory operations in the Zcb extension. -// -// 0--1-2----------4-5----------6-7---------9-10-------15 -// |op | dest/src | imm(2-bit) | base | funct6 | -fn encode_zcbmem_bits(op: ZcbMemOp, dest_src: Reg, base: Reg, imm: Uimm2) -> u16 { - let imm = imm.bits(); - - // For these ops, bit 6 is part of the opcode, and bit 5 encodes the imm offset. - let imm = match op { - ZcbMemOp::CLh | ZcbMemOp::CLhu | ZcbMemOp::CSh => { - debug_assert_eq!(imm & !1, 0); - // Only c.lh has this bit as 1 - let opcode_bit = (op == ZcbMemOp::CLh) as u8; - imm | (opcode_bit << 1) - } - // In the rest of the ops the imm is reversed. - _ => ((imm & 1) << 1) | ((imm >> 1) & 1), - }; - - let mut bits = 0; - bits |= unsigned_field_width(op.op().bits(), 2); - bits |= reg_to_compressed_gpr_num(dest_src) << 2; - bits |= unsigned_field_width(imm as u32, 2) << 5; - bits |= reg_to_compressed_gpr_num(base) << 7; - bits |= unsigned_field_width(op.funct6(), 6) << 10; - bits.try_into().unwrap() -} - -pub fn encode_zcbmem_load(op: ZcbMemOp, rd: WritableReg, base: Reg, imm: Uimm2) -> u16 { - encode_zcbmem_bits(op, rd.to_reg(), base, imm) -} - -pub fn encode_zcbmem_store(op: ZcbMemOp, src: Reg, base: Reg, imm: Uimm2) -> u16 { - encode_zcbmem_bits(op, src, base, imm) -} - -pub fn encode_fli(ty: Type, imm: FliConstant, rd: WritableReg) -> u32 { - // FLI.{S,D} is encoded as a FMV.{W,D} instruction with rs2 set to the - // immediate value to be loaded. - let op = FpuOPRR::FmvFmtX; - let width = FpuOPWidth::try_from(ty).unwrap(); - let frm = 0; // FRM is hard coded to 0 in both instructions - let rs2 = 1; // rs2 set to 1 is what differentiates FLI from FMV - - let mut bits = 0; - bits |= unsigned_field_width(op.opcode(), 7); - bits |= reg_to_gpr_num(rd.to_reg()) << 7; - bits |= unsigned_field_width(frm, 3) << 12; - bits |= unsigned_field_width(imm.bits() as u32, 5) << 15; - bits |= unsigned_field_width(rs2, 6) << 20; - bits |= unsigned_field_width(op.funct7(width), 7) << 25; - bits -} - -pub fn encode_fp_rr(op: FpuOPRR, width: FpuOPWidth, frm: FRM, rd: WritableReg, rs: Reg) -> u32 { - encode_r_type_bits( - op.opcode(), - reg_to_gpr_num(rd.to_reg()), - frm.as_u32(), - reg_to_gpr_num(rs), - op.rs2(), - op.funct7(width), - ) -} - -pub fn encode_fp_rrr( - op: FpuOPRRR, - width: FpuOPWidth, - frm: FRM, - rd: WritableReg, - rs1: Reg, - rs2: Reg, -) -> u32 { - encode_r_type_bits( - op.opcode(), - reg_to_gpr_num(rd.to_reg()), - frm.as_u32(), - reg_to_gpr_num(rs1), - reg_to_gpr_num(rs2), - op.funct7(width), - ) -} - -pub fn encode_fp_rrrr( - op: FpuOPRRRR, - width: FpuOPWidth, - frm: FRM, - rd: WritableReg, - rs1: Reg, - rs2: Reg, - rs3: Reg, -) -> u32 { - let funct7 = (reg_to_gpr_num(rs3) << 2) | width.as_u32(); - encode_r_type_bits( - op.opcode(), - reg_to_gpr_num(rd.to_reg()), - frm.as_u32(), - reg_to_gpr_num(rs1), - reg_to_gpr_num(rs2), - funct7, - ) -} diff --git a/hbcb/src/inst/imms.rs b/hbcb/src/inst/imms.rs deleted file mode 100644 index 28f2791..0000000 --- a/hbcb/src/inst/imms.rs +++ /dev/null @@ -1,374 +0,0 @@ -//! Riscv64 ISA definitions: immediate constants. - -// Some variants are never constructed, but we still want them as options in the future. -use super::Inst; -#[allow(dead_code)] -use std::fmt::{Debug, Display, Formatter, Result}; - -#[derive(Copy, Clone, Debug, Default)] -pub struct Imm12 { - /// 16-bit container where the low 12 bits are the data payload. - /// - /// Acquiring the underlying value requires sign-extending the 12th bit. - bits: u16, -} - -impl Imm12 { - pub(crate) const ZERO: Self = Self { bits: 0 }; - pub(crate) const ONE: Self = Self { bits: 1 }; - - pub fn maybe_from_u64(val: u64) -> Option { - Self::maybe_from_i64(val as i64) - } - - pub fn maybe_from_i64(val: i64) -> Option { - if val >= -2048 && val <= 2047 { - Some(Imm12 { - bits: val as u16 & 0xfff, - }) - } else { - None - } - } - - #[inline] - pub fn from_i16(bits: i16) -> Self { - assert!(bits >= -2048 && bits <= 2047); - Self { - bits: (bits & 0xfff) as u16, - } - } - - #[inline] - pub fn as_i16(self) -> i16 { - (self.bits << 4) as i16 >> 4 - } - - #[inline] - pub fn bits(&self) -> u32 { - self.bits.into() - } -} - -impl Into for Imm12 { - fn into(self) -> i64 { - self.as_i16().into() - } -} - -impl Display for Imm12 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{:+}", self.as_i16()) - } -} - -// signed -#[derive(Clone, Copy, Default)] -pub struct Imm20 { - /// 32-bit container where the low 20 bits are the data payload. - /// - /// Acquiring the underlying value requires sign-extending the 20th bit. - bits: u32, -} - -impl Imm20 { - pub(crate) const ZERO: Self = Self { bits: 0 }; - - pub fn maybe_from_u64(val: u64) -> Option { - Self::maybe_from_i64(val as i64) - } - - pub fn maybe_from_i64(val: i64) -> Option { - if val >= -(0x7_ffff + 1) && val <= 0x7_ffff { - Some(Imm20 { bits: val as u32 }) - } else { - None - } - } - - #[inline] - pub fn from_i32(bits: i32) -> Self { - assert!(bits >= -(0x7_ffff + 1) && bits <= 0x7_ffff); - Self { - bits: (bits as u32) & 0xf_ffff, - } - } - - #[inline] - pub fn as_i32(&self) -> i32 { - ((self.bits << 12) as i32) >> 12 - } - - #[inline] - pub fn bits(&self) -> u32 { - self.bits - } -} - -impl Debug for Imm20 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.as_i32()) - } -} - -impl Display for Imm20 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.bits) - } -} - -/// An unsigned 5-bit immediate. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct UImm5 { - value: u8, -} - -impl UImm5 { - /// Create an unsigned 5-bit immediate from u8. - pub fn maybe_from_u8(value: u8) -> Option { - if value < 32 { - Some(UImm5 { value }) - } else { - None - } - } - - /// Bits for encoding. - pub fn bits(&self) -> u32 { - u32::from(self.value) - } -} - -impl Display for UImm5 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.value) - } -} - -/// A Signed 5-bit immediate. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Imm5 { - value: i8, -} - -impl Imm5 { - /// Create an signed 5-bit immediate from an i8. - pub fn maybe_from_i8(value: i8) -> Option { - if value >= -16 && value <= 15 { - Some(Imm5 { value }) - } else { - None - } - } - - pub fn from_bits(value: u8) -> Imm5 { - assert_eq!(value & 0x1f, value); - let signed = ((value << 3) as i8) >> 3; - Imm5 { value: signed } - } - - /// Bits for encoding. - pub fn bits(&self) -> u8 { - self.value as u8 & 0x1f - } -} - -impl Display for Imm5 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.value) - } -} - -/// A Signed 6-bit immediate. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Imm6 { - value: i8, -} - -impl Imm6 { - /// Create an signed 6-bit immediate from an i16 - pub fn maybe_from_i16(value: i16) -> Option { - if value >= -32 && value <= 31 { - Some(Self { value: value as i8 }) - } else { - None - } - } - - pub fn maybe_from_i32(value: i32) -> Option { - value.try_into().ok().and_then(Imm6::maybe_from_i16) - } - - pub fn maybe_from_imm12(value: Imm12) -> Option { - Imm6::maybe_from_i16(value.as_i16()) - } - - /// Bits for encoding. - pub fn bits(&self) -> u8 { - self.value as u8 & 0x3f - } -} - -impl Display for Imm6 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.value) - } -} - -/// A unsigned 6-bit immediate. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Uimm6 { - value: u8, -} - -impl Uimm6 { - /// Create an unsigned 6-bit immediate from an u8 - pub fn maybe_from_u8(value: u8) -> Option { - if value <= 63 { - Some(Self { value }) - } else { - None - } - } - - /// Bits for encoding. - pub fn bits(&self) -> u8 { - self.value & 0x3f - } -} - -impl Display for Uimm6 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.value) - } -} - -/// A unsigned 5-bit immediate. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Uimm5 { - value: u8, -} - -impl Uimm5 { - /// Create an unsigned 5-bit immediate from an u8 - pub fn maybe_from_u8(value: u8) -> Option { - if value <= 31 { - Some(Self { value }) - } else { - None - } - } - - /// Bits for encoding. - pub fn bits(&self) -> u8 { - self.value & 0x1f - } -} - -impl Display for Uimm5 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.value) - } -} - -/// A unsigned 2-bit immediate. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct Uimm2 { - value: u8, -} - -impl Uimm2 { - /// Create an unsigned 2-bit immediate from an u8 - pub fn maybe_from_u8(value: u8) -> Option { - if value <= 3 { - Some(Self { value }) - } else { - None - } - } - - /// Bits for encoding. - pub fn bits(&self) -> u8 { - self.value & 0x3 - } -} - -impl Display for Uimm2 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { - write!(f, "{}", self.value) - } -} - -impl Inst { - pub(crate) fn imm_min() -> i64 { - let imm20_max: i64 = (1 << 19) << 12; - let imm12_max = 1 << 11; - -imm20_max - imm12_max - } - pub(crate) fn imm_max() -> i64 { - let imm20_max: i64 = ((1 << 19) - 1) << 12; - let imm12_max = (1 << 11) - 1; - imm20_max + imm12_max - } - - /// An imm20 immediate and an Imm12 immediate can generate a 32-bit immediate. - /// This helper produces an imm12, imm20, or both to generate the value. - /// - /// `value` must be between `imm_min()` and `imm_max()`, or else - /// this helper returns `None`. - pub(crate) fn generate_imm(value: u64) -> Option<(Imm20, Imm12)> { - if let Some(imm12) = Imm12::maybe_from_u64(value) { - // can be load using single imm12. - return Some((Imm20::ZERO, imm12)); - } - let value = value as i64; - if !(value >= Self::imm_min() && value <= Self::imm_max()) { - // not in range, return None. - return None; - } - const MOD_NUM: i64 = 4096; - let (imm20, imm12) = if value > 0 { - let mut imm20 = value / MOD_NUM; - let mut imm12 = value % MOD_NUM; - if imm12 >= 2048 { - imm12 -= MOD_NUM; - imm20 += 1; - } - assert!(imm12 >= -2048 && imm12 <= 2047); - (imm20, imm12) - } else { - // this is the abs value. - let value_abs = value.abs(); - let imm20 = value_abs / MOD_NUM; - let imm12 = value_abs % MOD_NUM; - let mut imm20 = -imm20; - let mut imm12 = -imm12; - if imm12 < -2048 { - imm12 += MOD_NUM; - imm20 -= 1; - } - (imm20, imm12) - }; - assert!(imm20 != 0 || imm12 != 0); - let imm20 = i32::try_from(imm20).unwrap(); - let imm12 = i16::try_from(imm12).unwrap(); - Some((Imm20::from_i32(imm20), Imm12::from_i16(imm12))) - } -} - -#[cfg(test)] -mod test { - use super::*; - #[test] - fn test_imm12() { - let x = Imm12::ZERO; - assert_eq!(0, x.bits()); - Imm12::maybe_from_u64(0xffff_ffff_ffff_ffff).unwrap(); - } - - #[test] - fn imm20_and_imm12() { - assert!(Inst::imm_max() == (i32::MAX - 2048) as i64); - assert!(Inst::imm_min() == i32::MIN as i64 - 2048); - } -} diff --git a/hbcb/src/inst/mod.rs b/hbcb/src/inst/mod.rs deleted file mode 100644 index 6440c57..0000000 --- a/hbcb/src/inst/mod.rs +++ /dev/null @@ -1,1559 +0,0 @@ -//! This module defines riscv64-specific machine instruction types. - -pub use crate::ir::{ - condcodes::{FloatCC, IntCC}, - ExternalName, MemFlags, Type, -}; -use { - super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking}, - alloc::vec::Vec, - cranelift_codegen::{ - binemit::{Addend, CodeOffset, Reloc}, - ir::types::{self, F128, F16, F32, F64, I128, I16, I32, I64, I8, I8X16}, - isa::{CallConv, FunctionAlignment}, - machinst::*, - settings, CodegenError, CodegenResult, - }, - regalloc2::RegClass, - smallvec::{smallvec, SmallVec}, - std::{ - boxed::Box, - fmt::Write, - string::{String, ToString}, - }, -}; - -pub mod regs; -pub use self::regs::*; -pub mod imms; -pub use self::imms::*; -pub mod args; -pub use self::args::*; -pub mod emit; -pub use self::emit::*; -pub mod vector; -pub use self::vector::*; -pub mod encode; -pub use self::encode::*; -pub mod unwind; - -use crate::abi::Riscv64MachineDeps; - -#[cfg(test)] -mod emit_tests; - -use std::fmt::{Display, Formatter}; - -pub(crate) type VecU8 = Vec; - -//============================================================================= -// Instructions (top level): definition - -pub use crate::lower::isle::generated_code::{ - AluOPRRI, AluOPRRR, AtomicOP, CsrImmOP, CsrRegOP, FClassResult, FFlagsException, FpuOPRR, - FpuOPRRR, FpuOPRRRR, LoadOP, MInst as Inst, StoreOP, CSR, FRM, -}; -use crate::lower::isle::generated_code::{CjOp, MInst, VecAluOpRRImm5, VecAluOpRRR}; - -/// Additional information for `return_call[_ind]` instructions, left out of -/// line to lower the size of the `Inst` enum. -#[derive(Clone, Debug)] -pub struct ReturnCallInfo { - pub dest: T, - pub uses: CallArgList, - pub new_stack_arg_size: u32, -} - -/// A conditional branch target. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum CondBrTarget { - /// An unresolved reference to a Label, as passed into - /// `lower_branch_group()`. - Label(MachLabel), - /// No jump; fall through to the next instruction. - Fallthrough, -} - -impl CondBrTarget { - /// Return the target's label, if it is a label-based target. - pub(crate) fn as_label(self) -> Option { - match self { - CondBrTarget::Label(l) => Some(l), - _ => None, - } - } - - pub(crate) fn is_fallthrouh(&self) -> bool { - self == &CondBrTarget::Fallthrough - } -} - -impl Display for CondBrTarget { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - CondBrTarget::Label(l) => write!(f, "{}", l.to_string()), - CondBrTarget::Fallthrough => write!(f, "0"), - } - } -} - -pub(crate) fn enc_auipc(rd: Writable, imm: Imm20) -> u32 { - let x = 0b0010111 | reg_to_gpr_num(rd.to_reg()) << 7 | imm.bits() << 12; - x -} - -pub(crate) fn enc_jalr(rd: Writable, base: Reg, offset: Imm12) -> u32 { - let x = 0b1100111 - | reg_to_gpr_num(rd.to_reg()) << 7 - | 0b000 << 12 - | reg_to_gpr_num(base) << 15 - | offset.bits() << 20; - x -} - -/// rd and src must have the same length. -pub(crate) fn gen_moves(rd: &[Writable], src: &[Reg]) -> SmallInstVec { - assert!(rd.len() == src.len()); - assert!(rd.len() > 0); - let mut insts = SmallInstVec::new(); - for (dst, src) in rd.iter().zip(src.iter()) { - let ty = Inst::canonical_type_for_rc(dst.to_reg().class()); - insts.push(Inst::gen_move(*dst, *src, ty)); - } - insts -} - -impl Inst { - /// RISC-V can have multiple instruction sizes. 2 bytes for compressed - /// instructions, 4 for regular instructions, 6 and 8 byte instructions - /// are also being considered. - const UNCOMPRESSED_INSTRUCTION_SIZE: i32 = 4; - - #[inline] - pub(crate) fn load_imm12(rd: Writable, imm: Imm12) -> Inst { - Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, rd, rs: zero_reg(), imm12: imm } - } - - /// Immediates can be loaded using lui and addi instructions. - fn load_const_imm(rd: Writable, value: u64) -> Option> { - Inst::generate_imm(value).map(|(imm20, imm12)| { - let mut insts = SmallVec::new(); - - let imm20_is_zero = imm20.as_i32() == 0; - let imm12_is_zero = imm12.as_i16() == 0; - - let rs = if !imm20_is_zero { - insts.push(Inst::Lui { rd, imm: imm20 }); - rd.to_reg() - } else { - zero_reg() - }; - - // We also need to emit the addi if the value is 0, otherwise we just - // won't produce any instructions. - if !imm12_is_zero || (imm20_is_zero && imm12_is_zero) { - insts.push(Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, rd, rs, imm12 }) - } - - insts - }) - } - - pub(crate) fn load_constant_u32(rd: Writable, value: u64) -> SmallInstVec { - let insts = Inst::load_const_imm(rd, value); - insts.unwrap_or_else(|| smallvec![Inst::LoadInlineConst { rd, ty: I32, imm: value }]) - } - - pub fn load_constant_u64(rd: Writable, value: u64) -> SmallInstVec { - let insts = Inst::load_const_imm(rd, value); - insts.unwrap_or_else(|| smallvec![Inst::LoadInlineConst { rd, ty: I64, imm: value }]) - } - - pub(crate) fn construct_auipc_and_jalr( - link: Option>, - tmp: Writable, - offset: i64, - ) -> [Inst; 2] { - Inst::generate_imm(offset as u64) - .map(|(imm20, imm12)| { - let a = Inst::Auipc { rd: tmp, imm: imm20 }; - let b = Inst::Jalr { - rd: link.unwrap_or(writable_zero_reg()), - base: tmp.to_reg(), - offset: imm12, - }; - [a, b] - }) - .expect("code range is too big.") - } - - /// Generic constructor for a load (zero-extending where appropriate). - pub fn gen_load(into_reg: Writable, mem: AMode, ty: Type, flags: MemFlags) -> Inst { - if ty.is_vector() { - Inst::VecLoad { - eew: VecElementWidth::from_type(ty), - to: into_reg, - from: VecAMode::UnitStride { base: mem }, - flags, - mask: VecOpMasking::Disabled, - vstate: VState::from_type(ty), - } - } else { - Inst::Load { rd: into_reg, op: LoadOP::from_type(ty), from: mem, flags } - } - } - - /// Generic constructor for a store. - pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { - if ty.is_vector() { - Inst::VecStore { - eew: VecElementWidth::from_type(ty), - to: VecAMode::UnitStride { base: mem }, - from: from_reg, - flags, - mask: VecOpMasking::Disabled, - vstate: VState::from_type(ty), - } - } else { - Inst::Store { src: from_reg, op: StoreOP::from_type(ty), to: mem, flags } - } - } -} - -//============================================================================= - -fn vec_mask_operands(mask: &mut VecOpMasking, collector: &mut impl OperandVisitor) { - match mask { - VecOpMasking::Enabled { reg } => { - collector.reg_fixed_use(reg, pv_reg(0).into()); - } - VecOpMasking::Disabled => {} - } -} -fn vec_mask_late_operands(mask: &mut VecOpMasking, collector: &mut impl OperandVisitor) { - match mask { - VecOpMasking::Enabled { reg } => { - collector.reg_fixed_late_use(reg, pv_reg(0).into()); - } - VecOpMasking::Disabled => {} - } -} - -fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { - match inst { - Inst::Nop0 | Inst::Nop4 => {} - Inst::BrTable { index, tmp1, tmp2, .. } => { - collector.reg_use(index); - collector.reg_early_def(tmp1); - collector.reg_early_def(tmp2); - } - Inst::Auipc { rd, .. } => collector.reg_def(rd), - Inst::Lui { rd, .. } => collector.reg_def(rd), - Inst::Fli { rd, .. } => collector.reg_def(rd), - Inst::LoadInlineConst { rd, .. } => collector.reg_def(rd), - Inst::AluRRR { rd, rs1, rs2, .. } => { - collector.reg_use(rs1); - collector.reg_use(rs2); - collector.reg_def(rd); - } - Inst::FpuRRR { rd, rs1, rs2, .. } => { - collector.reg_use(rs1); - collector.reg_use(rs2); - collector.reg_def(rd); - } - Inst::AluRRImm12 { rd, rs, .. } => { - collector.reg_use(rs); - collector.reg_def(rd); - } - Inst::CsrReg { rd, rs, .. } => { - collector.reg_use(rs); - collector.reg_def(rd); - } - Inst::CsrImm { rd, .. } => { - collector.reg_def(rd); - } - Inst::Load { rd, from, .. } => { - from.get_operands(collector); - collector.reg_def(rd); - } - Inst::Store { to, src, .. } => { - to.get_operands(collector); - collector.reg_use(src); - } - - Inst::Args { args } => { - for ArgPair { vreg, preg } in args { - collector.reg_fixed_def(vreg, *preg); - } - } - Inst::Rets { rets } => { - for RetPair { vreg, preg } in rets { - collector.reg_fixed_use(vreg, *preg); - } - } - Inst::Ret { .. } => {} - - Inst::Extend { rd, rn, .. } => { - collector.reg_use(rn); - collector.reg_def(rd); - } - Inst::Call { info, .. } => { - let CallInfo { uses, defs, .. } = &mut **info; - for CallArgPair { vreg, preg } in uses { - collector.reg_fixed_use(vreg, *preg); - } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); - } - collector.reg_clobbers(info.clobbers); - } - Inst::CallInd { info } => { - let CallInfo { dest, uses, defs, .. } = &mut **info; - collector.reg_use(dest); - for CallArgPair { vreg, preg } in uses { - collector.reg_fixed_use(vreg, *preg); - } - for CallRetPair { vreg, preg } in defs { - collector.reg_fixed_def(vreg, *preg); - } - collector.reg_clobbers(info.clobbers); - } - Inst::ReturnCall { info } => { - for CallArgPair { vreg, preg } in &mut info.uses { - collector.reg_fixed_use(vreg, *preg); - } - } - Inst::ReturnCallInd { info } => { - // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): - // This shouldn't be a fixed register constraint. - collector.reg_fixed_use(&mut info.dest, x_reg(5)); - - for CallArgPair { vreg, preg } in &mut info.uses { - collector.reg_fixed_use(vreg, *preg); - } - } - Inst::Jal { .. } => { - // JAL technically has a rd register, but we currently always - // hardcode it to x0. - } - Inst::CondBr { kind: IntegerCompare { rs1, rs2, .. }, .. } => { - collector.reg_use(rs1); - collector.reg_use(rs2); - } - Inst::LoadExtName { rd, .. } => { - collector.reg_def(rd); - } - Inst::ElfTlsGetAddr { rd, .. } => { - // x10 is a0 which is both the first argument and the first return value. - collector.reg_fixed_def(rd, a0()); - let mut clobbers = Riscv64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV); - clobbers.remove(px_reg(10)); - collector.reg_clobbers(clobbers); - } - Inst::LoadAddr { rd, mem } => { - mem.get_operands(collector); - collector.reg_early_def(rd); - } - - Inst::Mov { rd, rm, .. } => { - collector.reg_use(rm); - collector.reg_def(rd); - } - Inst::MovFromPReg { rd, rm } => { - debug_assert!([px_reg(2), px_reg(8)].contains(rm)); - collector.reg_def(rd); - } - Inst::Fence { .. } => {} - Inst::EBreak => {} - Inst::Udf { .. } => {} - Inst::FpuRR { rd, rs, .. } => { - collector.reg_use(rs); - collector.reg_def(rd); - } - Inst::FpuRRRR { rd, rs1, rs2, rs3, .. } => { - collector.reg_use(rs1); - collector.reg_use(rs2); - collector.reg_use(rs3); - collector.reg_def(rd); - } - - Inst::Jalr { rd, base, .. } => { - collector.reg_use(base); - collector.reg_def(rd); - } - Inst::Atomic { rd, addr, src, .. } => { - collector.reg_use(addr); - collector.reg_use(src); - collector.reg_def(rd); - } - Inst::Select { dst, condition: IntegerCompare { rs1, rs2, .. }, x, y, .. } => { - // Mark the condition registers as late use so that they don't overlap with the destination - // register. We may potentially write to the destination register before evaluating the - // condition. - collector.reg_late_use(rs1); - collector.reg_late_use(rs2); - - for reg in x.regs_mut() { - collector.reg_use(reg); - } - for reg in y.regs_mut() { - collector.reg_use(reg); - } - - // If there's more than one destination register then use - // `reg_early_def` to prevent destination registers from overlapping - // with any operands. This ensures that the lowering doesn't have to - // deal with a situation such as when the input registers need to be - // swapped when moved to the destination. - // - // When there's only one destination register though don't use an - // early def because once the register is written no other inputs - // are read so it's ok for the destination to overlap the sources. - // The condition registers are already marked as late use so they - // won't overlap with the destination. - match dst.regs_mut() { - [reg] => collector.reg_def(reg), - regs => { - for d in regs { - collector.reg_early_def(d); - } - } - } - } - Inst::AtomicCas { offset, t0, dst, e, addr, v, .. } => { - collector.reg_use(offset); - collector.reg_use(e); - collector.reg_use(addr); - collector.reg_use(v); - collector.reg_early_def(t0); - collector.reg_early_def(dst); - } - - Inst::RawData { .. } => {} - Inst::AtomicStore { src, p, .. } => { - collector.reg_use(src); - collector.reg_use(p); - } - Inst::AtomicLoad { rd, p, .. } => { - collector.reg_use(p); - collector.reg_def(rd); - } - Inst::AtomicRmwLoop { offset, dst, p, x, t0, .. } => { - collector.reg_use(offset); - collector.reg_use(p); - collector.reg_use(x); - collector.reg_early_def(t0); - collector.reg_early_def(dst); - } - Inst::TrapIf { rs1, rs2, .. } => { - collector.reg_use(rs1); - collector.reg_use(rs2); - } - Inst::Unwind { .. } => {} - Inst::DummyUse { reg } => { - collector.reg_use(reg); - } - Inst::Popcnt { sum, step, rs, tmp, .. } => { - collector.reg_use(rs); - collector.reg_early_def(tmp); - collector.reg_early_def(step); - collector.reg_early_def(sum); - } - Inst::Cltz { sum, step, tmp, rs, .. } => { - collector.reg_use(rs); - collector.reg_early_def(tmp); - collector.reg_early_def(step); - collector.reg_early_def(sum); - } - Inst::Brev8 { rs, rd, step, tmp, tmp2, .. } => { - collector.reg_use(rs); - collector.reg_early_def(step); - collector.reg_early_def(tmp); - collector.reg_early_def(tmp2); - collector.reg_early_def(rd); - } - Inst::StackProbeLoop { .. } => { - // StackProbeLoop has a tmp register and StackProbeLoop used at gen_prologue. - // t3 will do the job. (t3 is caller-save register and not used directly by compiler like writable_spilltmp_reg) - // gen_prologue is called at emit stage. - // no need let reg alloc know. - } - Inst::VecAluRRRR { op, vd, vd_src, vs1, vs2, mask, .. } => { - debug_assert_eq!(vd_src.class(), RegClass::Vector); - debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); - debug_assert_eq!(vs2.class(), RegClass::Vector); - debug_assert_eq!(vs1.class(), op.vs1_regclass()); - - collector.reg_late_use(vs1); - collector.reg_late_use(vs2); - collector.reg_use(vd_src); - collector.reg_reuse_def(vd, 2); // `vd` == `vd_src`. - vec_mask_late_operands(mask, collector); - } - Inst::VecAluRRRImm5 { op, vd, vd_src, vs2, mask, .. } => { - debug_assert_eq!(vd_src.class(), RegClass::Vector); - debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); - debug_assert_eq!(vs2.class(), RegClass::Vector); - - // If the operation forbids source/destination overlap we need to - // ensure that the source and destination registers are different. - if op.forbids_overlaps(mask) { - collector.reg_late_use(vs2); - collector.reg_use(vd_src); - collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. - vec_mask_late_operands(mask, collector); - } else { - collector.reg_use(vs2); - collector.reg_use(vd_src); - collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. - vec_mask_operands(mask, collector); - } - } - Inst::VecAluRRR { op, vd, vs1, vs2, mask, .. } => { - debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); - debug_assert_eq!(vs2.class(), RegClass::Vector); - debug_assert_eq!(vs1.class(), op.vs1_regclass()); - - collector.reg_use(vs1); - collector.reg_use(vs2); - - // If the operation forbids source/destination overlap, then we must - // register it as an early_def. This encodes the constraint that - // these must not overlap. - if op.forbids_overlaps(mask) { - collector.reg_early_def(vd); - } else { - collector.reg_def(vd); - } - - vec_mask_operands(mask, collector); - } - Inst::VecAluRRImm5 { op, vd, vs2, mask, .. } => { - debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); - debug_assert_eq!(vs2.class(), RegClass::Vector); - - collector.reg_use(vs2); - - // If the operation forbids source/destination overlap, then we must - // register it as an early_def. This encodes the constraint that - // these must not overlap. - if op.forbids_overlaps(mask) { - collector.reg_early_def(vd); - } else { - collector.reg_def(vd); - } - - vec_mask_operands(mask, collector); - } - Inst::VecAluRR { op, vd, vs, mask, .. } => { - debug_assert_eq!(vd.to_reg().class(), op.dst_regclass()); - debug_assert_eq!(vs.class(), op.src_regclass()); - - collector.reg_use(vs); - - // If the operation forbids source/destination overlap, then we must - // register it as an early_def. This encodes the constraint that - // these must not overlap. - if op.forbids_overlaps(mask) { - collector.reg_early_def(vd); - } else { - collector.reg_def(vd); - } - - vec_mask_operands(mask, collector); - } - Inst::VecAluRImm5 { op, vd, mask, .. } => { - debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); - debug_assert!(!op.forbids_overlaps(mask)); - - collector.reg_def(vd); - vec_mask_operands(mask, collector); - } - Inst::VecSetState { rd, .. } => { - collector.reg_def(rd); - } - Inst::VecLoad { to, from, mask, .. } => { - from.get_operands(collector); - collector.reg_def(to); - vec_mask_operands(mask, collector); - } - Inst::VecStore { to, from, mask, .. } => { - to.get_operands(collector); - collector.reg_use(from); - vec_mask_operands(mask, collector); - } - } -} - -impl MachInst for Inst { - type ABIMachineSpec = Riscv64MachineDeps; - type LabelUse = LabelUse; - - // https://github.com/riscv/riscv-isa-manual/issues/850 - // all zero will cause invalid opcode. - const TRAP_OPCODE: &'static [u8] = &[0; 4]; - - fn gen_dummy_use(reg: Reg) -> Self { - Inst::DummyUse { reg } - } - - fn canonical_type_for_rc(rc: RegClass) -> Type { - match rc { - regalloc2::RegClass::Int => I64, - regalloc2::RegClass::Float => F64, - regalloc2::RegClass::Vector => I8X16, - } - } - - fn is_safepoint(&self) -> bool { - match self { - Inst::Call { .. } | Inst::CallInd { .. } => true, - _ => false, - } - } - - fn get_operands(&mut self, collector: &mut impl OperandVisitor) { - riscv64_get_operands(self, collector); - } - - fn is_move(&self) -> Option<(Writable, Reg)> { - match self { - Inst::Mov { rd, rm, .. } => Some((*rd, *rm)), - _ => None, - } - } - - fn is_included_in_clobbers(&self) -> bool { - match self { - &Inst::Args { .. } => false, - _ => true, - } - } - - fn is_trap(&self) -> bool { - match self { - Self::Udf { .. } => true, - _ => false, - } - } - - fn is_args(&self) -> bool { - match self { - Self::Args { .. } => true, - _ => false, - } - } - - fn is_term(&self) -> MachTerminator { - match self { - &Inst::Jal { .. } => MachTerminator::Uncond, - &Inst::CondBr { .. } => MachTerminator::Cond, - &Inst::Jalr { .. } => MachTerminator::Uncond, - &Inst::Rets { .. } => MachTerminator::Ret, - &Inst::BrTable { .. } => MachTerminator::Indirect, - &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall, - _ => MachTerminator::None, - } - } - - fn is_mem_access(&self) -> bool { - panic!("TODO FILL ME OUT") - } - - fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { - let x = Inst::Mov { rd: to_reg, rm: from_reg, ty }; - x - } - - fn gen_nop(preferred_size: usize) -> Inst { - if preferred_size == 0 { - return Inst::Nop0; - } - // We can't give a NOP (or any insn) < 4 bytes. - assert!(preferred_size >= 4); - Inst::Nop4 - } - - fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { - match ty { - I8 => Ok((&[RegClass::Int], &[I8])), - I16 => Ok((&[RegClass::Int], &[I16])), - I32 => Ok((&[RegClass::Int], &[I32])), - I64 => Ok((&[RegClass::Int], &[I64])), - F16 => Ok((&[RegClass::Float], &[F16])), - F32 => Ok((&[RegClass::Float], &[F32])), - F64 => Ok((&[RegClass::Float], &[F64])), - I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), - _ if ty.is_vector() => { - debug_assert!(ty.bits() <= 512); - - // Here we only need to return a SIMD type with the same size as `ty`. - // We use these types for spills and reloads, so prefer types with lanes <= 31 - // since that fits in the immediate field of `vsetivli`. - const SIMD_TYPES: [[Type; 1]; 6] = [ - [types::I8X2], - [types::I8X4], - [types::I8X8], - [types::I8X16], - [types::I16X16], - [types::I32X16], - ]; - let idx = (ty.bytes().ilog2() - 1) as usize; - let ty = &SIMD_TYPES[idx][..]; - - Ok((&[RegClass::Vector], ty)) - } - _ => Err(CodegenError::Unsupported(format!("Unexpected SSA-value type: {ty}"))), - } - } - - fn gen_jump(target: MachLabel) -> Inst { - Inst::Jal { label: target } - } - - fn worst_case_size() -> CodeOffset { - // Our worst case size is determined by the riscv64_worst_case_instruction_size test - 84 - } - - fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { - RegClass::Int - } - - fn function_alignment() -> FunctionAlignment { - FunctionAlignment { minimum: 2, preferred: 4 } - } -} - -//============================================================================= -// Pretty-printing of instructions. -pub fn reg_name(reg: Reg) -> String { - match reg.to_real_reg() { - Some(real) => match real.class() { - RegClass::Int => match real.hw_enc() { - 0 => "zero".into(), - 1 => "ra".into(), - 2 => "sp".into(), - 3 => "gp".into(), - 4 => "tp".into(), - 5..=7 => format!("t{}", real.hw_enc() - 5), - 8 => "fp".into(), - 9 => "s1".into(), - 10..=17 => format!("a{}", real.hw_enc() - 10), - 18..=27 => format!("s{}", real.hw_enc() - 16), - 28..=31 => format!("t{}", real.hw_enc() - 25), - _ => unreachable!(), - }, - RegClass::Float => match real.hw_enc() { - 0..=7 => format!("ft{}", real.hw_enc() - 0), - 8..=9 => format!("fs{}", real.hw_enc() - 8), - 10..=17 => format!("fa{}", real.hw_enc() - 10), - 18..=27 => format!("fs{}", real.hw_enc() - 16), - 28..=31 => format!("ft{}", real.hw_enc() - 20), - _ => unreachable!(), - }, - RegClass::Vector => format!("v{}", real.hw_enc()), - }, - None => { - format!("{reg:?}") - } - } -} - -impl Inst { - fn print_with_state(&self, _state: &mut EmitState) -> String { - let format_reg = |reg: Reg| -> String { reg_name(reg) }; - - let format_vec_amode = |amode: &VecAMode| -> String { - match amode { - VecAMode::UnitStride { base } => base.to_string(), - } - }; - - let format_mask = |mask: &VecOpMasking| -> String { - match mask { - VecOpMasking::Enabled { reg } => format!(",{}.t", format_reg(*reg)), - VecOpMasking::Disabled => format!(""), - } - }; - - let format_regs = |regs: &[Reg]| -> String { - let mut x = if regs.len() > 1 { String::from("[") } else { String::default() }; - regs.iter().for_each(|i| { - x.push_str(format_reg(*i).as_str()); - if *i != *regs.last().unwrap() { - x.push_str(","); - } - }); - if regs.len() > 1 { - x.push_str("]"); - } - x - }; - let format_labels = |labels: &[MachLabel]| -> String { - if labels.len() == 0 { - return String::from("[_]"); - } - let mut x = String::from("["); - labels.iter().for_each(|l| { - x.push_str( - format!("{:?}{}", l, if l != labels.last().unwrap() { "," } else { "" },) - .as_str(), - ); - }); - x.push_str("]"); - x - }; - - fn format_frm(rounding_mode: FRM) -> String { - format!(",{}", rounding_mode.to_static_str()) - } - - match self { - &Inst::Nop0 => { - format!("##zero length nop") - } - &Inst::Nop4 => { - format!("##fixed 4-size nop") - } - &Inst::StackProbeLoop { guard_size, probe_count, tmp } => { - let tmp = format_reg(tmp.to_reg()); - format!( - "inline_stack_probe##guard_size={guard_size} probe_count={probe_count} tmp={tmp}" - ) - } - &Inst::AtomicStore { src, ty, p } => { - let src = format_reg(src); - let p = format_reg(p); - format!("atomic_store.{ty} {src},({p})") - } - &Inst::DummyUse { reg } => { - let reg = format_reg(reg); - format!("dummy_use {reg}") - } - - &Inst::AtomicLoad { rd, ty, p } => { - let p = format_reg(p); - let rd = format_reg(rd.to_reg()); - format!("atomic_load.{ty} {rd},({p})") - } - &Inst::AtomicRmwLoop { offset, op, dst, ty, p, x, t0 } => { - let offset = format_reg(offset); - let p = format_reg(p); - let x = format_reg(x); - let t0 = format_reg(t0.to_reg()); - let dst = format_reg(dst.to_reg()); - format!("atomic_rmw.{ty} {op} {dst},{x},({p})##t0={t0} offset={offset}") - } - - &Inst::RawData { ref data } => match data.len() { - 4 => { - let mut bytes = [0; 4]; - for i in 0..bytes.len() { - bytes[i] = data[i]; - } - format!(".4byte 0x{:x}", u32::from_le_bytes(bytes)) - } - 8 => { - let mut bytes = [0; 8]; - for i in 0..bytes.len() { - bytes[i] = data[i]; - } - format!(".8byte 0x{:x}", u64::from_le_bytes(bytes)) - } - _ => { - format!(".data {data:?}") - } - }, - &Inst::Unwind { ref inst } => { - format!("unwind {inst:?}") - } - &Inst::Brev8 { rs, ty, step, tmp, tmp2, rd } => { - let rs = format_reg(rs); - let step = format_reg(step.to_reg()); - let tmp = format_reg(tmp.to_reg()); - let tmp2 = format_reg(tmp2.to_reg()); - let rd = format_reg(rd.to_reg()); - format!("brev8 {rd},{rs}##tmp={tmp} tmp2={tmp2} step={step} ty={ty}") - } - &Inst::Popcnt { sum, step, rs, tmp, ty } => { - let rs = format_reg(rs); - let tmp = format_reg(tmp.to_reg()); - let step = format_reg(step.to_reg()); - let sum = format_reg(sum.to_reg()); - format!("popcnt {sum},{rs}##ty={ty} tmp={tmp} step={step}") - } - &Inst::Cltz { sum, step, rs, tmp, ty, leading } => { - let rs = format_reg(rs); - let tmp = format_reg(tmp.to_reg()); - let step = format_reg(step.to_reg()); - let sum = format_reg(sum.to_reg()); - format!( - "{} {},{}##ty={} tmp={} step={}", - if leading { "clz" } else { "ctz" }, - sum, - rs, - ty, - tmp, - step - ) - } - &Inst::AtomicCas { offset, t0, dst, e, addr, v, ty } => { - let offset = format_reg(offset); - let e = format_reg(e); - let addr = format_reg(addr); - let v = format_reg(v); - let t0 = format_reg(t0.to_reg()); - let dst = format_reg(dst.to_reg()); - format!("atomic_cas.{ty} {dst},{e},{v},({addr})##t0={t0} offset={offset}",) - } - &Inst::BrTable { index, tmp1, tmp2, ref targets } => { - format!( - "{} {},{}##tmp1={},tmp2={}", - "br_table", - format_reg(index), - format_labels(&targets[..]), - format_reg(tmp1.to_reg()), - format_reg(tmp2.to_reg()), - ) - } - &Inst::Auipc { rd, imm } => { - format!("{} {},{}", "auipc", format_reg(rd.to_reg()), imm.as_i32(),) - } - &Inst::Jalr { rd, base, offset } => { - let base = format_reg(base); - let rd = format_reg(rd.to_reg()); - format!("{} {},{}({})", "jalr", rd, offset.as_i16(), base) - } - &Inst::Lui { rd, ref imm } => { - format!("{} {},{}", "lui", format_reg(rd.to_reg()), imm.as_i32()) - } - &Inst::Fli { rd, ty, imm } => { - let rd_s = format_reg(rd.to_reg()); - let imm_s = imm.format(); - let suffix = match ty { - F32 => "s", - F64 => "d", - _ => unreachable!(), - }; - - format!("fli.{suffix} {rd_s},{imm_s}") - } - &Inst::LoadInlineConst { rd, imm, .. } => { - let rd = format_reg(rd.to_reg()); - let mut buf = String::new(); - write!(&mut buf, "auipc {rd},0; ").unwrap(); - write!(&mut buf, "ld {rd},12({rd}); ").unwrap(); - write!(&mut buf, "j {}; ", Inst::UNCOMPRESSED_INSTRUCTION_SIZE + 8).unwrap(); - write!(&mut buf, ".8byte 0x{imm:x}").unwrap(); - buf - } - &Inst::AluRRR { alu_op, rd, rs1, rs2 } => { - let rs1_s = format_reg(rs1); - let rs2_s = format_reg(rs2); - let rd_s = format_reg(rd.to_reg()); - match alu_op { - AluOPRRR::Adduw if rs2 == zero_reg() => { - format!("zext.w {rd_s},{rs1_s}") - } - _ => { - format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s) - } - } - } - &Inst::FpuRR { alu_op, width, frm, rd, rs } => { - let rs = format_reg(rs); - let rd = format_reg(rd.to_reg()); - let frm = if alu_op.has_frm() { format_frm(frm) } else { String::new() }; - format!("{} {rd},{rs}{frm}", alu_op.op_name(width)) - } - &Inst::FpuRRR { alu_op, width, rd, rs1, rs2, frm } => { - let rs1 = format_reg(rs1); - let rs2 = format_reg(rs2); - let rd = format_reg(rd.to_reg()); - let frm = if alu_op.has_frm() { format_frm(frm) } else { String::new() }; - - let rs1_is_rs2 = rs1 == rs2; - match alu_op { - FpuOPRRR::Fsgnj if rs1_is_rs2 => format!("fmv.{width} {rd},{rs1}"), - FpuOPRRR::Fsgnjn if rs1_is_rs2 => format!("fneg.{width} {rd},{rs1}"), - FpuOPRRR::Fsgnjx if rs1_is_rs2 => format!("fabs.{width} {rd},{rs1}"), - _ => format!("{} {rd},{rs1},{rs2}{frm}", alu_op.op_name(width)), - } - } - &Inst::FpuRRRR { alu_op, rd, rs1, rs2, rs3, frm, width } => { - let rs1 = format_reg(rs1); - let rs2 = format_reg(rs2); - let rs3 = format_reg(rs3); - let rd = format_reg(rd.to_reg()); - let frm = format_frm(frm); - let op_name = alu_op.op_name(width); - format!("{op_name} {rd},{rs1},{rs2},{rs3}{frm}") - } - &Inst::AluRRImm12 { alu_op, rd, rs, ref imm12 } => { - let rs_s = format_reg(rs); - let rd = format_reg(rd.to_reg()); - - // Some of these special cases are better known as - // their pseudo-instruction version, so prefer printing those. - match (alu_op, rs, imm12) { - (AluOPRRI::Addi, rs, _) if rs == zero_reg() => { - return format!("li {},{}", rd, imm12.as_i16()); - } - (AluOPRRI::Addiw, _, imm12) if imm12.as_i16() == 0 => { - return format!("sext.w {rd},{rs_s}"); - } - (AluOPRRI::Xori, _, imm12) if imm12.as_i16() == -1 => { - return format!("not {rd},{rs_s}"); - } - (AluOPRRI::SltiU, _, imm12) if imm12.as_i16() == 1 => { - return format!("seqz {rd},{rs_s}"); - } - (alu_op, _, _) if alu_op.option_funct12().is_some() => { - format!("{} {},{}", alu_op.op_name(), rd, rs_s) - } - (alu_op, _, imm12) => { - format!("{} {},{},{}", alu_op.op_name(), rd, rs_s, imm12.as_i16()) - } - } - } - &Inst::CsrReg { op, rd, rs, csr } => { - let rs_s = format_reg(rs); - let rd_s = format_reg(rd.to_reg()); - - match (op, csr, rd) { - (CsrRegOP::CsrRW, CSR::Frm, rd) if rd.to_reg() == zero_reg() => { - format!("fsrm {rs_s}") - } - _ => { - format!("{op} {rd_s},{csr},{rs_s}") - } - } - } - &Inst::CsrImm { op, rd, csr, imm } => { - let rd_s = format_reg(rd.to_reg()); - - match (op, csr, rd) { - (CsrImmOP::CsrRWI, CSR::Frm, rd) if rd.to_reg() != zero_reg() => { - format!("fsrmi {rd_s},{imm}") - } - _ => { - format!("{op} {rd_s},{csr},{imm}") - } - } - } - &Inst::Load { rd, op, from, flags: _flags } => { - let base = from.to_string(); - let rd = format_reg(rd.to_reg()); - format!("{} {},{}", op.op_name(), rd, base,) - } - &Inst::Store { to, src, op, flags: _flags } => { - let base = to.to_string(); - let src = format_reg(src); - format!("{} {},{}", op.op_name(), src, base,) - } - &Inst::Args { ref args } => { - let mut s = "args".to_string(); - for arg in args { - let preg = format_reg(arg.preg); - let def = format_reg(arg.vreg.to_reg()); - write!(&mut s, " {def}={preg}").unwrap(); - } - s - } - &Inst::Rets { ref rets } => { - let mut s = "rets".to_string(); - for ret in rets { - let preg = format_reg(ret.preg); - let vreg = format_reg(ret.vreg); - write!(&mut s, " {vreg}={preg}").unwrap(); - } - s - } - &Inst::Ret {} => "ret".to_string(), - - &MInst::Extend { rd, rn, signed, from_bits, .. } => { - let rn = format_reg(rn); - let rd = format_reg(rd.to_reg()); - return if signed == false && from_bits == 8 { - format!("andi {rd},{rn}") - } else { - let op = if signed { "srai" } else { "srli" }; - let shift_bits = (64 - from_bits) as i16; - format!("slli {rd},{rn},{shift_bits}; {op} {rd},{rd},{shift_bits}") - }; - } - &MInst::Call { ref info } => format!("call {}", info.dest.display(None)), - &MInst::CallInd { ref info } => { - let rd = format_reg(info.dest); - format!("callind {rd}") - } - &MInst::ReturnCall { ref info } => { - let mut s = format!( - "return_call {:?} new_stack_arg_size:{}", - info.dest, info.new_stack_arg_size - ); - for ret in &info.uses { - let preg = format_reg(ret.preg); - let vreg = format_reg(ret.vreg); - write!(&mut s, " {vreg}={preg}").unwrap(); - } - s - } - &MInst::ReturnCallInd { ref info } => { - let callee = format_reg(info.dest); - let mut s = format!( - "return_call_ind {callee} new_stack_arg_size:{}", - info.new_stack_arg_size - ); - for ret in &info.uses { - let preg = format_reg(ret.preg); - let vreg = format_reg(ret.vreg); - write!(&mut s, " {vreg}={preg}").unwrap(); - } - s - } - &MInst::TrapIf { rs1, rs2, cc, trap_code } => { - let rs1 = format_reg(rs1); - let rs2 = format_reg(rs2); - format!("trap_if {trap_code}##({rs1} {cc} {rs2})") - } - &MInst::Jal { label } => { - format!("j {}", label.to_string()) - } - &MInst::CondBr { taken, not_taken, kind, .. } => { - let rs1 = format_reg(kind.rs1); - let rs2 = format_reg(kind.rs2); - if not_taken.is_fallthrouh() && taken.as_label().is_none() { - format!("{} {},{},0", kind.op_name(), rs1, rs2) - } else { - let x = format!( - "{} {},{},taken({}),not_taken({})", - kind.op_name(), - rs1, - rs2, - taken, - not_taken - ); - x - } - } - &MInst::Atomic { op, rd, addr, src, amo } => { - let op_name = op.op_name(amo); - let addr = format_reg(addr); - let src = format_reg(src); - let rd = format_reg(rd.to_reg()); - if op.is_load() { - format!("{op_name} {rd},({addr})") - } else { - format!("{op_name} {rd},{src},({addr})") - } - } - &MInst::LoadExtName { rd, ref name, offset } => { - let rd = format_reg(rd.to_reg()); - format!("load_sym {},{}{:+}", rd, name.display(None), offset) - } - &Inst::ElfTlsGetAddr { rd, ref name } => { - let rd = format_reg(rd.to_reg()); - format!("elf_tls_get_addr {rd},{}", name.display(None)) - } - &MInst::LoadAddr { ref rd, ref mem } => { - let rs = mem.to_string(); - let rd = format_reg(rd.to_reg()); - format!("load_addr {rd},{rs}") - } - &MInst::Mov { rd, rm, ty } => { - let rm = format_reg(rm); - let rd = format_reg(rd.to_reg()); - - let op = match ty { - F16 => "fmv.h", - F32 => "fmv.s", - F64 => "fmv.d", - ty if ty.is_vector() => "vmv1r.v", - _ => "mv", - }; - - format!("{op} {rd},{rm}") - } - &MInst::MovFromPReg { rd, rm } => { - let rd = format_reg(rd.to_reg()); - debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); - let rm = reg_name(Reg::from(rm)); - format!("mv {rd},{rm}") - } - &MInst::Fence { pred, succ } => { - format!( - "fence {},{}", - Inst::fence_req_to_string(pred), - Inst::fence_req_to_string(succ), - ) - } - &MInst::Select { ref dst, condition, ref x, ref y } => { - let c_rs1 = format_reg(condition.rs1); - let c_rs2 = format_reg(condition.rs2); - let x = format_regs(x.regs()); - let y = format_regs(y.regs()); - let dst = dst.map(|r| r.to_reg()); - let dst = format_regs(dst.regs()); - format!( - "select {},{},{}##condition=({} {} {})", - dst, - x, - y, - c_rs1, - condition.kind.to_static_str(), - c_rs2 - ) - } - &MInst::Udf { trap_code } => format!("udf##trap_code={trap_code}"), - &MInst::EBreak {} => String::from("ebreak"), - &Inst::VecAluRRRR { op, vd, vd_src, vs1, vs2, ref mask, ref vstate } => { - let vs1_s = format_reg(vs1); - let vs2_s = format_reg(vs2); - let vd_src_s = format_reg(vd_src); - let vd_s = format_reg(vd.to_reg()); - let mask = format_mask(mask); - - let vd_fmt = if vd_s != vd_src_s { format!("{vd_s},{vd_src_s}") } else { vd_s }; - - // Note: vs2 and vs1 here are opposite to the standard scalar ordering. - // This is noted in Section 10.1 of the RISC-V Vector spec. - format!("{op} {vd_fmt},{vs2_s},{vs1_s}{mask} {vstate}") - } - &Inst::VecAluRRRImm5 { op, vd, imm, vs2, ref mask, ref vstate, .. } => { - let vs2_s = format_reg(vs2); - let vd_s = format_reg(vd.to_reg()); - let mask = format_mask(mask); - - // Some opcodes interpret the immediate as unsigned, lets show the - // correct number here. - let imm_s = - if op.imm_is_unsigned() { format!("{}", imm.bits()) } else { format!("{imm}") }; - - format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}") - } - &Inst::VecAluRRR { op, vd, vs1, vs2, ref mask, ref vstate } => { - let vs1_s = format_reg(vs1); - let vs2_s = format_reg(vs2); - let vd_s = format_reg(vd.to_reg()); - let mask = format_mask(mask); - - // Note: vs2 and vs1 here are opposite to the standard scalar ordering. - // This is noted in Section 10.1 of the RISC-V Vector spec. - match (op, vs2, vs1) { - (VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => { - format!("vneg.v {vd_s},{vs2_s}{mask} {vstate}") - } - (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => { - format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}") - } - (VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => { - format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}") - } - (VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => { - format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}") - } - _ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"), - } - } - &Inst::VecAluRRImm5 { op, vd, imm, vs2, ref mask, ref vstate } => { - let vs2_s = format_reg(vs2); - let vd_s = format_reg(vd.to_reg()); - let mask = format_mask(mask); - - // Some opcodes interpret the immediate as unsigned, lets show the - // correct number here. - let imm_s = - if op.imm_is_unsigned() { format!("{}", imm.bits()) } else { format!("{imm}") }; - - match (op, imm) { - (VecAluOpRRImm5::VxorVI, imm) if imm == Imm5::maybe_from_i8(-1).unwrap() => { - format!("vnot.v {vd_s},{vs2_s}{mask} {vstate}") - } - _ => format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}"), - } - } - &Inst::VecAluRR { op, vd, vs, ref mask, ref vstate } => { - let vs_s = format_reg(vs); - let vd_s = format_reg(vd.to_reg()); - let mask = format_mask(mask); - - format!("{op} {vd_s},{vs_s}{mask} {vstate}") - } - &Inst::VecAluRImm5 { op, vd, imm, ref mask, ref vstate } => { - let vd_s = format_reg(vd.to_reg()); - let mask = format_mask(mask); - - format!("{op} {vd_s},{imm}{mask} {vstate}") - } - &Inst::VecSetState { rd, ref vstate } => { - let rd_s = format_reg(rd.to_reg()); - assert!(vstate.avl.is_static()); - format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype) - } - Inst::VecLoad { eew, to, from, ref mask, ref vstate, .. } => { - let base = format_vec_amode(from); - let vd = format_reg(to.to_reg()); - let mask = format_mask(mask); - - format!("vl{eew}.v {vd},{base}{mask} {vstate}") - } - Inst::VecStore { eew, to, from, ref mask, ref vstate, .. } => { - let dst = format_vec_amode(to); - let vs3 = format_reg(*from); - let mask = format_mask(mask); - - format!("vs{eew}.v {vs3},{dst}{mask} {vstate}") - } - } - } -} - -/// Different forms of label references for different instruction formats. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum LabelUse { - /// 20-bit branch offset (unconditional branches). PC-rel, offset is - /// imm << 1. Immediate is 20 signed bits. Use in Jal instructions. - Jal20, - - /// The unconditional jump instructions all use PC-relative - /// addressing to help support position independent code. The JALR - /// instruction was defined to enable a two-instruction sequence to - /// jump anywhere in a 32-bit absolute address range. A LUI - /// instruction can first load rs1 with the upper 20 bits of a - /// target address, then JALR can add in the lower bits. Similarly, - /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative - /// address range. - PCRel32, - - /// All branch instructions use the B-type instruction format. The - /// 12-bit B-immediate encodes signed offsets in multiples of 2, and - /// is added to the current pc to give the target address. The - /// conditional branch range is ±4 KiB. - B12, - - /// Equivalent to the `R_RISCV_PCREL_HI20` relocation, Allows setting - /// the immediate field of an `auipc` instruction. - PCRelHi20, - - /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to - /// the final address, instead of the `PCREL_HI20` label. Allows setting - /// the immediate field of I Type instructions such as `addi` or `lw`. - /// - /// Since we currently don't support offsets in labels, this relocation has - /// an implicit offset of 4. - PCRelLo12I, - - /// 11-bit PC-relative jump offset. Equivalent to the `RVC_JUMP` relocation - RVCJump, -} - -impl MachInstLabelUse for LabelUse { - /// Alignment for veneer code. Every Riscv64 instruction must be - /// 4-byte-aligned. - const ALIGN: CodeOffset = 4; - - /// Maximum PC-relative range (positive), inclusive. - fn max_pos_range(self) -> CodeOffset { - match self { - LabelUse::Jal20 => ((1 << 19) - 1) * 2, - LabelUse::PCRelLo12I | LabelUse::PCRelHi20 | LabelUse::PCRel32 => { - Inst::imm_max() as CodeOffset - } - LabelUse::B12 => ((1 << 11) - 1) * 2, - LabelUse::RVCJump => ((1 << 10) - 1) * 2, - } - } - - /// Maximum PC-relative range (negative). - fn max_neg_range(self) -> CodeOffset { - match self { - LabelUse::PCRel32 => Inst::imm_min().abs() as CodeOffset, - _ => self.max_pos_range() + 2, - } - } - - /// Size of window into code needed to do the patch. - fn patch_size(self) -> CodeOffset { - match self { - LabelUse::RVCJump => 2, - LabelUse::Jal20 | LabelUse::B12 | LabelUse::PCRelHi20 | LabelUse::PCRelLo12I => 4, - LabelUse::PCRel32 => 8, - } - } - - /// Perform the patch. - fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { - assert!(use_offset % 2 == 0); - assert!(label_offset % 2 == 0); - let offset = (label_offset as i64) - (use_offset as i64); - - // re-check range - assert!( - offset >= -(self.max_neg_range() as i64) && offset <= (self.max_pos_range() as i64), - "{self:?} offset '{offset}' use_offset:'{use_offset}' label_offset:'{label_offset}' must not exceed max range.", - ); - self.patch_raw_offset(buffer, offset); - } - - /// Is a veneer supported for this label reference type? - fn supports_veneer(self) -> bool { - match self { - Self::Jal20 | Self::B12 | Self::RVCJump => true, - _ => false, - } - } - - /// How large is the veneer, if supported? - fn veneer_size(self) -> CodeOffset { - match self { - Self::B12 | Self::Jal20 | Self::RVCJump => 8, - _ => unreachable!(), - } - } - - fn worst_case_veneer_size() -> CodeOffset { - 8 - } - - /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return - /// an offset and label-use for the veneer's use of the original label. - fn generate_veneer( - self, - buffer: &mut [u8], - veneer_offset: CodeOffset, - ) -> (CodeOffset, LabelUse) { - let base = writable_spilltmp_reg(); - { - let x = enc_auipc(base, Imm20::ZERO).to_le_bytes(); - buffer[0] = x[0]; - buffer[1] = x[1]; - buffer[2] = x[2]; - buffer[3] = x[3]; - } - { - let x = enc_jalr(writable_zero_reg(), base.to_reg(), Imm12::ZERO).to_le_bytes(); - buffer[4] = x[0]; - buffer[5] = x[1]; - buffer[6] = x[2]; - buffer[7] = x[3]; - } - (veneer_offset, Self::PCRel32) - } - - fn from_reloc(reloc: Reloc, addend: Addend) -> Option { - match (reloc, addend) { - (Reloc::RiscvCallPlt, _) => Some(Self::PCRel32), - _ => None, - } - } -} - -impl LabelUse { - #[allow(dead_code)] // in case it's needed in the future - fn offset_in_range(self, offset: i64) -> bool { - let min = -(self.max_neg_range() as i64); - let max = self.max_pos_range() as i64; - offset >= min && offset <= max - } - - fn patch_raw_offset(self, buffer: &mut [u8], offset: i64) { - let insn = match self { - LabelUse::RVCJump => u16::from_le_bytes(buffer[..2].try_into().unwrap()) as u32, - _ => u32::from_le_bytes(buffer[..4].try_into().unwrap()), - }; - - match self { - LabelUse::Jal20 => { - let offset = offset as u32; - let v = ((offset >> 12 & 0b1111_1111) << 12) - | ((offset >> 11 & 0b1) << 20) - | ((offset >> 1 & 0b11_1111_1111) << 21) - | ((offset >> 20 & 0b1) << 31); - buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v)); - } - LabelUse::PCRel32 => { - let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]); - Inst::generate_imm(offset as u64) - .map(|(imm20, imm12)| { - // Encode the OR-ed-in value with zero_reg(). The - // register parameter must be in the original - // encoded instruction and or'ing in zeroes does not - // change it. - buffer[0..4].clone_from_slice(&u32::to_le_bytes( - insn | enc_auipc(writable_zero_reg(), imm20), - )); - buffer[4..8].clone_from_slice(&u32::to_le_bytes( - insn2 | enc_jalr(writable_zero_reg(), zero_reg(), imm12), - )); - }) - // expect make sure we handled. - .expect("we have check the range before,this is a compiler error."); - } - - LabelUse::B12 => { - let offset = offset as u32; - let v = ((offset >> 11 & 0b1) << 7) - | ((offset >> 1 & 0b1111) << 8) - | ((offset >> 5 & 0b11_1111) << 25) - | ((offset >> 12 & 0b1) << 31); - buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v)); - } - - LabelUse::PCRelHi20 => { - // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses - // - // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the - // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an - // offset of 2048, we need to land at the next page and subtract instead. - let offset = offset as u32; - let hi20 = offset.wrapping_add(0x800) >> 12; - let insn = (insn & 0xFFF) | (hi20 << 12); - buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); - } - - LabelUse::PCRelLo12I => { - // `offset` is the offset from the current instruction to the target address. - // - // However we are trying to compute the offset to the target address from the previous instruction. - // The previous instruction should be the one that contains the PCRelHi20 relocation and - // stores/references the program counter (`auipc` usually). - // - // Since we are trying to compute the offset from the previous instruction, we can - // represent it as offset = target_address - (current_instruction_address - 4) - // which is equivalent to offset = target_address - current_instruction_address + 4. - // - // Thus we need to add 4 to the offset here. - let lo12 = (offset + 4) as u32 & 0xFFF; - let insn = (insn & 0xFFFFF) | (lo12 << 20); - buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); - } - LabelUse::RVCJump => { - debug_assert!(offset & 1 == 0); - - // We currently only support this for the C.J operation, so assert that is the opcode in - // the buffer. - debug_assert_eq!(insn & 0xFFFF, 0xA001); - - buffer[0..2].clone_from_slice(&u16::to_le_bytes(encode_cj_type( - CjOp::CJ, - Imm12::from_i16(i16::try_from(offset).unwrap()), - ))); - } - } - } -} - -#[cfg(test)] -mod test { - use super::*; - #[test] - fn label_use_max_range() { - assert!(LabelUse::B12.max_neg_range() == LabelUse::B12.max_pos_range() + 2); - assert!(LabelUse::Jal20.max_neg_range() == LabelUse::Jal20.max_pos_range() + 2); - assert!(LabelUse::PCRel32.max_pos_range() == (Inst::imm_max() as CodeOffset)); - assert!(LabelUse::PCRel32.max_neg_range() == (Inst::imm_min().abs() as CodeOffset)); - assert!(LabelUse::B12.max_pos_range() == ((1 << 11) - 1) * 2); - } -} diff --git a/hbcb/src/inst/regs.rs b/hbcb/src/inst/regs.rs deleted file mode 100644 index 94bc857..0000000 --- a/hbcb/src/inst/regs.rs +++ /dev/null @@ -1,167 +0,0 @@ -//! Riscv64 ISA definitions: registers. -//! - -use { - alloc::{vec, vec::Vec}, - cranelift_codegen::machinst::{Reg, Writable}, - regalloc2::{PReg, RegClass, VReg}, -}; - -// first argument of function call -#[inline] -pub fn a0() -> Reg { - x_reg(10) -} - -// second argument of function call -#[inline] -#[allow(dead_code)] -pub fn a1() -> Reg { - x_reg(11) -} - -// third argument of function call -#[inline] -#[allow(dead_code)] -pub fn a2() -> Reg { - x_reg(12) -} - -#[inline] -#[allow(dead_code)] -pub fn writable_a0() -> Writable { - Writable::from_reg(a0()) -} -#[inline] -#[allow(dead_code)] -pub fn writable_a1() -> Writable { - Writable::from_reg(a1()) -} -#[inline] -#[allow(dead_code)] -pub fn writable_a2() -> Writable { - Writable::from_reg(a2()) -} - -#[inline] -#[allow(dead_code)] -pub fn fa0() -> Reg { - f_reg(10) -} -#[inline] -#[allow(dead_code)] -pub fn writable_fa0() -> Writable { - Writable::from_reg(fa0()) -} -#[inline] -#[allow(dead_code)] -pub fn writable_fa1() -> Writable { - Writable::from_reg(fa1()) -} -#[inline] -pub fn fa1() -> Reg { - f_reg(11) -} - -/// Get a reference to the zero-register. -#[inline] -pub fn zero_reg() -> Reg { - x_reg(0) -} - -/// Get a writable reference to the zero-register (this discards a result). -#[inline] -pub fn writable_zero_reg() -> Writable { - Writable::from_reg(zero_reg()) -} -#[inline] -pub fn stack_reg() -> Reg { - x_reg(2) -} - -/// Get a writable reference to the stack-pointer register. -#[inline] -pub fn writable_stack_reg() -> Writable { - Writable::from_reg(stack_reg()) -} - -/// Get a reference to the link register (x1). -pub fn link_reg() -> Reg { - x_reg(1) -} - -/// Get a writable reference to the link register. -#[inline] -pub fn writable_link_reg() -> Writable { - Writable::from_reg(link_reg()) -} - -/// Get a reference to the frame pointer (x8). -#[inline] -pub fn fp_reg() -> Reg { - x_reg(8) -} - -/// Get a writable reference to the frame pointer. -#[inline] -pub fn writable_fp_reg() -> Writable { - Writable::from_reg(fp_reg()) -} - -/// Get a reference to the first temporary, sometimes "spill temporary", -/// register. This register is used in various ways as a temporary. -#[inline] -pub fn spilltmp_reg() -> Reg { - x_reg(31) -} - -/// Get a writable reference to the spilltmp reg. -#[inline] -pub fn writable_spilltmp_reg() -> Writable { - Writable::from_reg(spilltmp_reg()) -} - -///spilltmp2 -#[inline] -pub fn spilltmp_reg2() -> Reg { - x_reg(30) -} - -/// Get a writable reference to the spilltmp2 reg. -#[inline] -pub fn writable_spilltmp_reg2() -> Writable { - Writable::from_reg(spilltmp_reg2()) -} - -#[inline] -pub fn x_reg(enc: usize) -> Reg { - let p_reg = PReg::new(enc, RegClass::Int); - let v_reg = VReg::new(p_reg.index(), p_reg.class()); - Reg::from(v_reg) -} -pub const fn px_reg(enc: usize) -> PReg { - PReg::new(enc, RegClass::Int) -} - -#[inline] -pub fn f_reg(enc: usize) -> Reg { - let p_reg = PReg::new(enc, RegClass::Float); - let v_reg = VReg::new(p_reg.index(), p_reg.class()); - Reg::from(v_reg) -} -pub const fn pf_reg(enc: usize) -> PReg { - PReg::new(enc, RegClass::Float) -} - -#[allow(dead_code)] -pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec> { - let mut regs = vec![]; - for i in start..=end { - regs.push(Writable::from_reg(x_reg(i))); - } - regs -} - -pub const fn pv_reg(enc: usize) -> PReg { - PReg::new(enc, RegClass::Vector) -} diff --git a/hbcb/src/inst/unwind.rs b/hbcb/src/inst/unwind.rs deleted file mode 100644 index 1e2bb90..0000000 --- a/hbcb/src/inst/unwind.rs +++ /dev/null @@ -1,2 +0,0 @@ -#[cfg(feature = "unwind")] -pub(crate) mod systemv; diff --git a/hbcb/src/inst/unwind/systemv.rs b/hbcb/src/inst/unwind/systemv.rs deleted file mode 100644 index 6cf2445..0000000 --- a/hbcb/src/inst/unwind/systemv.rs +++ /dev/null @@ -1,170 +0,0 @@ -//! Unwind information for System V ABI (Riscv64). - -use crate::inst::regs; -use crate::isa::unwind::systemv::RegisterMappingError; -use crate::machinst::Reg; -use gimli::{write::CommonInformationEntry, Encoding, Format, Register}; -use regalloc2::RegClass; - -/// Creates a new riscv64 common information entry (CIE). -pub fn create_cie() -> CommonInformationEntry { - use gimli::write::CallFrameInstruction; - - let mut entry = CommonInformationEntry::new( - Encoding { - address_size: 8, - format: Format::Dwarf32, - version: 1, - }, - 2, // Code alignment factor - -8, // Data alignment factor - Register(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16), - ); - - // Every frame will start with the call frame address (CFA) at SP - let sp = Register(regs::stack_reg().to_real_reg().unwrap().hw_enc().into()); - entry.add_instruction(CallFrameInstruction::Cfa(sp, 0)); - - entry -} - -/// Map Cranelift registers to their corresponding Gimli registers. -pub fn map_reg(reg: Reg) -> Result { - let reg_offset = match reg.class() { - RegClass::Int => 0, - RegClass::Float => 32, - RegClass::Vector => 64, - }; - - let reg = reg.to_real_reg().unwrap().hw_enc() as u16; - Ok(Register(reg_offset + reg)) -} - -pub(crate) struct RegisterMapper; - -impl crate::isa::unwind::systemv::RegisterMapper for RegisterMapper { - fn map(&self, reg: Reg) -> Result { - Ok(map_reg(reg)?.0) - } - fn fp(&self) -> Option { - Some(regs::fp_reg().to_real_reg().unwrap().hw_enc() as u16) - } - fn lr(&self) -> Option { - Some(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16) - } - fn lr_offset(&self) -> Option { - Some(8) - } -} - -#[cfg(test)] -mod tests { - use crate::cursor::{Cursor, FuncCursor}; - - use crate::ir::{ - types, AbiParam, Function, InstBuilder, Signature, StackSlotData, StackSlotKind, - UserFuncName, - }; - use crate::isa::{lookup, CallConv}; - use crate::settings::{builder, Flags}; - use crate::Context; - use gimli::write::Address; - use target_lexicon::triple; - - #[test] - fn test_simple_func() { - let isa = lookup(triple!("riscv64")) - .expect("expect riscv64 ISA") - .finish(Flags::new(builder())) - .expect("Creating compiler backend"); - - let mut context = Context::for_function(create_function( - CallConv::SystemV, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)), - )); - - let code = context - .compile(&*isa, &mut Default::default()) - .expect("expected compilation"); - - let fde = match code - .create_unwind_info(isa.as_ref()) - .expect("can create unwind info") - { - Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { - info.to_fde(Address::Constant(1234)) - } - _ => panic!("expected unwind information"), - }; - - assert_eq!(format!("{fde:?}"), "FrameDescriptionEntry { address: Constant(1234), length: 40, lsda: None, instructions: [(12, CfaOffset(16)), (12, Offset(Register(8), -16)), (12, Offset(Register(1), -8)), (16, CfaRegister(Register(8)))] }"); - } - - fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { - let mut func = - Function::with_name_signature(UserFuncName::user(0, 0), Signature::new(call_conv)); - - let block0 = func.dfg.make_block(); - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().return_(&[]); - - if let Some(stack_slot) = stack_slot { - func.sized_stack_slots.push(stack_slot); - } - - func - } - - #[test] - fn test_multi_return_func() { - let isa = lookup(triple!("riscv64")) - .expect("expect riscv64 ISA") - .finish(Flags::new(builder())) - .expect("Creating compiler backend"); - - let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); - - let code = context - .compile(&*isa, &mut Default::default()) - .expect("expected compilation"); - - let fde = match code - .create_unwind_info(isa.as_ref()) - .expect("can create unwind info") - { - Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { - info.to_fde(Address::Constant(4321)) - } - _ => panic!("expected unwind information"), - }; - - assert_eq!( - format!("{fde:?}"), - "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }" - ); - } - - fn create_multi_return_function(call_conv: CallConv) -> Function { - let mut sig = Signature::new(call_conv); - sig.params.push(AbiParam::new(types::I32)); - let mut func = Function::with_name_signature(UserFuncName::user(0, 0), sig); - - let block0 = func.dfg.make_block(); - let v0 = func.dfg.append_block_param(block0, types::I32); - let block1 = func.dfg.make_block(); - let block2 = func.dfg.make_block(); - - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().brif(v0, block2, &[], block1, &[]); - - pos.insert_block(block1); - pos.ins().return_(&[]); - - pos.insert_block(block2); - pos.ins().return_(&[]); - - func - } -} diff --git a/hbcb/src/inst/vector.rs b/hbcb/src/inst/vector.rs deleted file mode 100644 index a65ed82..0000000 --- a/hbcb/src/inst/vector.rs +++ /dev/null @@ -1,1153 +0,0 @@ -use { - super::{Type, UImm5}, - crate::{ - lower::isle::generated_code::{ - VecAMode, VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, - VecAluOpRRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, VecOpCategory, - VecOpMasking, VecTailMode, - }, - Reg, - }, - core::fmt, - cranelift_codegen::machinst::{OperandVisitor, RegClass}, -}; - -impl VecAvl { - pub fn _static(size: u32) -> Self { - VecAvl::Static { size: UImm5::maybe_from_u8(size as u8).expect("Invalid size for AVL") } - } - - pub fn is_static(&self) -> bool { - match self { - VecAvl::Static { .. } => true, - } - } - - pub fn unwrap_static(&self) -> UImm5 { - match self { - VecAvl::Static { size } => *size, - } - } -} - -// TODO: Can we tell ISLE to derive this? -impl Copy for VecAvl {} - -// TODO: Can we tell ISLE to derive this? -impl PartialEq for VecAvl { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (VecAvl::Static { size: lhs }, VecAvl::Static { size: rhs }) => lhs == rhs, - } - } -} - -impl fmt::Display for VecAvl { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - VecAvl::Static { size } => write!(f, "{size}"), - } - } -} - -impl VecElementWidth { - pub fn from_type(ty: Type) -> Self { - Self::from_bits(ty.lane_bits()) - } - - pub fn from_bits(bits: u32) -> Self { - match bits { - 8 => VecElementWidth::E8, - 16 => VecElementWidth::E16, - 32 => VecElementWidth::E32, - 64 => VecElementWidth::E64, - _ => panic!("Invalid number of bits for VecElementWidth: {bits}"), - } - } - - pub fn bits(&self) -> u32 { - match self { - VecElementWidth::E8 => 8, - VecElementWidth::E16 => 16, - VecElementWidth::E32 => 32, - VecElementWidth::E64 => 64, - } - } - - pub fn encode(&self) -> u32 { - match self { - VecElementWidth::E8 => 0b000, - VecElementWidth::E16 => 0b001, - VecElementWidth::E32 => 0b010, - VecElementWidth::E64 => 0b011, - } - } -} - -impl fmt::Display for VecElementWidth { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "e{}", self.bits()) - } -} - -impl VecLmul { - pub fn encode(&self) -> u32 { - match self { - VecLmul::LmulF8 => 0b101, - VecLmul::LmulF4 => 0b110, - VecLmul::LmulF2 => 0b111, - VecLmul::Lmul1 => 0b000, - VecLmul::Lmul2 => 0b001, - VecLmul::Lmul4 => 0b010, - VecLmul::Lmul8 => 0b011, - } - } -} - -impl fmt::Display for VecLmul { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - VecLmul::LmulF8 => write!(f, "mf8"), - VecLmul::LmulF4 => write!(f, "mf4"), - VecLmul::LmulF2 => write!(f, "mf2"), - VecLmul::Lmul1 => write!(f, "m1"), - VecLmul::Lmul2 => write!(f, "m2"), - VecLmul::Lmul4 => write!(f, "m4"), - VecLmul::Lmul8 => write!(f, "m8"), - } - } -} - -impl VecTailMode { - pub fn encode(&self) -> u32 { - match self { - VecTailMode::Agnostic => 1, - VecTailMode::Undisturbed => 0, - } - } -} - -impl fmt::Display for VecTailMode { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - VecTailMode::Agnostic => write!(f, "ta"), - VecTailMode::Undisturbed => write!(f, "tu"), - } - } -} - -impl VecMaskMode { - pub fn encode(&self) -> u32 { - match self { - VecMaskMode::Agnostic => 1, - VecMaskMode::Undisturbed => 0, - } - } -} - -impl fmt::Display for VecMaskMode { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - VecMaskMode::Agnostic => write!(f, "ma"), - VecMaskMode::Undisturbed => write!(f, "mu"), - } - } -} - -/// Vector Type (VType) -/// -/// vtype provides the default type used to interpret the contents of the vector register file. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct VType { - pub sew: VecElementWidth, - pub lmul: VecLmul, - pub tail_mode: VecTailMode, - pub mask_mode: VecMaskMode, -} - -impl VType { - // https://github.com/riscv/riscv-v-spec/blob/master/vtype-format.adoc - pub fn encode(&self) -> u32 { - let mut bits = 0; - bits |= self.lmul.encode(); - bits |= self.sew.encode() << 3; - bits |= self.tail_mode.encode() << 6; - bits |= self.mask_mode.encode() << 7; - bits - } -} - -impl fmt::Display for VType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}, {}, {}, {}", self.sew, self.lmul, self.tail_mode, self.mask_mode) - } -} - -/// Vector State (VState) -/// -/// VState represents the state of the vector unit that each instruction expects before execution. -/// Unlike VType or any of the other types here, VState is not a part of the RISC-V ISA. It is -/// used by our instruction emission code to ensure that the vector unit is in the correct state. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct VState { - pub avl: VecAvl, - pub vtype: VType, -} - -impl VState { - pub fn from_type(ty: Type) -> Self { - VState { - avl: VecAvl::_static(ty.lane_count()), - vtype: VType { - sew: VecElementWidth::from_type(ty), - lmul: VecLmul::Lmul1, - tail_mode: VecTailMode::Agnostic, - mask_mode: VecMaskMode::Agnostic, - }, - } - } -} - -impl fmt::Display for VState { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "#avl={}, #vtype=({})", self.avl, self.vtype) - } -} - -impl VecOpCategory { - pub fn encode(&self) -> u32 { - // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#101-vector-arithmetic-instruction-encoding - match self { - VecOpCategory::OPIVV => 0b000, - VecOpCategory::OPFVV => 0b001, - VecOpCategory::OPMVV => 0b010, - VecOpCategory::OPIVI => 0b011, - VecOpCategory::OPIVX => 0b100, - VecOpCategory::OPFVF => 0b101, - VecOpCategory::OPMVX => 0b110, - VecOpCategory::OPCFG => 0b111, - } - } -} - -impl Copy for VecOpMasking {} -impl VecOpMasking { - pub fn is_enabled(&self) -> bool { - match self { - VecOpMasking::Enabled { .. } => true, - VecOpMasking::Disabled => false, - } - } - - pub fn encode(&self) -> u32 { - match self { - VecOpMasking::Enabled { .. } => 0, - VecOpMasking::Disabled => 1, - } - } -} - -impl VecAluOpRRRR { - pub fn opcode(&self) -> u32 { - // Vector Opcode - 0x57 - } - - pub fn funct3(&self) -> u32 { - self.category().encode() - } - - pub fn funct6(&self) -> u32 { - // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc - match self { - VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VmaccVX => 0b101101, - VecAluOpRRRR::VnmsacVV | VecAluOpRRRR::VnmsacVX => 0b101111, - VecAluOpRRRR::VfmaccVV | VecAluOpRRRR::VfmaccVF => 0b101100, - VecAluOpRRRR::VfnmaccVV | VecAluOpRRRR::VfnmaccVF => 0b101101, - VecAluOpRRRR::VfmsacVV | VecAluOpRRRR::VfmsacVF => 0b101110, - VecAluOpRRRR::VfnmsacVV | VecAluOpRRRR::VfnmsacVF => 0b101111, - VecAluOpRRRR::Vslide1upVX => 0b001110, - } - } - - pub fn category(&self) -> VecOpCategory { - match self { - VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VnmsacVV => VecOpCategory::OPMVV, - VecAluOpRRRR::VmaccVX | VecAluOpRRRR::VnmsacVX | VecAluOpRRRR::Vslide1upVX => { - VecOpCategory::OPMVX - } - VecAluOpRRRR::VfmaccVV - | VecAluOpRRRR::VfnmaccVV - | VecAluOpRRRR::VfmsacVV - | VecAluOpRRRR::VfnmsacVV => VecOpCategory::OPFVV, - VecAluOpRRRR::VfmaccVF - | VecAluOpRRRR::VfnmaccVF - | VecAluOpRRRR::VfmsacVF - | VecAluOpRRRR::VfnmsacVF => VecOpCategory::OPFVF, - } - } - - // vs1 is the only variable source, vs2 is fixed. - pub fn vs1_regclass(&self) -> RegClass { - match self.category() { - VecOpCategory::OPMVV | VecOpCategory::OPFVV => RegClass::Vector, - VecOpCategory::OPMVX => RegClass::Int, - VecOpCategory::OPFVF => RegClass::Float, - _ => unreachable!(), - } - } -} - -impl VecInstOverlapInfo for VecAluOpRRRR { - fn forbids_src_dst_overlaps(&self) -> bool { - match self { - VecAluOpRRRR::Vslide1upVX => true, - _ => false, - } - } -} - -impl fmt::Display for VecAluOpRRRR { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut s = format!("{self:?}"); - s.make_ascii_lowercase(); - let (opcode, category) = s.split_at(s.len() - 2); - f.write_str(&format!("{opcode}.{category}")) - } -} - -impl VecAluOpRRRImm5 { - pub fn opcode(&self) -> u32 { - // Vector Opcode - 0x57 - } - - pub fn funct3(&self) -> u32 { - self.category().encode() - } - - pub fn funct6(&self) -> u32 { - // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc - match self { - VecAluOpRRRImm5::VslideupVI => 0b001110, - } - } - - pub fn category(&self) -> VecOpCategory { - match self { - VecAluOpRRRImm5::VslideupVI => VecOpCategory::OPIVI, - } - } - - pub fn imm_is_unsigned(&self) -> bool { - match self { - VecAluOpRRRImm5::VslideupVI => true, - } - } -} - -impl VecInstOverlapInfo for VecAluOpRRRImm5 { - fn forbids_src_dst_overlaps(&self) -> bool { - match self { - VecAluOpRRRImm5::VslideupVI => true, - } - } -} - -impl fmt::Display for VecAluOpRRRImm5 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut s = format!("{self:?}"); - s.make_ascii_lowercase(); - let (opcode, category) = s.split_at(s.len() - 2); - f.write_str(&format!("{opcode}.{category}")) - } -} - -impl VecAluOpRRR { - pub fn opcode(&self) -> u32 { - // Vector Opcode - 0x57 - } - - pub fn funct3(&self) -> u32 { - self.category().encode() - } - - pub fn funct6(&self) -> u32 { - // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc - match self { - VecAluOpRRR::VaddVV - | VecAluOpRRR::VaddVX - | VecAluOpRRR::VfaddVV - | VecAluOpRRR::VfaddVF => 0b000000, - VecAluOpRRR::VsubVV - | VecAluOpRRR::VsubVX - | VecAluOpRRR::VfsubVV - | VecAluOpRRR::VfsubVF => 0b000010, - VecAluOpRRR::VrsubVX => 0b000011, - VecAluOpRRR::VmulVV | VecAluOpRRR::VmulVX => 0b100101, - VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhVX => 0b100111, - VecAluOpRRR::VmulhuVV - | VecAluOpRRR::VmulhuVX - | VecAluOpRRR::VfmulVV - | VecAluOpRRR::VfmulVF => 0b100100, - VecAluOpRRR::VsmulVV | VecAluOpRRR::VsmulVX => 0b100111, - VecAluOpRRR::VsllVV | VecAluOpRRR::VsllVX => 0b100101, - VecAluOpRRR::VsrlVV | VecAluOpRRR::VsrlVX => 0b101000, - VecAluOpRRR::VsraVV | VecAluOpRRR::VsraVX => 0b101001, - VecAluOpRRR::VandVV | VecAluOpRRR::VandVX => 0b001001, - VecAluOpRRR::VorVV | VecAluOpRRR::VorVX => 0b001010, - VecAluOpRRR::VxorVV | VecAluOpRRR::VxorVX => 0b001011, - VecAluOpRRR::VminuVV | VecAluOpRRR::VminuVX | VecAluOpRRR::VredminuVS => 0b000100, - VecAluOpRRR::VminVV | VecAluOpRRR::VminVX => 0b000101, - VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxuVX | VecAluOpRRR::VredmaxuVS => 0b000110, - VecAluOpRRR::VmaxVV | VecAluOpRRR::VmaxVX => 0b000111, - VecAluOpRRR::VslidedownVX => 0b001111, - VecAluOpRRR::VfrsubVF => 0b100111, - VecAluOpRRR::VmergeVVM - | VecAluOpRRR::VmergeVXM - | VecAluOpRRR::VfmergeVFM - | VecAluOpRRR::VcompressVM => 0b010111, - VecAluOpRRR::VfdivVV - | VecAluOpRRR::VfdivVF - | VecAluOpRRR::VsadduVV - | VecAluOpRRR::VsadduVX => 0b100000, - VecAluOpRRR::VfrdivVF | VecAluOpRRR::VsaddVV | VecAluOpRRR::VsaddVX => 0b100001, - VecAluOpRRR::VfminVV => 0b000100, - VecAluOpRRR::VfmaxVV => 0b000110, - VecAluOpRRR::VssubuVV | VecAluOpRRR::VssubuVX => 0b100010, - VecAluOpRRR::VssubVV | VecAluOpRRR::VssubVX => 0b100011, - VecAluOpRRR::VfsgnjVV | VecAluOpRRR::VfsgnjVF => 0b001000, - VecAluOpRRR::VfsgnjnVV => 0b001001, - VecAluOpRRR::VfsgnjxVV => 0b001010, - VecAluOpRRR::VrgatherVV | VecAluOpRRR::VrgatherVX => 0b001100, - VecAluOpRRR::VwadduVV | VecAluOpRRR::VwadduVX => 0b110000, - VecAluOpRRR::VwaddVV | VecAluOpRRR::VwaddVX => 0b110001, - VecAluOpRRR::VwsubuVV | VecAluOpRRR::VwsubuVX => 0b110010, - VecAluOpRRR::VwsubVV | VecAluOpRRR::VwsubVX => 0b110011, - VecAluOpRRR::VwadduWV | VecAluOpRRR::VwadduWX => 0b110100, - VecAluOpRRR::VwaddWV | VecAluOpRRR::VwaddWX => 0b110101, - VecAluOpRRR::VwsubuWV | VecAluOpRRR::VwsubuWX => 0b110110, - VecAluOpRRR::VwsubWV | VecAluOpRRR::VwsubWX => 0b110111, - VecAluOpRRR::VmseqVV - | VecAluOpRRR::VmseqVX - | VecAluOpRRR::VmfeqVV - | VecAluOpRRR::VmfeqVF => 0b011000, - VecAluOpRRR::VmsneVV - | VecAluOpRRR::VmsneVX - | VecAluOpRRR::VmfleVV - | VecAluOpRRR::VmfleVF - | VecAluOpRRR::VmandMM => 0b011001, - VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX | VecAluOpRRR::VmorMM => 0b011010, - VecAluOpRRR::VmsltVV - | VecAluOpRRR::VmsltVX - | VecAluOpRRR::VmfltVV - | VecAluOpRRR::VmfltVF => 0b011011, - VecAluOpRRR::VmsleuVV - | VecAluOpRRR::VmsleuVX - | VecAluOpRRR::VmfneVV - | VecAluOpRRR::VmfneVF => 0b011100, - VecAluOpRRR::VmsleVV - | VecAluOpRRR::VmsleVX - | VecAluOpRRR::VmfgtVF - | VecAluOpRRR::VmnandMM => 0b011101, - VecAluOpRRR::VmsgtuVX | VecAluOpRRR::VmnorMM => 0b011110, - VecAluOpRRR::VmsgtVX | VecAluOpRRR::VmfgeVF => 0b011111, - } - } - - pub fn category(&self) -> VecOpCategory { - match self { - VecAluOpRRR::VaddVV - | VecAluOpRRR::VsaddVV - | VecAluOpRRR::VsadduVV - | VecAluOpRRR::VsubVV - | VecAluOpRRR::VssubVV - | VecAluOpRRR::VssubuVV - | VecAluOpRRR::VsmulVV - | VecAluOpRRR::VsllVV - | VecAluOpRRR::VsrlVV - | VecAluOpRRR::VsraVV - | VecAluOpRRR::VandVV - | VecAluOpRRR::VorVV - | VecAluOpRRR::VxorVV - | VecAluOpRRR::VminuVV - | VecAluOpRRR::VminVV - | VecAluOpRRR::VmaxuVV - | VecAluOpRRR::VmaxVV - | VecAluOpRRR::VmergeVVM - | VecAluOpRRR::VrgatherVV - | VecAluOpRRR::VmseqVV - | VecAluOpRRR::VmsneVV - | VecAluOpRRR::VmsltuVV - | VecAluOpRRR::VmsltVV - | VecAluOpRRR::VmsleuVV - | VecAluOpRRR::VmsleVV => VecOpCategory::OPIVV, - VecAluOpRRR::VwaddVV - | VecAluOpRRR::VwaddWV - | VecAluOpRRR::VwadduVV - | VecAluOpRRR::VwadduWV - | VecAluOpRRR::VwsubVV - | VecAluOpRRR::VwsubWV - | VecAluOpRRR::VwsubuVV - | VecAluOpRRR::VwsubuWV - | VecAluOpRRR::VmulVV - | VecAluOpRRR::VmulhVV - | VecAluOpRRR::VmulhuVV - | VecAluOpRRR::VredmaxuVS - | VecAluOpRRR::VredminuVS - | VecAluOpRRR::VcompressVM - | VecAluOpRRR::VmandMM - | VecAluOpRRR::VmorMM - | VecAluOpRRR::VmnandMM - | VecAluOpRRR::VmnorMM => VecOpCategory::OPMVV, - VecAluOpRRR::VwaddVX - | VecAluOpRRR::VwadduVX - | VecAluOpRRR::VwadduWX - | VecAluOpRRR::VwaddWX - | VecAluOpRRR::VwsubVX - | VecAluOpRRR::VwsubuVX - | VecAluOpRRR::VwsubuWX - | VecAluOpRRR::VwsubWX - | VecAluOpRRR::VmulVX - | VecAluOpRRR::VmulhVX - | VecAluOpRRR::VmulhuVX => VecOpCategory::OPMVX, - VecAluOpRRR::VaddVX - | VecAluOpRRR::VsaddVX - | VecAluOpRRR::VsadduVX - | VecAluOpRRR::VsubVX - | VecAluOpRRR::VssubVX - | VecAluOpRRR::VssubuVX - | VecAluOpRRR::VrsubVX - | VecAluOpRRR::VsmulVX - | VecAluOpRRR::VsllVX - | VecAluOpRRR::VsrlVX - | VecAluOpRRR::VsraVX - | VecAluOpRRR::VandVX - | VecAluOpRRR::VorVX - | VecAluOpRRR::VxorVX - | VecAluOpRRR::VminuVX - | VecAluOpRRR::VminVX - | VecAluOpRRR::VmaxuVX - | VecAluOpRRR::VmaxVX - | VecAluOpRRR::VslidedownVX - | VecAluOpRRR::VmergeVXM - | VecAluOpRRR::VrgatherVX - | VecAluOpRRR::VmseqVX - | VecAluOpRRR::VmsneVX - | VecAluOpRRR::VmsltuVX - | VecAluOpRRR::VmsltVX - | VecAluOpRRR::VmsleuVX - | VecAluOpRRR::VmsleVX - | VecAluOpRRR::VmsgtuVX - | VecAluOpRRR::VmsgtVX => VecOpCategory::OPIVX, - VecAluOpRRR::VfaddVV - | VecAluOpRRR::VfsubVV - | VecAluOpRRR::VfmulVV - | VecAluOpRRR::VfdivVV - | VecAluOpRRR::VfmaxVV - | VecAluOpRRR::VfminVV - | VecAluOpRRR::VfsgnjVV - | VecAluOpRRR::VfsgnjnVV - | VecAluOpRRR::VfsgnjxVV - | VecAluOpRRR::VmfeqVV - | VecAluOpRRR::VmfneVV - | VecAluOpRRR::VmfltVV - | VecAluOpRRR::VmfleVV => VecOpCategory::OPFVV, - VecAluOpRRR::VfaddVF - | VecAluOpRRR::VfsubVF - | VecAluOpRRR::VfrsubVF - | VecAluOpRRR::VfmulVF - | VecAluOpRRR::VfdivVF - | VecAluOpRRR::VfrdivVF - | VecAluOpRRR::VfmergeVFM - | VecAluOpRRR::VfsgnjVF - | VecAluOpRRR::VmfeqVF - | VecAluOpRRR::VmfneVF - | VecAluOpRRR::VmfltVF - | VecAluOpRRR::VmfleVF - | VecAluOpRRR::VmfgtVF - | VecAluOpRRR::VmfgeVF => VecOpCategory::OPFVF, - } - } - - // vs1 is the only variable source, vs2 is fixed. - pub fn vs1_regclass(&self) -> RegClass { - match self.category() { - VecOpCategory::OPIVV | VecOpCategory::OPFVV | VecOpCategory::OPMVV => RegClass::Vector, - VecOpCategory::OPIVX | VecOpCategory::OPMVX => RegClass::Int, - VecOpCategory::OPFVF => RegClass::Float, - _ => unreachable!(), - } - } -} - -impl VecInstOverlapInfo for VecAluOpRRR { - fn forbids_src_dst_overlaps(&self) -> bool { - match self { - VecAluOpRRR::VrgatherVV - | VecAluOpRRR::VrgatherVX - | VecAluOpRRR::VcompressVM - | VecAluOpRRR::VwadduVV - | VecAluOpRRR::VwadduVX - | VecAluOpRRR::VwaddVV - | VecAluOpRRR::VwaddVX - | VecAluOpRRR::VwadduWV - | VecAluOpRRR::VwadduWX - | VecAluOpRRR::VwaddWV - | VecAluOpRRR::VwaddWX - | VecAluOpRRR::VwsubuVV - | VecAluOpRRR::VwsubuVX - | VecAluOpRRR::VwsubVV - | VecAluOpRRR::VwsubVX - | VecAluOpRRR::VwsubuWV - | VecAluOpRRR::VwsubuWX - | VecAluOpRRR::VwsubWV - | VecAluOpRRR::VwsubWX => true, - _ => false, - } - } - - // Only mask writing operations, and reduction operations (`vred*`) allow mask / dst overlaps. - fn forbids_mask_dst_overlaps(&self) -> bool { - match self { - VecAluOpRRR::VredmaxuVS - | VecAluOpRRR::VredminuVS - | VecAluOpRRR::VmandMM - | VecAluOpRRR::VmorMM - | VecAluOpRRR::VmnandMM - | VecAluOpRRR::VmnorMM - | VecAluOpRRR::VmseqVX - | VecAluOpRRR::VmsneVX - | VecAluOpRRR::VmsltuVX - | VecAluOpRRR::VmsltVX - | VecAluOpRRR::VmsleuVX - | VecAluOpRRR::VmsleVX - | VecAluOpRRR::VmsgtuVX - | VecAluOpRRR::VmsgtVX - | VecAluOpRRR::VmfeqVV - | VecAluOpRRR::VmfneVV - | VecAluOpRRR::VmfltVV - | VecAluOpRRR::VmfleVV - | VecAluOpRRR::VmfeqVF - | VecAluOpRRR::VmfneVF - | VecAluOpRRR::VmfltVF - | VecAluOpRRR::VmfleVF - | VecAluOpRRR::VmfgtVF - | VecAluOpRRR::VmfgeVF => false, - _ => true, - } - } -} - -impl fmt::Display for VecAluOpRRR { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let suffix_length = match self { - VecAluOpRRR::VmergeVVM | VecAluOpRRR::VmergeVXM | VecAluOpRRR::VfmergeVFM => 3, - _ => 2, - }; - - let mut s = format!("{self:?}"); - s.make_ascii_lowercase(); - let (opcode, category) = s.split_at(s.len() - suffix_length); - f.write_str(&format!("{opcode}.{category}")) - } -} - -impl VecAluOpRRImm5 { - pub fn opcode(&self) -> u32 { - // Vector Opcode - 0x57 - } - - pub fn funct3(&self) -> u32 { - self.category().encode() - } - - pub fn funct6(&self) -> u32 { - // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc - match self { - VecAluOpRRImm5::VaddVI => 0b000000, - VecAluOpRRImm5::VrsubVI => 0b000011, - VecAluOpRRImm5::VsllVI => 0b100101, - VecAluOpRRImm5::VsrlVI => 0b101000, - VecAluOpRRImm5::VsraVI => 0b101001, - VecAluOpRRImm5::VandVI => 0b001001, - VecAluOpRRImm5::VorVI => 0b001010, - VecAluOpRRImm5::VxorVI => 0b001011, - VecAluOpRRImm5::VslidedownVI => 0b001111, - VecAluOpRRImm5::VssrlVI => 0b101010, - VecAluOpRRImm5::VmergeVIM => 0b010111, - VecAluOpRRImm5::VsadduVI => 0b100000, - VecAluOpRRImm5::VsaddVI => 0b100001, - VecAluOpRRImm5::VrgatherVI => 0b001100, - VecAluOpRRImm5::VmvrV => 0b100111, - VecAluOpRRImm5::VnclipWI => 0b101111, - VecAluOpRRImm5::VnclipuWI => 0b101110, - VecAluOpRRImm5::VmseqVI => 0b011000, - VecAluOpRRImm5::VmsneVI => 0b011001, - VecAluOpRRImm5::VmsleuVI => 0b011100, - VecAluOpRRImm5::VmsleVI => 0b011101, - VecAluOpRRImm5::VmsgtuVI => 0b011110, - VecAluOpRRImm5::VmsgtVI => 0b011111, - } - } - - pub fn category(&self) -> VecOpCategory { - match self { - VecAluOpRRImm5::VaddVI - | VecAluOpRRImm5::VrsubVI - | VecAluOpRRImm5::VsllVI - | VecAluOpRRImm5::VsrlVI - | VecAluOpRRImm5::VsraVI - | VecAluOpRRImm5::VandVI - | VecAluOpRRImm5::VorVI - | VecAluOpRRImm5::VxorVI - | VecAluOpRRImm5::VssrlVI - | VecAluOpRRImm5::VslidedownVI - | VecAluOpRRImm5::VmergeVIM - | VecAluOpRRImm5::VsadduVI - | VecAluOpRRImm5::VsaddVI - | VecAluOpRRImm5::VrgatherVI - | VecAluOpRRImm5::VmvrV - | VecAluOpRRImm5::VnclipWI - | VecAluOpRRImm5::VnclipuWI - | VecAluOpRRImm5::VmseqVI - | VecAluOpRRImm5::VmsneVI - | VecAluOpRRImm5::VmsleuVI - | VecAluOpRRImm5::VmsleVI - | VecAluOpRRImm5::VmsgtuVI - | VecAluOpRRImm5::VmsgtVI => VecOpCategory::OPIVI, - } - } - - pub fn imm_is_unsigned(&self) -> bool { - match self { - VecAluOpRRImm5::VsllVI - | VecAluOpRRImm5::VsrlVI - | VecAluOpRRImm5::VssrlVI - | VecAluOpRRImm5::VsraVI - | VecAluOpRRImm5::VslidedownVI - | VecAluOpRRImm5::VrgatherVI - | VecAluOpRRImm5::VmvrV - | VecAluOpRRImm5::VnclipWI - | VecAluOpRRImm5::VnclipuWI => true, - VecAluOpRRImm5::VaddVI - | VecAluOpRRImm5::VrsubVI - | VecAluOpRRImm5::VandVI - | VecAluOpRRImm5::VorVI - | VecAluOpRRImm5::VxorVI - | VecAluOpRRImm5::VmergeVIM - | VecAluOpRRImm5::VsadduVI - | VecAluOpRRImm5::VsaddVI - | VecAluOpRRImm5::VmseqVI - | VecAluOpRRImm5::VmsneVI - | VecAluOpRRImm5::VmsleuVI - | VecAluOpRRImm5::VmsleVI - | VecAluOpRRImm5::VmsgtuVI - | VecAluOpRRImm5::VmsgtVI => false, - } - } -} - -impl VecInstOverlapInfo for VecAluOpRRImm5 { - fn forbids_src_dst_overlaps(&self) -> bool { - match self { - VecAluOpRRImm5::VrgatherVI => true, - _ => false, - } - } - - // Only mask writing operations, and reduction operations (`vred*`) allow mask / dst overlaps. - fn forbids_mask_dst_overlaps(&self) -> bool { - match self { - VecAluOpRRImm5::VmseqVI - | VecAluOpRRImm5::VmsneVI - | VecAluOpRRImm5::VmsleuVI - | VecAluOpRRImm5::VmsleVI - | VecAluOpRRImm5::VmsgtuVI - | VecAluOpRRImm5::VmsgtVI => false, - _ => true, - } - } -} - -impl fmt::Display for VecAluOpRRImm5 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let suffix_length = match self { - VecAluOpRRImm5::VmergeVIM => 3, - _ => 2, - }; - - let mut s = format!("{self:?}"); - s.make_ascii_lowercase(); - let (opcode, category) = s.split_at(s.len() - suffix_length); - f.write_str(&format!("{opcode}.{category}")) - } -} - -impl VecAluOpRR { - pub fn opcode(&self) -> u32 { - // Vector Opcode - 0x57 - } - - pub fn funct3(&self) -> u32 { - self.category().encode() - } - - pub fn funct6(&self) -> u32 { - // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc - match self { - VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => { - 0b010000 - } - VecAluOpRR::VzextVF2 - | VecAluOpRR::VzextVF4 - | VecAluOpRR::VzextVF8 - | VecAluOpRR::VsextVF2 - | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 => 0b010010, - VecAluOpRR::VfsqrtV => 0b010011, - VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0b010111, - VecAluOpRR::VfcvtxufV - | VecAluOpRR::VfcvtxfV - | VecAluOpRR::VfcvtrtzxufV - | VecAluOpRR::VfcvtrtzxfV - | VecAluOpRR::VfcvtfxuV - | VecAluOpRR::VfcvtfxV - | VecAluOpRR::VfwcvtffV - | VecAluOpRR::VfncvtffW => 0b010010, - } - } - - pub fn category(&self) -> VecOpCategory { - match self { - VecAluOpRR::VmvSX => VecOpCategory::OPMVX, - VecAluOpRR::VmvXS - | VecAluOpRR::VzextVF2 - | VecAluOpRR::VzextVF4 - | VecAluOpRR::VzextVF8 - | VecAluOpRR::VsextVF2 - | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 => VecOpCategory::OPMVV, - VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF, - VecAluOpRR::VfmvFS - | VecAluOpRR::VfsqrtV - | VecAluOpRR::VfcvtxufV - | VecAluOpRR::VfcvtxfV - | VecAluOpRR::VfcvtrtzxufV - | VecAluOpRR::VfcvtrtzxfV - | VecAluOpRR::VfcvtfxuV - | VecAluOpRR::VfcvtfxV - | VecAluOpRR::VfwcvtffV - | VecAluOpRR::VfncvtffW => VecOpCategory::OPFVV, - VecAluOpRR::VmvVV => VecOpCategory::OPIVV, - VecAluOpRR::VmvVX => VecOpCategory::OPIVX, - } - } - - /// Returns the auxiliary encoding field for the instruction, if any. - pub fn aux_encoding(&self) -> u32 { - match self { - // VRXUNARY0 - VecAluOpRR::VmvSX => 0b00000, - // VWXUNARY0 - VecAluOpRR::VmvXS => 0b00000, - // VRFUNARY0 - VecAluOpRR::VfmvSF => 0b00000, - // VWFUNARY0 - VecAluOpRR::VfmvFS => 0b00000, - // VFUNARY1 - VecAluOpRR::VfsqrtV => 0b00000, - // VXUNARY0 - VecAluOpRR::VzextVF8 => 0b00010, - VecAluOpRR::VsextVF8 => 0b00011, - VecAluOpRR::VzextVF4 => 0b00100, - VecAluOpRR::VsextVF4 => 0b00101, - VecAluOpRR::VzextVF2 => 0b00110, - VecAluOpRR::VsextVF2 => 0b00111, - // VFUNARY0 - // single-width converts - VecAluOpRR::VfcvtxufV => 0b00000, - VecAluOpRR::VfcvtxfV => 0b00001, - VecAluOpRR::VfcvtrtzxufV => 0b00110, - VecAluOpRR::VfcvtrtzxfV => 0b00111, - VecAluOpRR::VfcvtfxuV => 0b00010, - VecAluOpRR::VfcvtfxV => 0b00011, - // widening converts - VecAluOpRR::VfwcvtffV => 0b01100, - // narrowing converts - VecAluOpRR::VfncvtffW => 0b10100, - // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: - // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. - VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0, - } - } - - /// Most of these opcodes have the source register encoded in the VS2 field and - /// the `aux_encoding` field in VS1. However some special snowflakes have it the - /// other way around. As far as I can tell only vmv.v.* are backwards. - pub fn vs_is_vs2_encoded(&self) -> bool { - match self { - VecAluOpRR::VmvXS - | VecAluOpRR::VfmvFS - | VecAluOpRR::VfsqrtV - | VecAluOpRR::VzextVF2 - | VecAluOpRR::VzextVF4 - | VecAluOpRR::VzextVF8 - | VecAluOpRR::VsextVF2 - | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 - | VecAluOpRR::VfcvtxufV - | VecAluOpRR::VfcvtxfV - | VecAluOpRR::VfcvtrtzxufV - | VecAluOpRR::VfcvtrtzxfV - | VecAluOpRR::VfcvtfxuV - | VecAluOpRR::VfcvtfxV - | VecAluOpRR::VfwcvtffV - | VecAluOpRR::VfncvtffW => true, - VecAluOpRR::VmvSX - | VecAluOpRR::VfmvSF - | VecAluOpRR::VmvVV - | VecAluOpRR::VmvVX - | VecAluOpRR::VfmvVF => false, - } - } - - pub fn dst_regclass(&self) -> RegClass { - match self { - VecAluOpRR::VfmvSF - | VecAluOpRR::VmvSX - | VecAluOpRR::VmvVV - | VecAluOpRR::VmvVX - | VecAluOpRR::VfmvVF - | VecAluOpRR::VfsqrtV - | VecAluOpRR::VzextVF2 - | VecAluOpRR::VzextVF4 - | VecAluOpRR::VzextVF8 - | VecAluOpRR::VsextVF2 - | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 - | VecAluOpRR::VfcvtxufV - | VecAluOpRR::VfcvtxfV - | VecAluOpRR::VfcvtrtzxufV - | VecAluOpRR::VfcvtrtzxfV - | VecAluOpRR::VfcvtfxuV - | VecAluOpRR::VfcvtfxV - | VecAluOpRR::VfwcvtffV - | VecAluOpRR::VfncvtffW => RegClass::Vector, - VecAluOpRR::VmvXS => RegClass::Int, - VecAluOpRR::VfmvFS => RegClass::Float, - } - } - - pub fn src_regclass(&self) -> RegClass { - match self { - VecAluOpRR::VmvXS - | VecAluOpRR::VfmvFS - | VecAluOpRR::VmvVV - | VecAluOpRR::VfsqrtV - | VecAluOpRR::VzextVF2 - | VecAluOpRR::VzextVF4 - | VecAluOpRR::VzextVF8 - | VecAluOpRR::VsextVF2 - | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 - | VecAluOpRR::VfcvtxufV - | VecAluOpRR::VfcvtxfV - | VecAluOpRR::VfcvtrtzxufV - | VecAluOpRR::VfcvtrtzxfV - | VecAluOpRR::VfcvtfxuV - | VecAluOpRR::VfcvtfxV - | VecAluOpRR::VfwcvtffV - | VecAluOpRR::VfncvtffW => RegClass::Vector, - VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float, - VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int, - } - } -} - -impl VecInstOverlapInfo for VecAluOpRR { - fn forbids_src_dst_overlaps(&self) -> bool { - match self { - VecAluOpRR::VzextVF2 - | VecAluOpRR::VzextVF4 - | VecAluOpRR::VzextVF8 - | VecAluOpRR::VsextVF2 - | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 - | VecAluOpRR::VfwcvtffV - | VecAluOpRR::VfncvtffW => true, - _ => false, - } - } -} - -impl fmt::Display for VecAluOpRR { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(match self { - VecAluOpRR::VmvSX => "vmv.s.x", - VecAluOpRR::VmvXS => "vmv.x.s", - VecAluOpRR::VfmvSF => "vfmv.s.f", - VecAluOpRR::VfmvFS => "vfmv.f.s", - VecAluOpRR::VfsqrtV => "vfsqrt.v", - VecAluOpRR::VzextVF2 => "vzext.vf2", - VecAluOpRR::VzextVF4 => "vzext.vf4", - VecAluOpRR::VzextVF8 => "vzext.vf8", - VecAluOpRR::VsextVF2 => "vsext.vf2", - VecAluOpRR::VsextVF4 => "vsext.vf4", - VecAluOpRR::VsextVF8 => "vsext.vf8", - VecAluOpRR::VmvVV => "vmv.v.v", - VecAluOpRR::VmvVX => "vmv.v.x", - VecAluOpRR::VfmvVF => "vfmv.v.f", - VecAluOpRR::VfcvtxufV => "vfcvt.xu.f.v", - VecAluOpRR::VfcvtxfV => "vfcvt.x.f.v", - VecAluOpRR::VfcvtrtzxufV => "vfcvt.rtz.xu.f.v", - VecAluOpRR::VfcvtrtzxfV => "vfcvt.rtz.x.f.v", - VecAluOpRR::VfcvtfxuV => "vfcvt.f.xu.v", - VecAluOpRR::VfcvtfxV => "vfcvt.f.x.v", - VecAluOpRR::VfwcvtffV => "vfwcvt.f.f.v", - VecAluOpRR::VfncvtffW => "vfncvt.f.f.w", - }) - } -} - -impl VecAluOpRImm5 { - pub fn opcode(&self) -> u32 { - // Vector Opcode - 0x57 - } - - pub fn funct3(&self) -> u32 { - self.category().encode() - } - - pub fn funct6(&self) -> u32 { - // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc - match self { - VecAluOpRImm5::VmvVI => 0b010111, - } - } - - pub fn category(&self) -> VecOpCategory { - match self { - VecAluOpRImm5::VmvVI => VecOpCategory::OPIVI, - } - } - - /// Returns the auxiliary encoding field for the instruction, if any. - pub fn aux_encoding(&self) -> u32 { - match self { - // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: - // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. - VecAluOpRImm5::VmvVI => 0, - } - } -} - -impl VecInstOverlapInfo for VecAluOpRImm5 { - fn forbids_src_dst_overlaps(&self) -> bool { - match self { - VecAluOpRImm5::VmvVI => false, - } - } -} - -impl fmt::Display for VecAluOpRImm5 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(match self { - VecAluOpRImm5::VmvVI => "vmv.v.i", - }) - } -} - -impl VecAMode { - pub fn get_base_register(&self) -> Option { - match self { - VecAMode::UnitStride { base, .. } => base.get_base_register(), - } - } - - pub fn get_operands(&mut self, collector: &mut impl OperandVisitor) { - match self { - VecAMode::UnitStride { base, .. } => base.get_operands(collector), - } - } - - /// `mop` field, described in Table 7 of Section 7.2. Vector Load/Store Addressing Modes - /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes - pub fn mop(&self) -> u32 { - match self { - VecAMode::UnitStride { .. } => 0b00, - } - } - - /// `lumop` field, described in Table 9 of Section 7.2. Vector Load/Store Addressing Modes - /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes - pub fn lumop(&self) -> u32 { - match self { - VecAMode::UnitStride { .. } => 0b00000, - } - } - - /// `sumop` field, described in Table 10 of Section 7.2. Vector Load/Store Addressing Modes - /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes - pub fn sumop(&self) -> u32 { - match self { - VecAMode::UnitStride { .. } => 0b00000, - } - } - - /// The `nf[2:0]` field encodes the number of fields in each segment. For regular vector loads and - /// stores, nf=0, indicating that a single value is moved between a vector register group and memory - /// at each element position. Larger values in the nf field are used to access multiple contiguous - /// fields within a segment as described in Section 7.8 Vector Load/Store Segment Instructions. - /// - /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes - pub fn nf(&self) -> u32 { - match self { - VecAMode::UnitStride { .. } => 0b000, - } - } -} - -pub trait VecInstOverlapInfo { - /// § 5.2 Vector Operands states: - /// - /// A destination vector register group can overlap a source vector register group - /// only if one of the following holds: - /// - /// * The destination EEW equals the source EEW. - /// - /// * The destination EEW is smaller than the source EEW and the overlap is - /// in the lowest-numbered part of the source register group (e.g., when LMUL=1, - /// vnsrl.wi v0, v0, 3 is legal, but a destination of v1 is not). - /// - /// * The destination EEW is greater than the source EEW, the source EMUL is at - /// least 1, and the overlap is in the highest-numbered part of the destination register - /// group (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, v2, or v4 is not). - /// - /// For the purpose of determining register group overlap constraints, mask elements have EEW=1. - fn forbids_src_dst_overlaps(&self) -> bool; - - /// § 5.3 Vector Masking states: - /// - /// > The destination vector register group for a masked vector instruction - /// > cannot overlap the source mask register (v0), unless the destination - /// > vector register is being written with a mask value (e.g., compares) or - /// > the scalar result of a reduction. These instruction encodings are reserved. - /// - /// In almost all instructions we should not allow the mask to be re-used as - /// a destination register. - fn forbids_mask_dst_overlaps(&self) -> bool { - true - } - - /// There are two broad categories of overlaps (see above). But we can't represent such - /// fine grained overlaps to regalloc. So if any of the two come into play we forbid - /// all source and destination overlaps (including masks). - fn forbids_overlaps(&self, mask: &VecOpMasking) -> bool { - self.forbids_src_dst_overlaps() || (mask.is_enabled() && self.forbids_mask_dst_overlaps()) - } -} diff --git a/hbcb/src/inst_vector.isle b/hbcb/src/inst_vector.isle deleted file mode 100644 index 4b63618..0000000 --- a/hbcb/src/inst_vector.isle +++ /dev/null @@ -1,1907 +0,0 @@ -;; Represents the possible widths of an element when used in an operation. -(type VecElementWidth (enum - (E8) - (E16) - (E32) - (E64) -)) - -;; Vector Register Group Multiplier (LMUL) -;; -;; The LMUL setting specifies how we should group registers together. LMUL can -;; also be a fractional value, reducing the number of bits used in a single -;; vector register. Fractional LMUL is used to increase the number of effective -;; usable vector register groups when operating on mixed-width values. -(type VecLmul (enum - (LmulF8) - (LmulF4) - (LmulF2) - (Lmul1) - (Lmul2) - (Lmul4) - (Lmul8) -)) - -;; Tail Mode -;; -;; The tail mode specifies how the tail elements of a vector register are handled. -(type VecTailMode (enum - ;; Tail Agnostic means that the tail elements are left in an undefined state. - (Agnostic) - ;; Tail Undisturbed means that the tail elements are left in their original values. - (Undisturbed) -)) - -;; Mask Mode -;; -;; The mask mode specifies how the masked elements of a vector register are handled. -(type VecMaskMode (enum - ;; Mask Agnostic means that the masked out elements are left in an undefined state. - (Agnostic) - ;; Mask Undisturbed means that the masked out elements are left in their original values. - (Undisturbed) -)) - -;; Application Vector Length (AVL) -;; -;; This setting specifies the number of elements that are going to be processed -;; in a single instruction. Note: We may end up processing fewer elements than -;; the AVL setting, if they don't fit in a single register. -(type VecAvl (enum - ;; Static AVL emits a `vsetivli` that uses a constant value - (Static (size UImm5)) - ;; TODO: Add a dynamic, register based AVL mode when we are able to properly test it -)) - -(type VType (primitive VType)) -(type VState (primitive VState)) - - -;; Vector Opcode Category -;; -;; These categories are used to determine the type of operands that are allowed in the -;; instruction. -(type VecOpCategory (enum - (OPIVV) - (OPFVV) - (OPMVV) - (OPIVI) - (OPIVX) - (OPFVF) - (OPMVX) - (OPCFG) -)) - -;; Vector Opcode Masking -;; -;; When masked, the instruction will only operate on the elements that are dictated by -;; the mask register. Currently this is always fixed to v0. -(type VecOpMasking (enum - (Enabled (reg Reg)) - (Disabled) -)) - -(decl pure masked (VReg) VecOpMasking) -(rule (masked reg) (VecOpMasking.Enabled reg)) - -(decl pure unmasked () VecOpMasking) -(rule (unmasked) (VecOpMasking.Disabled)) - -;; Register to Register ALU Ops -(type VecAluOpRRR (enum - ;; Vector-Vector Opcodes - (VaddVV) - (VsaddVV) - (VsadduVV) - (VwaddVV) - (VwaddWV) - (VwadduVV) - (VwadduWV) - (VsubVV) - (VwsubVV) - (VwsubWV) - (VwsubuVV) - (VwsubuWV) - (VssubVV) - (VssubuVV) - (VmulVV) - (VmulhVV) - (VmulhuVV) - (VsmulVV) - (VsllVV) - (VsrlVV) - (VsraVV) - (VandVV) - (VorVV) - (VxorVV) - (VmaxVV) - (VmaxuVV) - (VminVV) - (VminuVV) - (VfaddVV) - (VfsubVV) - (VfmulVV) - (VfdivVV) - (VfminVV) - (VfmaxVV) - (VfsgnjVV) - (VfsgnjnVV) - (VfsgnjxVV) - (VmergeVVM) - (VredmaxuVS) - (VredminuVS) - (VrgatherVV) - (VcompressVM) - (VmseqVV) - (VmsneVV) - (VmsltuVV) - (VmsltVV) - (VmsleuVV) - (VmsleVV) - (VmfeqVV) - (VmfneVV) - (VmfltVV) - (VmfleVV) - (VmandMM) - (VmorMM) - (VmnandMM) - (VmnorMM) - - - ;; Vector-Scalar Opcodes - (VaddVX) - (VsaddVX) - (VsadduVX) - (VwaddVX) - (VwaddWX) - (VwadduVX) - (VwadduWX) - (VsubVX) - (VrsubVX) - (VwsubVX) - (VwsubWX) - (VwsubuVX) - (VwsubuWX) - (VssubVX) - (VssubuVX) - (VmulVX) - (VmulhVX) - (VmulhuVX) - (VsmulVX) - (VsllVX) - (VsrlVX) - (VsraVX) - (VandVX) - (VorVX) - (VxorVX) - (VmaxVX) - (VmaxuVX) - (VminVX) - (VminuVX) - (VslidedownVX) - (VfaddVF) - (VfsubVF) - (VfrsubVF) - (VfmulVF) - (VfdivVF) - (VfsgnjVF) - (VfrdivVF) - (VmergeVXM) - (VfmergeVFM) - (VrgatherVX) - (VmseqVX) - (VmsneVX) - (VmsltuVX) - (VmsltVX) - (VmsleuVX) - (VmsleVX) - (VmsgtuVX) - (VmsgtVX) - (VmfeqVF) - (VmfneVF) - (VmfltVF) - (VmfleVF) - (VmfgtVF) - (VmfgeVF) -)) - - - -;; Register-Imm ALU Ops that modify the destination register -(type VecAluOpRRRImm5 (enum - (VslideupVI) -)) - -;; Register-Register ALU Ops that modify the destination register -(type VecAluOpRRRR (enum - ;; Vector-Vector Opcodes - (VmaccVV) - (VnmsacVV) - (VfmaccVV) - (VfnmaccVV) - (VfmsacVV) - (VfnmsacVV) - - ;; Vector-Scalar Opcodes - (VmaccVX) - (VnmsacVX) - (VfmaccVF) - (VfnmaccVF) - (VfmsacVF) - (VfnmsacVF) - (Vslide1upVX) -)) - -;; Register-Imm ALU Ops -(type VecAluOpRRImm5 (enum - ;; Regular VI Opcodes - (VaddVI) - (VsaddVI) - (VsadduVI) - (VrsubVI) - (VsllVI) - (VsrlVI) - (VsraVI) - (VandVI) - (VorVI) - (VxorVI) - (VssrlVI) - (VslidedownVI) - (VmergeVIM) - (VrgatherVI) - ;; This opcode represents multiple instructions `vmv1r`/`vmv2r`/`vmv4r`/etc... - ;; The immediate field specifies how many registers should be copied. - (VmvrV) - (VnclipWI) - (VnclipuWI) - (VmseqVI) - (VmsneVI) - (VmsleuVI) - (VmsleVI) - (VmsgtuVI) - (VmsgtVI) -)) - -;; Imm only ALU Ops -(type VecAluOpRImm5 (enum - (VmvVI) -)) - -;; These are all of the special cases that have weird encodings. They are all -;; single source, single destination instructions, and usually use one of -;; the two source registers as auxiliary encoding space. -(type VecAluOpRR (enum - (VmvSX) - (VmvXS) - (VfmvSF) - (VfmvFS) - ;; vmv.v* is special in that vs2 must be v0 (and is ignored) otherwise the instruction is illegal. - (VmvVV) - (VmvVX) - (VfmvVF) - (VfsqrtV) - (VsextVF2) - (VsextVF4) - (VsextVF8) - (VzextVF2) - (VzextVF4) - (VzextVF8) - (VfcvtxufV) - (VfcvtxfV) - (VfcvtrtzxufV) - (VfcvtrtzxfV) - (VfcvtfxuV) - (VfcvtfxV) - (VfwcvtffV) - (VfncvtffW) -)) - -;; Returns the canonical destination type for a VecAluOpRRImm5. -(decl pure vec_alu_rr_dst_type (VecAluOpRR) Type) -(extern constructor vec_alu_rr_dst_type vec_alu_rr_dst_type) - - -;; Vector Addressing Mode -(type VecAMode (enum - ;; Vector unit-stride operations access elements stored contiguously in memory - ;; starting from the base effective address. - (UnitStride - (base AMode)) - ;; TODO: Constant Stride - ;; TODO: Indexed Operations -)) - - -;; Builds a static VState matching a SIMD type. -;; The VState is guaranteed to be static with AVL set to the number of lanes. -;; Element size is set to the size of the type. -;; LMUL is set to 1. -;; Tail mode is set to agnostic. -;; Mask mode is set to agnostic. -(decl pure vstate_from_type (Type) VState) -(extern constructor vstate_from_type vstate_from_type) -(convert Type VState vstate_from_type) - -;; Alters the LMUL of a VState to mf2 -(decl pure vstate_mf2 (VState) VState) -(extern constructor vstate_mf2 vstate_mf2) - -;; Extracts an element width from a SIMD type. -(decl pure element_width_from_type (Type) VecElementWidth) -(rule (element_width_from_type ty) - (if-let $I8 (lane_type ty)) - (VecElementWidth.E8)) -(rule (element_width_from_type ty) - (if-let $I16 (lane_type ty)) - (VecElementWidth.E16)) -(rule (element_width_from_type ty) - (if-let $I32 (lane_type ty)) - (VecElementWidth.E32)) -(rule (element_width_from_type ty) - (if-let $F32 (lane_type ty)) - (VecElementWidth.E32)) -(rule (element_width_from_type ty) - (if-let $I64 (lane_type ty)) - (VecElementWidth.E64)) -(rule (element_width_from_type ty) - (if-let $F64 (lane_type ty)) - (VecElementWidth.E64)) - -(decl pure min_vec_reg_size () u64) -(extern constructor min_vec_reg_size min_vec_reg_size) - -;; An extractor that matches any type that is known to fit in a single vector -;; register. -(decl ty_vec_fits_in_register (Type) Type) -(extern extractor ty_vec_fits_in_register ty_vec_fits_in_register) - -;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; As noted in the RISC-V Vector Extension Specification, rs2 is the first -;; source register and rs1 is the second source register. This is the opposite -;; of the usual RISC-V register order. -;; See Section 10.1 of the RISC-V Vector Extension Specification. - - -;; Helper for emitting `MInst.VecAluRRRR` instructions. -;; These instructions modify the destination register. -(decl vec_alu_rrrr (VecAluOpRRRR VReg VReg Reg VecOpMasking VState) VReg) -(rule (vec_alu_rrrr op vd_src vs2 vs1 mask vstate) - (let ((vd WritableVReg (temp_writable_vreg)) - (_ Unit (emit (MInst.VecAluRRRR op vd vd_src vs2 vs1 mask vstate)))) - vd)) - -;; Helper for emitting `MInst.VecAluRRRImm5` instructions. -;; These instructions modify the destination register. -(decl vec_alu_rrr_imm5 (VecAluOpRRRImm5 VReg VReg Imm5 VecOpMasking VState) VReg) -(rule (vec_alu_rrr_imm5 op vd_src vs2 imm mask vstate) - (let ((vd WritableVReg (temp_writable_vreg)) - (_ Unit (emit (MInst.VecAluRRRImm5 op vd vd_src vs2 imm mask vstate)))) - vd)) - -;; Helper for emitting `MInst.VecAluRRRImm5` instructions where the immediate -;; is zero extended instead of sign extended. -(decl vec_alu_rrr_uimm5 (VecAluOpRRRImm5 VReg VReg UImm5 VecOpMasking VState) VReg) -(rule (vec_alu_rrr_uimm5 op vd_src vs2 imm mask vstate) - (vec_alu_rrr_imm5 op vd_src vs2 (uimm5_bitcast_to_imm5 imm) mask vstate)) - -;; Helper for emitting `MInst.VecAluRRR` instructions. -(decl vec_alu_rrr (VecAluOpRRR Reg Reg VecOpMasking VState) Reg) -(rule (vec_alu_rrr op vs2 vs1 mask vstate) - (let ((vd WritableVReg (temp_writable_vreg)) - (_ Unit (emit (MInst.VecAluRRR op vd vs2 vs1 mask vstate)))) - vd)) - -;; Helper for emitting `MInst.VecAluRRImm5` instructions. -(decl vec_alu_rr_imm5 (VecAluOpRRImm5 Reg Imm5 VecOpMasking VState) Reg) -(rule (vec_alu_rr_imm5 op vs2 imm mask vstate) - (let ((vd WritableVReg (temp_writable_vreg)) - (_ Unit (emit (MInst.VecAluRRImm5 op vd vs2 imm mask vstate)))) - vd)) - -;; Helper for emitting `MInst.VecAluRRImm5` instructions where the immediate -;; is zero extended instead of sign extended. -(decl vec_alu_rr_uimm5 (VecAluOpRRImm5 Reg UImm5 VecOpMasking VState) Reg) -(rule (vec_alu_rr_uimm5 op vs2 imm mask vstate) - (vec_alu_rr_imm5 op vs2 (uimm5_bitcast_to_imm5 imm) mask vstate)) - -;; Helper for emitting `MInst.VecAluRRImm5` instructions that use the Imm5 as -;; auxiliary encoding space. -(decl vec_alu_rr (VecAluOpRR Reg VecOpMasking VState) Reg) -(rule (vec_alu_rr op vs mask vstate) - (let ((vd WritableReg (temp_writable_reg (vec_alu_rr_dst_type op))) - (_ Unit (emit (MInst.VecAluRR op vd vs mask vstate)))) - vd)) - -;; Helper for emitting `MInst.VecAluRImm5` instructions. -(decl vec_alu_r_imm5 (VecAluOpRImm5 Imm5 VecOpMasking VState) Reg) -(rule (vec_alu_r_imm5 op imm mask vstate) - (let ((vd WritableVReg (temp_writable_vreg)) - (_ Unit (emit (MInst.VecAluRImm5 op vd imm mask vstate)))) - vd)) - -;; Helper for emitting `MInst.VecLoad` instructions. -(decl vec_load (VecElementWidth VecAMode MemFlags VecOpMasking VState) Reg) -(rule (vec_load eew from flags mask vstate) - (let ((vd WritableVReg (temp_writable_vreg)) - (_ Unit (emit (MInst.VecLoad eew vd from flags mask vstate)))) - vd)) - -;; Helper for emitting `MInst.VecStore` instructions. -(decl vec_store (VecElementWidth VecAMode VReg MemFlags VecOpMasking VState) InstOutput) -(rule (vec_store eew to from flags mask vstate) - (side_effect - (SideEffectNoResult.Inst (MInst.VecStore eew to from flags mask vstate)))) - -;; Helper for emitting the `vadd.vv` instruction. -(decl rv_vadd_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vadd_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VaddVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vadd.vx` instruction. -(decl rv_vadd_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vadd_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VaddVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vadd.vi` instruction. -(decl rv_vadd_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vadd_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VaddVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vsadd.vv` instruction. -(decl rv_vsadd_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vsadd_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsaddVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsadd.vx` instruction. -(decl rv_vsadd_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vsadd_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsaddVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsadd.vi` instruction. -(decl rv_vsadd_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vsadd_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VsaddVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vsaddu.vv` instruction. -(decl rv_vsaddu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vsaddu_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsadduVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsaddu.vx` instruction. -(decl rv_vsaddu_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vsaddu_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsadduVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsaddu.vi` instruction. -(decl rv_vsaddu_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vsaddu_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VsadduVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vwadd.vv` instruction. -;; -;; Widening integer add, 2*SEW = SEW + SEW -(decl rv_vwadd_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vwadd_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwaddVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwadd.vx` instruction. -;; -;; Widening integer add, 2*SEW = SEW + SEW -(decl rv_vwadd_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vwadd_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwaddVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwadd.wv` instruction. -;; -;; Widening integer add, 2*SEW = 2*SEW + SEW -(decl rv_vwadd_wv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vwadd_wv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwaddWV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwadd.wx` instruction. -;; -;; Widening integer add, 2*SEW = 2*SEW + SEW -(decl rv_vwadd_wx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vwadd_wx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwaddWX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwaddu.vv` instruction. -;; -;; Widening unsigned integer add, 2*SEW = SEW + SEW -(decl rv_vwaddu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vwaddu_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwadduVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwaddu.vv` instruction. -;; -;; Widening unsigned integer add, 2*SEW = SEW + SEW -(decl rv_vwaddu_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vwaddu_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwadduVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwaddu.wv` instruction. -;; -;; Widening integer add, 2*SEW = 2*SEW + SEW -(decl rv_vwaddu_wv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vwaddu_wv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwadduWV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwaddu.wx` instruction. -;; -;; Widening integer add, 2*SEW = 2*SEW + SEW -(decl rv_vwaddu_wx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vwaddu_wx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwadduWX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsub.vv` instruction. -(decl rv_vsub_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vsub_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsubVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsub.vx` instruction. -(decl rv_vsub_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vsub_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsubVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vrsub.vx` instruction. -(decl rv_vrsub_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vrsub_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwsub.vv` instruction. -;; -;; Widening integer sub, 2*SEW = SEW + SEW -(decl rv_vwsub_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vwsub_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwsubVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwsub.vx` instruction. -;; -;; Widening integer sub, 2*SEW = SEW + SEW -(decl rv_vwsub_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vwsub_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwsubVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwsub.wv` instruction. -;; -;; Widening integer sub, 2*SEW = 2*SEW + SEW -(decl rv_vwsub_wv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vwsub_wv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwsubWV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwsub.wx` instruction. -;; -;; Widening integer sub, 2*SEW = 2*SEW + SEW -(decl rv_vwsub_wx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vwsub_wx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwsubWX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwsubu.vv` instruction. -;; -;; Widening unsigned integer sub, 2*SEW = SEW + SEW -(decl rv_vwsubu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vwsubu_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwsubuVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwsubu.vv` instruction. -;; -;; Widening unsigned integer sub, 2*SEW = SEW + SEW -(decl rv_vwsubu_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vwsubu_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwsubuVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwsubu.wv` instruction. -;; -;; Widening integer sub, 2*SEW = 2*SEW + SEW -(decl rv_vwsubu_wv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vwsubu_wv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwsubuWV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vwsubu.wx` instruction. -;; -;; Widening integer sub, 2*SEW = 2*SEW + SEW -(decl rv_vwsubu_wx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vwsubu_wx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VwsubuWX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vssub.vv` instruction. -(decl rv_vssub_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vssub_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VssubVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vssub.vx` instruction. -(decl rv_vssub_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vssub_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VssubVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vssubu.vv` instruction. -(decl rv_vssubu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vssubu_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VssubuVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vssubu.vx` instruction. -(decl rv_vssubu_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vssubu_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VssubuVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vneg.v` pseudo-instruction. -(decl rv_vneg_v (VReg VecOpMasking VState) VReg) -(rule (rv_vneg_v vs2 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 (zero_reg) mask vstate)) - -;; Helper for emitting the `vrsub.vi` instruction. -(decl rv_vrsub_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vrsub_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VrsubVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vmul.vv` instruction. -(decl rv_vmul_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmul_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmulVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmul.vx` instruction. -(decl rv_vmul_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmul_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmulVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmulh.vv` instruction. -(decl rv_vmulh_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmulh_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmulhVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmulh.vx` instruction. -(decl rv_vmulh_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmulh_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmulhVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmulhu.vv` instruction. -(decl rv_vmulhu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmulhu_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmulhuVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmulhu.vx` instruction. -(decl rv_vmulhu_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmulhu_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmulhuVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsmul.vv` instruction. -;; -;; Signed saturating and rounding fractional multiply -;; # vd[i] = clip(roundoff_signed(vs2[i]*vs1[i], SEW-1)) -(decl rv_vsmul_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vsmul_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsmulVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsmul.vx` instruction. -;; -;; Signed saturating and rounding fractional multiply -;; # vd[i] = clip(roundoff_signed(vs2[i]*x[rs1], SEW-1)) -(decl rv_vsmul_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vsmul_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsmulVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmacc.vv` instruction. -;; -;; Integer multiply-add, overwrite addend -;; # vd[i] = +(vs1[i] * vs2[i]) + vd[i] -(decl rv_vmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmacc_vv vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VmaccVV) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmacc.vx` instruction. -;; -;; Integer multiply-add, overwrite addend -;; # vd[i] = +(x[rs1] * vs2[i]) + vd[i] -(decl rv_vmacc_vx (VReg VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmacc_vx vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VmaccVX) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vnmsac.vv` instruction. -;; -;; Integer multiply-sub, overwrite minuend -;; # vd[i] = -(vs1[i] * vs2[i]) + vd[i] -(decl rv_vnmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) -(rule (rv_vnmsac_vv vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VnmsacVV) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vnmsac.vx` instruction. -;; -;; Integer multiply-sub, overwrite minuend -;; # vd[i] = -(x[rs1] * vs2[i]) + vd[i] -(decl rv_vnmsac_vx (VReg VReg XReg VecOpMasking VState) VReg) -(rule (rv_vnmsac_vx vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VnmsacVX) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `sll.vv` instruction. -(decl rv_vsll_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vsll_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsllVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `sll.vx` instruction. -(decl rv_vsll_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vsll_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsllVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsll.vi` instruction. -(decl rv_vsll_vi (VReg UImm5 VecOpMasking VState) VReg) -(rule (rv_vsll_vi vs2 imm mask vstate) - (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsllVI) vs2 imm mask vstate)) - -;; Helper for emitting the `srl.vv` instruction. -(decl rv_vsrl_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vsrl_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsrlVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `srl.vx` instruction. -(decl rv_vsrl_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vsrl_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsrlVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsrl.vi` instruction. -(decl rv_vsrl_vi (VReg UImm5 VecOpMasking VState) VReg) -(rule (rv_vsrl_vi vs2 imm mask vstate) - (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsrlVI) vs2 imm mask vstate)) - -;; Helper for emitting the `sra.vv` instruction. -(decl rv_vsra_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vsra_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsraVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `sra.vx` instruction. -(decl rv_vsra_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vsra_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VsraVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vsra.vi` instruction. -(decl rv_vsra_vi (VReg UImm5 VecOpMasking VState) VReg) -(rule (rv_vsra_vi vs2 imm mask vstate) - (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsraVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vand.vv` instruction. -(decl rv_vand_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vand_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VandVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vand.vx` instruction. -(decl rv_vand_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vand_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VandVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vand.vi` instruction. -(decl rv_vand_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vand_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VandVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vor.vv` instruction. -(decl rv_vor_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vor_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VorVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vor.vx` instruction. -(decl rv_vor_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vor_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VorVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vor.vi` instruction. -(decl rv_vor_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vor_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VorVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vxor.vv` instruction. -(decl rv_vxor_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vxor_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VxorVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vxor.vx` instruction. -(decl rv_vxor_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vxor_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VxorVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vxor.vi` instruction. -(decl rv_vxor_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vxor_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VxorVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vssrl.vi` instruction. -;; -;; vd[i] = (unsigned(vs2[i]) >> imm) + r -;; -;; `r` here is the rounding mode currently selected. -(decl rv_vssrl_vi (VReg UImm5 VecOpMasking VState) VReg) -(rule (rv_vssrl_vi vs2 imm mask vstate) - (vec_alu_rr_uimm5 (VecAluOpRRImm5.VssrlVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vnot.v` instruction. -;; This is just a mnemonic for `vxor.vi vd, vs, -1` -(decl rv_vnot_v (VReg VecOpMasking VState) VReg) -(rule (rv_vnot_v vs2 mask vstate) - (if-let neg1 (i8_to_imm5 -1)) - (rv_vxor_vi vs2 neg1 mask vstate)) - -;; Helper for emitting the `vmax.vv` instruction. -(decl rv_vmax_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmax_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmaxVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmax.vx` instruction. -(decl rv_vmax_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmax_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmaxVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmin.vv` instruction. -(decl rv_vmin_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmin_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VminVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmin.vx` instruction. -(decl rv_vmin_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmin_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VminVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmaxu.vv` instruction. -(decl rv_vmaxu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmaxu_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmaxuVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmaxu.vx` instruction. -(decl rv_vmaxu_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmaxu_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmaxuVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vminu.vv` instruction. -(decl rv_vminu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vminu_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VminuVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vminu.vx` instruction. -(decl rv_vminu_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vminu_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VminuVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfadd.vv` instruction. -(decl rv_vfadd_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfadd_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfaddVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfadd.vf` instruction. -(decl rv_vfadd_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfadd_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfaddVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfsub.vv` instruction. -(decl rv_vfsub_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfsub_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfsubVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfsub.vf` instruction. -(decl rv_vfsub_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfsub_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfsubVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfrsub.vf` instruction. -(decl rv_vfrsub_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfrsub_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfrsubVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfmul.vv` instruction. -(decl rv_vfmul_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfmul_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfmulVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfmul.vf` instruction. -(decl rv_vfmul_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfmul_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfmulVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfmacc.vv` instruction. -;; -;; FP multiply-accumulate, overwrites addend -;; # vd[i] = +(vs1[i] * vs2[i]) + vd[i] -(decl rv_vfmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfmacc_vv vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VfmaccVV) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfmacc.vf` instruction. -;; -;; FP multiply-accumulate, overwrites addend -;; # vd[i] = +(f[rs1] * vs2[i]) + vd[i] -(decl rv_vfmacc_vf (VReg VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfmacc_vf vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VfmaccVF) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfnmacc.vv` instruction. -;; -;; FP negate-(multiply-accumulate), overwrites subtrahend -;; # vd[i] = -(vs1[i] * vs2[i]) - vd[i] -(decl rv_vfnmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfnmacc_vv vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VfnmaccVV) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfnmacc.vf` instruction. -;; -;; FP negate-(multiply-accumulate), overwrites subtrahend -;; # vd[i] = -(f[rs1] * vs2[i]) - vd[i] -(decl rv_vfnmacc_vf (VReg VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfnmacc_vf vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VfnmaccVF) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfmsac.vv` instruction. -;; -;; FP multiply-subtract-accumulator, overwrites subtrahend -;; # vd[i] = +(vs1[i] * vs2[i]) - vd[i] -(decl rv_vfmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfmsac_vv vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VfmsacVV) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfmsac.vf` instruction. -;; -;; FP multiply-subtract-accumulator, overwrites subtrahend -;; # vd[i] = +(f[rs1] * vs2[i]) - vd[i] -(decl rv_vfmsac_vf (VReg VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfmsac_vf vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VfmsacVF) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfnmsac.vv` instruction. -;; -;; FP negate-(multiply-subtract-accumulator), overwrites minuend -;; # vd[i] = -(vs1[i] * vs2[i]) + vd[i] -(decl rv_vfnmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfnmsac_vv vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VfnmsacVV) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfnmsac.vf` instruction. -;; -;; FP negate-(multiply-subtract-accumulator), overwrites minuend -;; # vd[i] = -(f[rs1] * vs2[i]) + vd[i] -(decl rv_vfnmsac_vf (VReg VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfnmsac_vf vd vs2 vs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.VfnmsacVF) vd vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfdiv.vv` instruction. -(decl rv_vfdiv_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfdiv_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfdivVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfdiv.vf` instruction. -(decl rv_vfdiv_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfdiv_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfdivVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfrdiv.vf` instruction. -(decl rv_vfrdiv_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfrdiv_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfrdivVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfmin.vv` instruction. -(decl rv_vfmin_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfmin_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfminVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfmax.vv` instruction. -(decl rv_vfmax_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfmax_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfmaxVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfsgnj.vv` ("Floating Point Sign Injection") instruction. -;; The output of this instruction is `vs2` with the sign bit from `vs1` -(decl rv_vfsgnj_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfsgnj_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfsgnjVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfsgnj.vf` ("Floating Point Sign Injection") instruction. -(decl rv_vfsgnj_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vfsgnj_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfsgnjVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfsgnjn.vv` ("Floating Point Sign Injection Negated") instruction. -;; The output of this instruction is `vs2` with the negated sign bit from `vs1` -(decl rv_vfsgnjn_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfsgnjn_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfsgnjnVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfneg.v` instruction. -;; This instruction is a mnemonic for `vfsgnjn.vv vd, vs, vs` -(decl rv_vfneg_v (VReg VecOpMasking VState) VReg) -(rule (rv_vfneg_v vs mask vstate) (rv_vfsgnjn_vv vs vs mask vstate)) - -;; Helper for emitting the `vfsgnjx.vv` ("Floating Point Sign Injection Exclusive") instruction. -;; The output of this instruction is `vs2` with the XOR of the sign bits from `vs2` and `vs1`. -;; When `vs2 == vs1` this implements `fabs` -(decl rv_vfsgnjx_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vfsgnjx_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfsgnjxVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vfabs.v` instruction. -;; This instruction is a mnemonic for `vfsgnjx.vv vd, vs, vs` -(decl rv_vfabs_v (VReg VecOpMasking VState) VReg) -(rule (rv_vfabs_v vs mask vstate) (rv_vfsgnjx_vv vs vs mask vstate)) - -;; Helper for emitting the `vfsqrt.v` instruction. -;; This instruction splats the F register into all elements of the destination vector. -(decl rv_vfsqrt_v (VReg VecOpMasking VState) VReg) -(rule (rv_vfsqrt_v vs mask vstate) - (vec_alu_rr (VecAluOpRR.VfsqrtV) vs mask vstate)) - -;; Helper for emitting the `vfcvt.xu.f.v` instruction. -;; This instruction converts a float to an unsigned integer. -(decl rv_vfcvt_xu_f_v (VReg VecOpMasking VState) VReg) -(rule (rv_vfcvt_xu_f_v vs mask vstate) - (vec_alu_rr (VecAluOpRR.VfcvtxufV) vs mask vstate)) - -;; Helper for emitting the `vfcvt.x.f.v` instruction. -;; This instruction converts a float to a signed integer. -(decl rv_vfcvt_x_f_v (VReg VecOpMasking VState) VReg) -(rule (rv_vfcvt_x_f_v vs mask vstate) - (vec_alu_rr (VecAluOpRR.VfcvtxfV) vs mask vstate)) - -;; Helper for emitting the `vfcvt.rtz.xu.f.v` instruction. -;; This instruction converts a float to an unsigned integer -;; using the Round to Zero (RTZ) rounding mode and ignoring -;; the currently set FRM rounding mode. -(decl rv_vfcvt_rtz_xu_f_v (VReg VecOpMasking VState) VReg) -(rule (rv_vfcvt_rtz_xu_f_v vs mask vstate) - (vec_alu_rr (VecAluOpRR.VfcvtrtzxufV) vs mask vstate)) - -;; Helper for emitting the `vfcvt.rtz.x.f.v` instruction. -;; This instruction converts a float to a signed integer. -;; using the Round to Zero (RTZ) rounding mode and ignoring -;; the currently set FRM rounding mode. -(decl rv_vfcvt_rtz_x_f_v (VReg VecOpMasking VState) VReg) -(rule (rv_vfcvt_rtz_x_f_v vs mask vstate) - (vec_alu_rr (VecAluOpRR.VfcvtrtzxfV) vs mask vstate)) - -;; Helper for emitting the `vfcvt.f.xu.v` instruction. -;; This instruction converts a unsigned integer to a float. -(decl rv_vfcvt_f_xu_v (VReg VecOpMasking VState) VReg) -(rule (rv_vfcvt_f_xu_v vs mask vstate) - (vec_alu_rr (VecAluOpRR.VfcvtfxuV) vs mask vstate)) - -;; Helper for emitting the `vfcvt.x.f.v` instruction. -;; This instruction converts a signed integer to a float. -(decl rv_vfcvt_f_x_v (VReg VecOpMasking VState) VReg) -(rule (rv_vfcvt_f_x_v vs mask vstate) - (vec_alu_rr (VecAluOpRR.VfcvtfxV) vs mask vstate)) - - ;; Helper for emitting the `vfwcvt.f.f.v` instruction. -;; Convert single-width float to double-width float. -(decl rv_vfwcvt_f_f_v (VReg VecOpMasking VState) VReg) -(rule (rv_vfwcvt_f_f_v vs mask vstate) - (vec_alu_rr (VecAluOpRR.VfwcvtffV) vs mask vstate)) - -;; Helper for emitting the `vfncvt.f.f.w` instruction. -;; Convert double-width float to single-width float. -(decl rv_vfncvt_f_f_w (VReg VecOpMasking VState) VReg) -(rule (rv_vfncvt_f_f_w vs mask vstate) - (vec_alu_rr (VecAluOpRR.VfncvtffW) vs mask vstate)) - -;; Helper for emitting the `vslidedown.vx` instruction. -;; `vslidedown` moves all elements in the vector down by n elements. -;; The top most elements are up to the tail policy. -(decl rv_vslidedown_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vslidedown_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VslidedownVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vslidedown.vi` instruction. -;; Unlike other `vi` instructions the immediate is zero extended. -(decl rv_vslidedown_vi (VReg UImm5 VecOpMasking VState) VReg) -(rule (rv_vslidedown_vi vs2 imm mask vstate) - (vec_alu_rr_uimm5 (VecAluOpRRImm5.VslidedownVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vslideup.vi` instruction. -;; Unlike other `vi` instructions the immediate is zero extended. -;; This is implemented as a 2 source operand instruction, since it only -;; partially modifies the destination register. -(decl rv_vslideup_vvi (VReg VReg UImm5 VecOpMasking VState) VReg) -(rule (rv_vslideup_vvi vd vs2 imm mask vstate) - (vec_alu_rrr_uimm5 (VecAluOpRRRImm5.VslideupVI) vd vs2 imm mask vstate)) - -;; Helper for emitting the `vslide1up.vx` instruction. -;; -;; # vd[0]=x[rs1], vd[i+1] = vs2[i] -(decl rv_vslide1up_vx (VReg VReg XReg VecOpMasking VState) VReg) -(rule (rv_vslide1up_vx vd vs2 rs1 mask vstate) - (vec_alu_rrrr (VecAluOpRRRR.Vslide1upVX) vd vs2 rs1 mask vstate)) - -;; Helper for emitting the `vmv.x.s` instruction. -;; This instruction copies the first element of the source vector to the destination X register. -;; Masked versions of this instruction are not supported. -(decl rv_vmv_xs (VReg VState) XReg) -(rule (rv_vmv_xs vs vstate) - (vec_alu_rr (VecAluOpRR.VmvXS) vs (unmasked) vstate)) - -;; Helper for emitting the `vfmv.f.s` instruction. -;; This instruction copies the first element of the source vector to the destination F register. -;; Masked versions of this instruction are not supported. -(decl rv_vfmv_fs (VReg VState) FReg) -(rule (rv_vfmv_fs vs vstate) - (vec_alu_rr (VecAluOpRR.VfmvFS) vs (unmasked) vstate)) - -;; Helper for emitting the `vmv.s.x` instruction. -;; This instruction copies the source X register into first element of the source vector. -;; Masked versions of this instruction are not supported. -(decl rv_vmv_sx (XReg VState) VReg) -(rule (rv_vmv_sx vs vstate) - (vec_alu_rr (VecAluOpRR.VmvSX) vs (unmasked) vstate)) - -;; Helper for emitting the `vfmv.s.f` instruction. -;; This instruction copies the source F register into first element of the source vector. -;; Masked versions of this instruction are not supported. -(decl rv_vfmv_sf (FReg VState) VReg) -(rule (rv_vfmv_sf vs vstate) - (vec_alu_rr (VecAluOpRR.VfmvSF) vs (unmasked) vstate)) - -;; Helper for emitting the `vmv.v.x` instruction. -;; This instruction splats the X register into all elements of the destination vector. -;; Masked versions of this instruction are called `vmerge` -(decl rv_vmv_vx (XReg VState) VReg) -(rule (rv_vmv_vx vs vstate) - (vec_alu_rr (VecAluOpRR.VmvVX) vs (unmasked) vstate)) - -;; Helper for emitting the `vfmv.v.f` instruction. -;; This instruction splats the F register into all elements of the destination vector. -;; Masked versions of this instruction are called `vmerge` -(decl rv_vfmv_vf (FReg VState) VReg) -(rule (rv_vfmv_vf vs vstate) - (vec_alu_rr (VecAluOpRR.VfmvVF) vs (unmasked) vstate)) - -;; Helper for emitting the `vmv.v.i` instruction. -;; This instruction splat's the immediate value into all elements of the destination vector. -;; Masked versions of this instruction are called `vmerge` -(decl rv_vmv_vi (Imm5 VState) VReg) -(rule (rv_vmv_vi imm vstate) - (vec_alu_r_imm5 (VecAluOpRImm5.VmvVI) imm (unmasked) vstate)) - -;; Helper for emitting the `vmerge.vvm` instruction. -;; This instruction merges the elements of the two source vectors into the destination vector -;; based on a mask. Elements are taken from the first source vector if the mask bit is clear, -;; and from the second source vector if the mask bit is set. This instruction is always masked. -;; -;; vd[i] = v0.mask[i] ? vs1[i] : vs2[i] -(decl rv_vmerge_vvm (VReg VReg VReg VState) VReg) -(rule (rv_vmerge_vvm vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmergeVVM) vs2 vs1 (masked mask) vstate)) - -;; Helper for emitting the `vmerge.vxm` instruction. -;; Elements are taken from the first source vector if the mask bit is clear, and from the X -;; register if the mask bit is set. This instruction is always masked. -;; -;; vd[i] = v0.mask[i] ? x[rs1] : vs2[i] -(decl rv_vmerge_vxm (VReg XReg VReg VState) VReg) -(rule (rv_vmerge_vxm vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmergeVXM) vs2 vs1 (masked mask) vstate)) - -;; Helper for emitting the `vfmerge.vfm` instruction. -;; Elements are taken from the first source vector if the mask bit is clear, and from the F -;; register if the mask bit is set. This instruction is always masked. -;; -;; vd[i] = v0.mask[i] ? f[rs1] : vs2[i] -(decl rv_vfmerge_vfm (VReg FReg VReg VState) VReg) -(rule (rv_vfmerge_vfm vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VfmergeVFM) vs2 vs1 (masked mask) vstate)) - -;; Helper for emitting the `vmerge.vim` instruction. -;; Elements are taken from the first source vector if the mask bit is clear, and from the -;; immediate value if the mask bit is set. This instruction is always masked. -;; -;; vd[i] = v0.mask[i] ? imm : vs2[i] -(decl rv_vmerge_vim (VReg Imm5 VReg VState) VReg) -(rule (rv_vmerge_vim vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VmergeVIM) vs2 imm (masked mask) vstate)) - - -;; Helper for emitting the `vredminu.vs` instruction. -;; -;; vd[0] = minu( vs1[0] , vs2[*] ) -(decl rv_vredminu_vs (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vredminu_vs vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VredminuVS) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vredmaxu.vs` instruction. -;; -;; vd[0] = maxu( vs1[0] , vs2[*] ) -(decl rv_vredmaxu_vs (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vredmaxu_vs vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VredmaxuVS) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vrgather.vv` instruction. -;; -;; vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; -(decl rv_vrgather_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vrgather_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VrgatherVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vrgather.vx` instruction. -;; -;; vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] -(decl rv_vrgather_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vrgather_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VrgatherVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vrgather.vi` instruction. -(decl rv_vrgather_vi (VReg UImm5 VecOpMasking VState) VReg) -(rule (rv_vrgather_vi vs2 imm mask vstate) - (vec_alu_rr_uimm5 (VecAluOpRRImm5.VrgatherVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vcompress.vm` instruction. -;; -;; The vector compress instruction allows elements selected by a vector mask -;; register from a source vector register group to be packed into contiguous -;; elements at the start of the destination vector register group. -;; -;; The mask register is specified through vs1 -(decl rv_vcompress_vm (VReg VReg VState) VReg) -(rule (rv_vcompress_vm vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.VcompressVM) vs2 vs1 (unmasked) vstate)) - -;; Helper for emitting the `vmseq.vv` (Vector Mask Set If Equal) instruction. -(decl rv_vmseq_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmseq_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmseqVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmseq.vx` (Vector Mask Set If Equal) instruction. -(decl rv_vmseq_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmseq_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmseqVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmseq.vi` (Vector Mask Set If Equal) instruction. -(decl rv_vmseq_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vmseq_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VmseqVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vmsne.vv` (Vector Mask Set If Not Equal) instruction. -(decl rv_vmsne_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmsne_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsneVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmsne.vx` (Vector Mask Set If Not Equal) instruction. -(decl rv_vmsne_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmsne_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsneVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmsne.vi` (Vector Mask Set If Not Equal) instruction. -(decl rv_vmsne_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vmsne_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsneVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vmsltu.vv` (Vector Mask Set If Less Than, Unsigned) instruction. -(decl rv_vmsltu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmsltu_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsltuVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmsltu.vx` (Vector Mask Set If Less Than, Unsigned) instruction. -(decl rv_vmsltu_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmsltu_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsltuVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmslt.vv` (Vector Mask Set If Less Than) instruction. -(decl rv_vmslt_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmslt_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsltVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmslt.vx` (Vector Mask Set If Less Than) instruction. -(decl rv_vmslt_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmslt_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsltVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmsleu.vv` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. -(decl rv_vmsleu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmsleu_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsleuVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmsleu.vx` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. -(decl rv_vmsleu_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmsleu_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsleuVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmsleu.vi` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. -(decl rv_vmsleu_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vmsleu_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleuVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vmsle.vv` (Vector Mask Set If Less Than or Equal) instruction. -(decl rv_vmsle_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmsle_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsleVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmsle.vx` (Vector Mask Set If Less Than or Equal) instruction. -(decl rv_vmsle_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmsle_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsleVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmsle.vi` (Vector Mask Set If Less Than or Equal) instruction. -(decl rv_vmsle_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vmsle_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vmsgt.vv` (Vector Mask Set If Greater Than, Unsigned) instruction. -;; This is an alias for `vmsltu.vv` with the operands inverted. -(decl rv_vmsgtu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmsgtu_vv vs2 vs1 mask vstate) (rv_vmsltu_vv vs1 vs2 mask vstate)) - -;; Helper for emitting the `vmsgtu.vx` (Vector Mask Set If Greater Than, Unsigned) instruction. -(decl rv_vmsgtu_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmsgtu_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsgtuVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmsgtu.vi` (Vector Mask Set If Greater Than, Unsigned) instruction. -(decl rv_vmsgtu_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vmsgtu_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtuVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vmsgt.vv` (Vector Mask Set If Greater Than) instruction. -;; This is an alias for `vmslt.vv` with the operands inverted. -(decl rv_vmsgt_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmsgt_vv vs2 vs1 mask vstate) (rv_vmslt_vv vs1 vs2 mask vstate)) - -;; Helper for emitting the `vmsgt.vx` (Vector Mask Set If Greater Than) instruction. -(decl rv_vmsgt_vx (VReg XReg VecOpMasking VState) VReg) -(rule (rv_vmsgt_vx vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmsgtVX) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmsgt.vi` (Vector Mask Set If Greater Than) instruction. -(decl rv_vmsgt_vi (VReg Imm5 VecOpMasking VState) VReg) -(rule (rv_vmsgt_vi vs2 imm mask vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtVI) vs2 imm mask vstate)) - -;; Helper for emitting the `vmsgeu.vv` (Vector Mask Set If Greater Than or Equal, Unsigned) instruction. -;; This is an alias for `vmsleu.vv` with the operands inverted. -(decl rv_vmsgeu_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmsgeu_vv vs2 vs1 mask vstate) (rv_vmsleu_vv vs1 vs2 mask vstate)) - -;; Helper for emitting the `vmsge.vv` (Vector Mask Set If Greater Than or Equal) instruction. -;; This is an alias for `vmsle.vv` with the operands inverted. -(decl rv_vmsge_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmsge_vv vs2 vs1 mask vstate) (rv_vmsle_vv vs1 vs2 mask vstate)) - -;; Helper for emitting the `vmfeq.vv` (Vector Mask Set If Float Equal) instruction. -(decl rv_vmfeq_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmfeq_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmfeqVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmfeq.vf` (Vector Mask Set If Float Equal) instruction. -(decl rv_vmfeq_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vmfeq_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmfeqVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmfne.vv` (Vector Mask Set If Float Not Equal) instruction. -(decl rv_vmfne_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmfne_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmfneVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmfne.vf` (Vector Mask Set If Float Not Equal) instruction. -(decl rv_vmfne_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vmfne_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmfneVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmflt.vv` (Vector Mask Set If Float Less Than) instruction. -(decl rv_vmflt_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmflt_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmfltVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmflt.vf` (Vector Mask Set If Float Less Than) instruction. -(decl rv_vmflt_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vmflt_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmfltVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmfle.vv` (Vector Mask Set If Float Less Than Or Equal) instruction. -(decl rv_vmfle_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmfle_vv vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmfleVV) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmfle.vf` (Vector Mask Set If Float Less Than Or Equal) instruction. -(decl rv_vmfle_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vmfle_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmfleVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmfgt.vv` (Vector Mask Set If Float Greater Than) instruction. -;; This is an alias for `vmflt.vv` with the operands inverted. -(decl rv_vmfgt_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmfgt_vv vs2 vs1 mask vstate) (rv_vmflt_vv vs1 vs2 mask vstate)) - -;; Helper for emitting the `vmfgt.vf` (Vector Mask Set If Float Greater Than) instruction. -(decl rv_vmfgt_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vmfgt_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmfgtVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vmfge.vv` (Vector Mask Set If Float Greater Than Or Equal) instruction. -;; This is an alias for `vmfle.vv` with the operands inverted. -(decl rv_vmfge_vv (VReg VReg VecOpMasking VState) VReg) -(rule (rv_vmfge_vv vs2 vs1 mask vstate) (rv_vmfle_vv vs1 vs2 mask vstate)) - -;; Helper for emitting the `vmfge.vf` (Vector Mask Set If Float Greater Than Or Equal) instruction. -(decl rv_vmfge_vf (VReg FReg VecOpMasking VState) VReg) -(rule (rv_vmfge_vf vs2 vs1 mask vstate) - (vec_alu_rrr (VecAluOpRRR.VmfgeVF) vs2 vs1 mask vstate)) - -;; Helper for emitting the `vzext.vf2` instruction. -;; Zero-extend SEW/2 source to SEW destination -(decl rv_vzext_vf2 (VReg VecOpMasking VState) VReg) -(rule (rv_vzext_vf2 vs mask vstate) - (vec_alu_rr (VecAluOpRR.VzextVF2) vs mask vstate)) - -;; Helper for emitting the `vzext.vf4` instruction. -;; Zero-extend SEW/4 source to SEW destination -(decl rv_vzext_vf4 (VReg VecOpMasking VState) VReg) -(rule (rv_vzext_vf4 vs mask vstate) - (vec_alu_rr (VecAluOpRR.VzextVF4) vs mask vstate)) - -;; Helper for emitting the `vzext.vf8` instruction. -;; Zero-extend SEW/8 source to SEW destination -(decl rv_vzext_vf8 (VReg VecOpMasking VState) VReg) -(rule (rv_vzext_vf8 vs mask vstate) - (vec_alu_rr (VecAluOpRR.VzextVF8) vs mask vstate)) - -;; Helper for emitting the `vsext.vf2` instruction. -;; Sign-extend SEW/2 source to SEW destination -(decl rv_vsext_vf2 (VReg VecOpMasking VState) VReg) -(rule (rv_vsext_vf2 vs mask vstate) - (vec_alu_rr (VecAluOpRR.VsextVF2) vs mask vstate)) - -;; Helper for emitting the `vsext.vf4` instruction. -;; Sign-extend SEW/4 source to SEW destination -(decl rv_vsext_vf4 (VReg VecOpMasking VState) VReg) -(rule (rv_vsext_vf4 vs mask vstate) - (vec_alu_rr (VecAluOpRR.VsextVF4) vs mask vstate)) - -;; Helper for emitting the `vsext.vf8` instruction. -;; Sign-extend SEW/8 source to SEW destination -(decl rv_vsext_vf8 (VReg VecOpMasking VState) VReg) -(rule (rv_vsext_vf8 vs mask vstate) - (vec_alu_rr (VecAluOpRR.VsextVF8) vs mask vstate)) - -;; Helper for emitting the `vnclip.wi` instruction. -;; -;; vd[i] = clip(roundoff_signed(vs2[i], uimm)) -(decl rv_vnclip_wi (VReg UImm5 VecOpMasking VState) VReg) -(rule (rv_vnclip_wi vs2 imm mask vstate) - (vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipWI) vs2 imm mask vstate)) - -;; Helper for emitting the `vnclipu.wi` instruction. -;; -;; vd[i] = clip(roundoff_unsigned(vs2[i], uimm)) -(decl rv_vnclipu_wi (VReg UImm5 VecOpMasking VState) VReg) -(rule (rv_vnclipu_wi vs2 imm mask vstate) - (vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipuWI) vs2 imm mask vstate)) - -;; Helper for emitting the `vmand.mm` (Mask Bitwise AND) instruction. -;; -;; vd.mask[i] = vs2.mask[i] && vs1.mask[i] -(decl rv_vmand_mm (VReg VReg VState) VReg) -(rule (rv_vmand_mm vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.VmandMM) vs2 vs1 (unmasked) vstate)) - -;; Helper for emitting the `vmor.mm` (Mask Bitwise OR) instruction. -;; -;; vd.mask[i] = vs2.mask[i] || vs1.mask[i] -(decl rv_vmor_mm (VReg VReg VState) VReg) -(rule (rv_vmor_mm vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.VmorMM) vs2 vs1 (unmasked) vstate)) - -;; Helper for emitting the `vmnand.mm` (Mask Bitwise NAND) instruction. -;; -;; vd.mask[i] = !(vs2.mask[i] && vs1.mask[i]) -(decl rv_vmnand_mm (VReg VReg VState) VReg) -(rule (rv_vmnand_mm vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.VmnandMM) vs2 vs1 (unmasked) vstate)) - -;; Helper for emitting the `vmnot.m` (Mask Bitwise NOT) instruction. -;; This is an alias for `vmnand.mm vd, vs, vs` -;; -;; vd.mask[i] = !vs.mask[i] -(decl rv_vmnot_m (VReg VState) VReg) -(rule (rv_vmnot_m vs vstate) (rv_vmnand_mm vs vs vstate)) - -;; Helper for emitting the `vmnor.mm` (Mask Bitwise NOR) instruction. -;; -;; vd.mask[i] = !(vs2.mask[i] || vs1.mask[i]) -(decl rv_vmnor_mm (VReg VReg VState) VReg) -(rule (rv_vmnor_mm vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.VmnorMM) vs2 vs1 (unmasked) vstate)) - -;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl gen_extractlane (Type VReg u8) Reg) - -;; When extracting lane 0 for floats, we can use `vfmv.f.s` directly. -(rule 3 (gen_extractlane (ty_vec_fits_in_register ty) src 0) - (if (ty_vector_float ty)) - (rv_vfmv_fs src ty)) - -;; When extracting lane 0 for integers, we can use `vmv.x.s` directly. -(rule 2 (gen_extractlane (ty_vec_fits_in_register ty) src 0) - (if (ty_vector_not_float ty)) - (rv_vmv_xs src ty)) - -;; In the general case, we must first use a `vslidedown` to place the correct lane -;; in index 0, and then use the appropriate `vmv` instruction. -;; If the index fits into a 5-bit immediate, we can emit a `vslidedown.vi`. -(rule 1 (gen_extractlane (ty_vec_fits_in_register ty) src (uimm5_from_u8 idx)) - (gen_extractlane ty (rv_vslidedown_vi src idx (unmasked) ty) 0)) - -;; Otherwise lower it into an X register. -(rule 0 (gen_extractlane (ty_vec_fits_in_register ty) src idx) - (gen_extractlane ty (rv_vslidedown_vx src (imm $I64 idx) (unmasked) ty) 0)) - - -;; Build a vector mask from a u64 -;; TODO(#6571): We should merge this with the `vconst` rules, and take advantage of -;; the other existing `vconst` rules. -(decl gen_vec_mask (u64) VReg) - -;; When the immediate fits in a 5-bit immediate, we can use `vmv.v.i` directly. -(rule 1 (gen_vec_mask (imm5_from_u64 imm)) - (rv_vmv_vi imm (vstate_from_type $I64X2))) - -;; Materialize the mask into an X register, and move it into the bottom of -;; the vector register. -(rule 0 (gen_vec_mask mask) - (rv_vmv_sx (imm $I64 mask) (vstate_from_type $I64X2))) - - -;; Loads a `VCodeConstant` value into a vector register. For some special `VCodeConstant`s -;; we can use a dedicated instruction, otherwise we load the value from the pool. -;; -;; Type is the preferred type to use when loading the constant. -(decl gen_constant (Type VCodeConstant) VReg) - -;; The fallback case is to load the constant from the pool. -(rule (gen_constant ty n) - (vec_load - (element_width_from_type ty) - (VecAMode.UnitStride (gen_const_amode n)) - (mem_flags_trusted) - (unmasked) - ty)) - - -;; Emits a vslidedown instruction that moves half the lanes down. -(decl gen_slidedown_half (Type VReg) VReg) - -;; If the lane count can fit in a 5-bit immediate, we can use `vslidedown.vi`. -(rule 1 (gen_slidedown_half (ty_vec_fits_in_register ty) src) - (if-let (uimm5_from_u64 amt) (u64_udiv (ty_lane_count ty) 2)) - (rv_vslidedown_vi src amt (unmasked) ty)) - -;; Otherwise lower it into an X register. -(rule 0 (gen_slidedown_half (ty_vec_fits_in_register ty) src) - (if-let amt (u64_udiv (ty_lane_count ty) 2)) - (rv_vslidedown_vx src (imm $I64 amt) (unmasked) ty)) - - -;; Expands a mask into SEW wide lanes. Enabled lanes are set to all ones, disabled -;; lanes are set to all zeros. -(decl gen_expand_mask (Type VReg) VReg) -(rule (gen_expand_mask ty mask) - (if-let zero (i8_to_imm5 0)) - (if-let neg1 (i8_to_imm5 -1)) - (rv_vmerge_vim (rv_vmv_vi zero ty) neg1 mask ty)) - - -;; Builds a vector mask corresponding to the IntCC operation. -;; TODO: We are still missing some rules here for immediates. See #6623 -(decl gen_icmp_mask (Type IntCC Value Value) VReg) - -;; IntCC.Equal - -(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) - (rv_vmseq_vv x y (unmasked) ty)) - -(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (splat y)) - (rv_vmseq_vx x y (unmasked) ty)) - -(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (splat x) y) - (rv_vmseq_vx y x (unmasked) ty)) - -(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) - (if-let y_imm (replicated_imm5 y)) - (rv_vmseq_vi x y_imm (unmasked) ty)) - -(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) - (if-let x_imm (replicated_imm5 x)) - (rv_vmseq_vi y x_imm (unmasked) ty)) - -;; IntCC.NotEqual - -(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) - (rv_vmsne_vv x y (unmasked) ty)) - -(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (splat y)) - (rv_vmsne_vx x y (unmasked) ty)) - -(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (splat x) y) - (rv_vmsne_vx y x (unmasked) ty)) - -(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) - (if-let y_imm (replicated_imm5 y)) - (rv_vmsne_vi x y_imm (unmasked) ty)) - -(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) - (if-let x_imm (replicated_imm5 x)) - (rv_vmsne_vi y x_imm (unmasked) ty)) - -;; IntCC.UnsignedLessThan - -(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x y) - (rv_vmsltu_vv x y (unmasked) ty)) - -(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x (splat y)) - (rv_vmsltu_vx x y (unmasked) ty)) - -(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) (splat x) y) - (rv_vmsgtu_vx y x (unmasked) ty)) - -(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x y) - (if-let x_imm (replicated_imm5 x)) - (rv_vmsgtu_vi y x_imm (unmasked) ty)) - -;; IntCC.SignedLessThan - -(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x y) - (rv_vmslt_vv x y (unmasked) ty)) - -(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x (splat y)) - (rv_vmslt_vx x y (unmasked) ty)) - -(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) (splat x) y) - (rv_vmsgt_vx y x (unmasked) ty)) - -(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x y) - (if-let x_imm (replicated_imm5 x)) - (rv_vmsgt_vi y x_imm (unmasked) ty)) - -;; IntCC.UnsignedLessThanOrEqual - -(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x y) - (rv_vmsleu_vv x y (unmasked) ty)) - -(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (splat y)) - (rv_vmsleu_vx x y (unmasked) ty)) - -(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x y) - (if-let y_imm (replicated_imm5 y)) - (rv_vmsleu_vi x y_imm (unmasked) ty)) - -;; IntCC.SignedLessThanOrEqual - -(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x y) - (rv_vmsle_vv x y (unmasked) ty)) - -(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (splat y)) - (rv_vmsle_vx x y (unmasked) ty)) - -(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x y) - (if-let y_imm (replicated_imm5 y)) - (rv_vmsle_vi x y_imm (unmasked) ty)) - -;; IntCC.UnsignedGreaterThan - -(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x y) - (rv_vmsgtu_vv x y (unmasked) ty)) - -(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (splat y)) - (rv_vmsgtu_vx x y (unmasked) ty)) - -(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) (splat x) y) - (rv_vmsltu_vx y x (unmasked) ty)) - -(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x y) - (if-let y_imm (replicated_imm5 y)) - (rv_vmsgtu_vi x y_imm (unmasked) ty)) - -;; IntCC.SignedGreaterThan - -(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x y) - (rv_vmsgt_vv x y (unmasked) ty)) - -(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (splat y)) - (rv_vmsgt_vx x y (unmasked) ty)) - -(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) (splat x) y) - (rv_vmslt_vx y x (unmasked) ty)) - -(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x y) - (if-let y_imm (replicated_imm5 y)) - (rv_vmsgt_vi x y_imm (unmasked) ty)) - -;; IntCC.UnsignedGreaterThanOrEqual - -(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) x y) - (rv_vmsgeu_vv x y (unmasked) ty)) - -(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) (splat x) y) - (rv_vmsleu_vx y x (unmasked) ty)) - -(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) x y) - (if-let x_imm (replicated_imm5 x)) - (rv_vmsleu_vi y x_imm (unmasked) ty)) - -;; IntCC.SignedGreaterThanOrEqual - -(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) x y) - (rv_vmsge_vv x y (unmasked) ty)) - -(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) (splat x) y) - (rv_vmsle_vx y x (unmasked) ty)) - -(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) x y) - (if-let x_imm (replicated_imm5 x)) - (rv_vmsle_vi y x_imm (unmasked) ty)) - - - -;; Builds a vector mask corresponding to the FloatCC operation. -(decl gen_fcmp_mask (Type FloatCC Value Value) VReg) - -;; FloatCC.Equal - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x y) - (rv_vmfeq_vv x y (unmasked) ty)) - -(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x (splat y)) - (rv_vmfeq_vf x y (unmasked) ty)) - -(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) (splat x) y) - (rv_vmfeq_vf y x (unmasked) ty)) - -;; FloatCC.NotEqual -;; Note: This is UnorderedNotEqual. It is the only unordered comparison that is not named as such. - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x y) - (rv_vmfne_vv x y (unmasked) ty)) - -(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x (splat y)) - (rv_vmfne_vf x y (unmasked) ty)) - -(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) (splat x) y) - (rv_vmfne_vf y x (unmasked) ty)) - -;; FloatCC.LessThan - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x y) - (rv_vmflt_vv x y (unmasked) ty)) - -(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x (splat y)) - (rv_vmflt_vf x y (unmasked) ty)) - -(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) (splat x) y) - (rv_vmfgt_vf y x (unmasked) ty)) - -;; FloatCC.LessThanOrEqual - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x y) - (rv_vmfle_vv x y (unmasked) ty)) - -(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x (splat y)) - (rv_vmfle_vf x y (unmasked) ty)) - -(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) (splat x) y) - (rv_vmfge_vf y x (unmasked) ty)) - -;; FloatCC.GreaterThan - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x y) - (rv_vmfgt_vv x y (unmasked) ty)) - -(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x (splat y)) - (rv_vmfgt_vf x y (unmasked) ty)) - -(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) (splat x) y) - (rv_vmflt_vf y x (unmasked) ty)) - -;; FloatCC.GreaterThanOrEqual - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x y) - (rv_vmfge_vv x y (unmasked) ty)) - -(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x (splat y)) - (rv_vmfge_vf x y (unmasked) ty)) - -(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) (splat x) y) - (rv_vmfle_vf y x (unmasked) ty)) - -;; FloatCC.Ordered - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Ordered) x y) - (rv_vmand_mm - (gen_fcmp_mask ty (FloatCC.Equal) x x) - (gen_fcmp_mask ty (FloatCC.Equal) y y) - ty)) - -;; FloatCC.Unordered - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Unordered) x y) - (rv_vmor_mm - (gen_fcmp_mask ty (FloatCC.NotEqual) x x) - (gen_fcmp_mask ty (FloatCC.NotEqual) y y) - ty)) - -;; FloatCC.OrderedNotEqual - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.OrderedNotEqual) x y) - (rv_vmor_mm - (gen_fcmp_mask ty (FloatCC.LessThan) x y) - (gen_fcmp_mask ty (FloatCC.LessThan) y x) - ty)) - -;; FloatCC.UnorderedOrEqual - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrEqual) x y) - (rv_vmnor_mm - (gen_fcmp_mask ty (FloatCC.LessThan) x y) - (gen_fcmp_mask ty (FloatCC.LessThan) y x) - ty)) - -;; FloatCC.UnorderedOrGreaterThan - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThan) x y) - (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThanOrEqual) x y) ty)) - -;; FloatCC.UnorderedOrGreaterThanOrEqual - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThanOrEqual) x y) - (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThan) x y) ty)) - -;; FloatCC.UnorderedOrLessThan - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThan) x y) - (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThanOrEqual) x y) ty)) - -;; FloatCC.UnorderedOrLessThanOrEqual - -(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThanOrEqual) x y) - (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThan) x y) ty)) - - -;; Emits a `vfcvt.x.f.v` instruction with the given rounding mode. -(decl gen_vfcvt_x_f (VReg FRM VState) VReg) - -;; We have a special instruction for RTZ -(rule 1 (gen_vfcvt_x_f x (FRM.RTZ) vstate) - (rv_vfcvt_rtz_x_f_v x (unmasked) vstate)) - -;; In the general case we need to first switch into the appropriate rounding mode. -(rule 0 (gen_vfcvt_x_f x frm vstate) - (let (;; Set the rounding mode and save the current mode - (saved_frm XReg (rv_fsrmi frm)) - (res VReg (rv_vfcvt_x_f_v x (unmasked) vstate)) - ;; Restore the previous rounding mode - (_ Unit (rv_fsrm saved_frm))) - res)) - - -;; Returns the maximum value integer value that can be represented by a float -(decl float_int_max (Type) u64) -(rule (float_int_max $F32) 0x4B000000) -(rule (float_int_max $F64) 0x4330000000000000) - -;; Builds the instruction sequence to round a vector register to FRM -(decl gen_vec_round (VReg FRM Type) VReg) - -;; For floating-point round operations, if the input is NaN, +/-infinity, or +/-0, the -;; same input is returned as the rounded result; this differs from behavior of -;; RISCV fcvt instructions (which round out-of-range values to the nearest -;; max or min value), therefore special handling is needed for these values. -(rule (gen_vec_round x frm (ty_vec_fits_in_register ty)) - (let ((scalar_ty Type (lane_type ty)) - ;; if x is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits - ;; in mantissa, the result is the same as src, build a mask for those cases. - ;; (There is an additional fixup for NaN's at the end) - (abs VReg (rv_vfabs_v x (unmasked) ty)) - (max FReg (imm scalar_ty (float_int_max scalar_ty))) - (exact VReg (rv_vmflt_vf abs max (unmasked) ty)) - - ;; The rounding is performed by converting from float to integer, with the - ;; desired rounding mode. And then converting back with the default rounding - ;; mode. - (int VReg (gen_vfcvt_x_f x frm ty)) - (cvt VReg (rv_vfcvt_f_x_v int (unmasked) ty)) - ;; Copy the sign bit from the original value. - (signed VReg (rv_vfsgnj_vv cvt x (unmasked) ty)) - - ;; We want to return a arithmetic nan if the input is a canonical nan. - ;; Convert them by adding 0.0 to the input. - (float_zero FReg (gen_bitcast (zero_reg) (float_int_of_same_size scalar_ty) scalar_ty)) - (corrected_nan VReg (rv_vfadd_vf x float_zero (unmasked) ty))) - ;; Merge the original value if it does not need rounding, or the rounded value - (rv_vmerge_vvm corrected_nan signed exact ty))) diff --git a/hbcb/src/lib.rs b/hbcb/src/lib.rs deleted file mode 100644 index 6dd7b4a..0000000 --- a/hbcb/src/lib.rs +++ /dev/null @@ -1,197 +0,0 @@ -//! risc-v 64-bit Instruction Set Architecture. - -#![allow(clippy::all)] - -extern crate alloc; - -use { - crate::settings as riscv_settings, - alloc::{boxed::Box, vec::Vec}, - core::fmt, - cranelift_codegen::{ - dominator_tree::DominatorTree, - ir::{self, Function, Type}, - isa::{Builder as IsaBuilder, FunctionAlignment, OwnedTargetIsa, TargetIsa}, - machinst::{ - compile, CompiledCode, CompiledCodeStencil, MachInst, MachTextSectionBuilder, Reg, - SigSet, TextSectionBuilder, VCode, - }, - settings::{self as shared_settings, Flags}, - CodegenError, CodegenResult, - }, - cranelift_control::ControlPlane, - target_lexicon::{Architecture, Triple}, -}; -mod abi; -pub(crate) mod inst; -mod lower; -mod settings; -use self::inst::EmitInfo; - -/// An riscv64 backend. -pub struct Riscv64Backend { - triple: Triple, - flags: shared_settings::Flags, - isa_flags: riscv_settings::Flags, -} - -impl Riscv64Backend { - /// Create a new riscv64 backend with the given (shared) flags. - pub fn new_with_flags( - triple: Triple, - flags: shared_settings::Flags, - isa_flags: riscv_settings::Flags, - ) -> Riscv64Backend { - Riscv64Backend { triple, flags, isa_flags } - } - - /// This performs lowering to VCode, register-allocates the code, computes block layout and - /// finalizes branches. The result is ready for binary emission. - fn compile_vcode( - &self, - func: &Function, - domtree: &DominatorTree, - ctrl_plane: &mut ControlPlane, - ) -> CodegenResult<(VCode, regalloc2::Output)> { - let emit_info = EmitInfo::new(self.flags.clone(), self.isa_flags.clone()); - let sigs = SigSet::new::(func, &self.flags)?; - let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?; - compile::compile::(func, domtree, self, abi, emit_info, sigs, ctrl_plane) - } -} - -impl TargetIsa for Riscv64Backend { - fn compile_function( - &self, - func: &Function, - domtree: &DominatorTree, - want_disasm: bool, - ctrl_plane: &mut ControlPlane, - ) -> CodegenResult { - let (vcode, regalloc_result) = self.compile_vcode(func, domtree, ctrl_plane)?; - - let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); - let emit_result = vcode.emit(®alloc_result, want_disasm, &self.flags, ctrl_plane); - let frame_size = emit_result.frame_size; - let value_labels_ranges = emit_result.value_labels_ranges; - let buffer = emit_result.buffer; - let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; - let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; - - if let Some(disasm) = emit_result.disasm.as_ref() { - log::debug!("disassembly:\n{}", disasm); - } - - Ok(CompiledCodeStencil { - buffer, - frame_size, - vcode: emit_result.disasm, - value_labels_ranges, - sized_stackslot_offsets, - dynamic_stackslot_offsets, - bb_starts: emit_result.bb_offsets, - bb_edges: emit_result.bb_edges, - }) - } - - fn name(&self) -> &'static str { - "riscv64" - } - - fn dynamic_vector_bytes(&self, _dynamic_ty: ir::Type) -> u32 { - 16 - } - - fn triple(&self) -> &Triple { - &self.triple - } - - fn flags(&self) -> &shared_settings::Flags { - &self.flags - } - - fn isa_flags(&self) -> Vec { - self.isa_flags.iter().collect() - } - - fn text_section_builder(&self, num_funcs: usize) -> Box { - Box::new(MachTextSectionBuilder::::new(num_funcs)) - } - - fn function_alignment(&self) -> FunctionAlignment { - inst::Inst::function_alignment() - } - - fn page_size_align_log2(&self) -> u8 { - debug_assert_eq!(1 << 12, 0x1000); - 12 - } - - fn has_native_fma(&self) -> bool { - true - } - - fn has_x86_blendv_lowering(&self, _: Type) -> bool { - false - } - - fn has_x86_pshufb_lowering(&self) -> bool { - false - } - - fn has_x86_pmulhrsw_lowering(&self) -> bool { - false - } - - fn has_x86_pmaddubsw_lowering(&self) -> bool { - false - } -} - -impl fmt::Display for Riscv64Backend { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("MachBackend") - .field("name", &self.name()) - .field("triple", &self.triple()) - .field("flags", &format!("{}", self.flags())) - .finish() - } -} - -/// Create a new `isa::Builder`. -pub fn isa_builder(triple: Triple) -> IsaBuilder { - match triple.architecture { - Architecture::Riscv64(..) => {} - _ => unreachable!(), - } - IsaBuilder::new(triple, riscv_settings::builder(), isa_constructor) -} - -fn isa_constructor( - triple: Triple, - shared_flags: Flags, - builder: &shared_settings::Builder, -) -> CodegenResult { - let isa_flags = riscv_settings::Flags::new(&shared_flags, builder); - - // The RISC-V backend does not work without at least the G extension enabled. - // The G extension is simply a combination of the following extensions: - // - I: Base Integer Instruction Set - // - M: Integer Multiplication and Division - // - A: Atomic Instructions - // - F: Single-Precision Floating-Point - // - D: Double-Precision Floating-Point - // - Zicsr: Control and Status Register Instructions - // - Zifencei: Instruction-Fetch Fence - // - // Ensure that those combination of features is enabled. - if !isa_flags.has_g() { - return Err(CodegenError::Unsupported( - "The RISC-V Backend currently requires all the features in the G Extension enabled" - .into(), - )); - } - - let backend = Riscv64Backend::new_with_flags(triple, shared_flags, isa_flags); - Ok(backend.wrapped()) -} diff --git a/hbcb/src/lower.isle b/hbcb/src/lower.isle deleted file mode 100644 index fff894e..0000000 --- a/hbcb/src/lower.isle +++ /dev/null @@ -1,2966 +0,0 @@ -;; riscv64 instruction selection and CLIF-to-MachInst lowering. - -;; The main lowering constructor term: takes a clif `Inst` and returns the -;; register(s) within which the lowered instruction's result values live. -(decl partial lower (Inst) InstOutput) - -;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (has_type ty (iconst (u64_from_imm64 n)))) - (imm ty n)) - -;; ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (has_type (ty_supported_vec ty) (vconst n))) - (gen_constant ty (const_to_vconst n))) - -;;;; Rules for `f16const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (f16const (u16_from_ieee16 n))) - (imm $F16 n)) - -;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (f32const (u32_from_ieee32 n))) - (imm $F32 n)) - -;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (f64const (u64_from_ieee64 n))) - (imm $F64 n)) - -;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Base case, simply adding things in registers. -(rule -1 (lower (has_type (fits_in_32 (ty_int ty)) (iadd x y))) - (rv_addw x y)) - -(rule 0 (lower (has_type $I64 (iadd x y))) - (rv_add x y)) - -;; Special cases for when one operand is an immediate that fits in 12 bits. -(rule 1 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (imm12_from_value y)))) - (alu_rr_imm12 (select_addi ty) x y)) - -(rule 2 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (imm12_from_value x) y))) - (alu_rr_imm12 (select_addi ty) y x)) - -;; Special case when one of the operands is uextended -;; Needs `Zba` -(rule 3 (lower (has_type $I64 (iadd x (uextend y @ (value_type $I32))))) - (if-let $true (has_zba)) - (rv_adduw y x)) - -(rule 4 (lower (has_type $I64 (iadd (uextend x @ (value_type $I32)) y))) - (if-let $true (has_zba)) - (rv_adduw x y)) - -;; Add with const shift. We have a few of these instructions with `Zba`. -(decl pure partial match_shnadd (Imm64) AluOPRRR) -(rule (match_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add)) -(rule (match_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add)) -(rule (match_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add)) - -(rule 3 (lower (has_type $I64 (iadd x (ishl y (maybe_uextend (iconst n)))))) - (if-let $true (has_zba)) - (if-let shnadd (match_shnadd n)) - (alu_rrr shnadd y x)) - -(rule 4 (lower (has_type $I64 (iadd (ishl x (maybe_uextend (iconst n))) y))) - (if-let $true (has_zba)) - (if-let shnadd (match_shnadd n)) - (alu_rrr shnadd x y)) - - -;; Add with uextended const shift. We have a few of these instructions with `Zba`. -;; -;; !!! Important !!! -;; These rules only work for (ishl (uextend _) _) and not for (uextend (ishl _ _))! -;; Getting this wrong means a potential misscalculation of the shift amount. -;; Additionally we can only ensure that this is correct if the uextend is 32 to 64 bits. -(decl pure partial match_shnadd_uw (Imm64) AluOPRRR) -(rule (match_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw)) -(rule (match_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw)) -(rule (match_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw)) - -(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y @ (value_type $I32)) (maybe_uextend (iconst n)))))) - (if-let $true (has_zba)) - (if-let shnadd_uw (match_shnadd_uw n)) - (alu_rrr shnadd_uw y x)) - -(rule 6 (lower (has_type $I64 (iadd (ishl (uextend x @ (value_type $I32)) (maybe_uextend (iconst n))) y))) - (if-let $true (has_zba)) - (if-let shnadd_uw (match_shnadd_uw n)) - (alu_rrr shnadd_uw x y)) - -;; I128 cases -(rule 7 (lower (has_type $I128 (iadd x y))) - (let ((low XReg (rv_add (value_regs_get x 0) (value_regs_get y 0))) - ;; compute carry. - (carry XReg (rv_sltu low (value_regs_get y 0))) - ;; - (high_tmp XReg (rv_add (value_regs_get x 1) (value_regs_get y 1))) - ;; add carry. - (high XReg (rv_add high_tmp carry))) - (value_regs low high))) - -;; SIMD Vectors -(rule 8 (lower (has_type (ty_supported_vec ty) (iadd x y))) - (rv_vadd_vv x y (unmasked) ty)) - -(rule 9 (lower (has_type (ty_supported_vec ty) (iadd x (splat y)))) - (rv_vadd_vx x y (unmasked) ty)) - -(rule 10 (lower (has_type (ty_supported_vec ty) (iadd x (splat (sextend y @ (value_type sext_ty)))))) - (if-let half_ty (ty_half_width ty)) - (if-let $true (ty_equal (lane_type half_ty) sext_ty)) - (rv_vwadd_wx x y (unmasked) (vstate_mf2 half_ty))) - -(rule 10 (lower (has_type (ty_supported_vec ty) (iadd x (splat (uextend y @ (value_type uext_ty)))))) - (if-let half_ty (ty_half_width ty)) - (if-let $true (ty_equal (lane_type half_ty) uext_ty)) - (rv_vwaddu_wx x y (unmasked) (vstate_mf2 half_ty))) - -(rule 20 (lower (has_type (ty_supported_vec ty) (iadd x y))) - (if-let y_imm (replicated_imm5 y)) - (rv_vadd_vi x y_imm (unmasked) ty)) - - -(rule 12 (lower (has_type (ty_supported_vec ty) (iadd (splat x) y))) - (rv_vadd_vx y x (unmasked) ty)) - -(rule 13 (lower (has_type (ty_supported_vec ty) (iadd (splat (sextend x @ (value_type sext_ty))) y))) - (if-let half_ty (ty_half_width ty)) - (if-let $true (ty_equal (lane_type half_ty) sext_ty)) - (rv_vwadd_wx y x (unmasked) (vstate_mf2 half_ty))) - -(rule 13 (lower (has_type (ty_supported_vec ty) (iadd (splat (uextend x @ (value_type uext_ty))) y))) - (if-let half_ty (ty_half_width ty)) - (if-let $true (ty_equal (lane_type half_ty) uext_ty)) - (rv_vwaddu_wx y x (unmasked) (vstate_mf2 half_ty))) - -(rule 21 (lower (has_type (ty_supported_vec ty) (iadd x y))) - (if-let x_imm (replicated_imm5 x)) - (rv_vadd_vi y x_imm (unmasked) ty)) - -;; Signed Widening Low Additions - -(rule 9 (lower (has_type (ty_supported_vec _) (iadd x (swiden_low y @ (value_type in_ty))))) - (rv_vwadd_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 12 (lower (has_type (ty_supported_vec _) (iadd (swiden_low x @ (value_type in_ty)) y))) - (rv_vwadd_wv y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_low x @ (value_type in_ty)) - (swiden_low y)))) - (rv_vwadd_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_low x @ (value_type in_ty)) - (splat (sextend y @ (value_type sext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) sext_ty)) - (rv_vwadd_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 15 (lower (has_type (ty_supported_vec _) (iadd (splat (sextend x @ (value_type sext_ty))) - (swiden_low y @ (value_type in_ty))))) - (if-let $true (ty_equal (lane_type in_ty) sext_ty)) - (rv_vwadd_vx y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Signed Widening High Additions -;; These are the same as the low additions, but we first slide down the inputs. - -(rule 9 (lower (has_type (ty_supported_vec _) (iadd x (swiden_high y @ (value_type in_ty))))) - (rv_vwadd_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 12 (lower (has_type (ty_supported_vec _) (iadd (swiden_high x @ (value_type in_ty)) y))) - (rv_vwadd_wv y (gen_slidedown_half in_ty x) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_high x @ (value_type in_ty)) - (swiden_high y)))) - (rv_vwadd_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_high x @ (value_type in_ty)) - (splat (sextend y @ (value_type sext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) sext_ty)) - (rv_vwadd_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 15 (lower (has_type (ty_supported_vec _) (iadd (splat (sextend x @ (value_type sext_ty))) - (swiden_high y @ (value_type in_ty))))) - (if-let $true (ty_equal (lane_type in_ty) sext_ty)) - (rv_vwadd_vx (gen_slidedown_half in_ty y) x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Unsigned Widening Low Additions - -(rule 9 (lower (has_type (ty_supported_vec _) (iadd x (uwiden_low y @ (value_type in_ty))))) - (rv_vwaddu_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 12 (lower (has_type (ty_supported_vec _) (iadd (uwiden_low x @ (value_type in_ty)) y))) - (rv_vwaddu_wv y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_low x @ (value_type in_ty)) - (uwiden_low y)))) - (rv_vwaddu_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_low x @ (value_type in_ty)) - (splat (uextend y @ (value_type uext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) uext_ty)) - (rv_vwaddu_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 15 (lower (has_type (ty_supported_vec _) (iadd (splat (uextend x @ (value_type uext_ty))) - (uwiden_low y @ (value_type in_ty))))) - (if-let $true (ty_equal (lane_type in_ty) uext_ty)) - (rv_vwaddu_vx y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Unsigned Widening High Additions -;; These are the same as the low additions, but we first slide down the inputs. - -(rule 9 (lower (has_type (ty_supported_vec _) (iadd x (uwiden_high y @ (value_type in_ty))))) - (rv_vwaddu_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 12 (lower (has_type (ty_supported_vec _) (iadd (uwiden_high x @ (value_type in_ty)) y))) - (rv_vwaddu_wv y (gen_slidedown_half in_ty x) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_high x @ (value_type in_ty)) - (uwiden_high y)))) - (rv_vwaddu_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_high x @ (value_type in_ty)) - (splat (uextend y @ (value_type uext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) uext_ty)) - (rv_vwaddu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 15 (lower (has_type (ty_supported_vec _) (iadd (splat (uextend y @ (value_type uext_ty))) - (uwiden_high x @ (value_type in_ty))))) - (if-let $true (ty_equal (lane_type in_ty) uext_ty)) - (rv_vwaddu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Signed Widening Mixed High/Low Additions - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_low x @ (value_type in_ty)) - (swiden_high y)))) - (rv_vwadd_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_high x @ (value_type in_ty)) - (swiden_low y)))) - (rv_vwadd_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Unsigned Widening Mixed High/Low Additions - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_low x @ (value_type in_ty)) - (uwiden_high y)))) - (rv_vwaddu_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_high x @ (value_type in_ty)) - (uwiden_low y)))) - (rv_vwaddu_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Fused Multiply Accumulate Rules `vmacc` -;; -;; I dont think we can use `vmadd`/`vmnsub` here since it just modifies the multiplication -;; register instead of the addition one. The actual pattern matched seems to be -;; exactly the same. - -(rule 9 (lower (has_type (ty_supported_vec ty) (iadd x (imul y z)))) - (rv_vmacc_vv x y z (unmasked) ty)) - -(rule 10 (lower (has_type (ty_supported_vec ty) (iadd x (imul y (splat z))))) - (rv_vmacc_vx x y z (unmasked) ty)) - -(rule 11 (lower (has_type (ty_supported_vec ty) (iadd x (imul (splat y) z)))) - (rv_vmacc_vx x z y (unmasked) ty)) - -(rule 12 (lower (has_type (ty_supported_vec ty) (iadd (imul x y) z))) - (rv_vmacc_vv z x y (unmasked) ty)) - -(rule 13 (lower (has_type (ty_supported_vec ty) (iadd (imul x (splat y)) z))) - (rv_vmacc_vx z x y (unmasked) ty)) - -(rule 14 (lower (has_type (ty_supported_vec ty) (iadd (imul (splat x) y) z))) - (rv_vmacc_vx z y x (unmasked) ty)) - -;; Fused Multiply Subtract Rules `vnmsac` - -(rule 9 (lower (has_type (ty_supported_vec ty) (iadd x (ineg (imul y z))))) - (rv_vnmsac_vv x y z (unmasked) ty)) - -(rule 10 (lower (has_type (ty_supported_vec ty) (iadd x (ineg (imul y (splat z)))))) - (rv_vnmsac_vx x y z (unmasked) ty)) - -(rule 11 (lower (has_type (ty_supported_vec ty) (iadd x (ineg (imul (splat y) z))))) - (rv_vnmsac_vx x z y (unmasked) ty)) - -(rule 12 (lower (has_type (ty_supported_vec ty) (iadd (ineg (imul x y)) z))) - (rv_vnmsac_vv z x y (unmasked) ty)) - -(rule 13 (lower (has_type (ty_supported_vec ty) (iadd (ineg (imul x (splat y))) z))) - (rv_vnmsac_vx z x y (unmasked) ty)) - -(rule 14 (lower (has_type (ty_supported_vec ty) (iadd (ineg (imul (splat x) y)) z))) - (rv_vnmsac_vx z y x (unmasked) ty)) - -;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;; -(rule 0 (lower (has_type (fits_in_32 ty) (uadd_overflow_trap x y tc))) - (let ((tmp_x XReg (zext x)) - (tmp_y XReg (zext y)) - (sum XReg (rv_add tmp_x tmp_y)) - (test XReg (rv_srli sum (imm12_const (ty_bits ty)))) - (_ InstOutput (gen_trapnz test tc))) - sum)) - -(rule 1 (lower (has_type $I64 (uadd_overflow_trap x y tc))) - (let ((tmp XReg (rv_add x y)) - (_ InstOutput (gen_trapif (IntCC.UnsignedLessThan) tmp x tc))) - tmp)) - -;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Base case, simply subtracting things in registers. - -(rule 0 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y))) - (rv_subw x y)) - -(rule 1 (lower (has_type $I64 (isub x y))) - (rv_sub x y)) - -(rule 2 (lower (has_type $I128 (isub x y))) - (i128_sub x y)) - -;; Switch to an `addi` by a negative if we can fit the value in an `imm12`. -(rule 3 (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y))) - (if-let imm12_neg (imm12_from_negated_value y)) - (alu_rr_imm12 (select_addi ty) x imm12_neg)) - -;; SIMD Vectors -(rule 4 (lower (has_type (ty_supported_vec ty) (isub x y))) - (rv_vsub_vv x y (unmasked) ty)) - -(rule 5 (lower (has_type (ty_supported_vec ty) (isub x (splat y)))) - (rv_vsub_vx x y (unmasked) ty)) - -(rule 6 (lower (has_type (ty_supported_vec ty) (isub x (splat (sextend y @ (value_type sext_ty)))))) - (if-let half_ty (ty_half_width ty)) - (if-let $true (ty_equal (lane_type half_ty) sext_ty)) - (rv_vwsub_wx x y (unmasked) (vstate_mf2 half_ty))) - -(rule 6 (lower (has_type (ty_supported_vec ty) (isub x (splat (uextend y @ (value_type uext_ty)))))) - (if-let half_ty (ty_half_width ty)) - (if-let $true (ty_equal (lane_type half_ty) uext_ty)) - (rv_vwsubu_wx x y (unmasked) (vstate_mf2 half_ty))) - -(rule 7 (lower (has_type (ty_supported_vec ty) (isub (splat x) y))) - (rv_vrsub_vx y x (unmasked) ty)) - -(rule 8 (lower (has_type (ty_supported_vec ty) (isub x y))) - (if-let imm5_neg (negated_replicated_imm5 y)) - (rv_vadd_vi x imm5_neg (unmasked) ty)) - -(rule 9 (lower (has_type (ty_supported_vec ty) (isub x y))) - (if-let x_imm (replicated_imm5 x)) - (rv_vrsub_vi y x_imm (unmasked) ty)) - - -;; Signed Widening Low Subtractions - -(rule 6 (lower (has_type (ty_supported_vec _) (isub x (swiden_low y @ (value_type in_ty))))) - (rv_vwsub_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_low x @ (value_type in_ty)) - (swiden_low y)))) - (rv_vwsub_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_low x @ (value_type in_ty)) - (splat (sextend y @ (value_type sext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) sext_ty)) - (rv_vwsub_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Signed Widening High Subtractions -;; These are the same as the low widenings, but we first slide down the inputs. - -(rule 6 (lower (has_type (ty_supported_vec _) (isub x (swiden_high y @ (value_type in_ty))))) - (rv_vwsub_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_high x @ (value_type in_ty)) - (swiden_high y)))) - (rv_vwsub_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_high x @ (value_type in_ty)) - (splat (sextend y @ (value_type sext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) sext_ty)) - (rv_vwsub_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Unsigned Widening Low Subtractions - -(rule 6 (lower (has_type (ty_supported_vec _) (isub x (uwiden_low y @ (value_type in_ty))))) - (rv_vwsubu_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_low x @ (value_type in_ty)) - (uwiden_low y)))) - (rv_vwsubu_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_low x @ (value_type in_ty)) - (splat (uextend y @ (value_type uext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) uext_ty)) - (rv_vwsubu_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Unsigned Widening High Subtractions -;; These are the same as the low widenings, but we first slide down the inputs. - -(rule 6 (lower (has_type (ty_supported_vec _) (isub x (uwiden_high y @ (value_type in_ty))))) - (rv_vwsubu_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_high x @ (value_type in_ty)) - (uwiden_high y)))) - (rv_vwsubu_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_high x @ (value_type in_ty)) - (splat (uextend y @ (value_type uext_ty)))))) - (if-let $true (ty_equal (lane_type in_ty) uext_ty)) - (rv_vwsubu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Signed Widening Mixed High/Low Subtractions - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_low x @ (value_type in_ty)) - (swiden_high y)))) - (rv_vwsub_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_high x @ (value_type in_ty)) - (swiden_low y)))) - (rv_vwsub_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -;; Unsigned Widening Mixed High/Low Subtractions - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_low x @ (value_type in_ty)) - (uwiden_high y)))) - (rv_vwsubu_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - -(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_high x @ (value_type in_ty)) - (uwiden_low y)))) - (rv_vwsubu_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) - - -;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (has_type (ty_int ty) (ineg val))) - (neg ty val)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (ineg x))) - (rv_vneg_v x (unmasked) ty)) - - -;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y))) - (rv_mul x y)) - -(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y))) - (rv_mulw x y)) - -;; for I128 -(rule 2 (lower (has_type $I128 (imul x y))) - (let - ((x_regs ValueRegs x) - (x_lo XReg (value_regs_get x_regs 0)) - (x_hi XReg (value_regs_get x_regs 1)) - - ;; Get the high/low registers for `y`. - (y_regs ValueRegs y) - (y_lo XReg (value_regs_get y_regs 0)) - (y_hi XReg (value_regs_get y_regs 1)) - - ;; 128bit mul formula: - ;; dst_lo = x_lo * y_lo - ;; dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo) - ;; - ;; We can convert the above formula into the following - ;; mulhu dst_hi, x_lo, y_lo - ;; madd dst_hi, x_lo, y_hi, dst_hi - ;; madd dst_hi, x_hi, y_lo, dst_hi - ;; madd dst_lo, x_lo, y_lo, zero - (dst_hi1 XReg (rv_mulhu x_lo y_lo)) - (dst_hi2 XReg (madd x_lo y_hi dst_hi1)) - (dst_hi XReg (madd x_hi y_lo dst_hi2)) - (dst_lo XReg (madd x_lo y_lo (zero_reg)))) - (value_regs dst_lo dst_hi))) - -;; Special case 128-bit multiplication where the operands are extended since -;; that maps directly to the `mulhu` and `mulh` instructions. -(rule 6 (lower (has_type $I128 (imul (uextend x) (uextend y)))) - (let ((x XReg (zext x)) - (y XReg (zext y))) - (value_regs (rv_mul x y) (rv_mulhu x y)))) - -(rule 6 (lower (has_type $I128 (imul (sextend x) (sextend y)))) - (let ((x XReg (sext x)) - (y XReg (sext y))) - (value_regs (rv_mul x y) (rv_mulh x y)))) - -;; Vector multiplication - -(rule 3 (lower (has_type (ty_supported_vec ty) (imul x y))) - (rv_vmul_vv x y (unmasked) ty)) - -(rule 4 (lower (has_type (ty_supported_vec ty) (imul (splat x) y))) - (rv_vmul_vx y x (unmasked) ty)) - -(rule 5 (lower (has_type (ty_supported_vec ty) (imul x (splat y)))) - (rv_vmul_vx x y (unmasked) ty)) - -;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y))) - (lower_smlhi ty (sext x) (sext y))) - -(rule 1 (lower (has_type (ty_supported_vec ty) (smulhi x y))) - (rv_vmulh_vv x y (unmasked) ty)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (smulhi (splat x) y))) - (rv_vmulh_vx y x (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (smulhi x (splat y)))) - (rv_vmulh_vx x y (unmasked) ty)) - -;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (fits_in_32 ty) (umulhi x y))) - (let ((tmp XReg (rv_mul (zext x) (zext y)))) - (rv_srli tmp (imm12_const (ty_bits ty))))) - -(rule 1 (lower (has_type $I64 (umulhi x y))) - (rv_mulhu x y)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (umulhi x y))) - (rv_vmulhu_vv x y (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (umulhi (splat x) y))) - (rv_vmulhu_vx y x (unmasked) ty)) - -(rule 4 (lower (has_type (ty_supported_vec ty) (umulhi x (splat y)))) - (rv_vmulhu_vx x y (unmasked) ty)) - -;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (fits_in_16 ty) (udiv x y))) - (if-let $true (has_m)) - (rv_divuw (zext x) (nonzero_divisor (zext y)))) - -(rule 1 (lower (has_type (fits_in_16 ty) (udiv x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 ty imm)) - (rv_divuw (zext x) (zext y))) - -(rule 2 (lower (has_type $I32 (udiv x y))) - (if-let $true (has_m)) - (rv_divuw x (nonzero_divisor (zext y)))) - -(rule 3 (lower (has_type $I32 (udiv x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 $I32 imm)) - (rv_divuw x y)) - -(rule 2 (lower (has_type $I64 (udiv x y))) - (if-let $true (has_m)) - (rv_divu x (nonzero_divisor y))) - -(rule 3 (lower (has_type $I64 (udiv x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 $I64 imm)) - (rv_divu x y)) - -;; Traps if the input register is zero, otherwise returns the same register. -(decl nonzero_divisor (XReg) XReg) -(rule (nonzero_divisor val) - (let ((_ InstOutput (gen_trapif (IntCC.Equal) val (zero_reg) (TrapCode.IntegerDivisionByZero)))) - val)) - -;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (fits_in_16 ty) (sdiv x y))) - (if-let $true (has_m)) - (let ((x XReg (sext x))) - (rv_divw x (safe_sdiv_divisor ty x (sext y))))) - -(rule 1 (lower (has_type (fits_in_16 ty) (sdiv x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 ty imm)) - (rv_divw (sext x) (sext y))) - -(rule 2 (lower (has_type $I32 (sdiv x y))) - (if-let $true (has_m)) - (let ((x XReg (sext x))) - (rv_divw x (safe_sdiv_divisor $I32 x (sext y))))) - -(rule 3 (lower (has_type $I32 (sdiv x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 $I32 imm)) - (rv_divw x y)) - -(rule 2 (lower (has_type $I64 (sdiv x y))) - (if-let $true (has_m)) - (rv_div x (safe_sdiv_divisor $I64 x y))) - -(rule 3 (lower (has_type $I64 (sdiv x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 $I64 imm)) - (rv_div x y)) - -;; Check for two trapping conditions: -;; -;; * the divisor is 0, or... -;; * the divisor is -1 and the dividend is $ty::MIN -(decl safe_sdiv_divisor (Type XReg XReg) XReg) -(rule (safe_sdiv_divisor ty x y) - (let ( - (y XReg (nonzero_divisor y)) - (min XReg (imm $I64 (u64_shl 0xffffffff_ffffffff (u64_sub (ty_bits ty) 1)))) - (x_is_not_min XReg (rv_xor x min)) - (y_is_not_neg_one XReg (rv_not y)) - (no_int_overflow XReg (rv_or x_is_not_min y_is_not_neg_one)) - (_ InstOutput (gen_trapif - (IntCC.Equal) - no_int_overflow (zero_reg) - (TrapCode.IntegerOverflow)))) - y)) - -;;;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (fits_in_16 ty) (urem x y))) - (if-let $true (has_m)) - (rv_remuw (zext x) (nonzero_divisor (zext y)))) - -(rule 1 (lower (has_type (fits_in_16 ty) (urem x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 ty imm)) - (rv_remuw (zext x) (zext y))) - -(rule 2 (lower (has_type $I32 (urem x y))) - (if-let $true (has_m)) - (rv_remuw x (nonzero_divisor (zext y)))) - -(rule 3 (lower (has_type $I32 (urem x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 $I32 imm)) - (rv_remuw x y)) - -(rule 2 (lower (has_type $I64 (urem x y))) - (if-let $true (has_m)) - (rv_remu x (nonzero_divisor y))) - -(rule 3 (lower (has_type $I64 (urem x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 $I64 imm)) - (rv_remu x y)) - -;;;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (fits_in_16 ty) (srem x y))) - (if-let $true (has_m)) - (rv_remw (sext x) (nonzero_divisor (sext y)))) - -(rule 1 (lower (has_type (fits_in_16 ty) (srem x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 ty imm)) - (rv_remw (sext x) (sext y))) - -(rule 2 (lower (has_type $I32 (srem x y))) - (if-let $true (has_m)) - (rv_remw x (nonzero_divisor (sext y)))) - -(rule 3 (lower (has_type $I32 (srem x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 $I32 imm)) - (rv_remw x y)) - -(rule 2 (lower (has_type $I64 (srem x y))) - (if-let $true (has_m)) - (rv_rem x (nonzero_divisor y))) - -(rule 3 (lower (has_type $I64 (srem x y @ (iconst imm)))) - (if-let $true (has_m)) - (if (safe_divisor_from_imm64 $I64 imm)) - (rv_rem x y)) - -;;;; Rules for `and` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule -1 (lower (has_type (fits_in_64 ty) (band x y))) - (rv_and x y)) - -(rule 0 (lower (has_type $I128 (band x y))) - (value_regs - (rv_and (value_regs_get x 0) (value_regs_get y 0)) - (rv_and (value_regs_get x 1) (value_regs_get y 1)))) - -;; Special cases for when one operand is an immediate that fits in 12 bits. -(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (band x (imm12_from_value y)))) - (rv_andi x y)) - -(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (band (imm12_from_value x) y))) - (rv_andi y x)) - -(rule 3 (lower (has_type (ty_supported_float ty) (band x y))) - (lower_float_binary (AluOPRRR.And) x y ty)) - -;; Specialized lowerings for `(band x (bnot y))` which is additionally produced -;; by Cranelift's `band_not` instruction that is legalized into the simpler -;; forms early on. - -(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (band x (bnot y)))) - (if-let $true (has_zbb)) - (rv_andn x y)) - -(rule 5 (lower (has_type (fits_in_64 (ty_int ty)) (band (bnot y) x))) - (if-let $true (has_zbb)) - (rv_andn x y)) - -(rule 6 (lower (has_type $I128 (band x (bnot y)))) - (if-let $true (has_zbb)) - (let ((low XReg (rv_andn (value_regs_get x 0) (value_regs_get y 0))) - (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) - (value_regs low high))) - -(rule 7 (lower (has_type $I128 (band (bnot y) x))) - (if-let $true (has_zbb)) - (let ((low XReg (rv_andn (value_regs_get x 0) (value_regs_get y 0))) - (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) - (value_regs low high))) - -(rule 8 (lower (has_type (ty_supported_vec ty) (band x y))) - (rv_vand_vv x y (unmasked) ty)) - -(rule 9 (lower (has_type (ty_supported_vec ty) (band x (splat y)))) - (if (ty_vector_not_float ty)) - (rv_vand_vx x y (unmasked) ty)) - -(rule 10 (lower (has_type (ty_supported_vec ty) (band (splat x) y))) - (if (ty_vector_not_float ty)) - (rv_vand_vx y x (unmasked) ty)) - -(rule 11 (lower (has_type (ty_supported_vec ty) (band x y))) - (if-let y_imm (replicated_imm5 y)) - (rv_vand_vi x y_imm (unmasked) ty)) - -(rule 12 (lower (has_type (ty_supported_vec ty) (band x y))) - (if-let x_imm (replicated_imm5 x)) - (rv_vand_vi y x_imm (unmasked) ty)) - -;; `bclr{,i}` specializations from `zbs` - -(rule 13 (lower (has_type (fits_in_32 ty) (band x (bnot (ishl (i64_from_iconst 1) y))))) - (if-let $true (has_zbs)) - (rv_bclr x (rv_andi y (imm12_const (u8_sub (ty_bits ty) 1))))) -(rule 14 (lower (has_type (fits_in_32 ty) (band (bnot (ishl (i64_from_iconst 1) y)) x))) - (if-let $true (has_zbs)) - (rv_bclr x (rv_andi y (imm12_const (u8_sub (ty_bits ty) 1))))) - -(rule 15 (lower (has_type $I64 (band x (bnot (ishl (i64_from_iconst 1) y))))) - (if-let $true (has_zbs)) - (rv_bclr x y)) -(rule 16 (lower (has_type $I64 (band (bnot (ishl (i64_from_iconst 1) y)) x))) - (if-let $true (has_zbs)) - (rv_bclr x y)) - -(rule 17 (lower (has_type (fits_in_64 ty) (band x (u64_from_iconst n)))) - (if-let $true (has_zbs)) - (if-let imm (bclr_imm ty n)) - (rv_bclri x imm)) -(rule 18 (lower (has_type (fits_in_64 ty) (band (u64_from_iconst n) x))) - (if-let $true (has_zbs)) - (if-let imm (bclr_imm ty n)) - (rv_bclri x imm)) - -(decl pure partial bclr_imm (Type u64) Imm12) -(extern constructor bclr_imm bclr_imm) - -;; `bext{,i}` specializations from `zbs` - -(rule 19 (lower (has_type $I32 (band (ushr x y) (u64_from_iconst 1)))) - (if-let $true (has_zbs)) - (rv_bext x (rv_andi y (imm12_const 31)))) -(rule 19 (lower (has_type $I32 (band (sshr x y) (u64_from_iconst 1)))) - (if-let $true (has_zbs)) - (rv_bext x (rv_andi y (imm12_const 31)))) -(rule 19 (lower (has_type $I32 (band (u64_from_iconst 1) (ushr x y)))) - (if-let $true (has_zbs)) - (rv_bext x (rv_andi y (imm12_const 31)))) -(rule 19 (lower (has_type $I32 (band (u64_from_iconst 1) (sshr x y)))) - (if-let $true (has_zbs)) - (rv_bext x (rv_andi y (imm12_const 31)))) - -(rule 19 (lower (has_type $I64 (band (ushr x y) (u64_from_iconst 1)))) - (if-let $true (has_zbs)) - (rv_bext x y)) -(rule 19 (lower (has_type $I64 (band (sshr x y) (u64_from_iconst 1)))) - (if-let $true (has_zbs)) - (rv_bext x y)) -(rule 19 (lower (has_type $I64 (band (u64_from_iconst 1) (ushr x y)))) - (if-let $true (has_zbs)) - (rv_bext x y)) -(rule 19 (lower (has_type $I64 (band (u64_from_iconst 1) (sshr x y)))) - (if-let $true (has_zbs)) - (rv_bext x y)) - -(rule 20 (lower (has_type $I32 (band (ushr x (imm12_from_value y)) (u64_from_iconst 1)))) - (if-let $true (has_zbs)) - (rv_bexti x (imm12_and y 31))) -(rule 20 (lower (has_type $I32 (band (sshr x (imm12_from_value y)) (u64_from_iconst 1)))) - (if-let $true (has_zbs)) - (rv_bexti x (imm12_and y 31))) -(rule 20 (lower (has_type $I64 (band (ushr x (imm12_from_value y)) (u64_from_iconst 1)))) - (if-let $true (has_zbs)) - (rv_bexti x (imm12_and y 63))) -(rule 20 (lower (has_type $I64 (band (sshr x (imm12_from_value y)) (u64_from_iconst 1)))) - (if-let $true (has_zbs)) - (rv_bexti x (imm12_and y 63))) - -;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (ty_int ty) (bor x y))) - (gen_or ty x y)) - -;; Special cases for when one operand is an immediate that fits in 12 bits. -(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (imm12_from_value y)))) - (rv_ori x y)) - -(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bor (imm12_from_value x) y))) - (rv_ori y x)) - -(rule 3 (lower (has_type (ty_supported_float ty) (bor x y))) - (lower_float_binary (AluOPRRR.Or) x y ty)) - -;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced -;; by Cranelift's `bor_not` instruction that is legalized into the simpler -;; forms early on. - -(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (bnot y)))) - (if-let $true (has_zbb)) - (rv_orn x y)) - -(rule 5 (lower (has_type (fits_in_64 (ty_int ty)) (bor (bnot y) x))) - (if-let $true (has_zbb)) - (rv_orn x y)) - -(rule 6 (lower (has_type $I128 (bor x (bnot y)))) - (if-let $true (has_zbb)) - (let ((low XReg (rv_orn (value_regs_get x 0) (value_regs_get y 0))) - (high XReg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) - (value_regs low high))) - -(rule 7 (lower (has_type $I128 (bor (bnot y) x))) - (if-let $true (has_zbb)) - (let ((low XReg (rv_orn (value_regs_get x 0) (value_regs_get y 0))) - (high XReg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) - (value_regs low high))) - -(rule 8 (lower (has_type (ty_supported_vec ty) (bor x y))) - (rv_vor_vv x y (unmasked) ty)) - -(rule 9 (lower (has_type (ty_supported_vec ty) (bor x (splat y)))) - (if (ty_vector_not_float ty)) - (rv_vor_vx x y (unmasked) ty)) - -(rule 10 (lower (has_type (ty_supported_vec ty) (bor (splat x) y))) - (if (ty_vector_not_float ty)) - (rv_vor_vx y x (unmasked) ty)) - -(rule 11 (lower (has_type (ty_supported_vec ty) (bor x y))) - (if-let y_imm (replicated_imm5 y)) - (rv_vor_vi x y_imm (unmasked) ty)) - -(rule 12 (lower (has_type (ty_supported_vec ty) (bor x y))) - (if-let x_imm (replicated_imm5 x)) - (rv_vor_vi y x_imm (unmasked) ty)) - -;; `bset{,i}` specializations from `zbs` - -(rule 13 (lower (has_type $I32 (bor x (ishl (i64_from_iconst 1) y)))) - (if-let $true (has_zbs)) - (rv_bset x (rv_andi y (imm12_const 31)))) -(rule 14 (lower (has_type $I32 (bor (ishl (i64_from_iconst 1) y) x))) - (if-let $true (has_zbs)) - (rv_bset x (rv_andi y (imm12_const 31)))) - -(rule 13 (lower (has_type $I64 (bor x (ishl (i64_from_iconst 1) y)))) - (if-let $true (has_zbs)) - (rv_bset x y)) -(rule 14 (lower (has_type $I64 (bor (ishl (i64_from_iconst 1) y) x))) - (if-let $true (has_zbs)) - (rv_bset x y)) - -(rule 15 (lower (has_type (fits_in_64 _) (bor x (u64_from_iconst n)))) - (if-let $true (has_zbs)) - (if-let imm (bseti_imm n)) - (rv_bseti x imm)) -(rule 16 (lower (has_type (fits_in_64 _) (bor (u64_from_iconst n) x))) - (if-let $true (has_zbs)) - (if-let imm (bseti_imm n)) - (rv_bseti x imm)) - -(decl pure partial bseti_imm (u64) Imm12) -(extern constructor bseti_imm bseti_imm) - -;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y))) - (rv_xor x y)) - -;; Special cases for when one operand is an immediate that fits in 12 bits. -(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x (imm12_from_value y)))) - (rv_xori x y)) - -(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bxor (imm12_from_value x) y))) - (rv_xori y x)) - -(rule 3 (lower (has_type $I128 (bxor x y))) - (lower_b128_binary (AluOPRRR.Xor) x y)) - -(rule 4 (lower (has_type (ty_supported_float ty) (bxor x y))) - (lower_float_binary (AluOPRRR.Xor) x y ty)) - -(rule 5 (lower (has_type (ty_supported_vec ty) (bxor x y))) - (rv_vxor_vv x y (unmasked) ty)) - -(rule 6 (lower (has_type (ty_supported_vec ty) (bxor x (splat y)))) - (if (ty_vector_not_float ty)) - (rv_vxor_vx x y (unmasked) ty)) - -(rule 7 (lower (has_type (ty_supported_vec ty) (bxor (splat x) y))) - (if (ty_vector_not_float ty)) - (rv_vxor_vx y x (unmasked) ty)) - -(rule 8 (lower (has_type (ty_supported_vec ty) (bxor x y))) - (if-let y_imm (replicated_imm5 y)) - (rv_vxor_vi x y_imm (unmasked) ty)) - -(rule 9 (lower (has_type (ty_supported_vec ty) (bxor x y))) - (if-let x_imm (replicated_imm5 x)) - (rv_vxor_vi y x_imm (unmasked) ty)) - -;; `binv{,i}` specializations from `zbs` - -(rule 13 (lower (has_type $I32 (bxor x (ishl (i64_from_iconst 1) y)))) - (if-let $true (has_zbs)) - (rv_binv x (rv_andi y (imm12_const 31)))) -(rule 14 (lower (has_type $I32 (bxor (ishl (i64_from_iconst 1) y) x))) - (if-let $true (has_zbs)) - (rv_binv x (rv_andi y (imm12_const 31)))) - -(rule 13 (lower (has_type $I64 (bxor x (ishl (i64_from_iconst 1) y)))) - (if-let $true (has_zbs)) - (rv_binv x y)) -(rule 14 (lower (has_type $I64 (bxor (ishl (i64_from_iconst 1) y) x))) - (if-let $true (has_zbs)) - (rv_binv x y)) - -(rule 15 (lower (has_type (fits_in_64 _) (bxor x (u64_from_iconst n)))) - (if-let $true (has_zbs)) - (if-let imm (binvi_imm n)) - (rv_binvi x imm)) -(rule 16 (lower (has_type (fits_in_64 _) (bxor (u64_from_iconst n) x))) - (if-let $true (has_zbs)) - (if-let imm (binvi_imm n)) - (rv_binvi x imm)) - -(decl pure partial binvi_imm (u64) Imm12) -(extern constructor binvi_imm binvi_imm) - -;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_int_ref_scalar_64 _) (bnot x))) - (rv_not x)) - -(rule 1 (lower (has_type (ty_supported_float ty) (bnot x))) - (move_x_to_f (rv_not (move_f_to_x x ty)) (float_int_of_same_size ty))) - -(rule 2 (lower (has_type $I128 (bnot x))) - (value_regs - (rv_not (value_regs_get x 0)) - (rv_not (value_regs_get x 1)))) - -(rule 3 (lower (has_type (ty_supported_vec ty) (bnot x))) - (rv_vnot_v x (unmasked) ty)) - -(rule 4 (lower (has_type (ty_int_ref_scalar_64 _) (bnot (bxor x y)))) - (if-let $true (has_zbb)) - (rv_xnor x y)) - -;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (bitrev x))) - (gen_bitrev ty x)) - -(rule 1 (lower (has_type $I128 (bitrev x))) - (value_regs - (gen_bitrev $I64 (value_regs_get x 1)) - (gen_bitrev $I64 (value_regs_get x 0)))) - - -;; Constructs a sequence of instructions that reverse all bits in `x` up to -;; the given type width. -(decl gen_bitrev (Type XReg) XReg) - -(rule 0 (gen_bitrev (ty_16_or_32 (ty_int ty)) x) - (if-let shift_amt (u64_to_imm12 (u64_sub 64 (ty_bits ty)))) - (rv_srli (gen_bitrev $I64 x) shift_amt)) - -(rule 1 (gen_bitrev $I8 x) - (gen_brev8 x $I8)) - -(rule 1 (gen_bitrev $I64 x) - (gen_brev8 (gen_bswap $I64 x) $I64)) - - -;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bswap x))) - (gen_bswap ty x)) - -(rule 2 (lower (has_type $I128 (bswap x))) - (value_regs - (gen_bswap $I64 (value_regs_get x 1)) - (gen_bswap $I64 (value_regs_get x 0)))) - -;; Builds a sequence of instructions that swaps the bytes in `x` up to the given -;; type width. -(decl gen_bswap (Type XReg) XReg) - -;; This is only here to make the rule below work. bswap.i8 isn't valid -(rule 0 (gen_bswap $I8 x) x) -(rule 1 (gen_bswap (ty_int_ref_16_to_64 ty) x) - (if-let half_ty (ty_half_width ty)) - (if-let half_size (u64_to_imm12 (ty_bits half_ty))) - (let (;; This swaps the top bytes and zeroes the bottom bytes, so that - ;; we can or it with the bottom bytes later. - (swap_top XReg (gen_bswap half_ty x)) - (top XReg (rv_slli swap_top half_size)) - - ;; Get the top half, swap it, and zero extend it so we can `or` it - ;; with the bottom half. Note that zero extension here already knows - ;; that `zbb` isn't available and that `half_ty` is not `$I64`, so this - ;; falls back to the shift-then-shift sequence. - (shifted XReg (rv_srli x half_size)) - (swap_bot XReg (gen_bswap half_ty shifted)) - (shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits half_ty)))) - (bot_shifted_left XReg (rv_slli swap_bot shift)) - (bot XReg (rv_srli bot_shifted_left shift))) - (rv_or top bot))) - -(rule 2 (gen_bswap (ty_16_or_32 (ty_int ty)) x) - (if-let $true (has_zbb)) - (if-let shift_amt (u64_to_imm12 (u64_sub 64 (ty_bits ty)))) - (rv_srli (rv_rev8 x) shift_amt)) - -(rule 3 (gen_bswap $I64 x) - (if-let $true (has_zbb)) - (rv_rev8 x)) - -;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (fits_in_64 ty) (ctz x))) - (lower_ctz ty x)) - -(rule 1 (lower (has_type $I128 (ctz x))) - (let ((x_lo XReg (value_regs_get x 0)) - (x_hi XReg (value_regs_get x 1)) - ;; Count both halves - (high XReg (lower_ctz $I64 x_hi)) - (low XReg (lower_ctz $I64 x_lo)) - ;; Only add the top half if the bottom is zero - (high XReg (gen_select_xreg (cmp_eqz x_lo) high (zero_reg))) - (result XReg (rv_add low high))) - (value_regs result (imm $I64 0)))) - -;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (fits_in_64 ty) (clz x))) - (gen_cltz $true x ty)) - -(rule 1 (lower (has_type $I128 (clz x))) - (let ((x_lo XReg (value_regs_get x 0)) - (x_hi XReg (value_regs_get x 1)) - ;; Count both halves - (high XReg (gen_clz x_hi)) - (low XReg (gen_clz x_lo)) - ;; Only add the bottom zeros if the top half is zero - (low XReg (gen_select_xreg (cmp_eqz x_hi) low (zero_reg)))) - (value_regs (rv_add high low) (imm $I64 0)))) - -(rule 2 (lower (has_type (fits_in_16 ty) (clz x))) - (if-let $true (has_zbb)) - (let ((tmp XReg (zext x)) - (count XReg (rv_clz tmp))) - ;; We always do the operation on the full 64-bit register, so subtract 64 from the result. - (rv_addi count (imm12_const_add (ty_bits ty) -64)))) - -(rule 3 (lower (has_type $I32 (clz x))) - (if-let $true (has_zbb)) - (rv_clzw x)) - -(rule 3 (lower (has_type $I64 (clz x))) - (if-let $true (has_zbb)) - (rv_clz x)) - -(decl gen_clz (XReg) XReg) -(rule 0 (gen_clz rs) - (gen_cltz $true rs $I64)) -(rule 1 (gen_clz rs) - (if-let $true (has_zbb)) - (rv_clz rs)) - -;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (has_type (fits_in_64 ty) (cls x))) - (let ((tmp XReg (sext x)) - (tmp2 XReg (gen_select_xreg (cmp_ltz tmp) (rv_not tmp) tmp)) - (tmp3 XReg (gen_clz tmp2))) - ;; clz counted the full register width, so subtract (64-$width), and then - ;; additionally subtract one more, meaning here -65+width is added. - (rv_addi tmp3 (imm12_const_add (ty_bits ty) -65)))) - -;; If the sign bit is set, we count the leading zeros of the inverted value. -;; Otherwise we can just count the leading zeros of the original value. -;; Subtract 1 since the sign bit does not count. -(rule 1 (lower (has_type $I128 (cls x))) - (let ((low XReg (value_regs_get x 0)) - (high XReg (value_regs_get x 1)) - (low XReg (gen_select_xreg (cmp_ltz high) (rv_not low) low)) - (high XReg (gen_select_xreg (cmp_ltz high) (rv_not high) high)) - - ;; Count both halves - (high_cnt XReg (gen_clz high)) - (low_cnt XReg (gen_clz low)) - ;; Only add the bottom zeros if the top half is zero - (low_cnt XReg (gen_select_xreg (cmp_eqz high) low_cnt (zero_reg))) - (count XReg (rv_add high_cnt low_cnt)) - (result XReg (rv_addi count (imm12_const -1)))) - (value_regs result (imm $I64 0)))) - - -;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (fits_in_64 _) (uextend val))) - (zext val)) - -(rule 1 (lower (has_type $I128 (uextend val))) - (value_regs (zext val) (imm $I64 0))) - -;; When the source of an `uextend` is a load, we can merge both ops -(rule 2 (lower (has_type (fits_in_64 _) (uextend (sinkable_load inst ty flags addr offset)))) - (gen_sunk_load inst (amode addr offset) (uextend_load_op ty) flags)) - -(decl pure uextend_load_op (Type) LoadOP) -(rule (uextend_load_op $I8) (LoadOP.Lbu)) -(rule (uextend_load_op $I16) (LoadOP.Lhu)) -(rule (uextend_load_op $I32) (LoadOP.Lwu)) - -;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (fits_in_64 _) (sextend val @ (value_type in_ty)))) - (sext val)) - -(rule 1 (lower (has_type $I128 (sextend val @ (value_type in_ty)))) - (let ((lo XReg (sext val))) - (value_regs lo (rv_srai lo (imm12_const 63))))) - -;; When the source of an `sextend` is a load, we can merge both ops -(rule 2 (lower (has_type (fits_in_64 _) (sextend (sinkable_load inst ty flags addr offset)))) - (gen_sunk_load inst (amode addr offset) (sextend_load_op ty) flags)) - -(decl pure sextend_load_op (Type) LoadOP) -(rule (sextend_load_op $I8) (LoadOP.Lb)) -(rule (sextend_load_op $I16) (LoadOP.Lh)) -(rule (sextend_load_op $I32) (LoadOP.Lw)) - -;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (fits_in_64 _) (popcnt x))) - (gen_popcnt (zext x))) - -(rule 1 (lower (has_type $I128 (popcnt x))) - (let - ((x ValueRegs x) - (low XReg (gen_popcnt (value_regs_get x 0))) - (high XReg (gen_popcnt (value_regs_get x 1))) - (result XReg (rv_add low high))) - (value_regs result (imm $I64 0)))) - -(rule 2 (lower (has_type (fits_in_64 _) (popcnt x))) - (if-let $true (has_zbb)) - (rv_cpop (zext x))) - -(rule 3 (lower (has_type $I32 (popcnt x))) - (if-let $true (has_zbb)) - (rv_cpopw x)) - -(rule 3 (lower (has_type $I128 (popcnt x))) - (if-let $true (has_zbb)) - (let - ((x ValueRegs x) - (low XReg (rv_cpop (value_regs_get x 0))) - (high XReg (rv_cpop (value_regs_get x 1))) - (result XReg (rv_add low high))) - (value_regs result (imm $I64 0)))) - -;; Popcount using multiply. -;; This is popcount64c() from -;; http://en.wikipedia.org/wiki/Hamming_weight -;; -;; Here's the C version for 32 bits: -;; x = x - ((x>> 1) & 0x55555555); -;; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); -;; x = ((x + (x >> 4)) & 0x0F0F0F0F); -;; return (x * 0x01010101) >> 24; // Here 24 is the type width - 8. -;; -;; TODO: LLVM generates a much better implementation for I8X16. See: https://godbolt.org/z/qr6vf9Gr3 -;; For the other types it seems to be largely the same. -(rule 4 (lower (has_type (ty_supported_vec ty) (popcnt x))) - (if-let one (u64_to_uimm5 1)) - (if-let two (u64_to_uimm5 2)) - (if-let four (u64_to_uimm5 4)) - - (let (;; x = x - ((x >> 1) & 0x55555555); - (mask_55 XReg (imm (lane_type ty) (u64_and 0x5555555555555555 (ty_mask (lane_type ty))))) - (count2_shr VReg (rv_vsrl_vi x one (unmasked) ty)) - (count2_and VReg (rv_vand_vx count2_shr mask_55 (unmasked) ty)) - (count2 VReg (rv_vsub_vv x count2_and (unmasked) ty)) - - ;; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - (mask_33 XReg (imm (lane_type ty) (u64_and 0x3333333333333333 (ty_mask (lane_type ty))))) - (count4_shr VReg (rv_vsrl_vi count2 two (unmasked) ty)) - (count4_and VReg (rv_vand_vx count4_shr mask_33 (unmasked) ty)) - (count4_lhs VReg (rv_vand_vx count2 mask_33 (unmasked) ty)) - (count4 VReg (rv_vadd_vv count4_lhs count4_and (unmasked) ty)) - - ;; x = (x + (x >> 4)) & 0x0F0F0F0F; - (mask_0f XReg (imm (lane_type ty) (u64_and 0x0f0f0f0f0f0f0f0f (ty_mask (lane_type ty))))) - (count8_shr VReg (rv_vsrl_vi count4 four (unmasked) ty)) - (count8_add VReg (rv_vadd_vv count4 count8_shr (unmasked) ty)) - (count8 VReg (rv_vand_vx count8_add mask_0f (unmasked) ty)) - - ;; (x * 0x01010101) >> ( - 8) - (mask_01 XReg (imm (lane_type ty) (u64_and 0x0101010101010101 (ty_mask (lane_type ty))))) - (mul VReg (rv_vmul_vx count8 mask_01 (unmasked) ty)) - (shift XReg (imm $I64 (u64_sub (ty_bits (lane_type ty)) 8))) - (res VReg (rv_vsrl_vx mul shift (unmasked) ty))) - res)) - -;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; 8/16 bit types need a mask on the shift amount -(rule 0 (lower (has_type (ty_int (ty_8_or_16 ty)) (ishl x y))) - (if-let mask (u64_to_imm12 (ty_shift_mask ty))) - (rv_sllw x (rv_andi (value_regs_get y 0) mask))) - -;; Using the 32bit version of `sll` automatically masks the shift amount. -(rule 1 (lower (has_type $I32 (ishl x y))) - (rv_sllw x (value_regs_get y 0))) - -;; Similarly, the 64bit version does the right thing. -(rule 1 (lower (has_type $I64 (ishl x y))) - (rv_sll x (value_regs_get y 0))) - -;; If the shift amount is known. We can mask it and encode it in the instruction. -(rule 2 (lower (has_type (int_fits_in_32 ty) (ishl x (maybe_uextend (imm12_from_value y))))) - (rv_slliw x (imm12_and y (ty_shift_mask ty)))) - -;; We technically don't need to mask the shift amount here. The instruction -;; does the right thing. But it's neater when pretty printing it. -(rule 3 (lower (has_type ty @ $I64 (ishl x (maybe_uextend (imm12_from_value y))))) - (rv_slli x (imm12_and y (ty_shift_mask ty)))) - -;; With `Zba` we have a shift that zero extends the LHS argument. -(rule 4 (lower (has_type $I64 (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) - (if-let $true (has_zba)) - (rv_slliuw x y)) - -;; I128 cases -(rule 4 (lower (has_type $I128 (ishl x y))) - (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) - (shamt XReg (value_regs_get tmp 0)) - (len_sub_shamt XReg (value_regs_get tmp 1)) - ;; - (low XReg (rv_sll (value_regs_get x 0) shamt)) - ;; high part. - (high_part1 XReg (rv_srl (value_regs_get x 0) len_sub_shamt)) - (high_part2 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) high_part1)) - ;; - (high_part3 XReg (rv_sll (value_regs_get x 1) shamt)) - (high XReg (rv_or high_part2 high_part3)) - ;; - (const64 XReg (imm $I64 64)) - (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) - (gen_select_regs - (cmp_geu shamt_128 const64) - (value_regs (zero_reg) low) - (value_regs low high)))) - -;; SIMD Cases -;; We don't need to mask anything since it is done by the instruction according to SEW. - -(rule 5 (lower (has_type (ty_supported_vec ty) (ishl x y))) - (rv_vsll_vx x (value_regs_get y 0) (unmasked) ty)) - -(rule 6 (lower (has_type (ty_supported_vec ty) (ishl x (maybe_uextend (uimm5_from_value y))))) - (rv_vsll_vi x y (unmasked) ty)) - -;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be -;; zero extended. -(rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x y))) - (if-let mask (u64_to_imm12 (ty_shift_mask ty))) - (rv_srlw (zext x) (rv_andi (value_regs_get y 0) mask))) - -;; Using the 32bit version of `srl` automatically masks the shift amount. -(rule 1 (lower (has_type $I32 (ushr x y))) - (rv_srlw x (value_regs_get y 0))) - -;; Similarly, the 64bit version does the right thing. -(rule 1 (lower (has_type $I64 (ushr x y))) - (rv_srl x (value_regs_get y 0))) - -;; When the RHS is known we can just encode it in the instruction. -(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x (maybe_uextend (imm12_from_value y))))) - (rv_srliw (zext x) (imm12_and y (ty_shift_mask ty)))) - -(rule 3 (lower (has_type $I32 (ushr x (maybe_uextend (imm12_from_value y))))) - (rv_srliw x y)) - -(rule 3 (lower (has_type $I64 (ushr x (maybe_uextend (imm12_from_value y))))) - (rv_srli x y)) - -(rule 3 (lower (has_type $I128 (ushr x y))) - (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) - (shamt XReg (value_regs_get tmp 0)) - (len_sub_shamt XReg (value_regs_get tmp 1)) - ;; low part. - (low_part1 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) - (low_part2 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) low_part1)) - ;; - (low_part3 XReg (rv_srl (value_regs_get x 0) shamt)) - (low XReg (rv_or low_part2 low_part3)) - ;; - (const64 XReg (imm $I64 64)) - ;; - (high XReg (rv_srl (value_regs_get x 1) shamt)) - (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) - (gen_select_regs - (cmp_geu shamt_128 const64) - (value_regs high (zero_reg)) - (value_regs low high)))) - -;; SIMD Cases -;; We don't need to mask or extend anything since it is done by the instruction according to SEW. - -(rule 4 (lower (has_type (ty_supported_vec ty) (ushr x y))) - (rv_vsrl_vx x (value_regs_get y 0) (unmasked) ty)) - -(rule 5 (lower (has_type (ty_supported_vec ty) (ushr x (maybe_uextend (uimm5_from_value y))))) - (rv_vsrl_vi x y (unmasked) ty)) - -;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be -;; zero extended. -(rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x y))) - (if-let mask (u64_to_imm12 (ty_shift_mask ty))) - (rv_sraw (sext x) (rv_andi (value_regs_get y 0) mask))) - -;; Using the 32bit version of `sra` automatically masks the shift amount. -(rule 1 (lower (has_type $I32 (sshr x y))) - (rv_sraw x (value_regs_get y 0))) - -;; Similarly, the 64bit version does the right thing. -(rule 1 (lower (has_type $I64 (sshr x y))) - (rv_sra x (value_regs_get y 0))) - -;; When the RHS is known we can just encode it in the instruction. -(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x (maybe_uextend (imm12_from_value y))))) - (rv_sraiw (sext x) (imm12_and y (ty_shift_mask ty)))) - -(rule 3 (lower (has_type $I32 (sshr x (maybe_uextend (imm12_from_value y))))) - (rv_sraiw x y)) - -(rule 3 (lower (has_type $I64 (sshr x (maybe_uextend (imm12_from_value y))))) - (rv_srai x y)) - -(rule 3 (lower (has_type $I128 (sshr x y))) - (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) - (shamt XReg (value_regs_get tmp 0)) - (len_sub_shamt XReg (value_regs_get tmp 1)) - ;; low part. - (low_part1 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) - (low_part2 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) low_part1)) - ;; - (low_part3 XReg (rv_srl (value_regs_get x 0) shamt)) - (low XReg (rv_or low_part2 low_part3)) - ;; - (const64 XReg (imm $I64 64)) - ;; - (high XReg (rv_sra (value_regs_get x 1) shamt)) - ;; - (const_neg_1 XReg (imm $I64 (i64_as_u64 -1))) - ;; - (high_replacement XReg (gen_select_xreg (cmp_ltz (value_regs_get x 1)) const_neg_1 (zero_reg))) - (const64 XReg (imm $I64 64)) - (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) - (gen_select_regs - (cmp_geu shamt_128 const64) - (value_regs high high_replacement) - (value_regs low high)))) - -;; SIMD Cases -;; We don't need to mask or extend anything since it is done by the instruction according to SEW. - -(rule 4 (lower (has_type (ty_supported_vec ty) (sshr x y))) - (rv_vsra_vx x (value_regs_get y 0) (unmasked) ty)) - -(rule 5 (lower (has_type (ty_supported_vec ty) (sshr x (maybe_uextend (uimm5_from_value y))))) - (rv_vsra_vi x y (unmasked) ty)) - - -;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (fits_in_64 ty) (rotl rs amount))) - (let - ((rs XReg (zext rs)) - (amount XReg (value_regs_get amount 0)) - (x ValueRegs (gen_shamt ty amount)) - (shamt XReg (value_regs_get x 0)) - (len_sub_shamt Reg (value_regs_get x 1)) - (part1 Reg (rv_sll rs shamt)) - (part2 Reg (rv_srl rs len_sub_shamt)) - (part3 Reg (gen_select_xreg (cmp_eqz shamt) (zero_reg) part2))) - (rv_or part1 part3))) - -(rule 1 (lower (has_type $I32 (rotl rs amount))) - (if-let $true (has_zbb)) - (rv_rolw rs (value_regs_get amount 0))) - -(rule 2 (lower (has_type $I32 (rotl rs (u64_from_iconst n)))) - (if-let $true (has_zbb)) - (if-let (imm12_from_u64 imm) (u64_sub 32 (u64_and n 31))) - (rv_roriw rs imm)) - -(rule 1 (lower (has_type $I64 (rotl rs amount))) - (if-let $true (has_zbb)) - (rv_rol rs (value_regs_get amount 0))) - -(rule 2 (lower (has_type $I64 (rotl rs (u64_from_iconst n)))) - (if-let $true (has_zbb)) - (if-let (imm12_from_u64 imm) (u64_sub 64 (u64_and n 63))) - (rv_rori rs imm)) - -(rule 1 (lower (has_type $I128 (rotl x y))) - (let - ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) - (shamt XReg (value_regs_get tmp 0)) - (len_sub_shamt XReg (value_regs_get tmp 1)) - (low_part1 XReg (rv_sll (value_regs_get x 0) shamt)) - (low_part2 XReg (rv_srl (value_regs_get x 1) len_sub_shamt)) - ;;; if shamt == 0 low_part2 will overflow we should zero instead. - (low_part3 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) low_part2)) - (low XReg (rv_or low_part1 low_part3)) - (high_part1 XReg (rv_sll (value_regs_get x 1) shamt)) - (high_part2 XReg (rv_srl (value_regs_get x 0) len_sub_shamt)) - (high_part3 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) high_part2)) - (high XReg (rv_or high_part1 high_part3)) - (const64 XReg (imm $I64 64)) - (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) - ;; right now we only rotate less than 64 bits. - ;; if shamt is greater than or equal 64 , we should switch low and high. - (gen_select_regs - (cmp_geu shamt_128 const64) - (value_regs high low) - (value_regs low high) - ))) - -;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (has_type (fits_in_64 ty) (rotr rs amount))) - (let - ((rs XReg (zext rs)) - (amount XReg (value_regs_get amount 0)) - (x ValueRegs (gen_shamt ty amount)) - (shamt XReg (value_regs_get x 0)) - (len_sub_shamt XReg (value_regs_get x 1)) - (part1 XReg (rv_srl rs shamt)) - (part2 XReg (rv_sll rs len_sub_shamt)) - (part3 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) part2))) - (rv_or part1 part3))) - -(rule 1 (lower (has_type $I32 (rotr rs amount))) - (if-let $true (has_zbb)) - (rv_rorw rs (value_regs_get amount 0))) - -(rule 2 (lower (has_type $I32 (rotr rs (imm12_from_value n)))) - (if-let $true (has_zbb)) - (rv_roriw rs n)) - -(rule 1 (lower (has_type $I64 (rotr rs amount))) - (if-let $true (has_zbb)) - (rv_ror rs (value_regs_get amount 0))) - -(rule 2 (lower (has_type $I64 (rotr rs (imm12_from_value n)))) - (if-let $true (has_zbb)) - (rv_rori rs n)) - -(rule 1 (lower (has_type $I128 (rotr x y))) - (let - ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) - (shamt XReg (value_regs_get tmp 0)) - (len_sub_shamt XReg (value_regs_get tmp 1)) - (low_part1 XReg (rv_srl (value_regs_get x 0) shamt)) - (low_part2 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) - ;;; if shamt == 0 low_part2 will overflow we should zero instead. - (low_part3 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) low_part2)) - (low XReg (rv_or low_part1 low_part3)) - (high_part1 XReg (rv_srl (value_regs_get x 1) shamt)) - (high_part2 XReg (rv_sll (value_regs_get x 0) len_sub_shamt)) - (high_part3 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) high_part2)) - (high XReg (rv_or high_part1 high_part3)) - (const64 XReg (imm $I64 64)) - (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) - ;; right now we only rotate less than 64 bits. - ;; if shamt is greater than or equal 64 , we should switch low and high. - (gen_select_regs - (cmp_geu shamt_128 const64) - (value_regs high low) - (value_regs low high) - ))) - -;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (fabs x))) - (rv_fabs ty x)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (fabs x))) - (rv_vfabs_v x (unmasked) ty)) - -;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (fneg x))) - (rv_fneg ty x)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (fneg x))) - (rv_vfneg_v x (unmasked) ty)) - -;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (fcopysign x y))) - (rv_fsgnj ty x y)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (fcopysign x y))) - (rv_vfsgnj_vv x y (unmasked) ty)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (fcopysign x (splat y)))) - (rv_vfsgnj_vf x y (unmasked) ty)) - -;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; RISC-V has 4 FMA instructions that do a slightly different computation. -;; -;; fmadd: (rs1 * rs2) + rs3 -;; fmsub: (rs1 * rs2) - rs3 -;; fnmadd: -(rs1 * rs2) - rs3 -;; fnmsub: -(rs1 * rs2) + rs3 -;; -;; Additionally there are vector versions of these instructions with slightly different names. -;; The vector instructions also have two variants each. `.vv` and `.vf`, where `.vv` variants -;; take two vector operands and the `.vf` variants take a vector operand and a scalar operand. -;; -;; Due to this, variation they receive the arguments in a different order. So we need to swap -;; the arguments below. -;; -;; vfmacc: vd[i] = +(vs1[i] * vs2[i]) + vd[i] -;; vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i] -;; vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i] -;; vfnmsac: vd[i] = -(vs1[i] * vs2[i]) + vd[i] - -(type IsFneg (enum (Result (negate u64) (value Value)))) - -(decl pure is_fneg (Value) IsFneg) -(rule 1 (is_fneg (fneg x)) (IsFneg.Result 1 x)) -(rule 0 (is_fneg x) (IsFneg.Result 0 x)) - -(decl pure is_fneg_neg (IsFneg) u64) -(rule (is_fneg_neg (IsFneg.Result n _)) n) - -(decl pure get_fneg_value (IsFneg) Value) -(rule (get_fneg_value (IsFneg.Result _ v)) v) - -(rule (lower (has_type ty (fma x_src y_src z_src))) - (let - ((x_res IsFneg (is_fneg x_src)) - (y_res IsFneg (is_fneg y_src)) - (z_res IsFneg (is_fneg z_src)) - (x Value (get_fneg_value x_res)) - (y Value (get_fneg_value y_res)) - (z Value (get_fneg_value z_res))) - (rv_fma ty (u64_xor (is_fneg_neg x_res) (is_fneg_neg y_res)) (is_fneg_neg z_res) x y z))) - -; parity arguments indicate whether to negate the x*y term or the z term, respectively -(decl rv_fma (Type u64 u64 Value Value Value) InstOutput) -(rule 0 (rv_fma (ty_supported_float ty) 0 0 x y z) (rv_fmadd ty (FRM.RNE) x y z)) -(rule 0 (rv_fma (ty_supported_float ty) 0 1 x y z) (rv_fmsub ty (FRM.RNE) x y z)) -(rule 0 (rv_fma (ty_supported_float ty) 1 0 x y z) (rv_fnmsub ty (FRM.RNE) x y z)) -(rule 0 (rv_fma (ty_supported_float ty) 1 1 x y z) (rv_fnmadd ty (FRM.RNE) x y z)) -(rule 1 (rv_fma (ty_supported_vec ty) 0 0 x y z) (rv_vfmacc_vv z y x (unmasked) ty)) -(rule 1 (rv_fma (ty_supported_vec ty) 0 1 x y z) (rv_vfmsac_vv z y x (unmasked) ty)) -(rule 1 (rv_fma (ty_supported_vec ty) 1 0 x y z) (rv_vfnmsac_vv z y x (unmasked) ty)) -(rule 1 (rv_fma (ty_supported_vec ty) 1 1 x y z) (rv_vfnmacc_vv z y x (unmasked) ty)) -(rule 2 (rv_fma (ty_supported_vec ty) 0 0 (splat x) y z) (rv_vfmacc_vf z y x (unmasked) ty)) -(rule 2 (rv_fma (ty_supported_vec ty) 0 1 (splat x) y z) (rv_vfmsac_vf z y x (unmasked) ty)) -(rule 2 (rv_fma (ty_supported_vec ty) 1 0 (splat x) y z) (rv_vfnmsac_vf z y x (unmasked) ty)) -(rule 2 (rv_fma (ty_supported_vec ty) 1 1 (splat x) y z) (rv_vfnmacc_vf z y x (unmasked) ty)) -(rule 3 (rv_fma (ty_supported_vec ty) 0 0 x (splat y) z) (rv_vfmacc_vf z x y (unmasked) ty)) -(rule 3 (rv_fma (ty_supported_vec ty) 0 1 x (splat y) z) (rv_vfmsac_vf z x y (unmasked) ty)) -(rule 3 (rv_fma (ty_supported_vec ty) 1 0 x (splat y) z) (rv_vfnmsac_vf z x y (unmasked) ty)) -(rule 3 (rv_fma (ty_supported_vec ty) 1 1 x (splat y) z) (rv_vfnmacc_vf z x y (unmasked) ty)) - -;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (sqrt x))) - (rv_fsqrt ty (FRM.RNE) x)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (sqrt x))) - (rv_vfsqrt_v x (unmasked) ty)) - -;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule -1 - ;; - (lower - (has_type (valid_atomic_transaction ty) (atomic_rmw flags op addr x))) - (gen_atomic (get_atomic_rmw_op ty op) addr x (atomic_amo))) - -;;; for I8 and I16 -(rule 1 - (lower - (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags op addr x))) - (gen_atomic_rmw_loop op ty addr x)) - -;;;special for I8 and I16 max min etc. -;;;because I need uextend or sextend the value. -(rule 2 - (lower - (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $true) addr x))) - (gen_atomic_rmw_loop op ty addr (sext x))) - - -(rule 2 - ;; - (lower - (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $false) addr x))) - ;; - (gen_atomic_rmw_loop op ty addr (zext x))) - -;;;;; Rules for `AtomicRmwOp.Sub` -(rule - (lower - (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr x))) - (let - ((tmp WritableReg (temp_writable_reg ty)) - (x2 Reg (rv_neg x))) - (gen_atomic (get_atomic_rmw_op ty (AtomicRmwOp.Add)) addr x2 (atomic_amo)))) - -(decl gen_atomic_rmw_loop (AtomicRmwOp Type XReg XReg) XReg) -(rule - (gen_atomic_rmw_loop op ty addr x) - (let - ((dst WritableXReg (temp_writable_xreg)) - (t0 WritableXReg (temp_writable_xreg)) - (_ Unit (emit (MInst.AtomicRmwLoop (gen_atomic_offset addr ty) op dst ty (gen_atomic_p addr ty) x t0)))) - (writable_reg_to_reg dst))) - -;;;;; Rules for `AtomicRmwOp.Nand` -(rule - (lower - (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr x))) - (gen_atomic_rmw_loop (AtomicRmwOp.Nand) ty addr x)) - -(decl is_atomic_rmw_max_etc (AtomicRmwOp bool) AtomicRmwOp) -(extern extractor is_atomic_rmw_max_etc is_atomic_rmw_max_etc) - -;;;;; Rules for `atomic load`;;;;;;;;;;;;;;;;; -(rule - (lower (has_type (valid_atomic_transaction ty) (atomic_load flags p))) - (gen_atomic_load p ty)) - - -;;;;; Rules for `atomic store`;;;;;;;;;;;;;;;;; -(rule - (lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) p)) - (gen_atomic_store p ty src)) - -(decl gen_atomic_offset (XReg Type) XReg) -(rule 1 (gen_atomic_offset p (fits_in_16 ty)) - (rv_slli (rv_andi p (imm12_const 3)) (imm12_const 3))) - -(rule (gen_atomic_offset p _) - (zero_reg)) - -(decl gen_atomic_p (XReg Type) XReg) -(rule 1 (gen_atomic_p p (fits_in_16 ty)) - (rv_andi p (imm12_const -4))) - -(rule (gen_atomic_p p _) - p) - - -;;;;; Rules for `atomic cas`;;;;;;;;;;;;;;;;; -(rule - (lower (has_type (valid_atomic_transaction ty) (atomic_cas flags p e x))) - (let - ((t0 WritableReg (temp_writable_reg ty)) - (dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.AtomicCas (gen_atomic_offset p ty) t0 dst (zext e) (gen_atomic_p p ty) x ty)))) - (writable_reg_to_reg dst))) - -;;;;; Rules for `ireduce`;;;;;;;;;;;;;;;;; -(rule - (lower (has_type ty (ireduce x))) - (value_regs_get x 0)) - -;;;;; Rules for `fpromote`;;;;;;;;;;;;;;;;; -(rule (lower (fpromote x)) - (rv_fcvtds x)) - -;;;;; Rules for `fvpromote_low`;;;;;;;;;;;; - -(rule (lower (has_type (ty_supported_vec ty) (fvpromote_low x))) - (if-let half_ty (ty_half_width ty)) - (rv_vfwcvt_f_f_v x (unmasked) (vstate_mf2 half_ty))) - -;;;;; Rules for `fdemote`;;;;;;;;;;;;;;;;;; -(rule (lower (fdemote x)) - (rv_fcvtsd (FRM.RNE) x)) - -;;;;; Rules for `fvdemote`;;;;;;;;;;;;;;;;; - -;; `vfncvt...` leaves the upper bits of the register undefined so -;; we need to zero them out. -(rule (lower (has_type (ty_supported_vec ty @ $F32X4) (fvdemote x))) - (if-let zero (i8_to_imm5 0)) - (let ((narrow VReg (rv_vfncvt_f_f_w x (unmasked) (vstate_mf2 ty))) - (mask VReg (gen_vec_mask 0xC))) - (rv_vmerge_vim narrow zero mask ty))) - - -;;;;; Rules for for float arithmetic - - -;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_supported_float ty) (fadd x y))) - (rv_fadd ty (FRM.RNE) x y)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (fadd x y))) - (rv_vfadd_vv x y (unmasked) ty)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (fadd x (splat y)))) - (rv_vfadd_vf x y (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (fadd (splat x) y))) - (rv_vfadd_vf y x (unmasked) ty)) - - -;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (fsub x y))) - (rv_fsub ty (FRM.RNE) x y)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (fsub x y))) - (rv_vfsub_vv x y (unmasked) ty)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (fsub x (splat y)))) - (rv_vfsub_vf x y (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (fsub (splat x) y))) - (rv_vfrsub_vf y x (unmasked) ty)) - -;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (fmul x y))) - (rv_fmul ty (FRM.RNE) x y)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (fmul x y))) - (rv_vfmul_vv x y (unmasked) ty)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (fmul x (splat y)))) - (rv_vfmul_vf x y (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (fmul (splat x) y))) - (rv_vfmul_vf y x (unmasked) ty)) - - -;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (fdiv x y))) - (rv_fdiv ty (FRM.RNE) x y)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (fdiv x y))) - (rv_vfdiv_vv x y (unmasked) ty)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (fdiv x (splat y)))) - (rv_vfdiv_vf x y (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (fdiv (splat x) y))) - (rv_vfrdiv_vf y x (unmasked) ty)) - -;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; RISC-V's `fmin` instruction returns the number input if one of inputs is a -;; NaN. We handle this by manually checking if one of the inputs is a NaN -;; and selecting based on that result. -(rule 0 (lower (has_type (ty_supported_float ty) (fmin x y))) - (let (;; Check if both inputs are not nan. - (is_ordered FloatCompare (fcmp_to_float_compare (FloatCC.Ordered) ty x y)) - ;; `fadd` returns a nan if any of the inputs is a NaN. - (nan FReg (rv_fadd ty (FRM.RNE) x y)) - (min FReg (rv_fmin ty x y))) - (gen_select_freg is_ordered min nan))) - -;; With Zfa we can use the special `fminm` that precisely matches the expected -;; NaN behavior. -(rule 1 (lower (has_type (ty_supported_float ty) (fmin x y))) - (if-let $true (has_zfa)) - (rv_fminm ty x y)) - -;; vfmin does almost the right thing, but it does not handle NaN's correctly. -;; We should return a NaN if any of the inputs is a NaN, but vfmin returns the -;; number input instead. -;; -;; TODO: We can improve this by using a masked `fmin` instruction that modifies -;; the canonical nan register. That way we could avoid the `vmerge.vv` instruction. -(rule 2 (lower (has_type (ty_supported_vec ty) (fmin x y))) - (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) - (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) - (vec_nan VReg (rv_vmv_vx nan ty)) - (min VReg (rv_vfmin_vv x y (unmasked) ty))) - (rv_vmerge_vvm vec_nan min is_not_nan ty))) - -;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; RISC-V's `fmax` instruction returns the number input if one of inputs is a -;; NaN. We handle this by manually checking if one of the inputs is a NaN -;; and selecting based on that result. -(rule 0 (lower (has_type (ty_supported_float ty) (fmax x y))) - (let (;; Check if both inputs are not nan. - (is_ordered FloatCompare (fcmp_to_float_compare (FloatCC.Ordered) ty x y)) - ;; `fadd` returns a NaN if any of the inputs is a NaN. - (nan FReg (rv_fadd ty (FRM.RNE) x y)) - (max FReg (rv_fmax ty x y))) - (gen_select_freg is_ordered max nan))) - -;; With Zfa we can use the special `fmaxm` that precisely matches the expected -;; NaN behavior. -(rule 1 (lower (has_type (ty_supported_float ty) (fmax x y))) - (if-let $true (has_zfa)) - (rv_fmaxm ty x y)) - -;; vfmax does almost the right thing, but it does not handle NaN's correctly. -;; We should return a NaN if any of the inputs is a NaN, but vfmax returns the -;; number input instead. -;; -;; TODO: We can improve this by using a masked `fmax` instruction that modifies -;; the canonical nan register. That way we could avoid the `vmerge.vv` instruction. -(rule 2 (lower (has_type (ty_supported_vec ty) (fmax x y))) - (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) - (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) - (vec_nan VReg (rv_vmv_vx nan ty)) - (max VReg (rv_vfmax_vv x y (unmasked) ty))) - (rv_vmerge_vvm vec_nan max is_not_nan ty))) - -;;;;; Rules for `stack_addr`;;;;;;;;; -(rule - (lower (stack_addr ss offset)) - (gen_stack_addr ss offset)) - -;;;;; Rules for `select`;;;;;;;;; - -;; Manually matching (iconst 0) here is a bit of a hack. We can't do that as part -;; of the iconst rule because that runs into regalloc issues. gen_select_xreg -;; has some optimizations based on the use of the zero register so we have to -;; manually match it here. -(rule 5 (lower (has_type (ty_int_ref_scalar_64 _) (select c (i64_from_iconst 0) y))) - (gen_select_xreg (is_nonzero_cmp c) (zero_reg) y)) - -(rule 4 (lower (has_type (ty_int_ref_scalar_64 _) (select c x (i64_from_iconst 0)))) - (gen_select_xreg (is_nonzero_cmp c) x (zero_reg))) - -(rule 3 (lower (has_type (ty_int_ref_scalar_64 _) (select c x y))) - (gen_select_xreg (is_nonzero_cmp c) x y)) - -(rule 2 (lower (has_type $I128 (select c x y))) - (gen_select_regs (is_nonzero_cmp c) x y)) - -(rule 1 (lower (has_type (ty_supported_vec _) (select c x y))) - (gen_select_vreg (is_nonzero_cmp c) x y)) - -(rule 0 (lower (has_type (ty_supported_float _) (select c x y))) - (gen_select_freg (is_nonzero_cmp c) x y)) - -;;;;; Rules for `bitselect`;;;;;;;;; - -;; Do a (c & x) | (~c & y) operation. -(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (bitselect c x y))) - (let ((tmp_x XReg (rv_and c x)) - (c_inverse XReg (rv_not c)) - (tmp_y XReg (rv_and c_inverse y))) - (rv_or tmp_x tmp_y))) - -;; For vectors, we also do the same operation. -;; We can technically use any type in the bitwise operations, but prefer -;; using the type of the inputs so that we avoid emitting unnecessary -;; `vsetvl` instructions. it's likely that the vector unit is already -;; configured for that type. -(rule 1 (lower (has_type (ty_supported_vec ty) (bitselect c x y))) - (let ((tmp_x VReg (rv_vand_vv c x (unmasked) ty)) - (c_inverse VReg (rv_vnot_v c (unmasked) ty)) - (tmp_y VReg (rv_vand_vv c_inverse y (unmasked) ty))) - (rv_vor_vv tmp_x tmp_y (unmasked) ty))) - -;; Special case for bitselects with cmp's as an input. -;; -;; This allows us to skip the mask expansion step and use the more efficient -;; vmerge.vvm instruction. -;; -;; We should be careful to ensure that the mask and the vmerge have the -;; same type. So that we don't generate a mask with length 16 (i.e. for i8x16), and then -;; only copy the first few lanes of the result to the destination register because -;; the bitselect has a different length (i.e. i64x2). -;; -;; See: https://github.com/bytecodealliance/wasmtime/issues/8131 - -(rule 2 (lower (has_type (ty_supported_vec _ty) (bitselect (icmp cc a @ (value_type (ty_supported_vec cmp_ty)) b) x y))) - (let ((mask VReg (gen_icmp_mask cmp_ty cc a b))) - (rv_vmerge_vvm y x mask cmp_ty))) - -(rule 2 (lower (has_type (ty_supported_vec _ty) (bitselect (fcmp cc a @ (value_type (ty_supported_vec cmp_ty)) b) x y))) - (let ((mask VReg (gen_fcmp_mask cmp_ty cc a b))) - (rv_vmerge_vvm y x mask cmp_ty))) - -(rule 2 (lower (has_type (ty_supported_vec _ty) (bitselect (bitcast _ (fcmp cc a @ (value_type (ty_supported_vec cmp_ty)) b)) x y))) - (let ((mask VReg (gen_fcmp_mask cmp_ty cc a b))) - (rv_vmerge_vvm y x mask cmp_ty))) - -(rule 2 (lower (has_type (ty_supported_vec _ty) (bitselect (bitcast _ (icmp cc a @ (value_type (ty_supported_vec cmp_ty)) b)) x y))) - (let ((mask VReg (gen_icmp_mask cmp_ty cc a b))) - (rv_vmerge_vvm y x mask cmp_ty))) - - -;;;;; Rules for `isplit`;;;;;;;;; -(rule - (lower (isplit x)) - (let - ((t1 XReg (value_regs_get x 0)) - (t2 XReg (value_regs_get x 1))) - (output_pair t1 t2))) - -;;;;; Rules for `iconcat`;;;;;;;;; -(rule - (lower (has_type $I128 (iconcat x y))) - (let - ((t1 XReg x) - (t2 XReg y)) - (value_regs t1 t2))) - -;; Special-case the lowering of an `isplit` of a 128-bit multiply where the -;; lower bits of the result are discarded and the operands are sign or zero -;; extended. This maps directly to `umulh` and `smulh`. -(rule 1 (lower i @ (isplit (has_type $I128 (imul (uextend x) (uextend y))))) - (if-let (first_result lo) i) - (if-let $true (value_is_unused lo)) - (output_pair (invalid_reg) (rv_mulhu (zext x) (zext y)))) - -(rule 1 (lower i @ (isplit (has_type $I128 (imul (sextend x) (sextend y))))) - (if-let (first_result lo) i) - (if-let $true (value_is_unused lo)) - (output_pair (invalid_reg) (rv_mulh (sext x) (sext y)))) - -;;;;; Rules for `smax`;;;;;;;;; - -(rule 0 (lower (has_type (fits_in_64 ty) (smax x y))) - (let ((x XReg (sext x)) - (y XReg (sext y))) - (gen_select_xreg (cmp_gt x y) x y))) - -(rule 1 (lower (has_type $I128 (smax x y))) - (gen_select_regs (icmp_to_int_compare (IntCC.SignedGreaterThan) x y) x y)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (smax x y))) - (rv_vmax_vv x y (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (smax x (splat y)))) - (rv_vmax_vx x y (unmasked) ty)) - -(rule 4 (lower (has_type (ty_supported_vec ty) (smax (splat x) y))) - (rv_vmax_vx y x (unmasked) ty)) - -;;;;; Rules for `smin`;;;;;;;;; - -(rule 0 (lower (has_type (fits_in_64 ty) (smin x y))) - (let ((x XReg (sext x)) - (y XReg (sext y))) - (gen_select_xreg (cmp_lt x y) x y))) - -(rule 1 (lower (has_type $I128 (smin x y))) - (gen_select_regs (icmp_to_int_compare (IntCC.SignedLessThan) x y) x y)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (smin x y))) - (rv_vmin_vv x y (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (smin x (splat y)))) - (rv_vmin_vx x y (unmasked) ty)) - -(rule 4 (lower (has_type (ty_supported_vec ty) (smin (splat x) y))) - (rv_vmin_vx y x (unmasked) ty)) - -;;;;; Rules for `umax`;;;;;;;;; - -(rule 0 (lower (has_type (fits_in_64 ty) (umax x y))) - (let ((x XReg (zext x)) - (y XReg (zext y))) - (gen_select_xreg (cmp_gtu x y) x y))) - -(rule 1 (lower (has_type $I128 (umax x y))) - (gen_select_regs (icmp_to_int_compare (IntCC.UnsignedGreaterThan) x y) x y)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (umax x y))) - (rv_vmaxu_vv x y (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (umax x (splat y)))) - (rv_vmaxu_vx x y (unmasked) ty)) - -(rule 4 (lower (has_type (ty_supported_vec ty) (umax (splat x) y))) - (rv_vmaxu_vx y x (unmasked) ty)) - -;;;;; Rules for `umin`;;;;;;;;; - -(rule 0 (lower (has_type (fits_in_64 ty) (umin x y))) - (let ((x XReg (zext x)) - (y XReg (zext y))) - (gen_select_xreg (cmp_ltu x y) x y))) - -(rule 1 (lower (has_type $I128 (umin x y))) - (gen_select_regs (icmp_to_int_compare (IntCC.UnsignedLessThan) x y) x y)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (umin x y))) - (rv_vminu_vv x y (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (umin x (splat y)))) - (rv_vminu_vx x y (unmasked) ty)) - -(rule 4 (lower (has_type (ty_supported_vec ty) (umin (splat x) y))) - (rv_vminu_vx y x (unmasked) ty)) - - -;;;;; Rules for `debugtrap`;;;;;;;;; -(rule - (lower (debugtrap)) - (side_effect (SideEffectNoResult.Inst (MInst.EBreak)))) - -;;;;; Rules for `fence`;;;;;;;;; -(rule - (lower (fence)) - (side_effect (SideEffectNoResult.Inst (MInst.Fence 15 15)))) - -;;;;; Rules for `trap`;;;;;;;;; -(rule - (lower (trap code)) - (udf code)) - -;;;;; Rules for `uload8`;;;;;;;;; -(rule (lower (uload8 flags addr offset)) - (gen_load (amode addr offset) (LoadOP.Lbu) flags)) - -;;;;; Rules for `sload8`;;;;;;;;; -(rule (lower (sload8 flags addr offset)) - (gen_load (amode addr offset) (LoadOP.Lb) flags)) - -;;;;; Rules for `uload16`;;;;;;;;; -(rule (lower (uload16 flags addr offset)) - (gen_load (amode addr offset) (LoadOP.Lhu) flags)) - -;;;;; Rules for `iload16`;;;;;;;;; -(rule (lower (sload16 flags addr offset)) - (gen_load (amode addr offset) (LoadOP.Lh) flags)) - -;;;;; Rules for `uload32`;;;;;;;;; -(rule (lower (uload32 flags addr offset)) - (gen_load (amode addr offset) (LoadOP.Lwu) flags)) - -;;;;; Rules for `sload32`;;;;;;;;; -(rule (lower (sload32 flags addr offset)) - (gen_load (amode addr offset) (LoadOP.Lw) flags)) - -;;;;; Rules for `load`;;;;;;;;; -(rule (lower (has_type ty (load flags addr offset))) - (gen_load (amode addr offset) (load_op ty) flags)) - -(rule 1 (lower (has_type $I128 (load flags addr offset))) - (if-let offset_plus_8 (s32_add_fallible offset 8)) - (let ((lo XReg (gen_load (amode addr offset) (LoadOP.Ld) flags)) - (hi XReg (gen_load (amode addr offset_plus_8) (LoadOP.Ld) flags))) - (value_regs lo hi))) - -(rule 2 (lower (has_type (ty_supported_vec ty) (load flags addr offset))) - (let ((eew VecElementWidth (element_width_from_type ty)) - (amode AMode (amode addr offset))) - (vec_load eew (VecAMode.UnitStride amode) flags (unmasked) ty))) - -;;;;; Rules for Load + Extend Combos ;;;;;;;;; - -;; These rules cover the special loads that load a 64bit value and do some sort of extension. -;; We don't have any special instructions to do this, so just load the 64 bits as a vector, and -;; do a SEW/2 extension. This only reads half width elements from the source vector register -;; extends it, and writes the back the full register. - -(decl gen_load64_extend (Type ExtendOp MemFlags AMode) VReg) - -(rule (gen_load64_extend ty (ExtendOp.Signed) flags amode) - (let ((eew VecElementWidth (element_width_from_type $I64)) - (load_state VState (vstate_from_type $I64)) - (loaded VReg (vec_load eew (VecAMode.UnitStride amode) flags (unmasked) load_state))) - (rv_vsext_vf2 loaded (unmasked) ty))) - -(rule (gen_load64_extend ty (ExtendOp.Zero) flags amode) - (let ((eew VecElementWidth (element_width_from_type $I64)) - (load_state VState (vstate_from_type $I64)) - (loaded VReg (vec_load eew (VecAMode.UnitStride amode) flags (unmasked) load_state))) - (rv_vzext_vf2 loaded (unmasked) ty))) - -;;;;; Rules for `uload8x8`;;;;;;;;;; -(rule (lower (has_type (ty_supported_vec ty @ $I16X8) (uload8x8 flags addr offset))) - (gen_load64_extend ty (ExtendOp.Zero) flags (amode addr offset))) - -;;;;; Rules for `uload16x4`;;;;;;;;; -(rule (lower (has_type (ty_supported_vec ty @ $I32X4) (uload16x4 flags addr offset))) - (gen_load64_extend ty (ExtendOp.Zero) flags (amode addr offset))) - -;;;;; Rules for `uload32x2`;;;;;;;;; -(rule (lower (has_type (ty_supported_vec ty @ $I64X2) (uload32x2 flags addr offset))) - (gen_load64_extend ty (ExtendOp.Zero) flags (amode addr offset))) - -;;;;; Rules for `sload8x8`;;;;;;;;;; -(rule (lower (has_type (ty_supported_vec ty @ $I16X8) (sload8x8 flags addr offset))) - (gen_load64_extend ty (ExtendOp.Signed) flags (amode addr offset))) - -;;;;; Rules for `sload16x4`;;;;;;;;; -(rule (lower (has_type (ty_supported_vec ty @ $I32X4) (sload16x4 flags addr offset))) - (gen_load64_extend ty (ExtendOp.Signed) flags (amode addr offset))) - -;;;;; Rules for `sload32x2`;;;;;;;;; -(rule (lower (has_type (ty_supported_vec ty @ $I64X2) (sload32x2 flags addr offset))) - (gen_load64_extend ty (ExtendOp.Signed) flags (amode addr offset))) - -;;;;; Rules for `istore8`;;;;;;;;; -(rule (lower (istore8 flags src addr offset)) - (rv_store (amode addr offset) (StoreOP.Sb) flags src)) - -;;;;; Rules for `istore16`;;;;;;;;; -(rule (lower (istore16 flags src addr offset)) - (rv_store (amode addr offset) (StoreOP.Sh) flags src)) - -;;;;; Rules for `istore32`;;;;;;;;; -(rule (lower (istore32 flags src addr offset)) - (rv_store (amode addr offset) (StoreOP.Sw) flags src)) - -;;;;; Rules for `store`;;;;;;;;; -(rule (lower (store flags src @ (value_type ty) addr offset)) - (gen_store (amode addr offset) flags src)) - -(rule 1 (lower (store flags src @ (value_type $I128) addr offset)) - (if-let offset_plus_8 (s32_add_fallible offset 8)) - (let ((_ InstOutput (rv_store (amode addr offset) (StoreOP.Sd) flags (value_regs_get src 0)))) - (rv_store (amode addr offset_plus_8) (StoreOP.Sd) flags (value_regs_get src 1)))) - -(rule 2 (lower (store flags src @ (value_type (ty_supported_vec ty)) addr offset)) - (let ((eew VecElementWidth (element_width_from_type ty)) - (amode AMode (amode addr offset))) - (vec_store eew (VecAMode.UnitStride amode) src flags (unmasked) ty))) - - -;;;;; Rules for `icmp`;;;;;;;;; - -;; 8-64 bit comparisons. Mostly fall back onto `IntegerCompare` and then -;; materializing that, but before that happens try to match some -;; constant-related patterns - -(rule 0 (lower (icmp cc x @ (value_type (fits_in_64 ty)) y)) - (lower_icmp cc x y)) - -(decl lower_icmp (IntCC Value Value) XReg) -(rule 0 (lower_icmp cc x y) - (lower_int_compare (icmp_to_int_compare cc x y))) - -;; a == $imm => seqz(xori(..)) -(rule 1 (lower_icmp (IntCC.Equal) x y) - (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) y) - (rv_seqz (rv_xori (sext x) imm))) -(rule 2 (lower_icmp (IntCC.Equal) x y) - (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) x) - (rv_seqz (rv_xori (sext y) imm))) - -;; a != $imm => snez(xori(..)) -(rule 1 (lower_icmp (IntCC.NotEqual) x y) - (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) y) - (rv_snez (rv_xori (sext x) imm))) -(rule 2 (lower_icmp (IntCC.NotEqual) x y) - (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) x) - (rv_snez (rv_xori (sext y) imm))) - -;; a < $imm => slti(..) -(rule 1 (lower_icmp (IntCC.SignedLessThan) x y) - (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) y) - (rv_slti (sext x) imm)) -(rule 1 (lower_icmp (IntCC.SignedGreaterThan) x y) - (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) x) - (rv_slti (sext y) imm)) -(rule 1 (lower_icmp (IntCC.UnsignedLessThan) x y) - (if-let (u64_from_iconst (u64_nonzero (imm12_from_u64 imm))) y) - (rv_sltiu (zext x) imm)) -(rule 1 (lower_icmp (IntCC.UnsignedGreaterThan) x y) - (if-let (u64_from_iconst (u64_nonzero (imm12_from_u64 imm))) x) - (rv_sltiu (zext y) imm)) - -;; a >= $imm => !(a < $imm) -(rule 2 (lower_icmp cc @ (IntCC.SignedGreaterThanOrEqual) x y) - (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 _))) y) - (rv_xori (lower_icmp (intcc_complement cc) x y) (imm12_const 1))) -(rule 2 (lower_icmp cc @ (IntCC.UnsignedGreaterThanOrEqual) x y) - (if-let (u64_from_iconst (u64_nonzero (imm12_from_u64 _))) y) - (rv_xori (lower_icmp (intcc_complement cc) x y) (imm12_const 1))) - -;; Materializes an `IntegerCompare` bundle directly into an `XReg` with a 0 -;; or 1 value. -(decl lower_int_compare (IntegerCompare) XReg) - -;; x == y => x ^ y == 0 -(rule 0 (lower_int_compare (int_compare_decompose (IntCC.Equal) x y)) - (rv_seqz (rv_xor x y))) -(rule 1 (lower_int_compare (int_compare_decompose (IntCC.Equal) x (zero_reg))) - (rv_seqz x)) -(rule 2 (lower_int_compare (int_compare_decompose (IntCC.Equal) (zero_reg) y)) - (rv_seqz y)) -;; x != y => x ^ y != 0 -(rule 0 (lower_int_compare (int_compare_decompose (IntCC.NotEqual) x y)) - (rv_snez (rv_xor x y))) -(rule 1 (lower_int_compare (int_compare_decompose (IntCC.NotEqual) x (zero_reg))) - (rv_snez x)) -(rule 2 (lower_int_compare (int_compare_decompose (IntCC.NotEqual) (zero_reg) x)) - (rv_snez x)) -;; x < y => x < y -(rule (lower_int_compare (int_compare_decompose (IntCC.SignedLessThan) x y)) - (rv_slt x y)) -(rule (lower_int_compare (int_compare_decompose (IntCC.UnsignedLessThan) x y)) - (rv_sltu x y)) -;; x > y => y < x -(rule (lower_int_compare (int_compare_decompose (IntCC.SignedGreaterThan) x y)) - (rv_slt y x)) -(rule (lower_int_compare (int_compare_decompose (IntCC.UnsignedGreaterThan) x y)) - (rv_sltu y x)) -;; x <= y => !(y < x) -(rule (lower_int_compare (int_compare_decompose (IntCC.SignedLessThanOrEqual) x y)) - (rv_xori (rv_slt y x) (imm12_const 1))) -(rule (lower_int_compare (int_compare_decompose (IntCC.UnsignedLessThanOrEqual) x y)) - (rv_xori (rv_sltu y x) (imm12_const 1))) -;; x >= y => !(x < y) -(rule (lower_int_compare (int_compare_decompose (IntCC.SignedGreaterThanOrEqual) x y)) - (rv_xori (rv_slt x y) (imm12_const 1))) -(rule (lower_int_compare (int_compare_decompose (IntCC.UnsignedGreaterThanOrEqual) x y)) - (rv_xori (rv_sltu x y) (imm12_const 1))) - -;; 128-bit comparisons. -;; -;; Currently only `==`, `!=`, and `<` are implemented, and everything else -;; delegates to one of those. - -(rule 20 (lower (icmp cc x @ (value_type $I128) y)) - (lower_icmp_i128 cc x y)) - -(decl lower_icmp_i128 (IntCC ValueRegs ValueRegs) XReg) -(rule 0 (lower_icmp_i128 (IntCC.Equal) x y) - (let ((lo XReg (rv_xor (value_regs_get x 0) (value_regs_get y 0))) - (hi XReg (rv_xor (value_regs_get x 1) (value_regs_get y 1)))) - (rv_seqz (rv_or lo hi)))) -(rule 0 (lower_icmp_i128 (IntCC.NotEqual) x y) - (let ((lo XReg (rv_xor (value_regs_get x 0) (value_regs_get y 0))) - (hi XReg (rv_xor (value_regs_get x 1) (value_regs_get y 1)))) - (rv_snez (rv_or lo hi)))) - -;; swap args for `>` to use `<` instead -(rule 0 (lower_icmp_i128 cc @ (IntCC.SignedGreaterThan) x y) - (lower_icmp_i128 (intcc_swap_args cc) y x)) -(rule 0 (lower_icmp_i128 cc @ (IntCC.UnsignedGreaterThan) x y) - (lower_icmp_i128 (intcc_swap_args cc) y x)) - -;; complement `=`-related conditions to get ones that don't use `=`. -(rule 0 (lower_icmp_i128 cc @ (IntCC.SignedLessThanOrEqual) x y) - (rv_xori (lower_icmp_i128 (intcc_complement cc) x y) (imm12_const 1))) -(rule 0 (lower_icmp_i128 cc @ (IntCC.SignedGreaterThanOrEqual) x y) - (rv_xori (lower_icmp_i128 (intcc_complement cc) x y) (imm12_const 1))) -(rule 0 (lower_icmp_i128 cc @ (IntCC.UnsignedLessThanOrEqual) x y) - (rv_xori (lower_icmp_i128 (intcc_complement cc) x y) (imm12_const 1))) -(rule 0 (lower_icmp_i128 cc @ (IntCC.UnsignedGreaterThanOrEqual) x y) - (rv_xori (lower_icmp_i128 (intcc_complement cc) x y) (imm12_const 1))) - -;; Compare both the bottom and upper halves of the 128-bit values. If -;; the top half is equal use the bottom comparison, otherwise use the upper -;; comparison. Note that the lower comparison is always unsigned since if it's -;; used the top halves are all zeros and the semantic values are positive. -(rule 1 (lower_icmp_i128 cc x y) - (if-let (IntCC.UnsignedLessThan) (intcc_unsigned cc)) - (let ((x_lo Reg (value_regs_get x 0)) - (x_hi Reg (value_regs_get x 1)) - (y_lo Reg (value_regs_get y 0)) - (y_hi Reg (value_regs_get y 1)) - (top_cmp XReg (lower_int_compare (int_compare cc x_hi y_hi))) - (bottom_cmp XReg (rv_sltu x_lo y_lo))) - (gen_select_xreg (cmp_eqz (rv_xor x_hi y_hi)) bottom_cmp top_cmp))) - -;; vector icmp comparisons - -(rule 30 (lower (icmp cc x @ (value_type (ty_supported_vec ty)) y)) - (gen_expand_mask ty (gen_icmp_mask ty cc x y))) - -;;;;; Rules for `fcmp`;;;;;;;;; -(rule 0 (lower (fcmp cc x @ (value_type (ty_supported_float ty)) y)) - (lower_float_compare (fcmp_to_float_compare cc ty x y))) - -(decl lower_float_compare (FloatCompare) XReg) -(rule (lower_float_compare (FloatCompare.One r)) r) -(rule (lower_float_compare (FloatCompare.Zero r)) (rv_seqz r)) - -(rule 1 (lower (fcmp cc x @ (value_type (ty_supported_vec ty)) y)) - (gen_expand_mask ty (gen_fcmp_mask ty cc x y))) - -;;;;; Rules for `func_addr`;;;;;;;;; -(rule - (lower (func_addr (func_ref_data _ name _))) - (load_ext_name name 0)) - -;;;;; Rules for `fcvt_to_uint`;;;;;;;;; - -;; RISC-V float-to-integer conversion does not trap, but Cranelift semantics are -;; to trap. This manually performs checks for NaN and out-of-bounds values and -;; traps in such cases. -;; -;; TODO: could this perhaps be more optimal through inspection of the `fcsr`? -;; Unsure whether that needs to be preserved across function calls and/or would -;; cause other problems. Also unsure whether it's actually more performant. -(rule (lower (has_type ity (fcvt_to_uint v @ (value_type fty)))) - (let ((_ InstOutput (gen_trapz (rv_feq fty v v) (TrapCode.BadConversionToInteger))) - (min FReg (imm fty (fcvt_umin_bound fty $false))) - (_ InstOutput (gen_trapnz (rv_fle fty v min) (TrapCode.IntegerOverflow))) - (max FReg (imm fty (fcvt_umax_bound fty ity $false))) - (_ InstOutput (gen_trapnz (rv_fge fty v max) (TrapCode.IntegerOverflow)))) - (lower_inbounds_fcvt_to_uint ity fty v))) - -(decl lower_inbounds_fcvt_to_uint (Type Type FReg) XReg) -(rule 0 (lower_inbounds_fcvt_to_uint (fits_in_32 _) fty v) - (rv_fcvtwu fty (FRM.RTZ) v)) -(rule 1 (lower_inbounds_fcvt_to_uint $I64 fty v) - (rv_fcvtlu fty (FRM.RTZ) v)) - -;;;;; Rules for `fcvt_to_sint`;;;;;;;;; - -;; NB: see above with `fcvt_to_uint` as this is similar -(rule (lower (has_type ity (fcvt_to_sint v @ (value_type fty)))) - (let ((_ InstOutput (gen_trapz (rv_feq fty v v) (TrapCode.BadConversionToInteger))) - (min FReg (imm fty (fcvt_smin_bound fty ity $false))) - (_ InstOutput (gen_trapnz (rv_fle fty v min) (TrapCode.IntegerOverflow))) - (max FReg (imm fty (fcvt_smax_bound fty ity $false))) - (_ InstOutput (gen_trapnz (rv_fge fty v max) (TrapCode.IntegerOverflow)))) - (lower_inbounds_fcvt_to_sint ity fty v))) - -(decl lower_inbounds_fcvt_to_sint (Type Type FReg) XReg) -(rule 0 (lower_inbounds_fcvt_to_sint (fits_in_32 _) fty v) - (rv_fcvtw fty (FRM.RTZ) v)) -(rule 1 (lower_inbounds_fcvt_to_sint $I64 fty v) - (rv_fcvtl fty (FRM.RTZ) v)) - -;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;; - -(rule 0 (lower (has_type to (fcvt_to_sint_sat v @ (value_type (ty_supported_float from))))) - (handle_fcvt_to_int_nan from v (lower_fcvt_to_sint_sat from to v))) - -;; Lowers to a `rv_fcvt*` instruction but handles 8/16-bit cases where the -;; float is clamped before the conversion. -(decl lower_fcvt_to_sint_sat (Type Type FReg) XReg) -(rule 0 (lower_fcvt_to_sint_sat ty (fits_in_16 out_ty) v) - (let ((max FReg (imm ty (fcvt_smax_bound ty out_ty $true))) - (min FReg (imm ty (fcvt_smin_bound ty out_ty $true))) - (clamped FReg (rv_fmin ty max (rv_fmax ty min v)))) - (rv_fcvtw ty (FRM.RTZ) clamped))) -(rule 1 (lower_fcvt_to_sint_sat ty $I32 v) (rv_fcvtw ty (FRM.RTZ) v)) -(rule 1 (lower_fcvt_to_sint_sat ty $I64 v) (rv_fcvtl ty (FRM.RTZ) v)) - -(decl fcvt_smax_bound (Type Type bool) u64) -(extern constructor fcvt_smax_bound fcvt_smax_bound) -(decl fcvt_smin_bound (Type Type bool) u64) -(extern constructor fcvt_smin_bound fcvt_smin_bound) - -;; RISC-V float-to-int conversions generate the same output for NaN and +Inf, -;; but Cranelift semantics are to produce 0 for NaN instead. This helper -;; translates these semantics by taking the float being converted (with the type -;; specified) and the native RISC-V output as an `XReg`. The returned `XReg` -;; will be zeroed out if the float is NaN. -;; -;; This is done by comparing the float to itself, generating 0 if it's NaN. This -;; bit is then negated to become either all-ones or all-zeros which is then -;; and-ed against the native output. That'll produce all zeros if the input is -;; NaN or the native output otherwise. -(decl handle_fcvt_to_int_nan (Type FReg XReg) XReg) -(rule (handle_fcvt_to_int_nan ty freg xreg) - (let ((is_not_nan XReg (rv_feq ty freg freg)) - (not_nan_mask XReg (rv_neg is_not_nan))) - (rv_and xreg not_nan_mask))) - -(rule 1 (lower (has_type (ty_supported_vec _) (fcvt_to_sint_sat v @ (value_type from_ty)))) - (if-let zero (i8_to_imm5 0)) - (let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty)) - (cvt VReg (rv_vfcvt_rtz_x_f_v v (unmasked) from_ty))) - (rv_vmerge_vim cvt zero is_nan from_ty))) - -;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;; - -(rule 0 (lower (has_type to (fcvt_to_uint_sat v @ (value_type (ty_supported_float from))))) - (handle_fcvt_to_int_nan from v (lower_fcvt_to_uint_sat from to v))) - -;; Lowers to a `rv_fcvt*` instruction but handles 8/16-bit cases where the -;; float is clamped before the conversion. -(decl lower_fcvt_to_uint_sat (Type Type FReg) XReg) -(rule 0 (lower_fcvt_to_uint_sat ty (fits_in_16 out_ty) v) - (let ((max FReg (imm ty (fcvt_umax_bound ty out_ty $true))) - (min FReg (rv_fmvdx (zero_reg))) - (clamped FReg (rv_fmin ty max (rv_fmax ty min v)))) - (rv_fcvtwu ty (FRM.RTZ) clamped))) -(rule 1 (lower_fcvt_to_uint_sat ty $I32 v) (rv_fcvtwu ty (FRM.RTZ) v)) -(rule 1 (lower_fcvt_to_uint_sat ty $I64 v) (rv_fcvtlu ty (FRM.RTZ) v)) - -(decl fcvt_umax_bound (Type Type bool) u64) -(extern constructor fcvt_umax_bound fcvt_umax_bound) -(decl fcvt_umin_bound (Type bool) u64) -(extern constructor fcvt_umin_bound fcvt_umin_bound) - -(rule 1 (lower (has_type (ty_supported_vec _) (fcvt_to_uint_sat v @ (value_type from_ty)))) - (if-let zero (i8_to_imm5 0)) - (let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty)) - (cvt VReg (rv_vfcvt_rtz_xu_f_v v (unmasked) from_ty))) - (rv_vmerge_vim cvt zero is_nan from_ty))) - -;;;;; Rules for `fcvt_from_sint`;;;;;;;;; -(rule 0 (lower (has_type $F32 (fcvt_from_sint v @ (value_type (fits_in_16 ty))))) - (rv_fcvtsl (FRM.RNE) (sext v))) - -(rule 1 (lower (has_type $F32 (fcvt_from_sint v @ (value_type $I32)))) - (rv_fcvtsw (FRM.RNE) v)) - -(rule 1 (lower (has_type $F32 (fcvt_from_sint v @ (value_type $I64)))) - (rv_fcvtsl (FRM.RNE) v)) - -(rule 0 (lower (has_type $F64 (fcvt_from_sint v @ (value_type (fits_in_16 ty))))) - (rv_fcvtdl (FRM.RNE) (sext v))) - -(rule 1 (lower (has_type $F64 (fcvt_from_sint v @ (value_type $I32)))) - (rv_fcvtdw v)) - -(rule 1 (lower (has_type $F64 (fcvt_from_sint v @ (value_type $I64)))) - (rv_fcvtdl (FRM.RNE) v)) - -(rule 2 (lower (has_type (ty_supported_vec _) (fcvt_from_sint v @ (value_type from_ty)))) - (rv_vfcvt_f_x_v v (unmasked) from_ty)) - -;;;;; Rules for `fcvt_from_uint`;;;;;;;;; -(rule 0 (lower (has_type $F32 (fcvt_from_uint v @ (value_type (fits_in_16 ty))))) - (rv_fcvtslu (FRM.RNE) (zext v))) - -(rule 1 (lower (has_type $F32 (fcvt_from_uint v @ (value_type $I32)))) - (rv_fcvtswu (FRM.RNE) v)) - -(rule 1 (lower (has_type $F32 (fcvt_from_uint v @ (value_type $I64)))) - (rv_fcvtslu (FRM.RNE) v)) - -(rule 0 (lower (has_type $F64 (fcvt_from_uint v @ (value_type (fits_in_16 ty))))) - (rv_fcvtdlu (FRM.RNE) (zext v))) - -(rule 1 (lower (has_type $F64 (fcvt_from_uint v @ (value_type $I32)))) - (rv_fcvtdwu v)) - -(rule 1 (lower (has_type $F64 (fcvt_from_uint v @ (value_type $I64)))) - (rv_fcvtdlu (FRM.RNE) v)) - -(rule 2 (lower (has_type (ty_supported_vec _) (fcvt_from_uint v @ (value_type from_ty)))) - (rv_vfcvt_f_xu_v v (unmasked) from_ty)) - -;;;;; Rules for `symbol_value`;;;;;;;;; -(rule - (lower (symbol_value (symbol_value_data name _ offset))) - (load_ext_name name offset)) - -;;;;; Rules for `tls_value` ;;;;;;;;;;;;;; - -(rule (lower (has_type (tls_model (TlsModel.ElfGd)) (tls_value (symbol_value_data name _ _)))) - (elf_tls_get_addr name)) - -;;;;; Rules for `bitcast`;;;;;;;;; - -;; These rules should probably be handled in `gen_bitcast`, but it's convenient to have that return -;; a single register, instead of a `ValueRegs` -(rule 3 (lower (has_type $I128 (bitcast _ v @ (value_type (ty_supported_vec _))))) - (value_regs - (gen_extractlane $I64X2 v 0) - (gen_extractlane $I64X2 v 1))) - -;; Move the high half into a vector register, and then use vslide1up to move it up and -;; insert the lower half in one instruction. -(rule 2 (lower (has_type (ty_supported_vec _) (bitcast _ v @ (value_type $I128)))) - (let ((lo XReg (value_regs_get v 0)) - (hi XReg (value_regs_get v 1)) - (vstate VState (vstate_from_type $I64X2)) - (vec VReg (rv_vmv_sx hi vstate))) - (rv_vslide1up_vx vec vec lo (unmasked) vstate))) - -;; `gen_bitcast` below only works with single register values, so handle I128 -;; specially here. -(rule 1 (lower (has_type $I128 (bitcast _ v @ (value_type $I128)))) - v) - -(rule 0 (lower (has_type out_ty (bitcast _ v @ (value_type in_ty)))) - (gen_bitcast v in_ty out_ty)) - -;;;;; Rules for `ceil`;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (ceil x))) - (gen_float_round (FRM.RUP) x ty)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (ceil x))) - (gen_vec_round x (FRM.RUP) ty)) - -;;;;; Rules for `floor`;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (floor x))) - (gen_float_round (FRM.RDN) x ty)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (floor x))) - (gen_vec_round x (FRM.RDN) ty)) - -;;;;; Rules for `trunc`;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (trunc x))) - (gen_float_round (FRM.RTZ) x ty)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (trunc x))) - (gen_vec_round x (FRM.RTZ) ty)) - -;;;;; Rules for `nearest`;;;;;;;;; -(rule 0 (lower (has_type (ty_supported_float ty) (nearest x))) - (gen_float_round (FRM.RNE) x ty)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (nearest x))) - (gen_vec_round x (FRM.RNE) ty)) - - -;;;;; Rules for `select_spectre_guard`;;;;;;;;; - -;; SelectSpectreGuard is equivalent to Select, but we should not use a branch based -;; lowering for it. Instead we use a conditional move based lowering. -;; -;; We don't have cmov's in RISC-V either, but we can emulate those using bitwise -;; operations, which is what we do below. - -;; Base case: use `gen_bmask` to generate a 0 mask or -1 mask from the value of -;; `cmp`. This is then used with some bit twiddling to produce the final result. -(rule 0 (lower (has_type (fits_in_64 _) (select_spectre_guard cmp x y))) - (let ((mask XReg (gen_bmask cmp))) - (rv_or (rv_and mask x) (rv_andn y mask)))) -(rule 1 (lower (has_type $I128 (select_spectre_guard cmp x y))) - (let ((mask XReg (gen_bmask cmp))) - (value_regs - (rv_or (rv_and mask (value_regs_get x 0)) (rv_andn (value_regs_get y 0) mask)) - (rv_or (rv_and mask (value_regs_get x 1)) (rv_andn (value_regs_get y 1) mask))))) - -;; Special case when an argument is the constant zero as some ands and ors -;; can be folded away. -(rule 2 (lower (has_type (fits_in_64 _) (select_spectre_guard cmp (i64_from_iconst 0) y))) - (rv_andn y (gen_bmask cmp))) -(rule 3 (lower (has_type (fits_in_64 _) (select_spectre_guard cmp x (i64_from_iconst 0)))) - (rv_and x (gen_bmask cmp))) - -;;;;; Rules for `bmask`;;;;;;;;; -(rule - (lower (has_type oty (bmask x))) - (lower_bmask x oty)) - -;; N.B.: the Ret itself is generated by the ABI. -(rule (lower (return args)) - (lower_return args)) - -;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;; - -(rule (lower (get_frame_pointer)) - (gen_mov_from_preg (fp_reg))) - -(rule (lower (get_stack_pointer)) - (gen_mov_from_preg (sp_reg))) - -(rule (lower (get_return_address)) - (load_ra)) - -;;; Rules for `iabs` ;;;;;;;;;;;;; - -;; I64 and lower -;; Generate the following code: -;; sext.{b,h,w} a0, a0 -;; neg a1, a0 -;; max a0, a0, a1 -(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iabs x))) - (let ((extended XReg (sext x)) - (negated XReg (rv_neg extended))) - (gen_select_xreg (cmp_gt extended negated) extended negated))) - -;; For vectors we generate the same code, but with vector instructions -;; we can skip the sign extension, since the vector unit will only process -;; Element Sized chunks. -(rule 1 (lower (has_type (ty_supported_vec ty) (iabs x))) - (let ((negated VReg (rv_vneg_v x (unmasked) ty))) - (rv_vmax_vv x negated (unmasked) ty))) - -;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (call (func_ref_data sig_ref extname dist) inputs)) - (gen_call sig_ref extname dist inputs)) - -(rule (lower (call_indirect sig_ref val inputs)) - (gen_call_indirect sig_ref val inputs)) - -;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (return_call (func_ref_data sig_ref extname dist) args)) - (gen_return_call sig_ref extname dist args)) - -(rule (lower (return_call_indirect sig_ref callee args)) - (gen_return_call_indirect sig_ref callee args)) - - -;;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (extractlane x @ (value_type ty) (u8_from_uimm8 idx))) - (gen_extractlane ty x idx)) - -;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; We can insert a lane by using a masked splat from an X register. -;; Build a mask that is only enabled in the lane we want to insert. -;; Then use a masked splat (vmerge) to insert the value. -(rule 0 (lower (insertlane vec @ (value_type (ty_supported_vec ty)) - val @ (value_type (ty_int _)) - (u8_from_uimm8 lane))) - (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) - (rv_vmerge_vxm vec val mask ty))) - -;; Similar to above, but using the float variants of the instructions. -(rule 1 (lower (insertlane vec @ (value_type (ty_supported_vec ty)) - val @ (value_type (ty_supported_float _)) - (u8_from_uimm8 lane))) - (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) - (rv_vfmerge_vfm vec val mask ty))) - -;; If we are inserting from an Imm5 const we can use the immediate -;; variant of vmerge. -(rule 2 (lower (insertlane vec @ (value_type (ty_supported_vec ty)) - (i64_from_iconst (imm5_from_i64 imm)) - (u8_from_uimm8 lane))) - (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) - (rv_vmerge_vim vec imm mask ty))) - -;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type ty (splat n @ (value_type (ty_supported_float _))))) - (rv_vfmv_vf n ty)) - -(rule 1 (lower (has_type ty (splat n @ (value_type (ty_int_ref_scalar_64 _))))) - (rv_vmv_vx n ty)) - -(rule 2 (lower (has_type ty (splat (iconst (u64_from_imm64 (imm5_from_u64 imm)))))) - (rv_vmv_vi imm ty)) - -;; TODO: We can splat out more patterns by using for example a vmv.v.i i8x16 for -;; a i64x2 const with a compatible bit pattern. The AArch64 Backend does something -;; similar in its splat rules. -;; TODO: Look through bitcasts when splatting out registers. We can use -;; `vmv.v.x` in a `(splat.f32x4 (bitcast.f32 val))`. And vice versa for integers. - -;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_supported_vec ty) (uadd_sat x y))) - (rv_vsaddu_vv x y (unmasked) ty)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (uadd_sat x (splat y)))) - (rv_vsaddu_vx x y (unmasked) ty)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (uadd_sat (splat x) y))) - (rv_vsaddu_vx y x (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (uadd_sat x y))) - (if-let y_imm (replicated_imm5 y)) - (rv_vsaddu_vi x y_imm (unmasked) ty)) - -(rule 4 (lower (has_type (ty_supported_vec ty) (uadd_sat x y))) - (if-let x_imm (replicated_imm5 x)) - (rv_vsaddu_vi y x_imm (unmasked) ty)) - -;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_supported_vec ty) (sadd_sat x y))) - (rv_vsadd_vv x y (unmasked) ty)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (sadd_sat x (splat y)))) - (rv_vsadd_vx x y (unmasked) ty)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (sadd_sat (splat x) y))) - (rv_vsadd_vx y x (unmasked) ty)) - -(rule 3 (lower (has_type (ty_supported_vec ty) (sadd_sat x y))) - (if-let y_imm (replicated_imm5 y)) - (rv_vsadd_vi x y_imm (unmasked) ty)) - -(rule 4 (lower (has_type (ty_supported_vec ty) (sadd_sat x y))) - (if-let x_imm (replicated_imm5 x)) - (rv_vsadd_vi y x_imm (unmasked) ty)) - -;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_supported_vec ty) (usub_sat x y))) - (rv_vssubu_vv x y (unmasked) ty)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (usub_sat x (splat y)))) - (rv_vssubu_vx x y (unmasked) ty)) - -;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_supported_vec ty) (ssub_sat x y))) - (rv_vssub_vv x y (unmasked) ty)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (ssub_sat x (splat y)))) - (rv_vssub_vx x y (unmasked) ty)) - -;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Here we do a Vector Reduce operation. Get the unsigned minimum value of any -;; lane in the vector. The fixed input to the reduce operation is a 1. -;; This way, if any lane is 0, the result will be 0. Otherwise, the result will -;; be a 1. -;; The reduce operation leaves the result in the lowest lane, we then move it -;; into the destination X register. -(rule (lower (vall_true x @ (value_type (ty_supported_vec ty)))) - (if-let one (i8_to_imm5 1)) - ;; We don't need to broadcast the immediate into all lanes, only into lane 0. - ;; I did it this way since it uses one less instruction than with a vmv.s.x. - (let ((fixed VReg (rv_vmv_vi one ty)) - (min VReg (rv_vredminu_vs x fixed (unmasked) ty))) - (rv_vmv_xs min ty))) - - -;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Here we do a Vector Reduce operation. Get the unsigned maximum value of the -;; input vector register. Move the max to an X register, and do a `snez` on it -;; to ensure its either 1 or 0. -(rule (lower (vany_true x @ (value_type (ty_supported_vec ty)))) - (let ((max VReg (rv_vredmaxu_vs x x (unmasked) ty)) - (x_max XReg (rv_vmv_xs max ty))) - (rv_snez x_max))) - - -;;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; To check if the MSB of a lane is set, we do a `vmslt` with zero, this sets -;; the mask bit to 1 if the value is negative (MSB 1) and 0 if not. We can then -;; just move that mask to an X Register. -;; -;; We must ensure that the move to the X register has a SEW with enough bits -;; to hold the full mask. Additionally, in some cases (e.g. i64x2) we are going -;; to read some tail bits. These are undefined, so we need to further mask them -;; off. -(rule (lower (vhigh_bits x @ (value_type (ty_supported_vec ty)))) - (let ((mask VReg (rv_vmslt_vx x (zero_reg) (unmasked) ty)) - ;; Here we only need I64X1, but emit an AVL of 2 since it - ;; saves one vector state change in the case of I64X2. - ;; - ;; TODO: For types that have more lanes than element bits, we can - ;; use the original type as a VState and avoid a state change. - (x_mask XReg (rv_vmv_xs mask (vstate_from_type $I64X2)))) - (gen_andi x_mask (ty_lane_mask ty)))) - -;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_supported_vec ty) (swizzle x y))) - (rv_vrgather_vv x y (unmasked) ty)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (swizzle x (splat y)))) - (rv_vrgather_vx x y (unmasked) ty)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (swizzle x y))) - (if-let y_imm (replicated_uimm5 y)) - (rv_vrgather_vi x y_imm (unmasked) ty)) - -;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Use a vrgather to load all 0-15 lanes from x. And then modify the mask to load all -;; 16-31 lanes from y. Finally, use a vor to combine the two vectors. -;; -;; vrgather will insert a 0 for lanes that are out of bounds, so we can let it load -;; negative and out of bounds indexes. -(rule (lower (has_type (ty_supported_vec ty @ $I8X16) (shuffle x y (vconst_from_immediate mask)))) - (if-let neg16 (i8_to_imm5 -16)) - (let ((x_mask VReg (gen_constant ty mask)) - (x_lanes VReg (rv_vrgather_vv x x_mask (unmasked) ty)) - (y_mask VReg (rv_vadd_vi x_mask neg16 (unmasked) ty)) - (y_lanes VReg (rv_vrgather_vv y y_mask (unmasked) ty))) - (rv_vor_vv x_lanes y_lanes (unmasked) ty))) - -;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Slide down half the vector, and do a signed extension. -(rule 0 (lower (has_type (ty_supported_vec out_ty) (swiden_high x @ (value_type in_ty)))) - (rv_vsext_vf2 (gen_slidedown_half in_ty x) (unmasked) out_ty)) - -(rule 1 (lower (has_type (ty_supported_vec out_ty) (swiden_high (swiden_high x @ (value_type in_ty))))) - (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) - (rv_vsext_vf4 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) - -(rule 2 (lower (has_type (ty_supported_vec out_ty) (swiden_high (swiden_high (swiden_high x @ (value_type in_ty)))))) - (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) - (rv_vsext_vf8 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) - -;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Slide down half the vector, and do a zero extension. -(rule 0 (lower (has_type (ty_supported_vec out_ty) (uwiden_high x @ (value_type in_ty)))) - (rv_vzext_vf2 (gen_slidedown_half in_ty x) (unmasked) out_ty)) - -(rule 1 (lower (has_type (ty_supported_vec out_ty) (uwiden_high (uwiden_high x @ (value_type in_ty))))) - (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) - (rv_vzext_vf4 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) - -(rule 2 (lower (has_type (ty_supported_vec out_ty) (uwiden_high (uwiden_high (uwiden_high x @ (value_type in_ty)))))) - (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) - (rv_vzext_vf8 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) - -;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_supported_vec out_ty) (swiden_low x))) - (rv_vsext_vf2 x (unmasked) out_ty)) - -(rule 1 (lower (has_type (ty_supported_vec out_ty) (swiden_low (swiden_low x)))) - (rv_vsext_vf4 x (unmasked) out_ty)) - -(rule 2 (lower (has_type (ty_supported_vec out_ty) (swiden_low (swiden_low (swiden_low x))))) - (rv_vsext_vf8 x (unmasked) out_ty)) - -;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_supported_vec out_ty) (uwiden_low x))) - (rv_vzext_vf2 x (unmasked) out_ty)) - -(rule 1 (lower (has_type (ty_supported_vec out_ty) (uwiden_low (uwiden_low x)))) - (rv_vzext_vf4 x (unmasked) out_ty)) - -(rule 2 (lower (has_type (ty_supported_vec out_ty) (uwiden_low (uwiden_low (uwiden_low x))))) - (rv_vzext_vf8 x (unmasked) out_ty)) - -;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; We don't have a dedicated instruction for this, rearrange the register elements -;; and use a vadd. -;; -;; We do this by building two masks, one for the even elements and one for the odd -;; elements. Using vcompress we can extract the elements and group them together. -;; -;; This is likely not the optimal way of doing this. LLVM does this using a bunch -;; of vrgathers (See: https://godbolt.org/z/jq8Wj8WG4), that doesn't seem to be -;; too much better than this. -;; -;; However V8 does something better. They use 2 vcompresses using LMUL2, that means -;; that they can do the whole thing in 3 instructions (2 vcompress + vadd). We don't -;; support LMUL > 1, so we can't do that. -(rule (lower (has_type (ty_supported_vec ty) (iadd_pairwise x y))) - (if-let half_size (u64_to_uimm5 (u64_udiv (ty_lane_count ty) 2))) - (let ((odd_mask VReg (gen_vec_mask 0x5555555555555555)) - (lhs_lo VReg (rv_vcompress_vm x odd_mask ty)) - (lhs_hi VReg (rv_vcompress_vm y odd_mask ty)) - (lhs VReg (rv_vslideup_vvi lhs_lo lhs_hi half_size (unmasked) ty)) - - (even_mask VReg (gen_vec_mask 0xAAAAAAAAAAAAAAAA)) - (rhs_lo VReg (rv_vcompress_vm x even_mask ty)) - (rhs_hi VReg (rv_vcompress_vm y even_mask ty)) - (rhs VReg (rv_vslideup_vvi rhs_lo rhs_hi half_size (unmasked) ty))) - (rv_vadd_vv lhs rhs (unmasked) ty))) - -;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; `avg_round` computes the unsigned average with rounding: a := (x + y + 1) // 2 -;; -;; See Section "2–5 Average of Two Integers" of the Hacker's Delight book -;; -;; The floor average of two integers without overflow can be computed as: -;; t = (x & y) + ((x ^ y) >> 1) -;; -;; The right shift should be a logical shift if the integers are unsigned. -;; -;; We are however interested in the ceiling average (x + y + 1). For that -;; we use a special rounding mode in the right shift instruction. -;; -;; For the right shift instruction we use `vssrl` which is a Scaling Shift -;; Right Logical instruction using the `vxrm` fixed-point rounding mode. The -;; default rounding mode is `rnu` (round-to-nearest-up (add +0.5 LSB)). -;; Which is coincidentally the rounding mode we want for `avg_round`. -(rule (lower (has_type (ty_supported_vec ty) (avg_round x y))) - (if-let one (u64_to_uimm5 1)) - (let ((lhs VReg (rv_vand_vv x y (unmasked) ty)) - (xor VReg (rv_vxor_vv x y (unmasked) ty)) - (rhs VReg (rv_vssrl_vi xor one (unmasked) ty))) - (rv_vadd_vv lhs rhs (unmasked) ty))) - -;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_supported_vec ty) (scalar_to_vector x))) - (if (ty_vector_float ty)) - (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) - (elem VReg (rv_vfmv_sf x ty)) - (mask VReg (gen_vec_mask 1))) - (rv_vmerge_vvm zero elem mask ty))) - -(rule 1 (lower (has_type (ty_supported_vec ty) (scalar_to_vector x))) - (if (ty_vector_not_float ty)) - (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) - (mask VReg (gen_vec_mask 1))) - (rv_vmerge_vxm zero x mask ty))) - -(rule 2 (lower (has_type (ty_supported_vec ty) (scalar_to_vector (imm5_from_value x)))) - (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) - (mask VReg (gen_vec_mask 1))) - (rv_vmerge_vim zero x mask ty))) - -;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule 0 (lower (has_type (ty_supported_vec ty) (sqmul_round_sat x y))) - (rv_vsmul_vv x y (unmasked) ty)) - -(rule 1 (lower (has_type (ty_supported_vec ty) (sqmul_round_sat x (splat y)))) - (rv_vsmul_vx x y (unmasked) ty)) - -(rule 2 (lower (has_type (ty_supported_vec ty) (sqmul_round_sat (splat x) y))) - (rv_vsmul_vx y x (unmasked) ty)) - -;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (has_type (ty_supported_vec out_ty) (snarrow x @ (value_type in_ty) y))) - (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) - (if-let zero (u64_to_uimm5 0)) - (let ((x_clip VReg (rv_vnclip_wi x zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) - (y_clip VReg (rv_vnclip_wi y zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) - (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) - -;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(rule (lower (has_type (ty_supported_vec out_ty) (uunarrow x @ (value_type in_ty) y))) - (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) - (if-let zero (u64_to_uimm5 0)) - (let ((x_clip VReg (rv_vnclipu_wi x zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) - (y_clip VReg (rv_vnclipu_wi y zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) - (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) - -;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; We don't have a instruction that saturates a signed source into an unsigned destination. -;; To correct for this we just remove negative values using `vmax` and then use the normal -;; unsigned to unsigned narrowing instruction. - -(rule (lower (has_type (ty_supported_vec out_ty) (unarrow x @ (value_type in_ty) y))) - (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) - (if-let zero (u64_to_uimm5 0)) - (let ((x_pos VReg (rv_vmax_vx x (zero_reg) (unmasked) in_ty)) - (y_pos VReg (rv_vmax_vx y (zero_reg) (unmasked) in_ty)) - (x_clip VReg (rv_vnclipu_wi x_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) - (y_clip VReg (rv_vnclipu_wi y_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) - (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) diff --git a/hbcb/src/lower.rs b/hbcb/src/lower.rs deleted file mode 100644 index 6e12183..0000000 --- a/hbcb/src/lower.rs +++ /dev/null @@ -1,36 +0,0 @@ -//! Lowering rules for Riscv64. -use { - crate::{inst::*, Riscv64Backend}, - cranelift_codegen::{ - ir::Inst as IRInst, - machinst::{lower::*, *}, - }, -}; -pub mod isle; - -//============================================================================= -// Lowering-backend trait implementation. - -impl LowerBackend for Riscv64Backend { - type FactFlowState = (); - type MInst = Inst; - - fn lower(&self, ctx: &mut Lower, ir_inst: IRInst) -> Option { - isle::lower(ctx, self, ir_inst) - } - - fn lower_branch( - &self, - ctx: &mut Lower, - ir_inst: IRInst, - targets: &[MachLabel], - ) -> Option<()> { - isle::lower_branch(ctx, self, ir_inst, targets) - } - - fn maybe_pinned_reg(&self) -> Option { - // pinned register is a register that you want put anything in it. - // right now riscv64 not support this feature. - None - } -} diff --git a/hbcb/src/lower/isle.rs b/hbcb/src/lower/isle.rs deleted file mode 100644 index 293e082..0000000 --- a/hbcb/src/lower/isle.rs +++ /dev/null @@ -1,733 +0,0 @@ -//! ISLE integration glue code for riscv64 lowering. - -// Pull in the ISLE generated code. -#[allow(unused)] -pub mod generated_code; -// Types that the generated ISLE code uses via `use super::*`. -use { - self::generated_code::{FpuOPWidth, VecAluOpRR, VecLmul}, - crate::{ - abi::Riscv64ABICallSite, - inst::*, - lower::args::{FReg, VReg, WritableFReg, WritableVReg, WritableXReg, XReg}, - Riscv64Backend, - }, - cranelift_codegen::{ - ir::{ - immediates::*, types::*, AtomicRmwOp, BlockCall, ExternalName, Inst, InstructionData, - MemFlags, Opcode, TrapCode, Value, ValueList, - }, - isa::{self}, - machinst::{ - self, isle::*, ArgPair, CallInfo, InstOutput, IsTailCall, MachInst, Reg, VCodeConstant, - VCodeConstantData, - }, - }, - generated_code::MInst, - regalloc2::PReg, - std::{boxed::Box, vec::Vec}, -}; - -type BoxCallInfo = Box>; -type BoxCallIndInfo = Box>; -type BoxReturnCallInfo = Box>; -type BoxReturnCallIndInfo = Box>; -type BoxExternalName = Box; -type VecMachLabel = Vec; -type VecArgPair = Vec; - -pub(crate) struct RV64IsleContext<'a, 'b, I, B> -where - I: VCodeInst, - B: LowerBackend, -{ - pub lower_ctx: &'a mut Lower<'b, I>, - pub backend: &'a B, - /// Precalucated value for the minimum vector register size. Will be 0 if - /// vectors are not supported. - min_vec_reg_size: u64, -} - -impl<'a, 'b> RV64IsleContext<'a, 'b, MInst, Riscv64Backend> { - fn new(lower_ctx: &'a mut Lower<'b, MInst>, backend: &'a Riscv64Backend) -> Self { - Self { lower_ctx, backend, min_vec_reg_size: backend.isa_flags.min_vec_reg_size() } - } -} - -impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> { - cranelift_codegen::isle_lower_prelude_methods!(); - - cranelift_codegen::isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICallSite); - - fn gen_return_call( - &mut self, - callee_sig: SigRef, - callee: ExternalName, - distance: RelocDistance, - args: ValueSlice, - ) -> InstOutput { - let caller_conv = isa::CallConv::Tail; - debug_assert_eq!( - self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), - caller_conv, - "Can only do `return_call`s from within a `tail` calling convention function" - ); - - let call_site = Riscv64ABICallSite::from_func( - self.lower_ctx.sigs(), - callee_sig, - &callee, - IsTailCall::Yes, - distance, - caller_conv, - self.backend.flags().clone(), - ); - call_site.emit_return_call(self.lower_ctx, args); - - InstOutput::new() - } - - fn gen_return_call_indirect( - &mut self, - callee_sig: SigRef, - callee: Value, - args: ValueSlice, - ) -> InstOutput { - let caller_conv = isa::CallConv::Tail; - debug_assert_eq!( - self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), - caller_conv, - "Can only do `return_call`s from within a `tail` calling convention function" - ); - - let callee = self.put_in_reg(callee); - - let call_site = Riscv64ABICallSite::from_ptr( - self.lower_ctx.sigs(), - callee_sig, - callee, - IsTailCall::Yes, - caller_conv, - self.backend.flags().clone(), - ); - call_site.emit_return_call(self.lower_ctx, args); - - InstOutput::new() - } - - fn fpu_op_width_from_ty(&mut self, ty: Type) -> FpuOPWidth { - match ty { - F16 => FpuOPWidth::H, - F32 => FpuOPWidth::S, - F64 => FpuOPWidth::D, - F128 => FpuOPWidth::Q, - _ => unimplemented!("Unimplemented FPU Op Width: {ty}"), - } - } - - fn vreg_new(&mut self, r: Reg) -> VReg { - VReg::new(r).unwrap() - } - - fn writable_vreg_new(&mut self, r: WritableReg) -> WritableVReg { - r.map(|wr| VReg::new(wr).unwrap()) - } - - fn writable_vreg_to_vreg(&mut self, arg0: WritableVReg) -> VReg { - arg0.to_reg() - } - - fn writable_vreg_to_writable_reg(&mut self, arg0: WritableVReg) -> WritableReg { - arg0.map(|vr| vr.to_reg()) - } - - fn vreg_to_reg(&mut self, arg0: VReg) -> Reg { - *arg0 - } - - fn xreg_new(&mut self, r: Reg) -> XReg { - XReg::new(r).unwrap() - } - - fn writable_xreg_new(&mut self, r: WritableReg) -> WritableXReg { - r.map(|wr| XReg::new(wr).unwrap()) - } - - fn writable_xreg_to_xreg(&mut self, arg0: WritableXReg) -> XReg { - arg0.to_reg() - } - - fn writable_xreg_to_writable_reg(&mut self, arg0: WritableXReg) -> WritableReg { - arg0.map(|xr| xr.to_reg()) - } - - fn xreg_to_reg(&mut self, arg0: XReg) -> Reg { - *arg0 - } - - fn freg_new(&mut self, r: Reg) -> FReg { - FReg::new(r).unwrap() - } - - fn writable_freg_new(&mut self, r: WritableReg) -> WritableFReg { - r.map(|wr| FReg::new(wr).unwrap()) - } - - fn writable_freg_to_freg(&mut self, arg0: WritableFReg) -> FReg { - arg0.to_reg() - } - - fn writable_freg_to_writable_reg(&mut self, arg0: WritableFReg) -> WritableReg { - arg0.map(|fr| fr.to_reg()) - } - - fn freg_to_reg(&mut self, arg0: FReg) -> Reg { - *arg0 - } - - fn min_vec_reg_size(&mut self) -> u64 { - self.min_vec_reg_size - } - - #[inline] - fn ty_vec_fits_in_register(&mut self, ty: Type) -> Option { - if ty.is_vector() && (ty.bits() as u64) <= self.min_vec_reg_size() { - Some(ty) - } else { - None - } - } - - fn ty_supported(&mut self, ty: Type) -> Option { - let lane_type = ty.lane_type(); - let supported = match ty { - // Scalar integers are always supported - ty if ty.is_int() => true, - // Floating point types depend on certain extensions - F16 => self.backend.isa_flags.has_zfh(), - // F32 depends on the F extension - F32 => self.backend.isa_flags.has_f(), - // F64 depends on the D extension - F64 => self.backend.isa_flags.has_d(), - - // The base vector extension supports all integer types, up to 64 bits - // as long as they fit in a register - ty if self.ty_vec_fits_in_register(ty).is_some() - && lane_type.is_int() - && lane_type.bits() <= 64 => - { - true - } - - // If the vector type has floating point lanes then the spec states: - // - // Vector instructions where any floating-point vector operand’s EEW is not a - // supported floating-point type width (which includes when FLEN < SEW) are reserved. - // - // So we also have to check if we support the scalar version of the type. - ty if self.ty_vec_fits_in_register(ty).is_some() - && lane_type.is_float() - && self.ty_supported(lane_type).is_some() - // Additionally the base V spec only supports 32 and 64 bit floating point types. - && (lane_type.bits() == 32 || lane_type.bits() == 64) => - { - true - } - - // Otherwise do not match - _ => false, - }; - - if supported { - Some(ty) - } else { - None - } - } - - fn ty_supported_float(&mut self, ty: Type) -> Option { - self.ty_supported(ty).filter(|ty| ty.is_float()) - } - - fn ty_supported_vec(&mut self, ty: Type) -> Option { - self.ty_supported(ty).filter(|ty| ty.is_vector()) - } - - fn load_ra(&mut self) -> Reg { - if self.backend.flags.preserve_frame_pointers() { - let tmp = self.temp_writable_reg(I64); - self.emit(&MInst::Load { - rd: tmp, - op: LoadOP::Ld, - flags: MemFlags::trusted(), - from: AMode::FPOffset(8), - }); - tmp.to_reg() - } else { - link_reg() - } - } - - fn label_to_br_target(&mut self, label: MachLabel) -> CondBrTarget { - CondBrTarget::Label(label) - } - - fn imm12_and(&mut self, imm: Imm12, x: u64) -> Imm12 { - Imm12::from_i16(imm.as_i16() & (x as i16)) - } - - fn fli_constant_from_u64(&mut self, ty: Type, imm: u64) -> Option { - FliConstant::maybe_from_u64(ty, imm) - } - - fn fli_constant_from_negated_u64(&mut self, ty: Type, imm: u64) -> Option { - let negated_imm = match ty { - F64 => imm ^ 0x8000000000000000, - F32 => imm ^ 0x80000000, - _ => unimplemented!(), - }; - - FliConstant::maybe_from_u64(ty, negated_imm) - } - - fn i64_generate_imm(&mut self, imm: i64) -> Option<(Imm20, Imm12)> { - MInst::generate_imm(imm as u64) - } - - fn i64_shift_for_lui(&mut self, imm: i64) -> Option<(u64, Imm12)> { - let trailing = imm.trailing_zeros(); - if trailing < 12 { - return None; - } - - let shift = Imm12::from_i16(trailing as i16 - 12); - let base = (imm as u64) >> trailing; - Some((base, shift)) - } - - fn i64_shift(&mut self, imm: i64) -> Option<(i64, Imm12)> { - let trailing = imm.trailing_zeros(); - // We can do without this condition but in this case there is no need to go further - if trailing == 0 { - return None; - } - - let shift = Imm12::from_i16(trailing as i16); - let base = imm >> trailing; - Some((base, shift)) - } - - #[inline] - fn emit(&mut self, arg0: &MInst) -> Unit { - self.lower_ctx.emit(arg0.clone()); - } - - #[inline] - fn imm12_from_u64(&mut self, arg0: u64) -> Option { - Imm12::maybe_from_u64(arg0) - } - - #[inline] - fn imm12_from_i64(&mut self, arg0: i64) -> Option { - Imm12::maybe_from_i64(arg0) - } - - #[inline] - fn imm12_is_zero(&mut self, imm: Imm12) -> Option<()> { - if imm.as_i16() == 0 { - Some(()) - } else { - None - } - } - - #[inline] - fn imm20_from_u64(&mut self, arg0: u64) -> Option { - Imm20::maybe_from_u64(arg0) - } - - #[inline] - fn imm20_from_i64(&mut self, arg0: i64) -> Option { - Imm20::maybe_from_i64(arg0) - } - - #[inline] - fn imm20_is_zero(&mut self, imm: Imm20) -> Option<()> { - if imm.as_i32() == 0 { - Some(()) - } else { - None - } - } - - #[inline] - fn imm5_from_u64(&mut self, arg0: u64) -> Option { - Imm5::maybe_from_i8(i8::try_from(arg0 as i64).ok()?) - } - - #[inline] - fn imm5_from_i64(&mut self, arg0: i64) -> Option { - Imm5::maybe_from_i8(i8::try_from(arg0).ok()?) - } - - #[inline] - fn i8_to_imm5(&mut self, arg0: i8) -> Option { - Imm5::maybe_from_i8(arg0) - } - - #[inline] - fn uimm5_bitcast_to_imm5(&mut self, arg0: UImm5) -> Imm5 { - Imm5::from_bits(arg0.bits() as u8) - } - - #[inline] - fn uimm5_from_u8(&mut self, arg0: u8) -> Option { - UImm5::maybe_from_u8(arg0) - } - - #[inline] - fn uimm5_from_u64(&mut self, arg0: u64) -> Option { - arg0.try_into().ok().and_then(UImm5::maybe_from_u8) - } - - #[inline] - fn writable_zero_reg(&mut self) -> WritableReg { - writable_zero_reg() - } - - #[inline] - fn zero_reg(&mut self) -> XReg { - XReg::new(zero_reg()).unwrap() - } - - fn is_non_zero_reg(&mut self, reg: XReg) -> Option<()> { - if reg != self.zero_reg() { - Some(()) - } else { - None - } - } - - fn is_zero_reg(&mut self, reg: XReg) -> Option<()> { - if reg == self.zero_reg() { - Some(()) - } else { - None - } - } - - #[inline] - fn imm_from_bits(&mut self, val: u64) -> Imm12 { - Imm12::maybe_from_u64(val).unwrap() - } - - #[inline] - fn imm_from_neg_bits(&mut self, val: i64) -> Imm12 { - Imm12::maybe_from_i64(val).unwrap() - } - - fn frm_bits(&mut self, frm: &FRM) -> UImm5 { - UImm5::maybe_from_u8(frm.bits()).unwrap() - } - - fn u8_as_i32(&mut self, x: u8) -> i32 { - x as i32 - } - - fn imm12_const(&mut self, val: i32) -> Imm12 { - if let Some(res) = Imm12::maybe_from_i64(val as i64) { - res - } else { - panic!("Unable to make an Imm12 value from {val}") - } - } - - fn imm12_const_add(&mut self, val: i32, add: i32) -> Imm12 { - Imm12::maybe_from_i64((val + add) as i64).unwrap() - } - - fn imm12_add(&mut self, val: Imm12, add: i32) -> Option { - Imm12::maybe_from_i64((i32::from(val.as_i16()) + add).into()) - } - - // - fn gen_shamt(&mut self, ty: Type, shamt: XReg) -> ValueRegs { - let ty_bits = if ty.bits() > 64 { 64 } else { ty.bits() }; - let ty_bits = i16::try_from(ty_bits).unwrap(); - let shamt = { - let tmp = self.temp_writable_reg(I64); - self.emit(&MInst::AluRRImm12 { - alu_op: AluOPRRI::Andi, - rd: tmp, - rs: shamt.to_reg(), - imm12: Imm12::from_i16(ty_bits - 1), - }); - tmp.to_reg() - }; - let len_sub_shamt = { - let tmp = self.temp_writable_reg(I64); - self.emit(&MInst::load_imm12(tmp, Imm12::from_i16(ty_bits))); - let len_sub_shamt = self.temp_writable_reg(I64); - self.emit(&MInst::AluRRR { - alu_op: AluOPRRR::Sub, - rd: len_sub_shamt, - rs1: tmp.to_reg(), - rs2: shamt, - }); - len_sub_shamt.to_reg() - }; - ValueRegs::two(shamt, len_sub_shamt) - } - - fn has_v(&mut self) -> bool { - self.backend.isa_flags.has_v() - } - - fn has_m(&mut self) -> bool { - self.backend.isa_flags.has_m() - } - - fn has_zfa(&mut self) -> bool { - self.backend.isa_flags.has_zfa() - } - - fn has_zfh(&mut self) -> bool { - self.backend.isa_flags.has_zfh() - } - - fn has_zbkb(&mut self) -> bool { - self.backend.isa_flags.has_zbkb() - } - - fn has_zba(&mut self) -> bool { - self.backend.isa_flags.has_zba() - } - - fn has_zbb(&mut self) -> bool { - self.backend.isa_flags.has_zbb() - } - - fn has_zbc(&mut self) -> bool { - self.backend.isa_flags.has_zbc() - } - - fn has_zbs(&mut self) -> bool { - self.backend.isa_flags.has_zbs() - } - - fn has_zicond(&mut self) -> bool { - self.backend.isa_flags.has_zicond() - } - - fn gen_reg_offset_amode(&mut self, base: Reg, offset: i64) -> AMode { - AMode::RegOffset(base, offset) - } - - fn gen_sp_offset_amode(&mut self, offset: i64) -> AMode { - AMode::SPOffset(offset) - } - - fn gen_fp_offset_amode(&mut self, offset: i64) -> AMode { - AMode::FPOffset(offset) - } - - fn gen_stack_slot_amode(&mut self, ss: StackSlot, offset: i64) -> AMode { - // Offset from beginning of stackslot area. - let stack_off = self.lower_ctx.abi().sized_stackslot_offsets()[ss] as i64; - let sp_off: i64 = stack_off + offset; - AMode::SlotOffset(sp_off) - } - - fn gen_const_amode(&mut self, c: VCodeConstant) -> AMode { - AMode::Const(c) - } - - fn valid_atomic_transaction(&mut self, ty: Type) -> Option { - if ty.is_int() && ty.bits() <= 64 { - Some(ty) - } else { - None - } - } - - fn is_atomic_rmw_max_etc(&mut self, op: &AtomicRmwOp) -> Option<(AtomicRmwOp, bool)> { - let op = *op; - match op { - crate::ir::AtomicRmwOp::Umin => Some((op, false)), - crate::ir::AtomicRmwOp::Umax => Some((op, false)), - crate::ir::AtomicRmwOp::Smin => Some((op, true)), - crate::ir::AtomicRmwOp::Smax => Some((op, true)), - _ => None, - } - } - - fn sinkable_inst(&mut self, val: Value) -> Option { - self.is_sinkable_inst(val) - } - - fn load_op(&mut self, ty: Type) -> LoadOP { - LoadOP::from_type(ty) - } - - fn store_op(&mut self, ty: Type) -> StoreOP { - StoreOP::from_type(ty) - } - - fn load_ext_name(&mut self, name: ExternalName, offset: i64) -> Reg { - let tmp = self.temp_writable_reg(I64); - self.emit(&MInst::LoadExtName { rd: tmp, name: Box::new(name), offset }); - tmp.to_reg() - } - - fn gen_stack_addr(&mut self, slot: StackSlot, offset: Offset32) -> Reg { - let result = self.temp_writable_reg(I64); - let i = self.lower_ctx.abi().sized_stackslot_addr(slot, i64::from(offset) as u32, result); - self.emit(&i); - result.to_reg() - } - - fn atomic_amo(&mut self) -> AMO { - AMO::SeqCst - } - - fn lower_br_table(&mut self, index: Reg, targets: &[MachLabel]) -> Unit { - let tmp1 = self.temp_writable_reg(I64); - let tmp2 = self.temp_writable_reg(I64); - self.emit(&MInst::BrTable { index, tmp1, tmp2, targets: targets.to_vec() }); - } - - fn fp_reg(&mut self) -> PReg { - px_reg(8) - } - - fn sp_reg(&mut self) -> PReg { - px_reg(2) - } - - #[inline] - fn int_compare(&mut self, kind: &IntCC, rs1: XReg, rs2: XReg) -> IntegerCompare { - IntegerCompare { kind: *kind, rs1: rs1.to_reg(), rs2: rs2.to_reg() } - } - - #[inline] - fn int_compare_decompose(&mut self, cmp: IntegerCompare) -> (IntCC, XReg, XReg) { - (cmp.kind, self.xreg_new(cmp.rs1), self.xreg_new(cmp.rs2)) - } - - #[inline] - fn vstate_from_type(&mut self, ty: Type) -> VState { - VState::from_type(ty) - } - - #[inline] - fn vstate_mf2(&mut self, vs: VState) -> VState { - VState { vtype: VType { lmul: VecLmul::LmulF2, ..vs.vtype }, ..vs } - } - - fn vec_alu_rr_dst_type(&mut self, op: &VecAluOpRR) -> Type { - MInst::canonical_type_for_rc(op.dst_regclass()) - } - - fn bclr_imm(&mut self, ty: Type, i: u64) -> Option { - // Only consider those bits in the immediate which are up to the width - // of `ty`. - let neg = !i & (u64::MAX >> (64 - ty.bits())); - if neg.count_ones() != 1 { - return None; - } - Imm12::maybe_from_u64(neg.trailing_zeros().into()) - } - - fn binvi_imm(&mut self, i: u64) -> Option { - if i.count_ones() != 1 { - return None; - } - Imm12::maybe_from_u64(i.trailing_zeros().into()) - } - - fn bseti_imm(&mut self, i: u64) -> Option { - self.binvi_imm(i) - } - - fn fcvt_smin_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 { - match (int, float) { - // Saturating cases for larger integers are handled using the - // `fcvt.{w,d}.{s,d}` instruction directly, that automatically - // saturates up/down to the correct limit. - // - // NB: i32/i64 don't use this function because the native RISC-V - // instruction does everything we already need, so only cases for - // i8/i16 are listed here. - (I8, F32) if saturating => f32::from(i8::MIN).to_bits().into(), - (I8, F64) if saturating => f64::from(i8::MIN).to_bits(), - (I16, F32) if saturating => f32::from(i16::MIN).to_bits().into(), - (I16, F64) if saturating => f64::from(i16::MIN).to_bits(), - - (_, F32) if !saturating => f32_cvt_to_int_bounds(true, int.bits()).0.to_bits().into(), - (_, F64) if !saturating => f64_cvt_to_int_bounds(true, int.bits()).0.to_bits(), - _ => unimplemented!(), - } - } - - fn fcvt_smax_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 { - // NB: see `fcvt_smin_bound` for some more comments - match (int, float) { - (I8, F32) if saturating => f32::from(i8::MAX).to_bits().into(), - (I8, F64) if saturating => f64::from(i8::MAX).to_bits(), - (I16, F32) if saturating => f32::from(i16::MAX).to_bits().into(), - (I16, F64) if saturating => f64::from(i16::MAX).to_bits(), - - (_, F32) if !saturating => f32_cvt_to_int_bounds(true, int.bits()).1.to_bits().into(), - (_, F64) if !saturating => f64_cvt_to_int_bounds(true, int.bits()).1.to_bits(), - _ => unimplemented!(), - } - } - - fn fcvt_umax_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 { - // NB: see `fcvt_smin_bound` for some more comments - match (int, float) { - (I8, F32) if saturating => f32::from(u8::MAX).to_bits().into(), - (I8, F64) if saturating => f64::from(u8::MAX).to_bits(), - (I16, F32) if saturating => f32::from(u16::MAX).to_bits().into(), - (I16, F64) if saturating => f64::from(u16::MAX).to_bits(), - - (_, F32) if !saturating => f32_cvt_to_int_bounds(false, int.bits()).1.to_bits().into(), - (_, F64) if !saturating => f64_cvt_to_int_bounds(false, int.bits()).1.to_bits(), - _ => unimplemented!(), - } - } - - fn fcvt_umin_bound(&mut self, float: Type, saturating: bool) -> u64 { - assert!(!saturating); - match float { - F32 => (-1.0f32).to_bits().into(), - F64 => (-1.0f64).to_bits(), - _ => unimplemented!(), - } - } -} - -/// The main entry point for lowering with ISLE. -pub(crate) fn lower( - lower_ctx: &mut Lower, - backend: &Riscv64Backend, - inst: Inst, -) -> Option { - // TODO: reuse the ISLE context across lowerings so we can reuse its - // internal heap allocations. - let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); - generated_code::constructor_lower(&mut isle_ctx, inst) -} - -/// The main entry point for branch lowering with ISLE. -pub(crate) fn lower_branch( - lower_ctx: &mut Lower, - backend: &Riscv64Backend, - branch: Inst, - targets: &[MachLabel], -) -> Option<()> { - // TODO: reuse the ISLE context across lowerings so we can reuse its - // internal heap allocations. - let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); - generated_code::constructor_lower_branch(&mut isle_ctx, branch, targets) -} diff --git a/hbcb/src/lower/isle/generated_code.rs b/hbcb/src/lower/isle/generated_code.rs deleted file mode 100644 index d5d1fea..0000000 --- a/hbcb/src/lower/isle/generated_code.rs +++ /dev/null @@ -1,9 +0,0 @@ -// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of -// the generated ISLE source below because we include!() it. We must include!() it because its path -// depends on an environment variable; and also because of this, we can't do the `#[path = "..."] -// mod generated_code;` trick either. -#![allow(dead_code, unreachable_code, unreachable_patterns)] -#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)] -#![allow(irrefutable_let_patterns, clippy::clone_on_copy)] - -include!(concat!(env!("ISLE_DIR"), "/isle_riscv64.rs")); diff --git a/hbcb/src/prelude.isle b/hbcb/src/prelude.isle deleted file mode 100644 index 413ff00..0000000 --- a/hbcb/src/prelude.isle +++ /dev/null @@ -1,752 +0,0 @@ -;; This is a prelude of standard definitions for ISLE, the instruction-selector -;; DSL, as we use it bound to our interfaces. -;; -;; Note that all `extern` functions here are typically defined in the -;; `isle_prelude_methods` macro defined in `src/isa/isle.rs` - -;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; `()` -(type Unit (primitive Unit)) - -(decl pure unit () Unit) -(extern constructor unit unit) - -(type bool (primitive bool)) -(extern const $true bool) -(extern const $false bool) - -(type u8 (primitive u8)) -(type u16 (primitive u16)) -(type u32 (primitive u32)) -(type u64 (primitive u64)) -(type u128 (primitive u128)) -(type usize (primitive usize)) - -(type i8 (primitive i8)) -(type i16 (primitive i16)) -(type i32 (primitive i32)) -(type i64 (primitive i64)) -(type i128 (primitive i128)) -(type isize (primitive isize)) - -;; `cranelift-entity`-based identifiers. -(type Type (primitive Type)) -(type Value (primitive Value)) -(type ValueList (primitive ValueList)) -(type BlockCall (primitive BlockCall)) - -;; ISLE representation of `&[Value]`. -(type ValueSlice (primitive ValueSlice)) - -;; Extract the type of a `Value`. -(decl value_type (Type) Value) -(extern extractor infallible value_type value_type) - -;; Extractor that matches a `u32` only if non-negative. -(decl u32_nonnegative (u32) u32) -(extern extractor u32_nonnegative u32_nonnegative) - -;; Extractor that pulls apart an Offset32 into a i32 with the raw -;; signed-32-bit twos-complement bits. -(decl offset32 (i32) Offset32) -(extern extractor infallible offset32 offset32) - -;; Pure/fallible constructor that tests if one u32 is less than or -;; equal to another. -(decl pure partial u32_lteq (u32 u32) Unit) -(extern constructor u32_lteq u32_lteq) - -;; Pure/fallible constructor that tests if one u8 is less than or -;; equal to another. -(decl pure partial u8_lteq (u8 u8) Unit) -(extern constructor u8_lteq u8_lteq) - -;; Pure/fallible constructor that tests if one u8 is strictly less -;; than another. -(decl pure partial u8_lt (u8 u8) Unit) -(extern constructor u8_lt u8_lt) - -;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl pure u8_as_i8 (u8) i8) -(extern constructor u8_as_i8 u8_as_i8) - -(decl pure u8_as_u32 (u8) u32) -(extern constructor u8_as_u32 u8_as_u32) -(convert u8 u32 u8_as_u32) - -(decl pure u8_as_u64 (u8) u64) -(extern constructor u8_as_u64 u8_as_u64) -(convert u8 u64 u8_as_u64) - -(decl pure u16_as_i16 (u16) i16) -(extern constructor u16_as_i16 u16_as_i16) - -(decl pure u16_as_u32 (u16) u32) -(extern constructor u16_as_u32 u16_as_u32) -(convert u16 u32 u16_as_u32) - -(decl pure u16_as_u64 (u16) u64) -(extern constructor u16_as_u64 u16_as_u64) -(convert u16 u64 u16_as_u64) - -(decl pure u64_as_u8 (u64) u8) -(extern constructor u64_as_u8 u64_as_u8) - -(decl pure u64_as_u16 (u64) u16) -(extern constructor u64_as_u16 u64_as_u16) - -(decl pure u64_as_i64 (u64) i64) -(extern constructor u64_as_i64 u64_as_i64) - -(decl pure partial u16_try_from_u64 (u64) u16) -(extern constructor u16_try_from_u64 u16_try_from_u64) - -(decl pure partial u32_try_from_u64 (u64) u32) -(extern constructor u32_try_from_u64 u32_try_from_u64) - -(decl pure partial i8_try_from_u64 (u64) i8) -(extern constructor i8_try_from_u64 i8_try_from_u64) - -(decl pure partial i16_try_from_u64 (u64) i16) -(extern constructor i16_try_from_u64 i16_try_from_u64) - -(decl pure partial i32_try_from_u64 (u64) i32) -(extern constructor i32_try_from_u64 i32_try_from_u64) - -(decl pure u32_as_u64 (u32) u64) -(extern constructor u32_as_u64 u32_as_u64) -(convert u32 u64 u32_as_u64) - -(decl pure i32_as_i64 (i32) i64) -(extern constructor i32_as_i64 i32_as_i64) -(convert i32 i64 i32_as_i64) - -(decl pure i64_as_u64 (i64) u64) -(extern constructor i64_as_u64 i64_as_u64) - -(decl pure i64_neg (i64) i64) -(extern constructor i64_neg i64_neg) - -(decl pure i8_neg (i8) i8) -(extern constructor i8_neg i8_neg) - -(decl u128_as_u64 (u64) u128) -(extern extractor u128_as_u64 u128_as_u64) - -(decl u64_as_u32 (u32) u64) -(extern extractor u64_as_u32 u64_as_u32) - -(decl u32_as_u16 (u16) u32) -(extern extractor u32_as_u16 u32_as_u16) - -(decl pure u64_as_i32 (u64) i32) -(extern constructor u64_as_i32 u64_as_i32) - -;;;; Primitive Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl pure u8_and (u8 u8) u8) -(extern constructor u8_and u8_and) - -(decl pure u8_shl (u8 u8) u8) -(extern constructor u8_shl u8_shl) - -(decl pure u8_shr (u8 u8) u8) -(extern constructor u8_shr u8_shr) - -(decl pure u8_sub (u8 u8) u8) -(extern constructor u8_sub u8_sub) - -(decl pure u32_add (u32 u32) u32) -(extern constructor u32_add u32_add) - -(decl pure u32_sub (u32 u32) u32) -(extern constructor u32_sub u32_sub) - -(decl pure u32_and (u32 u32) u32) -(extern constructor u32_and u32_and) - -(decl pure u32_shl (u32 u32) u32) -(extern constructor u32_shl u32_shl) - -;; Pure/fallible constructor that tries to add two `u32`s, interpreted -;; as signed values, and fails to match on overflow. -(decl pure partial s32_add_fallible (i32 i32) i32) -(extern constructor s32_add_fallible s32_add_fallible) - -(decl pure u64_add (u64 u64) u64) -(extern constructor u64_add u64_add) - -(decl pure u64_sub (u64 u64) u64) -(extern constructor u64_sub u64_sub) - -(decl pure u64_mul (u64 u64) u64) -(extern constructor u64_mul u64_mul) - -(decl pure partial u64_sdiv (u64 u64) u64) -(extern constructor u64_sdiv u64_sdiv) - -(decl pure partial u64_udiv (u64 u64) u64) -(extern constructor u64_udiv u64_udiv) - -(decl pure u64_and (u64 u64) u64) -(extern constructor u64_and u64_and) - -(decl pure u64_or (u64 u64) u64) -(extern constructor u64_or u64_or) - -(decl pure u64_xor (u64 u64) u64) -(extern constructor u64_xor u64_xor) - -(decl pure u64_shl (u64 u64) u64) -(extern constructor u64_shl u64_shl) - -(decl pure imm64_shl (Type Imm64 Imm64) Imm64) -(extern constructor imm64_shl imm64_shl) - -(decl pure imm64_ushr (Type Imm64 Imm64) Imm64) -(extern constructor imm64_ushr imm64_ushr) - -(decl pure imm64_sshr (Type Imm64 Imm64) Imm64) -(extern constructor imm64_sshr imm64_sshr) - -(decl pure u64_not (u64) u64) -(extern constructor u64_not u64_not) - -(decl pure u64_eq (u64 u64) bool) -(extern constructor u64_eq u64_eq) - -(decl pure u64_le (u64 u64) bool) -(extern constructor u64_le u64_le) - -(decl pure u64_lt (u64 u64) bool) -(extern constructor u64_lt u64_lt) - -(decl pure i64_shr (i64 i64) i64) -(extern constructor i64_shr i64_shr) - -(decl pure i64_ctz (i64) i64) -(extern constructor i64_ctz i64_ctz) - -;; Sign extends a u64 from ty bits up to 64bits -(decl pure i64_sextend_u64 (Type u64) i64) -(extern constructor i64_sextend_u64 i64_sextend_u64) - -(decl pure i64_sextend_imm64 (Type Imm64) i64) -(extern constructor i64_sextend_imm64 i64_sextend_imm64) - -(decl pure u64_uextend_imm64 (Type Imm64) u64) -(extern constructor u64_uextend_imm64 u64_uextend_imm64) - -(decl pure imm64_icmp (Type IntCC Imm64 Imm64) Imm64) -(extern constructor imm64_icmp imm64_icmp) - -(decl u64_is_zero (bool) u64) -(extern extractor infallible u64_is_zero u64_is_zero) - -(decl i64_is_zero (bool) i64) -(extern extractor infallible i64_is_zero i64_is_zero) - -(decl u64_zero () u64) -(extractor (u64_zero) (u64_is_zero $true)) - -(decl u64_nonzero (u64) u64) -(extractor (u64_nonzero x) (and (u64_is_zero $false) x)) - -(decl i64_nonzero (i64) i64) -(extractor (i64_nonzero x) (and (i64_is_zero $false) x)) - -(decl pure u64_is_odd (u64) bool) -(extern constructor u64_is_odd u64_is_odd) - -;; Each of these extractors tests whether the upper half of the input equals the -;; lower half of the input -(decl u128_replicated_u64 (u64) u128) -(extern extractor u128_replicated_u64 u128_replicated_u64) -(decl u64_replicated_u32 (u64) u64) -(extern extractor u64_replicated_u32 u64_replicated_u32) -(decl u32_replicated_u16 (u64) u64) -(extern extractor u32_replicated_u16 u32_replicated_u16) -(decl u16_replicated_u8 (u8) u64) -(extern extractor u16_replicated_u8 u16_replicated_u8) - -;; Floating point operations - -(decl pure partial f16_min (Ieee16 Ieee16) Ieee16) -(extern constructor f16_min f16_min) -(decl pure partial f16_max (Ieee16 Ieee16) Ieee16) -(extern constructor f16_max f16_max) -(decl pure f16_neg (Ieee16) Ieee16) -(extern constructor f16_neg f16_neg) -(decl pure f16_abs (Ieee16) Ieee16) -(extern constructor f16_abs f16_abs) -(decl pure f16_copysign (Ieee16 Ieee16) Ieee16) -(extern constructor f16_copysign f16_copysign) -(decl pure partial f32_add (Ieee32 Ieee32) Ieee32) -(extern constructor f32_add f32_add) -(decl pure partial f32_sub (Ieee32 Ieee32) Ieee32) -(extern constructor f32_sub f32_sub) -(decl pure partial f32_mul (Ieee32 Ieee32) Ieee32) -(extern constructor f32_mul f32_mul) -(decl pure partial f32_div (Ieee32 Ieee32) Ieee32) -(extern constructor f32_div f32_div) -(decl pure partial f32_sqrt (Ieee32) Ieee32) -(extern constructor f32_sqrt f32_sqrt) -(decl pure partial f32_ceil (Ieee32) Ieee32) -(extern constructor f32_ceil f32_ceil) -(decl pure partial f32_floor (Ieee32) Ieee32) -(extern constructor f32_floor f32_floor) -(decl pure partial f32_trunc (Ieee32) Ieee32) -(extern constructor f32_trunc f32_trunc) -(decl pure partial f32_nearest (Ieee32) Ieee32) -(extern constructor f32_nearest f32_nearest) -(decl pure partial f32_min (Ieee32 Ieee32) Ieee32) -(extern constructor f32_min f32_min) -(decl pure partial f32_max (Ieee32 Ieee32) Ieee32) -(extern constructor f32_max f32_max) -(decl pure f32_neg (Ieee32) Ieee32) -(extern constructor f32_neg f32_neg) -(decl pure f32_abs (Ieee32) Ieee32) -(extern constructor f32_abs f32_abs) -(decl pure f32_copysign (Ieee32 Ieee32) Ieee32) -(extern constructor f32_copysign f32_copysign) -(decl pure partial f64_add (Ieee64 Ieee64) Ieee64) -(extern constructor f64_add f64_add) -(decl pure partial f64_sub (Ieee64 Ieee64) Ieee64) -(extern constructor f64_sub f64_sub) -(decl pure partial f64_mul (Ieee64 Ieee64) Ieee64) -(extern constructor f64_mul f64_mul) -(decl pure partial f64_div (Ieee64 Ieee64) Ieee64) -(extern constructor f64_div f64_div) -(decl pure partial f64_sqrt (Ieee64) Ieee64) -(extern constructor f64_sqrt f64_sqrt) -(decl pure partial f64_ceil (Ieee64) Ieee64) -(extern constructor f64_ceil f64_ceil) -(decl pure partial f64_floor (Ieee64) Ieee64) -(extern constructor f64_floor f64_floor) -(decl pure partial f64_trunc (Ieee64) Ieee64) -(extern constructor f64_trunc f64_trunc) -(decl pure partial f64_nearest (Ieee64) Ieee64) -(extern constructor f64_nearest f64_nearest) -(decl pure partial f64_min (Ieee64 Ieee64) Ieee64) -(extern constructor f64_min f64_min) -(decl pure partial f64_max (Ieee64 Ieee64) Ieee64) -(extern constructor f64_max f64_max) -(decl pure f64_neg (Ieee64) Ieee64) -(extern constructor f64_neg f64_neg) -(decl pure f64_abs (Ieee64) Ieee64) -(extern constructor f64_abs f64_abs) -(decl pure f64_copysign (Ieee64 Ieee64) Ieee64) -(extern constructor f64_copysign f64_copysign) -(decl pure partial f128_min (Ieee128 Ieee128) Ieee128) -(extern constructor f128_min f128_min) -(decl pure partial f128_max (Ieee128 Ieee128) Ieee128) -(extern constructor f128_max f128_max) -(decl pure f128_neg (Ieee128) Ieee128) -(extern constructor f128_neg f128_neg) -(decl pure f128_abs (Ieee128) Ieee128) -(extern constructor f128_abs f128_abs) -(decl pure f128_copysign (Ieee128 Ieee128) Ieee128) -(extern constructor f128_copysign f128_copysign) -(type Ieee128 (primitive Ieee128)) - -;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(extern const $I8 Type) -(extern const $I16 Type) -(extern const $I32 Type) -(extern const $I64 Type) -(extern const $I128 Type) - -(extern const $F16 Type) -(extern const $F32 Type) -(extern const $F64 Type) -(extern const $F128 Type) - -(extern const $I8X8 Type) -(extern const $I8X16 Type) -(extern const $I16X4 Type) -(extern const $I16X8 Type) -(extern const $I32X2 Type) -(extern const $I32X4 Type) -(extern const $I64X2 Type) - -(extern const $F32X4 Type) -(extern const $F64X2 Type) - -(extern const $I32X4XN Type) - -;; Get the unsigned minimum value for a given type. -;; This always zero, but is included for completeness. -(decl pure ty_umin (Type) u64) -(extern constructor ty_umin ty_umin) - -;; Get the unsigned maximum value for a given type. -(decl pure ty_umax (Type) u64) -(extern constructor ty_umax ty_umax) - -;; Get the signed minimum value for a given type. -(decl pure ty_smin (Type) u64) -(extern constructor ty_smin ty_smin) - -;; Get the signed maximum value for a given type. -(decl pure ty_smax (Type) u64) -(extern constructor ty_smax ty_smax) - -;; Get the bit width of a given type. -(decl pure ty_bits (Type) u8) -(extern constructor ty_bits ty_bits) - -;; Get the bit width of a given type. -(decl pure ty_bits_u16 (Type) u16) -(extern constructor ty_bits_u16 ty_bits_u16) - -;; Get the bit width of a given type. -(decl pure ty_bits_u64 (Type) u64) -(extern constructor ty_bits_u64 ty_bits_u64) - -;; Get a mask for the width of a given type. -(decl pure ty_mask (Type) u64) -(extern constructor ty_mask ty_mask) - -;; Get a mask that is set for each lane in a given type. -(decl pure ty_lane_mask (Type) u64) -(extern constructor ty_lane_mask ty_lane_mask) - -;; Get the number of lanes for a given type. -(decl pure ty_lane_count (Type) u64) -(extern constructor ty_lane_count ty_lane_count) - -;; Get the byte width of a given type. -(decl pure ty_bytes (Type) u16) -(extern constructor ty_bytes ty_bytes) - -;; Get the type of each lane in the given type. -(decl pure lane_type (Type) Type) -(extern constructor lane_type lane_type) - -;; Get a type with the same element type, but half the number of lanes. -(decl pure partial ty_half_lanes (Type) Type) -(extern constructor ty_half_lanes ty_half_lanes) - -;; Get a type with the same number of lanes but a lane type that is half as small. -(decl pure partial ty_half_width (Type) Type) -(extern constructor ty_half_width ty_half_width) - -;; Generate a mask for the maximum shift amount for a given type. i.e 31 for I32. -(decl pure ty_shift_mask (Type) u64) -(rule (ty_shift_mask ty) (u64_sub (ty_bits (lane_type ty)) 1)) - -;; Compare two types for equality. -(decl pure ty_equal (Type Type) bool) -(extern constructor ty_equal ty_equal) - -;;;; `cranelift_codegen::ir::MemFlags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; `MemFlags::trusted` -(decl pure mem_flags_trusted () MemFlags) -(extern constructor mem_flags_trusted mem_flags_trusted) - -;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Swap args of an IntCC flag. -(decl intcc_swap_args (IntCC) IntCC) -(extern constructor intcc_swap_args intcc_swap_args) - -;; Complement an IntCC flag. -(decl intcc_complement (IntCC) IntCC) -(extern constructor intcc_complement intcc_complement) - -;; This is a direct import of `IntCC::without_equal`. -;; Get the corresponding IntCC with the equal component removed. -;; For conditions without a zero component, this is a no-op. -(decl pure intcc_without_eq (IntCC) IntCC) -(extern constructor intcc_without_eq intcc_without_eq) - -;; Swap args of a FloatCC flag. -(decl floatcc_swap_args (FloatCC) FloatCC) -(extern constructor floatcc_swap_args floatcc_swap_args) - -;; Complement a FloatCC flag. -(decl floatcc_complement (FloatCC) FloatCC) -(extern constructor floatcc_complement floatcc_complement) - -;; True when this FloatCC involves an unordered comparison. -(decl pure floatcc_unordered (FloatCC) bool) -(extern constructor floatcc_unordered floatcc_unordered) - -;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl eq (Type Value Value) Value) -(extractor (eq ty x y) (icmp ty (IntCC.Equal) x y)) - -(decl ne (Type Value Value) Value) -(extractor (ne ty x y) (icmp ty (IntCC.NotEqual) x y)) - -(decl ult (Type Value Value) Value) -(extractor (ult ty x y) (icmp ty (IntCC.UnsignedLessThan) x y)) - -(decl ule (Type Value Value) Value) -(extractor (ule ty x y) (icmp ty (IntCC.UnsignedLessThanOrEqual) x y)) - -(decl ugt (Type Value Value) Value) -(extractor (ugt ty x y) (icmp ty (IntCC.UnsignedGreaterThan) x y)) - -(decl uge (Type Value Value) Value) -(extractor (uge ty x y) (icmp ty (IntCC.UnsignedGreaterThanOrEqual) x y)) - -(decl slt (Type Value Value) Value) -(extractor (slt ty x y) (icmp ty (IntCC.SignedLessThan) x y)) - -(decl sle (Type Value Value) Value) -(extractor (sle ty x y) (icmp ty (IntCC.SignedLessThanOrEqual) x y)) - -(decl sgt (Type Value Value) Value) -(extractor (sgt ty x y) (icmp ty (IntCC.SignedGreaterThan) x y)) - -(decl sge (Type Value Value) Value) -(extractor (sge ty x y) (icmp ty (IntCC.SignedGreaterThanOrEqual) x y)) - -;; An extractor that only matches types that can fit in 16 bits. -(decl fits_in_16 (Type) Type) -(extern extractor fits_in_16 fits_in_16) - -;; An extractor that only matches types that can fit in 32 bits. -(decl fits_in_32 (Type) Type) -(extern extractor fits_in_32 fits_in_32) - -;; An extractor that only matches types that can fit in 32 bits. -(decl lane_fits_in_32 (Type) Type) -(extern extractor lane_fits_in_32 lane_fits_in_32) - -;; An extractor that only matches types that can fit in 64 bits. -(decl fits_in_64 (Type) Type) -(extern extractor fits_in_64 fits_in_64) - -;; An extractor that only matches types that fit in exactly 32 bits. -(decl ty_32 (Type) Type) -(extern extractor ty_32 ty_32) - -;; An extractor that only matches types that fit in exactly 64 bits. -(decl ty_64 (Type) Type) -(extern extractor ty_64 ty_64) - -;; A pure constructor/extractor that only matches scalar integers, and -;; references that can fit in 64 bits. -(decl pure partial ty_int_ref_scalar_64 (Type) Type) -(extern constructor ty_int_ref_scalar_64 ty_int_ref_scalar_64) -(extern extractor ty_int_ref_scalar_64 ty_int_ref_scalar_64_extract) - -;; An extractor that matches 32- and 64-bit types only. -(decl ty_32_or_64 (Type) Type) -(extern extractor ty_32_or_64 ty_32_or_64) - -;; An extractor that matches 8- and 16-bit types only. -(decl ty_8_or_16 (Type) Type) -(extern extractor ty_8_or_16 ty_8_or_16) - -;; An extractor that matches 16- and 32-bit types only. -(decl ty_16_or_32 (Type) Type) -(extern extractor ty_16_or_32 ty_16_or_32) - -;; An extractor that matches int types that fit in 32 bits. -(decl int_fits_in_32 (Type) Type) -(extern extractor int_fits_in_32 int_fits_in_32) - -;; An extractor that matches I64. -(decl ty_int_ref_64 (Type) Type) -(extern extractor ty_int_ref_64 ty_int_ref_64) - -;; An extractor that matches int or reference types bigger than 16 bits but at most 64 bits. -(decl ty_int_ref_16_to_64 (Type) Type) -(extern extractor ty_int_ref_16_to_64 ty_int_ref_16_to_64) - -;; An extractor that only matches integers. -(decl ty_int (Type) Type) -(extern extractor ty_int ty_int) - -;; An extractor that only matches scalar types, float or int or ref's. -(decl ty_scalar (Type) Type) -(extern extractor ty_scalar ty_scalar) - -;; An extractor that only matches scalar floating-point types--F32 or F64. -(decl ty_scalar_float (Type) Type) -(extern extractor ty_scalar_float ty_scalar_float) - -;; An extractor that matches scalar floating-point types or vector types. -(decl ty_float_or_vec (Type) Type) -(extern extractor ty_float_or_vec ty_float_or_vec) - -;; A pure constructor that only matches vector floating-point types. -(decl pure partial ty_vector_float (Type) Type) -(extern constructor ty_vector_float ty_vector_float) - -;; A pure constructor that only matches vector types with lanes which -;; are not floating-point. -(decl pure partial ty_vector_not_float (Type) Type) -(extern constructor ty_vector_not_float ty_vector_not_float) - -;; A pure constructor/extractor that only matches 64-bit vector types. -(decl pure partial ty_vec64 (Type) Type) -(extern constructor ty_vec64 ty_vec64_ctor) -(extern extractor ty_vec64 ty_vec64) - -;; An extractor that only matches 128-bit vector types. -(decl ty_vec128 (Type) Type) -(extern extractor ty_vec128 ty_vec128) - -;; An extractor that only matches dynamic vector types with a 64-bit -;; base type. -(decl ty_dyn_vec64 (Type) Type) -(extern extractor ty_dyn_vec64 ty_dyn_vec64) - -;; An extractor that only matches dynamic vector types with a 128-bit -;; base type. -(decl ty_dyn_vec128 (Type) Type) -(extern extractor ty_dyn_vec128 ty_dyn_vec128) - -;; An extractor that only matches 64-bit vector types with integer -;; lanes (I8X8, I16X4, I32X2) -(decl ty_vec64_int (Type) Type) -(extern extractor ty_vec64_int ty_vec64_int) - -;; An extractor that only matches 128-bit vector types with integer -;; lanes (I8X16, I16X8, I32X4, I64X2). -(decl ty_vec128_int (Type) Type) -(extern extractor ty_vec128_int ty_vec128_int) - -;; An extractor that only matches types that can be a 64-bit address. -(decl ty_addr64 (Type) Type) -(extern extractor ty_addr64 ty_addr64) - -;; A pure constructor that matches everything except vectors with size 32X2. -(decl pure partial not_vec32x2 (Type) Type) -(extern constructor not_vec32x2 not_vec32x2) - -;; An extractor that matches everything except I64X2 -(decl not_i64x2 () Type) -(extern extractor not_i64x2 not_i64x2) - -;; Extract a `u8` from an `Uimm8`. -(decl u8_from_uimm8 (u8) Uimm8) -(extern extractor infallible u8_from_uimm8 u8_from_uimm8) - -;; Extract a `u64` from a `bool`. -(decl u64_from_bool (u64) bool) -(extern extractor infallible u64_from_bool u64_from_bool) - -;; Extract a `u64` from an `Imm64`. -(decl u64_from_imm64 (u64) Imm64) -(extern extractor infallible u64_from_imm64 u64_from_imm64) - -;; Extract a `u64` from an `Imm64` which is not zero. -(decl nonzero_u64_from_imm64 (u64) Imm64) -(extern extractor nonzero_u64_from_imm64 nonzero_u64_from_imm64) - -;; If the given `Imm64` is a power-of-two, extract its log2 value. -(decl imm64_power_of_two (u64) Imm64) -(extern extractor imm64_power_of_two imm64_power_of_two) - -;; Create a new Imm64. -(decl pure imm64 (u64) Imm64) -(extern constructor imm64 imm64) - -;; Create a new Imm64, masked to the width of the given type. -(decl pure imm64_masked (Type u64) Imm64) -(extern constructor imm64_masked imm64_masked) - -;; Extract a `u16` from an `Ieee16`. -(decl u16_from_ieee16 (u16) Ieee16) -(extern extractor infallible u16_from_ieee16 u16_from_ieee16) - -;; Extract a `u32` from an `Ieee32`. -(decl u32_from_ieee32 (u32) Ieee32) -(extern extractor infallible u32_from_ieee32 u32_from_ieee32) - -;; Extract a `u64` from an `Ieee64`. -(decl u64_from_ieee64 (u64) Ieee64) -(extern extractor infallible u64_from_ieee64 u64_from_ieee64) - -;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given -;; type. Will only match when there is more than one lane. -(decl multi_lane (u32 u32) Type) -(extern extractor multi_lane multi_lane) - -;; Match a dynamic-lane type, extracting (# bits per lane) from the given -;; type. -(decl dynamic_lane (u32 u32) Type) -(extern extractor dynamic_lane dynamic_lane) - -;; An extractor that only matches 64-bit dynamic vector types with integer -;; lanes (I8X8XN, I16X4XN, I32X2XN) -(decl ty_dyn64_int (Type) Type) -(extern extractor ty_dyn64_int ty_dyn64_int) - -;; An extractor that only matches 128-bit dynamic vector types with integer -;; lanes (I8X16XN, I16X8XN, I32X4XN, I64X2XN). -(decl ty_dyn128_int (Type) Type) -(extern extractor ty_dyn128_int ty_dyn128_int) - -;; Convert an `Offset32` to a primitive number. -(decl pure offset32_to_i32 (Offset32) i32) -(extern constructor offset32_to_i32 offset32_to_i32) - -;; Convert a number to an `Offset32` -(decl pure i32_to_offset32 (i32) Offset32) -(extern constructor i32_to_offset32 i32_to_offset32) - -;; This is a direct import of `IntCC::unsigned`. -;; Get the corresponding IntCC with the signed component removed. -;; For conditions without a signed component, this is a no-op. -(decl pure intcc_unsigned (IntCC) IntCC) -(extern constructor intcc_unsigned intcc_unsigned) - -;; Pure constructor that only matches signed integer cond codes. -(decl pure partial signed_cond_code (IntCC) IntCC) -(extern constructor signed_cond_code signed_cond_code) - -;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl pure trap_code_division_by_zero () TrapCode) -(extern constructor trap_code_division_by_zero trap_code_division_by_zero) - -(decl pure trap_code_integer_overflow () TrapCode) -(extern constructor trap_code_integer_overflow trap_code_integer_overflow) - -(decl pure trap_code_bad_conversion_to_integer () TrapCode) -(extern constructor trap_code_bad_conversion_to_integer trap_code_bad_conversion_to_integer) - -;;;; Helpers for tail recursion loops ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; A range of integers to loop through. -(type Range (primitive Range)) - -;; Create a new range from `start` through `end` (exclusive). -(decl pure range (usize usize) Range) -(extern constructor range range) - -;; A view on the current state of the range. -(type RangeView extern - (enum - (Empty) - (NonEmpty (index usize) (rest Range)))) - -;; View the current state of the range. -(decl range_view (RangeView) Range) -(extern extractor infallible range_view range_view) - -;; Extractor to test whether a range is empty. -(decl range_empty () Range) -(extractor (range_empty) (range_view (RangeView.Empty))) - -;; Extractor to return the first value in the range, and a sub-range -;; containing the remaining values. -(decl range_unwrap (usize Range) Range) -(extractor (range_unwrap index rest) (range_view (RangeView.NonEmpty index rest))) - -;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(convert Offset32 i32 offset32_to_i32) -(convert i32 Offset32 i32_to_offset32) - diff --git a/hbcb/src/prelude_lower.isle b/hbcb/src/prelude_lower.isle deleted file mode 100644 index ec34312..0000000 --- a/hbcb/src/prelude_lower.isle +++ /dev/null @@ -1,1082 +0,0 @@ -;; Prelude definitions specific to lowering environments (backends) in -;; ISLE. - -;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; `cranelift-entity`-based identifiers. -(type Inst (primitive Inst)) - -;; ISLE representation of `Vec` -(type VecMask extern (enum)) - -(type ValueRegs (primitive ValueRegs)) -(type WritableValueRegs (primitive WritableValueRegs)) - -;; Instruction lowering result: a vector of `ValueRegs`. -(type InstOutput (primitive InstOutput)) -;; (Mutable) builder to incrementally construct an `InstOutput`. -(type InstOutputBuilder extern (enum)) - -;; Type to hold multiple Regs -(type MultiReg - (enum - (Empty) - (One (a Reg)) - (Two (a Reg) (b Reg)) - (Three (a Reg) (b Reg) (c Reg)) - (Four (a Reg) (b Reg) (c Reg) (d Reg)) - )) - -;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(type Reg (primitive Reg)) -(type WritableReg (primitive WritableReg)) -(type OptionWritableReg (primitive OptionWritableReg)) -(type VecReg extern (enum)) -(type VecWritableReg extern (enum)) -(type PReg (primitive PReg)) - -;; Construct a `ValueRegs` of one register. -(decl value_reg (Reg) ValueRegs) -(extern constructor value_reg value_reg) - -;; Construct a `WritableValueRegs` of one register. -(decl writable_value_reg (WritableReg) WritableValueRegs) -(extern constructor writable_value_reg writable_value_reg) - -;; Construct a `ValueRegs` of two registers. -(decl value_regs (Reg Reg) ValueRegs) -(extern constructor value_regs value_regs) - -;; Construct a `WritableValueRegs` of two registers. -(decl writable_value_regs (WritableReg WritableReg) WritableValueRegs) -(extern constructor writable_value_regs writable_value_regs) - -;; Construct an empty `ValueRegs` containing only invalid register sentinels. -(decl value_regs_invalid () ValueRegs) -(extern constructor value_regs_invalid value_regs_invalid) - -;; Construct an empty `InstOutput`. -(decl output_none () InstOutput) -(extern constructor output_none output_none) - -;; Construct a single-element `InstOutput`. -(decl output (ValueRegs) InstOutput) -(extern constructor output output) - -;; Construct a two-element `InstOutput`. -(decl output_pair (ValueRegs ValueRegs) InstOutput) -(extern constructor output_pair output_pair) - -;; Construct a single-element `InstOutput` from a single register. -(decl output_reg (Reg) InstOutput) -(rule (output_reg reg) (output (value_reg reg))) - -;; Construct a single-element `InstOutput` from a value. -(decl output_value (Value) InstOutput) -(rule (output_value val) (output (put_in_regs val))) - -;; Initially empty `InstOutput` builder. -(decl output_builder_new () InstOutputBuilder) -(extern constructor output_builder_new output_builder_new) - -;; Append a `ValueRegs` to an `InstOutput` under construction. -(decl output_builder_push (InstOutputBuilder ValueRegs) Unit) -(extern constructor output_builder_push output_builder_push) - -;; Finish building an `InstOutput` incrementally. -(decl output_builder_finish (InstOutputBuilder) InstOutput) -(extern constructor output_builder_finish output_builder_finish) - -;; Get a temporary register for writing. -(decl temp_writable_reg (Type) WritableReg) -(extern constructor temp_writable_reg temp_writable_reg) - -;; Get a temporary register for reading. -(decl temp_reg (Type) Reg) -(rule (temp_reg ty) - (writable_reg_to_reg (temp_writable_reg ty))) - -(decl is_valid_reg (bool) Reg) -(extern extractor infallible is_valid_reg is_valid_reg) - -;; Get or match the invalid register. -(decl invalid_reg () Reg) -(extern constructor invalid_reg invalid_reg) -(extractor (invalid_reg) (is_valid_reg $false)) - -;; Match any register but the invalid register. -(decl valid_reg (Reg) Reg) -(extractor (valid_reg reg) (and (is_valid_reg $true) reg)) - -;; Mark this value as used, to ensure that it gets lowered. -(decl mark_value_used (Value) Unit) -(extern constructor mark_value_used mark_value_used) - -;; Put the given value into a register. -;; -;; Asserts that the value fits into a single register, and doesn't require -;; multiple registers for its representation (like `i128` on x64 for example). -;; -;; As a side effect, this marks the value as used. -(decl put_in_reg (Value) Reg) -(extern constructor put_in_reg put_in_reg) - -;; Put the given value into one or more registers. -;; -;; As a side effect, this marks the value as used. -(decl put_in_regs (Value) ValueRegs) -(extern constructor put_in_regs put_in_regs) - -;; If the given reg is a real register, cause the value in reg to be in a virtual -;; reg, by copying it into a new virtual reg. -(decl ensure_in_vreg (Reg Type) Reg) -(extern constructor ensure_in_vreg ensure_in_vreg) - -;; Get the `n`th register inside a `ValueRegs`. -(decl value_regs_get (ValueRegs usize) Reg) -(extern constructor value_regs_get value_regs_get) - -;; Get the number of registers in a `ValueRegs`. -(decl pure value_regs_len (ValueRegs) usize) -(extern constructor value_regs_len value_regs_len) - -;; Get a range for the number of regs in a `ValueRegs`. -(decl value_regs_range (ValueRegs) Range) -(rule (value_regs_range regs) (range 0 (value_regs_len regs))) - -;; Put the value into one or more registers and return the first register. -;; -;; Unlike `put_in_reg`, this does not assert that the value fits in a single -;; register. This is useful for things like a `i128` shift amount, where we mask -;; the shift amount to the bit width of the value being shifted, and so the high -;; half of the `i128` won't ever be used. -;; -;; As a side effect, this marks that value as used. -(decl lo_reg (Value) Reg) -(rule (lo_reg val) - (let ((regs ValueRegs (put_in_regs val))) - (value_regs_get regs 0))) - -;; Convert a `PReg` into a `Reg`. -(decl preg_to_reg (PReg) Reg) -(extern constructor preg_to_reg preg_to_reg) - -;; Convert a MultiReg with three registers into an InstOutput containing -;; one ValueRegs containing the first two regs and one containing the third reg -(decl multi_reg_to_pair_and_single (MultiReg) InstOutput) -(rule (multi_reg_to_pair_and_single (MultiReg.Three a b c)) - (output_pair (value_regs a b) c)) - -;; Convert a MultiReg with two registers into an InstOutput containing one ValueRegs with both regs -(decl multi_reg_to_pair (MultiReg) InstOutput) -(rule (multi_reg_to_pair (MultiReg.Two a b)) - (value_regs a b)) - -;; Convert a MultiReg with one register into an InstOutput containing one ValueRegs with the register -(decl multi_reg_to_single (MultiReg) InstOutput) -(rule (multi_reg_to_single (MultiReg.One a)) - (value_reg a)) - -;; Add a range fact to a register, when compiling with -;; proof-carrying-code enabled. -(decl add_range_fact (Reg u16 u64 u64) Reg) -(extern constructor add_range_fact add_range_fact) - -;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(type MachLabel (primitive MachLabel)) -(type ValueLabel (primitive ValueLabel)) -(type UnwindInst (primitive UnwindInst)) -(type ExternalName (primitive ExternalName)) -(type BoxExternalName (primitive BoxExternalName)) -(type RelocDistance (primitive RelocDistance)) -(type VecArgPair extern (enum)) -(type VecRetPair extern (enum)) -(type CallArgList extern (enum)) -(type MachLabelSlice extern (enum)) -(type BoxVecMachLabel extern (enum)) - -;; Extract a the target from a MachLabelSlice with exactly one target. -(decl single_target (MachLabel) MachLabelSlice) -(extern extractor single_target single_target) - -;; Extract a the targets from a MachLabelSlice with exactly two targets. -(decl two_targets (MachLabel MachLabel) MachLabelSlice) -(extern extractor two_targets two_targets) - -;; Extract the default target and jump table from a MachLabelSlice. -(decl jump_table_targets (MachLabel BoxVecMachLabel) MachLabelSlice) -(extern extractor jump_table_targets jump_table_targets) - -;; The size of the jump table. -(decl jump_table_size (BoxVecMachLabel) u32) -(extern constructor jump_table_size jump_table_size) - -;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Extractor to get a `ValueSlice` out of a `ValueList`. -(decl value_list_slice (ValueSlice) ValueList) -(extern extractor infallible value_list_slice value_list_slice) - -;; Extractor to test whether a `ValueSlice` is empty. -(decl value_slice_empty () ValueSlice) -(extern extractor value_slice_empty value_slice_empty) - -;; Extractor to split a `ValueSlice` into its first element plus a tail. -(decl value_slice_unwrap (Value ValueSlice) ValueSlice) -(extern extractor value_slice_unwrap value_slice_unwrap) - -;; Return the length of a `ValueSlice`. -(decl value_slice_len (ValueSlice) usize) -(extern constructor value_slice_len value_slice_len) - -;; Return any element of a `ValueSlice`. -(decl value_slice_get (ValueSlice usize) Value) -(extern constructor value_slice_get value_slice_get) - -;; Extractor to get the first element from a value list, along with its tail as -;; a `ValueSlice`. -(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList) -(extractor (unwrap_head_value_list_1 head tail) - (value_list_slice (value_slice_unwrap head tail))) - -;; Extractor to get the first two elements from a value list, along with its -;; tail as a `ValueSlice`. -(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList) -(extractor (unwrap_head_value_list_2 head1 head2 tail) - (value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail)))) - -;; Turn a `Writable` into a `Reg` via `Writable::to_reg`. -(decl pure writable_reg_to_reg (WritableReg) Reg) -(extern constructor writable_reg_to_reg writable_reg_to_reg) - -;; Extract the result values for the given instruction. -(decl inst_results (ValueSlice) Inst) -(extern extractor infallible inst_results inst_results) - -;; Returns whether the given value is unused in this function and is a dead -;; result. -(decl pure value_is_unused (Value) bool) -(extern constructor value_is_unused value_is_unused) - -;; Extract the first result value of the given instruction. -(decl first_result (Value) Inst) -(extern extractor first_result first_result) - -;; Extract the `InstructionData` for an `Inst`. -(decl inst_data (InstructionData) Inst) -(extern extractor infallible inst_data inst_data) - -;; Extract the type of the instruction's first result. -(decl result_type (Type) Inst) -(extractor (result_type ty) - (first_result (value_type ty))) - -;; Extract the type of the instruction's first result and pass along the -;; instruction as well. -(decl has_type (Type Inst) Inst) -(extractor (has_type ty inst) - (and (result_type ty) - inst)) - -;; Match the instruction that defines the given value, if any. -(decl def_inst (Inst) Value) -(extern extractor def_inst def_inst) - -;; Extract a constant `u64` from a value defined by an `iconst`. -(decl u64_from_iconst (u64) Value) -(extractor (u64_from_iconst x) - (def_inst (iconst (u64_from_imm64 x)))) - -;; Extract a constant `i32` from a value defined by an `iconst`. -;; The value is sign extended to 32 bits. -(decl i32_from_iconst (i32) Value) -(extern extractor i32_from_iconst i32_from_iconst) - -;; Extract a constant `i64` from a value defined by an `iconst`. -;; The value is sign extended to 64 bits. -(decl i64_from_iconst (i64) Value) -(extern extractor i64_from_iconst i64_from_iconst) - -;; Match any zero value for iconst, fconst32, fconst64, vconst and splat. -(decl pure partial zero_value (Value) Value) -(extern constructor zero_value zero_value) - -;; Match a sinkable instruction from a value operand. -(decl pure partial is_sinkable_inst (Value) Inst) -(extern constructor is_sinkable_inst is_sinkable_inst) - -;; Match a uextend or any other instruction, "seeing through" the uextend if -;; present. -(decl maybe_uextend (Value) Value) -(extern extractor maybe_uextend maybe_uextend) - -;; Get an unsigned 8-bit immediate in a u8 from an Imm64, if possible. -(decl uimm8 (u8) Imm64) -(extern extractor uimm8 uimm8) - -;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Emit an instruction. -;; -;; This is low-level and side-effectful; it should only be used as an -;; implementation detail by helpers that preserve the SSA facade themselves. - -(decl emit (MInst) Unit) -(extern constructor emit emit) - -;; Sink an instruction. -;; -;; This is a side-effectful operation that notifies the context that the -;; instruction has been sunk into another instruction, and no longer needs to -;; be lowered. -(decl sink_inst (Inst) Unit) -(extern constructor sink_inst sink_inst) - -;; Constant pool emission. - -(type VCodeConstant (primitive VCodeConstant)) - -;; Add a u64 little-endian constant to the in-memory constant pool and -;; return a VCodeConstant index that refers to it. This is -;; side-effecting but idempotent (constants are deduplicated). -(decl emit_u64_le_const (u64) VCodeConstant) -(extern constructor emit_u64_le_const emit_u64_le_const) - -;; Add a u128 little-endian constant to the in-memory constant pool and -;; return a VCodeConstant index that refers to it. This is -;; side-effecting but idempotent (constants are deduplicated). -(decl emit_u128_le_const (u128) VCodeConstant) -(extern constructor emit_u128_le_const emit_u128_le_const) - -;; Fetch the VCodeConstant associated with a Constant. -(decl const_to_vconst (Constant) VCodeConstant) -(extern constructor const_to_vconst const_to_vconst) - -;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;; - -(type SideEffectNoResult (enum - (Inst (inst MInst)) - (Inst2 (inst1 MInst) - (inst2 MInst)) - (Inst3 (inst1 MInst) - (inst2 MInst) - (inst3 MInst)))) - -;; Emit given side-effectful instruction. -(decl emit_side_effect (SideEffectNoResult) Unit) -(rule (emit_side_effect (SideEffectNoResult.Inst inst)) - (emit inst)) -(rule (emit_side_effect (SideEffectNoResult.Inst2 inst1 inst2)) - (let ((_ Unit (emit inst1))) - (emit inst2))) -(rule (emit_side_effect (SideEffectNoResult.Inst3 inst1 inst2 inst3)) - (let ((_ Unit (emit inst1)) - (_ Unit (emit inst2))) - (emit inst3))) - -;; Create an empty `InstOutput`, but do emit the given side-effectful -;; instruction. -(decl side_effect (SideEffectNoResult) InstOutput) -(rule (side_effect inst) - (let ((_ Unit (emit_side_effect inst))) - (output_none))) - -(decl side_effect_concat (SideEffectNoResult SideEffectNoResult) SideEffectNoResult) -(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst inst2)) - (SideEffectNoResult.Inst2 inst1 inst2)) -(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst2 inst2 inst3)) - (SideEffectNoResult.Inst3 inst1 inst2 inst3)) -(rule (side_effect_concat (SideEffectNoResult.Inst2 inst1 inst2) (SideEffectNoResult.Inst inst3)) - (SideEffectNoResult.Inst3 inst1 inst2 inst3)) - -;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Newtype wrapper around `MInst` for instructions that are used for their -;; effect on flags. -;; -;; Variant determines how result is given when combined with a -;; ConsumesFlags. See `with_flags` below for more. -(type ProducesFlags (enum - ;; For cases where the flags have been produced by another - ;; instruction, and we have out-of-band reasons to know - ;; that they won't be clobbered by the time we depend on - ;; them. - (AlreadyExistingFlags) - (ProducesFlagsSideEffect (inst MInst)) - (ProducesFlagsTwiceSideEffect (inst1 MInst) (inst2 MInst)) - ;; Not directly combinable with a ConsumesFlags; - ;; used in s390x and unwrapped directly by `trapif`. - (ProducesFlagsReturnsReg (inst MInst) (result Reg)) - (ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg)))) - -;; Chain another producer to a `ProducesFlags`. -(decl produces_flags_concat (ProducesFlags ProducesFlags) ProducesFlags) -(rule (produces_flags_concat (ProducesFlags.ProducesFlagsSideEffect inst1) (ProducesFlags.ProducesFlagsSideEffect inst2)) - (ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2)) - -;; Newtype wrapper around `MInst` for instructions that consume and produce flags -(type ConsumesAndProducesFlags (enum - (SideEffect (inst MInst)) - (ReturnsReg (inst MInst) (result Reg)))) - -;; Newtype wrapper around `MInst` for instructions that consume flags. -;; -;; Variant determines how result is given when combined with a -;; ProducesFlags. See `with_flags` below for more. -(type ConsumesFlags (enum - (ConsumesFlagsSideEffect (inst MInst)) - (ConsumesFlagsSideEffect2 (inst1 MInst) (inst2 MInst)) - (ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg)) - (ConsumesFlagsReturnsReg (inst MInst) (result Reg)) - (ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst) - (inst2 MInst) - (result ValueRegs)) - (ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst) - (inst2 MInst) - (inst3 MInst) - (inst4 MInst) - (result ValueRegs)))) - - - -;; Get the produced register out of a ProducesFlags. -(decl produces_flags_get_reg (ProducesFlags) Reg) -(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg) -(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsResultWithConsumer _ reg)) reg) - -;; Modify a ProducesFlags to use it only for its side-effect, ignoring -;; its result. -(decl produces_flags_ignore (ProducesFlags) ProducesFlags) -(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsReg inst _)) - (ProducesFlags.ProducesFlagsSideEffect inst)) -(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst _)) - (ProducesFlags.ProducesFlagsSideEffect inst)) - -;; Helper for combining two flags-consumer instructions that return a -;; single Reg, giving a ConsumesFlags that returns both values in a -;; ValueRegs. -(decl consumes_flags_concat (ConsumesFlags ConsumesFlags) ConsumesFlags) -(rule (consumes_flags_concat (ConsumesFlags.ConsumesFlagsReturnsReg inst1 reg1) - (ConsumesFlags.ConsumesFlagsReturnsReg inst2 reg2)) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs - inst1 - inst2 - (value_regs reg1 reg2))) -(rule (consumes_flags_concat - (ConsumesFlags.ConsumesFlagsSideEffect inst1) - (ConsumesFlags.ConsumesFlagsSideEffect inst2)) - (ConsumesFlags.ConsumesFlagsSideEffect2 inst1 inst2)) - -;; Combine flags-producing and -consuming instructions together, ensuring that -;; they are emitted back-to-back and no other instructions can be emitted -;; between them and potentially clobber the flags. -;; -;; Returns a `ValueRegs` according to the specific combination of ProducesFlags and ConsumesFlags modes: -;; - SideEffect + ReturnsReg --> ValueReg with one Reg from consumer -;; - SideEffect + ReturnsValueRegs --> ValueReg as given from consumer -;; - ReturnsResultWithProducer + ReturnsResultWithConsumer --> ValueReg with low part from producer, high part from consumer -;; -;; See `with_flags_reg` below for a variant that extracts out just the lower Reg. -(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs) - -(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result) - (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consumer_inst consumer_result)) - (let ((_x Unit (emit producer_inst)) - (_y Unit (emit consumer_inst))) - (value_regs producer_result consumer_result))) - -;; A flag-producer that also produces a result, paired with a consumer that has -;; no results. -(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result) - (ConsumesFlags.ConsumesFlagsSideEffect consumer_inst)) - (let ((_ Unit (emit producer_inst)) - (_ Unit (emit consumer_inst))) - (value_reg producer_result))) - -(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) - (ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result)) - (let ((_x Unit (emit producer_inst)) - (_y Unit (emit consumer_inst))) - (value_reg consumer_result))) - -(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 - consumer_inst_2 - consumer_result)) - ;; We must emit these instructions in order as the creator of - ;; the ConsumesFlags may be relying on dataflow dependencies - ;; amongst them. - (let ((_x Unit (emit producer_inst)) - (_y Unit (emit consumer_inst_1)) - (_z Unit (emit consumer_inst_2))) - consumer_result)) - -(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) - (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1 - consumer_inst_2 - consumer_inst_3 - consumer_inst_4 - consumer_result)) - ;; We must emit these instructions in order as the creator of - ;; the ConsumesFlags may be relying on dataflow dependencies - ;; amongst them. - (let ((_x Unit (emit producer_inst)) - (_y Unit (emit consumer_inst_1)) - (_z Unit (emit consumer_inst_2)) - (_w Unit (emit consumer_inst_3)) - (_v Unit (emit consumer_inst_4))) - consumer_result)) - -(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) - (ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result)) - (let ((_ Unit (emit producer_inst1)) - (_ Unit (emit producer_inst2)) - (_ Unit (emit consumer_inst))) - (value_reg consumer_result))) - -(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 - consumer_inst_2 - consumer_result)) - ;; We must emit these instructions in order as the creator of - ;; the ConsumesFlags may be relying on dataflow dependencies - ;; amongst them. - (let ((_ Unit (emit producer_inst1)) - (_ Unit (emit producer_inst2)) - (_ Unit (emit consumer_inst_1)) - (_ Unit (emit consumer_inst_2))) - consumer_result)) - -(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) - (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1 - consumer_inst_2 - consumer_inst_3 - consumer_inst_4 - consumer_result)) - ;; We must emit these instructions in order as the creator of - ;; the ConsumesFlags may be relying on dataflow dependencies - ;; amongst them. - (let ((_ Unit (emit producer_inst1)) - (_ Unit (emit producer_inst2)) - (_ Unit (emit consumer_inst_1)) - (_ Unit (emit consumer_inst_2)) - (_ Unit (emit consumer_inst_3)) - (_ Unit (emit consumer_inst_4))) - consumer_result)) - -(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg) -(rule (with_flags_reg p c) - (let ((v ValueRegs (with_flags p c))) - (value_regs_get v 0))) - -;; Indicate that the current state of the flags register from the instruction -;; that produces this Value is relied on. -(decl flags_to_producesflags (Value) ProducesFlags) -(rule (flags_to_producesflags val) - (let ((_ Unit (mark_value_used val))) - (ProducesFlags.AlreadyExistingFlags))) - -;; Combine a flags-producing instruction and a flags-consuming instruction that -;; produces no results. -;; -;; This function handles the following case only: -;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect -(decl with_flags_side_effect (ProducesFlags ConsumesFlags) SideEffectNoResult) - -(rule (with_flags_side_effect - (ProducesFlags.AlreadyExistingFlags) - (ConsumesFlags.ConsumesFlagsSideEffect c)) - (SideEffectNoResult.Inst c)) - -(rule (with_flags_side_effect - (ProducesFlags.AlreadyExistingFlags) - (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2)) - (SideEffectNoResult.Inst2 c1 c2)) - -(rule (with_flags_side_effect - (ProducesFlags.ProducesFlagsSideEffect p) - (ConsumesFlags.ConsumesFlagsSideEffect c)) - (SideEffectNoResult.Inst2 p c)) - -(rule (with_flags_side_effect - (ProducesFlags.ProducesFlagsSideEffect p) - (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2)) - (SideEffectNoResult.Inst3 p c1 c2)) - -(rule (with_flags_side_effect - (ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2) - (ConsumesFlags.ConsumesFlagsSideEffect c)) - (SideEffectNoResult.Inst3 p1 p2 c)) - -;; Combine flag-producing and -consuming instruction that allows more than two results to be returned -(decl with_flags_chained (ProducesFlags ConsumesAndProducesFlags ConsumesFlags) MultiReg) - -;; ProducesFlags.SideEffect + ConsumesAndProducesFlags.SideEffect with all possible ConsumeFlags options -(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) - (ConsumesAndProducesFlags.SideEffect middle_inst) - (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.Empty))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) - (ConsumesAndProducesFlags.SideEffect middle_inst) - (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2))) - (MultiReg.Empty))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) - (ConsumesAndProducesFlags.SideEffect middle_inst) - (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst reg)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.One reg))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) - (ConsumesAndProducesFlags.SideEffect middle_inst) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2))) - (MultiReg.Two (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) - (ConsumesAndProducesFlags.SideEffect middle_inst) - (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2)) - (_ Unit (emit consume_inst3)) - (_ Unit (emit consume_inst4))) - (MultiReg.Two (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) - - -;; ProducesFlags.ReturnsReg + ConsumesAndProducesFlags.SideEffect with all possible ConsumeFlags options -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) - (ConsumesAndProducesFlags.SideEffect middle_inst) - (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.One prod_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) - (ConsumesAndProducesFlags.SideEffect middle_inst) - (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2))) - (MultiReg.One prod_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) - (ConsumesAndProducesFlags.SideEffect middle_inst) - (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.Two prod_result consume_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) - (ConsumesAndProducesFlags.SideEffect middle_inst) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2))) - (MultiReg.Three prod_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) - (ConsumesAndProducesFlags.SideEffect middle_inst) - (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2)) - (_ Unit (emit consume_inst3)) - (_ Unit (emit consume_inst4))) - (MultiReg.Three prod_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) - - -;; ProducesFlags.SideEffect + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumeFlags options -(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.One middle_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2))) - (MultiReg.One middle_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.Two middle_result consume_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2))) - (MultiReg.Three middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2)) - (_ Unit (emit consume_inst3)) - (_ Unit (emit consume_inst4))) - (MultiReg.Three middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) - - -;; ProducesFlags.ReturnsReg + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumeFlags options -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.Two prod_result middle_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2))) - (MultiReg.Two prod_result middle_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.Three prod_result middle_result consume_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2))) - (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2)) - (_ Unit (emit consume_inst3)) - (_ Unit (emit consume_inst4))) - (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) - -;; ProducesFlags.ReturnsResultWithConsumer + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumeFlags options -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.Two prod_result middle_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2))) - (MultiReg.Two prod_result middle_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.Three prod_result middle_result consume_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consume_inst consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst))) - (MultiReg.Three prod_result middle_result consume_result))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2))) - (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) - -(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) - (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) - (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) - (let ((_ Unit (emit prod_inst)) - (_ Unit (emit middle_inst)) - (_ Unit (emit consume_inst1)) - (_ Unit (emit consume_inst2)) - (_ Unit (emit consume_inst3)) - (_ Unit (emit consume_inst4))) - (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) - -;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; This definition should be kept up to date with the values defined in -;; cranelift/codegen/meta/src/shared/settings.rs -(type TlsModel extern (enum (None) (ElfGd) (Macho) (Coff))) - -(decl tls_model (TlsModel) Type) -(extern extractor infallible tls_model tls_model) - -(decl pure partial tls_model_is_elf_gd () Unit) -(extern constructor tls_model_is_elf_gd tls_model_is_elf_gd) - -(decl pure partial tls_model_is_macho () Unit) -(extern constructor tls_model_is_macho tls_model_is_macho) - -(decl pure partial tls_model_is_coff () Unit) -(extern constructor tls_model_is_coff tls_model_is_coff) - -(decl pure partial preserve_frame_pointers () Unit) -(extern constructor preserve_frame_pointers preserve_frame_pointers) - -;; This definition should be kept up to date with the values defined in -;; cranelift/codegen/meta/src/shared/settings.rs -(type StackSwitchModel extern (enum (None) (Basic) (UpdateWindowsTib))) - -(decl pure partial stack_switch_model () StackSwitchModel) -(extern constructor stack_switch_model stack_switch_model) - -;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl box_external_name (ExternalName) BoxExternalName) -(extern constructor box_external_name box_external_name) - -;; Accessor for `FuncRef`. - -(decl func_ref_data (SigRef ExternalName RelocDistance) FuncRef) -(extern extractor infallible func_ref_data func_ref_data) - -;; Accessor for `GlobalValue`. - -(decl symbol_value_data (ExternalName RelocDistance i64) GlobalValue) -(extern extractor symbol_value_data symbol_value_data) - -;; Accessor for `RelocDistance`. - -(decl reloc_distance_near () RelocDistance) -(extern extractor reloc_distance_near reloc_distance_near) - -;; Accessor for `Immediate` as a vector of u8 values. - -(decl vec_mask_from_immediate (VecMask) Immediate) -(extern extractor vec_mask_from_immediate vec_mask_from_immediate) - -;; Accessor for `Immediate` as u128. - -(decl u128_from_immediate (u128) Immediate) -(extern extractor u128_from_immediate u128_from_immediate) - -;; Extracts an `Immediate` as a `VCodeConstant`. - -(decl vconst_from_immediate (VCodeConstant) Immediate) -(extern extractor vconst_from_immediate vconst_from_immediate) - -;; Accessor for `Constant` as u128. - -(decl u128_from_constant (u128) Constant) -(extern extractor u128_from_constant u128_from_constant) - -;; Accessor for `Constant` as u64. - -(decl u64_from_constant (u64) Constant) -(extern extractor u64_from_constant u64_from_constant) - -;; Extracts lane indices, represented as u8's, if the immediate for a -;; `shuffle` instruction represents shuffling N-bit values. The u8 values -;; returned will be in the range of 0 to (256/N)-1, inclusive, and index the -;; N-bit chunks of two concatenated 128-bit vectors starting from the -;; least-significant bits. -(decl shuffle64_from_imm (u8 u8) Immediate) -(extern extractor shuffle64_from_imm shuffle64_from_imm) -(decl shuffle32_from_imm (u8 u8 u8 u8) Immediate) -(extern extractor shuffle32_from_imm shuffle32_from_imm) -(decl shuffle16_from_imm (u8 u8 u8 u8 u8 u8 u8 u8) Immediate) -(extern extractor shuffle16_from_imm shuffle16_from_imm) - -;;;; Helpers for generating returns ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Extractor to check for the special case that a `WritableValueRegs` -;; contains only a single register. -(decl only_writable_reg (WritableReg) WritableValueRegs) -(extern extractor only_writable_reg only_writable_reg) - -;; Get the `n`th register inside a `WritableValueRegs`. -(decl writable_regs_get (WritableValueRegs usize) WritableReg) -(extern constructor writable_regs_get writable_regs_get) - -;;;; Helpers for generating calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Type to hold information about a function call signature. -(type Sig (primitive Sig)) - -;; Information how to pass one argument or return value. -(type ABIArg extern (enum)) - -;; Information how to pass a single slot of one argument or return value. -(type ABIArgSlot extern - (enum - (Reg - (reg RealReg) - (ty Type) - (extension ArgumentExtension)) - (Stack - (offset i64) - (ty Type) - (extension ArgumentExtension)))) - -;; Physical register that may hold an argument or return value. -(type RealReg (primitive RealReg)) - -;; Instruction on whether and how to extend an argument value. -(type ArgumentExtension extern - (enum - (None) - (Uext) - (Sext))) - -;; Get the number of arguments expected. -(decl abi_num_args (Sig) usize) -(extern constructor abi_num_args abi_num_args) - -;; Get information specifying how to pass one argument. -(decl abi_get_arg (Sig usize) ABIArg) -(extern constructor abi_get_arg abi_get_arg) - -;; Get the number of return values expected. -(decl abi_num_rets (Sig) usize) -(extern constructor abi_num_rets abi_num_rets) - -;; Get information specifying how to pass one return value. -(decl abi_get_ret (Sig usize) ABIArg) -(extern constructor abi_get_ret abi_get_ret) - -;; Get information specifying how to pass the implicit pointer -;; to the return-value area on the stack, if required. -(decl abi_ret_arg (ABIArg) Sig) -(extern extractor abi_ret_arg abi_ret_arg) - -;; Succeeds if no implicit return-value area pointer is required. -(decl abi_no_ret_arg () Sig) -(extern extractor abi_no_ret_arg abi_no_ret_arg) - -;; Size of the argument area. -(decl abi_sized_stack_arg_space (Sig) i64) -(extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space) - -;; Size of the return-value area. -(decl abi_sized_stack_ret_space (Sig) i64) -(extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space) - -;; Incoming return area pointer (must be present). -(decl abi_unwrap_ret_area_ptr () Reg) -(extern constructor abi_unwrap_ret_area_ptr abi_unwrap_ret_area_ptr) - -;; StackSlot addr -(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst) -(extern constructor abi_stackslot_addr abi_stackslot_addr) - -;; DynamicStackSlot addr -(decl abi_dynamic_stackslot_addr (WritableReg DynamicStackSlot) MInst) -(extern constructor abi_dynamic_stackslot_addr abi_dynamic_stackslot_addr) - -;; Extractor to detect the special case where an argument or -;; return value only requires a single slot to be passed. -(decl abi_arg_only_slot (ABIArgSlot) ABIArg) -(extern extractor abi_arg_only_slot abi_arg_only_slot) - -;; Extractor to detect the special case where a non-struct argument -;; is implicitly passed by reference using a hidden pointer. -(decl abi_arg_implicit_pointer (ABIArgSlot i64 Type) ABIArg) -(extern extractor abi_arg_implicit_pointer abi_arg_implicit_pointer) - -;; Convert a real register number into a virtual register. -(decl real_reg_to_reg (RealReg) Reg) -(extern constructor real_reg_to_reg real_reg_to_reg) - -;; Convert a real register number into a writable virtual register. -(decl real_reg_to_writable_reg (RealReg) WritableReg) -(extern constructor real_reg_to_writable_reg real_reg_to_writable_reg) - -;; Generate a move between two registers. -(decl gen_move (Type WritableReg Reg) MInst) -(extern constructor gen_move gen_move) - -;; Generate a return instruction -(decl lower_return (ValueSlice) InstOutput) -(rule (lower_return vals) - (let ((_ Unit (gen_return vals))) - (output_none))) - -(decl gen_return (ValueSlice) Unit) -(extern constructor gen_return gen_return) - -(decl gen_return_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) -(extern constructor gen_return_call gen_return_call) - -(decl gen_return_call_indirect (SigRef Value ValueSlice) InstOutput) -(extern constructor gen_return_call_indirect gen_return_call_indirect) - -;; Helper for extracting an immediate that's not 0 and not -1 from an imm64. -(decl pure partial safe_divisor_from_imm64 (Type Imm64) u64) -(extern constructor safe_divisor_from_imm64 safe_divisor_from_imm64) - -;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(convert Inst Value def_inst) -(convert Reg ValueRegs value_reg) -(convert WritableReg WritableValueRegs writable_value_reg) -(convert Value Reg put_in_reg) -(convert Value ValueRegs put_in_regs) -(convert WritableReg Reg writable_reg_to_reg) -(convert ValueRegs InstOutput output) -(convert Reg InstOutput output_reg) -(convert Value InstOutput output_value) -(convert ExternalName BoxExternalName box_external_name) -(convert PReg Reg preg_to_reg) - diff --git a/hbcb/src/prelude_opt.isle b/hbcb/src/prelude_opt.isle deleted file mode 100644 index b8b9fc4..0000000 --- a/hbcb/src/prelude_opt.isle +++ /dev/null @@ -1,123 +0,0 @@ -;; Prelude definitions specific to the mid-end. - -;; Any `extern` definitions here are generally implemented in `src/opts.rs`. - -;;;;; eclass and enode access ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Extract any node(s) for the given eclass ID. -(decl multi inst_data (Type InstructionData) Value) -(extern extractor inst_data inst_data_etor) - -;; Identical to `inst_data`, just with a different ISLE type. -;; This is basically a manual version of `curry`/`uncurry` in Haskell: -;; to compose extractors the outer one needs to be single-parameter, -;; so this combines the two parameters of `inst_data` into one. -(type TypeAndInstructionData (primitive TypeAndInstructionData)) -(decl multi inst_data_tupled (TypeAndInstructionData) Value) -(extern extractor inst_data_tupled inst_data_tupled_etor) - -;; Construct a pure node, returning a new (or deduplicated -;; already-existing) eclass ID. -(decl make_inst (Type InstructionData) Value) -(extern constructor make_inst make_inst_ctor) - -;; Constructors for value arrays. -(decl value_array_2_ctor (Value Value) ValueArray2) -(extern constructor value_array_2_ctor value_array_2_ctor) -(decl value_array_3_ctor (Value Value Value) ValueArray3) -(extern constructor value_array_3_ctor value_array_3_ctor) - -(rule (eq ty x y) (icmp ty (IntCC.Equal) x y)) -(rule (ne ty x y) (icmp ty (IntCC.NotEqual) x y)) -(rule (ult ty x y) (icmp ty (IntCC.UnsignedLessThan) x y)) -(rule (ule ty x y) (icmp ty (IntCC.UnsignedLessThanOrEqual) x y)) -(rule (ugt ty x y) (icmp ty (IntCC.UnsignedGreaterThan) x y)) -(rule (uge ty x y) (icmp ty (IntCC.UnsignedGreaterThanOrEqual) x y)) -(rule (slt ty x y) (icmp ty (IntCC.SignedLessThan) x y)) -(rule (sle ty x y) (icmp ty (IntCC.SignedLessThanOrEqual) x y)) -(rule (sgt ty x y) (icmp ty (IntCC.SignedGreaterThan) x y)) -(rule (sge ty x y) (icmp ty (IntCC.SignedGreaterThanOrEqual) x y)) - -;; 3-way comparison, returning -1/0/+1 in I8 -(decl spaceship_s (Type Value Value) Value) -(rule (spaceship_s ty x y) (isub $I8 (sgt ty x y) (slt ty x y))) -(extractor (spaceship_s ty x y) (isub $I8 (sgt ty x y) (slt ty x y))) -(decl spaceship_u (Type Value Value) Value) -(rule (spaceship_u ty x y) (isub $I8 (ugt ty x y) (ult ty x y))) -(extractor (spaceship_u ty x y) (isub $I8 (ugt ty x y) (ult ty x y))) - -;;;;; optimization toplevel ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; The main matcher rule invoked by the toplevel driver. -(decl multi simplify (Value) Value) - -;; Mark a node as requiring remat when used in a different block. -(decl remat (Value) Value) -(extern constructor remat remat) - -;; Mark a node as subsuming whatever else it's rewritten from -- this -;; is definitely preferable, not just a possible option. Useful for, -;; e.g., constant propagation where we arrive at a definite "final -;; answer". -(decl subsume (Value) Value) -(extern constructor subsume subsume) - -;;;;; constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl iconst_sextend_etor (Type i64) TypeAndInstructionData) -(extern extractor iconst_sextend_etor iconst_sextend_etor) - -;; Construct an `iconst` from an `i64` or Extract an `i64` from an `iconst` -;; by treating the constant as signed. -;; When extracting, smaller types get their value sign-extended to 64-bits, -;; so that `iconst.i8 255` will give you a `-1_i64`. -;; When constructing, the rule will fail if the value cannot be represented in -;; the target type. If it fits, it'll be masked accordingly in the constant. -(decl iconst_s (Type i64) Value) -(extractor (iconst_s ty c) (inst_data_tupled (iconst_sextend_etor ty c))) -(rule 0 (iconst_s ty c) - (if-let c_masked (u64_and (i64_as_u64 c) (ty_umax ty))) - (if-let c_reextended (i64_sextend_u64 ty c_masked)) - (if-let $true (u64_eq (i64_as_u64 c) (i64_as_u64 c_reextended))) - (iconst ty (imm64 c_masked))) -(rule 1 (iconst_s $I128 c) (sextend $I128 (iconst_s $I64 c))) - -;; Construct an `iconst` from a `u64` or Extract a `u64` from an `iconst` -;; by treating the constant as unsigned. -;; When extracting, smaller types get their value zero-extended to 64-bits, -;; so that `iconst.i8 255` will give you a `255_u64`. -;; When constructing, the rule will fail if the value cannot be represented in -;; the target type. -(decl iconst_u (Type u64) Value) -(extractor (iconst_u ty c) (iconst ty (u64_from_imm64 c))) -(rule 0 (iconst_u ty c) - (if-let $true (u64_le c (ty_umax ty))) - (iconst ty (imm64 c))) -(rule 1 (iconst_u $I128 c) (uextend $I128 (iconst_u $I64 c))) - -;; These take `Value`, rather than going through `inst_data_tupled`, because -;; most of the time they want to return the original `Value`, and it would be -;; a waste to need to re-GVN the instruction data in those cases. -(decl multi sextend_maybe_etor (Type Value) Value) -(extern extractor infallible sextend_maybe_etor sextend_maybe_etor) -(decl multi uextend_maybe_etor (Type Value) Value) -(extern extractor infallible uextend_maybe_etor uextend_maybe_etor) - -;; Match or Construct a possibly-`uextend`ed value. -;; Gives the extended-to type and inner value when matching something that was -;; extended, or the input value and its type when the value isn't an extension. -;; Useful to write a single pattern that can match things that may or may not -;; have undergone C's "usual arithmetic conversions". -;; When generating values, extending to the same type is invalid CLIF, -;; so this avoids doing that where there's no extension actually needed. -(decl uextend_maybe (Type Value) Value) -(extractor (uextend_maybe ty val) (uextend_maybe_etor ty val)) -(rule 0 (uextend_maybe ty val) (uextend ty val)) -(rule 1 (uextend_maybe ty val@(value_type ty)) val) - -;; Same as `uextend_maybe` above, just for `sextend`. -(decl sextend_maybe (Type Value) Value) -(extractor (sextend_maybe ty val) (sextend_maybe_etor ty val)) -(rule 0 (sextend_maybe ty val) (sextend ty val)) -(rule 1 (sextend_maybe ty val@(value_type ty)) val) - diff --git a/hbcb/src/settings.rs b/hbcb/src/settings.rs deleted file mode 100644 index 5b52469..0000000 --- a/hbcb/src/settings.rs +++ /dev/null @@ -1,10 +0,0 @@ -//! riscv64 Settings. - -use { - core::fmt, - cranelift_codegen::settings::{self, detail, Builder, PredicateView, Value}, -}; - -// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a -// public `Flags` struct with an impl for all of the settings defined in -include!(concat!(env!("OUT_DIR"), "/settings-riscv64.rs"));