initiating cranelift backend

This commit is contained in:
mlokr 2024-09-17 10:39:56 +02:00
parent 254d5ed962
commit da69b705f1
No known key found for this signature in database
GPG key ID: DEA147DDEE644993
26 changed files with 23610 additions and 1 deletions

307
Cargo.lock generated
View file

@ -2,10 +2,172 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "arbitrary"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
[[package]]
name = "bumpalo"
version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cranelift-bforest"
version = "0.111.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b80c3a50b9c4c7e5b5f73c0ed746687774fc9e36ef652b110da8daebf0c6e0e6"
dependencies = [
"cranelift-entity",
]
[[package]]
name = "cranelift-bitset"
version = "0.111.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38778758c2ca918b05acb2199134e0c561fb577c50574259b26190b6c2d95ded"
[[package]]
name = "cranelift-codegen"
version = "0.111.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58258667ad10e468bfc13a8d620f50dfcd4bb35d668123e97defa2549b9ad397"
dependencies = [
"bumpalo",
"cranelift-bforest",
"cranelift-bitset",
"cranelift-codegen-meta",
"cranelift-codegen-shared",
"cranelift-control",
"cranelift-entity",
"cranelift-isle",
"gimli",
"hashbrown 0.14.5",
"log",
"regalloc2 0.9.3",
"rustc-hash 1.1.0",
"smallvec",
"target-lexicon",
]
[[package]]
name = "cranelift-codegen-meta"
version = "0.111.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "043f0b702e529dcb07ff92bd7d40e7d5317b5493595172c5eb0983343751ee06"
dependencies = [
"cranelift-codegen-shared",
]
[[package]]
name = "cranelift-codegen-shared"
version = "0.111.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7763578888ab53eca5ce7da141953f828e82c2bfadcffc106d10d1866094ffbb"
[[package]]
name = "cranelift-control"
version = "0.111.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32db15f08c05df570f11e8ab33cb1ec449a64b37c8a3498377b77650bef33d8b"
dependencies = [
"arbitrary",
]
[[package]]
name = "cranelift-entity"
version = "0.111.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5289cdb399381a27e7bbfa1b42185916007c3d49aeef70b1d01cb4caa8010130"
dependencies = [
"cranelift-bitset",
]
[[package]]
name = "cranelift-isle"
version = "0.111.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b72a3c5c166a70426dcb209bdd0bb71a787c1ea76023dc0974fbabca770e8f9"
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "fallible-iterator"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "gimli"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
dependencies = [
"fallible-iterator",
"indexmap",
"stable_deref_trait",
]
[[package]]
name = "hashbrown"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
dependencies = [
"ahash",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
]
[[package]]
name = "hbbytecode"
version = "0.1.0"
[[package]]
name = "hbcb"
version = "0.1.0"
dependencies = [
"cranelift-codegen",
"cranelift-codegen-meta",
"cranelift-control",
"cranelift-isle",
"log",
"regalloc2 0.10.2",
"smallvec",
"target-lexicon",
]
[[package]]
name = "hbjit"
version = "0.1.0"
@ -32,12 +194,28 @@ dependencies = [
"memmap2",
]
[[package]]
name = "indexmap"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5"
dependencies = [
"equivalent",
"hashbrown 0.14.5",
]
[[package]]
name = "libc"
version = "0.2.158"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
[[package]]
name = "log"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "memmap2"
version = "0.9.5"
@ -47,6 +225,135 @@ dependencies = [
"libc",
]
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regalloc2"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad156d539c879b7a24a363a2016d77961786e71f48f2e2fc8302a92abd2429a6"
dependencies = [
"hashbrown 0.13.2",
"log",
"rustc-hash 1.1.0",
"slice-group-by",
"smallvec",
]
[[package]]
name = "regalloc2"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12908dbeb234370af84d0579b9f68258a0f67e201412dd9a2814e6f45b2fc0f0"
dependencies = [
"hashbrown 0.14.5",
"log",
"rustc-hash 2.0.0",
"slice-group-by",
"smallvec",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc-hash"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
[[package]]
name = "slice-group-by"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7"
[[package]]
name = "smallvec"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "syn"
version = "2.0.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "target-lexicon"
version = "0.12.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]]
name = "unicode-ident"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "xtask"
version = "0.1.0"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View file

@ -1,6 +1,6 @@
[workspace]
resolver = "2"
members = ["hbbytecode", "hbvm", "hbxrt", "xtask", "hblang", "hbjit"]
members = ["hbbytecode", "hbvm", "hbxrt", "xtask", "hblang", "hbjit", "hbcb"]
[profile.release]
strip = true

22
hbcb/Cargo.toml Normal file
View file

@ -0,0 +1,22 @@
[package]
name = "hbcb"
version = "0.1.0"
edition = "2021"
[dependencies]
cranelift-codegen = "0.111.0"
cranelift-control = "0.111.0"
log = "0.4.22"
regalloc2 = "0.10.2"
smallvec = "1.13.2"
target-lexicon = "0.12.16"
[features]
default = ["isle-errors"]
unwind = []
isle-in-source-tree = []
isle-errors = []
[build-dependencies]
cranelift-codegen-meta = "0.111.0"
cranelift-isle = "0.111.0"

310
hbcb/build.rs Normal file
View file

@ -0,0 +1,310 @@
// Build script.
//
// This program is run by Cargo when building cranelift-codegen. It is used to generate Rust code from
// the language definitions in the cranelift-codegen/meta directory.
//
// Environment:
//
// OUT_DIR
// Directory where generated files should be placed.
//
// TARGET
// Target triple provided by Cargo.
//
// The build script expects to be run from the directory where this build.rs file lives. The
// current directory is used to find the sources.
use {
cranelift_codegen_meta::{self as meta, isle::IsleCompilations},
cranelift_isle::error::Errors,
meta::isle::IsleCompilation,
std::{env, io::Read, process, time::Instant},
};
/// Build-script entry point.
///
/// Generates the Cranelift meta tables and ISLE-lowered Rust for the
/// riscv64 ISA into `OUT_DIR` (or `isle_generated_code/` when the
/// `isle-in-source-tree` feature is enabled), then writes a `version.rs`
/// exposing a `VERSION` constant built from the crate version plus the
/// current git revision when one is available.
fn main() {
    let start_time = Instant::now();
    let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set");
    let out_dir = std::path::Path::new(&out_dir);
    //let target_triple = env::var("TARGET").expect("The TARGET environment variable must be set");
    //let all_arch = env::var("CARGO_FEATURE_ALL_ARCH").is_ok();
    //let all_native_arch = env::var("CARGO_FEATURE_ALL_NATIVE_ARCH").is_ok();
    // This backend only targets riscv64; the upstream per-feature ISA
    // selection below is intentionally disabled.
    let isas = &[meta::isa::Isa::Riscv64];
    // let mut isas = meta::isa::Isa::all()
    //     .iter()
    //     .cloned()
    //     .filter(|isa| {
    //         let env_key = format!("CARGO_FEATURE_{}", isa.to_string().to_uppercase());
    //         all_arch || env::var(env_key).is_ok()
    //     })
    //     .collect::<Vec<_>>();
    // Don't require host isa if under 'all-arch' feature.
    //let host_isa = env::var("CARGO_FEATURE_HOST_ARCH").is_ok() && !all_native_arch;
    //if isas.is_empty() || host_isa {
    //    // Try to match native target.
    //    let target_name = target_triple.split('-').next().unwrap();
    //    let isa = meta::isa_from_arch(target_name).expect("error when identifying target");
    //    println!("cargo:rustc-cfg=feature=\"{isa}\"");
    //    isas.push(isa);
    //}
    let cur_dir = env::current_dir().expect("Can't access current working directory");
    let crate_dir = cur_dir.as_path();
    println!("cargo:rerun-if-changed=build.rs");
    let explicit_isle_dir = &crate_dir.join("isle_generated_code");
    #[cfg(feature = "isle-in-source-tree")]
    let isle_dir = explicit_isle_dir;
    #[cfg(not(feature = "isle-in-source-tree"))]
    let isle_dir = &out_dir;
    #[cfg(feature = "isle-in-source-tree")]
    {
        std::fs::create_dir_all(isle_dir).expect("Could not create ISLE source directory");
    }
    #[cfg(not(feature = "isle-in-source-tree"))]
    {
        // A stale in-tree output directory silently shadowing target/ output
        // is confusing, so refuse to build until it is removed.
        if explicit_isle_dir.is_dir() {
            eprintln!(concat!(
                "Error: directory isle_generated_code/ exists but is only used when\n",
                "`--feature isle-in-source-tree` is specified. To prevent confusion,\n",
                "this build script requires the directory to be removed when reverting\n",
                "to the usual generated code in target/. Please delete the directory and\n",
                "re-run this build.\n",
            ));
            std::process::exit(1);
        }
    }
    if let Err(err) = meta::generate(isas, out_dir, isle_dir) {
        eprintln!("Error: {err}");
        process::exit(1);
    }
    // `SKIP_ISLE=1` lets CI skip the expensive ISLE compilation step.
    // (Compare as `&str` rather than allocating a `String` for the default.)
    if env::var("SKIP_ISLE").as_deref().unwrap_or("0") != "1" {
        if let Err(err) = build_isle(crate_dir, isle_dir) {
            eprintln!("Error: {err}");
            process::exit(1);
        }
    }
    if env::var("CRANELIFT_VERBOSE").is_ok() {
        for isa in isas {
            println!("cargo:warning=Includes support for {} ISA", isa);
        }
        println!("cargo:warning=Build step took {:?}.", Instant::now() - start_time);
        println!("cargo:warning=Generated files are in {}", out_dir.display());
    }
    let pkg_version = env::var("CARGO_PKG_VERSION").unwrap();
    let mut cmd = std::process::Command::new("git");
    cmd.arg("rev-parse")
        .arg("HEAD")
        .stdout(std::process::Stdio::piped())
        .current_dir(env::var("CARGO_MANIFEST_DIR").unwrap());
    let version = if let Ok(mut child) = cmd.spawn() {
        let mut git_rev = String::new();
        child.stdout.as_mut().unwrap().read_to_string(&mut git_rev).unwrap();
        let status = child.wait().unwrap();
        if status.success() {
            // Abbreviate to 9 hex chars, matching `git rev-parse --short=9`.
            let git_rev = git_rev.trim().chars().take(9).collect::<String>();
            format!("{pkg_version}-{git_rev}")
        } else {
            // not a git repo
            pkg_version
        }
    } else {
        // git not available
        pkg_version
    };
    std::fs::write(
        std::path::Path::new(&out_dir).join("version.rs"),
        format!(
            "/// Version number of this crate. \n\
            pub const VERSION: &str = \"{version}\";"
        ),
    )
    .unwrap();
}
/// Strip the current directory from the file paths, because `islec`
/// includes them in the generated source, and this helps us maintain
/// deterministic builds that don't include those local file paths.
fn make_isle_source_path_relative(
    cur_dir: &std::path::Path,
    filename: &std::path::Path,
) -> std::path::PathBuf {
    // When `filename` is not under `cur_dir`, fall back to the path as given.
    filename.strip_prefix(cur_dir).unwrap_or(filename).to_path_buf()
}
/// Compile every ISLE compilation unit for this crate, writing the
/// generated Rust into `isle_dir`.
///
/// Emits `cargo:rerun-if-changed` for each tracked input, exports
/// `ISLE_DIR` to the compiled crate, and exits the process with status 1
/// if any unit fails to compile (after reporting all failures).
fn build_isle(
    crate_dir: &std::path::Path,
    isle_dir: &std::path::Path,
) -> Result<(), Box<dyn std::error::Error + 'static>> {
    let cur_dir = std::env::current_dir()?;
    let codegen_crate_dir = &make_isle_source_path_relative(&cur_dir, crate_dir);
    let gen_dir = &make_isle_source_path_relative(&cur_dir, isle_dir);
    // Preludes.
    let clif_lower_isle = gen_dir.join("clif_lower.isle");
    //let clif_opt_isle = gen_dir.join("clif_opt.isle");
    let prelude_isle = codegen_crate_dir.join("src").join("prelude.isle");
    //let prelude_opt_isle = codegen_crate_dir.join("src").join("prelude_opt.isle");
    let prelude_lower_isle = codegen_crate_dir.join("src").join("prelude_lower.isle");
    // Directory for mid-end optimizations.
    //let src_opts = codegen_crate_dir.join("src").join("opts");
    let src_isa_risc_v = codegen_crate_dir.join("src");
    // This is a set of ISLE compilation units.
    //
    // The format of each entry is:
    //
    //     (output Rust code file, input ISLE source files)
    //
    // There should be one entry for each backend that uses ISLE for lowering,
    // and if/when we replace our peephole optimization passes with ISLE, there
    // should be an entry for each of those as well.
    //
    // N.B.: add any new compilation outputs to
    // `scripts/force-rebuild-isle.sh` if they do not fit the pattern
    // `cranelift/codegen/src/isa/*/lower/isle/generated_code.rs`!
    let isle_compilations = IsleCompilations {
        items: vec![
            // // The mid-end optimization rules.
            // IsleCompilation {
            //     output: gen_dir.join("isle_opt.rs"),
            //     inputs: vec![
            //         prelude_isle.clone(),
            //         prelude_opt_isle,
            //         src_opts.join("arithmetic.isle"),
            //         src_opts.join("bitops.isle"),
            //         src_opts.join("cprop.isle"),
            //         src_opts.join("extends.isle"),
            //         src_opts.join("icmp.isle"),
            //         src_opts.join("remat.isle"),
            //         src_opts.join("selects.isle"),
            //         src_opts.join("shifts.isle"),
            //         src_opts.join("spaceship.isle"),
            //         src_opts.join("spectre.isle"),
            //         src_opts.join("vector.isle"),
            //     ],
            //     untracked_inputs: vec![clif_opt_isle],
            // },
            // The risc-v instruction selector.
            IsleCompilation {
                output: gen_dir.join("isle_riscv64.rs"),
                inputs: vec![
                    prelude_isle.clone(),
                    prelude_lower_isle.clone(),
                    src_isa_risc_v.join("inst.isle"),
                    src_isa_risc_v.join("inst_vector.isle"),
                    src_isa_risc_v.join("lower.isle"),
                ],
                untracked_inputs: vec![clif_lower_isle.clone()],
            },
        ],
    };
    let mut had_error = false;
    for compilation in &isle_compilations.items {
        for file in &compilation.inputs {
            println!("cargo:rerun-if-changed={}", file.display());
        }
        if let Err(e) = run_compilation(compilation) {
            had_error = true;
            eprintln!("Error building ISLE files:");
            eprintln!("{e:?}");
            #[cfg(not(feature = "isle-errors"))]
            {
                // This crate is `hbcb`, not upstream `cranelift-codegen`, so
                // point the user at the right package name.
                eprintln!("To see a more detailed error report, run: ");
                eprintln!();
                eprintln!(" $ cargo check -p hbcb --features isle-errors");
                eprintln!();
            }
        }
    }
    if had_error {
        std::process::exit(1);
    }
    println!("cargo:rustc-env=ISLE_DIR={}", isle_dir.to_str().unwrap());
    Ok(())
}
/// Build ISLE DSL source text into generated Rust code.
///
/// NB: This must happen *after* the `cranelift-codegen-meta` functions, since
/// it consumes files generated by them.
///
/// Compiles `compilation.inputs` plus `compilation.untracked_inputs` into a
/// single Rust file at `compilation.output`.  The result is passed through
/// `rustfmt` on a best-effort basis (a formatting failure only produces a
/// cargo warning, not an error).
fn run_compilation(compilation: &IsleCompilation) -> Result<(), Errors> {
    use cranelift_isle as isle;
    eprintln!("Rebuilding {}", compilation.output.display());
    let code = {
        let file_paths = compilation.inputs.iter().chain(compilation.untracked_inputs.iter());
        let options = isle::codegen::CodegenOptions {
            // Because we include!() the generated ISLE source, we cannot
            // put the global pragmas (`#![allow(...)]`) in the ISLE
            // source itself; we have to put them in the source that
            // include!()s it. (See
            // https://github.com/rust-lang/rust/issues/47995.)
            exclude_global_allow_pragmas: true,
        };
        isle::compile::from_files(file_paths, &options)?
    };
    // Formatting is optional: fall back to the unformatted code on failure.
    let code = rustfmt(&code).unwrap_or_else(|e| {
        println!("cargo:warning=Failed to run `rustfmt` on ISLE-generated code: {e:?}");
        code
    });
    eprintln!("Writing ISLE-generated Rust code to {}", compilation.output.display());
    std::fs::write(&compilation.output, code)
        .map_err(|e| Errors::from_io(e, "failed writing output"))?;
    Ok(())
}
/// Run `rustfmt` (from PATH) over `code` and return the formatted text.
///
/// Returns an error when the process cannot be spawned or exits with a
/// non-zero status.  The input is fed from a helper thread while the
/// parent drains stdout via `wait_with_output`; the original
/// write-everything-then-read sequence could deadlock on large inputs
/// once both OS pipe buffers filled up.
fn rustfmt(code: &str) -> std::io::Result<String> {
    use std::io::Write;
    let mut child = std::process::Command::new("rustfmt")
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::piped())
        .spawn()?;
    let mut stdin = child.stdin.take().unwrap();
    let input = code.to_owned();
    // Writer thread: closes stdin (by dropping it) when done, letting
    // rustfmt see EOF while we concurrently collect its stdout below.
    let writer = std::thread::spawn(move || stdin.write_all(input.as_bytes()));
    let output = child.wait_with_output()?;
    writer.join().expect("rustfmt stdin writer thread panicked")?;
    if !output.status.success() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("`rustfmt` exited with status {}", output.status),
        ));
    }
    Ok(String::from_utf8(output.stdout).expect("rustfmt always writes utf-8 to stdout"))
}

900
hbcb/src/abi.rs Normal file
View file

@ -0,0 +1,900 @@
//! Implementation of a standard Riscv64 ABI.
use {
alloc::{boxed::Box, vec::Vec},
cranelift_codegen::{
inst::*,
ir::{self, types::*, LibCall, Signature},
isa::{self, unwind::UnwindInst, CallConv},
machinst::*,
settings::{self, Flags as RiscvFlags},
CodegenError, CodegenResult,
},
regalloc2::{MachineEnv, PReg, PRegSet},
smallvec::{smallvec, SmallVec},
std::sync::OnceLock,
};
/// Support for the Riscv64 ABI from the callee side (within a function body).
pub(crate) type Riscv64Callee = Callee<Riscv64MachineDeps>;
/// Support for the Riscv64 ABI from the caller side (at a callsite).
pub(crate) type Riscv64ABICallSite = CallSite<Riscv64MachineDeps>;
/// This is the limit for the size of argument and return-value areas on the
/// stack. We place a reasonable limit here to avoid integer overflow issues
/// with 32-bit arithmetic: for now, 128 MB.
static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
/// Riscv64-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub struct Riscv64MachineDeps;
impl IsaFlags for RiscvFlags {}
impl RiscvFlags {
    /// Smallest guaranteed vector register size in bits implied by the
    /// enabled Zvl*/V ISA flags, or 0 when no vector support is enabled.
    pub(crate) fn min_vec_reg_size(&self) -> u64 {
        // Ordered largest-first so the first enabled flag wins.
        let entries = [
            (self.has_zvl65536b(), 65536),
            (self.has_zvl32768b(), 32768),
            (self.has_zvl16384b(), 16384),
            (self.has_zvl8192b(), 8192),
            (self.has_zvl4096b(), 4096),
            (self.has_zvl2048b(), 2048),
            (self.has_zvl1024b(), 1024),
            (self.has_zvl512b(), 512),
            (self.has_zvl256b(), 256),
            // In order to claim the Application Profile V extension, a minimum
            // register size of 128 is required. i.e. V implies Zvl128b.
            (self.has_v(), 128),
            (self.has_zvl128b(), 128),
            (self.has_zvl64b(), 64),
            (self.has_zvl32b(), 32),
        ];
        entries
            .into_iter()
            .find_map(|(has_flag, size)| has_flag.then_some(size))
            // Due to a limitation in regalloc2, we can't support types
            // larger than 1024 bytes. So limit that here.
            .map_or(0, |size| std::cmp::min(size, 1024))
    }
}
impl ABIMachineSpec for Riscv64MachineDeps {
type F = RiscvFlags;
type I = Inst;
/// Native machine word width in bits: riscv64 is a 64-bit architecture.
fn word_bits() -> u32 {
    64
}
/// Return required stack alignment in bytes.
///
/// Always 16, independent of calling convention.
fn stack_align(_call_conv: isa::CallConv) -> u32 {
    16
}
/// Assign locations (registers or stack slots) to `params`, which are
/// either call arguments or return values per `args_or_rets`.
///
/// Register pools: x10..=x17 / f10..=f17 for arguments, x10..=x11 /
/// f10..=f11 for returns.  Values that don't fit in registers spill to
/// the stack in slots of at least 8 bytes.  When `add_ret_area_ptr` is
/// set, a synthetic I64 return-area-pointer argument is appended and its
/// index returned.  Returns `(total stack space, ret-area-ptr index)`;
/// fails with `ImplLimitExceeded` above `STACK_ARG_RET_SIZE_LIMIT`.
fn compute_arg_locs(
    call_conv: isa::CallConv,
    _flags: &settings::Flags,
    params: &[ir::AbiParam],
    args_or_rets: ArgsOrRets,
    add_ret_area_ptr: bool,
    mut args: ArgsAccumulator,
) -> CodegenResult<(u32, Option<usize>)> {
    assert_ne!(
        call_conv,
        isa::CallConv::Winch,
        "riscv64 does not support the 'winch' calling convention yet"
    );
    // All registers that can be used as parameters or rets.
    // both start and end are included.
    let (x_start, x_end, f_start, f_end) = match args_or_rets {
        ArgsOrRets::Args => (10, 17, 10, 17),
        ArgsOrRets::Rets => (10, 11, 10, 11),
    };
    let mut next_x_reg = x_start;
    let mut next_f_reg = f_start;
    // Stack space.
    let mut next_stack: u32 = 0;
    for param in params {
        if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {
            panic!(
                "StructArgument parameters are not supported on riscv64. \
                Use regular pointer arguments instead."
            );
        }
        // Find regclass(es) of the register(s) used to store a value of this type.
        let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
        let mut slots = ABIArgSlotVec::new();
        for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
            // Take the next free register of the matching class, if any.
            let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int {
                let x = Some(x_reg(next_x_reg));
                next_x_reg += 1;
                x
            } else if (next_f_reg <= f_end) && *rc == RegClass::Float {
                let x = Some(f_reg(next_f_reg));
                next_f_reg += 1;
                x
            } else {
                None
            };
            if let Some(reg) = next_reg {
                slots.push(ABIArgSlot::Reg {
                    reg: reg.to_real_reg().unwrap(),
                    ty: *reg_ty,
                    extension: param.extension,
                });
            } else {
                // Compute size and 16-byte stack alignment happens
                // separately after all args.
                let size = reg_ty.bits() / 8;
                let size = std::cmp::max(size, 8);
                // Align.
                debug_assert!(size.is_power_of_two());
                next_stack = align_to(next_stack, size);
                slots.push(ABIArgSlot::Stack {
                    offset: next_stack as i64,
                    ty: *reg_ty,
                    extension: param.extension,
                });
                next_stack += size;
            }
        }
        args.push(ABIArg::Slots { slots, purpose: param.purpose });
    }
    // Optionally append the hidden pointer to the return-value area.
    let pos: Option<usize> = if add_ret_area_ptr {
        assert!(ArgsOrRets::Args == args_or_rets);
        if next_x_reg <= x_end {
            let arg = ABIArg::reg(
                x_reg(next_x_reg).to_real_reg().unwrap(),
                I64,
                ir::ArgumentExtension::None,
                ir::ArgumentPurpose::Normal,
            );
            args.push_non_formal(arg);
        } else {
            let arg = ABIArg::stack(
                next_stack as i64,
                I64,
                ir::ArgumentExtension::None,
                ir::ArgumentPurpose::Normal,
            );
            args.push_non_formal(arg);
            next_stack += 8;
        }
        Some(args.args().len() - 1)
    } else {
        None
    };
    next_stack = align_to(next_stack, Self::stack_align(call_conv));
    // To avoid overflow issues, limit the arg/return size to something
    // reasonable -- here, 128 MB.
    if next_stack > STACK_ARG_RET_SIZE_LIMIT {
        return Err(CodegenError::ImplLimitExceeded);
    }
    Ok((next_stack, pos))
}
/// Load a `ty` value from stack location `mem` into `into_reg`.
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
    Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
}
/// Store the `ty` value in `from_reg` to stack location `mem`.
fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
    Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
}
/// Register-to-register move of a `ty` value.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
    Inst::gen_move(to_reg, from_reg, ty)
}
/// Widen `from_reg` (`from_bits` wide) into `to_reg` (`to_bits` wide),
/// sign- or zero-extending per `signed`.  Widening only (asserted).
fn gen_extend(
    to_reg: Writable<Reg>,
    from_reg: Reg,
    signed: bool,
    from_bits: u8,
    to_bits: u8,
) -> Inst {
    assert!(from_bits < to_bits);
    Inst::Extend { rd: to_reg, rn: from_reg, signed, from_bits, to_bits }
}
/// The argument extension actually applied: the one the signature
/// specifies, unchanged by calling convention.
fn get_ext_mode(
    _call_conv: isa::CallConv,
    specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
    specified
}
/// Pseudo-instruction defining the function's incoming arguments.
fn gen_args(args: Vec<ArgPair>) -> Inst {
    Inst::Args { args }
}
/// Pseudo-instruction carrying the function's return values.
fn gen_rets(rets: Vec<RetPair>) -> Inst {
    Inst::Rets { rets }
}
/// Scratch register used when checking the stack limit: the spill temp.
fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
    spilltmp_reg()
}
/// Emit `into_reg = from_reg + imm`: a single `addi` when `imm` fits in
/// a 12-bit immediate, otherwise materialize it into spilltmp2 and add.
fn gen_add_imm(
    _call_conv: isa::CallConv,
    into_reg: Writable<Reg>,
    from_reg: Reg,
    imm: u32,
) -> SmallInstVec<Inst> {
    let mut insts = SmallInstVec::new();
    if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) {
        insts.push(Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
            rd: into_reg,
            rs: from_reg,
            imm12,
        });
    } else {
        // Immediate too wide for `addi`: load it into the second spill
        // temp, then `add` it to `from_reg`.
        insts.extend(Inst::load_constant_u32(writable_spilltmp_reg2(), imm as u64));
        insts.push(Inst::AluRRR {
            alu_op: AluOPRRR::Add,
            rd: into_reg,
            rs1: spilltmp_reg2(),
            rs2: from_reg,
        });
    }
    insts
}
/// Trap with `StackOverflow` if SP has dropped below `limit_reg`.
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
    let mut insts = SmallVec::new();
    insts.push(Inst::TrapIf {
        cc: IntCC::UnsignedLessThan,
        rs1: stack_reg(),
        rs2: limit_reg,
        trap_code: ir::TrapCode::StackOverflow,
    });
    insts
}
/// Compute the address of stack location `mem` into `into_reg`.
fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {
    Inst::LoadAddr { rd: into_reg, mem: mem.into() }
}
/// Load a `ty` value from `base + offset` into `into_reg`.
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
    let mem = AMode::RegOffset(base, offset as i64);
    Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
}
/// Store the `ty` value in `from_reg` to `base + offset`.
fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
    let mem = AMode::RegOffset(base, offset as i64);
    Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
}
/// Adjust SP by `amount` bytes (positive or negative).  Emits nothing
/// for 0; a single `addi` when the amount fits in 12 bits; otherwise
/// materializes the constant into the spill temp and adds it.
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
    let mut insts = SmallVec::new();
    if amount == 0 {
        return insts;
    }
    if let Some(imm) = Imm12::maybe_from_i64(amount as i64) {
        insts.push(Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
            rd: writable_stack_reg(),
            rs: stack_reg(),
            imm12: imm,
        })
    } else {
        let tmp = writable_spilltmp_reg();
        // Sign-extend `amount` to 64 bits before the (bit-preserving)
        // cast to u64.
        insts.extend(Inst::load_constant_u64(tmp, amount as i64 as u64));
        insts.push(Inst::AluRRR {
            alu_op: AluOPRRR::Add,
            rd: writable_stack_reg(),
            rs1: stack_reg(),
            rs2: tmp.to_reg(),
        });
    }
    insts
}
/// Emit the prologue frame setup: allocate 16 bytes, save RA and the old
/// FP, then point FP at the new frame.  No-op when the frame layout
/// needs no setup area.  Emits unwind info when enabled in `flags`.
fn gen_prologue_frame_setup(
    _call_conv: isa::CallConv,
    flags: &settings::Flags,
    _isa_flags: &RiscvFlags,
    frame_layout: &FrameLayout,
) -> SmallInstVec<Inst> {
    let mut insts = SmallVec::new();
    if frame_layout.setup_area_size > 0 {
        // add sp,sp,-16 ;; alloc stack space for fp.
        // sd ra,8(sp) ;; save ra.
        // sd fp,0(sp) ;; store old fp.
        // mv fp,sp ;; set fp to sp.
        insts.extend(Self::gen_sp_reg_adjust(-16));
        insts.push(Inst::gen_store(AMode::SPOffset(8), link_reg(), I64, MemFlags::trusted()));
        insts.push(Inst::gen_store(AMode::SPOffset(0), fp_reg(), I64, MemFlags::trusted()));
        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::PushFrameRegs {
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }
        insts.push(Inst::Mov { rd: writable_fp_reg(), rm: stack_reg(), ty: I64 });
    }
    insts
}
/// reverse of gen_prologue_frame_setup.
///
/// Reloads RA and FP from the 16-byte setup area and releases it; for
/// the `tail` calling convention, additionally pops the tail-call
/// argument area.
fn gen_epilogue_frame_restore(
    call_conv: isa::CallConv,
    _flags: &settings::Flags,
    _isa_flags: &RiscvFlags,
    frame_layout: &FrameLayout,
) -> SmallInstVec<Inst> {
    let mut insts = SmallVec::new();
    if frame_layout.setup_area_size > 0 {
        insts.push(Inst::gen_load(
            writable_link_reg(),
            AMode::SPOffset(8),
            I64,
            MemFlags::trusted(),
        ));
        insts.push(Inst::gen_load(
            writable_fp_reg(),
            AMode::SPOffset(0),
            I64,
            MemFlags::trusted(),
        ));
        insts.extend(Self::gen_sp_reg_adjust(16));
    }
    // Tail calling convention: the callee is responsible for popping the
    // incoming tail-argument area.
    if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
        insts.extend(Self::gen_sp_reg_adjust(frame_layout.tail_args_size.try_into().unwrap()));
    }
    insts
}
/// Emit the function return instruction.
fn gen_return(
    _call_conv: isa::CallConv,
    _isa_flags: &RiscvFlags,
    _frame_layout: &FrameLayout,
) -> SmallInstVec<Inst> {
    smallvec![Inst::Ret {}]
}
/// Emit a stack probe: load `frame_size` into a0 and call the
/// `Probestack` libcall (SystemV convention).
fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
    insts.extend(Inst::load_constant_u32(writable_a0(), frame_size as u64));
    let mut info =
        CallInfo::empty(ExternalName::LibCall(LibCall::Probestack), CallConv::SystemV);
    info.uses.push(CallArgPair { vreg: a0(), preg: a0() });
    insts.push(Inst::Call { info: Box::new(info) });
}
/// Emit clobber saves: grow the incoming tail-argument area if needed
/// (re-anchoring the saved RA/FP pair), then decrement SP by clobbers +
/// fixed frame + outgoing args and store each clobbered callee-save at
/// descending offsets from the new SP.  Emits unwind records when
/// enabled in `flags`.
fn gen_clobber_save(
    _call_conv: isa::CallConv,
    flags: &settings::Flags,
    frame_layout: &FrameLayout,
) -> SmallVec<[Inst; 16]> {
    let mut insts = SmallVec::new();
    let setup_frame = frame_layout.setup_area_size > 0;
    let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
    if incoming_args_diff > 0 {
        // Decrement SP by the amount of additional incoming argument space we need
        insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
        if setup_frame {
            // Write the lr position on the stack again, as it hasn't changed since it was
            // pushed in `gen_prologue_frame_setup`
            insts.push(Inst::gen_store(
                AMode::SPOffset(8),
                link_reg(),
                I64,
                MemFlags::trusted(),
            ));
            insts.push(Inst::gen_load(
                writable_fp_reg(),
                AMode::SPOffset(i64::from(incoming_args_diff)),
                I64,
                MemFlags::trusted(),
            ));
            insts.push(Inst::gen_store(AMode::SPOffset(0), fp_reg(), I64, MemFlags::trusted()));
            // Finally, sync the frame pointer with SP
            insts.push(Inst::gen_move(writable_fp_reg(), stack_reg(), I64));
        }
    }
    if flags.unwind_info() && setup_frame {
        // The *unwind* frame (but not the actual frame) starts at the
        // clobbers, just below the saved FP/LR pair.
        insts.push(Inst::Unwind {
            inst: UnwindInst::DefineNewFrame {
                offset_downward_to_clobbers: frame_layout.clobber_size,
                offset_upward_to_caller_sp: frame_layout.setup_area_size,
            },
        });
    }
    // Adjust the stack pointer downward for clobbers, the function fixed
    // frame (spillslots and storage slots), and outgoing arguments.
    let stack_size = frame_layout.clobber_size
        + frame_layout.fixed_frame_storage_size
        + frame_layout.outgoing_args_size;
    // Store each clobbered register in order at offsets from SP,
    // placing them above the fixed frame slots.
    if stack_size > 0 {
        insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32)));
        let mut cur_offset = 8;
        for reg in &frame_layout.clobbered_callee_saves {
            let r_reg = reg.to_reg();
            let ty = match r_reg.class() {
                RegClass::Int => I64,
                RegClass::Float => F64,
                RegClass::Vector => unimplemented!("Vector Clobber Saves"),
            };
            insts.push(Inst::gen_store(
                AMode::SPOffset((stack_size - cur_offset) as i64),
                Reg::from(reg.to_reg()),
                ty,
                MemFlags::trusted(),
            ));
            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: frame_layout.clobber_size - cur_offset,
                        reg: r_reg,
                    },
                });
            }
            cur_offset += 8
        }
    }
    insts
}
/// Reverse of `gen_clobber_save`: reload every clobbered callee-save
/// from its frame slot (same offsets, same order), then release the
/// clobber + fixed-frame + outgoing-args stack space.
fn gen_clobber_restore(
    _call_conv: isa::CallConv,
    _flags: &settings::Flags,
    frame_layout: &FrameLayout,
) -> SmallVec<[Inst; 16]> {
    let mut insts = SmallVec::new();
    let stack_size = frame_layout.clobber_size
        + frame_layout.fixed_frame_storage_size
        + frame_layout.outgoing_args_size;
    let mut cur_offset = 8;
    for reg in &frame_layout.clobbered_callee_saves {
        let rreg = reg.to_reg();
        let ty = match rreg.class() {
            RegClass::Int => I64,
            RegClass::Float => F64,
            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
        };
        insts.push(Inst::gen_load(
            reg.map(Reg::from),
            AMode::SPOffset(i64::from(stack_size - cur_offset)),
            ty,
            MemFlags::trusted(),
        ));
        cur_offset += 8
    }
    if stack_size > 0 {
        insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
    }
    insts
}
/// Emit the call sequence for `dest`: a direct call for near external
/// names; for far names, load the address into `tmp` and call
/// indirectly; for a register destination, call indirectly through it.
fn gen_call(dest: &CallDest, tmp: Writable<Reg>, info: CallInfo<()>) -> SmallVec<[Self::I; 2]> {
    let mut insts = SmallVec::new();
    match dest {
        CallDest::ExtName(name, RelocDistance::Near) => {
            let info = Box::new(info.map(|()| name.clone()));
            insts.push(Inst::Call { info });
        }
        CallDest::ExtName(name, RelocDistance::Far) => {
            // Too far for a direct relocation: materialize the address first.
            insts.push(Inst::LoadExtName { rd: tmp, name: Box::new(name.clone()), offset: 0 });
            let info = Box::new(info.map(|()| tmp.to_reg()));
            insts.push(Inst::CallInd { info });
        }
        CallDest::Reg(reg) => {
            let info = Box::new(info.map(|()| *reg));
            insts.push(Inst::CallInd { info });
        }
    }
    insts
}
/// Emit a call to the `memcpy` libcall copying `size` bytes from `src` to
/// `dst`, using `alloc_tmp` to obtain a register for the length constant.
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
    call_conv: isa::CallConv,
    dst: Reg,
    src: Reg,
    size: usize,
    mut alloc_tmp: F,
) -> SmallVec<[Self::I; 8]> {
    let mut insts = SmallVec::new();
    // The three memcpy arguments are pinned to x10-x12 (a0-a2).
    let arg0 = Writable::from_reg(x_reg(10));
    let arg1 = Writable::from_reg(x_reg(11));
    let arg2 = Writable::from_reg(x_reg(12));
    // Materialize the byte count in a fresh temporary.
    let tmp = alloc_tmp(Self::word_type());
    insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter());
    insts.push(Inst::Call {
        info: Box::new(CallInfo {
            dest: ExternalName::LibCall(LibCall::Memcpy),
            uses: smallvec![
                CallArgPair { vreg: dst, preg: arg0.to_reg() },
                CallArgPair { vreg: src, preg: arg1.to_reg() },
                CallArgPair { vreg: tmp.to_reg(), preg: arg2.to_reg() }
            ],
            defs: smallvec![],
            clobbers: Self::get_regs_clobbered_by_call(call_conv),
            caller_conv: call_conv,
            callee_conv: call_conv,
            callee_pop_size: 0,
        }),
    });
    insts
}
/// Number of 8-byte spill slots needed for a value of register class `rc`.
fn get_number_of_spillslots_for_value(
    rc: RegClass,
    _target_vector_bytes: u32,
    isa_flags: &RiscvFlags,
) -> u32 {
    // We allocate in terms of 8-byte slots. Scalars take one slot; vectors
    // take enough slots to cover the configured minimum vector register
    // size.
    match rc {
        RegClass::Int | RegClass::Float => 1,
        RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32,
    }
}
/// Return the (process-wide, lazily built) register-allocator environment.
fn get_machine_env(_flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
    // The environment is identical for every function, so build it once
    // and cache it for the lifetime of the process.
    static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
    MACHINE_ENV.get_or_init(create_reg_enviroment)
}
/// Registers a call may clobber; the same set is used for every calling
/// convention this backend supports.
fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet {
    DEFAULT_CLOBBERS
}
/// Compute the frame layout: which callee-saves need prologue spills, the
/// clobber-area size, and whether an FP/LR setup area is required.
fn compute_frame_layout(
    _call_conv: isa::CallConv,
    flags: &settings::Flags,
    _sig: &Signature,
    regs: &[Writable<RealReg>],
    is_leaf: bool,
    incoming_args_size: u32,
    tail_args_size: u32,
    fixed_frame_storage_size: u32,
    outgoing_args_size: u32,
) -> FrameLayout {
    // Only registers in the callee-save set need prologue spills; sort so
    // the save/restore order is deterministic.
    let mut regs: Vec<Writable<RealReg>> = regs
        .iter()
        .cloned()
        .filter(|r| DEFAULT_CALLEE_SAVES.contains(r.to_reg().into()))
        .collect();
    regs.sort_unstable();
    // Compute clobber size.
    let clobber_size = compute_clobber_size(&regs);
    // Compute linkage frame size.
    let setup_area_size = if flags.preserve_frame_pointers()
        || !is_leaf
        // The function arguments that are passed on the stack are addressed
        // relative to the Frame Pointer.
        || incoming_args_size > 0
        || clobber_size > 0
        || fixed_frame_storage_size > 0
    {
        16 // FP, LR
    } else {
        0
    };
    // Return FrameLayout structure.
    FrameLayout {
        incoming_args_size,
        tail_args_size,
        setup_area_size,
        clobber_size,
        fixed_frame_storage_size,
        outgoing_args_size,
        clobbered_callee_saves: regs,
    }
}
/// Emit an inline stack probe for a frame of `frame_size` bytes, touching
/// the stack at least once per `guard_size` bytes.
fn gen_inline_probestack(
    insts: &mut SmallInstVec<Self::I>,
    _call_conv: isa::CallConv,
    frame_size: u32,
    guard_size: u32,
) {
    // Unroll at most n consecutive probes, before falling back to using a loop
    const PROBE_MAX_UNROLL: u32 = 3;
    // Number of probes that we need to perform
    let probe_count = align_to(frame_size, guard_size) / guard_size;
    // Must be a caller-saved register that is not an argument.
    let tmp = Writable::from_reg(x_reg(28)); // t3
    if probe_count <= PROBE_MAX_UNROLL {
        Self::gen_probestack_unroll(insts, tmp, guard_size, probe_count)
    } else {
        // Too many probes to unroll: emit a runtime loop instead.
        insts.push(Inst::StackProbeLoop { guard_size, probe_count, tmp });
    }
}
}
impl Riscv64ABICallSite {
    /// Lower a tail call: place the arguments, then emit a `ReturnCall`
    /// (direct or indirect, depending on how `dest` is addressable).
    pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
        let new_stack_arg_size =
            u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap();
        // Record this callee's stack-argument requirement so the frame
        // reserves enough tail-arg space.
        ctx.abi_mut().accumulate_tail_args_size(new_stack_arg_size);
        // Put all arguments in registers and stack slots (within that newly
        // allocated stack space).
        self.emit_args(ctx, args);
        self.emit_stack_ret_arg_for_tail_call(ctx);
        let dest = self.dest().clone();
        let uses = self.take_uses();
        match dest {
            CallDest::ExtName(name, RelocDistance::Near) => {
                let info = Box::new(ReturnCallInfo { dest: name, uses, new_stack_arg_size });
                ctx.emit(Inst::ReturnCall { info });
            }
            // A far symbol is first materialized into a temporary register
            // and then tail-called indirectly.
            CallDest::ExtName(name, RelocDistance::Far) => {
                let callee = ctx.alloc_tmp(ir::types::I64).only_reg().unwrap();
                ctx.emit(Inst::LoadExtName { rd: callee, name: Box::new(name), offset: 0 });
                let info =
                    Box::new(ReturnCallInfo { dest: callee.to_reg(), uses, new_stack_arg_size });
                ctx.emit(Inst::ReturnCallInd { info });
            }
            CallDest::Reg(callee) => {
                let info = Box::new(ReturnCallInfo { dest: callee, uses, new_stack_arg_size });
                ctx.emit(Inst::ReturnCallInd { info });
            }
        }
    }
}
// NOTE: no V regs are callee save.
/// Registers the standard RISC-V calling convention requires callees to
/// preserve: sp (x2), fp/s0 (x8), s1 (x9), s2-s11 (x18-x27) and
/// fs0-fs1 (f8-f9), fs2-fs11 (f18-f27).
const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty()
    // X Regs
    .with(px_reg(2))
    .with(px_reg(8))
    .with(px_reg(9))
    .with(px_reg(18))
    .with(px_reg(19))
    .with(px_reg(20))
    .with(px_reg(21))
    .with(px_reg(22))
    .with(px_reg(23))
    .with(px_reg(24))
    .with(px_reg(25))
    .with(px_reg(26))
    .with(px_reg(27))
    // F Regs
    .with(pf_reg(8))
    // f9 (fs1) is callee-saved in the RISC-V psABI; without this entry the
    // prologue would never spill it, so an external caller's fs1 could be
    // silently clobbered.
    .with(pf_reg(9))
    .with(pf_reg(18))
    .with(pf_reg(19))
    .with(pf_reg(20))
    .with(pf_reg(21))
    .with(pf_reg(22))
    .with(pf_reg(23))
    .with(pf_reg(24))
    .with(pf_reg(25))
    .with(pf_reg(26))
    .with(pf_reg(27));
/// Bytes of stack needed to save `clobbers`, rounded up to 16-byte
/// alignment.
fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
    // Integer and float callee-saves each occupy one 8-byte slot.
    let bytes = clobbers
        .iter()
        .map(|reg| match reg.to_reg().class() {
            RegClass::Int | RegClass::Float => 8,
            RegClass::Vector => unimplemented!("Vector Size Clobbered"),
        })
        .sum::<u32>();
    align_to(bytes, 16)
}
/// Registers a call clobbers (caller-saved set). All vector registers are
/// included since no V regs are callee-save.
const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty()
    // X Regs: ra (x1), t0-t2 (x5-x7), a0-a7 (x10-x17), t3-t6 (x28-x31).
    .with(px_reg(1))
    .with(px_reg(5))
    .with(px_reg(6))
    .with(px_reg(7))
    .with(px_reg(10))
    .with(px_reg(11))
    .with(px_reg(12))
    .with(px_reg(13))
    .with(px_reg(14))
    .with(px_reg(15))
    .with(px_reg(16))
    .with(px_reg(17))
    .with(px_reg(28))
    .with(px_reg(29))
    .with(px_reg(30))
    .with(px_reg(31))
    // F Regs
    .with(pf_reg(0))
    .with(pf_reg(1))
    .with(pf_reg(2))
    .with(pf_reg(3))
    .with(pf_reg(4))
    .with(pf_reg(5))
    .with(pf_reg(6))
    .with(pf_reg(7))
    // NOTE(review): f9 (fs1) is listed as call-clobbered here although the
    // RISC-V psABI treats it as callee-saved and it is absent from
    // DEFAULT_CALLEE_SAVES — confirm this is intentional.
    .with(pf_reg(9))
    .with(pf_reg(10))
    .with(pf_reg(11))
    .with(pf_reg(12))
    .with(pf_reg(13))
    .with(pf_reg(14))
    .with(pf_reg(15))
    .with(pf_reg(16))
    .with(pf_reg(17))
    .with(pf_reg(28))
    .with(pf_reg(29))
    .with(pf_reg(30))
    .with(pf_reg(31))
    // V Regs - All vector regs get clobbered
    .with(pv_reg(0))
    .with(pv_reg(1))
    .with(pv_reg(2))
    .with(pv_reg(3))
    .with(pv_reg(4))
    .with(pv_reg(5))
    .with(pv_reg(6))
    .with(pv_reg(7))
    .with(pv_reg(8))
    .with(pv_reg(9))
    .with(pv_reg(10))
    .with(pv_reg(11))
    .with(pv_reg(12))
    .with(pv_reg(13))
    .with(pv_reg(14))
    .with(pv_reg(15))
    .with(pv_reg(16))
    .with(pv_reg(17))
    .with(pv_reg(18))
    .with(pv_reg(19))
    .with(pv_reg(20))
    .with(pv_reg(21))
    .with(pv_reg(22))
    .with(pv_reg(23))
    .with(pv_reg(24))
    .with(pv_reg(25))
    .with(pv_reg(26))
    .with(pv_reg(27))
    .with(pv_reg(28))
    .with(pv_reg(29))
    .with(pv_reg(30))
    .with(pv_reg(31));
/// Build the register-allocation environment. The order of each list is
/// significant: earlier registers are preferred by the allocator.
fn create_reg_enviroment() -> MachineEnv {
    // Some C Extension instructions can only use a subset of the registers.
    // x8 - x15, f8 - f15, v8 - v15 so we should prefer to use those since
    // they allow us to emit C instructions more often.
    //
    // In general the order of preference is:
    // 1. Compressible Caller Saved registers.
    // 2. Non-Compressible Caller Saved registers.
    // 3. Compressible Callee Saved registers.
    // 4. Non-Compressible Callee Saved registers.
    let preferred_regs_by_class: [Vec<PReg>; 3] = {
        let x_registers: Vec<PReg> = (10..=15).map(px_reg).collect();
        let f_registers: Vec<PReg> = (10..=15).map(pf_reg).collect();
        let v_registers: Vec<PReg> = (8..=15).map(pv_reg).collect();
        [x_registers, f_registers, v_registers]
    };
    let non_preferred_regs_by_class: [Vec<PReg>; 3] = {
        // x0 - x4 are special registers, so we don't want to use them.
        // Omit x30 and x31 since they are the spilltmp registers.
        // Start with the Non-Compressible Caller Saved registers.
        let x_registers: Vec<PReg> = (5..=7)
            .chain(16..=17)
            .chain(28..=29)
            // The first Callee Saved register is x9 since its Compressible
            // Omit x8 since it's the frame pointer.
            .chain(9..=9)
            // The rest of the Callee Saved registers are Non-Compressible
            .chain(18..=27)
            .map(px_reg)
            .collect();
        // Prefer Caller Saved registers.
        let f_registers: Vec<PReg> = (0..=7)
            .chain(16..=17)
            .chain(28..=31)
            // Once those are exhausted, we should prefer f8 and f9 since they are
            // callee saved, but compressible.
            .chain(8..=9)
            .chain(18..=27)
            .map(pf_reg)
            .collect();
        let v_registers = (0..=7).chain(16..=31).map(pv_reg).collect();
        [x_registers, f_registers, v_registers]
    };
    MachineEnv {
        preferred_regs_by_class,
        non_preferred_regs_by_class,
        fixed_stack_slots: vec![],
        // No dedicated scratch register for any class.
        scratch_by_class: [None, None, None],
    }
}
impl Riscv64MachineDeps {
    /// Emit `probe_count` unrolled stack probes, each `guard_size` bytes
    /// below the previous, leaving SP unchanged overall.
    fn gen_probestack_unroll(
        insts: &mut SmallInstVec<Inst>,
        tmp: Writable<Reg>,
        guard_size: u32,
        probe_count: u32,
    ) {
        // When manually unrolling adjust the stack pointer and then write a zero
        // to the stack at that offset.
        //
        // We do this because valgrind expects us to never write beyond the stack
        // pointer and associated redzone.
        // See: https://github.com/bytecodealliance/wasmtime/issues/7454
        // Store the adjust amount in a register upfront, so we don't have to
        // reload it for each probe. It's worth loading this as a negative and
        // using an `add` instruction since we have compressed versions of `add`
        // but not the `sub` instruction.
        insts.extend(Inst::load_constant_u64(tmp, (-(guard_size as i64)) as u64));
        for _ in 0..probe_count {
            // SP += -guard_size, then touch the new top of stack.
            insts.push(Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd: writable_stack_reg(),
                rs1: stack_reg(),
                rs2: tmp.to_reg(),
            });
            insts.push(Inst::gen_store(AMode::SPOffset(0), zero_reg(), I32, MemFlags::trusted()));
        }
        // Restore the stack pointer to its original value
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }
}

3128
hbcb/src/inst.isle Normal file

File diff suppressed because it is too large Load diff

1957
hbcb/src/inst/args.rs Normal file

File diff suppressed because it is too large Load diff

2685
hbcb/src/inst/emit.rs Normal file

File diff suppressed because it is too large Load diff

2277
hbcb/src/inst/emit_tests.rs Normal file

File diff suppressed because it is too large Load diff

721
hbcb/src/inst/encode.rs Normal file
View file

@ -0,0 +1,721 @@
//! Contains the RISC-V instruction encoding logic.
//!
//! These formats are specified in the RISC-V specification in section 2.2.
//! See: <https://riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf>
//!
//! Some instructions especially in extensions have slight variations from
//! the base RISC-V specification.
use super::*;
use crate::lower::isle::generated_code::{
COpcodeSpace, CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth,
VecAluOpRImm5, VecAluOpRR, VecAluOpRRRImm5, VecAluOpRRRR, VecOpCategory, ZcbMemOp,
};
use crate::machinst::isle::WritableReg;
/// Debug-checks that `value` fits in `width` bits and returns it unchanged.
fn unsigned_field_width(value: u32, width: u8) -> u32 {
    // Guard the shift so a width of 32 or more cannot itself overflow the
    // shift amount in debug builds (any u32 trivially fits in >= 32 bits).
    debug_assert!(
        width >= 32 || value >> width == 0,
        "field value {value:#x} does not fit in {width} bits"
    );
    value
}
/// Layout:
/// 0-------6-7-------11-12------14-15------19-20------24-25-------31
/// | Opcode | rd | funct3 | rs1 | rs2 | funct7 |
fn encode_r_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, rs2: u32, funct7: u32) -> u32 {
    // OR each width-checked field into its bit position.
    unsigned_field_width(opcode, 7)
        | unsigned_field_width(rd, 5) << 7
        | unsigned_field_width(funct3, 3) << 12
        | unsigned_field_width(rs1, 5) << 15
        | unsigned_field_width(rs2, 5) << 20
        | unsigned_field_width(funct7, 7) << 25
}
/// Encode an R-type instruction.
///
/// Converts the register operands to their 5-bit GPR numbers and defers to
/// `encode_r_type_bits` for field placement.
pub fn encode_r_type(
    opcode: u32,
    rd: WritableReg,
    funct3: u32,
    rs1: Reg,
    rs2: Reg,
    funct7: u32,
) -> u32 {
    encode_r_type_bits(
        opcode,
        reg_to_gpr_num(rd.to_reg()),
        funct3,
        reg_to_gpr_num(rs1),
        reg_to_gpr_num(rs2),
        funct7,
    )
}
/// Layout:
/// 0-------6-7-------11-12------14-15------19-20------------------31
/// | Opcode | rd | width | rs1 | Offset[11:0] |
fn encode_i_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, offset: u32) -> u32 {
    // OR each width-checked field into its bit position.
    unsigned_field_width(opcode, 7)
        | unsigned_field_width(rd, 5) << 7
        | unsigned_field_width(funct3, 3) << 12
        | unsigned_field_width(rs1, 5) << 15
        | unsigned_field_width(offset, 12) << 20
}
/// Encode an I-type instruction.
///
/// The 12-bit immediate is taken from `offset`'s raw field bits.
pub fn encode_i_type(opcode: u32, rd: WritableReg, width: u32, rs1: Reg, offset: Imm12) -> u32 {
    encode_i_type_bits(
        opcode,
        reg_to_gpr_num(rd.to_reg()),
        width,
        reg_to_gpr_num(rs1),
        offset.bits(),
    )
}
/// Encode an S-type instruction.
///
/// Layout:
/// 0-------6-7-------11-12------14-15------19-20---24-25-------------31
/// | Opcode | imm[4:0] | width | base | src | imm[11:5] |
pub fn encode_s_type(opcode: u32, width: u32, base: Reg, src: Reg, offset: Imm12) -> u32 {
    let mut bits = 0;
    bits |= unsigned_field_width(opcode, 7);
    // The store offset is split: its low five bits sit where rd would be...
    bits |= (offset.bits() & 0b11111) << 7;
    bits |= unsigned_field_width(width, 3) << 12;
    bits |= reg_to_gpr_num(base) << 15;
    bits |= reg_to_gpr_num(src) << 20;
    // ...and the high seven bits take funct7's position.
    bits |= unsigned_field_width(offset.bits() >> 5, 7) << 25;
    bits
}
/// Encodes a Vector ALU instruction.
///
/// Fields:
/// - opcode (7 bits)
/// - vd (5 bits)
/// - funct3 (3 bits)
/// - vs1 (5 bits)
/// - vs2 (5 bits)
/// - vm (1 bit)
/// - funct6 (6 bits)
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc
pub fn encode_valu(
    op: VecAluOpRRR,
    vd: WritableReg,
    vs1: Reg,
    vs2: Reg,
    masking: VecOpMasking,
) -> u32 {
    // funct7 packs funct6 with the vm (mask) bit in its LSB.
    let funct7 = (op.funct6() << 1) | masking.encode();
    encode_r_type_bits(
        op.opcode(),
        reg_to_gpr_num(vd.to_reg()),
        op.funct3(),
        reg_to_gpr_num(vs1),
        reg_to_gpr_num(vs2),
        funct7,
    )
}
/// Encodes a Vector ALU+Imm instruction.
/// This is just a Vector ALU instruction with an immediate in the VS1 field.
///
/// Fields:
/// - opcode (7 bits)
/// - vd (5 bits)
/// - funct3 (3 bits)
/// - imm (5 bits)
/// - vs2 (5 bits)
/// - vm (1 bit)
/// - funct6 (6 bits)
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc
pub fn encode_valu_rr_imm(
    op: VecAluOpRRImm5,
    vd: WritableReg,
    imm: Imm5,
    vs2: Reg,
    masking: VecOpMasking,
) -> u32 {
    // funct7 packs funct6 with the vm (mask) bit in its LSB.
    let funct7 = (op.funct6() << 1) | masking.encode();
    // The 5-bit immediate occupies the vs1 slot.
    let imm = imm.bits() as u32;
    encode_r_type_bits(
        op.opcode(),
        reg_to_gpr_num(vd.to_reg()),
        op.funct3(),
        imm,
        reg_to_gpr_num(vs2),
        funct7,
    )
}
/// Encodes a Vector ALU instruction with vd, vs2 and vs1 register operands
/// (`VecAluOpRRRR` form); same physical layout as `encode_valu`.
pub fn encode_valu_rrrr(
    op: VecAluOpRRRR,
    vd: WritableReg,
    vs2: Reg,
    vs1: Reg,
    masking: VecOpMasking,
) -> u32 {
    // funct7 packs funct6 with the vm (mask) bit in its LSB.
    let funct7 = (op.funct6() << 1) | masking.encode();
    encode_r_type_bits(
        op.opcode(),
        reg_to_gpr_num(vd.to_reg()),
        op.funct3(),
        reg_to_gpr_num(vs1),
        reg_to_gpr_num(vs2),
        funct7,
    )
}
/// Encodes a Vector ALU instruction where a 5-bit immediate occupies the
/// vs1 slot (`VecAluOpRRRImm5` form).
pub fn encode_valu_rrr_imm(
    op: VecAluOpRRRImm5,
    vd: WritableReg,
    imm: Imm5,
    vs2: Reg,
    masking: VecOpMasking,
) -> u32 {
    // funct7 packs funct6 with the vm (mask) bit in its LSB.
    let funct7 = (op.funct6() << 1) | masking.encode();
    let imm = imm.bits() as u32;
    encode_r_type_bits(
        op.opcode(),
        reg_to_gpr_num(vd.to_reg()),
        op.funct3(),
        imm,
        reg_to_gpr_num(vs2),
        funct7,
    )
}
/// Encodes a two-operand Vector ALU instruction (`VecAluOpRR` form).
pub fn encode_valu_rr(op: VecAluOpRR, vd: WritableReg, vs: Reg, masking: VecOpMasking) -> u32 {
    // funct7 packs funct6 with the vm (mask) bit in its LSB.
    let funct7 = (op.funct6() << 1) | masking.encode();
    // Depending on the op, the single register source goes in either the
    // vs2 or vs1 slot, with an op-specific auxiliary value in the other.
    let (vs1, vs2) = if op.vs_is_vs2_encoded() {
        (op.aux_encoding(), reg_to_gpr_num(vs))
    } else {
        (reg_to_gpr_num(vs), op.aux_encoding())
    };
    encode_r_type_bits(
        op.opcode(),
        reg_to_gpr_num(vd.to_reg()),
        op.funct3(),
        vs1,
        vs2,
        funct7,
    )
}
/// Encodes a Vector ALU instruction whose only source is a 5-bit immediate
/// in the vs1 slot (`VecAluOpRImm5` form).
pub fn encode_valu_r_imm(
    op: VecAluOpRImm5,
    vd: WritableReg,
    imm: Imm5,
    masking: VecOpMasking,
) -> u32 {
    // funct7 packs funct6 with the vm (mask) bit in its LSB.
    let funct7 = (op.funct6() << 1) | masking.encode();
    // This is true for this opcode, not sure if there are any other ones.
    debug_assert_eq!(op, VecAluOpRImm5::VmvVI);
    let vs1 = imm.bits() as u32;
    let vs2 = op.aux_encoding();
    encode_r_type_bits(
        op.opcode(),
        reg_to_gpr_num(vd.to_reg()),
        op.funct3(),
        vs1,
        vs2,
        funct7,
    )
}
/// Encodes a Vector CFG Imm instruction.
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc
// TODO: Check if this is any of the known instruction types in the spec.
pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 {
    let mut bits = 0;
    bits |= unsigned_field_width(opcode, 7);
    bits |= reg_to_gpr_num(rd) << 7;
    bits |= VecOpCategory::OPCFG.encode() << 12;
    // The immediate AVL sits in the rs1 position.
    bits |= unsigned_field_width(imm.bits(), 5) << 15;
    bits |= unsigned_field_width(vtype.encode(), 10) << 20;
    // Bits [31:30] = 0b11 select the immediate-vl (vsetivli) form.
    bits |= 0b11 << 30;
    bits
}
/// Encodes a Vector Mem Unit Stride Load instruction.
///
/// `lumop` selects the unit-stride sub-operation, `mop` the addressing
/// mode, and `nf` the number of fields for segment loads.
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc
/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP
pub fn encode_vmem_load(
    opcode: u32,
    vd: Reg,
    width: VecElementWidth,
    rs1: Reg,
    lumop: u32,
    masking: VecOpMasking,
    mop: u32,
    nf: u32,
) -> u32 {
    // Width is encoded differently to avoid a clash with the FP load/store sizes.
    let width = match width {
        VecElementWidth::E8 => 0b000,
        VecElementWidth::E16 => 0b101,
        VecElementWidth::E32 => 0b110,
        VecElementWidth::E64 => 0b111,
    };
    let mut bits = 0;
    bits |= unsigned_field_width(opcode, 7);
    bits |= reg_to_gpr_num(vd) << 7;
    bits |= width << 12;
    bits |= reg_to_gpr_num(rs1) << 15;
    bits |= unsigned_field_width(lumop, 5) << 20;
    bits |= masking.encode() << 25;
    bits |= unsigned_field_width(mop, 2) << 26;
    // The mew bit (inst[28]) when set is expected to be used to encode expanded
    // memory sizes of 128 bits and above, but these encodings are currently reserved.
    bits |= 0b0 << 28;
    bits |= unsigned_field_width(nf, 3) << 29;
    bits
}
/// Encodes a Vector Mem Unit Stride Store instruction.
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc
/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP
pub fn encode_vmem_store(
    opcode: u32,
    vs3: Reg,
    width: VecElementWidth,
    rs1: Reg,
    sumop: u32,
    masking: VecOpMasking,
    mop: u32,
    nf: u32,
) -> u32 {
    // This is pretty much the same as the load instruction, just
    // with different names on the fields.
    encode_vmem_load(opcode, vs3, width, rs1, sumop, masking, mop, nf)
}
// The CSR Reg instruction is really just an I type instruction with the CSR in
// the immediate field. (`csr.bits()` yields the 12-bit CSR address as an
// `Imm12`.)
pub fn encode_csr_reg(op: CsrRegOP, rd: WritableReg, rs: Reg, csr: CSR) -> u32 {
    encode_i_type(op.opcode(), rd, op.funct3(), rs, csr.bits())
}
// The CSR Imm instruction is an I type instruction with the CSR in
// the immediate field and the value to be set in the `rs1` field.
pub fn encode_csr_imm(op: CsrImmOP, rd: WritableReg, csr: CSR, imm: UImm5) -> u32 {
    encode_i_type_bits(
        op.opcode(),
        reg_to_gpr_num(rd.to_reg()),
        op.funct3(),
        // 5-bit immediate value, placed where rs1 would normally go.
        imm.bits(),
        csr.bits().bits(),
    )
}
// Encode a CR type instruction.
//
// 0--1-2-----6-7-------11-12-------15
// |op | rs2 | rd/rs1 | funct4 |
pub fn encode_cr_type(op: CrOp, rd: WritableReg, rs2: Reg) -> u16 {
    let mut bits = 0;
    bits |= unsigned_field_width(op.op().bits(), 2);
    bits |= reg_to_gpr_num(rs2) << 2;
    bits |= reg_to_gpr_num(rd.to_reg()) << 7;
    bits |= unsigned_field_width(op.funct4(), 4) << 12;
    // Compressed instructions are 16 bits wide; the assembled value must fit.
    bits.try_into().unwrap()
}
// This isn't technically a instruction format that exists. It's just a CR type
// where the source is rs1, rs2 is zero. rs1 is never written to.
//
// Used for C.JR and C.JALR
pub fn encode_cr2_type(op: CrOp, rs1: Reg) -> u16 {
    // Wrapping rs1 as writable only satisfies encode_cr_type's signature;
    // the register is not actually written.
    encode_cr_type(op, WritableReg::from_reg(rs1), zero_reg())
}
// Encode a CA type instruction.
//
// 0--1-2-----4-5--------6-7--------9-10------15
// |op | rs2 | funct2 | rd/rs1 | funct6 |
pub fn encode_ca_type(op: CaOp, rd: WritableReg, rs2: Reg) -> u16 {
    let mut bits = 0;
    bits |= unsigned_field_width(op.op().bits(), 2);
    // CA operands use the 3-bit compressed register numbering (x8-x15).
    bits |= reg_to_compressed_gpr_num(rs2) << 2;
    bits |= unsigned_field_width(op.funct2(), 2) << 5;
    bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 7;
    bits |= unsigned_field_width(op.funct6(), 6) << 10;
    bits.try_into().unwrap()
}
// Encode a CJ type instruction.
//
// The imm field is a 11 bit signed immediate that is shifted left by 1.
//
// 0--1-2-----12-13--------15
// |op | imm | funct3 |
pub fn encode_cj_type(op: CjOp, imm: Imm12) -> u16 {
    let imm = imm.bits();
    // Jump targets are 2-byte aligned, so bit 0 must be clear.
    debug_assert!(imm & 1 == 0);
    // The offset bits are in rather weird positions.
    // [11|4|9:8|10|6|7|3:1|5]
    let mut imm_field = 0;
    imm_field |= ((imm >> 11) & 1) << 10;
    imm_field |= ((imm >> 4) & 1) << 9;
    imm_field |= ((imm >> 8) & 3) << 7;
    imm_field |= ((imm >> 10) & 1) << 6;
    imm_field |= ((imm >> 6) & 1) << 5;
    imm_field |= ((imm >> 7) & 1) << 4;
    imm_field |= ((imm >> 1) & 7) << 1;
    imm_field |= ((imm >> 5) & 1) << 0;
    let mut bits = 0;
    bits |= unsigned_field_width(op.op().bits(), 2);
    bits |= unsigned_field_width(imm_field, 11) << 2;
    bits |= unsigned_field_width(op.funct3(), 3) << 13;
    bits.try_into().unwrap()
}
// Encode a CI type instruction.
//
// The imm field is a 6 bit signed immediate.
//
// 0--1-2-------6-7-------11-12-----12-13-----15
// |op | imm[4:0] | src | imm[5] | funct3 |
pub fn encode_ci_type(op: CiOp, rd: WritableReg, imm: Imm6) -> u16 {
    let imm = imm.bits();
    let mut bits = 0;
    bits |= unsigned_field_width(op.op().bits(), 2);
    // Low five immediate bits at [6:2], the sixth bit separately at [12].
    bits |= unsigned_field_width((imm & 0x1f) as u32, 5) << 2;
    bits |= reg_to_gpr_num(rd.to_reg()) << 7;
    bits |= unsigned_field_width(((imm >> 5) & 1) as u32, 1) << 12;
    bits |= unsigned_field_width(op.funct3(), 3) << 13;
    bits.try_into().unwrap()
}
// Stack-Pointer relative loads are regular CI instructions, but, the immediate
// is zero extended, and with a slightly different immediate field encoding.
pub fn encode_ci_sp_load(op: CiOp, rd: WritableReg, imm: Uimm6) -> u16 {
    let imm = imm.bits();
    // These are the spec encoded offsets.
    // LWSP: [5|4:2|7:6]
    // LDSP: [5|4:3|8:6]
    // FLDSP: [5|4:3|8:6]
    //
    // We don't receive the entire offset in `imm`, just a multiple of the load-size.
    // Number of bits in the lowest position of imm. 3 for lwsp, 2 for {f,}ldsp.
    let low_bits = match op {
        CiOp::CLwsp => 3,                // [4:2]
        CiOp::CLdsp | CiOp::CFldsp => 2, // [4:3]
        _ => unreachable!(),
    };
    let high_bits = 6 - 1 - low_bits;
    let mut enc_imm = 0;
    // Encode [7:6] at the bottom of imm
    enc_imm |= imm >> (6 - high_bits);
    // Next place [4:2] in the middle
    enc_imm |= (imm & ((1 << low_bits) - 1)) << high_bits;
    // Finally place [5] at the top
    enc_imm |= ((imm >> low_bits) & 1) << 5;
    // `<< 10 >> 10` sign-extends the reassembled 6-bit field into an i16
    // so it can be carried by Imm6.
    let enc_imm = Imm6::maybe_from_i16((enc_imm as i16) << 10 >> 10).unwrap();
    encode_ci_type(op, rd, enc_imm)
}
/// c.addi16sp is a regular CI op, but the immediate field is encoded in a weird way
pub fn encode_c_addi16sp(imm: Imm6) -> u16 {
    let imm = imm.bits();
    // [6|1|3|5:4|2]
    let mut enc_imm = 0;
    enc_imm |= ((imm >> 5) & 1) << 5;
    enc_imm |= ((imm >> 0) & 1) << 4;
    enc_imm |= ((imm >> 2) & 1) << 3;
    enc_imm |= ((imm >> 3) & 3) << 1;
    enc_imm |= ((imm >> 1) & 1) << 0;
    // `<< 10 >> 10` sign-extends the shuffled 6-bit field back into an i16.
    let enc_imm = Imm6::maybe_from_i16((enc_imm as i16) << 10 >> 10).unwrap();
    // rd is fixed to the stack pointer for c.addi16sp.
    encode_ci_type(CiOp::CAddi16sp, writable_stack_reg(), enc_imm)
}
// Encode a CIW type instruction.
//
// 0--1-2------4-5------12-13--------15
// |op | rd | imm | funct3 |
pub fn encode_ciw_type(op: CiwOp, rd: WritableReg, imm: u8) -> u16 {
    // Scatter the immediate into its spec field order:
    // [3:2|7:4|0|1]
    let mut imm_field = 0;
    imm_field |= ((imm >> 1) & 1) << 0;
    imm_field |= ((imm >> 0) & 1) << 1;
    imm_field |= ((imm >> 4) & 15) << 2;
    imm_field |= ((imm >> 2) & 3) << 6;
    let mut bits = 0;
    bits |= unsigned_field_width(op.op().bits(), 2);
    // rd uses the 3-bit compressed register numbering.
    bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 2;
    bits |= unsigned_field_width(imm_field as u32, 8) << 5;
    bits |= unsigned_field_width(op.funct3(), 3) << 13;
    bits.try_into().unwrap()
}
// Encode a CB type instruction.
//
// The imm field is a 6 bit signed immediate.
//
// 0--1-2-------6-7-------9-10-------11-12-------13--------15
// |op | imm[4:0] | dst | funct2 | imm[5] | funct3 |
pub fn encode_cb_type(op: CbOp, rd: WritableReg, imm: Imm6) -> u16 {
    let imm = imm.bits();
    let mut bits = 0;
    bits |= unsigned_field_width(op.op().bits(), 2);
    bits |= unsigned_field_width((imm & 0x1f) as u32, 5) << 2;
    // rd uses the 3-bit compressed register numbering.
    bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 7;
    bits |= unsigned_field_width(op.funct2(), 2) << 10;
    bits |= unsigned_field_width(((imm >> 5) & 1) as u32, 1) << 12;
    bits |= unsigned_field_width(op.funct3(), 3) << 13;
    bits.try_into().unwrap()
}
// Encode a CSS type instruction.
//
// The imm field is a 6 bit unsigned immediate.
//
// 0--1-2-------6-7--------12-13-------15
// |op | src | imm | funct3 |
pub fn encode_css_type(op: CssOp, src: Reg, imm: Uimm6) -> u16 {
    let imm = imm.bits();
    // These are the spec encoded offsets.
    // c.swsp: [5:2|7:6]
    // c.sdsp: [5:3|8:6]
    // c.fsdsp: [5:3|8:6]
    //
    // We don't receive the entire offset in `imm`, just a multiple of the load-size.
    // Number of bits in the lowest position of imm. 4 for c.swsp, 3 for c.{f,}sdsp.
    let low_bits = match op {
        CssOp::CSwsp => 4,               // [5:2]
        CssOp::CSdsp | CssOp::CFsdsp => 3, // [5:3]
    };
    let high_bits = 6 - low_bits;
    let mut enc_imm = 0;
    // Low part of the offset goes in the field's upper bits, high part in
    // the lower bits (mirrors the spec's bit listing above).
    enc_imm |= (imm & ((1 << low_bits) - 1)) << high_bits;
    enc_imm |= imm >> low_bits;
    let mut bits = 0;
    bits |= unsigned_field_width(op.op().bits(), 2);
    bits |= reg_to_gpr_num(src) << 2;
    bits |= unsigned_field_width(enc_imm as u32, 6) << 7;
    bits |= unsigned_field_width(op.funct3(), 3) << 13;
    bits.try_into().unwrap()
}
// Encode a CS type instruction.
//
// The imm field is a 5 bit unsigned immediate.
//
// 0--1-2-----4-5----------6-7---------9-10----------12-13-----15
// |op | src | imm(2-bit) | base | imm(3-bit) | funct3 |
pub fn encode_cs_type(op: CsOp, src: Reg, base: Reg, imm: Uimm5) -> u16 {
    // The access size determines how the 5-bit offset is split between the
    // two immediate fields (see encode_cs_cl_type_bits).
    let size = match op {
        CsOp::CFsd | CsOp::CSd => 8,
        CsOp::CSw => 4,
    };
    encode_cs_cl_type_bits(op.op(), op.funct3(), size, src, base, imm)
}
// Encode a CL type instruction.
//
// The imm field is a 5 bit unsigned immediate.
//
// 0--1-2------4-5----------6-7---------9-10----------12-13-----15
// |op | dest | imm(2-bit) | base | imm(3-bit) | funct3 |
pub fn encode_cl_type(op: ClOp, dest: WritableReg, base: Reg, imm: Uimm5) -> u16 {
    // The access size determines how the 5-bit offset is split between the
    // two immediate fields (see encode_cs_cl_type_bits).
    let size = match op {
        ClOp::CFld | ClOp::CLd => 8,
        ClOp::CLw => 4,
    };
    encode_cs_cl_type_bits(op.op(), op.funct3(), size, dest.to_reg(), base, imm)
}
// CL and CS type instructions have the same physical layout.
//
// 0--1-2----------4-5----------6-7---------9-10----------12-13-----15
// |op | dest/src | imm(2-bit) | base | imm(3-bit) | funct3 |
fn encode_cs_cl_type_bits(
    op: COpcodeSpace,
    funct3: u32,
    size: u32,
    dest_src: Reg,
    base: Reg,
    imm: Uimm5,
) -> u16 {
    let imm = imm.bits();
    // Spec bit positions of the 2-bit field:
    // c.sw / c.lw: [2|6]
    // c.sd / c.ld: [7:6]
    // c.fsd / c.fld: [7:6]
    //
    // We differentiate these based on the operation size
    let imm2 = match size {
        4 => ((imm >> 4) & 1) | ((imm & 1) << 1),
        8 => (imm >> 3) & 0b11,
        _ => unreachable!(),
    };
    // [5:3] on all opcodes
    let imm3 = match size {
        4 => (imm >> 1) & 0b111,
        8 => (imm >> 0) & 0b111,
        _ => unreachable!(),
    };
    let mut bits = 0;
    bits |= unsigned_field_width(op.bits(), 2);
    // dest/src and base both use the 3-bit compressed register numbering.
    bits |= reg_to_compressed_gpr_num(dest_src) << 2;
    bits |= unsigned_field_width(imm2 as u32, 2) << 5;
    bits |= reg_to_compressed_gpr_num(base) << 7;
    bits |= unsigned_field_width(imm3 as u32, 3) << 10;
    bits |= unsigned_field_width(funct3, 3) << 13;
    bits.try_into().unwrap()
}
// Encode a CSZN type instruction.
//
// This is an additional encoding format that is introduced in the Zcb extension.
//
// 0--1-2---------6-7--------9-10------15
// |op | funct5 | rd/rs1 | funct6 |
pub fn encode_cszn_type(op: CsznOp, rd: WritableReg) -> u16 {
    let mut bits = 0;
    bits |= unsigned_field_width(op.op().bits(), 2);
    bits |= unsigned_field_width(op.funct5(), 5) << 2;
    // rd uses the 3-bit compressed register numbering.
    bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 7;
    bits |= unsigned_field_width(op.funct6(), 6) << 10;
    bits.try_into().unwrap()
}
// Encodes the various memory operations in the Zcb extension.
//
// 0--1-2----------4-5----------6-7---------9-10-------15
// |op | dest/src | imm(2-bit) | base | funct6 |
fn encode_zcbmem_bits(op: ZcbMemOp, dest_src: Reg, base: Reg, imm: Uimm2) -> u16 {
    let imm = imm.bits();
    // For these ops, bit 6 is part of the opcode, and bit 5 encodes the imm offset.
    let imm = match op {
        ZcbMemOp::CLh | ZcbMemOp::CLhu | ZcbMemOp::CSh => {
            // Half-word ops only carry a 1-bit offset.
            debug_assert_eq!(imm & !1, 0);
            // Only c.lh has this bit as 1
            let opcode_bit = (op == ZcbMemOp::CLh) as u8;
            imm | (opcode_bit << 1)
        }
        // In the rest of the ops the imm is reversed.
        _ => ((imm & 1) << 1) | ((imm >> 1) & 1),
    };
    let mut bits = 0;
    bits |= unsigned_field_width(op.op().bits(), 2);
    // dest/src and base both use the 3-bit compressed register numbering.
    bits |= reg_to_compressed_gpr_num(dest_src) << 2;
    bits |= unsigned_field_width(imm as u32, 2) << 5;
    bits |= reg_to_compressed_gpr_num(base) << 7;
    bits |= unsigned_field_width(op.funct6(), 6) << 10;
    bits.try_into().unwrap()
}
/// Encode a Zcb load; thin wrapper over `encode_zcbmem_bits`.
pub fn encode_zcbmem_load(op: ZcbMemOp, rd: WritableReg, base: Reg, imm: Uimm2) -> u16 {
    encode_zcbmem_bits(op, rd.to_reg(), base, imm)
}
/// Encode a Zcb store; thin wrapper over `encode_zcbmem_bits`.
pub fn encode_zcbmem_store(op: ZcbMemOp, src: Reg, base: Reg, imm: Uimm2) -> u16 {
    encode_zcbmem_bits(op, src, base, imm)
}
/// Encode an FLI.{S,D} (load FP immediate) instruction.
pub fn encode_fli(ty: Type, imm: FliConstant, rd: WritableReg) -> u32 {
    // FLI.{S,D} is encoded as a FMV.{W,D} instruction with rs2 set to the
    // immediate value to be loaded.
    let op = FpuOPRR::FmvFmtX;
    let width = FpuOPWidth::try_from(ty).unwrap();
    let frm = 0; // FRM is hard coded to 0 in both instructions
    let rs2 = 1; // rs2 set to 1 is what differentiates FLI from FMV
    let mut bits = 0;
    bits |= unsigned_field_width(op.opcode(), 7);
    bits |= reg_to_gpr_num(rd.to_reg()) << 7;
    bits |= unsigned_field_width(frm, 3) << 12;
    // The constant's 5-bit index goes in the rs1 slot.
    bits |= unsigned_field_width(imm.bits() as u32, 5) << 15;
    bits |= unsigned_field_width(rs2, 6) << 20;
    bits |= unsigned_field_width(op.funct7(width), 7) << 25;
    bits
}
/// Encode a two-operand scalar FP instruction: the rounding mode occupies
/// funct3 and `op.rs2()` fills the rs2 slot.
pub fn encode_fp_rr(op: FpuOPRR, width: FpuOPWidth, frm: FRM, rd: WritableReg, rs: Reg) -> u32 {
    encode_r_type_bits(
        op.opcode(),
        reg_to_gpr_num(rd.to_reg()),
        frm.as_u32(),
        reg_to_gpr_num(rs),
        op.rs2(),
        op.funct7(width),
    )
}
/// Encode a three-operand scalar FP instruction; the rounding mode
/// occupies funct3.
pub fn encode_fp_rrr(
    op: FpuOPRRR,
    width: FpuOPWidth,
    frm: FRM,
    rd: WritableReg,
    rs1: Reg,
    rs2: Reg,
) -> u32 {
    encode_r_type_bits(
        op.opcode(),
        reg_to_gpr_num(rd.to_reg()),
        frm.as_u32(),
        reg_to_gpr_num(rs1),
        reg_to_gpr_num(rs2),
        op.funct7(width),
    )
}
/// Encode a four-operand scalar FP instruction (R4-type): rs3 occupies the
/// upper five bits of the funct7 slot with the width in its low two bits.
pub fn encode_fp_rrrr(
    op: FpuOPRRRR,
    width: FpuOPWidth,
    frm: FRM,
    rd: WritableReg,
    rs1: Reg,
    rs2: Reg,
    rs3: Reg,
) -> u32 {
    let funct7 = (reg_to_gpr_num(rs3) << 2) | width.as_u32();
    encode_r_type_bits(
        op.opcode(),
        reg_to_gpr_num(rd.to_reg()),
        frm.as_u32(),
        reg_to_gpr_num(rs1),
        reg_to_gpr_num(rs2),
        funct7,
    )
}

374
hbcb/src/inst/imms.rs Normal file
View file

@ -0,0 +1,374 @@
//! Riscv64 ISA definitions: immediate constants.
// Some variants are never constructed, but we still want them as options in the future.
use super::Inst;
#[allow(dead_code)]
use std::fmt::{Debug, Display, Formatter, Result};
/// A 12-bit signed immediate.
#[derive(Copy, Clone, Debug, Default)]
pub struct Imm12 {
    // 16-bit container whose low 12 bits hold the payload; reading the
    // value back requires sign-extending bit 11.
    bits: u16,
}

impl Imm12 {
    pub(crate) const ZERO: Self = Self { bits: 0 };
    pub(crate) const ONE: Self = Self { bits: 1 };

    /// Reinterpret `val`'s bit pattern as signed and convert if it fits.
    pub fn maybe_from_u64(val: u64) -> Option<Imm12> {
        Self::maybe_from_i64(val as i64)
    }

    /// Convert `val` if it lies in the signed 12-bit range [-2048, 2047].
    pub fn maybe_from_i64(val: i64) -> Option<Imm12> {
        if (-2048..=2047).contains(&val) {
            Some(Imm12 { bits: (val & 0xfff) as u16 })
        } else {
            None
        }
    }

    /// Convert `bits`, panicking when it is out of the 12-bit range.
    #[inline]
    pub fn from_i16(bits: i16) -> Self {
        assert!((-2048..=2047).contains(&bits));
        Self { bits: (bits & 0xfff) as u16 }
    }

    /// Recover the signed value by sign-extending bit 11.
    #[inline]
    pub fn as_i16(self) -> i16 {
        (self.bits << 4) as i16 >> 4
    }

    /// Raw 12-bit field for instruction encoding.
    #[inline]
    pub fn bits(&self) -> u32 {
        u32::from(self.bits)
    }
}

impl Into<i64> for Imm12 {
    fn into(self) -> i64 {
        i64::from(self.as_i16())
    }
}

impl Display for Imm12 {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        // Always show an explicit sign, matching assembly-style offsets.
        write!(f, "{:+}", self.as_i16())
    }
}
// signed
/// A 20-bit signed immediate (upper-immediate instructions).
#[derive(Clone, Copy, Default)]
pub struct Imm20 {
    /// 32-bit container where the low 20 bits are the data payload.
    ///
    /// Acquiring the underlying value requires sign-extending the 20th bit.
    bits: u32,
}

impl Imm20 {
    pub(crate) const ZERO: Self = Self { bits: 0 };

    /// Reinterpret `val`'s bit pattern as signed and convert if it fits.
    pub fn maybe_from_u64(val: u64) -> Option<Imm20> {
        Self::maybe_from_i64(val as i64)
    }

    /// Convert `val` if it lies in the signed 20-bit range.
    pub fn maybe_from_i64(val: i64) -> Option<Imm20> {
        if val >= -(0x7_ffff + 1) && val <= 0x7_ffff {
            // Mask to 20 bits, exactly as `from_i32` does: for negative
            // values `val as u32` sets the upper 12 bits, which would
            // otherwise leak out of `bits()` into the encoded instruction.
            Some(Imm20 { bits: (val as u32) & 0xf_ffff })
        } else {
            None
        }
    }

    #[inline]
    pub fn from_i32(bits: i32) -> Self {
        assert!(bits >= -(0x7_ffff + 1) && bits <= 0x7_ffff);
        Self {
            bits: (bits as u32) & 0xf_ffff,
        }
    }

    /// Recover the signed value by sign-extending bit 19.
    #[inline]
    pub fn as_i32(&self) -> i32 {
        ((self.bits << 12) as i32) >> 12
    }

    /// Raw 20-bit field for instruction encoding.
    #[inline]
    pub fn bits(&self) -> u32 {
        self.bits
    }
}

impl Debug for Imm20 {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        write!(f, "{}", self.as_i32())
    }
}

impl Display for Imm20 {
    // NOTE(review): `Display` prints the raw masked bits while `Debug`
    // prints the sign-extended value; kept as-is since output formatting
    // may rely on it.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        write!(f, "{}", self.bits)
    }
}
/// An unsigned 5-bit immediate.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct UImm5 {
    value: u8,
}

impl UImm5 {
    /// Create an unsigned 5-bit immediate from a u8, if it fits in [0, 31].
    pub fn maybe_from_u8(value: u8) -> Option<UImm5> {
        (value < 32).then_some(UImm5 { value })
    }

    /// Bits for encoding.
    pub fn bits(&self) -> u32 {
        u32::from(self.value)
    }
}

impl Display for UImm5 {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        write!(f, "{}", self.value)
    }
}
/// A Signed 5-bit immediate.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Imm5 {
    value: i8,
}
impl Imm5 {
    /// Create an signed 5-bit immediate from an i8.
    ///
    /// Accepts values in -16..=15; anything else yields `None`.
    pub fn maybe_from_i8(value: i8) -> Option<Imm5> {
        match value {
            -16..=15 => Some(Imm5 { value }),
            _ => None,
        }
    }
    /// Reconstruct from a raw 5-bit encoding, sign-extending bit 4.
    /// Panics when `value` has any bit set outside the low five.
    pub fn from_bits(value: u8) -> Imm5 {
        assert_eq!(value & 0x1f, value);
        // Move bit 4 up into the i8 sign position, then arithmetic-shift
        // back down so the top three bits replicate the sign.
        Imm5 {
            value: ((value << 3) as i8) >> 3,
        }
    }
    /// Bits for encoding.
    pub fn bits(&self) -> u8 {
        self.value as u8 & 0x1f
    }
}
impl Display for Imm5 {
    /// Formats as the plain signed decimal value.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        write!(f, "{}", self.value)
    }
}
/// A Signed 6-bit immediate.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Imm6 {
    value: i8,
}
impl Imm6 {
    /// Create an signed 6-bit immediate from an i16
    ///
    /// Accepts values in -32..=31; anything else yields `None`.
    pub fn maybe_from_i16(value: i16) -> Option<Self> {
        match value {
            -32..=31 => Some(Self { value: value as i8 }),
            _ => None,
        }
    }
    /// Same as [`Imm6::maybe_from_i16`], but narrowing from an i32.
    pub fn maybe_from_i32(value: i32) -> Option<Self> {
        let narrowed: i16 = value.try_into().ok()?;
        Imm6::maybe_from_i16(narrowed)
    }
    /// Narrow an `Imm12` down to an `Imm6`, if its value fits.
    pub fn maybe_from_imm12(value: Imm12) -> Option<Self> {
        Imm6::maybe_from_i16(value.as_i16())
    }
    /// Bits for encoding.
    pub fn bits(&self) -> u8 {
        self.value as u8 & 0x3f
    }
}
impl Display for Imm6 {
    /// Formats as the plain signed decimal value.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        write!(f, "{}", self.value)
    }
}
/// A unsigned 6-bit immediate.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Uimm6 {
    value: u8,
}
impl Uimm6 {
    /// Create an unsigned 6-bit immediate from an u8
    ///
    /// Accepts values in 0..=63; anything larger yields `None`.
    pub fn maybe_from_u8(value: u8) -> Option<Self> {
        match value {
            0..=63 => Some(Self { value }),
            _ => None,
        }
    }
    /// Bits for encoding.
    pub fn bits(&self) -> u8 {
        self.value & 0x3f
    }
}
impl Display for Uimm6 {
    /// Formats as the plain decimal value.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        write!(f, "{}", self.value)
    }
}
/// A unsigned 5-bit immediate.
///
/// NOTE(review): near-duplicate of `UImm5` above (same accepted range;
/// this one's `bits()` returns `u8` instead of `u32`) — consider
/// unifying; confirm both are actually needed.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Uimm5 {
    value: u8,
}
impl Uimm5 {
    /// Create an unsigned 5-bit immediate from an u8
    ///
    /// Accepts values in 0..=31; anything larger yields `None`.
    pub fn maybe_from_u8(value: u8) -> Option<Self> {
        match value {
            0..=31 => Some(Self { value }),
            _ => None,
        }
    }
    /// Bits for encoding.
    pub fn bits(&self) -> u8 {
        self.value & 0x1f
    }
}
impl Display for Uimm5 {
    /// Formats as the plain decimal value.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        write!(f, "{}", self.value)
    }
}
/// A unsigned 2-bit immediate.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Uimm2 {
    value: u8,
}
impl Uimm2 {
    /// Create an unsigned 2-bit immediate from an u8
    ///
    /// Accepts values in 0..=3; anything larger yields `None`.
    pub fn maybe_from_u8(value: u8) -> Option<Self> {
        match value {
            0..=3 => Some(Self { value }),
            _ => None,
        }
    }
    /// Bits for encoding.
    pub fn bits(&self) -> u8 {
        self.value & 0x3
    }
}
impl Display for Uimm2 {
    /// Formats as the plain decimal value.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        write!(f, "{}", self.value)
    }
}
impl Inst {
    /// Smallest value representable as `(imm20 << 12) + imm12`:
    /// `-(2^19 * 4096) - 2^11`, i.e. `i32::MIN - 2048`.
    pub(crate) fn imm_min() -> i64 {
        let imm20_max: i64 = (1 << 19) << 12;
        let imm12_max = 1 << 11;
        -imm20_max - imm12_max
    }
    /// Largest value representable as `(imm20 << 12) + imm12`:
    /// `(2^19 - 1) * 4096 + (2^11 - 1)`, i.e. `i32::MAX - 2048`.
    pub(crate) fn imm_max() -> i64 {
        let imm20_max: i64 = ((1 << 19) - 1) << 12;
        let imm12_max = (1 << 11) - 1;
        imm20_max + imm12_max
    }
    /// An imm20 immediate and an Imm12 immediate can generate a 32-bit immediate.
    /// This helper produces an imm12, imm20, or both to generate the value.
    ///
    /// `value` must be between `imm_min()` and `imm_max()`, or else
    /// this helper returns `None`.
    pub(crate) fn generate_imm(value: u64) -> Option<(Imm20, Imm12)> {
        if let Some(imm12) = Imm12::maybe_from_u64(value) {
            // can be load using single imm12.
            return Some((Imm20::ZERO, imm12));
        }
        let value = value as i64;
        if !(value >= Self::imm_min() && value <= Self::imm_max()) {
            // not in range, return None.
            return None;
        }
        // Split `value` as imm20 * 4096 + imm12, keeping imm12 inside
        // [-2048, 2047] by carrying into / borrowing from imm20.
        const MOD_NUM: i64 = 4096;
        let (imm20, imm12) = if value > 0 {
            let mut imm20 = value / MOD_NUM;
            let mut imm12 = value % MOD_NUM;
            if imm12 >= 2048 {
                // Remainder too large for a signed 12-bit field:
                // fold one MOD_NUM into imm20 and go negative on imm12.
                imm12 -= MOD_NUM;
                imm20 += 1;
            }
            assert!(imm12 >= -2048 && imm12 <= 2047);
            (imm20, imm12)
        } else {
            // this is the abs value.
            let value_abs = value.abs();
            let imm20 = value_abs / MOD_NUM;
            let imm12 = value_abs % MOD_NUM;
            let mut imm20 = -imm20;
            let mut imm12 = -imm12;
            if imm12 < -2048 {
                // Mirror-image adjustment for the negative side.
                imm12 += MOD_NUM;
                imm20 -= 1;
            }
            (imm20, imm12)
        };
        // At least one half is non-zero here: the pure-imm12 fast path
        // above already handled every value (including zero) that fits
        // in imm12 alone.
        assert!(imm20 != 0 || imm12 != 0);
        let imm20 = i32::try_from(imm20).unwrap();
        let imm12 = i16::try_from(imm12).unwrap();
        Some((Imm20::from_i32(imm20), Imm12::from_i16(imm12)))
    }
}
#[cfg(test)]
mod test {
    use super::*;
    #[test]
    fn test_imm12() {
        let x = Imm12::ZERO;
        assert_eq!(0, x.bits());
        // u64::MAX reinterprets as -1 when taken as signed, which is in
        // range for a 12-bit signed immediate.
        Imm12::maybe_from_u64(0xffff_ffff_ffff_ffff).unwrap();
    }
    #[test]
    fn imm20_and_imm12() {
        // The combined (imm20 << 12) + imm12 range is the i32 range
        // shifted down by 2048.
        assert!(Inst::imm_max() == (i32::MAX - 2048) as i64);
        assert!(Inst::imm_min() == i32::MIN as i64 - 2048);
    }
}

1559
hbcb/src/inst/mod.rs Normal file

File diff suppressed because it is too large Load diff

168
hbcb/src/inst/regs.rs Normal file
View file

@ -0,0 +1,168 @@
//! Riscv64 ISA definitions: registers.
//!
use crate::machinst::{Reg, Writable};
use alloc::vec;
use alloc::vec::Vec;
use regalloc2::{PReg, RegClass, VReg};
/// First integer function-call argument register, `a0` (`x10`).
#[inline]
pub fn a0() -> Reg {
    x_reg(10)
}
/// Second integer function-call argument register, `a1` (`x11`).
#[inline]
#[allow(dead_code)]
pub fn a1() -> Reg {
    x_reg(11)
}
/// Third integer function-call argument register, `a2` (`x12`).
#[inline]
#[allow(dead_code)]
pub fn a2() -> Reg {
    x_reg(12)
}
/// Writable view of `a0`.
#[inline]
#[allow(dead_code)]
pub fn writable_a0() -> Writable<Reg> {
    Writable::from_reg(a0())
}
/// Writable view of `a1`.
#[inline]
#[allow(dead_code)]
pub fn writable_a1() -> Writable<Reg> {
    Writable::from_reg(a1())
}
/// Writable view of `a2`.
#[inline]
#[allow(dead_code)]
pub fn writable_a2() -> Writable<Reg> {
    Writable::from_reg(a2())
}
/// First floating-point argument register, `fa0` (`f10`).
#[inline]
#[allow(dead_code)]
pub fn fa0() -> Reg {
    f_reg(10)
}
/// Writable view of `fa0`.
#[inline]
#[allow(dead_code)]
pub fn writable_fa0() -> Writable<Reg> {
    Writable::from_reg(fa0())
}
/// Writable view of `fa1`.
#[inline]
#[allow(dead_code)]
pub fn writable_fa1() -> Writable<Reg> {
    Writable::from_reg(fa1())
}
/// Second floating-point argument register, `fa1` (`f11`).
#[inline]
pub fn fa1() -> Reg {
    f_reg(11)
}
/// Get a reference to the zero-register (`x0`, hardwired to zero).
#[inline]
pub fn zero_reg() -> Reg {
    x_reg(0)
}
/// Get a writable reference to the zero-register (this discards a result).
#[inline]
pub fn writable_zero_reg() -> Writable<Reg> {
    Writable::from_reg(zero_reg())
}
/// Get a reference to the stack-pointer register (`x2`/`sp`).
#[inline]
pub fn stack_reg() -> Reg {
    x_reg(2)
}
/// Get a writable reference to the stack-pointer register.
#[inline]
pub fn writable_stack_reg() -> Writable<Reg> {
    Writable::from_reg(stack_reg())
}
/// Get a reference to the link register (x1).
pub fn link_reg() -> Reg {
    x_reg(1)
}
/// Get a writable reference to the link register.
#[inline]
pub fn writable_link_reg() -> Writable<Reg> {
    Writable::from_reg(link_reg())
}
/// Get a reference to the frame pointer (x8).
#[inline]
pub fn fp_reg() -> Reg {
    x_reg(8)
}
/// Get a writable reference to the frame pointer.
#[inline]
pub fn writable_fp_reg() -> Writable<Reg> {
    Writable::from_reg(fp_reg())
}
/// Get a reference to the first temporary, sometimes "spill temporary",
/// register. This register is used in various ways as a temporary.
#[inline]
pub fn spilltmp_reg() -> Reg {
    x_reg(31)
}
/// Get a writable reference to the spilltmp reg.
#[inline]
pub fn writable_spilltmp_reg() -> Writable<Reg> {
    Writable::from_reg(spilltmp_reg())
}
/// Get a reference to the second spill temporary (`x30`).
#[inline]
pub fn spilltmp_reg2() -> Reg {
    x_reg(30)
}
/// Get a writable reference to the spilltmp2 reg.
#[inline]
pub fn writable_spilltmp_reg2() -> Writable<Reg> {
    Writable::from_reg(spilltmp_reg2())
}
/// Build a `Reg` referring to integer register `x{enc}` (a virtual reg
/// whose index equals the physical register's index).
#[inline]
pub fn x_reg(enc: usize) -> Reg {
    let p_reg = PReg::new(enc, RegClass::Int);
    let v_reg = VReg::new(p_reg.index(), p_reg.class());
    Reg::from(v_reg)
}
/// Physical (regalloc2) integer register `x{enc}`.
pub const fn px_reg(enc: usize) -> PReg {
    PReg::new(enc, RegClass::Int)
}
/// Build a `Reg` referring to float register `f{enc}`.
#[inline]
pub fn f_reg(enc: usize) -> Reg {
    let p_reg = PReg::new(enc, RegClass::Float);
    let v_reg = VReg::new(p_reg.index(), p_reg.class());
    Reg::from(v_reg)
}
/// Physical (regalloc2) float register `f{enc}`.
pub const fn pf_reg(enc: usize) -> PReg {
    PReg::new(enc, RegClass::Float)
}
/// Writable views of the integer registers `x{start}` through `x{end}`,
/// inclusive.
#[allow(dead_code)]
pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec<Writable<Reg>> {
    (start..=end).map(|enc| Writable::from_reg(x_reg(enc))).collect()
}
/// Physical (regalloc2) vector register `v{enc}`.
pub const fn pv_reg(enc: usize) -> PReg {
    PReg::new(enc, RegClass::Vector)
}

2
hbcb/src/inst/unwind.rs Normal file
View file

@ -0,0 +1,2 @@
#[cfg(feature = "unwind")]
pub(crate) mod systemv;

View file

@ -0,0 +1,170 @@
//! Unwind information for System V ABI (Riscv64).
use crate::inst::regs;
use crate::isa::unwind::systemv::RegisterMappingError;
use crate::machinst::Reg;
use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
use regalloc2::RegClass;
/// Creates a new riscv64 common information entry (CIE).
///
/// Records the return-address column (the link register's hardware
/// encoding) and establishes the initial rule CFA = SP + 0.
pub fn create_cie() -> CommonInformationEntry {
    use gimli::write::CallFrameInstruction;
    let mut entry = CommonInformationEntry::new(
        Encoding {
            address_size: 8,
            format: Format::Dwarf32,
            version: 1,
        },
        2, // Code alignment factor
        -8, // Data alignment factor
        // Return-address register column.
        Register(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16),
    );
    // Every frame will start with the call frame address (CFA) at SP
    let sp = Register(regs::stack_reg().to_real_reg().unwrap().hw_enc().into());
    entry.add_instruction(CallFrameInstruction::Cfa(sp, 0));
    entry
}
/// Map Cranelift registers to their corresponding Gimli registers.
///
/// Each register class gets a disjoint DWARF number bank: integer at
/// offset 0, float at 32, vector at 64; the hardware encoding is added
/// to the class offset.
pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
    let reg_offset = match reg.class() {
        RegClass::Int => 0,
        RegClass::Float => 32,
        RegClass::Vector => 64,
    };
    // NOTE(review): `unwrap()` panics for a non-real (virtual) register
    // instead of returning `RegisterMappingError` — presumably only real
    // registers reach this point; confirm at call sites.
    let reg = reg.to_real_reg().unwrap().hw_enc() as u16;
    Ok(Register(reg_offset + reg))
}
/// Adapter implementing the shared unwind `RegisterMapper` trait in
/// terms of [`map_reg`].
pub(crate) struct RegisterMapper;
impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
    fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
        Ok(map_reg(reg)?.0)
    }
    // DWARF number of the frame pointer (x8).
    fn fp(&self) -> Option<u16> {
        Some(regs::fp_reg().to_real_reg().unwrap().hw_enc() as u16)
    }
    // DWARF number of the link register (x1).
    fn lr(&self) -> Option<u16> {
        Some(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16)
    }
    // NOTE(review): assumed to be the byte offset of the saved return
    // address relative to the frame — confirm against the
    // `RegisterMapper` trait contract.
    fn lr_offset(&self) -> Option<u32> {
        Some(8)
    }
}
#[cfg(test)]
mod tests {
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::{
types, AbiParam, Function, InstBuilder, Signature, StackSlotData, StackSlotKind,
UserFuncName,
};
use crate::isa::{lookup, CallConv};
use crate::settings::{builder, Flags};
use crate::Context;
use gimli::write::Address;
use target_lexicon::triple;
#[test]
fn test_simple_func() {
let isa = lookup(triple!("riscv64"))
.expect("expect riscv64 ISA")
.finish(Flags::new(builder()))
.expect("Creating compiler backend");
let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));
let code = context
.compile(&*isa, &mut Default::default())
.expect("expected compilation");
let fde = match code
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(1234))
}
_ => panic!("expected unwind information"),
};
assert_eq!(format!("{fde:?}"), "FrameDescriptionEntry { address: Constant(1234), length: 40, lsda: None, instructions: [(12, CfaOffset(16)), (12, Offset(Register(8), -16)), (12, Offset(Register(1), -8)), (16, CfaRegister(Register(8)))] }");
}
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
let mut func =
Function::with_name_signature(UserFuncName::user(0, 0), Signature::new(call_conv));
let block0 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().return_(&[]);
if let Some(stack_slot) = stack_slot {
func.sized_stack_slots.push(stack_slot);
}
func
}
#[test]
fn test_multi_return_func() {
let isa = lookup(triple!("riscv64"))
.expect("expect riscv64 ISA")
.finish(Flags::new(builder()))
.expect("Creating compiler backend");
let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
let code = context
.compile(&*isa, &mut Default::default())
.expect("expected compilation");
let fde = match code
.create_unwind_info(isa.as_ref())
.expect("can create unwind info")
{
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
info.to_fde(Address::Constant(4321))
}
_ => panic!("expected unwind information"),
};
assert_eq!(
format!("{fde:?}"),
"FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }"
);
}
fn create_multi_return_function(call_conv: CallConv) -> Function {
let mut sig = Signature::new(call_conv);
sig.params.push(AbiParam::new(types::I32));
let mut func = Function::with_name_signature(UserFuncName::user(0, 0), sig);
let block0 = func.dfg.make_block();
let v0 = func.dfg.append_block_param(block0, types::I32);
let block1 = func.dfg.make_block();
let block2 = func.dfg.make_block();
let mut pos = FuncCursor::new(&mut func);
pos.insert_block(block0);
pos.ins().brif(v0, block2, &[], block1, &[]);
pos.insert_block(block1);
pos.ins().return_(&[]);
pos.insert_block(block2);
pos.ins().return_(&[]);
func
}
}

1150
hbcb/src/inst/vector.rs Normal file

File diff suppressed because it is too large Load diff

1907
hbcb/src/inst_vector.isle Normal file

File diff suppressed because it is too large Load diff

264
hbcb/src/lib.rs Normal file
View file

@ -0,0 +1,264 @@
//! risc-v 64-bit Instruction Set Architecture.
#![allow(clippy::all)]
extern crate alloc;
use {
crate::settings as riscv_settings,
alloc::{boxed::Box, vec::Vec},
core::fmt,
cranelift_codegen::{
dominator_tree::DominatorTree,
ir::{self, Function, Type},
isa::{Builder as IsaBuilder, FunctionAlignment, OwnedTargetIsa, TargetIsa},
machinst::{
compile, CompiledCode, CompiledCodeStencil, MachInst, MachTextSectionBuilder, Reg,
SigSet, TextSectionBuilder, VCode,
},
result::CodegenResult,
settings::{self as shared_settings, Flags},
CodegenError,
},
cranelift_control::ControlPlane,
target_lexicon::{Architecture, Triple},
};
mod abi;
pub(crate) mod inst;
mod lower;
mod settings;
use self::inst::EmitInfo;
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv;
/// An riscv64 backend.
pub struct Riscv64Backend {
    // Target triple this backend was constructed for.
    triple: Triple,
    // Flags shared across all Cranelift backends.
    flags: shared_settings::Flags,
    // RISC-V-specific ISA flags (extension availability, etc.).
    isa_flags: riscv_settings::Flags,
}
impl Riscv64Backend {
    /// Create a new riscv64 backend with the given (shared) flags.
    pub fn new_with_flags(
        triple: Triple,
        flags: shared_settings::Flags,
        isa_flags: riscv_settings::Flags,
    ) -> Riscv64Backend {
        Riscv64Backend { triple, flags, isa_flags }
    }
    /// This performs lowering to VCode, register-allocates the code, computes block layout and
    /// finalizes branches. The result is ready for binary emission.
    fn compile_vcode(
        &self,
        func: &Function,
        domtree: &DominatorTree,
        ctrl_plane: &mut ControlPlane,
    ) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
        let emit_info = EmitInfo::new(self.flags.clone(), self.isa_flags.clone());
        // Compute ABI signatures for the function and its callees, then
        // hand everything to the shared compilation pipeline.
        let sigs = SigSet::new::<abi::Riscv64MachineDeps>(func, &self.flags)?;
        let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?;
        compile::compile::<Riscv64Backend>(func, domtree, self, abi, emit_info, sigs, ctrl_plane)
    }
}
impl TargetIsa for Riscv64Backend {
fn compile_function(
&self,
func: &Function,
domtree: &DominatorTree,
want_disasm: bool,
ctrl_plane: &mut ControlPlane,
) -> CodegenResult<CompiledCodeStencil> {
let (vcode, regalloc_result) = self.compile_vcode(func, domtree, ctrl_plane)?;
let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug);
let emit_result = vcode.emit(&regalloc_result, want_disasm, &self.flags, ctrl_plane);
let frame_size = emit_result.frame_size;
let value_labels_ranges = emit_result.value_labels_ranges;
let buffer = emit_result.buffer;
let sized_stackslot_offsets = emit_result.sized_stackslot_offsets;
let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets;
if let Some(disasm) = emit_result.disasm.as_ref() {
log::debug!("disassembly:\n{}", disasm);
}
Ok(CompiledCodeStencil {
buffer,
frame_size,
vcode: emit_result.disasm,
value_labels_ranges,
sized_stackslot_offsets,
dynamic_stackslot_offsets,
bb_starts: emit_result.bb_offsets,
bb_edges: emit_result.bb_edges,
})
}
fn name(&self) -> &'static str {
"riscv64"
}
fn dynamic_vector_bytes(&self, _dynamic_ty: ir::Type) -> u32 {
16
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &shared_settings::Flags {
&self.flags
}
fn isa_flags(&self) -> Vec<shared_settings::Value> {
self.isa_flags.iter().collect()
}
#[cfg(feature = "unwind")]
fn emit_unwind_info(
&self,
result: &CompiledCode,
kind: crate::isa::unwind::UnwindInfoKind,
) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
use crate::isa::unwind::{UnwindInfo, UnwindInfoKind};
Ok(match kind {
UnwindInfoKind::SystemV => {
let mapper = self::inst::unwind::systemv::RegisterMapper;
Some(UnwindInfo::SystemV(
crate::isa::unwind::systemv::create_unwind_info_from_insts(
&result.buffer.unwind_info[..],
result.buffer.data().len(),
&mapper,
)?,
))
}
UnwindInfoKind::Windows => None,
_ => None,
})
}
#[cfg(feature = "unwind")]
fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
Some(inst::unwind::systemv::create_cie())
}
fn text_section_builder(&self, num_funcs: usize) -> Box<dyn TextSectionBuilder> {
Box::new(MachTextSectionBuilder::<inst::Inst>::new(num_funcs))
}
#[cfg(feature = "unwind")]
fn map_regalloc_reg_to_dwarf(&self, reg: Reg) -> Result<u16, systemv::RegisterMappingError> {
inst::unwind::systemv::map_reg(reg).map(|reg| reg.0)
}
fn function_alignment(&self) -> FunctionAlignment {
inst::Inst::function_alignment()
}
fn page_size_align_log2(&self) -> u8 {
debug_assert_eq!(1 << 12, 0x1000);
12
}
#[cfg(feature = "disas")]
fn to_capstone(&self) -> Result<capstone::Capstone, capstone::Error> {
use capstone::prelude::*;
let mut cs_builder = Capstone::new().riscv().mode(arch::riscv::ArchMode::RiscV64);
// Enable C instruction decoding if we have compressed instructions enabled.
//
// We can't enable this unconditionally because it will cause Capstone to
// emit weird instructions and generally mess up when it encounters unknown
// instructions, such as any Zba,Zbb,Zbc or Vector instructions.
//
// This causes the default disassembly to be quite unreadable, so enable
// it only when we are actually going to be using them.
let uses_compressed = self
.isa_flags()
.iter()
.filter(|f| ["has_zca", "has_zcb", "has_zcd"].contains(&f.name))
.any(|f| f.as_bool().unwrap_or(false));
if uses_compressed {
cs_builder = cs_builder.extra_mode([arch::riscv::ArchExtraMode::RiscVC].into_iter());
}
let mut cs = cs_builder.build()?;
// Similar to AArch64, RISC-V uses inline constants rather than a separate
// constant pool. We want to skip disassembly over inline constants instead
// of stopping on invalid bytes.
cs.set_skipdata(true)?;
Ok(cs)
}
fn has_native_fma(&self) -> bool {
true
}
fn has_x86_blendv_lowering(&self, _: Type) -> bool {
false
}
fn has_x86_pshufb_lowering(&self) -> bool {
false
}
fn has_x86_pmulhrsw_lowering(&self) -> bool {
false
}
fn has_x86_pmaddubsw_lowering(&self) -> bool {
false
}
}
/// Debug-struct-style summary (name, triple, flags) used in logs.
impl fmt::Display for Riscv64Backend {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("MachBackend")
            .field("name", &self.name())
            .field("triple", &self.triple())
            .field("flags", &format!("{}", self.flags()))
            .finish()
    }
}
/// Create a new `isa::Builder`.
///
/// Panics if `triple` is not a riscv64 architecture. The returned
/// builder finishes ISA construction via `isa_constructor` once flags
/// are settled.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
    match triple.architecture {
        Architecture::Riscv64(..) => {}
        _ => unreachable!(),
    }
    IsaBuilder { triple, setup: riscv_settings::builder(), constructor: isa_constructor }
}
/// Finish construction of the riscv64 `TargetIsa` from the shared and
/// ISA-specific flag sets; fails unless the `G` extension set is enabled.
fn isa_constructor(
    triple: Triple,
    shared_flags: Flags,
    builder: &shared_settings::Builder,
) -> CodegenResult<OwnedTargetIsa> {
    let isa_flags = riscv_settings::Flags::new(&shared_flags, builder);
    // The RISC-V backend does not work without at least the G extension enabled.
    // The G extension is simply a combination of the following extensions:
    // - I: Base Integer Instruction Set
    // - M: Integer Multiplication and Division
    // - A: Atomic Instructions
    // - F: Single-Precision Floating-Point
    // - D: Double-Precision Floating-Point
    // - Zicsr: Control and Status Register Instructions
    // - Zifencei: Instruction-Fetch Fence
    //
    // Ensure that those combination of features is enabled.
    if !isa_flags.has_g() {
        return Err(CodegenError::Unsupported(
            "The RISC-V Backend currently requires all the features in the G Extension enabled"
                .into(),
        ));
    }
    let backend = Riscv64Backend::new_with_flags(triple, shared_flags, isa_flags);
    Ok(backend.wrapped())
}

2966
hbcb/src/lower.isle Normal file

File diff suppressed because it is too large Load diff

36
hbcb/src/lower.rs Normal file
View file

@ -0,0 +1,36 @@
//! Lowering rules for Riscv64.
use {
crate::{inst::*, Riscv64Backend},
cranelift_codegen::{
ir::Inst as IRInst,
machinst::{lower::*, *},
},
};
pub mod isle;
//=============================================================================
// Lowering-backend trait implementation.
impl LowerBackend for Riscv64Backend {
    type FactFlowState = ();
    type MInst = Inst;
    /// Lower one clif instruction through the ISLE-generated rules;
    /// `None` means no rule matched.
    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> Option<InstOutput> {
        isle::lower(ctx, self, ir_inst)
    }
    /// Lower a block-terminating branch; `targets` are the machine
    /// labels of the successor blocks.
    fn lower_branch(
        &self,
        ctx: &mut Lower<Inst>,
        ir_inst: IRInst,
        targets: &[MachLabel],
    ) -> Option<()> {
        isle::lower_branch(ctx, self, ir_inst, targets)
    }
    fn maybe_pinned_reg(&self) -> Option<Reg> {
        // A "pinned" register is one reserved from allocation so the
        // embedder can keep a value in it; the riscv64 backend does not
        // currently support this feature.
        None
    }
}

730
hbcb/src/lower/isle.rs Normal file
View file

@ -0,0 +1,730 @@
//! ISLE integration glue code for riscv64 lowering.
// Pull in the ISLE generated code.
#[allow(unused)]
pub mod generated_code;
// Types that the generated ISLE code uses via `use super::*`.
use {
self::generated_code::{FpuOPWidth, VecAluOpRR, VecLmul},
crate::{inst::*, Riscv64Backend},
cranelift_codegen::{
abi::Riscv64ABICallSite,
ir::{
immediates::*, types::*, AtomicRmwOp, BlockCall, ExternalName, Inst, InstructionData,
MemFlags, Opcode, TrapCode, Value, ValueList,
},
isa::{self},
lower::args::{FReg, VReg, WritableFReg, WritableVReg, WritableXReg, XReg},
machinst::{
isle::*, ArgPair, CallInfo, InstOutput, IsTailCall, MachInst, Reg, VCodeConstant,
VCodeConstantData,
},
},
generated_code::MInst,
regalloc2::PReg,
std::{boxed::Box, vec::Vec},
};
type BoxCallInfo = Box<CallInfo<ExternalName>>;
type BoxCallIndInfo = Box<CallInfo<Reg>>;
type BoxReturnCallInfo = Box<ReturnCallInfo<ExternalName>>;
type BoxReturnCallIndInfo = Box<ReturnCallInfo<Reg>>;
type BoxExternalName = Box<ExternalName>;
type VecMachLabel = Vec<MachLabel>;
type VecArgPair = Vec<ArgPair>;
/// Context threaded through the generated ISLE lowering code; bundles
/// the generic `Lower` machinery with the concrete backend.
pub(crate) struct RV64IsleContext<'a, 'b, I, B>
where
    I: VCodeInst,
    B: LowerBackend,
{
    pub lower_ctx: &'a mut Lower<'b, I>,
    pub backend: &'a B,
    /// Precalculated value for the minimum vector register size. Will be 0 if
    /// vectors are not supported.
    min_vec_reg_size: u64,
}
impl<'a, 'b> RV64IsleContext<'a, 'b, MInst, Riscv64Backend> {
    fn new(lower_ctx: &'a mut Lower<'b, MInst>, backend: &'a Riscv64Backend) -> Self {
        Self { lower_ctx, backend, min_vec_reg_size: backend.isa_flags.min_vec_reg_size() }
    }
}
impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> {
isle_lower_prelude_methods!();
isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICallSite);
fn gen_return_call(
&mut self,
callee_sig: SigRef,
callee: ExternalName,
distance: RelocDistance,
args: ValueSlice,
) -> InstOutput {
let caller_conv = isa::CallConv::Tail;
debug_assert_eq!(
self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()),
caller_conv,
"Can only do `return_call`s from within a `tail` calling convention function"
);
let call_site = Riscv64ABICallSite::from_func(
self.lower_ctx.sigs(),
callee_sig,
&callee,
IsTailCall::Yes,
distance,
caller_conv,
self.backend.flags().clone(),
);
call_site.emit_return_call(self.lower_ctx, args);
InstOutput::new()
}
fn gen_return_call_indirect(
&mut self,
callee_sig: SigRef,
callee: Value,
args: ValueSlice,
) -> InstOutput {
let caller_conv = isa::CallConv::Tail;
debug_assert_eq!(
self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()),
caller_conv,
"Can only do `return_call`s from within a `tail` calling convention function"
);
let callee = self.put_in_reg(callee);
let call_site = Riscv64ABICallSite::from_ptr(
self.lower_ctx.sigs(),
callee_sig,
callee,
IsTailCall::Yes,
caller_conv,
self.backend.flags().clone(),
);
call_site.emit_return_call(self.lower_ctx, args);
InstOutput::new()
}
fn fpu_op_width_from_ty(&mut self, ty: Type) -> FpuOPWidth {
match ty {
F16 => FpuOPWidth::H,
F32 => FpuOPWidth::S,
F64 => FpuOPWidth::D,
F128 => FpuOPWidth::Q,
_ => unimplemented!("Unimplemented FPU Op Width: {ty}"),
}
}
fn vreg_new(&mut self, r: Reg) -> VReg {
VReg::new(r).unwrap()
}
fn writable_vreg_new(&mut self, r: WritableReg) -> WritableVReg {
r.map(|wr| VReg::new(wr).unwrap())
}
fn writable_vreg_to_vreg(&mut self, arg0: WritableVReg) -> VReg {
arg0.to_reg()
}
fn writable_vreg_to_writable_reg(&mut self, arg0: WritableVReg) -> WritableReg {
arg0.map(|vr| vr.to_reg())
}
fn vreg_to_reg(&mut self, arg0: VReg) -> Reg {
*arg0
}
fn xreg_new(&mut self, r: Reg) -> XReg {
XReg::new(r).unwrap()
}
fn writable_xreg_new(&mut self, r: WritableReg) -> WritableXReg {
r.map(|wr| XReg::new(wr).unwrap())
}
fn writable_xreg_to_xreg(&mut self, arg0: WritableXReg) -> XReg {
arg0.to_reg()
}
fn writable_xreg_to_writable_reg(&mut self, arg0: WritableXReg) -> WritableReg {
arg0.map(|xr| xr.to_reg())
}
fn xreg_to_reg(&mut self, arg0: XReg) -> Reg {
*arg0
}
fn freg_new(&mut self, r: Reg) -> FReg {
FReg::new(r).unwrap()
}
fn writable_freg_new(&mut self, r: WritableReg) -> WritableFReg {
r.map(|wr| FReg::new(wr).unwrap())
}
fn writable_freg_to_freg(&mut self, arg0: WritableFReg) -> FReg {
arg0.to_reg()
}
fn writable_freg_to_writable_reg(&mut self, arg0: WritableFReg) -> WritableReg {
arg0.map(|fr| fr.to_reg())
}
fn freg_to_reg(&mut self, arg0: FReg) -> Reg {
*arg0
}
fn min_vec_reg_size(&mut self) -> u64 {
self.min_vec_reg_size
}
#[inline]
fn ty_vec_fits_in_register(&mut self, ty: Type) -> Option<Type> {
if ty.is_vector() && (ty.bits() as u64) <= self.min_vec_reg_size() {
Some(ty)
} else {
None
}
}
fn ty_supported(&mut self, ty: Type) -> Option<Type> {
let lane_type = ty.lane_type();
let supported = match ty {
// Scalar integers are always supported
ty if ty.is_int() => true,
// Floating point types depend on certain extensions
F16 => self.backend.isa_flags.has_zfh(),
// F32 depends on the F extension
F32 => self.backend.isa_flags.has_f(),
// F64 depends on the D extension
F64 => self.backend.isa_flags.has_d(),
// The base vector extension supports all integer types, up to 64 bits
// as long as they fit in a register
ty if self.ty_vec_fits_in_register(ty).is_some()
&& lane_type.is_int()
&& lane_type.bits() <= 64 =>
{
true
}
// If the vector type has floating point lanes then the spec states:
//
// Vector instructions where any floating-point vector operands EEW is not a
// supported floating-point type width (which includes when FLEN < SEW) are reserved.
//
// So we also have to check if we support the scalar version of the type.
ty if self.ty_vec_fits_in_register(ty).is_some()
&& lane_type.is_float()
&& self.ty_supported(lane_type).is_some()
// Additionally the base V spec only supports 32 and 64 bit floating point types.
&& (lane_type.bits() == 32 || lane_type.bits() == 64) =>
{
true
}
// Otherwise do not match
_ => false,
};
if supported {
Some(ty)
} else {
None
}
}
fn ty_supported_float(&mut self, ty: Type) -> Option<Type> {
self.ty_supported(ty).filter(|ty| ty.is_float())
}
fn ty_supported_vec(&mut self, ty: Type) -> Option<Type> {
self.ty_supported(ty).filter(|ty| ty.is_vector())
}
fn load_ra(&mut self) -> Reg {
if self.backend.flags.preserve_frame_pointers() {
let tmp = self.temp_writable_reg(I64);
self.emit(&MInst::Load {
rd: tmp,
op: LoadOP::Ld,
flags: MemFlags::trusted(),
from: AMode::FPOffset(8),
});
tmp.to_reg()
} else {
link_reg()
}
}
fn label_to_br_target(&mut self, label: MachLabel) -> CondBrTarget {
CondBrTarget::Label(label)
}
fn imm12_and(&mut self, imm: Imm12, x: u64) -> Imm12 {
Imm12::from_i16(imm.as_i16() & (x as i16))
}
fn fli_constant_from_u64(&mut self, ty: Type, imm: u64) -> Option<FliConstant> {
FliConstant::maybe_from_u64(ty, imm)
}
fn fli_constant_from_negated_u64(&mut self, ty: Type, imm: u64) -> Option<FliConstant> {
let negated_imm = match ty {
F64 => imm ^ 0x8000000000000000,
F32 => imm ^ 0x80000000,
_ => unimplemented!(),
};
FliConstant::maybe_from_u64(ty, negated_imm)
}
fn i64_generate_imm(&mut self, imm: i64) -> Option<(Imm20, Imm12)> {
MInst::generate_imm(imm as u64)
}
    /// Decompose `imm` into `(base, shift)` with `imm == base << (shift + 12)`,
    /// for materializing via LUI plus a shift; requires at least 12
    /// trailing zero bits.
    ///
    /// NOTE(review): `imm == 0` gives `trailing == 64`, and `imm >> 64`
    /// is a shift-overflow — presumably the ISLE rules never call this
    /// with zero; confirm.
    fn i64_shift_for_lui(&mut self, imm: i64) -> Option<(u64, Imm12)> {
        let trailing = imm.trailing_zeros();
        if trailing < 12 {
            return None;
        }
        let shift = Imm12::from_i16(trailing as i16 - 12);
        let base = (imm as u64) >> trailing;
        Some((base, shift))
    }
    /// Decompose `imm` into `(base, shift)` with `imm == base << shift`
    /// and `shift` maximal; `None` when there are no trailing zeros to
    /// strip (same zero-input caveat as above).
    fn i64_shift(&mut self, imm: i64) -> Option<(i64, Imm12)> {
        let trailing = imm.trailing_zeros();
        // We can do without this condition but in this case there is no need to go further
        if trailing == 0 {
            return None;
        }
        let shift = Imm12::from_i16(trailing as i16);
        let base = imm >> trailing;
        Some((base, shift))
    }
#[inline]
fn emit(&mut self, arg0: &MInst) -> Unit {
self.lower_ctx.emit(arg0.clone());
}
#[inline]
fn imm12_from_u64(&mut self, arg0: u64) -> Option<Imm12> {
Imm12::maybe_from_u64(arg0)
}
#[inline]
fn imm12_from_i64(&mut self, arg0: i64) -> Option<Imm12> {
Imm12::maybe_from_i64(arg0)
}
#[inline]
fn imm12_is_zero(&mut self, imm: Imm12) -> Option<()> {
if imm.as_i16() == 0 {
Some(())
} else {
None
}
}
#[inline]
fn imm20_from_u64(&mut self, arg0: u64) -> Option<Imm20> {
Imm20::maybe_from_u64(arg0)
}
#[inline]
fn imm20_from_i64(&mut self, arg0: i64) -> Option<Imm20> {
Imm20::maybe_from_i64(arg0)
}
#[inline]
fn imm20_is_zero(&mut self, imm: Imm20) -> Option<()> {
if imm.as_i32() == 0 {
Some(())
} else {
None
}
}
#[inline]
fn imm5_from_u64(&mut self, arg0: u64) -> Option<Imm5> {
Imm5::maybe_from_i8(i8::try_from(arg0 as i64).ok()?)
}
#[inline]
fn imm5_from_i64(&mut self, arg0: i64) -> Option<Imm5> {
Imm5::maybe_from_i8(i8::try_from(arg0).ok()?)
}
#[inline]
fn i8_to_imm5(&mut self, arg0: i8) -> Option<Imm5> {
Imm5::maybe_from_i8(arg0)
}
#[inline]
fn uimm5_bitcast_to_imm5(&mut self, arg0: UImm5) -> Imm5 {
Imm5::from_bits(arg0.bits() as u8)
}
#[inline]
fn uimm5_from_u8(&mut self, arg0: u8) -> Option<UImm5> {
UImm5::maybe_from_u8(arg0)
}
#[inline]
fn uimm5_from_u64(&mut self, arg0: u64) -> Option<UImm5> {
arg0.try_into().ok().and_then(UImm5::maybe_from_u8)
}
#[inline]
fn writable_zero_reg(&mut self) -> WritableReg {
writable_zero_reg()
}
#[inline]
fn zero_reg(&mut self) -> XReg {
XReg::new(zero_reg()).unwrap()
}
fn is_non_zero_reg(&mut self, reg: XReg) -> Option<()> {
if reg != self.zero_reg() {
Some(())
} else {
None
}
}
fn is_zero_reg(&mut self, reg: XReg) -> Option<()> {
if reg == self.zero_reg() {
Some(())
} else {
None
}
}
#[inline]
fn imm_from_bits(&mut self, val: u64) -> Imm12 {
Imm12::maybe_from_u64(val).unwrap()
}
#[inline]
fn imm_from_neg_bits(&mut self, val: i64) -> Imm12 {
Imm12::maybe_from_i64(val).unwrap()
}
fn frm_bits(&mut self, frm: &FRM) -> UImm5 {
UImm5::maybe_from_u8(frm.bits()).unwrap()
}
fn u8_as_i32(&mut self, x: u8) -> i32 {
x as i32
}
fn imm12_const(&mut self, val: i32) -> Imm12 {
if let Some(res) = Imm12::maybe_from_i64(val as i64) {
res
} else {
panic!("Unable to make an Imm12 value from {val}")
}
}
fn imm12_const_add(&mut self, val: i32, add: i32) -> Imm12 {
Imm12::maybe_from_i64((val + add) as i64).unwrap()
}
fn imm12_add(&mut self, val: Imm12, add: i32) -> Option<Imm12> {
Imm12::maybe_from_i64((i32::from(val.as_i16()) + add).into())
}
//
/// Materialize a shift amount for type `ty`: returns the pair
/// `(shamt & (bits - 1), bits - (shamt & (bits - 1)))` in two fresh
/// registers, where `bits` is the type's width clamped to 64.
///
/// Emits an `andi` to mask the raw shift amount to the type's width,
/// then a `load_imm12` + `sub` to compute the complementary amount.
fn gen_shamt(&mut self, ty: Type, shamt: XReg) -> ValueRegs {
    // Clamp the width to 64; shifting happens within a 64-bit register.
    let ty_bits = if ty.bits() > 64 { 64 } else { ty.bits() };
    let ty_bits = i16::try_from(ty_bits).unwrap();
    let shamt = {
        // shamt_masked = shamt & (ty_bits - 1)
        let tmp = self.temp_writable_reg(I64);
        self.emit(&MInst::AluRRImm12 {
            alu_op: AluOPRRI::Andi,
            rd: tmp,
            rs: shamt.to_reg(),
            imm12: Imm12::from_i16(ty_bits - 1),
        });
        tmp.to_reg()
    };
    let len_sub_shamt = {
        // len_sub_shamt = ty_bits - shamt_masked
        let tmp = self.temp_writable_reg(I64);
        self.emit(&MInst::load_imm12(tmp, Imm12::from_i16(ty_bits)));
        let len_sub_shamt = self.temp_writable_reg(I64);
        self.emit(&MInst::AluRRR {
            alu_op: AluOPRRR::Sub,
            rd: len_sub_shamt,
            rs1: tmp.to_reg(),
            rs2: shamt,
        });
        len_sub_shamt.to_reg()
    };
    ValueRegs::two(shamt, len_sub_shamt)
}
// ISA extension queries: each reports whether the corresponding RISC-V
// extension was enabled in this backend's ISA flags.
/// `V` (vector) extension.
fn has_v(&mut self) -> bool {
    self.backend.isa_flags.has_v()
}
/// `M` (integer multiply/divide) extension.
fn has_m(&mut self) -> bool {
    self.backend.isa_flags.has_m()
}
/// `Zfa` (additional floating-point instructions) extension.
fn has_zfa(&mut self) -> bool {
    self.backend.isa_flags.has_zfa()
}
/// `Zfh` (half-precision floating-point) extension.
fn has_zfh(&mut self) -> bool {
    self.backend.isa_flags.has_zfh()
}
/// `Zbkb` (bit-manipulation for cryptography) extension.
fn has_zbkb(&mut self) -> bool {
    self.backend.isa_flags.has_zbkb()
}
/// `Zba` (address generation) extension.
fn has_zba(&mut self) -> bool {
    self.backend.isa_flags.has_zba()
}
/// `Zbb` (basic bit-manipulation) extension.
fn has_zbb(&mut self) -> bool {
    self.backend.isa_flags.has_zbb()
}
/// `Zbc` (carry-less multiplication) extension.
fn has_zbc(&mut self) -> bool {
    self.backend.isa_flags.has_zbc()
}
/// `Zbs` (single-bit operations) extension.
fn has_zbs(&mut self) -> bool {
    self.backend.isa_flags.has_zbs()
}
/// `Zicond` (integer conditional operations) extension.
fn has_zicond(&mut self) -> bool {
    self.backend.isa_flags.has_zicond()
}
/// Addressing mode: `offset` relative to an arbitrary base register.
fn gen_reg_offset_amode(&mut self, base: Reg, offset: i64) -> AMode {
    AMode::RegOffset(base, offset)
}
/// Addressing mode: `offset` relative to the stack pointer.
fn gen_sp_offset_amode(&mut self, offset: i64) -> AMode {
    AMode::SPOffset(offset)
}
/// Addressing mode: `offset` relative to the frame pointer.
fn gen_fp_offset_amode(&mut self, offset: i64) -> AMode {
    AMode::FPOffset(offset)
}
/// Addressing mode for a sized stack slot: the slot's base offset plus
/// `offset`, expressed relative to the beginning of the stackslot area.
fn gen_stack_slot_amode(&mut self, ss: StackSlot, offset: i64) -> AMode {
    // Offset from beginning of stackslot area.
    let stack_off = self.lower_ctx.abi().sized_stackslot_offsets()[ss] as i64;
    let sp_off: i64 = stack_off + offset;
    AMode::SlotOffset(sp_off)
}
/// Addressing mode referencing an entry in the VCode constant pool.
fn gen_const_amode(&mut self, c: VCodeConstant) -> AMode {
    AMode::Const(c)
}
/// Fallible check that `ty` is an integer type of at most 64 bits; on
/// success the type itself is returned for further matching.
fn valid_atomic_transaction(&mut self, ty: Type) -> Option<Type> {
    let ok = ty.is_int() && ty.bits() <= 64;
    ok.then_some(ty)
}
/// Match the min/max flavors of atomic read-modify-write ops, returning
/// the op together with a flag that is `true` for the signed variants.
fn is_atomic_rmw_max_etc(&mut self, op: &AtomicRmwOp) -> Option<(AtomicRmwOp, bool)> {
    let op = *op;
    match op {
        crate::ir::AtomicRmwOp::Umin | crate::ir::AtomicRmwOp::Umax => Some((op, false)),
        crate::ir::AtomicRmwOp::Smin | crate::ir::AtomicRmwOp::Smax => Some((op, true)),
        _ => None,
    }
}
/// Return the instruction producing `val` if it is sinkable; delegates to
/// `is_sinkable_inst`.
fn sinkable_inst(&mut self, val: Value) -> Option<Inst> {
    self.is_sinkable_inst(val)
}
/// Select the load opcode appropriate for loads of type `ty`.
fn load_op(&mut self, ty: Type) -> LoadOP {
    LoadOP::from_type(ty)
}
/// Select the store opcode appropriate for stores of type `ty`.
fn store_op(&mut self, ty: Type) -> StoreOP {
    StoreOP::from_type(ty)
}
/// Materialize the address of external `name` (+ `offset`) into a fresh
/// temporary register via a `LoadExtName` pseudo-instruction.
fn load_ext_name(&mut self, name: ExternalName, offset: i64) -> Reg {
    let tmp = self.temp_writable_reg(I64);
    self.emit(&MInst::LoadExtName { rd: tmp, name: Box::new(name), offset });
    tmp.to_reg()
}
/// Compute the address of stack `slot` + `offset` into a fresh temporary
/// register, emitting whatever instruction the ABI layer produces for it.
fn gen_stack_addr(&mut self, slot: StackSlot, offset: Offset32) -> Reg {
    let result = self.temp_writable_reg(I64);
    let i = self.lower_ctx.abi().sized_stackslot_addr(slot, i64::from(offset) as u32, result);
    self.emit(&i);
    result.to_reg()
}
/// Memory-ordering annotation for atomic ops: always sequentially consistent.
fn atomic_amo(&mut self) -> AMO {
    AMO::SeqCst
}
/// Emit a branch-table (jump table) dispatch on `index` over `targets`,
/// reserving two scratch registers for the expansion.
fn lower_br_table(&mut self, index: Reg, targets: &[MachLabel]) -> Unit {
    let tmp1 = self.temp_writable_reg(I64);
    let tmp2 = self.temp_writable_reg(I64);
    self.emit(&MInst::BrTable { index, tmp1, tmp2, targets: targets.to_vec() });
}
/// The frame-pointer register, `x8` (`fp`/`s0`) in the RISC-V ABI.
fn fp_reg(&mut self) -> PReg {
    px_reg(8)
}
/// The stack-pointer register, `x2` (`sp`) in the RISC-V ABI.
fn sp_reg(&mut self) -> PReg {
    px_reg(2)
}
/// Bundle an integer comparison: condition code plus both operand registers.
#[inline]
fn int_compare(&mut self, kind: &IntCC, rs1: XReg, rs2: XReg) -> IntegerCompare {
    IntegerCompare { kind: *kind, rs1: rs1.to_reg(), rs2: rs2.to_reg() }
}
/// Split an `IntegerCompare` back into its condition code and operands.
#[inline]
fn int_compare_decompose(&mut self, cmp: IntegerCompare) -> (IntCC, XReg, XReg) {
    (cmp.kind, self.xreg_new(cmp.rs1), self.xreg_new(cmp.rs2))
}
/// The vector state (`VState`) corresponding to vector type `ty`.
#[inline]
fn vstate_from_type(&mut self, ty: Type) -> VState {
    VState::from_type(ty)
}
/// Copy of `vs` with the LMUL setting replaced by the fractional `LmulF2`.
#[inline]
fn vstate_mf2(&mut self, vs: VState) -> VState {
    VState { vtype: VType { lmul: VecLmul::LmulF2, ..vs.vtype }, ..vs }
}
/// Canonical type for the destination register class of a vector ALU op.
fn vec_alu_rr_dst_type(&mut self, op: &VecAluOpRR) -> Type {
    MInst::canonical_type_for_rc(op.dst_regclass())
}
/// For a `bclr`-style immediate: if the complement of `i`, restricted to
/// the width of `ty`, has exactly one bit set, return that bit's index as
/// an `Imm12`; otherwise `None`.
fn bclr_imm(&mut self, ty: Type, i: u64) -> Option<Imm12> {
    // Only consider those bits in the immediate which are up to the width
    // of `ty`.
    let neg = !i & (u64::MAX >> (64 - ty.bits()));
    if neg.count_ones() != 1 {
        return None;
    }
    // The bit index of the single cleared bit.
    Imm12::maybe_from_u64(neg.trailing_zeros().into())
}
/// If `i` has exactly one bit set, return that bit's index as an `Imm12`.
fn binvi_imm(&mut self, i: u64) -> Option<Imm12> {
    // "Exactly one bit set" is the same as "is a power of two".
    if !i.is_power_of_two() {
        return None;
    }
    Imm12::maybe_from_u64(u64::from(i.trailing_zeros()))
}
/// Same single-bit check as `binvi_imm`, reused for `bseti` immediates.
fn bseti_imm(&mut self, i: u64) -> Option<Imm12> {
    self.binvi_imm(i)
}
/// Bit pattern of the float used as the signed-minimum bound when
/// converting `float` down to `int` (i8/i16 only; see NB below).
fn fcvt_smin_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 {
    match (int, float) {
        // Saturating cases for larger integers are handled using the
        // `fcvt.{w,d}.{s,d}` instruction directly, that automatically
        // saturates up/down to the correct limit.
        //
        // NB: i32/i64 don't use this function because the native RISC-V
        // instruction does everything we already need, so only cases for
        // i8/i16 are listed here.
        (I8, F32) if saturating => f32::from(i8::MIN).to_bits().into(),
        (I8, F64) if saturating => f64::from(i8::MIN).to_bits(),
        (I16, F32) if saturating => f32::from(i16::MIN).to_bits().into(),
        (I16, F64) if saturating => f64::from(i16::MIN).to_bits(),
        (_, F32) if !saturating => f32_cvt_to_int_bounds(true, int.bits()).0.to_bits().into(),
        (_, F64) if !saturating => f64_cvt_to_int_bounds(true, int.bits()).0.to_bits(),
        _ => unimplemented!(),
    }
}
/// Bit pattern of the float used as the signed-maximum bound for
/// `float` -> `int` conversion.
fn fcvt_smax_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 {
    // NB: see `fcvt_smin_bound` for some more comments
    match (int, float) {
        (I8, F32) if saturating => f32::from(i8::MAX).to_bits().into(),
        (I8, F64) if saturating => f64::from(i8::MAX).to_bits(),
        (I16, F32) if saturating => f32::from(i16::MAX).to_bits().into(),
        (I16, F64) if saturating => f64::from(i16::MAX).to_bits(),
        (_, F32) if !saturating => f32_cvt_to_int_bounds(true, int.bits()).1.to_bits().into(),
        (_, F64) if !saturating => f64_cvt_to_int_bounds(true, int.bits()).1.to_bits(),
        _ => unimplemented!(),
    }
}
/// Bit pattern of the float used as the unsigned-maximum bound for
/// `float` -> `int` conversion.
fn fcvt_umax_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 {
    // NB: see `fcvt_smin_bound` for some more comments
    match (int, float) {
        (I8, F32) if saturating => f32::from(u8::MAX).to_bits().into(),
        (I8, F64) if saturating => f64::from(u8::MAX).to_bits(),
        (I16, F32) if saturating => f32::from(u16::MAX).to_bits().into(),
        (I16, F64) if saturating => f64::from(u16::MAX).to_bits(),
        (_, F32) if !saturating => f32_cvt_to_int_bounds(false, int.bits()).1.to_bits().into(),
        (_, F64) if !saturating => f64_cvt_to_int_bounds(false, int.bits()).1.to_bits(),
        _ => unimplemented!(),
    }
}
/// Bit pattern of `-1.0` in `float`, the exclusive lower bound for
/// non-saturating unsigned conversions (only used when `!saturating`).
fn fcvt_umin_bound(&mut self, float: Type, saturating: bool) -> u64 {
    assert!(!saturating);
    match float {
        F32 => (-1.0f32).to_bits().into(),
        F64 => (-1.0f64).to_bits(),
        _ => unimplemented!(),
    }
}
}
/// The main entry point for lowering with ISLE.
///
/// Builds a fresh ISLE context over `lower_ctx`/`backend` and hands `inst`
/// to the generated `constructor_lower` entry point; the result is `None`
/// when no lowering rule matched.
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &Riscv64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend);
    generated_code::constructor_lower(&mut isle_ctx, inst)
}
/// The main entry point for branch lowering with ISLE.
///
/// Like `lower`, but for branch instructions, which additionally carry
/// their resolved `MachLabel` targets.
pub(crate) fn lower_branch(
    lower_ctx: &mut Lower<MInst>,
    backend: &Riscv64Backend,
    branch: Inst,
    targets: &[MachLabel],
) -> Option<()> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend);
    generated_code::constructor_lower_branch(&mut isle_ctx, branch, targets)
}

View file

@ -0,0 +1,9 @@
// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of
// the generated ISLE source below because we include!() it. We must include!() it because its path
// depends on an environment variable; and also because of this, we can't do the `#[path = "..."]
// mod generated_code;` trick either.
#![allow(dead_code, unreachable_code, unreachable_patterns)]
#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)]
#![allow(irrefutable_let_patterns, clippy::clone_on_copy)]
include!(concat!(env!("ISLE_DIR"), "/isle_riscv64.rs"));

752
hbcb/src/prelude.isle Normal file
View file

@ -0,0 +1,752 @@
;; This is a prelude of standard definitions for ISLE, the instruction-selector
;; DSL, as we use it bound to our interfaces.
;;
;; Note that all `extern` functions here are typically defined in the
;; `isle_prelude_methods` macro defined in `src/isa/isle.rs`
;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `()`
(type Unit (primitive Unit))
(decl pure unit () Unit)
(extern constructor unit unit)
(type bool (primitive bool))
(extern const $true bool)
(extern const $false bool)
(type u8 (primitive u8))
(type u16 (primitive u16))
(type u32 (primitive u32))
(type u64 (primitive u64))
(type u128 (primitive u128))
(type usize (primitive usize))
(type i8 (primitive i8))
(type i16 (primitive i16))
(type i32 (primitive i32))
(type i64 (primitive i64))
(type i128 (primitive i128))
(type isize (primitive isize))
;; `cranelift-entity`-based identifiers.
(type Type (primitive Type))
(type Value (primitive Value))
(type ValueList (primitive ValueList))
(type BlockCall (primitive BlockCall))
;; ISLE representation of `&[Value]`.
(type ValueSlice (primitive ValueSlice))
;; Extract the type of a `Value`.
(decl value_type (Type) Value)
(extern extractor infallible value_type value_type)
;; Extractor that matches a `u32` only if non-negative.
(decl u32_nonnegative (u32) u32)
(extern extractor u32_nonnegative u32_nonnegative)
;; Extractor that pulls apart an Offset32 into a i32 with the raw
;; signed-32-bit twos-complement bits.
(decl offset32 (i32) Offset32)
(extern extractor infallible offset32 offset32)
;; Pure/fallible constructor that tests if one u32 is less than or
;; equal to another.
(decl pure partial u32_lteq (u32 u32) Unit)
(extern constructor u32_lteq u32_lteq)
;; Pure/fallible constructor that tests if one u8 is less than or
;; equal to another.
(decl pure partial u8_lteq (u8 u8) Unit)
(extern constructor u8_lteq u8_lteq)
;; Pure/fallible constructor that tests if one u8 is strictly less
;; than another.
(decl pure partial u8_lt (u8 u8) Unit)
(extern constructor u8_lt u8_lt)
;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl pure u8_as_i8 (u8) i8)
(extern constructor u8_as_i8 u8_as_i8)
(decl pure u8_as_u32 (u8) u32)
(extern constructor u8_as_u32 u8_as_u32)
(convert u8 u32 u8_as_u32)
(decl pure u8_as_u64 (u8) u64)
(extern constructor u8_as_u64 u8_as_u64)
(convert u8 u64 u8_as_u64)
(decl pure u16_as_i16 (u16) i16)
(extern constructor u16_as_i16 u16_as_i16)
(decl pure u16_as_u32 (u16) u32)
(extern constructor u16_as_u32 u16_as_u32)
(convert u16 u32 u16_as_u32)
(decl pure u16_as_u64 (u16) u64)
(extern constructor u16_as_u64 u16_as_u64)
(convert u16 u64 u16_as_u64)
(decl pure u64_as_u8 (u64) u8)
(extern constructor u64_as_u8 u64_as_u8)
(decl pure u64_as_u16 (u64) u16)
(extern constructor u64_as_u16 u64_as_u16)
(decl pure u64_as_i64 (u64) i64)
(extern constructor u64_as_i64 u64_as_i64)
(decl pure partial u16_try_from_u64 (u64) u16)
(extern constructor u16_try_from_u64 u16_try_from_u64)
(decl pure partial u32_try_from_u64 (u64) u32)
(extern constructor u32_try_from_u64 u32_try_from_u64)
(decl pure partial i8_try_from_u64 (u64) i8)
(extern constructor i8_try_from_u64 i8_try_from_u64)
(decl pure partial i16_try_from_u64 (u64) i16)
(extern constructor i16_try_from_u64 i16_try_from_u64)
(decl pure partial i32_try_from_u64 (u64) i32)
(extern constructor i32_try_from_u64 i32_try_from_u64)
(decl pure u32_as_u64 (u32) u64)
(extern constructor u32_as_u64 u32_as_u64)
(convert u32 u64 u32_as_u64)
(decl pure i32_as_i64 (i32) i64)
(extern constructor i32_as_i64 i32_as_i64)
(convert i32 i64 i32_as_i64)
(decl pure i64_as_u64 (i64) u64)
(extern constructor i64_as_u64 i64_as_u64)
(decl pure i64_neg (i64) i64)
(extern constructor i64_neg i64_neg)
(decl pure i8_neg (i8) i8)
(extern constructor i8_neg i8_neg)
(decl u128_as_u64 (u64) u128)
(extern extractor u128_as_u64 u128_as_u64)
(decl u64_as_u32 (u32) u64)
(extern extractor u64_as_u32 u64_as_u32)
(decl u32_as_u16 (u16) u32)
(extern extractor u32_as_u16 u32_as_u16)
(decl pure u64_as_i32 (u64) i32)
(extern constructor u64_as_i32 u64_as_i32)
;;;; Primitive Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl pure u8_and (u8 u8) u8)
(extern constructor u8_and u8_and)
(decl pure u8_shl (u8 u8) u8)
(extern constructor u8_shl u8_shl)
(decl pure u8_shr (u8 u8) u8)
(extern constructor u8_shr u8_shr)
(decl pure u8_sub (u8 u8) u8)
(extern constructor u8_sub u8_sub)
(decl pure u32_add (u32 u32) u32)
(extern constructor u32_add u32_add)
(decl pure u32_sub (u32 u32) u32)
(extern constructor u32_sub u32_sub)
(decl pure u32_and (u32 u32) u32)
(extern constructor u32_and u32_and)
(decl pure u32_shl (u32 u32) u32)
(extern constructor u32_shl u32_shl)
;; Pure/fallible constructor that tries to add two `i32`s and fails
;; to match on (signed) overflow.
(decl pure partial s32_add_fallible (i32 i32) i32)
(extern constructor s32_add_fallible s32_add_fallible)
(decl pure u64_add (u64 u64) u64)
(extern constructor u64_add u64_add)
(decl pure u64_sub (u64 u64) u64)
(extern constructor u64_sub u64_sub)
(decl pure u64_mul (u64 u64) u64)
(extern constructor u64_mul u64_mul)
(decl pure partial u64_sdiv (u64 u64) u64)
(extern constructor u64_sdiv u64_sdiv)
(decl pure partial u64_udiv (u64 u64) u64)
(extern constructor u64_udiv u64_udiv)
(decl pure u64_and (u64 u64) u64)
(extern constructor u64_and u64_and)
(decl pure u64_or (u64 u64) u64)
(extern constructor u64_or u64_or)
(decl pure u64_xor (u64 u64) u64)
(extern constructor u64_xor u64_xor)
(decl pure u64_shl (u64 u64) u64)
(extern constructor u64_shl u64_shl)
(decl pure imm64_shl (Type Imm64 Imm64) Imm64)
(extern constructor imm64_shl imm64_shl)
(decl pure imm64_ushr (Type Imm64 Imm64) Imm64)
(extern constructor imm64_ushr imm64_ushr)
(decl pure imm64_sshr (Type Imm64 Imm64) Imm64)
(extern constructor imm64_sshr imm64_sshr)
(decl pure u64_not (u64) u64)
(extern constructor u64_not u64_not)
(decl pure u64_eq (u64 u64) bool)
(extern constructor u64_eq u64_eq)
(decl pure u64_le (u64 u64) bool)
(extern constructor u64_le u64_le)
(decl pure u64_lt (u64 u64) bool)
(extern constructor u64_lt u64_lt)
(decl pure i64_shr (i64 i64) i64)
(extern constructor i64_shr i64_shr)
(decl pure i64_ctz (i64) i64)
(extern constructor i64_ctz i64_ctz)
;; Sign-extend a u64 from `ty` bits up to 64 bits.
(decl pure i64_sextend_u64 (Type u64) i64)
(extern constructor i64_sextend_u64 i64_sextend_u64)
(decl pure i64_sextend_imm64 (Type Imm64) i64)
(extern constructor i64_sextend_imm64 i64_sextend_imm64)
(decl pure u64_uextend_imm64 (Type Imm64) u64)
(extern constructor u64_uextend_imm64 u64_uextend_imm64)
(decl pure imm64_icmp (Type IntCC Imm64 Imm64) Imm64)
(extern constructor imm64_icmp imm64_icmp)
(decl u64_is_zero (bool) u64)
(extern extractor infallible u64_is_zero u64_is_zero)
(decl i64_is_zero (bool) i64)
(extern extractor infallible i64_is_zero i64_is_zero)
(decl u64_zero () u64)
(extractor (u64_zero) (u64_is_zero $true))
(decl u64_nonzero (u64) u64)
(extractor (u64_nonzero x) (and (u64_is_zero $false) x))
(decl i64_nonzero (i64) i64)
(extractor (i64_nonzero x) (and (i64_is_zero $false) x))
(decl pure u64_is_odd (u64) bool)
(extern constructor u64_is_odd u64_is_odd)
;; Each of these extractors tests whether the upper half of the input equals the
;; lower half of the input
(decl u128_replicated_u64 (u64) u128)
(extern extractor u128_replicated_u64 u128_replicated_u64)
(decl u64_replicated_u32 (u64) u64)
(extern extractor u64_replicated_u32 u64_replicated_u32)
(decl u32_replicated_u16 (u64) u64)
(extern extractor u32_replicated_u16 u32_replicated_u16)
(decl u16_replicated_u8 (u8) u64)
(extern extractor u16_replicated_u8 u16_replicated_u8)
;; Floating point operations
(decl pure partial f16_min (Ieee16 Ieee16) Ieee16)
(extern constructor f16_min f16_min)
(decl pure partial f16_max (Ieee16 Ieee16) Ieee16)
(extern constructor f16_max f16_max)
(decl pure f16_neg (Ieee16) Ieee16)
(extern constructor f16_neg f16_neg)
(decl pure f16_abs (Ieee16) Ieee16)
(extern constructor f16_abs f16_abs)
(decl pure f16_copysign (Ieee16 Ieee16) Ieee16)
(extern constructor f16_copysign f16_copysign)
(decl pure partial f32_add (Ieee32 Ieee32) Ieee32)
(extern constructor f32_add f32_add)
(decl pure partial f32_sub (Ieee32 Ieee32) Ieee32)
(extern constructor f32_sub f32_sub)
(decl pure partial f32_mul (Ieee32 Ieee32) Ieee32)
(extern constructor f32_mul f32_mul)
(decl pure partial f32_div (Ieee32 Ieee32) Ieee32)
(extern constructor f32_div f32_div)
(decl pure partial f32_sqrt (Ieee32) Ieee32)
(extern constructor f32_sqrt f32_sqrt)
(decl pure partial f32_ceil (Ieee32) Ieee32)
(extern constructor f32_ceil f32_ceil)
(decl pure partial f32_floor (Ieee32) Ieee32)
(extern constructor f32_floor f32_floor)
(decl pure partial f32_trunc (Ieee32) Ieee32)
(extern constructor f32_trunc f32_trunc)
(decl pure partial f32_nearest (Ieee32) Ieee32)
(extern constructor f32_nearest f32_nearest)
(decl pure partial f32_min (Ieee32 Ieee32) Ieee32)
(extern constructor f32_min f32_min)
(decl pure partial f32_max (Ieee32 Ieee32) Ieee32)
(extern constructor f32_max f32_max)
(decl pure f32_neg (Ieee32) Ieee32)
(extern constructor f32_neg f32_neg)
(decl pure f32_abs (Ieee32) Ieee32)
(extern constructor f32_abs f32_abs)
(decl pure f32_copysign (Ieee32 Ieee32) Ieee32)
(extern constructor f32_copysign f32_copysign)
(decl pure partial f64_add (Ieee64 Ieee64) Ieee64)
(extern constructor f64_add f64_add)
(decl pure partial f64_sub (Ieee64 Ieee64) Ieee64)
(extern constructor f64_sub f64_sub)
(decl pure partial f64_mul (Ieee64 Ieee64) Ieee64)
(extern constructor f64_mul f64_mul)
(decl pure partial f64_div (Ieee64 Ieee64) Ieee64)
(extern constructor f64_div f64_div)
(decl pure partial f64_sqrt (Ieee64) Ieee64)
(extern constructor f64_sqrt f64_sqrt)
(decl pure partial f64_ceil (Ieee64) Ieee64)
(extern constructor f64_ceil f64_ceil)
(decl pure partial f64_floor (Ieee64) Ieee64)
(extern constructor f64_floor f64_floor)
(decl pure partial f64_trunc (Ieee64) Ieee64)
(extern constructor f64_trunc f64_trunc)
(decl pure partial f64_nearest (Ieee64) Ieee64)
(extern constructor f64_nearest f64_nearest)
(decl pure partial f64_min (Ieee64 Ieee64) Ieee64)
(extern constructor f64_min f64_min)
(decl pure partial f64_max (Ieee64 Ieee64) Ieee64)
(extern constructor f64_max f64_max)
(decl pure f64_neg (Ieee64) Ieee64)
(extern constructor f64_neg f64_neg)
(decl pure f64_abs (Ieee64) Ieee64)
(extern constructor f64_abs f64_abs)
(decl pure f64_copysign (Ieee64 Ieee64) Ieee64)
(extern constructor f64_copysign f64_copysign)
(decl pure partial f128_min (Ieee128 Ieee128) Ieee128)
(extern constructor f128_min f128_min)
(decl pure partial f128_max (Ieee128 Ieee128) Ieee128)
(extern constructor f128_max f128_max)
(decl pure f128_neg (Ieee128) Ieee128)
(extern constructor f128_neg f128_neg)
(decl pure f128_abs (Ieee128) Ieee128)
(extern constructor f128_abs f128_abs)
(decl pure f128_copysign (Ieee128 Ieee128) Ieee128)
(extern constructor f128_copysign f128_copysign)
(type Ieee128 (primitive Ieee128))
;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(extern const $I8 Type)
(extern const $I16 Type)
(extern const $I32 Type)
(extern const $I64 Type)
(extern const $I128 Type)
(extern const $F16 Type)
(extern const $F32 Type)
(extern const $F64 Type)
(extern const $F128 Type)
(extern const $I8X8 Type)
(extern const $I8X16 Type)
(extern const $I16X4 Type)
(extern const $I16X8 Type)
(extern const $I32X2 Type)
(extern const $I32X4 Type)
(extern const $I64X2 Type)
(extern const $F32X4 Type)
(extern const $F64X2 Type)
(extern const $I32X4XN Type)
;; Get the unsigned minimum value for a given type.
;; This is always zero, but is included for completeness.
(decl pure ty_umin (Type) u64)
(extern constructor ty_umin ty_umin)
;; Get the unsigned maximum value for a given type.
(decl pure ty_umax (Type) u64)
(extern constructor ty_umax ty_umax)
;; Get the signed minimum value for a given type.
(decl pure ty_smin (Type) u64)
(extern constructor ty_smin ty_smin)
;; Get the signed maximum value for a given type.
(decl pure ty_smax (Type) u64)
(extern constructor ty_smax ty_smax)
;; Get the bit width of a given type.
(decl pure ty_bits (Type) u8)
(extern constructor ty_bits ty_bits)
;; Get the bit width of a given type.
(decl pure ty_bits_u16 (Type) u16)
(extern constructor ty_bits_u16 ty_bits_u16)
;; Get the bit width of a given type.
(decl pure ty_bits_u64 (Type) u64)
(extern constructor ty_bits_u64 ty_bits_u64)
;; Get a mask for the width of a given type.
(decl pure ty_mask (Type) u64)
(extern constructor ty_mask ty_mask)
;; Get a mask that is set for each lane in a given type.
(decl pure ty_lane_mask (Type) u64)
(extern constructor ty_lane_mask ty_lane_mask)
;; Get the number of lanes for a given type.
(decl pure ty_lane_count (Type) u64)
(extern constructor ty_lane_count ty_lane_count)
;; Get the byte width of a given type.
(decl pure ty_bytes (Type) u16)
(extern constructor ty_bytes ty_bytes)
;; Get the type of each lane in the given type.
(decl pure lane_type (Type) Type)
(extern constructor lane_type lane_type)
;; Get a type with the same element type, but half the number of lanes.
(decl pure partial ty_half_lanes (Type) Type)
(extern constructor ty_half_lanes ty_half_lanes)
;; Get a type with the same number of lanes but a lane type that is half as wide.
(decl pure partial ty_half_width (Type) Type)
(extern constructor ty_half_width ty_half_width)
;; Generate a mask for the maximum shift amount for a given type, e.g. 31 for I32.
(decl pure ty_shift_mask (Type) u64)
(rule (ty_shift_mask ty) (u64_sub (ty_bits (lane_type ty)) 1))
;; Compare two types for equality.
(decl pure ty_equal (Type Type) bool)
(extern constructor ty_equal ty_equal)
;;;; `cranelift_codegen::ir::MemFlags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `MemFlags::trusted`
(decl pure mem_flags_trusted () MemFlags)
(extern constructor mem_flags_trusted mem_flags_trusted)
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Swap args of an IntCC flag.
(decl intcc_swap_args (IntCC) IntCC)
(extern constructor intcc_swap_args intcc_swap_args)
;; Complement an IntCC flag.
(decl intcc_complement (IntCC) IntCC)
(extern constructor intcc_complement intcc_complement)
;; This is a direct import of `IntCC::without_equal`.
;; Get the corresponding IntCC with the equal component removed.
;; For conditions without a zero component, this is a no-op.
(decl pure intcc_without_eq (IntCC) IntCC)
(extern constructor intcc_without_eq intcc_without_eq)
;; Swap args of a FloatCC flag.
(decl floatcc_swap_args (FloatCC) FloatCC)
(extern constructor floatcc_swap_args floatcc_swap_args)
;; Complement a FloatCC flag.
(decl floatcc_complement (FloatCC) FloatCC)
(extern constructor floatcc_complement floatcc_complement)
;; True when this FloatCC involves an unordered comparison.
(decl pure floatcc_unordered (FloatCC) bool)
(extern constructor floatcc_unordered floatcc_unordered)
;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl eq (Type Value Value) Value)
(extractor (eq ty x y) (icmp ty (IntCC.Equal) x y))
(decl ne (Type Value Value) Value)
(extractor (ne ty x y) (icmp ty (IntCC.NotEqual) x y))
(decl ult (Type Value Value) Value)
(extractor (ult ty x y) (icmp ty (IntCC.UnsignedLessThan) x y))
(decl ule (Type Value Value) Value)
(extractor (ule ty x y) (icmp ty (IntCC.UnsignedLessThanOrEqual) x y))
(decl ugt (Type Value Value) Value)
(extractor (ugt ty x y) (icmp ty (IntCC.UnsignedGreaterThan) x y))
(decl uge (Type Value Value) Value)
(extractor (uge ty x y) (icmp ty (IntCC.UnsignedGreaterThanOrEqual) x y))
(decl slt (Type Value Value) Value)
(extractor (slt ty x y) (icmp ty (IntCC.SignedLessThan) x y))
(decl sle (Type Value Value) Value)
(extractor (sle ty x y) (icmp ty (IntCC.SignedLessThanOrEqual) x y))
(decl sgt (Type Value Value) Value)
(extractor (sgt ty x y) (icmp ty (IntCC.SignedGreaterThan) x y))
(decl sge (Type Value Value) Value)
(extractor (sge ty x y) (icmp ty (IntCC.SignedGreaterThanOrEqual) x y))
;; An extractor that only matches types that can fit in 16 bits.
(decl fits_in_16 (Type) Type)
(extern extractor fits_in_16 fits_in_16)
;; An extractor that only matches types that can fit in 32 bits.
(decl fits_in_32 (Type) Type)
(extern extractor fits_in_32 fits_in_32)
;; An extractor that only matches types that can fit in 32 bits.
(decl lane_fits_in_32 (Type) Type)
(extern extractor lane_fits_in_32 lane_fits_in_32)
;; An extractor that only matches types that can fit in 64 bits.
(decl fits_in_64 (Type) Type)
(extern extractor fits_in_64 fits_in_64)
;; An extractor that only matches types that fit in exactly 32 bits.
(decl ty_32 (Type) Type)
(extern extractor ty_32 ty_32)
;; An extractor that only matches types that fit in exactly 64 bits.
(decl ty_64 (Type) Type)
(extern extractor ty_64 ty_64)
;; A pure constructor/extractor that only matches scalar integers, and
;; references that can fit in 64 bits.
(decl pure partial ty_int_ref_scalar_64 (Type) Type)
(extern constructor ty_int_ref_scalar_64 ty_int_ref_scalar_64)
(extern extractor ty_int_ref_scalar_64 ty_int_ref_scalar_64_extract)
;; An extractor that matches 32- and 64-bit types only.
(decl ty_32_or_64 (Type) Type)
(extern extractor ty_32_or_64 ty_32_or_64)
;; An extractor that matches 8- and 16-bit types only.
(decl ty_8_or_16 (Type) Type)
(extern extractor ty_8_or_16 ty_8_or_16)
;; An extractor that matches 16- and 32-bit types only.
(decl ty_16_or_32 (Type) Type)
(extern extractor ty_16_or_32 ty_16_or_32)
;; An extractor that matches int types that fit in 32 bits.
(decl int_fits_in_32 (Type) Type)
(extern extractor int_fits_in_32 int_fits_in_32)
;; An extractor that matches I64.
(decl ty_int_ref_64 (Type) Type)
(extern extractor ty_int_ref_64 ty_int_ref_64)
;; An extractor that matches int or reference types bigger than 16 bits but at most 64 bits.
(decl ty_int_ref_16_to_64 (Type) Type)
(extern extractor ty_int_ref_16_to_64 ty_int_ref_16_to_64)
;; An extractor that only matches integers.
(decl ty_int (Type) Type)
(extern extractor ty_int ty_int)
;; An extractor that only matches scalar types, float or int or ref's.
(decl ty_scalar (Type) Type)
(extern extractor ty_scalar ty_scalar)
;; An extractor that only matches scalar floating-point types--F32 or F64.
(decl ty_scalar_float (Type) Type)
(extern extractor ty_scalar_float ty_scalar_float)
;; An extractor that matches scalar floating-point types or vector types.
(decl ty_float_or_vec (Type) Type)
(extern extractor ty_float_or_vec ty_float_or_vec)
;; A pure constructor that only matches vector floating-point types.
(decl pure partial ty_vector_float (Type) Type)
(extern constructor ty_vector_float ty_vector_float)
;; A pure constructor that only matches vector types with lanes which
;; are not floating-point.
(decl pure partial ty_vector_not_float (Type) Type)
(extern constructor ty_vector_not_float ty_vector_not_float)
;; A pure constructor/extractor that only matches 64-bit vector types.
(decl pure partial ty_vec64 (Type) Type)
(extern constructor ty_vec64 ty_vec64_ctor)
(extern extractor ty_vec64 ty_vec64)
;; An extractor that only matches 128-bit vector types.
(decl ty_vec128 (Type) Type)
(extern extractor ty_vec128 ty_vec128)
;; An extractor that only matches dynamic vector types with a 64-bit
;; base type.
(decl ty_dyn_vec64 (Type) Type)
(extern extractor ty_dyn_vec64 ty_dyn_vec64)
;; An extractor that only matches dynamic vector types with a 128-bit
;; base type.
(decl ty_dyn_vec128 (Type) Type)
(extern extractor ty_dyn_vec128 ty_dyn_vec128)
;; An extractor that only matches 64-bit vector types with integer
;; lanes (I8X8, I16X4, I32X2)
(decl ty_vec64_int (Type) Type)
(extern extractor ty_vec64_int ty_vec64_int)
;; An extractor that only matches 128-bit vector types with integer
;; lanes (I8X16, I16X8, I32X4, I64X2).
(decl ty_vec128_int (Type) Type)
(extern extractor ty_vec128_int ty_vec128_int)
;; An extractor that only matches types that can be a 64-bit address.
(decl ty_addr64 (Type) Type)
(extern extractor ty_addr64 ty_addr64)
;; A pure constructor that matches everything except vectors with size 32X2.
(decl pure partial not_vec32x2 (Type) Type)
(extern constructor not_vec32x2 not_vec32x2)
;; An extractor that matches everything except I64X2
(decl not_i64x2 () Type)
(extern extractor not_i64x2 not_i64x2)
;; Extract a `u8` from an `Uimm8`.
(decl u8_from_uimm8 (u8) Uimm8)
(extern extractor infallible u8_from_uimm8 u8_from_uimm8)
;; Extract a `u64` from a `bool`.
(decl u64_from_bool (u64) bool)
(extern extractor infallible u64_from_bool u64_from_bool)
;; Extract a `u64` from an `Imm64`.
(decl u64_from_imm64 (u64) Imm64)
(extern extractor infallible u64_from_imm64 u64_from_imm64)
;; Extract a `u64` from an `Imm64` which is not zero.
(decl nonzero_u64_from_imm64 (u64) Imm64)
(extern extractor nonzero_u64_from_imm64 nonzero_u64_from_imm64)
;; If the given `Imm64` is a power-of-two, extract its log2 value.
(decl imm64_power_of_two (u64) Imm64)
(extern extractor imm64_power_of_two imm64_power_of_two)
;; Create a new Imm64.
(decl pure imm64 (u64) Imm64)
(extern constructor imm64 imm64)
;; Create a new Imm64, masked to the width of the given type.
(decl pure imm64_masked (Type u64) Imm64)
(extern constructor imm64_masked imm64_masked)
;; Extract a `u16` from an `Ieee16`.
(decl u16_from_ieee16 (u16) Ieee16)
(extern extractor infallible u16_from_ieee16 u16_from_ieee16)
;; Extract a `u32` from an `Ieee32`.
(decl u32_from_ieee32 (u32) Ieee32)
(extern extractor infallible u32_from_ieee32 u32_from_ieee32)
;; Extract a `u64` from an `Ieee64`.
(decl u64_from_ieee64 (u64) Ieee64)
(extern extractor infallible u64_from_ieee64 u64_from_ieee64)
;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given
;; type. Will only match when there is more than one lane.
(decl multi_lane (u32 u32) Type)
(extern extractor multi_lane multi_lane)
;; Match a dynamic-lane type, extracting (# bits per lane) from the given
;; type.
(decl dynamic_lane (u32 u32) Type)
(extern extractor dynamic_lane dynamic_lane)
;; An extractor that only matches 64-bit dynamic vector types with integer
;; lanes (I8X8XN, I16X4XN, I32X2XN)
(decl ty_dyn64_int (Type) Type)
(extern extractor ty_dyn64_int ty_dyn64_int)
;; An extractor that only matches 128-bit dynamic vector types with integer
;; lanes (I8X16XN, I16X8XN, I32X4XN, I64X2XN).
(decl ty_dyn128_int (Type) Type)
(extern extractor ty_dyn128_int ty_dyn128_int)
;; Convert an `Offset32` to a primitive number.
(decl pure offset32_to_i32 (Offset32) i32)
(extern constructor offset32_to_i32 offset32_to_i32)
;; Convert a number to an `Offset32`
(decl pure i32_to_offset32 (i32) Offset32)
(extern constructor i32_to_offset32 i32_to_offset32)
;; This is a direct import of `IntCC::unsigned`.
;; Get the corresponding IntCC with the signed component removed.
;; For conditions without a signed component, this is a no-op.
(decl pure intcc_unsigned (IntCC) IntCC)
(extern constructor intcc_unsigned intcc_unsigned)
;; Pure constructor that only matches signed integer cond codes.
(decl pure partial signed_cond_code (IntCC) IntCC)
(extern constructor signed_cond_code signed_cond_code)
;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl pure trap_code_division_by_zero () TrapCode)
(extern constructor trap_code_division_by_zero trap_code_division_by_zero)
(decl pure trap_code_integer_overflow () TrapCode)
(extern constructor trap_code_integer_overflow trap_code_integer_overflow)
(decl pure trap_code_bad_conversion_to_integer () TrapCode)
(extern constructor trap_code_bad_conversion_to_integer trap_code_bad_conversion_to_integer)
;;;; Helpers for tail recursion loops ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; A range of integers to loop through.
(type Range (primitive Range))
;; Create a new range from `start` through `end` (exclusive).
(decl pure range (usize usize) Range)
(extern constructor range range)
;; A view on the current state of the range.
(type RangeView extern
(enum
(Empty)
(NonEmpty (index usize) (rest Range))))
;; View the current state of the range.
(decl range_view (RangeView) Range)
(extern extractor infallible range_view range_view)
;; Extractor to test whether a range is empty.
(decl range_empty () Range)
(extractor (range_empty) (range_view (RangeView.Empty)))
;; Extractor to return the first value in the range, and a sub-range
;; containing the remaining values.
(decl range_unwrap (usize Range) Range)
(extractor (range_unwrap index rest) (range_view (RangeView.NonEmpty index rest)))
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(convert Offset32 i32 offset32_to_i32)
(convert i32 Offset32 i32_to_offset32)

1082
hbcb/src/prelude_lower.isle Normal file

File diff suppressed because it is too large Load diff

123
hbcb/src/prelude_opt.isle Normal file
View file

@ -0,0 +1,123 @@
;; Prelude definitions specific to the mid-end.
;; Any `extern` definitions here are generally implemented in `src/opts.rs`.
;;;;; eclass and enode access ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Extract any node(s) for the given eclass ID.
;; `multi`: may yield more than one match — one per node in the eclass.
(decl multi inst_data (Type InstructionData) Value)
(extern extractor inst_data inst_data_etor)
;; Identical to `inst_data`, just with a different ISLE type.
;; This is basically a manual version of `curry`/`uncurry` in Haskell:
;; to compose extractors the outer one needs to be single-parameter,
;; so this combines the two parameters of `inst_data` into one.
(type TypeAndInstructionData (primitive TypeAndInstructionData))
(decl multi inst_data_tupled (TypeAndInstructionData) Value)
(extern extractor inst_data_tupled inst_data_tupled_etor)
;; Construct a pure node, returning a new (or deduplicated
;; already-existing) eclass ID.
(decl make_inst (Type InstructionData) Value)
(extern constructor make_inst make_inst_ctor)
;; Constructors for value arrays.
;; Used to build the fixed-size argument arrays inside `InstructionData`.
(decl value_array_2_ctor (Value Value) ValueArray2)
(extern constructor value_array_2_ctor value_array_2_ctor)
(decl value_array_3_ctor (Value Value Value) ValueArray3)
(extern constructor value_array_3_ctor value_array_3_ctor)
;; Sugar: each comparison term lowers to an `icmp` with the matching
;; `IntCC` condition code.
;; NOTE(review): the `(decl eq ...)`, `(decl ne ...)`, etc. declarations for
;; these rules are not visible in this span — confirm they exist here or in
;; the shared prelude, as rules without declarations will not compile.
(rule (eq ty x y) (icmp ty (IntCC.Equal) x y))
(rule (ne ty x y) (icmp ty (IntCC.NotEqual) x y))
(rule (ult ty x y) (icmp ty (IntCC.UnsignedLessThan) x y))
(rule (ule ty x y) (icmp ty (IntCC.UnsignedLessThanOrEqual) x y))
(rule (ugt ty x y) (icmp ty (IntCC.UnsignedGreaterThan) x y))
(rule (uge ty x y) (icmp ty (IntCC.UnsignedGreaterThanOrEqual) x y))
(rule (slt ty x y) (icmp ty (IntCC.SignedLessThan) x y))
(rule (sle ty x y) (icmp ty (IntCC.SignedLessThanOrEqual) x y))
(rule (sgt ty x y) (icmp ty (IntCC.SignedGreaterThan) x y))
(rule (sge ty x y) (icmp ty (IntCC.SignedGreaterThanOrEqual) x y))
;; 3-way comparison, returning -1/0/+1 in I8
;; Signed variant: (x > y) - (x < y), computed with the sugar terms above.
;; Declared as both a constructor (rule) and an extractor so it can appear
;; on either side of a rewrite.
(decl spaceship_s (Type Value Value) Value)
(rule (spaceship_s ty x y) (isub $I8 (sgt ty x y) (slt ty x y)))
(extractor (spaceship_s ty x y) (isub $I8 (sgt ty x y) (slt ty x y)))
;; Unsigned variant of the 3-way comparison.
(decl spaceship_u (Type Value Value) Value)
(rule (spaceship_u ty x y) (isub $I8 (ugt ty x y) (ult ty x y)))
(extractor (spaceship_u ty x y) (isub $I8 (ugt ty x y) (ult ty x y)))
;;;;; optimization toplevel ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The main matcher rule invoked by the toplevel driver.
;; `multi`: a value may be rewritten to several candidate simplifications.
;; Note there is no `extern` binding here — the rules for `simplify` are
;; supplied by the optimization rule files.
(decl multi simplify (Value) Value)
;; Mark a node as requiring remat when used in a different block.
(decl remat (Value) Value)
(extern constructor remat remat)
;; Mark a node as subsuming whatever else it's rewritten from -- this
;; is definitely preferable, not just a possible option. Useful for,
;; e.g., constant propagation where we arrive at a definite "final
;; answer".
(decl subsume (Value) Value)
(extern constructor subsume subsume)
;;;;; constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; External extractor matching an `iconst` whose immediate, sign-extended
;; from `ty`, equals the given `i64`. Backs the `iconst_s` extractor below.
(decl iconst_sextend_etor (Type i64) TypeAndInstructionData)
(extern extractor iconst_sextend_etor iconst_sextend_etor)
;; Construct an `iconst` from an `i64` or Extract an `i64` from an `iconst`
;; by treating the constant as signed.
;; When extracting, smaller types get their value sign-extended to 64-bits,
;; so that `iconst.i8 255` will give you a `-1_i64`.
;; When constructing, the rule will fail if the value cannot be represented in
;; the target type. If it fits, it'll be masked accordingly in the constant.
(decl iconst_s (Type i64) Value)
(extractor (iconst_s ty c) (inst_data_tupled (iconst_sextend_etor ty c)))
;; Priority 0 (general case): mask `c` to `ty`'s width, then check the fit by
;; sign-re-extending the masked value and comparing against the original.
(rule 0 (iconst_s ty c)
(if-let c_masked (u64_and (i64_as_u64 c) (ty_umax ty)))
(if-let c_reextended (i64_sextend_u64 ty c_masked))
(if-let $true (u64_eq (i64_as_u64 c) (i64_as_u64 c_reextended)))
(iconst ty (imm64 c_masked)))
;; Priority 1 (takes precedence): `iconst` has no I128 form, so build an I64
;; constant and sign-extend it.
(rule 1 (iconst_s $I128 c) (sextend $I128 (iconst_s $I64 c)))
;; Construct an `iconst` from a `u64` or Extract a `u64` from an `iconst`
;; by treating the constant as unsigned.
;; When extracting, smaller types get their value zero-extended to 64-bits,
;; so that `iconst.i8 255` will give you a `255_u64`.
;; When constructing, the rule will fail if the value cannot be represented in
;; the target type.
(decl iconst_u (Type u64) Value)
(extractor (iconst_u ty c) (iconst ty (u64_from_imm64 c)))
;; Priority 0 (general case): fail unless `c` fits in `ty` unsigned.
(rule 0 (iconst_u ty c)
(if-let $true (u64_le c (ty_umax ty)))
(iconst ty (imm64 c)))
;; Priority 1 (takes precedence): I128 constants via I64 + zero-extend.
(rule 1 (iconst_u $I128 c) (uextend $I128 (iconst_u $I64 c)))
;; These take `Value`, rather than going through `inst_data_tupled`, because
;; most of the time they want to return the original `Value`, and it would be
;; a waste to need to re-GVN the instruction data in those cases.
;; `infallible`: both external extractors always match (extension or not).
(decl multi sextend_maybe_etor (Type Value) Value)
(extern extractor infallible sextend_maybe_etor sextend_maybe_etor)
(decl multi uextend_maybe_etor (Type Value) Value)
(extern extractor infallible uextend_maybe_etor uextend_maybe_etor)
;; Match or Construct a possibly-`uextend`ed value.
;; Gives the extended-to type and inner value when matching something that was
;; extended, or the input value and its type when the value isn't an extension.
;; Useful to write a single pattern that can match things that may or may not
;; have undergone C's "usual arithmetic conversions".
;; When generating values, extending to the same type is invalid CLIF,
;; so this avoids doing that where there's no extension actually needed.
(decl uextend_maybe (Type Value) Value)
(extractor (uextend_maybe ty val) (uextend_maybe_etor ty val))
;; Priority 0: genuinely different type — emit the extend.
(rule 0 (uextend_maybe ty val) (uextend ty val))
;; Priority 1 (takes precedence): `val` already has type `ty`; return it
;; unchanged to avoid the invalid same-type extend.
(rule 1 (uextend_maybe ty val@(value_type ty)) val)
;; Same as `uextend_maybe` above, just for `sextend`.
(decl sextend_maybe (Type Value) Value)
(extractor (sextend_maybe ty val) (sextend_maybe_etor ty val))
(rule 0 (sextend_maybe ty val) (sextend ty val))
(rule 1 (sextend_maybe ty val@(value_type ty)) val)

10
hbcb/src/settings.rs Normal file
View file

@ -0,0 +1,10 @@
//! riscv64 Settings.
//!
//! NOTE(review): the module doc says "riscv64" but this crate is `hbcb`; the
//! generated file included below is also named `settings-riscv64.rs` — confirm
//! whether this backend is intentionally bootstrapped from the riscv64 code or
//! whether the name should be updated.
use {
core::fmt,
cranelift_codegen::settings::{self, detail, Builder, Value},
};
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
// public `Flags` struct with an impl for all of the settings defined in
// the ISA's meta definitions. (NOTE(review): the original comment was
// truncated mid-sentence here — restore its final line from upstream.)
include!(concat!(env!("OUT_DIR"), "/settings-riscv64.rs"));