diff --git a/Cargo.lock b/Cargo.lock index 59a7e4e..9801447 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,10 +2,172 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "arbitrary" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cranelift-bforest" +version = "0.111.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b80c3a50b9c4c7e5b5f73c0ed746687774fc9e36ef652b110da8daebf0c6e0e6" +dependencies = [ + "cranelift-entity", +] + +[[package]] +name = "cranelift-bitset" +version = "0.111.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38778758c2ca918b05acb2199134e0c561fb577c50574259b26190b6c2d95ded" + +[[package]] +name = "cranelift-codegen" +version = "0.111.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58258667ad10e468bfc13a8d620f50dfcd4bb35d668123e97defa2549b9ad397" +dependencies = [ + "bumpalo", + "cranelift-bforest", + "cranelift-bitset", + "cranelift-codegen-meta", + "cranelift-codegen-shared", + "cranelift-control", + "cranelift-entity", + "cranelift-isle", + "gimli", + "hashbrown 0.14.5", + "log", + "regalloc2 0.9.3", + "rustc-hash 1.1.0", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cranelift-codegen-meta" +version = "0.111.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "043f0b702e529dcb07ff92bd7d40e7d5317b5493595172c5eb0983343751ee06" +dependencies = [ + "cranelift-codegen-shared", +] + +[[package]] +name = "cranelift-codegen-shared" +version = "0.111.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7763578888ab53eca5ce7da141953f828e82c2bfadcffc106d10d1866094ffbb" + +[[package]] +name = "cranelift-control" +version = "0.111.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32db15f08c05df570f11e8ab33cb1ec449a64b37c8a3498377b77650bef33d8b" +dependencies = [ + "arbitrary", +] + +[[package]] +name = "cranelift-entity" +version = "0.111.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5289cdb399381a27e7bbfa1b42185916007c3d49aeef70b1d01cb4caa8010130" +dependencies = [ + "cranelift-bitset", +] + +[[package]] +name = "cranelift-isle" +version = "0.111.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b72a3c5c166a70426dcb209bdd0bb71a787c1ea76023dc0974fbabca770e8f9" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "gimli" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +dependencies = [ + "fallible-iterator", + "indexmap", + "stable_deref_trait", +] + +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + [[package]] name = "hbbytecode" version = "0.1.0" +[[package]] +name = "hbcb" +version = "0.1.0" +dependencies = [ + "cranelift-codegen", + "cranelift-codegen-meta", + "cranelift-control", + "cranelift-isle", + "log", + "regalloc2 0.10.2", + "smallvec", + "target-lexicon", +] + [[package]] name = "hbjit" version = "0.1.0" @@ -32,12 +194,28 @@ dependencies = [ "memmap2", ] +[[package]] +name = "indexmap" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" +dependencies = [ + "equivalent", + "hashbrown 0.14.5", +] + [[package]] name = "libc" version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + [[package]] name = "memmap2" version = "0.9.5" @@ -47,6 +225,135 @@ dependencies = [ "libc", ] +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regalloc2" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad156d539c879b7a24a363a2016d77961786e71f48f2e2fc8302a92abd2429a6" +dependencies = [ + "hashbrown 0.13.2", + "log", + "rustc-hash 1.1.0", + "slice-group-by", + "smallvec", +] + +[[package]] +name = "regalloc2" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12908dbeb234370af84d0579b9f68258a0f67e201412dd9a2814e6f45b2fc0f0" +dependencies = [ + "hashbrown 0.14.5", + "log", + "rustc-hash 2.0.0", + "slice-group-by", + "smallvec", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustc-hash" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + +[[package]] +name = "slice-group-by" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "syn" +version = "2.0.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "xtask" version = "0.1.0" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index acb9024..fbc1d6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = ["hbbytecode", "hbvm", "hbxrt", "xtask", "hblang", "hbjit"] +members = ["hbbytecode", "hbvm", "hbxrt", "xtask", "hblang", "hbjit", "hbcb"] [profile.release] strip = true diff --git a/hbcb/Cargo.toml b/hbcb/Cargo.toml new file mode 100644 index 0000000..799a0a9 --- /dev/null +++ b/hbcb/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "hbcb" +version = "0.1.0" +edition = "2021" + +[dependencies] +cranelift-codegen = "0.111.0" +cranelift-control = "0.111.0" +log = "0.4.22" +regalloc2 = "0.10.2" +smallvec = "1.13.2" +target-lexicon = "0.12.16" + +[features] +default = ["isle-errors"] +unwind = [] +isle-in-source-tree = [] +isle-errors = [] + +[build-dependencies] +cranelift-codegen-meta = "0.111.0" +cranelift-isle = "0.111.0" diff --git a/hbcb/build.rs b/hbcb/build.rs new file mode 100644 index 0000000..67c63fb --- /dev/null +++ b/hbcb/build.rs @@ -0,0 +1,310 @@ +// Build script. +// +// This program is run by Cargo when building cranelift-codegen. It is used to generate Rust code from +// the language definitions in the cranelift-codegen/meta directory. +// +// Environment: +// +// OUT_DIR +// Directory where generated files should be placed. +// +// TARGET +// Target triple provided by Cargo. 
+// +// The build script expects to be run from the directory where this build.rs file lives. The +// current directory is used to find the sources. + +use { + cranelift_codegen_meta::{self as meta, isle::IsleCompilations}, + cranelift_isle::error::Errors, + meta::isle::IsleCompilation, + std::{env, io::Read, process, time::Instant}, +}; + +fn main() { + let start_time = Instant::now(); + + let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set"); + let out_dir = std::path::Path::new(&out_dir); + //let target_triple = env::var("TARGET").expect("The TARGET environment variable must be set"); + + //let all_arch = env::var("CARGO_FEATURE_ALL_ARCH").is_ok(); + //let all_native_arch = env::var("CARGO_FEATURE_ALL_NATIVE_ARCH").is_ok(); + + let isas = &[meta::isa::Isa::Riscv64]; + + // let mut isas = meta::isa::Isa::all() + // .iter() + // .cloned() + // .filter(|isa| { + // let env_key = format!("CARGO_FEATURE_{}", isa.to_string().to_uppercase()); + // all_arch || env::var(env_key).is_ok() + // }) + // .collect::>(); + + // Don't require host isa if under 'all-arch' feature. + //let host_isa = env::var("CARGO_FEATURE_HOST_ARCH").is_ok() && !all_native_arch; + + //if isas.is_empty() || host_isa { + // // Try to match native target. + // let target_name = target_triple.split('-').next().unwrap(); + // let isa = meta::isa_from_arch(target_name).expect("error when identifying target"); + // println!("cargo:rustc-cfg=feature=\"{isa}\""); + // isas.push(isa); + //} + + let cur_dir = env::current_dir().expect("Can't access current working directory"); + let crate_dir = cur_dir.as_path(); + + println!("cargo:rerun-if-changed=build.rs"); + + let explicit_isle_dir = &crate_dir.join("isle_generated_code"); + #[cfg(feature = "isle-in-source-tree")] + let isle_dir = explicit_isle_dir; + #[cfg(not(feature = "isle-in-source-tree"))] + let isle_dir = &out_dir; + + #[cfg(feature = "isle-in-source-tree")] + { + std::fs::create_dir_all(isle_dir).expect("Could not create ISLE source directory"); + } + #[cfg(not(feature = "isle-in-source-tree"))] + { + if explicit_isle_dir.is_dir() { + eprintln!(concat!( + "Error: directory isle_generated_code/ exists but is only used when\n", + "`--feature isle-in-source-tree` is specified. To prevent confusion,\n", + "this build script requires the directory to be removed when reverting\n", + "to the usual generated code in target/. 
Please delete the directory and\n", + "re-run this build.\n", + )); + std::process::exit(1); + } + } + + if let Err(err) = meta::generate(isas, out_dir, isle_dir) { + eprintln!("Error: {err}"); + process::exit(1); + } + + if &std::env::var("SKIP_ISLE").unwrap_or("0".to_string()) != "1" { + if let Err(err) = build_isle(crate_dir, isle_dir) { + eprintln!("Error: {err}"); + process::exit(1); + } + } + + if env::var("CRANELIFT_VERBOSE").is_ok() { + for isa in isas { + println!("cargo:warning=Includes support for {} ISA", isa); + } + println!("cargo:warning=Build step took {:?}.", Instant::now() - start_time); + println!("cargo:warning=Generated files are in {}", out_dir.display()); + } + + let pkg_version = env::var("CARGO_PKG_VERSION").unwrap(); + let mut cmd = std::process::Command::new("git"); + cmd.arg("rev-parse") + .arg("HEAD") + .stdout(std::process::Stdio::piped()) + .current_dir(env::var("CARGO_MANIFEST_DIR").unwrap()); + let version = if let Ok(mut child) = cmd.spawn() { + let mut git_rev = String::new(); + child.stdout.as_mut().unwrap().read_to_string(&mut git_rev).unwrap(); + let status = child.wait().unwrap(); + if status.success() { + let git_rev = git_rev.trim().chars().take(9).collect::(); + format!("{pkg_version}-{git_rev}") + } else { + // not a git repo + pkg_version + } + } else { + // git not available + pkg_version + }; + std::fs::write( + std::path::Path::new(&out_dir).join("version.rs"), + format!( + "/// Version number of this crate. \n\ + pub const VERSION: &str = \"{version}\";" + ), + ) + .unwrap(); +} + +/// Strip the current directory from the file paths, because `islec` +/// includes them in the generated source, and this helps us maintain +/// deterministic builds that don't include those local file paths. +fn make_isle_source_path_relative( + cur_dir: &std::path::Path, + filename: &std::path::Path, +) -> std::path::PathBuf { + if let Ok(suffix) = filename.strip_prefix(cur_dir) { + suffix.to_path_buf() + } else { + filename.to_path_buf() + } +} + +fn build_isle( + crate_dir: &std::path::Path, + isle_dir: &std::path::Path, +) -> Result<(), Box> { + let cur_dir = std::env::current_dir()?; + let codegen_crate_dir = &make_isle_source_path_relative(&cur_dir, crate_dir); + let gen_dir = &make_isle_source_path_relative(&cur_dir, isle_dir); + + // Preludes. + let clif_lower_isle = gen_dir.join("clif_lower.isle"); + //let clif_opt_isle = gen_dir.join("clif_opt.isle"); + let prelude_isle = codegen_crate_dir.join("src").join("prelude.isle"); + //let prelude_opt_isle = codegen_crate_dir.join("src").join("prelude_opt.isle"); + let prelude_lower_isle = codegen_crate_dir.join("src").join("prelude_lower.isle"); + + // Directory for mid-end optimizations. + //let src_opts = codegen_crate_dir.join("src").join("opts"); + + let src_isa_risc_v = codegen_crate_dir.join("src"); + + // This is a set of ISLE compilation units. + // + // The format of each entry is: + // + // (output Rust code file, input ISLE source files) + // + // There should be one entry for each backend that uses ISLE for lowering, + // and if/when we replace our peephole optimization passes with ISLE, there + // should be an entry for each of those as well. + // + // N.B.: add any new compilation outputs to + // `scripts/force-rebuild-isle.sh` if they do not fit the pattern + // `cranelift/codegen/src/isa/*/lower/isle/generated_code.rs`! + let isle_compilations = IsleCompilations { + items: vec![ + // // The mid-end optimization rules. 
+ // IsleCompilation { + // output: gen_dir.join("isle_opt.rs"), + // inputs: vec![ + // prelude_isle.clone(), + // prelude_opt_isle, + // src_opts.join("arithmetic.isle"), + // src_opts.join("bitops.isle"), + // src_opts.join("cprop.isle"), + // src_opts.join("extends.isle"), + // src_opts.join("icmp.isle"), + // src_opts.join("remat.isle"), + // src_opts.join("selects.isle"), + // src_opts.join("shifts.isle"), + // src_opts.join("spaceship.isle"), + // src_opts.join("spectre.isle"), + // src_opts.join("vector.isle"), + // ], + // untracked_inputs: vec![clif_opt_isle], + // }, + // The risc-v instruction selector. + IsleCompilation { + output: gen_dir.join("isle_riscv64.rs"), + inputs: vec![ + prelude_isle.clone(), + prelude_lower_isle.clone(), + src_isa_risc_v.join("inst.isle"), + src_isa_risc_v.join("inst_vector.isle"), + src_isa_risc_v.join("lower.isle"), + ], + untracked_inputs: vec![clif_lower_isle.clone()], + }, + ], + }; + + let mut had_error = false; + for compilation in &isle_compilations.items { + for file in &compilation.inputs { + println!("cargo:rerun-if-changed={}", file.display()); + } + + if let Err(e) = run_compilation(compilation) { + had_error = true; + eprintln!("Error building ISLE files:"); + eprintln!("{e:?}"); + #[cfg(not(feature = "isle-errors"))] + { + eprintln!("To see a more detailed error report, run: "); + eprintln!(); + eprintln!(" $ cargo check -p cranelift-codegen --features isle-errors"); + eprintln!(); + } + } + } + + if had_error { + std::process::exit(1); + } + + println!("cargo:rustc-env=ISLE_DIR={}", isle_dir.to_str().unwrap()); + + Ok(()) +} + +/// Build ISLE DSL source text into generated Rust code. +/// +/// NB: This must happen *after* the `cranelift-codegen-meta` functions, since +/// it consumes files generated by them. +fn run_compilation(compilation: &IsleCompilation) -> Result<(), Errors> { + use cranelift_isle as isle; + + eprintln!("Rebuilding {}", compilation.output.display()); + + let code = { + let file_paths = compilation.inputs.iter().chain(compilation.untracked_inputs.iter()); + + let options = isle::codegen::CodegenOptions { + // Because we include!() the generated ISLE source, we cannot + // put the global pragmas (`#![allow(...)]`) in the ISLE + // source itself; we have to put them in the source that + // include!()s it. (See + // https://github.com/rust-lang/rust/issues/47995.) + exclude_global_allow_pragmas: true, + }; + + isle::compile::from_files(file_paths, &options)? 
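+        // `from_files` runs the ISLE compiler over the listed prelude and backend rule
+        // files and returns the generated Rust source as a `String`; the `?` propagates
+        // any `Errors` to the caller.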
+ }; + + let code = rustfmt(&code).unwrap_or_else(|e| { + println!("cargo:warning=Failed to run `rustfmt` on ISLE-generated code: {e:?}"); + code + }); + + eprintln!("Writing ISLE-generated Rust code to {}", compilation.output.display()); + std::fs::write(&compilation.output, code) + .map_err(|e| Errors::from_io(e, "failed writing output"))?; + + Ok(()) +} + +fn rustfmt(code: &str) -> std::io::Result { + use std::io::Write; + + let mut rustfmt = std::process::Command::new("rustfmt") + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .spawn()?; + + let mut stdin = rustfmt.stdin.take().unwrap(); + stdin.write_all(code.as_bytes())?; + drop(stdin); + + let mut stdout = rustfmt.stdout.take().unwrap(); + let mut data = vec![]; + stdout.read_to_end(&mut data)?; + + let status = rustfmt.wait()?; + if !status.success() { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + format!("`rustfmt` exited with status {status}"), + )); + } + + Ok(String::from_utf8(data).expect("rustfmt always writs utf-8 to stdout")) +} diff --git a/hbcb/src/abi.rs b/hbcb/src/abi.rs new file mode 100644 index 0000000..fb8fc26 --- /dev/null +++ b/hbcb/src/abi.rs @@ -0,0 +1,900 @@ +//! Implementation of a standard Riscv64 ABI. + +use { + alloc::{boxed::Box, vec::Vec}, + cranelift_codegen::{ + inst::*, + ir::{self, types::*, LibCall, Signature}, + isa::{self, unwind::UnwindInst, CallConv}, + machinst::*, + settings::{self, Flags as RiscvFlags}, + CodegenError, CodegenResult, + }, + regalloc2::{MachineEnv, PReg, PRegSet}, + smallvec::{smallvec, SmallVec}, + std::sync::OnceLock, +}; + +/// Support for the Riscv64 ABI from the callee side (within a function body). +pub(crate) type Riscv64Callee = Callee; + +/// Support for the Riscv64 ABI from the caller side (at a callsite). +pub(crate) type Riscv64ABICallSite = CallSite; + +/// This is the limit for the size of argument and return-value areas on the +/// stack. We place a reasonable limit here to avoid integer overflow issues +/// with 32-bit arithmetic: for now, 128 MB. +static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024; + +/// Riscv64-specific ABI behavior. This struct just serves as an implementation +/// point for the trait; it is never actually instantiated. +pub struct Riscv64MachineDeps; + +impl IsaFlags for RiscvFlags {} + +impl RiscvFlags { + pub(crate) fn min_vec_reg_size(&self) -> u64 { + let entries = [ + (self.has_zvl65536b(), 65536), + (self.has_zvl32768b(), 32768), + (self.has_zvl16384b(), 16384), + (self.has_zvl8192b(), 8192), + (self.has_zvl4096b(), 4096), + (self.has_zvl2048b(), 2048), + (self.has_zvl1024b(), 1024), + (self.has_zvl512b(), 512), + (self.has_zvl256b(), 256), + // In order to claim the Application Profile V extension, a minimum + // register size of 128 is required. i.e. V implies Zvl128b. + (self.has_v(), 128), + (self.has_zvl128b(), 128), + (self.has_zvl64b(), 64), + (self.has_zvl32b(), 32), + ]; + + for (has_flag, size) in entries.into_iter() { + if !has_flag { + continue; + } + + // Due to a limitation in regalloc2, we can't support types + // larger than 1024 bytes. So limit that here. + return std::cmp::min(size, 1024); + } + + return 0; + } +} + +impl ABIMachineSpec for Riscv64MachineDeps { + type F = RiscvFlags; + type I = Inst; + + fn word_bits() -> u32 { + 64 + } + + /// Return required stack alignment in bytes. 
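+    ///
+    /// The standard RISC-V psABI keeps the stack pointer 16-byte aligned at
+    /// procedure-call boundaries, so 16 is returned regardless of calling convention.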
+ fn stack_align(_call_conv: isa::CallConv) -> u32 { + 16 + } + + fn compute_arg_locs( + call_conv: isa::CallConv, + _flags: &settings::Flags, + params: &[ir::AbiParam], + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + mut args: ArgsAccumulator, + ) -> CodegenResult<(u32, Option)> { + assert_ne!( + call_conv, + isa::CallConv::Winch, + "riscv64 does not support the 'winch' calling convention yet" + ); + + // All registers that can be used as parameters or rets. + // both start and end are included. + let (x_start, x_end, f_start, f_end) = match args_or_rets { + ArgsOrRets::Args => (10, 17, 10, 17), + ArgsOrRets::Rets => (10, 11, 10, 11), + }; + let mut next_x_reg = x_start; + let mut next_f_reg = f_start; + // Stack space. + let mut next_stack: u32 = 0; + + for param in params { + if let ir::ArgumentPurpose::StructArgument(_) = param.purpose { + panic!( + "StructArgument parameters are not supported on riscv64. \ + Use regular pointer arguments instead." + ); + } + + // Find regclass(es) of the register(s) used to store a value of this type. + let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?; + let mut slots = ABIArgSlotVec::new(); + for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) { + let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int { + let x = Some(x_reg(next_x_reg)); + next_x_reg += 1; + x + } else if (next_f_reg <= f_end) && *rc == RegClass::Float { + let x = Some(f_reg(next_f_reg)); + next_f_reg += 1; + x + } else { + None + }; + if let Some(reg) = next_reg { + slots.push(ABIArgSlot::Reg { + reg: reg.to_real_reg().unwrap(), + ty: *reg_ty, + extension: param.extension, + }); + } else { + // Compute size and 16-byte stack alignment happens + // separately after all args. + let size = reg_ty.bits() / 8; + let size = std::cmp::max(size, 8); + // Align. + debug_assert!(size.is_power_of_two()); + next_stack = align_to(next_stack, size); + slots.push(ABIArgSlot::Stack { + offset: next_stack as i64, + ty: *reg_ty, + extension: param.extension, + }); + next_stack += size; + } + } + args.push(ABIArg::Slots { slots, purpose: param.purpose }); + } + let pos: Option = if add_ret_area_ptr { + assert!(ArgsOrRets::Args == args_or_rets); + if next_x_reg <= x_end { + let arg = ABIArg::reg( + x_reg(next_x_reg).to_real_reg().unwrap(), + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + args.push_non_formal(arg); + } else { + let arg = ABIArg::stack( + next_stack as i64, + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + args.push_non_formal(arg); + next_stack += 8; + } + Some(args.args().len() - 1) + } else { + None + }; + + next_stack = align_to(next_stack, Self::stack_align(call_conv)); + + // To avoid overflow issues, limit the arg/return size to something + // reasonable -- here, 128 MB. 
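+        // (`STACK_ARG_RET_SIZE_LIMIT` is the 128 MB constant defined near the top of
+        // this file; larger areas would risk overflowing 32-bit offset arithmetic.)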
+ if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((next_stack, pos)) + } + + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { + Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()) + } + + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted()) + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + Inst::gen_move(to_reg, from_reg, ty) + } + + fn gen_extend( + to_reg: Writable, + from_reg: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Inst { + assert!(from_bits < to_bits); + Inst::Extend { rd: to_reg, rn: from_reg, signed, from_bits, to_bits } + } + + fn get_ext_mode( + _call_conv: isa::CallConv, + specified: ir::ArgumentExtension, + ) -> ir::ArgumentExtension { + specified + } + + fn gen_args(args: Vec) -> Inst { + Inst::Args { args } + } + + fn gen_rets(rets: Vec) -> Inst { + Inst::Rets { rets } + } + + fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg { + spilltmp_reg() + } + + fn gen_add_imm( + _call_conv: isa::CallConv, + into_reg: Writable, + from_reg: Reg, + imm: u32, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: into_reg, + rs: from_reg, + imm12, + }); + } else { + insts.extend(Inst::load_constant_u32(writable_spilltmp_reg2(), imm as u64)); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: into_reg, + rs1: spilltmp_reg2(), + rs2: from_reg, + }); + } + insts + } + + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec { + let mut insts = SmallVec::new(); + insts.push(Inst::TrapIf { + cc: IntCC::UnsignedLessThan, + rs1: stack_reg(), + rs2: limit_reg, + trap_code: ir::TrapCode::StackOverflow, + }); + insts + } + + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable) -> Inst { + Inst::LoadAddr { rd: into_reg, mem: mem.into() } + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64); + Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()) + } + + fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { + let mem = AMode::RegOffset(base, offset as i64); + Inst::gen_store(mem, from_reg, ty, MemFlags::trusted()) + } + + fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec { + let mut insts = SmallVec::new(); + + if amount == 0 { + return insts; + } + + if let Some(imm) = Imm12::maybe_from_i64(amount as i64) { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_stack_reg(), + rs: stack_reg(), + imm12: imm, + }) + } else { + let tmp = writable_spilltmp_reg(); + insts.extend(Inst::load_constant_u64(tmp, amount as i64 as u64)); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_stack_reg(), + rs1: stack_reg(), + rs2: tmp.to_reg(), + }); + } + + insts + } + + fn gen_prologue_frame_setup( + _call_conv: isa::CallConv, + flags: &settings::Flags, + _isa_flags: &RiscvFlags, + frame_layout: &FrameLayout, + ) -> SmallInstVec { + let mut insts = SmallVec::new(); + + if frame_layout.setup_area_size > 0 { + // add sp,sp,-16 ;; alloc stack space for fp. + // sd ra,8(sp) ;; save ra. + // sd fp,0(sp) ;; store old fp. + // mv fp,sp ;; set fp to sp. 
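+            //
+            // Since -16 fits in a 12-bit immediate, `gen_sp_reg_adjust(-16)` below
+            // expands to a single `addi sp, sp, -16`, matching the sketch above.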
+ insts.extend(Self::gen_sp_reg_adjust(-16)); + insts.push(Inst::gen_store(AMode::SPOffset(8), link_reg(), I64, MemFlags::trusted())); + insts.push(Inst::gen_store(AMode::SPOffset(0), fp_reg(), I64, MemFlags::trusted())); + + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::PushFrameRegs { + offset_upward_to_caller_sp: frame_layout.setup_area_size, + }, + }); + } + insts.push(Inst::Mov { rd: writable_fp_reg(), rm: stack_reg(), ty: I64 }); + } + + insts + } + + /// reverse of gen_prologue_frame_setup. + fn gen_epilogue_frame_restore( + call_conv: isa::CallConv, + _flags: &settings::Flags, + _isa_flags: &RiscvFlags, + frame_layout: &FrameLayout, + ) -> SmallInstVec { + let mut insts = SmallVec::new(); + + if frame_layout.setup_area_size > 0 { + insts.push(Inst::gen_load( + writable_link_reg(), + AMode::SPOffset(8), + I64, + MemFlags::trusted(), + )); + insts.push(Inst::gen_load( + writable_fp_reg(), + AMode::SPOffset(0), + I64, + MemFlags::trusted(), + )); + insts.extend(Self::gen_sp_reg_adjust(16)); + } + + if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 { + insts.extend(Self::gen_sp_reg_adjust(frame_layout.tail_args_size.try_into().unwrap())); + } + + insts + } + + fn gen_return( + _call_conv: isa::CallConv, + _isa_flags: &RiscvFlags, + _frame_layout: &FrameLayout, + ) -> SmallInstVec { + smallvec![Inst::Ret {}] + } + + fn gen_probestack(insts: &mut SmallInstVec, frame_size: u32) { + insts.extend(Inst::load_constant_u32(writable_a0(), frame_size as u64)); + let mut info = + CallInfo::empty(ExternalName::LibCall(LibCall::Probestack), CallConv::SystemV); + info.uses.push(CallArgPair { vreg: a0(), preg: a0() }); + insts.push(Inst::Call { info: Box::new(info) }); + } + + fn gen_clobber_save( + _call_conv: isa::CallConv, + flags: &settings::Flags, + frame_layout: &FrameLayout, + ) -> SmallVec<[Inst; 16]> { + let mut insts = SmallVec::new(); + let setup_frame = frame_layout.setup_area_size > 0; + + let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size; + if incoming_args_diff > 0 { + // Decrement SP by the amount of additional incoming argument space we need + insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32))); + + if setup_frame { + // Write the lr position on the stack again, as it hasn't changed since it was + // pushed in `gen_prologue_frame_setup` + insts.push(Inst::gen_store( + AMode::SPOffset(8), + link_reg(), + I64, + MemFlags::trusted(), + )); + insts.push(Inst::gen_load( + writable_fp_reg(), + AMode::SPOffset(i64::from(incoming_args_diff)), + I64, + MemFlags::trusted(), + )); + insts.push(Inst::gen_store(AMode::SPOffset(0), fp_reg(), I64, MemFlags::trusted())); + + // Finally, sync the frame pointer with SP + insts.push(Inst::gen_move(writable_fp_reg(), stack_reg(), I64)); + } + } + + if flags.unwind_info() && setup_frame { + // The *unwind* frame (but not the actual frame) starts at the + // clobbers, just below the saved FP/LR pair. + insts.push(Inst::Unwind { + inst: UnwindInst::DefineNewFrame { + offset_downward_to_clobbers: frame_layout.clobber_size, + offset_upward_to_caller_sp: frame_layout.setup_area_size, + }, + }); + } + + // Adjust the stack pointer downward for clobbers, the function fixed + // frame (spillslots and storage slots), and outgoing arguments. 
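+        //
+        // Rough picture of the region allocated here, from higher to lower addresses
+        // (the FP/RA pair saved by the prologue sits just above it):
+        //
+        //   clobbered callee-saves   (clobber_size)
+        //   spill/storage slots      (fixed_frame_storage_size)
+        //   outgoing arguments       (outgoing_args_size)   <- SP after the adjustment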
+ let stack_size = frame_layout.clobber_size + + frame_layout.fixed_frame_storage_size + + frame_layout.outgoing_args_size; + + // Store each clobbered register in order at offsets from SP, + // placing them above the fixed frame slots. + if stack_size > 0 { + insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32))); + + let mut cur_offset = 8; + for reg in &frame_layout.clobbered_callee_saves { + let r_reg = reg.to_reg(); + let ty = match r_reg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unimplemented!("Vector Clobber Saves"), + }; + insts.push(Inst::gen_store( + AMode::SPOffset((stack_size - cur_offset) as i64), + Reg::from(reg.to_reg()), + ty, + MemFlags::trusted(), + )); + + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset: frame_layout.clobber_size - cur_offset, + reg: r_reg, + }, + }); + } + + cur_offset += 8 + } + } + insts + } + + fn gen_clobber_restore( + _call_conv: isa::CallConv, + _flags: &settings::Flags, + frame_layout: &FrameLayout, + ) -> SmallVec<[Inst; 16]> { + let mut insts = SmallVec::new(); + + let stack_size = frame_layout.clobber_size + + frame_layout.fixed_frame_storage_size + + frame_layout.outgoing_args_size; + + let mut cur_offset = 8; + for reg in &frame_layout.clobbered_callee_saves { + let rreg = reg.to_reg(); + let ty = match rreg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unimplemented!("Vector Clobber Restores"), + }; + insts.push(Inst::gen_load( + reg.map(Reg::from), + AMode::SPOffset(i64::from(stack_size - cur_offset)), + ty, + MemFlags::trusted(), + )); + cur_offset += 8 + } + + if stack_size > 0 { + insts.extend(Self::gen_sp_reg_adjust(stack_size as i32)); + } + + insts + } + + fn gen_call(dest: &CallDest, tmp: Writable, info: CallInfo<()>) -> SmallVec<[Self::I; 2]> { + let mut insts = SmallVec::new(); + match &dest { + &CallDest::ExtName(ref name, RelocDistance::Near) => { + let info = Box::new(info.map(|()| name.clone())); + insts.push(Inst::Call { info }) + } + &CallDest::ExtName(ref name, RelocDistance::Far) => { + insts.push(Inst::LoadExtName { rd: tmp, name: Box::new(name.clone()), offset: 0 }); + let info = Box::new(info.map(|()| tmp.to_reg())); + insts.push(Inst::CallInd { info }); + } + &CallDest::Reg(reg) => { + let info = Box::new(info.map(|()| *reg)); + insts.push(Inst::CallInd { info }); + } + } + insts + } + + fn gen_memcpy Writable>( + call_conv: isa::CallConv, + dst: Reg, + src: Reg, + size: usize, + mut alloc_tmp: F, + ) -> SmallVec<[Self::I; 8]> { + let mut insts = SmallVec::new(); + let arg0 = Writable::from_reg(x_reg(10)); + let arg1 = Writable::from_reg(x_reg(11)); + let arg2 = Writable::from_reg(x_reg(12)); + let tmp = alloc_tmp(Self::word_type()); + insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter()); + insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::LibCall(LibCall::Memcpy), + uses: smallvec![ + CallArgPair { vreg: dst, preg: arg0.to_reg() }, + CallArgPair { vreg: src, preg: arg1.to_reg() }, + CallArgPair { vreg: tmp.to_reg(), preg: arg2.to_reg() } + ], + defs: smallvec![], + clobbers: Self::get_regs_clobbered_by_call(call_conv), + caller_conv: call_conv, + callee_conv: call_conv, + callee_pop_size: 0, + }), + }); + insts + } + + fn get_number_of_spillslots_for_value( + rc: RegClass, + _target_vector_bytes: u32, + isa_flags: &RiscvFlags, + ) -> u32 { + // We allocate in terms of 8-byte slots. 
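+        // Int and Float values each fit in a single 8-byte slot; vector spill space
+        // scales with the configured minimum vector register width (see
+        // `min_vec_reg_size` above).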
+ match rc { + RegClass::Int => 1, + RegClass::Float => 1, + RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32, + } + } + + fn get_machine_env(_flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv { + static MACHINE_ENV: OnceLock = OnceLock::new(); + MACHINE_ENV.get_or_init(create_reg_enviroment) + } + + fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet { + DEFAULT_CLOBBERS + } + + fn compute_frame_layout( + _call_conv: isa::CallConv, + flags: &settings::Flags, + _sig: &Signature, + regs: &[Writable], + is_leaf: bool, + incoming_args_size: u32, + tail_args_size: u32, + fixed_frame_storage_size: u32, + outgoing_args_size: u32, + ) -> FrameLayout { + let mut regs: Vec> = regs + .iter() + .cloned() + .filter(|r| DEFAULT_CALLEE_SAVES.contains(r.to_reg().into())) + .collect(); + + regs.sort_unstable(); + + // Compute clobber size. + let clobber_size = compute_clobber_size(®s); + + // Compute linkage frame size. + let setup_area_size = if flags.preserve_frame_pointers() + || !is_leaf + // The function arguments that are passed on the stack are addressed + // relative to the Frame Pointer. + || incoming_args_size > 0 + || clobber_size > 0 + || fixed_frame_storage_size > 0 + { + 16 // FP, LR + } else { + 0 + }; + + // Return FrameLayout structure. + FrameLayout { + incoming_args_size, + tail_args_size, + setup_area_size, + clobber_size, + fixed_frame_storage_size, + outgoing_args_size, + clobbered_callee_saves: regs, + } + } + + fn gen_inline_probestack( + insts: &mut SmallInstVec, + _call_conv: isa::CallConv, + frame_size: u32, + guard_size: u32, + ) { + // Unroll at most n consecutive probes, before falling back to using a loop + const PROBE_MAX_UNROLL: u32 = 3; + // Number of probes that we need to perform + let probe_count = align_to(frame_size, guard_size) / guard_size; + + // Must be a caller-saved register that is not an argument. + let tmp = Writable::from_reg(x_reg(28)); // t3 + + if probe_count <= PROBE_MAX_UNROLL { + Self::gen_probestack_unroll(insts, tmp, guard_size, probe_count) + } else { + insts.push(Inst::StackProbeLoop { guard_size, probe_count, tmp }); + } + } +} + +impl Riscv64ABICallSite { + pub fn emit_return_call(mut self, ctx: &mut Lower, args: isle::ValueSlice) { + let new_stack_arg_size = + u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap(); + + ctx.abi_mut().accumulate_tail_args_size(new_stack_arg_size); + + // Put all arguments in registers and stack slots (within that newly + // allocated stack space). + self.emit_args(ctx, args); + self.emit_stack_ret_arg_for_tail_call(ctx); + + let dest = self.dest().clone(); + let uses = self.take_uses(); + + match dest { + CallDest::ExtName(name, RelocDistance::Near) => { + let info = Box::new(ReturnCallInfo { dest: name, uses, new_stack_arg_size }); + ctx.emit(Inst::ReturnCall { info }); + } + CallDest::ExtName(name, RelocDistance::Far) => { + let callee = ctx.alloc_tmp(ir::types::I64).only_reg().unwrap(); + ctx.emit(Inst::LoadExtName { rd: callee, name: Box::new(name), offset: 0 }); + let info = + Box::new(ReturnCallInfo { dest: callee.to_reg(), uses, new_stack_arg_size }); + ctx.emit(Inst::ReturnCallInd { info }); + } + CallDest::Reg(callee) => { + let info = Box::new(ReturnCallInfo { dest: callee, uses, new_stack_arg_size }); + ctx.emit(Inst::ReturnCallInd { info }); + } + } + } +} + +// NOTE: no V regs are callee save. 
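+//
+// (`px_reg(n)` / `pf_reg(n)` name physical register n of the given class, so the X
+// entries below are sp (x2), fp/s0 (x8), s1 (x9) and s2-s11 (x18-x27).)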
+const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty() + // X Regs + .with(px_reg(2)) + .with(px_reg(8)) + .with(px_reg(9)) + .with(px_reg(18)) + .with(px_reg(19)) + .with(px_reg(20)) + .with(px_reg(21)) + .with(px_reg(22)) + .with(px_reg(23)) + .with(px_reg(24)) + .with(px_reg(25)) + .with(px_reg(26)) + .with(px_reg(27)) + // F Regs + .with(pf_reg(8)) + .with(pf_reg(18)) + .with(pf_reg(19)) + .with(pf_reg(20)) + .with(pf_reg(21)) + .with(pf_reg(22)) + .with(pf_reg(23)) + .with(pf_reg(24)) + .with(pf_reg(25)) + .with(pf_reg(26)) + .with(pf_reg(27)); + +fn compute_clobber_size(clobbers: &[Writable]) -> u32 { + let mut clobbered_size = 0; + for reg in clobbers { + match reg.to_reg().class() { + RegClass::Int => { + clobbered_size += 8; + } + RegClass::Float => { + clobbered_size += 8; + } + RegClass::Vector => unimplemented!("Vector Size Clobbered"), + } + } + align_to(clobbered_size, 16) +} + +const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty() + .with(px_reg(1)) + .with(px_reg(5)) + .with(px_reg(6)) + .with(px_reg(7)) + .with(px_reg(10)) + .with(px_reg(11)) + .with(px_reg(12)) + .with(px_reg(13)) + .with(px_reg(14)) + .with(px_reg(15)) + .with(px_reg(16)) + .with(px_reg(17)) + .with(px_reg(28)) + .with(px_reg(29)) + .with(px_reg(30)) + .with(px_reg(31)) + // F Regs + .with(pf_reg(0)) + .with(pf_reg(1)) + .with(pf_reg(2)) + .with(pf_reg(3)) + .with(pf_reg(4)) + .with(pf_reg(5)) + .with(pf_reg(6)) + .with(pf_reg(7)) + .with(pf_reg(9)) + .with(pf_reg(10)) + .with(pf_reg(11)) + .with(pf_reg(12)) + .with(pf_reg(13)) + .with(pf_reg(14)) + .with(pf_reg(15)) + .with(pf_reg(16)) + .with(pf_reg(17)) + .with(pf_reg(28)) + .with(pf_reg(29)) + .with(pf_reg(30)) + .with(pf_reg(31)) + // V Regs - All vector regs get clobbered + .with(pv_reg(0)) + .with(pv_reg(1)) + .with(pv_reg(2)) + .with(pv_reg(3)) + .with(pv_reg(4)) + .with(pv_reg(5)) + .with(pv_reg(6)) + .with(pv_reg(7)) + .with(pv_reg(8)) + .with(pv_reg(9)) + .with(pv_reg(10)) + .with(pv_reg(11)) + .with(pv_reg(12)) + .with(pv_reg(13)) + .with(pv_reg(14)) + .with(pv_reg(15)) + .with(pv_reg(16)) + .with(pv_reg(17)) + .with(pv_reg(18)) + .with(pv_reg(19)) + .with(pv_reg(20)) + .with(pv_reg(21)) + .with(pv_reg(22)) + .with(pv_reg(23)) + .with(pv_reg(24)) + .with(pv_reg(25)) + .with(pv_reg(26)) + .with(pv_reg(27)) + .with(pv_reg(28)) + .with(pv_reg(29)) + .with(pv_reg(30)) + .with(pv_reg(31)); + +fn create_reg_enviroment() -> MachineEnv { + // Some C Extension instructions can only use a subset of the registers. + // x8 - x15, f8 - f15, v8 - v15 so we should prefer to use those since + // they allow us to emit C instructions more often. + // + // In general the order of preference is: + // 1. Compressible Caller Saved registers. + // 2. Non-Compressible Caller Saved registers. + // 3. Compressible Callee Saved registers. + // 4. Non-Compressible Callee Saved registers. + + let preferred_regs_by_class: [Vec; 3] = { + let x_registers: Vec = (10..=15).map(px_reg).collect(); + let f_registers: Vec = (10..=15).map(pf_reg).collect(); + let v_registers: Vec = (8..=15).map(pv_reg).collect(); + + [x_registers, f_registers, v_registers] + }; + + let non_preferred_regs_by_class: [Vec; 3] = { + // x0 - x4 are special registers, so we don't want to use them. + // Omit x30 and x31 since they are the spilltmp registers. + + // Start with the Non-Compressible Caller Saved registers. + let x_registers: Vec = (5..=7) + .chain(16..=17) + .chain(28..=29) + // The first Callee Saved register is x9 since its Compressible + // Omit x8 since it's the frame pointer. 
+ .chain(9..=9) + // The rest of the Callee Saved registers are Non-Compressible + .chain(18..=27) + .map(px_reg) + .collect(); + + // Prefer Caller Saved registers. + let f_registers: Vec = (0..=7) + .chain(16..=17) + .chain(28..=31) + // Once those are exhausted, we should prefer f8 and f9 since they are + // callee saved, but compressible. + .chain(8..=9) + .chain(18..=27) + .map(pf_reg) + .collect(); + + let v_registers = (0..=7).chain(16..=31).map(pv_reg).collect(); + + [x_registers, f_registers, v_registers] + }; + + MachineEnv { + preferred_regs_by_class, + non_preferred_regs_by_class, + fixed_stack_slots: vec![], + scratch_by_class: [None, None, None], + } +} + +impl Riscv64MachineDeps { + fn gen_probestack_unroll( + insts: &mut SmallInstVec, + tmp: Writable, + guard_size: u32, + probe_count: u32, + ) { + // When manually unrolling adjust the stack pointer and then write a zero + // to the stack at that offset. + // + // We do this because valgrind expects us to never write beyond the stack + // pointer and associated redzone. + // See: https://github.com/bytecodealliance/wasmtime/issues/7454 + + // Store the adjust amount in a register upfront, so we don't have to + // reload it for each probe. It's worth loading this as a negative and + // using an `add` instruction since we have compressed versions of `add` + // but not the `sub` instruction. + insts.extend(Inst::load_constant_u64(tmp, (-(guard_size as i64)) as u64)); + + for _ in 0..probe_count { + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_stack_reg(), + rs1: stack_reg(), + rs2: tmp.to_reg(), + }); + + insts.push(Inst::gen_store(AMode::SPOffset(0), zero_reg(), I32, MemFlags::trusted())); + } + + // Restore the stack pointer to its original value + insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32)); + } +} diff --git a/hbcb/src/inst.isle b/hbcb/src/inst.isle new file mode 100644 index 0000000..f6e4570 --- /dev/null +++ b/hbcb/src/inst.isle @@ -0,0 +1,3128 @@ +;; Instruction formats. +(type MInst + (enum + ;; A no-op of zero size. + (Nop0) + (Nop4) + + ;; load immediate + (Lui + (rd WritableReg) + (imm Imm20)) + + (LoadInlineConst + (rd WritableReg) + (ty Type) + (imm u64)) + + (Auipc + (rd WritableReg) + (imm Imm20)) + + (Fli + (ty Type) + (imm FliConstant) + (rd WritableReg)) + + ;; An ALU operation with one register sources and a register destination. + (FpuRR + (alu_op FpuOPRR) + (width FpuOPWidth) + (frm FRM) + (rd WritableReg) + (rs Reg)) + + + ;; An ALU operation with two register sources and a register destination. + (AluRRR + (alu_op AluOPRRR) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg)) + + ;; An ALU operation with two register sources and a register destination. + (FpuRRR + (alu_op FpuOPRRR) + (width FpuOPWidth) + (frm FRM) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg)) + + ;; An ALU operation with three register sources and a register destination. + (FpuRRRR + (alu_op FpuOPRRRR) + (width FpuOPWidth) + (frm FRM) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg) + (rs3 Reg)) + + ;; An ALU operation with a register source and an immediate-12 source, and a register + ;; destination. + (AluRRImm12 + (alu_op AluOPRRI) + (rd WritableReg) + (rs Reg) + (imm12 Imm12)) + + ;; A CSR Reading or Writing instruction with a register source and a register destination. + (CsrReg + (op CsrRegOP) + (rd WritableReg) + (rs Reg) + (csr CSR)) + + ;; A CSR Writing instruction with an immediate source and a register destination. 
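+    ;; For example, with `op = CsrRWI` and `csr = Frm` this corresponds to a
+    ;; `csrrwi rd, frm, imm`, writing the 5-bit immediate to the rounding-mode CSR
+    ;; and reading the old value into `rd`.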
+ (CsrImm + (op CsrImmOP) + (rd WritableReg) + (imm UImm5) + (csr CSR)) + + ;; An load + (Load + (rd WritableReg) + (op LoadOP) + (flags MemFlags) + (from AMode)) + ;; An Store + (Store + (to AMode) + (op StoreOP) + (flags MemFlags) + (src Reg)) + + ;; A pseudo-instruction that captures register arguments in vregs. + (Args + (args VecArgPair)) + + ;; A pseudo-instruction that moves vregs to return registers. + (Rets + (rets VecRetPair)) + + (Ret) + + (Extend + (rd WritableReg) + (rn Reg) + (signed bool) + (from_bits u8) + (to_bits u8)) + + (Call (info BoxCallInfo)) + + ;; A machine indirect-call instruction. + (CallInd (info BoxCallIndInfo)) + + ;; A direct return-call macro instruction. + (ReturnCall (info BoxReturnCallInfo)) + + ;; An indirect return-call macro instruction. + (ReturnCallInd (info BoxReturnCallIndInfo)) + + ;; Emits a trap with the given trap code if the comparison succeeds + (TrapIf + (rs1 Reg) + (rs2 Reg) + (cc IntCC) + (trap_code TrapCode)) + + (Jal + ;; (rd WritableReg) don't use + (label MachLabel)) + + (CondBr + (taken CondBrTarget) + (not_taken CondBrTarget) + (kind IntegerCompare)) + + ;; Load an inline symbol reference. + (LoadExtName + (rd WritableReg) + (name BoxExternalName) + (offset i64)) + + ;; Load a TLS symbol address + (ElfTlsGetAddr + (rd WritableReg) + (name BoxExternalName)) + + ;; Load address referenced by `mem` into `rd`. + (LoadAddr + (rd WritableReg) + (mem AMode)) + + ;; A MOV instruction. These are encoded as OrR's (AluRRR form) but we + ;; keep them separate at the `Inst` level for better pretty-printing + ;; and faster `is_move()` logic. + (Mov + (rd WritableReg) + (rm Reg) + (ty Type)) + + ;; A MOV instruction, but where the source register is a non-allocatable + ;; PReg. It's important that the register be non-allocatable, as regalloc2 + ;; will not see it as used. + (MovFromPReg + (rd WritableReg) + (rm PReg)) + + (Fence + (pred FenceReq) + (succ FenceReq)) + + (EBreak) + + ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at + ;; runtime. + (Udf + (trap_code TrapCode)) + ;; a jump and link register operation + (Jalr + ;;Plain unconditional jumps (assembler pseudo-op J) are encoded as a JAL with rd=x0. + (rd WritableReg) + (base Reg) + (offset Imm12)) + + ;; atomic operations. + (Atomic + (op AtomicOP) + (rd WritableReg) + (addr Reg) + (src Reg) + (amo AMO)) + ;; an atomic store + (AtomicStore + (src Reg) + (ty Type) + (p Reg)) + ;; an atomic load. + (AtomicLoad + (rd WritableReg) + (ty Type) + (p Reg)) + + ;; an atomic nand need using loop to implement. + (AtomicRmwLoop + (offset Reg) + (op AtomicRmwOp) + (dst WritableReg) + (ty Type) + (p Reg) + (x Reg) + (t0 WritableReg)) + + ;; select x or y base on condition + (Select + (dst WritableValueRegs) + (condition IntegerCompare) + (x ValueRegs) + (y ValueRegs)) + + (BrTable + (index Reg) + (tmp1 WritableReg) + (tmp2 WritableReg) + (targets VecMachLabel)) + + ;; atomic compare and set operation + (AtomicCas + (offset Reg) + (t0 WritableReg) + (dst WritableReg) + (e Reg) + (addr Reg) + (v Reg) + (ty Type)) + + (RawData (data VecU8)) + + ;; An unwind pseudo-instruction. + (Unwind + (inst UnwindInst)) + + ;; A dummy use, useful to keep a value alive. + (DummyUse + (reg Reg)) + + ;; popcnt if target doesn't support extension B + ;; use iteration to implement. + (Popcnt + (sum WritableReg) + (step WritableReg) + (tmp WritableReg) + (rs Reg) + (ty Type)) + + ;;; counting leading or trailing zeros. + (Cltz + ;; leading or trailing. 
+ (leading bool) + (sum WritableReg) + (step WritableReg) + (tmp WritableReg) + (rs Reg) + (ty Type)) + + (Brev8 + (rs Reg) + (ty Type) + (step WritableReg) + (tmp WritableReg) + (tmp2 WritableReg) + (rd WritableReg)) + (StackProbeLoop + (guard_size u32) + (probe_count u32) + (tmp WritableReg)) + + (VecAluRRRR + (op VecAluOpRRRR) + (vd WritableReg) + (vd_src Reg) + (vs2 Reg) + (vs1 Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRRImm5 + (op VecAluOpRRRImm5) + (vd WritableReg) + (vd_src Reg) + (vs2 Reg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRR + (op VecAluOpRRR) + (vd WritableReg) + (vs2 Reg) + (vs1 Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRRImm5 + (op VecAluOpRRImm5) + (vd WritableReg) + (vs2 Reg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRR + (op VecAluOpRR) + (vd WritableReg) + (vs Reg) + (mask VecOpMasking) + (vstate VState)) + + (VecAluRImm5 + (op VecAluOpRImm5) + (vd WritableReg) + (imm Imm5) + (mask VecOpMasking) + (vstate VState)) + + (VecSetState + (rd WritableReg) + (vstate VState)) + + (VecLoad + (eew VecElementWidth) + (to WritableReg) + (from VecAMode) + (flags MemFlags) + (mask VecOpMasking) + (vstate VState)) + + (VecStore + (eew VecElementWidth) + (to VecAMode) + (from Reg) + (flags MemFlags) + (mask VecOpMasking) + (vstate VState)) +)) + +(type AtomicOP (enum + (LrW) + (ScW) + (AmoswapW) + (AmoaddW) + (AmoxorW) + (AmoandW) + (AmoorW) + (AmominW) + (AmomaxW) + (AmominuW) + (AmomaxuW) + (LrD) + (ScD) + (AmoswapD) + (AmoaddD) + (AmoxorD) + (AmoandD) + (AmoorD) + (AmominD) + (AmomaxD) + (AmominuD) + (AmomaxuD) +)) + +(type FpuOPRRRR (enum + (Fmadd) + (Fmsub) + (Fnmsub) + (Fnmadd) +)) + +(type FClassResult (enum + ;;0 rs1 is −∞. + (NegInfinite) + ;; 1 rs1 is a negative normal number. + (NegNormal) + ;; 2 rs1 is a negative subnormal number. + (NegSubNormal) + ;; 3 rs1 is −0. + (NegZero) + ;; 4 rs1 is +0. + (PosZero) + ;; 5 rs1 is a positive subnormal number. + (PosSubNormal) + ;; 6 rs1 is a positive normal number. + (PosNormal) + ;; 7 rs1 is +∞. + (PosInfinite) + ;; 8 rs1 is a signaling NaN. + (SNaN) + ;; 9 rs1 is a quiet NaN. 
+ (QNaN) +)) + +(type FliConstant (primitive FliConstant)) + +(type FpuOPWidth (enum + (S) + (D) + (H) + (Q) +)) + +(decl pure fpu_op_width_from_ty (Type) FpuOPWidth) +(extern constructor fpu_op_width_from_ty fpu_op_width_from_ty) +(convert Type FpuOPWidth fpu_op_width_from_ty) + +(type FpuOPRR (enum + (Fsqrt) ;; fsqrt.{fmt} + (Fclass) ;; fclass.{fmt} + (FcvtWFmt) ;; fcvt.w.{fmt} + (FcvtWuFmt) ;; fcvt.wu.{fmt} + (FcvtLFmt) ;; fcvt.l.{fmt} + (FcvtLuFmt) ;; fcvt.lu.{fmt} + (FcvtFmtW) ;; fcvt.{fmt}.w + (FcvtFmtWu) ;; fcvt.{fmt}.wu + (FcvtFmtL) ;; fcvt.{fmt}.l + (FcvtFmtLu) ;; fcvt.{fmt}.lu + (FmvXFmt) ;; fmv.x.{fmt} + (FmvFmtX) ;; fmv.{fmt}.x + (FcvtSD) ;; fcvt.s.d + (FcvtDS) ;; fcvt.d.s + + ;; Zfa Extension + (Fround) ;; fround.{fmt} +)) + +(type LoadOP (enum + (Lb) + (Lh) + (Lw) + (Lbu) + (Lhu) + (Lwu) + (Ld) + (Flh) + (Flw) + (Fld) +)) + +(type StoreOP (enum + (Sb) + (Sh) + (Sw) + (Sd) + (Fsh) + (Fsw) + (Fsd) +)) + +(type AluOPRRR (enum + ;; base set + (Add) + (Sub) + (Sll) + (Slt) + (SltU) + (Sgt) + (Sgtu) + (Xor) + (Srl) + (Sra) + (Or) + (And) + + ;; RV64I Base Instruction Set (in addition to RV32I) + (Addw) + (Subw) + (Sllw) + (Srlw) + (Sraw) + + + ;;RV32M Standard Extension + (Mul) + (Mulh) + (Mulhsu) + (Mulhu) + (Div) + (DivU) + (Rem) + (RemU) + + ;; RV64M Standard Extension (in addition to RV32M) + (Mulw) + (Divw) + (Divuw) + (Remw) + (Remuw) + + ;; Zba: Address Generation Instructions + (Adduw) + (Sh1add) + (Sh1adduw) + (Sh2add) + (Sh2adduw) + (Sh3add) + (Sh3adduw) + + ;; Zbb: Bit Manipulation Instructions + (Andn) + (Orn) + (Xnor) + (Max) + (Maxu) + (Min) + (Minu) + (Rol) + (Rolw) + (Ror) + (Rorw) + + ;; Zbs: Single-bit instructions + (Bclr) + (Bext) + (Binv) + (Bset) + + ;; Zbc: Carry-less multiplication + (Clmul) + (Clmulh) + (Clmulr) + + ;; Zbkb: Bit-manipulation for Cryptography + (Pack) + (Packw) + (Packh) + + ;; ZiCond: Integer Conditional Operations + (CzeroEqz) + (CzeroNez) +)) + + +(type FpuOPRRR (enum + (Fadd) + (Fsub) + (Fmul) + (Fdiv) + (Fsgnj) + (Fsgnjn) + (Fsgnjx) + (Fmin) + (Fmax) + (Feq) + (Flt) + (Fle) + + ;; Zfa Extension + (Fminm) + (Fmaxm) +)) + + + +(type AluOPRRI (enum + ;; Base ISA + (Addi) + (Slti) + (SltiU) + (Xori) + (Ori) + (Andi) + (Slli) + (Srli) + (Srai) + (Addiw) + (Slliw) + (SrliW) + (Sraiw) + + ;; Zba: Address Generation Instructions + (SlliUw) + + ;; Zbb: Bit Manipulation Instructions + (Clz) + (Clzw) + (Ctz) + (Ctzw) + (Cpop) + (Cpopw) + (Sextb) + (Sexth) + (Zexth) + (Rori) + (Roriw) + (Rev8) + (Brev8) + (Orcb) + + ;; Zbs: Single-bit instructions + (Bclri) + (Bexti) + (Binvi) + (Bseti) +)) + +(type COpcodeSpace (enum + (C0) + (C1) + (C2) +)) + +;; Opcodes for the CR compressed instruction format +(type CrOp (enum + (CMv) + (CAdd) + (CJr) + (CJalr) + ;; c.ebreak technically isn't a CR format instruction, but it's encoding + ;; lines up with this format. 
+ (CEbreak) +)) + +;; Opcodes for the CA compressed instruction format +(type CaOp (enum + (CAnd) + (COr) + (CXor) + (CSub) + (CAddw) + (CSubw) + (CMul) +)) + +;; Opcodes for the CJ compressed instruction format +(type CjOp (enum + (CJ) +)) + +;; Opcodes for the CI compressed instruction format +(type CiOp (enum + (CAddi) + (CAddiw) + (CAddi16sp) + (CSlli) + (CLi) + (CLui) + (CLwsp) + (CLdsp) + (CFldsp) +)) + +;; Opcodes for the CIW compressed instruction format +(type CiwOp (enum + (CAddi4spn) +)) + +;; Opcodes for the CB compressed instruction format +(type CbOp (enum + (CSrli) + (CSrai) + (CAndi) +)) + +;; Opcodes for the CSS compressed instruction format +(type CssOp (enum + (CSwsp) + (CSdsp) + (CFsdsp) +)) + +;; Opcodes for the CS compressed instruction format +(type CsOp (enum + (CSw) + (CSd) + (CFsd) +)) + +;; Opcodes for the CL compressed instruction format +(type ClOp (enum + (CLw) + (CLd) + (CFld) +)) + +;; Opcodes for the CSZN compressed instruction format +(type CsznOp (enum + (CNot) + (CZextb) + (CZexth) + (CZextw) + (CSextb) + (CSexth) +)) + +;; This is a mix of all Zcb memory addressing instructions +;; +;; Technically they are split across 4 different formats. +;; But they are all very similar, so we just group them all together. +(type ZcbMemOp (enum + (CLbu) + (CLhu) + (CLh) + (CSb) + (CSh) +)) + + +(type CsrRegOP (enum + ;; Atomic Read/Write CSR + (CsrRW) + ;; Atomic Read and Set Bits in CSR + (CsrRS) + ;; Atomic Read and Clear Bits in CSR + (CsrRC) +)) + +(type CsrImmOP (enum + ;; Atomic Read/Write CSR (Immediate Source) + (CsrRWI) + ;; Atomic Read and Set Bits in CSR (Immediate Source) + (CsrRSI) + ;; Atomic Read and Clear Bits in CSR (Immediate Source) + (CsrRCI) +)) + +;; Enum of the known CSR registers +(type CSR (enum + ;; Floating-Point Dynamic Rounding Mode + (Frm) +)) + + +(type FRM (enum + ;; Round to Nearest, ties to Even + (RNE) + ;; Round towards Zero + (RTZ) + ;; Round Down (towards −∞) + (RDN) + ;; Round Up (towards +∞) + (RUP) + ;; Round to Nearest, ties to Max Magnitude + (RMM) + ;; In instruction’s rm field, selects dynamic rounding mode; + ;;In Rounding Mode register, Invalid. + (Fcsr) +)) + +(decl pure frm_bits (FRM) UImm5) +(extern constructor frm_bits frm_bits) +(convert FRM UImm5 frm_bits) + +(type FFlagsException (enum + ;; Invalid Operation + (NV) + ;; Divide by Zero + (DZ) + ;; Overflow + (OF) + ;; Underflow + (UF) + ;; Inexact + (NX) +)) + +;;;; input output read write +;;;; SI SO SR SW +;;;; PI PO PR PW +;;;; lowest four bit are used. 
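+;;;;
+;;;; For example, assuming the bits mirror the hardware fence encoding (I/O/R/W from
+;;;; high to low), pred = succ = 0b0011 would correspond to `fence rw, rw`.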
+(type FenceReq (primitive u8)) + +(type BoxCallInfo (primitive BoxCallInfo)) +(type BoxCallIndInfo (primitive BoxCallIndInfo)) +(type BoxReturnCallInfo (primitive BoxReturnCallInfo)) +(type BoxReturnCallIndInfo (primitive BoxReturnCallIndInfo)) +(type IntegerCompare (primitive IntegerCompare)) +(type AMode (primitive AMode)) +(type OptionReg (primitive OptionReg)) +(type OptionImm12 (primitive OptionImm12)) +(type OptionUimm5 (primitive OptionUimm5)) +(type Imm12 (primitive Imm12)) +(type UImm5 (primitive UImm5)) +(type Imm5 (primitive Imm5)) +(type Imm20 (primitive Imm20)) +(type Imm3 (primitive Imm3)) +(type CondBrTarget (primitive CondBrTarget)) +(type VecU8 (primitive VecU8)) +(type AMO (primitive AMO)) +(type VecMachLabel extern (enum)) + + +;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type XReg (primitive XReg)) +(type WritableXReg (primitive WritableXReg)) +(type FReg (primitive FReg)) +(type WritableFReg (primitive WritableFReg)) +(type VReg (primitive VReg)) +(type WritableVReg (primitive WritableVReg)) + +;; Construct a new `XReg` from a `Reg`. +;; +;; Asserts that the register has a Integer RegClass. +(decl xreg_new (Reg) XReg) +(extern constructor xreg_new xreg_new) +(convert Reg XReg xreg_new) + +;; Construct a new `WritableXReg` from a `WritableReg`. +;; +;; Asserts that the register has a Integer RegClass. +(decl writable_xreg_new (WritableReg) WritableXReg) +(extern constructor writable_xreg_new writable_xreg_new) +(convert WritableReg WritableXReg writable_xreg_new) + +;; Put a value into a XReg. +;; +;; Asserts that the value goes into a XReg. +(decl put_in_xreg (Value) XReg) +(rule (put_in_xreg val) (xreg_new (put_in_reg val))) +(convert Value XReg put_in_xreg) + +;; Construct an `InstOutput` out of a single XReg register. +(decl output_xreg (XReg) InstOutput) +(rule (output_xreg x) (output_reg x)) +(convert XReg InstOutput output_xreg) + +;; Convert a `WritableXReg` to an `XReg`. +(decl pure writable_xreg_to_xreg (WritableXReg) XReg) +(extern constructor writable_xreg_to_xreg writable_xreg_to_xreg) +(convert WritableXReg XReg writable_xreg_to_xreg) + +;; Convert a `WritableXReg` to an `WritableReg`. +(decl pure writable_xreg_to_writable_reg (WritableXReg) WritableReg) +(extern constructor writable_xreg_to_writable_reg writable_xreg_to_writable_reg) +(convert WritableXReg WritableReg writable_xreg_to_writable_reg) + +;; Convert a `WritableXReg` to an `Reg`. +(decl pure writable_xreg_to_reg (WritableXReg) Reg) +(rule (writable_xreg_to_reg x) (writable_xreg_to_writable_reg x)) +(convert WritableXReg Reg writable_xreg_to_reg) + +;; Convert an `XReg` to a `Reg`. +(decl pure xreg_to_reg (XReg) Reg) +(extern constructor xreg_to_reg xreg_to_reg) +(convert XReg Reg xreg_to_reg) + +;; Convert a `XReg` to a `ValueRegs`. +(decl xreg_to_value_regs (XReg) ValueRegs) +(rule (xreg_to_value_regs x) (value_reg x)) +(convert XReg ValueRegs xreg_to_reg) + +;; Convert a `WritableXReg` to a `ValueRegs`. +(decl writable_xreg_to_value_regs (WritableXReg) ValueRegs) +(rule (writable_xreg_to_value_regs x) (value_reg x)) +(convert WritableXReg ValueRegs writable_xreg_to_value_regs) + +;; Allocates a new `WritableXReg`. +(decl temp_writable_xreg () WritableXReg) +(rule (temp_writable_xreg) (temp_writable_reg $I64)) + + +;; Construct a new `FReg` from a `Reg`. +;; +;; Asserts that the register has a Float RegClass. 
+(decl freg_new (Reg) FReg)
+(extern constructor freg_new freg_new)
+(convert Reg FReg freg_new)
+
+;; Construct a new `WritableFReg` from a `WritableReg`.
+;;
+;; Asserts that the register has a Float RegClass.
+(decl writable_freg_new (WritableReg) WritableFReg)
+(extern constructor writable_freg_new writable_freg_new)
+(convert WritableReg WritableFReg writable_freg_new)
+
+;; Put a value into an FReg.
+;;
+;; Asserts that the value goes into an FReg.
+(decl put_in_freg (Value) FReg)
+(rule (put_in_freg val) (freg_new (put_in_reg val)))
+(convert Value FReg put_in_freg)
+
+;; Construct an `InstOutput` out of a single FReg register.
+(decl output_freg (FReg) InstOutput)
+(rule (output_freg x) (output_reg x))
+(convert FReg InstOutput output_freg)
+
+;; Convert a `WritableFReg` to an `FReg`.
+(decl pure writable_freg_to_freg (WritableFReg) FReg)
+(extern constructor writable_freg_to_freg writable_freg_to_freg)
+(convert WritableFReg FReg writable_freg_to_freg)
+
+;; Convert a `WritableFReg` to a `WritableReg`.
+(decl pure writable_freg_to_writable_reg (WritableFReg) WritableReg)
+(extern constructor writable_freg_to_writable_reg writable_freg_to_writable_reg)
+(convert WritableFReg WritableReg writable_freg_to_writable_reg)
+
+;; Convert a `WritableFReg` to a `Reg`.
+(decl pure writable_freg_to_reg (WritableFReg) Reg)
+(rule (writable_freg_to_reg x) (writable_freg_to_writable_reg x))
+(convert WritableFReg Reg writable_freg_to_reg)
+
+;; Convert an `FReg` to a `Reg`.
+(decl pure freg_to_reg (FReg) Reg)
+(extern constructor freg_to_reg freg_to_reg)
+(convert FReg Reg freg_to_reg)
+
+;; Convert an `FReg` to a `ValueRegs`.
+(decl freg_to_value_regs (FReg) ValueRegs)
+(rule (freg_to_value_regs x) (value_reg x))
+(convert FReg ValueRegs freg_to_value_regs)
+
+;; Convert a `WritableFReg` to a `ValueRegs`.
+(decl writable_freg_to_value_regs (WritableFReg) ValueRegs)
+(rule (writable_freg_to_value_regs x) (value_reg x))
+(convert WritableFReg ValueRegs writable_freg_to_value_regs)
+
+;; Allocates a new `WritableFReg`.
+(decl temp_writable_freg () WritableFReg)
+(rule (temp_writable_freg) (temp_writable_reg $F64))
+
+
+
+;; Construct a new `VReg` from a `Reg`.
+;;
+;; Asserts that the register has a Vector RegClass.
+(decl vreg_new (Reg) VReg)
+(extern constructor vreg_new vreg_new)
+(convert Reg VReg vreg_new)
+
+;; Construct a new `WritableVReg` from a `WritableReg`.
+;;
+;; Asserts that the register has a Vector RegClass.
+(decl writable_vreg_new (WritableReg) WritableVReg)
+(extern constructor writable_vreg_new writable_vreg_new)
+(convert WritableReg WritableVReg writable_vreg_new)
+
+;; Put a value into a VReg.
+;;
+;; Asserts that the value goes into a VReg.
+(decl put_in_vreg (Value) VReg)
+(rule (put_in_vreg val) (vreg_new (put_in_reg val)))
+(convert Value VReg put_in_vreg)
+
+;; Construct an `InstOutput` out of a single VReg register.
+(decl output_vreg (VReg) InstOutput)
+(rule (output_vreg x) (output_reg x))
+(convert VReg InstOutput output_vreg)
+
+;; Convert a `WritableVReg` to a `VReg`.
+(decl pure writable_vreg_to_vreg (WritableVReg) VReg)
+(extern constructor writable_vreg_to_vreg writable_vreg_to_vreg)
+(convert WritableVReg VReg writable_vreg_to_vreg)
+
+;; Convert a `WritableVReg` to a `WritableReg`.
+(decl pure writable_vreg_to_writable_reg (WritableVReg) WritableReg)
+(extern constructor writable_vreg_to_writable_reg writable_vreg_to_writable_reg)
+(convert WritableVReg WritableReg writable_vreg_to_writable_reg)
+
+;; Convert a `WritableVReg` to a `Reg`.
+(decl pure writable_vreg_to_reg (WritableVReg) Reg)
+(rule (writable_vreg_to_reg x) (writable_vreg_to_writable_reg x))
+(convert WritableVReg Reg writable_vreg_to_reg)
+
+;; Convert a `VReg` to a `Reg`.
+(decl pure vreg_to_reg (VReg) Reg)
+(extern constructor vreg_to_reg vreg_to_reg)
+(convert VReg Reg vreg_to_reg)
+
+;; Convert a `VReg` to a `ValueRegs`.
+(decl vreg_to_value_regs (VReg) ValueRegs)
+(rule (vreg_to_value_regs x) (value_reg x))
+(convert VReg ValueRegs vreg_to_value_regs)
+
+;; Convert a `WritableVReg` to a `ValueRegs`.
+(decl writable_vreg_to_value_regs (WritableVReg) ValueRegs)
+(rule (writable_vreg_to_value_regs x) (value_reg x))
+(convert WritableVReg ValueRegs writable_vreg_to_value_regs)
+
+;; Allocates a new `WritableVReg`.
+(decl temp_writable_vreg () WritableVReg)
+(rule (temp_writable_vreg) (temp_writable_reg $I8X16))
+
+
+;; Converters
+
+(convert u8 i32 u8_as_i32)
+(decl u8_as_i32 (u8) i32)
+(extern constructor u8_as_i32 u8_as_i32)
+
+;; ISA Extension helpers
+
+(decl pure has_m () bool)
+(extern constructor has_m has_m)
+
+(decl pure has_v () bool)
+(extern constructor has_v has_v)
+
+(decl pure has_zfa () bool)
+(extern constructor has_zfa has_zfa)
+
+(decl pure has_zfh () bool)
+(extern constructor has_zfh has_zfh)
+
+(decl pure has_zbkb () bool)
+(extern constructor has_zbkb has_zbkb)
+
+(decl pure has_zba () bool)
+(extern constructor has_zba has_zba)
+
+(decl pure has_zbb () bool)
+(extern constructor has_zbb has_zbb)
+
+(decl pure has_zbc () bool)
+(extern constructor has_zbc has_zbc)
+
+(decl pure has_zbs () bool)
+(extern constructor has_zbs has_zbs)
+
+(decl pure has_zicond () bool)
+(extern constructor has_zicond has_zicond)
+
+
+;;;; Type Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Helper that matches any supported type. This extractor checks the ISA flags
+;; to determine if the type is supported.
+(decl ty_supported (Type) Type)
+(extern extractor ty_supported ty_supported)
+
+;; Helper that matches any scalar floating point type
+(decl ty_supported_float (Type) Type)
+(extern extractor ty_supported_float ty_supported_float)
+
+;; Helper that matches any supported vector type
+(decl ty_supported_vec (Type) Type)
+(extern extractor ty_supported_vec ty_supported_vec)
+
+
+;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; RV32I Base Integer Instruction Set
+
+;; Helper for emitting the `add` instruction.
+;; rd ← rs1 + rs2
+(decl rv_add (XReg XReg) XReg)
+(rule (rv_add rs1 rs2)
+  (alu_rrr (AluOPRRR.Add) rs1 rs2))
+
+;; Helper for emitting the `addi` ("Add Immediate") instruction.
+;; rd ← rs1 + sext(imm)
+(decl rv_addi (XReg Imm12) XReg)
+(rule (rv_addi rs1 imm)
+  (alu_rr_imm12 (AluOPRRI.Addi) rs1 imm))
+
+;; Helper for emitting the `sub` instruction.
+;; rd ← rs1 - rs2
+(decl rv_sub (XReg XReg) XReg)
+(rule (rv_sub rs1 rs2)
+  (alu_rrr (AluOPRRR.Sub) rs1 rs2))
+
+;; Helper for emitting the `neg` instruction.
+;; This instruction is a mnemonic for `sub rd, zero, rs1`.
+(decl rv_neg (XReg) XReg)
+(rule (rv_neg rs1)
+  (alu_rrr (AluOPRRR.Sub) (zero_reg) rs1))
+
+;; Helper for emitting the `sll` ("Shift Left Logical") instruction.
+;; rd ← rs1 << rs2
+(decl rv_sll (XReg XReg) XReg)
+(rule (rv_sll rs1 rs2)
+  (alu_rrr (AluOPRRR.Sll) rs1 rs2))
+
+;; Helper for emitting the `slli` ("Shift Left Logical Immediate") instruction.
+;; rd ← rs1 << uext(imm) +(decl rv_slli (XReg Imm12) XReg) +(rule (rv_slli rs1 imm) + (alu_rr_imm12 (AluOPRRI.Slli) rs1 imm)) + +;; Helper for emitting the `srl` ("Shift Right Logical") instruction. +;; rd ← rs1 >> rs2 +(decl rv_srl (XReg XReg) XReg) +(rule (rv_srl rs1 rs2) + (alu_rrr (AluOPRRR.Srl) rs1 rs2)) + +;; Helper for emitting the `srli` ("Shift Right Logical Immediate") instruction. +;; rd ← rs1 >> uext(imm) +(decl rv_srli (XReg Imm12) XReg) +(rule (rv_srli rs1 imm) + (alu_rr_imm12 (AluOPRRI.Srli) rs1 imm)) + +;; Helper for emitting the `sra` ("Shift Right Arithmetic") instruction. +;; rd ← rs1 >> rs2 +(decl rv_sra (XReg XReg) XReg) +(rule (rv_sra rs1 rs2) + (alu_rrr (AluOPRRR.Sra) rs1 rs2)) + +;; Helper for emitting the `srai` ("Shift Right Arithmetic Immediate") instruction. +;; rd ← rs1 >> uext(imm) +(decl rv_srai (XReg Imm12) XReg) +(rule (rv_srai rs1 imm) + (alu_rr_imm12 (AluOPRRI.Srai) rs1 imm)) + +;; Helper for emitting the `or` instruction. +;; rd ← rs1 ∨ rs2 +(decl rv_or (XReg XReg) XReg) +(rule (rv_or rs1 rs2) + (alu_rrr (AluOPRRR.Or) rs1 rs2)) + +;; Helper for emitting the `ori` ("Or Immediate") instruction. +;; rd ← rs1 ∨ uext(imm) +(decl rv_ori (XReg Imm12) XReg) +(rule (rv_ori rs1 imm) + (alu_rr_imm12 (AluOPRRI.Ori) rs1 imm)) + +;; Helper for emitting the `xor` instruction. +;; rd ← rs1 ⊕ rs2 +(decl rv_xor (XReg XReg) XReg) +(rule (rv_xor rs1 rs2) + (alu_rrr (AluOPRRR.Xor) rs1 rs2)) + +;; Helper for emitting the `xori` ("Exclusive Or Immediate") instruction. +;; rd ← rs1 ⊕ uext(imm) +(decl rv_xori (XReg Imm12) XReg) +(rule (rv_xori rs1 imm) + (alu_rr_imm12 (AluOPRRI.Xori) rs1 imm)) + +;; Helper for emitting the `not` instruction. +;; This instruction is a mnemonic for `xori rd, rs1, -1`. +(decl rv_not (XReg) XReg) +(rule (rv_not rs1) + (rv_xori rs1 (imm12_const -1))) + +;; Helper for emitting the `and` instruction. +;; rd ← rs1 ∧ rs2 +(decl rv_and (XReg XReg) XReg) +(rule (rv_and rs1 rs2) + (alu_rrr (AluOPRRR.And) rs1 rs2)) + +;; Helper for emitting the `andi` ("And Immediate") instruction. +;; rd ← rs1 ∧ uext(imm) +(decl rv_andi (XReg Imm12) XReg) +(rule (rv_andi rs1 imm) + (alu_rr_imm12 (AluOPRRI.Andi) rs1 imm)) + +;; Helper for emitting the `slt` ("Set Less Than") instruction. +;; rd ← rs1 < rs2 +(decl rv_slt (XReg XReg) XReg) +(rule (rv_slt rs1 rs2) + (alu_rrr (AluOPRRR.Slt) rs1 rs2)) + +;; Helper for emitting the `sltu` ("Set Less Than Unsigned") instruction. +;; rd ← rs1 < rs2 +(decl rv_sltu (XReg XReg) XReg) +(rule (rv_sltu rs1 rs2) + (alu_rrr (AluOPRRR.SltU) rs1 rs2)) + +;; Helper for emitting the `snez` instruction. +;; This instruction is a mnemonic for `sltu rd, zero, rs`. +(decl rv_snez (XReg) XReg) +(rule (rv_snez rs1) + (rv_sltu (zero_reg) rs1)) + +;; Helper for emitting the `slti` ("Set Less Than Immediate") instruction. +;; rd ← rs1 < imm +(decl rv_slti (XReg Imm12) XReg) +(rule (rv_slti rs1 imm) + (alu_rr_imm12 (AluOPRRI.Slti) rs1 imm)) + +;; Helper for emitting the `sltiu` ("Set Less Than Immediate Unsigned") instruction. +;; rd ← rs1 < imm +(decl rv_sltiu (XReg Imm12) XReg) +(rule (rv_sltiu rs1 imm) + (alu_rr_imm12 (AluOPRRI.SltiU) rs1 imm)) + +;; Helper for emitting the `seqz` instruction. +;; This instruction is a mnemonic for `sltiu rd, rs, 1`. +(decl rv_seqz (XReg) XReg) +(rule (rv_seqz rs1) + (rv_sltiu rs1 (imm12_const 1))) + + +;; RV64I Base Integer Instruction Set +;; Unlike RV32I instructions these are only present in the 64bit ISA + +;; Helper for emitting the `addw` ("Add Word") instruction. 
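+;; The 32-bit sum is computed and then sign-extended to 64 bits; for example,
+;; adding 1 to 0x7fff_ffff yields 0xffff_ffff_8000_0000.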
+;; rd ← sext32(rs1) + sext32(rs2) +(decl rv_addw (XReg XReg) XReg) +(rule (rv_addw rs1 rs2) + (alu_rrr (AluOPRRR.Addw) rs1 rs2)) + +;; Helper for emitting the `addiw` ("Add Word Immediate") instruction. +;; rd ← sext32(rs1) + imm +(decl rv_addiw (XReg Imm12) XReg) +(rule (rv_addiw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Addiw) rs1 imm)) + +;; Helper for emitting the `sext.w` ("Sign Extend Word") instruction. +;; This instruction is a mnemonic for `addiw rd, rs, zero`. +(decl rv_sextw (XReg) XReg) +(rule (rv_sextw rs1) + (rv_addiw rs1 (imm12_const 0))) + +;; Helper for emitting the `subw` ("Subtract Word") instruction. +;; rd ← sext32(rs1) - sext32(rs2) +(decl rv_subw (XReg XReg) XReg) +(rule (rv_subw rs1 rs2) + (alu_rrr (AluOPRRR.Subw) rs1 rs2)) + +;; Helper for emitting the `sllw` ("Shift Left Logical Word") instruction. +;; rd ← sext32(uext32(rs1) << rs2) +(decl rv_sllw (XReg XReg) XReg) +(rule (rv_sllw rs1 rs2) + (alu_rrr (AluOPRRR.Sllw) rs1 rs2)) + +;; Helper for emitting the `slliw` ("Shift Left Logical Immediate Word") instruction. +;; rd ← sext32(uext32(rs1) << imm) +(decl rv_slliw (XReg Imm12) XReg) +(rule (rv_slliw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Slliw) rs1 imm)) + +;; Helper for emitting the `srlw` ("Shift Right Logical Word") instruction. +;; rd ← sext32(uext32(rs1) >> rs2) +(decl rv_srlw (XReg XReg) XReg) +(rule (rv_srlw rs1 rs2) + (alu_rrr (AluOPRRR.Srlw) rs1 rs2)) + +;; Helper for emitting the `srliw` ("Shift Right Logical Immediate Word") instruction. +;; rd ← sext32(uext32(rs1) >> imm) +(decl rv_srliw (XReg Imm12) XReg) +(rule (rv_srliw rs1 imm) + (alu_rr_imm12 (AluOPRRI.SrliW) rs1 imm)) + +;; Helper for emitting the `sraw` ("Shift Right Arithmetic Word") instruction. +;; rd ← sext32(rs1 >> rs2) +(decl rv_sraw (XReg XReg) XReg) +(rule (rv_sraw rs1 rs2) + (alu_rrr (AluOPRRR.Sraw) rs1 rs2)) + +;; Helper for emitting the `sraiw` ("Shift Right Arithmetic Immediate Word") instruction. +;; rd ← sext32(rs1 >> imm) +(decl rv_sraiw (XReg Imm12) XReg) +(rule (rv_sraiw rs1 imm) + (alu_rr_imm12 (AluOPRRI.Sraiw) rs1 imm)) + + +;; RV32M Extension +;; TODO: Enable these instructions only when we have the M extension + +;; Helper for emitting the `mul` instruction. +;; rd ← rs1 × rs2 +(decl rv_mul (XReg XReg) XReg) +(rule (rv_mul rs1 rs2) + (alu_rrr (AluOPRRR.Mul) rs1 rs2)) + +;; Helper for emitting the `mulh` ("Multiply High Signed Signed") instruction. +;; rd ← (sext(rs1) × sext(rs2)) » xlen +(decl rv_mulh (XReg XReg) XReg) +(rule (rv_mulh rs1 rs2) + (alu_rrr (AluOPRRR.Mulh) rs1 rs2)) + +;; Helper for emitting the `mulhu` ("Multiply High Unsigned Unsigned") instruction. +;; rd ← (uext(rs1) × uext(rs2)) » xlen +(decl rv_mulhu (XReg XReg) XReg) +(rule (rv_mulhu rs1 rs2) + (alu_rrr (AluOPRRR.Mulhu) rs1 rs2)) + +;; Helper for emitting the `div` instruction. +;; rd ← rs1 ÷ rs2 +(decl rv_div (XReg XReg) XReg) +(rule (rv_div rs1 rs2) + (alu_rrr (AluOPRRR.Div) rs1 rs2)) + +;; Helper for emitting the `divu` ("Divide Unsigned") instruction. +;; rd ← rs1 ÷ rs2 +(decl rv_divu (XReg XReg) XReg) +(rule (rv_divu rs1 rs2) + (alu_rrr (AluOPRRR.DivU) rs1 rs2)) + +;; Helper for emitting the `rem` instruction. +;; rd ← rs1 mod rs2 +(decl rv_rem (XReg XReg) XReg) +(rule (rv_rem rs1 rs2) + (alu_rrr (AluOPRRR.Rem) rs1 rs2)) + +;; Helper for emitting the `remu` ("Remainder Unsigned") instruction. 
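+;; Note that RISC-V division never traps: `remu` with a zero divisor returns
+;; the dividend (rs1), and `divu` with a zero divisor returns all ones.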
+;; rd ← rs1 mod rs2 +(decl rv_remu (XReg XReg) XReg) +(rule (rv_remu rs1 rs2) + (alu_rrr (AluOPRRR.RemU) rs1 rs2)) + +;; RV64M Extension +;; TODO: Enable these instructions only when we have the M extension + +;; Helper for emitting the `mulw` ("Multiply Word") instruction. +;; rd ← uext32(rs1) × uext32(rs2) +(decl rv_mulw (XReg XReg) XReg) +(rule (rv_mulw rs1 rs2) + (alu_rrr (AluOPRRR.Mulw) rs1 rs2)) + +;; Helper for emitting the `divw` ("Divide Word") instruction. +;; rd ← sext32(rs1) ÷ sext32(rs2) +(decl rv_divw (XReg XReg) XReg) +(rule (rv_divw rs1 rs2) + (alu_rrr (AluOPRRR.Divw) rs1 rs2)) + +;; Helper for emitting the `divuw` ("Divide Unsigned Word") instruction. +;; rd ← uext32(rs1) ÷ uext32(rs2) +(decl rv_divuw (XReg XReg) XReg) +(rule (rv_divuw rs1 rs2) + (alu_rrr (AluOPRRR.Divuw) rs1 rs2)) + +;; Helper for emitting the `remw` ("Remainder Word") instruction. +;; rd ← sext32(rs1) mod sext32(rs2) +(decl rv_remw (XReg XReg) XReg) +(rule (rv_remw rs1 rs2) + (alu_rrr (AluOPRRR.Remw) rs1 rs2)) + +;; Helper for emitting the `remuw` ("Remainder Unsigned Word") instruction. +;; rd ← uext32(rs1) mod uext32(rs2) +(decl rv_remuw (XReg XReg) XReg) +(rule (rv_remuw rs1 rs2) + (alu_rrr (AluOPRRR.Remuw) rs1 rs2)) + + +;; F and D Extensions +;; TODO: Enable these instructions only when we have the F or D extensions + +;; Helper for emitting the `fadd` instruction. +(decl rv_fadd (Type FRM FReg FReg) FReg) +(rule (rv_fadd ty frm rs1 rs2) (fpu_rrr (FpuOPRRR.Fadd) ty frm rs1 rs2)) + +;; Helper for emitting the `fsub` instruction. +(decl rv_fsub (Type FRM FReg FReg) FReg) +(rule (rv_fsub ty frm rs1 rs2) (fpu_rrr (FpuOPRRR.Fsub) ty frm rs1 rs2)) + +;; Helper for emitting the `fmul` instruction. +(decl rv_fmul (Type FRM FReg FReg) FReg) +(rule (rv_fmul ty frm rs1 rs2) (fpu_rrr (FpuOPRRR.Fmul) ty frm rs1 rs2)) + +;; Helper for emitting the `fdiv` instruction. +(decl rv_fdiv (Type FRM FReg FReg) FReg) +(rule (rv_fdiv ty frm rs1 rs2) (fpu_rrr (FpuOPRRR.Fdiv) ty frm rs1 rs2)) + +;; Helper for emitting the `fsqrt` instruction. +(decl rv_fsqrt (Type FRM FReg) FReg) +(rule (rv_fsqrt ty frm rs1) (fpu_rr (FpuOPRR.Fsqrt) ty frm rs1)) + +;; Helper for emitting the `fmadd` instruction. +(decl rv_fmadd (Type FRM FReg FReg FReg) FReg) +(rule (rv_fmadd ty frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.Fmadd) ty frm rs1 rs2 rs3)) + +;; Helper for emitting the `fmsub` instruction. +(decl rv_fmsub (Type FRM FReg FReg FReg) FReg) +(rule (rv_fmsub ty frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.Fmsub) ty frm rs1 rs2 rs3)) + +;; Helper for emitting the `fnmadd` instruction. +(decl rv_fnmadd (Type FRM FReg FReg FReg) FReg) +(rule (rv_fnmadd ty frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.Fnmadd) ty frm rs1 rs2 rs3)) + +;; Helper for emitting the `fnmsub` instruction. +(decl rv_fnmsub (Type FRM FReg FReg FReg) FReg) +(rule (rv_fnmsub ty frm rs1 rs2 rs3) (fpu_rrrr (FpuOPRRRR.Fnmsub) ty frm rs1 rs2 rs3)) + +;; Helper for emitting the `fmv.x.h` instruction. +(decl rv_fmvxh (FReg) XReg) +(rule (rv_fmvxh r) (fpu_rr_int (FpuOPRR.FmvXFmt) $F16 (FRM.RNE) r)) + +;; Helper for emitting the `fmv.x.w` instruction. +(decl rv_fmvxw (FReg) XReg) +(rule (rv_fmvxw r) (fpu_rr_int (FpuOPRR.FmvXFmt) $F32 (FRM.RNE) r)) + +;; Helper for emitting the `fmv.x.d` instruction. +(decl rv_fmvxd (FReg) XReg) +(rule (rv_fmvxd r) (fpu_rr_int (FpuOPRR.FmvXFmt) $F64 (FRM.RNE) r)) + +;; Helper for emitting the `fmv.h.x` instruction. +(decl rv_fmvhx (XReg) FReg) +(rule (rv_fmvhx r) (fpu_rr (FpuOPRR.FmvFmtX) $F16 (FRM.RNE) r)) + +;; Helper for emitting the `fmv.w.x` instruction. 
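+;; This is a raw move of the 32-bit pattern from the integer register into the
+;; float register; no numeric conversion or rounding takes place.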
+(decl rv_fmvwx (XReg) FReg) +(rule (rv_fmvwx r) (fpu_rr (FpuOPRR.FmvFmtX) $F32 (FRM.RNE) r)) + +;; Helper for emitting the `fmv.d.x` instruction. +(decl rv_fmvdx (XReg) FReg) +(rule (rv_fmvdx r) (fpu_rr (FpuOPRR.FmvFmtX) $F64 (FRM.RNE) r)) + +;; Helper for emitting the `fcvt.d.s` ("Float Convert Double to Single") instruction. +(decl rv_fcvtds (FReg) FReg) +(rule (rv_fcvtds rs1) (fpu_rr (FpuOPRR.FcvtDS) $F64 (FRM.RNE) rs1)) + +;; Helper for emitting the `fcvt.s.d` ("Float Convert Single to Double") instruction. +(decl rv_fcvtsd (FRM FReg) FReg) +(rule (rv_fcvtsd frm rs1) (fpu_rr (FpuOPRR.FcvtSD) $F32 frm rs1)) + +;; Helper for emitting the `fcvt.s.w` instruction. +(decl rv_fcvtsw (FRM XReg) FReg) +(rule (rv_fcvtsw frm rs1) (fpu_rr (FpuOPRR.FcvtFmtW) $F32 frm rs1)) + +;; Helper for emitting the `fcvt.s.wu` instruction. +(decl rv_fcvtswu (FRM XReg) FReg) +(rule (rv_fcvtswu frm rs1) (fpu_rr (FpuOPRR.FcvtFmtWu) $F32 frm rs1)) + +;; Helper for emitting the `fcvt.d.w` instruction. +(decl rv_fcvtdw (XReg) FReg) +(rule (rv_fcvtdw rs1) (fpu_rr (FpuOPRR.FcvtFmtW) $F64 (FRM.RNE) rs1)) + +;; Helper for emitting the `fcvt.d.wu` instruction. +(decl rv_fcvtdwu (XReg) FReg) +(rule (rv_fcvtdwu rs1) (fpu_rr (FpuOPRR.FcvtFmtWu) $F64 (FRM.RNE) rs1)) + +;; Helper for emitting the `fcvt.s.l` instruction. +(decl rv_fcvtsl (FRM XReg) FReg) +(rule (rv_fcvtsl frm rs1) (fpu_rr (FpuOPRR.FcvtFmtL) $F32 frm rs1)) + +;; Helper for emitting the `fcvt.s.lu` instruction. +(decl rv_fcvtslu (FRM XReg) FReg) +(rule (rv_fcvtslu frm rs1) (fpu_rr (FpuOPRR.FcvtFmtLu) $F32 frm rs1)) + +;; Helper for emitting the `fcvt.d.l` instruction. +(decl rv_fcvtdl (FRM XReg) FReg) +(rule (rv_fcvtdl frm rs1) (fpu_rr (FpuOPRR.FcvtFmtL) $F64 frm rs1)) + +;; Helper for emitting the `fcvt.d.lu` instruction. +(decl rv_fcvtdlu (FRM XReg) FReg) +(rule (rv_fcvtdlu frm rs1) (fpu_rr (FpuOPRR.FcvtFmtLu) $F64 frm rs1)) + +;; Helper for emitting the `fcvt.w.s` instruction. +(decl rv_fcvtws (FRM FReg) XReg) +(rule (rv_fcvtws frm rs1) (fpu_rr_int (FpuOPRR.FcvtWFmt) $F32 frm rs1)) + +;; Helper for emitting the `fcvt.l.s` instruction. +(decl rv_fcvtls (FRM FReg) XReg) +(rule (rv_fcvtls frm rs1) (fpu_rr_int (FpuOPRR.FcvtLFmt) $F32 frm rs1)) + +;; Helper for emitting the `fcvt.wu.s` instruction. +(decl rv_fcvtwus (FRM FReg) XReg) +(rule (rv_fcvtwus frm rs1) (fpu_rr_int (FpuOPRR.FcvtWuFmt) $F32 frm rs1)) + +;; Helper for emitting the `fcvt.lu.s` instruction. +(decl rv_fcvtlus (FRM FReg) XReg) +(rule (rv_fcvtlus frm rs1) (fpu_rr_int (FpuOPRR.FcvtLuFmt) $F32 frm rs1)) + +;; Helper for emitting the `fcvt.w.d` instruction. +(decl rv_fcvtwd (FRM FReg) XReg) +(rule (rv_fcvtwd frm rs1) (fpu_rr_int (FpuOPRR.FcvtWFmt) $F64 frm rs1)) + +;; Helper for emitting the `fcvt.l.d` instruction. +(decl rv_fcvtld (FRM FReg) XReg) +(rule (rv_fcvtld frm rs1) (fpu_rr_int (FpuOPRR.FcvtLFmt) $F64 frm rs1)) + +;; Helper for emitting the `fcvt.wu.d` instruction. +(decl rv_fcvtwud (FRM FReg) XReg) +(rule (rv_fcvtwud frm rs1) (fpu_rr_int (FpuOPRR.FcvtWuFmt) $F64 frm rs1)) + +;; Helper for emitting the `fcvt.lu.d` instruction. +(decl rv_fcvtlud (FRM FReg) XReg) +(rule (rv_fcvtlud frm rs1) (fpu_rr_int (FpuOPRR.FcvtLuFmt) $F64 frm rs1)) + +;; Helper for emitting the `fcvt.w.*` instructions. +(decl rv_fcvtw (Type FRM FReg) XReg) +(rule (rv_fcvtw $F32 frm rs1) (rv_fcvtws frm rs1)) +(rule (rv_fcvtw $F64 frm rs1) (rv_fcvtwd frm rs1)) + +;; Helper for emitting the `fcvt.l.*` instructions. 
+(decl rv_fcvtl (Type FRM FReg) XReg) +(rule (rv_fcvtl $F32 frm rs1) (rv_fcvtls frm rs1)) +(rule (rv_fcvtl $F64 frm rs1) (rv_fcvtld frm rs1)) + +;; Helper for emitting the `fcvt.wu.*` instructions. +(decl rv_fcvtwu (Type FRM FReg) XReg) +(rule (rv_fcvtwu $F32 frm rs1) (rv_fcvtwus frm rs1)) +(rule (rv_fcvtwu $F64 frm rs1) (rv_fcvtwud frm rs1)) + +;; Helper for emitting the `fcvt.lu.*` instructions. +(decl rv_fcvtlu (Type FRM FReg) XReg) +(rule (rv_fcvtlu $F32 frm rs1) (rv_fcvtlus frm rs1)) +(rule (rv_fcvtlu $F64 frm rs1) (rv_fcvtlud frm rs1)) + +;; Helper for emitting the `fsgnj` ("Floating Point Sign Injection") instruction. +;; The output of this instruction is `rs1` with the sign bit from `rs2` +;; This implements the `copysign` operation +(decl rv_fsgnj (Type FReg FReg) FReg) +(rule (rv_fsgnj ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fsgnj) ty (FRM.RNE) rs1 rs2)) + +;; Helper for emitting the `fsgnjn` ("Floating Point Sign Injection Negated") instruction. +;; The output of this instruction is `rs1` with the negated sign bit from `rs2` +;; When `rs1 == rs2` this implements the `neg` operation +(decl rv_fsgnjn (Type FReg FReg) FReg) +(rule (rv_fsgnjn ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fsgnjn) ty (FRM.RTZ) rs1 rs2)) + +;; Helper for emitting the `fneg` ("Floating Point Negate") instruction. +;; This instruction is a mnemonic for `fsgnjn rd, rs1, rs1` +(decl rv_fneg (Type FReg) FReg) +(rule (rv_fneg ty rs1) (rv_fsgnjn ty rs1 rs1)) + +;; Helper for emitting the `fsgnjx` ("Floating Point Sign Injection Exclusive") instruction. +;; The output of this instruction is `rs1` with the XOR of the sign bits from `rs1` and `rs2`. +;; When `rs1 == rs2` this implements `fabs` +(decl rv_fsgnjx (Type FReg FReg) FReg) +(rule (rv_fsgnjx ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fsgnjx) ty (FRM.RDN) rs1 rs2)) + +;; Helper for emitting the `fabs` ("Floating Point Absolute") instruction. +;; This instruction is a mnemonic for `fsgnjx rd, rs1, rs1` +(decl rv_fabs (Type FReg) FReg) +(rule (rv_fabs ty rs1) (rv_fsgnjx ty rs1 rs1)) + +;; Helper for emitting the `feq` ("Float Equal") instruction. +(decl rv_feq (Type FReg FReg) XReg) +(rule (rv_feq ty rs1 rs2) (fpu_rrr_int (FpuOPRRR.Feq) ty (FRM.RDN) rs1 rs2)) + +;; Helper for emitting the `flt` ("Float Less Than") instruction. +(decl rv_flt (Type FReg FReg) XReg) +(rule (rv_flt ty rs1 rs2) (fpu_rrr_int (FpuOPRRR.Flt) ty (FRM.RTZ) rs1 rs2)) + +;; Helper for emitting the `fle` ("Float Less Than or Equal") instruction. +(decl rv_fle (Type FReg FReg) XReg) +(rule (rv_fle ty rs1 rs2) (fpu_rrr_int (FpuOPRRR.Fle) ty (FRM.RNE) rs1 rs2)) + +;; Helper for emitting the `fgt` ("Float Greater Than") instruction. +;; Note: The arguments are reversed +(decl rv_fgt (Type FReg FReg) XReg) +(rule (rv_fgt ty rs1 rs2) (rv_flt ty rs2 rs1)) + +;; Helper for emitting the `fge` ("Float Greater Than or Equal") instruction. +;; Note: The arguments are reversed +(decl rv_fge (Type FReg FReg) XReg) +(rule (rv_fge ty rs1 rs2) (rv_fle ty rs2 rs1)) + +;; Helper for emitting the `fmin` instruction. +(decl rv_fmin (Type FReg FReg) FReg) +(rule (rv_fmin ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fmin) ty (FRM.RNE) rs1 rs2)) + +;; Helper for emitting the `fmax` instruction. +(decl rv_fmax (Type FReg FReg) FReg) +(rule (rv_fmax ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fmax) ty (FRM.RTZ) rs1 rs2)) + +;; `Zfa` Extension Instructions + +;; Helper for emitting the `fminm` instruction. +(decl rv_fminm (Type FReg FReg) FReg) +(rule (rv_fminm ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fminm) ty (FRM.RDN) rs1 rs2)) + +;; Helper for emitting the `fmaxm` instruction. 
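+;; Unlike `fmin`/`fmax`, the Zfa `fminm`/`fmaxm` instructions return a NaN if
+;; either input is NaN.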
+(decl rv_fmaxm (Type FReg FReg) FReg) +(rule (rv_fmaxm ty rs1 rs2) (fpu_rrr (FpuOPRRR.Fmaxm) ty (FRM.RUP) rs1 rs2)) + +;; Helper for emitting the `fround` instruction. +(decl rv_fround (Type FRM FReg) FReg) +(rule (rv_fround ty frm rs) (fpu_rr (FpuOPRR.Fround) ty frm rs)) + +;; Helper for emitting the `fli` instruction. +(decl rv_fli (Type FliConstant) FReg) +(rule (rv_fli ty imm) + (let ((dst WritableFReg (temp_writable_freg)) + (_ Unit (emit (MInst.Fli ty + imm + dst)))) + dst)) + +;; `Zba` Extension Instructions + +;; Helper for emitting the `adduw` ("Add Unsigned Word") instruction. +;; rd ← uext32(rs1) + uext32(rs2) +(decl rv_adduw (XReg XReg) XReg) +(rule (rv_adduw rs1 rs2) + (alu_rrr (AluOPRRR.Adduw) rs1 rs2)) + +;; Helper for emitting the `zext.w` ("Zero Extend Word") instruction. +;; This instruction is a mnemonic for `adduw rd, rs1, zero`. +;; rd ← uext32(rs1) +(decl rv_zextw (XReg) XReg) +(rule (rv_zextw rs1) + (rv_adduw rs1 (zero_reg))) + +;; Helper for emitting the `slli.uw` ("Shift Left Logical Immediate Unsigned Word") instruction. +;; rd ← uext32(rs1) << imm +(decl rv_slliuw (XReg Imm12) XReg) +(rule (rv_slliuw rs1 imm) + (alu_rr_imm12 (AluOPRRI.SlliUw) rs1 imm)) + + +;; `Zbb` Extension Instructions + +;; Helper for emitting the `andn` ("And Negated") instruction. +;; rd ← rs1 ∧ ~(rs2) +(decl rv_andn (XReg XReg) XReg) +(rule (rv_andn rs1 rs2) + (if-let $true (has_zbb)) + (alu_rrr (AluOPRRR.Andn) rs1 rs2)) +(rule (rv_andn rs1 rs2) + (if-let $false (has_zbb)) + (rv_and rs1 (rv_not rs2))) + +;; Helper for emitting the `orn` ("Or Negated") instruction. +;; rd ← rs1 ∨ ~(rs2) +(decl rv_orn (XReg XReg) XReg) +(rule (rv_orn rs1 rs2) + (alu_rrr (AluOPRRR.Orn) rs1 rs2)) + +;; Helper for emitting the `xnor` ("Exclusive NOR") instruction. +;; rd ← ~(rs1 ^ rs2) +(decl rv_xnor (XReg XReg) XReg) +(rule (rv_xnor rs1 rs2) + (alu_rrr (AluOPRRR.Xnor) rs1 rs2)) + +;; Helper for emitting the `clz` ("Count Leading Zero Bits") instruction. +(decl rv_clz (XReg) XReg) +(rule (rv_clz rs1) + (alu_rr_funct12 (AluOPRRI.Clz) rs1)) + +;; Helper for emitting the `clzw` ("Count Leading Zero Bits in Word") instruction. +(decl rv_clzw (XReg) XReg) +(rule (rv_clzw rs1) + (alu_rr_funct12 (AluOPRRI.Clzw) rs1)) + +;; Helper for emitting the `ctz` ("Count Trailing Zero Bits") instruction. +(decl rv_ctz (XReg) XReg) +(rule (rv_ctz rs1) + (alu_rr_funct12 (AluOPRRI.Ctz) rs1)) + +;; Helper for emitting the `ctzw` ("Count Trailing Zero Bits in Word") instruction. +(decl rv_ctzw (XReg) XReg) +(rule (rv_ctzw rs1) + (alu_rr_funct12 (AluOPRRI.Ctzw) rs1)) + +;; Helper for emitting the `cpop` ("Count Population") instruction. +(decl rv_cpop (XReg) XReg) +(rule (rv_cpop rs1) + (alu_rr_funct12 (AluOPRRI.Cpop) rs1)) + +;; Helper for emitting the `cpopw` ("Count Population") instruction. +(decl rv_cpopw (XReg) XReg) +(rule (rv_cpopw rs1) + (alu_rr_funct12 (AluOPRRI.Cpopw) rs1)) + +;; Helper for emitting the `max` instruction. +(decl rv_max (XReg XReg) XReg) +(rule (rv_max rs1 rs2) + (alu_rrr (AluOPRRR.Max) rs1 rs2)) + +;; Helper for emitting the `maxu` instruction. +(decl rv_maxu (XReg XReg) XReg) +(rule (rv_maxu rs1 rs2) + (alu_rrr (AluOPRRR.Maxu) rs1 rs2)) + +;; Helper for emitting the `min` instruction. +(decl rv_min (XReg XReg) XReg) +(rule (rv_min rs1 rs2) + (alu_rrr (AluOPRRR.Min) rs1 rs2)) + +;; Helper for emitting the `minu` instruction. +(decl rv_minu (XReg XReg) XReg) +(rule (rv_minu rs1 rs2) + (alu_rrr (AluOPRRR.Minu) rs1 rs2)) + +;; Helper for emitting the `sext.b` instruction. 
+(decl rv_sextb (XReg) XReg) +(rule (rv_sextb rs1) + (alu_rr_imm12 (AluOPRRI.Sextb) rs1 (imm12_const 0))) + +;; Helper for emitting the `sext.h` instruction. +(decl rv_sexth (XReg) XReg) +(rule (rv_sexth rs1) + (alu_rr_imm12 (AluOPRRI.Sexth) rs1 (imm12_const 0))) + +;; Helper for emitting the `zext.h` instruction. +(decl rv_zexth (XReg) XReg) +(rule (rv_zexth rs1) + (alu_rr_imm12 (AluOPRRI.Zexth) rs1 (imm12_const 0))) + +;; Helper for emitting the `rol` ("Rotate Left") instruction. +(decl rv_rol (XReg XReg) XReg) +(rule (rv_rol rs1 rs2) + (alu_rrr (AluOPRRR.Rol) rs1 rs2)) + +;; Helper for emitting the `rolw` ("Rotate Left Word") instruction. +(decl rv_rolw (XReg XReg) XReg) +(rule (rv_rolw rs1 rs2) + (alu_rrr (AluOPRRR.Rolw) rs1 rs2)) + +;; Helper for emitting the `ror` ("Rotate Right") instruction. +(decl rv_ror (XReg XReg) XReg) +(rule (rv_ror rs1 rs2) + (alu_rrr (AluOPRRR.Ror) rs1 rs2)) + +;; Helper for emitting the `rorw` ("Rotate Right Word") instruction. +(decl rv_rorw (XReg XReg) XReg) +(rule (rv_rorw rs1 rs2) + (alu_rrr (AluOPRRR.Rorw) rs1 rs2)) + +;; Helper for emitting the `rori` ("Rotate Right") instruction. +(decl rv_rori (XReg Imm12) XReg) +(rule (rv_rori rs1 rs2) + (alu_rr_imm12 (AluOPRRI.Rori) rs1 rs2)) + +;; Helper for emitting the `roriw` ("Rotate Right Word") instruction. +(decl rv_roriw (XReg Imm12) XReg) +(rule (rv_roriw rs1 rs2) + (alu_rr_imm12 (AluOPRRI.Roriw) rs1 rs2)) + +;; Helper for emitting the `rev8` ("Byte Reverse") instruction. +(decl rv_rev8 (XReg) XReg) +(rule (rv_rev8 rs1) + (alu_rr_funct12 (AluOPRRI.Rev8) rs1)) + +;; Helper for emitting the `brev8` ("Bit Reverse Inside Bytes") instruction. +;; TODO: This instruction is mentioned in some older versions of the +;; spec, but has since disappeared, we should follow up on this. +;; It probably was renamed to `rev.b` which seems to be the closest match. +(decl rv_brev8 (XReg) XReg) +(rule (rv_brev8 rs1) + (alu_rr_funct12 (AluOPRRI.Brev8) rs1)) + +;; `Zbs` Extension Instructions + +(decl rv_bclr (XReg XReg) XReg) +(rule (rv_bclr rs1 rs2) + (alu_rrr (AluOPRRR.Bclr) rs1 rs2)) + +(decl rv_bclri (XReg Imm12) XReg) +(rule (rv_bclri rs1 imm) + (alu_rr_imm12 (AluOPRRI.Bclri) rs1 imm)) + +(decl rv_bext (XReg XReg) XReg) +(rule (rv_bext rs1 rs2) + (alu_rrr (AluOPRRR.Bext) rs1 rs2)) + +(decl rv_bexti (XReg Imm12) XReg) +(rule (rv_bexti rs1 imm) + (alu_rr_imm12 (AluOPRRI.Bexti) rs1 imm)) + +(decl rv_binv (XReg XReg) XReg) +(rule (rv_binv rs1 rs2) + (alu_rrr (AluOPRRR.Binv) rs1 rs2)) + +(decl rv_binvi (XReg Imm12) XReg) +(rule (rv_binvi rs1 imm) + (alu_rr_imm12 (AluOPRRI.Binvi) rs1 imm)) + +(decl rv_bset (XReg XReg) XReg) +(rule (rv_bset rs1 rs2) + (alu_rrr (AluOPRRR.Bset) rs1 rs2)) + +;; Helper for emitting the `bseti` ("Single-Bit Set Immediate") instruction. +(decl rv_bseti (XReg Imm12) XReg) +(rule (rv_bseti rs1 imm) + (alu_rr_imm12 (AluOPRRI.Bseti) rs1 imm)) + +;; `Zbkb` Extension Instructions + +;; Helper for emitting the `pack` ("Pack low halves of registers") instruction. +(decl rv_pack (XReg XReg) XReg) +(rule (rv_pack rs1 rs2) + (alu_rrr (AluOPRRR.Pack) rs1 rs2)) + +;; Helper for emitting the `packw` ("Pack low 16-bits of registers") instruction. +(decl rv_packw (XReg XReg) XReg) +(rule (rv_packw rs1 rs2) + (alu_rrr (AluOPRRR.Packw) rs1 rs2)) + +;; `ZiCond` Extension Instructions + +;; Helper for emitting the `czero.eqz` ("Conditional zero, if condition is equal to zero") instruction. +;; RS1 is the data source +;; RS2 is the condition +;; +;; rd = (rs2 == 0) ? 
0 : rs1 +(decl rv_czero_eqz (XReg XReg) XReg) +(rule (rv_czero_eqz rs1 rs2) + (alu_rrr (AluOPRRR.CzeroEqz) rs1 rs2)) + +;; Helper for emitting the `czero.nez` ("Conditional zero, if condition is nonzero") instruction. +;; RS1 is the data source +;; RS2 is the condition +;; +;; rd = (rs2 != 0) ? 0 : rs1 +(decl rv_czero_nez (XReg XReg) XReg) +(rule (rv_czero_nez rs1 rs2) + (alu_rrr (AluOPRRR.CzeroNez) rs1 rs2)) + + +;; `Zicsr` Extension Instructions + +;; Helper for emitting the `csrrwi` instruction. +(decl rv_csrrwi (CSR UImm5) XReg) +(rule (rv_csrrwi csr imm) + (csr_imm (CsrImmOP.CsrRWI) csr imm)) + +;; This is a special case of `csrrwi` when the CSR is the `frm` CSR. +(decl rv_fsrmi (FRM) XReg) +(rule (rv_fsrmi frm) (rv_csrrwi (CSR.Frm) frm)) + + +;; Helper for emitting the `csrw` instruction. This is a special case of +;; `csrrw` where the destination register is always `x0`. +(decl rv_csrw (CSR XReg) Unit) +(rule (rv_csrw csr rs) + (csr_reg_dst_zero (CsrRegOP.CsrRW) csr rs)) + +;; This is a special case of `csrw` when the CSR is the `frm` CSR. +(decl rv_fsrm (XReg) Unit) +(rule (rv_fsrm rs) (rv_csrw (CSR.Frm) rs)) + + + + + + +;; Helper for generating a FliConstant from a u64 constant +(decl pure partial fli_constant_from_u64 (Type u64) FliConstant) +(extern constructor fli_constant_from_u64 fli_constant_from_u64) + +;; Helper for generating a FliConstant from a u64 negated constant +(decl pure partial fli_constant_from_negated_u64 (Type u64) FliConstant) +(extern constructor fli_constant_from_negated_u64 fli_constant_from_negated_u64) + +;; Helper for generating a i64 from a pair of Imm20 and Imm12 constants +(decl i64_generate_imm (Imm20 Imm12) i64) +(extern extractor i64_generate_imm i64_generate_imm) + +;; Helper for generating a i64 from a shift of a Imm20 constant with LUI +(decl i64_shift_for_lui (u64 Imm12) i64) +(extern extractor i64_shift_for_lui i64_shift_for_lui) + +;; Helper for generating a i64 from a shift of a Imm20 constant +(decl i64_shift (i64 Imm12) i64) +(extern extractor i64_shift i64_shift) + +;; Immediate Loading rules +;; TODO: Loading the zero reg directly causes a bunch of regalloc errors, we should look into it. +;; TODO: Load floats using `fld` instead of `ld` +(decl imm (Type u64) Reg) + +;; Special-case 0.0 for floats to use the `(zero_reg)` directly. +;; See #7162 for why this doesn't fall out of the rules below. +(rule 9 (imm (ty_supported_float $F16) 0) (gen_bitcast (zero_reg) $I16 $F16)) +(rule 9 (imm (ty_supported_float $F32) 0) (gen_bitcast (zero_reg) $I32 $F32)) +(rule 9 (imm (ty_supported_float $F64) 0) (gen_bitcast (zero_reg) $I64 $F64)) + +;; If Zfa is enabled, we can load certain constants with the `fli` instruction. +(rule 8 (imm (ty_supported_float (ty_32_or_64 ty)) imm) + (if-let $true (has_zfa)) + (if-let const (fli_constant_from_u64 ty imm)) + (rv_fli ty const)) + +;; It is beneficial to load the negated constant with `fli` and then negate it +;; in a register. +;; +;; For f64's this saves one instruction, and for f32's it avoids +;; having to allocate an integer register, reducing integer register pressure. +(rule 7 (imm (ty_supported_float (ty_32_or_64 ty)) imm) + (if-let $true (has_zfa)) + (if-let const (fli_constant_from_negated_u64 ty imm)) + (rv_fneg ty (rv_fli ty const))) + +;; Otherwise floats get loaded as integers and then moved into an F register. 
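+;; For example, materializing 1.0 as an `f32` loads the bit pattern 0x3f80_0000
+;; into an integer register and then bitcasts it into a float register
+;; (typically an `fmv.w.x`).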
+(rule 6 (imm (ty_supported_float $F16) c) (gen_bitcast (imm $I16 c) $I16 $F16)) +(rule 6 (imm (ty_supported_float $F32) c) (gen_bitcast (imm $I32 c) $I32 $F32)) +(rule 6 (imm (ty_supported_float $F64) c) (gen_bitcast (imm $I64 c) $I64 $F64)) + +;; Try to match just an imm12 +(rule 4 (imm (ty_int ty) c) + (if-let (i64_generate_imm (imm20_is_zero) imm12) (i64_sextend_u64 ty c)) + (rv_addi (zero_reg) imm12)) + +;; We can also try to load using a single LUI. +;; LUI takes a 20 bit immediate, places it on bits 13 to 32 of the register. +;; In RV64 this value is then sign extended to 64bits. +(rule 3 (imm (ty_int ty) c) + (if-let (i64_generate_imm imm20 (imm12_is_zero)) (i64_sextend_u64 ty c)) + (rv_lui imm20)) + +;; We can combo addi + lui to represent all 32-bit immediates +;; And some 64-bit immediates as well. +(rule 2 (imm (ty_int ty) c) + (if-let (i64_generate_imm imm20 imm12) (i64_sextend_u64 ty c)) + (rv_addi (rv_lui imm20) imm12)) + +;; If the non-zero bits of the immediate fit in 20 bits, we can use LUI + shift +(rule 1 (imm (ty_int ty) c) + (if-let (i64_shift_for_lui (imm20_from_u64 base) shift) (i64_sextend_u64 ty c)) + (rv_slli (rv_lui base) shift)) + +;; Combine one of the above rules with a shift-left if possible, This chops off +;; all trailing zeros from the input constant and then attempts if the resulting +;; constant can itself use one of the above rules via the `i64_generate_imm` +;; matcher. This will then recurse on the above rules to materialize a smaller +;; constant which is then shifted left to create the desired constant. +(rule 0 (imm (ty_int ty) c) + (if-let (i64_shift c_shifted shift) (i64_sextend_u64 ty c)) ;; constant to make + (if-let (i64_generate_imm _ _) c_shifted) ;; can the smaller constant be made? + (rv_slli (imm ty (i64_as_u64 c_shifted)) shift)) + +;; Otherwise we fall back to loading the immediate from the constant pool. +(rule -1 (imm (ty_int ty) c) + (gen_load + (gen_const_amode (emit_u64_le_const c)) + (LoadOP.Ld) + (mem_flags_trusted))) + +;; Imm12 Rules + +(decl pure imm12_zero () Imm12) +(rule (imm12_zero) (imm12_const 0)) + +(decl pure imm12_const (i32) Imm12) +(extern constructor imm12_const imm12_const) + +(decl load_imm12 (i32) Reg) +(rule + (load_imm12 x) + (rv_addi (zero_reg) (imm12_const x))) + +;; for load immediate +(decl imm_from_bits (u64) Imm12) +(extern constructor imm_from_bits imm_from_bits) + +(decl imm_from_neg_bits (i64) Imm12) +(extern constructor imm_from_neg_bits imm_from_neg_bits) + +(decl imm12_const_add (i32 i32) Imm12) +(extern constructor imm12_const_add imm12_const_add) + +;; Performs a fallible add of the `Imm12` value and the 32-bit value provided. +(decl pure partial imm12_add (Imm12 i32) Imm12) +(extern constructor imm12_add imm12_add) + +(decl imm12_and (Imm12 u64) Imm12) +(extern constructor imm12_and imm12_and) + +;; Imm12 Extractors + +;; Helper to go directly from a `Value`, when it's an `iconst`, to an `Imm12`. +(decl imm12_from_value (Imm12) Value) +(extractor (imm12_from_value n) (i64_from_iconst (imm12_from_i64 n))) + +;; Conceptually the same as `imm12_from_value`, but tries negating the constant +;; value (first sign-extending to handle narrow widths). 
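+;; For example, an 8-bit `iconst 0x80` sign-extends to -128, whose negation
+;; (128) fits in an `Imm12`, so this helper returns 128.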
+(decl pure partial imm12_from_negated_value (Value) Imm12) +(rule + (imm12_from_negated_value (has_type ty (iconst n))) + (if-let (imm12_from_u64 imm) (i64_as_u64 (i64_neg (i64_sextend_imm64 ty n)))) + imm) + +(decl imm12_from_u64 (Imm12) u64) +(extern extractor imm12_from_u64 imm12_from_u64) + +(decl imm12_from_i64 (Imm12) i64) +(extern extractor imm12_from_i64 imm12_from_i64) + +(decl pure partial u64_to_imm12 (u64) Imm12) +(rule (u64_to_imm12 (imm12_from_u64 n)) n) + +(decl pure imm12_is_zero () Imm12) +(extern extractor imm12_is_zero imm12_is_zero) + +;; Imm20 + +;; Extractor that matches if a Imm20 is zero +(decl pure imm20_is_zero () Imm20) +(extern extractor imm20_is_zero imm20_is_zero) + +(decl imm20_from_u64 (Imm20) u64) +(extern extractor imm20_from_u64 imm20_from_u64) + +(decl imm20_from_i64 (Imm20) i64) +(extern extractor imm20_from_i64 imm20_from_i64) + + +;; Imm5 Extractors + +(decl imm5_from_u64 (Imm5) u64) +(extern extractor imm5_from_u64 imm5_from_u64) + +(decl imm5_from_i64 (Imm5) i64) +(extern extractor imm5_from_i64 imm5_from_i64) + +;; Construct a Imm5 from an i8 +(decl pure partial i8_to_imm5 (i8) Imm5) +(extern constructor i8_to_imm5 i8_to_imm5) + +;; Helper to go directly from a `Value` to an `Imm5`. +(decl imm5_from_value (Imm5) Value) +(extractor (imm5_from_value n) (i64_from_iconst (imm5_from_i64 n))) + +;; Like imm5_from_value, but first negates the `Value`. +(decl pure partial imm5_from_negated_value (Value) Imm5) +(rule (imm5_from_negated_value (has_type ty (iconst n))) + (if-let (imm5_from_i64 imm) (i64_neg (i64_sextend_imm64 ty n))) + imm) + +;; Constructor that matches a `Value` equivalent to a replicated Imm5 on all lanes. +(decl pure partial replicated_imm5 (Value) Imm5) +(rule (replicated_imm5 (splat (imm5_from_value n))) n) +(rule (replicated_imm5 (vconst (u128_from_constant n128))) + (if-let (u128_replicated_u64 n64) n128) + (if-let (u64_replicated_u32 n32) n64) + (if-let (u32_replicated_u16 n16) n32) + (if-let (u16_replicated_u8 n8) n16) + (if-let n (i8_to_imm5 (u8_as_i8 n8))) + n) + +;; Like replicated_imm5, but first negates the `Value`. +(decl pure partial negated_replicated_imm5 (Value) Imm5) +(rule (negated_replicated_imm5 (splat n)) + (if-let imm5 (imm5_from_negated_value n)) + imm5) +(rule (negated_replicated_imm5 (vconst (u128_from_constant n128))) + (if-let (u128_replicated_u64 n64) n128) + (if-let (u64_replicated_u32 n32) n64) + (if-let (u32_replicated_u16 n16) n32) + (if-let (u16_replicated_u8 n8) n16) + (if-let n (i8_to_imm5 (i8_neg (u8_as_i8 n8)))) + n) + +;; UImm5 Helpers + +;; Constructor that matches a `Value` equivalent to a replicated UImm5 on all lanes. +(decl pure partial replicated_uimm5 (Value) UImm5) +(rule (replicated_uimm5 (splat (uimm5_from_value n))) n) +(rule 1 (replicated_uimm5 (vconst (u128_from_constant n128))) + (if-let (u128_replicated_u64 n64) n128) + (if-let (u64_replicated_u32 n32) n64) + (if-let (u32_replicated_u16 n16) n32) + (if-let (u16_replicated_u8 n8) n16) + (if-let (uimm5_from_u8 n) n8) + n) + +;; Helper to go directly from a `Value`, when it's an `iconst`, to an `UImm5`. +(decl uimm5_from_value (UImm5) Value) +(extractor (uimm5_from_value n) + (iconst (u64_from_imm64 (uimm5_from_u64 n)))) + +;; Extract a `UImm5` from an `u8`. +(decl pure partial uimm5_from_u8 (UImm5) u8) +(extern extractor uimm5_from_u8 uimm5_from_u8) + +;; Extract a `UImm5` from an `u64`. 
+(decl pure partial uimm5_from_u64 (UImm5) u64) +(extern extractor uimm5_from_u64 uimm5_from_u64) + +;; Convert a `u64` into an `UImm5` +(decl pure partial u64_to_uimm5 (u64) UImm5) +(rule (u64_to_uimm5 (uimm5_from_u64 n)) n) + +(decl uimm5_bitcast_to_imm5 (UImm5) Imm5) +(extern constructor uimm5_bitcast_to_imm5 uimm5_bitcast_to_imm5) + +;; Float Helpers + +;; Returns the bitpattern of the Canonical NaN for the given type. +(decl pure canonical_nan_u64 (Type) u64) +(rule (canonical_nan_u64 $F32) 0x7fc00000) +(rule (canonical_nan_u64 $F64) 0x7ff8000000000000) + +;; Helper for emitting `MInst.FpuRR` instructions. +(decl fpu_rr (FpuOPRR Type FRM Reg) FReg) +(rule (fpu_rr op ty frm src) + (let ((dst WritableFReg (temp_writable_freg)) + (_ Unit (emit (MInst.FpuRR op ty frm dst src)))) + dst)) + +;; Similar to fpu_rr but with an integer destination register +(decl fpu_rr_int (FpuOPRR Type FRM Reg) XReg) +(rule (fpu_rr_int op ty frm src) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.FpuRR op ty frm dst src)))) + dst)) + +;; Helper for emitting `MInst.AluRRR` instructions. +(decl alu_rrr (AluOPRRR Reg Reg) Reg) +(rule (alu_rrr op src1 src2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRR op dst src1 src2)))) + dst)) + +;; Helper for emitting `MInst.FpuRRR` instructions. +(decl fpu_rrr (FpuOPRRR Type FRM Reg Reg) FReg) +(rule (fpu_rrr op ty frm src1 src2) + (let ((dst WritableFReg (temp_writable_freg)) + (_ Unit (emit (MInst.FpuRRR op ty frm dst src1 src2)))) + dst)) + +;; Similar to fpu_rrr but with an integer destination register +(decl fpu_rrr_int (FpuOPRRR Type FRM Reg Reg) XReg) +(rule (fpu_rrr_int op ty frm src1 src2) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.FpuRRR op ty frm dst src1 src2)))) + dst)) + +;; Helper for emitting `MInst.FpuRRRR` instructions. +(decl fpu_rrrr (FpuOPRRRR Type FRM Reg Reg Reg) FReg) +(rule (fpu_rrrr op ty frm src1 src2 src3) + (let ((dst WritableFReg (temp_writable_freg)) + (_ Unit (emit (MInst.FpuRRRR op ty frm dst src1 src2 src3)))) + dst)) + + +;; Helper for emitting `MInst.AluRRImm12` instructions. +(decl alu_rr_imm12 (AluOPRRI Reg Imm12) Reg) +(rule (alu_rr_imm12 op src imm) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRImm12 op dst src imm)))) + dst)) + +;; some instruction use imm12 as funct12. +;; so we don't need the imm12 parameter. +(decl alu_rr_funct12 (AluOPRRI Reg) Reg) +(rule (alu_rr_funct12 op src) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero))))) + dst)) + +;; Helper for emitting the `Lui` instruction. +;; TODO: This should be something like `emit_u_type`. And should share the +;; `MInst` with `auipc` since these instructions share the U-Type format. +(decl rv_lui (Imm20) XReg) +(rule (rv_lui imm) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Lui dst imm)))) + dst)) + +;; Helper for emitting `MInst.CsrImm` instructions. +(decl csr_imm (CsrImmOP CSR UImm5) XReg) +(rule (csr_imm op csr imm) + (let ((dst WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.CsrImm op dst imm csr)))) + dst)) + +;; Helper for emitting a `MInst.CsrReg` instruction that writes the result to x0. 
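+;; Using `x0` as the destination discards the old CSR value; the ISA guarantees
+;; that a `csrrw` with `rd=x0` does not read the CSR at all, so no read side
+;; effects occur.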
+(decl csr_reg_dst_zero (CsrRegOP CSR XReg) Unit) +(rule (csr_reg_dst_zero op csr rs) + (emit (MInst.CsrReg op (writable_zero_reg) rs csr))) + + + +(decl select_addi (Type) AluOPRRI) +(rule 1 (select_addi (fits_in_32 ty)) (AluOPRRI.Addiw)) +(rule (select_addi (fits_in_64 ty)) (AluOPRRI.Addi)) + + +(decl gen_andi (XReg u64) XReg) +(rule 1 (gen_andi x (imm12_from_u64 y)) + (rv_andi x y)) + +(rule 0 (gen_andi x y) + (rv_and x (imm $I64 y))) + + +(decl gen_or (Type ValueRegs ValueRegs) ValueRegs) +(rule 1 (gen_or $I128 x y) + (value_regs + (rv_or (value_regs_get x 0) (value_regs_get y 0)) + (rv_or (value_regs_get x 1) (value_regs_get y 1)))) + +(rule 0 (gen_or (fits_in_64 _) x y) + (rv_or (value_regs_get x 0) (value_regs_get y 0))) + + +(decl lower_ctz (Type Reg) Reg) +(rule (lower_ctz ty x) + (gen_cltz $false x ty)) + +(rule 1 (lower_ctz (fits_in_16 ty) x) + (if-let $true (has_zbb)) + (let ((tmp Reg (gen_bseti x (ty_bits ty)))) + (rv_ctzw tmp))) + +(rule 2 (lower_ctz $I32 x) + (if-let $true (has_zbb)) + (rv_ctzw x)) + +(rule 2 (lower_ctz $I64 x) + (if-let $true (has_zbb)) + (rv_ctz x)) + +;; Count leading zeros from a i128 bit value. +;; We count both halves separately and conditionally add them if it makes sense. + +(decl gen_cltz (bool XReg Type) XReg) +(rule (gen_cltz leading rs ty) + (let ((tmp WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (sum WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Cltz leading sum step tmp rs ty)))) + sum)) + +;; Performs a zero extension of the given value +(decl zext (Value) XReg) + +;; In the most generic case, we shift left and then shift right. +(rule 0 (zext val @ (value_type (fits_in_32 ty))) + (let ((shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits ty))))) + (rv_srli (rv_slli val shift) shift))) + +;; If we are zero extending a U8 we can use a `andi` instruction. +(rule 1 (zext val @ (value_type $I8)) + (rv_andi val (imm12_const 0xff))) + +;; No point in trying to use `packh` here to zero extend 8 bit values +;; since we can just use `andi` instead which is part of the base ISA. + +;; If we have the `zbkb` extension `packw` can be used to zero extend 16 bit values +(rule 1 (zext val @ (value_type $I16)) + (if-let $true (has_zbkb)) + (rv_packw val (zero_reg))) + +;; If we have the `zbkb` extension `pack` can be used to zero extend 32 bit registers +(rule 1 (zext val @ (value_type $I32)) + (if-let $true (has_zbkb)) + (rv_pack val (zero_reg))) + +;; If we have the `zbb` extension we can use the dedicated `zext.h` instruction. +(rule 2 (zext val @ (value_type $I16)) + (if-let $true (has_zbb)) + (rv_zexth val)) + +;; With `zba` we have a `zext.w` instruction +(rule 2 (zext val @ (value_type $I32)) + (if-let $true (has_zba)) + (rv_zextw val)) + +;; Ignore sign extensions for values whose representation is already the full +;; register width. +(rule 3 (zext val) + (if (val_already_extended (ExtendOp.Zero) val)) + val) + +;; Performs a signed extension of the given value +(decl sext (Value) XReg) + +;; Same base case as `zext`, shift left-then-right. +(rule 0 (sext val @ (value_type (fits_in_32 ty))) + (let ((shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits ty))))) + (rv_srai (rv_slli val shift) shift))) + +;; If we have the `zbb` extension we can use the dedicated `sext.b` instruction. +(rule 1 (sext val @ (value_type $I8)) + (if-let $true (has_zbb)) + (rv_sextb val)) + +;; If we have the `zbb` extension we can use the dedicated `sext.h` instruction. 
+(rule 1 (sext val @ (value_type $I16))
+  (if-let $true (has_zbb))
+  (rv_sexth val))
+
+;; When sign-extending from 32 to 64 bits we can use an
+;; `addiw val, 0`, also known as `sext.w`.
+(rule 1 (sext val @ (value_type $I32))
+  (rv_sextw val))
+
+;; Ignore sign extensions for values whose representation is already the full
+;; register width.
+(rule 2 (sext val)
+  (if (val_already_extended (ExtendOp.Signed) val))
+  val)
+
+;; Helper matcher for when a value's representation is already sign or zero
+;; extended to the full 64-bit register representation. This is used by `zext`
+;; and `sext` above to skip the extension instruction entirely in some
+;; circumstances.
+(decl pure partial val_already_extended (ExtendOp Value) bool)
+(rule 0 (val_already_extended _ v @ (value_type $I64)) $true)
+
+;; When extending, our backend always extends to the full register width, so
+;; there's no need to extend an extend.
+(rule 1 (val_already_extended (ExtendOp.Zero) (uextend _)) $true)
+(rule 1 (val_already_extended (ExtendOp.Signed) (sextend _)) $true)
+
+;; The result of `icmp`/`fcmp` is zero or one, meaning that it's already sign
+;; extended to the full register width.
+(rule 1 (val_already_extended _ (icmp _ _ _)) $true)
+(rule 1 (val_already_extended _ (fcmp _ _ _)) $true)
+
+;; The lowerings for these operations always sign-extend their results due to
+;; the use of the `*w` instructions in RV64I. Note that this only applies to
+;; extensions from 32 to 64 bits; 16/8-bit operations are explicitly excluded
+;; here. There are no native instructions for the 16/8-bit operations, so they
+;; must fall through to the actual sign extension above.
+(rule 1 (val_already_extended (ExtendOp.Signed) (has_type $I32 (ishl _ _))) $true)
+(rule 1 (val_already_extended (ExtendOp.Signed) (has_type $I32 (ushr _ _))) $true)
+(rule 1 (val_already_extended (ExtendOp.Signed) (has_type $I32 (sshr _ _))) $true)
+(rule 1 (val_already_extended (ExtendOp.Signed) (has_type $I32 (iadd _ _))) $true)
+(rule 1 (val_already_extended (ExtendOp.Signed) (has_type $I32 (isub _ _))) $true)
+
+(type ExtendOp
+  (enum
+    (Zero)
+    (Signed)))
+
+(decl lower_b128_binary (AluOPRRR ValueRegs ValueRegs) ValueRegs)
+(rule
+  (lower_b128_binary op a b)
+  (let
+    ( ;; low part.
+      (low XReg (alu_rrr op (value_regs_get a 0) (value_regs_get b 0)))
+      ;; high part.
+      (high XReg (alu_rrr op (value_regs_get a 1) (value_regs_get b 1))))
+    (value_regs low high)))
+
+(decl lower_smlhi (Type XReg XReg) XReg)
+(rule 1
+  (lower_smlhi $I64 rs1 rs2)
+  (rv_mulh rs1 rs2))
+
+(rule
+  (lower_smlhi ty rs1 rs2)
+  (let
+    ((tmp XReg (rv_mul rs1 rs2)))
+    (rv_srli tmp (imm12_const (ty_bits ty)))))
+
+;; Construct a shift amount. `rotl` on i128 is implemented in terms of shifts,
+;; so it can use this helper as well.
+;; This returns both the shift amount and (ty_bits - shift amount).
+;; If ty_bits is greater than 64 (e.g. for i128), the shift amount falls back
+;; to 64, because this is a 64-bit platform.
+(decl gen_shamt (Type XReg) ValueRegs)
+(extern constructor gen_shamt gen_shamt)
+
+;; bseti: Set a single bit in a register, indexed by a constant.
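+;; For example, `(gen_bseti x 16)` ors `x` with a materialized 0x10000 when Zbs
+;; is unavailable, and emits a single `bseti x, 16` when it is available.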
+(decl gen_bseti (Reg u64) Reg)
+(rule (gen_bseti val bit)
+  (if-let $false (has_zbs))
+  (if-let $false (u64_le bit 12))
+  (let ((const XReg (imm $I64 (u64_shl 1 bit))))
+    (rv_or val const)))
+
+(rule (gen_bseti val bit)
+  (if-let $false (has_zbs))
+  (if-let $true (u64_le bit 12))
+  (rv_ori val (imm12_const (u64_as_i32 (u64_shl 1 bit)))))
+
+(rule (gen_bseti val bit)
+  (if-let $true (has_zbs))
+  (rv_bseti val (imm12_const (u64_as_i32 bit))))
+
+
+(decl gen_popcnt (XReg) Reg)
+(rule (gen_popcnt rs)
+  (let
+    ((tmp WritableXReg (temp_writable_xreg))
+     (step WritableXReg (temp_writable_xreg))
+     (sum WritableXReg (temp_writable_xreg))
+     (_ Unit (emit (MInst.Popcnt sum step tmp rs $I64))))
+    (writable_reg_to_reg sum)))
+
+;; Generates an AMode that points to a register plus an offset.
+(decl gen_reg_offset_amode (Reg i64) AMode)
+(extern constructor gen_reg_offset_amode gen_reg_offset_amode)
+
+;; Generates an AMode that points to an offset from the stack pointer.
+(decl gen_sp_offset_amode (i64) AMode)
+(extern constructor gen_sp_offset_amode gen_sp_offset_amode)
+
+;; Generates an AMode that points to an offset from the frame pointer.
+(decl gen_fp_offset_amode (i64) AMode)
+(extern constructor gen_fp_offset_amode gen_fp_offset_amode)
+
+;; Generates an AMode that points to a stack slot + offset.
+(decl gen_stack_slot_amode (StackSlot i64) AMode)
+(extern constructor gen_stack_slot_amode gen_stack_slot_amode)
+
+;; Generates an AMode that points to a constant in the constant pool.
+(decl gen_const_amode (VCodeConstant) AMode)
+(extern constructor gen_const_amode gen_const_amode)
+
+
+
+;; Tries to match a Value + Offset into an AMode.
+(decl amode (Value i32) AMode)
+(rule 0 (amode addr offset) (amode_inner addr offset))
+
+;; If we are adding a constant offset with an iadd we can instead make that
+;; offset part of the amode offset.
+;;
+;; We can't recurse into `amode` again since that could cause stack overflows.
+;; See: https://github.com/bytecodealliance/wasmtime/pull/6968
+(rule 1 (amode (iadd addr (i32_from_iconst y)) offset)
+  (if-let new_offset (s32_add_fallible y offset))
+  (amode_inner addr new_offset))
+(rule 2 (amode (iadd (i32_from_iconst x) addr) offset)
+  (if-let new_offset (s32_add_fallible x offset))
+  (amode_inner addr new_offset))
+
+
+;; These are the normal rules for generating an AMode.
+(decl amode_inner (Value i32) AMode)
+
+;; In the simplest case we just lower into a Reg+Offset.
+(rule 0 (amode_inner r @ (value_type (ty_addr64 _)) offset)
+  (gen_reg_offset_amode r offset))
+
+;; If the value is a `get_frame_pointer`, we can just use the offset from that.
+(rule 1 (amode_inner (get_frame_pointer) offset)
+  (gen_fp_offset_amode offset))
+
+;; If the value is a `get_stack_pointer`, we can just use the offset from that.
+(rule 1 (amode_inner (get_stack_pointer) offset)
+  (gen_sp_offset_amode offset))
+
+;; Similarly, if the value is a `stack_addr` we can turn that into a stack
+;; slot plus offset.
+(rule 1 (amode_inner (stack_addr ss ss_offset) amode_offset)
+  (if-let combined_offset (s32_add_fallible ss_offset amode_offset))
+  (gen_stack_slot_amode ss combined_offset))
+
+
+;; Helpers for sinkable loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; RISC-V doesn't really have sinkable loads, but the regular load instructions
+;; sign- or zero-extend their results to 64 bits, so we can pretend they are an
+;; extend instruction with a sinkable load. This allows us to have better
+;; lowerings in these cases.
+
+;; Extract a sinkable instruction from a value operand.
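+;; For example, a `uextend` of an 8-bit load can be matched this way and
+;; lowered to a single `lbu`, with the original load marked as sunk so it is
+;; not also emitted separately.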
+(decl sinkable_inst (Inst) Value) +(extern extractor sinkable_inst sinkable_inst) + +;; Matches a sinkable load. +(decl sinkable_load (Inst Type MemFlags Value Offset32) Value) +(extractor (sinkable_load inst ty flags addr offset) + (and + (load flags addr offset) + (sinkable_inst (has_type ty inst)))) + +;; Returns a canonical type for a LoadOP. We only return I64 or F64. +(decl load_op_reg_type (LoadOP) Type) +(rule 1 (load_op_reg_type (LoadOP.Fld)) $F64) +(rule 1 (load_op_reg_type (LoadOP.Flw)) $F64) +(rule 1 (load_op_reg_type (LoadOP.Flh)) $F64) +(rule 0 (load_op_reg_type _) $I64) + +;; Helper constructor to build a load instruction. +(decl gen_load (AMode LoadOP MemFlags) Reg) +(rule (gen_load amode op flags) + (let ((dst WritableReg (temp_writable_reg (load_op_reg_type op))) + (_ Unit (emit (MInst.Load dst op flags amode)))) + dst)) + +;; Similar to `gen_load` but marks `Inst` as sunk at the current point. +;; +;; This is only useful for load op's that perform some additional computation +;; such as extending the loaded value. +(decl gen_sunk_load (Inst AMode LoadOP MemFlags) Reg) +(rule (gen_sunk_load inst amode op flags) + (let ((_ Unit (sink_inst inst))) + (gen_load amode op flags))) + + +;; Helper constructor to build a store instruction. +;; +;; This helper contains a special-case for zero constants stored to memory to +;; directly store the `zero` register to memory. See #7162 for some discussion +;; on why this doesn't just fall out. +(decl gen_store (AMode MemFlags Value) InstOutput) +(rule 1 (gen_store amode flags val @ (value_type ty)) + (if-let (u64_from_iconst 0) val) + (rv_store amode (store_op ty) flags (zero_reg))) +(rule 0 (gen_store amode flags val @ (value_type ty)) + (rv_store amode (store_op ty) flags val)) + +;; Emit a raw instruction to store a register into memory. +;; +;; Note that the `src` operand must have the correct type for the `op` +;; specified. +(decl rv_store (AMode StoreOP MemFlags Reg) InstOutput) +(rule (rv_store amode op flags src) + (side_effect (SideEffectNoResult.Inst (MInst.Store amode op flags src)))) + + + + +(decl valid_atomic_transaction (Type) Type) +(extern extractor valid_atomic_transaction valid_atomic_transaction) + +;;helper function. +;;construct an atomic instruction. 
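+;; For example, an `amoadd.w` emitted through this helper atomically adds `src`
+;; to the 32-bit value at `addr` and leaves the previous memory value in the
+;; result register.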
+(decl gen_atomic (AtomicOP Reg Reg AMO) Reg) +(rule + (gen_atomic op addr src amo) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Atomic op tmp addr src amo)))) + tmp)) + +;; helper function +(decl get_atomic_rmw_op (Type AtomicRmwOp) AtomicOP) +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Add)) + (AtomicOP.AmoaddW)) +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Add)) + (AtomicOP.AmoaddD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.And)) + (AtomicOP.AmoandW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.And)) + (AtomicOP.AmoandD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Or)) + (AtomicOP.AmoorW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Or)) + (AtomicOP.AmoorD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Smax)) + (AtomicOP.AmomaxW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Smax)) + (AtomicOP.AmomaxD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Smin)) + (AtomicOP.AmominW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Smin)) + (AtomicOP.AmominD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Umax)) + (AtomicOP.AmomaxuW) +) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Umax)) + (AtomicOP.AmomaxuD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Umin)) + (AtomicOP.AmominuW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Umin)) + (AtomicOP.AmominuD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Xchg)) + (AtomicOP.AmoswapW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Xchg)) + (AtomicOP.AmoswapD)) + +(rule + (get_atomic_rmw_op $I32 (AtomicRmwOp.Xor)) + (AtomicOP.AmoxorW)) + +(rule + (get_atomic_rmw_op $I64 (AtomicRmwOp.Xor)) + (AtomicOP.AmoxorD)) + +(decl atomic_amo () AMO) +(extern constructor atomic_amo atomic_amo) + + +(decl gen_atomic_load (Reg Type) Reg) +(rule + (gen_atomic_load p ty) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AtomicLoad tmp ty p)))) + (writable_reg_to_reg tmp))) + +;;; +(decl gen_atomic_store (Reg Type Reg) InstOutput) +(rule + (gen_atomic_store p ty src) + (side_effect (SideEffectNoResult.Inst (MInst.AtomicStore src ty p))) +) + + +;; Rounds a FReg by converting the value into an integer and back with a specified +;; float rounding mode. +(decl float_round_fcvt (Type FRM FReg) FReg) +(rule (float_round_fcvt $F32 frm rs) (rv_fcvtsw frm (rv_fcvtws frm rs))) +(rule (float_round_fcvt $F64 frm rs) (rv_fcvtdl frm (rv_fcvtld frm rs))) + +(decl gen_float_round (FRM FReg Type) FReg) +(rule 0 (gen_float_round frm rs ty) + (let (;; if rs is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits + ;; in mantissa, the result is the same as src, check for these cases first. + (max FReg (imm ty (float_int_max ty))) + (abs FReg (rv_fabs ty rs)) + (exact XReg (rv_flt ty abs max)) + + ;; Manually round the value using the fcvt instructions + ;; to move the value to an integer register and back. + (fcvt FReg (float_round_fcvt ty frm rs)) + ;; Restore the sign bit from the initial value. + (rounded FReg (rv_fsgnj ty fcvt rs)) + + ;; We want to return a arithmetic nan if the input is a canonical nan. + ;; Convert them by adding 0.0 to the input. + (float_zero FReg (gen_bitcast (zero_reg) (float_int_of_same_size ty) ty)) + (corrected_nan FReg (rv_fadd ty (FRM.RNE) rs float_zero))) + + ;; Check if the value cannot be rounded exactly and return the source input if so + (gen_select_freg (cmp_eqz exact) corrected_nan rounded))) + +;; With Zfa we can use the dedicated `fround` instruction. 
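+;; For example (illustrative), rounding an $F64 towards positive infinity
+;; (i.e. `ceil`) becomes a single `fround.d rd, rs` with the `rup` rounding
+;; mode instead of the fcvt round-trip above.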
+(rule 1 (gen_float_round frm rs ty) + (if-let $true (has_zfa)) + (rv_fround ty frm rs)) + + + +(decl gen_stack_addr (StackSlot Offset32) Reg) +(extern constructor gen_stack_addr gen_stack_addr) + +(decl gen_select_xreg (IntegerCompare XReg XReg) XReg) + +(rule 6 (gen_select_xreg (int_compare_decompose cc x y) x y) + (if-let (IntCC.UnsignedLessThan) (intcc_without_eq cc)) + (if-let $true (has_zbb)) + (rv_minu x y)) + +(rule 6 (gen_select_xreg (int_compare_decompose cc x y) x y) + (if-let (IntCC.SignedLessThan) (intcc_without_eq cc)) + (if-let $true (has_zbb)) + (rv_min x y)) + +(rule 6 (gen_select_xreg (int_compare_decompose cc x y) x y) + (if-let (IntCC.UnsignedGreaterThan) (intcc_without_eq cc)) + (if-let $true (has_zbb)) + (rv_maxu x y)) + +(rule 6 (gen_select_xreg (int_compare_decompose cc x y) x y) + (if-let (IntCC.SignedGreaterThan) (intcc_without_eq cc)) + (if-let $true (has_zbb)) + (rv_max x y)) + +;; Rotate Zero Reg to the right. This allows us to write fewer rules +;; below when matching the zero register +;; +;; Additionally prevent this rule from recursing infinitely by only +;; matching when one of the inputs is the zero register, but not both. + +(rule 5 (gen_select_xreg (int_compare_decompose cc a @ (zero_reg) b @ (non_zero_reg)) x y) + (if-let $true (has_zicond)) + (gen_select_xreg (int_compare (intcc_swap_args cc) b a) x y)) + +(rule 4 (gen_select_xreg c @ (int_compare_decompose cc a b) x @ (zero_reg) y @ (non_zero_reg)) + (if-let $true (has_zicond)) + (gen_select_xreg (int_compare (intcc_complement cc) a b) y x)) + +(rule 3 (gen_select_xreg (int_compare_decompose (IntCC.Equal) c (zero_reg)) x (zero_reg)) + (if-let $true (has_zicond)) + (rv_czero_nez x c)) + +(rule 3 (gen_select_xreg (int_compare_decompose (IntCC.NotEqual) c (zero_reg)) x (zero_reg)) + (if-let $true (has_zicond)) + (rv_czero_eqz x c)) + +(rule 2 (gen_select_xreg (int_compare_decompose (IntCC.Equal) c (zero_reg)) x y) + (if-let $true (has_zicond)) + (rv_or + (rv_czero_nez x c) + (rv_czero_eqz y c))) + +(rule 2 (gen_select_xreg (int_compare_decompose (IntCC.NotEqual) c (zero_reg)) x y) + (if-let $true (has_zicond)) + (rv_or + (rv_czero_eqz x c) + (rv_czero_nez y c))) + +;; It is still beneficial to emit the full compare instruction, and then the 3 instruction +;; select using zicond, so do that here as a last resort. +(rule 1 (gen_select_xreg compare x y) + (if-let $true (has_zicond)) + (gen_select_xreg (cmp_nez (lower_int_compare compare)) x y)) + +;; In the base case we emit a conditional branch and a few moves. 
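+;;
+;; Roughly (illustrative pseudo-assembly for the emitted `MInst.Select`):
+;;
+;;       <branch on cc> rs1, rs2, taken
+;;       mv   dst, y
+;;       j    done
+;;   taken:
+;;       mv   dst, x
+;;   done: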
+ +(rule 0 (gen_select_xreg c x y) + (let + ((dst WritableReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Select dst c x y)))) + (writable_reg_to_reg dst))) + + +(decl gen_select_vreg (IntegerCompare VReg VReg) VReg) +(rule (gen_select_vreg c x y) + (let + ((dst WritableReg (temp_writable_vreg)) + (_ Unit (emit (MInst.Select dst c (vreg_to_reg x) (vreg_to_reg y))))) + (writable_reg_to_reg dst))) +(decl gen_select_freg (IntegerCompare FReg FReg) FReg) +(rule (gen_select_freg c x y) + (let + ((dst WritableReg (temp_writable_freg)) + (_ Unit (emit (MInst.Select dst c (freg_to_reg x) (freg_to_reg y))))) + (writable_reg_to_reg dst))) +(decl gen_select_regs (IntegerCompare ValueRegs ValueRegs) ValueRegs) +(rule (gen_select_regs c x y) + (let + ((dst1 WritableReg (temp_writable_xreg)) + (dst2 WritableReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Select (writable_value_regs dst1 dst2) c x y)))) + (value_regs dst1 dst2))) + +(decl udf (TrapCode) InstOutput) +(rule + (udf code) + (side_effect (SideEffectNoResult.Inst (MInst.Udf code)))) + +(decl load_op (Type) LoadOP) +(extern constructor load_op load_op) + +(decl store_op (Type) StoreOP) +(extern constructor store_op store_op) + + +;;;; load extern name +(decl load_ext_name (ExternalName i64) Reg) +(extern constructor load_ext_name load_ext_name) + +(decl elf_tls_get_addr (ExternalName) Reg) +(rule (elf_tls_get_addr name) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.ElfTlsGetAddr dst name)))) + dst)) + +;;; some float binary operation +;;; 1. need move into x register. +;;; 2. do the operation. +;;; 3. move back. +(decl lower_float_binary (AluOPRRR FReg FReg Type) FReg) +(rule + (lower_float_binary op rs1 rs2 ty) + (let ((x_rs1 XReg (move_f_to_x rs1 ty)) + (x_rs2 XReg (move_f_to_x rs2 ty)) + (tmp XReg (alu_rrr op x_rs1 x_rs2))) + (move_x_to_f tmp (float_int_of_same_size ty)))) + + +(decl i128_sub (ValueRegs ValueRegs) ValueRegs) +(rule + (i128_sub x y ) + (let + (;; low part. + (low XReg (rv_sub (value_regs_get x 0) (value_regs_get y 0))) + ;; compute borrow. + (borrow XReg (rv_sltu (value_regs_get x 0) low)) + ;; + (high_tmp XReg (rv_sub (value_regs_get x 1) (value_regs_get y 1))) + ;; + (high XReg (rv_sub high_tmp borrow))) + (value_regs low high))) + +;; Consume a CmpResult, producing a branch on its result. +(decl cond_br (IntegerCompare CondBrTarget CondBrTarget) SideEffectNoResult) +(rule (cond_br cmp then else) + (SideEffectNoResult.Inst + (MInst.CondBr then else cmp))) + +;; Helper for emitting the `j` mnemonic, an unconditional jump to label. +(decl rv_j (MachLabel) SideEffectNoResult) +(rule (rv_j label) + (SideEffectNoResult.Inst (MInst.Jal label))) + +;; Construct an IntegerCompare value. 
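+;; An `IntegerCompare` pairs an `IntCC` condition with two X registers and is
+;; ultimately emitted as a single conditional branch (see `IntegerCompare` in
+;; `inst/args.rs`).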
+(decl int_compare (IntCC XReg XReg) IntegerCompare)
+(extern constructor int_compare int_compare)
+
+;; Extract the components of an `IntegerCompare`
+(decl int_compare_decompose (IntCC XReg XReg) IntegerCompare)
+(extern extractor infallible int_compare_decompose int_compare_decompose)
+
+(decl label_to_br_target (MachLabel) CondBrTarget)
+(extern constructor label_to_br_target label_to_br_target)
+(convert MachLabel CondBrTarget label_to_br_target)
+
+(decl cmp_eqz (XReg) IntegerCompare)
+(rule (cmp_eqz r) (int_compare (IntCC.Equal) r (zero_reg)))
+
+(decl cmp_nez (XReg) IntegerCompare)
+(rule (cmp_nez r) (int_compare (IntCC.NotEqual) r (zero_reg)))
+
+(decl cmp_eq (XReg XReg) IntegerCompare)
+(rule (cmp_eq rs1 rs2) (int_compare (IntCC.Equal) rs1 rs2))
+
+(decl cmp_ne (XReg XReg) IntegerCompare)
+(rule (cmp_ne rs1 rs2) (int_compare (IntCC.NotEqual) rs1 rs2))
+
+(decl cmp_lt (XReg XReg) IntegerCompare)
+(rule (cmp_lt rs1 rs2) (int_compare (IntCC.SignedLessThan) rs1 rs2))
+
+(decl cmp_ltz (XReg) IntegerCompare)
+(rule (cmp_ltz rs) (int_compare (IntCC.SignedLessThan) rs (zero_reg)))
+
+(decl cmp_gt (XReg XReg) IntegerCompare)
+(rule (cmp_gt rs1 rs2) (int_compare (IntCC.SignedGreaterThan) rs1 rs2))
+
+(decl cmp_ge (XReg XReg) IntegerCompare)
+(rule (cmp_ge rs1 rs2) (int_compare (IntCC.SignedGreaterThanOrEqual) rs1 rs2))
+
+(decl cmp_le (XReg XReg) IntegerCompare)
+(rule (cmp_le rs1 rs2) (int_compare (IntCC.SignedLessThanOrEqual) rs1 rs2))
+
+(decl cmp_gtu (XReg XReg) IntegerCompare)
+(rule (cmp_gtu rs1 rs2) (int_compare (IntCC.UnsignedGreaterThan) rs1 rs2))
+
+(decl cmp_geu (XReg XReg) IntegerCompare)
+(rule (cmp_geu rs1 rs2) (int_compare (IntCC.UnsignedGreaterThanOrEqual) rs1 rs2))
+
+(decl cmp_ltu (XReg XReg) IntegerCompare)
+(rule (cmp_ltu rs1 rs2) (int_compare (IntCC.UnsignedLessThan) rs1 rs2))
+
+(decl cmp_leu (XReg XReg) IntegerCompare)
+(rule (cmp_leu rs1 rs2) (int_compare (IntCC.UnsignedLessThanOrEqual) rs1 rs2))
+
+;; Helper to generate an `IntegerCompare` which represents the "truthy" value of
+;; the input provided.
+;;
+;; This is used in `Select` and `brif` for example to generate conditional
+;; branches. The returned comparison, when taken, represents that `Value` is
+;; nonzero. When not taken the input `Value` is zero.
+(decl is_nonzero_cmp (Value) IntegerCompare)
+
+;; Base case - convert to a "truthy" value and compare it against zero.
+;;
+;; Note that non-64-bit types need to be extended since the upper bits from
+;; Cranelift's point of view are undefined. Favor a zero extension for 8-bit
+;; types because that's a single `andi` instruction, but favor sign-extension
+;; for 16 and 32-bit types because many RISC-V instructions which operate on
+;; the low 32 bits of registers sign-extend their results. Additionally the
+;; base 64-bit ISA has a single instruction for sign-extending from 32 to
+;; 64 bits, which makes that a bit cheaper if used.
+(rule 0 (is_nonzero_cmp val @ (value_type (fits_in_64 _)))
+  (cmp_nez (sext val)))
+(rule 1 (is_nonzero_cmp val @ (value_type $I8))
+  (cmp_nez (zext val)))
+(rule 1 (is_nonzero_cmp val @ (value_type $I128))
+  (cmp_nez (rv_or (value_regs_get val 0) (value_regs_get val 1))))
+
+;; If the input value is itself an `icmp` or `fcmp` we can avoid generating the
+;; result of the comparison and instead move the comparison directly into the
+;; `IntegerCompare` that's returned.
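+;;
+;; For example (illustrative): `(brif (icmp eq x y) then else)` can branch with
+;; a single `beq x, y, then` instead of first materializing the `icmp` result
+;; into a register and then comparing that register against zero.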
+(rule 2 (is_nonzero_cmp (maybe_uextend (icmp cc a b @ (value_type (fits_in_64 _))))) + (icmp_to_int_compare cc a b)) +(rule 2 (is_nonzero_cmp (maybe_uextend (fcmp cc a @ (value_type ty) b))) + (fcmp_to_float_compare cc ty a b)) + +;; Creates an `IntegerCompare` from an `icmp` node's parts. This will extend +;; values as necessary to their full register width to perform the +;; comparison. The returned `IntegerCompare` is suitable to use in conditional +;; branches for example. +;; +;; Note that this should ideally only be used when the `IntegerCompare` returned +;; is fed into a branch. If `IntegerCompare` is materialized this will miss out +;; on optimizations to compare against constants using some native instructions. +(decl icmp_to_int_compare (IntCC Value Value) IntegerCompare) +(rule 0 (icmp_to_int_compare cc a b @ (value_type (fits_in_64 in_ty))) + (int_compare cc (put_value_in_reg_for_icmp cc a) (put_value_in_reg_for_icmp cc b))) +(rule 1 (icmp_to_int_compare cc a b @ (value_type $I128)) + (cmp_nez (lower_icmp_i128 cc a b))) + +;; Places a `Value` into a full register width to prepare for a comparison +;; using `IntCC`. +;; +;; This is largely a glorified means of choosing sign-extension or +;; zero-extension for the `Value` input. +(decl put_value_in_reg_for_icmp (IntCC Value) XReg) + +;; Base cases, use the `cc` to determine whether to zero or sign extend. +(rule 0 (put_value_in_reg_for_icmp cc val) + (zext val)) +(rule 1 (put_value_in_reg_for_icmp cc val) + (if (signed_cond_code cc)) + (sext val)) + +;; For equality and inequality favor sign extension since it's generally +;; easier to perform sign extension on RV64 via native instructions. For 8-bit +;; types though use zero-extension since that's a single instruction `and`. +(rule 2 (put_value_in_reg_for_icmp (IntCC.Equal) val @ (value_type (fits_in_64 _))) + (sext val)) +(rule 2 (put_value_in_reg_for_icmp (IntCC.NotEqual) val @ (value_type (fits_in_64 _))) + (sext val)) +(rule 3 (put_value_in_reg_for_icmp (IntCC.Equal) val @ (value_type $I8)) + (zext val)) +(rule 3 (put_value_in_reg_for_icmp (IntCC.NotEqual) val @ (value_type $I8)) + (zext val)) + +;; As a special case use `x0` directly if a constant is 0. +(rule 4 (put_value_in_reg_for_icmp _ (i64_from_iconst 0)) + (zero_reg)) + + +(decl partial lower_branch (Inst MachLabelSlice) Unit) +(rule (lower_branch (jump _) (single_target label)) + (emit_side_effect (rv_j label))) + +(rule (lower_branch (brif v _ _) (two_targets then else)) + (emit_side_effect (cond_br (is_nonzero_cmp v) then else))) + +(decl lower_br_table (Reg MachLabelSlice) Unit) +(extern constructor lower_br_table lower_br_table) + +(rule (lower_branch (br_table index _) targets) + (lower_br_table index targets)) + +(decl load_ra () Reg) +(extern constructor load_ra load_ra) + + +;; Generates a bitcast instruction. +;; Args are: src, src_ty, dst_ty +(decl gen_bitcast (Reg Type Type) Reg) + +;; To support FP16 vfmv.* we need to check for the `zvfh` isa flag, which we currently don't +;; support, so restrict the floating point types to 32/64 bits. 
+(rule 5 (gen_bitcast r (ty_supported_float (ty_32_or_64 src_ty)) (ty_supported_vec _)) (rv_vfmv_sf r src_ty)) +(rule 4 (gen_bitcast r (ty_supported_vec _) (ty_supported_float (ty_32_or_64 dst_ty))) (rv_vfmv_fs r dst_ty)) + +(rule 3 (gen_bitcast r (ty_int_ref_scalar_64 src_ty) (ty_supported_vec _)) (rv_vmv_sx r src_ty)) +(rule 2 (gen_bitcast r (ty_supported_vec _) (ty_int_ref_scalar_64 dst_ty)) (rv_vmv_xs r dst_ty)) +(rule 1 (gen_bitcast r $F16 $I16) (rv_fmvxh r)) +(rule 1 (gen_bitcast r $F32 $I32) (rv_fmvxw r)) +(rule 1 (gen_bitcast r $F64 $I64) (rv_fmvxd r)) +(rule 1 (gen_bitcast r $I16 $F16) (rv_fmvhx r)) +(rule 1 (gen_bitcast r $I32 $F32) (rv_fmvwx r)) +(rule 1 (gen_bitcast r $I64 $F64) (rv_fmvdx r)) +(rule (gen_bitcast r _ _) r) + +(decl move_f_to_x (FReg Type) XReg) +(rule (move_f_to_x r $F32) (gen_bitcast r $F32 $I32)) +(rule (move_f_to_x r $F64) (gen_bitcast r $F64 $I64)) + +(decl move_x_to_f (XReg Type) FReg) +(rule (move_x_to_f r $I32) (gen_bitcast r $I32 $F32)) +(rule (move_x_to_f r $I64) (gen_bitcast r $I64 $F64)) + +(decl float_int_of_same_size (Type) Type) +(rule (float_int_of_same_size $F32) $I32) +(rule (float_int_of_same_size $F64) $I64) + + +(decl gen_brev8 (Reg Type) Reg) +(rule 1 + (gen_brev8 rs _) + (if-let $true (has_zbkb)) + (rv_brev8 rs)) +(rule + (gen_brev8 rs ty) + (if-let $false (has_zbkb)) + (let + ((tmp WritableXReg (temp_writable_xreg)) + (tmp2 WritableXReg (temp_writable_xreg)) + (step WritableXReg (temp_writable_xreg)) + (rd WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.Brev8 rs ty step tmp tmp2 rd)))) + (writable_reg_to_reg rd))) + +;; Negates x +;; Equivalent to 0 - x +(decl neg (Type ValueRegs) ValueRegs) +(rule 1 (neg (fits_in_64 (ty_int ty)) val) + (value_reg + (rv_neg (value_regs_get val 0)))) + +(rule 2 (neg $I128 val) + (i128_sub (value_regs_zero) val)) + + +;; Builds an instruction sequence that traps if the comparison succeeds. +(decl gen_trapif (IntCC XReg XReg TrapCode) InstOutput) +(rule (gen_trapif cc a b trap_code) + (side_effect (SideEffectNoResult.Inst (MInst.TrapIf a b cc trap_code)))) + +;; Builds an instruction sequence that traps if the input is non-zero. +(decl gen_trapnz (XReg TrapCode) InstOutput) +(rule (gen_trapnz test trap_code) + (gen_trapif (IntCC.NotEqual) test (zero_reg) trap_code)) + +;; Builds an instruction sequence that traps if the input is zero. +(decl gen_trapz (XReg TrapCode) InstOutput) +(rule (gen_trapz test trap_code) + (gen_trapif (IntCC.Equal) test (zero_reg) trap_code)) + +;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) +(extern constructor gen_call gen_call) + +(decl gen_call_indirect (SigRef Value ValueSlice) InstOutput) +(extern constructor gen_call_indirect gen_call_indirect) + +;;; this is trying to imitate aarch64 `madd` instruction. +(decl madd (XReg XReg XReg) XReg) +(rule + (madd n m a) + (let + ((t XReg (rv_mul n m))) + (rv_add t a))) + +;;;; Helpers for bmask ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Generates either 0 if `Value` is zero or -1 otherwise. +(decl gen_bmask (Value) XReg) + +;; Base cases: use `snez` after a sign extension to ensure that the entire +;; register is defined. For i128 we test both the upper and lower half. 
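+;;
+;; For example (illustrative), a nonzero $I32 input lowers roughly to
+;; `sext.w t, x; snez t, t; neg t, t`, producing all ones, while a zero input
+;; produces zero.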
+(rule 0 (gen_bmask val @ (value_type (fits_in_64 _))) + (let ((non_zero XReg (rv_snez (sext val)))) + (rv_neg non_zero))) +(rule 1 (gen_bmask val @ (value_type $I128)) + (let ((non_zero XReg (rv_snez (rv_or (value_regs_get val 0) (value_regs_get val 1))))) + (rv_neg non_zero))) + +;; If the input value is an `icmp` or an `fcmp` directly then the `snez` can +;; be omitted because the result of the icmp or fcmp is a 0 or 1 directly. This +;; means we can go straight to the `neg` instruction to produce the final +;; result. +(rule 2 (gen_bmask val @ (maybe_uextend (icmp _ _ _))) (rv_neg val)) +(rule 2 (gen_bmask val @ (maybe_uextend (fcmp _ _ _))) (rv_neg val)) + +(decl lower_bmask (Value Type) ValueRegs) +(rule 0 (lower_bmask val (fits_in_64 _)) + (value_reg (gen_bmask val))) +(rule 1 (lower_bmask val $I128) + (let ((bits XReg (gen_bmask val))) + (value_regs bits bits))) + +;;;; Helpers for physical registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_mov_from_preg (PReg) Reg) + +(rule + (gen_mov_from_preg rm) + (let ((rd WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.MovFromPReg rd rm)))) + rd)) + +(decl fp_reg () PReg) +(extern constructor fp_reg fp_reg) + +(decl sp_reg () PReg) +(extern constructor sp_reg sp_reg) + +;; Extractor that matches all registers, except the zero register +(decl non_zero_reg () XReg) +(extern extractor non_zero_reg is_non_zero_reg) + +;; Helper for creating the zero register. +(decl zero_reg () XReg) +(extern constructor zero_reg zero_reg) +(extern extractor zero_reg is_zero_reg) + +(decl value_regs_zero () ValueRegs) +(rule (value_regs_zero) + (value_regs (imm $I64 0) (imm $I64 0))) + +(decl writable_zero_reg () WritableReg) +(extern constructor writable_zero_reg writable_zero_reg) + + +;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type FloatCompare (enum + ;; The comparison succeeded if `r` is one + (One (r XReg)) + ;; The comparison succeeded if `r` is zero + (Zero (r XReg)) +)) + +(decl float_compare_invert (FloatCompare) FloatCompare) +(rule (float_compare_invert (FloatCompare.One r)) (FloatCompare.Zero r)) +(rule (float_compare_invert (FloatCompare.Zero r)) (FloatCompare.One r)) + +(decl float_to_int_compare (FloatCompare) IntegerCompare) +(rule (float_to_int_compare (FloatCompare.One r)) (cmp_nez r)) +(rule (float_to_int_compare (FloatCompare.Zero r)) (cmp_eqz r)) +(convert FloatCompare IntegerCompare float_to_int_compare) + +;; Compare two floating point numbers and return a zero/non-zero result. +(decl fcmp_to_float_compare (FloatCC Type FReg FReg) FloatCompare) + +;; Direct codegen for unordered comparisons is not that efficient, so invert +;; the comparison to get an ordered comparison and generate that. Then invert +;; the result to produce the final fcmp result. 
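+;;
+;; For example (illustrative): `FloatCC.UnorderedGreaterThan` is lowered as the
+;; complement of `FloatCC.LessThanOrEqual`, i.e. an `fle` whose result is then
+;; tested for zero via `FloatCompare.Zero`.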
+(rule 0 (fcmp_to_float_compare cc ty a b) + (if-let $true (floatcc_unordered cc)) + (float_compare_invert (fcmp_to_float_compare (floatcc_complement cc) ty a b))) + +;; a is not nan && b is not nan +(rule 1 (fcmp_to_float_compare (FloatCC.Ordered) ty a b) + (FloatCompare.One (rv_and (is_not_nan ty a) (is_not_nan ty b)))) + +(decl is_not_nan (Type FReg) XReg) +(rule (is_not_nan ty a) (rv_feq ty a a)) + +;; a == b +(rule 1 (fcmp_to_float_compare (FloatCC.Equal) ty a b) + (FloatCompare.One (rv_feq ty a b))) + +;; a != b +;; == !(a == b) +(rule 1 (fcmp_to_float_compare (FloatCC.NotEqual) ty a b) + (FloatCompare.Zero (rv_feq ty a b))) + +;; a < b || a > b +(rule 1 (fcmp_to_float_compare (FloatCC.OrderedNotEqual) ty a b) + (FloatCompare.One (rv_or (rv_flt ty a b) (rv_fgt ty a b)))) + +;; a < b +(rule 1 (fcmp_to_float_compare (FloatCC.LessThan) ty a b) + (FloatCompare.One (rv_flt ty a b))) + +;; a <= b +(rule 1 (fcmp_to_float_compare (FloatCC.LessThanOrEqual) ty a b) + (FloatCompare.One (rv_fle ty a b))) + +;; a > b +(rule 1 (fcmp_to_float_compare (FloatCC.GreaterThan) ty a b) + (FloatCompare.One (rv_fgt ty a b))) + +;; a >= b +(rule 1 (fcmp_to_float_compare (FloatCC.GreaterThanOrEqual) ty a b) + (FloatCompare.One (rv_fge ty a b))) diff --git a/hbcb/src/inst/args.rs b/hbcb/src/inst/args.rs new file mode 100644 index 0000000..d28e59b --- /dev/null +++ b/hbcb/src/inst/args.rs @@ -0,0 +1,1957 @@ +//! Riscv64 ISA definitions: instruction arguments. + +use super::*; +use crate::ir::condcodes::CondCode; + +use crate::lower::isle::generated_code::{ + COpcodeSpace, CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp, +}; +use crate::machinst::isle::WritableReg; + +use std::fmt::Result; + +/// A macro for defining a newtype of `Reg` that enforces some invariant about +/// the wrapped `Reg` (such as that it is of a particular register class). +macro_rules! newtype_of_reg { + ( + $newtype_reg:ident, + $newtype_writable_reg:ident, + |$check_reg:ident| $check:expr + ) => { + /// A newtype wrapper around `Reg`. + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct $newtype_reg(Reg); + + impl PartialEq for $newtype_reg { + fn eq(&self, other: &Reg) -> bool { + self.0 == *other + } + } + + impl From<$newtype_reg> for Reg { + fn from(r: $newtype_reg) -> Self { + r.0 + } + } + + impl $newtype_reg { + /// Create this newtype from the given register, or return `None` if the register + /// is not a valid instance of this newtype. + pub fn new($check_reg: Reg) -> Option { + if $check { + Some(Self($check_reg)) + } else { + None + } + } + + /// Get this newtype's underlying `Reg`. + pub fn to_reg(self) -> Reg { + self.0 + } + } + + // Convenience impl so that people working with this newtype can use it + // "just like" a plain `Reg`. + // + // NB: We cannot implement `DerefMut` because that would let people do + // nasty stuff like `*my_xreg.deref_mut() = some_freg`, breaking the + // invariants that `XReg` provides. + impl std::ops::Deref for $newtype_reg { + type Target = Reg; + + fn deref(&self) -> &Reg { + &self.0 + } + } + + /// Writable Reg. + pub type $newtype_writable_reg = Writable<$newtype_reg>; + }; +} + +// Newtypes for registers classes. +newtype_of_reg!(XReg, WritableXReg, |reg| reg.class() == RegClass::Int); +newtype_of_reg!(FReg, WritableFReg, |reg| reg.class() == RegClass::Float); +newtype_of_reg!(VReg, WritableVReg, |reg| reg.class() == RegClass::Vector); + +/// An addressing mode specified for a load/store operation. 
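+///
+/// For example (illustrative), `AMode::RegOffset(a0, 16)` is printed as
+/// `16(a0)`; offsets that do not fit in a 12-bit immediate are handled with
+/// extra instructions during emission.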
+#[derive(Clone, Debug, Copy)] +pub enum AMode { + /// Arbitrary offset from a register. Converted to generation of large + /// offsets with multiple instructions as necessary during code emission. + RegOffset(Reg, i64), + /// Offset from the stack pointer. + SPOffset(i64), + + /// Offset from the frame pointer. + FPOffset(i64), + + /// Offset into the slot area of the stack, which lies just above the + /// outgoing argument area that's setup by the function prologue. + /// At emission time, this is converted to `SPOffset` with a fixup added to + /// the offset constant. The fixup is a running value that is tracked as + /// emission iterates through instructions in linear order, and can be + /// adjusted up and down with [Inst::VirtualSPOffsetAdj]. + /// + /// The standard ABI is in charge of handling this (by emitting the + /// adjustment meta-instructions). See the diagram in the documentation + /// for [crate::isa::aarch64::abi](the ABI module) for more details. + SlotOffset(i64), + + /// Offset into the argument area. + IncomingArg(i64), + + /// A reference to a constant which is placed outside of the function's + /// body, typically at the end. + Const(VCodeConstant), + + /// A reference to a label. + Label(MachLabel), +} + +impl AMode { + /// Add the registers referenced by this AMode to `collector`. + pub(crate) fn get_operands(&mut self, collector: &mut impl OperandVisitor) { + match self { + AMode::RegOffset(reg, ..) => collector.reg_use(reg), + // Registers used in these modes aren't allocatable. + AMode::SPOffset(..) + | AMode::FPOffset(..) + | AMode::SlotOffset(..) + | AMode::IncomingArg(..) + | AMode::Const(..) + | AMode::Label(..) => {} + } + } + + pub(crate) fn get_base_register(&self) -> Option { + match self { + &AMode::RegOffset(reg, ..) => Some(reg), + &AMode::SPOffset(..) => Some(stack_reg()), + &AMode::FPOffset(..) => Some(fp_reg()), + &AMode::SlotOffset(..) => Some(stack_reg()), + &AMode::IncomingArg(..) => Some(stack_reg()), + &AMode::Const(..) | AMode::Label(..) => None, + } + } + + pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { + match self { + &AMode::SlotOffset(offset) => { + offset + i64::from(state.frame_layout().outgoing_args_size) + } + + // Compute the offset into the incoming argument area relative to SP + &AMode::IncomingArg(offset) => { + let frame_layout = state.frame_layout(); + let sp_offset = frame_layout.tail_args_size + + frame_layout.setup_area_size + + frame_layout.clobber_size + + frame_layout.fixed_frame_storage_size + + frame_layout.outgoing_args_size; + i64::from(sp_offset) - offset + } + + &AMode::RegOffset(_, offset) => offset, + &AMode::SPOffset(offset) => offset, + &AMode::FPOffset(offset) => offset, + &AMode::Const(_) | &AMode::Label(_) => 0, + } + } + + /// Retrieve a MachLabel that corresponds to this addressing mode, if it exists. + pub(crate) fn get_label_with_sink(&self, sink: &mut MachBuffer) -> Option { + match self { + &AMode::Const(addr) => Some(sink.get_label_for_constant(addr)), + &AMode::Label(label) => Some(label), + &AMode::RegOffset(..) + | &AMode::SPOffset(..) + | &AMode::FPOffset(..) + | &AMode::IncomingArg(..) + | &AMode::SlotOffset(..) => None, + } + } +} + +impl Display for AMode { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + match self { + &AMode::RegOffset(r, offset, ..) => { + write!(f, "{}({})", offset, reg_name(r)) + } + &AMode::SPOffset(offset, ..) => { + write!(f, "{offset}(sp)") + } + &AMode::SlotOffset(offset, ..) 
=> { + write!(f, "{offset}(slot)") + } + &AMode::IncomingArg(offset) => { + write!(f, "-{offset}(incoming_arg)") + } + &AMode::FPOffset(offset, ..) => { + write!(f, "{offset}(fp)") + } + &AMode::Const(addr, ..) => { + write!(f, "[const({})]", addr.as_u32()) + } + &AMode::Label(label) => { + write!(f, "[label{}]", label.as_u32()) + } + } + } +} + +impl Into for StackAMode { + fn into(self) -> AMode { + match self { + StackAMode::IncomingArg(offset, stack_args_size) => { + AMode::IncomingArg(i64::from(stack_args_size) - offset) + } + StackAMode::OutgoingArg(offset) => AMode::SPOffset(offset), + StackAMode::Slot(offset) => AMode::SlotOffset(offset), + } + } +} + +/// risc-v always take two register to compare +#[derive(Clone, Copy, Debug)] +pub struct IntegerCompare { + pub(crate) kind: IntCC, + pub(crate) rs1: Reg, + pub(crate) rs2: Reg, +} + +pub(crate) enum BranchFunct3 { + // == + Eq, + // != + Ne, + // signed < + Lt, + // signed >= + Ge, + // unsigned < + Ltu, + // unsigned >= + Geu, +} + +impl BranchFunct3 { + pub(crate) fn funct3(self) -> u32 { + match self { + BranchFunct3::Eq => 0b000, + BranchFunct3::Ne => 0b001, + BranchFunct3::Lt => 0b100, + BranchFunct3::Ge => 0b101, + BranchFunct3::Ltu => 0b110, + BranchFunct3::Geu => 0b111, + } + } +} + +impl IntegerCompare { + pub(crate) fn op_code(self) -> u32 { + 0b1100011 + } + + // funct3 and if need inverse the register + pub(crate) fn funct3(&self) -> (BranchFunct3, bool) { + match self.kind { + IntCC::Equal => (BranchFunct3::Eq, false), + IntCC::NotEqual => (BranchFunct3::Ne, false), + IntCC::SignedLessThan => (BranchFunct3::Lt, false), + IntCC::SignedGreaterThanOrEqual => (BranchFunct3::Ge, false), + + IntCC::SignedGreaterThan => (BranchFunct3::Lt, true), + IntCC::SignedLessThanOrEqual => (BranchFunct3::Ge, true), + + IntCC::UnsignedLessThan => (BranchFunct3::Ltu, false), + IntCC::UnsignedGreaterThanOrEqual => (BranchFunct3::Geu, false), + + IntCC::UnsignedGreaterThan => (BranchFunct3::Ltu, true), + IntCC::UnsignedLessThanOrEqual => (BranchFunct3::Geu, true), + } + } + + #[inline] + pub(crate) fn op_name(&self) -> &'static str { + match self.kind { + IntCC::Equal => "beq", + IntCC::NotEqual => "bne", + IntCC::SignedLessThan => "blt", + IntCC::SignedGreaterThanOrEqual => "bge", + IntCC::SignedGreaterThan => "bgt", + IntCC::SignedLessThanOrEqual => "ble", + IntCC::UnsignedLessThan => "bltu", + IntCC::UnsignedGreaterThanOrEqual => "bgeu", + IntCC::UnsignedGreaterThan => "bgtu", + IntCC::UnsignedLessThanOrEqual => "bleu", + } + } + + pub(crate) fn emit(self) -> u32 { + let (funct3, reverse) = self.funct3(); + let (rs1, rs2) = if reverse { + (self.rs2, self.rs1) + } else { + (self.rs1, self.rs2) + }; + + self.op_code() + | funct3.funct3() << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + } + + pub(crate) fn inverse(self) -> Self { + Self { + kind: self.kind.complement(), + ..self + } + } + + pub(crate) fn regs(&self) -> [Reg; 2] { + [self.rs1, self.rs2] + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct FliConstant(u8); + +impl FliConstant { + pub(crate) fn new(value: u8) -> Self { + debug_assert!(value <= 31, "Invalid FliConstant: {value}"); + Self(value) + } + + pub(crate) fn maybe_from_u64(ty: Type, imm: u64) -> Option { + // Convert the value into an F64, this allows us to represent + // values from both f32 and f64 in the same value. 
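+        // For example (illustrative): `maybe_from_u64(F32, 0x3f80_0000)` is
+        // 1.0f32, which maps to table index 16 and can therefore be
+        // materialized with a single `fli.s`.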
+ let value = match ty { + F32 => f32::from_bits(imm as u32) as f64, + F64 => f64::from_bits(imm), + _ => unimplemented!(), + }; + + Some(match (ty, value) { + (_, f) if f == -1.0 => Self::new(0), + + // Since f64 can represent all f32 values, f32::min_positive won't be + // the same as f64::min_positive, so we need to check for both indepenendtly + (F32, f) if f == (f32::MIN_POSITIVE as f64) => Self::new(1), + (F64, f) if f == f64::MIN_POSITIVE => Self::new(1), + + (_, f) if f == 2.0f64.powi(-16) => Self::new(2), + (_, f) if f == 2.0f64.powi(-15) => Self::new(3), + (_, f) if f == 2.0f64.powi(-8) => Self::new(4), + (_, f) if f == 2.0f64.powi(-7) => Self::new(5), + (_, f) if f == 0.0625 => Self::new(6), + (_, f) if f == 0.125 => Self::new(7), + (_, f) if f == 0.25 => Self::new(8), + (_, f) if f == 0.3125 => Self::new(9), + (_, f) if f == 0.375 => Self::new(10), + (_, f) if f == 0.4375 => Self::new(11), + (_, f) if f == 0.5 => Self::new(12), + (_, f) if f == 0.625 => Self::new(13), + (_, f) if f == 0.75 => Self::new(14), + (_, f) if f == 0.875 => Self::new(15), + (_, f) if f == 1.0 => Self::new(16), + (_, f) if f == 1.25 => Self::new(17), + (_, f) if f == 1.5 => Self::new(18), + (_, f) if f == 1.75 => Self::new(19), + (_, f) if f == 2.0 => Self::new(20), + (_, f) if f == 2.5 => Self::new(21), + (_, f) if f == 3.0 => Self::new(22), + (_, f) if f == 4.0 => Self::new(23), + (_, f) if f == 8.0 => Self::new(24), + (_, f) if f == 16.0 => Self::new(25), + (_, f) if f == 128.0 => Self::new(26), + (_, f) if f == 256.0 => Self::new(27), + (_, f) if f == 32768.0 => Self::new(28), + (_, f) if f == 65536.0 => Self::new(29), + (_, f) if f == f64::INFINITY => Self::new(30), + + // NaN's are not guaranteed to preserve the sign / payload bits, so we need to check + // the original bits directly. + (F32, f) if f.is_nan() && imm == 0x7fc0_0000 => Self::new(31), // Canonical NaN + (F64, f) if f.is_nan() && imm == 0x7ff8_0000_0000_0000 => Self::new(31), // Canonical NaN + _ => return None, + }) + } + + pub(crate) fn format(self) -> &'static str { + // The preferred assembly syntax for entries 1, 30, and 31 is min, inf, and nan, respectively. + // For entries 0 through 29 (including entry 1), the assembler will accept decimal constants + // in C-like syntax. 
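+        //
+        // For example (illustrative), index 1 is printed as `min` here, so a
+        // disassembly shows `fli.s ft0, min` rather than the decimal value of
+        // `f32::MIN_POSITIVE`.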
+ match self.0 { + 0 => "-1.0", + 1 => "min", + 2 => "2^-16", + 3 => "2^-15", + 4 => "2^-8", + 5 => "2^-7", + 6 => "0.0625", + 7 => "0.125", + 8 => "0.25", + 9 => "0.3125", + 10 => "0.375", + 11 => "0.4375", + 12 => "0.5", + 13 => "0.625", + 14 => "0.75", + 15 => "0.875", + 16 => "1.0", + 17 => "1.25", + 18 => "1.5", + 19 => "1.75", + 20 => "2.0", + 21 => "2.5", + 22 => "3.0", + 23 => "4.0", + 24 => "8.0", + 25 => "16.0", + 26 => "128.0", + 27 => "256.0", + 28 => "32768.0", + 29 => "65536.0", + 30 => "inf", + 31 => "nan", + _ => panic!("Invalid FliConstant"), + } + } + + pub(crate) fn bits(self) -> u8 { + self.0 + } +} + +impl FpuOPRRRR { + pub(crate) fn op_name(self, width: FpuOPWidth) -> String { + match self { + Self::Fmadd => format!("fmadd.{width}"), + Self::Fmsub => format!("fmsub.{width}"), + Self::Fnmsub => format!("fnmsub.{width}"), + Self::Fnmadd => format!("fnmadd.{width}"), + } + } + + pub(crate) fn opcode(self) -> u32 { + match self { + Self::Fmadd => 0b1000011, + Self::Fmsub => 0b1000111, + Self::Fnmsub => 0b1001011, + Self::Fnmadd => 0b1001111, + } + } +} + +impl FpuOPRR { + pub(crate) fn op_name(self, width: FpuOPWidth) -> String { + let fmv_width = match width { + FpuOPWidth::H => "h", + FpuOPWidth::S => "w", + FpuOPWidth::D => "d", + FpuOPWidth::Q => "q", + }; + match self { + Self::Fsqrt => format!("fsqrt.{width}"), + Self::Fround => format!("fround.{width}"), + Self::Fclass => format!("fclass.{width}"), + Self::FcvtWFmt => format!("fcvt.w.{width}"), + Self::FcvtWuFmt => format!("fcvt.wu.{width}"), + Self::FcvtLFmt => format!("fcvt.l.{width}"), + Self::FcvtLuFmt => format!("fcvt.lu.{width}"), + Self::FcvtFmtW => format!("fcvt.{width}.w"), + Self::FcvtFmtWu => format!("fcvt.{width}.wu"), + Self::FcvtFmtL => format!("fcvt.{width}.l"), + Self::FcvtFmtLu => format!("fcvt.{width}.lu"), + + // fmv instructions deviate from the normal encoding and instead + // encode the width as "w" instead of "s". The ISA manual gives this rationale: + // + // Instructions FMV.S.X and FMV.X.S were renamed to FMV.W.X and FMV.X.W respectively + // to be more consistent with their semantics, which did not change. The old names will continue + // to be supported in the tools. 
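+            //
+            // Hence `fmv_width` above maps `FpuOPWidth::S` to "w", so the
+            // single-precision moves print as `fmv.x.w` / `fmv.w.x`.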
+ Self::FmvXFmt => format!("fmv.x.{fmv_width}"), + Self::FmvFmtX => format!("fmv.{fmv_width}.x"), + + Self::FcvtSD => "fcvt.s.d".to_string(), + Self::FcvtDS => "fcvt.d.s".to_string(), + } + } + + pub(crate) fn is_convert_to_int(self) -> bool { + match self { + Self::FcvtWFmt | Self::FcvtWuFmt | Self::FcvtLFmt | Self::FcvtLuFmt => true, + _ => false, + } + } + + pub(crate) fn has_frm(self) -> bool { + match self { + FpuOPRR::FmvXFmt | FpuOPRR::FmvFmtX | FpuOPRR::Fclass => false, + _ => true, + } + } + + pub(crate) fn opcode(self) -> u32 { + // OP-FP Major opcode + 0b1010011 + } + + pub(crate) fn rs2(self) -> u32 { + match self { + Self::Fsqrt => 0b00000, + Self::Fround => 0b00100, + Self::Fclass => 0b00000, + Self::FcvtWFmt => 0b00000, + Self::FcvtWuFmt => 0b00001, + Self::FcvtLFmt => 0b00010, + Self::FcvtLuFmt => 0b00011, + Self::FcvtFmtW => 0b00000, + Self::FcvtFmtWu => 0b00001, + Self::FcvtFmtL => 0b00010, + Self::FcvtFmtLu => 0b00011, + Self::FmvXFmt => 0b00000, + Self::FmvFmtX => 0b00000, + Self::FcvtSD => 0b00001, + Self::FcvtDS => 0b00000, + } + } + + pub(crate) fn funct5(self) -> u32 { + match self { + Self::Fsqrt => 0b01011, + Self::Fround => 0b01000, + Self::Fclass => 0b11100, + Self::FcvtWFmt => 0b11000, + Self::FcvtWuFmt => 0b11000, + Self::FcvtLFmt => 0b11000, + Self::FcvtLuFmt => 0b11000, + Self::FcvtFmtW => 0b11010, + Self::FcvtFmtWu => 0b11010, + Self::FcvtFmtL => 0b11010, + Self::FcvtFmtLu => 0b11010, + Self::FmvXFmt => 0b11100, + Self::FmvFmtX => 0b11110, + Self::FcvtSD => 0b01000, + Self::FcvtDS => 0b01000, + } + } + + pub(crate) fn funct7(self, width: FpuOPWidth) -> u32 { + (self.funct5() << 2) | width.as_u32() + } +} + +impl FpuOPRRR { + pub(crate) fn op_name(self, width: FpuOPWidth) -> String { + match self { + Self::Fadd => format!("fadd.{width}"), + Self::Fsub => format!("fsub.{width}"), + Self::Fmul => format!("fmul.{width}"), + Self::Fdiv => format!("fdiv.{width}"), + Self::Fsgnj => format!("fsgnj.{width}"), + Self::Fsgnjn => format!("fsgnjn.{width}"), + Self::Fsgnjx => format!("fsgnjx.{width}"), + Self::Fmin => format!("fmin.{width}"), + Self::Fmax => format!("fmax.{width}"), + Self::Feq => format!("feq.{width}"), + Self::Flt => format!("flt.{width}"), + Self::Fle => format!("fle.{width}"), + Self::Fminm => format!("fminm.{width}"), + Self::Fmaxm => format!("fmaxm.{width}"), + } + } + + pub(crate) fn opcode(self) -> u32 { + // OP-FP Major opcode + 0b1010011 + } + + pub(crate) const fn funct5(self) -> u32 { + match self { + Self::Fadd => 0b00000, + Self::Fsub => 0b00001, + Self::Fmul => 0b00010, + Self::Fdiv => 0b00011, + Self::Fsgnj => 0b00100, + Self::Fsgnjn => 0b00100, + Self::Fsgnjx => 0b00100, + Self::Fmin => 0b00101, + Self::Fmax => 0b00101, + Self::Feq => 0b10100, + Self::Flt => 0b10100, + Self::Fle => 0b10100, + Self::Fminm => 0b00101, + Self::Fmaxm => 0b00101, + } + } + + pub(crate) fn funct7(self, width: FpuOPWidth) -> u32 { + (self.funct5() << 2) | width.as_u32() + } + + pub(crate) fn has_frm(self) -> bool { + match self { + FpuOPRRR::Fsgnj + | FpuOPRRR::Fsgnjn + | FpuOPRRR::Fsgnjx + | FpuOPRRR::Fmin + | FpuOPRRR::Fmax + | FpuOPRRR::Feq + | FpuOPRRR::Flt + | FpuOPRRR::Fle => false, + _ => true, + } + } +} + +impl Display for FpuOPWidth { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!( + f, + "{}", + match self { + FpuOPWidth::H => "h", + FpuOPWidth::S => "s", + FpuOPWidth::D => "d", + FpuOPWidth::Q => "q", + } + ) + } +} + +impl TryFrom for FpuOPWidth { + type Error = &'static str; + + fn try_from(value: Type) -> std::result::Result { + 
match value { + F16 => Ok(FpuOPWidth::H), + F32 => Ok(FpuOPWidth::S), + F64 => Ok(FpuOPWidth::D), + F128 => Ok(FpuOPWidth::Q), + _ => Err("Invalid type for FpuOPWidth"), + } + } +} + +impl FpuOPWidth { + pub(crate) fn as_u32(&self) -> u32 { + match self { + FpuOPWidth::S => 0b00, + FpuOPWidth::D => 0b01, + FpuOPWidth::H => 0b10, + FpuOPWidth::Q => 0b11, + } + } +} + +impl AluOPRRR { + pub(crate) const fn op_name(self) -> &'static str { + match self { + Self::Add => "add", + Self::Sub => "sub", + Self::Sll => "sll", + Self::Slt => "slt", + Self::Sgt => "sgt", + Self::SltU => "sltu", + Self::Sgtu => "sgtu", + Self::Xor => "xor", + Self::Srl => "srl", + Self::Sra => "sra", + Self::Or => "or", + Self::And => "and", + Self::Addw => "addw", + Self::Subw => "subw", + Self::Sllw => "sllw", + Self::Srlw => "srlw", + Self::Sraw => "sraw", + Self::Mul => "mul", + Self::Mulh => "mulh", + Self::Mulhsu => "mulhsu", + Self::Mulhu => "mulhu", + Self::Div => "div", + Self::DivU => "divu", + Self::Rem => "rem", + Self::RemU => "remu", + Self::Mulw => "mulw", + Self::Divw => "divw", + Self::Divuw => "divuw", + Self::Remw => "remw", + Self::Remuw => "remuw", + Self::Adduw => "add.uw", + Self::Andn => "andn", + Self::Bclr => "bclr", + Self::Bext => "bext", + Self::Binv => "binv", + Self::Bset => "bset", + Self::Clmul => "clmul", + Self::Clmulh => "clmulh", + Self::Clmulr => "clmulr", + Self::Max => "max", + Self::Maxu => "maxu", + Self::Min => "min", + Self::Minu => "minu", + Self::Orn => "orn", + Self::Rol => "rol", + Self::Rolw => "rolw", + Self::Ror => "ror", + Self::Rorw => "rorw", + Self::Sh1add => "sh1add", + Self::Sh1adduw => "sh1add.uw", + Self::Sh2add => "sh2add", + Self::Sh2adduw => "sh2add.uw", + Self::Sh3add => "sh3add", + Self::Sh3adduw => "sh3add.uw", + Self::Xnor => "xnor", + Self::Pack => "pack", + Self::Packw => "packw", + Self::Packh => "packh", + Self::CzeroEqz => "czero.eqz", + Self::CzeroNez => "czero.nez", + } + } + + pub fn funct3(self) -> u32 { + match self { + AluOPRRR::Add => 0b000, + AluOPRRR::Sll => 0b001, + AluOPRRR::Slt => 0b010, + AluOPRRR::Sgt => 0b010, + AluOPRRR::SltU => 0b011, + AluOPRRR::Sgtu => 0b011, + AluOPRRR::Xor => 0b100, + AluOPRRR::Srl => 0b101, + AluOPRRR::Sra => 0b101, + AluOPRRR::Or => 0b110, + AluOPRRR::And => 0b111, + AluOPRRR::Sub => 0b000, + + AluOPRRR::Addw => 0b000, + AluOPRRR::Subw => 0b000, + AluOPRRR::Sllw => 0b001, + AluOPRRR::Srlw => 0b101, + AluOPRRR::Sraw => 0b101, + + AluOPRRR::Mul => 0b000, + AluOPRRR::Mulh => 0b001, + AluOPRRR::Mulhsu => 0b010, + AluOPRRR::Mulhu => 0b011, + AluOPRRR::Div => 0b100, + AluOPRRR::DivU => 0b101, + AluOPRRR::Rem => 0b110, + AluOPRRR::RemU => 0b111, + + AluOPRRR::Mulw => 0b000, + AluOPRRR::Divw => 0b100, + AluOPRRR::Divuw => 0b101, + AluOPRRR::Remw => 0b110, + AluOPRRR::Remuw => 0b111, + + // Zbb + AluOPRRR::Adduw => 0b000, + AluOPRRR::Andn => 0b111, + AluOPRRR::Bclr => 0b001, + AluOPRRR::Bext => 0b101, + AluOPRRR::Binv => 0b001, + AluOPRRR::Bset => 0b001, + AluOPRRR::Clmul => 0b001, + AluOPRRR::Clmulh => 0b011, + AluOPRRR::Clmulr => 0b010, + AluOPRRR::Max => 0b110, + AluOPRRR::Maxu => 0b111, + AluOPRRR::Min => 0b100, + AluOPRRR::Minu => 0b101, + AluOPRRR::Orn => 0b110, + AluOPRRR::Rol => 0b001, + AluOPRRR::Rolw => 0b001, + AluOPRRR::Ror => 0b101, + AluOPRRR::Rorw => 0b101, + AluOPRRR::Sh1add => 0b010, + AluOPRRR::Sh1adduw => 0b010, + AluOPRRR::Sh2add => 0b100, + AluOPRRR::Sh2adduw => 0b100, + AluOPRRR::Sh3add => 0b110, + AluOPRRR::Sh3adduw => 0b110, + AluOPRRR::Xnor => 0b100, + + // Zbkb + AluOPRRR::Pack => 0b100, + 
AluOPRRR::Packw => 0b100, + AluOPRRR::Packh => 0b111, + + // ZiCond + AluOPRRR::CzeroEqz => 0b101, + AluOPRRR::CzeroNez => 0b111, + } + } + + pub fn op_code(self) -> u32 { + match self { + AluOPRRR::Add + | AluOPRRR::Sub + | AluOPRRR::Sll + | AluOPRRR::Slt + | AluOPRRR::Sgt + | AluOPRRR::SltU + | AluOPRRR::Sgtu + | AluOPRRR::Xor + | AluOPRRR::Srl + | AluOPRRR::Sra + | AluOPRRR::Or + | AluOPRRR::And + | AluOPRRR::Pack + | AluOPRRR::Packh => 0b0110011, + + AluOPRRR::Addw + | AluOPRRR::Subw + | AluOPRRR::Sllw + | AluOPRRR::Srlw + | AluOPRRR::Sraw + | AluOPRRR::Packw => 0b0111011, + + AluOPRRR::Mul + | AluOPRRR::Mulh + | AluOPRRR::Mulhsu + | AluOPRRR::Mulhu + | AluOPRRR::Div + | AluOPRRR::DivU + | AluOPRRR::Rem + | AluOPRRR::RemU => 0b0110011, + + AluOPRRR::Mulw + | AluOPRRR::Divw + | AluOPRRR::Divuw + | AluOPRRR::Remw + | AluOPRRR::Remuw => 0b0111011, + + AluOPRRR::Adduw => 0b0111011, + AluOPRRR::Andn + | AluOPRRR::Bclr + | AluOPRRR::Bext + | AluOPRRR::Binv + | AluOPRRR::Bset + | AluOPRRR::Clmul + | AluOPRRR::Clmulh + | AluOPRRR::Clmulr + | AluOPRRR::Max + | AluOPRRR::Maxu + | AluOPRRR::Min + | AluOPRRR::Minu + | AluOPRRR::Orn + | AluOPRRR::Rol + | AluOPRRR::Ror + | AluOPRRR::Sh1add + | AluOPRRR::Sh2add + | AluOPRRR::Sh3add + | AluOPRRR::Xnor + | AluOPRRR::CzeroEqz + | AluOPRRR::CzeroNez => 0b0110011, + + AluOPRRR::Rolw + | AluOPRRR::Rorw + | AluOPRRR::Sh2adduw + | AluOPRRR::Sh3adduw + | AluOPRRR::Sh1adduw => 0b0111011, + } + } + + pub const fn funct7(self) -> u32 { + match self { + AluOPRRR::Add => 0b0000000, + AluOPRRR::Sub => 0b0100000, + AluOPRRR::Sll => 0b0000000, + AluOPRRR::Slt => 0b0000000, + AluOPRRR::Sgt => 0b0000000, + AluOPRRR::SltU => 0b0000000, + AluOPRRR::Sgtu => 0b0000000, + + AluOPRRR::Xor => 0b0000000, + AluOPRRR::Srl => 0b0000000, + AluOPRRR::Sra => 0b0100000, + AluOPRRR::Or => 0b0000000, + AluOPRRR::And => 0b0000000, + + AluOPRRR::Addw => 0b0000000, + AluOPRRR::Subw => 0b0100000, + AluOPRRR::Sllw => 0b0000000, + AluOPRRR::Srlw => 0b0000000, + AluOPRRR::Sraw => 0b0100000, + + AluOPRRR::Mul => 0b0000001, + AluOPRRR::Mulh => 0b0000001, + AluOPRRR::Mulhsu => 0b0000001, + AluOPRRR::Mulhu => 0b0000001, + AluOPRRR::Div => 0b0000001, + AluOPRRR::DivU => 0b0000001, + AluOPRRR::Rem => 0b0000001, + AluOPRRR::RemU => 0b0000001, + + AluOPRRR::Mulw => 0b0000001, + AluOPRRR::Divw => 0b0000001, + AluOPRRR::Divuw => 0b0000001, + AluOPRRR::Remw => 0b0000001, + AluOPRRR::Remuw => 0b0000001, + AluOPRRR::Adduw => 0b0000100, + AluOPRRR::Andn => 0b0100000, + AluOPRRR::Bclr => 0b0100100, + AluOPRRR::Bext => 0b0100100, + AluOPRRR::Binv => 0b0110100, + AluOPRRR::Bset => 0b0010100, + AluOPRRR::Clmul => 0b0000101, + AluOPRRR::Clmulh => 0b0000101, + AluOPRRR::Clmulr => 0b0000101, + AluOPRRR::Max => 0b0000101, + AluOPRRR::Maxu => 0b0000101, + AluOPRRR::Min => 0b0000101, + AluOPRRR::Minu => 0b0000101, + AluOPRRR::Orn => 0b0100000, + AluOPRRR::Rol => 0b0110000, + AluOPRRR::Rolw => 0b0110000, + AluOPRRR::Ror => 0b0110000, + AluOPRRR::Rorw => 0b0110000, + AluOPRRR::Sh1add => 0b0010000, + AluOPRRR::Sh1adduw => 0b0010000, + AluOPRRR::Sh2add => 0b0010000, + AluOPRRR::Sh2adduw => 0b0010000, + AluOPRRR::Sh3add => 0b0010000, + AluOPRRR::Sh3adduw => 0b0010000, + AluOPRRR::Xnor => 0b0100000, + + // Zbkb + AluOPRRR::Pack => 0b0000100, + AluOPRRR::Packw => 0b0000100, + AluOPRRR::Packh => 0b0000100, + + // ZiCond + AluOPRRR::CzeroEqz => 0b0000111, + AluOPRRR::CzeroNez => 0b0000111, + } + } + + pub(crate) fn reverse_rs(self) -> bool { + // special case. + // sgt and sgtu is not defined in isa. 
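+        // They are pseudo-instructions: `sgt rd, a, b` is `slt rd, b, a`, so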
+ // emit should reverse rs1 and rs2. + self == AluOPRRR::Sgt || self == AluOPRRR::Sgtu + } +} + +impl AluOPRRI { + pub(crate) fn option_funct6(self) -> Option { + let x: Option = match self { + Self::Slli => Some(0b00_0000), + Self::Srli => Some(0b00_0000), + Self::Srai => Some(0b01_0000), + Self::Bclri => Some(0b010010), + Self::Bexti => Some(0b010010), + Self::Binvi => Some(0b011010), + Self::Bseti => Some(0b001010), + Self::Rori => Some(0b011000), + Self::SlliUw => Some(0b000010), + _ => None, + }; + x + } + + pub(crate) fn option_funct7(self) -> Option { + let x = match self { + Self::Slliw => Some(0b000_0000), + Self::SrliW => Some(0b000_0000), + Self::Sraiw => Some(0b010_0000), + Self::Roriw => Some(0b0110000), + _ => None, + }; + x + } + + pub(crate) fn imm12(self, imm12: Imm12) -> u32 { + let x = imm12.bits(); + if let Some(func) = self.option_funct6() { + func << 6 | (x & 0b11_1111) + } else if let Some(func) = self.option_funct7() { + func << 5 | (x & 0b1_1111) + } else if let Some(func) = self.option_funct12() { + func + } else { + x + } + } + + pub(crate) fn option_funct12(self) -> Option { + match self { + Self::Clz => Some(0b011000000000), + Self::Clzw => Some(0b011000000000), + Self::Cpop => Some(0b011000000010), + Self::Cpopw => Some(0b011000000010), + Self::Ctz => Some(0b011000000001), + Self::Ctzw => Some(0b011000000001), + Self::Rev8 => Some(0b011010111000), + Self::Sextb => Some(0b011000000100), + Self::Sexth => Some(0b011000000101), + Self::Zexth => Some(0b000010000000), + Self::Orcb => Some(0b001010000111), + Self::Brev8 => Some(0b0110_1000_0111), + _ => None, + } + } + + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Addi => "addi", + Self::Slti => "slti", + Self::SltiU => "sltiu", + Self::Xori => "xori", + Self::Ori => "ori", + Self::Andi => "andi", + Self::Slli => "slli", + Self::Srli => "srli", + Self::Srai => "srai", + Self::Addiw => "addiw", + Self::Slliw => "slliw", + Self::SrliW => "srliw", + Self::Sraiw => "sraiw", + Self::Bclri => "bclri", + Self::Bexti => "bexti", + Self::Binvi => "binvi", + Self::Bseti => "bseti", + Self::Rori => "rori", + Self::Roriw => "roriw", + Self::SlliUw => "slli.uw", + Self::Clz => "clz", + Self::Clzw => "clzw", + Self::Cpop => "cpop", + Self::Cpopw => "cpopw", + Self::Ctz => "ctz", + Self::Ctzw => "ctzw", + Self::Rev8 => "rev8", + Self::Sextb => "sext.b", + Self::Sexth => "sext.h", + Self::Zexth => "zext.h", + Self::Orcb => "orc.b", + Self::Brev8 => "brev8", + } + } + + pub fn funct3(self) -> u32 { + match self { + AluOPRRI::Addi => 0b000, + AluOPRRI::Slti => 0b010, + AluOPRRI::SltiU => 0b011, + AluOPRRI::Xori => 0b100, + AluOPRRI::Ori => 0b110, + AluOPRRI::Andi => 0b111, + AluOPRRI::Slli => 0b001, + AluOPRRI::Srli => 0b101, + AluOPRRI::Srai => 0b101, + AluOPRRI::Addiw => 0b000, + AluOPRRI::Slliw => 0b001, + AluOPRRI::SrliW => 0b101, + AluOPRRI::Sraiw => 0b101, + AluOPRRI::Bclri => 0b001, + AluOPRRI::Bexti => 0b101, + AluOPRRI::Binvi => 0b001, + AluOPRRI::Bseti => 0b001, + AluOPRRI::Rori => 0b101, + AluOPRRI::Roriw => 0b101, + AluOPRRI::SlliUw => 0b001, + AluOPRRI::Clz => 0b001, + AluOPRRI::Clzw => 0b001, + AluOPRRI::Cpop => 0b001, + AluOPRRI::Cpopw => 0b001, + AluOPRRI::Ctz => 0b001, + AluOPRRI::Ctzw => 0b001, + AluOPRRI::Rev8 => 0b101, + AluOPRRI::Sextb => 0b001, + AluOPRRI::Sexth => 0b001, + AluOPRRI::Zexth => 0b100, + AluOPRRI::Orcb => 0b101, + AluOPRRI::Brev8 => 0b101, + } + } + + pub fn op_code(self) -> u32 { + match self { + AluOPRRI::Addi + | AluOPRRI::Slti + | AluOPRRI::SltiU + | AluOPRRI::Xori + | 
AluOPRRI::Ori + | AluOPRRI::Andi + | AluOPRRI::Slli + | AluOPRRI::Srli + | AluOPRRI::Srai + | AluOPRRI::Bclri + | AluOPRRI::Bexti + | AluOPRRI::Binvi + | AluOPRRI::Bseti + | AluOPRRI::Rori + | AluOPRRI::Clz + | AluOPRRI::Cpop + | AluOPRRI::Ctz + | AluOPRRI::Rev8 + | AluOPRRI::Sextb + | AluOPRRI::Sexth + | AluOPRRI::Orcb + | AluOPRRI::Brev8 => 0b0010011, + + AluOPRRI::Addiw + | AluOPRRI::Slliw + | AluOPRRI::SrliW + | AluOPRRI::Sraiw + | AluOPRRI::Roriw + | AluOPRRI::SlliUw + | AluOPRRI::Clzw + | AluOPRRI::Cpopw + | AluOPRRI::Ctzw => 0b0011011, + AluOPRRI::Zexth => 0b0111011, + } + } +} + +impl Default for FRM { + fn default() -> Self { + Self::Fcsr + } +} + +/// float rounding mode. +impl FRM { + pub(crate) fn to_static_str(self) -> &'static str { + match self { + FRM::RNE => "rne", + FRM::RTZ => "rtz", + FRM::RDN => "rdn", + FRM::RUP => "rup", + FRM::RMM => "rmm", + FRM::Fcsr => "fcsr", + } + } + + #[inline] + pub(crate) fn bits(self) -> u8 { + match self { + FRM::RNE => 0b000, + FRM::RTZ => 0b001, + FRM::RDN => 0b010, + FRM::RUP => 0b011, + FRM::RMM => 0b100, + FRM::Fcsr => 0b111, + } + } + pub(crate) fn as_u32(self) -> u32 { + self.bits() as u32 + } +} + +impl FFlagsException { + #[inline] + #[allow(dead_code)] + pub(crate) fn mask(self) -> u32 { + match self { + FFlagsException::NV => 1 << 4, + FFlagsException::DZ => 1 << 3, + FFlagsException::OF => 1 << 2, + FFlagsException::UF => 1 << 1, + FFlagsException::NX => 1 << 0, + } + } +} + +impl LoadOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Lb => "lb", + Self::Lh => "lh", + Self::Lw => "lw", + Self::Lbu => "lbu", + Self::Lhu => "lhu", + Self::Lwu => "lwu", + Self::Ld => "ld", + Self::Flh => "flh", + Self::Flw => "flw", + Self::Fld => "fld", + } + } + + pub(crate) fn from_type(ty: Type) -> Self { + match ty { + F16 => Self::Flh, + F32 => Self::Flw, + F64 => Self::Fld, + I8 => Self::Lb, + I16 => Self::Lh, + I32 => Self::Lw, + I64 => Self::Ld, + _ => unreachable!(), + } + } + + pub(crate) fn size(&self) -> i64 { + match self { + Self::Lb | Self::Lbu => 1, + Self::Lh | Self::Lhu | Self::Flh => 2, + Self::Lw | Self::Lwu | Self::Flw => 4, + Self::Ld | Self::Fld => 8, + } + } + + pub(crate) fn op_code(self) -> u32 { + match self { + Self::Lb | Self::Lh | Self::Lw | Self::Lbu | Self::Lhu | Self::Lwu | Self::Ld => { + 0b0000011 + } + Self::Flh | Self::Flw | Self::Fld => 0b0000111, + } + } + pub(crate) fn funct3(self) -> u32 { + match self { + Self::Lb => 0b000, + Self::Lh => 0b001, + Self::Lw => 0b010, + Self::Lwu => 0b110, + Self::Lbu => 0b100, + Self::Lhu => 0b101, + Self::Ld => 0b011, + Self::Flh => 0b001, + Self::Flw => 0b010, + Self::Fld => 0b011, + } + } +} + +impl StoreOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Sb => "sb", + Self::Sh => "sh", + Self::Sw => "sw", + Self::Sd => "sd", + Self::Fsh => "fsh", + Self::Fsw => "fsw", + Self::Fsd => "fsd", + } + } + pub(crate) fn from_type(ty: Type) -> Self { + match ty { + F16 => Self::Fsh, + F32 => Self::Fsw, + F64 => Self::Fsd, + I8 => Self::Sb, + I16 => Self::Sh, + I32 => Self::Sw, + I64 => Self::Sd, + _ => unreachable!(), + } + } + + pub(crate) fn size(&self) -> i64 { + match self { + Self::Sb => 1, + Self::Sh | Self::Fsh => 2, + Self::Sw | Self::Fsw => 4, + Self::Sd | Self::Fsd => 8, + } + } + + pub(crate) fn op_code(self) -> u32 { + match self { + Self::Sb | Self::Sh | Self::Sw | Self::Sd => 0b0100011, + Self::Fsh | Self::Fsw | Self::Fsd => 0b0100111, + } + } + pub(crate) fn funct3(self) -> u32 { + match self { + Self::Sb => 
0b000, + Self::Sh => 0b001, + Self::Sw => 0b010, + Self::Sd => 0b011, + Self::Fsh => 0b001, + Self::Fsw => 0b010, + Self::Fsd => 0b011, + } + } +} + +#[allow(dead_code)] +impl FClassResult { + pub(crate) const fn bit(self) -> u32 { + match self { + FClassResult::NegInfinite => 1 << 0, + FClassResult::NegNormal => 1 << 1, + FClassResult::NegSubNormal => 1 << 2, + FClassResult::NegZero => 1 << 3, + FClassResult::PosZero => 1 << 4, + FClassResult::PosSubNormal => 1 << 5, + FClassResult::PosNormal => 1 << 6, + FClassResult::PosInfinite => 1 << 7, + FClassResult::SNaN => 1 << 8, + FClassResult::QNaN => 1 << 9, + } + } + + #[inline] + pub(crate) const fn is_nan_bits() -> u32 { + Self::SNaN.bit() | Self::QNaN.bit() + } + #[inline] + pub(crate) fn is_zero_bits() -> u32 { + Self::NegZero.bit() | Self::PosZero.bit() + } + + #[inline] + pub(crate) fn is_infinite_bits() -> u32 { + Self::PosInfinite.bit() | Self::NegInfinite.bit() + } +} + +impl AtomicOP { + #[inline] + pub(crate) fn is_load(self) -> bool { + match self { + Self::LrW | Self::LrD => true, + _ => false, + } + } + + #[inline] + pub(crate) fn op_name(self, amo: AMO) -> String { + let s = match self { + Self::LrW => "lr.w", + Self::ScW => "sc.w", + + Self::AmoswapW => "amoswap.w", + Self::AmoaddW => "amoadd.w", + Self::AmoxorW => "amoxor.w", + Self::AmoandW => "amoand.w", + Self::AmoorW => "amoor.w", + Self::AmominW => "amomin.w", + Self::AmomaxW => "amomax.w", + Self::AmominuW => "amominu.w", + Self::AmomaxuW => "amomaxu.w", + Self::LrD => "lr.d", + Self::ScD => "sc.d", + Self::AmoswapD => "amoswap.d", + Self::AmoaddD => "amoadd.d", + Self::AmoxorD => "amoxor.d", + Self::AmoandD => "amoand.d", + Self::AmoorD => "amoor.d", + Self::AmominD => "amomin.d", + Self::AmomaxD => "amomax.d", + Self::AmominuD => "amominu.d", + Self::AmomaxuD => "amomaxu.d", + }; + format!("{}{}", s, amo.to_static_str()) + } + #[inline] + pub(crate) fn op_code(self) -> u32 { + 0b0101111 + } + + #[inline] + pub(crate) fn funct7(self, amo: AMO) -> u32 { + self.funct5() << 2 | amo.as_u32() & 0b11 + } + + pub(crate) fn funct3(self) -> u32 { + match self { + AtomicOP::LrW + | AtomicOP::ScW + | AtomicOP::AmoswapW + | AtomicOP::AmoaddW + | AtomicOP::AmoxorW + | AtomicOP::AmoandW + | AtomicOP::AmoorW + | AtomicOP::AmominW + | AtomicOP::AmomaxW + | AtomicOP::AmominuW + | AtomicOP::AmomaxuW => 0b010, + AtomicOP::LrD + | AtomicOP::ScD + | AtomicOP::AmoswapD + | AtomicOP::AmoaddD + | AtomicOP::AmoxorD + | AtomicOP::AmoandD + | AtomicOP::AmoorD + | AtomicOP::AmominD + | AtomicOP::AmomaxD + | AtomicOP::AmominuD + | AtomicOP::AmomaxuD => 0b011, + } + } + pub(crate) fn funct5(self) -> u32 { + match self { + AtomicOP::LrW => 0b00010, + AtomicOP::ScW => 0b00011, + AtomicOP::AmoswapW => 0b00001, + AtomicOP::AmoaddW => 0b00000, + AtomicOP::AmoxorW => 0b00100, + AtomicOP::AmoandW => 0b01100, + AtomicOP::AmoorW => 0b01000, + AtomicOP::AmominW => 0b10000, + AtomicOP::AmomaxW => 0b10100, + AtomicOP::AmominuW => 0b11000, + AtomicOP::AmomaxuW => 0b11100, + AtomicOP::LrD => 0b00010, + AtomicOP::ScD => 0b00011, + AtomicOP::AmoswapD => 0b00001, + AtomicOP::AmoaddD => 0b00000, + AtomicOP::AmoxorD => 0b00100, + AtomicOP::AmoandD => 0b01100, + AtomicOP::AmoorD => 0b01000, + AtomicOP::AmominD => 0b10000, + AtomicOP::AmomaxD => 0b10100, + AtomicOP::AmominuD => 0b11000, + AtomicOP::AmomaxuD => 0b11100, + } + } + + pub(crate) fn load_op(t: Type) -> Self { + if t.bits() <= 32 { + Self::LrW + } else { + Self::LrD + } + } + pub(crate) fn store_op(t: Type) -> Self { + if t.bits() <= 32 { + Self::ScW + } 
else { + Self::ScD + } + } + + /// extract + pub(crate) fn extract(rd: WritableReg, offset: Reg, rs: Reg, ty: Type) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: rd, + rs1: rs, + rs2: offset, + }); + // + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts + } + + /// like extract but sign extend the value. + /// suitable for smax,etc. + pub(crate) fn extract_sext( + rd: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: rd, + rs1: rs, + rs2: offset, + }); + // + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: true, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts + } + + pub(crate) fn unset( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + ty: Type, + ) -> SmallInstVec { + assert!(rd != tmp); + let mut insts = SmallInstVec::new(); + insts.extend(Inst::load_int_mask(tmp, ty)); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: tmp, + rs1: tmp.to_reg(), + rs2: offset, + }); + insts.push(Inst::construct_bit_not(tmp, tmp.to_reg())); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp.to_reg(), + }); + insts + } + + pub(crate) fn set( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + assert!(rd != tmp); + let mut insts = SmallInstVec::new(); + // make rs into tmp. + insts.push(Inst::Extend { + rd: tmp, + rn: rs, + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: tmp, + rs1: tmp.to_reg(), + rs2: offset, + }); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp.to_reg(), + }); + insts + } + + /// Merge reset part of rs into rd. + /// Call this function must make sure that other part of value is already in rd. + pub(crate) fn merge( + rd: WritableReg, + tmp: WritableReg, + offset: Reg, + rs: Reg, + ty: Type, + ) -> SmallInstVec { + let mut insts = Self::unset(rd, tmp, offset, ty); + insts.extend(Self::set(rd, tmp, offset, rs, ty)); + insts + } +} + +///Atomic Memory ordering. +#[derive(Copy, Clone, Debug)] +pub enum AMO { + Relax = 0b00, + Release = 0b01, + Aquire = 0b10, + SeqCst = 0b11, +} + +impl AMO { + pub(crate) fn to_static_str(self) -> &'static str { + match self { + AMO::Relax => "", + AMO::Release => ".rl", + AMO::Aquire => ".aq", + AMO::SeqCst => ".aqrl", + } + } + pub(crate) fn as_u32(self) -> u32 { + self as u32 + } +} + +impl Inst { + /// fence request bits. 
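+ /// The bits follow the RISC-V `fence` operand order `i`, `o`, `r`, `w`
+ /// (most- to least-significant). For example, the full `fence rw, rw`
+ /// barrier emitted before an atomic load below uses
+ /// `pred = succ = FENCE_REQ_R | FENCE_REQ_W` (i.e. `0b0011`), which
+ /// `fence_req_to_string` renders as "rw".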
+ pub(crate) const FENCE_REQ_I: u8 = 1 << 3; + pub(crate) const FENCE_REQ_O: u8 = 1 << 2; + pub(crate) const FENCE_REQ_R: u8 = 1 << 1; + pub(crate) const FENCE_REQ_W: u8 = 1 << 0; + pub(crate) fn fence_req_to_string(x: u8) -> String { + let mut s = String::default(); + if x & Self::FENCE_REQ_I != 0 { + s.push_str("i"); + } + if x & Self::FENCE_REQ_O != 0 { + s.push_str("o"); + } + if x & Self::FENCE_REQ_R != 0 { + s.push_str("r"); + } + if x & Self::FENCE_REQ_W != 0 { + s.push_str("w"); + } + s + } +} + +pub(crate) fn f32_cvt_to_int_bounds(signed: bool, out_bits: u32) -> (f32, f32) { + match (signed, out_bits) { + (true, 8) => (i8::min_value() as f32 - 1., i8::max_value() as f32 + 1.), + (true, 16) => (i16::min_value() as f32 - 1., i16::max_value() as f32 + 1.), + (true, 32) => (-2147483904.0, 2147483648.0), + (true, 64) => (-9223373136366403584.0, 9223372036854775808.0), + (false, 8) => (-1., u8::max_value() as f32 + 1.), + (false, 16) => (-1., u16::max_value() as f32 + 1.), + (false, 32) => (-1., 4294967296.0), + (false, 64) => (-1., 18446744073709551616.0), + _ => unreachable!(), + } +} + +pub(crate) fn f64_cvt_to_int_bounds(signed: bool, out_bits: u32) -> (f64, f64) { + match (signed, out_bits) { + (true, 8) => (i8::min_value() as f64 - 1., i8::max_value() as f64 + 1.), + (true, 16) => (i16::min_value() as f64 - 1., i16::max_value() as f64 + 1.), + (true, 32) => (-2147483649.0, 2147483648.0), + (true, 64) => (-9223372036854777856.0, 9223372036854775808.0), + (false, 8) => (-1., u8::max_value() as f64 + 1.), + (false, 16) => (-1., u16::max_value() as f64 + 1.), + (false, 32) => (-1., 4294967296.0), + (false, 64) => (-1., 18446744073709551616.0), + _ => unreachable!(), + } +} + +impl CsrRegOP { + pub(crate) fn funct3(self) -> u32 { + match self { + CsrRegOP::CsrRW => 0b001, + CsrRegOP::CsrRS => 0b010, + CsrRegOP::CsrRC => 0b011, + } + } + + pub(crate) fn opcode(self) -> u32 { + 0b1110011 + } + + pub(crate) fn name(self) -> &'static str { + match self { + CsrRegOP::CsrRW => "csrrw", + CsrRegOP::CsrRS => "csrrs", + CsrRegOP::CsrRC => "csrrc", + } + } +} + +impl Display for CsrRegOP { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.name()) + } +} + +impl CsrImmOP { + pub(crate) fn funct3(self) -> u32 { + match self { + CsrImmOP::CsrRWI => 0b101, + CsrImmOP::CsrRSI => 0b110, + CsrImmOP::CsrRCI => 0b111, + } + } + + pub(crate) fn opcode(self) -> u32 { + 0b1110011 + } + + pub(crate) fn name(self) -> &'static str { + match self { + CsrImmOP::CsrRWI => "csrrwi", + CsrImmOP::CsrRSI => "csrrsi", + CsrImmOP::CsrRCI => "csrrci", + } + } +} + +impl Display for CsrImmOP { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.name()) + } +} + +impl CSR { + pub(crate) fn bits(self) -> Imm12 { + Imm12::from_i16(match self { + CSR::Frm => 0x0002, + }) + } + + pub(crate) fn name(self) -> &'static str { + match self { + CSR::Frm => "frm", + } + } +} + +impl Display for CSR { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.name()) + } +} + +impl COpcodeSpace { + pub fn bits(&self) -> u32 { + match self { + COpcodeSpace::C0 => 0b00, + COpcodeSpace::C1 => 0b01, + COpcodeSpace::C2 => 0b10, + } + } +} + +impl CrOp { + pub fn funct4(&self) -> u32 { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + // `c.jr` has the same op/funct4 as C.MV, but RS2 is 0, which is illegal for mv. 
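+ // Similarly, `c.jalr` and `c.ebreak` share funct4 0b1001 with `c.add` and
+ // are distinguished only by their register fields (rs2 = x0 for c.jalr,
+ // rd = rs2 = x0 for c.ebreak), e.g. `c.add a0, a1` vs. `c.jalr a1`.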
+ CrOp::CMv | CrOp::CJr => 0b1000, + CrOp::CAdd | CrOp::CJalr | CrOp::CEbreak => 0b1001, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CrOp::CMv | CrOp::CAdd | CrOp::CJr | CrOp::CJalr | CrOp::CEbreak => COpcodeSpace::C2, + } + } +} + +impl CaOp { + pub fn funct2(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CaOp::CAnd => 0b11, + CaOp::COr => 0b10, + CaOp::CXor => 0b01, + CaOp::CSub => 0b00, + CaOp::CAddw => 0b01, + CaOp::CSubw => 0b00, + CaOp::CMul => 0b10, + } + } + + pub fn funct6(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CaOp::CAnd | CaOp::COr | CaOp::CXor | CaOp::CSub => 0b100_011, + CaOp::CSubw | CaOp::CAddw | CaOp::CMul => 0b100_111, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CaOp::CAnd + | CaOp::COr + | CaOp::CXor + | CaOp::CSub + | CaOp::CAddw + | CaOp::CSubw + | CaOp::CMul => COpcodeSpace::C1, + } + } +} + +impl CjOp { + pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CjOp::CJ => 0b101, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CjOp::CJ => COpcodeSpace::C1, + } + } +} + +impl CiOp { + pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CiOp::CAddi | CiOp::CSlli => 0b000, + CiOp::CAddiw | CiOp::CFldsp => 0b001, + CiOp::CLi | CiOp::CLwsp => 0b010, + CiOp::CAddi16sp | CiOp::CLui | CiOp::CLdsp => 0b011, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CiOp::CAddi | CiOp::CAddiw | CiOp::CAddi16sp | CiOp::CLi | CiOp::CLui => { + COpcodeSpace::C1 + } + CiOp::CSlli | CiOp::CLwsp | CiOp::CLdsp | CiOp::CFldsp => COpcodeSpace::C2, + } + } +} + +impl CiwOp { + pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CiwOp::CAddi4spn => 0b000, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CiwOp::CAddi4spn => COpcodeSpace::C0, + } + } +} + +impl CbOp { + pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CbOp::CSrli | CbOp::CSrai | CbOp::CAndi => 0b100, + } + } + + pub fn funct2(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CbOp::CSrli => 0b00, + CbOp::CSrai => 0b01, + CbOp::CAndi => 0b10, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CbOp::CSrli | CbOp::CSrai | CbOp::CAndi => COpcodeSpace::C1, + } + } +} + +impl CssOp { + pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CssOp::CFsdsp => 0b101, + CssOp::CSwsp => 0b110, + CssOp::CSdsp => 0b111, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CssOp::CSwsp | CssOp::CSdsp | CssOp::CFsdsp => COpcodeSpace::C2, + } + } +} + +impl CsOp { 
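+ // CS-format stores write a compressible source register to a compressible
+ // base register plus a small scaled offset; they reuse the funct3 values of
+ // the corresponding CSS stack-pointer stores above
+ // (c.fsd/c.sw/c.sd = 0b101/0b110/0b111).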
+ pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CsOp::CFsd => 0b101, + CsOp::CSw => 0b110, + CsOp::CSd => 0b111, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CsOp::CSw | CsOp::CSd | CsOp::CFsd => COpcodeSpace::C0, + } + } +} + +impl ClOp { + pub fn funct3(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + ClOp::CFld => 0b001, + ClOp::CLw => 0b010, + ClOp::CLd => 0b011, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + ClOp::CLw | ClOp::CLd | ClOp::CFld => COpcodeSpace::C0, + } + } +} + +impl CsznOp { + pub fn funct6(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CsznOp::CNot + | CsznOp::CZextw + | CsznOp::CZextb + | CsznOp::CZexth + | CsznOp::CSextb + | CsznOp::CSexth => 0b100_111, + } + } + + pub fn funct5(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + CsznOp::CNot => 0b11_101, + CsznOp::CZextb => 0b11_000, + CsznOp::CZexth => 0b11_010, + CsznOp::CZextw => 0b11_100, + CsznOp::CSextb => 0b11_001, + CsznOp::CSexth => 0b11_011, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + CsznOp::CNot + | CsznOp::CZextb + | CsznOp::CZexth + | CsznOp::CZextw + | CsznOp::CSextb + | CsznOp::CSexth => COpcodeSpace::C1, + } + } +} + +impl ZcbMemOp { + pub fn funct6(&self) -> u32 { + // https://github.com/michaeljclark/riscv-meta/blob/master/opcodes + match self { + ZcbMemOp::CLbu => 0b100_000, + // These two opcodes are differentiated in the imm field of the instruction. + ZcbMemOp::CLhu | ZcbMemOp::CLh => 0b100_001, + ZcbMemOp::CSb => 0b100_010, + ZcbMemOp::CSh => 0b100_011, + } + } + + pub fn imm_bits(&self) -> u8 { + match self { + ZcbMemOp::CLhu | ZcbMemOp::CLh | ZcbMemOp::CSh => 1, + ZcbMemOp::CLbu | ZcbMemOp::CSb => 2, + } + } + + pub fn op(&self) -> COpcodeSpace { + // https://five-embeddev.com/riscv-isa-manual/latest/rvc-opcode-map.html#rvcopcodemap + match self { + ZcbMemOp::CLbu | ZcbMemOp::CLhu | ZcbMemOp::CLh | ZcbMemOp::CSb | ZcbMemOp::CSh => { + COpcodeSpace::C0 + } + } + } +} diff --git a/hbcb/src/inst/emit.rs b/hbcb/src/inst/emit.rs new file mode 100644 index 0000000..96e21a1 --- /dev/null +++ b/hbcb/src/inst/emit.rs @@ -0,0 +1,2685 @@ +//! Riscv64 ISA: binary code emission. 
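+//!
+//! Emission first tries the 16-bit compressed (RVC) form of each instruction
+//! and falls back to the full 32-bit encoding when no compressed form
+//! applies. Roughly (a sketch of the flow in `MachInstEmit::emit` below):
+//!
+//! ```text
+//! if inst.try_emit_compressed(sink, emit_info, state, &mut start_off).is_none() {
+//!     inst.emit_uncompressed(sink, emit_info, state, &mut start_off);
+//! }
+//! ```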
+ +use crate::ir::{self, LibCall, TrapCode}; +use crate::inst::*; +use crate::lower::isle::generated_code::{ + CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp, +}; +use cranelift_control::ControlPlane; + +pub struct EmitInfo { + shared_flag: settings::Flags, + isa_flags: super::super::riscv_settings::Flags, +} + +impl EmitInfo { + pub(crate) fn new( + shared_flag: settings::Flags, + isa_flags: super::super::riscv_settings::Flags, + ) -> Self { + Self { + shared_flag, + isa_flags, + } + } +} + +pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 { + u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() +} + +pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 { + let real_reg = m.to_real_reg().unwrap().hw_enc(); + debug_assert!(real_reg >= 8 && real_reg < 16); + let compressed_reg = real_reg - 8; + u32::try_from(compressed_reg).unwrap() +} + +#[derive(Clone, Debug, PartialEq, Default)] +pub enum EmitVState { + #[default] + Unknown, + Known(VState), +} + +/// State carried between emissions of a sequence of instructions. +#[derive(Default, Clone, Debug)] +pub struct EmitState { + /// The user stack map for the upcoming instruction, as provided to + /// `pre_safepoint()`. + user_stack_map: Option, + + /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and + /// optimized away at compiletime. See [cranelift_control]. + ctrl_plane: ControlPlane, + + /// Vector State + /// Controls the current state of the vector unit at the emission point. + vstate: EmitVState, + + frame_layout: FrameLayout, +} + +impl EmitState { + fn take_stack_map(&mut self) -> Option { + self.user_stack_map.take() + } +} + +impl MachInstEmitState for EmitState { + fn new( + abi: &Callee, + ctrl_plane: ControlPlane, + ) -> Self { + EmitState { + user_stack_map: None, + ctrl_plane, + vstate: EmitVState::Unknown, + frame_layout: abi.frame_layout().clone(), + } + } + + fn pre_safepoint(&mut self, user_stack_map: Option) { + self.user_stack_map = user_stack_map; + } + + fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { + &mut self.ctrl_plane + } + + fn take_ctrl_plane(self) -> ControlPlane { + self.ctrl_plane + } + + fn on_new_block(&mut self) { + // Reset the vector state. + self.vstate = EmitVState::Unknown; + } + + fn frame_layout(&self) -> &FrameLayout { + &self.frame_layout + } +} + +impl Inst { + /// Load int mask. + /// If ty is int then 0xff in rd. + pub(crate) fn load_int_mask(rd: Writable, ty: Type) -> SmallInstVec { + let mut insts = SmallInstVec::new(); + assert!(ty.is_int() && ty.bits() <= 64); + match ty { + I64 => { + insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1))); + } + I32 | I16 => { + insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1))); + insts.push(Inst::Extend { + rd: rd, + rn: rd.to_reg(), + signed: false, + from_bits: ty.bits() as u8, + to_bits: 64, + }); + } + I8 => { + insts.push(Inst::load_imm12(rd, Imm12::from_i16(255))); + } + _ => unreachable!("ty:{:?}", ty), + } + insts + } + /// inverse all bit + pub(crate) fn construct_bit_not(rd: Writable, rs: Reg) -> Inst { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Xori, + rd, + rs, + imm12: Imm12::from_i16(-1), + } + } + + /// Returns Some(VState) if this instruction is expecting a specific vector state + /// before emission. + fn expected_vstate(&self) -> Option<&VState> { + match self { + Inst::Nop0 + | Inst::Nop4 + | Inst::BrTable { .. } + | Inst::Auipc { .. } + | Inst::Fli { .. } + | Inst::Lui { .. } + | Inst::LoadInlineConst { .. } + | Inst::AluRRR { .. } + | Inst::FpuRRR { .. 
} + | Inst::AluRRImm12 { .. } + | Inst::CsrReg { .. } + | Inst::CsrImm { .. } + | Inst::Load { .. } + | Inst::Store { .. } + | Inst::Args { .. } + | Inst::Rets { .. } + | Inst::Ret { .. } + | Inst::Extend { .. } + | Inst::Call { .. } + | Inst::CallInd { .. } + | Inst::ReturnCall { .. } + | Inst::ReturnCallInd { .. } + | Inst::Jal { .. } + | Inst::CondBr { .. } + | Inst::LoadExtName { .. } + | Inst::ElfTlsGetAddr { .. } + | Inst::LoadAddr { .. } + | Inst::Mov { .. } + | Inst::MovFromPReg { .. } + | Inst::Fence { .. } + | Inst::EBreak + | Inst::Udf { .. } + | Inst::FpuRR { .. } + | Inst::FpuRRRR { .. } + | Inst::Jalr { .. } + | Inst::Atomic { .. } + | Inst::Select { .. } + | Inst::AtomicCas { .. } + | Inst::RawData { .. } + | Inst::AtomicStore { .. } + | Inst::AtomicLoad { .. } + | Inst::AtomicRmwLoop { .. } + | Inst::TrapIf { .. } + | Inst::Unwind { .. } + | Inst::DummyUse { .. } + | Inst::Popcnt { .. } + | Inst::Cltz { .. } + | Inst::Brev8 { .. } + | Inst::StackProbeLoop { .. } => None, + + // VecSetState does not expect any vstate, rather it updates it. + Inst::VecSetState { .. } => None, + + // `vmv` instructions copy a set of registers and ignore vstate. + Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None, + + Inst::VecAluRR { vstate, .. } | + Inst::VecAluRRR { vstate, .. } | + Inst::VecAluRRRR { vstate, .. } | + Inst::VecAluRImm5 { vstate, .. } | + Inst::VecAluRRImm5 { vstate, .. } | + Inst::VecAluRRRImm5 { vstate, .. } | + // TODO: Unit-stride loads and stores only need the AVL to be correct, not + // the full vtype. A future optimization could be to decouple these two when + // updating vstate. This would allow us to avoid emitting a VecSetState in + // some cases. + Inst::VecLoad { vstate, .. } + | Inst::VecStore { vstate, .. } => Some(vstate), + } + } +} + +impl MachInstEmit for Inst { + type State = EmitState; + type Info = EmitInfo; + + fn emit(&self, sink: &mut MachBuffer, emit_info: &Self::Info, state: &mut EmitState) { + // Check if we need to update the vector state before emitting this instruction + if let Some(expected) = self.expected_vstate() { + if state.vstate != EmitVState::Known(*expected) { + // Update the vector state. + Inst::VecSetState { + rd: writable_zero_reg(), + vstate: *expected, + } + .emit(sink, emit_info, state); + } + } + + // N.B.: we *must* not exceed the "worst-case size" used to compute + // where to insert islands, except when islands are explicitly triggered + // (with an `EmitIsland`). We check this in debug builds. This is `mut` + // to allow disabling the check for `JTSequence`, which is always + // emitted following an `EmitIsland`. + let mut start_off = sink.cur_offset(); + + // First try to emit this as a compressed instruction + let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off); + if res.is_none() { + // If we can't lets emit it as a normal instruction + self.emit_uncompressed(sink, emit_info, state, &mut start_off); + } + + // We exclude br_table and return call from these checks since they emit + // their own islands, and thus are allowed to exceed the worst case size. + if !matches!( + self, + Inst::BrTable { .. } | Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. 
} + ) { + let end_off = sink.cur_offset(); + assert!( + (end_off - start_off) <= Inst::worst_case_size(), + "Inst:{:?} length:{} worst_case_size:{}", + self, + end_off - start_off, + Inst::worst_case_size() + ); + } + } + + fn pretty_print_inst(&self, state: &mut Self::State) -> String { + self.print_with_state(state) + } +} + +impl Inst { + /// Tries to emit an instruction as compressed, if we can't return false. + fn try_emit_compressed( + &self, + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, + start_off: &mut u32, + ) -> Option<()> { + let has_m = emit_info.isa_flags.has_m(); + let has_zba = emit_info.isa_flags.has_zba(); + let has_zbb = emit_info.isa_flags.has_zbb(); + let has_zca = emit_info.isa_flags.has_zca(); + let has_zcb = emit_info.isa_flags.has_zcb(); + let has_zcd = emit_info.isa_flags.has_zcd(); + + // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc..) require Zca + // to be enabled, so check it early. + if !has_zca { + return None; + } + + fn reg_is_compressible(r: Reg) -> bool { + r.to_real_reg() + .map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16) + .unwrap_or(false) + } + + match *self { + // C.ADD + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd, + rs1, + rs2, + } if (rd.to_reg() == rs1 || rd.to_reg() == rs2) + && rs1 != zero_reg() + && rs2 != zero_reg() => + { + // Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can + // also swap rs1 with rs2 and we get an equivalent instruction. i.e we + // can also compress `add rd, rs, rd` into `c.add rd, rs`. + let src = if rd.to_reg() == rs1 { rs2 } else { rs1 }; + + sink.put2(encode_cr_type(CrOp::CAdd, rd, src)); + } + + // C.MV + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi | AluOPRRI::Ori, + rd, + rs, + imm12, + } if rd.to_reg() != rs + && rd.to_reg() != zero_reg() + && rs != zero_reg() + && imm12.as_i16() == 0 => + { + sink.put2(encode_cr_type(CrOp::CMv, rd, rs)); + } + + // CA Ops + Inst::AluRRR { + alu_op: + alu_op @ (AluOPRRR::And + | AluOPRRR::Or + | AluOPRRR::Xor + | AluOPRRR::Addw + | AluOPRRR::Mul), + rd, + rs1, + rs2, + } if (rd.to_reg() == rs1 || rd.to_reg() == rs2) + && reg_is_compressible(rs1) + && reg_is_compressible(rs2) => + { + let op = match alu_op { + AluOPRRR::And => CaOp::CAnd, + AluOPRRR::Or => CaOp::COr, + AluOPRRR::Xor => CaOp::CXor, + AluOPRRR::Addw => CaOp::CAddw, + AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul, + _ => return None, + }; + // The canonical expansion for these instruction has `rd == rs1`, but + // these are all commutative operations, so we can swap the operands. + let src = if rd.to_reg() == rs1 { rs2 } else { rs1 }; + + sink.put2(encode_ca_type(op, rd, src)); + } + + // The sub instructions are non commutative, so we can't swap the operands. 
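+ // For example `sub a0, a0, a1` can still be compressed to `c.sub a0, a1`,
+ // but `sub a0, a1, a0` cannot, because c.sub always expands to
+ // `sub rd, rd, rs2`.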
+ Inst::AluRRR { + alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw), + rd, + rs1, + rs2, + } if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => { + let op = match alu_op { + AluOPRRR::Sub => CaOp::CSub, + AluOPRRR::Subw => CaOp::CSubw, + _ => return None, + }; + sink.put2(encode_ca_type(op, rd, rs2)); + } + + // c.j + // + // We don't have a separate JAL as that is only available in RV32C + Inst::Jal { label } => { + sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump); + sink.add_uncond_branch(*start_off, *start_off + 2, label); + sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO)); + } + + // c.jr + Inst::Jalr { rd, base, offset } + if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 => + { + sink.put2(encode_cr2_type(CrOp::CJr, base)); + } + + // c.jalr + Inst::Jalr { rd, base, offset } + if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 => + { + sink.put2(encode_cr2_type(CrOp::CJalr, base)); + } + + // c.ebreak + Inst::EBreak => { + sink.put2(encode_cr_type( + CrOp::CEbreak, + writable_zero_reg(), + zero_reg(), + )); + } + + // c.unimp + Inst::Udf { trap_code } => { + sink.add_trap(trap_code); + sink.put2(0x0000); + } + // c.addi16sp + // + // c.addi16sp shares the opcode with c.lui, but has a destination field of x2. + // c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2), + // where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used + // to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp + // is only valid when nzimm≠0; the code point with nzimm=0 is reserved. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } if rd.to_reg() == rs + && rs == stack_reg() + && imm12.as_i16() != 0 + && (imm12.as_i16() % 16) == 0 + && Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() => + { + let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap(); + sink.put2(encode_c_addi16sp(imm6)); + } + + // c.addi4spn + // + // c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero + // immediate, scaled by 4, to the stack pointer, x2, and writes the result to + // rd. This instruction is used to generate pointers to stack-allocated variables + // and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0; + // the code points with nzuimm=0 are reserved. 
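+ // For example `addi a0, sp, 16` (a non-zero multiple of 4 written to a
+ // compressible register) can be emitted as `c.addi4spn a0, sp, 16`, with
+ // 16 / 4 = 4 stored in the scaled immediate field.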
+ Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } if reg_is_compressible(rd.to_reg()) + && rs == stack_reg() + && imm12.as_i16() != 0 + && (imm12.as_i16() % 4) == 0 + && u8::try_from(imm12.as_i16() / 4).is_ok() => + { + let imm = u8::try_from(imm12.as_i16() / 4).unwrap(); + sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm)); + } + + // c.li + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } if rd.to_reg() != zero_reg() && rs == zero_reg() => { + let imm6 = Imm6::maybe_from_imm12(imm12)?; + sink.put2(encode_ci_type(CiOp::CLi, rd, imm6)); + } + + // c.addi + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => { + let imm6 = Imm6::maybe_from_imm12(imm12)?; + sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6)); + } + + // c.addiw + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addiw, + rd, + rs, + imm12, + } if rd.to_reg() == rs && rs != zero_reg() => { + let imm6 = Imm6::maybe_from_imm12(imm12)?; + sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6)); + } + + // c.lui + // + // c.lui loads the non-zero 6-bit immediate field into bits 17–12 + // of the destination register, clears the bottom 12 bits, and + // sign-extends bit 17 into all higher bits of the destination. + Inst::Lui { rd, imm: imm20 } + if rd.to_reg() != zero_reg() + && rd.to_reg() != stack_reg() + && imm20.as_i32() != 0 => + { + // Check that the top bits are sign extended + let imm = imm20.as_i32() << 14 >> 14; + if imm != imm20.as_i32() { + return None; + } + let imm6 = Imm6::maybe_from_i32(imm)?; + sink.put2(encode_ci_type(CiOp::CLui, rd, imm6)); + } + + // c.slli + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd, + rs, + imm12, + } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => { + // The shift amount is unsigned, but we encode it as signed. + let shift = imm12.as_i16() & 0x3f; + let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap(); + sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6)); + } + + // c.srli / c.srai + Inst::AluRRImm12 { + alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai), + rd, + rs, + imm12, + } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => { + let op = match op { + AluOPRRI::Srli => CbOp::CSrli, + AluOPRRI::Srai => CbOp::CSrai, + _ => unreachable!(), + }; + + // The shift amount is unsigned, but we encode it as signed. + let shift = imm12.as_i16() & 0x3f; + let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap(); + sink.put2(encode_cb_type(op, rd, imm6)); + } + + // c.zextb + // + // This is an alias for `andi rd, rd, 0xff` + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd, + rs, + imm12, + } if has_zcb + && rd.to_reg() == rs + && reg_is_compressible(rs) + && imm12.as_i16() == 0xff => + { + sink.put2(encode_cszn_type(CsznOp::CZextb, rd)); + } + + // c.andi + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd, + rs, + imm12, + } if rd.to_reg() == rs && reg_is_compressible(rs) => { + let imm6 = Imm6::maybe_from_imm12(imm12)?; + sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6)); + } + + // Stack Based Loads + Inst::Load { + rd, + op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld), + from, + flags, + } if from.get_base_register() == Some(stack_reg()) + && (from.get_offset_with_state(state) % op.size()) == 0 => + { + // We encode the offset in multiples of the load size. 
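+ // For example an 8-byte `ld a0, 24(sp)` stores 24 / 8 = 3 in the
+ // compressed immediate field of `c.ldsp`.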
+ let offset = from.get_offset_with_state(state); + let imm6 = u8::try_from(offset / op.size()) + .ok() + .and_then(Uimm6::maybe_from_u8)?; + + // Some additional constraints on these instructions. + // + // Integer loads are not allowed to target x0, but floating point loads + // are, since f0 is not a special register. + // + // Floating point loads are not included in the base Zca extension + // but in a separate Zcd extension. Both of these are part of the C Extension. + let rd_is_zero = rd.to_reg() == zero_reg(); + let op = match op { + LoadOP::Lw if !rd_is_zero => CiOp::CLwsp, + LoadOP::Ld if !rd_is_zero => CiOp::CLdsp, + LoadOP::Fld if has_zcd => CiOp::CFldsp, + _ => return None, + }; + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + sink.put2(encode_ci_sp_load(op, rd, imm6)); + } + + // Regular Loads + Inst::Load { + rd, + op: + op + @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh), + from, + flags, + } if reg_is_compressible(rd.to_reg()) + && from + .get_base_register() + .map(reg_is_compressible) + .unwrap_or(false) + && (from.get_offset_with_state(state) % op.size()) == 0 => + { + let base = from.get_base_register().unwrap(); + + // We encode the offset in multiples of the store size. + let offset = from.get_offset_with_state(state); + let offset = u8::try_from(offset / op.size()).ok()?; + + // We mix two different formats here. + // + // c.lw / c.ld / c.fld instructions are available in the standard Zca + // extension using the CL format. + // + // c.lbu / c.lhu / c.lh are only available in the Zcb extension and + // are also encoded differently. Technically they each have a different + // format, but they are similar enough that we can group them. + let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh); + let encoded = if is_zcb_load { + if !has_zcb { + return None; + } + + let op = match op { + LoadOP::Lbu => ZcbMemOp::CLbu, + LoadOP::Lhu => ZcbMemOp::CLhu, + LoadOP::Lh => ZcbMemOp::CLh, + _ => unreachable!(), + }; + + // Byte stores & loads have 2 bits of immediate offset. Halfword stores + // and loads only have 1 bit. + let imm2 = Uimm2::maybe_from_u8(offset)?; + if (offset & !((1 << op.imm_bits()) - 1)) != 0 { + return None; + } + + encode_zcbmem_load(op, rd, base, imm2) + } else { + // Floating point loads are not included in the base Zca extension + // but in a separate Zcd extension. Both of these are part of the C Extension. + let op = match op { + LoadOP::Lw => ClOp::CLw, + LoadOP::Ld => ClOp::CLd, + LoadOP::Fld if has_zcd => ClOp::CFld, + _ => return None, + }; + let imm5 = Uimm5::maybe_from_u8(offset)?; + + encode_cl_type(op, rd, base, imm5) + }; + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + sink.put2(encoded); + } + + // Stack Based Stores + Inst::Store { + src, + op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd), + to, + flags, + } if to.get_base_register() == Some(stack_reg()) + && (to.get_offset_with_state(state) % op.size()) == 0 => + { + // We encode the offset in multiples of the store size. + let offset = to.get_offset_with_state(state); + let imm6 = u8::try_from(offset / op.size()) + .ok() + .and_then(Uimm6::maybe_from_u8)?; + + // Floating point stores are not included in the base Zca extension + // but in a separate Zcd extension. Both of these are part of the C Extension. 
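+ // For example `sd ra, 8(sp)` becomes `c.sdsp ra, 8` with 8 / 8 = 1 in the
+ // immediate field, while `fsd fa0, 8(sp)` additionally requires Zcd.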
+ let op = match op { + StoreOP::Sw => CssOp::CSwsp, + StoreOP::Sd => CssOp::CSdsp, + StoreOP::Fsd if has_zcd => CssOp::CFsdsp, + _ => return None, + }; + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + sink.put2(encode_css_type(op, src, imm6)); + } + + // Regular Stores + Inst::Store { + src, + op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb), + to, + flags, + } if reg_is_compressible(src) + && to + .get_base_register() + .map(reg_is_compressible) + .unwrap_or(false) + && (to.get_offset_with_state(state) % op.size()) == 0 => + { + let base = to.get_base_register().unwrap(); + + // We encode the offset in multiples of the store size. + let offset = to.get_offset_with_state(state); + let offset = u8::try_from(offset / op.size()).ok()?; + + // We mix two different formats here. + // + // c.sw / c.sd / c.fsd instructions are available in the standard Zca + // extension using the CL format. + // + // c.sb / c.sh are only available in the Zcb extension and are also + // encoded differently. + let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb); + let encoded = if is_zcb_store { + if !has_zcb { + return None; + } + + let op = match op { + StoreOP::Sh => ZcbMemOp::CSh, + StoreOP::Sb => ZcbMemOp::CSb, + _ => unreachable!(), + }; + + // Byte stores & loads have 2 bits of immediate offset. Halfword stores + // and loads only have 1 bit. + let imm2 = Uimm2::maybe_from_u8(offset)?; + if (offset & !((1 << op.imm_bits()) - 1)) != 0 { + return None; + } + + encode_zcbmem_store(op, src, base, imm2) + } else { + // Floating point stores are not included in the base Zca extension + // but in a separate Zcd extension. Both of these are part of the C Extension. + let op = match op { + StoreOP::Sw => CsOp::CSw, + StoreOP::Sd => CsOp::CSd, + StoreOP::Fsd if has_zcd => CsOp::CFsd, + _ => return None, + }; + let imm5 = Uimm5::maybe_from_u8(offset)?; + + encode_cs_type(op, src, base, imm5) + }; + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + sink.put2(encoded); + } + + // c.not + // + // This is an alias for `xori rd, rd, -1` + Inst::AluRRImm12 { + alu_op: AluOPRRI::Xori, + rd, + rs, + imm12, + } if has_zcb + && rd.to_reg() == rs + && reg_is_compressible(rs) + && imm12.as_i16() == -1 => + { + sink.put2(encode_cszn_type(CsznOp::CNot, rd)); + } + + // c.sext.b / c.sext.h / c.zext.h + // + // These are all the extend instructions present in `Zcb`, they + // also require `Zbb` since they aren't available in the base ISA. 
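+ // For example, with both Zcb and Zbb enabled, `sext.b a0, a0` (encoded here
+ // as an AluRRImm12 with a zero immediate) compresses to `c.sext.b a0`.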
+ Inst::AluRRImm12 { + alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth), + rd, + rs, + imm12, + } if has_zcb + && has_zbb + && rd.to_reg() == rs + && reg_is_compressible(rs) + && imm12.as_i16() == 0 => + { + let op = match alu_op { + AluOPRRI::Sextb => CsznOp::CSextb, + AluOPRRI::Sexth => CsznOp::CSexth, + AluOPRRI::Zexth => CsznOp::CZexth, + _ => unreachable!(), + }; + sink.put2(encode_cszn_type(op, rd)); + } + + // c.zext.w + // + // This is an alias for `add.uw rd, rd, zero` + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd, + rs1, + rs2, + } if has_zcb + && has_zba + && rd.to_reg() == rs1 + && reg_is_compressible(rs1) + && rs2 == zero_reg() => + { + sink.put2(encode_cszn_type(CsznOp::CZextw, rd)); + } + + _ => return None, + } + + return Some(()); + } + + fn emit_uncompressed( + &self, + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, + start_off: &mut u32, + ) { + match self { + &Inst::Nop0 => { + // do nothing + } + // Addi x0, x0, 0 + &Inst::Nop4 => { + let x = Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: Writable::from_reg(zero_reg()), + rs: zero_reg(), + imm12: Imm12::ZERO, + }; + x.emit(sink, emit_info, state) + } + &Inst::RawData { ref data } => { + // Right now we only put a u32 or u64 in this instruction. + // It is not very long, no need to check if need `emit_island`. + // If data is very long , this is a bug because RawData is typically + // use to load some data and rely on some position in the code stream. + // and we may exceed `Inst::worst_case_size`. + // for more information see https://github.com/bytecodealliance/wasmtime/pull/5612. + sink.put_data(&data[..]); + } + &Inst::Lui { rd, ref imm } => { + let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12); + sink.put4(x); + } + &Inst::Fli { rd, ty, imm } => { + sink.put4(encode_fli(ty, imm, rd)); + } + &Inst::LoadInlineConst { rd, ty, imm } => { + let data = &imm.to_le_bytes()[..ty.bytes() as usize]; + + let label_data: MachLabel = sink.get_label(); + let label_end: MachLabel = sink.get_label(); + + // Load into rd + Inst::Load { + rd, + op: LoadOP::from_type(ty), + flags: MemFlags::new(), + from: AMode::Label(label_data), + } + .emit(sink, emit_info, state); + + // Jump over the inline pool + Inst::gen_jump(label_end).emit(sink, emit_info, state); + + // Emit the inline data + sink.bind_label(label_data, &mut state.ctrl_plane); + Inst::RawData { data: data.into() }.emit(sink, emit_info, state); + + sink.bind_label(label_end, &mut state.ctrl_plane); + } + &Inst::FpuRR { + alu_op, + width, + frm, + rd, + rs, + } => { + if alu_op.is_convert_to_int() { + sink.add_trap(TrapCode::BadConversionToInteger); + } + sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs)); + } + &Inst::FpuRRRR { + alu_op, + rd, + rs1, + rs2, + rs3, + frm, + width, + } => { + sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3)); + } + &Inst::FpuRRR { + alu_op, + width, + frm, + rd, + rs1, + rs2, + } => { + sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2)); + } + &Inst::Unwind { ref inst } => { + sink.add_unwind(inst.clone()); + } + &Inst::DummyUse { .. } => { + // This has already been handled by Inst::allocate. 
+ } + &Inst::AluRRR { + alu_op, + rd, + rs1, + rs2, + } => { + let (rs1, rs2) = if alu_op.reverse_rs() { + (rs2, rs1) + } else { + (rs1, rs2) + }; + + sink.put4(encode_r_type( + alu_op.op_code(), + rd, + alu_op.funct3(), + rs1, + rs2, + alu_op.funct7(), + )); + } + &Inst::AluRRImm12 { + alu_op, + rd, + rs, + imm12, + } => { + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3() << 12 + | reg_to_gpr_num(rs) << 15 + | alu_op.imm12(imm12) << 20; + sink.put4(x); + } + &Inst::CsrReg { op, rd, rs, csr } => { + sink.put4(encode_csr_reg(op, rd, rs, csr)); + } + &Inst::CsrImm { op, rd, csr, imm } => { + sink.put4(encode_csr_imm(op, rd, csr, imm)); + } + &Inst::Load { + rd, + op, + from, + flags, + } => { + let base = from.get_base_register(); + let offset = from.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_i64(offset); + let label = from.get_label_with_sink(sink); + + let (addr, imm12) = match (base, offset_imm12, label) { + // When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it. + (Some(base), Some(imm12), None) => (base, imm12), + + // Otherwise, if the offset does not fit into a imm12, we need to materialize it into a + // register and load from that. + (Some(_), None, None) => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state); + (tmp.to_reg(), Imm12::ZERO) + } + + // If the AMode contains a label we can emit an internal relocation that gets + // resolved with the correct address later. + (None, Some(imm), Some(label)) => { + debug_assert_eq!(imm.as_i16(), 0); + + // Get the current PC. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20); + Inst::Auipc { + rd, + imm: Imm20::ZERO, + } + .emit_uncompressed(sink, emit_info, state, start_off); + + // Emit a relocation for the load. This patches the offset into the instruction. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I); + + // Imm12 here is meaningless since it's going to get replaced. + (rd.to_reg(), Imm12::ZERO) + } + + // These cases are impossible with the current AModes that we have. We either + // always have a register, or always have a label. Never both, and never neither. + (None, None, None) + | (None, Some(_), None) + | (Some(_), None, Some(_)) + | (Some(_), Some(_), Some(_)) + | (None, None, Some(_)) => { + unreachable!("Invalid load address") + } + }; + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + + sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12)); + } + &Inst::Store { op, src, flags, to } => { + let base = to.get_base_register(); + let offset = to.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_i64(offset); + + let (addr, imm12) = match (base, offset_imm12) { + // If the offset fits into an imm12 we can directly encode it. + (Some(base), Some(imm12)) => (base, imm12), + // Otherwise load the address it into a reg and load from it. + _ => { + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state); + (tmp.to_reg(), Imm12::ZERO) + } + }; + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + + sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12)); + } + &Inst::Args { .. } | &Inst::Rets { .. 
} => { + // Nothing: this is a pseudoinstruction that serves + // only to constrain registers at a certain point. + } + &Inst::Ret {} => { + // RISC-V does not have a dedicated ret instruction, instead we emit the equivalent + // `jalr x0, x1, 0` that jumps to the return address. + Inst::Jalr { + rd: writable_zero_reg(), + base: link_reg(), + offset: Imm12::ZERO, + } + .emit(sink, emit_info, state); + } + + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits: _to_bits, + } => { + let mut insts = SmallInstVec::new(); + let shift_bits = (64 - from_bits) as i16; + let is_u8 = || from_bits == 8 && signed == false; + if is_u8() { + // special for u8. + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd, + rs: rn, + imm12: Imm12::from_i16(255), + }); + } else { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd, + rs: rn, + imm12: Imm12::from_i16(shift_bits), + }); + insts.push(Inst::AluRRImm12 { + alu_op: if signed { + AluOPRRI::Srai + } else { + AluOPRRI::Srli + }, + rd, + rs: rd.to_reg(), + imm12: Imm12::from_i16(shift_bits), + }); + } + insts + .into_iter() + .for_each(|i| i.emit(sink, emit_info, state)); + } + + &Inst::Call { ref info } => { + sink.add_call_site(); + sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0); + + Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0) + .into_iter() + .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off)); + + if let Some(s) = state.take_stack_map() { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } + + let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap(); + if callee_pop_size > 0 { + for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) { + inst.emit(sink, emit_info, state); + } + } + } + &Inst::CallInd { ref info } => { + Inst::Jalr { + rd: writable_link_reg(), + base: info.dest, + offset: Imm12::ZERO, + } + .emit(sink, emit_info, state); + + if let Some(s) = state.take_stack_map() { + let offset = sink.cur_offset(); + sink.push_user_stack_map(state, offset, s); + } + + sink.add_call_site(); + + let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap(); + if callee_pop_size > 0 { + for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) { + inst.emit(sink, emit_info, state); + } + } + } + + &Inst::ReturnCall { ref info } => { + emit_return_call_common_sequence(sink, emit_info, state, info); + + sink.add_call_site(); + sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0); + Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0) + .into_iter() + .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off)); + } + + &Inst::ReturnCallInd { ref info } => { + emit_return_call_common_sequence(sink, emit_info, state, &info); + + Inst::Jalr { + rd: writable_zero_reg(), + base: info.dest, + offset: Imm12::ZERO, + } + .emit(sink, emit_info, state); + } + &Inst::Jal { label } => { + sink.use_label_at_offset(*start_off, label, LabelUse::Jal20); + sink.add_uncond_branch(*start_off, *start_off + 4, label); + sink.put4(0b1101111); + } + &Inst::CondBr { + taken, + not_taken, + kind, + } => { + match taken { + CondBrTarget::Label(label) => { + let code = kind.emit(); + let code_inverse = kind.inverse().emit().to_le_bytes(); + sink.use_label_at_offset(*start_off, label, LabelUse::B12); + sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse); + sink.put4(code); + } + CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"), + } + + match not_taken { + 
CondBrTarget::Label(label) => { + Inst::gen_jump(label).emit(sink, emit_info, state) + } + CondBrTarget::Fallthrough => {} + }; + } + + &Inst::Mov { rd, rm, ty } => { + debug_assert_eq!(rd.to_reg().class(), rm.class()); + if rd.to_reg() == rm { + return; + } + + match rm.class() { + RegClass::Int => Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: rd, + rs: rm, + imm12: Imm12::ZERO, + }, + RegClass::Float => Inst::FpuRRR { + alu_op: FpuOPRRR::Fsgnj, + width: FpuOPWidth::try_from(ty).unwrap(), + frm: FRM::RNE, + rd: rd, + rs1: rm, + rs2: rm, + }, + RegClass::Vector => Inst::VecAluRRImm5 { + op: VecAluOpRRImm5::VmvrV, + vd: rd, + vs2: rm, + // Imm 0 means copy 1 register. + imm: Imm5::maybe_from_i8(0).unwrap(), + mask: VecOpMasking::Disabled, + // Vstate for this instruction is ignored. + vstate: VState::from_type(ty), + }, + } + .emit(sink, emit_info, state); + } + + &Inst::MovFromPReg { rd, rm } => { + Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state); + } + + &Inst::BrTable { + index, + tmp1, + tmp2, + ref targets, + } => { + let ext_index = writable_spilltmp_reg(); + + let label_compute_target = sink.get_label(); + + // The default target is passed in as the 0th element of `targets` + // separate it here for clarity. + let default_target = targets[0]; + let targets = &targets[1..]; + + // We are going to potentially emit a large amount of instructions, so ensure that we emit an island + // now if we need one. + // + // The worse case PC calculations are 12 instructions. And each entry in the jump table is 2 instructions. + // Check if we need to emit a jump table here to support that jump. + let inst_count = 12 + (targets.len() * 2); + let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32; + if sink.island_needed(distance) { + let jump_around_label = sink.get_label(); + Inst::gen_jump(jump_around_label).emit(sink, emit_info, state); + sink.emit_island(distance + 4, &mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + + // We emit a bounds check on the index, if the index is larger than the number of + // jump table entries, we jump to the default block. Otherwise we compute a jump + // offset by multiplying the index by 8 (the size of each entry) and then jump to + // that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially. + // + // Build the following sequence: + // + // extend_index: + // zext.w ext_index, index + // bounds_check: + // li tmp, n_labels + // bltu ext_index, tmp, compute_target + // jump_to_default_block: + // auipc pc, 0 + // jalr zero, pc, default_block + // compute_target: + // auipc pc, 0 + // slli tmp, ext_index, 3 + // add pc, pc, tmp + // jalr zero, pc, 0x10 + // jump_table: + // ; This repeats for each entry in the jumptable + // auipc pc, 0 + // jalr zero, pc, block_target + + // Extend the index to 64 bits. + // + // This prevents us branching on the top 32 bits of the index, which + // are undefined. + Inst::Extend { + rd: ext_index, + rn: index, + signed: false, + from_bits: 32, + to_bits: 64, + } + .emit(sink, emit_info, state); + + // Bounds check. + // + // Check if the index passed in is larger than the number of jumptable + // entries that we have. If it is, we fallthrough to a jump into the + // default block. 
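+ // For example, with three non-default targets, indices 0..=2 branch to
+ // `compute_target` below, while any index >= 3 falls through to the
+ // unconditional jump to the default block.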
+ Inst::load_constant_u32(tmp2, targets.len() as u64) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + Inst::CondBr { + taken: CondBrTarget::Label(label_compute_target), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::UnsignedLessThan, + rs1: ext_index.to_reg(), + rs2: tmp2.to_reg(), + }, + } + .emit(sink, emit_info, state); + + sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32); + Inst::construct_auipc_and_jalr(None, tmp2, 0) + .iter() + .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off)); + + // Compute the jump table offset. + // We need to emit a PC relative offset, + sink.bind_label(label_compute_target, &mut state.ctrl_plane); + + // Get the current PC. + Inst::Auipc { + rd: tmp1, + imm: Imm20::ZERO, + } + .emit_uncompressed(sink, emit_info, state, start_off); + + // These instructions must be emitted as uncompressed since we + // are manually computing the offset from the PC. + + // Multiply the index by 8, since that is the size in + // bytes of each jump table entry + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: ext_index.to_reg(), + imm12: Imm12::from_i16(3), + } + .emit_uncompressed(sink, emit_info, state, start_off); + + // Calculate the base of the jump, PC + the offset from above. + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: tmp1, + rs1: tmp1.to_reg(), + rs2: tmp2.to_reg(), + } + .emit_uncompressed(sink, emit_info, state, start_off); + + // Jump to the middle of the jump table. + // We add a 16 byte offset here, since we used 4 instructions + // since the AUIPC that was used to get the PC. + Inst::Jalr { + rd: writable_zero_reg(), + base: tmp1.to_reg(), + offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16), + } + .emit_uncompressed(sink, emit_info, state, start_off); + + // Emit the jump table. + // + // Each entry is a auipc + jalr to the target block. We also start with a island + // if necessary. + + // Emit the jumps back to back + for target in targets.iter() { + sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32); + + Inst::construct_auipc_and_jalr(None, tmp2, 0) + .iter() + .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off)); + } + + // We've just emitted an island that is safe up to *here*. + // Mark it as such so that we don't needlessly emit additional islands. 
+ *start_off = sink.cur_offset(); + } + + &Inst::Atomic { + op, + rd, + addr, + src, + amo, + } => { + // TODO: get flags from original CLIF atomic instruction + let flags = MemFlags::new(); + if let Some(trap_code) = flags.trap_code() { + sink.add_trap(trap_code); + } + let x = op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | op.funct3() << 12 + | reg_to_gpr_num(addr) << 15 + | reg_to_gpr_num(src) << 20 + | op.funct7(amo) << 25; + + sink.put4(x); + } + &Inst::Fence { pred, succ } => { + let x = 0b0001111 + | 0b00000 << 7 + | 0b000 << 12 + | 0b00000 << 15 + | (succ as u32) << 20 + | (pred as u32) << 24; + + sink.put4(x); + } + &Inst::Auipc { rd, imm } => { + sink.put4(enc_auipc(rd, imm)); + } + + &Inst::LoadAddr { rd, mem } => { + let base = mem.get_base_register(); + let offset = mem.get_offset_with_state(state); + let offset_imm12 = Imm12::maybe_from_i64(offset); + + match (mem, base, offset_imm12) { + (_, Some(rs), Some(imm12)) => { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12, + } + .emit(sink, emit_info, state); + } + (_, Some(rs), None) => { + let mut insts = Inst::load_constant_u64(rd, offset as u64); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd, + rs1: rd.to_reg(), + rs2: rs, + }); + insts + .into_iter() + .for_each(|inst| inst.emit(sink, emit_info, state)); + } + (AMode::Const(addr), None, _) => { + // Get an address label for the constant and recurse. + let label = sink.get_label_for_constant(addr); + Inst::LoadAddr { + rd, + mem: AMode::Label(label), + } + .emit(sink, emit_info, state); + } + (AMode::Label(label), None, _) => { + // Get the current PC. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20); + let inst = Inst::Auipc { + rd, + imm: Imm20::ZERO, + }; + inst.emit_uncompressed(sink, emit_info, state, start_off); + + // Emit an add to the address with a relocation. + // This later gets patched up with the correct offset. + sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs: rd.to_reg(), + imm12: Imm12::ZERO, + } + .emit_uncompressed(sink, emit_info, state, start_off); + } + (amode, _, _) => { + unimplemented!("LoadAddr: {:?}", amode); + } + } + } + + &Inst::Select { + ref dst, + condition, + ref x, + ref y, + } => { + // The general form for this select is the following: + // + // mv rd, x + // b{cond} rcond, label_end + // mv rd, y + // label_end: + // ... etc + // + // This is built on the assumption that moves are cheap, but branches and jumps + // are not. So with this format we always avoid one jump instruction at the expense + // of an unconditional move. + // + // We also perform another optimization here. If the destination register is the same + // as one of the input registers, we can avoid emitting the first unconditional move + // and emit just the branch and the second move. + // + // To make sure that this happens as often as possible, we also try to invert the + // condition, so that if either of the input registers are the same as the destination + // we avoid that move. + + let label_end = sink.get_label(); + + let xregs = x.regs(); + let yregs = y.regs(); + let dstregs: Vec = dst.regs().into_iter().map(|r| r.to_reg()).collect(); + let condregs = condition.regs(); + + // We are going to write to the destination register before evaluating + // the condition, so we need to make sure that the destination register + // is not one of the condition registers. 
+ // + // This should never happen, since hopefully the regalloc constraints + // for this register are set up correctly. + debug_assert_ne!(dstregs, condregs); + + // Check if we can invert the condition and avoid moving the y registers into + // the destination. This allows us to only emit the branch and one of the moves. + let (uncond_move, cond_move, condition) = if yregs == dstregs { + (yregs, xregs, condition.inverse()) + } else { + (xregs, yregs, condition) + }; + + // Unconditionally move one of the values to the destination register. + // + // These moves may not end up being emitted if the source and + // destination registers are the same. That logic is built into + // the emit function for `Inst::Mov`. + for i in gen_moves(dst.regs(), uncond_move) { + i.emit(sink, emit_info, state); + } + + // If the condition passes we skip over the conditional move + Inst::CondBr { + taken: CondBrTarget::Label(label_end), + not_taken: CondBrTarget::Fallthrough, + kind: condition, + } + .emit(sink, emit_info, state); + + // Move the conditional value to the destination register. + for i in gen_moves(dst.regs(), cond_move) { + i.emit(sink, emit_info, state); + } + + sink.bind_label(label_end, &mut state.ctrl_plane); + } + &Inst::Jalr { rd, base, offset } => { + sink.put4(enc_jalr(rd, base, offset)); + } + &Inst::EBreak => { + sink.put4(0x00100073); + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + ty, + } => { + // # addr holds address of memory location + // # e holds expected value + // # v holds desired value + // # dst holds return value + // cas: + // lr.w dst, (addr) # Load original value. + // bne dst, e, fail # Doesn’t match, so fail. + // sc.w t0, v, (addr) # Try to update. + // bnez t0 , cas # if store not ok,retry. + // fail: + let fail_label = sink.get_label(); + let cas_lebel = sink.get_label(); + sink.bind_label(cas_lebel, &mut state.ctrl_plane); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: dst, + addr, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(sink, emit_info, state); + if ty.bits() < 32 { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + } else if ty.bits() == 32 { + Inst::Extend { + rd: dst, + rn: dst.to_reg(), + signed: false, + from_bits: 32, + to_bits: 64, + } + .emit(sink, emit_info, state); + } + Inst::CondBr { + taken: CondBrTarget::Label(fail_label), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: e, + rs2: dst.to_reg(), + }, + } + .emit(sink, emit_info, state); + let store_value = if ty.bits() < 32 { + // reload value to t0. + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: t0, + addr, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(sink, emit_info, state); + // set reset part. + AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + t0.to_reg() + } else { + v + }; + Inst::Atomic { + op: AtomicOP::store_op(ty), + rd: t0, + addr, + src: store_value, + amo: AMO::SeqCst, + } + .emit(sink, emit_info, state); + // check is our value stored. 
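+ // `sc.w`/`sc.d` writes 0 to its destination on success and a non-zero code
+ // on failure, so this is effectively `bnez t0, cas`: retry until the
+ // store-conditional succeeds.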
+ Inst::CondBr { + taken: CondBrTarget::Label(cas_lebel), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: t0.to_reg(), + rs2: zero_reg(), + }, + } + .emit(sink, emit_info, state); + sink.bind_label(fail_label, &mut state.ctrl_plane); + } + &Inst::AtomicRmwLoop { + offset, + op, + dst, + ty, + p, + x, + t0, + } => { + let retry = sink.get_label(); + sink.bind_label(retry, &mut state.ctrl_plane); + // load old value. + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: dst, + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(sink, emit_info, state); + // + + let store_value: Reg = match op { + crate::ir::AtomicRmwOp::Add + | crate::ir::AtomicRmwOp::Sub + | crate::ir::AtomicRmwOp::And + | crate::ir::AtomicRmwOp::Or + | crate::ir::AtomicRmwOp::Xor => { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + Inst::AluRRR { + alu_op: match op { + crate::ir::AtomicRmwOp::Add => AluOPRRR::Add, + crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub, + crate::ir::AtomicRmwOp::And => AluOPRRR::And, + crate::ir::AtomicRmwOp::Or => AluOPRRR::Or, + crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor, + _ => unreachable!(), + }, + rd: t0, + rs1: dst.to_reg(), + rs2: x, + } + .emit(sink, emit_info, state); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Nand => { + if ty.bits() < 32 { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + } + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: t0, + rs1: x, + rs2: dst.to_reg(), + } + .emit(sink, emit_info, state); + Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state); + if ty.bits() < 32 { + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + spilltmp_reg2() + } else { + t0.to_reg() + } + } + + crate::ir::AtomicRmwOp::Umin + | crate::ir::AtomicRmwOp::Umax + | crate::ir::AtomicRmwOp::Smin + | crate::ir::AtomicRmwOp::Smax => { + let label_select_dst = sink.get_label(); + let label_select_done = sink.get_label(); + if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax + { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + } else { + AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty) + } + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + + Inst::CondBr { + taken: CondBrTarget::Label(label_select_dst), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: match op { + crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan, + crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan, + crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan, + crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan, + _ => unreachable!(), + }, + rs1: dst.to_reg(), + rs2: x, + }, + } + .emit(sink, emit_info, state); + // here we select x. 
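+ // i.e. the loaded value did not win the comparison above (for Umin it was
+ // not `< x`), so `x` is the value that should be written back.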
+ Inst::gen_move(t0, x, I64).emit(sink, emit_info, state); + Inst::gen_jump(label_select_done).emit(sink, emit_info, state); + sink.bind_label(label_select_dst, &mut state.ctrl_plane); + Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state); + sink.bind_label(label_select_done, &mut state.ctrl_plane); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Xchg => { + AtomicOP::extract(dst, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + x, + ty, + ) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + spilltmp_reg2() + } + }; + + Inst::Atomic { + op: AtomicOP::store_op(ty), + rd: t0, + addr: p, + src: store_value, + amo: AMO::SeqCst, + } + .emit(sink, emit_info, state); + + // if store is not ok,retry. + Inst::CondBr { + taken: CondBrTarget::Label(retry), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: t0.to_reg(), + rs2: zero_reg(), + }, + } + .emit(sink, emit_info, state); + } + + &Inst::LoadExtName { + rd, + ref name, + offset, + } => { + if emit_info.shared_flag.is_pic() { + // Load a PC-relative address into a register. + // RISC-V does this slightly differently from other arches. We emit a relocation + // with a label, instead of the symbol itself. + // + // See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses + // + // Emit the following code: + // label: + // auipc rd, 0 # R_RISCV_GOT_HI20 (symbol_name) + // ld rd, rd, 0 # R_RISCV_PCREL_LO12_I (label) + + // Create the label that is going to be published to the final binary object. + let auipc_label = sink.get_label(); + sink.bind_label(auipc_label, &mut state.ctrl_plane); + + // Get the current PC. + sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0); + Inst::Auipc { + rd: rd, + imm: Imm20::from_i32(0), + } + .emit_uncompressed(sink, emit_info, state, start_off); + + // The `ld` here, points to the `auipc` label instead of directly to the symbol. + sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0); + Inst::Load { + rd, + op: LoadOP::Ld, + flags: MemFlags::trusted(), + from: AMode::RegOffset(rd.to_reg(), 0), + } + .emit_uncompressed(sink, emit_info, state, start_off); + } else { + // In the non PIC sequence we relocate the absolute address into + // a prealocatted space, load it into a register and jump over it. 
+ // + // Emit the following code: + // ld rd, label_data + // j label_end + // label_data: + // <8 byte space> # ABS8 + // label_end: + + let label_data = sink.get_label(); + let label_end = sink.get_label(); + + // Load the value from a label + Inst::Load { + rd, + op: LoadOP::Ld, + flags: MemFlags::trusted(), + from: AMode::Label(label_data), + } + .emit(sink, emit_info, state); + + // Jump over the data + Inst::gen_jump(label_end).emit(sink, emit_info, state); + + sink.bind_label(label_data, &mut state.ctrl_plane); + sink.add_reloc(Reloc::Abs8, name.as_ref(), offset); + sink.put8(0); + + sink.bind_label(label_end, &mut state.ctrl_plane); + } + } + + &Inst::ElfTlsGetAddr { rd, ref name } => { + // RISC-V's TLS GD model is slightly different from other arches. + // + // We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits + // of the address relative to the GOT entry. This relocation points to + // the symbol as usual. + // + // However when loading the bottom 12bits of the address, we need to + // use a label that points to the previous AUIPC instruction. + // + // label: + // auipc a0,0 # R_RISCV_TLS_GD_HI20 (symbol) + // addi a0,a0,0 # R_RISCV_PCREL_LO12_I (label) + // + // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic + + // Create the label that is going to be published to the final binary object. + let auipc_label = sink.get_label(); + sink.bind_label(auipc_label, &mut state.ctrl_plane); + + // Get the current PC. + sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0); + Inst::Auipc { + rd: rd, + imm: Imm20::from_i32(0), + } + .emit_uncompressed(sink, emit_info, state, start_off); + + // The `addi` here, points to the `auipc` label instead of directly to the symbol. + sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: rd, + rs: rd.to_reg(), + imm12: Imm12::from_i16(0), + } + .emit_uncompressed(sink, emit_info, state, start_off); + + Inst::Call { + info: Box::new(CallInfo::empty( + ExternalName::LibCall(LibCall::ElfTlsGetAddr), + CallConv::SystemV, + )), + } + .emit_uncompressed(sink, emit_info, state, start_off); + } + + &Inst::TrapIf { + rs1, + rs2, + cc, + trap_code, + } => { + let label_end = sink.get_label(); + let cond = IntegerCompare { kind: cc, rs1, rs2 }; + + // Jump over the trap if we the condition is false. + Inst::CondBr { + taken: CondBrTarget::Label(label_end), + not_taken: CondBrTarget::Fallthrough, + kind: cond.inverse(), + } + .emit(sink, emit_info, state); + Inst::Udf { trap_code }.emit(sink, emit_info, state); + + sink.bind_label(label_end, &mut state.ctrl_plane); + } + &Inst::Udf { trap_code } => { + sink.add_trap(trap_code); + sink.put_data(Inst::TRAP_OPCODE); + } + &Inst::AtomicLoad { rd, ty, p } => { + // emit the fence. + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(sink, emit_info, state); + // load. 
+ Inst::Load { + rd: rd, + op: LoadOP::from_type(ty), + flags: MemFlags::new(), + from: AMode::RegOffset(p, 0), + } + .emit(sink, emit_info, state); + Inst::Fence { + pred: Inst::FENCE_REQ_R, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(sink, emit_info, state); + } + &Inst::AtomicStore { src, ty, p } => { + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_W, + } + .emit(sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(p, 0), + op: StoreOP::from_type(ty), + flags: MemFlags::new(), + src, + } + .emit(sink, emit_info, state); + } + + &Inst::Popcnt { + sum, + tmp, + step, + rs, + ty, + } => { + // load 0 to sum , init. + Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)) + .emit(sink, emit_info, state); + // + Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_i16((ty.bits() - 1) as i16), + } + .emit(sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: CondBrTarget::Label(label_done), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(sink, emit_info, state); + // test and add sum. + { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: CondBrTarget::Label(label_over), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::ONE, + } + .emit(sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_i16(-1), + } + .emit(sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::ONE, + } + .emit(sink, emit_info, state); + Inst::gen_jump(label_loop).emit(sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); + } + &Inst::Cltz { + sum, + tmp, + step, + rs, + leading, + ty, + } => { + // load 0 to sum , init. + Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)) + .emit(sink, emit_info, state); + // + Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state); + if leading { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_i16((ty.bits() - 1) as i16), + } + .emit(sink, emit_info, state); + } + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: CondBrTarget::Label(label_done), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(sink, emit_info, state); + // test and add sum. 
+ { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(sink, emit_info, state); + Inst::CondBr { + taken: CondBrTarget::Label(label_done), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::ONE, + } + .emit(sink, emit_info, state); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_i16(-1), + } + .emit(sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: if leading { + AluOPRRI::Srli + } else { + AluOPRRI::Slli + }, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::ONE, + } + .emit(sink, emit_info, state); + Inst::gen_jump(label_loop).emit(sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); + } + &Inst::Brev8 { + rs, + ty, + step, + tmp, + tmp2, + rd, + } => { + Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state); + Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)) + .emit(sink, emit_info, state); + // + Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_i16((ty.bits() - 1) as i16), + } + .emit(sink, emit_info, state); + Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_i16((ty.bits() - 8) as i16), + } + .emit(sink, emit_info, state); + + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop, &mut state.ctrl_plane); + Inst::CondBr { + taken: CondBrTarget::Label(label_done), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(sink, emit_info, state); + // test and set bit. + { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: CondBrTarget::Label(label_over), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp2.to_reg(), + } + .emit(sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + // set step and tmp. 
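+            // `tmp` walks the source bits from MSB to LSB while `tmp2` tracks the
+            // destination bit: it shifts left within a byte and drops by 15 positions at
+            // each byte boundary, which reverses the bits within every byte.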
+ { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_i16(-1), + } + .emit(sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::ONE, + } + .emit(sink, emit_info, state); + { + // reset tmp2 + // if (step %=8 == 0) then tmp2 = tmp2 >> 15 + // if (step %=8 != 0) then tmp2 = tmp2 << 1 + let label_over = sink.get_label(); + let label_sll_1 = sink.get_label(); + Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8)) + .emit(sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_spilltmp_reg2(), + rs1: step.to_reg(), + rs2: spilltmp_reg2(), + } + .emit(sink, emit_info, state); + Inst::CondBr { + taken: CondBrTarget::Label(label_sll_1), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: spilltmp_reg2(), + rs2: zero_reg(), + }, + } + .emit(sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_i16(15), + } + .emit(sink, emit_info, state); + Inst::gen_jump(label_over).emit(sink, emit_info, state); + sink.bind_label(label_sll_1, &mut state.ctrl_plane); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::ONE, + } + .emit(sink, emit_info, state); + sink.bind_label(label_over, &mut state.ctrl_plane); + } + Inst::gen_jump(label_loop).emit(sink, emit_info, state); + } + sink.bind_label(label_done, &mut state.ctrl_plane); + } + &Inst::StackProbeLoop { + guard_size, + probe_count, + tmp: guard_size_tmp, + } => { + let step = writable_spilltmp_reg(); + Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64)) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + Inst::load_constant_u64(guard_size_tmp, guard_size as u64) + .iter() + .for_each(|i| i.emit(sink, emit_info, state)); + + let loop_start = sink.get_label(); + let label_done = sink.get_label(); + sink.bind_label(loop_start, &mut state.ctrl_plane); + Inst::CondBr { + taken: CondBrTarget::Label(label_done), + not_taken: CondBrTarget::Fallthrough, + kind: IntegerCompare { + kind: IntCC::UnsignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + }, + } + .emit(sink, emit_info, state); + // compute address. + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_spilltmp_reg2(), + rs1: stack_reg(), + rs2: step.to_reg(), + } + .emit(sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(spilltmp_reg2(), 0), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: zero_reg(), + } + .emit(sink, emit_info, state); + // reset step. + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: step, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + } + .emit(sink, emit_info, state); + Inst::gen_jump(loop_start).emit(sink, emit_info, state); + sink.bind_label(label_done, &mut state.ctrl_plane); + } + &Inst::VecAluRRRImm5 { + op, + vd, + vd_src, + imm, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg(), vd_src); + + sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask)); + } + &Inst::VecAluRRRR { + op, + vd, + vd_src, + vs1, + vs2, + ref mask, + .. + } => { + debug_assert_eq!(vd.to_reg(), vd_src); + + sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask)); + } + &Inst::VecAluRRR { + op, + vd, + vs1, + vs2, + ref mask, + .. + } => { + sink.put4(encode_valu(op, vd, vs1, vs2, *mask)); + } + &Inst::VecAluRRImm5 { + op, + vd, + imm, + vs2, + ref mask, + .. 
+ } => { + sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask)); + } + &Inst::VecAluRR { + op, + vd, + vs, + ref mask, + .. + } => { + sink.put4(encode_valu_rr(op, vd, vs, *mask)); + } + &Inst::VecAluRImm5 { + op, + vd, + imm, + ref mask, + .. + } => { + sink.put4(encode_valu_r_imm(op, vd, imm, *mask)); + } + &Inst::VecSetState { rd, ref vstate } => { + sink.put4(encode_vcfg_imm( + 0x57, + rd.to_reg(), + vstate.avl.unwrap_static(), + &vstate.vtype, + )); + + // Update the current vector emit state. + state.vstate = EmitVState::Known(*vstate); + } + + &Inst::VecLoad { + eew, + to, + ref from, + ref mask, + flags, + .. + } => { + // Vector Loads don't support immediate offsets, so we need to load it into a register. + let addr = match from { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: *base, + } + .emit(sink, emit_info, state); + tmp.to_reg() + } + } + }; + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + + sink.put4(encode_vmem_load( + 0x07, + to.to_reg(), + eew, + addr, + from.lumop(), + *mask, + from.mop(), + from.nf(), + )); + } + + &Inst::VecStore { + eew, + ref to, + from, + ref mask, + flags, + .. + } => { + // Vector Stores don't support immediate offsets, so we need to load it into a register. + let addr = match to { + VecAMode::UnitStride { base } => { + let base_reg = base.get_base_register(); + let offset = base.get_offset_with_state(state); + + // Reg+0 Offset can be directly encoded + if let (Some(base_reg), 0) = (base_reg, offset) { + base_reg + } else { + // Otherwise load the address it into a reg and load from it. + let tmp = writable_spilltmp_reg(); + Inst::LoadAddr { + rd: tmp, + mem: *base, + } + .emit(sink, emit_info, state); + tmp.to_reg() + } + } + }; + + if let Some(trap_code) = flags.trap_code() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(trap_code); + } + + sink.put4(encode_vmem_store( + 0x27, + from, + eew, + addr, + to.sumop(), + *mask, + to.mop(), + to.nf(), + )); + } + }; + } +} + +fn emit_return_call_common_sequence( + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, + info: &ReturnCallInfo, +) { + // The return call sequence can potentially emit a lot of instructions (up to 634 bytes!) + // So lets emit an island here if we need it. + // + // It is difficult to calculate exactly how many instructions are going to be emitted, so + // we calculate it by emitting it into a disposable buffer, and then checking how many instructions + // were actually emitted. + let mut buffer = MachBuffer::new(); + let mut fake_emit_state = state.clone(); + + return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info); + + // Finalize the buffer and get the number of bytes emitted. + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let length = buffer.data().len() as u32; + + // And now emit the island inline with this instruction. 
+ if sink.island_needed(length) { + let jump_around_label = sink.get_label(); + Inst::gen_jump(jump_around_label).emit(sink, emit_info, state); + sink.emit_island(length + 4, &mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + + // Now that we're done, emit the *actual* return sequence. + return_call_emit_impl(sink, emit_info, state, info); +} + +/// This should not be called directly, Instead prefer to call [emit_return_call_common_sequence]. +fn return_call_emit_impl( + sink: &mut MachBuffer, + emit_info: &EmitInfo, + state: &mut EmitState, + info: &ReturnCallInfo, +) { + let sp_to_fp_offset = { + let frame_layout = state.frame_layout(); + i64::from( + frame_layout.clobber_size + + frame_layout.fixed_frame_storage_size + + frame_layout.outgoing_args_size, + ) + }; + + let mut clobber_offset = sp_to_fp_offset - 8; + for reg in state.frame_layout().clobbered_callee_saves.clone() { + let rreg = reg.to_reg(); + let ty = match rreg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unimplemented!("Vector Clobber Restores"), + }; + + Inst::gen_load( + reg.map(Reg::from), + AMode::SPOffset(clobber_offset), + ty, + MemFlags::trusted(), + ) + .emit(sink, emit_info, state); + + clobber_offset -= 8 + } + + // Restore the link register and frame pointer + let setup_area_size = i64::from(state.frame_layout().setup_area_size); + if setup_area_size > 0 { + Inst::gen_load( + writable_link_reg(), + AMode::SPOffset(sp_to_fp_offset + 8), + I64, + MemFlags::trusted(), + ) + .emit(sink, emit_info, state); + + Inst::gen_load( + writable_fp_reg(), + AMode::SPOffset(sp_to_fp_offset), + I64, + MemFlags::trusted(), + ) + .emit(sink, emit_info, state); + } + + // If we over-allocated the incoming args area in the prologue, resize down to what the callee + // is expecting. 
+ let incoming_args_diff = + i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size); + + // Increment SP all at once + let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff; + if sp_increment > 0 { + for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) { + inst.emit(sink, emit_info, state); + } + } +} diff --git a/hbcb/src/inst/emit_tests.rs b/hbcb/src/inst/emit_tests.rs new file mode 100644 index 0000000..668e170 --- /dev/null +++ b/hbcb/src/inst/emit_tests.rs @@ -0,0 +1,2277 @@ +#[allow(unused)] +use crate::ir::LibCall; +use crate::inst::*; +use crate::lower::isle::generated_code::FpuOPWidth; +use std::borrow::Cow; + +fn fa7() -> Reg { + f_reg(17) +} + +#[test] +fn test_riscv64_binemit() { + struct TestUnit { + inst: Inst, + assembly: &'static str, + code: TestEncoding, + } + + struct TestEncoding(Cow<'static, str>); + + impl From<&'static str> for TestEncoding { + fn from(value: &'static str) -> Self { + Self(value.into()) + } + } + + impl From for TestEncoding { + fn from(value: u32) -> Self { + let value = value.swap_bytes(); + let value = format!("{value:08X}"); + Self(value.into()) + } + } + + impl TestUnit { + fn new(inst: Inst, assembly: &'static str, code: impl Into) -> Self { + let code = code.into(); + Self { + inst, + assembly, + code, + } + } + } + + let mut insns = Vec::::with_capacity(500); + + insns.push(TestUnit::new(Inst::Ret {}, "ret", 0x00008067)); + + insns.push(TestUnit::new( + Inst::Mov { + rd: writable_fa0(), + rm: fa1(), + ty: F32, + }, + "fmv.s fa0,fa1", + 0x20b58553, + )); + + insns.push(TestUnit::new( + Inst::Mov { + rd: writable_fa0(), + rm: fa1(), + ty: F64, + }, + "fmv.d fa0,fa1", + 0x22b58553, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Brev8, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "brev8 a1,a0", + 0x68755593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Rev8, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "rev8 a1,a0", + 0x6b855593, + )); + + // + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bclri, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "bclri a1,a0,5", + 0x48551593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bexti, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "bexti a1,a0,5", + 0x48555593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Binvi, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "binvi a1,a0,5", + 0x68551593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Bseti, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "bseti a1,a0,5", + 0x28551593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Rori, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "rori a1,a0,5", + 0x60555593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Roriw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "roriw a1,a0,5", + 0x6055559b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SlliUw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "slli.uw a1,a0,5", + 0x855159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Clz, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "clz a1,a0", + 0x60051593, + )); + + 
insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Clzw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "clzw a1,a0", + 0x6005159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Cpop, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "cpop a1,a0", + 0x60251593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Cpopw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "cpopw a1,a0", + 0x6025159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Ctz, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "ctz a1,a0", + 0x60151593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Ctzw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "ctzw a1,a0", + 0x6015159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sextb, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "sext.b a1,a0", + 0x60451593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sexth, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "sext.h a1,a0", + 0x60551593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Zexth, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "zext.h a1,a0", + 0x80545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Orcb, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::ZERO, + }, + "orc.b a1,a0", + 0x28755593, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "zext.w a1,a0", + 0x80505bb, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd: writable_a1(), + rs1: a0(), + rs2: a1(), + }, + "add.uw a1,a0,a1", + 0x08b505bb, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Andn, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "andn a1,a0,zero", + 0x400575b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bclr, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bclr a1,a0,zero", + 0x480515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bext, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bext a1,a0,zero", + 0x480555b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Binv, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "binv a1,a0,zero", + 0x680515b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bset, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bset a1,a0,zero", + 0x280515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmul, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmul a1,a0,zero", + 0xa0515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmulh, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmulh a1,a0,zero", + 0xa0535b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmulr, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmulr a1,a0,zero", + 0xa0525b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Max, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "max a1,a0,zero", + 0xa0565b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Maxu, + rd: writable_a1(), + rs1: a0(), + rs2: 
zero_reg(), + }, + "maxu a1,a0,zero", + 0xa0575b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Min, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "min a1,a0,zero", + 0xa0545b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Minu, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "minu a1,a0,zero", + 0xa0555b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Orn, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "orn a1,a0,zero", + 0x400565b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rol, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rol a1,a0,zero", + 0x600515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rolw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rolw a1,a0,zero", + 0x600515bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Ror, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "ror a1,a0,zero", + 0x600555b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rorw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rorw a1,a0,zero", + 0x600555bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh1add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh1add a1,a0,zero", + 0x200525b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh1adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh1add.uw a1,a0,zero", + 0x200525bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh2add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh2add a1,a0,zero", + 0x200545b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh2adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh2add.uw a1,a0,zero", + 0x200545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh3add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh3add a1,a0,zero", + 0x200565b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh3adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh3add.uw a1,a0,zero", + 0x200565bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Xnor, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "xnor a1,a0,zero", + 0x400545b3, + )); + + // Zbkb + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Pack, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "pack a1,a0,zero", + 0x080545b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Packw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "packw a1,a0,zero", + 0x080545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Packh, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "packh a1,a0,zero", + 0x080575b3, + )); + + // + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_fp_reg(), + rs1: fp_reg(), + rs2: zero_reg(), + }, + "add fp,fp,zero", + 0x40433, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_fp_reg(), + rs: stack_reg(), + imm12: Imm12::maybe_from_u64(100).unwrap(), + }, + "addi fp,sp,100", + 0x6410413, + )); + insns.push(TestUnit::new( + Inst::Lui { + rd: writable_zero_reg(), + imm: Imm20::from_i32(120), + }, + "lui zero,120", + 0x78037, + )); + 
insns.push(TestUnit::new( + Inst::Auipc { + rd: writable_zero_reg(), + imm: Imm20::from_i32(120), + }, + "auipc zero,120", + 0x78017, + )); + + insns.push(TestUnit::new( + Inst::Jalr { + rd: writable_a0(), + base: a0(), + offset: Imm12::from_i16(100), + }, + "jalr a0,100(a0)", + 0x6450567, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lb, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100), + }, + "lb a0,100(a1)", + 0x6458503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lh, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100), + }, + "lh a0,100(a1)", + 0x6459503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lw, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100), + }, + "lw a0,100(a1)", + 0x645a503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Ld, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100), + }, + "ld a0,100(a1)", + 0x645b503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: Writable::from_reg(fa0()), + op: LoadOP::Flw, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100), + }, + "flw fa0,100(a1)", + 0x645a507, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: Writable::from_reg(fa0()), + op: LoadOP::Fld, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100), + }, + "fld fa0,100(a1)", + 0x645b507, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: a0(), + }, + "sb a0,100(sp)", + 0x6a10223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100), + op: StoreOP::Sh, + flags: MemFlags::new(), + src: a0(), + }, + "sh a0,100(sp)", + 0x6a11223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100), + op: StoreOP::Sw, + flags: MemFlags::new(), + src: a0(), + }, + "sw a0,100(sp)", + 0x6a12223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100), + op: StoreOP::Sd, + flags: MemFlags::new(), + src: a0(), + }, + "sd a0,100(sp)", + 0x6a13223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100), + op: StoreOP::Fsw, + flags: MemFlags::new(), + src: fa0(), + }, + "fsw fa0,100(sp)", + 0x6a12227, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100), + op: StoreOP::Fsd, + flags: MemFlags::new(), + src: fa0(), + }, + "fsd fa0,100(sp)", + 0x6a13227, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(100), + }, + "addi a0,a0,100", + 0x6450513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slti, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(100), + }, + "slti a0,a0,100", + 0x6452513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SltiU, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(100), + }, + "sltiu a0,a0,100", + 0x6453513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Xori, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(100), + }, + "xori a0,a0,100", + 0x6454513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(100), + }, + "andi a0,a0,100", + 0x6457513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: writable_a0(), + rs: a0(), + imm12: 
Imm12::from_i16(5), + }, + "slli a0,a0,5", + 0x551513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "srli a0,a0,5", + 0x555513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srai, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "srai a0,a0,5", + 0x40555513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(120), + }, + "addiw a0,a0,120", + 0x785051b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slliw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "slliw a0,a0,5", + 0x55151b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SrliW, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "srliw a0,a0,5", + 0x55551b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sraiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "sraiw a0,a0,5", + 0x4055551b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sraiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_i16(5), + }, + "sraiw a0,a0,5", + 0x4055551b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "add a0,a0,a1", + 0xb50533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sub a0,a0,a1", + 0x40b50533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sll a0,a0,a1", + 0xb51533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Slt, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "slt a0,a0,a1", + 0xb52533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::SltU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sltu a0,a0,a1", + 0xb53533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Xor, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "xor a0,a0,a1", + 0xb54533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "srl a0,a0,a1", + 0xb55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sra, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sra a0,a0,a1", + 0x40b55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "or a0,a0,a1", + 0xb56533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "and a0,a0,a1", + 0xb57533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Addw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "addw a0,a0,a1", + 0xb5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Subw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "subw a0,a0,a1", + 0x40b5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sllw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sllw a0,a0,a1", + 0xb5153b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Srlw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "srlw a0,a0,a1", + 0xb5553b, + 
)); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sraw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sraw a0,a0,a1", + 0x40b5553b, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mul, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mul a0,a0,a1", + 0x2b50533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulh, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulh a0,a0,a1", + 0x2b51533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulhsu, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulhsu a0,a0,a1", + 0x2b52533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulhu, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulhu a0,a0,a1", + 0x2b53533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Div, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "div a0,a0,a1", + 0x2b54533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::DivU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "divu a0,a0,a1", + 0x2b55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "rem a0,a0,a1", + 0x2b56533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::RemU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remu a0,a0,a1", + 0x2b57533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulw a0,a0,a1", + 0x2b5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Divw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "divw a0,a0,a1", + 0x2b5453b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Remw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remw a0,a0,a1", + 0x2b5653b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Remuw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remuw a0,a0,a1", + 0x2b5753b, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RNE, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Fadd, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fadd.s fa0,fa0,fa1,rne", + 0xb50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RTZ, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Fsub, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsub.s fa0,fa0,fa1,rtz", + 0x8b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RUP, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Fmul, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmul.s fa0,fa0,fa1,rup", + 0x10b53553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Fdiv, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fdiv.s fa0,fa0,fa1,fcsr", + 0x18b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RNE, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Fsgnj, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnj.s fa0,fa0,fa1", + 0x20b50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RTZ, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Fsgnjn, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjn.s fa0,fa0,fa1", + 0x20b51553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RDN, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Fsgnjx, + rd: writable_fa0(), + rs1: fa0(), + 
rs2: fa1(), + }, + "fsgnjx.s fa0,fa0,fa1", + 0x20b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RNE, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Fmin, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmin.s fa0,fa0,fa1", + 0x28b50553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RTZ, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Fmax, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmax.s fa0,fa0,fa1", + 0x28b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RDN, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Feq, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "feq.s a0,fa0,fa1", + 0xa0b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RTZ, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Flt, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "flt.s a0,fa0,fa1", + 0xa0b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RNE, + width: FpuOPWidth::S, + alu_op: FpuOPRRR::Fle, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "fle.s a0,fa0,fa1", + 0xa0b50553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Fadd, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fadd.d fa0,fa0,fa1,fcsr", + 0x2b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Fsub, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsub.d fa0,fa0,fa1,fcsr", + 0xab57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Fmul, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmul.d fa0,fa0,fa1,fcsr", + 0x12b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Fdiv, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fdiv.d fa0,fa0,fa1,fcsr", + 0x1ab57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RNE, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Fsgnj, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnj.d fa0,fa0,fa1", + 0x22b50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RTZ, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Fsgnjn, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjn.d fa0,fa0,fa1", + 0x22b51553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RDN, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Fsgnjx, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjx.d fa0,fa0,fa1", + 0x22b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RNE, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Fmin, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmin.d fa0,fa0,fa1", + 0x2ab50553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RTZ, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Fmax, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmax.d fa0,fa0,fa1", + 0x2ab51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RDN, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Feq, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "feq.d a0,fa0,fa1", + 0xa2b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RTZ, + width: FpuOPWidth::D, + alu_op: FpuOPRRR::Flt, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "flt.d a0,fa0,fa1", + 0xa2b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: FRM::RNE, + width: FpuOPWidth::D, + 
alu_op: FpuOPRRR::Fle, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "fle.d a0,fa0,fa1", + 0xa2b50553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::RNE, + width: FpuOPWidth::S, + alu_op: FpuOPRR::Fsqrt, + rd: writable_fa0(), + rs: fa1(), + }, + "fsqrt.s fa0,fa1,rne", + 0x58058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FcvtWFmt, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.w.s a0,fa1,fcsr", + 0xc005f553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FcvtWuFmt, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.wu.s a0,fa1,fcsr", + 0xc015f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::RNE, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FmvXFmt, + rd: writable_a0(), + rs: fa1(), + }, + "fmv.x.w a0,fa1", + 0xe0058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::RTZ, + width: FpuOPWidth::S, + alu_op: FpuOPRR::Fclass, + rd: writable_a0(), + rs: fa1(), + }, + "fclass.s a0,fa1", + 0xe0059553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FcvtFmtW, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.w fa0,a0,fcsr", + 0xd0057553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FcvtFmtWu, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.wu fa0,a0,fcsr", + 0xd0157553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::RNE, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FmvFmtX, + rd: writable_fa0(), + rs: a0(), + }, + "fmv.w.x fa0,a0", + 0xf0050553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FcvtLFmt, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.l.s a0,fa0,fcsr", + 0xc0257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FcvtLuFmt, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.lu.s a0,fa0,fcsr", + 0xc0357553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FcvtFmtL, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.l fa0,a0,fcsr", + 0xd0257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FcvtFmtLu, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.lu fa0,a0,fcsr", + 0xd0357553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRR::Fsqrt, + rd: writable_fa0(), + rs: fa1(), + }, + "fsqrt.d fa0,fa1,fcsr", + 0x5a05f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRR::FcvtWFmt, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.w.d a0,fa1,fcsr", + 0xc205f553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRR::FcvtWuFmt, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.wu.d a0,fa1,fcsr", + 0xc215f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::RNE, + width: FpuOPWidth::D, + alu_op: FpuOPRR::FmvXFmt, + rd: writable_a0(), + rs: fa1(), + }, + "fmv.x.d a0,fa1", + 0xe2058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::RTZ, + width: FpuOPWidth::D, + alu_op: FpuOPRR::Fclass, + rd: writable_a0(), + rs: fa1(), + }, + "fclass.d a0,fa1", + 0xe2059553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: 
FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRR::FcvtSD, + rd: writable_fa0(), + rs: fa0(), + }, + "fcvt.s.d fa0,fa0,fcsr", + 0x40157553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::RNE, + width: FpuOPWidth::D, + alu_op: FpuOPRR::FcvtFmtWu, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.wu fa0,a0,rne", + 0xd2150553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::RNE, + width: FpuOPWidth::D, + alu_op: FpuOPRR::FmvFmtX, + rd: writable_fa0(), + rs: a0(), + }, + "fmv.d.x fa0,a0", + 0xf2050553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRR::FcvtLFmt, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.l.d a0,fa0,fcsr", + 0xc2257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRR::FcvtLuFmt, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.lu.d a0,fa0,fcsr", + 0xc2357553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRR::FcvtFmtL, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.l fa0,a0,fcsr", + 0xd2257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRR::FcvtFmtLu, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.lu fa0,a0,fcsr", + 0xd2357553, + )); + ////////////////////// + + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: FRM::RNE, + width: FpuOPWidth::S, + alu_op: FpuOPRRRR::Fmadd, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmadd.s fa0,fa0,fa1,fa7,rne", + 0x88b50543, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRRRR::Fmsub, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmsub.s fa0,fa0,fa1,fa7,fcsr", + 0x88b57547, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRRRR::Fnmsub, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmsub.s fa0,fa0,fa1,fa7,fcsr", + 0x88b5754b, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::S, + alu_op: FpuOPRRRR::Fnmadd, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmadd.s fa0,fa0,fa1,fa7,fcsr", + 0x88b5754f, + )); + + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRRRR::Fmadd, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmadd.d fa0,fa0,fa1,fa7,fcsr", + 0x8ab57543, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRRRR::Fmsub, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmsub.d fa0,fa0,fa1,fa7,fcsr", + 0x8ab57547, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRRRR::Fnmsub, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmsub.d fa0,fa0,fa1,fa7,fcsr", + 0x8ab5754b, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: FRM::Fcsr, + width: FpuOPWidth::D, + alu_op: FpuOPRRRR::Fnmadd, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmadd.d fa0,fa0,fa1,fa7,fcsr", + 0x8ab5754f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::LrW, + rd: writable_a0(), + addr: a1(), + src: zero_reg(), + amo: AMO::Relax, + }, + "lr.w a0,(a1)", + 0x1005a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::ScW, + rd: 
writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Release, + }, + "sc.w.rl a0,a2,(a1)", + 0x1ac5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoswapW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Aquire, + }, + "amoswap.w.aq a0,a2,(a1)", + 0xcc5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoaddW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::SeqCst, + }, + "amoadd.w.aqrl a0,a2,(a1)", + 0x6c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoxorW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoxor.w a0,a2,(a1)", + 0x20c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoandW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoand.w a0,a2,(a1)", + 0x60c5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoorW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoor.w a0,a2,(a1)", + 0x40c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomin.w a0,a2,(a1)", + 0x80c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomax.w a0,a2,(a1)", + 0xa0c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominuW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amominu.w a0,a2,(a1)", + 0xc0c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxuW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomaxu.w a0,a2,(a1)", + 0xe0c5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::LrD, + rd: writable_a0(), + addr: a1(), + src: zero_reg(), + amo: AMO::Relax, + }, + "lr.d a0,(a1)", + 0x1005b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::ScD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "sc.d a0,a2,(a1)", + 0x18c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoswapD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoswap.d a0,a2,(a1)", + 0x8c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoaddD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoadd.d a0,a2,(a1)", + 0xc5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoxorD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoxor.d a0,a2,(a1)", + 0x20c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoandD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoand.d a0,a2,(a1)", + 0x60c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoorD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoor.d a0,a2,(a1)", + 0x40c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomin.d a0,a2,(a1)", + 0x80c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomax.d a0,a2,(a1)", + 0xa0c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: 
AtomicOP::AmominuD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amominu.d a0,a2,(a1)", + 0xc0c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxuD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomaxu.d a0,a2,(a1)", + 0xe0c5b52f, + )); + + ///////// + insns.push(TestUnit::new( + Inst::Fence { + pred: 1, + succ: 1 << 1, + }, + "fence w,r", + 0x120000f, + )); + insns.push(TestUnit::new(Inst::EBreak {}, "ebreak", 0x100073)); + + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::Fsgnj, + width: FpuOPWidth::S, + frm: FRM::RNE, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fmv.s fa0,fa1", + 0x20b58553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::Fsgnj, + width: FpuOPWidth::D, + frm: FRM::RNE, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fmv.d fa0,fa1", + 0x22b58553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::Fsgnjn, + width: FpuOPWidth::S, + frm: FRM::RTZ, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fneg.s fa0,fa1", + 0x20b59553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::Fsgnjn, + width: FpuOPWidth::D, + frm: FRM::RTZ, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fneg.d fa0,fa1", + 0x22b59553, + )); + + insns.push(TestUnit::new( + Inst::Fli { + ty: F32, + rd: writable_fa0(), + imm: FliConstant::new(0), + }, + "fli.s fa0,-1.0", + 0xf0100553, + )); + + insns.push(TestUnit::new( + Inst::Fli { + ty: F64, + rd: writable_fa0(), + imm: FliConstant::new(13), + }, + "fli.d fa0,0.625", + 0xf2168553, + )); + + let (flags, isa_flags) = make_test_flags(); + let emit_info = EmitInfo::new(flags, isa_flags); + + for unit in insns.iter() { + println!("Riscv64: {:?}, {}", unit.inst, unit.assembly); + // Check the printed text is as expected. + let actual_printing = unit.inst.print_with_state(&mut EmitState::default()); + assert_eq!(unit.assembly, actual_printing); + let mut buffer = MachBuffer::new(); + unit.inst + .emit(&mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let actual_encoding = buffer.stringify_code_bytes(); + + assert_eq!(actual_encoding, unit.code.0); + } +} + +fn make_test_flags() -> (settings::Flags, super::super::riscv_settings::Flags) { + let b = settings::builder(); + let flags = settings::Flags::new(b.clone()); + let b2 = super::super::riscv_settings::builder(); + let isa_flags = super::super::riscv_settings::Flags::new(&flags, &b2); + (flags, isa_flags) +} + +#[test] +fn riscv64_worst_case_instruction_size() { + let (flags, isa_flags) = make_test_flags(); + let emit_info = EmitInfo::new(flags, isa_flags); + + // These are all candidate instructions with potential to generate a lot of bytes. 
+ let mut candidates: Vec = vec![]; + + candidates.push(Inst::Popcnt { + sum: writable_a0(), + tmp: writable_a0(), + step: writable_a0(), + rs: a0(), + ty: I64, + }); + + candidates.push(Inst::Cltz { + sum: writable_a0(), + tmp: writable_a0(), + step: writable_a0(), + rs: a0(), + leading: true, + ty: I64, + }); + + candidates.push(Inst::Brev8 { + rd: writable_a0(), + tmp: writable_a0(), + step: writable_a0(), + tmp2: writable_a0(), + rs: a0(), + ty: I64, + }); + + candidates.push(Inst::AtomicCas { + offset: a0(), + t0: writable_a0(), + dst: writable_a0(), + e: a0(), + addr: a0(), + v: a0(), + ty: I64, + }); + + candidates.push(Inst::AtomicCas { + offset: a0(), + t0: writable_a0(), + dst: writable_a0(), + e: a0(), + addr: a0(), + v: a0(), + ty: I16, + }); + + candidates.extend( + crate::ir::AtomicRmwOp::all() + .iter() + .map(|op| Inst::AtomicRmwLoop { + op: *op, + offset: a0(), + dst: writable_a1(), + ty: I16, + p: a1(), + x: a2(), + t0: writable_a0(), + }), + ); + + // Return Call Indirect and BrTable are the largest instructions possible. However they + // emit their own island, so we don't account them here. + + let mut max: (u32, MInst) = (0, Inst::Nop0); + for i in candidates { + let mut buffer = MachBuffer::new(); + let mut emit_state = Default::default(); + i.emit(&mut buffer, &emit_info, &mut emit_state); + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let length = buffer.data().len() as u32; + if length > max.0 { + let length = buffer.data().len() as u32; + max = (length, i.clone()); + } + println!("insn:{i:?} length: {length}"); + } + println!("calculate max size is {} , inst is {:?}", max.0, max.1); + assert!(max.0 <= Inst::worst_case_size()); +} diff --git a/hbcb/src/inst/encode.rs b/hbcb/src/inst/encode.rs new file mode 100644 index 0000000..0e2d4c4 --- /dev/null +++ b/hbcb/src/inst/encode.rs @@ -0,0 +1,721 @@ +//! Contains the RISC-V instruction encoding logic. +//! +//! These formats are specified in the RISC-V specification in section 2.2. +//! See: +//! +//! Some instructions especially in extensions have slight variations from +//! the base RISC-V specification. + +use super::*; +use crate::lower::isle::generated_code::{ + COpcodeSpace, CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, + VecAluOpRImm5, VecAluOpRR, VecAluOpRRRImm5, VecAluOpRRRR, VecOpCategory, ZcbMemOp, +}; +use crate::machinst::isle::WritableReg; + +fn unsigned_field_width(value: u32, width: u8) -> u32 { + debug_assert_eq!(value & (!0 << width), 0); + value +} + +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20------24-25-------31 +/// | Opcode | rd | funct3 | rs1 | rs2 | funct7 | +fn encode_r_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, rs2: u32, funct7: u32) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= unsigned_field_width(rd, 5) << 7; + bits |= unsigned_field_width(funct3, 3) << 12; + bits |= unsigned_field_width(rs1, 5) << 15; + bits |= unsigned_field_width(rs2, 5) << 20; + bits |= unsigned_field_width(funct7, 7) << 25; + bits +} + +/// Encode an R-type instruction. 
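+///
+/// As a sanity check (standard RV64I, not specific to this crate): `add a0,a1,a2`
+/// has opcode=0b0110011, rd=10, funct3=0, rs1=11, rs2=12, funct7=0, which
+/// assembles to 0x00c58533.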
+pub fn encode_r_type( + opcode: u32, + rd: WritableReg, + funct3: u32, + rs1: Reg, + rs2: Reg, + funct7: u32, +) -> u32 { + encode_r_type_bits( + opcode, + reg_to_gpr_num(rd.to_reg()), + funct3, + reg_to_gpr_num(rs1), + reg_to_gpr_num(rs2), + funct7, + ) +} + +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20------------------31 +/// | Opcode | rd | width | rs1 | Offset[11:0] | +fn encode_i_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, offset: u32) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= unsigned_field_width(rd, 5) << 7; + bits |= unsigned_field_width(funct3, 3) << 12; + bits |= unsigned_field_width(rs1, 5) << 15; + bits |= unsigned_field_width(offset, 12) << 20; + bits +} + +/// Encode an I-type instruction. +pub fn encode_i_type(opcode: u32, rd: WritableReg, width: u32, rs1: Reg, offset: Imm12) -> u32 { + encode_i_type_bits( + opcode, + reg_to_gpr_num(rd.to_reg()), + width, + reg_to_gpr_num(rs1), + offset.bits(), + ) +} + +/// Encode an S-type instruction. +/// +/// Layout: +/// 0-------6-7-------11-12------14-15------19-20---24-25-------------31 +/// | Opcode | imm[4:0] | width | base | src | imm[11:5] | +pub fn encode_s_type(opcode: u32, width: u32, base: Reg, src: Reg, offset: Imm12) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= (offset.bits() & 0b11111) << 7; + bits |= unsigned_field_width(width, 3) << 12; + bits |= reg_to_gpr_num(base) << 15; + bits |= reg_to_gpr_num(src) << 20; + bits |= unsigned_field_width(offset.bits() >> 5, 7) << 25; + bits +} + +/// Encodes a Vector ALU instruction. +/// +/// Fields: +/// - opcode (7 bits) +/// - vd (5 bits) +/// - funct3 (3 bits) +/// - vs1 (5 bits) +/// - vs2 (5 bits) +/// - vm (1 bit) +/// - funct6 (6 bits) +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc +pub fn encode_valu( + op: VecAluOpRRR, + vd: WritableReg, + vs1: Reg, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + reg_to_gpr_num(vs1), + reg_to_gpr_num(vs2), + funct7, + ) +} + +/// Encodes a Vector ALU+Imm instruction. +/// This is just a Vector ALU instruction with an immediate in the VS1 field. 
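+/// (This is the OPIVI-style form, where a 5-bit sign-extended immediate, as in
+/// `vadd.vi`, takes the place of `vs1`.)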
+/// +/// Fields: +/// - opcode (7 bits) +/// - vd (5 bits) +/// - funct3 (3 bits) +/// - imm (5 bits) +/// - vs2 (5 bits) +/// - vm (1 bit) +/// - funct6 (6 bits) +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc +pub fn encode_valu_rr_imm( + op: VecAluOpRRImm5, + vd: WritableReg, + imm: Imm5, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + let imm = imm.bits() as u32; + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + imm, + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rrrr( + op: VecAluOpRRRR, + vd: WritableReg, + vs2: Reg, + vs1: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + reg_to_gpr_num(vs1), + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rrr_imm( + op: VecAluOpRRRImm5, + vd: WritableReg, + imm: Imm5, + vs2: Reg, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + let imm = imm.bits() as u32; + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + imm, + reg_to_gpr_num(vs2), + funct7, + ) +} + +pub fn encode_valu_rr(op: VecAluOpRR, vd: WritableReg, vs: Reg, masking: VecOpMasking) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + + let (vs1, vs2) = if op.vs_is_vs2_encoded() { + (op.aux_encoding(), reg_to_gpr_num(vs)) + } else { + (reg_to_gpr_num(vs), op.aux_encoding()) + }; + + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + vs1, + vs2, + funct7, + ) +} + +pub fn encode_valu_r_imm( + op: VecAluOpRImm5, + vd: WritableReg, + imm: Imm5, + masking: VecOpMasking, +) -> u32 { + let funct7 = (op.funct6() << 1) | masking.encode(); + + // This is true for this opcode, not sure if there are any other ones. + debug_assert_eq!(op, VecAluOpRImm5::VmvVI); + let vs1 = imm.bits() as u32; + let vs2 = op.aux_encoding(); + + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(vd.to_reg()), + op.funct3(), + vs1, + vs2, + funct7, + ) +} + +/// Encodes a Vector CFG Imm instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc +// TODO: Check if this is any of the known instruction types in the spec. +pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 { + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(rd) << 7; + bits |= VecOpCategory::OPCFG.encode() << 12; + bits |= unsigned_field_width(imm.bits(), 5) << 15; + bits |= unsigned_field_width(vtype.encode(), 10) << 20; + bits |= 0b11 << 30; + bits +} + +/// Encodes a Vector Mem Unit Stride Load instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc +/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP +pub fn encode_vmem_load( + opcode: u32, + vd: Reg, + width: VecElementWidth, + rs1: Reg, + lumop: u32, + masking: VecOpMasking, + mop: u32, + nf: u32, +) -> u32 { + // Width is encoded differently to avoid a clash with the FP load/store sizes. 
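+    // The scalar FP loads already use widths 0b001/0b010/0b011 for 16/32/64-bit,
+    // so the vector element widths are mapped to 0b101/0b110/0b111 instead, with
+    // 8-bit keeping 0b000.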
+ let width = match width { + VecElementWidth::E8 => 0b000, + VecElementWidth::E16 => 0b101, + VecElementWidth::E32 => 0b110, + VecElementWidth::E64 => 0b111, + }; + + let mut bits = 0; + bits |= unsigned_field_width(opcode, 7); + bits |= reg_to_gpr_num(vd) << 7; + bits |= width << 12; + bits |= reg_to_gpr_num(rs1) << 15; + bits |= unsigned_field_width(lumop, 5) << 20; + bits |= masking.encode() << 25; + bits |= unsigned_field_width(mop, 2) << 26; + + // The mew bit (inst[28]) when set is expected to be used to encode expanded + // memory sizes of 128 bits and above, but these encodings are currently reserved. + bits |= 0b0 << 28; + + bits |= unsigned_field_width(nf, 3) << 29; + bits +} + +/// Encodes a Vector Mem Unit Stride Load instruction. +/// +/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc +/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP +pub fn encode_vmem_store( + opcode: u32, + vs3: Reg, + width: VecElementWidth, + rs1: Reg, + sumop: u32, + masking: VecOpMasking, + mop: u32, + nf: u32, +) -> u32 { + // This is pretty much the same as the load instruction, just + // with different names on the fields. + encode_vmem_load(opcode, vs3, width, rs1, sumop, masking, mop, nf) +} + +// The CSR Reg instruction is really just an I type instruction with the CSR in +// the immediate field. +pub fn encode_csr_reg(op: CsrRegOP, rd: WritableReg, rs: Reg, csr: CSR) -> u32 { + encode_i_type(op.opcode(), rd, op.funct3(), rs, csr.bits()) +} + +// The CSR Imm instruction is an I type instruction with the CSR in +// the immediate field and the value to be set in the `rs1` field. +pub fn encode_csr_imm(op: CsrImmOP, rd: WritableReg, csr: CSR, imm: UImm5) -> u32 { + encode_i_type_bits( + op.opcode(), + reg_to_gpr_num(rd.to_reg()), + op.funct3(), + imm.bits(), + csr.bits().bits(), + ) +} + +// Encode a CR type instruction. +// +// 0--1-2-----6-7-------11-12-------15 +// |op | rs2 | rd/rs1 | funct4 | +pub fn encode_cr_type(op: CrOp, rd: WritableReg, rs2: Reg) -> u16 { + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= reg_to_gpr_num(rs2) << 2; + bits |= reg_to_gpr_num(rd.to_reg()) << 7; + bits |= unsigned_field_width(op.funct4(), 4) << 12; + bits.try_into().unwrap() +} + +// This isn't technically a instruction format that exists. It's just a CR type +// where the source is rs1, rs2 is zero. rs1 is never written to. +// +// Used for C.JR and C.JALR +pub fn encode_cr2_type(op: CrOp, rs1: Reg) -> u16 { + encode_cr_type(op, WritableReg::from_reg(rs1), zero_reg()) +} + +// Encode a CA type instruction. +// +// 0--1-2-----4-5--------6-7--------9-10------15 +// |op | rs2 | funct2 | rd/rs1 | funct6 | +pub fn encode_ca_type(op: CaOp, rd: WritableReg, rs2: Reg) -> u16 { + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= reg_to_compressed_gpr_num(rs2) << 2; + bits |= unsigned_field_width(op.funct2(), 2) << 5; + bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 7; + bits |= unsigned_field_width(op.funct6(), 6) << 10; + bits.try_into().unwrap() +} + +// Encode a CJ type instruction. +// +// The imm field is a 11 bit signed immediate that is shifted left by 1. +// +// 0--1-2-----12-13--------15 +// |op | imm | funct3 | +pub fn encode_cj_type(op: CjOp, imm: Imm12) -> u16 { + let imm = imm.bits(); + debug_assert!(imm & 1 == 0); + + // The offset bits are in rather weird positions. 
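+    // (For example, offset bit 1 ends up in imm_field bit 1 below, i.e. bit 3 of
+    // the encoded instruction.)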
+ // [11|4|9:8|10|6|7|3:1|5] + let mut imm_field = 0; + imm_field |= ((imm >> 11) & 1) << 10; + imm_field |= ((imm >> 4) & 1) << 9; + imm_field |= ((imm >> 8) & 3) << 7; + imm_field |= ((imm >> 10) & 1) << 6; + imm_field |= ((imm >> 6) & 1) << 5; + imm_field |= ((imm >> 7) & 1) << 4; + imm_field |= ((imm >> 1) & 7) << 1; + imm_field |= ((imm >> 5) & 1) << 0; + + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= unsigned_field_width(imm_field, 11) << 2; + bits |= unsigned_field_width(op.funct3(), 3) << 13; + bits.try_into().unwrap() +} + +// Encode a CI type instruction. +// +// The imm field is a 6 bit signed immediate. +// +// 0--1-2-------6-7-------11-12-----12-13-----15 +// |op | imm[4:0] | src | imm[5] | funct3 | +pub fn encode_ci_type(op: CiOp, rd: WritableReg, imm: Imm6) -> u16 { + let imm = imm.bits(); + + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= unsigned_field_width((imm & 0x1f) as u32, 5) << 2; + bits |= reg_to_gpr_num(rd.to_reg()) << 7; + bits |= unsigned_field_width(((imm >> 5) & 1) as u32, 1) << 12; + bits |= unsigned_field_width(op.funct3(), 3) << 13; + bits.try_into().unwrap() +} + +// Stack-Pointer relative loads are regular CI instructions, but, the immediate +// is zero extended, and with a slightly different immediate field encoding. +pub fn encode_ci_sp_load(op: CiOp, rd: WritableReg, imm: Uimm6) -> u16 { + let imm = imm.bits(); + + // These are the spec encoded offsets. + // LWSP: [5|4:2|7:6] + // LDSP: [5|4:3|8:6] + // FLDSP: [5|4:3|8:6] + // + // We don't receive the entire offset in `imm`, just a multiple of the load-size. + + // Number of bits in the lowest position of imm. 3 for lwsp, 2 for {f,}ldsp. + let low_bits = match op { + CiOp::CLwsp => 3, // [4:2] + CiOp::CLdsp | CiOp::CFldsp => 2, // [4:3] + _ => unreachable!(), + }; + let high_bits = 6 - 1 - low_bits; + let mut enc_imm = 0; + + // Encode [7:6] at the bottom of imm + enc_imm |= imm >> (6 - high_bits); + + // Next place [4:2] in the middle + enc_imm |= (imm & ((1 << low_bits) - 1)) << high_bits; + + // Finally place [5] at the top + enc_imm |= ((imm >> low_bits) & 1) << 5; + + let enc_imm = Imm6::maybe_from_i16((enc_imm as i16) << 10 >> 10).unwrap(); + + encode_ci_type(op, rd, enc_imm) +} + +/// c.addi16sp is a regular CI op, but the immediate field is encoded in a weird way +pub fn encode_c_addi16sp(imm: Imm6) -> u16 { + let imm = imm.bits(); + + // [6|1|3|5:4|2] + let mut enc_imm = 0; + enc_imm |= ((imm >> 5) & 1) << 5; + enc_imm |= ((imm >> 0) & 1) << 4; + enc_imm |= ((imm >> 2) & 1) << 3; + enc_imm |= ((imm >> 3) & 3) << 1; + enc_imm |= ((imm >> 1) & 1) << 0; + let enc_imm = Imm6::maybe_from_i16((enc_imm as i16) << 10 >> 10).unwrap(); + + encode_ci_type(CiOp::CAddi16sp, writable_stack_reg(), enc_imm) +} + +// Encode a CIW type instruction. +// +// 0--1-2------4-5------12-13--------15 +// |op | rd | imm | funct3 | +pub fn encode_ciw_type(op: CiwOp, rd: WritableReg, imm: u8) -> u16 { + // [3:2|7:4|0|1] + let mut imm_field = 0; + imm_field |= ((imm >> 1) & 1) << 0; + imm_field |= ((imm >> 0) & 1) << 1; + imm_field |= ((imm >> 4) & 15) << 2; + imm_field |= ((imm >> 2) & 3) << 6; + + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 2; + bits |= unsigned_field_width(imm_field as u32, 8) << 5; + bits |= unsigned_field_width(op.funct3(), 3) << 13; + bits.try_into().unwrap() +} + +// Encode a CB type instruction. +// +// The imm field is a 6 bit signed immediate. 
+// +// 0--1-2-------6-7-------9-10-------11-12-------13--------15 +// |op | imm[4:0] | dst | funct2 | imm[5] | funct3 | +pub fn encode_cb_type(op: CbOp, rd: WritableReg, imm: Imm6) -> u16 { + let imm = imm.bits(); + + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= unsigned_field_width((imm & 0x1f) as u32, 5) << 2; + bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 7; + bits |= unsigned_field_width(op.funct2(), 2) << 10; + bits |= unsigned_field_width(((imm >> 5) & 1) as u32, 1) << 12; + bits |= unsigned_field_width(op.funct3(), 3) << 13; + bits.try_into().unwrap() +} + +// Encode a CSS type instruction. +// +// The imm field is a 6 bit unsigned immediate. +// +// 0--1-2-------6-7--------12-13-------15 +// |op | src | imm | funct3 | +pub fn encode_css_type(op: CssOp, src: Reg, imm: Uimm6) -> u16 { + let imm = imm.bits(); + + // These are the spec encoded offsets. + // c.swsp: [5:2|7:6] + // c.sdsp: [5:3|8:6] + // c.fsdsp: [5:3|8:6] + // + // We don't receive the entire offset in `imm`, just a multiple of the load-size. + + // Number of bits in the lowest position of imm. 4 for c.swsp, 3 for c.{f,}sdsp. + let low_bits = match op { + CssOp::CSwsp => 4, // [5:2] + CssOp::CSdsp | CssOp::CFsdsp => 3, // [5:3] + }; + let high_bits = 6 - low_bits; + + let mut enc_imm = 0; + enc_imm |= (imm & ((1 << low_bits) - 1)) << high_bits; + enc_imm |= imm >> low_bits; + + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= reg_to_gpr_num(src) << 2; + bits |= unsigned_field_width(enc_imm as u32, 6) << 7; + bits |= unsigned_field_width(op.funct3(), 3) << 13; + bits.try_into().unwrap() +} + +// Encode a CS type instruction. +// +// The imm field is a 5 bit unsigned immediate. +// +// 0--1-2-----4-5----------6-7---------9-10----------12-13-----15 +// |op | src | imm(2-bit) | base | imm(3-bit) | funct3 | +pub fn encode_cs_type(op: CsOp, src: Reg, base: Reg, imm: Uimm5) -> u16 { + let size = match op { + CsOp::CFsd | CsOp::CSd => 8, + CsOp::CSw => 4, + }; + + encode_cs_cl_type_bits(op.op(), op.funct3(), size, src, base, imm) +} + +// Encode a CL type instruction. +// +// The imm field is a 5 bit unsigned immediate. +// +// 0--1-2------4-5----------6-7---------9-10----------12-13-----15 +// |op | dest | imm(2-bit) | base | imm(3-bit) | funct3 | +pub fn encode_cl_type(op: ClOp, dest: WritableReg, base: Reg, imm: Uimm5) -> u16 { + let size = match op { + ClOp::CFld | ClOp::CLd => 8, + ClOp::CLw => 4, + }; + + encode_cs_cl_type_bits(op.op(), op.funct3(), size, dest.to_reg(), base, imm) +} + +// CL and CS type instructions have the same physical layout. 
+// +// 0--1-2----------4-5----------6-7---------9-10----------12-13-----15 +// |op | dest/src | imm(2-bit) | base | imm(3-bit) | funct3 | +fn encode_cs_cl_type_bits( + op: COpcodeSpace, + funct3: u32, + size: u32, + dest_src: Reg, + base: Reg, + imm: Uimm5, +) -> u16 { + let imm = imm.bits(); + + // c.sw / c.lw: [2|6] + // c.sd / c.ld: [7:6] + // c.fsd / c.fld: [7:6] + // + // We differentiate these based on the operation size + let imm2 = match size { + 4 => ((imm >> 4) & 1) | ((imm & 1) << 1), + 8 => (imm >> 3) & 0b11, + _ => unreachable!(), + }; + + // [5:3] on all opcodes + let imm3 = match size { + 4 => (imm >> 1) & 0b111, + 8 => (imm >> 0) & 0b111, + _ => unreachable!(), + }; + + let mut bits = 0; + bits |= unsigned_field_width(op.bits(), 2); + bits |= reg_to_compressed_gpr_num(dest_src) << 2; + bits |= unsigned_field_width(imm2 as u32, 2) << 5; + bits |= reg_to_compressed_gpr_num(base) << 7; + bits |= unsigned_field_width(imm3 as u32, 3) << 10; + bits |= unsigned_field_width(funct3, 3) << 13; + bits.try_into().unwrap() +} + +// Encode a CSZN type instruction. +// +// This is an additional encoding format that is introduced in the Zcb extension. +// +// 0--1-2---------6-7--------9-10------15 +// |op | funct5 | rd/rs1 | funct6 | +pub fn encode_cszn_type(op: CsznOp, rd: WritableReg) -> u16 { + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= unsigned_field_width(op.funct5(), 5) << 2; + bits |= reg_to_compressed_gpr_num(rd.to_reg()) << 7; + bits |= unsigned_field_width(op.funct6(), 6) << 10; + bits.try_into().unwrap() +} + +// Encodes the various memory operations in the Zcb extension. +// +// 0--1-2----------4-5----------6-7---------9-10-------15 +// |op | dest/src | imm(2-bit) | base | funct6 | +fn encode_zcbmem_bits(op: ZcbMemOp, dest_src: Reg, base: Reg, imm: Uimm2) -> u16 { + let imm = imm.bits(); + + // For these ops, bit 6 is part of the opcode, and bit 5 encodes the imm offset. + let imm = match op { + ZcbMemOp::CLh | ZcbMemOp::CLhu | ZcbMemOp::CSh => { + debug_assert_eq!(imm & !1, 0); + // Only c.lh has this bit as 1 + let opcode_bit = (op == ZcbMemOp::CLh) as u8; + imm | (opcode_bit << 1) + } + // In the rest of the ops the imm is reversed. + _ => ((imm & 1) << 1) | ((imm >> 1) & 1), + }; + + let mut bits = 0; + bits |= unsigned_field_width(op.op().bits(), 2); + bits |= reg_to_compressed_gpr_num(dest_src) << 2; + bits |= unsigned_field_width(imm as u32, 2) << 5; + bits |= reg_to_compressed_gpr_num(base) << 7; + bits |= unsigned_field_width(op.funct6(), 6) << 10; + bits.try_into().unwrap() +} + +pub fn encode_zcbmem_load(op: ZcbMemOp, rd: WritableReg, base: Reg, imm: Uimm2) -> u16 { + encode_zcbmem_bits(op, rd.to_reg(), base, imm) +} + +pub fn encode_zcbmem_store(op: ZcbMemOp, src: Reg, base: Reg, imm: Uimm2) -> u16 { + encode_zcbmem_bits(op, src, base, imm) +} + +pub fn encode_fli(ty: Type, imm: FliConstant, rd: WritableReg) -> u32 { + // FLI.{S,D} is encoded as a FMV.{W,D} instruction with rs2 set to the + // immediate value to be loaded. 
+ let op = FpuOPRR::FmvFmtX; + let width = FpuOPWidth::try_from(ty).unwrap(); + let frm = 0; // FRM is hard coded to 0 in both instructions + let rs2 = 1; // rs2 set to 1 is what differentiates FLI from FMV + + let mut bits = 0; + bits |= unsigned_field_width(op.opcode(), 7); + bits |= reg_to_gpr_num(rd.to_reg()) << 7; + bits |= unsigned_field_width(frm, 3) << 12; + bits |= unsigned_field_width(imm.bits() as u32, 5) << 15; + bits |= unsigned_field_width(rs2, 6) << 20; + bits |= unsigned_field_width(op.funct7(width), 7) << 25; + bits +} + +pub fn encode_fp_rr(op: FpuOPRR, width: FpuOPWidth, frm: FRM, rd: WritableReg, rs: Reg) -> u32 { + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(rd.to_reg()), + frm.as_u32(), + reg_to_gpr_num(rs), + op.rs2(), + op.funct7(width), + ) +} + +pub fn encode_fp_rrr( + op: FpuOPRRR, + width: FpuOPWidth, + frm: FRM, + rd: WritableReg, + rs1: Reg, + rs2: Reg, +) -> u32 { + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(rd.to_reg()), + frm.as_u32(), + reg_to_gpr_num(rs1), + reg_to_gpr_num(rs2), + op.funct7(width), + ) +} + +pub fn encode_fp_rrrr( + op: FpuOPRRRR, + width: FpuOPWidth, + frm: FRM, + rd: WritableReg, + rs1: Reg, + rs2: Reg, + rs3: Reg, +) -> u32 { + let funct7 = (reg_to_gpr_num(rs3) << 2) | width.as_u32(); + encode_r_type_bits( + op.opcode(), + reg_to_gpr_num(rd.to_reg()), + frm.as_u32(), + reg_to_gpr_num(rs1), + reg_to_gpr_num(rs2), + funct7, + ) +} diff --git a/hbcb/src/inst/imms.rs b/hbcb/src/inst/imms.rs new file mode 100644 index 0000000..28f2791 --- /dev/null +++ b/hbcb/src/inst/imms.rs @@ -0,0 +1,374 @@ +//! Riscv64 ISA definitions: immediate constants. + +// Some variants are never constructed, but we still want them as options in the future. +use super::Inst; +#[allow(dead_code)] +use std::fmt::{Debug, Display, Formatter, Result}; + +#[derive(Copy, Clone, Debug, Default)] +pub struct Imm12 { + /// 16-bit container where the low 12 bits are the data payload. + /// + /// Acquiring the underlying value requires sign-extending the 12th bit. + bits: u16, +} + +impl Imm12 { + pub(crate) const ZERO: Self = Self { bits: 0 }; + pub(crate) const ONE: Self = Self { bits: 1 }; + + pub fn maybe_from_u64(val: u64) -> Option { + Self::maybe_from_i64(val as i64) + } + + pub fn maybe_from_i64(val: i64) -> Option { + if val >= -2048 && val <= 2047 { + Some(Imm12 { + bits: val as u16 & 0xfff, + }) + } else { + None + } + } + + #[inline] + pub fn from_i16(bits: i16) -> Self { + assert!(bits >= -2048 && bits <= 2047); + Self { + bits: (bits & 0xfff) as u16, + } + } + + #[inline] + pub fn as_i16(self) -> i16 { + (self.bits << 4) as i16 >> 4 + } + + #[inline] + pub fn bits(&self) -> u32 { + self.bits.into() + } +} + +impl Into for Imm12 { + fn into(self) -> i64 { + self.as_i16().into() + } +} + +impl Display for Imm12 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{:+}", self.as_i16()) + } +} + +// signed +#[derive(Clone, Copy, Default)] +pub struct Imm20 { + /// 32-bit container where the low 20 bits are the data payload. + /// + /// Acquiring the underlying value requires sign-extending the 20th bit. 
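+    /// For example, a stored value of 0xf_ffff decodes to -1.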
+ bits: u32, +} + +impl Imm20 { + pub(crate) const ZERO: Self = Self { bits: 0 }; + + pub fn maybe_from_u64(val: u64) -> Option { + Self::maybe_from_i64(val as i64) + } + + pub fn maybe_from_i64(val: i64) -> Option { + if val >= -(0x7_ffff + 1) && val <= 0x7_ffff { + Some(Imm20 { bits: val as u32 }) + } else { + None + } + } + + #[inline] + pub fn from_i32(bits: i32) -> Self { + assert!(bits >= -(0x7_ffff + 1) && bits <= 0x7_ffff); + Self { + bits: (bits as u32) & 0xf_ffff, + } + } + + #[inline] + pub fn as_i32(&self) -> i32 { + ((self.bits << 12) as i32) >> 12 + } + + #[inline] + pub fn bits(&self) -> u32 { + self.bits + } +} + +impl Debug for Imm20 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.as_i32()) + } +} + +impl Display for Imm20 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +/// An unsigned 5-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct UImm5 { + value: u8, +} + +impl UImm5 { + /// Create an unsigned 5-bit immediate from u8. + pub fn maybe_from_u8(value: u8) -> Option { + if value < 32 { + Some(UImm5 { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + u32::from(self.value) + } +} + +impl Display for UImm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +/// A Signed 5-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Imm5 { + value: i8, +} + +impl Imm5 { + /// Create an signed 5-bit immediate from an i8. + pub fn maybe_from_i8(value: i8) -> Option { + if value >= -16 && value <= 15 { + Some(Imm5 { value }) + } else { + None + } + } + + pub fn from_bits(value: u8) -> Imm5 { + assert_eq!(value & 0x1f, value); + let signed = ((value << 3) as i8) >> 3; + Imm5 { value: signed } + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value as u8 & 0x1f + } +} + +impl Display for Imm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +/// A Signed 6-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Imm6 { + value: i8, +} + +impl Imm6 { + /// Create an signed 6-bit immediate from an i16 + pub fn maybe_from_i16(value: i16) -> Option { + if value >= -32 && value <= 31 { + Some(Self { value: value as i8 }) + } else { + None + } + } + + pub fn maybe_from_i32(value: i32) -> Option { + value.try_into().ok().and_then(Imm6::maybe_from_i16) + } + + pub fn maybe_from_imm12(value: Imm12) -> Option { + Imm6::maybe_from_i16(value.as_i16()) + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value as u8 & 0x3f + } +} + +impl Display for Imm6 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +/// A unsigned 6-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Uimm6 { + value: u8, +} + +impl Uimm6 { + /// Create an unsigned 6-bit immediate from an u8 + pub fn maybe_from_u8(value: u8) -> Option { + if value <= 63 { + Some(Self { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value & 0x3f + } +} + +impl Display for Uimm6 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +/// A unsigned 5-bit immediate. 
+#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Uimm5 { + value: u8, +} + +impl Uimm5 { + /// Create an unsigned 5-bit immediate from an u8 + pub fn maybe_from_u8(value: u8) -> Option { + if value <= 31 { + Some(Self { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value & 0x1f + } +} + +impl Display for Uimm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +/// A unsigned 2-bit immediate. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Uimm2 { + value: u8, +} + +impl Uimm2 { + /// Create an unsigned 2-bit immediate from an u8 + pub fn maybe_from_u8(value: u8) -> Option { + if value <= 3 { + Some(Self { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u8 { + self.value & 0x3 + } +} + +impl Display for Uimm2 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.value) + } +} + +impl Inst { + pub(crate) fn imm_min() -> i64 { + let imm20_max: i64 = (1 << 19) << 12; + let imm12_max = 1 << 11; + -imm20_max - imm12_max + } + pub(crate) fn imm_max() -> i64 { + let imm20_max: i64 = ((1 << 19) - 1) << 12; + let imm12_max = (1 << 11) - 1; + imm20_max + imm12_max + } + + /// An imm20 immediate and an Imm12 immediate can generate a 32-bit immediate. + /// This helper produces an imm12, imm20, or both to generate the value. + /// + /// `value` must be between `imm_min()` and `imm_max()`, or else + /// this helper returns `None`. + pub(crate) fn generate_imm(value: u64) -> Option<(Imm20, Imm12)> { + if let Some(imm12) = Imm12::maybe_from_u64(value) { + // can be load using single imm12. + return Some((Imm20::ZERO, imm12)); + } + let value = value as i64; + if !(value >= Self::imm_min() && value <= Self::imm_max()) { + // not in range, return None. + return None; + } + const MOD_NUM: i64 = 4096; + let (imm20, imm12) = if value > 0 { + let mut imm20 = value / MOD_NUM; + let mut imm12 = value % MOD_NUM; + if imm12 >= 2048 { + imm12 -= MOD_NUM; + imm20 += 1; + } + assert!(imm12 >= -2048 && imm12 <= 2047); + (imm20, imm12) + } else { + // this is the abs value. + let value_abs = value.abs(); + let imm20 = value_abs / MOD_NUM; + let imm12 = value_abs % MOD_NUM; + let mut imm20 = -imm20; + let mut imm12 = -imm12; + if imm12 < -2048 { + imm12 += MOD_NUM; + imm20 -= 1; + } + (imm20, imm12) + }; + assert!(imm20 != 0 || imm12 != 0); + let imm20 = i32::try_from(imm20).unwrap(); + let imm12 = i16::try_from(imm12).unwrap(); + Some((Imm20::from_i32(imm20), Imm12::from_i16(imm12))) + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_imm12() { + let x = Imm12::ZERO; + assert_eq!(0, x.bits()); + Imm12::maybe_from_u64(0xffff_ffff_ffff_ffff).unwrap(); + } + + #[test] + fn imm20_and_imm12() { + assert!(Inst::imm_max() == (i32::MAX - 2048) as i64); + assert!(Inst::imm_min() == i32::MIN as i64 - 2048); + } +} diff --git a/hbcb/src/inst/mod.rs b/hbcb/src/inst/mod.rs new file mode 100644 index 0000000..6440c57 --- /dev/null +++ b/hbcb/src/inst/mod.rs @@ -0,0 +1,1559 @@ +//! This module defines riscv64-specific machine instruction types. 
+ +pub use crate::ir::{ + condcodes::{FloatCC, IntCC}, + ExternalName, MemFlags, Type, +}; +use { + super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking}, + alloc::vec::Vec, + cranelift_codegen::{ + binemit::{Addend, CodeOffset, Reloc}, + ir::types::{self, F128, F16, F32, F64, I128, I16, I32, I64, I8, I8X16}, + isa::{CallConv, FunctionAlignment}, + machinst::*, + settings, CodegenError, CodegenResult, + }, + regalloc2::RegClass, + smallvec::{smallvec, SmallVec}, + std::{ + boxed::Box, + fmt::Write, + string::{String, ToString}, + }, +}; + +pub mod regs; +pub use self::regs::*; +pub mod imms; +pub use self::imms::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; +pub mod vector; +pub use self::vector::*; +pub mod encode; +pub use self::encode::*; +pub mod unwind; + +use crate::abi::Riscv64MachineDeps; + +#[cfg(test)] +mod emit_tests; + +use std::fmt::{Display, Formatter}; + +pub(crate) type VecU8 = Vec; + +//============================================================================= +// Instructions (top level): definition + +pub use crate::lower::isle::generated_code::{ + AluOPRRI, AluOPRRR, AtomicOP, CsrImmOP, CsrRegOP, FClassResult, FFlagsException, FpuOPRR, + FpuOPRRR, FpuOPRRRR, LoadOP, MInst as Inst, StoreOP, CSR, FRM, +}; +use crate::lower::isle::generated_code::{CjOp, MInst, VecAluOpRRImm5, VecAluOpRRR}; + +/// Additional information for `return_call[_ind]` instructions, left out of +/// line to lower the size of the `Inst` enum. +#[derive(Clone, Debug)] +pub struct ReturnCallInfo { + pub dest: T, + pub uses: CallArgList, + pub new_stack_arg_size: u32, +} + +/// A conditional branch target. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum CondBrTarget { + /// An unresolved reference to a Label, as passed into + /// `lower_branch_group()`. + Label(MachLabel), + /// No jump; fall through to the next instruction. + Fallthrough, +} + +impl CondBrTarget { + /// Return the target's label, if it is a label-based target. + pub(crate) fn as_label(self) -> Option { + match self { + CondBrTarget::Label(l) => Some(l), + _ => None, + } + } + + pub(crate) fn is_fallthrouh(&self) -> bool { + self == &CondBrTarget::Fallthrough + } +} + +impl Display for CondBrTarget { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + CondBrTarget::Label(l) => write!(f, "{}", l.to_string()), + CondBrTarget::Fallthrough => write!(f, "0"), + } + } +} + +pub(crate) fn enc_auipc(rd: Writable, imm: Imm20) -> u32 { + let x = 0b0010111 | reg_to_gpr_num(rd.to_reg()) << 7 | imm.bits() << 12; + x +} + +pub(crate) fn enc_jalr(rd: Writable, base: Reg, offset: Imm12) -> u32 { + let x = 0b1100111 + | reg_to_gpr_num(rd.to_reg()) << 7 + | 0b000 << 12 + | reg_to_gpr_num(base) << 15 + | offset.bits() << 20; + x +} + +/// rd and src must have the same length. +pub(crate) fn gen_moves(rd: &[Writable], src: &[Reg]) -> SmallInstVec { + assert!(rd.len() == src.len()); + assert!(rd.len() > 0); + let mut insts = SmallInstVec::new(); + for (dst, src) in rd.iter().zip(src.iter()) { + let ty = Inst::canonical_type_for_rc(dst.to_reg().class()); + insts.push(Inst::gen_move(*dst, *src, ty)); + } + insts +} + +impl Inst { + /// RISC-V can have multiple instruction sizes. 2 bytes for compressed + /// instructions, 4 for regular instructions, 6 and 8 byte instructions + /// are also being considered. 
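+    ///
+    /// Only the uncompressed (4-byte) size is used below, e.g. when computing the
+    /// jump over an inline 8-byte constant.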
+ const UNCOMPRESSED_INSTRUCTION_SIZE: i32 = 4; + + #[inline] + pub(crate) fn load_imm12(rd: Writable, imm: Imm12) -> Inst { + Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, rd, rs: zero_reg(), imm12: imm } + } + + /// Immediates can be loaded using lui and addi instructions. + fn load_const_imm(rd: Writable, value: u64) -> Option> { + Inst::generate_imm(value).map(|(imm20, imm12)| { + let mut insts = SmallVec::new(); + + let imm20_is_zero = imm20.as_i32() == 0; + let imm12_is_zero = imm12.as_i16() == 0; + + let rs = if !imm20_is_zero { + insts.push(Inst::Lui { rd, imm: imm20 }); + rd.to_reg() + } else { + zero_reg() + }; + + // We also need to emit the addi if the value is 0, otherwise we just + // won't produce any instructions. + if !imm12_is_zero || (imm20_is_zero && imm12_is_zero) { + insts.push(Inst::AluRRImm12 { alu_op: AluOPRRI::Addi, rd, rs, imm12 }) + } + + insts + }) + } + + pub(crate) fn load_constant_u32(rd: Writable, value: u64) -> SmallInstVec { + let insts = Inst::load_const_imm(rd, value); + insts.unwrap_or_else(|| smallvec![Inst::LoadInlineConst { rd, ty: I32, imm: value }]) + } + + pub fn load_constant_u64(rd: Writable, value: u64) -> SmallInstVec { + let insts = Inst::load_const_imm(rd, value); + insts.unwrap_or_else(|| smallvec![Inst::LoadInlineConst { rd, ty: I64, imm: value }]) + } + + pub(crate) fn construct_auipc_and_jalr( + link: Option>, + tmp: Writable, + offset: i64, + ) -> [Inst; 2] { + Inst::generate_imm(offset as u64) + .map(|(imm20, imm12)| { + let a = Inst::Auipc { rd: tmp, imm: imm20 }; + let b = Inst::Jalr { + rd: link.unwrap_or(writable_zero_reg()), + base: tmp.to_reg(), + offset: imm12, + }; + [a, b] + }) + .expect("code range is too big.") + } + + /// Generic constructor for a load (zero-extending where appropriate). + pub fn gen_load(into_reg: Writable, mem: AMode, ty: Type, flags: MemFlags) -> Inst { + if ty.is_vector() { + Inst::VecLoad { + eew: VecElementWidth::from_type(ty), + to: into_reg, + from: VecAMode::UnitStride { base: mem }, + flags, + mask: VecOpMasking::Disabled, + vstate: VState::from_type(ty), + } + } else { + Inst::Load { rd: into_reg, op: LoadOP::from_type(ty), from: mem, flags } + } + } + + /// Generic constructor for a store. + pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { + if ty.is_vector() { + Inst::VecStore { + eew: VecElementWidth::from_type(ty), + to: VecAMode::UnitStride { base: mem }, + from: from_reg, + flags, + mask: VecOpMasking::Disabled, + vstate: VState::from_type(ty), + } + } else { + Inst::Store { src: from_reg, op: StoreOP::from_type(ty), to: mem, flags } + } + } +} + +//============================================================================= + +fn vec_mask_operands(mask: &mut VecOpMasking, collector: &mut impl OperandVisitor) { + match mask { + VecOpMasking::Enabled { reg } => { + collector.reg_fixed_use(reg, pv_reg(0).into()); + } + VecOpMasking::Disabled => {} + } +} +fn vec_mask_late_operands(mask: &mut VecOpMasking, collector: &mut impl OperandVisitor) { + match mask { + VecOpMasking::Enabled { reg } => { + collector.reg_fixed_late_use(reg, pv_reg(0).into()); + } + VecOpMasking::Disabled => {} + } +} + +fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { + match inst { + Inst::Nop0 | Inst::Nop4 => {} + Inst::BrTable { index, tmp1, tmp2, .. } => { + collector.reg_use(index); + collector.reg_early_def(tmp1); + collector.reg_early_def(tmp2); + } + Inst::Auipc { rd, .. } => collector.reg_def(rd), + Inst::Lui { rd, .. 
} => collector.reg_def(rd), + Inst::Fli { rd, .. } => collector.reg_def(rd), + Inst::LoadInlineConst { rd, .. } => collector.reg_def(rd), + Inst::AluRRR { rd, rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + Inst::FpuRRR { rd, rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + Inst::AluRRImm12 { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + Inst::CsrReg { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + Inst::CsrImm { rd, .. } => { + collector.reg_def(rd); + } + Inst::Load { rd, from, .. } => { + from.get_operands(collector); + collector.reg_def(rd); + } + Inst::Store { to, src, .. } => { + to.get_operands(collector); + collector.reg_use(src); + } + + Inst::Args { args } => { + for ArgPair { vreg, preg } in args { + collector.reg_fixed_def(vreg, *preg); + } + } + Inst::Rets { rets } => { + for RetPair { vreg, preg } in rets { + collector.reg_fixed_use(vreg, *preg); + } + } + Inst::Ret { .. } => {} + + Inst::Extend { rd, rn, .. } => { + collector.reg_use(rn); + collector.reg_def(rd); + } + Inst::Call { info, .. } => { + let CallInfo { uses, defs, .. } = &mut **info; + for CallArgPair { vreg, preg } in uses { + collector.reg_fixed_use(vreg, *preg); + } + for CallRetPair { vreg, preg } in defs { + collector.reg_fixed_def(vreg, *preg); + } + collector.reg_clobbers(info.clobbers); + } + Inst::CallInd { info } => { + let CallInfo { dest, uses, defs, .. } = &mut **info; + collector.reg_use(dest); + for CallArgPair { vreg, preg } in uses { + collector.reg_fixed_use(vreg, *preg); + } + for CallRetPair { vreg, preg } in defs { + collector.reg_fixed_def(vreg, *preg); + } + collector.reg_clobbers(info.clobbers); + } + Inst::ReturnCall { info } => { + for CallArgPair { vreg, preg } in &mut info.uses { + collector.reg_fixed_use(vreg, *preg); + } + } + Inst::ReturnCallInd { info } => { + // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): + // This shouldn't be a fixed register constraint. + collector.reg_fixed_use(&mut info.dest, x_reg(5)); + + for CallArgPair { vreg, preg } in &mut info.uses { + collector.reg_fixed_use(vreg, *preg); + } + } + Inst::Jal { .. } => { + // JAL technically has a rd register, but we currently always + // hardcode it to x0. + } + Inst::CondBr { kind: IntegerCompare { rs1, rs2, .. }, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + } + Inst::LoadExtName { rd, .. } => { + collector.reg_def(rd); + } + Inst::ElfTlsGetAddr { rd, .. } => { + // x10 is a0 which is both the first argument and the first return value. + collector.reg_fixed_def(rd, a0()); + let mut clobbers = Riscv64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV); + clobbers.remove(px_reg(10)); + collector.reg_clobbers(clobbers); + } + Inst::LoadAddr { rd, mem } => { + mem.get_operands(collector); + collector.reg_early_def(rd); + } + + Inst::Mov { rd, rm, .. } => { + collector.reg_use(rm); + collector.reg_def(rd); + } + Inst::MovFromPReg { rd, rm } => { + debug_assert!([px_reg(2), px_reg(8)].contains(rm)); + collector.reg_def(rd); + } + Inst::Fence { .. } => {} + Inst::EBreak => {} + Inst::Udf { .. } => {} + Inst::FpuRR { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + Inst::FpuRRRR { rd, rs1, rs2, rs3, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_use(rs3); + collector.reg_def(rd); + } + + Inst::Jalr { rd, base, .. 
} => { + collector.reg_use(base); + collector.reg_def(rd); + } + Inst::Atomic { rd, addr, src, .. } => { + collector.reg_use(addr); + collector.reg_use(src); + collector.reg_def(rd); + } + Inst::Select { dst, condition: IntegerCompare { rs1, rs2, .. }, x, y, .. } => { + // Mark the condition registers as late use so that they don't overlap with the destination + // register. We may potentially write to the destination register before evaluating the + // condition. + collector.reg_late_use(rs1); + collector.reg_late_use(rs2); + + for reg in x.regs_mut() { + collector.reg_use(reg); + } + for reg in y.regs_mut() { + collector.reg_use(reg); + } + + // If there's more than one destination register then use + // `reg_early_def` to prevent destination registers from overlapping + // with any operands. This ensures that the lowering doesn't have to + // deal with a situation such as when the input registers need to be + // swapped when moved to the destination. + // + // When there's only one destination register though don't use an + // early def because once the register is written no other inputs + // are read so it's ok for the destination to overlap the sources. + // The condition registers are already marked as late use so they + // won't overlap with the destination. + match dst.regs_mut() { + [reg] => collector.reg_def(reg), + regs => { + for d in regs { + collector.reg_early_def(d); + } + } + } + } + Inst::AtomicCas { offset, t0, dst, e, addr, v, .. } => { + collector.reg_use(offset); + collector.reg_use(e); + collector.reg_use(addr); + collector.reg_use(v); + collector.reg_early_def(t0); + collector.reg_early_def(dst); + } + + Inst::RawData { .. } => {} + Inst::AtomicStore { src, p, .. } => { + collector.reg_use(src); + collector.reg_use(p); + } + Inst::AtomicLoad { rd, p, .. } => { + collector.reg_use(p); + collector.reg_def(rd); + } + Inst::AtomicRmwLoop { offset, dst, p, x, t0, .. } => { + collector.reg_use(offset); + collector.reg_use(p); + collector.reg_use(x); + collector.reg_early_def(t0); + collector.reg_early_def(dst); + } + Inst::TrapIf { rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + } + Inst::Unwind { .. } => {} + Inst::DummyUse { reg } => { + collector.reg_use(reg); + } + Inst::Popcnt { sum, step, rs, tmp, .. } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(sum); + } + Inst::Cltz { sum, step, tmp, rs, .. } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(sum); + } + Inst::Brev8 { rs, rd, step, tmp, tmp2, .. } => { + collector.reg_use(rs); + collector.reg_early_def(step); + collector.reg_early_def(tmp); + collector.reg_early_def(tmp2); + collector.reg_early_def(rd); + } + Inst::StackProbeLoop { .. } => { + // StackProbeLoop has a tmp register and StackProbeLoop used at gen_prologue. + // t3 will do the job. (t3 is caller-save register and not used directly by compiler like writable_spilltmp_reg) + // gen_prologue is called at emit stage. + // no need let reg alloc know. + } + Inst::VecAluRRRR { op, vd, vd_src, vs1, vs2, mask, .. } => { + debug_assert_eq!(vd_src.class(), RegClass::Vector); + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + debug_assert_eq!(vs1.class(), op.vs1_regclass()); + + collector.reg_late_use(vs1); + collector.reg_late_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 2); // `vd` == `vd_src`. 
+ vec_mask_late_operands(mask, collector); + } + Inst::VecAluRRRImm5 { op, vd, vd_src, vs2, mask, .. } => { + debug_assert_eq!(vd_src.class(), RegClass::Vector); + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + + // If the operation forbids source/destination overlap we need to + // ensure that the source and destination registers are different. + if op.forbids_overlaps(mask) { + collector.reg_late_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. + vec_mask_late_operands(mask, collector); + } else { + collector.reg_use(vs2); + collector.reg_use(vd_src); + collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`. + vec_mask_operands(mask, collector); + } + } + Inst::VecAluRRR { op, vd, vs1, vs2, mask, .. } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + debug_assert_eq!(vs1.class(), op.vs1_regclass()); + + collector.reg_use(vs1); + collector.reg_use(vs2); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. + if op.forbids_overlaps(mask) { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + Inst::VecAluRRImm5 { op, vd, vs2, mask, .. } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + + collector.reg_use(vs2); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. + if op.forbids_overlaps(mask) { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + Inst::VecAluRR { op, vd, vs, mask, .. } => { + debug_assert_eq!(vd.to_reg().class(), op.dst_regclass()); + debug_assert_eq!(vs.class(), op.src_regclass()); + + collector.reg_use(vs); + + // If the operation forbids source/destination overlap, then we must + // register it as an early_def. This encodes the constraint that + // these must not overlap. + if op.forbids_overlaps(mask) { + collector.reg_early_def(vd); + } else { + collector.reg_def(vd); + } + + vec_mask_operands(mask, collector); + } + Inst::VecAluRImm5 { op, vd, mask, .. } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert!(!op.forbids_overlaps(mask)); + + collector.reg_def(vd); + vec_mask_operands(mask, collector); + } + Inst::VecSetState { rd, .. } => { + collector.reg_def(rd); + } + Inst::VecLoad { to, from, mask, .. } => { + from.get_operands(collector); + collector.reg_def(to); + vec_mask_operands(mask, collector); + } + Inst::VecStore { to, from, mask, .. } => { + to.get_operands(collector); + collector.reg_use(from); + vec_mask_operands(mask, collector); + } + } +} + +impl MachInst for Inst { + type ABIMachineSpec = Riscv64MachineDeps; + type LabelUse = LabelUse; + + // https://github.com/riscv/riscv-isa-manual/issues/850 + // all zero will cause invalid opcode. + const TRAP_OPCODE: &'static [u8] = &[0; 4]; + + fn gen_dummy_use(reg: Reg) -> Self { + Inst::DummyUse { reg } + } + + fn canonical_type_for_rc(rc: RegClass) -> Type { + match rc { + regalloc2::RegClass::Int => I64, + regalloc2::RegClass::Float => F64, + regalloc2::RegClass::Vector => I8X16, + } + } + + fn is_safepoint(&self) -> bool { + match self { + Inst::Call { .. } | Inst::CallInd { .. 
} => true, + _ => false, + } + } + + fn get_operands(&mut self, collector: &mut impl OperandVisitor) { + riscv64_get_operands(self, collector); + } + + fn is_move(&self) -> Option<(Writable, Reg)> { + match self { + Inst::Mov { rd, rm, .. } => Some((*rd, *rm)), + _ => None, + } + } + + fn is_included_in_clobbers(&self) -> bool { + match self { + &Inst::Args { .. } => false, + _ => true, + } + } + + fn is_trap(&self) -> bool { + match self { + Self::Udf { .. } => true, + _ => false, + } + } + + fn is_args(&self) -> bool { + match self { + Self::Args { .. } => true, + _ => false, + } + } + + fn is_term(&self) -> MachTerminator { + match self { + &Inst::Jal { .. } => MachTerminator::Uncond, + &Inst::CondBr { .. } => MachTerminator::Cond, + &Inst::Jalr { .. } => MachTerminator::Uncond, + &Inst::Rets { .. } => MachTerminator::Ret, + &Inst::BrTable { .. } => MachTerminator::Indirect, + &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall, + _ => MachTerminator::None, + } + } + + fn is_mem_access(&self) -> bool { + panic!("TODO FILL ME OUT") + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { + let x = Inst::Mov { rd: to_reg, rm: from_reg, ty }; + x + } + + fn gen_nop(preferred_size: usize) -> Inst { + if preferred_size == 0 { + return Inst::Nop0; + } + // We can't give a NOP (or any insn) < 4 bytes. + assert!(preferred_size >= 4); + Inst::Nop4 + } + + fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { + match ty { + I8 => Ok((&[RegClass::Int], &[I8])), + I16 => Ok((&[RegClass::Int], &[I16])), + I32 => Ok((&[RegClass::Int], &[I32])), + I64 => Ok((&[RegClass::Int], &[I64])), + F16 => Ok((&[RegClass::Float], &[F16])), + F32 => Ok((&[RegClass::Float], &[F32])), + F64 => Ok((&[RegClass::Float], &[F64])), + I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), + _ if ty.is_vector() => { + debug_assert!(ty.bits() <= 512); + + // Here we only need to return a SIMD type with the same size as `ty`. + // We use these types for spills and reloads, so prefer types with lanes <= 31 + // since that fits in the immediate field of `vsetivli`. + const SIMD_TYPES: [[Type; 1]; 6] = [ + [types::I8X2], + [types::I8X4], + [types::I8X8], + [types::I8X16], + [types::I16X16], + [types::I32X16], + ]; + let idx = (ty.bytes().ilog2() - 1) as usize; + let ty = &SIMD_TYPES[idx][..]; + + Ok((&[RegClass::Vector], ty)) + } + _ => Err(CodegenError::Unsupported(format!("Unexpected SSA-value type: {ty}"))), + } + } + + fn gen_jump(target: MachLabel) -> Inst { + Inst::Jal { label: target } + } + + fn worst_case_size() -> CodeOffset { + // Our worst case size is determined by the riscv64_worst_case_instruction_size test + 84 + } + + fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { + RegClass::Int + } + + fn function_alignment() -> FunctionAlignment { + FunctionAlignment { minimum: 2, preferred: 4 } + } +} + +//============================================================================= +// Pretty-printing of instructions. 
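+// Registers are printed with their RISC-V ABI names (e.g. x10 as "a0", f10 as
+// "fa0", vector registers as "v0".."v31"); virtual registers fall back to their
+// debug formatting.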
+pub fn reg_name(reg: Reg) -> String { + match reg.to_real_reg() { + Some(real) => match real.class() { + RegClass::Int => match real.hw_enc() { + 0 => "zero".into(), + 1 => "ra".into(), + 2 => "sp".into(), + 3 => "gp".into(), + 4 => "tp".into(), + 5..=7 => format!("t{}", real.hw_enc() - 5), + 8 => "fp".into(), + 9 => "s1".into(), + 10..=17 => format!("a{}", real.hw_enc() - 10), + 18..=27 => format!("s{}", real.hw_enc() - 16), + 28..=31 => format!("t{}", real.hw_enc() - 25), + _ => unreachable!(), + }, + RegClass::Float => match real.hw_enc() { + 0..=7 => format!("ft{}", real.hw_enc() - 0), + 8..=9 => format!("fs{}", real.hw_enc() - 8), + 10..=17 => format!("fa{}", real.hw_enc() - 10), + 18..=27 => format!("fs{}", real.hw_enc() - 16), + 28..=31 => format!("ft{}", real.hw_enc() - 20), + _ => unreachable!(), + }, + RegClass::Vector => format!("v{}", real.hw_enc()), + }, + None => { + format!("{reg:?}") + } + } +} + +impl Inst { + fn print_with_state(&self, _state: &mut EmitState) -> String { + let format_reg = |reg: Reg| -> String { reg_name(reg) }; + + let format_vec_amode = |amode: &VecAMode| -> String { + match amode { + VecAMode::UnitStride { base } => base.to_string(), + } + }; + + let format_mask = |mask: &VecOpMasking| -> String { + match mask { + VecOpMasking::Enabled { reg } => format!(",{}.t", format_reg(*reg)), + VecOpMasking::Disabled => format!(""), + } + }; + + let format_regs = |regs: &[Reg]| -> String { + let mut x = if regs.len() > 1 { String::from("[") } else { String::default() }; + regs.iter().for_each(|i| { + x.push_str(format_reg(*i).as_str()); + if *i != *regs.last().unwrap() { + x.push_str(","); + } + }); + if regs.len() > 1 { + x.push_str("]"); + } + x + }; + let format_labels = |labels: &[MachLabel]| -> String { + if labels.len() == 0 { + return String::from("[_]"); + } + let mut x = String::from("["); + labels.iter().for_each(|l| { + x.push_str( + format!("{:?}{}", l, if l != labels.last().unwrap() { "," } else { "" },) + .as_str(), + ); + }); + x.push_str("]"); + x + }; + + fn format_frm(rounding_mode: FRM) -> String { + format!(",{}", rounding_mode.to_static_str()) + } + + match self { + &Inst::Nop0 => { + format!("##zero length nop") + } + &Inst::Nop4 => { + format!("##fixed 4-size nop") + } + &Inst::StackProbeLoop { guard_size, probe_count, tmp } => { + let tmp = format_reg(tmp.to_reg()); + format!( + "inline_stack_probe##guard_size={guard_size} probe_count={probe_count} tmp={tmp}" + ) + } + &Inst::AtomicStore { src, ty, p } => { + let src = format_reg(src); + let p = format_reg(p); + format!("atomic_store.{ty} {src},({p})") + } + &Inst::DummyUse { reg } => { + let reg = format_reg(reg); + format!("dummy_use {reg}") + } + + &Inst::AtomicLoad { rd, ty, p } => { + let p = format_reg(p); + let rd = format_reg(rd.to_reg()); + format!("atomic_load.{ty} {rd},({p})") + } + &Inst::AtomicRmwLoop { offset, op, dst, ty, p, x, t0 } => { + let offset = format_reg(offset); + let p = format_reg(p); + let x = format_reg(x); + let t0 = format_reg(t0.to_reg()); + let dst = format_reg(dst.to_reg()); + format!("atomic_rmw.{ty} {op} {dst},{x},({p})##t0={t0} offset={offset}") + } + + &Inst::RawData { ref data } => match data.len() { + 4 => { + let mut bytes = [0; 4]; + for i in 0..bytes.len() { + bytes[i] = data[i]; + } + format!(".4byte 0x{:x}", u32::from_le_bytes(bytes)) + } + 8 => { + let mut bytes = [0; 8]; + for i in 0..bytes.len() { + bytes[i] = data[i]; + } + format!(".8byte 0x{:x}", u64::from_le_bytes(bytes)) + } + _ => { + format!(".data {data:?}") + } + }, + &Inst::Unwind 
{ ref inst } => { + format!("unwind {inst:?}") + } + &Inst::Brev8 { rs, ty, step, tmp, tmp2, rd } => { + let rs = format_reg(rs); + let step = format_reg(step.to_reg()); + let tmp = format_reg(tmp.to_reg()); + let tmp2 = format_reg(tmp2.to_reg()); + let rd = format_reg(rd.to_reg()); + format!("brev8 {rd},{rs}##tmp={tmp} tmp2={tmp2} step={step} ty={ty}") + } + &Inst::Popcnt { sum, step, rs, tmp, ty } => { + let rs = format_reg(rs); + let tmp = format_reg(tmp.to_reg()); + let step = format_reg(step.to_reg()); + let sum = format_reg(sum.to_reg()); + format!("popcnt {sum},{rs}##ty={ty} tmp={tmp} step={step}") + } + &Inst::Cltz { sum, step, rs, tmp, ty, leading } => { + let rs = format_reg(rs); + let tmp = format_reg(tmp.to_reg()); + let step = format_reg(step.to_reg()); + let sum = format_reg(sum.to_reg()); + format!( + "{} {},{}##ty={} tmp={} step={}", + if leading { "clz" } else { "ctz" }, + sum, + rs, + ty, + tmp, + step + ) + } + &Inst::AtomicCas { offset, t0, dst, e, addr, v, ty } => { + let offset = format_reg(offset); + let e = format_reg(e); + let addr = format_reg(addr); + let v = format_reg(v); + let t0 = format_reg(t0.to_reg()); + let dst = format_reg(dst.to_reg()); + format!("atomic_cas.{ty} {dst},{e},{v},({addr})##t0={t0} offset={offset}",) + } + &Inst::BrTable { index, tmp1, tmp2, ref targets } => { + format!( + "{} {},{}##tmp1={},tmp2={}", + "br_table", + format_reg(index), + format_labels(&targets[..]), + format_reg(tmp1.to_reg()), + format_reg(tmp2.to_reg()), + ) + } + &Inst::Auipc { rd, imm } => { + format!("{} {},{}", "auipc", format_reg(rd.to_reg()), imm.as_i32(),) + } + &Inst::Jalr { rd, base, offset } => { + let base = format_reg(base); + let rd = format_reg(rd.to_reg()); + format!("{} {},{}({})", "jalr", rd, offset.as_i16(), base) + } + &Inst::Lui { rd, ref imm } => { + format!("{} {},{}", "lui", format_reg(rd.to_reg()), imm.as_i32()) + } + &Inst::Fli { rd, ty, imm } => { + let rd_s = format_reg(rd.to_reg()); + let imm_s = imm.format(); + let suffix = match ty { + F32 => "s", + F64 => "d", + _ => unreachable!(), + }; + + format!("fli.{suffix} {rd_s},{imm_s}") + } + &Inst::LoadInlineConst { rd, imm, .. 
} => { + let rd = format_reg(rd.to_reg()); + let mut buf = String::new(); + write!(&mut buf, "auipc {rd},0; ").unwrap(); + write!(&mut buf, "ld {rd},12({rd}); ").unwrap(); + write!(&mut buf, "j {}; ", Inst::UNCOMPRESSED_INSTRUCTION_SIZE + 8).unwrap(); + write!(&mut buf, ".8byte 0x{imm:x}").unwrap(); + buf + } + &Inst::AluRRR { alu_op, rd, rs1, rs2 } => { + let rs1_s = format_reg(rs1); + let rs2_s = format_reg(rs2); + let rd_s = format_reg(rd.to_reg()); + match alu_op { + AluOPRRR::Adduw if rs2 == zero_reg() => { + format!("zext.w {rd_s},{rs1_s}") + } + _ => { + format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s) + } + } + } + &Inst::FpuRR { alu_op, width, frm, rd, rs } => { + let rs = format_reg(rs); + let rd = format_reg(rd.to_reg()); + let frm = if alu_op.has_frm() { format_frm(frm) } else { String::new() }; + format!("{} {rd},{rs}{frm}", alu_op.op_name(width)) + } + &Inst::FpuRRR { alu_op, width, rd, rs1, rs2, frm } => { + let rs1 = format_reg(rs1); + let rs2 = format_reg(rs2); + let rd = format_reg(rd.to_reg()); + let frm = if alu_op.has_frm() { format_frm(frm) } else { String::new() }; + + let rs1_is_rs2 = rs1 == rs2; + match alu_op { + FpuOPRRR::Fsgnj if rs1_is_rs2 => format!("fmv.{width} {rd},{rs1}"), + FpuOPRRR::Fsgnjn if rs1_is_rs2 => format!("fneg.{width} {rd},{rs1}"), + FpuOPRRR::Fsgnjx if rs1_is_rs2 => format!("fabs.{width} {rd},{rs1}"), + _ => format!("{} {rd},{rs1},{rs2}{frm}", alu_op.op_name(width)), + } + } + &Inst::FpuRRRR { alu_op, rd, rs1, rs2, rs3, frm, width } => { + let rs1 = format_reg(rs1); + let rs2 = format_reg(rs2); + let rs3 = format_reg(rs3); + let rd = format_reg(rd.to_reg()); + let frm = format_frm(frm); + let op_name = alu_op.op_name(width); + format!("{op_name} {rd},{rs1},{rs2},{rs3}{frm}") + } + &Inst::AluRRImm12 { alu_op, rd, rs, ref imm12 } => { + let rs_s = format_reg(rs); + let rd = format_reg(rd.to_reg()); + + // Some of these special cases are better known as + // their pseudo-instruction version, so prefer printing those. 
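+                // For example (these are just the standard assembler
+                // pseudo-instructions, nothing specific to this backend):
+                // `addi a0,zero,42` prints as `li a0,42`, and
+                // `xori a0,a1,-1` prints as `not a0,a1`.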
+ match (alu_op, rs, imm12) { + (AluOPRRI::Addi, rs, _) if rs == zero_reg() => { + return format!("li {},{}", rd, imm12.as_i16()); + } + (AluOPRRI::Addiw, _, imm12) if imm12.as_i16() == 0 => { + return format!("sext.w {rd},{rs_s}"); + } + (AluOPRRI::Xori, _, imm12) if imm12.as_i16() == -1 => { + return format!("not {rd},{rs_s}"); + } + (AluOPRRI::SltiU, _, imm12) if imm12.as_i16() == 1 => { + return format!("seqz {rd},{rs_s}"); + } + (alu_op, _, _) if alu_op.option_funct12().is_some() => { + format!("{} {},{}", alu_op.op_name(), rd, rs_s) + } + (alu_op, _, imm12) => { + format!("{} {},{},{}", alu_op.op_name(), rd, rs_s, imm12.as_i16()) + } + } + } + &Inst::CsrReg { op, rd, rs, csr } => { + let rs_s = format_reg(rs); + let rd_s = format_reg(rd.to_reg()); + + match (op, csr, rd) { + (CsrRegOP::CsrRW, CSR::Frm, rd) if rd.to_reg() == zero_reg() => { + format!("fsrm {rs_s}") + } + _ => { + format!("{op} {rd_s},{csr},{rs_s}") + } + } + } + &Inst::CsrImm { op, rd, csr, imm } => { + let rd_s = format_reg(rd.to_reg()); + + match (op, csr, rd) { + (CsrImmOP::CsrRWI, CSR::Frm, rd) if rd.to_reg() != zero_reg() => { + format!("fsrmi {rd_s},{imm}") + } + _ => { + format!("{op} {rd_s},{csr},{imm}") + } + } + } + &Inst::Load { rd, op, from, flags: _flags } => { + let base = from.to_string(); + let rd = format_reg(rd.to_reg()); + format!("{} {},{}", op.op_name(), rd, base,) + } + &Inst::Store { to, src, op, flags: _flags } => { + let base = to.to_string(); + let src = format_reg(src); + format!("{} {},{}", op.op_name(), src, base,) + } + &Inst::Args { ref args } => { + let mut s = "args".to_string(); + for arg in args { + let preg = format_reg(arg.preg); + let def = format_reg(arg.vreg.to_reg()); + write!(&mut s, " {def}={preg}").unwrap(); + } + s + } + &Inst::Rets { ref rets } => { + let mut s = "rets".to_string(); + for ret in rets { + let preg = format_reg(ret.preg); + let vreg = format_reg(ret.vreg); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + &Inst::Ret {} => "ret".to_string(), + + &MInst::Extend { rd, rn, signed, from_bits, .. } => { + let rn = format_reg(rn); + let rd = format_reg(rd.to_reg()); + return if signed == false && from_bits == 8 { + format!("andi {rd},{rn}") + } else { + let op = if signed { "srai" } else { "srli" }; + let shift_bits = (64 - from_bits) as i16; + format!("slli {rd},{rn},{shift_bits}; {op} {rd},{rd},{shift_bits}") + }; + } + &MInst::Call { ref info } => format!("call {}", info.dest.display(None)), + &MInst::CallInd { ref info } => { + let rd = format_reg(info.dest); + format!("callind {rd}") + } + &MInst::ReturnCall { ref info } => { + let mut s = format!( + "return_call {:?} new_stack_arg_size:{}", + info.dest, info.new_stack_arg_size + ); + for ret in &info.uses { + let preg = format_reg(ret.preg); + let vreg = format_reg(ret.vreg); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + &MInst::ReturnCallInd { ref info } => { + let callee = format_reg(info.dest); + let mut s = format!( + "return_call_ind {callee} new_stack_arg_size:{}", + info.new_stack_arg_size + ); + for ret in &info.uses { + let preg = format_reg(ret.preg); + let vreg = format_reg(ret.vreg); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + &MInst::TrapIf { rs1, rs2, cc, trap_code } => { + let rs1 = format_reg(rs1); + let rs2 = format_reg(rs2); + format!("trap_if {trap_code}##({rs1} {cc} {rs2})") + } + &MInst::Jal { label } => { + format!("j {}", label.to_string()) + } + &MInst::CondBr { taken, not_taken, kind, .. 
} => { + let rs1 = format_reg(kind.rs1); + let rs2 = format_reg(kind.rs2); + if not_taken.is_fallthrouh() && taken.as_label().is_none() { + format!("{} {},{},0", kind.op_name(), rs1, rs2) + } else { + let x = format!( + "{} {},{},taken({}),not_taken({})", + kind.op_name(), + rs1, + rs2, + taken, + not_taken + ); + x + } + } + &MInst::Atomic { op, rd, addr, src, amo } => { + let op_name = op.op_name(amo); + let addr = format_reg(addr); + let src = format_reg(src); + let rd = format_reg(rd.to_reg()); + if op.is_load() { + format!("{op_name} {rd},({addr})") + } else { + format!("{op_name} {rd},{src},({addr})") + } + } + &MInst::LoadExtName { rd, ref name, offset } => { + let rd = format_reg(rd.to_reg()); + format!("load_sym {},{}{:+}", rd, name.display(None), offset) + } + &Inst::ElfTlsGetAddr { rd, ref name } => { + let rd = format_reg(rd.to_reg()); + format!("elf_tls_get_addr {rd},{}", name.display(None)) + } + &MInst::LoadAddr { ref rd, ref mem } => { + let rs = mem.to_string(); + let rd = format_reg(rd.to_reg()); + format!("load_addr {rd},{rs}") + } + &MInst::Mov { rd, rm, ty } => { + let rm = format_reg(rm); + let rd = format_reg(rd.to_reg()); + + let op = match ty { + F16 => "fmv.h", + F32 => "fmv.s", + F64 => "fmv.d", + ty if ty.is_vector() => "vmv1r.v", + _ => "mv", + }; + + format!("{op} {rd},{rm}") + } + &MInst::MovFromPReg { rd, rm } => { + let rd = format_reg(rd.to_reg()); + debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); + let rm = reg_name(Reg::from(rm)); + format!("mv {rd},{rm}") + } + &MInst::Fence { pred, succ } => { + format!( + "fence {},{}", + Inst::fence_req_to_string(pred), + Inst::fence_req_to_string(succ), + ) + } + &MInst::Select { ref dst, condition, ref x, ref y } => { + let c_rs1 = format_reg(condition.rs1); + let c_rs2 = format_reg(condition.rs2); + let x = format_regs(x.regs()); + let y = format_regs(y.regs()); + let dst = dst.map(|r| r.to_reg()); + let dst = format_regs(dst.regs()); + format!( + "select {},{},{}##condition=({} {} {})", + dst, + x, + y, + c_rs1, + condition.kind.to_static_str(), + c_rs2 + ) + } + &MInst::Udf { trap_code } => format!("udf##trap_code={trap_code}"), + &MInst::EBreak {} => String::from("ebreak"), + &Inst::VecAluRRRR { op, vd, vd_src, vs1, vs2, ref mask, ref vstate } => { + let vs1_s = format_reg(vs1); + let vs2_s = format_reg(vs2); + let vd_src_s = format_reg(vd_src); + let vd_s = format_reg(vd.to_reg()); + let mask = format_mask(mask); + + let vd_fmt = if vd_s != vd_src_s { format!("{vd_s},{vd_src_s}") } else { vd_s }; + + // Note: vs2 and vs1 here are opposite to the standard scalar ordering. + // This is noted in Section 10.1 of the RISC-V Vector spec. + format!("{op} {vd_fmt},{vs2_s},{vs1_s}{mask} {vstate}") + } + &Inst::VecAluRRRImm5 { op, vd, imm, vs2, ref mask, ref vstate, .. } => { + let vs2_s = format_reg(vs2); + let vd_s = format_reg(vd.to_reg()); + let mask = format_mask(mask); + + // Some opcodes interpret the immediate as unsigned, lets show the + // correct number here. + let imm_s = + if op.imm_is_unsigned() { format!("{}", imm.bits()) } else { format!("{imm}") }; + + format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}") + } + &Inst::VecAluRRR { op, vd, vs1, vs2, ref mask, ref vstate } => { + let vs1_s = format_reg(vs1); + let vs2_s = format_reg(vs2); + let vd_s = format_reg(vd.to_reg()); + let mask = format_mask(mask); + + // Note: vs2 and vs1 here are opposite to the standard scalar ordering. + // This is noted in Section 10.1 of the RISC-V Vector spec. 
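+                // For example, `vsub.vv v1,v2,v3` computes v2 - v3, i.e. vs1 is
+                // subtracted from vs2, so printing `{vd},{vs2},{vs1}` matches the
+                // assembler operand order.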
+ match (op, vs2, vs1) { + (VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => { + format!("vneg.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => { + format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => { + format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}") + } + (VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => { + format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}") + } + _ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"), + } + } + &Inst::VecAluRRImm5 { op, vd, imm, vs2, ref mask, ref vstate } => { + let vs2_s = format_reg(vs2); + let vd_s = format_reg(vd.to_reg()); + let mask = format_mask(mask); + + // Some opcodes interpret the immediate as unsigned, lets show the + // correct number here. + let imm_s = + if op.imm_is_unsigned() { format!("{}", imm.bits()) } else { format!("{imm}") }; + + match (op, imm) { + (VecAluOpRRImm5::VxorVI, imm) if imm == Imm5::maybe_from_i8(-1).unwrap() => { + format!("vnot.v {vd_s},{vs2_s}{mask} {vstate}") + } + _ => format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}"), + } + } + &Inst::VecAluRR { op, vd, vs, ref mask, ref vstate } => { + let vs_s = format_reg(vs); + let vd_s = format_reg(vd.to_reg()); + let mask = format_mask(mask); + + format!("{op} {vd_s},{vs_s}{mask} {vstate}") + } + &Inst::VecAluRImm5 { op, vd, imm, ref mask, ref vstate } => { + let vd_s = format_reg(vd.to_reg()); + let mask = format_mask(mask); + + format!("{op} {vd_s},{imm}{mask} {vstate}") + } + &Inst::VecSetState { rd, ref vstate } => { + let rd_s = format_reg(rd.to_reg()); + assert!(vstate.avl.is_static()); + format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype) + } + Inst::VecLoad { eew, to, from, ref mask, ref vstate, .. } => { + let base = format_vec_amode(from); + let vd = format_reg(to.to_reg()); + let mask = format_mask(mask); + + format!("vl{eew}.v {vd},{base}{mask} {vstate}") + } + Inst::VecStore { eew, to, from, ref mask, ref vstate, .. } => { + let dst = format_vec_amode(to); + let vs3 = format_reg(*from); + let mask = format_mask(mask); + + format!("vs{eew}.v {vs3},{dst}{mask} {vstate}") + } + } + } +} + +/// Different forms of label references for different instruction formats. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LabelUse { + /// 20-bit branch offset (unconditional branches). PC-rel, offset is + /// imm << 1. Immediate is 20 signed bits. Use in Jal instructions. + Jal20, + + /// The unconditional jump instructions all use PC-relative + /// addressing to help support position independent code. The JALR + /// instruction was defined to enable a two-instruction sequence to + /// jump anywhere in a 32-bit absolute address range. A LUI + /// instruction can first load rs1 with the upper 20 bits of a + /// target address, then JALR can add in the lower bits. Similarly, + /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative + /// address range. + PCRel32, + + /// All branch instructions use the B-type instruction format. The + /// 12-bit B-immediate encodes signed offsets in multiples of 2, and + /// is added to the current pc to give the target address. The + /// conditional branch range is ±4 KiB. + B12, + + /// Equivalent to the `R_RISCV_PCREL_HI20` relocation, Allows setting + /// the immediate field of an `auipc` instruction. + PCRelHi20, + + /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to + /// the final address, instead of the `PCREL_HI20` label. 
Allows setting + /// the immediate field of I Type instructions such as `addi` or `lw`. + /// + /// Since we currently don't support offsets in labels, this relocation has + /// an implicit offset of 4. + PCRelLo12I, + + /// 11-bit PC-relative jump offset. Equivalent to the `RVC_JUMP` relocation + RVCJump, +} + +impl MachInstLabelUse for LabelUse { + /// Alignment for veneer code. Every Riscv64 instruction must be + /// 4-byte-aligned. + const ALIGN: CodeOffset = 4; + + /// Maximum PC-relative range (positive), inclusive. + fn max_pos_range(self) -> CodeOffset { + match self { + LabelUse::Jal20 => ((1 << 19) - 1) * 2, + LabelUse::PCRelLo12I | LabelUse::PCRelHi20 | LabelUse::PCRel32 => { + Inst::imm_max() as CodeOffset + } + LabelUse::B12 => ((1 << 11) - 1) * 2, + LabelUse::RVCJump => ((1 << 10) - 1) * 2, + } + } + + /// Maximum PC-relative range (negative). + fn max_neg_range(self) -> CodeOffset { + match self { + LabelUse::PCRel32 => Inst::imm_min().abs() as CodeOffset, + _ => self.max_pos_range() + 2, + } + } + + /// Size of window into code needed to do the patch. + fn patch_size(self) -> CodeOffset { + match self { + LabelUse::RVCJump => 2, + LabelUse::Jal20 | LabelUse::B12 | LabelUse::PCRelHi20 | LabelUse::PCRelLo12I => 4, + LabelUse::PCRel32 => 8, + } + } + + /// Perform the patch. + fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { + assert!(use_offset % 2 == 0); + assert!(label_offset % 2 == 0); + let offset = (label_offset as i64) - (use_offset as i64); + + // re-check range + assert!( + offset >= -(self.max_neg_range() as i64) && offset <= (self.max_pos_range() as i64), + "{self:?} offset '{offset}' use_offset:'{use_offset}' label_offset:'{label_offset}' must not exceed max range.", + ); + self.patch_raw_offset(buffer, offset); + } + + /// Is a veneer supported for this label reference type? + fn supports_veneer(self) -> bool { + match self { + Self::Jal20 | Self::B12 | Self::RVCJump => true, + _ => false, + } + } + + /// How large is the veneer, if supported? + fn veneer_size(self) -> CodeOffset { + match self { + Self::B12 | Self::Jal20 | Self::RVCJump => 8, + _ => unreachable!(), + } + } + + fn worst_case_veneer_size() -> CodeOffset { + 8 + } + + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return + /// an offset and label-use for the veneer's use of the original label. 
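+    /// A rough sketch of the 8-byte veneer emitted below (using the spill
+    /// temporary x31 that `writable_spilltmp_reg()` returns):
+    ///
+    /// ```text
+    /// auipc x31, 0      ; patched later through the returned PCRel32 label use
+    /// jalr  x0, 0(x31)
+    /// ```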
+ fn generate_veneer( + self, + buffer: &mut [u8], + veneer_offset: CodeOffset, + ) -> (CodeOffset, LabelUse) { + let base = writable_spilltmp_reg(); + { + let x = enc_auipc(base, Imm20::ZERO).to_le_bytes(); + buffer[0] = x[0]; + buffer[1] = x[1]; + buffer[2] = x[2]; + buffer[3] = x[3]; + } + { + let x = enc_jalr(writable_zero_reg(), base.to_reg(), Imm12::ZERO).to_le_bytes(); + buffer[4] = x[0]; + buffer[5] = x[1]; + buffer[6] = x[2]; + buffer[7] = x[3]; + } + (veneer_offset, Self::PCRel32) + } + + fn from_reloc(reloc: Reloc, addend: Addend) -> Option { + match (reloc, addend) { + (Reloc::RiscvCallPlt, _) => Some(Self::PCRel32), + _ => None, + } + } +} + +impl LabelUse { + #[allow(dead_code)] // in case it's needed in the future + fn offset_in_range(self, offset: i64) -> bool { + let min = -(self.max_neg_range() as i64); + let max = self.max_pos_range() as i64; + offset >= min && offset <= max + } + + fn patch_raw_offset(self, buffer: &mut [u8], offset: i64) { + let insn = match self { + LabelUse::RVCJump => u16::from_le_bytes(buffer[..2].try_into().unwrap()) as u32, + _ => u32::from_le_bytes(buffer[..4].try_into().unwrap()), + }; + + match self { + LabelUse::Jal20 => { + let offset = offset as u32; + let v = ((offset >> 12 & 0b1111_1111) << 12) + | ((offset >> 11 & 0b1) << 20) + | ((offset >> 1 & 0b11_1111_1111) << 21) + | ((offset >> 20 & 0b1) << 31); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v)); + } + LabelUse::PCRel32 => { + let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]); + Inst::generate_imm(offset as u64) + .map(|(imm20, imm12)| { + // Encode the OR-ed-in value with zero_reg(). The + // register parameter must be in the original + // encoded instruction and or'ing in zeroes does not + // change it. + buffer[0..4].clone_from_slice(&u32::to_le_bytes( + insn | enc_auipc(writable_zero_reg(), imm20), + )); + buffer[4..8].clone_from_slice(&u32::to_le_bytes( + insn2 | enc_jalr(writable_zero_reg(), zero_reg(), imm12), + )); + }) + // expect make sure we handled. + .expect("we have check the range before,this is a compiler error."); + } + + LabelUse::B12 => { + let offset = offset as u32; + let v = ((offset >> 11 & 0b1) << 7) + | ((offset >> 1 & 0b1111) << 8) + | ((offset >> 5 & 0b11_1111) << 25) + | ((offset >> 12 & 0b1) << 31); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v)); + } + + LabelUse::PCRelHi20 => { + // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses + // + // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the + // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an + // offset of 2048, we need to land at the next page and subtract instead. + let offset = offset as u32; + let hi20 = offset.wrapping_add(0x800) >> 12; + let insn = (insn & 0xFFF) | (hi20 << 12); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); + } + + LabelUse::PCRelLo12I => { + // `offset` is the offset from the current instruction to the target address. + // + // However we are trying to compute the offset to the target address from the previous instruction. + // The previous instruction should be the one that contains the PCRelHi20 relocation and + // stores/references the program counter (`auipc` usually). 
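+                // (In the code we emit, that `auipc` is always the instruction
+                // immediately before this one, i.e. 4 bytes earlier, which is the
+                // implicit offset of 4 mentioned on `PCRelLo12I` above.)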
+ // + // Since we are trying to compute the offset from the previous instruction, we can + // represent it as offset = target_address - (current_instruction_address - 4) + // which is equivalent to offset = target_address - current_instruction_address + 4. + // + // Thus we need to add 4 to the offset here. + let lo12 = (offset + 4) as u32 & 0xFFF; + let insn = (insn & 0xFFFFF) | (lo12 << 20); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn)); + } + LabelUse::RVCJump => { + debug_assert!(offset & 1 == 0); + + // We currently only support this for the C.J operation, so assert that is the opcode in + // the buffer. + debug_assert_eq!(insn & 0xFFFF, 0xA001); + + buffer[0..2].clone_from_slice(&u16::to_le_bytes(encode_cj_type( + CjOp::CJ, + Imm12::from_i16(i16::try_from(offset).unwrap()), + ))); + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn label_use_max_range() { + assert!(LabelUse::B12.max_neg_range() == LabelUse::B12.max_pos_range() + 2); + assert!(LabelUse::Jal20.max_neg_range() == LabelUse::Jal20.max_pos_range() + 2); + assert!(LabelUse::PCRel32.max_pos_range() == (Inst::imm_max() as CodeOffset)); + assert!(LabelUse::PCRel32.max_neg_range() == (Inst::imm_min().abs() as CodeOffset)); + assert!(LabelUse::B12.max_pos_range() == ((1 << 11) - 1) * 2); + } +} diff --git a/hbcb/src/inst/regs.rs b/hbcb/src/inst/regs.rs new file mode 100644 index 0000000..ffdc484 --- /dev/null +++ b/hbcb/src/inst/regs.rs @@ -0,0 +1,168 @@ +//! Riscv64 ISA definitions: registers. +//! + +use crate::machinst::{Reg, Writable}; + +use alloc::vec; +use alloc::vec::Vec; + +use regalloc2::{PReg, RegClass, VReg}; + +// first argument of function call +#[inline] +pub fn a0() -> Reg { + x_reg(10) +} + +// second argument of function call +#[inline] +#[allow(dead_code)] +pub fn a1() -> Reg { + x_reg(11) +} + +// third argument of function call +#[inline] +#[allow(dead_code)] +pub fn a2() -> Reg { + x_reg(12) +} + +#[inline] +#[allow(dead_code)] +pub fn writable_a0() -> Writable { + Writable::from_reg(a0()) +} +#[inline] +#[allow(dead_code)] +pub fn writable_a1() -> Writable { + Writable::from_reg(a1()) +} +#[inline] +#[allow(dead_code)] +pub fn writable_a2() -> Writable { + Writable::from_reg(a2()) +} + +#[inline] +#[allow(dead_code)] +pub fn fa0() -> Reg { + f_reg(10) +} +#[inline] +#[allow(dead_code)] +pub fn writable_fa0() -> Writable { + Writable::from_reg(fa0()) +} +#[inline] +#[allow(dead_code)] +pub fn writable_fa1() -> Writable { + Writable::from_reg(fa1()) +} +#[inline] +pub fn fa1() -> Reg { + f_reg(11) +} + +/// Get a reference to the zero-register. +#[inline] +pub fn zero_reg() -> Reg { + x_reg(0) +} + +/// Get a writable reference to the zero-register (this discards a result). +#[inline] +pub fn writable_zero_reg() -> Writable { + Writable::from_reg(zero_reg()) +} +#[inline] +pub fn stack_reg() -> Reg { + x_reg(2) +} + +/// Get a writable reference to the stack-pointer register. +#[inline] +pub fn writable_stack_reg() -> Writable { + Writable::from_reg(stack_reg()) +} + +/// Get a reference to the link register (x1). +pub fn link_reg() -> Reg { + x_reg(1) +} + +/// Get a writable reference to the link register. +#[inline] +pub fn writable_link_reg() -> Writable { + Writable::from_reg(link_reg()) +} + +/// Get a reference to the frame pointer (x8). +#[inline] +pub fn fp_reg() -> Reg { + x_reg(8) +} + +/// Get a writable reference to the frame pointer. 
+#[inline] +pub fn writable_fp_reg() -> Writable { + Writable::from_reg(fp_reg()) +} + +/// Get a reference to the first temporary, sometimes "spill temporary", +/// register. This register is used in various ways as a temporary. +#[inline] +pub fn spilltmp_reg() -> Reg { + x_reg(31) +} + +/// Get a writable reference to the spilltmp reg. +#[inline] +pub fn writable_spilltmp_reg() -> Writable { + Writable::from_reg(spilltmp_reg()) +} + +///spilltmp2 +#[inline] +pub fn spilltmp_reg2() -> Reg { + x_reg(30) +} + +/// Get a writable reference to the spilltmp2 reg. +#[inline] +pub fn writable_spilltmp_reg2() -> Writable { + Writable::from_reg(spilltmp_reg2()) +} + +#[inline] +pub fn x_reg(enc: usize) -> Reg { + let p_reg = PReg::new(enc, RegClass::Int); + let v_reg = VReg::new(p_reg.index(), p_reg.class()); + Reg::from(v_reg) +} +pub const fn px_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Int) +} + +#[inline] +pub fn f_reg(enc: usize) -> Reg { + let p_reg = PReg::new(enc, RegClass::Float); + let v_reg = VReg::new(p_reg.index(), p_reg.class()); + Reg::from(v_reg) +} +pub const fn pf_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Float) +} + +#[allow(dead_code)] +pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec> { + let mut regs = vec![]; + for i in start..=end { + regs.push(Writable::from_reg(x_reg(i))); + } + regs +} + +pub const fn pv_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Vector) +} diff --git a/hbcb/src/inst/unwind.rs b/hbcb/src/inst/unwind.rs new file mode 100644 index 0000000..1e2bb90 --- /dev/null +++ b/hbcb/src/inst/unwind.rs @@ -0,0 +1,2 @@ +#[cfg(feature = "unwind")] +pub(crate) mod systemv; diff --git a/hbcb/src/inst/unwind/systemv.rs b/hbcb/src/inst/unwind/systemv.rs new file mode 100644 index 0000000..6cf2445 --- /dev/null +++ b/hbcb/src/inst/unwind/systemv.rs @@ -0,0 +1,170 @@ +//! Unwind information for System V ABI (Riscv64). + +use crate::inst::regs; +use crate::isa::unwind::systemv::RegisterMappingError; +use crate::machinst::Reg; +use gimli::{write::CommonInformationEntry, Encoding, Format, Register}; +use regalloc2::RegClass; + +/// Creates a new riscv64 common information entry (CIE). +pub fn create_cie() -> CommonInformationEntry { + use gimli::write::CallFrameInstruction; + + let mut entry = CommonInformationEntry::new( + Encoding { + address_size: 8, + format: Format::Dwarf32, + version: 1, + }, + 2, // Code alignment factor + -8, // Data alignment factor + Register(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16), + ); + + // Every frame will start with the call frame address (CFA) at SP + let sp = Register(regs::stack_reg().to_real_reg().unwrap().hw_enc().into()); + entry.add_instruction(CallFrameInstruction::Cfa(sp, 0)); + + entry +} + +/// Map Cranelift registers to their corresponding Gimli registers. 
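+///
+/// For example, `fp` (x8) maps to DWARF register 8 and `fa0` (f10) maps to
+/// DWARF register 42: the RISC-V psABI numbers the integer registers 0-31 and
+/// the FP registers 32-63, which is what the class offsets below implement.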
+pub fn map_reg(reg: Reg) -> Result { + let reg_offset = match reg.class() { + RegClass::Int => 0, + RegClass::Float => 32, + RegClass::Vector => 64, + }; + + let reg = reg.to_real_reg().unwrap().hw_enc() as u16; + Ok(Register(reg_offset + reg)) +} + +pub(crate) struct RegisterMapper; + +impl crate::isa::unwind::systemv::RegisterMapper for RegisterMapper { + fn map(&self, reg: Reg) -> Result { + Ok(map_reg(reg)?.0) + } + fn fp(&self) -> Option { + Some(regs::fp_reg().to_real_reg().unwrap().hw_enc() as u16) + } + fn lr(&self) -> Option { + Some(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16) + } + fn lr_offset(&self) -> Option { + Some(8) + } +} + +#[cfg(test)] +mod tests { + use crate::cursor::{Cursor, FuncCursor}; + + use crate::ir::{ + types, AbiParam, Function, InstBuilder, Signature, StackSlotData, StackSlotKind, + UserFuncName, + }; + use crate::isa::{lookup, CallConv}; + use crate::settings::{builder, Flags}; + use crate::Context; + use gimli::write::Address; + use target_lexicon::triple; + + #[test] + fn test_simple_func() { + let isa = lookup(triple!("riscv64")) + .expect("expect riscv64 ISA") + .finish(Flags::new(builder())) + .expect("Creating compiler backend"); + + let mut context = Context::for_function(create_function( + CallConv::SystemV, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)), + )); + + let code = context + .compile(&*isa, &mut Default::default()) + .expect("expected compilation"); + + let fde = match code + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(1234)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!(format!("{fde:?}"), "FrameDescriptionEntry { address: Constant(1234), length: 40, lsda: None, instructions: [(12, CfaOffset(16)), (12, Offset(Register(8), -16)), (12, Offset(Register(1), -8)), (16, CfaRegister(Register(8)))] }"); + } + + fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { + let mut func = + Function::with_name_signature(UserFuncName::user(0, 0), Signature::new(call_conv)); + + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().return_(&[]); + + if let Some(stack_slot) = stack_slot { + func.sized_stack_slots.push(stack_slot); + } + + func + } + + #[test] + fn test_multi_return_func() { + let isa = lookup(triple!("riscv64")) + .expect("expect riscv64 ISA") + .finish(Flags::new(builder())) + .expect("Creating compiler backend"); + + let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); + + let code = context + .compile(&*isa, &mut Default::default()) + .expect("expected compilation"); + + let fde = match code + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(4321)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!( + format!("{fde:?}"), + "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }" + ); + } + + fn create_multi_return_function(call_conv: CallConv) -> Function { + let mut sig = Signature::new(call_conv); + sig.params.push(AbiParam::new(types::I32)); + let mut func = Function::with_name_signature(UserFuncName::user(0, 0), sig); + + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, types::I32); + let block1 = func.dfg.make_block(); + let block2 = 
func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().brif(v0, block2, &[], block1, &[]); + + pos.insert_block(block1); + pos.ins().return_(&[]); + + pos.insert_block(block2); + pos.ins().return_(&[]); + + func + } +} diff --git a/hbcb/src/inst/vector.rs b/hbcb/src/inst/vector.rs new file mode 100644 index 0000000..356c747 --- /dev/null +++ b/hbcb/src/inst/vector.rs @@ -0,0 +1,1150 @@ +use crate::lower::isle::generated_code::VecAluOpRRRR; +use crate::lower::isle::generated_code::{ + VecAMode, VecAluOpRImm5, VecAluOpRR, VecAluOpRRImm5, VecAluOpRRR, VecAluOpRRRImm5, VecAvl, + VecElementWidth, VecLmul, VecMaskMode, VecOpCategory, VecOpMasking, VecTailMode, +}; +use crate::machinst::{OperandVisitor, RegClass}; +use crate::Reg; +use core::fmt; + +use super::{Type, UImm5}; + +impl VecAvl { + pub fn _static(size: u32) -> Self { + VecAvl::Static { + size: UImm5::maybe_from_u8(size as u8).expect("Invalid size for AVL"), + } + } + + pub fn is_static(&self) -> bool { + match self { + VecAvl::Static { .. } => true, + } + } + + pub fn unwrap_static(&self) -> UImm5 { + match self { + VecAvl::Static { size } => *size, + } + } +} + +// TODO: Can we tell ISLE to derive this? +impl Copy for VecAvl {} + +// TODO: Can we tell ISLE to derive this? +impl PartialEq for VecAvl { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (VecAvl::Static { size: lhs }, VecAvl::Static { size: rhs }) => lhs == rhs, + } + } +} + +impl fmt::Display for VecAvl { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecAvl::Static { size } => write!(f, "{size}"), + } + } +} + +impl VecElementWidth { + pub fn from_type(ty: Type) -> Self { + Self::from_bits(ty.lane_bits()) + } + + pub fn from_bits(bits: u32) -> Self { + match bits { + 8 => VecElementWidth::E8, + 16 => VecElementWidth::E16, + 32 => VecElementWidth::E32, + 64 => VecElementWidth::E64, + _ => panic!("Invalid number of bits for VecElementWidth: {bits}"), + } + } + + pub fn bits(&self) -> u32 { + match self { + VecElementWidth::E8 => 8, + VecElementWidth::E16 => 16, + VecElementWidth::E32 => 32, + VecElementWidth::E64 => 64, + } + } + + pub fn encode(&self) -> u32 { + match self { + VecElementWidth::E8 => 0b000, + VecElementWidth::E16 => 0b001, + VecElementWidth::E32 => 0b010, + VecElementWidth::E64 => 0b011, + } + } +} + +impl fmt::Display for VecElementWidth { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "e{}", self.bits()) + } +} + +impl VecLmul { + pub fn encode(&self) -> u32 { + match self { + VecLmul::LmulF8 => 0b101, + VecLmul::LmulF4 => 0b110, + VecLmul::LmulF2 => 0b111, + VecLmul::Lmul1 => 0b000, + VecLmul::Lmul2 => 0b001, + VecLmul::Lmul4 => 0b010, + VecLmul::Lmul8 => 0b011, + } + } +} + +impl fmt::Display for VecLmul { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecLmul::LmulF8 => write!(f, "mf8"), + VecLmul::LmulF4 => write!(f, "mf4"), + VecLmul::LmulF2 => write!(f, "mf2"), + VecLmul::Lmul1 => write!(f, "m1"), + VecLmul::Lmul2 => write!(f, "m2"), + VecLmul::Lmul4 => write!(f, "m4"), + VecLmul::Lmul8 => write!(f, "m8"), + } + } +} + +impl VecTailMode { + pub fn encode(&self) -> u32 { + match self { + VecTailMode::Agnostic => 1, + VecTailMode::Undisturbed => 0, + } + } +} + +impl fmt::Display for VecTailMode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecTailMode::Agnostic => write!(f, "ta"), + VecTailMode::Undisturbed => write!(f, "tu"), + } + } +} + +impl VecMaskMode { + pub fn 
encode(&self) -> u32 { + match self { + VecMaskMode::Agnostic => 1, + VecMaskMode::Undisturbed => 0, + } + } +} + +impl fmt::Display for VecMaskMode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VecMaskMode::Agnostic => write!(f, "ma"), + VecMaskMode::Undisturbed => write!(f, "mu"), + } + } +} + +/// Vector Type (VType) +/// +/// vtype provides the default type used to interpret the contents of the vector register file. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct VType { + pub sew: VecElementWidth, + pub lmul: VecLmul, + pub tail_mode: VecTailMode, + pub mask_mode: VecMaskMode, +} + +impl VType { + // https://github.com/riscv/riscv-v-spec/blob/master/vtype-format.adoc + pub fn encode(&self) -> u32 { + let mut bits = 0; + bits |= self.lmul.encode(); + bits |= self.sew.encode() << 3; + bits |= self.tail_mode.encode() << 6; + bits |= self.mask_mode.encode() << 7; + bits + } +} + +impl fmt::Display for VType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}, {}, {}, {}", + self.sew, self.lmul, self.tail_mode, self.mask_mode + ) + } +} + +/// Vector State (VState) +/// +/// VState represents the state of the vector unit that each instruction expects before execution. +/// Unlike VType or any of the other types here, VState is not a part of the RISC-V ISA. It is +/// used by our instruction emission code to ensure that the vector unit is in the correct state. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct VState { + pub avl: VecAvl, + pub vtype: VType, +} + +impl VState { + pub fn from_type(ty: Type) -> Self { + VState { + avl: VecAvl::_static(ty.lane_count()), + vtype: VType { + sew: VecElementWidth::from_type(ty), + lmul: VecLmul::Lmul1, + tail_mode: VecTailMode::Agnostic, + mask_mode: VecMaskMode::Agnostic, + }, + } + } +} + +impl fmt::Display for VState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "#avl={}, #vtype=({})", self.avl, self.vtype) + } +} + +impl VecOpCategory { + pub fn encode(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#101-vector-arithmetic-instruction-encoding + match self { + VecOpCategory::OPIVV => 0b000, + VecOpCategory::OPFVV => 0b001, + VecOpCategory::OPMVV => 0b010, + VecOpCategory::OPIVI => 0b011, + VecOpCategory::OPIVX => 0b100, + VecOpCategory::OPFVF => 0b101, + VecOpCategory::OPMVX => 0b110, + VecOpCategory::OPCFG => 0b111, + } + } +} + +impl Copy for VecOpMasking {} +impl VecOpMasking { + pub fn is_enabled(&self) -> bool { + match self { + VecOpMasking::Enabled { .. } => true, + VecOpMasking::Disabled => false, + } + } + + pub fn encode(&self) -> u32 { + match self { + VecOpMasking::Enabled { .. 
} => 0, + VecOpMasking::Disabled => 1, + } + } +} + +impl VecAluOpRRRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VmaccVX => 0b101101, + VecAluOpRRRR::VnmsacVV | VecAluOpRRRR::VnmsacVX => 0b101111, + VecAluOpRRRR::VfmaccVV | VecAluOpRRRR::VfmaccVF => 0b101100, + VecAluOpRRRR::VfnmaccVV | VecAluOpRRRR::VfnmaccVF => 0b101101, + VecAluOpRRRR::VfmsacVV | VecAluOpRRRR::VfmsacVF => 0b101110, + VecAluOpRRRR::VfnmsacVV | VecAluOpRRRR::VfnmsacVF => 0b101111, + VecAluOpRRRR::Vslide1upVX => 0b001110, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRRR::VmaccVV | VecAluOpRRRR::VnmsacVV => VecOpCategory::OPMVV, + VecAluOpRRRR::VmaccVX | VecAluOpRRRR::VnmsacVX | VecAluOpRRRR::Vslide1upVX => { + VecOpCategory::OPMVX + } + VecAluOpRRRR::VfmaccVV + | VecAluOpRRRR::VfnmaccVV + | VecAluOpRRRR::VfmsacVV + | VecAluOpRRRR::VfnmsacVV => VecOpCategory::OPFVV, + VecAluOpRRRR::VfmaccVF + | VecAluOpRRRR::VfnmaccVF + | VecAluOpRRRR::VfmsacVF + | VecAluOpRRRR::VfnmsacVF => VecOpCategory::OPFVF, + } + } + + // vs1 is the only variable source, vs2 is fixed. + pub fn vs1_regclass(&self) -> RegClass { + match self.category() { + VecOpCategory::OPMVV | VecOpCategory::OPFVV => RegClass::Vector, + VecOpCategory::OPMVX => RegClass::Int, + VecOpCategory::OPFVF => RegClass::Float, + _ => unreachable!(), + } + } +} + +impl VecInstOverlapInfo for VecAluOpRRRR { + fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRRR::Vslide1upVX => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRRRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - 2); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRRImm5::VslideupVI => 0b001110, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRRImm5::VslideupVI => VecOpCategory::OPIVI, + } + } + + pub fn imm_is_unsigned(&self) -> bool { + match self { + VecAluOpRRRImm5::VslideupVI => true, + } + } +} + +impl VecInstOverlapInfo for VecAluOpRRRImm5 { + fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRRImm5::VslideupVI => true, + } + } +} + +impl fmt::Display for VecAluOpRRRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - 2); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRR::VaddVV + | VecAluOpRRR::VaddVX + | VecAluOpRRR::VfaddVV + | VecAluOpRRR::VfaddVF => 0b000000, + VecAluOpRRR::VsubVV + | VecAluOpRRR::VsubVX + | VecAluOpRRR::VfsubVV + | VecAluOpRRR::VfsubVF => 0b000010, + VecAluOpRRR::VrsubVX => 0b000011, + VecAluOpRRR::VmulVV | 
VecAluOpRRR::VmulVX => 0b100101, + VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhVX => 0b100111, + VecAluOpRRR::VmulhuVV + | VecAluOpRRR::VmulhuVX + | VecAluOpRRR::VfmulVV + | VecAluOpRRR::VfmulVF => 0b100100, + VecAluOpRRR::VsmulVV | VecAluOpRRR::VsmulVX => 0b100111, + VecAluOpRRR::VsllVV | VecAluOpRRR::VsllVX => 0b100101, + VecAluOpRRR::VsrlVV | VecAluOpRRR::VsrlVX => 0b101000, + VecAluOpRRR::VsraVV | VecAluOpRRR::VsraVX => 0b101001, + VecAluOpRRR::VandVV | VecAluOpRRR::VandVX => 0b001001, + VecAluOpRRR::VorVV | VecAluOpRRR::VorVX => 0b001010, + VecAluOpRRR::VxorVV | VecAluOpRRR::VxorVX => 0b001011, + VecAluOpRRR::VminuVV | VecAluOpRRR::VminuVX | VecAluOpRRR::VredminuVS => 0b000100, + VecAluOpRRR::VminVV | VecAluOpRRR::VminVX => 0b000101, + VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxuVX | VecAluOpRRR::VredmaxuVS => 0b000110, + VecAluOpRRR::VmaxVV | VecAluOpRRR::VmaxVX => 0b000111, + VecAluOpRRR::VslidedownVX => 0b001111, + VecAluOpRRR::VfrsubVF => 0b100111, + VecAluOpRRR::VmergeVVM + | VecAluOpRRR::VmergeVXM + | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VcompressVM => 0b010111, + VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfdivVF + | VecAluOpRRR::VsadduVV + | VecAluOpRRR::VsadduVX => 0b100000, + VecAluOpRRR::VfrdivVF | VecAluOpRRR::VsaddVV | VecAluOpRRR::VsaddVX => 0b100001, + VecAluOpRRR::VfminVV => 0b000100, + VecAluOpRRR::VfmaxVV => 0b000110, + VecAluOpRRR::VssubuVV | VecAluOpRRR::VssubuVX => 0b100010, + VecAluOpRRR::VssubVV | VecAluOpRRR::VssubVX => 0b100011, + VecAluOpRRR::VfsgnjVV | VecAluOpRRR::VfsgnjVF => 0b001000, + VecAluOpRRR::VfsgnjnVV => 0b001001, + VecAluOpRRR::VfsgnjxVV => 0b001010, + VecAluOpRRR::VrgatherVV | VecAluOpRRR::VrgatherVX => 0b001100, + VecAluOpRRR::VwadduVV | VecAluOpRRR::VwadduVX => 0b110000, + VecAluOpRRR::VwaddVV | VecAluOpRRR::VwaddVX => 0b110001, + VecAluOpRRR::VwsubuVV | VecAluOpRRR::VwsubuVX => 0b110010, + VecAluOpRRR::VwsubVV | VecAluOpRRR::VwsubVX => 0b110011, + VecAluOpRRR::VwadduWV | VecAluOpRRR::VwadduWX => 0b110100, + VecAluOpRRR::VwaddWV | VecAluOpRRR::VwaddWX => 0b110101, + VecAluOpRRR::VwsubuWV | VecAluOpRRR::VwsubuWX => 0b110110, + VecAluOpRRR::VwsubWV | VecAluOpRRR::VwsubWX => 0b110111, + VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfeqVF => 0b011000, + VecAluOpRRR::VmsneVV + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmfleVV + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmandMM => 0b011001, + VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltuVX | VecAluOpRRR::VmorMM => 0b011010, + VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfltVF => 0b011011, + VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfneVF => 0b011100, + VecAluOpRRR::VmsleVV + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmnandMM => 0b011101, + VecAluOpRRR::VmsgtuVX | VecAluOpRRR::VmnorMM => 0b011110, + VecAluOpRRR::VmsgtVX | VecAluOpRRR::VmfgeVF => 0b011111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRR::VaddVV + | VecAluOpRRR::VsaddVV + | VecAluOpRRR::VsadduVV + | VecAluOpRRR::VsubVV + | VecAluOpRRR::VssubVV + | VecAluOpRRR::VssubuVV + | VecAluOpRRR::VsmulVV + | VecAluOpRRR::VsllVV + | VecAluOpRRR::VsrlVV + | VecAluOpRRR::VsraVV + | VecAluOpRRR::VandVV + | VecAluOpRRR::VorVV + | VecAluOpRRR::VxorVV + | VecAluOpRRR::VminuVV + | VecAluOpRRR::VminVV + | VecAluOpRRR::VmaxuVV + | VecAluOpRRR::VmaxVV + | VecAluOpRRR::VmergeVVM + | VecAluOpRRR::VrgatherVV + | VecAluOpRRR::VmseqVV + | VecAluOpRRR::VmsneVV + | 
VecAluOpRRR::VmsltuVV + | VecAluOpRRR::VmsltVV + | VecAluOpRRR::VmsleuVV + | VecAluOpRRR::VmsleVV => VecOpCategory::OPIVV, + VecAluOpRRR::VwaddVV + | VecAluOpRRR::VwaddWV + | VecAluOpRRR::VwadduVV + | VecAluOpRRR::VwadduWV + | VecAluOpRRR::VwsubVV + | VecAluOpRRR::VwsubWV + | VecAluOpRRR::VwsubuVV + | VecAluOpRRR::VwsubuWV + | VecAluOpRRR::VmulVV + | VecAluOpRRR::VmulhVV + | VecAluOpRRR::VmulhuVV + | VecAluOpRRR::VredmaxuVS + | VecAluOpRRR::VredminuVS + | VecAluOpRRR::VcompressVM + | VecAluOpRRR::VmandMM + | VecAluOpRRR::VmorMM + | VecAluOpRRR::VmnandMM + | VecAluOpRRR::VmnorMM => VecOpCategory::OPMVV, + VecAluOpRRR::VwaddVX + | VecAluOpRRR::VwadduVX + | VecAluOpRRR::VwadduWX + | VecAluOpRRR::VwaddWX + | VecAluOpRRR::VwsubVX + | VecAluOpRRR::VwsubuVX + | VecAluOpRRR::VwsubuWX + | VecAluOpRRR::VwsubWX + | VecAluOpRRR::VmulVX + | VecAluOpRRR::VmulhVX + | VecAluOpRRR::VmulhuVX => VecOpCategory::OPMVX, + VecAluOpRRR::VaddVX + | VecAluOpRRR::VsaddVX + | VecAluOpRRR::VsadduVX + | VecAluOpRRR::VsubVX + | VecAluOpRRR::VssubVX + | VecAluOpRRR::VssubuVX + | VecAluOpRRR::VrsubVX + | VecAluOpRRR::VsmulVX + | VecAluOpRRR::VsllVX + | VecAluOpRRR::VsrlVX + | VecAluOpRRR::VsraVX + | VecAluOpRRR::VandVX + | VecAluOpRRR::VorVX + | VecAluOpRRR::VxorVX + | VecAluOpRRR::VminuVX + | VecAluOpRRR::VminVX + | VecAluOpRRR::VmaxuVX + | VecAluOpRRR::VmaxVX + | VecAluOpRRR::VslidedownVX + | VecAluOpRRR::VmergeVXM + | VecAluOpRRR::VrgatherVX + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmsltuVX + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmsgtuVX + | VecAluOpRRR::VmsgtVX => VecOpCategory::OPIVX, + VecAluOpRRR::VfaddVV + | VecAluOpRRR::VfsubVV + | VecAluOpRRR::VfmulVV + | VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfmaxVV + | VecAluOpRRR::VfminVV + | VecAluOpRRR::VfsgnjVV + | VecAluOpRRR::VfsgnjnVV + | VecAluOpRRR::VfsgnjxVV + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfleVV => VecOpCategory::OPFVV, + VecAluOpRRR::VfaddVF + | VecAluOpRRR::VfsubVF + | VecAluOpRRR::VfrsubVF + | VecAluOpRRR::VfmulVF + | VecAluOpRRR::VfdivVF + | VecAluOpRRR::VfrdivVF + | VecAluOpRRR::VfmergeVFM + | VecAluOpRRR::VfsgnjVF + | VecAluOpRRR::VmfeqVF + | VecAluOpRRR::VmfneVF + | VecAluOpRRR::VmfltVF + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmfgeVF => VecOpCategory::OPFVF, + } + } + + // vs1 is the only variable source, vs2 is fixed. + pub fn vs1_regclass(&self) -> RegClass { + match self.category() { + VecOpCategory::OPIVV | VecOpCategory::OPFVV | VecOpCategory::OPMVV => RegClass::Vector, + VecOpCategory::OPIVX | VecOpCategory::OPMVX => RegClass::Int, + VecOpCategory::OPFVF => RegClass::Float, + _ => unreachable!(), + } + } +} + +impl VecInstOverlapInfo for VecAluOpRRR { + fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRR::VrgatherVV + | VecAluOpRRR::VrgatherVX + | VecAluOpRRR::VcompressVM + | VecAluOpRRR::VwadduVV + | VecAluOpRRR::VwadduVX + | VecAluOpRRR::VwaddVV + | VecAluOpRRR::VwaddVX + | VecAluOpRRR::VwadduWV + | VecAluOpRRR::VwadduWX + | VecAluOpRRR::VwaddWV + | VecAluOpRRR::VwaddWX + | VecAluOpRRR::VwsubuVV + | VecAluOpRRR::VwsubuVX + | VecAluOpRRR::VwsubVV + | VecAluOpRRR::VwsubVX + | VecAluOpRRR::VwsubuWV + | VecAluOpRRR::VwsubuWX + | VecAluOpRRR::VwsubWV + | VecAluOpRRR::VwsubWX => true, + _ => false, + } + } + + // Only mask writing operations, and reduction operations (`vred*`) allow mask / dst overlaps. 
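+    // For example, a masked `vmseq.vx` produces a mask value and so is allowed
+    // to write v0, whereas a masked `vadd.vv` is not.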
+ fn forbids_mask_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRR::VredmaxuVS + | VecAluOpRRR::VredminuVS + | VecAluOpRRR::VmandMM + | VecAluOpRRR::VmorMM + | VecAluOpRRR::VmnandMM + | VecAluOpRRR::VmnorMM + | VecAluOpRRR::VmseqVX + | VecAluOpRRR::VmsneVX + | VecAluOpRRR::VmsltuVX + | VecAluOpRRR::VmsltVX + | VecAluOpRRR::VmsleuVX + | VecAluOpRRR::VmsleVX + | VecAluOpRRR::VmsgtuVX + | VecAluOpRRR::VmsgtVX + | VecAluOpRRR::VmfeqVV + | VecAluOpRRR::VmfneVV + | VecAluOpRRR::VmfltVV + | VecAluOpRRR::VmfleVV + | VecAluOpRRR::VmfeqVF + | VecAluOpRRR::VmfneVF + | VecAluOpRRR::VmfltVF + | VecAluOpRRR::VmfleVF + | VecAluOpRRR::VmfgtVF + | VecAluOpRRR::VmfgeVF => false, + _ => true, + } + } +} + +impl fmt::Display for VecAluOpRRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let suffix_length = match self { + VecAluOpRRR::VmergeVVM | VecAluOpRRR::VmergeVXM | VecAluOpRRR::VfmergeVFM => 3, + _ => 2, + }; + + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - suffix_length); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRRImm5::VaddVI => 0b000000, + VecAluOpRRImm5::VrsubVI => 0b000011, + VecAluOpRRImm5::VsllVI => 0b100101, + VecAluOpRRImm5::VsrlVI => 0b101000, + VecAluOpRRImm5::VsraVI => 0b101001, + VecAluOpRRImm5::VandVI => 0b001001, + VecAluOpRRImm5::VorVI => 0b001010, + VecAluOpRRImm5::VxorVI => 0b001011, + VecAluOpRRImm5::VslidedownVI => 0b001111, + VecAluOpRRImm5::VssrlVI => 0b101010, + VecAluOpRRImm5::VmergeVIM => 0b010111, + VecAluOpRRImm5::VsadduVI => 0b100000, + VecAluOpRRImm5::VsaddVI => 0b100001, + VecAluOpRRImm5::VrgatherVI => 0b001100, + VecAluOpRRImm5::VmvrV => 0b100111, + VecAluOpRRImm5::VnclipWI => 0b101111, + VecAluOpRRImm5::VnclipuWI => 0b101110, + VecAluOpRRImm5::VmseqVI => 0b011000, + VecAluOpRRImm5::VmsneVI => 0b011001, + VecAluOpRRImm5::VmsleuVI => 0b011100, + VecAluOpRRImm5::VmsleVI => 0b011101, + VecAluOpRRImm5::VmsgtuVI => 0b011110, + VecAluOpRRImm5::VmsgtVI => 0b011111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRImm5::VaddVI + | VecAluOpRRImm5::VrsubVI + | VecAluOpRRImm5::VsllVI + | VecAluOpRRImm5::VsrlVI + | VecAluOpRRImm5::VsraVI + | VecAluOpRRImm5::VandVI + | VecAluOpRRImm5::VorVI + | VecAluOpRRImm5::VxorVI + | VecAluOpRRImm5::VssrlVI + | VecAluOpRRImm5::VslidedownVI + | VecAluOpRRImm5::VmergeVIM + | VecAluOpRRImm5::VsadduVI + | VecAluOpRRImm5::VsaddVI + | VecAluOpRRImm5::VrgatherVI + | VecAluOpRRImm5::VmvrV + | VecAluOpRRImm5::VnclipWI + | VecAluOpRRImm5::VnclipuWI + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => VecOpCategory::OPIVI, + } + } + + pub fn imm_is_unsigned(&self) -> bool { + match self { + VecAluOpRRImm5::VsllVI + | VecAluOpRRImm5::VsrlVI + | VecAluOpRRImm5::VssrlVI + | VecAluOpRRImm5::VsraVI + | VecAluOpRRImm5::VslidedownVI + | VecAluOpRRImm5::VrgatherVI + | VecAluOpRRImm5::VmvrV + | VecAluOpRRImm5::VnclipWI + | VecAluOpRRImm5::VnclipuWI => true, + VecAluOpRRImm5::VaddVI + | VecAluOpRRImm5::VrsubVI + | VecAluOpRRImm5::VandVI + | VecAluOpRRImm5::VorVI + | VecAluOpRRImm5::VxorVI + | VecAluOpRRImm5::VmergeVIM + | 
VecAluOpRRImm5::VsadduVI + | VecAluOpRRImm5::VsaddVI + | VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => false, + } + } +} + +impl VecInstOverlapInfo for VecAluOpRRImm5 { + fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRImm5::VrgatherVI => true, + _ => false, + } + } + + // Only mask writing operations, and reduction operations (`vred*`) allow mask / dst overlaps. + fn forbids_mask_dst_overlaps(&self) -> bool { + match self { + VecAluOpRRImm5::VmseqVI + | VecAluOpRRImm5::VmsneVI + | VecAluOpRRImm5::VmsleuVI + | VecAluOpRRImm5::VmsleVI + | VecAluOpRRImm5::VmsgtuVI + | VecAluOpRRImm5::VmsgtVI => false, + _ => true, + } + } +} + +impl fmt::Display for VecAluOpRRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let suffix_length = match self { + VecAluOpRRImm5::VmergeVIM => 3, + _ => 2, + }; + + let mut s = format!("{self:?}"); + s.make_ascii_lowercase(); + let (opcode, category) = s.split_at(s.len() - suffix_length); + f.write_str(&format!("{opcode}.{category}")) + } +} + +impl VecAluOpRR { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => { + 0b010000 + } + VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => 0b010010, + VecAluOpRR::VfsqrtV => 0b010011, + VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0b010111, + VecAluOpRR::VfcvtxufV + | VecAluOpRR::VfcvtxfV + | VecAluOpRR::VfcvtrtzxufV + | VecAluOpRR::VfcvtrtzxfV + | VecAluOpRR::VfcvtfxuV + | VecAluOpRR::VfcvtfxV + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => 0b010010, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRR::VmvSX => VecOpCategory::OPMVX, + VecAluOpRR::VmvXS + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 => VecOpCategory::OPMVV, + VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF, + VecAluOpRR::VfmvFS + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VfcvtxufV + | VecAluOpRR::VfcvtxfV + | VecAluOpRR::VfcvtrtzxufV + | VecAluOpRR::VfcvtrtzxfV + | VecAluOpRR::VfcvtfxuV + | VecAluOpRR::VfcvtfxV + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => VecOpCategory::OPFVV, + VecAluOpRR::VmvVV => VecOpCategory::OPIVV, + VecAluOpRR::VmvVX => VecOpCategory::OPIVX, + } + } + + /// Returns the auxiliary encoding field for the instruction, if any. 
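+    ///
+    /// For the unary opcodes (e.g. the VXUNARY0 group used by `vzext.*` /
+    /// `vsext.*`) this value occupies the otherwise-unused vs1/vs2 register
+    /// slot; `vs_is_vs2_encoded` below says which of the two that is.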
+ pub fn aux_encoding(&self) -> u32 { + match self { + // VRXUNARY0 + VecAluOpRR::VmvSX => 0b00000, + // VWXUNARY0 + VecAluOpRR::VmvXS => 0b00000, + // VRFUNARY0 + VecAluOpRR::VfmvSF => 0b00000, + // VWFUNARY0 + VecAluOpRR::VfmvFS => 0b00000, + // VFUNARY1 + VecAluOpRR::VfsqrtV => 0b00000, + // VXUNARY0 + VecAluOpRR::VzextVF8 => 0b00010, + VecAluOpRR::VsextVF8 => 0b00011, + VecAluOpRR::VzextVF4 => 0b00100, + VecAluOpRR::VsextVF4 => 0b00101, + VecAluOpRR::VzextVF2 => 0b00110, + VecAluOpRR::VsextVF2 => 0b00111, + // VFUNARY0 + // single-width converts + VecAluOpRR::VfcvtxufV => 0b00000, + VecAluOpRR::VfcvtxfV => 0b00001, + VecAluOpRR::VfcvtrtzxufV => 0b00110, + VecAluOpRR::VfcvtrtzxfV => 0b00111, + VecAluOpRR::VfcvtfxuV => 0b00010, + VecAluOpRR::VfcvtfxV => 0b00011, + // widening converts + VecAluOpRR::VfwcvtffV => 0b01100, + // narrowing converts + VecAluOpRR::VfncvtffW => 0b10100, + // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: + // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. + VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0, + } + } + + /// Most of these opcodes have the source register encoded in the VS2 field and + /// the `aux_encoding` field in VS1. However some special snowflakes have it the + /// other way around. As far as I can tell only vmv.v.* are backwards. + pub fn vs_is_vs2_encoded(&self) -> bool { + match self { + VecAluOpRR::VmvXS + | VecAluOpRR::VfmvFS + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 + | VecAluOpRR::VfcvtxufV + | VecAluOpRR::VfcvtxfV + | VecAluOpRR::VfcvtrtzxufV + | VecAluOpRR::VfcvtrtzxfV + | VecAluOpRR::VfcvtfxuV + | VecAluOpRR::VfcvtfxV + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => true, + VecAluOpRR::VmvSX + | VecAluOpRR::VfmvSF + | VecAluOpRR::VmvVV + | VecAluOpRR::VmvVX + | VecAluOpRR::VfmvVF => false, + } + } + + pub fn dst_regclass(&self) -> RegClass { + match self { + VecAluOpRR::VfmvSF + | VecAluOpRR::VmvSX + | VecAluOpRR::VmvVV + | VecAluOpRR::VmvVX + | VecAluOpRR::VfmvVF + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 + | VecAluOpRR::VfcvtxufV + | VecAluOpRR::VfcvtxfV + | VecAluOpRR::VfcvtrtzxufV + | VecAluOpRR::VfcvtrtzxfV + | VecAluOpRR::VfcvtfxuV + | VecAluOpRR::VfcvtfxV + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => RegClass::Vector, + VecAluOpRR::VmvXS => RegClass::Int, + VecAluOpRR::VfmvFS => RegClass::Float, + } + } + + pub fn src_regclass(&self) -> RegClass { + match self { + VecAluOpRR::VmvXS + | VecAluOpRR::VfmvFS + | VecAluOpRR::VmvVV + | VecAluOpRR::VfsqrtV + | VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 + | VecAluOpRR::VfcvtxufV + | VecAluOpRR::VfcvtxfV + | VecAluOpRR::VfcvtrtzxufV + | VecAluOpRR::VfcvtrtzxfV + | VecAluOpRR::VfcvtfxuV + | VecAluOpRR::VfcvtfxV + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => RegClass::Vector, + VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float, + VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int, + } + } +} + +impl VecInstOverlapInfo for VecAluOpRR { + fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRR::VzextVF2 + | VecAluOpRR::VzextVF4 + | 
VecAluOpRR::VzextVF8 + | VecAluOpRR::VsextVF2 + | VecAluOpRR::VsextVF4 + | VecAluOpRR::VsextVF8 + | VecAluOpRR::VfwcvtffV + | VecAluOpRR::VfncvtffW => true, + _ => false, + } + } +} + +impl fmt::Display for VecAluOpRR { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + VecAluOpRR::VmvSX => "vmv.s.x", + VecAluOpRR::VmvXS => "vmv.x.s", + VecAluOpRR::VfmvSF => "vfmv.s.f", + VecAluOpRR::VfmvFS => "vfmv.f.s", + VecAluOpRR::VfsqrtV => "vfsqrt.v", + VecAluOpRR::VzextVF2 => "vzext.vf2", + VecAluOpRR::VzextVF4 => "vzext.vf4", + VecAluOpRR::VzextVF8 => "vzext.vf8", + VecAluOpRR::VsextVF2 => "vsext.vf2", + VecAluOpRR::VsextVF4 => "vsext.vf4", + VecAluOpRR::VsextVF8 => "vsext.vf8", + VecAluOpRR::VmvVV => "vmv.v.v", + VecAluOpRR::VmvVX => "vmv.v.x", + VecAluOpRR::VfmvVF => "vfmv.v.f", + VecAluOpRR::VfcvtxufV => "vfcvt.xu.f.v", + VecAluOpRR::VfcvtxfV => "vfcvt.x.f.v", + VecAluOpRR::VfcvtrtzxufV => "vfcvt.rtz.xu.f.v", + VecAluOpRR::VfcvtrtzxfV => "vfcvt.rtz.x.f.v", + VecAluOpRR::VfcvtfxuV => "vfcvt.f.xu.v", + VecAluOpRR::VfcvtfxV => "vfcvt.f.x.v", + VecAluOpRR::VfwcvtffV => "vfwcvt.f.f.v", + VecAluOpRR::VfncvtffW => "vfncvt.f.f.w", + }) + } +} + +impl VecAluOpRImm5 { + pub fn opcode(&self) -> u32 { + // Vector Opcode + 0x57 + } + pub fn funct3(&self) -> u32 { + self.category().encode() + } + + pub fn funct6(&self) -> u32 { + // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc + match self { + VecAluOpRImm5::VmvVI => 0b010111, + } + } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRImm5::VmvVI => VecOpCategory::OPIVI, + } + } + + /// Returns the auxiliary encoding field for the instruction, if any. + pub fn aux_encoding(&self) -> u32 { + match self { + // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: + // > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. + VecAluOpRImm5::VmvVI => 0, + } + } +} + +impl VecInstOverlapInfo for VecAluOpRImm5 { + fn forbids_src_dst_overlaps(&self) -> bool { + match self { + VecAluOpRImm5::VmvVI => false, + } + } +} + +impl fmt::Display for VecAluOpRImm5 { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + VecAluOpRImm5::VmvVI => "vmv.v.i", + }) + } +} + +impl VecAMode { + pub fn get_base_register(&self) -> Option { + match self { + VecAMode::UnitStride { base, .. } => base.get_base_register(), + } + } + + pub fn get_operands(&mut self, collector: &mut impl OperandVisitor) { + match self { + VecAMode::UnitStride { base, .. } => base.get_operands(collector), + } + } + + /// `mop` field, described in Table 7 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn mop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00, + } + } + + /// `lumop` field, described in Table 9 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn lumop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b00000, + } + } + + /// `sumop` field, described in Table 10 of Section 7.2. Vector Load/Store Addressing Modes + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn sumop(&self) -> u32 { + match self { + VecAMode::UnitStride { .. 
} => 0b00000, + } + } + + /// The `nf[2:0]` field encodes the number of fields in each segment. For regular vector loads and + /// stores, nf=0, indicating that a single value is moved between a vector register group and memory + /// at each element position. Larger values in the nf field are used to access multiple contiguous + /// fields within a segment as described in Section 7.8 Vector Load/Store Segment Instructions. + /// + /// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes + pub fn nf(&self) -> u32 { + match self { + VecAMode::UnitStride { .. } => 0b000, + } + } +} + +pub trait VecInstOverlapInfo { + /// § 5.2 Vector Operands states: + /// + /// A destination vector register group can overlap a source vector register group + /// only if one of the following holds: + /// + /// * The destination EEW equals the source EEW. + /// + /// * The destination EEW is smaller than the source EEW and the overlap is + /// in the lowest-numbered part of the source register group (e.g., when LMUL=1, + /// vnsrl.wi v0, v0, 3 is legal, but a destination of v1 is not). + /// + /// * The destination EEW is greater than the source EEW, the source EMUL is at + /// least 1, and the overlap is in the highest-numbered part of the destination register + /// group (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, v2, or v4 is not). + /// + /// For the purpose of determining register group overlap constraints, mask elements have EEW=1. + fn forbids_src_dst_overlaps(&self) -> bool; + + /// § 5.3 Vector Masking states: + /// + /// > The destination vector register group for a masked vector instruction + /// > cannot overlap the source mask register (v0), unless the destination + /// > vector register is being written with a mask value (e.g., compares) or + /// > the scalar result of a reduction. These instruction encodings are reserved. + /// + /// In almost all instructions we should not allow the mask to be re-used as + /// a destination register. + fn forbids_mask_dst_overlaps(&self) -> bool { + true + } + + /// There are two broad categories of overlaps (see above). But we can't represent such + /// fine grained overlaps to regalloc. So if any of the two come into play we forbid + /// all source and destination overlaps (including masks). + fn forbids_overlaps(&self, mask: &VecOpMasking) -> bool { + self.forbids_src_dst_overlaps() || (mask.is_enabled() && self.forbids_mask_dst_overlaps()) + } +} diff --git a/hbcb/src/inst_vector.isle b/hbcb/src/inst_vector.isle new file mode 100644 index 0000000..4b63618 --- /dev/null +++ b/hbcb/src/inst_vector.isle @@ -0,0 +1,1907 @@ +;; Represents the possible widths of an element when used in an operation. +(type VecElementWidth (enum + (E8) + (E16) + (E32) + (E64) +)) + +;; Vector Register Group Multiplier (LMUL) +;; +;; The LMUL setting specifies how we should group registers together. LMUL can +;; also be a fractional value, reducing the number of bits used in a single +;; vector register. Fractional LMUL is used to increase the number of effective +;; usable vector register groups when operating on mixed-width values. +(type VecLmul (enum + (LmulF8) + (LmulF4) + (LmulF2) + (Lmul1) + (Lmul2) + (Lmul4) + (Lmul8) +)) + +;; Tail Mode +;; +;; The tail mode specifies how the tail elements of a vector register are handled. +(type VecTailMode (enum + ;; Tail Agnostic means that the tail elements are left in an undefined state. 
+ (Agnostic) + ;; Tail Undisturbed means that the tail elements are left in their original values. + (Undisturbed) +)) + +;; Mask Mode +;; +;; The mask mode specifies how the masked elements of a vector register are handled. +(type VecMaskMode (enum + ;; Mask Agnostic means that the masked out elements are left in an undefined state. + (Agnostic) + ;; Mask Undisturbed means that the masked out elements are left in their original values. + (Undisturbed) +)) + +;; Application Vector Length (AVL) +;; +;; This setting specifies the number of elements that are going to be processed +;; in a single instruction. Note: We may end up processing fewer elements than +;; the AVL setting, if they don't fit in a single register. +(type VecAvl (enum + ;; Static AVL emits a `vsetivli` that uses a constant value + (Static (size UImm5)) + ;; TODO: Add a dynamic, register based AVL mode when we are able to properly test it +)) + +(type VType (primitive VType)) +(type VState (primitive VState)) + + +;; Vector Opcode Category +;; +;; These categories are used to determine the type of operands that are allowed in the +;; instruction. +(type VecOpCategory (enum + (OPIVV) + (OPFVV) + (OPMVV) + (OPIVI) + (OPIVX) + (OPFVF) + (OPMVX) + (OPCFG) +)) + +;; Vector Opcode Masking +;; +;; When masked, the instruction will only operate on the elements that are dictated by +;; the mask register. Currently this is always fixed to v0. +(type VecOpMasking (enum + (Enabled (reg Reg)) + (Disabled) +)) + +(decl pure masked (VReg) VecOpMasking) +(rule (masked reg) (VecOpMasking.Enabled reg)) + +(decl pure unmasked () VecOpMasking) +(rule (unmasked) (VecOpMasking.Disabled)) + +;; Register to Register ALU Ops +(type VecAluOpRRR (enum + ;; Vector-Vector Opcodes + (VaddVV) + (VsaddVV) + (VsadduVV) + (VwaddVV) + (VwaddWV) + (VwadduVV) + (VwadduWV) + (VsubVV) + (VwsubVV) + (VwsubWV) + (VwsubuVV) + (VwsubuWV) + (VssubVV) + (VssubuVV) + (VmulVV) + (VmulhVV) + (VmulhuVV) + (VsmulVV) + (VsllVV) + (VsrlVV) + (VsraVV) + (VandVV) + (VorVV) + (VxorVV) + (VmaxVV) + (VmaxuVV) + (VminVV) + (VminuVV) + (VfaddVV) + (VfsubVV) + (VfmulVV) + (VfdivVV) + (VfminVV) + (VfmaxVV) + (VfsgnjVV) + (VfsgnjnVV) + (VfsgnjxVV) + (VmergeVVM) + (VredmaxuVS) + (VredminuVS) + (VrgatherVV) + (VcompressVM) + (VmseqVV) + (VmsneVV) + (VmsltuVV) + (VmsltVV) + (VmsleuVV) + (VmsleVV) + (VmfeqVV) + (VmfneVV) + (VmfltVV) + (VmfleVV) + (VmandMM) + (VmorMM) + (VmnandMM) + (VmnorMM) + + + ;; Vector-Scalar Opcodes + (VaddVX) + (VsaddVX) + (VsadduVX) + (VwaddVX) + (VwaddWX) + (VwadduVX) + (VwadduWX) + (VsubVX) + (VrsubVX) + (VwsubVX) + (VwsubWX) + (VwsubuVX) + (VwsubuWX) + (VssubVX) + (VssubuVX) + (VmulVX) + (VmulhVX) + (VmulhuVX) + (VsmulVX) + (VsllVX) + (VsrlVX) + (VsraVX) + (VandVX) + (VorVX) + (VxorVX) + (VmaxVX) + (VmaxuVX) + (VminVX) + (VminuVX) + (VslidedownVX) + (VfaddVF) + (VfsubVF) + (VfrsubVF) + (VfmulVF) + (VfdivVF) + (VfsgnjVF) + (VfrdivVF) + (VmergeVXM) + (VfmergeVFM) + (VrgatherVX) + (VmseqVX) + (VmsneVX) + (VmsltuVX) + (VmsltVX) + (VmsleuVX) + (VmsleVX) + (VmsgtuVX) + (VmsgtVX) + (VmfeqVF) + (VmfneVF) + (VmfltVF) + (VmfleVF) + (VmfgtVF) + (VmfgeVF) +)) + + + +;; Register-Imm ALU Ops that modify the destination register +(type VecAluOpRRRImm5 (enum + (VslideupVI) +)) + +;; Register-Register ALU Ops that modify the destination register +(type VecAluOpRRRR (enum + ;; Vector-Vector Opcodes + (VmaccVV) + (VnmsacVV) + (VfmaccVV) + (VfnmaccVV) + (VfmsacVV) + (VfnmsacVV) + + ;; Vector-Scalar Opcodes + (VmaccVX) + (VnmsacVX) + (VfmaccVF) + (VfnmaccVF) + (VfmsacVF) + 
(VfnmsacVF) + (Vslide1upVX) +)) + +;; Register-Imm ALU Ops +(type VecAluOpRRImm5 (enum + ;; Regular VI Opcodes + (VaddVI) + (VsaddVI) + (VsadduVI) + (VrsubVI) + (VsllVI) + (VsrlVI) + (VsraVI) + (VandVI) + (VorVI) + (VxorVI) + (VssrlVI) + (VslidedownVI) + (VmergeVIM) + (VrgatherVI) + ;; This opcode represents multiple instructions `vmv1r`/`vmv2r`/`vmv4r`/etc... + ;; The immediate field specifies how many registers should be copied. + (VmvrV) + (VnclipWI) + (VnclipuWI) + (VmseqVI) + (VmsneVI) + (VmsleuVI) + (VmsleVI) + (VmsgtuVI) + (VmsgtVI) +)) + +;; Imm only ALU Ops +(type VecAluOpRImm5 (enum + (VmvVI) +)) + +;; These are all of the special cases that have weird encodings. They are all +;; single source, single destination instructions, and usually use one of +;; the two source registers as auxiliary encoding space. +(type VecAluOpRR (enum + (VmvSX) + (VmvXS) + (VfmvSF) + (VfmvFS) + ;; vmv.v* is special in that vs2 must be v0 (and is ignored) otherwise the instruction is illegal. + (VmvVV) + (VmvVX) + (VfmvVF) + (VfsqrtV) + (VsextVF2) + (VsextVF4) + (VsextVF8) + (VzextVF2) + (VzextVF4) + (VzextVF8) + (VfcvtxufV) + (VfcvtxfV) + (VfcvtrtzxufV) + (VfcvtrtzxfV) + (VfcvtfxuV) + (VfcvtfxV) + (VfwcvtffV) + (VfncvtffW) +)) + +;; Returns the canonical destination type for a VecAluOpRRImm5. +(decl pure vec_alu_rr_dst_type (VecAluOpRR) Type) +(extern constructor vec_alu_rr_dst_type vec_alu_rr_dst_type) + + +;; Vector Addressing Mode +(type VecAMode (enum + ;; Vector unit-stride operations access elements stored contiguously in memory + ;; starting from the base effective address. + (UnitStride + (base AMode)) + ;; TODO: Constant Stride + ;; TODO: Indexed Operations +)) + + +;; Builds a static VState matching a SIMD type. +;; The VState is guaranteed to be static with AVL set to the number of lanes. +;; Element size is set to the size of the type. +;; LMUL is set to 1. +;; Tail mode is set to agnostic. +;; Mask mode is set to agnostic. +(decl pure vstate_from_type (Type) VState) +(extern constructor vstate_from_type vstate_from_type) +(convert Type VState vstate_from_type) + +;; Alters the LMUL of a VState to mf2 +(decl pure vstate_mf2 (VState) VState) +(extern constructor vstate_mf2 vstate_mf2) + +;; Extracts an element width from a SIMD type. +(decl pure element_width_from_type (Type) VecElementWidth) +(rule (element_width_from_type ty) + (if-let $I8 (lane_type ty)) + (VecElementWidth.E8)) +(rule (element_width_from_type ty) + (if-let $I16 (lane_type ty)) + (VecElementWidth.E16)) +(rule (element_width_from_type ty) + (if-let $I32 (lane_type ty)) + (VecElementWidth.E32)) +(rule (element_width_from_type ty) + (if-let $F32 (lane_type ty)) + (VecElementWidth.E32)) +(rule (element_width_from_type ty) + (if-let $I64 (lane_type ty)) + (VecElementWidth.E64)) +(rule (element_width_from_type ty) + (if-let $F64 (lane_type ty)) + (VecElementWidth.E64)) + +(decl pure min_vec_reg_size () u64) +(extern constructor min_vec_reg_size min_vec_reg_size) + +;; An extractor that matches any type that is known to fit in a single vector +;; register. +(decl ty_vec_fits_in_register (Type) Type) +(extern extractor ty_vec_fits_in_register ty_vec_fits_in_register) + +;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; As noted in the RISC-V Vector Extension Specification, rs2 is the first +;; source register and rs1 is the second source register. This is the opposite +;; of the usual RISC-V register order. +;; See Section 10.1 of the RISC-V Vector Extension Specification. 
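+
+;; As a hedged illustration of this operand order (a sketch, not one of the
+;; lowering rules in this file; it assumes the usual `lower`, `has_type`, and
+;; `iadd` constructors from the shared prelude are in scope), a rule built on
+;; the helpers below passes the first CLIF operand through as `vs2` and the
+;; second as `vs1`:
+;;
+;;   (rule (lower (has_type (ty_vec_fits_in_register ty) (iadd x y)))
+;;         (rv_vadd_vv x y (unmasked) ty))
+;;
+;; i.e. `x` lands in the vs2 field and `y` in the vs1 field of the encoded
+;; instruction, matching the rs2-before-rs1 note above.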
+ + +;; Helper for emitting `MInst.VecAluRRRR` instructions. +;; These instructions modify the destination register. +(decl vec_alu_rrrr (VecAluOpRRRR VReg VReg Reg VecOpMasking VState) VReg) +(rule (vec_alu_rrrr op vd_src vs2 vs1 mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRRR op vd vd_src vs2 vs1 mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRRImm5` instructions. +;; These instructions modify the destination register. +(decl vec_alu_rrr_imm5 (VecAluOpRRRImm5 VReg VReg Imm5 VecOpMasking VState) VReg) +(rule (vec_alu_rrr_imm5 op vd_src vs2 imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRRImm5 op vd vd_src vs2 imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRRImm5` instructions where the immediate +;; is zero extended instead of sign extended. +(decl vec_alu_rrr_uimm5 (VecAluOpRRRImm5 VReg VReg UImm5 VecOpMasking VState) VReg) +(rule (vec_alu_rrr_uimm5 op vd_src vs2 imm mask vstate) + (vec_alu_rrr_imm5 op vd_src vs2 (uimm5_bitcast_to_imm5 imm) mask vstate)) + +;; Helper for emitting `MInst.VecAluRRR` instructions. +(decl vec_alu_rrr (VecAluOpRRR Reg Reg VecOpMasking VState) Reg) +(rule (vec_alu_rrr op vs2 vs1 mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRR op vd vs2 vs1 mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions. +(decl vec_alu_rr_imm5 (VecAluOpRRImm5 Reg Imm5 VecOpMasking VState) Reg) +(rule (vec_alu_rr_imm5 op vs2 imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRRImm5 op vd vs2 imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRRImm5` instructions where the immediate +;; is zero extended instead of sign extended. +(decl vec_alu_rr_uimm5 (VecAluOpRRImm5 Reg UImm5 VecOpMasking VState) Reg) +(rule (vec_alu_rr_uimm5 op vs2 imm mask vstate) + (vec_alu_rr_imm5 op vs2 (uimm5_bitcast_to_imm5 imm) mask vstate)) + +;; Helper for emitting `MInst.VecAluRR` instructions. These opcodes use one of +;; the source register fields as auxiliary encoding space. +(decl vec_alu_rr (VecAluOpRR Reg VecOpMasking VState) Reg) +(rule (vec_alu_rr op vs mask vstate) + (let ((vd WritableReg (temp_writable_reg (vec_alu_rr_dst_type op))) + (_ Unit (emit (MInst.VecAluRR op vd vs mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecAluRImm5` instructions. +(decl vec_alu_r_imm5 (VecAluOpRImm5 Imm5 VecOpMasking VState) Reg) +(rule (vec_alu_r_imm5 op imm mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecAluRImm5 op vd imm mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecLoad` instructions. +(decl vec_load (VecElementWidth VecAMode MemFlags VecOpMasking VState) Reg) +(rule (vec_load eew from flags mask vstate) + (let ((vd WritableVReg (temp_writable_vreg)) + (_ Unit (emit (MInst.VecLoad eew vd from flags mask vstate)))) + vd)) + +;; Helper for emitting `MInst.VecStore` instructions. +(decl vec_store (VecElementWidth VecAMode VReg MemFlags VecOpMasking VState) InstOutput) +(rule (vec_store eew to from flags mask vstate) + (side_effect + (SideEffectNoResult.Inst (MInst.VecStore eew to from flags mask vstate)))) + +;; Helper for emitting the `vadd.vv` instruction. +(decl rv_vadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vadd.vx` instruction. 
+(decl rv_vadd_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vadd.vi` instruction. +(decl rv_vadd_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vadd_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VaddVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vsadd.vv` instruction. +(decl rv_vsadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsadd.vx` instruction. +(decl rv_vsadd_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsadd.vi` instruction. +(decl rv_vsadd_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vsadd_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VsaddVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vsaddu.vv` instruction. +(decl rv_vsaddu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsaddu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsadduVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsaddu.vx` instruction. +(decl rv_vsaddu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsaddu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsadduVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsaddu.vi` instruction. +(decl rv_vsaddu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vsaddu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VsadduVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vwadd.vv` instruction. +;; +;; Widening integer add, 2*SEW = SEW + SEW +(decl rv_vwadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwadd.vx` instruction. +;; +;; Widening integer add, 2*SEW = SEW + SEW +(decl rv_vwadd_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwadd.wv` instruction. +;; +;; Widening integer add, 2*SEW = 2*SEW + SEW +(decl rv_vwadd_wv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwadd_wv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwaddWV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwadd.wx` instruction. +;; +;; Widening integer add, 2*SEW = 2*SEW + SEW +(decl rv_vwadd_wx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwadd_wx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwaddWX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwaddu.vv` instruction. +;; +;; Widening unsigned integer add, 2*SEW = SEW + SEW +(decl rv_vwaddu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwaddu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwadduVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwaddu.vv` instruction. +;; +;; Widening unsigned integer add, 2*SEW = SEW + SEW +(decl rv_vwaddu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwaddu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwadduVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwaddu.wv` instruction. 
+;; +;; Widening integer add, 2*SEW = 2*SEW + SEW +(decl rv_vwaddu_wv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwaddu_wv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwadduWV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwaddu.wx` instruction. +;; +;; Widening integer add, 2*SEW = 2*SEW + SEW +(decl rv_vwaddu_wx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwaddu_wx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwadduWX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsub.vv` instruction. +(decl rv_vsub_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsub.vx` instruction. +(decl rv_vsub_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsub_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsubVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrsub.vx` instruction. +(decl rv_vrsub_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vrsub_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsub.vv` instruction. +;; +;; Widening integer sub, 2*SEW = SEW + SEW +(decl rv_vwsub_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwsub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsub.vx` instruction. +;; +;; Widening integer sub, 2*SEW = SEW + SEW +(decl rv_vwsub_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwsub_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsub.wv` instruction. +;; +;; Widening integer sub, 2*SEW = 2*SEW + SEW +(decl rv_vwsub_wv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwsub_wv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubWV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsub.wx` instruction. +;; +;; Widening integer sub, 2*SEW = 2*SEW + SEW +(decl rv_vwsub_wx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwsub_wx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubWX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsubu.vv` instruction. +;; +;; Widening unsigned integer sub, 2*SEW = SEW + SEW +(decl rv_vwsubu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwsubu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsubu.vv` instruction. +;; +;; Widening unsigned integer sub, 2*SEW = SEW + SEW +(decl rv_vwsubu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwsubu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsubu.wv` instruction. +;; +;; Widening integer sub, 2*SEW = 2*SEW + SEW +(decl rv_vwsubu_wv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vwsubu_wv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubuWV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vwsubu.wx` instruction. +;; +;; Widening integer sub, 2*SEW = 2*SEW + SEW +(decl rv_vwsubu_wx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vwsubu_wx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VwsubuWX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssub.vv` instruction. +(decl rv_vssub_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vssub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssub.vx` instruction. 
+(decl rv_vssub_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vssub_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssubu.vv` instruction. +(decl rv_vssubu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vssubu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssubu.vx` instruction. +(decl rv_vssubu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vssubu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vneg.v` pseudo-instruction. +(decl rv_vneg_v (VReg VecOpMasking VState) VReg) +(rule (rv_vneg_v vs2 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 (zero_reg) mask vstate)) + +;; Helper for emitting the `vrsub.vi` instruction. +(decl rv_vrsub_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vrsub_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VrsubVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmul.vv` instruction. +(decl rv_vmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmul.vx` instruction. +(decl rv_vmul_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmul_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulh.vv` instruction. +(decl rv_vmulh_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmulh_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulh.vx` instruction. +(decl rv_vmulh_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmulh_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulhu.vv` instruction. +(decl rv_vmulhu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmulhu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmulhu.vx` instruction. +(decl rv_vmulhu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmulhu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmulhuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsmul.vv` instruction. +;; +;; Signed saturating and rounding fractional multiply +;; # vd[i] = clip(roundoff_signed(vs2[i]*vs1[i], SEW-1)) +(decl rv_vsmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsmul.vx` instruction. +;; +;; Signed saturating and rounding fractional multiply +;; # vd[i] = clip(roundoff_signed(vs2[i]*x[rs1], SEW-1)) +(decl rv_vsmul_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsmul_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsmulVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmacc.vv` instruction. +;; +;; Integer multiply-add, overwrite addend +;; # vd[i] = +(vs1[i] * vs2[i]) + vd[i] +(decl rv_vmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmacc.vx` instruction. 
+;; +;; Integer multiply-add, overwrite addend +;; # vd[i] = +(x[rs1] * vs2[i]) + vd[i] +(decl rv_vmacc_vx (VReg VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmacc_vx vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VmaccVX) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vnmsac.vv` instruction. +;; +;; Integer multiply-sub, overwrite minuend +;; # vd[i] = -(vs1[i] * vs2[i]) + vd[i] +(decl rv_vnmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vnmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VnmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vnmsac.vx` instruction. +;; +;; Integer multiply-sub, overwrite minuend +;; # vd[i] = -(x[rs1] * vs2[i]) + vd[i] +(decl rv_vnmsac_vx (VReg VReg XReg VecOpMasking VState) VReg) +(rule (rv_vnmsac_vx vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VnmsacVX) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `sll.vv` instruction. +(decl rv_vsll_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsll_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsllVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `sll.vx` instruction. +(decl rv_vsll_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsll_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsllVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsll.vi` instruction. +(decl rv_vsll_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsll_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsllVI) vs2 imm mask vstate)) + +;; Helper for emitting the `srl.vv` instruction. +(decl rv_vsrl_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsrl_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsrlVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `srl.vx` instruction. +(decl rv_vsrl_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsrl_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsrlVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsrl.vi` instruction. +(decl rv_vsrl_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsrl_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsrlVI) vs2 imm mask vstate)) + +;; Helper for emitting the `sra.vv` instruction. +(decl rv_vsra_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vsra_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsraVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `sra.vx` instruction. +(decl rv_vsra_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vsra_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsraVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsra.vi` instruction. +(decl rv_vsra_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vsra_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VsraVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vand.vv` instruction. +(decl rv_vand_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vand_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VandVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vand.vx` instruction. +(decl rv_vand_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vand_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VandVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vand.vi` instruction. +(decl rv_vand_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vand_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VandVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vor.vv` instruction. 
+(decl rv_vor_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vor_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VorVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vor.vx` instruction. +(decl rv_vor_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vor_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VorVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vor.vi` instruction. +(decl rv_vor_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vor_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VorVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vxor.vv` instruction. +(decl rv_vxor_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vxor_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VxorVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vxor.vx` instruction. +(decl rv_vxor_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vxor_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VxorVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vxor.vi` instruction. +(decl rv_vxor_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vxor_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VxorVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vssrl.vi` instruction. +;; +;; vd[i] = (unsigned(vs2[i]) >> imm) + r +;; +;; `r` here is the rounding mode currently selected. +(decl rv_vssrl_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vssrl_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VssrlVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vnot.v` instruction. +;; This is just a mnemonic for `vxor.vi vd, vs, -1` +(decl rv_vnot_v (VReg VecOpMasking VState) VReg) +(rule (rv_vnot_v vs2 mask vstate) + (if-let neg1 (i8_to_imm5 -1)) + (rv_vxor_vi vs2 neg1 mask vstate)) + +;; Helper for emitting the `vmax.vv` instruction. +(decl rv_vmax_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmax_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmax.vx` instruction. +(decl rv_vmax_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmax_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmin.vv` instruction. +(decl rv_vmin_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmin_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmin.vx` instruction. +(decl rv_vmin_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmin_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmaxu.vv` instruction. +(decl rv_vmaxu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmaxu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmaxu.vx` instruction. +(decl rv_vmaxu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmaxu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vminu.vv` instruction. +(decl rv_vminu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vminu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vminu.vx` instruction. +(decl rv_vminu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vminu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfadd.vv` instruction. 
+(decl rv_vfadd_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfadd.vf` instruction. +(decl rv_vfadd_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfadd_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfaddVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsub.vv` instruction. +(decl rv_vfsub_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfsub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsub.vf` instruction. +(decl rv_vfsub_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfsub_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsubVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfrsub.vf` instruction. +(decl rv_vfrsub_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfrsub_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfrsubVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmul.vv` instruction. +(decl rv_vfmul_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmul_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmulVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmul.vf` instruction. +(decl rv_vfmul_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmul_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmulVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmacc.vv` instruction. +;; +;; FP multiply-accumulate, overwrites addend +;; # vd[i] = +(vs1[i] * vs2[i]) + vd[i] +(decl rv_vfmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmacc.vf` instruction. +;; +;; FP multiply-accumulate, overwrites addend +;; # vd[i] = +(f[rs1] * vs2[i]) + vd[i] +(decl rv_vfmacc_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmacc_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmaccVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmacc.vv` instruction. +;; +;; FP negate-(multiply-accumulate), overwrites subtrahend +;; # vd[i] = -(vs1[i] * vs2[i]) - vd[i] +(decl rv_vfnmacc_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfnmacc_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmaccVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmacc.vf` instruction. +;; +;; FP negate-(multiply-accumulate), overwrites subtrahend +;; # vd[i] = -(f[rs1] * vs2[i]) - vd[i] +(decl rv_vfnmacc_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfnmacc_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmaccVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmsac.vv` instruction. +;; +;; FP multiply-subtract-accumulator, overwrites subtrahend +;; # vd[i] = +(vs1[i] * vs2[i]) - vd[i] +(decl rv_vfmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmsac.vf` instruction. +;; +;; FP multiply-subtract-accumulator, overwrites subtrahend +;; # vd[i] = +(f[rs1] * vs2[i]) - vd[i] +(decl rv_vfmsac_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfmsac_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfmsacVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmsac.vv` instruction. 
+;; +;; FP negate-(multiply-subtract-accumulator), overwrites minuend +;; # vd[i] = -(vs1[i] * vs2[i]) + vd[i] +(decl rv_vfnmsac_vv (VReg VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfnmsac_vv vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmsacVV) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfnmsac.vf` instruction. +;; +;; FP negate-(multiply-subtract-accumulator), overwrites minuend +;; # vd[i] = -(f[rs1] * vs2[i]) + vd[i] +(decl rv_vfnmsac_vf (VReg VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfnmsac_vf vd vs2 vs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.VfnmsacVF) vd vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfdiv.vv` instruction. +(decl rv_vfdiv_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfdiv_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfdivVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfdiv.vf` instruction. +(decl rv_vfdiv_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfdiv_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfdivVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfrdiv.vf` instruction. +(decl rv_vfrdiv_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfrdiv_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfrdivVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmin.vv` instruction. +(decl rv_vfmin_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmin_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfminVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfmax.vv` instruction. +(decl rv_vfmax_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfmax_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmaxVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsgnj.vv` ("Floating Point Sign Injection") instruction. +;; The output of this instruction is `vs2` with the sign bit from `vs1` +(decl rv_vfsgnj_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfsgnj_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsgnjVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsgnj.vf` ("Floating Point Sign Injection") instruction. +(decl rv_vfsgnj_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vfsgnj_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsgnjVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfsgnjn.vv` ("Floating Point Sign Injection Negated") instruction. +;; The output of this instruction is `vs2` with the negated sign bit from `vs1` +(decl rv_vfsgnjn_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfsgnjn_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsgnjnVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfneg.v` instruction. +;; This instruction is a mnemonic for `vfsgnjn.vv vd, vs, vs` +(decl rv_vfneg_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfneg_v vs mask vstate) (rv_vfsgnjn_vv vs vs mask vstate)) + +;; Helper for emitting the `vfsgnjx.vv` ("Floating Point Sign Injection Exclusive") instruction. +;; The output of this instruction is `vs2` with the XOR of the sign bits from `vs2` and `vs1`. +;; When `vs2 == vs1` this implements `fabs` +(decl rv_vfsgnjx_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vfsgnjx_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfsgnjxVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vfabs.v` instruction. +;; This instruction is a mnemonic for `vfsgnjx.vv vd, vs, vs` +(decl rv_vfabs_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfabs_v vs mask vstate) (rv_vfsgnjx_vv vs vs mask vstate)) + +;; Helper for emitting the `vfsqrt.v` instruction. 
+;; This instruction computes the element-wise square root of the source vector. +(decl rv_vfsqrt_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfsqrt_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfsqrtV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.xu.f.v` instruction. +;; This instruction converts a float to an unsigned integer. +(decl rv_vfcvt_xu_f_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_xu_f_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtxufV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.x.f.v` instruction. +;; This instruction converts a float to a signed integer. +(decl rv_vfcvt_x_f_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_x_f_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtxfV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.rtz.xu.f.v` instruction. +;; This instruction converts a float to an unsigned integer +;; using the Round to Zero (RTZ) rounding mode and ignoring +;; the currently set FRM rounding mode. +(decl rv_vfcvt_rtz_xu_f_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_rtz_xu_f_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtrtzxufV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.rtz.x.f.v` instruction. +;; This instruction converts a float to a signed integer +;; using the Round to Zero (RTZ) rounding mode and ignoring +;; the currently set FRM rounding mode. +(decl rv_vfcvt_rtz_x_f_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_rtz_x_f_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtrtzxfV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.f.xu.v` instruction. +;; This instruction converts an unsigned integer to a float. +(decl rv_vfcvt_f_xu_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_f_xu_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtfxuV) vs mask vstate)) + +;; Helper for emitting the `vfcvt.f.x.v` instruction. +;; This instruction converts a signed integer to a float. +(decl rv_vfcvt_f_x_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfcvt_f_x_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfcvtfxV) vs mask vstate)) + +;; Helper for emitting the `vfwcvt.f.f.v` instruction. +;; Convert single-width float to double-width float. +(decl rv_vfwcvt_f_f_v (VReg VecOpMasking VState) VReg) +(rule (rv_vfwcvt_f_f_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfwcvtffV) vs mask vstate)) + +;; Helper for emitting the `vfncvt.f.f.w` instruction. +;; Convert double-width float to single-width float. +(decl rv_vfncvt_f_f_w (VReg VecOpMasking VState) VReg) +(rule (rv_vfncvt_f_f_w vs mask vstate) + (vec_alu_rr (VecAluOpRR.VfncvtffW) vs mask vstate)) + +;; Helper for emitting the `vslidedown.vx` instruction. +;; `vslidedown` moves all elements in the vector down by n elements. +;; The topmost elements are handled according to the tail policy. +(decl rv_vslidedown_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vslidedown_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VslidedownVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vslidedown.vi` instruction. +;; Unlike other `vi` instructions, the immediate is zero extended. +(decl rv_vslidedown_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vslidedown_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VslidedownVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vslideup.vi` instruction. +;; Unlike other `vi` instructions, the immediate is zero extended. +;; This is implemented as a 2 source operand instruction, since it only +;; partially modifies the destination register. 
+(decl rv_vslideup_vvi (VReg VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vslideup_vvi vd vs2 imm mask vstate) + (vec_alu_rrr_uimm5 (VecAluOpRRRImm5.VslideupVI) vd vs2 imm mask vstate)) + +;; Helper for emitting the `vslide1up.vx` instruction. +;; +;; # vd[0]=x[rs1], vd[i+1] = vs2[i] +(decl rv_vslide1up_vx (VReg VReg XReg VecOpMasking VState) VReg) +(rule (rv_vslide1up_vx vd vs2 rs1 mask vstate) + (vec_alu_rrrr (VecAluOpRRRR.Vslide1upVX) vd vs2 rs1 mask vstate)) + +;; Helper for emitting the `vmv.x.s` instruction. +;; This instruction copies the first element of the source vector to the destination X register. +;; Masked versions of this instruction are not supported. +(decl rv_vmv_xs (VReg VState) XReg) +(rule (rv_vmv_xs vs vstate) + (vec_alu_rr (VecAluOpRR.VmvXS) vs (unmasked) vstate)) + +;; Helper for emitting the `vfmv.f.s` instruction. +;; This instruction copies the first element of the source vector to the destination F register. +;; Masked versions of this instruction are not supported. +(decl rv_vfmv_fs (VReg VState) FReg) +(rule (rv_vfmv_fs vs vstate) + (vec_alu_rr (VecAluOpRR.VfmvFS) vs (unmasked) vstate)) + +;; Helper for emitting the `vmv.s.x` instruction. +;; This instruction copies the source X register into the first element of the destination vector. +;; Masked versions of this instruction are not supported. +(decl rv_vmv_sx (XReg VState) VReg) +(rule (rv_vmv_sx vs vstate) + (vec_alu_rr (VecAluOpRR.VmvSX) vs (unmasked) vstate)) + +;; Helper for emitting the `vfmv.s.f` instruction. +;; This instruction copies the source F register into the first element of the destination vector. +;; Masked versions of this instruction are not supported. +(decl rv_vfmv_sf (FReg VState) VReg) +(rule (rv_vfmv_sf vs vstate) + (vec_alu_rr (VecAluOpRR.VfmvSF) vs (unmasked) vstate)) + +;; Helper for emitting the `vmv.v.x` instruction. +;; This instruction splats the X register into all elements of the destination vector. +;; Masked versions of this instruction are called `vmerge`. +(decl rv_vmv_vx (XReg VState) VReg) +(rule (rv_vmv_vx vs vstate) + (vec_alu_rr (VecAluOpRR.VmvVX) vs (unmasked) vstate)) + +;; Helper for emitting the `vfmv.v.f` instruction. +;; This instruction splats the F register into all elements of the destination vector. +;; Masked versions of this instruction are called `vmerge`. +(decl rv_vfmv_vf (FReg VState) VReg) +(rule (rv_vfmv_vf vs vstate) + (vec_alu_rr (VecAluOpRR.VfmvVF) vs (unmasked) vstate)) + +;; Helper for emitting the `vmv.v.i` instruction. +;; This instruction splats the immediate value into all elements of the destination vector. +;; Masked versions of this instruction are called `vmerge`. +(decl rv_vmv_vi (Imm5 VState) VReg) +(rule (rv_vmv_vi imm vstate) + (vec_alu_r_imm5 (VecAluOpRImm5.VmvVI) imm (unmasked) vstate)) + +;; Helper for emitting the `vmerge.vvm` instruction. +;; This instruction merges the elements of the two source vectors into the destination vector +;; based on a mask. Elements are taken from the first source vector if the mask bit is clear, +;; and from the second source vector if the mask bit is set. This instruction is always masked. +;; +;; vd[i] = v0.mask[i] ? vs1[i] : vs2[i] +(decl rv_vmerge_vvm (VReg VReg VReg VState) VReg) +(rule (rv_vmerge_vvm vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmergeVVM) vs2 vs1 (masked mask) vstate)) + +;; Helper for emitting the `vmerge.vxm` instruction. +;; Elements are taken from the first source vector if the mask bit is clear, and from the X +;; register if the mask bit is set. This instruction is always masked. 
+;; +;; vd[i] = v0.mask[i] ? x[rs1] : vs2[i] +(decl rv_vmerge_vxm (VReg XReg VReg VState) VReg) +(rule (rv_vmerge_vxm vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmergeVXM) vs2 vs1 (masked mask) vstate)) + +;; Helper for emitting the `vfmerge.vfm` instruction. +;; Elements are taken from the first source vector if the mask bit is clear, and from the F +;; register if the mask bit is set. This instruction is always masked. +;; +;; vd[i] = v0.mask[i] ? f[rs1] : vs2[i] +(decl rv_vfmerge_vfm (VReg FReg VReg VState) VReg) +(rule (rv_vfmerge_vfm vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VfmergeVFM) vs2 vs1 (masked mask) vstate)) + +;; Helper for emitting the `vmerge.vim` instruction. +;; Elements are taken from the first source vector if the mask bit is clear, and from the +;; immediate value if the mask bit is set. This instruction is always masked. +;; +;; vd[i] = v0.mask[i] ? imm : vs2[i] +(decl rv_vmerge_vim (VReg Imm5 VReg VState) VReg) +(rule (rv_vmerge_vim vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmergeVIM) vs2 imm (masked mask) vstate)) + + +;; Helper for emitting the `vredminu.vs` instruction. +;; +;; vd[0] = minu( vs1[0] , vs2[*] ) +(decl rv_vredminu_vs (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vredminu_vs vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VredminuVS) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vredmaxu.vs` instruction. +;; +;; vd[0] = maxu( vs1[0] , vs2[*] ) +(decl rv_vredmaxu_vs (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vredmaxu_vs vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VredmaxuVS) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vv` instruction. +;; +;; vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; +(decl rv_vrgather_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vrgather_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrgatherVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vx` instruction. +;; +;; vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] +(decl rv_vrgather_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vrgather_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrgatherVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vrgather.vi` instruction. +(decl rv_vrgather_vi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vrgather_vi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VrgatherVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vcompress.vm` instruction. +;; +;; The vector compress instruction allows elements selected by a vector mask +;; register from a source vector register group to be packed into contiguous +;; elements at the start of the destination vector register group. +;; +;; The mask register is specified through vs1 +(decl rv_vcompress_vm (VReg VReg VState) VReg) +(rule (rv_vcompress_vm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VcompressVM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmseq.vv` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vx` (Vector Mask Set If Equal) instruction. +(decl rv_vmseq_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmseq_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmseqVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmseq.vi` (Vector Mask Set If Equal) instruction. 
+(decl rv_vmseq_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmseq_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmseqVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsne.vv` (Vector Mask Set If Not Equal) instruction. +(decl rv_vmsne_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsne_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsneVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsne.vx` (Vector Mask Set If Not Equal) instruction. +(decl rv_vmsne_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsne_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsneVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsne.vi` (Vector Mask Set If Not Equal) instruction. +(decl rv_vmsne_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsne_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsneVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsltu.vv` (Vector Mask Set If Less Than, Unsigned) instruction. +(decl rv_vmsltu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsltu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsltu.vx` (Vector Mask Set If Less Than, Unsigned) instruction. +(decl rv_vmsltu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsltu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmslt.vv` (Vector Mask Set If Less Than) instruction. +(decl rv_vmslt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmslt_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmslt.vx` (Vector Mask Set If Less Than) instruction. +(decl rv_vmslt_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmslt_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsltVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsleu.vv` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsleu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsleu.vx` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsleu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsleu.vi` (Vector Mask Set If Less Than or Equal, Unsigned) instruction. +(decl rv_vmsleu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsleu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleuVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsle.vv` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsle_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsle.vx` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsle_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsleVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsle.vi` (Vector Mask Set If Less Than or Equal) instruction. +(decl rv_vmsle_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsle_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsleVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgt.vv` (Vector Mask Set If Greater Than, Unsigned) instruction. 
+;; This is an alias for `vmsltu.vv` with the operands inverted. +(decl rv_vmsgtu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vv vs2 vs1 mask vstate) (rv_vmsltu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgtu.vx` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgtu.vi` (Vector Mask Set If Greater Than, Unsigned) instruction. +(decl rv_vmsgtu_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgtu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtuVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgt.vv` (Vector Mask Set If Greater Than) instruction. +;; This is an alias for `vmslt.vv` with the operands inverted. +(decl rv_vmsgt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vv vs2 vs1 mask vstate) (rv_vmslt_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsgt.vx` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vmsgt_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmsgtVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmsgt.vi` (Vector Mask Set If Greater Than) instruction. +(decl rv_vmsgt_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vmsgt_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VmsgtVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmsgeu.vv` (Vector Mask Set If Greater Than or Equal, Unsigned) instruction. +;; This is an alias for `vmsleu.vv` with the operands inverted. +(decl rv_vmsgeu_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsgeu_vv vs2 vs1 mask vstate) (rv_vmsleu_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmsge.vv` (Vector Mask Set If Greater Than or Equal) instruction. +;; This is an alias for `vmsle.vv` with the operands inverted. +(decl rv_vmsge_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmsge_vv vs2 vs1 mask vstate) (rv_vmsle_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfeq.vv` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfeq.vf` (Vector Mask Set If Float Equal) instruction. +(decl rv_vmfeq_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfeq_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfeqVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vv` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfne.vf` (Vector Mask Set If Float Not Equal) instruction. +(decl rv_vmfne_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfne_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfneVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vv` (Vector Mask Set If Float Less Than) instruction. +(decl rv_vmflt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmflt_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfltVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmflt.vf` (Vector Mask Set If Float Less Than) instruction. 
+(decl rv_vmflt_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmflt_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfltVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfle.vv` (Vector Mask Set If Float Less Than Or Equal) instruction. +(decl rv_vmfle_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfle_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfleVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfle.vf` (Vector Mask Set If Float Less Than Or Equal) instruction. +(decl rv_vmfle_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfle_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfleVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfgt.vv` (Vector Mask Set If Float Greater Than) instruction. +;; This is an alias for `vmflt.vv` with the operands inverted. +(decl rv_vmfgt_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfgt_vv vs2 vs1 mask vstate) (rv_vmflt_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfgt.vf` (Vector Mask Set If Float Greater Than) instruction. +(decl rv_vmfgt_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfgt_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfgtVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmfge.vv` (Vector Mask Set If Float Greater Than Or Equal) instruction. +;; This is an alias for `vmfle.vv` with the operands inverted. +(decl rv_vmfge_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vmfge_vv vs2 vs1 mask vstate) (rv_vmfle_vv vs1 vs2 mask vstate)) + +;; Helper for emitting the `vmfge.vf` (Vector Mask Set If Float Greater Than Or Equal) instruction. +(decl rv_vmfge_vf (VReg FReg VecOpMasking VState) VReg) +(rule (rv_vmfge_vf vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmfgeVF) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vzext.vf2` instruction. +;; Zero-extend SEW/2 source to SEW destination +(decl rv_vzext_vf2 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf2 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF2) vs mask vstate)) + +;; Helper for emitting the `vzext.vf4` instruction. +;; Zero-extend SEW/4 source to SEW destination +(decl rv_vzext_vf4 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf4 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF4) vs mask vstate)) + +;; Helper for emitting the `vzext.vf8` instruction. +;; Zero-extend SEW/8 source to SEW destination +(decl rv_vzext_vf8 (VReg VecOpMasking VState) VReg) +(rule (rv_vzext_vf8 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VzextVF8) vs mask vstate)) + +;; Helper for emitting the `vsext.vf2` instruction. +;; Sign-extend SEW/2 source to SEW destination +(decl rv_vsext_vf2 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf2 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF2) vs mask vstate)) + +;; Helper for emitting the `vsext.vf4` instruction. +;; Sign-extend SEW/4 source to SEW destination +(decl rv_vsext_vf4 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf4 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF4) vs mask vstate)) + +;; Helper for emitting the `vsext.vf8` instruction. +;; Sign-extend SEW/8 source to SEW destination +(decl rv_vsext_vf8 (VReg VecOpMasking VState) VReg) +(rule (rv_vsext_vf8 vs mask vstate) + (vec_alu_rr (VecAluOpRR.VsextVF8) vs mask vstate)) + +;; Helper for emitting the `vnclip.wi` instruction. 
+;; +;; vd[i] = clip(roundoff_signed(vs2[i], uimm)) +(decl rv_vnclip_wi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vnclip_wi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipWI) vs2 imm mask vstate)) + +;; Helper for emitting the `vnclipu.wi` instruction. +;; +;; vd[i] = clip(roundoff_unsigned(vs2[i], uimm)) +(decl rv_vnclipu_wi (VReg UImm5 VecOpMasking VState) VReg) +(rule (rv_vnclipu_wi vs2 imm mask vstate) + (vec_alu_rr_uimm5 (VecAluOpRRImm5.VnclipuWI) vs2 imm mask vstate)) + +;; Helper for emitting the `vmand.mm` (Mask Bitwise AND) instruction. +;; +;; vd.mask[i] = vs2.mask[i] && vs1.mask[i] +(decl rv_vmand_mm (VReg VReg VState) VReg) +(rule (rv_vmand_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmandMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmor.mm` (Mask Bitwise OR) instruction. +;; +;; vd.mask[i] = vs2.mask[i] || vs1.mask[i] +(decl rv_vmor_mm (VReg VReg VState) VReg) +(rule (rv_vmor_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmorMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmnand.mm` (Mask Bitwise NAND) instruction. +;; +;; vd.mask[i] = !(vs2.mask[i] && vs1.mask[i]) +(decl rv_vmnand_mm (VReg VReg VState) VReg) +(rule (rv_vmnand_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmnandMM) vs2 vs1 (unmasked) vstate)) + +;; Helper for emitting the `vmnot.m` (Mask Bitwise NOT) instruction. +;; This is an alias for `vmnand.mm vd, vs, vs` +;; +;; vd.mask[i] = !vs.mask[i] +(decl rv_vmnot_m (VReg VState) VReg) +(rule (rv_vmnot_m vs vstate) (rv_vmnand_mm vs vs vstate)) + +;; Helper for emitting the `vmnor.mm` (Mask Bitwise NOR) instruction. +;; +;; vd.mask[i] = !(vs2.mask[i] || vs1.mask[i]) +(decl rv_vmnor_mm (VReg VReg VState) VReg) +(rule (rv_vmnor_mm vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VmnorMM) vs2 vs1 (unmasked) vstate)) + +;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl gen_extractlane (Type VReg u8) Reg) + +;; When extracting lane 0 for floats, we can use `vfmv.f.s` directly. +(rule 3 (gen_extractlane (ty_vec_fits_in_register ty) src 0) + (if (ty_vector_float ty)) + (rv_vfmv_fs src ty)) + +;; When extracting lane 0 for integers, we can use `vmv.x.s` directly. +(rule 2 (gen_extractlane (ty_vec_fits_in_register ty) src 0) + (if (ty_vector_not_float ty)) + (rv_vmv_xs src ty)) + +;; In the general case, we must first use a `vslidedown` to place the correct lane +;; in index 0, and then use the appropriate `vmv` instruction. +;; If the index fits into a 5-bit immediate, we can emit a `vslidedown.vi`. +(rule 1 (gen_extractlane (ty_vec_fits_in_register ty) src (uimm5_from_u8 idx)) + (gen_extractlane ty (rv_vslidedown_vi src idx (unmasked) ty) 0)) + +;; Otherwise lower it into an X register. +(rule 0 (gen_extractlane (ty_vec_fits_in_register ty) src idx) + (gen_extractlane ty (rv_vslidedown_vx src (imm $I64 idx) (unmasked) ty) 0)) + + +;; Build a vector mask from a u64 +;; TODO(#6571): We should merge this with the `vconst` rules, and take advantage of +;; the other existing `vconst` rules. +(decl gen_vec_mask (u64) VReg) + +;; When the immediate fits in a 5-bit immediate, we can use `vmv.v.i` directly. +(rule 1 (gen_vec_mask (imm5_from_u64 imm)) + (rv_vmv_vi imm (vstate_from_type $I64X2))) + +;; Materialize the mask into an X register, and move it into the bottom of +;; the vector register. +(rule 0 (gen_vec_mask mask) + (rv_vmv_sx (imm $I64 mask) (vstate_from_type $I64X2))) + + +;; Loads a `VCodeConstant` value into a vector register. 
For some special `VCodeConstant`s +;; we can use a dedicated instruction, otherwise we load the value from the pool. +;; +;; Type is the preferred type to use when loading the constant. +(decl gen_constant (Type VCodeConstant) VReg) + +;; The fallback case is to load the constant from the pool. +(rule (gen_constant ty n) + (vec_load + (element_width_from_type ty) + (VecAMode.UnitStride (gen_const_amode n)) + (mem_flags_trusted) + (unmasked) + ty)) + + +;; Emits a vslidedown instruction that moves half the lanes down. +(decl gen_slidedown_half (Type VReg) VReg) + +;; If the lane count can fit in a 5-bit immediate, we can use `vslidedown.vi`. +(rule 1 (gen_slidedown_half (ty_vec_fits_in_register ty) src) + (if-let (uimm5_from_u64 amt) (u64_udiv (ty_lane_count ty) 2)) + (rv_vslidedown_vi src amt (unmasked) ty)) + +;; Otherwise lower it into an X register. +(rule 0 (gen_slidedown_half (ty_vec_fits_in_register ty) src) + (if-let amt (u64_udiv (ty_lane_count ty) 2)) + (rv_vslidedown_vx src (imm $I64 amt) (unmasked) ty)) + + +;; Expands a mask into SEW wide lanes. Enabled lanes are set to all ones, disabled +;; lanes are set to all zeros. +(decl gen_expand_mask (Type VReg) VReg) +(rule (gen_expand_mask ty mask) + (if-let zero (i8_to_imm5 0)) + (if-let neg1 (i8_to_imm5 -1)) + (rv_vmerge_vim (rv_vmv_vi zero ty) neg1 mask ty)) + + +;; Builds a vector mask corresponding to the IntCC operation. +;; TODO: We are still missing some rules here for immediates. See #6623 +(decl gen_icmp_mask (Type IntCC Value Value) VReg) + +;; IntCC.Equal + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) + (rv_vmseq_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x (splat y)) + (rv_vmseq_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) (splat x) y) + (rv_vmseq_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) + (if-let y_imm (replicated_imm5 y)) + (rv_vmseq_vi x y_imm (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.Equal) x y) + (if-let x_imm (replicated_imm5 x)) + (rv_vmseq_vi y x_imm (unmasked) ty)) + +;; IntCC.NotEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) + (rv_vmsne_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x (splat y)) + (rv_vmsne_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) (splat x) y) + (rv_vmsne_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) + (if-let y_imm (replicated_imm5 y)) + (rv_vmsne_vi x y_imm (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.NotEqual) x y) + (if-let x_imm (replicated_imm5 x)) + (rv_vmsne_vi y x_imm (unmasked) ty)) + +;; IntCC.UnsignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x y) + (rv_vmsltu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x (splat y)) + (rv_vmsltu_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) (splat x) y) + (rv_vmsgtu_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThan) x y) + (if-let x_imm (replicated_imm5 x)) + (rv_vmsgtu_vi y x_imm (unmasked) ty)) + +;; IntCC.SignedLessThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x y) + 
(rv_vmslt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x (splat y)) + (rv_vmslt_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) (splat x) y) + (rv_vmsgt_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThan) x y) + (if-let x_imm (replicated_imm5 x)) + (rv_vmsgt_vi y x_imm (unmasked) ty)) + +;; IntCC.UnsignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x y) + (rv_vmsleu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x (splat y)) + (rv_vmsleu_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedLessThanOrEqual) x y) + (if-let y_imm (replicated_imm5 y)) + (rv_vmsleu_vi x y_imm (unmasked) ty)) + +;; IntCC.SignedLessThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x y) + (rv_vmsle_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x (splat y)) + (rv_vmsle_vx x y (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedLessThanOrEqual) x y) + (if-let y_imm (replicated_imm5 y)) + (rv_vmsle_vi x y_imm (unmasked) ty)) + +;; IntCC.UnsignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x y) + (rv_vmsgtu_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x (splat y)) + (rv_vmsgtu_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) (splat x) y) + (rv_vmsltu_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThan) x y) + (if-let y_imm (replicated_imm5 y)) + (rv_vmsgtu_vi x y_imm (unmasked) ty)) + +;; IntCC.SignedGreaterThan + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x y) + (rv_vmsgt_vv x y (unmasked) ty)) + +(rule 1 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x (splat y)) + (rv_vmsgt_vx x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) (splat x) y) + (rv_vmslt_vx y x (unmasked) ty)) + +(rule 3 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThan) x y) + (if-let y_imm (replicated_imm5 y)) + (rv_vmsgt_vi x y_imm (unmasked) ty)) + +;; IntCC.UnsignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) x y) + (rv_vmsgeu_vv x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) (splat x) y) + (rv_vmsleu_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.UnsignedGreaterThanOrEqual) x y) + (if-let x_imm (replicated_imm5 x)) + (rv_vmsleu_vi y x_imm (unmasked) ty)) + +;; IntCC.SignedGreaterThanOrEqual + +(rule 0 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) x y) + (rv_vmsge_vv x y (unmasked) ty)) + +(rule 2 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) (splat x) y) + (rv_vmsle_vx y x (unmasked) ty)) + +(rule 4 (gen_icmp_mask (ty_vec_fits_in_register ty) (IntCC.SignedGreaterThanOrEqual) x y) + (if-let x_imm (replicated_imm5 x)) + (rv_vmsle_vi y x_imm (unmasked) ty)) + + + +;; Builds a vector mask corresponding to the FloatCC operation. 
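Before the constructor itself, a scalar sketch (in Rust, illustrative only and not part of the patch) of the identities the FloatCC rules below rely on: every base RVV float comparison is false when either operand is NaN, so `Ordered` can be built as `eq(x,x) && eq(y,y)` and the `UnorderedOr*` conditions as negations of the ordered comparisons.

    fn ordered(x: f32, y: f32) -> bool {
        // A comparison involving NaN is always false, so `x == x` means "x is not NaN".
        x == x && y == y
    }

    fn unordered_or_gt(x: f32, y: f32) -> bool {
        // UnorderedOrGreaterThan is the negation of the ordered LessThanOrEqual.
        !(x <= y)
    }

    fn main() {
        assert!(!ordered(f32::NAN, 1.0));
        assert!(unordered_or_gt(f32::NAN, 1.0));
        assert!(unordered_or_gt(2.0, 1.0));
        assert!(!unordered_or_gt(1.0, 2.0));
    }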
+(decl gen_fcmp_mask (Type FloatCC Value Value) VReg) + +;; FloatCC.Equal + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x y) + (rv_vmfeq_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) x (splat y)) + (rv_vmfeq_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Equal) (splat x) y) + (rv_vmfeq_vf y x (unmasked) ty)) + +;; FloatCC.NotEqual +;; Note: This is UnorderedNotEqual. It is the only unordered comparison that is not named as such. + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x y) + (rv_vmfne_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) x (splat y)) + (rv_vmfne_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.NotEqual) (splat x) y) + (rv_vmfne_vf y x (unmasked) ty)) + +;; FloatCC.LessThan + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x y) + (rv_vmflt_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) x (splat y)) + (rv_vmflt_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThan) (splat x) y) + (rv_vmfgt_vf y x (unmasked) ty)) + +;; FloatCC.LessThanOrEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x y) + (rv_vmfle_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) x (splat y)) + (rv_vmfle_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.LessThanOrEqual) (splat x) y) + (rv_vmfge_vf y x (unmasked) ty)) + +;; FloatCC.GreaterThan + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x y) + (rv_vmfgt_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) x (splat y)) + (rv_vmfgt_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThan) (splat x) y) + (rv_vmflt_vf y x (unmasked) ty)) + +;; FloatCC.GreaterThanOrEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x y) + (rv_vmfge_vv x y (unmasked) ty)) + +(rule 1 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) x (splat y)) + (rv_vmfge_vf x y (unmasked) ty)) + +(rule 2 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.GreaterThanOrEqual) (splat x) y) + (rv_vmfle_vf y x (unmasked) ty)) + +;; FloatCC.Ordered + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Ordered) x y) + (rv_vmand_mm + (gen_fcmp_mask ty (FloatCC.Equal) x x) + (gen_fcmp_mask ty (FloatCC.Equal) y y) + ty)) + +;; FloatCC.Unordered + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.Unordered) x y) + (rv_vmor_mm + (gen_fcmp_mask ty (FloatCC.NotEqual) x x) + (gen_fcmp_mask ty (FloatCC.NotEqual) y y) + ty)) + +;; FloatCC.OrderedNotEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.OrderedNotEqual) x y) + (rv_vmor_mm + (gen_fcmp_mask ty (FloatCC.LessThan) x y) + (gen_fcmp_mask ty (FloatCC.LessThan) y x) + ty)) + +;; FloatCC.UnorderedOrEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrEqual) x y) + (rv_vmnor_mm + (gen_fcmp_mask ty (FloatCC.LessThan) x y) + (gen_fcmp_mask ty (FloatCC.LessThan) y x) + ty)) + +;; FloatCC.UnorderedOrGreaterThan + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThan) x y) + (rv_vmnot_m (gen_fcmp_mask ty 
(FloatCC.LessThanOrEqual) x y) ty)) + +;; FloatCC.UnorderedOrGreaterThanOrEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrGreaterThanOrEqual) x y) + (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.LessThan) x y) ty)) + +;; FloatCC.UnorderedOrLessThan + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThan) x y) + (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThanOrEqual) x y) ty)) + +;; FloatCC.UnorderedOrLessThanOrEqual + +(rule 0 (gen_fcmp_mask (ty_vec_fits_in_register ty) (FloatCC.UnorderedOrLessThanOrEqual) x y) + (rv_vmnot_m (gen_fcmp_mask ty (FloatCC.GreaterThan) x y) ty)) + + +;; Emits a `vfcvt.x.f.v` instruction with the given rounding mode. +(decl gen_vfcvt_x_f (VReg FRM VState) VReg) + +;; We have a special instruction for RTZ +(rule 1 (gen_vfcvt_x_f x (FRM.RTZ) vstate) + (rv_vfcvt_rtz_x_f_v x (unmasked) vstate)) + +;; In the general case we need to first switch into the appropriate rounding mode. +(rule 0 (gen_vfcvt_x_f x frm vstate) + (let (;; Set the rounding mode and save the current mode + (saved_frm XReg (rv_fsrmi frm)) + (res VReg (rv_vfcvt_x_f_v x (unmasked) vstate)) + ;; Restore the previous rounding mode + (_ Unit (rv_fsrm saved_frm))) + res)) + + +;; Returns the maximum value integer value that can be represented by a float +(decl float_int_max (Type) u64) +(rule (float_int_max $F32) 0x4B000000) +(rule (float_int_max $F64) 0x4330000000000000) + +;; Builds the instruction sequence to round a vector register to FRM +(decl gen_vec_round (VReg FRM Type) VReg) + +;; For floating-point round operations, if the input is NaN, +/-infinity, or +/-0, the +;; same input is returned as the rounded result; this differs from behavior of +;; RISCV fcvt instructions (which round out-of-range values to the nearest +;; max or min value), therefore special handling is needed for these values. +(rule (gen_vec_round x frm (ty_vec_fits_in_register ty)) + (let ((scalar_ty Type (lane_type ty)) + ;; if x is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits + ;; in mantissa, the result is the same as src, build a mask for those cases. + ;; (There is an additional fixup for NaN's at the end) + (abs VReg (rv_vfabs_v x (unmasked) ty)) + (max FReg (imm scalar_ty (float_int_max scalar_ty))) + (exact VReg (rv_vmflt_vf abs max (unmasked) ty)) + + ;; The rounding is performed by converting from float to integer, with the + ;; desired rounding mode. And then converting back with the default rounding + ;; mode. + (int VReg (gen_vfcvt_x_f x frm ty)) + (cvt VReg (rv_vfcvt_f_x_v int (unmasked) ty)) + ;; Copy the sign bit from the original value. + (signed VReg (rv_vfsgnj_vv cvt x (unmasked) ty)) + + ;; We want to return a arithmetic nan if the input is a canonical nan. + ;; Convert them by adding 0.0 to the input. + (float_zero FReg (gen_bitcast (zero_reg) (float_int_of_same_size scalar_ty) scalar_ty)) + (corrected_nan VReg (rv_vfadd_vf x float_zero (unmasked) ty))) + ;; Merge the original value if it does not need rounding, or the rounded value + (rv_vmerge_vvm corrected_nan signed exact ty))) diff --git a/hbcb/src/lib.rs b/hbcb/src/lib.rs new file mode 100644 index 0000000..6eb5530 --- /dev/null +++ b/hbcb/src/lib.rs @@ -0,0 +1,264 @@ +//! risc-v 64-bit Instruction Set Architecture. 
+
+#![allow(clippy::all)]
+
+extern crate alloc;
+
+use {
+    crate::settings as riscv_settings,
+    alloc::{boxed::Box, vec::Vec},
+    core::fmt,
+    cranelift_codegen::{
+        dominator_tree::DominatorTree,
+        ir::{self, Function, Type},
+        isa::{Builder as IsaBuilder, FunctionAlignment, OwnedTargetIsa, TargetIsa},
+        machinst::{
+            compile, CompiledCode, CompiledCodeStencil, MachInst, MachTextSectionBuilder, Reg,
+            SigSet, TextSectionBuilder, VCode,
+        },
+        result::CodegenResult,
+        settings::{self as shared_settings, Flags},
+        CodegenError,
+    },
+    cranelift_control::ControlPlane,
+    target_lexicon::{Architecture, Triple},
+};
+mod abi;
+pub(crate) mod inst;
+mod lower;
+mod settings;
+use self::inst::EmitInfo;
+#[cfg(feature = "unwind")]
+use crate::isa::unwind::systemv;
+
+/// A riscv64 backend.
+pub struct Riscv64Backend {
+    triple: Triple,
+    flags: shared_settings::Flags,
+    isa_flags: riscv_settings::Flags,
+}
+
+impl Riscv64Backend {
+    /// Create a new riscv64 backend with the given (shared) flags.
+    pub fn new_with_flags(
+        triple: Triple,
+        flags: shared_settings::Flags,
+        isa_flags: riscv_settings::Flags,
+    ) -> Riscv64Backend {
+        Riscv64Backend { triple, flags, isa_flags }
+    }
+
+    /// This performs lowering to VCode, register-allocates the code, computes block layout and
+    /// finalizes branches. The result is ready for binary emission.
+    fn compile_vcode(
+        &self,
+        func: &Function,
+        domtree: &DominatorTree,
+        ctrl_plane: &mut ControlPlane,
+    ) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
+        let emit_info = EmitInfo::new(self.flags.clone(), self.isa_flags.clone());
+        let sigs = SigSet::new::<abi::Riscv64MachineDeps>(func, &self.flags)?;
+        let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?;
+        compile::compile::<Riscv64Backend>(func, domtree, self, abi, emit_info, sigs, ctrl_plane)
+    }
+}
+
+impl TargetIsa for Riscv64Backend {
+    fn compile_function(
+        &self,
+        func: &Function,
+        domtree: &DominatorTree,
+        want_disasm: bool,
+        ctrl_plane: &mut ControlPlane,
+    ) -> CodegenResult<CompiledCodeStencil> {
+        let (vcode, regalloc_result) = self.compile_vcode(func, domtree, ctrl_plane)?;
+
+        let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug);
+        let emit_result = vcode.emit(&regalloc_result, want_disasm, &self.flags, ctrl_plane);
+        let frame_size = emit_result.frame_size;
+        let value_labels_ranges = emit_result.value_labels_ranges;
+        let buffer = emit_result.buffer;
+        let sized_stackslot_offsets = emit_result.sized_stackslot_offsets;
+        let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets;
+
+        if let Some(disasm) = emit_result.disasm.as_ref() {
+            log::debug!("disassembly:\n{}", disasm);
+        }
+
+        Ok(CompiledCodeStencil {
+            buffer,
+            frame_size,
+            vcode: emit_result.disasm,
+            value_labels_ranges,
+            sized_stackslot_offsets,
+            dynamic_stackslot_offsets,
+            bb_starts: emit_result.bb_offsets,
+            bb_edges: emit_result.bb_edges,
+        })
+    }
+
+    fn name(&self) -> &'static str {
+        "riscv64"
+    }
+
+    fn dynamic_vector_bytes(&self, _dynamic_ty: ir::Type) -> u32 {
+        16
+    }
+
+    fn triple(&self) -> &Triple {
+        &self.triple
+    }
+
+    fn flags(&self) -> &shared_settings::Flags {
+        &self.flags
+    }
+
+    fn isa_flags(&self) -> Vec<shared_settings::Value> {
+        self.isa_flags.iter().collect()
+    }
+
+    #[cfg(feature = "unwind")]
+    fn emit_unwind_info(
+        &self,
+        result: &CompiledCode,
+        kind: crate::isa::unwind::UnwindInfoKind,
+    ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
+        use crate::isa::unwind::{UnwindInfo, UnwindInfoKind};
+        Ok(match kind {
+            UnwindInfoKind::SystemV => {
+                let mapper = self::inst::unwind::systemv::RegisterMapper;
+                Some(UnwindInfo::SystemV(
+                    crate::isa::unwind::systemv::create_unwind_info_from_insts(
+                        &result.buffer.unwind_info[..],
+                        result.buffer.data().len(),
+                        &mapper,
+                    )?,
+                ))
+            }
+            UnwindInfoKind::Windows => None,
+            _ => None,
+        })
+    }
+
+    #[cfg(feature = "unwind")]
+    fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
+        Some(inst::unwind::systemv::create_cie())
+    }
+
+    fn text_section_builder(&self, num_funcs: usize) -> Box<dyn TextSectionBuilder> {
+        Box::new(MachTextSectionBuilder::<inst::Inst>::new(num_funcs))
+    }
+
+    #[cfg(feature = "unwind")]
+    fn map_regalloc_reg_to_dwarf(&self, reg: Reg) -> Result<u16, systemv::RegisterMappingError> {
+        inst::unwind::systemv::map_reg(reg).map(|reg| reg.0)
+    }
+
+    fn function_alignment(&self) -> FunctionAlignment {
+        inst::Inst::function_alignment()
+    }
+
+    fn page_size_align_log2(&self) -> u8 {
+        debug_assert_eq!(1 << 12, 0x1000);
+        12
+    }
+
+    #[cfg(feature = "disas")]
+    fn to_capstone(&self) -> Result<capstone::Capstone, capstone::Error> {
+        use capstone::prelude::*;
+        let mut cs_builder = Capstone::new().riscv().mode(arch::riscv::ArchMode::RiscV64);
+
+        // Enable C instruction decoding if we have compressed instructions enabled.
+        //
+        // We can't enable this unconditionally because it will cause Capstone to
+        // emit weird instructions and generally mess up when it encounters unknown
+        // instructions, such as any Zba, Zbb, Zbc or Vector instructions.
+        //
+        // This causes the default disassembly to be quite unreadable, so enable
+        // it only when we are actually going to be using them.
+        let uses_compressed = self
+            .isa_flags()
+            .iter()
+            .filter(|f| ["has_zca", "has_zcb", "has_zcd"].contains(&f.name))
+            .any(|f| f.as_bool().unwrap_or(false));
+        if uses_compressed {
+            cs_builder = cs_builder.extra_mode([arch::riscv::ArchExtraMode::RiscVC].into_iter());
+        }
+
+        let mut cs = cs_builder.build()?;
+
+        // Similar to AArch64, RISC-V uses inline constants rather than a separate
+        // constant pool. We want to skip disassembly over inline constants instead
+        // of stopping on invalid bytes.
+        cs.set_skipdata(true)?;
+        Ok(cs)
+    }
+
+    fn has_native_fma(&self) -> bool {
+        true
+    }
+
+    fn has_x86_blendv_lowering(&self, _: Type) -> bool {
+        false
+    }
+
+    fn has_x86_pshufb_lowering(&self) -> bool {
+        false
+    }
+
+    fn has_x86_pmulhrsw_lowering(&self) -> bool {
+        false
+    }
+
+    fn has_x86_pmaddubsw_lowering(&self) -> bool {
+        false
+    }
+}
+
+impl fmt::Display for Riscv64Backend {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("MachBackend")
+            .field("name", &self.name())
+            .field("triple", &self.triple())
+            .field("flags", &format!("{}", self.flags()))
+            .finish()
+    }
+}
+
+/// Create a new `isa::Builder`.
+pub fn isa_builder(triple: Triple) -> IsaBuilder {
+    match triple.architecture {
+        Architecture::Riscv64(..) => {}
+        _ => unreachable!(),
+    }
+    IsaBuilder { triple, setup: riscv_settings::builder(), constructor: isa_constructor }
+}
+
+fn isa_constructor(
+    triple: Triple,
+    shared_flags: Flags,
+    builder: &shared_settings::Builder,
+) -> CodegenResult<OwnedTargetIsa> {
+    let isa_flags = riscv_settings::Flags::new(&shared_flags, builder);
+
+    // The RISC-V backend does not work without at least the G extension enabled.
+    // The G extension is simply a combination of the following extensions:
+    // - I: Base Integer Instruction Set
+    // - M: Integer Multiplication and Division
+    // - A: Atomic Instructions
+    // - F: Single-Precision Floating-Point
+    // - D: Double-Precision Floating-Point
+    // - Zicsr: Control and Status Register Instructions
+    // - Zifencei: Instruction-Fetch Fence
+    //
+    // Ensure that this combination of features is enabled.
+ if !isa_flags.has_g() { + return Err(CodegenError::Unsupported( + "The RISC-V Backend currently requires all the features in the G Extension enabled" + .into(), + )); + } + + let backend = Riscv64Backend::new_with_flags(triple, shared_flags, isa_flags); + Ok(backend.wrapped()) +} diff --git a/hbcb/src/lower.isle b/hbcb/src/lower.isle new file mode 100644 index 0000000..fff894e --- /dev/null +++ b/hbcb/src/lower.isle @@ -0,0 +1,2966 @@ +;; riscv64 instruction selection and CLIF-to-MachInst lowering. + +;; The main lowering constructor term: takes a clif `Inst` and returns the +;; register(s) within which the lowered instruction's result values live. +(decl partial lower (Inst) InstOutput) + +;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty (iconst (u64_from_imm64 n)))) + (imm ty n)) + +;; ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_supported_vec ty) (vconst n))) + (gen_constant ty (const_to_vconst n))) + +;;;; Rules for `f16const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f16const (u16_from_ieee16 n))) + (imm $F16 n)) + +;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f32const (u32_from_ieee32 n))) + (imm $F32 n)) + +;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (f64const (u64_from_ieee64 n))) + (imm $F64 n)) + +;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Base case, simply adding things in registers. +(rule -1 (lower (has_type (fits_in_32 (ty_int ty)) (iadd x y))) + (rv_addw x y)) + +(rule 0 (lower (has_type $I64 (iadd x y))) + (rv_add x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (imm12_from_value y)))) + (alu_rr_imm12 (select_addi ty) x y)) + +(rule 2 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (imm12_from_value x) y))) + (alu_rr_imm12 (select_addi ty) y x)) + +;; Special case when one of the operands is uextended +;; Needs `Zba` +(rule 3 (lower (has_type $I64 (iadd x (uextend y @ (value_type $I32))))) + (if-let $true (has_zba)) + (rv_adduw y x)) + +(rule 4 (lower (has_type $I64 (iadd (uextend x @ (value_type $I32)) y))) + (if-let $true (has_zba)) + (rv_adduw x y)) + +;; Add with const shift. We have a few of these instructions with `Zba`. +(decl pure partial match_shnadd (Imm64) AluOPRRR) +(rule (match_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add)) +(rule (match_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add)) +(rule (match_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add)) + +(rule 3 (lower (has_type $I64 (iadd x (ishl y (maybe_uextend (iconst n)))))) + (if-let $true (has_zba)) + (if-let shnadd (match_shnadd n)) + (alu_rrr shnadd y x)) + +(rule 4 (lower (has_type $I64 (iadd (ishl x (maybe_uextend (iconst n))) y))) + (if-let $true (has_zba)) + (if-let shnadd (match_shnadd n)) + (alu_rrr shnadd x y)) + + +;; Add with uextended const shift. We have a few of these instructions with `Zba`. +;; +;; !!! Important !!! +;; These rules only work for (ishl (uextend _) _) and not for (uextend (ishl _ _))! +;; Getting this wrong means a potential misscalculation of the shift amount. +;; Additionally we can only ensure that this is correct if the uextend is 32 to 64 bits. 
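A minimal Rust sketch (illustrative, not part of the patch) of why that ordering matters: `shNadd.uw` zero-extends the 32-bit operand first and then shifts, so only `(ishl (uextend x) n)` matches its semantics, while shifting in 32 bits before extending can silently drop the carried-out bit.

    fn sh1add_uw(x: u32, y: u64) -> u64 {
        // What the instruction computes: zero-extend, then shift, then add.
        ((x as u64) << 1).wrapping_add(y)
    }

    fn main() {
        let x: u32 = 0x8000_0000;
        // uextend-then-shift keeps the bit shifted past position 31...
        assert_eq!((x as u64) << 1, 0x1_0000_0000);
        // ...while shift-then-uextend loses it, so the two patterns are not interchangeable.
        assert_eq!((x << 1) as u64, 0);
        assert_eq!(sh1add_uw(x, 5), 0x1_0000_0005);
    }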
+(decl pure partial match_shnadd_uw (Imm64) AluOPRRR) +(rule (match_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw)) +(rule (match_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw)) +(rule (match_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw)) + +(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y @ (value_type $I32)) (maybe_uextend (iconst n)))))) + (if-let $true (has_zba)) + (if-let shnadd_uw (match_shnadd_uw n)) + (alu_rrr shnadd_uw y x)) + +(rule 6 (lower (has_type $I64 (iadd (ishl (uextend x @ (value_type $I32)) (maybe_uextend (iconst n))) y))) + (if-let $true (has_zba)) + (if-let shnadd_uw (match_shnadd_uw n)) + (alu_rrr shnadd_uw x y)) + +;; I128 cases +(rule 7 (lower (has_type $I128 (iadd x y))) + (let ((low XReg (rv_add (value_regs_get x 0) (value_regs_get y 0))) + ;; compute carry. + (carry XReg (rv_sltu low (value_regs_get y 0))) + ;; + (high_tmp XReg (rv_add (value_regs_get x 1) (value_regs_get y 1))) + ;; add carry. + (high XReg (rv_add high_tmp carry))) + (value_regs low high))) + +;; SIMD Vectors +(rule 8 (lower (has_type (ty_supported_vec ty) (iadd x y))) + (rv_vadd_vv x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_supported_vec ty) (iadd x (splat y)))) + (rv_vadd_vx x y (unmasked) ty)) + +(rule 10 (lower (has_type (ty_supported_vec ty) (iadd x (splat (sextend y @ (value_type sext_ty)))))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) sext_ty)) + (rv_vwadd_wx x y (unmasked) (vstate_mf2 half_ty))) + +(rule 10 (lower (has_type (ty_supported_vec ty) (iadd x (splat (uextend y @ (value_type uext_ty)))))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) uext_ty)) + (rv_vwaddu_wx x y (unmasked) (vstate_mf2 half_ty))) + +(rule 20 (lower (has_type (ty_supported_vec ty) (iadd x y))) + (if-let y_imm (replicated_imm5 y)) + (rv_vadd_vi x y_imm (unmasked) ty)) + + +(rule 12 (lower (has_type (ty_supported_vec ty) (iadd (splat x) y))) + (rv_vadd_vx y x (unmasked) ty)) + +(rule 13 (lower (has_type (ty_supported_vec ty) (iadd (splat (sextend x @ (value_type sext_ty))) y))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) sext_ty)) + (rv_vwadd_wx y x (unmasked) (vstate_mf2 half_ty))) + +(rule 13 (lower (has_type (ty_supported_vec ty) (iadd (splat (uextend x @ (value_type uext_ty))) y))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) uext_ty)) + (rv_vwaddu_wx y x (unmasked) (vstate_mf2 half_ty))) + +(rule 21 (lower (has_type (ty_supported_vec ty) (iadd x y))) + (if-let x_imm (replicated_imm5 x)) + (rv_vadd_vi y x_imm (unmasked) ty)) + +;; Signed Widening Low Additions + +(rule 9 (lower (has_type (ty_supported_vec _) (iadd x (swiden_low y @ (value_type in_ty))))) + (rv_vwadd_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 12 (lower (has_type (ty_supported_vec _) (iadd (swiden_low x @ (value_type in_ty)) y))) + (rv_vwadd_wv y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_low x @ (value_type in_ty)) + (swiden_low y)))) + (rv_vwadd_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_low x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwadd_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 15 (lower (has_type (ty_supported_vec _) (iadd (splat (sextend x @ (value_type sext_ty))) + (swiden_low y @ 
(value_type in_ty))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwadd_vx y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening High Additions +;; These are the same as the low additions, but we first slide down the inputs. + +(rule 9 (lower (has_type (ty_supported_vec _) (iadd x (swiden_high y @ (value_type in_ty))))) + (rv_vwadd_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 12 (lower (has_type (ty_supported_vec _) (iadd (swiden_high x @ (value_type in_ty)) y))) + (rv_vwadd_wv y (gen_slidedown_half in_ty x) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_high x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwadd_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_high x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwadd_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 15 (lower (has_type (ty_supported_vec _) (iadd (splat (sextend x @ (value_type sext_ty))) + (swiden_high y @ (value_type in_ty))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwadd_vx (gen_slidedown_half in_ty y) x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Low Additions + +(rule 9 (lower (has_type (ty_supported_vec _) (iadd x (uwiden_low y @ (value_type in_ty))))) + (rv_vwaddu_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 12 (lower (has_type (ty_supported_vec _) (iadd (uwiden_low x @ (value_type in_ty)) y))) + (rv_vwaddu_wv y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_low x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwaddu_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_low x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwaddu_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 15 (lower (has_type (ty_supported_vec _) (iadd (splat (uextend x @ (value_type uext_ty))) + (uwiden_low y @ (value_type in_ty))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwaddu_vx y x (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening High Additions +;; These are the same as the low additions, but we first slide down the inputs. 
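A scalar model of that slide-down-then-widen pattern, shown here for a hypothetical I16X8-to-I32X4 unsigned case (illustrative Rust, not part of the patch): sliding the source down by half its lane count moves the upper lanes to index 0, after which the widening add behaves exactly like the low variant.

    fn uwiden_high_add(x: [u16; 8], y: [u32; 4]) -> [u32; 4] {
        // Slide down by half the lanes, then zero-extend each lane and add.
        core::array::from_fn(|i| (x[4 + i] as u32).wrapping_add(y[i]))
    }

    fn main() {
        let x = [0, 0, 0, 0, 1, 2, 3, u16::MAX];
        let y = [10, 10, 10, 10];
        assert_eq!(uwiden_high_add(x, y), [11, 12, 13, 65545]);
    }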
+ +(rule 9 (lower (has_type (ty_supported_vec _) (iadd x (uwiden_high y @ (value_type in_ty))))) + (rv_vwaddu_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 12 (lower (has_type (ty_supported_vec _) (iadd (uwiden_high x @ (value_type in_ty)) y))) + (rv_vwaddu_wv y (gen_slidedown_half in_ty x) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_high x @ (value_type in_ty)) + (uwiden_high y)))) + (rv_vwaddu_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_high x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwaddu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 15 (lower (has_type (ty_supported_vec _) (iadd (splat (uextend y @ (value_type uext_ty))) + (uwiden_high x @ (value_type in_ty))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwaddu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening Mixed High/Low Additions + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_low x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwadd_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (swiden_high x @ (value_type in_ty)) + (swiden_low y)))) + (rv_vwadd_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Mixed High/Low Additions + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_low x @ (value_type in_ty)) + (uwiden_high y)))) + (rv_vwaddu_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 13 (lower (has_type (ty_supported_vec _) (iadd (uwiden_high x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwaddu_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Fused Multiply Accumulate Rules `vmacc` +;; +;; I dont think we can use `vmadd`/`vmnsub` here since it just modifies the multiplication +;; register instead of the addition one. The actual pattern matched seems to be +;; exactly the same. 
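A per-lane sketch of what the matched pattern computes (illustrative Rust, not part of the patch): `vmacc` overwrites the addend register with `vd + vs1 * vs2`, which is why the `iadd` operand that is not the product becomes the accumulator in the rules below.

    fn vmacc_lane(vd: i32, vs1: i32, vs2: i32) -> i32 {
        // vd[i] = vd[i] + vs1[i] * vs2[i], with wrapping lane arithmetic.
        vd.wrapping_add(vs1.wrapping_mul(vs2))
    }

    fn main() {
        assert_eq!(vmacc_lane(10, 3, 4), 22);
    }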
+ +(rule 9 (lower (has_type (ty_supported_vec ty) (iadd x (imul y z)))) + (rv_vmacc_vv x y z (unmasked) ty)) + +(rule 10 (lower (has_type (ty_supported_vec ty) (iadd x (imul y (splat z))))) + (rv_vmacc_vx x y z (unmasked) ty)) + +(rule 11 (lower (has_type (ty_supported_vec ty) (iadd x (imul (splat y) z)))) + (rv_vmacc_vx x z y (unmasked) ty)) + +(rule 12 (lower (has_type (ty_supported_vec ty) (iadd (imul x y) z))) + (rv_vmacc_vv z x y (unmasked) ty)) + +(rule 13 (lower (has_type (ty_supported_vec ty) (iadd (imul x (splat y)) z))) + (rv_vmacc_vx z x y (unmasked) ty)) + +(rule 14 (lower (has_type (ty_supported_vec ty) (iadd (imul (splat x) y) z))) + (rv_vmacc_vx z y x (unmasked) ty)) + +;; Fused Multiply Subtract Rules `vnmsac` + +(rule 9 (lower (has_type (ty_supported_vec ty) (iadd x (ineg (imul y z))))) + (rv_vnmsac_vv x y z (unmasked) ty)) + +(rule 10 (lower (has_type (ty_supported_vec ty) (iadd x (ineg (imul y (splat z)))))) + (rv_vnmsac_vx x y z (unmasked) ty)) + +(rule 11 (lower (has_type (ty_supported_vec ty) (iadd x (ineg (imul (splat y) z))))) + (rv_vnmsac_vx x z y (unmasked) ty)) + +(rule 12 (lower (has_type (ty_supported_vec ty) (iadd (ineg (imul x y)) z))) + (rv_vnmsac_vv z x y (unmasked) ty)) + +(rule 13 (lower (has_type (ty_supported_vec ty) (iadd (ineg (imul x (splat y))) z))) + (rv_vnmsac_vx z x y (unmasked) ty)) + +(rule 14 (lower (has_type (ty_supported_vec ty) (iadd (ineg (imul (splat x) y)) z))) + (rv_vnmsac_vx z y x (unmasked) ty)) + +;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;; +(rule 0 (lower (has_type (fits_in_32 ty) (uadd_overflow_trap x y tc))) + (let ((tmp_x XReg (zext x)) + (tmp_y XReg (zext y)) + (sum XReg (rv_add tmp_x tmp_y)) + (test XReg (rv_srli sum (imm12_const (ty_bits ty)))) + (_ InstOutput (gen_trapnz test tc))) + sum)) + +(rule 1 (lower (has_type $I64 (uadd_overflow_trap x y tc))) + (let ((tmp XReg (rv_add x y)) + (_ InstOutput (gen_trapif (IntCC.UnsignedLessThan) tmp x tc))) + tmp)) + +;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Base case, simply subtracting things in registers. + +(rule 0 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y))) + (rv_subw x y)) + +(rule 1 (lower (has_type $I64 (isub x y))) + (rv_sub x y)) + +(rule 2 (lower (has_type $I128 (isub x y))) + (i128_sub x y)) + +;; Switch to an `addi` by a negative if we can fit the value in an `imm12`. 
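A small sketch of that rewrite before the rule itself (illustrative Rust; `imm12_from_negated_value` is the extractor that performs the real check): `x - y` becomes `addi x, -y` whenever the negated constant fits the signed 12-bit immediate range.

    fn isub_as_addi(x: i64, y: i64) -> Option<i64> {
        // addi takes a signed 12-bit immediate in [-2048, 2047].
        let neg = y.checked_neg()?;
        if (-2048..=2047).contains(&neg) { Some(x.wrapping_add(neg)) } else { None }
    }

    fn main() {
        assert_eq!(isub_as_addi(100, 7), Some(93)); // x - 7 => addi x, x, -7
        assert_eq!(isub_as_addi(100, -2048), None); // 2048 does not fit in imm12
    }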
+(rule 3 (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y))) + (if-let imm12_neg (imm12_from_negated_value y)) + (alu_rr_imm12 (select_addi ty) x imm12_neg)) + +;; SIMD Vectors +(rule 4 (lower (has_type (ty_supported_vec ty) (isub x y))) + (rv_vsub_vv x y (unmasked) ty)) + +(rule 5 (lower (has_type (ty_supported_vec ty) (isub x (splat y)))) + (rv_vsub_vx x y (unmasked) ty)) + +(rule 6 (lower (has_type (ty_supported_vec ty) (isub x (splat (sextend y @ (value_type sext_ty)))))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) sext_ty)) + (rv_vwsub_wx x y (unmasked) (vstate_mf2 half_ty))) + +(rule 6 (lower (has_type (ty_supported_vec ty) (isub x (splat (uextend y @ (value_type uext_ty)))))) + (if-let half_ty (ty_half_width ty)) + (if-let $true (ty_equal (lane_type half_ty) uext_ty)) + (rv_vwsubu_wx x y (unmasked) (vstate_mf2 half_ty))) + +(rule 7 (lower (has_type (ty_supported_vec ty) (isub (splat x) y))) + (rv_vrsub_vx y x (unmasked) ty)) + +(rule 8 (lower (has_type (ty_supported_vec ty) (isub x y))) + (if-let imm5_neg (negated_replicated_imm5 y)) + (rv_vadd_vi x imm5_neg (unmasked) ty)) + +(rule 9 (lower (has_type (ty_supported_vec ty) (isub x y))) + (if-let x_imm (replicated_imm5 x)) + (rv_vrsub_vi y x_imm (unmasked) ty)) + + +;; Signed Widening Low Subtractions + +(rule 6 (lower (has_type (ty_supported_vec _) (isub x (swiden_low y @ (value_type in_ty))))) + (rv_vwsub_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_low x @ (value_type in_ty)) + (swiden_low y)))) + (rv_vwsub_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_low x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwsub_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening High Subtractions +;; These are the same as the low widenings, but we first slide down the inputs. + +(rule 6 (lower (has_type (ty_supported_vec _) (isub x (swiden_high y @ (value_type in_ty))))) + (rv_vwsub_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_high x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwsub_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_high x @ (value_type in_ty)) + (splat (sextend y @ (value_type sext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) sext_ty)) + (rv_vwsub_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Low Subtractions + +(rule 6 (lower (has_type (ty_supported_vec _) (isub x (uwiden_low y @ (value_type in_ty))))) + (rv_vwsubu_wv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_low x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwsubu_vv x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_low x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwsubu_vx x y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening High Subtractions +;; These are the same as the low widenings, but we first slide down the inputs. 
+ +(rule 6 (lower (has_type (ty_supported_vec _) (isub x (uwiden_high y @ (value_type in_ty))))) + (rv_vwsubu_wv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_high x @ (value_type in_ty)) + (uwiden_high y)))) + (rv_vwsubu_vv (gen_slidedown_half in_ty x) (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_high x @ (value_type in_ty)) + (splat (uextend y @ (value_type uext_ty)))))) + (if-let $true (ty_equal (lane_type in_ty) uext_ty)) + (rv_vwsubu_vx (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Signed Widening Mixed High/Low Subtractions + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_low x @ (value_type in_ty)) + (swiden_high y)))) + (rv_vwsub_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (swiden_high x @ (value_type in_ty)) + (swiden_low y)))) + (rv_vwsub_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +;; Unsigned Widening Mixed High/Low Subtractions + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_low x @ (value_type in_ty)) + (uwiden_high y)))) + (rv_vwsubu_vv x (gen_slidedown_half in_ty y) (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + +(rule 10 (lower (has_type (ty_supported_vec _) (isub (uwiden_high x @ (value_type in_ty)) + (uwiden_low y)))) + (rv_vwsubu_vv (gen_slidedown_half in_ty x) y (unmasked) (vstate_mf2 (ty_half_lanes in_ty)))) + + +;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_int ty) (ineg val))) + (neg ty val)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (ineg x))) + (rv_vneg_v x (unmasked) ty)) + + +;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y))) + (rv_mul x y)) + +(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y))) + (rv_mulw x y)) + +;; for I128 +(rule 2 (lower (has_type $I128 (imul x y))) + (let + ((x_regs ValueRegs x) + (x_lo XReg (value_regs_get x_regs 0)) + (x_hi XReg (value_regs_get x_regs 1)) + + ;; Get the high/low registers for `y`. + (y_regs ValueRegs y) + (y_lo XReg (value_regs_get y_regs 0)) + (y_hi XReg (value_regs_get y_regs 1)) + + ;; 128bit mul formula: + ;; dst_lo = x_lo * y_lo + ;; dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo) + ;; + ;; We can convert the above formula into the following + ;; mulhu dst_hi, x_lo, y_lo + ;; madd dst_hi, x_lo, y_hi, dst_hi + ;; madd dst_hi, x_hi, y_lo, dst_hi + ;; madd dst_lo, x_lo, y_lo, zero + (dst_hi1 XReg (rv_mulhu x_lo y_lo)) + (dst_hi2 XReg (madd x_lo y_hi dst_hi1)) + (dst_hi XReg (madd x_hi y_lo dst_hi2)) + (dst_lo XReg (madd x_lo y_lo (zero_reg)))) + (value_regs dst_lo dst_hi))) + +;; Special case 128-bit multiplication where the operands are extended since +;; that maps directly to the `mulhu` and `mulh` instructions. 
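A sketch of why that works in the unsigned case (illustrative Rust, not part of the patch): when both operands are zero-extended from 64 bits, the low half of the 128-bit product is an ordinary `mul` and the high half is exactly `mulhu`.

    fn mulhu(x: u64, y: u64) -> u64 {
        (((x as u128) * (y as u128)) >> 64) as u64
    }

    fn main() {
        let (x, y) = (u64::MAX, 123);
        let wide = (x as u128) * (y as u128);
        assert_eq!(wide as u64, x.wrapping_mul(y)); // low 64 bits: mul
        assert_eq!((wide >> 64) as u64, mulhu(x, y)); // high 64 bits: mulhu
    }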
+(rule 6 (lower (has_type $I128 (imul (uextend x) (uextend y)))) + (let ((x XReg (zext x)) + (y XReg (zext y))) + (value_regs (rv_mul x y) (rv_mulhu x y)))) + +(rule 6 (lower (has_type $I128 (imul (sextend x) (sextend y)))) + (let ((x XReg (sext x)) + (y XReg (sext y))) + (value_regs (rv_mul x y) (rv_mulh x y)))) + +;; Vector multiplication + +(rule 3 (lower (has_type (ty_supported_vec ty) (imul x y))) + (rv_vmul_vv x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_supported_vec ty) (imul (splat x) y))) + (rv_vmul_vx y x (unmasked) ty)) + +(rule 5 (lower (has_type (ty_supported_vec ty) (imul x (splat y)))) + (rv_vmul_vx x y (unmasked) ty)) + +;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y))) + (lower_smlhi ty (sext x) (sext y))) + +(rule 1 (lower (has_type (ty_supported_vec ty) (smulhi x y))) + (rv_vmulh_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (smulhi (splat x) y))) + (rv_vmulh_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (smulhi x (splat y)))) + (rv_vmulh_vx x y (unmasked) ty)) + +;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (fits_in_32 ty) (umulhi x y))) + (let ((tmp XReg (rv_mul (zext x) (zext y)))) + (rv_srli tmp (imm12_const (ty_bits ty))))) + +(rule 1 (lower (has_type $I64 (umulhi x y))) + (rv_mulhu x y)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (umulhi x y))) + (rv_vmulhu_vv x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (umulhi (splat x) y))) + (rv_vmulhu_vx y x (unmasked) ty)) + +(rule 4 (lower (has_type (ty_supported_vec ty) (umulhi x (splat y)))) + (rv_vmulhu_vx x y (unmasked) ty)) + +;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_16 ty) (udiv x y))) + (if-let $true (has_m)) + (rv_divuw (zext x) (nonzero_divisor (zext y)))) + +(rule 1 (lower (has_type (fits_in_16 ty) (udiv x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 ty imm)) + (rv_divuw (zext x) (zext y))) + +(rule 2 (lower (has_type $I32 (udiv x y))) + (if-let $true (has_m)) + (rv_divuw x (nonzero_divisor (zext y)))) + +(rule 3 (lower (has_type $I32 (udiv x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 $I32 imm)) + (rv_divuw x y)) + +(rule 2 (lower (has_type $I64 (udiv x y))) + (if-let $true (has_m)) + (rv_divu x (nonzero_divisor y))) + +(rule 3 (lower (has_type $I64 (udiv x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 $I64 imm)) + (rv_divu x y)) + +;; Traps if the input register is zero, otherwise returns the same register. 
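Before the helper itself, a scalar model of the two division traps handled here and in `safe_sdiv_divisor` below (illustrative Rust, not part of the patch): division by zero always traps, and signed division additionally traps on `MIN / -1`; the `(x ^ MIN) | !y` expression used below is zero exactly in that overflow case.

    fn sdiv_traps(x: i64, y: i64) -> bool {
        // Trap when the divisor is zero, or when x == i64::MIN and y == -1.
        y == 0 || ((x ^ i64::MIN) | !y) == 0
    }

    fn main() {
        assert!(sdiv_traps(42, 0));
        assert!(sdiv_traps(i64::MIN, -1)); // i64::MIN / -1 overflows
        assert!(!sdiv_traps(i64::MIN, 2));
        assert!(!sdiv_traps(-1, -1));
    }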
+(decl nonzero_divisor (XReg) XReg) +(rule (nonzero_divisor val) + (let ((_ InstOutput (gen_trapif (IntCC.Equal) val (zero_reg) (TrapCode.IntegerDivisionByZero)))) + val)) + +;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_16 ty) (sdiv x y))) + (if-let $true (has_m)) + (let ((x XReg (sext x))) + (rv_divw x (safe_sdiv_divisor ty x (sext y))))) + +(rule 1 (lower (has_type (fits_in_16 ty) (sdiv x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 ty imm)) + (rv_divw (sext x) (sext y))) + +(rule 2 (lower (has_type $I32 (sdiv x y))) + (if-let $true (has_m)) + (let ((x XReg (sext x))) + (rv_divw x (safe_sdiv_divisor $I32 x (sext y))))) + +(rule 3 (lower (has_type $I32 (sdiv x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 $I32 imm)) + (rv_divw x y)) + +(rule 2 (lower (has_type $I64 (sdiv x y))) + (if-let $true (has_m)) + (rv_div x (safe_sdiv_divisor $I64 x y))) + +(rule 3 (lower (has_type $I64 (sdiv x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 $I64 imm)) + (rv_div x y)) + +;; Check for two trapping conditions: +;; +;; * the divisor is 0, or... +;; * the divisor is -1 and the dividend is $ty::MIN +(decl safe_sdiv_divisor (Type XReg XReg) XReg) +(rule (safe_sdiv_divisor ty x y) + (let ( + (y XReg (nonzero_divisor y)) + (min XReg (imm $I64 (u64_shl 0xffffffff_ffffffff (u64_sub (ty_bits ty) 1)))) + (x_is_not_min XReg (rv_xor x min)) + (y_is_not_neg_one XReg (rv_not y)) + (no_int_overflow XReg (rv_or x_is_not_min y_is_not_neg_one)) + (_ InstOutput (gen_trapif + (IntCC.Equal) + no_int_overflow (zero_reg) + (TrapCode.IntegerOverflow)))) + y)) + +;;;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_16 ty) (urem x y))) + (if-let $true (has_m)) + (rv_remuw (zext x) (nonzero_divisor (zext y)))) + +(rule 1 (lower (has_type (fits_in_16 ty) (urem x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 ty imm)) + (rv_remuw (zext x) (zext y))) + +(rule 2 (lower (has_type $I32 (urem x y))) + (if-let $true (has_m)) + (rv_remuw x (nonzero_divisor (zext y)))) + +(rule 3 (lower (has_type $I32 (urem x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 $I32 imm)) + (rv_remuw x y)) + +(rule 2 (lower (has_type $I64 (urem x y))) + (if-let $true (has_m)) + (rv_remu x (nonzero_divisor y))) + +(rule 3 (lower (has_type $I64 (urem x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 $I64 imm)) + (rv_remu x y)) + +;;;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_16 ty) (srem x y))) + (if-let $true (has_m)) + (rv_remw (sext x) (nonzero_divisor (sext y)))) + +(rule 1 (lower (has_type (fits_in_16 ty) (srem x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 ty imm)) + (rv_remw (sext x) (sext y))) + +(rule 2 (lower (has_type $I32 (srem x y))) + (if-let $true (has_m)) + (rv_remw x (nonzero_divisor (sext y)))) + +(rule 3 (lower (has_type $I32 (srem x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 $I32 imm)) + (rv_remw x y)) + +(rule 2 (lower (has_type $I64 (srem x y))) + (if-let $true (has_m)) + (rv_rem x (nonzero_divisor y))) + +(rule 3 (lower (has_type $I64 (srem x y @ (iconst imm)))) + (if-let $true (has_m)) + (if (safe_divisor_from_imm64 $I64 imm)) + (rv_rem x y)) + +;;;; Rules for `and` 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule -1 (lower (has_type (fits_in_64 ty) (band x y))) + (rv_and x y)) + +(rule 0 (lower (has_type $I128 (band x y))) + (value_regs + (rv_and (value_regs_get x 0) (value_regs_get y 0)) + (rv_and (value_regs_get x 1) (value_regs_get y 1)))) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (band x (imm12_from_value y)))) + (rv_andi x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (band (imm12_from_value x) y))) + (rv_andi y x)) + +(rule 3 (lower (has_type (ty_supported_float ty) (band x y))) + (lower_float_binary (AluOPRRR.And) x y ty)) + +;; Specialized lowerings for `(band x (bnot y))` which is additionally produced +;; by Cranelift's `band_not` instruction that is legalized into the simpler +;; forms early on. + +(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (band x (bnot y)))) + (if-let $true (has_zbb)) + (rv_andn x y)) + +(rule 5 (lower (has_type (fits_in_64 (ty_int ty)) (band (bnot y) x))) + (if-let $true (has_zbb)) + (rv_andn x y)) + +(rule 6 (lower (has_type $I128 (band x (bnot y)))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_andn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 7 (lower (has_type $I128 (band (bnot y) x))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_andn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 8 (lower (has_type (ty_supported_vec ty) (band x y))) + (rv_vand_vv x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_supported_vec ty) (band x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vand_vx x y (unmasked) ty)) + +(rule 10 (lower (has_type (ty_supported_vec ty) (band (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vand_vx y x (unmasked) ty)) + +(rule 11 (lower (has_type (ty_supported_vec ty) (band x y))) + (if-let y_imm (replicated_imm5 y)) + (rv_vand_vi x y_imm (unmasked) ty)) + +(rule 12 (lower (has_type (ty_supported_vec ty) (band x y))) + (if-let x_imm (replicated_imm5 x)) + (rv_vand_vi y x_imm (unmasked) ty)) + +;; `bclr{,i}` specializations from `zbs` + +(rule 13 (lower (has_type (fits_in_32 ty) (band x (bnot (ishl (i64_from_iconst 1) y))))) + (if-let $true (has_zbs)) + (rv_bclr x (rv_andi y (imm12_const (u8_sub (ty_bits ty) 1))))) +(rule 14 (lower (has_type (fits_in_32 ty) (band (bnot (ishl (i64_from_iconst 1) y)) x))) + (if-let $true (has_zbs)) + (rv_bclr x (rv_andi y (imm12_const (u8_sub (ty_bits ty) 1))))) + +(rule 15 (lower (has_type $I64 (band x (bnot (ishl (i64_from_iconst 1) y))))) + (if-let $true (has_zbs)) + (rv_bclr x y)) +(rule 16 (lower (has_type $I64 (band (bnot (ishl (i64_from_iconst 1) y)) x))) + (if-let $true (has_zbs)) + (rv_bclr x y)) + +(rule 17 (lower (has_type (fits_in_64 ty) (band x (u64_from_iconst n)))) + (if-let $true (has_zbs)) + (if-let imm (bclr_imm ty n)) + (rv_bclri x imm)) +(rule 18 (lower (has_type (fits_in_64 ty) (band (u64_from_iconst n) x))) + (if-let $true (has_zbs)) + (if-let imm (bclr_imm ty n)) + (rv_bclri x imm)) + +(decl pure partial bclr_imm (Type u64) Imm12) +(extern constructor bclr_imm bclr_imm) + +;; `bext{,i}` specializations from `zbs` + +(rule 19 (lower (has_type $I32 (band (ushr x y) (u64_from_iconst 1)))) + (if-let $true (has_zbs)) + (rv_bext x (rv_andi y (imm12_const 31)))) +(rule 19 (lower (has_type $I32 (band (sshr x y) (u64_from_iconst 
1)))) + (if-let $true (has_zbs)) + (rv_bext x (rv_andi y (imm12_const 31)))) +(rule 19 (lower (has_type $I32 (band (u64_from_iconst 1) (ushr x y)))) + (if-let $true (has_zbs)) + (rv_bext x (rv_andi y (imm12_const 31)))) +(rule 19 (lower (has_type $I32 (band (u64_from_iconst 1) (sshr x y)))) + (if-let $true (has_zbs)) + (rv_bext x (rv_andi y (imm12_const 31)))) + +(rule 19 (lower (has_type $I64 (band (ushr x y) (u64_from_iconst 1)))) + (if-let $true (has_zbs)) + (rv_bext x y)) +(rule 19 (lower (has_type $I64 (band (sshr x y) (u64_from_iconst 1)))) + (if-let $true (has_zbs)) + (rv_bext x y)) +(rule 19 (lower (has_type $I64 (band (u64_from_iconst 1) (ushr x y)))) + (if-let $true (has_zbs)) + (rv_bext x y)) +(rule 19 (lower (has_type $I64 (band (u64_from_iconst 1) (sshr x y)))) + (if-let $true (has_zbs)) + (rv_bext x y)) + +(rule 20 (lower (has_type $I32 (band (ushr x (imm12_from_value y)) (u64_from_iconst 1)))) + (if-let $true (has_zbs)) + (rv_bexti x (imm12_and y 31))) +(rule 20 (lower (has_type $I32 (band (sshr x (imm12_from_value y)) (u64_from_iconst 1)))) + (if-let $true (has_zbs)) + (rv_bexti x (imm12_and y 31))) +(rule 20 (lower (has_type $I64 (band (ushr x (imm12_from_value y)) (u64_from_iconst 1)))) + (if-let $true (has_zbs)) + (rv_bexti x (imm12_and y 63))) +(rule 20 (lower (has_type $I64 (band (sshr x (imm12_from_value y)) (u64_from_iconst 1)))) + (if-let $true (has_zbs)) + (rv_bexti x (imm12_and y 63))) + +;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_int ty) (bor x y))) + (gen_or ty x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (imm12_from_value y)))) + (rv_ori x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bor (imm12_from_value x) y))) + (rv_ori y x)) + +(rule 3 (lower (has_type (ty_supported_float ty) (bor x y))) + (lower_float_binary (AluOPRRR.Or) x y ty)) + +;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced +;; by Cranelift's `bor_not` instruction that is legalized into the simpler +;; forms early on. 
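A scalar model of these bit-manipulation specializations (illustrative Rust, not part of the patch): `orn`/`andn` fold a `bnot` into one Zbb instruction, and the Zbs rules in the surrounding sections recognize the classic single-bit idioms.

    fn orn(x: u64, y: u64) -> u64 { x | !y } // bor + bnot in one instruction
    fn bclr(x: u64, k: u32) -> u64 { x & !(1u64 << k) } // clear bit k
    fn bset(x: u64, k: u32) -> u64 { x | (1u64 << k) } // set bit k
    fn binv(x: u64, k: u32) -> u64 { x ^ (1u64 << k) } // invert bit k
    fn bext(x: u64, k: u32) -> u64 { (x >> k) & 1 } // extract bit k

    fn main() {
        assert_eq!(orn(0b0011, !0b0100u64), 0b0111);
        assert_eq!(bclr(0b1111, 1), 0b1101);
        assert_eq!(bset(0b0000, 3), 0b1000);
        assert_eq!(binv(0b1000, 3), 0b0000);
        assert_eq!(bext(0b1010, 1), 1);
    }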
+ +(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (bnot y)))) + (if-let $true (has_zbb)) + (rv_orn x y)) + +(rule 5 (lower (has_type (fits_in_64 (ty_int ty)) (bor (bnot y) x))) + (if-let $true (has_zbb)) + (rv_orn x y)) + +(rule 6 (lower (has_type $I128 (bor x (bnot y)))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_orn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 7 (lower (has_type $I128 (bor (bnot y) x))) + (if-let $true (has_zbb)) + (let ((low XReg (rv_orn (value_regs_get x 0) (value_regs_get y 0))) + (high XReg (rv_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +(rule 8 (lower (has_type (ty_supported_vec ty) (bor x y))) + (rv_vor_vv x y (unmasked) ty)) + +(rule 9 (lower (has_type (ty_supported_vec ty) (bor x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vor_vx x y (unmasked) ty)) + +(rule 10 (lower (has_type (ty_supported_vec ty) (bor (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vor_vx y x (unmasked) ty)) + +(rule 11 (lower (has_type (ty_supported_vec ty) (bor x y))) + (if-let y_imm (replicated_imm5 y)) + (rv_vor_vi x y_imm (unmasked) ty)) + +(rule 12 (lower (has_type (ty_supported_vec ty) (bor x y))) + (if-let x_imm (replicated_imm5 x)) + (rv_vor_vi y x_imm (unmasked) ty)) + +;; `bset{,i}` specializations from `zbs` + +(rule 13 (lower (has_type $I32 (bor x (ishl (i64_from_iconst 1) y)))) + (if-let $true (has_zbs)) + (rv_bset x (rv_andi y (imm12_const 31)))) +(rule 14 (lower (has_type $I32 (bor (ishl (i64_from_iconst 1) y) x))) + (if-let $true (has_zbs)) + (rv_bset x (rv_andi y (imm12_const 31)))) + +(rule 13 (lower (has_type $I64 (bor x (ishl (i64_from_iconst 1) y)))) + (if-let $true (has_zbs)) + (rv_bset x y)) +(rule 14 (lower (has_type $I64 (bor (ishl (i64_from_iconst 1) y) x))) + (if-let $true (has_zbs)) + (rv_bset x y)) + +(rule 15 (lower (has_type (fits_in_64 _) (bor x (u64_from_iconst n)))) + (if-let $true (has_zbs)) + (if-let imm (bseti_imm n)) + (rv_bseti x imm)) +(rule 16 (lower (has_type (fits_in_64 _) (bor (u64_from_iconst n) x))) + (if-let $true (has_zbs)) + (if-let imm (bseti_imm n)) + (rv_bseti x imm)) + +(decl pure partial bseti_imm (u64) Imm12) +(extern constructor bseti_imm bseti_imm) + +;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y))) + (rv_xor x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. 
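The `bset{,i}` rules above follow the same pattern-decoding idea: an `or` against a shifted one sets a single bit. A tiny sketch (illustrative only), with the shift amount masked the way the 32-bit rule does with `andi`:

// `bor x (ishl 1 y)` sets bit `y`; when the mask is a constant power of two
// the immediate form `bseti` applies instead.
fn bset(x: u64, y: u32) -> u64 {
    x | (1u64 << (y & 63))
}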
+(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x (imm12_from_value y)))) + (rv_xori x y)) + +(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bxor (imm12_from_value x) y))) + (rv_xori y x)) + +(rule 3 (lower (has_type $I128 (bxor x y))) + (lower_b128_binary (AluOPRRR.Xor) x y)) + +(rule 4 (lower (has_type (ty_supported_float ty) (bxor x y))) + (lower_float_binary (AluOPRRR.Xor) x y ty)) + +(rule 5 (lower (has_type (ty_supported_vec ty) (bxor x y))) + (rv_vxor_vv x y (unmasked) ty)) + +(rule 6 (lower (has_type (ty_supported_vec ty) (bxor x (splat y)))) + (if (ty_vector_not_float ty)) + (rv_vxor_vx x y (unmasked) ty)) + +(rule 7 (lower (has_type (ty_supported_vec ty) (bxor (splat x) y))) + (if (ty_vector_not_float ty)) + (rv_vxor_vx y x (unmasked) ty)) + +(rule 8 (lower (has_type (ty_supported_vec ty) (bxor x y))) + (if-let y_imm (replicated_imm5 y)) + (rv_vxor_vi x y_imm (unmasked) ty)) + +(rule 9 (lower (has_type (ty_supported_vec ty) (bxor x y))) + (if-let x_imm (replicated_imm5 x)) + (rv_vxor_vi y x_imm (unmasked) ty)) + +;; `binv{,i}` specializations from `zbs` + +(rule 13 (lower (has_type $I32 (bxor x (ishl (i64_from_iconst 1) y)))) + (if-let $true (has_zbs)) + (rv_binv x (rv_andi y (imm12_const 31)))) +(rule 14 (lower (has_type $I32 (bxor (ishl (i64_from_iconst 1) y) x))) + (if-let $true (has_zbs)) + (rv_binv x (rv_andi y (imm12_const 31)))) + +(rule 13 (lower (has_type $I64 (bxor x (ishl (i64_from_iconst 1) y)))) + (if-let $true (has_zbs)) + (rv_binv x y)) +(rule 14 (lower (has_type $I64 (bxor (ishl (i64_from_iconst 1) y) x))) + (if-let $true (has_zbs)) + (rv_binv x y)) + +(rule 15 (lower (has_type (fits_in_64 _) (bxor x (u64_from_iconst n)))) + (if-let $true (has_zbs)) + (if-let imm (binvi_imm n)) + (rv_binvi x imm)) +(rule 16 (lower (has_type (fits_in_64 _) (bxor (u64_from_iconst n) x))) + (if-let $true (has_zbs)) + (if-let imm (binvi_imm n)) + (rv_binvi x imm)) + +(decl pure partial binvi_imm (u64) Imm12) +(extern constructor binvi_imm binvi_imm) + +;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int_ref_scalar_64 _) (bnot x))) + (rv_not x)) + +(rule 1 (lower (has_type (ty_supported_float ty) (bnot x))) + (move_x_to_f (rv_not (move_f_to_x x ty)) (float_int_of_same_size ty))) + +(rule 2 (lower (has_type $I128 (bnot x))) + (value_regs + (rv_not (value_regs_get x 0)) + (rv_not (value_regs_get x 1)))) + +(rule 3 (lower (has_type (ty_supported_vec ty) (bnot x))) + (rv_vnot_v x (unmasked) ty)) + +(rule 4 (lower (has_type (ty_int_ref_scalar_64 _) (bnot (bxor x y)))) + (if-let $true (has_zbb)) + (rv_xnor x y)) + +;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (bitrev x))) + (gen_bitrev ty x)) + +(rule 1 (lower (has_type $I128 (bitrev x))) + (value_regs + (gen_bitrev $I64 (value_regs_get x 1)) + (gen_bitrev $I64 (value_regs_get x 0)))) + + +;; Constructs a sequence of instructions that reverse all bits in `x` up to +;; the given type width. 
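A scalar sketch of the narrow-type trick used by the `gen_bitrev` constructor that follows: reverse the full 64-bit register, then shift the reversed bits back down. Here `reverse_bits` stands in for the `brev8` plus byte-swap sequence; this is an illustration, not the lowering itself.

// Bit-reverse only the low `width` bits of `x`: after reversing all 64 bits,
// the interesting bits sit at the top, so shift them down by 64 - width.
fn bitrev_narrow(x: u64, width: u32) -> u64 {
    debug_assert!(width == 16 || width == 32);
    x.reverse_bits() >> (64 - width)
}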
+(decl gen_bitrev (Type XReg) XReg) + +(rule 0 (gen_bitrev (ty_16_or_32 (ty_int ty)) x) + (if-let shift_amt (u64_to_imm12 (u64_sub 64 (ty_bits ty)))) + (rv_srli (gen_bitrev $I64 x) shift_amt)) + +(rule 1 (gen_bitrev $I8 x) + (gen_brev8 x $I8)) + +(rule 1 (gen_bitrev $I64 x) + (gen_brev8 (gen_bswap $I64 x) $I64)) + + +;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bswap x))) + (gen_bswap ty x)) + +(rule 2 (lower (has_type $I128 (bswap x))) + (value_regs + (gen_bswap $I64 (value_regs_get x 1)) + (gen_bswap $I64 (value_regs_get x 0)))) + +;; Builds a sequence of instructions that swaps the bytes in `x` up to the given +;; type width. +(decl gen_bswap (Type XReg) XReg) + +;; This is only here to make the rule below work. bswap.i8 isn't valid +(rule 0 (gen_bswap $I8 x) x) +(rule 1 (gen_bswap (ty_int_ref_16_to_64 ty) x) + (if-let half_ty (ty_half_width ty)) + (if-let half_size (u64_to_imm12 (ty_bits half_ty))) + (let (;; This swaps the top bytes and zeroes the bottom bytes, so that + ;; we can or it with the bottom bytes later. + (swap_top XReg (gen_bswap half_ty x)) + (top XReg (rv_slli swap_top half_size)) + + ;; Get the top half, swap it, and zero extend it so we can `or` it + ;; with the bottom half. Note that zero extension here already knows + ;; that `zbb` isn't available and that `half_ty` is not `$I64`, so this + ;; falls back to the shift-then-shift sequence. + (shifted XReg (rv_srli x half_size)) + (swap_bot XReg (gen_bswap half_ty shifted)) + (shift Imm12 (imm_from_bits (u64_sub 64 (ty_bits half_ty)))) + (bot_shifted_left XReg (rv_slli swap_bot shift)) + (bot XReg (rv_srli bot_shifted_left shift))) + (rv_or top bot))) + +(rule 2 (gen_bswap (ty_16_or_32 (ty_int ty)) x) + (if-let $true (has_zbb)) + (if-let shift_amt (u64_to_imm12 (u64_sub 64 (ty_bits ty)))) + (rv_srli (rv_rev8 x) shift_amt)) + +(rule 3 (gen_bswap $I64 x) + (if-let $true (has_zbb)) + (rv_rev8 x)) + +;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (ctz x))) + (lower_ctz ty x)) + +(rule 1 (lower (has_type $I128 (ctz x))) + (let ((x_lo XReg (value_regs_get x 0)) + (x_hi XReg (value_regs_get x 1)) + ;; Count both halves + (high XReg (lower_ctz $I64 x_hi)) + (low XReg (lower_ctz $I64 x_lo)) + ;; Only add the top half if the bottom is zero + (high XReg (gen_select_xreg (cmp_eqz x_lo) high (zero_reg))) + (result XReg (rv_add low high))) + (value_regs result (imm $I64 0)))) + +;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (fits_in_64 ty) (clz x))) + (gen_cltz $true x ty)) + +(rule 1 (lower (has_type $I128 (clz x))) + (let ((x_lo XReg (value_regs_get x 0)) + (x_hi XReg (value_regs_get x 1)) + ;; Count both halves + (high XReg (gen_clz x_hi)) + (low XReg (gen_clz x_lo)) + ;; Only add the bottom zeros if the top half is zero + (low XReg (gen_select_xreg (cmp_eqz x_hi) low (zero_reg)))) + (value_regs (rv_add high low) (imm $I64 0)))) + +(rule 2 (lower (has_type (fits_in_16 ty) (clz x))) + (if-let $true (has_zbb)) + (let ((tmp XReg (zext x)) + (count XReg (rv_clz tmp))) + ;; We always do the operation on the full 64-bit register, so subtract 64 from the result. 
+ (rv_addi count (imm12_const_add (ty_bits ty) -64)))) + +(rule 3 (lower (has_type $I32 (clz x))) + (if-let $true (has_zbb)) + (rv_clzw x)) + +(rule 3 (lower (has_type $I64 (clz x))) + (if-let $true (has_zbb)) + (rv_clz x)) + +(decl gen_clz (XReg) XReg) +(rule 0 (gen_clz rs) + (gen_cltz $true rs $I64)) +(rule 1 (gen_clz rs) + (if-let $true (has_zbb)) + (rv_clz rs)) + +;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_64 ty) (cls x))) + (let ((tmp XReg (sext x)) + (tmp2 XReg (gen_select_xreg (cmp_ltz tmp) (rv_not tmp) tmp)) + (tmp3 XReg (gen_clz tmp2))) + ;; clz counted the full register width, so subtract (64-$width), and then + ;; additionally subtract one more, meaning here -65+width is added. + (rv_addi tmp3 (imm12_const_add (ty_bits ty) -65)))) + +;; If the sign bit is set, we count the leading zeros of the inverted value. +;; Otherwise we can just count the leading zeros of the original value. +;; Subtract 1 since the sign bit does not count. +(rule 1 (lower (has_type $I128 (cls x))) + (let ((low XReg (value_regs_get x 0)) + (high XReg (value_regs_get x 1)) + (low XReg (gen_select_xreg (cmp_ltz high) (rv_not low) low)) + (high XReg (gen_select_xreg (cmp_ltz high) (rv_not high) high)) + + ;; Count both halves + (high_cnt XReg (gen_clz high)) + (low_cnt XReg (gen_clz low)) + ;; Only add the bottom zeros if the top half is zero + (low_cnt XReg (gen_select_xreg (cmp_eqz high) low_cnt (zero_reg))) + (count XReg (rv_add high_cnt low_cnt)) + (result XReg (rv_addi count (imm12_const -1)))) + (value_regs result (imm $I64 0)))) + + +;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (fits_in_64 _) (uextend val))) + (zext val)) + +(rule 1 (lower (has_type $I128 (uextend val))) + (value_regs (zext val) (imm $I64 0))) + +;; When the source of an `uextend` is a load, we can merge both ops +(rule 2 (lower (has_type (fits_in_64 _) (uextend (sinkable_load inst ty flags addr offset)))) + (gen_sunk_load inst (amode addr offset) (uextend_load_op ty) flags)) + +(decl pure uextend_load_op (Type) LoadOP) +(rule (uextend_load_op $I8) (LoadOP.Lbu)) +(rule (uextend_load_op $I16) (LoadOP.Lhu)) +(rule (uextend_load_op $I32) (LoadOP.Lwu)) + +;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (fits_in_64 _) (sextend val @ (value_type in_ty)))) + (sext val)) + +(rule 1 (lower (has_type $I128 (sextend val @ (value_type in_ty)))) + (let ((lo XReg (sext val))) + (value_regs lo (rv_srai lo (imm12_const 63))))) + +;; When the source of an `sextend` is a load, we can merge both ops +(rule 2 (lower (has_type (fits_in_64 _) (sextend (sinkable_load inst ty flags addr offset)))) + (gen_sunk_load inst (amode addr offset) (sextend_load_op ty) flags)) + +(decl pure sextend_load_op (Type) LoadOP) +(rule (sextend_load_op $I8) (LoadOP.Lb)) +(rule (sextend_load_op $I16) (LoadOP.Lh)) +(rule (sextend_load_op $I32) (LoadOP.Lw)) + +;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_64 _) (popcnt x))) + (gen_popcnt (zext x))) + +(rule 1 (lower (has_type $I128 (popcnt x))) + (let + ((x ValueRegs x) + (low XReg (gen_popcnt (value_regs_get x 0))) + (high XReg (gen_popcnt (value_regs_get x 1))) + (result XReg (rv_add low high))) + (value_regs result (imm $I64 0)))) + +(rule 2 (lower (has_type (fits_in_64 _) (popcnt x))) + (if-let $true (has_zbb)) + (rv_cpop (zext x))) + +(rule 3 (lower (has_type $I32 
(popcnt x))) + (if-let $true (has_zbb)) + (rv_cpopw x)) + +(rule 3 (lower (has_type $I128 (popcnt x))) + (if-let $true (has_zbb)) + (let + ((x ValueRegs x) + (low XReg (rv_cpop (value_regs_get x 0))) + (high XReg (rv_cpop (value_regs_get x 1))) + (result XReg (rv_add low high))) + (value_regs result (imm $I64 0)))) + +;; Popcount using multiply. +;; This is popcount64c() from +;; http://en.wikipedia.org/wiki/Hamming_weight +;; +;; Here's the C version for 32 bits: +;; x = x - ((x>> 1) & 0x55555555); +;; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); +;; x = ((x + (x >> 4)) & 0x0F0F0F0F); +;; return (x * 0x01010101) >> 24; // Here 24 is the type width - 8. +;; +;; TODO: LLVM generates a much better implementation for I8X16. See: https://godbolt.org/z/qr6vf9Gr3 +;; For the other types it seems to be largely the same. +(rule 4 (lower (has_type (ty_supported_vec ty) (popcnt x))) + (if-let one (u64_to_uimm5 1)) + (if-let two (u64_to_uimm5 2)) + (if-let four (u64_to_uimm5 4)) + + (let (;; x = x - ((x >> 1) & 0x55555555); + (mask_55 XReg (imm (lane_type ty) (u64_and 0x5555555555555555 (ty_mask (lane_type ty))))) + (count2_shr VReg (rv_vsrl_vi x one (unmasked) ty)) + (count2_and VReg (rv_vand_vx count2_shr mask_55 (unmasked) ty)) + (count2 VReg (rv_vsub_vv x count2_and (unmasked) ty)) + + ;; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + (mask_33 XReg (imm (lane_type ty) (u64_and 0x3333333333333333 (ty_mask (lane_type ty))))) + (count4_shr VReg (rv_vsrl_vi count2 two (unmasked) ty)) + (count4_and VReg (rv_vand_vx count4_shr mask_33 (unmasked) ty)) + (count4_lhs VReg (rv_vand_vx count2 mask_33 (unmasked) ty)) + (count4 VReg (rv_vadd_vv count4_lhs count4_and (unmasked) ty)) + + ;; x = (x + (x >> 4)) & 0x0F0F0F0F; + (mask_0f XReg (imm (lane_type ty) (u64_and 0x0f0f0f0f0f0f0f0f (ty_mask (lane_type ty))))) + (count8_shr VReg (rv_vsrl_vi count4 four (unmasked) ty)) + (count8_add VReg (rv_vadd_vv count4 count8_shr (unmasked) ty)) + (count8 VReg (rv_vand_vx count8_add mask_0f (unmasked) ty)) + + ;; (x * 0x01010101) >> ( - 8) + (mask_01 XReg (imm (lane_type ty) (u64_and 0x0101010101010101 (ty_mask (lane_type ty))))) + (mul VReg (rv_vmul_vx count8 mask_01 (unmasked) ty)) + (shift XReg (imm $I64 (u64_sub (ty_bits (lane_type ty)) 8))) + (res VReg (rv_vsrl_vx mul shift (unmasked) ty))) + res)) + +;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount +(rule 0 (lower (has_type (ty_int (ty_8_or_16 ty)) (ishl x y))) + (if-let mask (u64_to_imm12 (ty_shift_mask ty))) + (rv_sllw x (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `sll` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (ishl x y))) + (rv_sllw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (ishl x y))) + (rv_sll x (value_regs_get y 0))) + +;; If the shift amount is known. We can mask it and encode it in the instruction. +(rule 2 (lower (has_type (int_fits_in_32 ty) (ishl x (maybe_uextend (imm12_from_value y))))) + (rv_slliw x (imm12_and y (ty_shift_mask ty)))) + +;; We technically don't need to mask the shift amount here. The instruction +;; does the right thing. But it's neater when pretty printing it. +(rule 3 (lower (has_type ty @ $I64 (ishl x (maybe_uextend (imm12_from_value y))))) + (rv_slli x (imm12_and y (ty_shift_mask ty)))) + +;; With `Zba` we have a shift that zero extends the LHS argument. 
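For reference, the popcount-by-multiply sequence described in the comment above, written as scalar Rust; the vector rule applies exactly these steps lane-wise, with the constants truncated to the lane width:

// popcount64c: pairwise sums, then 4-bit sums, then byte sums, then one
// multiply to accumulate all bytes into the top byte.
fn popcount64c(mut x: u64) -> u32 {
    x -= (x >> 1) & 0x5555_5555_5555_5555;
    x = (x & 0x3333_3333_3333_3333) + ((x >> 2) & 0x3333_3333_3333_3333);
    x = (x + (x >> 4)) & 0x0f0f_0f0f_0f0f_0f0f;
    (x.wrapping_mul(0x0101_0101_0101_0101) >> 56) as u32 // 56 = 64 - 8
}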
+(rule 4 (lower (has_type $I64 (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y))))) + (if-let $true (has_zba)) + (rv_slliuw x y)) + +;; I128 cases +(rule 4 (lower (has_type $I128 (ishl x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; + (low XReg (rv_sll (value_regs_get x 0) shamt)) + ;; high part. + (high_part1 XReg (rv_srl (value_regs_get x 0) len_sub_shamt)) + (high_part2 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) high_part1)) + ;; + (high_part3 XReg (rv_sll (value_regs_get x 1) shamt)) + (high XReg (rv_or high_part2 high_part3)) + ;; + (const64 XReg (imm $I64 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (gen_select_regs + (cmp_geu shamt_128 const64) + (value_regs (zero_reg) low) + (value_regs low high)))) + +;; SIMD Cases +;; We don't need to mask anything since it is done by the instruction according to SEW. + +(rule 5 (lower (has_type (ty_supported_vec ty) (ishl x y))) + (rv_vsll_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 6 (lower (has_type (ty_supported_vec ty) (ishl x (maybe_uextend (uimm5_from_value y))))) + (rv_vsll_vi x y (unmasked) ty)) + +;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be +;; zero extended. +(rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x y))) + (if-let mask (u64_to_imm12 (ty_shift_mask ty))) + (rv_srlw (zext x) (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `srl` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (ushr x y))) + (rv_srlw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (ushr x y))) + (rv_srl x (value_regs_get y 0))) + +;; When the RHS is known we can just encode it in the instruction. +(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srliw (zext x) (imm12_and y (ty_shift_mask ty)))) + +(rule 3 (lower (has_type $I32 (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srliw x y)) + +(rule 3 (lower (has_type $I64 (ushr x (maybe_uextend (imm12_from_value y))))) + (rv_srli x y)) + +(rule 3 (lower (has_type $I128 (ushr x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; low part. + (low_part1 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + (low_part2 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) low_part1)) + ;; + (low_part3 XReg (rv_srl (value_regs_get x 0) shamt)) + (low XReg (rv_or low_part2 low_part3)) + ;; + (const64 XReg (imm $I64 64)) + ;; + (high XReg (rv_srl (value_regs_get x 1) shamt)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (gen_select_regs + (cmp_geu shamt_128 const64) + (value_regs high (zero_reg)) + (value_regs low high)))) + +;; SIMD Cases +;; We don't need to mask or extend anything since it is done by the instruction according to SEW. 
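The `$I128` cases above assemble the result from two 64-bit halves. A branchy scalar sketch of the logical-right-shift recipe; the actual rules replace the branches with `cmp_eqz` and `cmp_geu` selects:

// 128-bit logical shift right built from 64-bit halves.
fn ushr_i128(lo: u64, hi: u64, amt: u32) -> (u64, u64) {
    let amt = amt & 127;
    let shamt = amt & 63;
    // Bits of `hi` that move down into the low half; must be zero when
    // shamt == 0, since a 64-bit shift by 64 is not defined.
    let carried = if shamt == 0 { 0 } else { hi << (64 - shamt) };
    let low = carried | (lo >> shamt);
    let high = hi >> shamt;
    if amt >= 64 { (high, 0) } else { (low, high) }
}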
+ +(rule 4 (lower (has_type (ty_supported_vec ty) (ushr x y))) + (rv_vsrl_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 5 (lower (has_type (ty_supported_vec ty) (ushr x (maybe_uextend (uimm5_from_value y))))) + (rv_vsrl_vi x y (unmasked) ty)) + +;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 8/16 bit types need a mask on the shift amount, and the LHS needs to be +;; zero extended. +(rule 0 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x y))) + (if-let mask (u64_to_imm12 (ty_shift_mask ty))) + (rv_sraw (sext x) (rv_andi (value_regs_get y 0) mask))) + +;; Using the 32bit version of `sra` automatically masks the shift amount. +(rule 1 (lower (has_type $I32 (sshr x y))) + (rv_sraw x (value_regs_get y 0))) + +;; Similarly, the 64bit version does the right thing. +(rule 1 (lower (has_type $I64 (sshr x y))) + (rv_sra x (value_regs_get y 0))) + +;; When the RHS is known we can just encode it in the instruction. +(rule 2 (lower (has_type (ty_int (fits_in_16 ty)) (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_sraiw (sext x) (imm12_and y (ty_shift_mask ty)))) + +(rule 3 (lower (has_type $I32 (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_sraiw x y)) + +(rule 3 (lower (has_type $I64 (sshr x (maybe_uextend (imm12_from_value y))))) + (rv_srai x y)) + +(rule 3 (lower (has_type $I128 (sshr x y))) + (let ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + ;; low part. + (low_part1 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + (low_part2 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) low_part1)) + ;; + (low_part3 XReg (rv_srl (value_regs_get x 0) shamt)) + (low XReg (rv_or low_part2 low_part3)) + ;; + (const64 XReg (imm $I64 64)) + ;; + (high XReg (rv_sra (value_regs_get x 1) shamt)) + ;; + (const_neg_1 XReg (imm $I64 (i64_as_u64 -1))) + ;; + (high_replacement XReg (gen_select_xreg (cmp_ltz (value_regs_get x 1)) const_neg_1 (zero_reg))) + (const64 XReg (imm $I64 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + (gen_select_regs + (cmp_geu shamt_128 const64) + (value_regs high high_replacement) + (value_regs low high)))) + +;; SIMD Cases +;; We don't need to mask or extend anything since it is done by the instruction according to SEW. 
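For the arithmetic variant above, once the masked amount reaches 64 the upper half is pure sign fill and the lower half is an arithmetic shift of the old upper half; that is the `high_replacement` arm of the final select. A sketch of just that branch:

// 128-bit arithmetic shift right by 64..=127.
fn sshr_i128_ge64(hi: u64, amt: u32) -> (u64, u64) {
    debug_assert!((64..128).contains(&amt));
    let low = ((hi as i64) >> (amt - 64)) as u64;
    let high = if (hi as i64) < 0 { u64::MAX } else { 0 }; // sign fill
    (low, high)
}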
+ +(rule 4 (lower (has_type (ty_supported_vec ty) (sshr x y))) + (rv_vsra_vx x (value_regs_get y 0) (unmasked) ty)) + +(rule 5 (lower (has_type (ty_supported_vec ty) (sshr x (maybe_uextend (uimm5_from_value y))))) + (rv_vsra_vi x y (unmasked) ty)) + + +;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_64 ty) (rotl rs amount))) + (let + ((rs XReg (zext rs)) + (amount XReg (value_regs_get amount 0)) + (x ValueRegs (gen_shamt ty amount)) + (shamt XReg (value_regs_get x 0)) + (len_sub_shamt Reg (value_regs_get x 1)) + (part1 Reg (rv_sll rs shamt)) + (part2 Reg (rv_srl rs len_sub_shamt)) + (part3 Reg (gen_select_xreg (cmp_eqz shamt) (zero_reg) part2))) + (rv_or part1 part3))) + +(rule 1 (lower (has_type $I32 (rotl rs amount))) + (if-let $true (has_zbb)) + (rv_rolw rs (value_regs_get amount 0))) + +(rule 2 (lower (has_type $I32 (rotl rs (u64_from_iconst n)))) + (if-let $true (has_zbb)) + (if-let (imm12_from_u64 imm) (u64_sub 32 (u64_and n 31))) + (rv_roriw rs imm)) + +(rule 1 (lower (has_type $I64 (rotl rs amount))) + (if-let $true (has_zbb)) + (rv_rol rs (value_regs_get amount 0))) + +(rule 2 (lower (has_type $I64 (rotl rs (u64_from_iconst n)))) + (if-let $true (has_zbb)) + (if-let (imm12_from_u64 imm) (u64_sub 64 (u64_and n 63))) + (rv_rori rs imm)) + +(rule 1 (lower (has_type $I128 (rotl x y))) + (let + ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + (low_part1 XReg (rv_sll (value_regs_get x 0) shamt)) + (low_part2 XReg (rv_srl (value_regs_get x 1) len_sub_shamt)) + ;;; if shamt == 0 low_part2 will overflow we should zero instead. + (low_part3 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) low_part2)) + (low XReg (rv_or low_part1 low_part3)) + (high_part1 XReg (rv_sll (value_regs_get x 1) shamt)) + (high_part2 XReg (rv_srl (value_regs_get x 0) len_sub_shamt)) + (high_part3 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) high_part2)) + (high XReg (rv_or high_part1 high_part3)) + (const64 XReg (imm $I64 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + ;; right now we only rotate less than 64 bits. + ;; if shamt is greater than or equal 64 , we should switch low and high. 
+ (gen_select_regs + (cmp_geu shamt_128 const64) + (value_regs high low) + (value_regs low high) + ))) + +;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_64 ty) (rotr rs amount))) + (let + ((rs XReg (zext rs)) + (amount XReg (value_regs_get amount 0)) + (x ValueRegs (gen_shamt ty amount)) + (shamt XReg (value_regs_get x 0)) + (len_sub_shamt XReg (value_regs_get x 1)) + (part1 XReg (rv_srl rs shamt)) + (part2 XReg (rv_sll rs len_sub_shamt)) + (part3 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) part2))) + (rv_or part1 part3))) + +(rule 1 (lower (has_type $I32 (rotr rs amount))) + (if-let $true (has_zbb)) + (rv_rorw rs (value_regs_get amount 0))) + +(rule 2 (lower (has_type $I32 (rotr rs (imm12_from_value n)))) + (if-let $true (has_zbb)) + (rv_roriw rs n)) + +(rule 1 (lower (has_type $I64 (rotr rs amount))) + (if-let $true (has_zbb)) + (rv_ror rs (value_regs_get amount 0))) + +(rule 2 (lower (has_type $I64 (rotr rs (imm12_from_value n)))) + (if-let $true (has_zbb)) + (rv_rori rs n)) + +(rule 1 (lower (has_type $I128 (rotr x y))) + (let + ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0))) + (shamt XReg (value_regs_get tmp 0)) + (len_sub_shamt XReg (value_regs_get tmp 1)) + (low_part1 XReg (rv_srl (value_regs_get x 0) shamt)) + (low_part2 XReg (rv_sll (value_regs_get x 1) len_sub_shamt)) + ;;; if shamt == 0 low_part2 will overflow we should zero instead. + (low_part3 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) low_part2)) + (low XReg (rv_or low_part1 low_part3)) + (high_part1 XReg (rv_srl (value_regs_get x 1) shamt)) + (high_part2 XReg (rv_sll (value_regs_get x 0) len_sub_shamt)) + (high_part3 XReg (gen_select_xreg (cmp_eqz shamt) (zero_reg) high_part2)) + (high XReg (rv_or high_part1 high_part3)) + (const64 XReg (imm $I64 64)) + (shamt_128 XReg (rv_andi (value_regs_get y 0) (imm12_const 127)))) + ;; right now we only rotate less than 64 bits. + ;; if shamt is greater than or equal 64 , we should switch low and high. + (gen_select_regs + (cmp_geu shamt_128 const64) + (value_regs high low) + (value_regs low high) + ))) + +;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (fabs x))) + (rv_fabs ty x)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (fabs x))) + (rv_vfabs_v x (unmasked) ty)) + +;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (fneg x))) + (rv_fneg ty x)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (fneg x))) + (rv_vfneg_v x (unmasked) ty)) + +;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (fcopysign x y))) + (rv_fsgnj ty x y)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (fcopysign x y))) + (rv_vfsgnj_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (fcopysign x (splat y)))) + (rv_vfsgnj_vf x y (unmasked) ty)) + +;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; RISC-V has 4 FMA instructions that do a slightly different computation. +;; +;; fmadd: (rs1 * rs2) + rs3 +;; fmsub: (rs1 * rs2) - rs3 +;; fnmadd: -(rs1 * rs2) - rs3 +;; fnmsub: -(rs1 * rs2) + rs3 +;; +;; Additionally there are vector versions of these instructions with slightly different names. +;; The vector instructions also have two variants each. 
`.vv` and `.vf`, where `.vv` variants +;; take two vector operands and the `.vf` variants take a vector operand and a scalar operand. +;; +;; Due to this, variation they receive the arguments in a different order. So we need to swap +;; the arguments below. +;; +;; vfmacc: vd[i] = +(vs1[i] * vs2[i]) + vd[i] +;; vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i] +;; vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i] +;; vfnmsac: vd[i] = -(vs1[i] * vs2[i]) + vd[i] + +(type IsFneg (enum (Result (negate u64) (value Value)))) + +(decl pure is_fneg (Value) IsFneg) +(rule 1 (is_fneg (fneg x)) (IsFneg.Result 1 x)) +(rule 0 (is_fneg x) (IsFneg.Result 0 x)) + +(decl pure is_fneg_neg (IsFneg) u64) +(rule (is_fneg_neg (IsFneg.Result n _)) n) + +(decl pure get_fneg_value (IsFneg) Value) +(rule (get_fneg_value (IsFneg.Result _ v)) v) + +(rule (lower (has_type ty (fma x_src y_src z_src))) + (let + ((x_res IsFneg (is_fneg x_src)) + (y_res IsFneg (is_fneg y_src)) + (z_res IsFneg (is_fneg z_src)) + (x Value (get_fneg_value x_res)) + (y Value (get_fneg_value y_res)) + (z Value (get_fneg_value z_res))) + (rv_fma ty (u64_xor (is_fneg_neg x_res) (is_fneg_neg y_res)) (is_fneg_neg z_res) x y z))) + +; parity arguments indicate whether to negate the x*y term or the z term, respectively +(decl rv_fma (Type u64 u64 Value Value Value) InstOutput) +(rule 0 (rv_fma (ty_supported_float ty) 0 0 x y z) (rv_fmadd ty (FRM.RNE) x y z)) +(rule 0 (rv_fma (ty_supported_float ty) 0 1 x y z) (rv_fmsub ty (FRM.RNE) x y z)) +(rule 0 (rv_fma (ty_supported_float ty) 1 0 x y z) (rv_fnmsub ty (FRM.RNE) x y z)) +(rule 0 (rv_fma (ty_supported_float ty) 1 1 x y z) (rv_fnmadd ty (FRM.RNE) x y z)) +(rule 1 (rv_fma (ty_supported_vec ty) 0 0 x y z) (rv_vfmacc_vv z y x (unmasked) ty)) +(rule 1 (rv_fma (ty_supported_vec ty) 0 1 x y z) (rv_vfmsac_vv z y x (unmasked) ty)) +(rule 1 (rv_fma (ty_supported_vec ty) 1 0 x y z) (rv_vfnmsac_vv z y x (unmasked) ty)) +(rule 1 (rv_fma (ty_supported_vec ty) 1 1 x y z) (rv_vfnmacc_vv z y x (unmasked) ty)) +(rule 2 (rv_fma (ty_supported_vec ty) 0 0 (splat x) y z) (rv_vfmacc_vf z y x (unmasked) ty)) +(rule 2 (rv_fma (ty_supported_vec ty) 0 1 (splat x) y z) (rv_vfmsac_vf z y x (unmasked) ty)) +(rule 2 (rv_fma (ty_supported_vec ty) 1 0 (splat x) y z) (rv_vfnmsac_vf z y x (unmasked) ty)) +(rule 2 (rv_fma (ty_supported_vec ty) 1 1 (splat x) y z) (rv_vfnmacc_vf z y x (unmasked) ty)) +(rule 3 (rv_fma (ty_supported_vec ty) 0 0 x (splat y) z) (rv_vfmacc_vf z x y (unmasked) ty)) +(rule 3 (rv_fma (ty_supported_vec ty) 0 1 x (splat y) z) (rv_vfmsac_vf z x y (unmasked) ty)) +(rule 3 (rv_fma (ty_supported_vec ty) 1 0 x (splat y) z) (rv_vfnmsac_vf z x y (unmasked) ty)) +(rule 3 (rv_fma (ty_supported_vec ty) 1 1 x (splat y) z) (rv_vfnmacc_vf z x y (unmasked) ty)) + +;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (sqrt x))) + (rv_fsqrt ty (FRM.RNE) x)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (sqrt x))) + (rv_vfsqrt_v x (unmasked) ty)) + +;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule -1 + ;; + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags op addr x))) + (gen_atomic (get_atomic_rmw_op ty op) addr x (atomic_amo))) + +;;; for I8 and I16 +(rule 1 + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags op addr x))) + (gen_atomic_rmw_loop op ty addr x)) + +;;;special for I8 and I16 max min etc. +;;;because I need uextend or sextend the value. 
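The two rules below differ only in how the narrow operand is widened, and that distinction matters: signed comparisons on sub-word values are only correct after sign extension, unsigned ones after zero extension. A small illustration of why:

// For signed 8-bit max, comparing zero-extended bit patterns goes wrong as
// soon as the sign bit is set; sign extension preserves the ordering.
fn widen_for_compare() {
    let a: i8 = -1; // bit pattern 0xff
    let b: i8 = 1;
    assert!((a as u8 as u64) > (b as u8 as u64)); // zero-extended: -1 looks larger
    assert!((a as i64) < (b as i64));             // sign-extended: correct
}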
+(rule 2 + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $true) addr x))) + (gen_atomic_rmw_loop op ty addr (sext x))) + + +(rule 2 + ;; + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $false) addr x))) + ;; + (gen_atomic_rmw_loop op ty addr (zext x))) + +;;;;; Rules for `AtomicRmwOp.Sub` +(rule + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr x))) + (let + ((tmp WritableReg (temp_writable_reg ty)) + (x2 Reg (rv_neg x))) + (gen_atomic (get_atomic_rmw_op ty (AtomicRmwOp.Add)) addr x2 (atomic_amo)))) + +(decl gen_atomic_rmw_loop (AtomicRmwOp Type XReg XReg) XReg) +(rule + (gen_atomic_rmw_loop op ty addr x) + (let + ((dst WritableXReg (temp_writable_xreg)) + (t0 WritableXReg (temp_writable_xreg)) + (_ Unit (emit (MInst.AtomicRmwLoop (gen_atomic_offset addr ty) op dst ty (gen_atomic_p addr ty) x t0)))) + (writable_reg_to_reg dst))) + +;;;;; Rules for `AtomicRmwOp.Nand` +(rule + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr x))) + (gen_atomic_rmw_loop (AtomicRmwOp.Nand) ty addr x)) + +(decl is_atomic_rmw_max_etc (AtomicRmwOp bool) AtomicRmwOp) +(extern extractor is_atomic_rmw_max_etc is_atomic_rmw_max_etc) + +;;;;; Rules for `atomic load`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type (valid_atomic_transaction ty) (atomic_load flags p))) + (gen_atomic_load p ty)) + + +;;;;; Rules for `atomic store`;;;;;;;;;;;;;;;;; +(rule + (lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) p)) + (gen_atomic_store p ty src)) + +(decl gen_atomic_offset (XReg Type) XReg) +(rule 1 (gen_atomic_offset p (fits_in_16 ty)) + (rv_slli (rv_andi p (imm12_const 3)) (imm12_const 3))) + +(rule (gen_atomic_offset p _) + (zero_reg)) + +(decl gen_atomic_p (XReg Type) XReg) +(rule 1 (gen_atomic_p p (fits_in_16 ty)) + (rv_andi p (imm12_const -4))) + +(rule (gen_atomic_p p _) + p) + + +;;;;; Rules for `atomic cas`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type (valid_atomic_transaction ty) (atomic_cas flags p e x))) + (let + ((t0 WritableReg (temp_writable_reg ty)) + (dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.AtomicCas (gen_atomic_offset p ty) t0 dst (zext e) (gen_atomic_p p ty) x ty)))) + (writable_reg_to_reg dst))) + +;;;;; Rules for `ireduce`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type ty (ireduce x))) + (value_regs_get x 0)) + +;;;;; Rules for `fpromote`;;;;;;;;;;;;;;;;; +(rule (lower (fpromote x)) + (rv_fcvtds x)) + +;;;;; Rules for `fvpromote_low`;;;;;;;;;;;; + +(rule (lower (has_type (ty_supported_vec ty) (fvpromote_low x))) + (if-let half_ty (ty_half_width ty)) + (rv_vfwcvt_f_f_v x (unmasked) (vstate_mf2 half_ty))) + +;;;;; Rules for `fdemote`;;;;;;;;;;;;;;;;;; +(rule (lower (fdemote x)) + (rv_fcvtsd (FRM.RNE) x)) + +;;;;; Rules for `fvdemote`;;;;;;;;;;;;;;;;; + +;; `vfncvt...` leaves the upper bits of the register undefined so +;; we need to zero them out. 
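The `gen_atomic_p`/`gen_atomic_offset` helpers above split a byte or halfword address into the aligned word the LR/SC loop operates on and the bit offset of the value inside that word; a scalar sketch of that split:

// gen_atomic_p:      p & !3        (andi p, -4)
// gen_atomic_offset: (p & 3) << 3  (slli(andi(p, 3), 3))
fn atomic_addr_parts(p: u64) -> (u64, u64) {
    let aligned_word = p & !3;
    let bit_offset = (p & 3) << 3;
    (aligned_word, bit_offset)
}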
+(rule (lower (has_type (ty_supported_vec ty @ $F32X4) (fvdemote x))) + (if-let zero (i8_to_imm5 0)) + (let ((narrow VReg (rv_vfncvt_f_f_w x (unmasked) (vstate_mf2 ty))) + (mask VReg (gen_vec_mask 0xC))) + (rv_vmerge_vim narrow zero mask ty))) + + +;;;;; Rules for for float arithmetic + + +;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_supported_float ty) (fadd x y))) + (rv_fadd ty (FRM.RNE) x y)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (fadd x y))) + (rv_vfadd_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (fadd x (splat y)))) + (rv_vfadd_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (fadd (splat x) y))) + (rv_vfadd_vf y x (unmasked) ty)) + + +;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (fsub x y))) + (rv_fsub ty (FRM.RNE) x y)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (fsub x y))) + (rv_vfsub_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (fsub x (splat y)))) + (rv_vfsub_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (fsub (splat x) y))) + (rv_vfrsub_vf y x (unmasked) ty)) + +;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (fmul x y))) + (rv_fmul ty (FRM.RNE) x y)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (fmul x y))) + (rv_vfmul_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (fmul x (splat y)))) + (rv_vfmul_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (fmul (splat x) y))) + (rv_vfmul_vf y x (unmasked) ty)) + + +;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (fdiv x y))) + (rv_fdiv ty (FRM.RNE) x y)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (fdiv x y))) + (rv_vfdiv_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (fdiv x (splat y)))) + (rv_vfdiv_vf x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (fdiv (splat x) y))) + (rv_vfrdiv_vf y x (unmasked) ty)) + +;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; RISC-V's `fmin` instruction returns the number input if one of inputs is a +;; NaN. We handle this by manually checking if one of the inputs is a NaN +;; and selecting based on that result. +(rule 0 (lower (has_type (ty_supported_float ty) (fmin x y))) + (let (;; Check if both inputs are not nan. + (is_ordered FloatCompare (fcmp_to_float_compare (FloatCC.Ordered) ty x y)) + ;; `fadd` returns a nan if any of the inputs is a NaN. + (nan FReg (rv_fadd ty (FRM.RNE) x y)) + (min FReg (rv_fmin ty x y))) + (gen_select_freg is_ordered min nan))) + +;; With Zfa we can use the special `fminm` that precisely matches the expected +;; NaN behavior. +(rule 1 (lower (has_type (ty_supported_float ty) (fmin x y))) + (if-let $true (has_zfa)) + (rv_fminm ty x y)) + +;; vfmin does almost the right thing, but it does not handle NaN's correctly. +;; We should return a NaN if any of the inputs is a NaN, but vfmin returns the +;; number input instead. +;; +;; TODO: We can improve this by using a masked `fmin` instruction that modifies +;; the canonical nan register. That way we could avoid the `vmerge.vv` instruction. 
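A scalar restatement of the `fmin` strategy above, assuming ordinary IEEE `f64` operations; which particular NaN the `fadd` produces is a hardware detail, the point is only that a NaN is selected whenever either input is one:

// Cranelift fmin semantics on top of RISC-V-style fmin/fadd: RISC-V fmin
// returns the non-NaN operand, so select on the "both ordered" condition.
fn fmin_cranelift(x: f64, y: f64) -> f64 {
    let is_ordered = !x.is_nan() && !y.is_nan(); // fcmp Ordered
    let nan = x + y;    // fadd propagates a NaN if either input is NaN
    let min = x.min(y); // like RISC-V fmin, f64::min ignores a single NaN
    if is_ordered { min } else { nan }
}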
+(rule 2 (lower (has_type (ty_supported_vec ty) (fmin x y))) + (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) + (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) + (vec_nan VReg (rv_vmv_vx nan ty)) + (min VReg (rv_vfmin_vv x y (unmasked) ty))) + (rv_vmerge_vvm vec_nan min is_not_nan ty))) + +;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; RISC-V's `fmax` instruction returns the number input if one of inputs is a +;; NaN. We handle this by manually checking if one of the inputs is a NaN +;; and selecting based on that result. +(rule 0 (lower (has_type (ty_supported_float ty) (fmax x y))) + (let (;; Check if both inputs are not nan. + (is_ordered FloatCompare (fcmp_to_float_compare (FloatCC.Ordered) ty x y)) + ;; `fadd` returns a NaN if any of the inputs is a NaN. + (nan FReg (rv_fadd ty (FRM.RNE) x y)) + (max FReg (rv_fmax ty x y))) + (gen_select_freg is_ordered max nan))) + +;; With Zfa we can use the special `fmaxm` that precisely matches the expected +;; NaN behavior. +(rule 1 (lower (has_type (ty_supported_float ty) (fmax x y))) + (if-let $true (has_zfa)) + (rv_fmaxm ty x y)) + +;; vfmax does almost the right thing, but it does not handle NaN's correctly. +;; We should return a NaN if any of the inputs is a NaN, but vfmax returns the +;; number input instead. +;; +;; TODO: We can improve this by using a masked `fmax` instruction that modifies +;; the canonical nan register. That way we could avoid the `vmerge.vv` instruction. +(rule 2 (lower (has_type (ty_supported_vec ty) (fmax x y))) + (let ((is_not_nan VReg (gen_fcmp_mask ty (FloatCC.Ordered) x y)) + (nan XReg (imm $I64 (canonical_nan_u64 (lane_type ty)))) + (vec_nan VReg (rv_vmv_vx nan ty)) + (max VReg (rv_vfmax_vv x y (unmasked) ty))) + (rv_vmerge_vvm vec_nan max is_not_nan ty))) + +;;;;; Rules for `stack_addr`;;;;;;;;; +(rule + (lower (stack_addr ss offset)) + (gen_stack_addr ss offset)) + +;;;;; Rules for `select`;;;;;;;;; + +;; Manually matching (iconst 0) here is a bit of a hack. We can't do that as part +;; of the iconst rule because that runs into regalloc issues. gen_select_xreg +;; has some optimizations based on the use of the zero register so we have to +;; manually match it here. +(rule 5 (lower (has_type (ty_int_ref_scalar_64 _) (select c (i64_from_iconst 0) y))) + (gen_select_xreg (is_nonzero_cmp c) (zero_reg) y)) + +(rule 4 (lower (has_type (ty_int_ref_scalar_64 _) (select c x (i64_from_iconst 0)))) + (gen_select_xreg (is_nonzero_cmp c) x (zero_reg))) + +(rule 3 (lower (has_type (ty_int_ref_scalar_64 _) (select c x y))) + (gen_select_xreg (is_nonzero_cmp c) x y)) + +(rule 2 (lower (has_type $I128 (select c x y))) + (gen_select_regs (is_nonzero_cmp c) x y)) + +(rule 1 (lower (has_type (ty_supported_vec _) (select c x y))) + (gen_select_vreg (is_nonzero_cmp c) x y)) + +(rule 0 (lower (has_type (ty_supported_float _) (select c x y))) + (gen_select_freg (is_nonzero_cmp c) x y)) + +;;;;; Rules for `bitselect`;;;;;;;;; + +;; Do a (c & x) | (~c & y) operation. +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (bitselect c x y))) + (let ((tmp_x XReg (rv_and c x)) + (c_inverse XReg (rv_not c)) + (tmp_y XReg (rv_and c_inverse y))) + (rv_or tmp_x tmp_y))) + +;; For vectors, we also do the same operation. +;; We can technically use any type in the bitwise operations, but prefer +;; using the type of the inputs so that we avoid emitting unnecessary +;; `vsetvl` instructions. it's likely that the vector unit is already +;; configured for that type. 
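The same select-on-a-mask idea in scalar form: `bitselect` takes bits from `x` where the condition mask has ones and from `y` where it has zeros, which is exactly the three-instruction sequence in the rule above:

fn bitselect(c: u64, x: u64, y: u64) -> u64 {
    (c & x) | (!c & y)
}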
+(rule 1 (lower (has_type (ty_supported_vec ty) (bitselect c x y))) + (let ((tmp_x VReg (rv_vand_vv c x (unmasked) ty)) + (c_inverse VReg (rv_vnot_v c (unmasked) ty)) + (tmp_y VReg (rv_vand_vv c_inverse y (unmasked) ty))) + (rv_vor_vv tmp_x tmp_y (unmasked) ty))) + +;; Special case for bitselects with cmp's as an input. +;; +;; This allows us to skip the mask expansion step and use the more efficient +;; vmerge.vvm instruction. +;; +;; We should be careful to ensure that the mask and the vmerge have the +;; same type. So that we don't generate a mask with length 16 (i.e. for i8x16), and then +;; only copy the first few lanes of the result to the destination register because +;; the bitselect has a different length (i.e. i64x2). +;; +;; See: https://github.com/bytecodealliance/wasmtime/issues/8131 + +(rule 2 (lower (has_type (ty_supported_vec _ty) (bitselect (icmp cc a @ (value_type (ty_supported_vec cmp_ty)) b) x y))) + (let ((mask VReg (gen_icmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask cmp_ty))) + +(rule 2 (lower (has_type (ty_supported_vec _ty) (bitselect (fcmp cc a @ (value_type (ty_supported_vec cmp_ty)) b) x y))) + (let ((mask VReg (gen_fcmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask cmp_ty))) + +(rule 2 (lower (has_type (ty_supported_vec _ty) (bitselect (bitcast _ (fcmp cc a @ (value_type (ty_supported_vec cmp_ty)) b)) x y))) + (let ((mask VReg (gen_fcmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask cmp_ty))) + +(rule 2 (lower (has_type (ty_supported_vec _ty) (bitselect (bitcast _ (icmp cc a @ (value_type (ty_supported_vec cmp_ty)) b)) x y))) + (let ((mask VReg (gen_icmp_mask cmp_ty cc a b))) + (rv_vmerge_vvm y x mask cmp_ty))) + + +;;;;; Rules for `isplit`;;;;;;;;; +(rule + (lower (isplit x)) + (let + ((t1 XReg (value_regs_get x 0)) + (t2 XReg (value_regs_get x 1))) + (output_pair t1 t2))) + +;;;;; Rules for `iconcat`;;;;;;;;; +(rule + (lower (has_type $I128 (iconcat x y))) + (let + ((t1 XReg x) + (t2 XReg y)) + (value_regs t1 t2))) + +;; Special-case the lowering of an `isplit` of a 128-bit multiply where the +;; lower bits of the result are discarded and the operands are sign or zero +;; extended. This maps directly to `umulh` and `smulh`. 
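The pattern described above is a "high multiply": a 128-bit product of extended 64-bit operands whose low half is never used. A scalar sketch of what the two rules that follow compute:

fn umulh(x: u64, y: u64) -> u64 {
    ((x as u128 * y as u128) >> 64) as u64 // maps to `mulhu`
}

fn smulh(x: i64, y: i64) -> i64 {
    ((x as i128 * y as i128) >> 64) as i64 // maps to `mulh`
}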
+(rule 1 (lower i @ (isplit (has_type $I128 (imul (uextend x) (uextend y))))) + (if-let (first_result lo) i) + (if-let $true (value_is_unused lo)) + (output_pair (invalid_reg) (rv_mulhu (zext x) (zext y)))) + +(rule 1 (lower i @ (isplit (has_type $I128 (imul (sextend x) (sextend y))))) + (if-let (first_result lo) i) + (if-let $true (value_is_unused lo)) + (output_pair (invalid_reg) (rv_mulh (sext x) (sext y)))) + +;;;;; Rules for `smax`;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_64 ty) (smax x y))) + (let ((x XReg (sext x)) + (y XReg (sext y))) + (gen_select_xreg (cmp_gt x y) x y))) + +(rule 1 (lower (has_type $I128 (smax x y))) + (gen_select_regs (icmp_to_int_compare (IntCC.SignedGreaterThan) x y) x y)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (smax x y))) + (rv_vmax_vv x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (smax x (splat y)))) + (rv_vmax_vx x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_supported_vec ty) (smax (splat x) y))) + (rv_vmax_vx y x (unmasked) ty)) + +;;;;; Rules for `smin`;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_64 ty) (smin x y))) + (let ((x XReg (sext x)) + (y XReg (sext y))) + (gen_select_xreg (cmp_lt x y) x y))) + +(rule 1 (lower (has_type $I128 (smin x y))) + (gen_select_regs (icmp_to_int_compare (IntCC.SignedLessThan) x y) x y)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (smin x y))) + (rv_vmin_vv x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (smin x (splat y)))) + (rv_vmin_vx x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_supported_vec ty) (smin (splat x) y))) + (rv_vmin_vx y x (unmasked) ty)) + +;;;;; Rules for `umax`;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_64 ty) (umax x y))) + (let ((x XReg (zext x)) + (y XReg (zext y))) + (gen_select_xreg (cmp_gtu x y) x y))) + +(rule 1 (lower (has_type $I128 (umax x y))) + (gen_select_regs (icmp_to_int_compare (IntCC.UnsignedGreaterThan) x y) x y)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (umax x y))) + (rv_vmaxu_vv x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (umax x (splat y)))) + (rv_vmaxu_vx x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_supported_vec ty) (umax (splat x) y))) + (rv_vmaxu_vx y x (unmasked) ty)) + +;;;;; Rules for `umin`;;;;;;;;; + +(rule 0 (lower (has_type (fits_in_64 ty) (umin x y))) + (let ((x XReg (zext x)) + (y XReg (zext y))) + (gen_select_xreg (cmp_ltu x y) x y))) + +(rule 1 (lower (has_type $I128 (umin x y))) + (gen_select_regs (icmp_to_int_compare (IntCC.UnsignedLessThan) x y) x y)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (umin x y))) + (rv_vminu_vv x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (umin x (splat y)))) + (rv_vminu_vx x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_supported_vec ty) (umin (splat x) y))) + (rv_vminu_vx y x (unmasked) ty)) + + +;;;;; Rules for `debugtrap`;;;;;;;;; +(rule + (lower (debugtrap)) + (side_effect (SideEffectNoResult.Inst (MInst.EBreak)))) + +;;;;; Rules for `fence`;;;;;;;;; +(rule + (lower (fence)) + (side_effect (SideEffectNoResult.Inst (MInst.Fence 15 15)))) + +;;;;; Rules for `trap`;;;;;;;;; +(rule + (lower (trap code)) + (udf code)) + +;;;;; Rules for `uload8`;;;;;;;;; +(rule (lower (uload8 flags addr offset)) + (gen_load (amode addr offset) (LoadOP.Lbu) flags)) + +;;;;; Rules for `sload8`;;;;;;;;; +(rule (lower (sload8 flags addr offset)) + (gen_load (amode addr offset) (LoadOP.Lb) flags)) + +;;;;; Rules for `uload16`;;;;;;;;; +(rule (lower (uload16 flags addr offset)) + (gen_load (amode 
addr offset) (LoadOP.Lhu) flags)) + +;;;;; Rules for `iload16`;;;;;;;;; +(rule (lower (sload16 flags addr offset)) + (gen_load (amode addr offset) (LoadOP.Lh) flags)) + +;;;;; Rules for `uload32`;;;;;;;;; +(rule (lower (uload32 flags addr offset)) + (gen_load (amode addr offset) (LoadOP.Lwu) flags)) + +;;;;; Rules for `sload32`;;;;;;;;; +(rule (lower (sload32 flags addr offset)) + (gen_load (amode addr offset) (LoadOP.Lw) flags)) + +;;;;; Rules for `load`;;;;;;;;; +(rule (lower (has_type ty (load flags addr offset))) + (gen_load (amode addr offset) (load_op ty) flags)) + +(rule 1 (lower (has_type $I128 (load flags addr offset))) + (if-let offset_plus_8 (s32_add_fallible offset 8)) + (let ((lo XReg (gen_load (amode addr offset) (LoadOP.Ld) flags)) + (hi XReg (gen_load (amode addr offset_plus_8) (LoadOP.Ld) flags))) + (value_regs lo hi))) + +(rule 2 (lower (has_type (ty_supported_vec ty) (load flags addr offset))) + (let ((eew VecElementWidth (element_width_from_type ty)) + (amode AMode (amode addr offset))) + (vec_load eew (VecAMode.UnitStride amode) flags (unmasked) ty))) + +;;;;; Rules for Load + Extend Combos ;;;;;;;;; + +;; These rules cover the special loads that load a 64bit value and do some sort of extension. +;; We don't have any special instructions to do this, so just load the 64 bits as a vector, and +;; do a SEW/2 extension. This only reads half width elements from the source vector register +;; extends it, and writes the back the full register. + +(decl gen_load64_extend (Type ExtendOp MemFlags AMode) VReg) + +(rule (gen_load64_extend ty (ExtendOp.Signed) flags amode) + (let ((eew VecElementWidth (element_width_from_type $I64)) + (load_state VState (vstate_from_type $I64)) + (loaded VReg (vec_load eew (VecAMode.UnitStride amode) flags (unmasked) load_state))) + (rv_vsext_vf2 loaded (unmasked) ty))) + +(rule (gen_load64_extend ty (ExtendOp.Zero) flags amode) + (let ((eew VecElementWidth (element_width_from_type $I64)) + (load_state VState (vstate_from_type $I64)) + (loaded VReg (vec_load eew (VecAMode.UnitStride amode) flags (unmasked) load_state))) + (rv_vzext_vf2 loaded (unmasked) ty))) + +;;;;; Rules for `uload8x8`;;;;;;;;;; +(rule (lower (has_type (ty_supported_vec ty @ $I16X8) (uload8x8 flags addr offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags (amode addr offset))) + +;;;;; Rules for `uload16x4`;;;;;;;;; +(rule (lower (has_type (ty_supported_vec ty @ $I32X4) (uload16x4 flags addr offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags (amode addr offset))) + +;;;;; Rules for `uload32x2`;;;;;;;;; +(rule (lower (has_type (ty_supported_vec ty @ $I64X2) (uload32x2 flags addr offset))) + (gen_load64_extend ty (ExtendOp.Zero) flags (amode addr offset))) + +;;;;; Rules for `sload8x8`;;;;;;;;;; +(rule (lower (has_type (ty_supported_vec ty @ $I16X8) (sload8x8 flags addr offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags (amode addr offset))) + +;;;;; Rules for `sload16x4`;;;;;;;;; +(rule (lower (has_type (ty_supported_vec ty @ $I32X4) (sload16x4 flags addr offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags (amode addr offset))) + +;;;;; Rules for `sload32x2`;;;;;;;;; +(rule (lower (has_type (ty_supported_vec ty @ $I64X2) (sload32x2 flags addr offset))) + (gen_load64_extend ty (ExtendOp.Signed) flags (amode addr offset))) + +;;;;; Rules for `istore8`;;;;;;;;; +(rule (lower (istore8 flags src addr offset)) + (rv_store (amode addr offset) (StoreOP.Sb) flags src)) + +;;;;; Rules for `istore16`;;;;;;;;; +(rule (lower (istore16 flags src addr offset)) + (rv_store 
(amode addr offset) (StoreOP.Sh) flags src)) + +;;;;; Rules for `istore32`;;;;;;;;; +(rule (lower (istore32 flags src addr offset)) + (rv_store (amode addr offset) (StoreOP.Sw) flags src)) + +;;;;; Rules for `store`;;;;;;;;; +(rule (lower (store flags src @ (value_type ty) addr offset)) + (gen_store (amode addr offset) flags src)) + +(rule 1 (lower (store flags src @ (value_type $I128) addr offset)) + (if-let offset_plus_8 (s32_add_fallible offset 8)) + (let ((_ InstOutput (rv_store (amode addr offset) (StoreOP.Sd) flags (value_regs_get src 0)))) + (rv_store (amode addr offset_plus_8) (StoreOP.Sd) flags (value_regs_get src 1)))) + +(rule 2 (lower (store flags src @ (value_type (ty_supported_vec ty)) addr offset)) + (let ((eew VecElementWidth (element_width_from_type ty)) + (amode AMode (amode addr offset))) + (vec_store eew (VecAMode.UnitStride amode) src flags (unmasked) ty))) + + +;;;;; Rules for `icmp`;;;;;;;;; + +;; 8-64 bit comparisons. Mostly fall back onto `IntegerCompare` and then +;; materializing that, but before that happens try to match some +;; constant-related patterns + +(rule 0 (lower (icmp cc x @ (value_type (fits_in_64 ty)) y)) + (lower_icmp cc x y)) + +(decl lower_icmp (IntCC Value Value) XReg) +(rule 0 (lower_icmp cc x y) + (lower_int_compare (icmp_to_int_compare cc x y))) + +;; a == $imm => seqz(xori(..)) +(rule 1 (lower_icmp (IntCC.Equal) x y) + (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) y) + (rv_seqz (rv_xori (sext x) imm))) +(rule 2 (lower_icmp (IntCC.Equal) x y) + (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) x) + (rv_seqz (rv_xori (sext y) imm))) + +;; a != $imm => snez(xori(..)) +(rule 1 (lower_icmp (IntCC.NotEqual) x y) + (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) y) + (rv_snez (rv_xori (sext x) imm))) +(rule 2 (lower_icmp (IntCC.NotEqual) x y) + (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) x) + (rv_snez (rv_xori (sext y) imm))) + +;; a < $imm => slti(..) +(rule 1 (lower_icmp (IntCC.SignedLessThan) x y) + (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) y) + (rv_slti (sext x) imm)) +(rule 1 (lower_icmp (IntCC.SignedGreaterThan) x y) + (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 imm))) x) + (rv_slti (sext y) imm)) +(rule 1 (lower_icmp (IntCC.UnsignedLessThan) x y) + (if-let (u64_from_iconst (u64_nonzero (imm12_from_u64 imm))) y) + (rv_sltiu (zext x) imm)) +(rule 1 (lower_icmp (IntCC.UnsignedGreaterThan) x y) + (if-let (u64_from_iconst (u64_nonzero (imm12_from_u64 imm))) x) + (rv_sltiu (zext y) imm)) + +;; a >= $imm => !(a < $imm) +(rule 2 (lower_icmp cc @ (IntCC.SignedGreaterThanOrEqual) x y) + (if-let (i64_from_iconst (i64_nonzero (imm12_from_i64 _))) y) + (rv_xori (lower_icmp (intcc_complement cc) x y) (imm12_const 1))) +(rule 2 (lower_icmp cc @ (IntCC.UnsignedGreaterThanOrEqual) x y) + (if-let (u64_from_iconst (u64_nonzero (imm12_from_u64 _))) y) + (rv_xori (lower_icmp (intcc_complement cc) x y) (imm12_const 1))) + +;; Materializes an `IntegerCompare` bundle directly into an `XReg` with a 0 +;; or 1 value. 
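A scalar paraphrase of the constant-comparison tricks above; the register-register forms are handled by the `lower_int_compare` helper declared next:

// a == imm  =>  seqz(xori(a, imm));  a >= imm  =>  !(a < imm), where the
// "not" of a 0/1 value is an xori with 1.
fn icmp_eq_imm(a: i64, imm: i64) -> u64 {
    ((a ^ imm) == 0) as u64
}

fn icmp_sge_imm(a: i64, imm: i64) -> u64 {
    ((a < imm) as u64) ^ 1
}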
+(decl lower_int_compare (IntegerCompare) XReg) + +;; x == y => x ^ y == 0 +(rule 0 (lower_int_compare (int_compare_decompose (IntCC.Equal) x y)) + (rv_seqz (rv_xor x y))) +(rule 1 (lower_int_compare (int_compare_decompose (IntCC.Equal) x (zero_reg))) + (rv_seqz x)) +(rule 2 (lower_int_compare (int_compare_decompose (IntCC.Equal) (zero_reg) y)) + (rv_seqz y)) +;; x != y => x ^ y != 0 +(rule 0 (lower_int_compare (int_compare_decompose (IntCC.NotEqual) x y)) + (rv_snez (rv_xor x y))) +(rule 1 (lower_int_compare (int_compare_decompose (IntCC.NotEqual) x (zero_reg))) + (rv_snez x)) +(rule 2 (lower_int_compare (int_compare_decompose (IntCC.NotEqual) (zero_reg) x)) + (rv_snez x)) +;; x < y => x < y +(rule (lower_int_compare (int_compare_decompose (IntCC.SignedLessThan) x y)) + (rv_slt x y)) +(rule (lower_int_compare (int_compare_decompose (IntCC.UnsignedLessThan) x y)) + (rv_sltu x y)) +;; x > y => y < x +(rule (lower_int_compare (int_compare_decompose (IntCC.SignedGreaterThan) x y)) + (rv_slt y x)) +(rule (lower_int_compare (int_compare_decompose (IntCC.UnsignedGreaterThan) x y)) + (rv_sltu y x)) +;; x <= y => !(y < x) +(rule (lower_int_compare (int_compare_decompose (IntCC.SignedLessThanOrEqual) x y)) + (rv_xori (rv_slt y x) (imm12_const 1))) +(rule (lower_int_compare (int_compare_decompose (IntCC.UnsignedLessThanOrEqual) x y)) + (rv_xori (rv_sltu y x) (imm12_const 1))) +;; x >= y => !(x < y) +(rule (lower_int_compare (int_compare_decompose (IntCC.SignedGreaterThanOrEqual) x y)) + (rv_xori (rv_slt x y) (imm12_const 1))) +(rule (lower_int_compare (int_compare_decompose (IntCC.UnsignedGreaterThanOrEqual) x y)) + (rv_xori (rv_sltu x y) (imm12_const 1))) + +;; 128-bit comparisons. +;; +;; Currently only `==`, `!=`, and `<` are implemented, and everything else +;; delegates to one of those. + +(rule 20 (lower (icmp cc x @ (value_type $I128) y)) + (lower_icmp_i128 cc x y)) + +(decl lower_icmp_i128 (IntCC ValueRegs ValueRegs) XReg) +(rule 0 (lower_icmp_i128 (IntCC.Equal) x y) + (let ((lo XReg (rv_xor (value_regs_get x 0) (value_regs_get y 0))) + (hi XReg (rv_xor (value_regs_get x 1) (value_regs_get y 1)))) + (rv_seqz (rv_or lo hi)))) +(rule 0 (lower_icmp_i128 (IntCC.NotEqual) x y) + (let ((lo XReg (rv_xor (value_regs_get x 0) (value_regs_get y 0))) + (hi XReg (rv_xor (value_regs_get x 1) (value_regs_get y 1)))) + (rv_snez (rv_or lo hi)))) + +;; swap args for `>` to use `<` instead +(rule 0 (lower_icmp_i128 cc @ (IntCC.SignedGreaterThan) x y) + (lower_icmp_i128 (intcc_swap_args cc) y x)) +(rule 0 (lower_icmp_i128 cc @ (IntCC.UnsignedGreaterThan) x y) + (lower_icmp_i128 (intcc_swap_args cc) y x)) + +;; complement `=`-related conditions to get ones that don't use `=`. +(rule 0 (lower_icmp_i128 cc @ (IntCC.SignedLessThanOrEqual) x y) + (rv_xori (lower_icmp_i128 (intcc_complement cc) x y) (imm12_const 1))) +(rule 0 (lower_icmp_i128 cc @ (IntCC.SignedGreaterThanOrEqual) x y) + (rv_xori (lower_icmp_i128 (intcc_complement cc) x y) (imm12_const 1))) +(rule 0 (lower_icmp_i128 cc @ (IntCC.UnsignedLessThanOrEqual) x y) + (rv_xori (lower_icmp_i128 (intcc_complement cc) x y) (imm12_const 1))) +(rule 0 (lower_icmp_i128 cc @ (IntCC.UnsignedGreaterThanOrEqual) x y) + (rv_xori (lower_icmp_i128 (intcc_complement cc) x y) (imm12_const 1))) + +;; Compare both the bottom and upper halves of the 128-bit values. If +;; the top half is equal use the bottom comparison, otherwise use the upper +;; comparison. 
Note that the lower comparison is always unsigned since if it's +;; used the top halves are all zeros and the semantic values are positive. +(rule 1 (lower_icmp_i128 cc x y) + (if-let (IntCC.UnsignedLessThan) (intcc_unsigned cc)) + (let ((x_lo Reg (value_regs_get x 0)) + (x_hi Reg (value_regs_get x 1)) + (y_lo Reg (value_regs_get y 0)) + (y_hi Reg (value_regs_get y 1)) + (top_cmp XReg (lower_int_compare (int_compare cc x_hi y_hi))) + (bottom_cmp XReg (rv_sltu x_lo y_lo))) + (gen_select_xreg (cmp_eqz (rv_xor x_hi y_hi)) bottom_cmp top_cmp))) + +;; vector icmp comparisons + +(rule 30 (lower (icmp cc x @ (value_type (ty_supported_vec ty)) y)) + (gen_expand_mask ty (gen_icmp_mask ty cc x y))) + +;;;;; Rules for `fcmp`;;;;;;;;; +(rule 0 (lower (fcmp cc x @ (value_type (ty_supported_float ty)) y)) + (lower_float_compare (fcmp_to_float_compare cc ty x y))) + +(decl lower_float_compare (FloatCompare) XReg) +(rule (lower_float_compare (FloatCompare.One r)) r) +(rule (lower_float_compare (FloatCompare.Zero r)) (rv_seqz r)) + +(rule 1 (lower (fcmp cc x @ (value_type (ty_supported_vec ty)) y)) + (gen_expand_mask ty (gen_fcmp_mask ty cc x y))) + +;;;;; Rules for `func_addr`;;;;;;;;; +(rule + (lower (func_addr (func_ref_data _ name _))) + (load_ext_name name 0)) + +;;;;; Rules for `fcvt_to_uint`;;;;;;;;; + +;; RISC-V float-to-integer conversion does not trap, but Cranelift semantics are +;; to trap. This manually performs checks for NaN and out-of-bounds values and +;; traps in such cases. +;; +;; TODO: could this perhaps be more optimal through inspection of the `fcsr`? +;; Unsure whether that needs to be preserved across function calls and/or would +;; cause other problems. Also unsure whether it's actually more performant. +(rule (lower (has_type ity (fcvt_to_uint v @ (value_type fty)))) + (let ((_ InstOutput (gen_trapz (rv_feq fty v v) (TrapCode.BadConversionToInteger))) + (min FReg (imm fty (fcvt_umin_bound fty $false))) + (_ InstOutput (gen_trapnz (rv_fle fty v min) (TrapCode.IntegerOverflow))) + (max FReg (imm fty (fcvt_umax_bound fty ity $false))) + (_ InstOutput (gen_trapnz (rv_fge fty v max) (TrapCode.IntegerOverflow)))) + (lower_inbounds_fcvt_to_uint ity fty v))) + +(decl lower_inbounds_fcvt_to_uint (Type Type FReg) XReg) +(rule 0 (lower_inbounds_fcvt_to_uint (fits_in_32 _) fty v) + (rv_fcvtwu fty (FRM.RTZ) v)) +(rule 1 (lower_inbounds_fcvt_to_uint $I64 fty v) + (rv_fcvtlu fty (FRM.RTZ) v)) + +;;;;; Rules for `fcvt_to_sint`;;;;;;;;; + +;; NB: see above with `fcvt_to_uint` as this is similar +(rule (lower (has_type ity (fcvt_to_sint v @ (value_type fty)))) + (let ((_ InstOutput (gen_trapz (rv_feq fty v v) (TrapCode.BadConversionToInteger))) + (min FReg (imm fty (fcvt_smin_bound fty ity $false))) + (_ InstOutput (gen_trapnz (rv_fle fty v min) (TrapCode.IntegerOverflow))) + (max FReg (imm fty (fcvt_smax_bound fty ity $false))) + (_ InstOutput (gen_trapnz (rv_fge fty v max) (TrapCode.IntegerOverflow)))) + (lower_inbounds_fcvt_to_sint ity fty v))) + +(decl lower_inbounds_fcvt_to_sint (Type Type FReg) XReg) +(rule 0 (lower_inbounds_fcvt_to_sint (fits_in_32 _) fty v) + (rv_fcvtw fty (FRM.RTZ) v)) +(rule 1 (lower_inbounds_fcvt_to_sint $I64 fty v) + (rv_fcvtl fty (FRM.RTZ) v)) + +;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;; + +(rule 0 (lower (has_type to (fcvt_to_sint_sat v @ (value_type (ty_supported_float from))))) + (handle_fcvt_to_int_nan from v (lower_fcvt_to_sint_sat from to v))) + +;; Lowers to a `rv_fcvt*` instruction but handles 8/16-bit cases where the +;; float is clamped before the 
conversion. +(decl lower_fcvt_to_sint_sat (Type Type FReg) XReg) +(rule 0 (lower_fcvt_to_sint_sat ty (fits_in_16 out_ty) v) + (let ((max FReg (imm ty (fcvt_smax_bound ty out_ty $true))) + (min FReg (imm ty (fcvt_smin_bound ty out_ty $true))) + (clamped FReg (rv_fmin ty max (rv_fmax ty min v)))) + (rv_fcvtw ty (FRM.RTZ) clamped))) +(rule 1 (lower_fcvt_to_sint_sat ty $I32 v) (rv_fcvtw ty (FRM.RTZ) v)) +(rule 1 (lower_fcvt_to_sint_sat ty $I64 v) (rv_fcvtl ty (FRM.RTZ) v)) + +(decl fcvt_smax_bound (Type Type bool) u64) +(extern constructor fcvt_smax_bound fcvt_smax_bound) +(decl fcvt_smin_bound (Type Type bool) u64) +(extern constructor fcvt_smin_bound fcvt_smin_bound) + +;; RISC-V float-to-int conversions generate the same output for NaN and +Inf, +;; but Cranelift semantics are to produce 0 for NaN instead. This helper +;; translates these semantics by taking the float being converted (with the type +;; specified) and the native RISC-V output as an `XReg`. The returned `XReg` +;; will be zeroed out if the float is NaN. +;; +;; This is done by comparing the float to itself, generating 0 if it's NaN. This +;; bit is then negated to become either all-ones or all-zeros which is then +;; and-ed against the native output. That'll produce all zeros if the input is +;; NaN or the native output otherwise. +(decl handle_fcvt_to_int_nan (Type FReg XReg) XReg) +(rule (handle_fcvt_to_int_nan ty freg xreg) + (let ((is_not_nan XReg (rv_feq ty freg freg)) + (not_nan_mask XReg (rv_neg is_not_nan))) + (rv_and xreg not_nan_mask))) + +(rule 1 (lower (has_type (ty_supported_vec _) (fcvt_to_sint_sat v @ (value_type from_ty)))) + (if-let zero (i8_to_imm5 0)) + (let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty)) + (cvt VReg (rv_vfcvt_rtz_x_f_v v (unmasked) from_ty))) + (rv_vmerge_vim cvt zero is_nan from_ty))) + +;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;; + +(rule 0 (lower (has_type to (fcvt_to_uint_sat v @ (value_type (ty_supported_float from))))) + (handle_fcvt_to_int_nan from v (lower_fcvt_to_uint_sat from to v))) + +;; Lowers to a `rv_fcvt*` instruction but handles 8/16-bit cases where the +;; float is clamped before the conversion. 
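+;; A rough scalar sketch of that 8/16-bit saturating path, assuming an f32 ->
+;; u8 conversion (illustrative only; the rules below use `fmin`/`fmax` plus a
+;; truncating `fcvt`, and NaN is zeroed separately by `handle_fcvt_to_int_nan`):
+;;
+;;     fn f32_to_u8_sat(v: f32) -> u8 {
+;;         if v.is_nan() {
+;;             return 0;
+;;         }
+;;         v.clamp(0.0, 255.0) as u8 // `as` truncates toward zero, like FRM.RTZ
+;;     }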
+(decl lower_fcvt_to_uint_sat (Type Type FReg) XReg) +(rule 0 (lower_fcvt_to_uint_sat ty (fits_in_16 out_ty) v) + (let ((max FReg (imm ty (fcvt_umax_bound ty out_ty $true))) + (min FReg (rv_fmvdx (zero_reg))) + (clamped FReg (rv_fmin ty max (rv_fmax ty min v)))) + (rv_fcvtwu ty (FRM.RTZ) clamped))) +(rule 1 (lower_fcvt_to_uint_sat ty $I32 v) (rv_fcvtwu ty (FRM.RTZ) v)) +(rule 1 (lower_fcvt_to_uint_sat ty $I64 v) (rv_fcvtlu ty (FRM.RTZ) v)) + +(decl fcvt_umax_bound (Type Type bool) u64) +(extern constructor fcvt_umax_bound fcvt_umax_bound) +(decl fcvt_umin_bound (Type bool) u64) +(extern constructor fcvt_umin_bound fcvt_umin_bound) + +(rule 1 (lower (has_type (ty_supported_vec _) (fcvt_to_uint_sat v @ (value_type from_ty)))) + (if-let zero (i8_to_imm5 0)) + (let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty)) + (cvt VReg (rv_vfcvt_rtz_xu_f_v v (unmasked) from_ty))) + (rv_vmerge_vim cvt zero is_nan from_ty))) + +;;;;; Rules for `fcvt_from_sint`;;;;;;;;; +(rule 0 (lower (has_type $F32 (fcvt_from_sint v @ (value_type (fits_in_16 ty))))) + (rv_fcvtsl (FRM.RNE) (sext v))) + +(rule 1 (lower (has_type $F32 (fcvt_from_sint v @ (value_type $I32)))) + (rv_fcvtsw (FRM.RNE) v)) + +(rule 1 (lower (has_type $F32 (fcvt_from_sint v @ (value_type $I64)))) + (rv_fcvtsl (FRM.RNE) v)) + +(rule 0 (lower (has_type $F64 (fcvt_from_sint v @ (value_type (fits_in_16 ty))))) + (rv_fcvtdl (FRM.RNE) (sext v))) + +(rule 1 (lower (has_type $F64 (fcvt_from_sint v @ (value_type $I32)))) + (rv_fcvtdw v)) + +(rule 1 (lower (has_type $F64 (fcvt_from_sint v @ (value_type $I64)))) + (rv_fcvtdl (FRM.RNE) v)) + +(rule 2 (lower (has_type (ty_supported_vec _) (fcvt_from_sint v @ (value_type from_ty)))) + (rv_vfcvt_f_x_v v (unmasked) from_ty)) + +;;;;; Rules for `fcvt_from_uint`;;;;;;;;; +(rule 0 (lower (has_type $F32 (fcvt_from_uint v @ (value_type (fits_in_16 ty))))) + (rv_fcvtslu (FRM.RNE) (zext v))) + +(rule 1 (lower (has_type $F32 (fcvt_from_uint v @ (value_type $I32)))) + (rv_fcvtswu (FRM.RNE) v)) + +(rule 1 (lower (has_type $F32 (fcvt_from_uint v @ (value_type $I64)))) + (rv_fcvtslu (FRM.RNE) v)) + +(rule 0 (lower (has_type $F64 (fcvt_from_uint v @ (value_type (fits_in_16 ty))))) + (rv_fcvtdlu (FRM.RNE) (zext v))) + +(rule 1 (lower (has_type $F64 (fcvt_from_uint v @ (value_type $I32)))) + (rv_fcvtdwu v)) + +(rule 1 (lower (has_type $F64 (fcvt_from_uint v @ (value_type $I64)))) + (rv_fcvtdlu (FRM.RNE) v)) + +(rule 2 (lower (has_type (ty_supported_vec _) (fcvt_from_uint v @ (value_type from_ty)))) + (rv_vfcvt_f_xu_v v (unmasked) from_ty)) + +;;;;; Rules for `symbol_value`;;;;;;;;; +(rule + (lower (symbol_value (symbol_value_data name _ offset))) + (load_ext_name name offset)) + +;;;;; Rules for `tls_value` ;;;;;;;;;;;;;; + +(rule (lower (has_type (tls_model (TlsModel.ElfGd)) (tls_value (symbol_value_data name _ _)))) + (elf_tls_get_addr name)) + +;;;;; Rules for `bitcast`;;;;;;;;; + +;; These rules should probably be handled in `gen_bitcast`, but it's convenient to have that return +;; a single register, instead of a `ValueRegs` +(rule 3 (lower (has_type $I128 (bitcast _ v @ (value_type (ty_supported_vec _))))) + (value_regs + (gen_extractlane $I64X2 v 0) + (gen_extractlane $I64X2 v 1))) + +;; Move the high half into a vector register, and then use vslide1up to move it up and +;; insert the lower half in one instruction. 
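+;; Roughly, writing lane 0 on the left (illustrative only):
+;;
+;;     after `vmv.s.x` of the high half:        [ hi, ?  ]
+;;     after `vslide1up.vx` with the low half:  [ lo, hi ]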
+(rule 2 (lower (has_type (ty_supported_vec _) (bitcast _ v @ (value_type $I128)))) + (let ((lo XReg (value_regs_get v 0)) + (hi XReg (value_regs_get v 1)) + (vstate VState (vstate_from_type $I64X2)) + (vec VReg (rv_vmv_sx hi vstate))) + (rv_vslide1up_vx vec vec lo (unmasked) vstate))) + +;; `gen_bitcast` below only works with single register values, so handle I128 +;; specially here. +(rule 1 (lower (has_type $I128 (bitcast _ v @ (value_type $I128)))) + v) + +(rule 0 (lower (has_type out_ty (bitcast _ v @ (value_type in_ty)))) + (gen_bitcast v in_ty out_ty)) + +;;;;; Rules for `ceil`;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (ceil x))) + (gen_float_round (FRM.RUP) x ty)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (ceil x))) + (gen_vec_round x (FRM.RUP) ty)) + +;;;;; Rules for `floor`;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (floor x))) + (gen_float_round (FRM.RDN) x ty)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (floor x))) + (gen_vec_round x (FRM.RDN) ty)) + +;;;;; Rules for `trunc`;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (trunc x))) + (gen_float_round (FRM.RTZ) x ty)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (trunc x))) + (gen_vec_round x (FRM.RTZ) ty)) + +;;;;; Rules for `nearest`;;;;;;;;; +(rule 0 (lower (has_type (ty_supported_float ty) (nearest x))) + (gen_float_round (FRM.RNE) x ty)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (nearest x))) + (gen_vec_round x (FRM.RNE) ty)) + + +;;;;; Rules for `select_spectre_guard`;;;;;;;;; + +;; SelectSpectreGuard is equivalent to Select, but we should not use a branch based +;; lowering for it. Instead we use a conditional move based lowering. +;; +;; We don't have cmov's in RISC-V either, but we can emulate those using bitwise +;; operations, which is what we do below. + +;; Base case: use `gen_bmask` to generate a 0 mask or -1 mask from the value of +;; `cmp`. This is then used with some bit twiddling to produce the final result. +(rule 0 (lower (has_type (fits_in_64 _) (select_spectre_guard cmp x y))) + (let ((mask XReg (gen_bmask cmp))) + (rv_or (rv_and mask x) (rv_andn y mask)))) +(rule 1 (lower (has_type $I128 (select_spectre_guard cmp x y))) + (let ((mask XReg (gen_bmask cmp))) + (value_regs + (rv_or (rv_and mask (value_regs_get x 0)) (rv_andn (value_regs_get y 0) mask)) + (rv_or (rv_and mask (value_regs_get x 1)) (rv_andn (value_regs_get y 1) mask))))) + +;; Special case when an argument is the constant zero as some ands and ors +;; can be folded away. +(rule 2 (lower (has_type (fits_in_64 _) (select_spectre_guard cmp (i64_from_iconst 0) y))) + (rv_andn y (gen_bmask cmp))) +(rule 3 (lower (has_type (fits_in_64 _) (select_spectre_guard cmp x (i64_from_iconst 0)))) + (rv_and x (gen_bmask cmp))) + +;;;;; Rules for `bmask`;;;;;;;;; +(rule + (lower (has_type oty (bmask x))) + (lower_bmask x oty)) + +;; N.B.: the Ret itself is generated by the ABI. 
+(rule (lower (return args)) + (lower_return args)) + +;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;; + +(rule (lower (get_frame_pointer)) + (gen_mov_from_preg (fp_reg))) + +(rule (lower (get_stack_pointer)) + (gen_mov_from_preg (sp_reg))) + +(rule (lower (get_return_address)) + (load_ra)) + +;;; Rules for `iabs` ;;;;;;;;;;;;; + +;; I64 and lower +;; Generate the following code: +;; sext.{b,h,w} a0, a0 +;; neg a1, a0 +;; max a0, a0, a1 +(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iabs x))) + (let ((extended XReg (sext x)) + (negated XReg (rv_neg extended))) + (gen_select_xreg (cmp_gt extended negated) extended negated))) + +;; For vectors we generate the same code, but with vector instructions +;; we can skip the sign extension, since the vector unit will only process +;; Element Sized chunks. +(rule 1 (lower (has_type (ty_supported_vec ty) (iabs x))) + (let ((negated VReg (rv_vneg_v x (unmasked) ty))) + (rv_vmax_vv x negated (unmasked) ty))) + +;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (call (func_ref_data sig_ref extname dist) inputs)) + (gen_call sig_ref extname dist inputs)) + +(rule (lower (call_indirect sig_ref val inputs)) + (gen_call_indirect sig_ref val inputs)) + +;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (return_call (func_ref_data sig_ref extname dist) args)) + (gen_return_call sig_ref extname dist args)) + +(rule (lower (return_call_indirect sig_ref callee args)) + (gen_return_call_indirect sig_ref callee args)) + + +;;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (extractlane x @ (value_type ty) (u8_from_uimm8 idx))) + (gen_extractlane ty x idx)) + +;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We can insert a lane by using a masked splat from an X register. +;; Build a mask that is only enabled in the lane we want to insert. +;; Then use a masked splat (vmerge) to insert the value. +(rule 0 (lower (insertlane vec @ (value_type (ty_supported_vec ty)) + val @ (value_type (ty_int _)) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vmerge_vxm vec val mask ty))) + +;; Similar to above, but using the float variants of the instructions. +(rule 1 (lower (insertlane vec @ (value_type (ty_supported_vec ty)) + val @ (value_type (ty_supported_float _)) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vfmerge_vfm vec val mask ty))) + +;; If we are inserting from an Imm5 const we can use the immediate +;; variant of vmerge. +(rule 2 (lower (insertlane vec @ (value_type (ty_supported_vec ty)) + (i64_from_iconst (imm5_from_i64 imm)) + (u8_from_uimm8 lane))) + (let ((mask VReg (gen_vec_mask (u64_shl 1 lane)))) + (rv_vmerge_vim vec imm mask ty))) + +;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type ty (splat n @ (value_type (ty_supported_float _))))) + (rv_vfmv_vf n ty)) + +(rule 1 (lower (has_type ty (splat n @ (value_type (ty_int_ref_scalar_64 _))))) + (rv_vmv_vx n ty)) + +(rule 2 (lower (has_type ty (splat (iconst (u64_from_imm64 (imm5_from_u64 imm)))))) + (rv_vmv_vi imm ty)) + +;; TODO: We can splat out more patterns by using for example a vmv.v.i i8x16 for +;; a i64x2 const with a compatible bit pattern. The AArch64 Backend does something +;; similar in its splat rules. 
+;; TODO: Look through bitcasts when splatting out registers. We can use +;; `vmv.v.x` in a `(splat.f32x4 (bitcast.f32 val))`. And vice versa for integers. + +;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_supported_vec ty) (uadd_sat x y))) + (rv_vsaddu_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (uadd_sat x (splat y)))) + (rv_vsaddu_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (uadd_sat (splat x) y))) + (rv_vsaddu_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (uadd_sat x y))) + (if-let y_imm (replicated_imm5 y)) + (rv_vsaddu_vi x y_imm (unmasked) ty)) + +(rule 4 (lower (has_type (ty_supported_vec ty) (uadd_sat x y))) + (if-let x_imm (replicated_imm5 x)) + (rv_vsaddu_vi y x_imm (unmasked) ty)) + +;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_supported_vec ty) (sadd_sat x y))) + (rv_vsadd_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (sadd_sat x (splat y)))) + (rv_vsadd_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (sadd_sat (splat x) y))) + (rv_vsadd_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_supported_vec ty) (sadd_sat x y))) + (if-let y_imm (replicated_imm5 y)) + (rv_vsadd_vi x y_imm (unmasked) ty)) + +(rule 4 (lower (has_type (ty_supported_vec ty) (sadd_sat x y))) + (if-let x_imm (replicated_imm5 x)) + (rv_vsadd_vi y x_imm (unmasked) ty)) + +;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_supported_vec ty) (usub_sat x y))) + (rv_vssubu_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (usub_sat x (splat y)))) + (rv_vssubu_vx x y (unmasked) ty)) + +;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_supported_vec ty) (ssub_sat x y))) + (rv_vssub_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (ssub_sat x (splat y)))) + (rv_vssub_vx x y (unmasked) ty)) + +;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Here we do a Vector Reduce operation. Get the unsigned minimum value of any +;; lane in the vector. The fixed input to the reduce operation is a 1. +;; This way, if any lane is 0, the result will be 0. Otherwise, the result will +;; be a 1. +;; The reduce operation leaves the result in the lowest lane, we then move it +;; into the destination X register. +(rule (lower (vall_true x @ (value_type (ty_supported_vec ty)))) + (if-let one (i8_to_imm5 1)) + ;; We don't need to broadcast the immediate into all lanes, only into lane 0. + ;; I did it this way since it uses one less instruction than with a vmv.s.x. + (let ((fixed VReg (rv_vmv_vi one ty)) + (min VReg (rv_vredminu_vs x fixed (unmasked) ty))) + (rv_vmv_xs min ty))) + + +;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Here we do a Vector Reduce operation. Get the unsigned maximum value of the +;; input vector register. Move the max to an X register, and do a `snez` on it +;; to ensure its either 1 or 0. 
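+;; Roughly, as a scalar model of the two reductions (illustrative only):
+;;
+;;     vall_true(x) = umin(1, x[0], ..., x[n-1])        ;; already 0 or 1
+;;     vany_true(x) = (umax(x[0], ..., x[n-1]) != 0)    ;; hence the `snez`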
+(rule (lower (vany_true x @ (value_type (ty_supported_vec ty)))) + (let ((max VReg (rv_vredmaxu_vs x x (unmasked) ty)) + (x_max XReg (rv_vmv_xs max ty))) + (rv_snez x_max))) + + +;;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; To check if the MSB of a lane is set, we do a `vmslt` with zero, this sets +;; the mask bit to 1 if the value is negative (MSB 1) and 0 if not. We can then +;; just move that mask to an X Register. +;; +;; We must ensure that the move to the X register has a SEW with enough bits +;; to hold the full mask. Additionally, in some cases (e.g. i64x2) we are going +;; to read some tail bits. These are undefined, so we need to further mask them +;; off. +(rule (lower (vhigh_bits x @ (value_type (ty_supported_vec ty)))) + (let ((mask VReg (rv_vmslt_vx x (zero_reg) (unmasked) ty)) + ;; Here we only need I64X1, but emit an AVL of 2 since it + ;; saves one vector state change in the case of I64X2. + ;; + ;; TODO: For types that have more lanes than element bits, we can + ;; use the original type as a VState and avoid a state change. + (x_mask XReg (rv_vmv_xs mask (vstate_from_type $I64X2)))) + (gen_andi x_mask (ty_lane_mask ty)))) + +;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_supported_vec ty) (swizzle x y))) + (rv_vrgather_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (swizzle x (splat y)))) + (rv_vrgather_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (swizzle x y))) + (if-let y_imm (replicated_uimm5 y)) + (rv_vrgather_vi x y_imm (unmasked) ty)) + +;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Use a vrgather to load all 0-15 lanes from x. And then modify the mask to load all +;; 16-31 lanes from y. Finally, use a vor to combine the two vectors. +;; +;; vrgather will insert a 0 for lanes that are out of bounds, so we can let it load +;; negative and out of bounds indexes. +(rule (lower (has_type (ty_supported_vec ty @ $I8X16) (shuffle x y (vconst_from_immediate mask)))) + (if-let neg16 (i8_to_imm5 -16)) + (let ((x_mask VReg (gen_constant ty mask)) + (x_lanes VReg (rv_vrgather_vv x x_mask (unmasked) ty)) + (y_mask VReg (rv_vadd_vi x_mask neg16 (unmasked) ty)) + (y_lanes VReg (rv_vrgather_vv y y_mask (unmasked) ty))) + (rv_vor_vv x_lanes y_lanes (unmasked) ty))) + +;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Slide down half the vector, and do a signed extension. +(rule 0 (lower (has_type (ty_supported_vec out_ty) (swiden_high x @ (value_type in_ty)))) + (rv_vsext_vf2 (gen_slidedown_half in_ty x) (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_supported_vec out_ty) (swiden_high (swiden_high x @ (value_type in_ty))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vsext_vf4 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_supported_vec out_ty) (swiden_high (swiden_high (swiden_high x @ (value_type in_ty)))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vsext_vf8 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Slide down half the vector, and do a zero extension. 
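+;; For example (illustrative only): `uwiden_high` from `i16x8` to `i32x4` slides
+;; the source down by 4 lanes, so lanes 4..7 land in lanes 0..3, and then
+;; `vzext.vf2` doubles each lane from 16 to 32 bits.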
+(rule 0 (lower (has_type (ty_supported_vec out_ty) (uwiden_high x @ (value_type in_ty)))) + (rv_vzext_vf2 (gen_slidedown_half in_ty x) (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_supported_vec out_ty) (uwiden_high (uwiden_high x @ (value_type in_ty))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vzext_vf4 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_supported_vec out_ty) (uwiden_high (uwiden_high (uwiden_high x @ (value_type in_ty)))))) + (if-let (uimm5_from_u64 amt) (u64_sub (ty_lane_count in_ty) (ty_lane_count out_ty))) + (rv_vzext_vf8 (rv_vslidedown_vi x amt (unmasked) in_ty) (unmasked) out_ty)) + +;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_supported_vec out_ty) (swiden_low x))) + (rv_vsext_vf2 x (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_supported_vec out_ty) (swiden_low (swiden_low x)))) + (rv_vsext_vf4 x (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_supported_vec out_ty) (swiden_low (swiden_low (swiden_low x))))) + (rv_vsext_vf8 x (unmasked) out_ty)) + +;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_supported_vec out_ty) (uwiden_low x))) + (rv_vzext_vf2 x (unmasked) out_ty)) + +(rule 1 (lower (has_type (ty_supported_vec out_ty) (uwiden_low (uwiden_low x)))) + (rv_vzext_vf4 x (unmasked) out_ty)) + +(rule 2 (lower (has_type (ty_supported_vec out_ty) (uwiden_low (uwiden_low (uwiden_low x))))) + (rv_vzext_vf8 x (unmasked) out_ty)) + +;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We don't have a dedicated instruction for this, rearrange the register elements +;; and use a vadd. +;; +;; We do this by building two masks, one for the even elements and one for the odd +;; elements. Using vcompress we can extract the elements and group them together. +;; +;; This is likely not the optimal way of doing this. LLVM does this using a bunch +;; of vrgathers (See: https://godbolt.org/z/jq8Wj8WG4), that doesn't seem to be +;; too much better than this. +;; +;; However V8 does something better. They use 2 vcompresses using LMUL2, that means +;; that they can do the whole thing in 3 instructions (2 vcompress + vadd). We don't +;; support LMUL > 1, so we can't do that. +(rule (lower (has_type (ty_supported_vec ty) (iadd_pairwise x y))) + (if-let half_size (u64_to_uimm5 (u64_udiv (ty_lane_count ty) 2))) + (let ((odd_mask VReg (gen_vec_mask 0x5555555555555555)) + (lhs_lo VReg (rv_vcompress_vm x odd_mask ty)) + (lhs_hi VReg (rv_vcompress_vm y odd_mask ty)) + (lhs VReg (rv_vslideup_vvi lhs_lo lhs_hi half_size (unmasked) ty)) + + (even_mask VReg (gen_vec_mask 0xAAAAAAAAAAAAAAAA)) + (rhs_lo VReg (rv_vcompress_vm x even_mask ty)) + (rhs_hi VReg (rv_vcompress_vm y even_mask ty)) + (rhs VReg (rv_vslideup_vvi rhs_lo rhs_hi half_size (unmasked) ty))) + (rv_vadd_vv lhs rhs (unmasked) ty))) + +;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `avg_round` computes the unsigned average with rounding: a := (x + y + 1) // 2 +;; +;; See Section "2–5 Average of Two Integers" of the Hacker's Delight book +;; +;; The floor average of two integers without overflow can be computed as: +;; t = (x & y) + ((x ^ y) >> 1) +;; +;; The right shift should be a logical shift if the integers are unsigned. +;; +;; We are however interested in the ceiling average (x + y + 1). 
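+;; (Worked example: for x = 5, y = 6 the floor form gives
+;; (5 & 6) + ((5 ^ 6) >> 1) = 4 + 1 = 5, whereas the rounded average we want is
+;; (5 + 6 + 1) >> 1 = 6; rounding the shifted term up instead gives 4 + 2 = 6.)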
For that +;; we use a special rounding mode in the right shift instruction. +;; +;; For the right shift instruction we use `vssrl` which is a Scaling Shift +;; Right Logical instruction using the `vxrm` fixed-point rounding mode. The +;; default rounding mode is `rnu` (round-to-nearest-up (add +0.5 LSB)). +;; Which is coincidentally the rounding mode we want for `avg_round`. +(rule (lower (has_type (ty_supported_vec ty) (avg_round x y))) + (if-let one (u64_to_uimm5 1)) + (let ((lhs VReg (rv_vand_vv x y (unmasked) ty)) + (xor VReg (rv_vxor_vv x y (unmasked) ty)) + (rhs VReg (rv_vssrl_vi xor one (unmasked) ty))) + (rv_vadd_vv lhs rhs (unmasked) ty))) + +;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_supported_vec ty) (scalar_to_vector x))) + (if (ty_vector_float ty)) + (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) + (elem VReg (rv_vfmv_sf x ty)) + (mask VReg (gen_vec_mask 1))) + (rv_vmerge_vvm zero elem mask ty))) + +(rule 1 (lower (has_type (ty_supported_vec ty) (scalar_to_vector x))) + (if (ty_vector_not_float ty)) + (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) + (mask VReg (gen_vec_mask 1))) + (rv_vmerge_vxm zero x mask ty))) + +(rule 2 (lower (has_type (ty_supported_vec ty) (scalar_to_vector (imm5_from_value x)))) + (let ((zero VReg (rv_vmv_vx (zero_reg) ty)) + (mask VReg (gen_vec_mask 1))) + (rv_vmerge_vim zero x mask ty))) + +;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_supported_vec ty) (sqmul_round_sat x y))) + (rv_vsmul_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_supported_vec ty) (sqmul_round_sat x (splat y)))) + (rv_vsmul_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_supported_vec ty) (sqmul_round_sat (splat x) y))) + (rv_vsmul_vx y x (unmasked) ty)) + +;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_supported_vec out_ty) (snarrow x @ (value_type in_ty) y))) + (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_clip VReg (rv_vnclip_wi x zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclip_wi y zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) + +;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (ty_supported_vec out_ty) (uunarrow x @ (value_type in_ty) y))) + (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_clip VReg (rv_vnclipu_wi x zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclipu_wi y zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) + +;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; We don't have a instruction that saturates a signed source into an unsigned destination. +;; To correct for this we just remove negative values using `vmax` and then use the normal +;; unsigned to unsigned narrowing instruction. 
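+;; Per lane this is roughly (illustrative, e.g. narrowing i16 lanes to u8 lanes):
+;;
+;;     unarrow(x) = umin(smax(x, 0), 255)
+;;
+;; where the `smax` with zero is the `vmax` below, and the upper clamp plus the
+;; truncation is performed by the saturating `vnclipu`.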
+ +(rule (lower (has_type (ty_supported_vec out_ty) (unarrow x @ (value_type in_ty) y))) + (if-let lane_diff (u64_to_uimm5 (u64_udiv (ty_lane_count out_ty) 2))) + (if-let zero (u64_to_uimm5 0)) + (let ((x_pos VReg (rv_vmax_vx x (zero_reg) (unmasked) in_ty)) + (y_pos VReg (rv_vmax_vx y (zero_reg) (unmasked) in_ty)) + (x_clip VReg (rv_vnclipu_wi x_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty)))) + (y_clip VReg (rv_vnclipu_wi y_pos zero (unmasked) (vstate_mf2 (ty_half_lanes out_ty))))) + (rv_vslideup_vvi x_clip y_clip lane_diff (unmasked) out_ty))) diff --git a/hbcb/src/lower.rs b/hbcb/src/lower.rs new file mode 100644 index 0000000..6e12183 --- /dev/null +++ b/hbcb/src/lower.rs @@ -0,0 +1,36 @@ +//! Lowering rules for Riscv64. +use { + crate::{inst::*, Riscv64Backend}, + cranelift_codegen::{ + ir::Inst as IRInst, + machinst::{lower::*, *}, + }, +}; +pub mod isle; + +//============================================================================= +// Lowering-backend trait implementation. + +impl LowerBackend for Riscv64Backend { + type FactFlowState = (); + type MInst = Inst; + + fn lower(&self, ctx: &mut Lower, ir_inst: IRInst) -> Option { + isle::lower(ctx, self, ir_inst) + } + + fn lower_branch( + &self, + ctx: &mut Lower, + ir_inst: IRInst, + targets: &[MachLabel], + ) -> Option<()> { + isle::lower_branch(ctx, self, ir_inst, targets) + } + + fn maybe_pinned_reg(&self) -> Option { + // pinned register is a register that you want put anything in it. + // right now riscv64 not support this feature. + None + } +} diff --git a/hbcb/src/lower/isle.rs b/hbcb/src/lower/isle.rs new file mode 100644 index 0000000..8075156 --- /dev/null +++ b/hbcb/src/lower/isle.rs @@ -0,0 +1,730 @@ +//! ISLE integration glue code for riscv64 lowering. + +// Pull in the ISLE generated code. +#[allow(unused)] +pub mod generated_code; +// Types that the generated ISLE code uses via `use super::*`. +use { + self::generated_code::{FpuOPWidth, VecAluOpRR, VecLmul}, + crate::{inst::*, Riscv64Backend}, + cranelift_codegen::{ + abi::Riscv64ABICallSite, + ir::{ + immediates::*, types::*, AtomicRmwOp, BlockCall, ExternalName, Inst, InstructionData, + MemFlags, Opcode, TrapCode, Value, ValueList, + }, + isa::{self}, + lower::args::{FReg, VReg, WritableFReg, WritableVReg, WritableXReg, XReg}, + machinst::{ + isle::*, ArgPair, CallInfo, InstOutput, IsTailCall, MachInst, Reg, VCodeConstant, + VCodeConstantData, + }, + }, + generated_code::MInst, + regalloc2::PReg, + std::{boxed::Box, vec::Vec}, +}; + +type BoxCallInfo = Box>; +type BoxCallIndInfo = Box>; +type BoxReturnCallInfo = Box>; +type BoxReturnCallIndInfo = Box>; +type BoxExternalName = Box; +type VecMachLabel = Vec; +type VecArgPair = Vec; + +pub(crate) struct RV64IsleContext<'a, 'b, I, B> +where + I: VCodeInst, + B: LowerBackend, +{ + pub lower_ctx: &'a mut Lower<'b, I>, + pub backend: &'a B, + /// Precalucated value for the minimum vector register size. Will be 0 if + /// vectors are not supported. 
+ min_vec_reg_size: u64, +} + +impl<'a, 'b> RV64IsleContext<'a, 'b, MInst, Riscv64Backend> { + fn new(lower_ctx: &'a mut Lower<'b, MInst>, backend: &'a Riscv64Backend) -> Self { + Self { lower_ctx, backend, min_vec_reg_size: backend.isa_flags.min_vec_reg_size() } + } +} + +impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> { + isle_lower_prelude_methods!(); + + isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICallSite); + + fn gen_return_call( + &mut self, + callee_sig: SigRef, + callee: ExternalName, + distance: RelocDistance, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let call_site = Riscv64ABICallSite::from_func( + self.lower_ctx.sigs(), + callee_sig, + &callee, + IsTailCall::Yes, + distance, + caller_conv, + self.backend.flags().clone(), + ); + call_site.emit_return_call(self.lower_ctx, args); + + InstOutput::new() + } + + fn gen_return_call_indirect( + &mut self, + callee_sig: SigRef, + callee: Value, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let callee = self.put_in_reg(callee); + + let call_site = Riscv64ABICallSite::from_ptr( + self.lower_ctx.sigs(), + callee_sig, + callee, + IsTailCall::Yes, + caller_conv, + self.backend.flags().clone(), + ); + call_site.emit_return_call(self.lower_ctx, args); + + InstOutput::new() + } + + fn fpu_op_width_from_ty(&mut self, ty: Type) -> FpuOPWidth { + match ty { + F16 => FpuOPWidth::H, + F32 => FpuOPWidth::S, + F64 => FpuOPWidth::D, + F128 => FpuOPWidth::Q, + _ => unimplemented!("Unimplemented FPU Op Width: {ty}"), + } + } + + fn vreg_new(&mut self, r: Reg) -> VReg { + VReg::new(r).unwrap() + } + + fn writable_vreg_new(&mut self, r: WritableReg) -> WritableVReg { + r.map(|wr| VReg::new(wr).unwrap()) + } + + fn writable_vreg_to_vreg(&mut self, arg0: WritableVReg) -> VReg { + arg0.to_reg() + } + + fn writable_vreg_to_writable_reg(&mut self, arg0: WritableVReg) -> WritableReg { + arg0.map(|vr| vr.to_reg()) + } + + fn vreg_to_reg(&mut self, arg0: VReg) -> Reg { + *arg0 + } + + fn xreg_new(&mut self, r: Reg) -> XReg { + XReg::new(r).unwrap() + } + + fn writable_xreg_new(&mut self, r: WritableReg) -> WritableXReg { + r.map(|wr| XReg::new(wr).unwrap()) + } + + fn writable_xreg_to_xreg(&mut self, arg0: WritableXReg) -> XReg { + arg0.to_reg() + } + + fn writable_xreg_to_writable_reg(&mut self, arg0: WritableXReg) -> WritableReg { + arg0.map(|xr| xr.to_reg()) + } + + fn xreg_to_reg(&mut self, arg0: XReg) -> Reg { + *arg0 + } + + fn freg_new(&mut self, r: Reg) -> FReg { + FReg::new(r).unwrap() + } + + fn writable_freg_new(&mut self, r: WritableReg) -> WritableFReg { + r.map(|wr| FReg::new(wr).unwrap()) + } + + fn writable_freg_to_freg(&mut self, arg0: WritableFReg) -> FReg { + arg0.to_reg() + } + + fn writable_freg_to_writable_reg(&mut self, arg0: WritableFReg) -> WritableReg { + arg0.map(|fr| fr.to_reg()) + } + + fn freg_to_reg(&mut self, arg0: FReg) -> Reg { + *arg0 + } + + fn min_vec_reg_size(&mut self) -> u64 { + self.min_vec_reg_size + } + + #[inline] + fn ty_vec_fits_in_register(&mut self, ty: Type) -> Option { + if ty.is_vector() && (ty.bits() as u64) <= self.min_vec_reg_size() { + 
Some(ty) + } else { + None + } + } + + fn ty_supported(&mut self, ty: Type) -> Option { + let lane_type = ty.lane_type(); + let supported = match ty { + // Scalar integers are always supported + ty if ty.is_int() => true, + // Floating point types depend on certain extensions + F16 => self.backend.isa_flags.has_zfh(), + // F32 depends on the F extension + F32 => self.backend.isa_flags.has_f(), + // F64 depends on the D extension + F64 => self.backend.isa_flags.has_d(), + + // The base vector extension supports all integer types, up to 64 bits + // as long as they fit in a register + ty if self.ty_vec_fits_in_register(ty).is_some() + && lane_type.is_int() + && lane_type.bits() <= 64 => + { + true + } + + // If the vector type has floating point lanes then the spec states: + // + // Vector instructions where any floating-point vector operand’s EEW is not a + // supported floating-point type width (which includes when FLEN < SEW) are reserved. + // + // So we also have to check if we support the scalar version of the type. + ty if self.ty_vec_fits_in_register(ty).is_some() + && lane_type.is_float() + && self.ty_supported(lane_type).is_some() + // Additionally the base V spec only supports 32 and 64 bit floating point types. + && (lane_type.bits() == 32 || lane_type.bits() == 64) => + { + true + } + + // Otherwise do not match + _ => false, + }; + + if supported { + Some(ty) + } else { + None + } + } + + fn ty_supported_float(&mut self, ty: Type) -> Option { + self.ty_supported(ty).filter(|ty| ty.is_float()) + } + + fn ty_supported_vec(&mut self, ty: Type) -> Option { + self.ty_supported(ty).filter(|ty| ty.is_vector()) + } + + fn load_ra(&mut self) -> Reg { + if self.backend.flags.preserve_frame_pointers() { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::Load { + rd: tmp, + op: LoadOP::Ld, + flags: MemFlags::trusted(), + from: AMode::FPOffset(8), + }); + tmp.to_reg() + } else { + link_reg() + } + } + + fn label_to_br_target(&mut self, label: MachLabel) -> CondBrTarget { + CondBrTarget::Label(label) + } + + fn imm12_and(&mut self, imm: Imm12, x: u64) -> Imm12 { + Imm12::from_i16(imm.as_i16() & (x as i16)) + } + + fn fli_constant_from_u64(&mut self, ty: Type, imm: u64) -> Option { + FliConstant::maybe_from_u64(ty, imm) + } + + fn fli_constant_from_negated_u64(&mut self, ty: Type, imm: u64) -> Option { + let negated_imm = match ty { + F64 => imm ^ 0x8000000000000000, + F32 => imm ^ 0x80000000, + _ => unimplemented!(), + }; + + FliConstant::maybe_from_u64(ty, negated_imm) + } + + fn i64_generate_imm(&mut self, imm: i64) -> Option<(Imm20, Imm12)> { + MInst::generate_imm(imm as u64) + } + + fn i64_shift_for_lui(&mut self, imm: i64) -> Option<(u64, Imm12)> { + let trailing = imm.trailing_zeros(); + if trailing < 12 { + return None; + } + + let shift = Imm12::from_i16(trailing as i16 - 12); + let base = (imm as u64) >> trailing; + Some((base, shift)) + } + + fn i64_shift(&mut self, imm: i64) -> Option<(i64, Imm12)> { + let trailing = imm.trailing_zeros(); + // We can do without this condition but in this case there is no need to go further + if trailing == 0 { + return None; + } + + let shift = Imm12::from_i16(trailing as i16); + let base = imm >> trailing; + Some((base, shift)) + } + + #[inline] + fn emit(&mut self, arg0: &MInst) -> Unit { + self.lower_ctx.emit(arg0.clone()); + } + + #[inline] + fn imm12_from_u64(&mut self, arg0: u64) -> Option { + Imm12::maybe_from_u64(arg0) + } + + #[inline] + fn imm12_from_i64(&mut self, arg0: i64) -> Option { + Imm12::maybe_from_i64(arg0) + } + + 
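+    // Note: RISC-V I-type immediates are 12 bits, sign extended, so the
+    // `imm12_*` helpers above only match values in -2048..=2047 (assuming the
+    // usual behavior of `Imm12::maybe_from_u64`: 2047 matches, 2048 does not).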
#[inline] + fn imm12_is_zero(&mut self, imm: Imm12) -> Option<()> { + if imm.as_i16() == 0 { + Some(()) + } else { + None + } + } + + #[inline] + fn imm20_from_u64(&mut self, arg0: u64) -> Option { + Imm20::maybe_from_u64(arg0) + } + + #[inline] + fn imm20_from_i64(&mut self, arg0: i64) -> Option { + Imm20::maybe_from_i64(arg0) + } + + #[inline] + fn imm20_is_zero(&mut self, imm: Imm20) -> Option<()> { + if imm.as_i32() == 0 { + Some(()) + } else { + None + } + } + + #[inline] + fn imm5_from_u64(&mut self, arg0: u64) -> Option { + Imm5::maybe_from_i8(i8::try_from(arg0 as i64).ok()?) + } + + #[inline] + fn imm5_from_i64(&mut self, arg0: i64) -> Option { + Imm5::maybe_from_i8(i8::try_from(arg0).ok()?) + } + + #[inline] + fn i8_to_imm5(&mut self, arg0: i8) -> Option { + Imm5::maybe_from_i8(arg0) + } + + #[inline] + fn uimm5_bitcast_to_imm5(&mut self, arg0: UImm5) -> Imm5 { + Imm5::from_bits(arg0.bits() as u8) + } + + #[inline] + fn uimm5_from_u8(&mut self, arg0: u8) -> Option { + UImm5::maybe_from_u8(arg0) + } + + #[inline] + fn uimm5_from_u64(&mut self, arg0: u64) -> Option { + arg0.try_into().ok().and_then(UImm5::maybe_from_u8) + } + + #[inline] + fn writable_zero_reg(&mut self) -> WritableReg { + writable_zero_reg() + } + + #[inline] + fn zero_reg(&mut self) -> XReg { + XReg::new(zero_reg()).unwrap() + } + + fn is_non_zero_reg(&mut self, reg: XReg) -> Option<()> { + if reg != self.zero_reg() { + Some(()) + } else { + None + } + } + + fn is_zero_reg(&mut self, reg: XReg) -> Option<()> { + if reg == self.zero_reg() { + Some(()) + } else { + None + } + } + + #[inline] + fn imm_from_bits(&mut self, val: u64) -> Imm12 { + Imm12::maybe_from_u64(val).unwrap() + } + + #[inline] + fn imm_from_neg_bits(&mut self, val: i64) -> Imm12 { + Imm12::maybe_from_i64(val).unwrap() + } + + fn frm_bits(&mut self, frm: &FRM) -> UImm5 { + UImm5::maybe_from_u8(frm.bits()).unwrap() + } + + fn u8_as_i32(&mut self, x: u8) -> i32 { + x as i32 + } + + fn imm12_const(&mut self, val: i32) -> Imm12 { + if let Some(res) = Imm12::maybe_from_i64(val as i64) { + res + } else { + panic!("Unable to make an Imm12 value from {val}") + } + } + + fn imm12_const_add(&mut self, val: i32, add: i32) -> Imm12 { + Imm12::maybe_from_i64((val + add) as i64).unwrap() + } + + fn imm12_add(&mut self, val: Imm12, add: i32) -> Option { + Imm12::maybe_from_i64((i32::from(val.as_i16()) + add).into()) + } + + // + fn gen_shamt(&mut self, ty: Type, shamt: XReg) -> ValueRegs { + let ty_bits = if ty.bits() > 64 { 64 } else { ty.bits() }; + let ty_bits = i16::try_from(ty_bits).unwrap(); + let shamt = { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: tmp, + rs: shamt.to_reg(), + imm12: Imm12::from_i16(ty_bits - 1), + }); + tmp.to_reg() + }; + let len_sub_shamt = { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::load_imm12(tmp, Imm12::from_i16(ty_bits))); + let len_sub_shamt = self.temp_writable_reg(I64); + self.emit(&MInst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: len_sub_shamt, + rs1: tmp.to_reg(), + rs2: shamt, + }); + len_sub_shamt.to_reg() + }; + ValueRegs::two(shamt, len_sub_shamt) + } + + fn has_v(&mut self) -> bool { + self.backend.isa_flags.has_v() + } + + fn has_m(&mut self) -> bool { + self.backend.isa_flags.has_m() + } + + fn has_zfa(&mut self) -> bool { + self.backend.isa_flags.has_zfa() + } + + fn has_zfh(&mut self) -> bool { + self.backend.isa_flags.has_zfh() + } + + fn has_zbkb(&mut self) -> bool { + self.backend.isa_flags.has_zbkb() + } + + fn has_zba(&mut self) -> 
bool { + self.backend.isa_flags.has_zba() + } + + fn has_zbb(&mut self) -> bool { + self.backend.isa_flags.has_zbb() + } + + fn has_zbc(&mut self) -> bool { + self.backend.isa_flags.has_zbc() + } + + fn has_zbs(&mut self) -> bool { + self.backend.isa_flags.has_zbs() + } + + fn has_zicond(&mut self) -> bool { + self.backend.isa_flags.has_zicond() + } + + fn gen_reg_offset_amode(&mut self, base: Reg, offset: i64) -> AMode { + AMode::RegOffset(base, offset) + } + + fn gen_sp_offset_amode(&mut self, offset: i64) -> AMode { + AMode::SPOffset(offset) + } + + fn gen_fp_offset_amode(&mut self, offset: i64) -> AMode { + AMode::FPOffset(offset) + } + + fn gen_stack_slot_amode(&mut self, ss: StackSlot, offset: i64) -> AMode { + // Offset from beginning of stackslot area. + let stack_off = self.lower_ctx.abi().sized_stackslot_offsets()[ss] as i64; + let sp_off: i64 = stack_off + offset; + AMode::SlotOffset(sp_off) + } + + fn gen_const_amode(&mut self, c: VCodeConstant) -> AMode { + AMode::Const(c) + } + + fn valid_atomic_transaction(&mut self, ty: Type) -> Option { + if ty.is_int() && ty.bits() <= 64 { + Some(ty) + } else { + None + } + } + + fn is_atomic_rmw_max_etc(&mut self, op: &AtomicRmwOp) -> Option<(AtomicRmwOp, bool)> { + let op = *op; + match op { + crate::ir::AtomicRmwOp::Umin => Some((op, false)), + crate::ir::AtomicRmwOp::Umax => Some((op, false)), + crate::ir::AtomicRmwOp::Smin => Some((op, true)), + crate::ir::AtomicRmwOp::Smax => Some((op, true)), + _ => None, + } + } + + fn sinkable_inst(&mut self, val: Value) -> Option { + self.is_sinkable_inst(val) + } + + fn load_op(&mut self, ty: Type) -> LoadOP { + LoadOP::from_type(ty) + } + + fn store_op(&mut self, ty: Type) -> StoreOP { + StoreOP::from_type(ty) + } + + fn load_ext_name(&mut self, name: ExternalName, offset: i64) -> Reg { + let tmp = self.temp_writable_reg(I64); + self.emit(&MInst::LoadExtName { rd: tmp, name: Box::new(name), offset }); + tmp.to_reg() + } + + fn gen_stack_addr(&mut self, slot: StackSlot, offset: Offset32) -> Reg { + let result = self.temp_writable_reg(I64); + let i = self.lower_ctx.abi().sized_stackslot_addr(slot, i64::from(offset) as u32, result); + self.emit(&i); + result.to_reg() + } + + fn atomic_amo(&mut self) -> AMO { + AMO::SeqCst + } + + fn lower_br_table(&mut self, index: Reg, targets: &[MachLabel]) -> Unit { + let tmp1 = self.temp_writable_reg(I64); + let tmp2 = self.temp_writable_reg(I64); + self.emit(&MInst::BrTable { index, tmp1, tmp2, targets: targets.to_vec() }); + } + + fn fp_reg(&mut self) -> PReg { + px_reg(8) + } + + fn sp_reg(&mut self) -> PReg { + px_reg(2) + } + + #[inline] + fn int_compare(&mut self, kind: &IntCC, rs1: XReg, rs2: XReg) -> IntegerCompare { + IntegerCompare { kind: *kind, rs1: rs1.to_reg(), rs2: rs2.to_reg() } + } + + #[inline] + fn int_compare_decompose(&mut self, cmp: IntegerCompare) -> (IntCC, XReg, XReg) { + (cmp.kind, self.xreg_new(cmp.rs1), self.xreg_new(cmp.rs2)) + } + + #[inline] + fn vstate_from_type(&mut self, ty: Type) -> VState { + VState::from_type(ty) + } + + #[inline] + fn vstate_mf2(&mut self, vs: VState) -> VState { + VState { vtype: VType { lmul: VecLmul::LmulF2, ..vs.vtype }, ..vs } + } + + fn vec_alu_rr_dst_type(&mut self, op: &VecAluOpRR) -> Type { + MInst::canonical_type_for_rc(op.dst_regclass()) + } + + fn bclr_imm(&mut self, ty: Type, i: u64) -> Option { + // Only consider those bits in the immediate which are up to the width + // of `ty`. 
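+        // For example (illustrative): with `ty` = I32 and `i` = 0xFFFF_FFFB
+        // (all low 32 bits set except bit 2), `neg` is 0x4, which has exactly
+        // one bit set, so this returns the bit index 2 as an `Imm12`.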
+ let neg = !i & (u64::MAX >> (64 - ty.bits())); + if neg.count_ones() != 1 { + return None; + } + Imm12::maybe_from_u64(neg.trailing_zeros().into()) + } + + fn binvi_imm(&mut self, i: u64) -> Option { + if i.count_ones() != 1 { + return None; + } + Imm12::maybe_from_u64(i.trailing_zeros().into()) + } + + fn bseti_imm(&mut self, i: u64) -> Option { + self.binvi_imm(i) + } + + fn fcvt_smin_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 { + match (int, float) { + // Saturating cases for larger integers are handled using the + // `fcvt.{w,d}.{s,d}` instruction directly, that automatically + // saturates up/down to the correct limit. + // + // NB: i32/i64 don't use this function because the native RISC-V + // instruction does everything we already need, so only cases for + // i8/i16 are listed here. + (I8, F32) if saturating => f32::from(i8::MIN).to_bits().into(), + (I8, F64) if saturating => f64::from(i8::MIN).to_bits(), + (I16, F32) if saturating => f32::from(i16::MIN).to_bits().into(), + (I16, F64) if saturating => f64::from(i16::MIN).to_bits(), + + (_, F32) if !saturating => f32_cvt_to_int_bounds(true, int.bits()).0.to_bits().into(), + (_, F64) if !saturating => f64_cvt_to_int_bounds(true, int.bits()).0.to_bits(), + _ => unimplemented!(), + } + } + + fn fcvt_smax_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 { + // NB: see `fcvt_smin_bound` for some more comments + match (int, float) { + (I8, F32) if saturating => f32::from(i8::MAX).to_bits().into(), + (I8, F64) if saturating => f64::from(i8::MAX).to_bits(), + (I16, F32) if saturating => f32::from(i16::MAX).to_bits().into(), + (I16, F64) if saturating => f64::from(i16::MAX).to_bits(), + + (_, F32) if !saturating => f32_cvt_to_int_bounds(true, int.bits()).1.to_bits().into(), + (_, F64) if !saturating => f64_cvt_to_int_bounds(true, int.bits()).1.to_bits(), + _ => unimplemented!(), + } + } + + fn fcvt_umax_bound(&mut self, float: Type, int: Type, saturating: bool) -> u64 { + // NB: see `fcvt_smin_bound` for some more comments + match (int, float) { + (I8, F32) if saturating => f32::from(u8::MAX).to_bits().into(), + (I8, F64) if saturating => f64::from(u8::MAX).to_bits(), + (I16, F32) if saturating => f32::from(u16::MAX).to_bits().into(), + (I16, F64) if saturating => f64::from(u16::MAX).to_bits(), + + (_, F32) if !saturating => f32_cvt_to_int_bounds(false, int.bits()).1.to_bits().into(), + (_, F64) if !saturating => f64_cvt_to_int_bounds(false, int.bits()).1.to_bits(), + _ => unimplemented!(), + } + } + + fn fcvt_umin_bound(&mut self, float: Type, saturating: bool) -> u64 { + assert!(!saturating); + match float { + F32 => (-1.0f32).to_bits().into(), + F64 => (-1.0f64).to_bits(), + _ => unimplemented!(), + } + } +} + +/// The main entry point for lowering with ISLE. +pub(crate) fn lower( + lower_ctx: &mut Lower, + backend: &Riscv64Backend, + inst: Inst, +) -> Option { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); + generated_code::constructor_lower(&mut isle_ctx, inst) +} + +/// The main entry point for branch lowering with ISLE. +pub(crate) fn lower_branch( + lower_ctx: &mut Lower, + backend: &Riscv64Backend, + branch: Inst, + targets: &[MachLabel], +) -> Option<()> { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. 
+ let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend); + generated_code::constructor_lower_branch(&mut isle_ctx, branch, targets) +} diff --git a/hbcb/src/lower/isle/generated_code.rs b/hbcb/src/lower/isle/generated_code.rs new file mode 100644 index 0000000..d5d1fea --- /dev/null +++ b/hbcb/src/lower/isle/generated_code.rs @@ -0,0 +1,9 @@ +// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of +// the generated ISLE source below because we include!() it. We must include!() it because its path +// depends on an environment variable; and also because of this, we can't do the `#[path = "..."] +// mod generated_code;` trick either. +#![allow(dead_code, unreachable_code, unreachable_patterns)] +#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)] +#![allow(irrefutable_let_patterns, clippy::clone_on_copy)] + +include!(concat!(env!("ISLE_DIR"), "/isle_riscv64.rs")); diff --git a/hbcb/src/prelude.isle b/hbcb/src/prelude.isle new file mode 100644 index 0000000..413ff00 --- /dev/null +++ b/hbcb/src/prelude.isle @@ -0,0 +1,752 @@ +;; This is a prelude of standard definitions for ISLE, the instruction-selector +;; DSL, as we use it bound to our interfaces. +;; +;; Note that all `extern` functions here are typically defined in the +;; `isle_prelude_methods` macro defined in `src/isa/isle.rs` + +;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `()` +(type Unit (primitive Unit)) + +(decl pure unit () Unit) +(extern constructor unit unit) + +(type bool (primitive bool)) +(extern const $true bool) +(extern const $false bool) + +(type u8 (primitive u8)) +(type u16 (primitive u16)) +(type u32 (primitive u32)) +(type u64 (primitive u64)) +(type u128 (primitive u128)) +(type usize (primitive usize)) + +(type i8 (primitive i8)) +(type i16 (primitive i16)) +(type i32 (primitive i32)) +(type i64 (primitive i64)) +(type i128 (primitive i128)) +(type isize (primitive isize)) + +;; `cranelift-entity`-based identifiers. +(type Type (primitive Type)) +(type Value (primitive Value)) +(type ValueList (primitive ValueList)) +(type BlockCall (primitive BlockCall)) + +;; ISLE representation of `&[Value]`. +(type ValueSlice (primitive ValueSlice)) + +;; Extract the type of a `Value`. +(decl value_type (Type) Value) +(extern extractor infallible value_type value_type) + +;; Extractor that matches a `u32` only if non-negative. +(decl u32_nonnegative (u32) u32) +(extern extractor u32_nonnegative u32_nonnegative) + +;; Extractor that pulls apart an Offset32 into a i32 with the raw +;; signed-32-bit twos-complement bits. +(decl offset32 (i32) Offset32) +(extern extractor infallible offset32 offset32) + +;; Pure/fallible constructor that tests if one u32 is less than or +;; equal to another. +(decl pure partial u32_lteq (u32 u32) Unit) +(extern constructor u32_lteq u32_lteq) + +;; Pure/fallible constructor that tests if one u8 is less than or +;; equal to another. +(decl pure partial u8_lteq (u8 u8) Unit) +(extern constructor u8_lteq u8_lteq) + +;; Pure/fallible constructor that tests if one u8 is strictly less +;; than another. 
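+;; (On the Rust side a `pure partial` constructor like this one is roughly a
+;; side-effect-free method returning `Option`, e.g.
+;; `fn u8_lt(&mut self, a: u8, b: u8) -> Option<Unit>`, where `None` means the
+;; pattern fails to match.)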
+(decl pure partial u8_lt (u8 u8) Unit) +(extern constructor u8_lt u8_lt) + +;;;; Primitive Type Conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl pure u8_as_i8 (u8) i8) +(extern constructor u8_as_i8 u8_as_i8) + +(decl pure u8_as_u32 (u8) u32) +(extern constructor u8_as_u32 u8_as_u32) +(convert u8 u32 u8_as_u32) + +(decl pure u8_as_u64 (u8) u64) +(extern constructor u8_as_u64 u8_as_u64) +(convert u8 u64 u8_as_u64) + +(decl pure u16_as_i16 (u16) i16) +(extern constructor u16_as_i16 u16_as_i16) + +(decl pure u16_as_u32 (u16) u32) +(extern constructor u16_as_u32 u16_as_u32) +(convert u16 u32 u16_as_u32) + +(decl pure u16_as_u64 (u16) u64) +(extern constructor u16_as_u64 u16_as_u64) +(convert u16 u64 u16_as_u64) + +(decl pure u64_as_u8 (u64) u8) +(extern constructor u64_as_u8 u64_as_u8) + +(decl pure u64_as_u16 (u64) u16) +(extern constructor u64_as_u16 u64_as_u16) + +(decl pure u64_as_i64 (u64) i64) +(extern constructor u64_as_i64 u64_as_i64) + +(decl pure partial u16_try_from_u64 (u64) u16) +(extern constructor u16_try_from_u64 u16_try_from_u64) + +(decl pure partial u32_try_from_u64 (u64) u32) +(extern constructor u32_try_from_u64 u32_try_from_u64) + +(decl pure partial i8_try_from_u64 (u64) i8) +(extern constructor i8_try_from_u64 i8_try_from_u64) + +(decl pure partial i16_try_from_u64 (u64) i16) +(extern constructor i16_try_from_u64 i16_try_from_u64) + +(decl pure partial i32_try_from_u64 (u64) i32) +(extern constructor i32_try_from_u64 i32_try_from_u64) + +(decl pure u32_as_u64 (u32) u64) +(extern constructor u32_as_u64 u32_as_u64) +(convert u32 u64 u32_as_u64) + +(decl pure i32_as_i64 (i32) i64) +(extern constructor i32_as_i64 i32_as_i64) +(convert i32 i64 i32_as_i64) + +(decl pure i64_as_u64 (i64) u64) +(extern constructor i64_as_u64 i64_as_u64) + +(decl pure i64_neg (i64) i64) +(extern constructor i64_neg i64_neg) + +(decl pure i8_neg (i8) i8) +(extern constructor i8_neg i8_neg) + +(decl u128_as_u64 (u64) u128) +(extern extractor u128_as_u64 u128_as_u64) + +(decl u64_as_u32 (u32) u64) +(extern extractor u64_as_u32 u64_as_u32) + +(decl u32_as_u16 (u16) u32) +(extern extractor u32_as_u16 u32_as_u16) + +(decl pure u64_as_i32 (u64) i32) +(extern constructor u64_as_i32 u64_as_i32) + +;;;; Primitive Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl pure u8_and (u8 u8) u8) +(extern constructor u8_and u8_and) + +(decl pure u8_shl (u8 u8) u8) +(extern constructor u8_shl u8_shl) + +(decl pure u8_shr (u8 u8) u8) +(extern constructor u8_shr u8_shr) + +(decl pure u8_sub (u8 u8) u8) +(extern constructor u8_sub u8_sub) + +(decl pure u32_add (u32 u32) u32) +(extern constructor u32_add u32_add) + +(decl pure u32_sub (u32 u32) u32) +(extern constructor u32_sub u32_sub) + +(decl pure u32_and (u32 u32) u32) +(extern constructor u32_and u32_and) + +(decl pure u32_shl (u32 u32) u32) +(extern constructor u32_shl u32_shl) + +;; Pure/fallible constructor that tries to add two `u32`s, interpreted +;; as signed values, and fails to match on overflow. 
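+;; For example, adding 0x7fff_ffff and 1 overflows `i32` and therefore fails to
+;; match, while adding 1 and 2 matches and produces 3.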
+(decl pure partial s32_add_fallible (i32 i32) i32) +(extern constructor s32_add_fallible s32_add_fallible) + +(decl pure u64_add (u64 u64) u64) +(extern constructor u64_add u64_add) + +(decl pure u64_sub (u64 u64) u64) +(extern constructor u64_sub u64_sub) + +(decl pure u64_mul (u64 u64) u64) +(extern constructor u64_mul u64_mul) + +(decl pure partial u64_sdiv (u64 u64) u64) +(extern constructor u64_sdiv u64_sdiv) + +(decl pure partial u64_udiv (u64 u64) u64) +(extern constructor u64_udiv u64_udiv) + +(decl pure u64_and (u64 u64) u64) +(extern constructor u64_and u64_and) + +(decl pure u64_or (u64 u64) u64) +(extern constructor u64_or u64_or) + +(decl pure u64_xor (u64 u64) u64) +(extern constructor u64_xor u64_xor) + +(decl pure u64_shl (u64 u64) u64) +(extern constructor u64_shl u64_shl) + +(decl pure imm64_shl (Type Imm64 Imm64) Imm64) +(extern constructor imm64_shl imm64_shl) + +(decl pure imm64_ushr (Type Imm64 Imm64) Imm64) +(extern constructor imm64_ushr imm64_ushr) + +(decl pure imm64_sshr (Type Imm64 Imm64) Imm64) +(extern constructor imm64_sshr imm64_sshr) + +(decl pure u64_not (u64) u64) +(extern constructor u64_not u64_not) + +(decl pure u64_eq (u64 u64) bool) +(extern constructor u64_eq u64_eq) + +(decl pure u64_le (u64 u64) bool) +(extern constructor u64_le u64_le) + +(decl pure u64_lt (u64 u64) bool) +(extern constructor u64_lt u64_lt) + +(decl pure i64_shr (i64 i64) i64) +(extern constructor i64_shr i64_shr) + +(decl pure i64_ctz (i64) i64) +(extern constructor i64_ctz i64_ctz) + +;; Sign extends a u64 from ty bits up to 64bits +(decl pure i64_sextend_u64 (Type u64) i64) +(extern constructor i64_sextend_u64 i64_sextend_u64) + +(decl pure i64_sextend_imm64 (Type Imm64) i64) +(extern constructor i64_sextend_imm64 i64_sextend_imm64) + +(decl pure u64_uextend_imm64 (Type Imm64) u64) +(extern constructor u64_uextend_imm64 u64_uextend_imm64) + +(decl pure imm64_icmp (Type IntCC Imm64 Imm64) Imm64) +(extern constructor imm64_icmp imm64_icmp) + +(decl u64_is_zero (bool) u64) +(extern extractor infallible u64_is_zero u64_is_zero) + +(decl i64_is_zero (bool) i64) +(extern extractor infallible i64_is_zero i64_is_zero) + +(decl u64_zero () u64) +(extractor (u64_zero) (u64_is_zero $true)) + +(decl u64_nonzero (u64) u64) +(extractor (u64_nonzero x) (and (u64_is_zero $false) x)) + +(decl i64_nonzero (i64) i64) +(extractor (i64_nonzero x) (and (i64_is_zero $false) x)) + +(decl pure u64_is_odd (u64) bool) +(extern constructor u64_is_odd u64_is_odd) + +;; Each of these extractors tests whether the upper half of the input equals the +;; lower half of the input +(decl u128_replicated_u64 (u64) u128) +(extern extractor u128_replicated_u64 u128_replicated_u64) +(decl u64_replicated_u32 (u64) u64) +(extern extractor u64_replicated_u32 u64_replicated_u32) +(decl u32_replicated_u16 (u64) u64) +(extern extractor u32_replicated_u16 u32_replicated_u16) +(decl u16_replicated_u8 (u8) u64) +(extern extractor u16_replicated_u8 u16_replicated_u8) + +;; Floating point operations + +(decl pure partial f16_min (Ieee16 Ieee16) Ieee16) +(extern constructor f16_min f16_min) +(decl pure partial f16_max (Ieee16 Ieee16) Ieee16) +(extern constructor f16_max f16_max) +(decl pure f16_neg (Ieee16) Ieee16) +(extern constructor f16_neg f16_neg) +(decl pure f16_abs (Ieee16) Ieee16) +(extern constructor f16_abs f16_abs) +(decl pure f16_copysign (Ieee16 Ieee16) Ieee16) +(extern constructor f16_copysign f16_copysign) +(decl pure partial f32_add (Ieee32 Ieee32) Ieee32) +(extern constructor f32_add f32_add) +(decl pure 
partial f32_sub (Ieee32 Ieee32) Ieee32) +(extern constructor f32_sub f32_sub) +(decl pure partial f32_mul (Ieee32 Ieee32) Ieee32) +(extern constructor f32_mul f32_mul) +(decl pure partial f32_div (Ieee32 Ieee32) Ieee32) +(extern constructor f32_div f32_div) +(decl pure partial f32_sqrt (Ieee32) Ieee32) +(extern constructor f32_sqrt f32_sqrt) +(decl pure partial f32_ceil (Ieee32) Ieee32) +(extern constructor f32_ceil f32_ceil) +(decl pure partial f32_floor (Ieee32) Ieee32) +(extern constructor f32_floor f32_floor) +(decl pure partial f32_trunc (Ieee32) Ieee32) +(extern constructor f32_trunc f32_trunc) +(decl pure partial f32_nearest (Ieee32) Ieee32) +(extern constructor f32_nearest f32_nearest) +(decl pure partial f32_min (Ieee32 Ieee32) Ieee32) +(extern constructor f32_min f32_min) +(decl pure partial f32_max (Ieee32 Ieee32) Ieee32) +(extern constructor f32_max f32_max) +(decl pure f32_neg (Ieee32) Ieee32) +(extern constructor f32_neg f32_neg) +(decl pure f32_abs (Ieee32) Ieee32) +(extern constructor f32_abs f32_abs) +(decl pure f32_copysign (Ieee32 Ieee32) Ieee32) +(extern constructor f32_copysign f32_copysign) +(decl pure partial f64_add (Ieee64 Ieee64) Ieee64) +(extern constructor f64_add f64_add) +(decl pure partial f64_sub (Ieee64 Ieee64) Ieee64) +(extern constructor f64_sub f64_sub) +(decl pure partial f64_mul (Ieee64 Ieee64) Ieee64) +(extern constructor f64_mul f64_mul) +(decl pure partial f64_div (Ieee64 Ieee64) Ieee64) +(extern constructor f64_div f64_div) +(decl pure partial f64_sqrt (Ieee64) Ieee64) +(extern constructor f64_sqrt f64_sqrt) +(decl pure partial f64_ceil (Ieee64) Ieee64) +(extern constructor f64_ceil f64_ceil) +(decl pure partial f64_floor (Ieee64) Ieee64) +(extern constructor f64_floor f64_floor) +(decl pure partial f64_trunc (Ieee64) Ieee64) +(extern constructor f64_trunc f64_trunc) +(decl pure partial f64_nearest (Ieee64) Ieee64) +(extern constructor f64_nearest f64_nearest) +(decl pure partial f64_min (Ieee64 Ieee64) Ieee64) +(extern constructor f64_min f64_min) +(decl pure partial f64_max (Ieee64 Ieee64) Ieee64) +(extern constructor f64_max f64_max) +(decl pure f64_neg (Ieee64) Ieee64) +(extern constructor f64_neg f64_neg) +(decl pure f64_abs (Ieee64) Ieee64) +(extern constructor f64_abs f64_abs) +(decl pure f64_copysign (Ieee64 Ieee64) Ieee64) +(extern constructor f64_copysign f64_copysign) +(decl pure partial f128_min (Ieee128 Ieee128) Ieee128) +(extern constructor f128_min f128_min) +(decl pure partial f128_max (Ieee128 Ieee128) Ieee128) +(extern constructor f128_max f128_max) +(decl pure f128_neg (Ieee128) Ieee128) +(extern constructor f128_neg f128_neg) +(decl pure f128_abs (Ieee128) Ieee128) +(extern constructor f128_abs f128_abs) +(decl pure f128_copysign (Ieee128 Ieee128) Ieee128) +(extern constructor f128_copysign f128_copysign) +(type Ieee128 (primitive Ieee128)) + +;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(extern const $I8 Type) +(extern const $I16 Type) +(extern const $I32 Type) +(extern const $I64 Type) +(extern const $I128 Type) + +(extern const $F16 Type) +(extern const $F32 Type) +(extern const $F64 Type) +(extern const $F128 Type) + +(extern const $I8X8 Type) +(extern const $I8X16 Type) +(extern const $I16X4 Type) +(extern const $I16X8 Type) +(extern const $I32X2 Type) +(extern const $I32X4 Type) +(extern const $I64X2 Type) + +(extern const $F32X4 Type) +(extern const $F64X2 Type) + +(extern const $I32X4XN Type) + +;; Get the unsigned minimum value for a given type. 
+;; This always zero, but is included for completeness. +(decl pure ty_umin (Type) u64) +(extern constructor ty_umin ty_umin) + +;; Get the unsigned maximum value for a given type. +(decl pure ty_umax (Type) u64) +(extern constructor ty_umax ty_umax) + +;; Get the signed minimum value for a given type. +(decl pure ty_smin (Type) u64) +(extern constructor ty_smin ty_smin) + +;; Get the signed maximum value for a given type. +(decl pure ty_smax (Type) u64) +(extern constructor ty_smax ty_smax) + +;; Get the bit width of a given type. +(decl pure ty_bits (Type) u8) +(extern constructor ty_bits ty_bits) + +;; Get the bit width of a given type. +(decl pure ty_bits_u16 (Type) u16) +(extern constructor ty_bits_u16 ty_bits_u16) + +;; Get the bit width of a given type. +(decl pure ty_bits_u64 (Type) u64) +(extern constructor ty_bits_u64 ty_bits_u64) + +;; Get a mask for the width of a given type. +(decl pure ty_mask (Type) u64) +(extern constructor ty_mask ty_mask) + +;; Get a mask that is set for each lane in a given type. +(decl pure ty_lane_mask (Type) u64) +(extern constructor ty_lane_mask ty_lane_mask) + +;; Get the number of lanes for a given type. +(decl pure ty_lane_count (Type) u64) +(extern constructor ty_lane_count ty_lane_count) + +;; Get the byte width of a given type. +(decl pure ty_bytes (Type) u16) +(extern constructor ty_bytes ty_bytes) + +;; Get the type of each lane in the given type. +(decl pure lane_type (Type) Type) +(extern constructor lane_type lane_type) + +;; Get a type with the same element type, but half the number of lanes. +(decl pure partial ty_half_lanes (Type) Type) +(extern constructor ty_half_lanes ty_half_lanes) + +;; Get a type with the same number of lanes but a lane type that is half as small. +(decl pure partial ty_half_width (Type) Type) +(extern constructor ty_half_width ty_half_width) + +;; Generate a mask for the maximum shift amount for a given type. i.e 31 for I32. +(decl pure ty_shift_mask (Type) u64) +(rule (ty_shift_mask ty) (u64_sub (ty_bits (lane_type ty)) 1)) + +;; Compare two types for equality. +(decl pure ty_equal (Type Type) bool) +(extern constructor ty_equal ty_equal) + +;;;; `cranelift_codegen::ir::MemFlags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `MemFlags::trusted` +(decl pure mem_flags_trusted () MemFlags) +(extern constructor mem_flags_trusted mem_flags_trusted) + +;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Swap args of an IntCC flag. +(decl intcc_swap_args (IntCC) IntCC) +(extern constructor intcc_swap_args intcc_swap_args) + +;; Complement an IntCC flag. +(decl intcc_complement (IntCC) IntCC) +(extern constructor intcc_complement intcc_complement) + +;; This is a direct import of `IntCC::without_equal`. +;; Get the corresponding IntCC with the equal component removed. +;; For conditions without a zero component, this is a no-op. +(decl pure intcc_without_eq (IntCC) IntCC) +(extern constructor intcc_without_eq intcc_without_eq) + +;; Swap args of a FloatCC flag. +(decl floatcc_swap_args (FloatCC) FloatCC) +(extern constructor floatcc_swap_args floatcc_swap_args) + +;; Complement a FloatCC flag. +(decl floatcc_complement (FloatCC) FloatCC) +(extern constructor floatcc_complement floatcc_complement) + +;; True when this FloatCC involves an unordered comparison. 
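+;; (That is, a condition that is satisfied when either operand is NaN; for
+;; example, `FloatCC.Unordered` and `FloatCC.UnorderedOrLessThan` are
+;; unordered, while `FloatCC.LessThan` and `FloatCC.Ordered` are not.)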
+(decl pure floatcc_unordered (FloatCC) bool) +(extern constructor floatcc_unordered floatcc_unordered) + +;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl eq (Type Value Value) Value) +(extractor (eq ty x y) (icmp ty (IntCC.Equal) x y)) + +(decl ne (Type Value Value) Value) +(extractor (ne ty x y) (icmp ty (IntCC.NotEqual) x y)) + +(decl ult (Type Value Value) Value) +(extractor (ult ty x y) (icmp ty (IntCC.UnsignedLessThan) x y)) + +(decl ule (Type Value Value) Value) +(extractor (ule ty x y) (icmp ty (IntCC.UnsignedLessThanOrEqual) x y)) + +(decl ugt (Type Value Value) Value) +(extractor (ugt ty x y) (icmp ty (IntCC.UnsignedGreaterThan) x y)) + +(decl uge (Type Value Value) Value) +(extractor (uge ty x y) (icmp ty (IntCC.UnsignedGreaterThanOrEqual) x y)) + +(decl slt (Type Value Value) Value) +(extractor (slt ty x y) (icmp ty (IntCC.SignedLessThan) x y)) + +(decl sle (Type Value Value) Value) +(extractor (sle ty x y) (icmp ty (IntCC.SignedLessThanOrEqual) x y)) + +(decl sgt (Type Value Value) Value) +(extractor (sgt ty x y) (icmp ty (IntCC.SignedGreaterThan) x y)) + +(decl sge (Type Value Value) Value) +(extractor (sge ty x y) (icmp ty (IntCC.SignedGreaterThanOrEqual) x y)) + +;; An extractor that only matches types that can fit in 16 bits. +(decl fits_in_16 (Type) Type) +(extern extractor fits_in_16 fits_in_16) + +;; An extractor that only matches types that can fit in 32 bits. +(decl fits_in_32 (Type) Type) +(extern extractor fits_in_32 fits_in_32) + +;; An extractor that only matches types that can fit in 32 bits. +(decl lane_fits_in_32 (Type) Type) +(extern extractor lane_fits_in_32 lane_fits_in_32) + +;; An extractor that only matches types that can fit in 64 bits. +(decl fits_in_64 (Type) Type) +(extern extractor fits_in_64 fits_in_64) + +;; An extractor that only matches types that fit in exactly 32 bits. +(decl ty_32 (Type) Type) +(extern extractor ty_32 ty_32) + +;; An extractor that only matches types that fit in exactly 64 bits. +(decl ty_64 (Type) Type) +(extern extractor ty_64 ty_64) + +;; A pure constructor/extractor that only matches scalar integers, and +;; references that can fit in 64 bits. +(decl pure partial ty_int_ref_scalar_64 (Type) Type) +(extern constructor ty_int_ref_scalar_64 ty_int_ref_scalar_64) +(extern extractor ty_int_ref_scalar_64 ty_int_ref_scalar_64_extract) + +;; An extractor that matches 32- and 64-bit types only. +(decl ty_32_or_64 (Type) Type) +(extern extractor ty_32_or_64 ty_32_or_64) + +;; An extractor that matches 8- and 16-bit types only. +(decl ty_8_or_16 (Type) Type) +(extern extractor ty_8_or_16 ty_8_or_16) + +;; An extractor that matches 16- and 32-bit types only. +(decl ty_16_or_32 (Type) Type) +(extern extractor ty_16_or_32 ty_16_or_32) + +;; An extractor that matches int types that fit in 32 bits. +(decl int_fits_in_32 (Type) Type) +(extern extractor int_fits_in_32 int_fits_in_32) + +;; An extractor that matches I64. +(decl ty_int_ref_64 (Type) Type) +(extern extractor ty_int_ref_64 ty_int_ref_64) + +;; An extractor that matches int or reference types bigger than 16 bits but at most 64 bits. +(decl ty_int_ref_16_to_64 (Type) Type) +(extern extractor ty_int_ref_16_to_64 ty_int_ref_16_to_64) + +;; An extractor that only matches integers. +(decl ty_int (Type) Type) +(extern extractor ty_int ty_int) + +;; An extractor that only matches scalar types, float or int or ref's. 
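+;; (For example, it matches $I64 and $F32, but not a vector type such as
+;; $I32X4.)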
+(decl ty_scalar (Type) Type) +(extern extractor ty_scalar ty_scalar) + +;; An extractor that only matches scalar floating-point types--F32 or F64. +(decl ty_scalar_float (Type) Type) +(extern extractor ty_scalar_float ty_scalar_float) + +;; An extractor that matches scalar floating-point types or vector types. +(decl ty_float_or_vec (Type) Type) +(extern extractor ty_float_or_vec ty_float_or_vec) + +;; A pure constructor that only matches vector floating-point types. +(decl pure partial ty_vector_float (Type) Type) +(extern constructor ty_vector_float ty_vector_float) + +;; A pure constructor that only matches vector types with lanes which +;; are not floating-point. +(decl pure partial ty_vector_not_float (Type) Type) +(extern constructor ty_vector_not_float ty_vector_not_float) + +;; A pure constructor/extractor that only matches 64-bit vector types. +(decl pure partial ty_vec64 (Type) Type) +(extern constructor ty_vec64 ty_vec64_ctor) +(extern extractor ty_vec64 ty_vec64) + +;; An extractor that only matches 128-bit vector types. +(decl ty_vec128 (Type) Type) +(extern extractor ty_vec128 ty_vec128) + +;; An extractor that only matches dynamic vector types with a 64-bit +;; base type. +(decl ty_dyn_vec64 (Type) Type) +(extern extractor ty_dyn_vec64 ty_dyn_vec64) + +;; An extractor that only matches dynamic vector types with a 128-bit +;; base type. +(decl ty_dyn_vec128 (Type) Type) +(extern extractor ty_dyn_vec128 ty_dyn_vec128) + +;; An extractor that only matches 64-bit vector types with integer +;; lanes (I8X8, I16X4, I32X2) +(decl ty_vec64_int (Type) Type) +(extern extractor ty_vec64_int ty_vec64_int) + +;; An extractor that only matches 128-bit vector types with integer +;; lanes (I8X16, I16X8, I32X4, I64X2). +(decl ty_vec128_int (Type) Type) +(extern extractor ty_vec128_int ty_vec128_int) + +;; An extractor that only matches types that can be a 64-bit address. +(decl ty_addr64 (Type) Type) +(extern extractor ty_addr64 ty_addr64) + +;; A pure constructor that matches everything except vectors with size 32X2. +(decl pure partial not_vec32x2 (Type) Type) +(extern constructor not_vec32x2 not_vec32x2) + +;; An extractor that matches everything except I64X2 +(decl not_i64x2 () Type) +(extern extractor not_i64x2 not_i64x2) + +;; Extract a `u8` from an `Uimm8`. +(decl u8_from_uimm8 (u8) Uimm8) +(extern extractor infallible u8_from_uimm8 u8_from_uimm8) + +;; Extract a `u64` from a `bool`. +(decl u64_from_bool (u64) bool) +(extern extractor infallible u64_from_bool u64_from_bool) + +;; Extract a `u64` from an `Imm64`. +(decl u64_from_imm64 (u64) Imm64) +(extern extractor infallible u64_from_imm64 u64_from_imm64) + +;; Extract a `u64` from an `Imm64` which is not zero. +(decl nonzero_u64_from_imm64 (u64) Imm64) +(extern extractor nonzero_u64_from_imm64 nonzero_u64_from_imm64) + +;; If the given `Imm64` is a power-of-two, extract its log2 value. +(decl imm64_power_of_two (u64) Imm64) +(extern extractor imm64_power_of_two imm64_power_of_two) + +;; Create a new Imm64. +(decl pure imm64 (u64) Imm64) +(extern constructor imm64 imm64) + +;; Create a new Imm64, masked to the width of the given type. +(decl pure imm64_masked (Type u64) Imm64) +(extern constructor imm64_masked imm64_masked) + +;; Extract a `u16` from an `Ieee16`. +(decl u16_from_ieee16 (u16) Ieee16) +(extern extractor infallible u16_from_ieee16 u16_from_ieee16) + +;; Extract a `u32` from an `Ieee32`. 
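+;; (The extracted value is the raw IEEE 754 bit pattern; for example, the
+;; constant 1.0 extracts as 0x3f800000.)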
+(decl u32_from_ieee32 (u32) Ieee32) +(extern extractor infallible u32_from_ieee32 u32_from_ieee32) + +;; Extract a `u64` from an `Ieee64`. +(decl u64_from_ieee64 (u64) Ieee64) +(extern extractor infallible u64_from_ieee64 u64_from_ieee64) + +;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given +;; type. Will only match when there is more than one lane. +(decl multi_lane (u32 u32) Type) +(extern extractor multi_lane multi_lane) + +;; Match a dynamic-lane type, extracting (# bits per lane) from the given +;; type. +(decl dynamic_lane (u32 u32) Type) +(extern extractor dynamic_lane dynamic_lane) + +;; An extractor that only matches 64-bit dynamic vector types with integer +;; lanes (I8X8XN, I16X4XN, I32X2XN) +(decl ty_dyn64_int (Type) Type) +(extern extractor ty_dyn64_int ty_dyn64_int) + +;; An extractor that only matches 128-bit dynamic vector types with integer +;; lanes (I8X16XN, I16X8XN, I32X4XN, I64X2XN). +(decl ty_dyn128_int (Type) Type) +(extern extractor ty_dyn128_int ty_dyn128_int) + +;; Convert an `Offset32` to a primitive number. +(decl pure offset32_to_i32 (Offset32) i32) +(extern constructor offset32_to_i32 offset32_to_i32) + +;; Convert a number to an `Offset32` +(decl pure i32_to_offset32 (i32) Offset32) +(extern constructor i32_to_offset32 i32_to_offset32) + +;; This is a direct import of `IntCC::unsigned`. +;; Get the corresponding IntCC with the signed component removed. +;; For conditions without a signed component, this is a no-op. +(decl pure intcc_unsigned (IntCC) IntCC) +(extern constructor intcc_unsigned intcc_unsigned) + +;; Pure constructor that only matches signed integer cond codes. +(decl pure partial signed_cond_code (IntCC) IntCC) +(extern constructor signed_cond_code signed_cond_code) + +;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl pure trap_code_division_by_zero () TrapCode) +(extern constructor trap_code_division_by_zero trap_code_division_by_zero) + +(decl pure trap_code_integer_overflow () TrapCode) +(extern constructor trap_code_integer_overflow trap_code_integer_overflow) + +(decl pure trap_code_bad_conversion_to_integer () TrapCode) +(extern constructor trap_code_bad_conversion_to_integer trap_code_bad_conversion_to_integer) + +;;;; Helpers for tail recursion loops ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; A range of integers to loop through. +(type Range (primitive Range)) + +;; Create a new range from `start` through `end` (exclusive). +(decl pure range (usize usize) Range) +(extern constructor range range) + +;; A view on the current state of the range. +(type RangeView extern + (enum + (Empty) + (NonEmpty (index usize) (rest Range)))) + +;; View the current state of the range. +(decl range_view (RangeView) Range) +(extern extractor infallible range_view range_view) + +;; Extractor to test whether a range is empty. +(decl range_empty () Range) +(extractor (range_empty) (range_view (RangeView.Empty))) + +;; Extractor to return the first value in the range, and a sub-range +;; containing the remaining values. 
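+;; For example, a tail-recursive loop over a range could be written like this
+;; (illustrative sketch only, not part of this prelude; `visit_lanes` and
+;; `visit_one` are hypothetical helpers, and `unit` is assumed from the main
+;; prelude):
+;;
+;;   (decl visit_lanes (Range) Unit)
+;;   (rule (visit_lanes (range_empty)) (unit))
+;;   (rule (visit_lanes (range_unwrap idx rest))
+;;         (let ((_ Unit (visit_one idx)))
+;;           (visit_lanes rest)))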
+(decl range_unwrap (usize Range) Range) +(extractor (range_unwrap index rest) (range_view (RangeView.NonEmpty index rest))) + +;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(convert Offset32 i32 offset32_to_i32) +(convert i32 Offset32 i32_to_offset32) + diff --git a/hbcb/src/prelude_lower.isle b/hbcb/src/prelude_lower.isle new file mode 100644 index 0000000..ec34312 --- /dev/null +++ b/hbcb/src/prelude_lower.isle @@ -0,0 +1,1082 @@ +;; Prelude definitions specific to lowering environments (backends) in +;; ISLE. + +;;;; Primitive and External Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `cranelift-entity`-based identifiers. +(type Inst (primitive Inst)) + +;; ISLE representation of `Vec` +(type VecMask extern (enum)) + +(type ValueRegs (primitive ValueRegs)) +(type WritableValueRegs (primitive WritableValueRegs)) + +;; Instruction lowering result: a vector of `ValueRegs`. +(type InstOutput (primitive InstOutput)) +;; (Mutable) builder to incrementally construct an `InstOutput`. +(type InstOutputBuilder extern (enum)) + +;; Type to hold multiple Regs +(type MultiReg + (enum + (Empty) + (One (a Reg)) + (Two (a Reg) (b Reg)) + (Three (a Reg) (b Reg) (c Reg)) + (Four (a Reg) (b Reg) (c Reg) (d Reg)) + )) + +;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type Reg (primitive Reg)) +(type WritableReg (primitive WritableReg)) +(type OptionWritableReg (primitive OptionWritableReg)) +(type VecReg extern (enum)) +(type VecWritableReg extern (enum)) +(type PReg (primitive PReg)) + +;; Construct a `ValueRegs` of one register. +(decl value_reg (Reg) ValueRegs) +(extern constructor value_reg value_reg) + +;; Construct a `WritableValueRegs` of one register. +(decl writable_value_reg (WritableReg) WritableValueRegs) +(extern constructor writable_value_reg writable_value_reg) + +;; Construct a `ValueRegs` of two registers. +(decl value_regs (Reg Reg) ValueRegs) +(extern constructor value_regs value_regs) + +;; Construct a `WritableValueRegs` of two registers. +(decl writable_value_regs (WritableReg WritableReg) WritableValueRegs) +(extern constructor writable_value_regs writable_value_regs) + +;; Construct an empty `ValueRegs` containing only invalid register sentinels. +(decl value_regs_invalid () ValueRegs) +(extern constructor value_regs_invalid value_regs_invalid) + +;; Construct an empty `InstOutput`. +(decl output_none () InstOutput) +(extern constructor output_none output_none) + +;; Construct a single-element `InstOutput`. +(decl output (ValueRegs) InstOutput) +(extern constructor output output) + +;; Construct a two-element `InstOutput`. +(decl output_pair (ValueRegs ValueRegs) InstOutput) +(extern constructor output_pair output_pair) + +;; Construct a single-element `InstOutput` from a single register. +(decl output_reg (Reg) InstOutput) +(rule (output_reg reg) (output (value_reg reg))) + +;; Construct a single-element `InstOutput` from a value. +(decl output_value (Value) InstOutput) +(rule (output_value val) (output (put_in_regs val))) + +;; Initially empty `InstOutput` builder. +(decl output_builder_new () InstOutputBuilder) +(extern constructor output_builder_new output_builder_new) + +;; Append a `ValueRegs` to an `InstOutput` under construction. +(decl output_builder_push (InstOutputBuilder ValueRegs) Unit) +(extern constructor output_builder_push output_builder_push) + +;; Finish building an `InstOutput` incrementally. 
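+;; For example, a lowering helper could assemble a multi-result output like
+;; this (illustrative sketch only, not part of this prelude; `lo` and `hi` are
+;; hypothetical `ValueRegs` bindings):
+;;
+;;   (let ((b InstOutputBuilder (output_builder_new))
+;;         (_ Unit (output_builder_push b lo))
+;;         (_ Unit (output_builder_push b hi)))
+;;     (output_builder_finish b))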
+(decl output_builder_finish (InstOutputBuilder) InstOutput) +(extern constructor output_builder_finish output_builder_finish) + +;; Get a temporary register for writing. +(decl temp_writable_reg (Type) WritableReg) +(extern constructor temp_writable_reg temp_writable_reg) + +;; Get a temporary register for reading. +(decl temp_reg (Type) Reg) +(rule (temp_reg ty) + (writable_reg_to_reg (temp_writable_reg ty))) + +(decl is_valid_reg (bool) Reg) +(extern extractor infallible is_valid_reg is_valid_reg) + +;; Get or match the invalid register. +(decl invalid_reg () Reg) +(extern constructor invalid_reg invalid_reg) +(extractor (invalid_reg) (is_valid_reg $false)) + +;; Match any register but the invalid register. +(decl valid_reg (Reg) Reg) +(extractor (valid_reg reg) (and (is_valid_reg $true) reg)) + +;; Mark this value as used, to ensure that it gets lowered. +(decl mark_value_used (Value) Unit) +(extern constructor mark_value_used mark_value_used) + +;; Put the given value into a register. +;; +;; Asserts that the value fits into a single register, and doesn't require +;; multiple registers for its representation (like `i128` on x64 for example). +;; +;; As a side effect, this marks the value as used. +(decl put_in_reg (Value) Reg) +(extern constructor put_in_reg put_in_reg) + +;; Put the given value into one or more registers. +;; +;; As a side effect, this marks the value as used. +(decl put_in_regs (Value) ValueRegs) +(extern constructor put_in_regs put_in_regs) + +;; If the given reg is a real register, cause the value in reg to be in a virtual +;; reg, by copying it into a new virtual reg. +(decl ensure_in_vreg (Reg Type) Reg) +(extern constructor ensure_in_vreg ensure_in_vreg) + +;; Get the `n`th register inside a `ValueRegs`. +(decl value_regs_get (ValueRegs usize) Reg) +(extern constructor value_regs_get value_regs_get) + +;; Get the number of registers in a `ValueRegs`. +(decl pure value_regs_len (ValueRegs) usize) +(extern constructor value_regs_len value_regs_len) + +;; Get a range for the number of regs in a `ValueRegs`. +(decl value_regs_range (ValueRegs) Range) +(rule (value_regs_range regs) (range 0 (value_regs_len regs))) + +;; Put the value into one or more registers and return the first register. +;; +;; Unlike `put_in_reg`, this does not assert that the value fits in a single +;; register. This is useful for things like a `i128` shift amount, where we mask +;; the shift amount to the bit width of the value being shifted, and so the high +;; half of the `i128` won't ever be used. +;; +;; As a side effect, this marks that value as used. +(decl lo_reg (Value) Reg) +(rule (lo_reg val) + (let ((regs ValueRegs (put_in_regs val))) + (value_regs_get regs 0))) + +;; Convert a `PReg` into a `Reg`. 
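+;; (Also registered as an automatic conversion at the bottom of this file, so
+;; a `PReg` may be passed directly where a `Reg` is expected.)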
+(decl preg_to_reg (PReg) Reg) +(extern constructor preg_to_reg preg_to_reg) + +;; Convert a MultiReg with three registers into an InstOutput containing +;; one ValueRegs containing the first two regs and one containing the third reg +(decl multi_reg_to_pair_and_single (MultiReg) InstOutput) +(rule (multi_reg_to_pair_and_single (MultiReg.Three a b c)) + (output_pair (value_regs a b) c)) + +;; Convert a MultiReg with two registers into an InstOutput containing one ValueRegs with both regs +(decl multi_reg_to_pair (MultiReg) InstOutput) +(rule (multi_reg_to_pair (MultiReg.Two a b)) + (value_regs a b)) + +;; Convert a MultiReg with one register into an InstOutput containing one ValueRegs with the register +(decl multi_reg_to_single (MultiReg) InstOutput) +(rule (multi_reg_to_single (MultiReg.One a)) + (value_reg a)) + +;; Add a range fact to a register, when compiling with +;; proof-carrying-code enabled. +(decl add_range_fact (Reg u16 u64 u64) Reg) +(extern constructor add_range_fact add_range_fact) + +;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type MachLabel (primitive MachLabel)) +(type ValueLabel (primitive ValueLabel)) +(type UnwindInst (primitive UnwindInst)) +(type ExternalName (primitive ExternalName)) +(type BoxExternalName (primitive BoxExternalName)) +(type RelocDistance (primitive RelocDistance)) +(type VecArgPair extern (enum)) +(type VecRetPair extern (enum)) +(type CallArgList extern (enum)) +(type MachLabelSlice extern (enum)) +(type BoxVecMachLabel extern (enum)) + +;; Extract a the target from a MachLabelSlice with exactly one target. +(decl single_target (MachLabel) MachLabelSlice) +(extern extractor single_target single_target) + +;; Extract a the targets from a MachLabelSlice with exactly two targets. +(decl two_targets (MachLabel MachLabel) MachLabelSlice) +(extern extractor two_targets two_targets) + +;; Extract the default target and jump table from a MachLabelSlice. +(decl jump_table_targets (MachLabel BoxVecMachLabel) MachLabelSlice) +(extern extractor jump_table_targets jump_table_targets) + +;; The size of the jump table. +(decl jump_table_size (BoxVecMachLabel) u32) +(extern constructor jump_table_size jump_table_size) + +;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Extractor to get a `ValueSlice` out of a `ValueList`. +(decl value_list_slice (ValueSlice) ValueList) +(extern extractor infallible value_list_slice value_list_slice) + +;; Extractor to test whether a `ValueSlice` is empty. +(decl value_slice_empty () ValueSlice) +(extern extractor value_slice_empty value_slice_empty) + +;; Extractor to split a `ValueSlice` into its first element plus a tail. +(decl value_slice_unwrap (Value ValueSlice) ValueSlice) +(extern extractor value_slice_unwrap value_slice_unwrap) + +;; Return the length of a `ValueSlice`. +(decl value_slice_len (ValueSlice) usize) +(extern constructor value_slice_len value_slice_len) + +;; Return any element of a `ValueSlice`. +(decl value_slice_get (ValueSlice usize) Value) +(extern constructor value_slice_get value_slice_get) + +;; Extractor to get the first element from a value list, along with its tail as +;; a `ValueSlice`. +(decl unwrap_head_value_list_1 (Value ValueSlice) ValueList) +(extractor (unwrap_head_value_list_1 head tail) + (value_list_slice (value_slice_unwrap head tail))) + +;; Extractor to get the first two elements from a value list, along with its +;; tail as a `ValueSlice`. 
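+;; (For example, matching a call's argument list binds the first two arguments
+;; individually and leaves any remaining arguments in the `ValueSlice` tail;
+;; illustrative note.)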
+(decl unwrap_head_value_list_2 (Value Value ValueSlice) ValueList) +(extractor (unwrap_head_value_list_2 head1 head2 tail) + (value_list_slice (value_slice_unwrap head1 (value_slice_unwrap head2 tail)))) + +;; Turn a `Writable` into a `Reg` via `Writable::to_reg`. +(decl pure writable_reg_to_reg (WritableReg) Reg) +(extern constructor writable_reg_to_reg writable_reg_to_reg) + +;; Extract the result values for the given instruction. +(decl inst_results (ValueSlice) Inst) +(extern extractor infallible inst_results inst_results) + +;; Returns whether the given value is unused in this function and is a dead +;; result. +(decl pure value_is_unused (Value) bool) +(extern constructor value_is_unused value_is_unused) + +;; Extract the first result value of the given instruction. +(decl first_result (Value) Inst) +(extern extractor first_result first_result) + +;; Extract the `InstructionData` for an `Inst`. +(decl inst_data (InstructionData) Inst) +(extern extractor infallible inst_data inst_data) + +;; Extract the type of the instruction's first result. +(decl result_type (Type) Inst) +(extractor (result_type ty) + (first_result (value_type ty))) + +;; Extract the type of the instruction's first result and pass along the +;; instruction as well. +(decl has_type (Type Inst) Inst) +(extractor (has_type ty inst) + (and (result_type ty) + inst)) + +;; Match the instruction that defines the given value, if any. +(decl def_inst (Inst) Value) +(extern extractor def_inst def_inst) + +;; Extract a constant `u64` from a value defined by an `iconst`. +(decl u64_from_iconst (u64) Value) +(extractor (u64_from_iconst x) + (def_inst (iconst (u64_from_imm64 x)))) + +;; Extract a constant `i32` from a value defined by an `iconst`. +;; The value is sign extended to 32 bits. +(decl i32_from_iconst (i32) Value) +(extern extractor i32_from_iconst i32_from_iconst) + +;; Extract a constant `i64` from a value defined by an `iconst`. +;; The value is sign extended to 64 bits. +(decl i64_from_iconst (i64) Value) +(extern extractor i64_from_iconst i64_from_iconst) + +;; Match any zero value for iconst, fconst32, fconst64, vconst and splat. +(decl pure partial zero_value (Value) Value) +(extern constructor zero_value zero_value) + +;; Match a sinkable instruction from a value operand. +(decl pure partial is_sinkable_inst (Value) Inst) +(extern constructor is_sinkable_inst is_sinkable_inst) + +;; Match a uextend or any other instruction, "seeing through" the uextend if +;; present. +(decl maybe_uextend (Value) Value) +(extern extractor maybe_uextend maybe_uextend) + +;; Get an unsigned 8-bit immediate in a u8 from an Imm64, if possible. +(decl uimm8 (u8) Imm64) +(extern extractor uimm8 uimm8) + +;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Emit an instruction. +;; +;; This is low-level and side-effectful; it should only be used as an +;; implementation detail by helpers that preserve the SSA facade themselves. + +(decl emit (MInst) Unit) +(extern constructor emit emit) + +;; Sink an instruction. +;; +;; This is a side-effectful operation that notifies the context that the +;; instruction has been sunk into another instruction, and no longer needs to +;; be lowered. +(decl sink_inst (Inst) Unit) +(extern constructor sink_inst sink_inst) + +;; Constant pool emission. + +(type VCodeConstant (primitive VCodeConstant)) + +;; Add a u64 little-endian constant to the in-memory constant pool and +;; return a VCodeConstant index that refers to it. 
This is +;; side-effecting but idempotent (constants are deduplicated). +(decl emit_u64_le_const (u64) VCodeConstant) +(extern constructor emit_u64_le_const emit_u64_le_const) + +;; Add a u128 little-endian constant to the in-memory constant pool and +;; return a VCodeConstant index that refers to it. This is +;; side-effecting but idempotent (constants are deduplicated). +(decl emit_u128_le_const (u128) VCodeConstant) +(extern constructor emit_u128_le_const emit_u128_le_const) + +;; Fetch the VCodeConstant associated with a Constant. +(decl const_to_vconst (Constant) VCodeConstant) +(extern constructor const_to_vconst const_to_vconst) + +;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;; + +(type SideEffectNoResult (enum + (Inst (inst MInst)) + (Inst2 (inst1 MInst) + (inst2 MInst)) + (Inst3 (inst1 MInst) + (inst2 MInst) + (inst3 MInst)))) + +;; Emit given side-effectful instruction. +(decl emit_side_effect (SideEffectNoResult) Unit) +(rule (emit_side_effect (SideEffectNoResult.Inst inst)) + (emit inst)) +(rule (emit_side_effect (SideEffectNoResult.Inst2 inst1 inst2)) + (let ((_ Unit (emit inst1))) + (emit inst2))) +(rule (emit_side_effect (SideEffectNoResult.Inst3 inst1 inst2 inst3)) + (let ((_ Unit (emit inst1)) + (_ Unit (emit inst2))) + (emit inst3))) + +;; Create an empty `InstOutput`, but do emit the given side-effectful +;; instruction. +(decl side_effect (SideEffectNoResult) InstOutput) +(rule (side_effect inst) + (let ((_ Unit (emit_side_effect inst))) + (output_none))) + +(decl side_effect_concat (SideEffectNoResult SideEffectNoResult) SideEffectNoResult) +(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst inst2)) + (SideEffectNoResult.Inst2 inst1 inst2)) +(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst2 inst2 inst3)) + (SideEffectNoResult.Inst3 inst1 inst2 inst3)) +(rule (side_effect_concat (SideEffectNoResult.Inst2 inst1 inst2) (SideEffectNoResult.Inst inst3)) + (SideEffectNoResult.Inst3 inst1 inst2 inst3)) + +;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Newtype wrapper around `MInst` for instructions that are used for their +;; effect on flags. +;; +;; Variant determines how result is given when combined with a +;; ConsumesFlags. See `with_flags` below for more. +(type ProducesFlags (enum + ;; For cases where the flags have been produced by another + ;; instruction, and we have out-of-band reasons to know + ;; that they won't be clobbered by the time we depend on + ;; them. + (AlreadyExistingFlags) + (ProducesFlagsSideEffect (inst MInst)) + (ProducesFlagsTwiceSideEffect (inst1 MInst) (inst2 MInst)) + ;; Not directly combinable with a ConsumesFlags; + ;; used in s390x and unwrapped directly by `trapif`. + (ProducesFlagsReturnsReg (inst MInst) (result Reg)) + (ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg)))) + +;; Chain another producer to a `ProducesFlags`. +(decl produces_flags_concat (ProducesFlags ProducesFlags) ProducesFlags) +(rule (produces_flags_concat (ProducesFlags.ProducesFlagsSideEffect inst1) (ProducesFlags.ProducesFlagsSideEffect inst2)) + (ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2)) + +;; Newtype wrapper around `MInst` for instructions that consume and produce flags +(type ConsumesAndProducesFlags (enum + (SideEffect (inst MInst)) + (ReturnsReg (inst MInst) (result Reg)))) + +;; Newtype wrapper around `MInst` for instructions that consume flags. 
+;; +;; Variant determines how result is given when combined with a +;; ProducesFlags. See `with_flags` below for more. +(type ConsumesFlags (enum + (ConsumesFlagsSideEffect (inst MInst)) + (ConsumesFlagsSideEffect2 (inst1 MInst) (inst2 MInst)) + (ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg)) + (ConsumesFlagsReturnsReg (inst MInst) (result Reg)) + (ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst) + (inst2 MInst) + (result ValueRegs)) + (ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst) + (inst2 MInst) + (inst3 MInst) + (inst4 MInst) + (result ValueRegs)))) + + + +;; Get the produced register out of a ProducesFlags. +(decl produces_flags_get_reg (ProducesFlags) Reg) +(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg) +(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsResultWithConsumer _ reg)) reg) + +;; Modify a ProducesFlags to use it only for its side-effect, ignoring +;; its result. +(decl produces_flags_ignore (ProducesFlags) ProducesFlags) +(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsReg inst _)) + (ProducesFlags.ProducesFlagsSideEffect inst)) +(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst _)) + (ProducesFlags.ProducesFlagsSideEffect inst)) + +;; Helper for combining two flags-consumer instructions that return a +;; single Reg, giving a ConsumesFlags that returns both values in a +;; ValueRegs. +(decl consumes_flags_concat (ConsumesFlags ConsumesFlags) ConsumesFlags) +(rule (consumes_flags_concat (ConsumesFlags.ConsumesFlagsReturnsReg inst1 reg1) + (ConsumesFlags.ConsumesFlagsReturnsReg inst2 reg2)) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs + inst1 + inst2 + (value_regs reg1 reg2))) +(rule (consumes_flags_concat + (ConsumesFlags.ConsumesFlagsSideEffect inst1) + (ConsumesFlags.ConsumesFlagsSideEffect inst2)) + (ConsumesFlags.ConsumesFlagsSideEffect2 inst1 inst2)) + +;; Combine flags-producing and -consuming instructions together, ensuring that +;; they are emitted back-to-back and no other instructions can be emitted +;; between them and potentially clobber the flags. +;; +;; Returns a `ValueRegs` according to the specific combination of ProducesFlags and ConsumesFlags modes: +;; - SideEffect + ReturnsReg --> ValueReg with one Reg from consumer +;; - SideEffect + ReturnsValueRegs --> ValueReg as given from consumer +;; - ReturnsResultWithProducer + ReturnsResultWithConsumer --> ValueReg with low part from producer, high part from consumer +;; +;; See `with_flags_reg` below for a variant that extracts out just the lower Reg. +(decl with_flags (ProducesFlags ConsumesFlags) ValueRegs) + +(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result) + (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consumer_inst consumer_result)) + (let ((_x Unit (emit producer_inst)) + (_y Unit (emit consumer_inst))) + (value_regs producer_result consumer_result))) + +;; A flag-producer that also produces a result, paired with a consumer that has +;; no results. 
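+;; (Only the producer's result is returned, as a single-register `ValueRegs`.)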
+(rule (with_flags (ProducesFlags.ProducesFlagsReturnsResultWithConsumer producer_inst producer_result) + (ConsumesFlags.ConsumesFlagsSideEffect consumer_inst)) + (let ((_ Unit (emit producer_inst)) + (_ Unit (emit consumer_inst))) + (value_reg producer_result))) + +(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) + (ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result)) + (let ((_x Unit (emit producer_inst)) + (_y Unit (emit consumer_inst))) + (value_reg consumer_result))) + +(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. + (let ((_x Unit (emit producer_inst)) + (_y Unit (emit consumer_inst_1)) + (_z Unit (emit consumer_inst_2))) + consumer_result)) + +(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_inst_3 + consumer_inst_4 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. + (let ((_x Unit (emit producer_inst)) + (_y Unit (emit consumer_inst_1)) + (_z Unit (emit consumer_inst_2)) + (_w Unit (emit consumer_inst_3)) + (_v Unit (emit consumer_inst_4))) + consumer_result)) + +(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) + (ConsumesFlags.ConsumesFlagsReturnsReg consumer_inst consumer_result)) + (let ((_ Unit (emit producer_inst1)) + (_ Unit (emit producer_inst2)) + (_ Unit (emit consumer_inst))) + (value_reg consumer_result))) + +(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. + (let ((_ Unit (emit producer_inst1)) + (_ Unit (emit producer_inst2)) + (_ Unit (emit consumer_inst_1)) + (_ Unit (emit consumer_inst_2))) + consumer_result)) + +(rule (with_flags (ProducesFlags.ProducesFlagsTwiceSideEffect producer_inst1 producer_inst2) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_inst_3 + consumer_inst_4 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. + (let ((_ Unit (emit producer_inst1)) + (_ Unit (emit producer_inst2)) + (_ Unit (emit consumer_inst_1)) + (_ Unit (emit consumer_inst_2)) + (_ Unit (emit consumer_inst_3)) + (_ Unit (emit consumer_inst_4))) + consumer_result)) + +(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg) +(rule (with_flags_reg p c) + (let ((v ValueRegs (with_flags p c))) + (value_regs_get v 0))) + +;; Indicate that the current state of the flags register from the instruction +;; that produces this Value is relied on. +(decl flags_to_producesflags (Value) ProducesFlags) +(rule (flags_to_producesflags val) + (let ((_ Unit (mark_value_used val))) + (ProducesFlags.AlreadyExistingFlags))) + +;; Combine a flags-producing instruction and a flags-consuming instruction that +;; produces no results. 
+;; +;; This function handles the following case only: +;; - ProducesFlagsSideEffect + ConsumesFlagsSideEffect +(decl with_flags_side_effect (ProducesFlags ConsumesFlags) SideEffectNoResult) + +(rule (with_flags_side_effect + (ProducesFlags.AlreadyExistingFlags) + (ConsumesFlags.ConsumesFlagsSideEffect c)) + (SideEffectNoResult.Inst c)) + +(rule (with_flags_side_effect + (ProducesFlags.AlreadyExistingFlags) + (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2)) + (SideEffectNoResult.Inst2 c1 c2)) + +(rule (with_flags_side_effect + (ProducesFlags.ProducesFlagsSideEffect p) + (ConsumesFlags.ConsumesFlagsSideEffect c)) + (SideEffectNoResult.Inst2 p c)) + +(rule (with_flags_side_effect + (ProducesFlags.ProducesFlagsSideEffect p) + (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2)) + (SideEffectNoResult.Inst3 p c1 c2)) + +(rule (with_flags_side_effect + (ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2) + (ConsumesFlags.ConsumesFlagsSideEffect c)) + (SideEffectNoResult.Inst3 p1 p2 c)) + +;; Combine flag-producing and -consuming instruction that allows more than two results to be returned +(decl with_flags_chained (ProducesFlags ConsumesAndProducesFlags ConsumesFlags) MultiReg) + +;; ProducesFlags.SideEffect + ConsumesAndProducesFlags.SideEffect with all possible ConsumeFlags options +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Empty))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Empty))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst reg)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.One reg))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Two (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2)) + (_ Unit (emit consume_inst3)) + (_ Unit (emit consume_inst4))) + (MultiReg.Two (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + + +;; ProducesFlags.ReturnsReg + ConsumesAndProducesFlags.SideEffect with all possible ConsumeFlags options +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) + (let 
((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.One prod_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.One prod_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Two prod_result consume_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Three prod_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2)) + (_ Unit (emit consume_inst3)) + (_ Unit (emit consume_inst4))) + (MultiReg.Three prod_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + + +;; ProducesFlags.SideEffect + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumeFlags options +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.One middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.One middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Two middle_result consume_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Three middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +(rule 
(with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2)) + (_ Unit (emit consume_inst3)) + (_ Unit (emit consume_inst4))) + (MultiReg.Three middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + + +;; ProducesFlags.ReturnsReg + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumeFlags options +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Two prod_result middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Two prod_result middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Three prod_result middle_result consume_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2)) + (_ Unit (emit consume_inst3)) + (_ Unit (emit consume_inst4))) + (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +;; ProducesFlags.ReturnsResultWithConsumer + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumeFlags options +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Two prod_result middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst 
middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Two prod_result middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Three prod_result middle_result consume_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consume_inst consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Three prod_result middle_result consume_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2)) + (_ Unit (emit consume_inst3)) + (_ Unit (emit consume_inst4))) + (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; This definition should be kept up to date with the values defined in +;; cranelift/codegen/meta/src/shared/settings.rs +(type TlsModel extern (enum (None) (ElfGd) (Macho) (Coff))) + +(decl tls_model (TlsModel) Type) +(extern extractor infallible tls_model tls_model) + +(decl pure partial tls_model_is_elf_gd () Unit) +(extern constructor tls_model_is_elf_gd tls_model_is_elf_gd) + +(decl pure partial tls_model_is_macho () Unit) +(extern constructor tls_model_is_macho tls_model_is_macho) + +(decl pure partial tls_model_is_coff () Unit) +(extern constructor tls_model_is_coff tls_model_is_coff) + +(decl pure partial preserve_frame_pointers () Unit) +(extern constructor preserve_frame_pointers preserve_frame_pointers) + +;; This definition should be kept up to date with the values defined in +;; cranelift/codegen/meta/src/shared/settings.rs +(type StackSwitchModel extern (enum (None) (Basic) (UpdateWindowsTib))) + +(decl pure partial stack_switch_model () StackSwitchModel) +(extern constructor stack_switch_model stack_switch_model) + +;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl box_external_name (ExternalName) BoxExternalName) +(extern constructor box_external_name box_external_name) + +;; Accessor for 
`FuncRef`. + +(decl func_ref_data (SigRef ExternalName RelocDistance) FuncRef) +(extern extractor infallible func_ref_data func_ref_data) + +;; Accessor for `GlobalValue`. + +(decl symbol_value_data (ExternalName RelocDistance i64) GlobalValue) +(extern extractor symbol_value_data symbol_value_data) + +;; Accessor for `RelocDistance`. + +(decl reloc_distance_near () RelocDistance) +(extern extractor reloc_distance_near reloc_distance_near) + +;; Accessor for `Immediate` as a vector of u8 values. + +(decl vec_mask_from_immediate (VecMask) Immediate) +(extern extractor vec_mask_from_immediate vec_mask_from_immediate) + +;; Accessor for `Immediate` as u128. + +(decl u128_from_immediate (u128) Immediate) +(extern extractor u128_from_immediate u128_from_immediate) + +;; Extracts an `Immediate` as a `VCodeConstant`. + +(decl vconst_from_immediate (VCodeConstant) Immediate) +(extern extractor vconst_from_immediate vconst_from_immediate) + +;; Accessor for `Constant` as u128. + +(decl u128_from_constant (u128) Constant) +(extern extractor u128_from_constant u128_from_constant) + +;; Accessor for `Constant` as u64. + +(decl u64_from_constant (u64) Constant) +(extern extractor u64_from_constant u64_from_constant) + +;; Extracts lane indices, represented as u8's, if the immediate for a +;; `shuffle` instruction represents shuffling N-bit values. The u8 values +;; returned will be in the range of 0 to (256/N)-1, inclusive, and index the +;; N-bit chunks of two concatenated 128-bit vectors starting from the +;; least-significant bits. +(decl shuffle64_from_imm (u8 u8) Immediate) +(extern extractor shuffle64_from_imm shuffle64_from_imm) +(decl shuffle32_from_imm (u8 u8 u8 u8) Immediate) +(extern extractor shuffle32_from_imm shuffle32_from_imm) +(decl shuffle16_from_imm (u8 u8 u8 u8 u8 u8 u8 u8) Immediate) +(extern extractor shuffle16_from_imm shuffle16_from_imm) + +;;;; Helpers for generating returns ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Extractor to check for the special case that a `WritableValueRegs` +;; contains only a single register. +(decl only_writable_reg (WritableReg) WritableValueRegs) +(extern extractor only_writable_reg only_writable_reg) + +;; Get the `n`th register inside a `WritableValueRegs`. +(decl writable_regs_get (WritableValueRegs usize) WritableReg) +(extern constructor writable_regs_get writable_regs_get) + +;;;; Helpers for generating calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Type to hold information about a function call signature. +(type Sig (primitive Sig)) + +;; Information how to pass one argument or return value. +(type ABIArg extern (enum)) + +;; Information how to pass a single slot of one argument or return value. +(type ABIArgSlot extern + (enum + (Reg + (reg RealReg) + (ty Type) + (extension ArgumentExtension)) + (Stack + (offset i64) + (ty Type) + (extension ArgumentExtension)))) + +;; Physical register that may hold an argument or return value. +(type RealReg (primitive RealReg)) + +;; Instruction on whether and how to extend an argument value. +(type ArgumentExtension extern + (enum + (None) + (Uext) + (Sext))) + +;; Get the number of arguments expected. +(decl abi_num_args (Sig) usize) +(extern constructor abi_num_args abi_num_args) + +;; Get information specifying how to pass one argument. +(decl abi_get_arg (Sig usize) ABIArg) +(extern constructor abi_get_arg abi_get_arg) + +;; Get the number of return values expected. 
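+;; (Together with `range` from the main prelude, this lets a backend loop over
+;; every return slot, e.g. `(range 0 (abi_num_rets sig))`; illustrative note,
+;; with `sig` a bound `Sig` value.)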
+(decl abi_num_rets (Sig) usize) +(extern constructor abi_num_rets abi_num_rets) + +;; Get information specifying how to pass one return value. +(decl abi_get_ret (Sig usize) ABIArg) +(extern constructor abi_get_ret abi_get_ret) + +;; Get information specifying how to pass the implicit pointer +;; to the return-value area on the stack, if required. +(decl abi_ret_arg (ABIArg) Sig) +(extern extractor abi_ret_arg abi_ret_arg) + +;; Succeeds if no implicit return-value area pointer is required. +(decl abi_no_ret_arg () Sig) +(extern extractor abi_no_ret_arg abi_no_ret_arg) + +;; Size of the argument area. +(decl abi_sized_stack_arg_space (Sig) i64) +(extern constructor abi_sized_stack_arg_space abi_sized_stack_arg_space) + +;; Size of the return-value area. +(decl abi_sized_stack_ret_space (Sig) i64) +(extern constructor abi_sized_stack_ret_space abi_sized_stack_ret_space) + +;; Incoming return area pointer (must be present). +(decl abi_unwrap_ret_area_ptr () Reg) +(extern constructor abi_unwrap_ret_area_ptr abi_unwrap_ret_area_ptr) + +;; StackSlot addr +(decl abi_stackslot_addr (WritableReg StackSlot Offset32) MInst) +(extern constructor abi_stackslot_addr abi_stackslot_addr) + +;; DynamicStackSlot addr +(decl abi_dynamic_stackslot_addr (WritableReg DynamicStackSlot) MInst) +(extern constructor abi_dynamic_stackslot_addr abi_dynamic_stackslot_addr) + +;; Extractor to detect the special case where an argument or +;; return value only requires a single slot to be passed. +(decl abi_arg_only_slot (ABIArgSlot) ABIArg) +(extern extractor abi_arg_only_slot abi_arg_only_slot) + +;; Extractor to detect the special case where a non-struct argument +;; is implicitly passed by reference using a hidden pointer. +(decl abi_arg_implicit_pointer (ABIArgSlot i64 Type) ABIArg) +(extern extractor abi_arg_implicit_pointer abi_arg_implicit_pointer) + +;; Convert a real register number into a virtual register. +(decl real_reg_to_reg (RealReg) Reg) +(extern constructor real_reg_to_reg real_reg_to_reg) + +;; Convert a real register number into a writable virtual register. +(decl real_reg_to_writable_reg (RealReg) WritableReg) +(extern constructor real_reg_to_writable_reg real_reg_to_writable_reg) + +;; Generate a move between two registers. +(decl gen_move (Type WritableReg Reg) MInst) +(extern constructor gen_move gen_move) + +;; Generate a return instruction +(decl lower_return (ValueSlice) InstOutput) +(rule (lower_return vals) + (let ((_ Unit (gen_return vals))) + (output_none))) + +(decl gen_return (ValueSlice) Unit) +(extern constructor gen_return gen_return) + +(decl gen_return_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) +(extern constructor gen_return_call gen_return_call) + +(decl gen_return_call_indirect (SigRef Value ValueSlice) InstOutput) +(extern constructor gen_return_call_indirect gen_return_call_indirect) + +;; Helper for extracting an immediate that's not 0 and not -1 from an imm64. 
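+;; (Zero and minus one are excluded because they are exactly the divisors that
+;; can trap or overflow a signed division: dividing by zero, or dividing the
+;; minimum signed value by -1.)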
+
+;; Helper for extracting an immediate that's not 0 and not -1 from an imm64.
+(decl pure partial safe_divisor_from_imm64 (Type Imm64) u64)
+(extern constructor safe_divisor_from_imm64 safe_divisor_from_imm64)
+
+;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(convert Inst Value def_inst)
+(convert Reg ValueRegs value_reg)
+(convert WritableReg WritableValueRegs writable_value_reg)
+(convert Value Reg put_in_reg)
+(convert Value ValueRegs put_in_regs)
+(convert WritableReg Reg writable_reg_to_reg)
+(convert ValueRegs InstOutput output)
+(convert Reg InstOutput output_reg)
+(convert Value InstOutput output_value)
+(convert ExternalName BoxExternalName box_external_name)
+(convert PReg Reg preg_to_reg)
+
diff --git a/hbcb/src/prelude_opt.isle b/hbcb/src/prelude_opt.isle
new file mode 100644
index 0000000..b8b9fc4
--- /dev/null
+++ b/hbcb/src/prelude_opt.isle
@@ -0,0 +1,123 @@
+;; Prelude definitions specific to the mid-end.
+
+;; Any `extern` definitions here are generally implemented in `src/opts.rs`.
+
+;;;;; eclass and enode access ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Extract any node(s) for the given eclass ID.
+(decl multi inst_data (Type InstructionData) Value)
+(extern extractor inst_data inst_data_etor)
+
+;; Identical to `inst_data`, just with a different ISLE type.
+;; This is basically a manual version of `curry`/`uncurry` in Haskell:
+;; to compose extractors the outer one needs to be single-parameter,
+;; so this combines the two parameters of `inst_data` into one.
+(type TypeAndInstructionData (primitive TypeAndInstructionData))
+(decl multi inst_data_tupled (TypeAndInstructionData) Value)
+(extern extractor inst_data_tupled inst_data_tupled_etor)
+
+;; Construct a pure node, returning a new (or deduplicated
+;; already-existing) eclass ID.
+(decl make_inst (Type InstructionData) Value)
+(extern constructor make_inst make_inst_ctor)
+
+;; Constructors for value arrays.
+(decl value_array_2_ctor (Value Value) ValueArray2)
+(extern constructor value_array_2_ctor value_array_2_ctor)
+(decl value_array_3_ctor (Value Value Value) ValueArray3)
+(extern constructor value_array_3_ctor value_array_3_ctor)
+
+(rule (eq ty x y) (icmp ty (IntCC.Equal) x y))
+(rule (ne ty x y) (icmp ty (IntCC.NotEqual) x y))
+(rule (ult ty x y) (icmp ty (IntCC.UnsignedLessThan) x y))
+(rule (ule ty x y) (icmp ty (IntCC.UnsignedLessThanOrEqual) x y))
+(rule (ugt ty x y) (icmp ty (IntCC.UnsignedGreaterThan) x y))
+(rule (uge ty x y) (icmp ty (IntCC.UnsignedGreaterThanOrEqual) x y))
+(rule (slt ty x y) (icmp ty (IntCC.SignedLessThan) x y))
+(rule (sle ty x y) (icmp ty (IntCC.SignedLessThanOrEqual) x y))
+(rule (sgt ty x y) (icmp ty (IntCC.SignedGreaterThan) x y))
+(rule (sge ty x y) (icmp ty (IntCC.SignedGreaterThanOrEqual) x y))
+
+;; 3-way comparison, returning -1/0/+1 in I8
+(decl spaceship_s (Type Value Value) Value)
+(rule (spaceship_s ty x y) (isub $I8 (sgt ty x y) (slt ty x y)))
+(extractor (spaceship_s ty x y) (isub $I8 (sgt ty x y) (slt ty x y)))
+(decl spaceship_u (Type Value Value) Value)
+(rule (spaceship_u ty x y) (isub $I8 (ugt ty x y) (ult ty x y)))
+(extractor (spaceship_u ty x y) (isub $I8 (ugt ty x y) (ult ty x y)))
+
+;;;;; optimization toplevel ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The main matcher rule invoked by the toplevel driver.
+(decl multi simplify (Value) Value)
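For orientation, a mid-end rewrite registered under `simplify` typically looks like the sketch below (illustrative, not part of this patch); it cancels a double negation and uses the `subsume` constructor introduced just after this point.

    (rule (simplify (ineg ty (ineg ty x)))
          (subsume x))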
+
+;; Mark a node as requiring remat when used in a different block.
+(decl remat (Value) Value)
+(extern constructor remat remat)
+
+;; Mark a node as subsuming whatever else it's rewritten from -- this
+;; is definitely preferable, not just a possible option. Useful for,
+;; e.g., constant propagation where we arrive at a definite "final
+;; answer".
+(decl subsume (Value) Value)
+(extern constructor subsume subsume)
+
+;;;;; constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(decl iconst_sextend_etor (Type i64) TypeAndInstructionData)
+(extern extractor iconst_sextend_etor iconst_sextend_etor)
+
+;; Construct an `iconst` from an `i64`, or extract an `i64` from an `iconst`,
+;; by treating the constant as signed.
+;; When extracting, smaller types get their value sign-extended to 64 bits,
+;; so that `iconst.i8 255` will give you a `-1_i64`.
+;; When constructing, the rule will fail if the value cannot be represented in
+;; the target type. If it fits, it'll be masked accordingly in the constant.
+(decl iconst_s (Type i64) Value)
+(extractor (iconst_s ty c) (inst_data_tupled (iconst_sextend_etor ty c)))
+(rule 0 (iconst_s ty c)
+      (if-let c_masked (u64_and (i64_as_u64 c) (ty_umax ty)))
+      (if-let c_reextended (i64_sextend_u64 ty c_masked))
+      (if-let $true (u64_eq (i64_as_u64 c) (i64_as_u64 c_reextended)))
+      (iconst ty (imm64 c_masked)))
+(rule 1 (iconst_s $I128 c) (sextend $I128 (iconst_s $I64 c)))
+
+;; Construct an `iconst` from a `u64`, or extract a `u64` from an `iconst`,
+;; by treating the constant as unsigned.
+;; When extracting, smaller types get their value zero-extended to 64 bits,
+;; so that `iconst.i8 255` will give you a `255_u64`.
+;; When constructing, the rule will fail if the value cannot be represented in
+;; the target type.
+(decl iconst_u (Type u64) Value)
+(extractor (iconst_u ty c) (iconst ty (u64_from_imm64 c)))
+(rule 0 (iconst_u ty c)
+      (if-let $true (u64_le c (ty_umax ty)))
+      (iconst ty (imm64 c)))
+(rule 1 (iconst_u $I128 c) (uextend $I128 (iconst_u $I64 c)))
+
+;; These take `Value`, rather than going through `inst_data_tupled`, because
+;; most of the time they want to return the original `Value`, and it would be
+;; a waste to need to re-GVN the instruction data in those cases.
+(decl multi sextend_maybe_etor (Type Value) Value)
+(extern extractor infallible sextend_maybe_etor sextend_maybe_etor)
+(decl multi uextend_maybe_etor (Type Value) Value)
+(extern extractor infallible uextend_maybe_etor uextend_maybe_etor)
+
+;; Match or construct a possibly-`uextend`ed value.
+;; Gives the extended-to type and inner value when matching something that was
+;; extended, or the input value and its type when the value isn't an extension.
+;; Useful to write a single pattern that can match things that may or may not
+;; have undergone C's "usual arithmetic conversions".
+;; When generating values, extending to the same type is invalid CLIF,
+;; so this avoids doing that where there's no extension actually needed.
+(decl uextend_maybe (Type Value) Value)
+(extractor (uextend_maybe ty val) (uextend_maybe_etor ty val))
+(rule 0 (uextend_maybe ty val) (uextend ty val))
+(rule 1 (uextend_maybe ty val@(value_type ty)) val)
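A brief sketch of `iconst_u` in pattern position (again illustrative, not part of this patch): the same `ty` binding on the constant ensures the zero has the type of the addition, and `subsume` marks the rewrite as strictly preferable.

    (rule (simplify (iadd ty x (iconst_u ty 0)))
          (subsume x))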
+
+;; Same as `uextend_maybe` above, just for `sextend`.
+(decl sextend_maybe (Type Value) Value)
+(extractor (sextend_maybe ty val) (sextend_maybe_etor ty val))
+(rule 0 (sextend_maybe ty val) (sextend ty val))
+(rule 1 (sextend_maybe ty val@(value_type ty)) val)
+
diff --git a/hbcb/src/settings.rs b/hbcb/src/settings.rs
new file mode 100644
index 0000000..5cd68e3
--- /dev/null
+++ b/hbcb/src/settings.rs
@@ -0,0 +1,10 @@
+//! riscv64 Settings.
+
+use {
+    core::fmt,
+    cranelift_codegen::settings::{self, detail, Builder, Value},
+};
+
+// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
+// public `Flags` struct with an impl for all of the settings defined in
+include!(concat!(env!("OUT_DIR"), "/settings-riscv64.rs"));
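Assuming the generated file exposes the items normally produced by Cranelift's gen_settings.rs (a module-level `builder()` function and a `Flags` struct whose `new` takes the shared flags and the ISA-specific builder), it could be consumed roughly as in this hypothetical sketch, which would sit alongside the `include!` and reuse this module's existing `settings` import.

    // Hypothetical helper, not part of this patch: build default shared flags
    // and layer the riscv64-specific flags from the generated code on top.
    fn example_isa_flags() -> Flags {
        let shared_flags = settings::Flags::new(settings::builder());
        let isa_builder = builder(); // provided by the include! above
        Flags::new(&shared_flags, &isa_builder)
    }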