Compare commits

..

7 commits

Author SHA1 Message Date
Erin d36a2335c7 a 2023-07-12 02:15:33 +02:00
Erin e5f50c7b6c Removed old text module 2023-07-12 02:15:14 +02:00
Erin c648bf0b24 Labels 2023-07-12 02:12:23 +02:00
Erin 51e0db9c7d rename 2023-07-12 01:24:41 +02:00
Erin cf05464c14 replaced in main 2023-07-12 01:24:20 +02:00
Erin b0b4022a85 Immediates and labels not implemented yet, soon be there. 2023-07-12 01:22:28 +02:00
Erin 299e8e6759 a 2023-07-12 00:01:40 +02:00
47 changed files with 1393 additions and 2640 deletions

109
Cargo.lock generated
View file

@ -15,9 +15,9 @@ dependencies = [
[[package]] [[package]]
name = "allocator-api2" name = "allocator-api2"
version = "0.2.16" version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9"
[[package]] [[package]]
name = "ariadne" name = "ariadne"
@ -47,6 +47,36 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "convert_case"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
[[package]]
name = "delegate"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d358e0ec5c59a5e1603b933def447096886121660fc680dc1e64a0753981fe3c"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "derive_more"
version = "0.99.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
dependencies = [
"convert_case",
"proc-macro2",
"quote",
"rustc_version",
"syn 1.0.109",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -94,7 +124,13 @@ version = "0.1.0"
name = "hbvm" name = "hbvm"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"delegate",
"derive_more",
"hashbrown 0.13.2",
"hbbytecode", "hbbytecode",
"log",
"paste",
"static_assertions",
] ]
[[package]] [[package]]
@ -127,6 +163,15 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "logos" name = "logos"
version = "0.13.0" version = "0.13.0"
@ -147,7 +192,7 @@ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"regex-syntax", "regex-syntax",
"syn", "syn 2.0.18",
] ]
[[package]] [[package]]
@ -161,30 +206,30 @@ dependencies = [
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.18.0" version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]] [[package]]
name = "paste" name = "paste"
version = "1.0.14" version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.66" version = "1.0.59"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.33" version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
] ]
@ -196,10 +241,42 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]] [[package]]
name = "syn" name = "rustc_version"
version = "2.0.29" version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]]
name = "semver"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -208,9 +285,9 @@ dependencies = [
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.11" version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"

View file

@ -1,3 +1,2 @@
[workspace] [workspace]
resolver = "2" members = ["hbasm", "hbbytecode", "hbvm"]
members = ["hbasm", "hbbytecode", "hbvm"]

View file

@ -1,29 +0,0 @@
# C ABI (proposal)
## C datatypes
| C Type | Description | Size (B) |
|:------------|:-------------------------|-------------:|
| char | Character / byte | 8 |
| short | Short integer | 16 |
| int | Integer | 32 |
| long | Long integer | 64 |
| long long | Long long integer | 64 |
| T* | Pointer | 64 |
| float | Single-precision float | 32 |
| double | Double-precision float | 64 |
| long double | Extended-precision float | **Bikeshed** |
## Registers
| Register | ABI Name | Description | Saver |
|:---------|:---------|:---------------|:-------|
| `r0` | — | Zero register | N/A |
| `r1` | `ra` | Return address | Caller |
| `r2` | `sp` | Stack pointer | Callee |
| `r3` | `tp` | Thread pointer | N/A |
**TODO:** Parameters
**TODO:** Saved
**TODO:** Temp

View file

@ -4,12 +4,12 @@ version = "0.1.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
ariadne = "0.3" ariadne = "0.3.0"
bytemuck = "1.13"
hashbrown = "0.14"
hbbytecode = { path = "../hbbytecode" } hbbytecode = { path = "../hbbytecode" }
literify = "0.1" literify = "0.1"
paste = "1.0" paste = "1.0"
hashbrown = "0.14.0"
bytemuck = "1.13.1"
[dependencies.lasso] [dependencies.lasso]
version = "0.7" version = "0.7"

View file

@ -1,12 +0,0 @@
-- Add two numbers
-- A + B = C
-- r1 A
li r1, 2
-- r2 Result
li r2, 0
-- B = 4
addi r2, r1, 4
-- terminate execution
tx

View file

@ -1,16 +0,0 @@
-- r1 will be the temp in fahrenheit
-- r2 temp in celsius
-- r3/r4/r5 will be used by constants
-- (f - 32) * 5 / 9
li r1, 100
li r3, 32
li r4, 5
li r5, 9
sub r2, r1, r3
mul r2, r2, r4
dir r2, r0, r2, r5
tx

View file

@ -1,14 +0,0 @@
li r255, 0
ecall
li r255, 1
li r254, 1
li r253, 100
ecall
li r255, 2
li r254, 0
li r253, 0
ecall
tx

View file

@ -1,2 +0,0 @@
L:
jal r0, r0, L

View file

@ -1,4 +0,0 @@
li r20, 1010
st r20, r24, 0, 1
addi r24, r0, 10
tx

View file

@ -1,99 +1,45 @@
//! Holey Bytes Assembler
//!
//! Some people claim:
//! > Write programs to handle text streams, because that is a universal interface.
//!
//! We at AbleCorp believe that nice programatic API is nicer than piping some text
//! into a program. It's less error-prone and faster.
//!
//! So this crate contains both assembleer with API for programs and a text assembler
//! for humans to write
#![no_std] #![no_std]
extern crate alloc; extern crate alloc;
mod macros; mod macros;
use { use {alloc::vec::Vec, hashbrown::HashSet};
alloc::{vec, vec::Vec},
hashbrown::HashSet,
};
/// Assembler #[derive(Default)]
///
/// - Opcode-generic, instruction-type-specific methods are named `i_param_<type>`
/// - You likely won't need to use them, but they are here, just in case :)
/// - Instruction-specific methods are named `i_<instruction>`
pub struct Assembler { pub struct Assembler {
pub buf: Vec<u8>, pub buf: Vec<u8>,
pub sub: HashSet<usize>, pub sub: HashSet<usize>,
} }
impl Default for Assembler { macros::impl_both!(
fn default() -> Self { bbbb(p0: R, p1: R, p2: R, p3: R)
Self { => [DIR, DIRF, FMAF],
buf: vec![0; 4], bbb(p0: R, p1: R, p2: R)
sub: Default::default(), => [ADD, SUB, MUL, AND, OR, XOR, SL, SR, SRS, CMP, CMPU, BRC, ADDF, SUBF, MULF],
} bbdh(p0: R, p1: R, p2: I, p3: u16)
} => [LD, ST],
} bbd(p0: R, p1: R, p2: I)
=> [ADDI, MULI, ANDI, ORI, XORI, SLI, SRI, SRSI, CMPI, CMPUI,
BMC, JEQ, JNE, JLT, JGT, JLTU, JGTU, ADDFI, MULFI],
bb(p0: R, p1: R)
=> [NEG, NOT, CP, SWA, NEGF, ITF, FTI],
bd(p0: R, p1: I)
=> [LI, JMP],
n()
=> [NOP, ECALL],
);
hbbytecode::invoke_with_def!(macros::text::gen_text);
impl Assembler {
hbbytecode::invoke_with_def!(macros::asm::impl_asm);
/// Append 12 zeroes (UN) at the end and add magic to the begining
///
/// # HoleyBytes lore
///
/// In reference HBVM implementation checks are done in
/// a separate phase before execution.
///
/// This way execution will be much faster as they have to
/// be done only once.
///
/// There was an issue. You cannot statically check register values and
/// `JAL` instruction could hop at the end of program to some byte, which
/// will be interpreted as some valid opcode and VM in attempt to decode
/// the instruction performed out-of-bounds read which leads to undefined behaviour.
///
/// Several options were considered to overcome this, but inserting some data at
/// program's end which when executed would lead to undesired behaviour, though
/// not undefined behaviour.
///
/// Newly created `UN` (as UNreachable) was chosen as
/// - It was a good idea to add some equivalent to `ud2` anyways
/// - It was chosen to be zero
/// - What if you somehow reached that code, it will appropriately bail :)
/// - (yes, originally `NOP` was considered)
///
/// Why 12 bytes? That's the size of largest instruction parameter part.
pub fn finalise(&mut self) {
self.buf.extend([0; 12]);
self.buf[0..4].copy_from_slice(&0xAB1E0B_u32.to_le_bytes());
}
}
/// Immediate value
///
/// # Implementor notice
/// It should insert exactly 8 bytes, otherwise output will be malformed.
/// This is not checked in any way
pub trait Imm { pub trait Imm {
/// Insert immediate value
fn insert(&self, asm: &mut Assembler); fn insert(&self, asm: &mut Assembler);
} }
/// Implement immediate values
macro_rules! impl_imm_le_bytes { macro_rules! impl_imm_le_bytes {
($($ty:ty),* $(,)?) => { ($($ty:ty),* $(,)?) => {
$( $(
impl Imm for $ty { impl Imm for $ty {
#[inline(always)] #[inline(always)]
fn insert(&self, asm: &mut Assembler) { fn insert(&self, asm: &mut Assembler) {
// Convert to little-endian bytes, insert.
asm.buf.extend(self.to_le_bytes()); asm.buf.extend(self.to_le_bytes());
} }
} }

View file

@ -1,9 +1,5 @@
//! Macros to generate [`crate::Assembler`]
/// Incremental token-tree muncher to implement specific instruction
/// functions based on generic function for instruction type
macro_rules! impl_asm_opcodes { macro_rules! impl_asm_opcodes {
( // End case (
$generic:ident $generic:ident
($($param_i:ident: $param_ty:ty),*) ($($param_i:ident: $param_ty:ty),*)
=> [] => []
@ -14,15 +10,14 @@ macro_rules! impl_asm_opcodes {
($($param_i:ident: $param_ty:ty),*) ($($param_i:ident: $param_ty:ty),*)
=> [$opcode:ident, $($rest:tt)*] => [$opcode:ident, $($rest:tt)*]
) => { ) => {
// Instruction-specific function
paste::paste! { paste::paste! {
#[allow(dead_code)]
#[inline(always)] #[inline(always)]
pub fn [<i_ $opcode:lower>](&mut self, $($param_i: $param_ty),*) { pub fn [<i_ $opcode:lower>](&mut self, $($param_i: $param_ty),*) {
self.$generic(hbbytecode::opcode::$opcode, $($param_i),*) self.$generic(hbbytecode::opcode::$opcode, $($param_i),*)
} }
} }
// And recurse!
macros::asm::impl_asm_opcodes!( macros::asm::impl_asm_opcodes!(
$generic($($param_i: $param_ty),*) $generic($($param_i: $param_ty),*)
=> [$($rest)*] => [$($rest)*]
@ -30,27 +25,16 @@ macro_rules! impl_asm_opcodes {
}; };
} }
/// Numeric value insert
macro_rules! impl_asm_insert { macro_rules! impl_asm_insert {
// Immediate - this is trait-based,
// the insertion is delegated to its implementation
($self:expr, $id:ident, I) => { ($self:expr, $id:ident, I) => {
Imm::insert(&$id, $self) Imm::insert(&$id, $self)
}; };
// Length - cannot be more than 2048
($self:expr, $id:ident, L) => {{
assert!($id <= 2048);
$self.buf.extend($id.to_le_bytes())
}};
// Other numbers, just insert their bytes, little endian
($self:expr, $id:ident, $_:ident) => { ($self:expr, $id:ident, $_:ident) => {
$self.buf.extend($id.to_le_bytes()) $self.buf.extend($id.to_le_bytes())
}; };
} }
/// Implement assembler
macro_rules! impl_asm { macro_rules! impl_asm {
( (
$( $(
@ -61,13 +45,12 @@ macro_rules! impl_asm {
) => { ) => {
paste::paste! { paste::paste! {
$( $(
// Opcode-generic functions specific for instruction types #[allow(dead_code)]
pub fn [<i_param_ $ityn>](&mut self, opcode: u8, $($param_i: macros::asm::ident_map_ty!($param_ty)),*) { fn [<i_param_ $ityn>](&mut self, opcode: u8, $($param_i: macros::asm::ident_map_ty!($param_ty)),*) {
self.buf.push(opcode); self.buf.push(opcode);
$(macros::asm::impl_asm_insert!(self, $param_i, $param_ty);)* $(macros::asm::impl_asm_insert!(self, $param_i, $param_ty);)*
} }
// Generate opcode-specific functions calling the opcode-generic ones
macros::asm::impl_asm_opcodes!( macros::asm::impl_asm_opcodes!(
[<i_param_ $ityn>]($($param_i: macros::asm::ident_map_ty!($param_ty)),*) [<i_param_ $ityn>]($($param_i: macros::asm::ident_map_ty!($param_ty)),*)
=> [$($opcode,)*] => [$($opcode,)*]
@ -77,13 +60,14 @@ macro_rules! impl_asm {
}; };
} }
/// Map operand type to Rust type
#[rustfmt::skip] #[rustfmt::skip]
macro_rules! ident_map_ty { macro_rules! ident_map_ty {
(R) => { u8 }; // Register is just u8 (R) => { u8 };
(I) => { impl Imm }; // Immediate is anything implementing the trait (I) => { impl Imm };
(L) => { u16 }; // Copy count ($id:ident) => { $id };
($id:ident) => { $id }; // Anything else → identity map
} }
pub(crate) use {ident_map_ty, impl_asm, impl_asm_insert, impl_asm_opcodes}; pub(crate) use {ident_map_ty, impl_asm, impl_asm_opcodes};
#[allow(clippy::single_component_path_imports)]
pub(crate) use impl_asm_insert;

View file

@ -1,6 +1,14 @@
//! And here the land of macros begin.
//!
//! They do not bite, really. Have you seen what Yandros is writing?
pub mod asm; pub mod asm;
pub mod text; pub mod text;
macro_rules! impl_both {
($($tt:tt)*) => {
impl Assembler {
$crate::macros::asm::impl_asm!($($tt)*);
}
$crate::macros::text::gen_text!($($tt)*);
};
}
pub(crate) use impl_both;

View file

@ -1,7 +1,3 @@
//! Macros to generate text-code assembler at [`crate::text`]
// Refering in module which generates a module to that module — is that even legal? :D
/// Generate text code based assembler
macro_rules! gen_text { macro_rules! gen_text {
( (
$( $(
@ -10,21 +6,6 @@ macro_rules! gen_text {
=> [$($opcode:ident),* $(,)?], => [$($opcode:ident),* $(,)?],
)* )*
) => { ) => {
/// # Text assembler
/// Text assembler generated simply calls methods in the [`crate::Assembler`] type.
///
/// # Syntax
/// ```text
/// instruction op1, op2, …
/// …
/// ```
/// - Opcode names are lowercase
/// - Registers are prefixed with `r` followed by number
/// - Operands are separated by `,`
/// - Instructions are separated by either line feed or `;` (αυτό δεν είναι ερωτηματικό!)
/// - Labels are defined by their names followed by colon `label:`
/// - Labels are referenced simply by their names
/// - Immediates are numbers, can be negative, floats are not yet supported
pub mod text { pub mod text {
use { use {
crate::{ crate::{
@ -37,7 +18,6 @@ macro_rules! gen_text {
}; };
paste::paste!(literify::literify! { paste::paste!(literify::literify! {
/// Assembly token
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
#[logos(extras = Rodeo)] #[logos(extras = Rodeo)]
#[logos(skip r"[ \t\t]+")] #[logos(skip r"[ \t\t]+")]
@ -78,7 +58,6 @@ macro_rules! gen_text {
} }
}); });
/// Type of error
#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ErrorKind { pub enum ErrorKind {
UnexpectedToken, UnexpectedToken,
@ -87,14 +66,12 @@ macro_rules! gen_text {
InvalidSymbol, InvalidSymbol,
} }
/// Text assembly error
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Error { pub struct Error {
pub kind: ErrorKind, pub kind: ErrorKind,
pub span: Span, pub span: Span,
} }
/// Parse code and insert instructions
pub fn assemble(asm: &mut Assembler, code: &str) -> Result<(), Error> { pub fn assemble(asm: &mut Assembler, code: &str) -> Result<(), Error> {
pub struct TextAsm<'a> { pub struct TextAsm<'a> {
asm: &'a mut Assembler, asm: &'a mut Assembler,
@ -115,40 +92,20 @@ macro_rules! gen_text {
fn run(&mut self) -> Result<(), ErrorKind> { fn run(&mut self) -> Result<(), ErrorKind> {
loop { loop {
match self.lexer.next() { match self.lexer.next() {
// Got an opcode
Some(Ok(Token::Opcode(op))) => { Some(Ok(Token::Opcode(op))) => {
match op { match op {
// Special-cased
hbbytecode::opcode::BRC => {
param_extract_itm!(
self,
p0: R,
p1: R,
p2: u8
);
self.asm.i_param_bbb(op, p0, p1, p2);
},
// Take all the opcodes and match them to their corresponding functions
$( $(
#[allow(unreachable_patterns)]
$(hbbytecode::opcode::$opcode)|* => paste::paste!({ $(hbbytecode::opcode::$opcode)|* => paste::paste!({
param_extract_itm!(self, $($param_i: $param_ty),*); param_extract_itm!(self, $($param_i: $param_ty),*);
self.asm.[<i_param_ $ityn>](op, $($param_i),*); self.asm.[<i_param_ $ityn>](op, $($param_i),*);
}), }),
)* )*
// Already matched in Logos, should not be able to obtain
// invalid opcode.
_ => unreachable!(), _ => unreachable!(),
} }
} }
// Insert label to table
Some(Ok(Token::Label(lbl))) => { Some(Ok(Token::Label(lbl))) => {
self.symloc.insert(lbl, self.asm.buf.len()); self.symloc.insert(lbl, self.asm.buf.len());
} }
// Instruction separator (LF, ;)
Some(Ok(Token::ISep)) => (), Some(Ok(Token::ISep)) => (),
Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken), Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken),
Some(Err(())) => return Err(ErrorKind::InvalidToken), Some(Err(())) => return Err(ErrorKind::InvalidToken),
@ -166,21 +123,16 @@ macro_rules! gen_text {
asm.run() asm.run()
.map_err(|kind| Error { kind, span: asm.lexer.span() })?; .map_err(|kind| Error { kind, span: asm.lexer.span() })?;
// Walk table and substitute labels
// for their addresses
for &loc in &asm.asm.sub { for &loc in &asm.asm.sub {
// Extract indices from the code and get addresses from table
let val = asm.symloc let val = asm.symloc
.get( .get(
&Spur::try_from_usize(bytemuck::pod_read_unaligned::<u64>( &Spur::try_from_usize(bytemuck::pod_read_unaligned::<u64>(&asm.asm.buf[loc..loc+core::mem::size_of::<u64>()]) as _)
&asm.asm.buf[loc..loc + core::mem::size_of::<u64>()]) as _ .unwrap()
).unwrap()
) )
.ok_or(Error { kind: ErrorKind::InvalidSymbol, span: 0..0 })? .ok_or(Error { kind: ErrorKind::InvalidSymbol, span: 0..0 })?
.to_le_bytes(); .to_le_bytes();
// New address
asm.asm.buf[loc..] asm.asm.buf[loc..]
.iter_mut() .iter_mut()
.zip(val) .zip(val)
@ -190,13 +142,6 @@ macro_rules! gen_text {
Ok(()) Ok(())
} }
// Fun fact: this is a little hack
// It may slow the things a little bit down, but
// it made the macro to be made pretty nice.
//
// If you have any idea how to get rid of this,
// contributions are welcome :)
// I *likely* won't try anymore.
enum InternalImm { enum InternalImm {
Const(u64), Const(u64),
Named(Spur), Named(Spur),
@ -206,14 +151,9 @@ macro_rules! gen_text {
#[inline] #[inline]
fn insert(&self, asm: &mut Assembler) { fn insert(&self, asm: &mut Assembler) {
match self { match self {
// Constant immediate, just put it in
Self::Const(a) => a.insert(asm), Self::Const(a) => a.insert(asm),
// Label
Self::Named(a) => { Self::Named(a) => {
// Insert to the sub table that substitution will be
// requested
asm.sub.insert(asm.buf.len()); asm.sub.insert(asm.buf.len());
// Insert value from interner in place
asm.buf.extend((a.into_usize() as u64).to_le_bytes()); asm.buf.extend((a.into_usize() as u64).to_le_bytes());
}, },
} }
@ -223,7 +163,6 @@ macro_rules! gen_text {
}; };
} }
/// Extract item by pattern, otherwise return [`ErrorKind::UnexpectedToken`]
macro_rules! extract_pat { macro_rules! extract_pat {
($self:expr, $pat:pat) => { ($self:expr, $pat:pat) => {
let $pat = $self.next()? let $pat = $self.next()?
@ -231,60 +170,30 @@ macro_rules! extract_pat {
}; };
} }
/// Generate extract macro macro_rules! extract {
macro_rules! gen_extract { ($self:expr, R, $id:ident) => {
// Integer types have same body extract_pat!($self, Token::Register($id));
($($int:ident),* $(,)?) => { };
/// Extract operand from code
macro_rules! extract {
// Register (require prefixing with r)
($self:expr, R, $id:ident) => {
extract_pat!($self, Token::Register($id));
};
($self:expr, L, $id:ident) => { ($self:expr, I, $id:ident) => {
extract_pat!($self, Token::Integer($id)); let $id = match $self.next()? {
if $id > 2048 { Token::Integer(a) => InternalImm::Const(a),
return Err(ErrorKind::InvalidToken); Token::Symbol(a) => InternalImm::Named(a),
} _ => return Err(ErrorKind::UnexpectedToken),
};
};
let $id = u16::try_from($id).unwrap(); ($self:expr, u16, $id:ident) => {
}; extract_pat!($self, Token::Integer($id));
let $id = u16::try_from($id).map_err(|_| ErrorKind::InvalidToken)?;
// Immediate
($self:expr, I, $id:ident) => {
let $id = match $self.next()? {
// Either straight up integer
Token::Integer(a) => InternalImm::Const(a),
// …or a label
Token::Symbol(a) => InternalImm::Named(a),
_ => return Err(ErrorKind::UnexpectedToken),
};
};
// Get $int, if not fitting, the token is claimed invalid
$(($self:expr, $int, $id:ident) => {
extract_pat!($self, Token::Integer($id));
let $id = $int::try_from($id).map_err(|_| ErrorKind::InvalidToken)?;
});*;
}
}; };
} }
gen_extract!(u8, u16, u32);
/// Parameter extract incremental token-tree muncher
///
/// What else would it mean?
macro_rules! param_extract_itm { macro_rules! param_extract_itm {
($self:expr, $($id:ident: $ty:ident)? $(, $($tt:tt)*)?) => { ($self:expr, $($id:ident: $ty:ident)? $(, $($tt:tt)*)?) => {
// Extract pattern
$(extract!($self, $ty, $id);)? $(extract!($self, $ty, $id);)?
$( $(
// Require operand separator
extract_pat!($self, Token::PSep); extract_pat!($self, Token::PSep);
// And go to the next (recursive)
// …munch munch… yummy token trees.
param_extract_itm!($self, $($tt)*); param_extract_itm!($self, $($tt)*);
)? )?
}; };

View file

@ -40,7 +40,7 @@ fn main() -> Result<(), Box<dyn Error>> {
.with_code(e_code) .with_code(e_code)
.with_message(format!("{:?}", e.kind)) .with_message(format!("{:?}", e.kind))
.with_label( .with_label(
Label::new(("engine_internal", e.span)) Label::new(("engine_internal", e.span.clone()))
.with_message(message) .with_message(message)
.with_color(a), .with_color(a),
) )
@ -48,7 +48,6 @@ fn main() -> Result<(), Box<dyn Error>> {
.eprint(("engine_internal", Source::from(&code))) .eprint(("engine_internal", Source::from(&code)))
.unwrap(); .unwrap();
} else { } else {
assembler.finalise();
std::io::stdout().lock().write_all(&assembler.buf).unwrap(); std::io::stdout().lock().write_all(&assembler.buf).unwrap();
} }

View file

@ -1,8 +1,5 @@
/* HoleyBytes Bytecode representation in C /* HoleyBytes Bytecode representation in C
* Requires C23 compiler or better * Requires C23 compiler or better
*
* Uses MSVC pack pragma extension,
* proved to work with Clang and GNU® GCC.
*/ */
#pragma once #pragma once
@ -13,15 +10,15 @@
static_assert(CHAR_BIT == 8, "Cursed architectures are not supported"); static_assert(CHAR_BIT == 8, "Cursed architectures are not supported");
enum hbbc_Opcode: uint8_t { enum hbbc_Opcode: uint8_t {
hbbc_Op_UN , hbbc_Op_TX , hbbc_Op_NOP , hbbc_Op_ADD , hbbc_Op_SUB , hbbc_Op_MUL , hbbc_Op_NOP , hbbc_Op_ADD , hbbc_Op_SUB , hbbc_Op_MUL , hbbc_Op_AND , hbbc_Op_OR ,
hbbc_Op_AND , hbbc_Op_OR , hbbc_Op_XOR , hbbc_Op_SL , hbbc_Op_SR , hbbc_Op_SRS , hbbc_Op_XOR , hbbc_Op_SL , hbbc_Op_SR , hbbc_Op_SRS , hbbc_Op_CMP , hbbc_Op_CMPU ,
hbbc_Op_CMP , hbbc_Op_CMPU , hbbc_Op_DIR , hbbc_Op_NEG , hbbc_Op_NOT , hbbc_Op_ADDI , hbbc_Op_DIR , hbbc_Op_NEG , hbbc_Op_NOT , hbbc_Op_ADDI , hbbc_Op_MULI , hbbc_Op_ANDI ,
hbbc_Op_MULI , hbbc_Op_ANDI , hbbc_Op_ORI , hbbc_Op_XORI , hbbc_Op_SLI , hbbc_Op_SRI , hbbc_Op_ORI , hbbc_Op_XORI , hbbc_Op_SLI , hbbc_Op_SRI , hbbc_Op_SRSI , hbbc_Op_CMPI ,
hbbc_Op_SRSI , hbbc_Op_CMPI , hbbc_Op_CMPUI , hbbc_Op_CP , hbbc_Op_SWA , hbbc_Op_LI , hbbc_Op_CMPUI , hbbc_Op_CP , hbbc_Op_SWA , hbbc_Op_LI , hbbc_Op_LD , hbbc_Op_ST ,
hbbc_Op_LD , hbbc_Op_ST , hbbc_Op_BMC , hbbc_Op_BRC , hbbc_Op_JMP , hbbc_Op_JAL , hbbc_Op_BMC , hbbc_Op_BRC , hbbc_Op_JMP , hbbc_Op_JEQ , hbbc_Op_JNE , hbbc_Op_JLT ,
hbbc_Op_JEQ , hbbc_Op_JNE , hbbc_Op_JLT , hbbc_Op_JGT , hbbc_Op_JLTU , hbbc_Op_JGTU , hbbc_Op_JGT , hbbc_Op_JLTU , hbbc_Op_JGTU , hbbc_Op_ECALL , hbbc_Op_ADDF , hbbc_Op_SUBF ,
hbbc_Op_ECALL , hbbc_Op_ADDF , hbbc_Op_SUBF , hbbc_Op_MULF , hbbc_Op_DIRF , hbbc_Op_FMAF , hbbc_Op_MULF , hbbc_Op_DIRF , hbbc_Op_FMAF , hbbc_Op_NEGF , hbbc_Op_ITF , hbbc_Op_FTI ,
hbbc_Op_NEGF , hbbc_Op_ITF , hbbc_Op_FTI , hbbc_Op_ADDFI , hbbc_Op_MULFI , hbbc_Op_ADDFI , hbbc_Op_MULFI ,
} typedef hbbc_Opcode; } typedef hbbc_Opcode;
static_assert(sizeof(hbbc_Opcode) == 1); static_assert(sizeof(hbbc_Opcode) == 1);
@ -47,11 +44,6 @@ struct hbbc_ParamBBD
typedef hbbc_ParamBBD; typedef hbbc_ParamBBD;
static_assert(sizeof(hbbc_ParamBBD) == 80 / 8); static_assert(sizeof(hbbc_ParamBBD) == 80 / 8);
struct hbbc_ParamBBW
{ uint8_t _0; uint8_t _1; uint32_t _2; }
typedef hbbc_ParamBBW;
static_assert(sizeof(hbbc_ParamBBW) == 48 / 8);
struct hbbc_ParamBB struct hbbc_ParamBB
{ uint8_t _0; uint8_t _1; } { uint8_t _0; uint8_t _1; }
typedef hbbc_ParamBB; typedef hbbc_ParamBB;

View file

@ -1,170 +0,0 @@
//! Generate HoleyBytes code validator
macro_rules! gen_valider {
(
$(
$ityn:ident
($($param_i:ident: $param_ty:ident),* $(,)?)
=> [$($opcode:ident),* $(,)?],
)*
) => {
#[allow(unreachable_code)]
pub mod valider {
//! Validate if program is sound to execute
/// Program validation error kind
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
/// Unknown opcode
InvalidInstruction,
/// VM doesn't implement this valid opcode
Unimplemented,
/// Attempted to copy over register boundary
RegisterArrayOverflow,
/// Program is not validly terminated
InvalidEnd,
/// Program misses magic
MissingMagic
}
/// Error
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Error {
/// Kind
pub kind: ErrorKind,
/// Location in bytecode
pub index: usize,
}
/// Perform bytecode validation. If it passes, the program should be
/// sound to execute.
pub fn validate(mut program: &[u8]) -> Result<(), Error> {
// Validate magic
if program.get(0..4) != Some(&0xAB1E0B_u32.to_le_bytes()) {
return Err(Error {
kind: ErrorKind::MissingMagic,
index: 0,
});
}
// Program has to end with 12 zeroes, if there is less than
// 12 bytes, program is invalid.
if program.len() < 12 {
return Err(Error {
kind: ErrorKind::InvalidEnd,
index: 0,
});
}
// Verify that program ends with 12 zeroes
for (index, item) in program.iter().enumerate().skip(program.len() - 12) {
if *item != 0 {
return Err(Error {
kind: ErrorKind::InvalidEnd,
index,
});
}
}
let start = program;
program = &program[4..];
loop {
use crate::opcode::*;
program = match program {
// End of program
[] => return Ok(()),
// Memory load/store cannot go out-of-bounds register array
// B B D1 D2 D3 D4 D5 D6 D7 D8 H1 H2
[LD..=ST, reg, _, _, _, _, _, _, _, _, _, count_0, count_1, ..]
if usize::from(*reg) * 8
+ usize::from(u16::from_le_bytes([*count_0, *count_1]))
> 2048 =>
{
return Err(Error {
kind: ErrorKind::RegisterArrayOverflow,
index: (program.as_ptr() as usize) - (start.as_ptr() as usize),
});
}
// Block register copy cannot go out-of-bounds register array
[BRC, src, dst, count, ..]
if src.checked_add(*count).is_none()
|| dst.checked_add(*count).is_none() =>
{
return Err(Error {
kind: ErrorKind::RegisterArrayOverflow,
index: (program.as_ptr() as usize) - (start.as_ptr() as usize),
});
}
$(
$crate::gen_valider::inst_chk!(
rest, $ityn, $($opcode),*
)
)|* => rest,
// The plebs
_ => {
return Err(Error {
kind: ErrorKind::InvalidInstruction,
index: (program.as_ptr() as usize) - (start.as_ptr() as usize),
})
}
}
}
}
}
};
}
/// Generate instruction check pattern
macro_rules! inst_chk {
// Sadly this has hardcoded instruction types,
// as I cannot generate parts of patterns+
($rest:ident, bbbb, $($opcode:ident),*) => {
// B B B B
[$($opcode)|*, _, _, _, _, $rest @ ..]
};
($rest:ident, bbb, $($opcode:ident),*) => {
// B B B
[$($opcode)|*, _, _, _, $rest @ ..]
};
($rest:ident, bbdh, $($opcode:ident),*) => {
// B B D1 D2 D3 D4 D5 D6 D7 D8 H1 H2
[$($opcode)|*, _, _, _, _, _, _, _, _, _, _, _, _, $rest @ ..]
};
($rest:ident, bbd, $($opcode:ident),*) => {
// B B D1 D2 D3 D4 D5 D6 D7 D8
[$($opcode)|*, _, _, _, _, _, _, _, _, _, _, $rest @ ..]
};
($rest:ident, bbw, $($opcode:ident),*) => {
// B B W1 W2 W3 W4
[$($opcode)|*, _, _, _, _, _, _, $rest @ ..]
};
($rest:ident, bb, $($opcode:ident),*) => {
// B B
[$($opcode)|*, _, _, $rest @ ..]
};
($rest:ident, bd, $($opcode:ident),*) => {
// B D1 D2 D3 D4 D5 D6 D7 D8
[$($opcode)|*, _, _, _, _, _, _, _, _, _, $rest @ ..]
};
($rest:ident, n, $($opcode:ident),*) => {
[$($opcode)|*, $rest @ ..]
};
($_0:ident, $($_1:ident),*) => {
compile_error!("Invalid instruction type");
}
}
pub(crate) use {gen_valider, inst_chk};

View file

@ -1,7 +1,5 @@
#![no_std] #![no_std]
mod gen_valider;
macro_rules! constmod { macro_rules! constmod {
($vis:vis $mname:ident($repr:ty) { ($vis:vis $mname:ident($repr:ty) {
$(#![doc = $mdoc:literal])? $(#![doc = $mdoc:literal])?
@ -17,147 +15,93 @@ macro_rules! constmod {
}; };
} }
#[allow(rustdoc::invalid_rust_codeblocks)]
/// Invoke macro with bytecode definition
/// # Input syntax
/// ```no_run
/// macro!(
/// INSTRUCTION_TYPE(p0: TYPE, p1: TYPE, …)
/// => [INSTRUCTION_A, INSTRUCTION_B, …],
/// …
/// );
/// ```
/// - Instruction type determines opcode-generic, instruction-type-specific
/// function. Name: `i_param_INSTRUCTION_TYPE`
/// - Per-instructions there will be generated opcode-specific functions calling the generic ones
/// - Operand types
/// - R: Register (u8)
/// - I: Immediate
/// - L: Memory load / store size (u16)
/// - Other types are identity-mapped
///
/// # BRC special-case
/// BRC's 3rd operand is plain byte, not a register. Encoding is the same, but for some cases it may matter.
///
/// Please, if you distinguish in your API between byte and register, special case this one.
///
/// Sorry for that :(
#[macro_export]
macro_rules! invoke_with_def {
($macro:path) => {
$macro!(
bbbb(p0: R, p1: R, p2: R, p3: R)
=> [DIR, DIRF, FMAF],
bbb(p0: R, p1: R, p2: R)
=> [ADD, SUB, MUL, AND, OR, XOR, SL, SR, SRS, CMP, CMPU, BRC, ADDF, SUBF, MULF],
bbdh(p0: R, p1: R, p2: I, p3: L)
=> [LD, ST],
bbd(p0: R, p1: R, p2: I)
=> [ADDI, MULI, ANDI, ORI, XORI, CMPI, CMPUI, BMC, JAL, JEQ, JNE, JLT, JGT, JLTU,
JGTU, ADDFI, MULFI],
bbw(p0: R, p1: R, p2: u32)
=> [SLI, SRI, SRSI],
bb(p0: R, p1: R)
=> [NEG, NOT, CP, SWA, NEGF, ITF, FTI],
bd(p0: R, p1: I)
=> [LI],
n()
=> [UN, TX, NOP, ECALL],
);
};
}
invoke_with_def!(gen_valider::gen_valider);
constmod!(pub opcode(u8) { constmod!(pub opcode(u8) {
//! Opcode constant module //! Opcode constant module
UN = 0, "N; Raises a trap"; NOP = 0, "N; Do nothing";
TX = 1, "N; Terminate execution";
NOP = 2, "N; Do nothing";
ADD = 3, "BBB; #0 ← #1 + #2"; ADD = 1, "BBB; #0 ← #1 + #2";
SUB = 4, "BBB; #0 ← #1 - #2"; SUB = 2, "BBB; #0 ← #1 - #2";
MUL = 5, "BBB; #0 ← #1 × #2"; MUL = 3, "BBB; #0 ← #1 × #2";
AND = 6, "BBB; #0 ← #1 & #2"; AND = 4, "BBB; #0 ← #1 & #2";
OR = 7, "BBB; #0 ← #1 | #2"; OR = 5, "BBB; #0 ← #1 | #2";
XOR = 8, "BBB; #0 ← #1 ^ #2"; XOR = 6, "BBB; #0 ← #1 ^ #2";
SL = 9, "BBB; #0 ← #1 « #2"; SL = 7, "BBB; #0 ← #1 « #2";
SR = 10, "BBB; #0 ← #1 » #2"; SR = 8, "BBB; #0 ← #1 » #2";
SRS = 11, "BBB; #0 ← #1 » #2 (signed)"; SRS = 9, "BBB; #0 ← #1 » #2 (signed)";
CMP = 12, "BBB; #0 ← #1 <=> #2"; CMP = 10, "BBB; #0 ← #1 <=> #2";
CMPU = 13, "BBB; #0 ← #1 <=> #2 (unsigned)"; CMPU = 11, "BBB; #0 ← #1 <=> #2 (unsigned)";
DIR = 14, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3"; DIR = 12, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3";
NEG = 15, "BB; #0 ← -#1"; NEG = 13, "BB; #0 ← -#1";
NOT = 16, "BB; #0 ← !#1"; NOT = 14, "BB; #0 ← !#1";
ADDI = 17, "BBD; #0 ← #1 + imm #2"; ADDI = 15, "BBD; #0 ← #1 + imm #2";
MULI = 18, "BBD; #0 ← #1 × imm #2"; MULI = 16, "BBD; #0 ← #1 × imm #2";
ANDI = 19, "BBD; #0 ← #1 & imm #2"; ANDI = 17, "BBD; #0 ← #1 & imm #2";
ORI = 20, "BBD; #0 ← #1 | imm #2"; ORI = 18, "BBD; #0 ← #1 | imm #2";
XORI = 21, "BBD; #0 ← #1 ^ imm #2"; XORI = 19, "BBD; #0 ← #1 ^ imm #2";
SLI = 22, "BBW; #0 ← #1 « imm #2"; SLI = 20, "BBD; #0 ← #1 « imm #2";
SRI = 23, "BBW; #0 ← #1 » imm #2"; SRI = 21, "BBD; #0 ← #1 » imm #2";
SRSI = 24, "BBW; #0 ← #1 » imm #2 (signed)"; SRSI = 22, "BBD; #0 ← #1 » imm #2 (signed)";
CMPI = 25, "BBD; #0 ← #1 <=> imm #2"; CMPI = 23, "BBD; #0 ← #1 <=> imm #2";
CMPUI = 26, "BBD; #0 ← #1 <=> imm #2 (unsigned)"; CMPUI = 24, "BBD; #0 ← #1 <=> imm #2 (unsigned)";
CP = 27, "BB; Copy #0 ← #1"; CP = 25, "BB; Copy #0 ← #1";
SWA = 28, "BB; Swap #0 and #1"; SWA = 26, "BB; Swap #0 and #1";
LI = 29, "BD; #0 ← imm #1"; LI = 27, "BD; #0 ← imm #1";
LD = 30, "BBDB; #0 ← [#1 + imm #3], imm #4 bytes, overflowing"; LD = 28, "BBDB; #0 ← [#1 + imm #3], imm #4 bytes, overflowing";
ST = 31, "BBDB; [#1 + imm #3] ← #0, imm #4 bytes, overflowing"; ST = 29, "BBDB; [#1 + imm #3] ← #0, imm #4 bytes, overflowing";
BMC = 32, "BBD; [#0] ← [#1], imm #2 bytes"; BMC = 30, "BBD; [#0] ← [#1], imm #2 bytes";
BRC = 33, "BBB; #0 ← #1, imm #2 registers"; BRC = 31, "BBB; #0 ← #1, imm #2 registers";
JMP = 34, "D; Unconditional, non-linking absolute jump"; JMP = 32, "BD; Unconditional jump [#0 + imm #1]";
JAL = 35, "BD; Copy PC to #0 and unconditional jump [#1 + imm #2]"; JEQ = 33, "BBD; if #0 = #1 → jump imm #2";
JEQ = 36, "BBD; if #0 = #1 → jump imm #2"; JNE = 34, "BBD; if #0 ≠ #1 → jump imm #2";
JNE = 37, "BBD; if #0 ≠ #1 → jump imm #2"; JLT = 35, "BBD; if #0 < #1 → jump imm #2";
JLT = 38, "BBD; if #0 < #1 → jump imm #2"; JGT = 36, "BBD; if #0 > #1 → jump imm #2";
JGT = 39, "BBD; if #0 > #1 → jump imm #2"; JLTU = 37, "BBD; if #0 < #1 → jump imm #2 (unsigned)";
JLTU = 40, "BBD; if #0 < #1 → jump imm #2 (unsigned)"; JGTU = 38, "BBD; if #0 > #1 → jump imm #2 (unsigned)";
JGTU = 41, "BBD; if #0 > #1 → jump imm #2 (unsigned)"; ECALL = 39, "N; Issue system call";
ECALL = 42, "N; Issue system call";
ADDF = 43, "BBB; #0 ← #1 +. #2"; ADDF = 40, "BBB; #0 ← #1 +. #2";
SUBF = 44, "BBB; #0 ← #1 -. #2"; SUBF = 41, "BBB; #0 ← #1 -. #2";
MULF = 45, "BBB; #0 ← #1 +. #2"; MULF = 42, "BBB; #0 ← #1 +. #2";
DIRF = 46, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3"; DIRF = 43, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3";
FMAF = 47, "BBBB; #0 ← (#1 * #2) + #3"; FMAF = 44, "BBBB; #0 ← (#1 * #2) + #3";
NEGF = 48, "BB; #0 ← -#1"; NEGF = 45, "BB; #0 ← -#1";
ITF = 49, "BB; #0 ← #1 as float"; ITF = 46, "BB; #0 ← #1 as float";
FTI = 50, "BB; #0 ← #1 as int"; FTI = 47, "BB; #0 ← #1 as int";
ADDFI = 51, "BBD; #0 ← #1 +. imm #2"; ADDFI = 48, "BBD; #0 ← #1 +. imm #2";
MULFI = 52, "BBD; #0 ← #1 *. imm #2"; MULFI = 49, "BBD; #0 ← #1 *. imm #2";
}); });
#[repr(packed)] #[repr(packed)]
pub struct ParamBBBB(pub u8, pub u8, pub u8, pub u8); pub struct ParamBBBB(pub u8, pub u8, pub u8, pub u8);
#[repr(packed)] #[repr(packed)]
pub struct ParamBBB(pub u8, pub u8, pub u8); pub struct ParamBBB(pub u8, pub u8, pub u8);
#[repr(packed)] #[repr(packed)]
pub struct ParamBBDH(pub u8, pub u8, pub u64, pub u16); pub struct ParamBBDH(pub u8, pub u8, pub u64, pub u16);
#[repr(packed)] #[repr(packed)]
pub struct ParamBBD(pub u8, pub u8, pub u64); pub struct ParamBBD(pub u8, pub u8, pub u64);
#[repr(packed)]
pub struct ParamBBW(pub u8, pub u8, pub u32);
#[repr(packed)] #[repr(packed)]
pub struct ParamBB(pub u8, pub u8); pub struct ParamBB(pub u8, pub u8);
#[repr(packed)] #[repr(packed)]
pub struct ParamBD(pub u8, pub u64); pub struct ParamBD(pub u8, pub u64);
/// # Safety /// # Safety
/// Has to be valid to be decoded from bytecode. /// Has to be valid to be decoded from bytecode.
pub unsafe trait ProgramVal {} pub unsafe trait OpParam {}
unsafe impl ProgramVal for ParamBBBB {} unsafe impl OpParam for ParamBBBB {}
unsafe impl ProgramVal for ParamBBB {} unsafe impl OpParam for ParamBBB {}
unsafe impl ProgramVal for ParamBBDH {} unsafe impl OpParam for ParamBBDH {}
unsafe impl ProgramVal for ParamBBD {} unsafe impl OpParam for ParamBBD {}
unsafe impl ProgramVal for ParamBBW {} unsafe impl OpParam for ParamBB {}
unsafe impl ProgramVal for ParamBB {} unsafe impl OpParam for ParamBD {}
unsafe impl ProgramVal for ParamBD {} unsafe impl OpParam for u64 {}
unsafe impl ProgramVal for u64 {} unsafe impl OpParam for () {}
unsafe impl ProgramVal for u8 {} // Opcode
unsafe impl ProgramVal for () {}

View file

@ -6,10 +6,11 @@ edition = "2021"
[profile.release] [profile.release]
lto = true lto = true
[features]
default = ["alloc"]
alloc = []
nightly = []
[dependencies] [dependencies]
hbbytecode.path = "../hbbytecode" delegate = "0.9"
derive_more = "0.99"
hashbrown = "0.13"
hbbytecode.path = "../hbbytecode"
log = "0.4"
paste = "1.0"
static_assertions = "1.0"

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -1,5 +0,0 @@
target
artifacts
corpus
coverage
Cargo.lock

View file

@ -1,30 +0,0 @@
[package]
name = "hbvm-fuzz"
version = "0.0.0"
publish = false
edition = "2021"
[package.metadata]
cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
[dependencies.hbvm]
path = ".."
[dependencies.hbbytecode]
path = "../../hbbytecode"
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[profile.release]
debug = 1
[[bin]]
name = "vm"
path = "fuzz_targets/vm.rs"
test = false
doc = false

View file

@ -1,85 +0,0 @@
#![no_main]
use {
hbbytecode::valider::validate,
hbvm::{
mem::{
softpaging::{
paging::{PageTable, Permission},
HandlePageFault, PageSize, SoftPagedMem,
},
Address, MemoryAccessReason,
},
Vm,
},
libfuzzer_sys::fuzz_target,
};
fuzz_target!(|data: &[u8]| {
if validate(data).is_ok() {
let mut vm = unsafe {
Vm::<_, 16384>::new(
SoftPagedMem::<_, true> {
pf_handler: TestTrapHandler,
program: data,
root_pt: Box::into_raw(Default::default()),
icache: Default::default(),
},
Address::new(4),
)
};
// Alloc and map some memory
let pages = [
alloc_and_map(&mut vm.memory, 0),
alloc_and_map(&mut vm.memory, 4096),
];
// Run VM
let _ = vm.run();
// Unmap and dealloc the memory
for (i, page) in pages.into_iter().enumerate() {
unmap_and_dealloc(&mut vm.memory, page, i as u64 * 4096);
}
let _ = unsafe { Box::from_raw(vm.memory.root_pt) };
}
});
fn alloc_and_map(memory: &mut SoftPagedMem<TestTrapHandler>, at: u64) -> *mut u8 {
let ptr = Box::into_raw(Box::<Page>::default()).cast();
unsafe {
memory
.map(ptr, Address::new(at), Permission::Write, PageSize::Size4K)
.unwrap()
};
ptr
}
fn unmap_and_dealloc(memory: &mut SoftPagedMem<TestTrapHandler>, ptr: *mut u8, from: u64) {
memory.unmap(Address::new(from)).unwrap();
let _ = unsafe { Box::from_raw(ptr.cast::<Page>()) };
}
#[repr(align(4096))]
struct Page([u8; 4096]);
impl Default for Page {
fn default() -> Self {
unsafe { std::mem::MaybeUninit::zeroed().assume_init() }
}
}
struct TestTrapHandler;
impl HandlePageFault for TestTrapHandler {
fn page_fault(
&mut self,
_: MemoryAccessReason,
_: &mut PageTable,
_: Address,
_: PageSize,
_: *mut u8,
) -> bool {
false
}
}

View file

@ -1,135 +0,0 @@
//! Block memory copier state machine
use {
super::{mem::MemoryAccessReason, Memory, VmRunError},
crate::mem::Address,
core::{mem::MaybeUninit, task::Poll},
};
/// Buffer size (defaults to 4 KiB, a smallest page size on most platforms)
const BUF_SIZE: usize = 4096;
/// Buffer of possibly uninitialised bytes, aligned to [`BUF_SIZE`]
#[repr(align(4096))]
struct AlignedBuf([MaybeUninit<u8>; BUF_SIZE]);
/// State for block memory copy
pub struct BlockCopier {
/// Source address
src: Address,
/// Destination address
dst: Address,
/// How many buffer sizes to copy?
n_buffers: usize,
/// …and what remainds after?
rem: usize,
}
impl BlockCopier {
/// Construct a new one
#[inline]
pub fn new(src: Address, dst: Address, count: usize) -> Self {
Self {
src,
dst,
n_buffers: count / BUF_SIZE,
rem: count % BUF_SIZE,
}
}
/// Copy one block
///
/// # Safety
/// - Same as for [`Memory::load`] and [`Memory::store`]
pub unsafe fn poll(&mut self, memory: &mut impl Memory) -> Poll<Result<(), BlkCopyError>> {
// Safety: Assuming uninit of array of MaybeUninit is sound
let mut buf = AlignedBuf(MaybeUninit::uninit().assume_init());
// We have at least one buffer size to copy
if self.n_buffers != 0 {
if let Err(e) = act(
memory,
self.src,
self.dst,
buf.0.as_mut_ptr().cast(),
BUF_SIZE,
) {
return Poll::Ready(Err(e));
}
// Bump source and destination address
self.src += BUF_SIZE;
self.dst += BUF_SIZE;
self.n_buffers -= 1;
return if self.n_buffers + self.rem == 0 {
// If there is nothing left, we are done
Poll::Ready(Ok(()))
} else {
// Otherwise let's advice to run it again
Poll::Pending
};
}
if self.rem != 0 {
if let Err(e) = act(
memory,
self.src,
self.dst,
buf.0.as_mut_ptr().cast(),
self.rem,
) {
return Poll::Ready(Err(e));
}
}
Poll::Ready(Ok(()))
}
}
/// Load to buffer and store from buffer
#[inline]
unsafe fn act(
memory: &mut impl Memory,
src: Address,
dst: Address,
buf: *mut u8,
count: usize,
) -> Result<(), BlkCopyError> {
// Load to buffer
memory
.load(src, buf, count)
.map_err(|super::mem::LoadError(addr)| BlkCopyError {
access_reason: MemoryAccessReason::Load,
addr,
})?;
// Store from buffer
memory
.store(dst, buf, count)
.map_err(|super::mem::StoreError(addr)| BlkCopyError {
access_reason: MemoryAccessReason::Store,
addr,
})?;
Ok(())
}
/// Error occured when copying a block of memory
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BlkCopyError {
/// Kind of access
access_reason: MemoryAccessReason,
/// VM Address
addr: Address,
}
impl From<BlkCopyError> for VmRunError {
fn from(value: BlkCopyError) -> Self {
match value.access_reason {
MemoryAccessReason::Load => Self::LoadAccessEx(value.addr),
MemoryAccessReason::Store => Self::StoreAccessEx(value.addr),
}
}
}

View file

@ -1,108 +1,7 @@
//! HoleyBytes Virtual Machine #![doc = include_str!("../README.md")]
//!
//! # Alloc feature
//! - Enabled by default
//! - Provides mapping / unmapping, as well as [`Default`] and [`Drop`]
//! implementations for soft-paged memory implementation
// # General safety notice:
// - Validation has to assure there is 256 registers (r0 - r255)
// - Instructions have to be valid as specified (values and sizes)
// - Mapped pages should be at least 4 KiB
#![no_std] #![no_std]
#![cfg_attr(feature = "nightly", feature(fn_align))]
#![warn(missing_docs)]
use mem::{Memory, Address};
#[cfg(feature = "alloc")]
extern crate alloc; extern crate alloc;
pub mod mem; pub mod validate;
pub mod value; pub mod vm;
mod bmc;
mod vmrun;
mod utils;
use {bmc::BlockCopier, value::Value};
/// HoleyBytes Virtual Machine
pub struct Vm<Mem, const TIMER_QUOTIENT: usize> {
/// Holds 256 registers
///
/// Writing to register 0 is considered undefined behaviour
/// in terms of HoleyBytes program execution
pub registers: [Value; 256],
/// Memory implementation
pub memory: Mem,
/// Program counter
pub pc: Address,
/// Program timer
timer: usize,
/// Saved block copier
copier: Option<BlockCopier>,
}
impl<Mem, const TIMER_QUOTIENT: usize> Vm<Mem, TIMER_QUOTIENT>
where
Mem: Memory,
{
/// Create a new VM with program and trap handler
///
/// # Safety
/// Program code has to be validated
pub unsafe fn new(memory: Mem, entry: Address) -> Self {
Self {
registers: [Value::from(0_u64); 256],
memory,
pc: entry,
timer: 0,
copier: None,
}
}
}
/// Virtual machine halt error
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum VmRunError {
/// Tried to execute invalid instruction
InvalidOpcode(u8),
/// Unhandled load access exception
LoadAccessEx(Address),
/// Unhandled instruction load access exception
ProgramFetchLoadEx(Address),
/// Unhandled store access exception
StoreAccessEx(Address),
/// Register out-of-bounds access
RegOutOfBounds,
/// Address out-of-bounds
AddrOutOfBounds,
/// Reached unreachable code
Unreachable,
}
/// Virtual machine halt ok
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum VmRunOk {
/// Program has eached its end
End,
/// Program was interrupted by a timer
Timer,
/// Environment call
Ecall,
}

View file

@ -1,14 +1,7 @@
use hbvm::mem::Address; use hbvm::vm::mem::{HandlePageFault, Memory, MemoryAccessReason, PageSize};
use { use {
hbbytecode::valider::validate, hbvm::{validate::validate, vm::Vm},
hbvm::{
mem::{
softpaging::{paging::PageTable, HandlePageFault, PageSize, SoftPagedMem},
MemoryAccessReason,
},
Vm,
},
std::io::{stdin, Read}, std::io::{stdin, Read},
}; };
@ -21,43 +14,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
return Ok(()); return Ok(());
} else { } else {
unsafe { unsafe {
let mut vm = Vm::<_, 0>::new( let mut vm = Vm::<_, 0>::new_unchecked(&prog, TestTrapHandler);
SoftPagedMem::<_, true> { vm.memory.insert_test_page();
pf_handler: TestTrapHandler,
program: &prog,
root_pt: Box::into_raw(Default::default()),
icache: Default::default(),
},
Address::new(4),
);
let data = {
let ptr = std::alloc::alloc_zeroed(std::alloc::Layout::from_size_align_unchecked(
4096, 4096,
));
if ptr.is_null() {
panic!("Alloc error tbhl");
}
ptr
};
vm.memory
.map(
data,
Address::new(8192),
hbvm::mem::softpaging::paging::Permission::Write,
PageSize::Size4K,
)
.unwrap();
println!("Program interrupt: {:?}", vm.run()); println!("Program interrupt: {:?}", vm.run());
println!("{:?}", vm.registers); println!("{:?}", vm.registers);
std::alloc::dealloc(
data,
std::alloc::Layout::from_size_align_unchecked(4096, 4096),
);
vm.memory.unmap(Address::new(8192)).unwrap();
let _ = Box::from_raw(vm.memory.root_pt);
} }
} }
Ok(()) Ok(())
@ -67,14 +27,13 @@ pub fn time() -> u32 {
9 9
} }
#[derive(Default)]
struct TestTrapHandler; struct TestTrapHandler;
impl HandlePageFault for TestTrapHandler { impl HandlePageFault for TestTrapHandler {
fn page_fault( fn page_fault(
&mut self, &mut self,
_: MemoryAccessReason, _: MemoryAccessReason,
_: &mut PageTable, _: &mut Memory,
_: Address, _: u64,
_: PageSize, _: PageSize,
_: *mut u8, _: *mut u8,
) -> bool { ) -> bool {

View file

@ -1,110 +0,0 @@
//! Virtual(?) memory address
use core::{fmt::Debug, ops};
use crate::utils::impl_display;
/// Memory address
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Address(u64);
impl Address {
/// A null address
pub const NULL: Self = Self(0);
/// Saturating integer addition. Computes self + rhs, saturating at the numeric bounds instead of overflowing.
#[inline]
pub fn saturating_add<T: AddressOp>(self, rhs: T) -> Self {
Self(self.0.saturating_add(rhs.cast_u64()))
}
/// Saturating integer subtraction. Computes self - rhs, saturating at the numeric bounds instead of overflowing.
#[inline]
pub fn saturating_sub<T: AddressOp>(self, rhs: T) -> Self {
Self(self.0.saturating_sub(rhs.cast_u64()))
}
/// Cast or if smaller, truncate to [`usize`]
pub fn truncate_usize(self) -> usize {
self.0 as _
}
/// Get inner value
#[inline(always)]
pub fn get(self) -> u64 {
self.0
}
/// Construct new address
#[inline(always)]
pub fn new(val: u64) -> Self {
Self(val)
}
/// Do something with inner value
#[inline(always)]
pub fn map(self, f: impl Fn(u64) -> u64) -> Self {
Self(f(self.0))
}
}
impl_display!(for Address =>
|Address(a)| "{a:0x}"
);
impl<T: AddressOp> ops::Add<T> for Address {
type Output = Self;
#[inline]
fn add(self, rhs: T) -> Self::Output {
Self(self.0.wrapping_add(rhs.cast_u64()))
}
}
impl<T: AddressOp> ops::Sub<T> for Address {
type Output = Self;
#[inline]
fn sub(self, rhs: T) -> Self::Output {
Self(self.0.wrapping_sub(rhs.cast_u64()))
}
}
impl<T: AddressOp> ops::AddAssign<T> for Address {
fn add_assign(&mut self, rhs: T) {
self.0 = self.0.wrapping_add(rhs.cast_u64())
}
}
impl<T: AddressOp> ops::SubAssign<T> for Address {
fn sub_assign(&mut self, rhs: T) {
self.0 = self.0.wrapping_sub(rhs.cast_u64())
}
}
impl From<Address> for u64 {
#[inline(always)]
fn from(value: Address) -> Self {
value.0
}
}
impl Debug for Address {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(f, "[{:0x}]", self.0)
}
}
/// Can perform address operations with
pub trait AddressOp {
/// Cast to u64, truncating or extending
fn cast_u64(self) -> u64;
}
macro_rules! impl_address_ops(($($ty:ty),* $(,)?) => {
$(impl AddressOp for $ty {
#[inline(always)]
fn cast_u64(self) -> u64 { self as _ }
})*
});
impl_address_ops!(u8, u16, u32, u64, usize);

View file

@ -1,86 +0,0 @@
//! Memory implementations
pub mod softpaging;
mod addr;
pub use addr::Address;
use {crate::utils::impl_display, hbbytecode::ProgramVal};
/// Load-store memory access
pub trait Memory {
/// Load data from memory on address
///
/// # Safety
/// - Shall not overrun the buffer
unsafe fn load(
&mut self,
addr: Address,
target: *mut u8,
count: usize,
) -> Result<(), LoadError>;
/// Store data to memory on address
///
/// # Safety
/// - Shall not overrun the buffer
unsafe fn store(
&mut self,
addr: Address,
source: *const u8,
count: usize,
) -> Result<(), StoreError>;
/// Read from program memory to execute
///
/// # Safety
/// - Data read have to be valid
unsafe fn prog_read<T: ProgramVal>(&mut self, addr: Address) -> Option<T>;
/// Read from program memory to exectue
///
/// # Safety
/// - You have to be really sure that these bytes are there, understand?
unsafe fn prog_read_unchecked<T: ProgramVal>(&mut self, addr: Address) -> T;
}
/// Unhandled load access trap
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LoadError(pub Address);
impl_display!(for LoadError =>
|LoadError(a)| "Load access error at address {a}",
);
/// Unhandled store access trap
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct StoreError(pub Address);
impl_display!(for StoreError =>
|StoreError(a)| "Load access error at address {a}",
);
/// Reason to access memory
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MemoryAccessReason {
/// Memory was accessed for load (read)
Load,
/// Memory was accessed for store (write)
Store,
}
impl_display!(for MemoryAccessReason => match {
Self::Load => const "Load";
Self::Store => const "Store";
});
impl From<LoadError> for crate::VmRunError {
fn from(value: LoadError) -> Self {
Self::LoadAccessEx(value.0)
}
}
impl From<StoreError> for crate::VmRunError {
fn from(value: StoreError) -> Self {
Self::StoreAccessEx(value.0)
}
}

View file

@ -1,109 +0,0 @@
//! Program instruction cache
use crate::mem::Address;
use {
super::{lookup::AddrPageLookuper, paging::PageTable, PageSize},
core::{
mem::{size_of, MaybeUninit},
ptr::{copy_nonoverlapping, NonNull},
},
};
/// Instruction cache
#[derive(Clone, Debug)]
pub struct ICache {
/// Current page address base
base: Address,
/// Curent page pointer
data: Option<NonNull<u8>>,
/// Current page size
size: PageSize,
/// Address mask
mask: u64,
}
impl Default for ICache {
fn default() -> Self {
Self {
base: Address::NULL,
data: Default::default(),
size: PageSize::Size4K,
mask: Default::default(),
}
}
}
impl ICache {
/// Fetch instruction from cache
///
/// # Safety
/// `T` should be valid to read from instruction memory
pub(super) unsafe fn fetch<T>(
&mut self,
addr: Address,
root_pt: *const PageTable,
) -> Option<T> {
let mut ret = MaybeUninit::<T>::uninit();
let pbase = self
.data
.or_else(|| self.fetch_page(self.base + self.size, root_pt))?;
// Get address base
let base = addr.map(|x| x & self.mask);
// Base not matching, fetch anew
if base != self.base {
self.fetch_page(base, root_pt)?;
};
let offset = addr.get() & !self.mask;
let requ_size = size_of::<T>();
// Page overflow
let rem = (offset as usize)
.saturating_add(requ_size)
.saturating_sub(self.size as _);
let first_copy = requ_size.saturating_sub(rem);
// Copy non-overflowing part
copy_nonoverlapping(pbase.as_ptr(), ret.as_mut_ptr().cast::<u8>(), first_copy);
// Copy overflow
if rem != 0 {
let pbase = self.fetch_page(self.base + self.size, root_pt)?;
// Unlikely, unsupported scenario
if rem > self.size as _ {
return None;
}
copy_nonoverlapping(
pbase.as_ptr(),
ret.as_mut_ptr().cast::<u8>().add(first_copy),
rem,
);
}
Some(ret.assume_init())
}
/// Fetch a page
unsafe fn fetch_page(&mut self, addr: Address, pt: *const PageTable) -> Option<NonNull<u8>> {
let res = AddrPageLookuper::new(addr, 0, pt).next()?.ok()?;
if !super::perm_check::executable(res.perm) {
return None;
}
(self.size, self.mask) = match res.size {
4096 => (PageSize::Size4K, !((1 << 8) - 1)),
2097152 => (PageSize::Size2M, !((1 << (8 * 2)) - 1)),
1073741824 => (PageSize::Size1G, !((1 << (8 * 3)) - 1)),
_ => return None,
};
self.data = Some(NonNull::new(res.ptr)?);
self.base = addr.map(|x| x & self.mask);
self.data
}
}

View file

@ -1,126 +0,0 @@
//! Address lookup
use crate::mem::addr::Address;
use super::{
addr_extract_index,
paging::{PageTable, Permission},
PageSize,
};
/// Good result from address split
pub struct AddrPageLookupOk {
/// Virtual address
pub vaddr: Address,
/// Pointer to the start for perform operation
pub ptr: *mut u8,
/// Size to the end of page / end of desired size
pub size: usize,
/// Page permission
pub perm: Permission,
}
/// Errornous address split result
pub struct AddrPageLookupError {
/// Address of failure
pub addr: Address,
/// Requested page size
pub size: PageSize,
}
/// Address splitter into pages
pub struct AddrPageLookuper {
/// Current address
addr: Address,
/// Size left
size: usize,
/// Page table
pagetable: *const PageTable,
}
impl AddrPageLookuper {
/// Create a new page lookuper
#[inline]
pub const fn new(addr: Address, size: usize, pagetable: *const PageTable) -> Self {
Self {
addr,
size,
pagetable,
}
}
/// Bump address by size X
pub fn bump(&mut self, page_size: PageSize) {
self.addr += page_size;
self.size = self.size.saturating_sub(page_size as _);
}
}
impl Iterator for AddrPageLookuper {
type Item = Result<AddrPageLookupOk, AddrPageLookupError>;
fn next(&mut self) -> Option<Self::Item> {
// The end, everything is fine
if self.size == 0 {
return None;
}
let (base, perm, size, offset) = 'a: {
let mut current_pt = self.pagetable;
// Walk the page table
for lvl in (0..5).rev() {
// Get an entry
unsafe {
let entry = (*current_pt)
.table
.get_unchecked(addr_extract_index(self.addr, lvl));
let ptr = entry.ptr();
match entry.permission() {
// No page → page fault
Permission::Empty => {
return Some(Err(AddrPageLookupError {
addr: self.addr,
size: PageSize::from_lvl(lvl)?,
}))
}
// Node → proceed waking
Permission::Node => current_pt = ptr as _,
// Leaf → return relevant data
perm => {
break 'a (
// Pointer in host memory
ptr as *mut u8,
perm,
PageSize::from_lvl(lvl)?,
// In-page offset
addr_extract_index(self.addr, lvl),
);
}
}
}
}
return None; // Reached the end (should not happen)
};
// Get available byte count in the selected page with offset
let avail = (size as usize).saturating_sub(offset).clamp(0, self.size);
self.bump(size);
Some(Ok(AddrPageLookupOk {
vaddr: self.addr,
ptr: unsafe { base.add(offset) }, // Return pointer to the start of region
size: avail,
perm,
}))
}
}

View file

@ -1,166 +0,0 @@
//! Automatic memory mapping
use crate::{mem::addr::Address, utils::impl_display};
use {
super::{
addr_extract_index,
paging::{PageTable, Permission, PtEntry, PtPointedData},
PageSize, SoftPagedMem,
},
alloc::boxed::Box,
};
impl<'p, A, const OUT_PROG_EXEC: bool> SoftPagedMem<'p, A, OUT_PROG_EXEC> {
/// Maps host's memory into VM's memory
///
/// # Safety
/// - Your faith in the gods of UB
/// - Addr-san claims it's fine but who knows is she isn't lying :ferrisSus:
/// - Alright, Miri-sama is also fine with this, who knows why
pub unsafe fn map(
&mut self,
host: *mut u8,
target: Address,
perm: Permission,
pagesize: PageSize,
) -> Result<(), MapError> {
let mut current_pt = self.root_pt;
// Decide on what level depth are we going
let lookup_depth = match pagesize {
PageSize::Size4K => 0,
PageSize::Size2M => 1,
PageSize::Size1G => 2,
};
// Walk pagetable levels
for lvl in (lookup_depth + 1..5).rev() {
let entry = (*current_pt)
.table
.get_unchecked_mut(addr_extract_index(target, lvl));
let ptr = entry.ptr();
match entry.permission() {
// Still not on target and already seeing empty entry?
// No worries! Let's create one (allocates).
Permission::Empty => {
// Increase children count
(*current_pt).childen += 1;
let table = Box::into_raw(Box::new(PtPointedData {
pt: PageTable::default(),
}));
core::ptr::write(entry, PtEntry::new(table, Permission::Node));
current_pt = table as _;
}
// Continue walking
Permission::Node => current_pt = ptr as _,
// There is some entry on place of node
_ => return Err(MapError::PageOnNode),
}
}
let node = (*current_pt)
.table
.get_unchecked_mut(addr_extract_index(target, lookup_depth));
// Check if node is not mapped
if node.permission() != Permission::Empty {
return Err(MapError::AlreadyMapped);
}
// Write entry
(*current_pt).childen += 1;
core::ptr::write(node, PtEntry::new(host.cast(), perm));
Ok(())
}
/// Unmaps pages from VM's memory
///
/// If errors, it only means there is no entry to unmap and in most cases
/// just should be ignored.
pub fn unmap(&mut self, addr: Address) -> Result<(), NothingToUnmap> {
let mut current_pt = self.root_pt;
let mut page_tables = [core::ptr::null_mut(); 5];
// Walk page table in reverse
for lvl in (0..5).rev() {
let entry = unsafe {
(*current_pt)
.table
.get_unchecked_mut(addr_extract_index(addr, lvl))
};
let ptr = entry.ptr();
match entry.permission() {
// Nothing is there, throw an error, not critical!
Permission::Empty => return Err(NothingToUnmap),
// Node Save to visited pagetables and continue walking
Permission::Node => {
page_tables[lvl as usize] = entry;
current_pt = ptr as _
}
// Page entry zero it out!
// Zero page entry is completely valid entry with
// empty permission - no UB here!
_ => unsafe {
core::ptr::write_bytes(entry, 0, 1);
break;
},
}
}
// Now walk in order visited page tables
for entry in page_tables.into_iter() {
// Level not visited, skip.
if entry.is_null() {
continue;
}
unsafe {
let children = &mut (*(*entry).ptr()).pt.childen;
*children -= 1; // Decrease children count
// If there are no children, deallocate.
if *children == 0 {
let _ = Box::from_raw((*entry).ptr() as *mut PageTable);
// Zero visited entry
core::ptr::write_bytes(entry, 0, 1);
} else {
break;
}
}
}
Ok(())
}
}
/// Error mapping
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MapError {
/// Entry was already mapped
AlreadyMapped,
/// When walking a page entry was
/// encounterd.
PageOnNode,
}
impl_display!(for MapError => match {
Self::AlreadyMapped => "There is already a page mapped on specified address";
Self::PageOnNode => "There was a page mapped on the way instead of node";
});
/// There was no entry in page table to unmap
///
/// No worry, don't panic, nothing bad has happened,
/// but if you are 120% sure there should be something,
/// double-check your addresses.
#[derive(Clone, Copy, Debug)]
pub struct NothingToUnmap;
impl_display!(for NothingToUnmap => "There is no entry to unmap");

View file

@ -1,296 +0,0 @@
//! Platform independent, software paged memory implementation
pub mod icache;
pub mod lookup;
pub mod paging;
#[cfg(feature = "alloc")]
pub mod mapping;
use {
super::{addr::Address, LoadError, Memory, MemoryAccessReason, StoreError},
core::mem::size_of,
icache::ICache,
lookup::{AddrPageLookupError, AddrPageLookupOk, AddrPageLookuper},
paging::{PageTable, Permission},
};
/// HoleyBytes software paged memory
///
/// - `OUT_PROG_EXEC`: set to `false` to disable executing program
/// not contained in initially provided program, even the pages
/// are executable
#[derive(Clone, Debug)]
pub struct SoftPagedMem<'p, PfH, const OUT_PROG_EXEC: bool = true> {
/// Root page table
pub root_pt: *mut PageTable,
/// Page fault handler
pub pf_handler: PfH,
/// Program memory segment
pub program: &'p [u8],
/// Program instruction cache
pub icache: ICache,
}
impl<'p, PfH: HandlePageFault, const OUT_PROG_EXEC: bool> Memory
for SoftPagedMem<'p, PfH, OUT_PROG_EXEC>
{
/// Load value from an address
///
/// # Safety
/// Applies same conditions as for [`core::ptr::copy_nonoverlapping`]
unsafe fn load(
&mut self,
addr: Address,
target: *mut u8,
count: usize,
) -> Result<(), LoadError> {
self.memory_access(
MemoryAccessReason::Load,
addr,
target,
count,
perm_check::readable,
|src, dst, count| core::ptr::copy_nonoverlapping(src, dst, count),
)
.map_err(LoadError)
}
/// Store value to an address
///
/// # Safety
/// Applies same conditions as for [`core::ptr::copy_nonoverlapping`]
unsafe fn store(
&mut self,
addr: Address,
source: *const u8,
count: usize,
) -> Result<(), StoreError> {
self.memory_access(
MemoryAccessReason::Store,
addr,
source.cast_mut(),
count,
perm_check::writable,
|dst, src, count| core::ptr::copy_nonoverlapping(src, dst, count),
)
.map_err(StoreError)
}
#[inline(always)]
unsafe fn prog_read<T>(&mut self, addr: Address) -> Option<T> {
if OUT_PROG_EXEC && addr.truncate_usize() > self.program.len() {
return self.icache.fetch::<T>(addr, self.root_pt);
}
let addr = addr.truncate_usize();
self.program
.get(addr..addr + size_of::<T>())
.map(|x| x.as_ptr().cast::<T>().read())
}
#[inline(always)]
unsafe fn prog_read_unchecked<T>(&mut self, addr: Address) -> T {
if OUT_PROG_EXEC && addr.truncate_usize() > self.program.len() {
return self
.icache
.fetch::<T>(addr, self.root_pt)
.unwrap_or_else(|| core::mem::zeroed());
}
self.program
.as_ptr()
.add(addr.truncate_usize())
.cast::<T>()
.read()
}
}
impl<'p, PfH: HandlePageFault, const OUT_PROG_EXEC: bool> SoftPagedMem<'p, PfH, OUT_PROG_EXEC> {
// Everyone behold, the holy function, the god of HBVM memory accesses!
/// Split address to pages, check their permissions and feed pointers with offset
/// to a specified function.
///
/// If page is not found, execute page fault trap handler.
#[allow(clippy::too_many_arguments)] // Silence peasant
fn memory_access(
&mut self,
reason: MemoryAccessReason,
src: Address,
mut dst: *mut u8,
len: usize,
permission_check: fn(Permission) -> bool,
action: fn(*mut u8, *mut u8, usize),
) -> Result<(), Address> {
// Memory load from program section
let (src, len) = if src.truncate_usize() < self.program.len() as _ {
// Allow only loads
if reason != MemoryAccessReason::Load {
return Err(src);
}
// Determine how much data to copy from here
let to_copy = len.clamp(0, self.program.len().saturating_sub(src.truncate_usize()));
// Perform action
action(
unsafe { self.program.as_ptr().add(src.truncate_usize()).cast_mut() },
dst,
to_copy,
);
// Return shifted from what we've already copied
(
src.saturating_add(to_copy as u64),
len.saturating_sub(to_copy),
)
} else {
(src, len) // Nothing weird!
};
// Nothing to copy? Don't bother doing anything, bail.
if len == 0 {
return Ok(());
}
// Create new splitter
let mut pspl = AddrPageLookuper::new(src, len, self.root_pt);
loop {
match pspl.next() {
// Page is found
Some(Ok(AddrPageLookupOk {
vaddr,
ptr,
size,
perm,
})) => {
if !permission_check(perm) {
return Err(vaddr);
}
// Perform specified memory action and bump destination pointer
action(ptr, dst, size);
dst = unsafe { dst.add(size) };
}
// No page found
Some(Err(AddrPageLookupError { addr, size })) => {
// Attempt to execute page fault handler
if self.pf_handler.page_fault(
reason,
unsafe { &mut *self.root_pt },
addr,
size,
dst,
) {
// Shift the splitter address
pspl.bump(size);
// Bump dst pointer
dst = unsafe { dst.add(size as _) };
} else {
return Err(addr); // Unhandleable, VM will yield.
}
}
// No remaining pages, we are done!
None => return Ok(()),
}
}
}
}
/// Extract index in page table on specified level
///
/// The level shall not be larger than 4, otherwise
/// the output of the function is unspecified (yes, it can also panic :)
pub fn addr_extract_index(addr: Address, lvl: u8) -> usize {
debug_assert!(lvl <= 4);
let addr = addr.get();
usize::try_from((addr >> (lvl * 8 + 12)) & ((1 << 8) - 1)).expect("?conradluget a better CPU")
}
/// Page size
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PageSize {
/// 4 KiB page (on level 0)
Size4K = 4096,
/// 2 MiB page (on level 1)
Size2M = 1024 * 1024 * 2,
/// 1 GiB page (on level 2)
Size1G = 1024 * 1024 * 1024,
}
impl PageSize {
/// Convert page table level to size of page
const fn from_lvl(lvl: u8) -> Option<Self> {
match lvl {
0 => Some(PageSize::Size4K),
1 => Some(PageSize::Size2M),
2 => Some(PageSize::Size1G),
_ => None,
}
}
}
impl core::ops::Add<PageSize> for Address {
type Output = Self;
#[inline(always)]
fn add(self, rhs: PageSize) -> Self::Output {
self + (rhs as u64)
}
}
impl core::ops::AddAssign<PageSize> for Address {
#[inline(always)]
fn add_assign(&mut self, rhs: PageSize) {
*self = Self::new(self.get().wrapping_add(rhs as u64));
}
}
/// Permisison checks
pub mod perm_check {
use super::paging::Permission;
/// Page is readable
#[inline(always)]
pub const fn readable(perm: Permission) -> bool {
matches!(
perm,
Permission::Readonly | Permission::Write | Permission::Exec
)
}
/// Page is writable
#[inline(always)]
pub const fn writable(perm: Permission) -> bool {
matches!(perm, Permission::Write)
}
/// Page is executable
#[inline(always)]
pub const fn executable(perm: Permission) -> bool {
matches!(perm, Permission::Exec)
}
}
/// Handle VM traps
pub trait HandlePageFault {
/// Handle page fault
///
/// Return true if handling was sucessful,
/// otherwise the program will be interrupted and will
/// yield an error.
fn page_fault(
&mut self,
reason: MemoryAccessReason,
pagetable: &mut PageTable,
vaddr: Address,
size: PageSize,
dataptr: *mut u8,
) -> bool
where
Self: Sized;
}

View file

@ -1,86 +0,0 @@
//! Page table and associated structures implementation
use core::{fmt::Debug, mem::MaybeUninit};
/// Page entry permission
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(u8)]
pub enum Permission {
/// No page present
#[default]
Empty,
/// Points to another pagetable
Node,
/// Page is read only
Readonly,
/// Page is readable and writable
Write,
/// Page is readable and executable
Exec,
}
/// Page table entry
#[derive(Clone, Copy, Default, PartialEq, Eq)]
pub struct PtEntry(u64);
impl PtEntry {
/// Create new
///
/// # Safety
/// - `ptr` has to point to valid data and shall not be deallocated
/// troughout the entry lifetime
#[inline]
pub unsafe fn new(ptr: *mut PtPointedData, permission: Permission) -> Self {
Self(ptr as u64 | permission as u64)
}
/// Get permission
#[inline]
pub fn permission(&self) -> Permission {
unsafe { core::mem::transmute(self.0 as u8 & 0b111) }
}
/// Get pointer to the data (leaf) or next page table (node)
#[inline]
pub fn ptr(&self) -> *mut PtPointedData {
(self.0 & !((1 << 12) - 1)) as _
}
}
impl Debug for PtEntry {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("PtEntry")
.field("ptr", &self.ptr())
.field("permission", &self.permission())
.finish()
}
}
/// Page table
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(align(4096))]
pub struct PageTable {
/// How much entries are in use
pub childen: u8,
/// Entries
pub table: [PtEntry; 256],
}
impl Default for PageTable {
fn default() -> Self {
// SAFETY: It's fine, zeroed page table entry is valid (= empty)
Self {
childen: 0,
table: unsafe { MaybeUninit::zeroed().assume_init() },
}
}
}
/// Data page table entry can possibly point to
#[derive(Clone, Copy)]
#[repr(C, align(4096))]
pub union PtPointedData {
/// Node - next page table
pub pt: PageTable,
/// Leaf
pub page: u8,
}

View file

@ -1,53 +0,0 @@
macro_rules! impl_display {
(for $ty:ty => $(|$selfty:pat_param|)? $fmt:literal $(, $($param:expr),+)? $(,)?) => {
impl ::core::fmt::Display for $ty {
fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
$(let $selfty = self;)?
write!(f, $fmt, $($param),*)
}
}
};
(for $ty:ty => $str:literal) => {
impl ::core::fmt::Display for $ty {
fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
f.write_str($str)
}
}
};
(for $ty:ty => match {$(
$bind:pat => $($const:ident)? $fmt:literal $(,$($params:tt)*)?;
)*}) => {
impl ::core::fmt::Display for $ty {
fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
match self {
$(
$bind => $crate::utils::internal::impl_display_match_fragment!($($const,)? f, $fmt $(, $($params)*)?)
),*
}
}
}
}
}
#[doc(hidden)]
pub(crate) mod internal {
macro_rules! impl_display_match_fragment {
(const, $f:expr, $lit:literal) => {
$f.write_str($lit)
};
($f:expr, $fmt:literal $(, $($params:tt)*)?) => {
write!($f, $fmt, $($($params)*)?)
};
}
pub(crate) use impl_display_match_fragment;
}
macro_rules! static_assert_eq(($l:expr, $r:expr $(,)?) => {
const _: [(); ($l != $r) as usize] = [];
});
pub(crate) use {impl_display, static_assert_eq};

64
hbvm/src/validate.rs Normal file
View file

@ -0,0 +1,64 @@
//! Validate if program is sound to execute
/// Program validation error kind
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
/// Unknown opcode
InvalidInstruction,
/// VM doesn't implement this valid opcode
Unimplemented,
/// Attempted to copy over register boundary
RegisterArrayOverflow,
}
/// Error
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Error {
/// Kind
pub kind: ErrorKind,
/// Location in bytecode
pub index: usize,
}
/// Perform bytecode validation. If it passes, the program should be
/// sound to execute.
pub fn validate(mut program: &[u8]) -> Result<(), Error> {
use hbbytecode::opcode::*;
let start = program;
loop {
// Match on instruction types and perform necessary checks
program = match program {
[] => return Ok(()),
[LD..=ST, reg, _, _, _, _, _, _, _, _, _, count, ..]
if usize::from(*reg) * 8 + usize::from(*count) > 2048 =>
{
return Err(Error {
kind: ErrorKind::RegisterArrayOverflow,
index: (program.as_ptr() as usize) - (start.as_ptr() as usize),
})
}
[BRC, src, dst, count, ..]
if src.checked_add(*count).is_none() || dst.checked_add(*count).is_none() =>
{
return Err(Error {
kind: ErrorKind::RegisterArrayOverflow,
index: (program.as_ptr() as usize) - (start.as_ptr() as usize),
})
}
[NOP | ECALL, rest @ ..]
| [DIR | DIRF, _, _, _, _, rest @ ..]
| [ADD..=CMPU | BRC | ADDF..=MULF, _, _, _, rest @ ..]
| [NEG..=NOT | CP..=SWA | NEGF..=FTI, _, _, rest @ ..]
| [LI | JMP, _, _, _, _, _, _, _, _, _, rest @ ..]
| [ADDI..=CMPUI | BMC | JEQ..=JGTU | ADDFI..=MULFI, _, _, _, _, _, _, _, _, _, _, rest @ ..]
| [LD..=ST, _, _, _, _, _, _, _, _, _, _, _, _, rest @ ..] => rest,
_ => {
return Err(Error {
kind: ErrorKind::InvalidInstruction,
index: (program.as_ptr() as usize) - (start.as_ptr() as usize),
})
}
}
}
}

View file

@ -1,80 +0,0 @@
//! HoleyBytes register value definition
/// Define [`Value`] union
///
/// # Safety
/// Union variants have to be sound to byte-reinterpretate
/// between each other. Otherwise the behaviour is undefined.
macro_rules! value_def {
($($ty:ident),* $(,)?) => {
/// HBVM register value
#[derive(Copy, Clone)]
#[repr(packed)]
pub union Value {
$(
#[doc = concat!(stringify!($ty), " type")]
pub $ty: $ty
),*
}
$(
impl From<$ty> for Value {
#[inline]
fn from(value: $ty) -> Self {
Self { $ty: value }
}
}
crate::utils::static_assert_eq!(
core::mem::size_of::<$ty>(),
core::mem::size_of::<Value>(),
);
impl private::Sealed for $ty {}
unsafe impl ValueVariant for $ty {}
)*
};
}
impl Value {
/// Byte reinterpret value to target variant
#[inline]
pub fn cast<V: ValueVariant>(self) -> V {
/// Evil.
///
/// Transmute cannot be performed with generic type
/// as size is unknown, so union is used.
///
/// # Safety
/// If [`ValueVariant`] implemented correctly, it's fine :)
///
/// :ferrisClueless:
union Transmute<Variant: ValueVariant> {
/// Self
src: Value,
/// Target variant
variant: Variant,
}
unsafe { Transmute { src: self }.variant }
}
}
/// # Safety
/// - N/A, not to be implemented manually
pub unsafe trait ValueVariant: private::Sealed + Copy + Into<Value> {}
mod private {
pub trait Sealed {}
}
value_def!(u64, i64, f64);
crate::utils::static_assert_eq!(core::mem::size_of::<Value>(), 8);
impl core::fmt::Debug for Value {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
// Print formatted as hexadecimal, unsigned integer
write!(f, "{:x}", self.cast::<u64>())
}
}

434
hbvm/src/vm/mem/mod.rs Normal file
View file

@ -0,0 +1,434 @@
//! Program memory implementation
pub mod paging;
mod pfhandler;
pub use pfhandler::HandlePageFault;
use {
self::paging::{PageTable, Permission, PtEntry},
super::VmRunError,
alloc::boxed::Box,
core::mem::MaybeUninit,
derive_more::Display,
};
/// HoleyBytes virtual memory
#[derive(Clone, Debug)]
pub struct Memory {
/// Root page table
root_pt: *mut PageTable,
}
impl Default for Memory {
fn default() -> Self {
Self {
root_pt: Box::into_raw(Box::default()),
}
}
}
impl Drop for Memory {
fn drop(&mut self) {
let _ = unsafe { Box::from_raw(self.root_pt) };
}
}
impl Memory {
// HACK: Just for allocation testing, will be removed when proper memory interfaces
// implemented.
pub fn insert_test_page(&mut self) {
unsafe {
let mut entry = PtEntry::new(
{
let layout = alloc::alloc::Layout::from_size_align_unchecked(4096, 4096);
let ptr = alloc::alloc::alloc_zeroed(layout);
if ptr.is_null() {
alloc::alloc::handle_alloc_error(layout);
}
core::ptr::write_bytes(ptr, 69, 10);
ptr.cast()
},
Permission::Write,
);
for _ in 0..4 {
let mut pt = Box::<PageTable>::default();
pt[0] = entry;
entry = PtEntry::new(Box::into_raw(pt) as _, Permission::Node);
}
(*self.root_pt)[0] = entry;
}
}
/// Load value from an address
///
/// # Safety
/// Applies same conditions as for [`core::ptr::copy_nonoverlapping`]
pub unsafe fn load(
&mut self,
addr: u64,
target: *mut u8,
count: usize,
traph: &mut impl HandlePageFault,
) -> Result<(), LoadError> {
self.memory_access(
MemoryAccessReason::Load,
addr,
target,
count,
|perm| {
matches!(
perm,
Permission::Readonly | Permission::Write | Permission::Exec
)
},
|src, dst, count| core::ptr::copy_nonoverlapping(src, dst, count),
traph,
)
.map_err(LoadError)
}
/// Store value to an address
///
/// # Safety
/// Applies same conditions as for [`core::ptr::copy_nonoverlapping`]
pub unsafe fn store(
&mut self,
addr: u64,
source: *const u8,
count: usize,
traph: &mut impl HandlePageFault,
) -> Result<(), StoreError> {
self.memory_access(
MemoryAccessReason::Store,
addr,
source.cast_mut(),
count,
|perm| perm == Permission::Write,
|dst, src, count| core::ptr::copy_nonoverlapping(src, dst, count),
traph,
)
.map_err(StoreError)
}
/// Copy a block of memory
///
/// # Safety
/// - Same as for [`Self::load`] and [`Self::store`]
/// - Your faith in the gods of UB
/// - Addr-san claims it's fine but who knows is she isn't lying :ferrisSus:
pub unsafe fn block_copy(
&mut self,
src: u64,
dst: u64,
count: usize,
traph: &mut impl HandlePageFault,
) -> Result<(), BlkCopyError> {
// Yea, i know it is possible to do this more efficiently, but I am too lazy.
const STACK_BUFFER_SIZE: usize = 512;
// Decide if to use stack-allocated buffer or to heap allocate
// Deallocation is again decided on size at the end of the function
let mut buf = MaybeUninit::<[u8; STACK_BUFFER_SIZE]>::uninit();
let buf = if count <= STACK_BUFFER_SIZE {
buf.as_mut_ptr().cast()
} else {
unsafe {
let layout = core::alloc::Layout::from_size_align_unchecked(count, 1);
let ptr = alloc::alloc::alloc(layout);
if ptr.is_null() {
alloc::alloc::handle_alloc_error(layout);
}
ptr
}
};
// Perform memory block transfer
let status = (|| {
// Load to buffer
self.memory_access(
MemoryAccessReason::Load,
src,
buf,
count,
|perm| {
matches!(
perm,
Permission::Readonly | Permission::Write | Permission::Exec
)
},
|src, dst, count| core::ptr::copy(src, dst, count),
traph,
)
.map_err(|addr| BlkCopyError {
access_reason: MemoryAccessReason::Load,
addr,
})?;
// Store from buffer
self.memory_access(
MemoryAccessReason::Store,
dst,
buf,
count,
|perm| perm == Permission::Write,
|dst, src, count| core::ptr::copy(src, dst, count),
traph,
)
.map_err(|addr| BlkCopyError {
access_reason: MemoryAccessReason::Store,
addr,
})?;
Ok::<_, BlkCopyError>(())
})();
// Deallocate if used heap-allocated array
if count > STACK_BUFFER_SIZE {
alloc::alloc::dealloc(
buf,
core::alloc::Layout::from_size_align_unchecked(count, 1),
);
}
status
}
/// Split address to pages, check their permissions and feed pointers with offset
/// to a specified function.
///
/// If page is not found, execute page fault trap handler.
#[allow(clippy::too_many_arguments)] // Silence peasant
fn memory_access(
&mut self,
reason: MemoryAccessReason,
src: u64,
mut dst: *mut u8,
len: usize,
permission_check: fn(Permission) -> bool,
action: fn(*mut u8, *mut u8, usize),
traph: &mut impl HandlePageFault,
) -> Result<(), u64> {
let mut pspl = AddrSplitter::new(src, len, self.root_pt);
loop {
match pspl.next() {
// Page found
Some(Ok(AddrSplitOk {
vaddr,
ptr,
size,
perm,
})) => {
if !permission_check(perm) {
return Err(vaddr);
}
// Perform memory action and bump dst pointer
action(ptr, dst, size);
dst = unsafe { dst.add(size) };
}
Some(Err(AddrSplitError { addr, size })) => {
// Execute page fault handler
if traph.page_fault(reason, self, addr, size, dst) {
// Shift the splitter address
pspl.bump(size);
// Bump dst pointer
dst = unsafe { dst.add(size as _) };
} else {
return Err(addr); // Unhandleable
}
}
None => return Ok(()),
}
}
}
}
/// Result from address split
struct AddrSplitOk {
/// Virtual address
vaddr: u64,
/// Pointer to the start for perform operation
ptr: *mut u8,
/// Size to the end of page / end of desired size
size: usize,
/// Page permission
perm: Permission,
}
struct AddrSplitError {
/// Address of failure
addr: u64,
/// Requested page size
size: PageSize,
}
/// Address splitter into pages
struct AddrSplitter {
/// Current address
addr: u64,
/// Size left
size: usize,
/// Page table
pagetable: *const PageTable,
}
impl AddrSplitter {
/// Create a new page splitter
pub const fn new(addr: u64, size: usize, pagetable: *const PageTable) -> Self {
Self {
addr,
size,
pagetable,
}
}
/// Bump address by size X
fn bump(&mut self, page_size: PageSize) {
self.addr += page_size as u64;
self.size = self.size.saturating_sub(page_size as _);
}
}
impl Iterator for AddrSplitter {
type Item = Result<AddrSplitOk, AddrSplitError>;
fn next(&mut self) -> Option<Self::Item> {
// The end, everything is fine
if self.size == 0 {
return None;
}
let (base, perm, size, offset) = 'a: {
let mut current_pt = self.pagetable;
// Walk the page table
for lvl in (0..5).rev() {
// Get an entry
unsafe {
let entry = (*current_pt).get_unchecked(
usize::try_from((self.addr >> (lvl * 9 + 12)) & ((1 << 9) - 1))
.expect("?conradluget a better CPU"),
);
let ptr = entry.ptr();
match entry.permission() {
// No page → page fault
Permission::Empty => {
return Some(Err(AddrSplitError {
addr: self.addr,
size: PageSize::from_lvl(lvl)?,
}))
}
// Node → proceed waking
Permission::Node => current_pt = ptr as _,
// Leaft → return relevant data
perm => {
break 'a (
// Pointer in host memory
ptr as *mut u8,
perm,
PageSize::from_lvl(lvl)?,
// In-page offset
self.addr as usize & ((1 << (lvl * 9 + 12)) - 1),
);
}
}
}
}
return None; // Reached the end (should not happen)
};
// Get available byte count in the selected page with offset
let avail = (size as usize - offset).clamp(0, self.size);
self.bump(size);
Some(Ok(AddrSplitOk {
vaddr: self.addr,
ptr: unsafe { base.add(offset) }, // Return pointer to the start of region
size: avail,
perm,
}))
}
}
/// Page size
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PageSize {
/// 4 KiB page (on level 0)
Size4K = 4096,
/// 2 MiB page (on level 1)
Size2M = 1024 * 1024 * 2,
/// 1 GiB page (on level 2)
Size1G = 1024 * 1024 * 1024,
}
impl PageSize {
/// Convert page table level to size of page
fn from_lvl(lvl: u8) -> Option<Self> {
match lvl {
0 => Some(PageSize::Size4K),
1 => Some(PageSize::Size2M),
2 => Some(PageSize::Size1G),
_ => None,
}
}
}
/// Unhandled load access trap
#[derive(Clone, Copy, Display, Debug, PartialEq, Eq)]
pub struct LoadError(u64);
/// Unhandled store access trap
#[derive(Clone, Copy, Display, Debug, PartialEq, Eq)]
pub struct StoreError(u64);
#[derive(Clone, Copy, Display, Debug, PartialEq, Eq)]
pub enum MemoryAccessReason {
Load,
Store,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BlkCopyError {
access_reason: MemoryAccessReason,
addr: u64,
}
impl From<BlkCopyError> for VmRunError {
fn from(value: BlkCopyError) -> Self {
match value.access_reason {
MemoryAccessReason::Load => Self::LoadAccessEx(value.addr),
MemoryAccessReason::Store => Self::StoreAccessEx(value.addr),
}
}
}
impl From<LoadError> for VmRunError {
fn from(value: LoadError) -> Self {
Self::LoadAccessEx(value.0)
}
}
impl From<StoreError> for VmRunError {
fn from(value: StoreError) -> Self {
Self::StoreAccessEx(value.0)
}
}

152
hbvm/src/vm/mem/paging.rs Normal file
View file

@ -0,0 +1,152 @@
//! Page table and associated structures implementation
use {
core::{
fmt::Debug,
mem::MaybeUninit,
ops::{Index, IndexMut},
slice::SliceIndex,
},
delegate::delegate,
};
/// Page entry permission
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(u8)]
pub enum Permission {
/// No page present
#[default]
Empty,
/// Points to another pagetable
Node,
/// Page is read only
Readonly,
/// Page is readable and writable
Write,
/// Page is readable and executable
Exec,
}
/// Page table entry
#[derive(Clone, Copy, Default, PartialEq, Eq)]
pub struct PtEntry(u64);
impl PtEntry {
/// Create new
///
/// # Safety
/// - `ptr` has to point to valid data and shall not be deallocated
/// troughout the entry lifetime
#[inline]
pub unsafe fn new(ptr: *mut PtPointedData, permission: Permission) -> Self {
Self(ptr as u64 | permission as u64)
}
/// Get permission
#[inline]
pub fn permission(&self) -> Permission {
unsafe { core::mem::transmute(self.0 as u8 & 0b111) }
}
/// Get pointer to the data (leaf) or next page table (node)
#[inline]
pub fn ptr(&self) -> *mut PtPointedData {
(self.0 & !((1 << 12) - 1)) as _
}
}
impl Debug for PtEntry {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("PtEntry")
.field("ptr", &self.ptr())
.field("permission", &self.permission())
.finish()
}
}
/// Page table
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(align(4096))]
pub struct PageTable([PtEntry; 512]);
impl PageTable {
delegate!(to self.0 {
/// Returns a reference to an element or subslice depending on the type of
/// index.
///
/// - If given a position, returns a reference to the element at that
/// position or `None` if out of bounds.
/// - If given a range, returns the subslice corresponding to that range,
/// or `None` if out of bounds.
///
pub fn get<I>(&self, ix: I) -> Option<&I::Output>
where I: SliceIndex<[PtEntry]>;
/// Returns a mutable reference to an element or subslice depending on the
/// type of index (see [`get`]) or `None` if the index is out of bounds.
pub fn get_mut<I>(&mut self, ix: I) -> Option<&mut I::Output>
where I: SliceIndex<[PtEntry]>;
/// Returns a reference to an element or subslice, without doing bounds
/// checking.
///
/// For a safe alternative see [`get`].
///
/// # Safety
///
/// Calling this method with an out-of-bounds index is *[undefined behavior]*
/// even if the resulting reference is not used.
pub unsafe fn get_unchecked<I>(&self, index: I) -> &I::Output
where I: SliceIndex<[PtEntry]>;
/// Returns a mutable reference to an element or subslice, without doing
/// bounds checking.
///
/// For a safe alternative see [`get_mut`].
///
/// # Safety
///
/// Calling this method with an out-of-bounds index is *[undefined behavior]*
/// even if the resulting reference is not used.
pub unsafe fn get_unchecked_mut<I>(&mut self, index: I) -> &mut I::Output
where I: SliceIndex<[PtEntry]>;
});
}
impl<Idx> Index<Idx> for PageTable
where
Idx: SliceIndex<[PtEntry]>,
{
type Output = Idx::Output;
#[inline(always)]
fn index(&self, index: Idx) -> &Self::Output {
&self.0[index]
}
}
impl<Idx> IndexMut<Idx> for PageTable
where
Idx: SliceIndex<[PtEntry]>,
{
#[inline(always)]
fn index_mut(&mut self, index: Idx) -> &mut Self::Output {
&mut self.0[index]
}
}
impl Default for PageTable {
fn default() -> Self {
// SAFETY: It's fine, zeroed page table entry is valid (= empty)
Self(unsafe { MaybeUninit::zeroed().assume_init() })
}
}
/// Data page table entry can possibly point to
#[derive(Clone, Copy)]
#[repr(C, align(4096))]
pub union PtPointedData {
/// Node - next page table
pub pt: PageTable,
/// Leaf
pub page: u8,
}

View file

@ -0,0 +1,16 @@
//! Program trap handling interfaces
use super::{Memory, MemoryAccessReason, PageSize};
/// Handle VM traps
pub trait HandlePageFault {
/// Handle page fault
fn page_fault(
&mut self,
reason: MemoryAccessReason,
memory: &mut Memory,
vaddr: u64,
size: PageSize,
dataptr: *mut u8,
) -> bool;
}

367
hbvm/src/vm/mod.rs Normal file
View file

@ -0,0 +1,367 @@
//! HoleyBytes Virtual Machine
//!
//! All unsafe code here should be sound, if input bytecode passes validation.
// # General safety notice:
// - Validation has to assure there is 256 registers (r0 - r255)
// - Instructions have to be valid as specified (values and sizes)
// - Mapped pages should be at least 4 KiB
// - Yes, I am aware of the UB when jumping in-mid of instruction where
// the read byte corresponds to an instruction whose lenght exceets the
// program size. If you are (rightfully) worried about the UB, for now just
// append your program with 11 zeroes.
use self::mem::HandlePageFault;
pub mod mem;
pub mod value;
use {
crate::validate,
core::ops,
hbbytecode::{OpParam, ParamBB, ParamBBB, ParamBBBB, ParamBBD, ParamBBDH, ParamBD},
mem::Memory,
static_assertions::assert_impl_one,
value::Value,
};
/// Extract a parameter from program
macro_rules! param {
($self:expr, $ty:ty) => {{
assert_impl_one!($ty: OpParam);
let data = $self
.program
.as_ptr()
.add($self.pc + 1)
.cast::<$ty>()
.read();
$self.pc += 1 + core::mem::size_of::<$ty>();
data
}};
}
/// Perform binary operation `#0 ← #1 OP #2`
macro_rules! binary_op {
($self:expr, $ty:ident, $handler:expr) => {{
let ParamBBB(tg, a0, a1) = param!($self, ParamBBB);
$self.write_reg(
tg,
$handler(
Value::$ty(&$self.read_reg(a0)),
Value::$ty(&$self.read_reg(a1)),
),
);
}};
}
/// Perform binary operation with immediate `#0 ← #1 OP imm #2`
macro_rules! binary_op_imm {
($self:expr, $ty:ident, $handler:expr) => {{
let ParamBBD(tg, a0, imm) = param!($self, ParamBBD);
$self.write_reg(
tg,
$handler(Value::$ty(&$self.read_reg(a0)), Value::$ty(&imm.into())),
);
}};
}
/// Jump at `#3` if ordering on `#0 <=> #1` is equal to expected
macro_rules! cond_jump {
($self:expr, $ty:ident, $expected:ident) => {{
let ParamBBD(a0, a1, jt) = param!($self, ParamBBD);
if core::cmp::Ord::cmp(&$self.read_reg(a0).as_u64(), &$self.read_reg(a1).as_u64())
== core::cmp::Ordering::$expected
{
$self.pc = jt as usize;
}
}};
}
/// HoleyBytes Virtual Machine
pub struct Vm<'a, PfHandler, const TIMER_QUOTIENT: usize> {
/// Holds 256 registers
///
/// Writing to register 0 is considered undefined behaviour
/// in terms of HoleyBytes program execution
pub registers: [Value; 256],
/// Memory implementation
pub memory: Memory,
/// Trap handler
pub pfhandler: PfHandler,
// Program counter
pc: usize,
/// Program
program: &'a [u8],
/// Program timer
timer: usize,
}
impl<'a, PfHandler: HandlePageFault, const TIMER_QUOTIENT: usize>
Vm<'a, PfHandler, TIMER_QUOTIENT>
{
/// Create a new VM with program and trap handler
///
/// # Safety
/// Program code has to be validated
pub unsafe fn new_unchecked(program: &'a [u8], traph: PfHandler) -> Self {
Self {
registers: [Value::from(0_u64); 256],
memory: Default::default(),
pfhandler: traph,
pc: 0,
program,
timer: 0,
}
}
/// Create a new VM with program and trap handler only if it passes validation
pub fn new_validated(program: &'a [u8], traph: PfHandler) -> Result<Self, validate::Error> {
validate::validate(program)?;
Ok(unsafe { Self::new_unchecked(program, traph) })
}
/// Execute program
///
/// Program can return [`VmRunError`] if a trap handling failed
pub fn run(&mut self) -> Result<VmRunOk, VmRunError> {
use hbbytecode::opcode::*;
loop {
// Fetch instruction
let Some(&opcode) = self.program.get(self.pc)
else { return Ok(VmRunOk::End) };
// Big match
unsafe {
match opcode {
NOP => param!(self, ()),
ADD => binary_op!(self, as_u64, u64::wrapping_add),
SUB => binary_op!(self, as_u64, u64::wrapping_sub),
MUL => binary_op!(self, as_u64, u64::wrapping_mul),
AND => binary_op!(self, as_u64, ops::BitAnd::bitand),
OR => binary_op!(self, as_u64, ops::BitOr::bitor),
XOR => binary_op!(self, as_u64, ops::BitXor::bitxor),
SL => binary_op!(self, as_u64, ops::Shl::shl),
SR => binary_op!(self, as_u64, ops::Shr::shr),
SRS => binary_op!(self, as_i64, ops::Shr::shr),
CMP => {
let ParamBBB(tg, a0, a1) = param!(self, ParamBBB);
self.write_reg(
tg,
self.read_reg(a0).as_i64().cmp(&self.read_reg(a1).as_i64()) as i64,
);
}
CMPU => {
let ParamBBB(tg, a0, a1) = param!(self, ParamBBB);
self.write_reg(
tg,
self.read_reg(a0).as_u64().cmp(&self.read_reg(a1).as_u64()) as i64,
);
}
NOT => {
let param = param!(self, ParamBB);
self.write_reg(param.0, !self.read_reg(param.1).as_u64());
}
NEG => {
let param = param!(self, ParamBB);
self.write_reg(
param.0,
match self.read_reg(param.1).as_u64() {
0 => 1_u64,
_ => 0,
},
);
}
DIR => {
let ParamBBBB(dt, rt, a0, a1) = param!(self, ParamBBBB);
let a0 = self.read_reg(a0).as_u64();
let a1 = self.read_reg(a1).as_u64();
self.write_reg(dt, a0.checked_div(a1).unwrap_or(u64::MAX));
self.write_reg(rt, a0.checked_rem(a1).unwrap_or(u64::MAX));
}
ADDI => binary_op_imm!(self, as_u64, ops::Add::add),
MULI => binary_op_imm!(self, as_u64, ops::Mul::mul),
ANDI => binary_op_imm!(self, as_u64, ops::BitAnd::bitand),
ORI => binary_op_imm!(self, as_u64, ops::BitOr::bitor),
XORI => binary_op_imm!(self, as_u64, ops::BitXor::bitxor),
SLI => binary_op_imm!(self, as_u64, ops::Shl::shl),
SRI => binary_op_imm!(self, as_u64, ops::Shr::shr),
SRSI => binary_op_imm!(self, as_i64, ops::Shr::shr),
CMPI => {
let ParamBBD(tg, a0, imm) = param!(self, ParamBBD);
self.write_reg(
tg,
self.read_reg(a0).as_i64().cmp(&Value::from(imm).as_i64()) as i64,
);
}
CMPUI => {
let ParamBBD(tg, a0, imm) = param!(self, ParamBBD);
self.write_reg(tg, self.read_reg(a0).as_u64().cmp(&imm) as i64);
}
CP => {
let param = param!(self, ParamBB);
self.write_reg(param.0, self.read_reg(param.1));
}
SWA => {
let ParamBB(src, dst) = param!(self, ParamBB);
if src + dst != 0 {
core::ptr::swap(
self.registers.get_unchecked_mut(usize::from(src)),
self.registers.get_unchecked_mut(usize::from(dst)),
);
}
}
LI => {
let param = param!(self, ParamBD);
self.write_reg(param.0, param.1);
}
LD => {
let ParamBBDH(dst, base, off, count) = param!(self, ParamBBDH);
let n: usize = match dst {
0 => 1,
_ => 0,
};
self.memory.load(
self.read_reg(base).as_u64() + off + n as u64,
self.registers.as_mut_ptr().add(usize::from(dst) + n).cast(),
usize::from(count).saturating_sub(n),
&mut self.pfhandler,
)?;
}
ST => {
let ParamBBDH(dst, base, off, count) = param!(self, ParamBBDH);
self.memory.store(
self.read_reg(base).as_u64() + off,
self.registers.as_ptr().add(usize::from(dst)).cast(),
count.into(),
&mut self.pfhandler,
)?;
}
BMC => {
let ParamBBD(src, dst, count) = param!(self, ParamBBD);
self.memory.block_copy(
self.read_reg(src).as_u64(),
self.read_reg(dst).as_u64(),
count as _,
&mut self.pfhandler,
)?;
}
BRC => {
let ParamBBB(src, dst, count) = param!(self, ParamBBB);
core::ptr::copy(
self.registers.get_unchecked(usize::from(src)),
self.registers.get_unchecked_mut(usize::from(dst)),
usize::from(count * 8),
);
}
JMP => {
let ParamBD(reg, offset) = param!(self, ParamBD);
self.pc = (self.read_reg(reg).as_u64() + offset) as usize;
}
JEQ => cond_jump!(self, int, Equal),
JNE => {
let ParamBBD(a0, a1, jt) = param!(self, ParamBBD);
if self.read_reg(a0).as_u64() != self.read_reg(a1).as_u64() {
self.pc = jt as usize;
}
}
JLT => cond_jump!(self, int, Less),
JGT => cond_jump!(self, int, Greater),
JLTU => cond_jump!(self, sint, Less),
JGTU => cond_jump!(self, sint, Greater),
ECALL => {
param!(self, ());
return Ok(VmRunOk::Ecall);
}
ADDF => binary_op!(self, as_f64, ops::Add::add),
SUBF => binary_op!(self, as_f64, ops::Sub::sub),
MULF => binary_op!(self, as_f64, ops::Mul::mul),
DIRF => {
let ParamBBBB(dt, rt, a0, a1) = param!(self, ParamBBBB);
let a0 = self.read_reg(a0).as_f64();
let a1 = self.read_reg(a1).as_f64();
self.write_reg(dt, a0 / a1);
self.write_reg(rt, a0 % a1);
}
FMAF => {
let ParamBBBB(dt, a0, a1, a2) = param!(self, ParamBBBB);
self.write_reg(
dt,
self.read_reg(a0).as_f64() * self.read_reg(a1).as_f64()
+ self.read_reg(a2).as_f64(),
);
}
NEGF => {
let ParamBB(dt, a0) = param!(self, ParamBB);
self.write_reg(dt, -self.read_reg(a0).as_f64());
}
ITF => {
let ParamBB(dt, a0) = param!(self, ParamBB);
self.write_reg(dt, self.read_reg(a0).as_i64() as f64);
}
FTI => {
let ParamBB(dt, a0) = param!(self, ParamBB);
self.write_reg(dt, self.read_reg(a0).as_f64() as i64);
}
ADDFI => binary_op_imm!(self, as_f64, ops::Add::add),
MULFI => binary_op_imm!(self, as_f64, ops::Mul::mul),
op => return Err(VmRunError::InvalidOpcode(op)),
}
}
if TIMER_QUOTIENT != 0 {
self.timer = self.timer.wrapping_add(1);
if self.timer % TIMER_QUOTIENT == 0 {
return Ok(VmRunOk::Timer);
}
}
}
}
/// Read register
#[inline]
unsafe fn read_reg(&self, n: u8) -> Value {
*self.registers.get_unchecked(n as usize)
}
/// Write a register.
/// Writing to register 0 is no-op.
#[inline]
unsafe fn write_reg(&mut self, n: u8, value: impl Into<Value>) {
if n != 0 {
*self.registers.get_unchecked_mut(n as usize) = value.into();
}
}
}
/// Virtual machine halt error
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum VmRunError {
/// Tried to execute invalid instruction
InvalidOpcode(u8),
/// Unhandled load access exception
LoadAccessEx(u64),
/// Unhandled store access exception
StoreAccessEx(u64),
}
/// Virtual machine halt ok
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum VmRunOk {
/// Program has eached its end
End,
/// Program was interrupted by a timer
Timer,
/// Environment call
Ecall,
}

48
hbvm/src/vm/value.rs Normal file
View file

@ -0,0 +1,48 @@
//! HoleyBytes register value definition
use core::fmt::Debug;
/// Define [`Value`] union
///
/// # Safety
/// Union variants have to be sound to byte-reinterpretate
/// between each other. Otherwise the behaviour is undefined.
macro_rules! value_def {
($($ty:ident),* $(,)?) => {
/// HBVM register value
#[derive(Copy, Clone)]
#[repr(packed)]
pub union Value {
$(pub $ty: $ty),*
}
paste::paste! {
impl Value {$(
#[doc = "Byte-reinterpret [`Value`] as [`" $ty "`]"]
#[inline]
pub fn [<as_ $ty>](&self) -> $ty {
unsafe { self.$ty }
}
)*}
}
$(
impl From<$ty> for Value {
#[inline]
fn from(value: $ty) -> Self {
Self { $ty: value }
}
}
)*
};
}
value_def!(u64, i64, f64);
static_assertions::const_assert_eq!(core::mem::size_of::<Value>(), 8);
impl Debug for Value {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
// Print formatted as hexadecimal, unsigned integer
write!(f, "{:x}", self.as_u64())
}
}

View file

@ -1,409 +0,0 @@
//! Welcome to the land of The Great Dispatch Loop
//!
//! Have fun
use crate::mem::Address;
use {
super::{
bmc::BlockCopier,
mem::Memory,
value::{Value, ValueVariant},
Vm, VmRunError, VmRunOk,
},
core::{cmp::Ordering, mem::size_of, ops},
hbbytecode::{
ParamBB, ParamBBB, ParamBBBB, ParamBBD, ParamBBDH, ParamBBW, ParamBD, ProgramVal,
},
};
impl<Mem, const TIMER_QUOTIENT: usize> Vm<Mem, TIMER_QUOTIENT>
where
Mem: Memory,
{
/// Execute program
///
/// Program can return [`VmRunError`] if a trap handling failed
#[cfg_attr(feature = "nightly", repr(align(4096)))]
pub fn run(&mut self) -> Result<VmRunOk, VmRunError> {
use hbbytecode::opcode::*;
loop {
// Big match
//
// Contribution guide:
// - Zero register shall never be overwitten. It's value has to always be 0.
// - Prefer `Self::read_reg` and `Self::write_reg` functions
// - Extract parameters using `param!` macro
// - Prioritise speed over code size
// - Memory is cheap, CPUs not that much
// - Do not heap allocate at any cost
// - Yes, user-provided trap handler may allocate,
// but that is not our »fault«.
// - Unsafe is kinda must, but be sure you have validated everything
// - Your contributions have to pass sanitizers and Miri
// - Strictly follow the spec
// - The spec does not specify how you perform actions, in what order,
// just that the observable effects have to be performed in order and
// correctly.
// - Yes, we assume you run 64 bit CPU. Else ?conradluget a better CPU
// sorry 8 bit fans, HBVM won't run on your Speccy :(
unsafe {
match self
.memory
.prog_read::<u8>(self.pc as _)
.ok_or(VmRunError::ProgramFetchLoadEx(self.pc as _))?
{
UN => {
self.decode::<()>();
return Err(VmRunError::Unreachable);
}
TX => {
self.decode::<()>();
return Ok(VmRunOk::End);
}
NOP => self.decode::<()>(),
ADD => self.binary_op(u64::wrapping_add),
SUB => self.binary_op(u64::wrapping_sub),
MUL => self.binary_op(u64::wrapping_mul),
AND => self.binary_op::<u64>(ops::BitAnd::bitand),
OR => self.binary_op::<u64>(ops::BitOr::bitor),
XOR => self.binary_op::<u64>(ops::BitXor::bitxor),
SL => self.binary_op(|l, r| u64::wrapping_shl(l, r as u32)),
SR => self.binary_op(|l, r| u64::wrapping_shr(l, r as u32)),
SRS => self.binary_op(|l, r| i64::wrapping_shl(l, r as u32)),
CMP => {
// Compare a0 <=> a1
// < → 0
// > → 1
// = → 2
let ParamBBB(tg, a0, a1) = self.decode();
self.write_reg(
tg,
self.read_reg(a0)
.cast::<i64>()
.cmp(&self.read_reg(a1).cast::<i64>())
as i64
+ 1,
);
}
CMPU => {
// Unsigned comparsion
let ParamBBB(tg, a0, a1) = self.decode();
self.write_reg(
tg,
self.read_reg(a0)
.cast::<u64>()
.cmp(&self.read_reg(a1).cast::<u64>())
as i64
+ 1,
);
}
NOT => {
// Logical negation
let ParamBB(tg, a0) = self.decode();
self.write_reg(tg, !self.read_reg(a0).cast::<u64>());
}
NEG => {
// Bitwise negation
let ParamBB(tg, a0) = self.decode();
self.write_reg(
tg,
match self.read_reg(a0).cast::<u64>() {
0 => 1_u64,
_ => 0,
},
);
}
DIR => {
// Fused Division-Remainder
let ParamBBBB(dt, rt, a0, a1) = self.decode();
let a0 = self.read_reg(a0).cast::<u64>();
let a1 = self.read_reg(a1).cast::<u64>();
self.write_reg(dt, a0.checked_div(a1).unwrap_or(u64::MAX));
self.write_reg(rt, a0.checked_rem(a1).unwrap_or(u64::MAX));
}
ADDI => self.binary_op_imm(u64::wrapping_add),
MULI => self.binary_op_imm(u64::wrapping_sub),
ANDI => self.binary_op_imm::<u64>(ops::BitAnd::bitand),
ORI => self.binary_op_imm::<u64>(ops::BitOr::bitor),
XORI => self.binary_op_imm::<u64>(ops::BitXor::bitxor),
SLI => self.binary_op_ims(u64::wrapping_shl),
SRI => self.binary_op_ims(u64::wrapping_shr),
SRSI => self.binary_op_ims(i64::wrapping_shr),
CMPI => {
let ParamBBD(tg, a0, imm) = self.decode();
self.write_reg(
tg,
self.read_reg(a0)
.cast::<i64>()
.cmp(&Value::from(imm).cast::<i64>())
as i64,
);
}
CMPUI => {
let ParamBBD(tg, a0, imm) = self.decode();
self.write_reg(tg, self.read_reg(a0).cast::<u64>().cmp(&imm) as i64);
}
CP => {
let ParamBB(tg, a0) = self.decode();
self.write_reg(tg, self.read_reg(a0));
}
SWA => {
// Swap registers
let ParamBB(r0, r1) = self.decode();
match (r0, r1) {
(0, 0) => (),
(dst, 0) | (0, dst) => self.write_reg(dst, 0_u64),
(r0, r1) => {
core::ptr::swap(
self.registers.get_unchecked_mut(usize::from(r0)),
self.registers.get_unchecked_mut(usize::from(r1)),
);
}
}
}
LI => {
let ParamBD(tg, imm) = self.decode();
self.write_reg(tg, imm);
}
LD => {
// Load. If loading more than register size, continue on adjecent registers
let ParamBBDH(dst, base, off, count) = self.decode();
let n: u8 = match dst {
0 => 1,
_ => 0,
};
self.memory.load(
self.ldst_addr_uber(dst, base, off, count, n)?,
self.registers
.as_mut_ptr()
.add(usize::from(dst) + usize::from(n))
.cast(),
usize::from(count).saturating_sub(n.into()),
)?;
}
ST => {
// Store. Same rules apply as to LD
let ParamBBDH(dst, base, off, count) = self.decode();
self.memory.store(
self.ldst_addr_uber(dst, base, off, count, 0)?,
self.registers.as_ptr().add(usize::from(dst)).cast(),
count.into(),
)?;
}
BMC => {
// Block memory copy
match if let Some(copier) = &mut self.copier {
// There is some copier, poll.
copier.poll(&mut self.memory)
} else {
// There is none, make one!
let ParamBBD(src, dst, count) = self.decode();
// So we are still on BMC on next cycle
self.pc -= size_of::<ParamBBD>() + 1;
self.copier = Some(BlockCopier::new(
Address::new(self.read_reg(src).cast()),
Address::new(self.read_reg(dst).cast()),
count as _,
));
self.copier
.as_mut()
.unwrap_unchecked() // SAFETY: We just assigned there
.poll(&mut self.memory)
} {
// We are done, shift program counter
core::task::Poll::Ready(Ok(())) => {
self.copier = None;
self.pc += size_of::<ParamBBD>() + 1;
}
// Error, shift program counter (for consistency)
// and yield error
core::task::Poll::Ready(Err(e)) => {
self.pc += size_of::<ParamBBD>() + 1;
return Err(e.into());
}
// Not done yet, proceed to next cycle
core::task::Poll::Pending => (),
}
}
BRC => {
// Block register copy
let ParamBBB(src, dst, count) = self.decode();
if src.checked_add(count).is_none() || dst.checked_add(count).is_none() {
return Err(VmRunError::RegOutOfBounds);
}
core::ptr::copy(
self.registers.get_unchecked(usize::from(src)),
self.registers.get_unchecked_mut(usize::from(dst)),
usize::from(count),
);
}
JAL => {
// Jump and link. Save PC after this instruction to
// specified register and jump to reg + offset.
let ParamBBD(save, reg, offset) = self.decode();
self.write_reg(save, self.pc.get());
self.pc =
Address::new(self.read_reg(reg).cast::<u64>().saturating_add(offset));
}
// Conditional jumps, jump only to immediates
JEQ => self.cond_jmp::<u64>(Ordering::Equal),
JNE => {
let ParamBBD(a0, a1, jt) = self.decode();
if self.read_reg(a0).cast::<u64>() != self.read_reg(a1).cast::<u64>() {
self.pc = Address::new(jt);
}
}
JLT => self.cond_jmp::<u64>(Ordering::Less),
JGT => self.cond_jmp::<u64>(Ordering::Greater),
JLTU => self.cond_jmp::<i64>(Ordering::Less),
JGTU => self.cond_jmp::<i64>(Ordering::Greater),
ECALL => {
self.decode::<()>();
// So we don't get timer interrupt after ECALL
if TIMER_QUOTIENT != 0 {
self.timer = self.timer.wrapping_add(1);
}
return Ok(VmRunOk::Ecall);
}
ADDF => self.binary_op::<f64>(ops::Add::add),
SUBF => self.binary_op::<f64>(ops::Sub::sub),
MULF => self.binary_op::<f64>(ops::Mul::mul),
DIRF => {
let ParamBBBB(dt, rt, a0, a1) = self.decode();
let a0 = self.read_reg(a0).cast::<f64>();
let a1 = self.read_reg(a1).cast::<f64>();
self.write_reg(dt, a0 / a1);
self.write_reg(rt, a0 % a1);
}
FMAF => {
let ParamBBBB(dt, a0, a1, a2) = self.decode();
self.write_reg(
dt,
self.read_reg(a0).cast::<f64>() * self.read_reg(a1).cast::<f64>()
+ self.read_reg(a2).cast::<f64>(),
);
}
NEGF => {
let ParamBB(dt, a0) = self.decode();
self.write_reg(dt, -self.read_reg(a0).cast::<f64>());
}
ITF => {
let ParamBB(dt, a0) = self.decode();
self.write_reg(dt, self.read_reg(a0).cast::<i64>() as f64);
}
FTI => {
let ParamBB(dt, a0) = self.decode();
self.write_reg(dt, self.read_reg(a0).cast::<f64>() as i64);
}
ADDFI => self.binary_op_imm::<f64>(ops::Add::add),
MULFI => self.binary_op_imm::<f64>(ops::Mul::mul),
op => return Err(VmRunError::InvalidOpcode(op)),
}
}
if TIMER_QUOTIENT != 0 {
self.timer = self.timer.wrapping_add(1);
if self.timer % TIMER_QUOTIENT == 0 {
return Ok(VmRunOk::Timer);
}
}
}
}
/// Decode instruction operands
#[inline(always)]
unsafe fn decode<T: ProgramVal>(&mut self) -> T {
let pc1 = self.pc + 1_u64;
let data = self.memory.prog_read_unchecked::<T>(pc1 as _);
self.pc += 1 + size_of::<T>();
data
}
/// Perform binary operating over two registers
#[inline(always)]
unsafe fn binary_op<T: ValueVariant>(&mut self, op: impl Fn(T, T) -> T) {
let ParamBBB(tg, a0, a1) = self.decode();
self.write_reg(
tg,
op(self.read_reg(a0).cast::<T>(), self.read_reg(a1).cast::<T>()),
);
}
/// Perform binary operation over register and immediate
#[inline(always)]
unsafe fn binary_op_imm<T: ValueVariant>(&mut self, op: impl Fn(T, T) -> T) {
let ParamBBD(tg, reg, imm) = self.decode();
self.write_reg(
tg,
op(self.read_reg(reg).cast::<T>(), Value::from(imm).cast::<T>()),
);
}
/// Perform binary operation over register and shift immediate
#[inline(always)]
unsafe fn binary_op_ims<T: ValueVariant>(&mut self, op: impl Fn(T, u32) -> T) {
let ParamBBW(tg, reg, imm) = self.decode();
self.write_reg(tg, op(self.read_reg(reg).cast::<T>(), imm));
}
/// Jump at `#3` if ordering on `#0 <=> #1` is equal to expected
#[inline(always)]
unsafe fn cond_jmp<T: ValueVariant + Ord>(&mut self, expected: Ordering) {
let ParamBBD(a0, a1, ja) = self.decode();
if self
.read_reg(a0)
.cast::<T>()
.cmp(&self.read_reg(a1).cast::<T>())
== expected
{
self.pc = Address::new(ja);
}
}
/// Read register
#[inline(always)]
unsafe fn read_reg(&self, n: u8) -> Value {
*self.registers.get_unchecked(n as usize)
}
/// Write a register.
/// Writing to register 0 is no-op.
#[inline(always)]
unsafe fn write_reg(&mut self, n: u8, value: impl Into<Value>) {
if n != 0 {
*self.registers.get_unchecked_mut(n as usize) = value.into();
}
}
/// Load / Store Address check-computation überfunction
#[inline(always)]
unsafe fn ldst_addr_uber(
&self,
dst: u8,
base: u8,
offset: u64,
size: u16,
adder: u8,
) -> Result<Address, VmRunError> {
let reg = dst.checked_add(adder).ok_or(VmRunError::RegOutOfBounds)?;
if usize::from(reg) * 8 + usize::from(size) > 2048 {
Err(VmRunError::RegOutOfBounds)
} else {
self.read_reg(base)
.cast::<u64>()
.checked_add(offset)
.and_then(|x| x.checked_add(adder.into()))
.ok_or(VmRunError::AddrOutOfBounds)
.map(Address::new)
}
}
}

View file

@ -1,4 +1,4 @@
hex_literal_case = "Upper" hex_literal_case = "Upper"
imports_granularity = "One" imports_granularity = "One"
struct_field_align_threshold = 8 struct_field_align_threshold = 5
enum_discrim_align_threshold = 8 enum_discrim_align_threshold = 5

146
spec.md
View file

@ -1,11 +1,9 @@
# HoleyBytes ISA Specification # HoleyBytes ISA Specification
# Bytecode format # Bytecode format
- Holey Bytes program should start with following magic: `[0xAB, 0x1E, 0x0B]`
- All numbers are encoded little-endian - All numbers are encoded little-endian
- There is 256 registers, they are represented by a byte - There is 256 registers, they are represented by a byte
- Immediate values are 64 bit - Immediate values are 64 bit
- Program is by spec required to be terminated with 12 zero bytes
### Instruction encoding ### Instruction encoding
- Instruction parameters are packed (no alignment) - Instruction parameters are packed (no alignment)
@ -22,7 +20,6 @@
| BBB | 24 bits | | BBB | 24 bits |
| BBDH | 96 bits | | BBDH | 96 bits |
| BBD | 80 bits | | BBD | 80 bits |
| BBW | 48 bits |
| BB | 16 bits | | BB | 16 bits |
| BD | 72 bits | | BD | 72 bits |
| D | 64 bits | | D | 64 bits |
@ -34,14 +31,12 @@
- `P ← V`: Set register P to value V - `P ← V`: Set register P to value V
- `[x]`: Address x - `[x]`: Address x
## Program execution control ## No-op
- N type - N type
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:-----------------------------:| |:------:|:----:|:----------:|
| 0 | UN | Trigger unreachable code trap | | 0 | NOP | Do nothing |
| 1 | TX | Terminate execution |
| 2 | NOP | Do nothing |
## Integer binary ops. ## Integer binary ops.
- BBB type - BBB type
@ -49,28 +44,28 @@
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:-----------------------:| |:------:|:----:|:-----------------------:|
| 3 | ADD | Wrapping addition | | 1 | ADD | Wrapping addition |
| 4 | SUB | Wrapping subtraction | | 2 | SUB | Wrapping subtraction |
| 5 | MUL | Wrapping multiplication | | 3 | MUL | Wrapping multiplication |
| 6 | AND | Bitand | | 4 | AND | Bitand |
| 7 | OR | Bitor | | 5 | OR | Bitor |
| 8 | XOR | Bitxor | | 6 | XOR | Bitxor |
| 9 | SL | Unsigned left bitshift | | 7 | SL | Unsigned left bitshift |
| 10 | SR | Unsigned right bitshift | | 8 | SR | Unsigned right bitshift |
| 11 | SRS | Signed right bitshift | | 9 | SRS | Signed right bitshift |
### Comparsion ### Comparsion
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:-------------------:| |:------:|:----:|:-------------------:|
| 12 | CMP | Signed comparsion | | 10 | CMP | Signed comparsion |
| 13 | CMPU | Unsigned comparsion | | 11 | CMPU | Unsigned comparsion |
#### Comparsion table #### Comparsion table
| #1 *op* #2 | Result | | #1 *op* #2 | Result |
|:----------:|:------:| |:----------:|:------:|
| < | 0 | | < | -1 |
| = | 1 | | = | 0 |
| > | 2 | | > | 1 |
### Division-remainder ### Division-remainder
- Type BBBB - Type BBBB
@ -80,7 +75,7 @@
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:-------------------------------:| |:------:|:----:|:-------------------------------:|
| 14 | DIR | Divide and remainder combinated | | 12 | DIR | Divide and remainder combinated |
### Negations ### Negations
- Type BB - Type BB
@ -88,36 +83,31 @@
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:----------------:| |:------:|:----:|:----------------:|
| 15 | NEG | Bit negation | | 13 | NEG | Bit negation |
| 16 | NOT | Logical negation | | 14 | NOT | Logical negation |
## Integer immediate binary ops. ## Integer immediate binary ops.
- Type BBD - Type BBD
- `#0 ← #1 <op> imm #2` - `#0 ← #1 <op> imm #2`
| Opcode | Name | Action |
|:------:|:----:|:--------------------:|
| 17 | ADDI | Wrapping addition |
| 18 | MULI | Wrapping subtraction |
| 19 | ANDI | Bitand |
| 20 | ORI | Bitor |
| 21 | XORI | Bitxor |
### Bitshifts
- Type BBW
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:-----------------------:| |:------:|:----:|:-----------------------:|
| 22 | SLI | Unsigned left bitshift | | 15 | ADDI | Wrapping addition |
| 23 | SRI | Unsigned right bitshift | | 16 | MULI | Wrapping subtraction |
| 24 | SRSI | Signed right bitshift | | 17 | ANDI | Bitand |
| 18 | ORI | Bitor |
| 19 | XORI | Bitxor |
| 20 | SLI | Unsigned left bitshift |
| 21 | SRI | Unsigned right bitshift |
| 22 | SRSI | Signed right bitshift |
### Comparsion ### Comparsion
- Comparsion is the same as when RRR type - Comparsion is the same as when RRR type
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:-----:|:-------------------:| |:------:|:-----:|:-------------------:|
| 25 | CMPI | Signed comparsion | | 23 | CMPI | Signed comparsion |
| 26 | CMPUI | Unsigned comparsion | | 24 | CMPUI | Unsigned comparsion |
## Register value set / copy ## Register value set / copy
@ -127,18 +117,15 @@
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:------:| |:------:|:----:|:------:|
| 27 | CP | Copy | | 25 | CP | Copy |
### Swap ### Swap
- Type BB - Type BB
- Swap #0 and #1 - Swap #0 and #1
- Zero register rules:
- Both: no-op
- One: Copy zero to the non-zero register
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:------:| |:------:|:----:|:------:|
| 28 | SWA | Swap | | 26 | SWA | Swap |
### Load immediate ### Load immediate
- Type BD - Type BD
@ -146,7 +133,7 @@
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:--------------:| |:------:|:----:|:--------------:|
| 29 | LI | Load immediate | | 27 | LI | Load immediate |
## Memory operations ## Memory operations
- Type BBDH - Type BBDH
@ -155,8 +142,8 @@
### Load / Store ### Load / Store
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:---------------------------------------:| |:------:|:----:|:---------------------------------------:|
| 30 | LD | `#0 ← [#1 + imm #3], copy imm #4 bytes` | | 28 | LD | `#0 ← [#1 + imm #3], copy imm #4 bytes` |
| 31 | ST | `[#1 + imm #3] ← #0, copy imm #4 bytes` | | 29 | ST | `[#1 + imm #3] ← #0, copy imm #4 bytes` |
## Block copy ## Block copy
- Block copy source and target can overlap - Block copy source and target can overlap
@ -166,7 +153,7 @@
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:--------------------------------:| |:------:|:----:|:--------------------------------:|
| 32 | BMC | `[#1] ← [#0], copy imm #2 bytes` | | 30 | BMC | `[#0] ← [#1], copy imm #2 bytes` |
### Register copy ### Register copy
- Type BBB - Type BBB
@ -174,22 +161,16 @@
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:--------------------------------:| |:------:|:----:|:--------------------------------:|
| 33 | BRC | `#1 ← #0, copy imm #2 registers` | | 31 | BRC | `#0 ← #1, copy imm #2 registers` |
## Control flow ## Control flow
### Unconditional jump ### Unconditional jump
- Type D - Type BD
| Opcode | Name | Action |
|:------:|:----:|:-------------------------------:|
| 34 | JMP | Unconditional, non-linking jump |
### Unconditional linking jump | Opcode | Name | Action |
- Type BBD |:------:|:----:|:---------------------:|
| 32 | JMP | Jump at `#0 + imm #1` |
| Opcode | Name | Action |
|:------:|:----:|:--------------------------------------------------:|
| 35 | JAL | Save PC past JAL to `#0` and jump at `#1 + imm #2` |
### Conditional jumps ### Conditional jumps
- Type BBD - Type BBD
@ -197,19 +178,19 @@
| Opcode | Name | Comparsion | | Opcode | Name | Comparsion |
|:------:|:----:|:------------:| |:------:|:----:|:------------:|
| 36 | JEQ | = | | 33 | JEQ | = |
| 37 | JNE | ≠ | | 34 | JNE | ≠ |
| 38 | JLT | < (signed) | | 35 | JLT | < (signed) |
| 39 | JGT | > (signed) | | 36 | JGT | > (signed) |
| 40 | JLTU | < (unsigned) | | 37 | JLTU | < (unsigned) |
| 41 | JGTU | > (unsigned) | | 38 | JGTU | > (unsigned) |
### Environment call ### Environment call
- Type N - Type N
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:-----:|:-------------------------------------:| |:------:|:-----:|:-------------------------------------:|
| 42 | ECALL | Cause an trap to the host environment | | 39 | ECALL | Cause an trap to the host environment |
## Floating point operations ## Floating point operations
- Type BBB - Type BBB
@ -217,29 +198,29 @@
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:--------------:| |:------:|:----:|:--------------:|
| 43 | ADDF | Addition | | 40 | ADDF | Addition |
| 44 | SUBF | Subtraction | | 41 | SUBF | Subtraction |
| 45 | MULF | Multiplication | | 42 | MULF | Multiplication |
### Division-remainder ### Division-remainder
- Type BBBB - Type BBBB
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:-------------------------:| |:------:|:----:|:-------------------------:|
| 46 | DIRF | Same as for integer `DIR` | | 43 | DIRF | Same as for integer `DIR` |
### Fused Multiply-Add ### Fused Multiply-Add
- Type BBBB - Type BBBB
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:---------------------:| |:------:|:----:|:---------------------:|
| 47 | FMAF | `#0 ← (#1 * #2) + #3` | | 44 | FMAF | `#0 ← (#1 * #2) + #3` |
### Negation ### Negation
- Type BB - Type BB
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:----------:| |:------:|:----:|:----------:|
| 48 | NEGF | `#0 ← -#1` | | 45 | NEGF | `#0 ← -#1` |
### Conversion ### Conversion
- Type BB - Type BB
@ -248,8 +229,8 @@
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:------------:| |:------:|:----:|:------------:|
| 49 | ITF | Int to Float | | 46 | ITF | Int to Float |
| 50 | FTI | Float to Int | | 47 | FTI | Float to Int |
## Floating point immediate operations ## Floating point immediate operations
- Type BBD - Type BBD
@ -257,8 +238,8 @@
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:-----:|:--------------:| |:------:|:-----:|:--------------:|
| 51 | ADDFI | Addition | | 48 | ADDFI | Addition |
| 52 | MULFI | Multiplication | | 49 | MULFI | Multiplication |
# Registers # Registers
- There is 255 registers + one zero register (with index 0) - There is 255 registers + one zero register (with index 0)
@ -267,23 +248,21 @@
# Memory # Memory
- Addresses are 64 bit - Addresses are 64 bit
- Program should be in the same address space as all other data
- Memory implementation is arbitrary - Memory implementation is arbitrary
- Address `0x0` may or may not be valid. Count with compilers
considering it invalid!
- In case of accessing invalid address: - In case of accessing invalid address:
- Program shall trap (LoadAccessEx, StoreAccessEx) with parameter of accessed address - Program shall trap (LoadAccessEx, StoreAccessEx) with parameter of accessed address
- Value of register when trapped is undefined - Value of register when trapped is undefined
## Recommendations ## Recommendations
- Leave address `0x0` as invalid
- If paging used: - If paging used:
- Leave first page invalid - Leave first page invalid
- Pages should be at least 4 KiB - Pages should be at least 4 KiB
# Program execution # Program execution
- The way of program execution is implementation defined - The way of program execution is implementation defined
- The execution is arbitrary, as long all effects are obervable - The order of instruction is arbitrary, as long all observable
in the way as program was executed literally, in order. effects are applied in the program's order
# Program validation # Program validation
- Invalid program should cause runtime error: - Invalid program should cause runtime error:
@ -298,7 +277,6 @@ Program should at least implement these traps:
- Invalid instruction exception - Invalid instruction exception
- Load address exception - Load address exception
- Store address exception - Store address exception
- Unreachable instruction
and executing environment should be able to get information about them, and executing environment should be able to get information about them,
like the opcode of invalid instruction or attempted address to load/store. like the opcode of invalid instruction or attempted address to load/store.