Lottsa things changed

This commit is contained in:
Erin 2023-09-26 23:36:27 +02:00
parent b1bdbea991
commit 3e4095da6f
27 changed files with 362 additions and 1322 deletions

202
Cargo.lock generated
View file

@ -2,93 +2,12 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
]
[[package]]
name = "allocator-api2"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
[[package]]
name = "ariadne"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72fe02fc62033df9ba41cba57ee19acf5e742511a140c7dbc3a873e19a19a1bd"
dependencies = [
"unicode-width",
"yansi",
]
[[package]]
name = "beef"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "bytemuck"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "hashbrown"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
dependencies = [
"ahash",
]
[[package]]
name = "hashbrown"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "hbasm"
version = "0.1.0"
dependencies = [
"ariadne",
"bytemuck",
"hashbrown 0.14.0",
"hbbytecode",
"lasso",
"literify",
"logos",
"paste",
]
[[package]]
name = "hbbytecode"
version = "0.1.0"
dependencies = [
"with_builtin_macros",
]
[[package]]
name = "hbvm"
@ -98,84 +17,17 @@ dependencies = [
]
[[package]]
name = "lasso"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2"
dependencies = [
"ahash",
"hashbrown 0.13.2",
]
[[package]]
name = "literify"
name = "hbxrt"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54e4d365df794ed78b4ce1061886f82eae7afa8e3a98ce4c4b0bfd0c777b1175"
dependencies = [
"litrs",
"proc-macro2",
"quote",
"hbvm",
]
[[package]]
name = "litrs"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f17c3668f3cc1132437cdadc93dab05e52d592f06948d3f64828430c36e4a70"
dependencies = [
"proc-macro2",
]
[[package]]
name = "logos"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c000ca4d908ff18ac99b93a062cb8958d331c3220719c52e77cb19cc6ac5d2c1"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-codegen"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc487311295e0002e452025d6b580b77bb17286de87b57138f3b5db711cded68"
dependencies = [
"beef",
"fnv",
"proc-macro2",
"quote",
"regex-syntax",
"syn",
]
[[package]]
name = "logos-derive"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbfc0d229f1f42d790440136d941afd806bc9e949e2bcb8faa813b0f00d1267e"
dependencies = [
"logos-codegen",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "paste"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
[[package]]
name = "proc-macro2"
version = "1.0.66"
version = "1.0.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328"
dependencies = [
"unicode-ident",
]
@ -189,17 +41,11 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "syn"
version = "2.0.29"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
@ -208,24 +54,26 @@ dependencies = [
[[package]]
name = "unicode-ident"
version = "1.0.11"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-width"
version = "0.1.10"
name = "with_builtin_macros"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
checksum = "a59d55032495429b87f9d69954c6c8602e4d3f3e0a747a12dea6b0b23de685da"
dependencies = [
"with_builtin_macros-proc_macros",
]
[[package]]
name = "version_check"
version = "0.9.4"
name = "with_builtin_macros-proc_macros"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
checksum = "15bd7679c15e22924f53aee34d4e448c45b674feb6129689af88593e129f8f42"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View file

@ -1,3 +1,3 @@
[workspace]
resolver = "2"
members = ["hbasm", "hbbytecode", "hbvm"]
members = ["hbbytecode", "hbvm", "hbxrt"]

View file

@ -1,22 +0,0 @@
[package]
name = "hbasm"
version = "0.1.0"
edition = "2021"
[dependencies]
ariadne = "0.3"
bytemuck = "1.13"
hashbrown = "0.14"
hbbytecode = { path = "../hbbytecode" }
literify = "0.1"
paste = "1.0"
[dependencies.lasso]
version = "0.7"
default-features = false
features = ["no-std"]
[dependencies.logos]
version = "0.13"
default-features = false
features = ["export_derive"]

View file

@ -1,12 +0,0 @@
-- Add two numbers
-- A + B = C
-- r1 A
li r1, 2
-- r2 Result
li r2, 0
-- B = 4
addi r2, r1, 4
-- terminate execution
tx

View file

@ -1,16 +0,0 @@
-- r1 will be the temp in fahrenheit
-- r2 temp in celsius
-- r3/r4/r5 will be used by constants
-- (f - 32) * 5 / 9
li r1, 100
li r3, 32
li r4, 5
li r5, 9
sub r2, r1, r3
mul r2, r2, r4
dir r2, r0, r2, r5
tx

View file

@ -1,14 +0,0 @@
li r255, 0
ecall
li r255, 1
li r254, 1
li r253, 100
ecall
li r255, 2
li r254, 0
li r253, 0
ecall
tx

View file

@ -1,2 +0,0 @@
L:
jal r0, r0, L

View file

@ -1,4 +0,0 @@
li r20, 1010
st r20, r24, 0, 1
addi r24, r0, 10
tx

View file

@ -1,18 +0,0 @@
jmp r0, start
start:
jmp r0, init_serial_port
-- Uses r20 to set the port
init_serial_port:
add r20, r30, r10
li r20, 00
-- outb(PORT + 1, 0x00); // Disable all interrupts
-- outb(PORT + 3, 0x80); // Enable DLAB (set baud rate divisor)
-- outb(PORT + 0, 0x03); // Set divisor to 3 (lo byte) 38400 baud
-- outb(PORT + 1, 0x00); // (hi byte)
-- outb(PORT + 3, 0x03); // 8 bits, no parity, one stop bit
-- outb(PORT + 2, 0xC7); // Enable FIFO, clear them, with 14-byte threshold
-- outb(PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set
-- outb(PORT + 4, 0x1E); // Set in loopback mode, test the serial chip
-- outb(PORT + 0, 0xAE); // Test serial chip (send byte 0xAE and check if serial returns same byte)

View file

@ -1,104 +0,0 @@
//! Holey Bytes Assembler
//!
//! Some people claim:
//! > Write programs to handle text streams, because that is a universal interface.
//!
//! We at AbleCorp believe that a nice programmatic API is nicer than piping some text
//! into a program. It's less error-prone and faster.
//!
//! So this crate contains both an assembler with an API for programs and a text assembler
//! for humans to write.
#![no_std]
extern crate alloc;
mod macros;
use {
alloc::{vec, vec::Vec},
hashbrown::HashSet,
};
/// Assembler
///
/// Collects encoded bytecode in [`Assembler::buf`].
///
/// - Opcode-generic, instruction-type-specific methods are named `i_param_<type>`
/// - You likely won't need to use them, but they are here, just in case :)
/// - Instruction-specific methods are named `i_<instruction>`
pub struct Assembler {
/// Output bytecode. The `Default` implementation seeds it with four zero
/// bytes, which [`Assembler::finalise`] overwrites with the magic number.
pub buf: Vec<u8>,
/// Offsets into `buf` at which the text assembler wrote a label reference
/// that still has to be replaced by the label's address.
pub sub: HashSet<usize>,
}
impl Default for Assembler {
fn default() -> Self {
Self {
buf: vec![0; 4],
sub: Default::default(),
}
}
}
hbbytecode::invoke_with_def!(macros::text::gen_text);
impl Assembler {
// Expands to the opcode-generic `i_param_<type>` methods and the
// instruction-specific `i_<instruction>` methods.
hbbytecode::invoke_with_def!(macros::asm::impl_asm);
/// Append 12 zeroes (`UN`) at the end and write the magic to the beginning
///
/// # HoleyBytes lore
///
/// In the reference HBVM implementation, checks are done in
/// a separate phase before execution.
///
/// This way execution is much faster, as the checks have to
/// be done only once.
///
/// There was an issue: register values cannot be checked statically, so a
/// `JAL` instruction could hop near the end of the program to some byte which
/// would be interpreted as a valid opcode, and the VM, attempting to decode
/// the instruction, would perform an out-of-bounds read, which is undefined behaviour.
///
/// Several options were considered to overcome this; the one chosen was inserting
/// some data at the program's end which, when executed, leads to undesired
/// behaviour, though not undefined behaviour.
///
/// The newly created `UN` (as in UNreachable) was chosen because
/// - It was a good idea to add some equivalent to `ud2` anyway
/// - It was chosen to be zero
/// - If you somehow reach that code, it will appropriately bail :)
/// - (yes, originally `NOP` was considered)
///
/// Why 12 bytes? That's the size of the largest instruction parameter part.
pub fn finalise(&mut self) {
// Trailing padding of 12 zero (`UN`) bytes.
self.buf.extend([0; 12]);
// Overwrite the first four bytes of the buffer with the magic, little-endian.
self.buf[0..4].copy_from_slice(&0xAB1E0B_u32.to_le_bytes());
}
}
/// Immediate value
///
/// Anything implementing this trait can be passed as an `I` operand to the
/// generated assembler methods.
///
/// # Implementor notice
/// It should insert exactly 8 bytes, otherwise the output will be malformed.
/// This is not checked in any way.
pub trait Imm {
/// Insert the immediate value into the assembler's output.
fn insert(&self, asm: &mut Assembler);
}
/// Implement [`Imm`] for numeric types by appending their little-endian bytes
macro_rules! impl_imm_le_bytes {
    ($($num:ty),* $(,)?) => {$(
        impl Imm for $num {
            #[inline(always)]
            fn insert(&self, asm: &mut Assembler) {
                // Little-endian representation goes straight into the buffer.
                let bytes = self.to_le_bytes();
                asm.buf.extend(bytes);
            }
        }
    )*};
}

impl_imm_le_bytes!(u64, i64, f64);

View file

@ -1,89 +0,0 @@
//! Macros to generate [`crate::Assembler`]
/// Incremental token-tree muncher to implement specific instruction
/// functions based on generic function for instruction type
///
/// Input shape: `generic_fn(param: Type, …) => [OPCODE_A, OPCODE_B, …,]`.
/// Each step peels one opcode off the list, emits `i_<opcode>` forwarding to
/// `generic_fn`, and recurses on the remaining opcodes until the list is empty.
macro_rules! impl_asm_opcodes {
( // End case
$generic:ident
($($param_i:ident: $param_ty:ty),*)
=> []
) => {};
// Recursive case: at least one opcode (followed by a comma) is left.
(
$generic:ident
($($param_i:ident: $param_ty:ty),*)
=> [$opcode:ident, $($rest:tt)*]
) => {
// Instruction-specific function
// Named `i_` + lowercased opcode; passes the opcode constant from
// `hbbytecode::opcode` together with all parameters to the generic method.
paste::paste! {
#[inline(always)]
pub fn [<i_ $opcode:lower>](&mut self, $($param_i: $param_ty),*) {
self.$generic(hbbytecode::opcode::$opcode, $($param_i),*)
}
}
// And recurse!
macros::asm::impl_asm_opcodes!(
$generic($($param_i: $param_ty),*)
=> [$($rest)*]
);
};
}
/// Numeric value insert
///
/// Invoked as `impl_asm_insert!(assembler, operand, OPERAND_TYPE)`; the arm is
/// selected by the operand type identifier.
macro_rules! impl_asm_insert {
// Immediate - this is trait-based,
// the insertion is delegated to its implementation
($self:expr, $id:ident, I) => {
Imm::insert(&$id, $self)
};
// Length - cannot be more than 2048
// (a larger value panics through the `assert!`)
($self:expr, $id:ident, L) => {{
assert!($id <= 2048);
$self.buf.extend($id.to_le_bytes())
}};
// Other numbers, just insert their bytes, little endian
($self:expr, $id:ident, $_:ident) => {
$self.buf.extend($id.to_le_bytes())
};
}
/// Implement assembler
///
/// Takes the bytecode definition as a list of
/// `type_name(param: OPERAND_TYPE, …) => [OPCODE, …],` entries and, for each
/// entry, generates one opcode-generic method `i_param_<type_name>` plus one
/// method per listed opcode (through `impl_asm_opcodes!`).
macro_rules! impl_asm {
(
$(
$ityn:ident
($($param_i:ident: $param_ty:ident),* $(,)?)
=> [$($opcode:ident),* $(,)?],
)*
) => {
paste::paste! {
$(
// Opcode-generic functions specific for instruction types
// Pushes the opcode byte, then every operand in declaration order.
pub fn [<i_param_ $ityn>](&mut self, opcode: u8, $($param_i: macros::asm::ident_map_ty!($param_ty)),*) {
self.buf.push(opcode);
$(macros::asm::impl_asm_insert!(self, $param_i, $param_ty);)*
}
// Generate opcode-specific functions calling the opcode-generic ones
// The opcode list is re-emitted with a trailing comma after every
// element, which is the form the muncher expects.
macros::asm::impl_asm_opcodes!(
[<i_param_ $ityn>]($($param_i: macros::asm::ident_map_ty!($param_ty)),*)
=> [$($opcode,)*]
);
)*
}
};
}
/// Map operand type to Rust type
///
/// Used for the parameter types of the generated assembler methods.
#[rustfmt::skip]
macro_rules! ident_map_ty {
(R) => { u8 }; // Register is just u8
(I) => { impl Imm }; // Immediate is anything implementing the trait
(L) => { u16 }; // Memory load / store size; `impl_asm_insert!` asserts it is at most 2048
($id:ident) => { $id }; // Anything else → identity map
}
pub(crate) use {ident_map_ty, impl_asm, impl_asm_insert, impl_asm_opcodes};

View file

@ -1,6 +0,0 @@
//! And here the land of macros begins.
//!
//! They do not bite, really. Have you seen what Yandros is writing?
pub mod asm;
pub mod text;

View file

@ -1,293 +0,0 @@
//! Macros to generate text-code assembler at [`crate::text`]
// Referring, from the module that generates a module, to that very module — is that even legal? :D
/// Generate text code based assembler
macro_rules! gen_text {
(
$(
$ityn:ident
($($param_i:ident: $param_ty:ident),* $(,)?)
=> [$($opcode:ident),* $(,)?],
)*
) => {
/// # Text assembler
/// Text assembler generated simply calls methods in the [`crate::Assembler`] type.
///
/// # Syntax
/// ```text
/// instruction op1, op2, …
/// …
/// ```
/// - Opcode names are lowercase
/// - Registers are prefixed with `r` followed by number
/// - Operands are separated by `,`
/// - Instructions are separated by either line feed or `;` (αυτό δεν είναι ερωτηματικό!)
/// - Labels are defined by their names followed by colon `label:`
/// - Labels are referenced simply by their names
/// - Immediates are numbers, can be negative, floats are not yet supported
pub mod text {
use {
crate::{
Assembler,
macros::text::*,
},
hashbrown::HashMap,
lasso::{Key, Rodeo, Spur},
logos::{Lexer, Logos, Span},
};
paste::paste!(literify::literify! {
/// Assembly token
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
#[logos(extras = Rodeo)]
#[logos(skip r"[ \t\t]+")]
#[logos(skip r"-- .*")]
pub enum Token {
$($(#[token(~([<$opcode:lower>]), |_| hbbytecode::opcode::[<$opcode:upper>])])*)*
Opcode(u8),
#[regex("[0-9]+", |l| l.slice().parse().ok())]
#[regex(
"-[0-9]+",
|lexer| {
Some(u64::from_ne_bytes(lexer.slice().parse::<i64>().ok()?.to_ne_bytes()))
},
)] Integer(u64),
#[regex(
"r[0-9]+",
|lexer| match lexer.slice()[1..].parse() {
Ok(n) => Some(n),
_ => None
},
)] Register(u8),
#[regex(
r"\p{XID_Start}\p{XID_Continue}*:",
|lexer| lexer.extras.get_or_intern(&lexer.slice()[..lexer.slice().len() - 1]),
)] Label(Spur),
#[regex(
r"\p{XID_Start}\p{XID_Continue}*",
|lexer| lexer.extras.get_or_intern(lexer.slice()),
)] Symbol(Spur),
#[token("\n")]
#[token(";")] ISep,
#[token(",")] PSep,
}
});
/// Type of error
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ErrorKind {
/// A valid token appeared where a different one was required.
UnexpectedToken,
/// The lexer could not produce a token, or a numeric operand does not
/// fit the operand's type or limit.
InvalidToken,
/// Input ended while an operand or separator was still expected.
UnexpectedEnd,
/// A referenced symbol has no corresponding label.
InvalidSymbol,
}
/// Text assembly error
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Error {
/// What went wrong.
pub kind: ErrorKind,
/// Lexer span at the moment the error was raised. For
/// [`ErrorKind::InvalidSymbol`] this is always `0..0`.
pub span: Span,
}
/// Parse code and insert instructions
pub fn assemble(asm: &mut Assembler, code: &str) -> Result<(), Error> {
pub struct TextAsm<'a> {
asm: &'a mut Assembler,
lexer: Lexer<'a, Token>,
symloc: HashMap<Spur, usize>,
}
impl<'a> TextAsm<'a> {
fn next(&mut self) -> Result<Token, ErrorKind> {
match self.lexer.next() {
Some(Ok(t)) => Ok(t),
Some(Err(())) => Err(ErrorKind::InvalidToken),
None => Err(ErrorKind::UnexpectedEnd),
}
}
#[inline(always)]
fn run(&mut self) -> Result<(), ErrorKind> {
loop {
match self.lexer.next() {
// Got an opcode
Some(Ok(Token::Opcode(op))) => {
match op {
// Special-cased
hbbytecode::opcode::BRC => {
param_extract_itm!(
self,
p0: R,
p1: R,
p2: u8
);
self.asm.i_param_bbb(op, p0, p1, p2);
},
// Take all the opcodes and match them to their corresponding functions
$(
#[allow(unreachable_patterns)]
$(hbbytecode::opcode::$opcode)|* => paste::paste!({
param_extract_itm!(self, $($param_i: $param_ty),*);
self.asm.[<i_param_ $ityn>](op, $($param_i),*);
}),
)*
// Already matched in Logos, should not be able to obtain
// invalid opcode.
_ => unreachable!(),
}
}
// Insert label to table
Some(Ok(Token::Label(lbl))) => {
self.symloc.insert(lbl, self.asm.buf.len());
}
// Instruction separator (LF, ;)
Some(Ok(Token::ISep)) => (),
Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken),
Some(Err(())) => return Err(ErrorKind::InvalidToken),
None => return Ok(()),
}
}
}
}
let mut asm = TextAsm {
asm,
lexer: Token::lexer(code),
symloc: HashMap::default(),
};
asm.run()
.map_err(|kind| Error { kind, span: asm.lexer.span() })?;
// Walk table and substitute labels
// for their addresses
for &loc in &asm.asm.sub {
// Extract indices from the code and get addresses from table
let val = asm.symloc
.get(
&Spur::try_from_usize(bytemuck::pod_read_unaligned::<u64>(
&asm.asm.buf[loc..loc + core::mem::size_of::<u64>()]) as _
).unwrap()
)
.ok_or(Error { kind: ErrorKind::InvalidSymbol, span: 0..0 })?
.to_le_bytes();
// New address
asm.asm.buf[loc..]
.iter_mut()
.zip(val)
.for_each(|(dst, src)| *dst = src);
}
Ok(())
}
// Fun fact: this is a little hack.
// It may slow things down a little bit, but
// it made the macro pretty nice.
//
// If you have any idea how to get rid of this,
// contributions are welcome :)
// I *likely* won't try anymore.
//
// Immediate operand as seen by the text assembler: either a literal number
// or a reference to a label that is resolved after the whole input is parsed.
enum InternalImm {
Const(u64),
Named(Spur),
}
impl $crate::Imm for InternalImm {
#[inline]
fn insert(&self, asm: &mut Assembler) {
match self {
// Constant immediate, just put it in
Self::Const(a) => a.insert(asm),
// Label
Self::Named(a) => {
// Record the current buffer offset in the sub table so that
// `assemble` substitutes the label's address there later
asm.sub.insert(asm.buf.len());
// Insert the interner key (as little-endian u64) as a placeholder
asm.buf.extend((a.into_usize() as u64).to_le_bytes());
},
}
}
}
}
};
}
/// Extract item by pattern, otherwise return [`ErrorKind::UnexpectedToken`]
///
/// Pulls the next token via `$self.next()` (propagating its error with `?`)
/// and binds it against `$pat` using `let … else`; bindings introduced by the
/// pattern are available to the code following the macro invocation.
macro_rules! extract_pat {
($self:expr, $pat:pat) => {
let $pat = $self.next()?
else { return Err(ErrorKind::UnexpectedToken) };
};
}
/// Generate extract macro
///
/// Defines `extract!(self, OPERAND_TYPE, name)`, which reads one operand of
/// the given type from the token stream and binds it to `name`. The listed
/// integer types each get an identical range-checked arm.
macro_rules! gen_extract {
// Integer types have same body
($($int:ident),* $(,)?) => {
/// Extract operand from code
macro_rules! extract {
// Register (require prefixing with r)
($self:expr, R, $id:ident) => {
extract_pat!($self, Token::Register($id));
};
// Length: an integer no greater than 2048, narrowed to u16
// (the `unwrap` cannot fail after the bound check)
($self:expr, L, $id:ident) => {
extract_pat!($self, Token::Integer($id));
if $id > 2048 {
return Err(ErrorKind::InvalidToken);
}
let $id = u16::try_from($id).unwrap();
};
// Immediate
($self:expr, I, $id:ident) => {
let $id = match $self.next()? {
// Either straight up integer
Token::Integer(a) => InternalImm::Const(a),
// …or a label
Token::Symbol(a) => InternalImm::Named(a),
_ => return Err(ErrorKind::UnexpectedToken),
};
};
// Get $int, if not fitting, the token is claimed invalid
$(($self:expr, $int, $id:ident) => {
extract_pat!($self, Token::Integer($id));
let $id = $int::try_from($id).map_err(|_| ErrorKind::InvalidToken)?;
});*;
}
};
}
// Plain integer operand types that appear in the bytecode definition.
gen_extract!(u8, u16, u32);
/// Parameter extract incremental token-tree muncher
///
/// What else would it mean?
///
/// Given `self, name: TYPE, name: TYPE, …`, extracts the first operand with
/// `extract!`, then — if more operands follow — requires an operand separator
/// token and recurses on the rest. An empty parameter list expands to nothing.
macro_rules! param_extract_itm {
($self:expr, $($id:ident: $ty:ident)? $(, $($tt:tt)*)?) => {
// Extract pattern
$(extract!($self, $ty, $id);)?
$(
// Require operand separator
extract_pat!($self, Token::PSep);
// And go to the next (recursive)
// …munch munch… yummy token trees.
param_extract_itm!($self, $($tt)*);
)?
};
}
pub(crate) use {extract, extract_pat, gen_text, param_extract_itm};

View file

@ -1,56 +0,0 @@
use std::io::Write;
use hbasm::Assembler;
use {
ariadne::{ColorGenerator, Label, Report, ReportKind, Source},
std::{
error::Error,
io::{stdin, Read},
},
};
fn main() -> Result<(), Box<dyn Error>> {
let mut code = String::new();
stdin().read_to_string(&mut code)?;
let mut assembler = Assembler::default();
if let Err(e) = hbasm::text::assemble(&mut assembler, &code) {
let mut colors = ColorGenerator::new();
let e_code = match e.kind {
hbasm::text::ErrorKind::UnexpectedToken => 1,
hbasm::text::ErrorKind::InvalidToken => 2,
hbasm::text::ErrorKind::UnexpectedEnd => 3,
hbasm::text::ErrorKind::InvalidSymbol => 4,
};
let message = match e.kind {
hbasm::text::ErrorKind::UnexpectedToken => "This token is not expected!",
hbasm::text::ErrorKind::InvalidToken => "The token is not valid!",
hbasm::text::ErrorKind::UnexpectedEnd => {
"The assembler reached the end of input unexpectedly!"
}
hbasm::text::ErrorKind::InvalidSymbol => {
"This referenced symbol doesn't have a corresponding label!"
}
};
let a = colors.next();
Report::build(ReportKind::Error, "engine_internal", e.span.clone().start)
.with_code(e_code)
.with_message(format!("{:?}", e.kind))
.with_label(
Label::new(("engine_internal", e.span))
.with_message(message)
.with_color(a),
)
.finish()
.eprint(("engine_internal", Source::from(&code)))
.unwrap();
} else {
assembler.finalise();
std::io::stdout().lock().write_all(&assembler.buf).unwrap();
}
Ok(())
}

View file

@ -1,6 +1,7 @@
[package]
name = "hbbytecode"
version = "0.1.0"
edition = "2021"
edition = "2018"
[dependencies]
with_builtin_macros = "0.0.3"

View file

@ -18,10 +18,11 @@ enum hbbc_Opcode: uint8_t {
hbbc_Op_CMP , hbbc_Op_CMPU , hbbc_Op_DIR , hbbc_Op_NEG , hbbc_Op_NOT , hbbc_Op_ADDI ,
hbbc_Op_MULI , hbbc_Op_ANDI , hbbc_Op_ORI , hbbc_Op_XORI , hbbc_Op_SLI , hbbc_Op_SRI ,
hbbc_Op_SRSI , hbbc_Op_CMPI , hbbc_Op_CMPUI , hbbc_Op_CP , hbbc_Op_SWA , hbbc_Op_LI ,
hbbc_Op_LD , hbbc_Op_ST , hbbc_Op_BMC , hbbc_Op_BRC , hbbc_Op_JMP , hbbc_Op_JAL ,
hbbc_Op_JEQ , hbbc_Op_JNE , hbbc_Op_JLT , hbbc_Op_JGT , hbbc_Op_JLTU , hbbc_Op_JGTU ,
hbbc_Op_ECALL , hbbc_Op_ADDF , hbbc_Op_SUBF , hbbc_Op_MULF , hbbc_Op_DIRF , hbbc_Op_FMAF ,
hbbc_Op_NEGF , hbbc_Op_ITF , hbbc_Op_FTI , hbbc_Op_ADDFI , hbbc_Op_MULFI ,
// LRA: load relative address; LD/ST and LDR/STR: absolute and relative memory access; BMC: block memory copy
hbbc_Op_LRA , hbbc_Op_LD , hbbc_Op_ST , hbbc_Op_LDR , hbbc_Op_STR , hbbc_Op_BMC ,
hbbc_Op_BRC , hbbc_Op_JMP , hbbc_Op_JMPR , hbbc_Op_JAL , hbbc_Op_JALR , hbbc_Op_JEQ ,
hbbc_Op_JNE , hbbc_Op_JLT , hbbc_Op_JGT , hbbc_Op_JLTU , hbbc_Op_JGTU , hbbc_Op_ECALL ,
hbbc_Op_ADDF , hbbc_Op_SUBF , hbbc_Op_MULF , hbbc_Op_DIRF , hbbc_Op_FMAF , hbbc_Op_NEGF ,
hbbc_Op_ITF , hbbc_Op_FTI , hbbc_Op_ADDFI , hbbc_Op_MULFI ,
} typedef hbbc_Opcode;
static_assert(sizeof(hbbc_Opcode) == 1);
@ -42,6 +43,12 @@ struct hbbc_ParamBBDH
typedef hbbc_ParamBBDH;
static_assert(sizeof(hbbc_ParamBBDH) == 96 / 8);
// Operand layout B, B, W, H: two bytes, one 32-bit word, one 16-bit half-word.
// NOTE(review): the 8-byte size check only holds without padding — presumably a
// packing directive is in effect earlier in this header; confirm.
struct hbbc_ParamBBWH
{ uint8_t _0; uint8_t _1; uint32_t _2; uint16_t _3; }
typedef hbbc_ParamBBWH;
static_assert(sizeof(hbbc_ParamBBWH) == 64 / 8);
struct hbbc_ParamBBD
{ uint8_t _0; uint8_t _1; uint64_t _2; }
typedef hbbc_ParamBBD;

View file

@ -0,0 +1,64 @@
// OPCODE, MNEMONIC, TYPE, DOC;
0, UN, N, "Cause an unreachable code trap" ;
1, TX, N, "Terminate execution" ;
2, NOP, N, "Do nothing" ;
3, ADD, RRR, "Addition" ;
4, SUB, RRR, "Subtraction" ;
5, MUL, RRR, "Multiplication" ;
6, AND, RRR, "Bitand" ;
7, OR, RRR, "Bitor" ;
8, XOR, RRR, "Bitxor" ;
9, SL, RRR, "Unsigned left bitshift" ;
10, SR, RRR, "Unsigned right bitshift" ;
11, SRS, RRR, "Signed right bitshift" ;
12, CMP, RRR, "Signed comparison" ;
13, CMPU, RRR, "Unsigned comparison" ;
14, DIR, RRRR, "Merged divide-remainder" ;
15, NOT, RR, "Logical negation" ;
16, ADDI, RRD, "Addition with immediate" ;
17, MULI, RRD, "Multiplication with immediate" ;
18, ANDI, RRD, "Bitand with immediate" ;
19, ORI, RRD, "Bitor with immediate" ;
20, XORI, RRD, "Bitxor with immediate" ;
21, SLI, RRW, "Unsigned left bitshift with immediate" ;
22, SRI, RRW, "Unsigned right bitshift with immediate";
23, SRSI, RRW, "Signed right bitshift with immediate" ;
24, CMPI, RRD, "Signed compare with immediate" ;
25, CMPUI, RRD, "Unsigned compare with immediate" ;
26, CP, RR, "Copy register" ;
27, SWA, RR, "Swap registers" ;
28, LI, RD, "Load immediate" ;
29, LRA, RRO, "Load relative address" ;
30, LD, RRAH, "Load from absolute address" ;
31, ST, RRAH, "Store to absolute address" ;
32, LDR, RROH, "Load from relative address" ;
33, STR, RROH, "Store to relative address" ;
34, BMC, RRH, "Copy block of memory" ;
35, BRC, RRB, "Copy register block" ;
36, JMP, A, "Absolute jump" ;
37, JMPR, O, "Relative jump" ;
38, JAL, RRA, "Linking absolute jump" ;
39, JALR, RRO, "Linking relative jump" ;
40, JEQ, RRP, "Branch on equal" ;
41, JNE, RRP, "Branch on nonequal" ;
42, JLT, RRP, "Branch on lesser-than (signed)" ;
43, JGT, RRP, "Branch on greater-than (signed)" ;
44, JLTU, RRP, "Branch on lesser-than (unsigned)" ;
45, JGTU, RRP, "Branch on greater-than (unsigned)" ;
46, ECALL, N, "Issue ecall trap" ;
47, ADDF, RRR, "Floating addition" ;
48, SUBF, RRR, "Floating subtraction" ;
49, MULF, RRR, "Floating multiply" ;
50, DIRF, RRRR, "Merged floating divide-remainder" ;
51, FMAF, RRRR, "Fused floating multiply-add" ;
52, NEGF, RR, "Floating sign negation" ;
53, ITF, RR, "Int to float" ;
54, FTI, RR, "Float to int" ;
55, ADDFI, RRD, "Floating addition with immediate" ;
56, MULFI, RRD, "Floating multiplication with immediate";

View file

@ -1,175 +0,0 @@
//! Generate HoleyBytes code validator
macro_rules! gen_valider {
(
$(
$ityn:ident
($($param_i:ident: $param_ty:ident),* $(,)?)
=> [$($opcode:ident),* $(,)?],
)*
) => {
#[allow(unreachable_code)]
pub mod valider {
//! Validate if program is sound to execute
/// Program validation error kind
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
/// Unknown opcode
InvalidInstruction,
/// VM doesn't implement this valid opcode
Unimplemented,
/// Attempted to copy over register boundary
RegisterArrayOverflow,
/// Program is not validly terminated
InvalidEnd,
/// Program misses magic
MissingMagic
}
/// Error
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Error {
/// Kind
pub kind: ErrorKind,
/// Location in bytecode
pub index: usize,
}
/// Perform bytecode validation. If it passes, the program should be
/// sound to execute.
///
/// Checks, in order:
/// 1. the program starts with the magic number,
/// 2. the program is long enough to hold the magic plus 12 padding bytes,
/// 3. the last 12 bytes are all zero,
/// 4. everything after the magic decodes as a sequence of known instructions,
///    with register-array bounds checked for `LD`/`ST` and `BRC`.
///
/// # Errors
/// Returns the first violation found; `index` is the byte offset into the
/// program (0 for magic and length errors).
pub fn validate(mut program: &[u8]) -> Result<(), Error> {
    // Length of the magic number at the start of the program.
    const MAGIC_LEN: usize = 4;
    // Number of zero (`UN`) bytes every program has to end with.
    const PADDING_LEN: usize = 12;

    // Validate magic
    if program.get(0..MAGIC_LEN) != Some(&0xAB1E0B_u32.to_le_bytes()) {
        return Err(Error {
            kind: ErrorKind::MissingMagic,
            index: 0,
        });
    }

    // Program has to end with 12 zeroes *after* the magic. Requiring only
    // 12 bytes in total would let the padding check overlap the magic, whose
    // last byte is zero, so a program with just 11 padding bytes would pass.
    if program.len() < MAGIC_LEN + PADDING_LEN {
        return Err(Error {
            kind: ErrorKind::InvalidEnd,
            index: 0,
        });
    }

    // Verify that program ends with 12 zeroes
    for (index, item) in program.iter().enumerate().skip(program.len() - PADDING_LEN) {
        if *item != 0 {
            return Err(Error {
                kind: ErrorKind::InvalidEnd,
                index,
            });
        }
    }

    // Keep the original slice so error offsets can be computed from pointers.
    let start = program;
    program = &program[MAGIC_LEN..];
    loop {
        use crate::opcode::*;
        program = match program {
            // End of program
            [] => return Ok(()),

            // Memory load/store cannot go out-of-bounds register array
            //        B    B  D1 D2 D3 D4 D5 D6 D7 D8  H1       H2
            [LD..=ST, reg, _, _, _, _, _, _, _, _, _, count_0, count_1, ..]
                if usize::from(*reg) * 8
                    + usize::from(u16::from_le_bytes([*count_0, *count_1]))
                    > 2048 =>
            {
                return Err(Error {
                    kind: ErrorKind::RegisterArrayOverflow,
                    index: (program.as_ptr() as usize) - (start.as_ptr() as usize),
                });
            }

            // Block register copy cannot go out-of-bounds register array
            [BRC, src, dst, count, ..]
                if src.checked_add(*count).is_none()
                    || dst.checked_add(*count).is_none() =>
            {
                return Err(Error {
                    kind: ErrorKind::RegisterArrayOverflow,
                    index: (program.as_ptr() as usize) - (start.as_ptr() as usize),
                });
            }

            // Any known instruction with all of its operand bytes present:
            // skip over it and continue with the remainder.
            $(
                $crate::gen_valider::inst_chk!(
                    rest, $ityn, $($opcode),*
                )
            )|* => rest,

            // The plebs
            _ => {
                return Err(Error {
                    kind: ErrorKind::InvalidInstruction,
                    index: (program.as_ptr() as usize) - (start.as_ptr() as usize),
                })
            }
        }
    }
}
}
};
}
/// Generate instruction check pattern
///
/// Expands to a slice pattern that matches any of the given opcodes followed
/// by as many operand bytes as the instruction type requires, binding the
/// remainder of the slice to `$rest`.
macro_rules! inst_chk {
// Sadly this has hardcoded instruction types,
// as I cannot generate parts of patterns+
($rest:ident, bbbb, $($opcode:ident),*) => {
// B B B B
[$($opcode)|*, _, _, _, _, $rest @ ..]
};
($rest:ident, bbb, $($opcode:ident),*) => {
// B B B
[$($opcode)|*, _, _, _, $rest @ ..]
};
($rest:ident, bbdh, $($opcode:ident),*) => {
// B B D1 D2 D3 D4 D5 D6 D7 D8 H1 H2
[$($opcode)|*, _, _, _, _, _, _, _, _, _, _, _, _, $rest @ ..]
};
($rest:ident, bbd, $($opcode:ident),*) => {
// B B D1 D2 D3 D4 D5 D6 D7 D8
[$($opcode)|*, _, _, _, _, _, _, _, _, _, _, $rest @ ..]
};
($rest:ident, bbw, $($opcode:ident),*) => {
// B B W1 W2 W3 W4
[$($opcode)|*, _, _, _, _, _, _, $rest @ ..]
};
($rest:ident, bb, $($opcode:ident),*) => {
// B B
[$($opcode)|*, _, _, $rest @ ..]
};
($rest:ident, bd, $($opcode:ident),*) => {
// B D1 D2 D3 D4 D5 D6 D7 D8
[$($opcode)|*, _, _, _, _, _, _, _, _, _, $rest @ ..]
};
($rest:ident, d, $($opcode:ident),*) => {
// D1 D2 D3 D4 D5 D6 D7 D8
[$($opcode)|*, _, _, _, _, _, _, _, _, $rest @ ..]
};
($rest:ident, n, $($opcode:ident),*) => {
// No operand bytes
[$($opcode)|*, $rest @ ..]
};
// Any other instruction type name is rejected at compile time
($_0:ident, $($_1:ident),*) => {
compile_error!("Invalid instruction type");
}
}
pub(crate) use {gen_valider, inst_chk};

View file

@ -1,165 +1,91 @@
#![no_std]
mod gen_valider;
pub type OpR = u8;
macro_rules! constmod {
($vis:vis $mname:ident($repr:ty) {
$(#![doc = $mdoc:literal])?
$($cname:ident = $val:expr $(,$doc:literal)?;)*
}) => {
$(#[doc = $mdoc])?
$vis mod $mname {
pub type OpA = u64;
pub type OpO = u32;
pub type OpP = u16;
pub type OpB = u8;
pub type OpH = u16;
pub type OpW = u32;
pub type OpD = u64;
/// # Safety
/// Has to be valid to be decoded from bytecode.
pub unsafe trait BytecodeItem {}
macro_rules! define_items {
($($name:ident ($($item:ident),* $(,)?)),* $(,)?) => {
$(
#[repr(packed)]
pub struct $name($(pub $item),*);
unsafe impl BytecodeItem for $name {}
)*
};
}
define_items! {
OpsRR (OpR, OpR ),
OpsRRR (OpR, OpR, OpR ),
OpsRRRR (OpR, OpR, OpR, OpR),
OpsRRB (OpR, OpR, OpB ),
OpsRRH (OpR, OpR, OpH ),
OpsRRW (OpR, OpR, OpW ),
OpsRD (OpR, OpD ),
OpsRRD (OpR, OpR, OpD ),
OpsRRAH (OpR, OpR, OpA, OpH),
OpsRROH (OpR, OpR, OpO, OpH),
OpsRRO (OpR, OpR, OpO ),
OpsRRP (OpR, OpR, OpP ),
}
unsafe impl BytecodeItem for OpA {}
unsafe impl BytecodeItem for OpB {}
unsafe impl BytecodeItem for OpO {}
unsafe impl BytecodeItem for () {}
::with_builtin_macros::with_builtin! {
let $spec = include_from_root!("instructions.in") in {
/// Invoke macro with bytecode definition
///
/// # Format
/// ```text
/// Opcode, Mnemonic, Type, Docstring;
/// ```
///
/// # Type
/// ```text
/// Types consist of letters meaning a single field
/// | Type | Size (B) | Meaning |
/// |:-----|:---------|:------------------------|
/// | N | 0 | Empty |
/// | R | 1 | Register |
/// | A | 8 | Absolute address |
/// | O | 4 | Relative address offset |
/// | P | 2 | Relative address offset |
/// | B | 1 | Immediate |
/// | H | 2 | Immediate |
/// | W | 4 | Immediate |
/// | D | 8 | Immediate |
/// ```
#[macro_export]
macro_rules! invoke_with_def {
($macro:path) => {
$macro! { $spec }
};
}
}
}
macro_rules! gen_opcodes {
($($opcode:expr, $mnemonic:ident, $_ty:ident, $doc:literal;)*) => {
pub mod opcode {
$(
$(#[doc = $doc])?
pub const $cname: $repr = $val;
#[doc = $doc]
pub const $mnemonic: u8 = $opcode;
)*
}
};
}
#[allow(rustdoc::invalid_rust_codeblocks)]
/// Invoke macro with bytecode definition
/// # Input syntax
/// ```no_run
/// macro!(
/// INSTRUCTION_TYPE(p0: TYPE, p1: TYPE, …)
/// => [INSTRUCTION_A, INSTRUCTION_B, …],
/// …
/// );
/// ```
/// - Instruction type determines opcode-generic, instruction-type-specific
/// function. Name: `i_param_INSTRUCTION_TYPE`
/// - Per-instructions there will be generated opcode-specific functions calling the generic ones
/// - Operand types
/// - R: Register (u8)
/// - I: Immediate
/// - L: Memory load / store size (u16)
/// - Other types are identity-mapped
///
/// # BRC special-case
/// BRC's 3rd operand is plain byte, not a register. Encoding is the same, but for some cases it may matter.
///
/// Please, if you distinguish in your API between byte and register, special case this one.
///
/// Sorry for that :(
#[macro_export]
macro_rules! invoke_with_def {
($macro:path) => {
$macro!(
bbbb(p0: R, p1: R, p2: R, p3: R)
=> [DIR, DIRF, FMAF],
bbb(p0: R, p1: R, p2: R)
=> [ADD, SUB, MUL, AND, OR, XOR, SL, SR, SRS, CMP, CMPU, BRC, ADDF, SUBF, MULF],
bbdh(p0: R, p1: R, p2: I, p3: L)
=> [LD, ST],
bbd(p0: R, p1: R, p2: I)
=> [ADDI, MULI, ANDI, ORI, XORI, CMPI, CMPUI, BMC, JAL, JEQ, JNE, JLT, JGT, JLTU,
JGTU, ADDFI, MULFI],
bbw(p0: R, p1: R, p2: u32)
=> [SLI, SRI, SRSI],
bb(p0: R, p1: R)
=> [NEG, NOT, CP, SWA, NEGF, ITF, FTI],
bd(p0: R, p1: I)
=> [LI],
d(p0: I)
=> [JMP],
n()
=> [UN, TX, NOP, ECALL],
);
};
}
invoke_with_def!(gen_valider::gen_valider);
constmod!(pub opcode(u8) {
//! Opcode constant module
UN = 0, "N; Raises a trap";
TX = 1, "N; Terminate execution";
NOP = 2, "N; Do nothing";
ADD = 3, "BBB; #0 ← #1 + #2";
SUB = 4, "BBB; #0 ← #1 - #2";
MUL = 5, "BBB; #0 ← #1 × #2";
AND = 6, "BBB; #0 ← #1 & #2";
OR = 7, "BBB; #0 ← #1 | #2";
XOR = 8, "BBB; #0 ← #1 ^ #2";
SL = 9, "BBB; #0 ← #1 « #2";
SR = 10, "BBB; #0 ← #1 » #2";
SRS = 11, "BBB; #0 ← #1 » #2 (signed)";
CMP = 12, "BBB; #0 ← #1 <=> #2";
CMPU = 13, "BBB; #0 ← #1 <=> #2 (unsigned)";
DIR = 14, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3";
NEG = 15, "BB; #0 ← -#1";
NOT = 16, "BB; #0 ← !#1";
ADDI = 17, "BBD; #0 ← #1 + imm #2";
MULI = 18, "BBD; #0 ← #1 × imm #2";
ANDI = 19, "BBD; #0 ← #1 & imm #2";
ORI = 20, "BBD; #0 ← #1 | imm #2";
XORI = 21, "BBD; #0 ← #1 ^ imm #2";
SLI = 22, "BBW; #0 ← #1 « imm #2";
SRI = 23, "BBW; #0 ← #1 » imm #2";
SRSI = 24, "BBW; #0 ← #1 » imm #2 (signed)";
CMPI = 25, "BBD; #0 ← #1 <=> imm #2";
CMPUI = 26, "BBD; #0 ← #1 <=> imm #2 (unsigned)";
CP = 27, "BB; Copy #0 ← #1";
SWA = 28, "BB; Swap #0 and #1";
LI = 29, "BD; #0 ← imm #1";
LD = 30, "BBDB; #0 ← [#1 + imm #3], imm #4 bytes, overflowing";
ST = 31, "BBDB; [#1 + imm #3] ← #0, imm #4 bytes, overflowing";
BMC = 32, "BBD; [#0] ← [#1], imm #2 bytes";
BRC = 33, "BBB; #0 ← #1, imm #2 registers";
JMP = 34, "D; Unconditional, non-linking absolute jump";
JAL = 35, "BD; Copy PC to #0 and unconditional jump [#1 + imm #2]";
JEQ = 36, "BBD; if #0 = #1 → jump imm #2";
JNE = 37, "BBD; if #0 ≠ #1 → jump imm #2";
JLT = 38, "BBD; if #0 < #1 → jump imm #2";
JGT = 39, "BBD; if #0 > #1 → jump imm #2";
JLTU = 40, "BBD; if #0 < #1 → jump imm #2 (unsigned)";
JGTU = 41, "BBD; if #0 > #1 → jump imm #2 (unsigned)";
ECALL = 42, "N; Issue system call";
ADDF = 43, "BBB; #0 ← #1 +. #2";
SUBF = 44, "BBB; #0 ← #1 -. #2";
MULF = 45, "BBB; #0 ← #1 +. #2";
DIRF = 46, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3";
FMAF = 47, "BBBB; #0 ← (#1 * #2) + #3";
NEGF = 48, "BB; #0 ← -#1";
ITF = 49, "BB; #0 ← #1 as float";
FTI = 50, "BB; #0 ← #1 as int";
ADDFI = 51, "BBD; #0 ← #1 +. imm #2";
MULFI = 52, "BBD; #0 ← #1 *. imm #2";
});
#[repr(packed)]
pub struct ParamBBBB(pub u8, pub u8, pub u8, pub u8);
#[repr(packed)]
pub struct ParamBBB(pub u8, pub u8, pub u8);
#[repr(packed)]
pub struct ParamBBDH(pub u8, pub u8, pub u64, pub u16);
#[repr(packed)]
pub struct ParamBBD(pub u8, pub u8, pub u64);
#[repr(packed)]
pub struct ParamBBW(pub u8, pub u8, pub u32);
#[repr(packed)]
pub struct ParamBB(pub u8, pub u8);
#[repr(packed)]
pub struct ParamBD(pub u8, pub u64);
/// # Safety
/// Has to be valid to be decoded from bytecode.
pub unsafe trait ProgramVal {}
unsafe impl ProgramVal for ParamBBBB {}
unsafe impl ProgramVal for ParamBBB {}
unsafe impl ProgramVal for ParamBBDH {}
unsafe impl ProgramVal for ParamBBD {}
unsafe impl ProgramVal for ParamBBW {}
unsafe impl ProgramVal for ParamBB {}
unsafe impl ProgramVal for ParamBD {}
unsafe impl ProgramVal for u64 {}
unsafe impl ProgramVal for u8 {} // Opcode
unsafe impl ProgramVal for () {}
invoke_with_def!(gen_opcodes);

View file

@ -14,7 +14,7 @@
#![cfg_attr(feature = "nightly", feature(fn_align))]
#![warn(missing_docs)]
use mem::{Memory, Address};
use mem::{Address, Memory};
#[cfg(feature = "alloc")]
extern crate alloc;
@ -23,8 +23,8 @@ pub mod mem;
pub mod value;
mod bmc;
mod vmrun;
mod utils;
mod vmrun;
use {bmc::BlockCopier, value::Value};

View file

@ -1,83 +0,0 @@
use hbvm::mem::Address;
use {
hbbytecode::valider::validate,
hbvm::{
mem::{
softpaging::{paging::PageTable, HandlePageFault, PageSize, SoftPagedMem},
MemoryAccessReason,
},
Vm,
},
std::io::{stdin, Read},
};
fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut prog = vec![];
stdin().read_to_end(&mut prog)?;
if let Err(e) = validate(&prog) {
eprintln!("Program validation error: {e:?}");
return Ok(());
} else {
unsafe {
let mut vm = Vm::<_, 0>::new(
SoftPagedMem::<_, true> {
pf_handler: TestTrapHandler,
program: &prog,
root_pt: Box::into_raw(Default::default()),
icache: Default::default(),
},
Address::new(4),
);
let data = {
let ptr = std::alloc::alloc_zeroed(std::alloc::Layout::from_size_align_unchecked(
4096, 4096,
));
if ptr.is_null() {
panic!("Alloc error tbhl");
}
ptr
};
vm.memory
.map(
data,
Address::new(8192),
hbvm::mem::softpaging::paging::Permission::Write,
PageSize::Size4K,
)
.unwrap();
println!("Program interrupt: {:?}", vm.run());
println!("{:?}", vm.registers);
std::alloc::dealloc(
data,
std::alloc::Layout::from_size_align_unchecked(4096, 4096),
);
vm.memory.unmap(Address::new(8192)).unwrap();
let _ = Box::from_raw(vm.memory.root_pt);
}
}
Ok(())
}
pub fn time() -> u32 {
9
}
#[derive(Default)]
struct TestTrapHandler;
impl HandlePageFault for TestTrapHandler {
fn page_fault(
&mut self,
_: MemoryAccessReason,
_: &mut PageTable,
_: Address,
_: PageSize,
_: *mut u8,
) -> bool {
false
}
}

View file

@ -23,6 +23,18 @@ impl Address {
Self(self.0.saturating_sub(rhs.cast_u64()))
}
/// Wrapping addition: computes `self + rhs`, wrapping around on overflow
/// instead of panicking or saturating.
#[inline]
pub fn wrapping_add<T: AddressOp>(self, rhs: T) -> Self {
    let offset = rhs.cast_u64();
    Self(self.0.wrapping_add(offset))
}
/// Wrapping subtraction: computes `self - rhs`, wrapping around on
/// underflow instead of panicking or saturating.
#[inline]
pub fn wrapping_sub<T: AddressOp>(self, rhs: T) -> Self {
    let offset = rhs.cast_u64();
    Self(self.0.wrapping_sub(offset))
}
/// Cast or if smaller, truncate to [`usize`]
pub fn truncate_usize(self) -> usize {
self.0 as _

View file

@ -2,11 +2,11 @@
pub mod softpaging;
mod addr;
pub(crate) mod addr;
pub use addr::Address;
use {crate::utils::impl_display, hbbytecode::ProgramVal};
use {crate::utils::impl_display, hbbytecode::BytecodeItem};
/// Load-store memory access
pub trait Memory {
@ -36,13 +36,13 @@ pub trait Memory {
///
/// # Safety
/// - Data read have to be valid
unsafe fn prog_read<T: ProgramVal>(&mut self, addr: Address) -> Option<T>;
unsafe fn prog_read<T: BytecodeItem>(&mut self, addr: Address) -> Option<T>;
/// Read from program memory to execute
///
/// # Safety
/// - You have to be really sure that these bytes are there, understand?
unsafe fn prog_read_unchecked<T: ProgramVal>(&mut self, addr: Address) -> T;
unsafe fn prog_read_unchecked<T: BytecodeItem>(&mut self, addr: Address) -> T;
}
/// Unhandled load access trap

View file

@ -2,8 +2,6 @@
//!
//! Have fun
use crate::mem::Address;
use {
super::{
bmc::BlockCopier,
@ -11,9 +9,11 @@ use {
value::{Value, ValueVariant},
Vm, VmRunError, VmRunOk,
},
crate::mem::{addr::AddressOp, Address},
core::{cmp::Ordering, mem::size_of, ops},
hbbytecode::{
ParamBB, ParamBBB, ParamBBBB, ParamBBD, ParamBBDH, ParamBBW, ParamBD, ProgramVal,
BytecodeItem, OpA, OpO, OpsRD, OpsRR, OpsRRAH, OpsRRB, OpsRRD, OpsRRH, OpsRRO, OpsRROH,
OpsRRP, OpsRRR, OpsRRRR, OpsRRW,
},
};
@ -70,14 +70,14 @@ where
XOR => self.binary_op::<u64>(ops::BitXor::bitxor),
SL => self.binary_op(|l, r| u64::wrapping_shl(l, r as u32)),
SR => self.binary_op(|l, r| u64::wrapping_shr(l, r as u32)),
SRS => self.binary_op(|l, r| i64::wrapping_shl(l, r as u32)),
// Signed (arithmetic) shift right: reinterpret the operand as `i64` so the
// sign bit is propagated, then store the bit pattern back as `u64`.
SRS => self.binary_op(|l: u64, r| i64::wrapping_shr(l as i64, r as u32) as u64),
CMP => {
// Compare a0 <=> a1
// < → 0
// > → 1
// = → 2
let ParamBBB(tg, a0, a1) = self.decode();
let OpsRRR(tg, a0, a1) = self.decode();
self.write_reg(
tg,
self.read_reg(a0)
@ -89,7 +89,7 @@ where
}
CMPU => {
// Unsigned comparison
let ParamBBB(tg, a0, a1) = self.decode();
let OpsRRR(tg, a0, a1) = self.decode();
self.write_reg(
tg,
self.read_reg(a0)
@ -101,23 +101,12 @@ where
}
NOT => {
// Bitwise negation (complement of all 64 bits)
let ParamBB(tg, a0) = self.decode();
let OpsRR(tg, a0) = self.decode();
self.write_reg(tg, !self.read_reg(a0).cast::<u64>());
}
NEG => {
// Bitwise negation
let ParamBB(tg, a0) = self.decode();
self.write_reg(
tg,
match self.read_reg(a0).cast::<u64>() {
0 => 1_u64,
_ => 0,
},
);
}
DIR => {
// Fused Division-Remainder
let ParamBBBB(dt, rt, a0, a1) = self.decode();
let OpsRRRR(dt, rt, a0, a1) = self.decode();
let a0 = self.read_reg(a0).cast::<u64>();
let a1 = self.read_reg(a1).cast::<u64>();
self.write_reg(dt, a0.checked_div(a1).unwrap_or(u64::MAX));
@ -132,7 +121,7 @@ where
SRI => self.binary_op_ims(u64::wrapping_shr),
SRSI => self.binary_op_ims(i64::wrapping_shr),
CMPI => {
let ParamBBD(tg, a0, imm) = self.decode();
let OpsRRD(tg, a0, imm) = self.decode();
self.write_reg(
tg,
self.read_reg(a0)
@ -142,16 +131,16 @@ where
);
}
CMPUI => {
let ParamBBD(tg, a0, imm) = self.decode();
let OpsRRD(tg, a0, imm) = self.decode();
self.write_reg(tg, self.read_reg(a0).cast::<u64>().cmp(&imm) as i64);
}
CP => {
let ParamBB(tg, a0) = self.decode();
let OpsRR(tg, a0) = self.decode();
self.write_reg(tg, self.read_reg(a0));
}
SWA => {
// Swap registers
let ParamBB(r0, r1) = self.decode();
let OpsRR(r0, r1) = self.decode();
match (r0, r1) {
(0, 0) => (),
(dst, 0) | (0, dst) => self.write_reg(dst, 0_u64),
@ -164,12 +153,16 @@ where
}
}
LI => {
let ParamBD(tg, imm) = self.decode();
let OpsRD(tg, imm) = self.decode();
self.write_reg(tg, imm);
}
LRA => {
let OpsRRO(tg, reg, imm) = self.decode();
self.write_reg(tg, self.rel_addr(reg, imm).get());
}
LD => {
// Load. If loading more than register size, continue on adjacent registers
let ParamBBDH(dst, base, off, count) = self.decode();
let OpsRRAH(dst, base, off, count) = self.decode();
let n: u8 = match dst {
0 => 1,
_ => 0,
@ -181,29 +174,67 @@ where
.as_mut_ptr()
.add(usize::from(dst) + usize::from(n))
.cast(),
usize::from(count).saturating_sub(n.into()),
usize::from(count).wrapping_sub(n.into()),
)?;
}
ST => {
// Store. Same rules apply as to LD
let ParamBBDH(dst, base, off, count) = self.decode();
let OpsRRAH(dst, base, off, count) = self.decode();
self.memory.store(
self.ldst_addr_uber(dst, base, off, count, 0)?,
self.registers.as_ptr().add(usize::from(dst)).cast(),
count.into(),
)?;
}
LDR => {
    // PC-relative load. `decode` has already advanced the program
    // counter past this instruction, so the offset is applied to the
    // address of the following instruction.
    let OpsRROH(dst, base, off, count) = self.decode();
    // When the destination is register 0, start at register 1 and load
    // one unit less (presumably so the zero register is never
    // overwritten; same rule as in `LD`).
    let n: u8 = match dst {
        0 => 1,
        _ => 0,
    };
    self.memory.load(
        self.ldst_addr_uber(
            dst,
            base,
            u64::from(off).wrapping_add(self.pc.get()),
            count,
            n,
        )?,
        self.registers
            .as_mut_ptr()
            .add(usize::from(dst) + usize::from(n))
            .cast(),
        // Saturate rather than wrap: with `count == 0` and `dst == 0` a
        // wrapping subtraction would request `usize::MAX` bytes.
        usize::from(count).saturating_sub(n.into()),
    )?;
}
STR => {
// PC-relative store: the decoded offset is added (wrapping) to the
// program counter, which `decode` has already moved past this instruction.
let OpsRROH(dst, base, off, count) = self.decode();
self.memory.store(
self.ldst_addr_uber(
dst,
base,
u64::from(off).wrapping_add(self.pc.get()),
count,
0,
)?,
// Source is the raw register file starting at register `dst`;
// `count` bytes are handed to the memory implementation.
self.registers.as_ptr().add(usize::from(dst)).cast(),
count.into(),
)?;
}
BMC => {
const INS_SIZE: usize = size_of::<OpsRRH>() + 1;
// Block memory copy
match if let Some(copier) = &mut self.copier {
// There is some copier, poll.
copier.poll(&mut self.memory)
} else {
// There is none, make one!
let ParamBBD(src, dst, count) = self.decode();
let OpsRRH(src, dst, count) = self.decode();
// So we are still on BMC on next cycle
self.pc -= size_of::<ParamBBD>() + 1;
self.pc -= INS_SIZE;
self.copier = Some(BlockCopier::new(
Address::new(self.read_reg(src).cast()),
@ -219,12 +250,12 @@ where
// We are done, shift program counter
core::task::Poll::Ready(Ok(())) => {
self.copier = None;
self.pc += size_of::<ParamBBD>() + 1;
self.pc += INS_SIZE;
}
// Error, shift program counter (for consistency)
// and yield error
core::task::Poll::Ready(Err(e)) => {
self.pc += size_of::<ParamBBD>() + 1;
self.pc += INS_SIZE;
return Err(e.into());
}
// Not done yet, proceed to next cycle
@ -233,7 +264,7 @@ where
}
BRC => {
// Block register copy
let ParamBBB(src, dst, count) = self.decode();
let OpsRRB(src, dst, count) = self.decode();
if src.checked_add(count).is_none() || dst.checked_add(count).is_none() {
return Err(VmRunError::RegOutOfBounds);
}
@ -244,21 +275,25 @@ where
usize::from(count),
);
}
JMP => self.pc = Address::new(self.decode::<u64>()),
JMP => self.pc = Address::new(self.decode::<OpA>()),
JMPR => self.pc = self.pc.wrapping_add(self.decode::<OpO>()),
JAL => {
// Jump and link. Save PC after this instruction to
// specified register and jump to reg + offset.
let ParamBBD(save, reg, offset) = self.decode();
let OpsRRW(save, reg, offset) = self.decode();
self.write_reg(save, self.pc.get());
self.pc =
Address::new(self.read_reg(reg).cast::<u64>().saturating_add(offset));
self.pc = Address::new(
self.read_reg(reg).cast::<u64>().wrapping_add(offset.into()),
);
}
// Conditional jumps, jump only to immediates
JEQ => self.cond_jmp::<u64>(Ordering::Equal),
JNE => {
let ParamBBD(a0, a1, jt) = self.decode();
let OpsRRP(a0, a1, ja) = self.decode();
if self.read_reg(a0).cast::<u64>() != self.read_reg(a1).cast::<u64>() {
self.pc = Address::new(jt);
self.pc = Address::new(
((self.pc.get() as i64).wrapping_add(ja as i64)) as u64,
)
}
}
JLT => self.cond_jmp::<u64>(Ordering::Less),
@ -278,14 +313,14 @@ where
SUBF => self.binary_op::<f64>(ops::Sub::sub),
MULF => self.binary_op::<f64>(ops::Mul::mul),
DIRF => {
let ParamBBBB(dt, rt, a0, a1) = self.decode();
let OpsRRRR(dt, rt, a0, a1) = self.decode();
let a0 = self.read_reg(a0).cast::<f64>();
let a1 = self.read_reg(a1).cast::<f64>();
self.write_reg(dt, a0 / a1);
self.write_reg(rt, a0 % a1);
}
FMAF => {
let ParamBBBB(dt, a0, a1, a2) = self.decode();
let OpsRRRR(dt, a0, a1, a2) = self.decode();
self.write_reg(
dt,
self.read_reg(a0).cast::<f64>() * self.read_reg(a1).cast::<f64>()
@ -293,15 +328,15 @@ where
);
}
NEGF => {
let ParamBB(dt, a0) = self.decode();
let OpsRR(dt, a0) = self.decode();
self.write_reg(dt, -self.read_reg(a0).cast::<f64>());
}
ITF => {
let ParamBB(dt, a0) = self.decode();
let OpsRR(dt, a0) = self.decode();
self.write_reg(dt, self.read_reg(a0).cast::<i64>() as f64);
}
FTI => {
let ParamBB(dt, a0) = self.decode();
let OpsRR(dt, a0) = self.decode();
self.write_reg(dt, self.read_reg(a0).cast::<f64>() as i64);
}
ADDFI => self.binary_op_imm::<f64>(ops::Add::add),
@ -321,7 +356,7 @@ where
/// Decode instruction operands
#[inline(always)]
unsafe fn decode<T: ProgramVal>(&mut self) -> T {
unsafe fn decode<T: BytecodeItem>(&mut self) -> T {
let pc1 = self.pc + 1_u64;
let data = self.memory.prog_read_unchecked::<T>(pc1 as _);
self.pc += 1 + size_of::<T>();
@ -331,7 +366,7 @@ where
/// Perform binary operating over two registers
#[inline(always)]
unsafe fn binary_op<T: ValueVariant>(&mut self, op: impl Fn(T, T) -> T) {
let ParamBBB(tg, a0, a1) = self.decode();
let OpsRRR(tg, a0, a1) = self.decode();
self.write_reg(
tg,
op(self.read_reg(a0).cast::<T>(), self.read_reg(a1).cast::<T>()),
@ -341,7 +376,7 @@ where
/// Perform binary operation over register and immediate
#[inline(always)]
unsafe fn binary_op_imm<T: ValueVariant>(&mut self, op: impl Fn(T, T) -> T) {
let ParamBBD(tg, reg, imm) = self.decode();
let OpsRRD(tg, reg, imm) = self.decode();
self.write_reg(
tg,
op(self.read_reg(reg).cast::<T>(), Value::from(imm).cast::<T>()),
@ -351,36 +386,44 @@ where
/// Perform binary operation over register and shift immediate
#[inline(always)]
unsafe fn binary_op_ims<T: ValueVariant>(&mut self, op: impl Fn(T, u32) -> T) {
let ParamBBW(tg, reg, imm) = self.decode();
let OpsRRW(tg, reg, imm) = self.decode();
self.write_reg(tg, op(self.read_reg(reg).cast::<T>(), imm));
}
/// Jump at `#3` if ordering on `#0 <=> #1` is equal to expected
/// Compute an address relative to the program counter and a register value:
/// `PC + register[reg] + imm`, with every addition wrapping on overflow.
#[inline(always)]
fn rel_addr(&self, reg: u8, imm: impl AddressOp) -> Address {
    let reg_value = self.read_reg(reg).cast::<u64>();
    let base = self.pc.wrapping_add(reg_value);
    base.wrapping_add(imm)
}
/// Jump at `PC + #3` if ordering on `#0 <=> #1` is equal to expected
#[inline(always)]
unsafe fn cond_jmp<T: ValueVariant + Ord>(&mut self, expected: Ordering) {
let ParamBBD(a0, a1, ja) = self.decode();
let OpsRRP(a0, a1, ja) = self.decode();
if self
.read_reg(a0)
.cast::<T>()
.cmp(&self.read_reg(a1).cast::<T>())
== expected
{
self.pc = Address::new(ja);
self.pc = Address::new(((self.pc.get() as i64).wrapping_add(ja as i64)) as u64);
}
}
/// Read register
#[inline(always)]
unsafe fn read_reg(&self, n: u8) -> Value {
*self.registers.get_unchecked(n as usize)
fn read_reg(&self, n: u8) -> Value {
unsafe { *self.registers.get_unchecked(n as usize) }
}
/// Write a register.
/// Writing to register 0 is no-op.
#[inline(always)]
unsafe fn write_reg(&mut self, n: u8, value: impl Into<Value>) {
fn write_reg(&mut self, n: u8, value: impl Into<Value>) {
if n != 0 {
*self.registers.get_unchecked_mut(n as usize) = value.into();
unsafe { *self.registers.get_unchecked_mut(n as usize) = value.into() };
}
}

7
hbxrt/Cargo.toml Normal file
View file

@ -0,0 +1,7 @@
[package]
name = "hbxrt"
version = "0.1.0"
edition = "2021"
[dependencies]
hbvm.path = "../hbvm"

11
hbxrt/src/main.rs Normal file
View file

@ -0,0 +1,11 @@
use std::io::{stdin, Read};
/// Holey Bytes Experimental Runtime
///
/// Reads the whole program from standard input into a buffer and prints a
/// warning to standard error. The program is not executed yet.
///
/// # Errors
/// Returns the I/O error if reading standard input fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut prog = vec![];
    stdin().read_to_end(&mut prog)?;
    eprintln!("WARNING! Bytecode valider has not been implemented yet and running a program can lead to undefined behaviour.");
    Ok(())
}

73
spec.md
View file

@ -148,6 +148,12 @@
|:------:|:----:|:--------------:|
| 29 | LI | Load immediate |
### Load relative address
- Type BBW
| Opcode | Name | Action |
|:------:|:----:|:-----------------------:|
| 30 | LRA | `#0 ← #1 + imm #2 + PC` |
## Memory operations
- Type BBDH
- If loaded/store value exceeds one register size, continue accessing following registers
@ -155,8 +161,15 @@
### Load / Store
| Opcode | Name | Action |
|:------:|:----:|:---------------------------------------:|
| 30 | LD | `#0 ← [#1 + imm #3], copy imm #4 bytes` |
| 31 | ST | `[#1 + imm #3] ← #0, copy imm #4 bytes` |
| 31 | LD | `#0 ← [#1 + imm #2], copy imm #3 bytes` |
| 32 | ST | `[#1 + imm #2] ← #0, copy imm #3 bytes` |
### PC relative Load / Store
- Type BBWH
| Opcode | Name | Action |
|:------:|:----:|:--------------------------------------------:|
| 33 | LDR | `#0 ← [#1 + imm #2 + PC], copy imm #3 bytes` |
| 34 | STR | `[#1 + imm #2 + PC] ← #0, copy imm #3 bytes` |
## Block copy
- Block copy source and target can overlap
@ -166,7 +179,7 @@
| Opcode | Name | Action |
|:------:|:----:|:--------------------------------:|
| 32 | BMC | `[#1] ← [#0], copy imm #2 bytes` |
| 35 | BMC | `[#1] ← [#0], copy imm #2 bytes` |
### Register copy
- Type BBB
@ -174,42 +187,44 @@
| Opcode | Name | Action |
|:------:|:----:|:--------------------------------:|
| 33 | BRC | `#1 ← #0, copy imm #2 registers` |
| 36 | BRC | `#1 ← #0, copy imm #2 registers` |
## Control flow
### Unconditional jump
- Type D
| Opcode | Name | Action |
|:------:|:----:|:-------------------------------:|
| 34 | JMP | Unconditional, non-linking jump |
| Opcode | Name | Action |
|:------:|:----:|:-------------------------------------------:|
| 37 | JMP | Unconditional, non-linking jump |
| 38 | JMPR | Jump at address relative to program counter |
### Unconditional linking jump
- Type BBD
| Opcode | Name | Action |
|:------:|:----:|:--------------------------------------------------:|
| 35 | JAL | Save PC past JAL to `#0` and jump at `#1 + imm #2` |
| Opcode | Name | Action |
|:------:|:----:|:-------------------------------------------------------:|
| 39 | JAL | Save PC past JAL to `#0` and jump at `#1 + imm #2` |
| 40 | JALR | Save PC past JALR to `#0` and jump at `#1 + imm #2 + PC` |
### Conditional jumps
- Type BBD
- Jump at `imm #2` if `#0 <op> #1`
- Type BBH
- Jump at `PC + imm #2` if `#0 <op> #1`
| Opcode | Name | Comparison |
|:------:|:----:|:------------:|
| 36 | JEQ | = |
| 37 | JNE | ≠ |
| 38 | JLT | < (signed) |
| 39 | JGT | > (signed) |
| 40 | JLTU | < (unsigned) |
| 41 | JGTU | > (unsigned) |
| 41 | JEQ | = |
| 42 | JNE | ≠ |
| 43 | JLT | < (signed) |
| 44 | JGT | > (signed) |
| 45 | JLTU | < (unsigned) |
| 46 | JGTU | > (unsigned) |
### Environment call
- Type N
| Opcode | Name | Action |
|:------:|:-----:|:-------------------------------------:|
| 42 | ECALL | Cause an trap to the host environment |
| 47 | ECALL | Cause a trap to the host environment |
## Floating point operations
- Type BBB
@ -217,29 +232,29 @@
| Opcode | Name | Action |
|:------:|:----:|:--------------:|
| 43 | ADDF | Addition |
| 44 | SUBF | Subtraction |
| 45 | MULF | Multiplication |
| 48 | ADDF | Addition |
| 49 | SUBF | Subtraction |
| 50 | MULF | Multiplication |
### Division-remainder
- Type BBBB
| Opcode | Name | Action |
|:------:|:----:|:-------------------------:|
| 46 | DIRF | Same as for integer `DIR` |
| 51 | DIRF | Same as for integer `DIR` |
### Fused Multiply-Add
- Type BBBB
| Opcode | Name | Action |
|:------:|:----:|:---------------------:|
| 47 | FMAF | `#0 ← (#1 * #2) + #3` |
| 52 | FMAF | `#0 ← (#1 * #2) + #3` |
### Negation
- Type BB
| Opcode | Name | Action |
|:------:|:----:|:----------:|
| 48 | NEGF | `#0 ← -#1` |
| 53 | NEGF | `#0 ← -#1` |
### Conversion
- Type BB
@ -248,8 +263,8 @@
| Opcode | Name | Action |
|:------:|:----:|:------------:|
| 49 | ITF | Int to Float |
| 50 | FTI | Float to Int |
| 54 | ITF | Int to Float |
| 55 | FTI | Float to Int |
## Floating point immediate operations
- Type BBD
@ -257,8 +272,8 @@
| Opcode | Name | Action |
|:------:|:-----:|:--------------:|
| 51 | ADDFI | Addition |
| 52 | MULFI | Multiplication |
| 56 | ADDFI | Addition |
| 57 | MULFI | Multiplication |
# Registers
- There are 255 registers + one zero register (with index 0)