diff --git a/Cargo.lock b/Cargo.lock index 0ca3e82..ef2eeb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,6 +13,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "cfg-if" version = "1.0.0" @@ -20,8 +26,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "compiler" -version = "0.1.0" +name = "delegate" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d358e0ec5c59a5e1603b933def447096886121660fc680dc1e64a0753981fe3c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "hashbrown" @@ -32,12 +51,39 @@ dependencies = [ "ahash", ] +[[package]] +name = "hbasm" +version = "0.1.0" +dependencies = [ + "hbbytecode", + "lasso", + "logos", + "paste", +] + +[[package]] +name = "hbbytecode" +version = "0.1.0" + [[package]] name = "hbvm" version = "0.1.0" dependencies = [ + "delegate", "hashbrown", + "hbbytecode", "log", + "paste", + "static_assertions", +] + +[[package]] +name = "lasso" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2" +dependencies = [ + "hashbrown", ] [[package]] @@ -49,12 +95,108 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "logos" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c000ca4d908ff18ac99b93a062cb8958d331c3220719c52e77cb19cc6ac5d2c1" +dependencies = [ + "logos-derive", +] + +[[package]] +name = 
"logos-codegen" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc487311295e0002e452025d6b580b77bb17286de87b57138f3b5db711cded68" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-syntax", + "syn 2.0.18", +] + +[[package]] +name = "logos-derive" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbfc0d229f1f42d790440136d941afd806bc9e949e2bcb8faa813b0f00d1267e" +dependencies = [ + "logos-codegen", +] + [[package]] name = "once_cell" version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +[[package]] +name = "paste" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" + +[[package]] +name = "proc-macro2" +version = "1.0.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 
+dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" + [[package]] name = "version_check" version = "0.9.4" diff --git a/Cargo.toml b/Cargo.toml index 2d921c2..7fc1ade 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,2 @@ [workspace] -members = ["hbvm", "compiler"] +members = ["hbasm", "hbbytecode", "hbvm"] diff --git a/DESIGN_DOC.md b/DESIGN_DOC.md deleted file mode 100644 index 5face2e..0000000 --- a/DESIGN_DOC.md +++ /dev/null @@ -1,28 +0,0 @@ -# Math operations -``` -MATH_OP - Add - Sub - Mul - Div - Mod -``` -``` -MATH_TYPE - Unsigned - Signed - FloatingPoint -``` - -``` -MATH_OP_SIDES - Register Constant - Register Register - Constant Constant - Constant Register -``` -`[MATH_OP] [MATH_OP_SIDES] [MATH_TYPE] [IMM_LHS] [IMM_RHS] [REG]` - - - - diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml deleted file mode 100644 index 41b7a45..0000000 --- a/compiler/Cargo.toml +++ /dev/null @@ -1,8 +0,0 @@ -[package] -name = "compiler" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] diff --git a/compiler/src/main.rs b/compiler/src/main.rs deleted file mode 100644 index 8f14d17..0000000 --- a/compiler/src/main.rs +++ /dev/null @@ -1,5 +0,0 @@ -fn main() { - let prog = "load 1, A0 - jump 0"; - println!("Hello, world!"); -} diff --git a/hbasm/Cargo.toml b/hbasm/Cargo.toml new file mode 100644 index 0000000..1ab3f41 --- /dev/null +++ b/hbasm/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = 
"hbasm" +version = "0.1.0" +edition = "2021" + +[dependencies] +hbbytecode = { path = "../hbbytecode" } +lasso = "0.7" +paste = "1.0" + +[dependencies.logos] +version = "0.13" +default-features = false +features = ["export_derive"] diff --git a/hbasm/src/lib.rs b/hbasm/src/lib.rs new file mode 100644 index 0000000..3313693 --- /dev/null +++ b/hbasm/src/lib.rs @@ -0,0 +1,269 @@ +use std::collections::HashMap; +use { + lasso::{Rodeo, Spur}, + logos::{Lexer, Logos, Span}, + std::fmt::{Display, Formatter}, +}; + +macro_rules! tokendef { + ($($opcode:literal),* $(,)?) => { + paste::paste! { + #[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)] + #[logos(extras = Rodeo)] + #[logos(skip r"[ \t\f]+")] + #[logos(skip r"-- .*")] + pub enum Token { + $(#[token($opcode, |_| hbbytecode::opcode::[<$opcode:upper>])])* + OpCode(u8), + + #[regex("[0-9]+", |l| l.slice().parse().ok())] + #[regex( + "-[0-9]+", + |lexer| { + Some(u64::from_ne_bytes(lexer.slice().parse::<i64>().ok()?.to_ne_bytes())) + }, + )] Integer(u64), + + #[regex( + "r[0-9]+", + |lexer| match lexer.slice()[1..].parse() { + Ok(n) => Some(n), + _ => None + }, + )] Register(u8), + + #[regex( + r"\p{XID_Start}\p{XID_Continue}*:", + |lexer| lexer.extras.get_or_intern(&lexer.slice()[..lexer.slice().len() - 1]), + )] Label(Spur), + + #[regex( + r"\p{XID_Start}\p{XID_Continue}*", + |lexer| lexer.extras.get_or_intern(lexer.slice()), + )] Symbol(Spur), + + #[token("\n")] + #[token(";")] ISep, + #[token(",")] PSep, + } + } + }; +} + +#[rustfmt::skip] +tokendef![ + "nop", "add", "sub", "mul", "and", "or", "xor", "sl", "sr", "srs", "cmp", "cmpu", + "dir", "neg", "not", "addi", "muli", "andi", "ori", "xori", "sli", "sri", "srsi", + "cmpi", "cmpui", "cp", "swa", "li", "ld", "st", "bmc", "brc", "jmp", "jeq", "jne", + "jlt", "jgt", "jltu", "jgtu", "ecall", "addf", "mulf", "dirf", "addfi", "mulfi", +]; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ErrorKind { + UnexpectedToken, + InvalidToken, + UnexpectedEnd, + 
InvalidSymbol, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Error { + pub kind: ErrorKind, + pub span: Span, +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "Error {:?} at {:?}", self.kind, self.span) + } +} + +impl std::error::Error for Error {} + +macro_rules! expect_matches { + ($self:expr, $($pat:pat),* $(,)?) => {$( + let $pat = $self.next()? + else { return Err(ErrorKind::UnexpectedToken) }; + )*} +} + +pub fn assembly(code: &str, buf: &mut Vec<u8>) -> Result<(), Error> { + struct Assembler<'a> { + lexer: Lexer<'a, Token>, + buf: &'a mut Vec<u8>, + label_map: HashMap<Spur, u64>, + to_sub_label: HashMap<usize, Spur>, + } + + impl<'a> Assembler<'a> { + fn next(&mut self) -> Result<Token, ErrorKind> { + match self.lexer.next() { + Some(Ok(t)) => Ok(t), + Some(Err(())) => Err(ErrorKind::InvalidToken), + None => Err(ErrorKind::UnexpectedEnd), + } + } + + fn assemble(&mut self) -> Result<(), ErrorKind> { + use hbbytecode::opcode::*; + loop { + match self.lexer.next() { + Some(Ok(Token::OpCode(op))) => { + self.buf.push(op); + match op { + NOP | ECALL => Ok(()), + DIR | DIRF => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + Token::PSep, + Token::Register(r2), + Token::PSep, + Token::Register(r3), + ); + self.buf.extend([r0, r1, r2, r3]); + Ok(()) + } + ADD..=CMPU | ADDF..=MULF => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + Token::PSep, + Token::Register(r2), + ); + self.buf.extend([r0, r1, r2]); + Ok(()) + } + BRC => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + Token::PSep, + Token::Integer(count), + ); + self.buf.extend([ + r0, + r1, + u8::try_from(count).map_err(|_| ErrorKind::UnexpectedToken)?, + ]); + Ok(()) + } + NEG..=NOT | CP..=SWA => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + ); + self.buf.extend([r0, r1]); + Ok(()) + } + LI | JMP => { + 
expect_matches!(self, Token::Register(r0), Token::PSep); + self.buf.push(r0); + self.insert_imm()?; + Ok(()) + } + ADDI..=CMPUI | BMC | JEQ..=JGTU | ADDFI..=MULFI => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + Token::PSep, + ); + self.buf.extend([r0, r1]); + self.insert_imm()?; + Ok(()) + } + LD..=ST => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + Token::PSep, + Token::Integer(offset), + Token::PSep, + Token::Integer(len), + ); + self.buf.extend([r0, r1]); + self.buf.extend(offset.to_le_bytes()); + self.buf.extend( + u16::try_from(len) + .map_err(|_| ErrorKind::InvalidToken)? + .to_le_bytes(), + ); + Ok(()) + } + _ => unreachable!(), + }?; + match self.next() { + Ok(Token::ISep) => (), + Ok(_) => return Err(ErrorKind::UnexpectedToken), + Err(ErrorKind::UnexpectedEnd) => return Ok(()), + Err(e) => return Err(e), + } + } + Some(Ok(Token::Label(lbl))) => { + self.label_map.insert(lbl, self.buf.len() as u64); + } + Some(Ok(Token::ISep)) => (), + Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken), + Some(Err(())) => return Err(ErrorKind::InvalidToken), + None => return Ok(()), + } + } + } + + fn link_local_syms(&mut self) -> Result<(), ErrorKind> { + for (ix, sym) in &self.to_sub_label { + self.label_map + .get(sym) + .ok_or(ErrorKind::InvalidSymbol)? + .to_le_bytes() + .iter() + .enumerate() + .for_each(|(i, b)| { + self.buf[ix + i] = *b; + }); + } + + Ok(()) + } + + fn insert_imm(&mut self) -> Result<(), ErrorKind> { + let imm = match self.next()? 
{ + Token::Integer(i) => i.to_le_bytes(), + Token::Symbol(s) => { + self.to_sub_label.insert(self.buf.len(), s); + [0; 8] + } + _ => return Err(ErrorKind::UnexpectedToken), + }; + self.buf.extend(imm); + Ok(()) + } + } + + let mut asm = Assembler { + lexer: Token::lexer(code), + label_map: Default::default(), + to_sub_label: Default::default(), + buf, + }; + + asm.assemble().map_err(|kind| Error { + kind, + span: asm.lexer.span(), + })?; + + asm.link_local_syms() + .map_err(|kind| Error { kind, span: 0..0 }) +} diff --git a/hbasm/src/main.rs b/hbasm/src/main.rs new file mode 100644 index 0000000..6eae52f --- /dev/null +++ b/hbasm/src/main.rs @@ -0,0 +1,21 @@ +use std::{ + error::Error, + io::{stdin, stdout, Read, Write}, +}; + +fn main() -> Result<(), Box<dyn Error>> { + let mut code = String::new(); + stdin().read_to_string(&mut code)?; + + let mut buf = vec![]; + if let Err(e) = hbasm::assembly(&code, &mut buf) { + eprintln!( + "Error {:?} at {:?} (`{}`)", + e.kind, + e.span.clone(), + &code[e.span], + ); + } + stdout().write_all(&buf)?; + Ok(()) +} diff --git a/hbbytecode/Cargo.toml b/hbbytecode/Cargo.toml new file mode 100644 index 0000000..fa9f970 --- /dev/null +++ b/hbbytecode/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "hbbytecode" +version = "0.1.0" +edition = "2021" + +[dependencies] diff --git a/hbbytecode/hbbytecode.h b/hbbytecode/hbbytecode.h new file mode 100644 index 0000000..996e99d --- /dev/null +++ b/hbbytecode/hbbytecode.h @@ -0,0 +1,60 @@ +/* HoleyBytes Bytecode representation in C + * Requires C23 compiler or better + */ + +#pragma once +#include <assert.h> +#include <stdint.h> + +typedef enum hbbc_Opcode: uint8_t { + hbbc_Op_NOP, hbbc_Op_ADD, hbbc_Op_SUB, hbbc_Op_MUL, hbbc_Op_AND, hbbc_Op_OR, hbbc_Op_XOR, hbbc_Op_SL, + hbbc_Op_SR, hbbc_Op_SRS, hbbc_Op_CMP, hbbc_Op_CMPU, hbbc_Op_DIR, hbbc_Op_NEG, hbbc_Op_NOT, + hbbc_Op_ADDI, hbbc_Op_MULI, hbbc_Op_ANDI, hbbc_Op_ORI, hbbc_Op_XORI, hbbc_Op_SLI, hbbc_Op_SRI, + hbbc_Op_SRSI, hbbc_Op_CMPI, hbbc_Op_CMPUI, hbbc_Op_CP, hbbc_Op_SWA, hbbc_Op_LI, 
hbbc_Op_LD, + hbbc_Op_ST, hbbc_Op_BMC, hbbc_Op_BRC, hbbc_Op_JMP, hbbc_Op_JEQ, hbbc_Op_JNE, hbbc_Op_JLT, + hbbc_Op_JGT, hbbc_Op_JLTU, hbbc_Op_JGTU, hbbc_Op_ECALL, hbbc_Op_ADDF, hbbc_Op_MULF, + hbbc_Op_DIRF, hbbc_Op_ADDFI, hbbc_Op_MULFI, +} hbbc_Opcode; + +static_assert(sizeof(hbbc_Opcode) == 1); + +#pragma pack(push, 1) +typedef struct hbbc_ParamBBBB + { uint8_t _0; uint8_t _1; uint8_t _2; uint8_t _3; } + hbbc_ParamBBBB; + static_assert(sizeof(hbbc_ParamBBBB) == 4); + +typedef struct hbbc_ParamBBB + { uint8_t _0; uint8_t _1; uint8_t _2; } + hbbc_ParamBBB; + static_assert(sizeof(hbbc_ParamBBB) == 3); + +typedef struct hbbc_ParamBBDH + { uint8_t _0; uint8_t _1; uint64_t _2; uint16_t _3; } + hbbc_ParamBBDH; + static_assert(sizeof(hbbc_ParamBBDH) == 12); + +typedef struct hbbc_ParamBBDB + { uint8_t _0; uint8_t _1; uint64_t _2; uint8_t _3; } + hbbc_ParamBBDB; + static_assert(sizeof(hbbc_ParamBBDB) == 11); + +typedef struct hbbc_ParamBBD + { uint8_t _0; uint8_t _1; uint64_t _2; } + hbbc_ParamBBD; + static_assert(sizeof(hbbc_ParamBBD) == 10); + +typedef struct hbbc_ParamBB + { uint8_t _0; uint8_t _1; } + hbbc_ParamBB; + static_assert(sizeof(hbbc_ParamBB) == 2); + +typedef struct hbbc_ParamBD + { uint8_t _0; uint64_t _1; } + hbbc_ParamBD; + static_assert(sizeof(hbbc_ParamBD) == 9); + +typedef uint64_t hbbc_ParamD; + static_assert(sizeof(hbbc_ParamD) == 8); + +#pragma pack(pop) diff --git a/hbbytecode/src/lib.rs b/hbbytecode/src/lib.rs new file mode 100644 index 0000000..78298bb --- /dev/null +++ b/hbbytecode/src/lib.rs @@ -0,0 +1,106 @@ +#![no_std] + +macro_rules! constmod { + ($vis:vis $mname:ident($repr:ty) { + $(#![doc = $mdoc:literal])? + $($cname:ident = $val:expr $(,$doc:literal)?;)* + }) => { + $(#[doc = $mdoc])? + $vis mod $mname { + $( + $(#[doc = $doc])? + pub const $cname: $repr = $val; + )* + } + }; +} + +constmod!(pub opcode(u8) { + //! 
Opcode constant module + + NOP = 0, "N; Do nothing"; + + ADD = 1, "BBB; #0 ← #1 + #2"; + SUB = 2, "BBB; #0 ← #1 - #2"; + MUL = 3, "BBB; #0 ← #1 × #2"; + AND = 4, "BBB; #0 ← #1 & #2"; + OR = 5, "BBB; #0 ← #1 | #2"; + XOR = 6, "BBB; #0 ← #1 ^ #2"; + SL = 7, "BBB; #0 ← #1 « #2"; + SR = 8, "BBB; #0 ← #1 » #2"; + SRS = 9, "BBB; #0 ← #1 » #2 (signed)"; + CMP = 10, "BBB; #0 ← #1 <=> #2"; + CMPU = 11, "BBB; #0 ← #1 <=> #2 (unsigned)"; + DIR = 12, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3"; + NEG = 13, "BB; #0 ← ~#1"; + NOT = 14, "BB; #0 ← !#1"; + + ADDI = 15, "BBD; #0 ← #1 + imm #2"; + MULI = 16, "BBD; #0 ← #1 × imm #2"; + ANDI = 17, "BBD; #0 ← #1 & imm #2"; + ORI = 18, "BBD; #0 ← #1 | imm #2"; + XORI = 19, "BBD; #0 ← #1 ^ imm #2"; + SLI = 20, "BBD; #0 ← #1 « imm #2"; + SRI = 21, "BBD; #0 ← #1 » imm #2"; + SRSI = 22, "BBD; #0 ← #1 » imm #2 (signed)"; + CMPI = 23, "BBD; #0 ← #1 <=> imm #2"; + CMPUI = 24, "BBD; #0 ← #1 <=> imm #2 (unsigned)"; + + CP = 25, "BB; Copy #0 ← #1"; + SWA = 26, "BB; Swap #0 and #1"; + LI = 27, "BD; #0 ← imm #1"; + LD = 28, "BBDH; #0 ← [#1 + imm #2], imm #3 bytes, overflowing"; + ST = 29, "BBDH; [#1 + imm #2] ← #0, imm #3 bytes, overflowing"; + BMC = 30, "BBD; [#0] ← [#1], imm #2 bytes"; + BRC = 31, "BBB; #0 ← #1, imm #2 registers"; + + JMP = 32, "BD; Unconditional jump [#0 + imm #1]"; + JEQ = 33, "BBD; if #0 = #1 → jump imm #2"; + JNE = 34, "BBD; if #0 ≠ #1 → jump imm #2"; + JLT = 35, "BBD; if #0 < #1 → jump imm #2"; + JGT = 36, "BBD; if #0 > #1 → jump imm #2"; + JLTU = 37, "BBD; if #0 < #1 → jump imm #2 (unsigned)"; + JGTU = 38, "BBD; if #0 > #1 → jump imm #2 (unsigned)"; + ECALL = 39, "N; Issue system call"; + + ADDF = 40, "BBB; #0 ← #1 +. #2"; + MULF = 41, "BBB; #0 ← #1 *. #2"; + DIRF = 42, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3"; + + ADDFI = 43, "BBD; #0 ← #1 +. imm #2"; + MULFI = 44, "BBD; #0 ← #1 *. 
imm #2"; +}); + +#[repr(packed)] +pub struct ParamBBBB(pub u8, pub u8, pub u8, pub u8); + +#[repr(packed)] +pub struct ParamBBB(pub u8, pub u8, pub u8); + +#[repr(packed)] +pub struct ParamBBDH(pub u8, pub u8, pub u64, pub u16); + +#[repr(packed)] +pub struct ParamBBDB(pub u8, pub u8, pub u64, pub u8); + +#[repr(packed)] +pub struct ParamBBD(pub u8, pub u8, pub u64); + +#[repr(packed)] +pub struct ParamBB(pub u8, pub u8); + +#[repr(packed)] +pub struct ParamBD(pub u8, pub u64); + +/// # Safety +/// Has to be valid to be decoded from bytecode. +pub unsafe trait OpParam {} +unsafe impl OpParam for ParamBBBB {} +unsafe impl OpParam for ParamBBB {} +unsafe impl OpParam for ParamBBDB {} +unsafe impl OpParam for ParamBBDH {} +unsafe impl OpParam for ParamBBD {} +unsafe impl OpParam for ParamBB {} +unsafe impl OpParam for ParamBD {} +unsafe impl OpParam for u64 {} +unsafe impl OpParam for () {} diff --git a/hbvm/Cargo.toml b/hbvm/Cargo.toml index bac4d36..739c62b 100644 --- a/hbvm/Cargo.toml +++ b/hbvm/Cargo.toml @@ -3,6 +3,13 @@ name = "hbvm" version = "0.1.0" edition = "2021" +[profile.release] +lto = true + [dependencies] -log = "*" -hashbrown = "0.13.2" +delegate = "0.9" +hashbrown = "0.13" +hbbytecode.path = "../hbbytecode" +log = "0.4" +paste = "1.0" +static_assertions = "1.0" diff --git a/hbvm/src/bytecode/mod.rs b/hbvm/src/bytecode/mod.rs deleted file mode 100644 index 414c60d..0000000 --- a/hbvm/src/bytecode/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod ops; -pub mod types; diff --git a/hbvm/src/bytecode/ops.rs b/hbvm/src/bytecode/ops.rs deleted file mode 100644 index 2b23ae8..0000000 --- a/hbvm/src/bytecode/ops.rs +++ /dev/null @@ -1,68 +0,0 @@ -#[repr(u8)] -pub enum Operations { - NOP = 0, - - ADD = 1, - SUB = 2, - MUL = 3, - DIV = 4, - MOD = 5, - - AND = 6, - OR = 7, - XOR = 8, - NOT = 9, - - // LOADs a memory address/constant into a register - LOAD = 15, - // STOREs a register/constant into a memory address - STORE = 16, - - MapPage = 17, - UnmapPage = 18, - 
- // SHIFT LEFT 16 A0 - Shift = 20, - - JUMP = 100, - JumpCond = 101, - RET = 103, - - EnviromentCall = 255, -} - -pub enum PageMapTypes { - // Have the host make a new VMPage - VMPage = 0, - // Ask the host to map a RealPage into memory - RealPage = 1, -} - -pub enum MathOpSubTypes { - Unsigned = 0, - Signed = 1, - FloatingPoint = 2, -} - -pub enum MathOpSides { - RegisterConstant = 0, - RegisterRegister = 1, - ConstantConstant = 2, - ConstantRegister = 3, -} - -pub enum RWSubTypes { - AddrToReg = 0, - RegToAddr, - ConstToReg, - ConstToAddr, -} - -pub enum JumpConditionals { - Equal = 0, - NotEqual = 1, - LessThan = 2, - LessThanOrEqualTo = 3, - GreaterThan = 4, - GreaterThanOrEqualTo = 5, -} diff --git a/hbvm/src/bytecode/types.rs b/hbvm/src/bytecode/types.rs deleted file mode 100644 index 1e98c23..0000000 --- a/hbvm/src/bytecode/types.rs +++ /dev/null @@ -1,9 +0,0 @@ - -pub const CONST_U8: u8 = 0x00; -pub const CONST_I8: i8 = 0x01; - -pub const CONST_U64: u8 = 0x02; -pub const CONST_I64: u8 = 0x03; -pub const CONST_F64: u8 = 0x04; - -pub const ADDRESS: u8 = 0x05; diff --git a/hbvm/src/engine/call_stack.rs b/hbvm/src/engine/call_stack.rs deleted file mode 100644 index 39fee04..0000000 --- a/hbvm/src/engine/call_stack.rs +++ /dev/null @@ -1,6 +0,0 @@ -use alloc::vec::Vec; - -pub type CallStack = Vec; -pub struct FnCall { - pub ret: usize, -} diff --git a/hbvm/src/engine/config.rs b/hbvm/src/engine/config.rs deleted file mode 100644 index 504d663..0000000 --- a/hbvm/src/engine/config.rs +++ /dev/null @@ -1,13 +0,0 @@ -pub struct EngineConfig { - pub call_stack_depth: usize, - pub quantum: u32, -} - -impl EngineConfig { - pub fn default() -> Self { - Self { - call_stack_depth: 32, - quantum: 0, - } - } -} diff --git a/hbvm/src/engine/enviroment_calls.rs b/hbvm/src/engine/enviroment_calls.rs deleted file mode 100644 index 27289d6..0000000 --- a/hbvm/src/engine/enviroment_calls.rs +++ /dev/null @@ -1,3 +0,0 @@ -use super::Engine; - -pub type EnviromentCall = fn(&mut 
Engine) -> Result<&mut Engine, u64>; diff --git a/hbvm/src/engine/mod.rs b/hbvm/src/engine/mod.rs deleted file mode 100644 index 4370689..0000000 --- a/hbvm/src/engine/mod.rs +++ /dev/null @@ -1,102 +0,0 @@ -use log::info; - -pub mod call_stack; -pub mod config; -pub mod enviroment_calls; -pub mod regs; -#[cfg(test)] -pub mod tests; - -use { - self::call_stack::CallStack, - crate::{engine::enviroment_calls::EnviromentCall, memory, HaltStatus, RuntimeErrors}, - alloc::vec::Vec, - config::EngineConfig, - log::trace, - regs::Registers, -}; - -// pub const PAGE_SIZE: usize = 8192; - -pub struct RealPage { - pub ptr: *mut u8, -} - -#[derive(Debug, Clone, Copy)] -pub struct VMPage { - pub data: [u8; 8192], -} -impl Default for VMPage { - fn default() -> Self { - Self { - data: [0; 4096 * 2], - } - } -} - -pub enum Page { - VMPage(VMPage), - RealPage(RealPage), -} -impl Page { - pub fn data(&self) -> [u8; 4096 * 2] { - match self { - Page::VMPage(vmpage) => vmpage.data, - Page::RealPage(_) => { - unimplemented!("Memmapped hw page not yet supported") - } - } - } -} - -pub fn empty_enviroment_call(engine: &mut Engine) -> Result<&mut Engine, u64> { - trace!("Registers {:?}", engine.registers); - Err(0) -} - -pub struct Engine { - pub pc: usize, - pub program: Vec, - pub registers: Registers, - pub config: EngineConfig, - - /// BUG: This DOES NOT account for overflowing - pub last_timer_count: u32, - pub timer_callback: Option u32>, - pub memory: memory::Memory, - pub enviroment_call_table: [Option; 256], - pub call_stack: CallStack, -} - -impl Engine { - pub fn set_timer_callback(&mut self, func: fn() -> u32) { - self.timer_callback = Some(func); - } -} - -impl Engine { - pub fn new(program: Vec) -> Self { - let mut mem = memory::Memory::new(); - for (addr, byte) in program.clone().into_iter().enumerate() { - let _ = mem.set_addr8(addr as u64, byte); - } - trace!("{:?}", mem.read_addr8(0)); - let ecall_table: [Option; 256] = [None; 256]; - Self { - pc: 0, - program, - 
registers: Registers::default(), - config: EngineConfig::default(), - last_timer_count: 0, - timer_callback: None, - enviroment_call_table: ecall_table, - memory: mem, - call_stack: Vec::new(), - } - } - - pub fn dump(&self) {} - pub fn run(&mut self) -> Result { - Ok(HaltStatus::Running) - } -} diff --git a/hbvm/src/engine/regs.rs b/hbvm/src/engine/regs.rs deleted file mode 100644 index 60e7d42..0000000 --- a/hbvm/src/engine/regs.rs +++ /dev/null @@ -1,66 +0,0 @@ -use core::{ - fmt::Debug, - ops::{Index, IndexMut}, -}; - -#[derive(Debug, Clone, Copy)] -pub struct Registers([Value; 60]); - -impl Index for Registers { - type Output = Value; - - #[inline] - fn index(&self, index: u8) -> &Self::Output { - &self.0[index as usize] - } -} - -impl IndexMut for Registers { - #[inline] - fn index_mut(&mut self, index: u8) -> &mut Self::Output { - &mut self.0[index as usize] - } -} - -impl Default for Registers { - fn default() -> Self { - Self([Value { i: 0 }; 60]) - } -} - -/// # Safety -/// The macro invoker shall make sure that byte reinterpret-cast -/// won't cause undefined behaviour. -macro_rules! value_def { - ($($fname:ident : $fty:ident, $getter:ident);* $(;)?) => { - #[derive(Clone, Copy)] - pub union Value { - $($fname: $fty),* - } - - impl Value {$( - #[inline] - pub fn $getter(&self) -> $fty { - unsafe { self.$fname } - } - )*} - - $(impl From<$fty> for Value { - #[inline] - fn from($fname: $fty) -> Self { - Self { $fname } - } - })* - } -} - -value_def! 
{ - i: u64, int; - f: f64, float; -} - -impl Debug for Value { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - self.int().fmt(f) - } -} diff --git a/hbvm/src/engine/tests/mod.rs b/hbvm/src/engine/tests/mod.rs deleted file mode 100644 index aa74d70..0000000 --- a/hbvm/src/engine/tests/mod.rs +++ /dev/null @@ -1,125 +0,0 @@ -use { - super::Engine, - crate::{HaltStatus, RuntimeErrors}, - alloc::vec, - RuntimeErrors::*, -}; - -#[test] -fn invalid_program() { - let prog = vec![1, 0]; - let mut eng = Engine::new(prog); - let ret = eng.run(); - assert_eq!(ret, Err(InvalidOpcodePair(1, 0))); -} - -#[test] -fn empty_program() { - let prog = vec![]; - let mut eng = Engine::new(prog); - let ret = eng.run(); - assert_eq!(ret, Ok(HaltStatus::Halted)); -} - -#[test] -fn max_quantum_reached() { - let prog = vec![0, 0, 0, 0]; - let mut eng = Engine::new(prog); - eng.set_timer_callback(|| { - return 1; - }); - eng.config.quantum = 1; - let ret = eng.run(); - assert_eq!(ret, Ok(HaltStatus::Running)); -} - -#[test] -fn jump_out_of_bounds() { - use crate::bytecode::ops::Operations::JUMP; - let prog = vec![JUMP as u8, 0, 0, 0, 0, 0, 0, 1, 0]; - let mut eng = Engine::new(prog); - let ret = eng.run(); - assert_eq!(ret, Err(InvalidJumpAddress(256))); -} - -#[test] -fn invalid_system_call() { - let prog = vec![255, 0]; - let mut eng = Engine::new(prog); - let ret = eng.run(); - assert_eq!(ret, Err(InvalidSystemCall(0))); -} - -#[test] -fn add_u8() { - use crate::bytecode::ops::{MathOpSides::ConstantConstant, Operations::ADD}; - - let prog = vec![ADD as u8, ConstantConstant as u8, 100, 98, 0xA0]; - let mut eng = Engine::new(prog); - let _ = eng.run(); - assert_eq!(eng.registers.a0, 2); -} - -#[test] -fn sub_u8() { - use crate::bytecode::ops::Operations::SUB; - - let prog = vec![SUB as u8]; - let mut eng = Engine::new(prog); - let _ = eng.run(); - assert_eq!(eng.registers.a0, 1); -} -#[test] -fn mul_u8() { - use crate::bytecode::ops::{MathOpSides::ConstantConstant, 
Operations::MUL}; - - let prog = vec![MUL as u8, ConstantConstant as u8, 1, 2, 0xA0]; - let mut eng = Engine::new(prog); - let _ = eng.run(); - assert_eq!(eng.registers.a0, 2); -} - -#[test] -fn div_u8() { - use crate::bytecode::ops::Operations::DIV; - - let prog = vec![DIV as u8]; - let mut eng = Engine::new(prog); - let _ = eng.run(); - assert_eq!(eng.registers.a0, 2); -} - -#[test] -fn set_register() { - let prog = alloc::vec![]; - let mut eng = Engine::new(prog); - eng.set_register(0xA0, 1); - assert_eq!(eng.registers.a0, 1); -} - -#[test] -fn load_u8() { - use crate::bytecode::ops::{Operations::LOAD, RWSubTypes::AddrToReg}; - - let prog = vec![LOAD as u8, AddrToReg as u8, 0, 0, 0, 0, 0, 0, 1, 0, 0xA0]; - let mut eng = Engine::new(prog); - let ret = eng.memory.set_addr8(256, 1); - assert_eq!(ret, Ok(())); - let _ = eng.run(); - assert_eq!(eng.registers.a0, 1); -} -#[test] -fn set_memory_8() { - let prog = vec![]; - let mut eng = Engine::new(prog); - let ret = eng.memory.set_addr8(256, 1); - assert_eq!(ret, Ok(())); -} - -#[test] -fn set_memory_64() { - let prog = vec![]; - let mut eng = Engine::new(prog); - let ret = eng.memory.set_addr64(256, 1); - assert_eq!(ret, Ok(())); -} diff --git a/hbvm/src/lib.rs b/hbvm/src/lib.rs index 6541de8..013fdb2 100644 --- a/hbvm/src/lib.rs +++ b/hbvm/src/lib.rs @@ -1,9 +1,8 @@ #![no_std] extern crate alloc; -pub mod bytecode; -pub mod engine; -pub mod memory; +pub mod validate; +pub mod vm; #[derive(Debug, PartialEq)] pub enum RuntimeErrors { diff --git a/hbvm/src/main.rs b/hbvm/src/main.rs index a873e57..a136f59 100644 --- a/hbvm/src/main.rs +++ b/hbvm/src/main.rs @@ -1,32 +1,26 @@ -use hbvm::{ - bytecode::ops::{Operations::*}, - engine::Engine, - RuntimeErrors, HaltStatus, +use { + hbvm::{validate::validate, vm::Vm}, + std::io::{stdin, Read}, }; -fn main() -> Result<(), RuntimeErrors> { - // TODO: Grab program from cmdline - #[rustfmt::skip] - let prog: Vec = vec![ - NOP as u8, - JUMP as u8, 0, 0, 0, 0, 0, 0, 0, 0, - ]; - - 
let mut eng = Engine::new(prog); - // eng.set_timer_callback(time); - eng.enviroment_call_table[10] = Some(print_fn); - while eng.run()? != HaltStatus::Halted {} - eng.dump(); - println!("{:#?}", eng.registers); - +fn main() -> Result<(), Box> { + let mut prog = vec![]; + stdin().read_to_end(&mut prog)?; + + if let Err(e) = validate(&prog) { + eprintln!("Program validation error: {e:?}"); + return Ok(()); + } else { + unsafe { + let mut vm = Vm::new_unchecked(&prog); + vm.memory.insert_test_page(); + println!("Program interrupt: {:?}", vm.run()); + println!("{:?}", vm.registers); + } + } Ok(()) } pub fn time() -> u32 { 9 } - -pub fn print_fn(engine: &mut Engine) -> Result<&mut Engine, u64> { - println!("hello"); - Ok(engine) -} diff --git a/hbvm/src/memory.rs b/hbvm/src/memory.rs deleted file mode 100644 index 1b1ff8a..0000000 --- a/hbvm/src/memory.rs +++ /dev/null @@ -1,70 +0,0 @@ -use crate::engine::VMPage; - -use { - crate::{engine::Page, RuntimeErrors}, - alloc::vec::Vec, - hashbrown::HashMap, - log::trace, -}; - -pub struct Memory { - inner: HashMap, -} - -impl Memory { - pub fn new() -> Self { - Self { - inner: HashMap::new(), - } - // - } - - pub fn map_vec(&mut self, address: u64, vec: Vec) { - panic!("Mapping vectors into pages is not supported yet"); - } -} - -impl Memory { - pub fn read_addr8(&mut self, address: u64) -> Result { - let (page, offset) = addr_to_page(address); - trace!("page {} offset {}", page, offset); - match self.inner.get(&page) { - Some(page) => { - let val = page.data()[offset as usize]; - trace!("Value {}", val); - Ok(val) - } - None => { - trace!("page not mapped"); - Err(RuntimeErrors::PageNotMapped(page)) - } - } - } - pub fn read_addr64(&mut self, address: u64) -> u64 { - unimplemented!() - } - - pub fn set_addr8(&mut self, address: u64, value: u8) -> Result<(), RuntimeErrors> { - let (page, offset) = addr_to_page(address); - let ret: Option<(&u64, &mut Page)> = self.inner.get_key_value_mut(&page); - match ret { - Some((_, 
page)) => { - page.data()[offset as usize] = value; - } - None => { - let mut pg = VMPage::default(); - pg.data[offset as usize] = value; - self.inner.insert(page, Page::VMPage(pg)); - trace!("Mapped page {}", page); - } - } - Ok(()) - } - pub fn set_addr64(&mut self, address: u64, value: u64) -> Result<(), RuntimeErrors> { - unimplemented!() - } -} - -fn addr_to_page(addr: u64) -> (u64, u64) { - (addr / 8192, addr % 8192) -} diff --git a/hbvm/src/validate.rs b/hbvm/src/validate.rs new file mode 100644 index 0000000..65a7fec --- /dev/null +++ b/hbvm/src/validate.rs @@ -0,0 +1,52 @@ +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ErrorKind { + InvalidInstruction, + Unimplemented, + RegisterArrayOverflow, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Error { + pub kind: ErrorKind, + pub index: usize, +} + +pub fn validate(mut program: &[u8]) -> Result<(), Error> { + use hbbytecode::opcode::*; + + let start = program; + loop { + program = match program { + [] => return Ok(()), + [LD..=ST, reg, _, _, _, _, _, _, _, _, _, count, ..] + if usize::from(*reg) * 8 + usize::from(*count) > 2048 => + { + return Err(Error { + kind: ErrorKind::RegisterArrayOverflow, + index: (program.as_ptr() as usize) - (start.as_ptr() as usize), + }) + } + [BRC, src, dst, count, ..] + if src.checked_add(*count).is_none() || dst.checked_add(*count).is_none() => + { + return Err(Error { + kind: ErrorKind::RegisterArrayOverflow, + index: (program.as_ptr() as usize) - (start.as_ptr() as usize), + }) + } + [NOP | ECALL, rest @ ..] + | [DIR | DIRF, _, _, _, _, rest @ ..] + | [ADD..=CMPU | BRC | ADDF..=MULF, _, _, _, rest @ ..] + | [NEG..=NOT | CP..=SWA, _, _, rest @ ..] + | [LI | JMP, _, _, _, _, _, _, _, _, _, rest @ ..] + | [ADDI..=CMPUI | BMC | JEQ..=JGTU | ADDFI..=MULFI, _, _, _, _, _, _, _, _, _, _, rest @ ..] + | [LD..=ST, _, _, _, _, _, _, _, _, _, _, _, _, rest @ ..] 
=> rest, + _ => { + return Err(Error { + kind: ErrorKind::InvalidInstruction, + index: (program.as_ptr() as usize) - (start.as_ptr() as usize), + }) + } + } + } +} diff --git a/hbvm/src/vm/mem/mod.rs b/hbvm/src/vm/mem/mod.rs new file mode 100644 index 0000000..476f835 --- /dev/null +++ b/hbvm/src/vm/mem/mod.rs @@ -0,0 +1,215 @@ +mod paging; + +use self::paging::{PageTable, Permission, PtEntry}; +use alloc::boxed::Box; + +#[derive(Clone, Debug)] +pub struct Memory { + root_pt: *mut PageTable, +} + +impl Default for Memory { + fn default() -> Self { + Self { + root_pt: Box::into_raw(Box::default()), + } + } +} + +impl Drop for Memory { + fn drop(&mut self) { + let _ = unsafe { Box::from_raw(self.root_pt) }; + } +} + +impl Memory { + // HACK: Just for allocation testing, will be removed when proper memory interfaces + // implemented. + pub fn insert_test_page(&mut self) { + unsafe { + let mut entry = PtEntry::new( + { + let layout = alloc::alloc::Layout::from_size_align_unchecked(4096, 4096); + let ptr = alloc::alloc::alloc_zeroed(layout); + if ptr.is_null() { + alloc::alloc::handle_alloc_error(layout); + } + + core::ptr::write_bytes(ptr, 69, 10); + ptr.cast() + }, + Permission::Write, + ); + + for _ in 0..4 { + let mut pt = Box::::default(); + pt[0] = entry; + entry = PtEntry::new(Box::into_raw(pt) as _, Permission::Node); + } + + self.root_pt_mut()[0] = entry; + } + } + + /// Load value from an address + pub unsafe fn load(&self, addr: u64, target: *mut u8, count: usize) -> Result<(), ()> { + self.memory_access( + addr, + target, + count, + |perm| { + matches!( + perm, + Permission::Readonly | Permission::Write | Permission::Exec + ) + }, + |src, dst, count| core::ptr::copy_nonoverlapping(src, dst, count), + ) + } + + /// Store value to an address + pub unsafe fn store(&mut self, addr: u64, source: *const u8, count: usize) -> Result<(), ()> { + self.memory_access( + addr, + source.cast_mut(), + count, + |perm| perm == Permission::Write, + |dst, src, count| 
core::ptr::copy_nonoverlapping(src, dst, count), + ) + } + + /// Copy a block of memory + pub unsafe fn block_copy(&mut self, src: u64, dst: u64, count: u64) -> Result<(), ()> { + let count = usize::try_from(count).expect("?conradluget a better CPU"); + + let mut srcs = PageSplitter::new(src, count, self.root_pt); + let mut dsts = PageSplitter::new(dst, count, self.root_pt); + let mut c_src = srcs.next().ok_or(())?; + let mut c_dst = dsts.next().ok_or(())?; + + loop { + let min_size = c_src.size.min(c_dst.size); + unsafe { + core::ptr::copy(c_src.ptr, c_dst.ptr, min_size); + } + + match ( + match c_src.size.saturating_sub(min_size) { + 0 => srcs.next(), + size => Some(PageSplitResult { size, ..c_src }), + }, + match c_dst.size.saturating_sub(min_size) { + 0 => dsts.next(), + size => Some(PageSplitResult { size, ..c_dst }), + }, + ) { + (None, None) => return Ok(()), + (Some(src), Some(dst)) => (c_src, c_dst) = (src, dst), + _ => return Err(()), + } + } + } + + #[inline] + pub fn root_pt(&self) -> &PageTable { + unsafe { &*self.root_pt } + } + + #[inline] + pub fn root_pt_mut(&mut self) -> &mut PageTable { + unsafe { &mut *self.root_pt } + } + + fn memory_access( + &self, + src: u64, + mut dst: *mut u8, + len: usize, + permission_check: impl Fn(Permission) -> bool, + action: impl Fn(*mut u8, *mut u8, usize), + ) -> Result<(), ()> { + for PageSplitResult { ptr, size, perm } in PageSplitter::new(src, len, self.root_pt) { + if !permission_check(perm) { + return Err(()); + } + + action(ptr, dst, size); + dst = unsafe { dst.add(size) }; + } + + Ok(()) + } +} + +struct PageSplitResult { + ptr: *mut u8, + size: usize, + perm: Permission, +} + +struct PageSplitter { + addr: u64, + size: usize, + pagetable: *const PageTable, +} + +impl PageSplitter { + pub const fn new(addr: u64, size: usize, pagetable: *const PageTable) -> Self { + Self { + addr, + size, + pagetable, + } + } +} + +impl Iterator for PageSplitter { + type Item = PageSplitResult; + + fn next(&mut self) -> 
Option { + if self.size == 0 { + return None; + } + + let (base, perm, size, offset) = 'a: { + let mut current_pt = self.pagetable; + for lvl in (0..5).rev() { + unsafe { + let entry = (*current_pt).get_unchecked( + usize::try_from((self.addr >> (lvl * 9 + 12)) & ((1 << 9) - 1)) + .expect("?conradluget a better CPU"), + ); + + let ptr = entry.ptr(); + match entry.permission() { + Permission::Empty => return None, + Permission::Node => current_pt = ptr as _, + perm => { + break 'a ( + ptr as *mut u8, + perm, + match lvl { + 0 => 4096, + 1 => 1024_usize.pow(2) * 2, + 2 => 1024_usize.pow(3), + _ => return None, + }, + self.addr as usize & ((1 << (lvl * 9 + 12)) - 1), + ) + } + } + } + } + return None; + }; + + let avail = (size - offset).clamp(0, self.size); + self.addr += size as u64; + self.size = self.size.saturating_sub(size); + Some(PageSplitResult { + ptr: unsafe { base.add(offset) }, + size: avail, + perm, + }) + } +} diff --git a/hbvm/src/vm/mem/paging.rs b/hbvm/src/vm/mem/paging.rs new file mode 100644 index 0000000..ae9c0bf --- /dev/null +++ b/hbvm/src/vm/mem/paging.rs @@ -0,0 +1,101 @@ +use core::{ + fmt::Debug, + mem::MaybeUninit, + ops::{Index, IndexMut}, + slice::SliceIndex, +}; +use delegate::delegate; + +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +#[repr(u8)] +pub enum Permission { + #[default] + Empty, + Node, + Readonly, + Write, + Exec, +} + +#[derive(Clone, Copy, Default, PartialEq, Eq)] +pub struct PtEntry(u64); +impl PtEntry { + #[inline] + pub unsafe fn new(ptr: *mut PtPointedData, permission: Permission) -> Self { + Self(ptr as u64 | permission as u64) + } + + #[inline] + pub fn permission(&self) -> Permission { + unsafe { core::mem::transmute(self.0 as u8 & 0b111) } + } + + #[inline] + pub fn ptr(&self) -> *mut PtPointedData { + (self.0 & !((1 << 12) - 1)) as _ + } +} + +impl Debug for PtEntry { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("PtEntry") + .field("ptr", &self.ptr()) + 
.field("permission", &self.permission()) + .finish() + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(align(4096))] +pub struct PageTable([PtEntry; 512]); + +impl PageTable { + delegate!(to self.0 { + pub unsafe fn get(&self, ix: I) -> Option<&I::Output> + where I: SliceIndex<[PtEntry]>; + + pub unsafe fn get_mut(&mut self, ix: I) -> Option<&mut I::Output> + where I: SliceIndex<[PtEntry]>; + + pub unsafe fn get_unchecked(&self, index: I) -> &I::Output + where I: SliceIndex<[PtEntry]>; + + pub unsafe fn get_unchecked_mut(&mut self, index: I) -> &mut I::Output + where I: SliceIndex<[PtEntry]>; + }); +} + +impl Index for PageTable +where + Idx: SliceIndex<[PtEntry]>, +{ + type Output = Idx::Output; + + #[inline(always)] + fn index(&self, index: Idx) -> &Self::Output { + &self.0[index] + } +} + +impl IndexMut for PageTable +where + Idx: SliceIndex<[PtEntry]>, +{ + #[inline(always)] + fn index_mut(&mut self, index: Idx) -> &mut Self::Output { + &mut self.0[index] + } +} + +impl Default for PageTable { + fn default() -> Self { + Self(unsafe { MaybeUninit::zeroed().assume_init() }) + } +} + +#[derive(Clone, Copy)] +#[repr(C, align(4096))] +pub union PtPointedData { + pub pt: PageTable, + pub page: u8, +} diff --git a/hbvm/src/vm/mod.rs b/hbvm/src/vm/mod.rs new file mode 100644 index 0000000..ccb60ef --- /dev/null +++ b/hbvm/src/vm/mod.rs @@ -0,0 +1,308 @@ +//! HoleyBytes Virtual Machine +//! +//! All unsafe code here should be sound, if input bytecode passes validation. + +// # General safety notice: +// - Validation has to assure there is 256 registers (r0 - r255) +// - Instructions have to be valid as specified (values and sizes) +// - Mapped pages should be at least 4 KiB +// - Yes, I am aware of the UB when jumping in-mid of instruction where +// the read byte corresponds to an instruction whose lenght exceets the +// program size. If you are (rightfully) worried about the UB, for now just +// append your program with 11 zeroes. 
+ +mod mem; +mod value; + +use { + crate::validate, + core::ops, + hbbytecode::{OpParam, ParamBB, ParamBBB, ParamBBBB, ParamBBD, ParamBBDH, ParamBD}, + mem::Memory, + static_assertions::assert_impl_one, + value::Value, +}; + +macro_rules! param { + ($self:expr, $ty:ty) => {{ + assert_impl_one!($ty: OpParam); + let data = $self + .program + .as_ptr() + .add($self.pc + 1) + .cast::<$ty>() + .read(); + $self.pc += 1 + core::mem::size_of::<$ty>(); + data + }}; +} + +macro_rules! binary_op { + ($self:expr, $ty:ident, $handler:expr) => {{ + let ParamBBB(tg, a0, a1) = param!($self, ParamBBB); + $self.write_reg( + tg, + $handler( + Value::$ty(&$self.read_reg(a0)), + Value::$ty(&$self.read_reg(a1)), + ) + .into(), + ); + }}; +} + +macro_rules! binary_op_imm { + ($self:expr, $ty:ident, $handler:expr) => {{ + let ParamBBD(tg, a0, imm) = param!($self, ParamBBD); + $self.write_reg( + tg, + $handler(Value::$ty(&$self.read_reg(a0)), Value::$ty(&imm.into())).into(), + ); + }}; +} + +macro_rules! cond_jump { + ($self:expr, $ty:ident, $expected:ident) => {{ + let ParamBBD(a0, a1, jt) = param!($self, ParamBBD); + if core::cmp::Ord::cmp(&$self.read_reg(a0).as_u64(), &$self.read_reg(a1).as_u64()) + == core::cmp::Ordering::$expected + { + $self.pc = jt as usize; + } + }}; +} + +pub struct Vm<'a> { + pub registers: [Value; 256], + pub memory: Memory, + pc: usize, + program: &'a [u8], +} + +impl<'a> Vm<'a> { + /// # Safety + /// Program code has to be validated + pub unsafe fn new_unchecked(program: &'a [u8]) -> Self { + Self { + registers: [Value::from(0_u64); 256], + memory: Default::default(), + pc: 0, + program, + } + } + + pub fn new_validated(program: &'a [u8]) -> Result { + validate::validate(program)?; + Ok(unsafe { Self::new_unchecked(program) }) + } + + pub fn run(&mut self) -> HaltReason { + use hbbytecode::opcode::*; + loop { + let Some(&opcode) = self.program.get(self.pc) + else { return HaltReason::ProgramEnd }; + + unsafe { + match opcode { + NOP => param!(self, ()), + ADD 
=> binary_op!(self, as_u64, u64::wrapping_add), + SUB => binary_op!(self, as_u64, u64::wrapping_sub), + MUL => binary_op!(self, as_u64, u64::wrapping_mul), + AND => binary_op!(self, as_u64, ops::BitAnd::bitand), + OR => binary_op!(self, as_u64, ops::BitOr::bitor), + XOR => binary_op!(self, as_u64, ops::BitXor::bitxor), + SL => binary_op!(self, as_u64, ops::Shl::shl), + SR => binary_op!(self, as_u64, ops::Shr::shr), + SRS => binary_op!(self, as_i64, ops::Shr::shr), + CMP => { + let ParamBBB(tg, a0, a1) = param!(self, ParamBBB); + self.write_reg( + tg, + (self.read_reg(a0).as_i64().cmp(&self.read_reg(a1).as_i64()) as i64) + .into(), + ); + } + CMPU => { + let ParamBBB(tg, a0, a1) = param!(self, ParamBBB); + self.write_reg( + tg, + (self.read_reg(a0).as_u64().cmp(&self.read_reg(a1).as_u64()) as i64) + .into(), + ); + } + NOT => { + let param = param!(self, ParamBB); + self.write_reg(param.0, (!self.read_reg(param.1).as_u64()).into()); + } + NEG => { + let param = param!(self, ParamBB); + self.write_reg( + param.0, + match self.read_reg(param.1).as_u64() { + 0 => 1_u64, + _ => 0, + } + .into(), + ); + } + DIR => { + let ParamBBBB(dt, rt, a0, a1) = param!(self, ParamBBBB); + let a0 = self.read_reg(a0).as_u64(); + let a1 = self.read_reg(a1).as_u64(); + self.write_reg(dt, (a0.checked_div(a1).unwrap_or(u64::MAX)).into()); + self.write_reg(rt, (a0.checked_rem(a1).unwrap_or(u64::MAX)).into()); + } + ADDI => binary_op_imm!(self, as_u64, ops::Add::add), + MULI => binary_op_imm!(self, as_u64, ops::Mul::mul), + ANDI => binary_op_imm!(self, as_u64, ops::BitAnd::bitand), + ORI => binary_op_imm!(self, as_u64, ops::BitOr::bitor), + XORI => binary_op_imm!(self, as_u64, ops::BitXor::bitxor), + SLI => binary_op_imm!(self, as_u64, ops::Shl::shl), + SRI => binary_op_imm!(self, as_u64, ops::Shr::shr), + SRSI => binary_op_imm!(self, as_i64, ops::Shr::shr), + CMPI => { + let ParamBBD(tg, a0, imm) = param!(self, ParamBBD); + self.write_reg( + tg, + 
(self.read_reg(a0).as_i64().cmp(&Value::from(imm).as_i64()) as i64) + .into(), + ); + } + CMPUI => { + let ParamBBD(tg, a0, imm) = param!(self, ParamBBD); + self.write_reg(tg, (self.read_reg(a0).as_u64().cmp(&imm) as i64).into()); + } + CP => { + let param = param!(self, ParamBB); + self.write_reg(param.0, self.read_reg(param.1)); + } + SWA => { + let ParamBB(src, dst) = param!(self, ParamBB); + if src + dst != 0 { + core::ptr::swap( + self.registers.get_unchecked_mut(usize::from(src)), + self.registers.get_unchecked_mut(usize::from(dst)), + ); + } + } + LI => { + let param = param!(self, ParamBD); + self.write_reg(param.0, param.1.into()); + } + LD => { + let ParamBBDH(dst, base, off, count) = param!(self, ParamBBDH); + let n: usize = match dst { + 0 => 1, + _ => 0, + }; + + if self + .memory + .load( + self.read_reg(base).as_u64() + off + n as u64, + self.registers.as_mut_ptr().add(usize::from(dst) + n).cast(), + usize::from(count).saturating_sub(n), + ) + .is_err() + { + return HaltReason::LoadAccessEx; + } + } + ST => { + let ParamBBDH(dst, base, off, count) = param!(self, ParamBBDH); + if self + .memory + .store( + self.read_reg(base).as_u64() + off, + self.registers.as_ptr().add(usize::from(dst)).cast(), + count.into(), + ) + .is_err() + { + return HaltReason::LoadAccessEx; + } + } + BMC => { + let ParamBBD(src, dst, count) = param!(self, ParamBBD); + if self + .memory + .block_copy( + self.read_reg(src).as_u64(), + self.read_reg(dst).as_u64(), + count, + ) + .is_err() + { + return HaltReason::LoadAccessEx; + } + } + BRC => { + let ParamBBB(src, dst, count) = param!(self, ParamBBB); + core::ptr::copy( + self.registers.get_unchecked(usize::from(src)), + self.registers.get_unchecked_mut(usize::from(dst)), + usize::from(count * 8), + ); + } + JMP => { + let ParamBD(reg, offset) = param!(self, ParamBD); + self.pc = (self.read_reg(reg).as_u64() + offset) as usize; + } + JEQ => cond_jump!(self, int, Equal), + JNE => { + let ParamBBD(a0, a1, jt) = param!(self, 
ParamBBD); + if self.read_reg(a0).as_u64() != self.read_reg(a1).as_u64() { + self.pc = jt as usize; + } + } + JLT => cond_jump!(self, int, Less), + JGT => cond_jump!(self, int, Greater), + JLTU => cond_jump!(self, sint, Less), + JGTU => cond_jump!(self, sint, Greater), + ECALL => { + param!(self, ()); + return HaltReason::Ecall; + } + ADDF => binary_op!(self, as_f64, ops::Add::add), + MULF => binary_op!(self, as_f64, ops::Mul::mul), + DIRF => { + let ParamBBBB(dt, rt, a0, a1) = param!(self, ParamBBBB); + let a0 = self.read_reg(a0).as_f64(); + let a1 = self.read_reg(a1).as_f64(); + self.write_reg(dt, (a0 / a1).into()); + self.write_reg(rt, (a0 % a1).into()); + } + ADDFI => binary_op_imm!(self, as_f64, ops::Add::add), + MULFI => binary_op_imm!(self, as_f64, ops::Mul::mul), + _ => return HaltReason::InvalidOpcode, + } + } + } + } + + #[inline] + unsafe fn read_reg(&self, n: u8) -> Value { + if n == 0 { + 0_u64.into() + } else { + *self.registers.get_unchecked(n as usize) + } + } + + #[inline] + unsafe fn write_reg(&mut self, n: u8, value: Value) { + if n != 0 { + *self.registers.get_unchecked_mut(n as usize) = value; + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[repr(u8)] +pub enum HaltReason { + ProgramEnd, + Ecall, + InvalidOpcode, + LoadAccessEx, + StoreAccessEx, +} diff --git a/hbvm/src/vm/value.rs b/hbvm/src/vm/value.rs new file mode 100644 index 0000000..cfb7300 --- /dev/null +++ b/hbvm/src/vm/value.rs @@ -0,0 +1,37 @@ +use core::fmt::Debug; + +macro_rules! value_def { + ($($ty:ident),* $(,)?) => { + #[derive(Copy, Clone)] + #[repr(packed)] + pub union Value { + $(pub $ty: $ty),* + } + + paste::paste! 
{ + impl Value {$( + #[inline] + pub fn [](&self) -> $ty { + unsafe { self.$ty } + } + )*} + } + + $( + impl From<$ty> for Value { + #[inline] + fn from(value: $ty) -> Self { + Self { $ty: value } + } + } + )* + }; +} + +value_def!(u64, i64, f64); + +impl Debug for Value { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.as_u64().fmt(f) + } +} diff --git a/rust-toolchain b/rust-toolchain deleted file mode 100644 index 07ade69..0000000 --- a/rust-toolchain +++ /dev/null @@ -1 +0,0 @@ -nightly \ No newline at end of file diff --git a/spec.md b/spec.md new file mode 100644 index 0000000..b70ca58 --- /dev/null +++ b/spec.md @@ -0,0 +1,272 @@ +# HoleyBytes ISA Specification + +# Bytecode format +- All numbers are encoded little-endian +- There is 256 registers, they are represented by a byte +- Immediate values are 64 bit + +### Instruction encoding +- Instruction parameters are packed (no alignment) +- [opcode, …parameters…] + +### Instruction parameter types +- B = Byte +- D = Doubleword (64 bits) +- H = Halfword (16 bits) + +| Name | Size | +|:----:|:--------| +| BBBB | 32 bits | +| BBB | 24 bits | +| BBDH | 96 bits | +| BBDB | 88 bits | +| BBD | 80 bits | +| BB | 16 bits | +| BD | 72 bits | +| D | 64 bits | +| N | 0 bits | + +# Instructions +- `#n`: register in parameter *n* +- `imm #n`: for immediate in parameter *n* +- `P ← V`: Set register P to value V +- `[x]`: Address x + +## No-op +- N type + +| Opcode | Name | Action | +|:------:|:----:|:----------:| +| 0 | NOP | Do nothing | + +## Integer binary ops. 
+- BBB type +- `#0 ← #1 <op> #2` + +| Opcode | Name | Action | +|:------:|:----:|:-----------------------:| +| 1 | ADD | Wrapping addition | +| 2 | SUB | Wrapping subtraction | +| 3 | MUL | Wrapping multiplication | +| 4 | AND | Bitand | +| 5 | OR | Bitor | +| 6 | XOR | Bitxor | +| 7 | SL | Unsigned left bitshift | +| 8 | SR | Unsigned right bitshift | +| 9 | SRS | Signed right bitshift | + +### Comparison +| Opcode | Name | Action | +|:------:|:----:|:-------------------:| +| 10 | CMP | Signed comparison | +| 11 | CMPU | Unsigned comparison | + +#### Comparison table +| #1 *op* #2 | Result | +|:----------:|:------:| +| < | -1 | +| = | 0 | +| > | 1 | + +### Division-remainder +- Type BBBB +- If `#3` is zero, the resulting value is all-ones +- `#0 ← #2 ÷ #3` +- `#1 ← #2 % #3` + +| Opcode | Name | Action | +|:------:|:----:|:-------------------------------:| +| 12 | DIR | Divide and remainder combined | + +### Negations +- Type BB +- `#0 ← <op> #1` + +| Opcode | Name | Action | +|:------:|:----:|:----------------:| +| 13 | NEG | Bit negation | +| 14 | NOT | Logical negation | + +## Integer immediate binary ops. 
+- Type BBD +- `#0 ← #1 <op> imm #2` + +| Opcode | Name | Action | +|:------:|:----:|:-----------------------:| +| 15 | ADDI | Wrapping addition | +| 16 | MULI | Wrapping multiplication | +| 17 | ANDI | Bitand | +| 18 | ORI | Bitor | +| 19 | XORI | Bitxor | +| 20 | SLI | Unsigned left bitshift | +| 21 | SRI | Unsigned right bitshift | +| 22 | SRSI | Signed right bitshift | + +### Comparison +- Comparison works the same as for the register-register (BBB type) variants + +| Opcode | Name | Action | +|:------:|:-----:|:-------------------:| +| 23 | CMPI | Signed comparison | +| 24 | CMPUI | Unsigned comparison | + +## Register value set / copy + +### Copy +- Type BB +- `#0 ← #1` + +| Opcode | Name | Action | +|:------:|:----:|:------:| +| 25 | CP | Copy | + +### Swap +- Type BB +- Swap #0 and #1 + +| Opcode | Name | Action | +|:------:|:----:|:------:| +| 26 | SWA | Swap | + +### Load immediate +- Type BD +- `#0 ← imm #1` + +| Opcode | Name | Action | +|:------:|:----:|:--------------:| +| 27 | LI | Load immediate | + +## Memory operations +- Type BBDH +- If the loaded/stored value exceeds the size of one register, continue accessing the following registers + +### Load / Store +| Opcode | Name | Action | +|:------:|:----:|:---------------------------------------:| +| 28 | LD | `#0 ← [#1 + imm #2], copy imm #3 bytes` | +| 29 | ST | `[#1 + imm #2] ← #0, copy imm #3 bytes` | + +## Block copy +- Block copy source and target can overlap + +### Memory copy +- Type BBD + +| Opcode | Name | Action | +|:------:|:----:|:--------------------------------:| +| 30 | BMC | `[#0] ← [#1], copy imm #2 bytes` | + +### Register copy +- Type BBB +- Copy a block of registers to another location (again, overflowing to following registers) + +| Opcode | Name | Action | +|:------:|:----:|:--------------------------------:| +| 31 | BRC | `#0 ← #1, copy imm #2 registers` | + +## Control flow + +### Unconditional jump +- Type BD + +| Opcode | Name | Action | +|:------:|:----:|:---------------------:| +| 32 | JMP | Jump at `#0 + imm #1` | + +### Conditional 
jumps +- Type BBD +- Jump at `imm #2` if `#0 <op> #1` + +| Opcode | Name | Comparison | +|:------:|:----:|:------------:| +| 33 | JEQ | = | +| 34 | JNE | ≠ | +| 35 | JLT | < (signed) | +| 36 | JGT | > (signed) | +| 37 | JLTU | < (unsigned) | +| 38 | JGTU | > (unsigned) | + +### Environment call +- Type N + +| Opcode | Name | Action | +|:------:|:-----:|:-------------------------------------:| +| 39 | ECALL | Cause a trap to the host environment | + +## Floating point operations +- Type BBB +- `#0 ← #1 <op> #2` + +| Opcode | Name | Action | +|:------:|:----:|:--------------:| +| 40 | ADDF | Addition | +| 41 | MULF | Multiplication | + +### Division-remainder +- Type BBBB + +| Opcode | Name | Action | +|:------:|:----:|:--------------------------------------:| +| 42 | DIRF | Same flow applies as for integer `DIR` | + +## Floating point immediate operations +- Type BBD +- `#0 ← #1 <op> imm #2` + +| Opcode | Name | Action | +|:------:|:-----:|:--------------:| +| 43 | ADDFI | Addition | +| 44 | MULFI | Multiplication | + +# Registers +- There are 255 registers + one zero register (with index 0) +- Reading from the zero register yields zero +- Writing to the zero register is a no-op + +# Memory +- Addresses are 64 bit +- Memory implementation is arbitrary +- When an invalid address is accessed: + - Program shall trap (LoadAccessEx, StoreAccessEx) with the accessed address as a parameter + - Value of register when trapped is undefined + +## Recommendations +- Leave address `0x0` as invalid +- If paging is used: + - Leave first page invalid + - Pages should be at least 4 KiB + +# Program execution +- The way of program execution is implementation defined +- The order of instruction execution is arbitrary, as long as all observable + effects are applied in the program's order + +# Program validation +- Invalid program should cause runtime error: + - The form of error is arbitrary. 
Can be a trap or an interpreter-specified error + - It shall not be handleable from within the program +- Executing an invalid opcode should trap +- Program can be validated either before execution or when executing + +# Traps +Program should at least implement these traps: +- Environment call +- Invalid instruction exception +- Load address exception +- Store address exception + +and the executing environment should be able to get information about them, +like the opcode of the invalid instruction or the address of the attempted load/store. +These details are left to the implementation. + +# Assembly +HoleyBytes assembly format is not defined; this is just a loose description +of `hbasm` syntax. + +- Opcode names correspond to specified opcode names, lowercase (`nop`) +- Parameters are separated by comma (`addi r0, r0, 1`) +- Instructions are separated by either line feed or semicolon +- Registers are represented by `r` followed by the number (`r10`) +- Labels are defined by label name followed by a colon (`loop:`) +- Labels are referenced simply by their name (`print`) +- Immediates are entered plainly. Negative numbers are supported. \ No newline at end of file