From 141c5f524f19f8afdbcc9d6cfb0e7e7fd6558d8a Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 13 Jul 2023 11:05:41 +0200 Subject: [PATCH] Added UN instruction and fixed UB --- Cargo.lock | 33 ++++++------- hbasm/src/lib.rs | 7 ++- hbasm/src/main.rs | 1 + hbbytecode/src/lib.rs | 103 ++++++++++++++++++++-------------------- hbvm/src/validate.rs | 20 +++++++- hbvm/src/vm/mod.rs | 24 ++++++---- spec.md | 106 +++++++++++++++++++++--------------------- 7 files changed, 163 insertions(+), 131 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c53369..007c080 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -165,12 +165,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" [[package]] name = "logos" @@ -192,7 +189,7 @@ dependencies = [ "proc-macro2", "quote", "regex-syntax", - "syn 2.0.18", + "syn 2.0.25", ] [[package]] @@ -206,30 +203,30 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "paste" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" +checksum = "b4b27ab7be369122c218afc2079489cdcb4b517c0a3fc386ff11e1fedfcc2b35" [[package]] name = "proc-macro2" -version = "1.0.59" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b" +checksum = "78803b62cbf1f46fde80d7c0e803111524b9877184cfe7c3033659490ac7a7da" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.28" +version = "1.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" +checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" dependencies = [ "proc-macro2", ] @@ -274,9 +271,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.18" +version = "2.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" +checksum = "15e3fc8c0c74267e2df136e5e5fb656a464158aa57624053375eb9c8c6e25ae2" dependencies = [ "proc-macro2", "quote", @@ -285,9 +282,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" +checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" [[package]] name = "unicode-width" diff --git a/hbasm/src/lib.rs b/hbasm/src/lib.rs index 391b251..a9cecc7 100644 --- a/hbasm/src/lib.rs +++ b/hbasm/src/lib.rs @@ -27,7 +27,7 @@ macros::impl_both!( bd(p0: R, p1: I) => [LI], n() - => [NOP, ECALL], + => [UN, NOP, ECALL], ); impl Assembler { @@ -36,6 +36,11 @@ impl Assembler { pub fn i_brc(&mut self, p0: u8, p1: u8, p2: u8) { self.i_param_bbb(hbbytecode::opcode::BRC, p0, p1, p2) } + + /// Append 12 zeroes (UN) at the end + pub fn finalise(&mut self) { + self.buf.extend([0; 12]); + } } pub trait Imm { diff --git a/hbasm/src/main.rs b/hbasm/src/main.rs index 844f2d6..8ce4f57 100644 --- a/hbasm/src/main.rs +++ b/hbasm/src/main.rs @@ -48,6 +48,7 @@ fn main() -> Result<(), Box> { .eprint(("engine_internal", Source::from(&code))) .unwrap(); } else { + assembler.finalise(); std::io::stdout().lock().write_all(&assembler.buf).unwrap(); } diff --git a/hbbytecode/src/lib.rs b/hbbytecode/src/lib.rs index 9b4f60b..400cab1 100644 --- a/hbbytecode/src/lib.rs +++ b/hbbytecode/src/lib.rs @@ -18,62 +18,63 @@ macro_rules! constmod { constmod!(pub opcode(u8) { //! Opcode constant module - NOP = 0, "N; Do nothing"; + UN = 0, "N; Raises a trap"; + NOP = 1, "N; Do nothing"; + + ADD = 2, "BBB; #0 ← #1 + #2"; + SUB = 3, "BBB; #0 ← #1 - #2"; + MUL = 4, "BBB; #0 ← #1 × #2"; + AND = 5, "BBB; #0 ← #1 & #2"; + OR = 6, "BBB; #0 ← #1 | #2"; + XOR = 7, "BBB; #0 ← #1 ^ #2"; + SL = 8, "BBB; #0 ← #1 « #2"; + SR = 9, "BBB; #0 ← #1 » #2"; + SRS = 10, "BBB; #0 ← #1 » #2 (signed)"; + CMP = 11, "BBB; #0 ← #1 <=> #2"; + CMPU = 12, "BBB; #0 ← #1 <=> #2 (unsigned)"; + DIR = 13, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3"; + NEG = 14, "BB; #0 ← -#1"; + NOT = 15, "BB; #0 ← !#1"; - ADD = 1, "BBB; #0 ← #1 + #2"; - SUB = 2, "BBB; #0 ← #1 - #2"; - MUL = 3, "BBB; #0 ← #1 × #2"; - AND = 4, "BBB; #0 ← #1 & #2"; - OR = 5, "BBB; #0 ← #1 | #2"; - XOR = 6, "BBB; #0 ← #1 ^ #2"; - SL = 7, "BBB; #0 ← #1 « #2"; - SR = 8, "BBB; #0 ← #1 » #2"; - SRS = 9, "BBB; #0 ← #1 » #2 (signed)"; - CMP = 10, "BBB; #0 ← #1 <=> #2"; - CMPU = 11, "BBB; #0 ← #1 <=> #2 (unsigned)"; - DIR = 12, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3"; - NEG = 13, "BB; #0 ← -#1"; - NOT = 14, "BB; #0 ← !#1"; + ADDI = 16, "BBD; #0 ← #1 + imm #2"; + MULI = 17, "BBD; #0 ← #1 × imm #2"; + ANDI = 18, "BBD; #0 ← #1 & imm #2"; + ORI = 19, "BBD; #0 ← #1 | imm #2"; + XORI = 20, "BBD; #0 ← #1 ^ imm #2"; + SLI = 21, "BBD; #0 ← #1 « imm #2"; + SRI = 22, "BBD; #0 ← #1 » imm #2"; + SRSI = 23, "BBD; #0 ← #1 » imm #2 (signed)"; + CMPI = 24, "BBD; #0 ← #1 <=> imm #2"; + CMPUI = 25, "BBD; #0 ← #1 <=> imm #2 (unsigned)"; - ADDI = 15, "BBD; #0 ← #1 + imm #2"; - MULI = 16, "BBD; #0 ← #1 × imm #2"; - ANDI = 17, "BBD; #0 ← #1 & imm #2"; - ORI = 18, "BBD; #0 ← #1 | imm #2"; - XORI = 19, "BBD; #0 ← #1 ^ imm #2"; - SLI = 20, "BBD; #0 ← #1 « imm #2"; - SRI = 21, "BBD; #0 ← #1 » imm #2"; - SRSI = 22, "BBD; #0 ← #1 » imm #2 (signed)"; - CMPI = 23, "BBD; #0 ← #1 <=> imm #2"; - CMPUI = 24, "BBD; #0 ← #1 <=> imm #2 (unsigned)"; + CP = 26, "BB; Copy #0 ← #1"; + SWA = 27, "BB; Swap #0 and #1"; + LI = 28, "BD; #0 ← imm #1"; + LD = 29, "BBDB; #0 ← [#1 + imm #3], imm #4 bytes, overflowing"; + ST = 30, "BBDB; [#1 + imm #3] ← #0, imm #4 bytes, overflowing"; + BMC = 31, "BBD; [#0] ← [#1], imm #2 bytes"; + BRC = 32, "BBB; #0 ← #1, imm #2 registers"; - CP = 25, "BB; Copy #0 ← #1"; - SWA = 26, "BB; Swap #0 and #1"; - LI = 27, "BD; #0 ← imm #1"; - LD = 28, "BBDB; #0 ← [#1 + imm #3], imm #4 bytes, overflowing"; - ST = 29, "BBDB; [#1 + imm #3] ← #0, imm #4 bytes, overflowing"; - BMC = 30, "BBD; [#0] ← [#1], imm #2 bytes"; - BRC = 31, "BBB; #0 ← #1, imm #2 registers"; + JAL = 33, "BD; Copy PC to #0 and unconditional jump [#1 + imm #2]"; + JEQ = 34, "BBD; if #0 = #1 → jump imm #2"; + JNE = 35, "BBD; if #0 ≠ #1 → jump imm #2"; + JLT = 36, "BBD; if #0 < #1 → jump imm #2"; + JGT = 37, "BBD; if #0 > #1 → jump imm #2"; + JLTU = 38, "BBD; if #0 < #1 → jump imm #2 (unsigned)"; + JGTU = 39, "BBD; if #0 > #1 → jump imm #2 (unsigned)"; + ECALL = 40, "N; Issue system call"; - JAL = 32, "BD; Copy PC to #0 and unconditional jump [#1 + imm #2]"; - JEQ = 33, "BBD; if #0 = #1 → jump imm #2"; - JNE = 34, "BBD; if #0 ≠ #1 → jump imm #2"; - JLT = 35, "BBD; if #0 < #1 → jump imm #2"; - JGT = 36, "BBD; if #0 > #1 → jump imm #2"; - JLTU = 37, "BBD; if #0 < #1 → jump imm #2 (unsigned)"; - JGTU = 38, "BBD; if #0 > #1 → jump imm #2 (unsigned)"; - ECALL = 39, "N; Issue system call"; + ADDF = 41, "BBB; #0 ← #1 +. #2"; + SUBF = 42, "BBB; #0 ← #1 -. #2"; + MULF = 43, "BBB; #0 ← #1 +. #2"; + DIRF = 44, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3"; + FMAF = 45, "BBBB; #0 ← (#1 * #2) + #3"; + NEGF = 46, "BB; #0 ← -#1"; + ITF = 47, "BB; #0 ← #1 as float"; + FTI = 48, "BB; #0 ← #1 as int"; - ADDF = 40, "BBB; #0 ← #1 +. #2"; - SUBF = 41, "BBB; #0 ← #1 -. #2"; - MULF = 42, "BBB; #0 ← #1 +. #2"; - DIRF = 43, "BBBB; #0 ← #2 / #3, #1 ← #2 % #3"; - FMAF = 44, "BBBB; #0 ← (#1 * #2) + #3"; - NEGF = 45, "BB; #0 ← -#1"; - ITF = 46, "BB; #0 ← #1 as float"; - FTI = 47, "BB; #0 ← #1 as int"; - - ADDFI = 48, "BBD; #0 ← #1 +. imm #2"; - MULFI = 49, "BBD; #0 ← #1 *. imm #2"; + ADDFI = 49, "BBD; #0 ← #1 +. imm #2"; + MULFI = 50, "BBD; #0 ← #1 *. imm #2"; }); #[repr(packed)] diff --git a/hbvm/src/validate.rs b/hbvm/src/validate.rs index d75306b..41b1636 100644 --- a/hbvm/src/validate.rs +++ b/hbvm/src/validate.rs @@ -9,6 +9,8 @@ pub enum ErrorKind { Unimplemented, /// Attempted to copy over register boundary RegisterArrayOverflow, + /// Program is not validly terminated + InvalidEnd, } /// Error @@ -25,6 +27,22 @@ pub struct Error { pub fn validate(mut program: &[u8]) -> Result<(), Error> { use hbbytecode::opcode::*; + if program.len() < 12 { + return Err(Error { + kind: ErrorKind::InvalidEnd, + index: 0, + }); + } + + for (index, item) in program.iter().enumerate().skip(program.len() - 12) { + if *item != 0 { + return Err(Error { + kind: ErrorKind::InvalidEnd, + index, + }); + } + } + let start = program; loop { // Match on instruction types and perform necessary checks @@ -46,7 +64,7 @@ pub fn validate(mut program: &[u8]) -> Result<(), Error> { index: (program.as_ptr() as usize) - (start.as_ptr() as usize), }) } - [NOP | ECALL, rest @ ..] + [UN | NOP | ECALL, rest @ ..] | [DIR | DIRF, _, _, _, _, rest @ ..] | [ADD..=CMPU | BRC | ADDF..=MULF, _, _, _, rest @ ..] | [NEG..=NOT | CP..=SWA | NEGF..=FTI, _, _, rest @ ..] diff --git a/hbvm/src/vm/mod.rs b/hbvm/src/vm/mod.rs index 1fcee80..bf12d70 100644 --- a/hbvm/src/vm/mod.rs +++ b/hbvm/src/vm/mod.rs @@ -6,10 +6,6 @@ // - Validation has to assure there is 256 registers (r0 - r255) // - Instructions have to be valid as specified (values and sizes) // - Mapped pages should be at least 4 KiB -// - Yes, I am aware of the UB when jumping in-mid of instruction where -// the read byte corresponds to an instruction whose lenght exceets the -// program size. If you are (rightfully) worried about the UB, for now just -// append your program with 11 zeroes. use self::mem::HandlePageFault; @@ -97,6 +93,9 @@ pub struct Vm<'a, PfHandler, const TIMER_QUOTIENT: usize> { /// Program program: &'a [u8], + /// Cached program length (without unreachable end) + program_len: usize, + /// Program timer timer: usize, } @@ -114,6 +113,7 @@ impl<'a, PfHandler: HandlePageFault, const TIMER_QUOTIENT: usize> memory: Default::default(), pfhandler: traph, pc: 0, + program_len: program.len() - 12, program, timer: 0, } @@ -131,13 +131,18 @@ impl<'a, PfHandler: HandlePageFault, const TIMER_QUOTIENT: usize> pub fn run(&mut self) -> Result { use hbbytecode::opcode::*; loop { - // Fetch instruction - let Some(&opcode) = self.program.get(self.pc) - else { return Ok(VmRunOk::End) }; + // Check instruction boundary + if self.pc >= self.program_len { + return Ok(VmRunOk::End); + } // Big match unsafe { - match opcode { + match *self.program.get_unchecked(self.pc) { + UN => { + param!(self, ()); + return Err(VmRunError::Unreachable); + } NOP => param!(self, ()), ADD => binary_op!(self, as_u64, u64::wrapping_add), SUB => binary_op!(self, as_u64, u64::wrapping_sub), @@ -352,6 +357,9 @@ pub enum VmRunError { /// Unhandled store access exception StoreAccessEx(u64), + + /// Reached unreachable code + Unreachable, } /// Virtual machine halt ok diff --git a/spec.md b/spec.md index 55c3aaa..8fc0201 100644 --- a/spec.md +++ b/spec.md @@ -4,6 +4,7 @@ - All numbers are encoded little-endian - There is 256 registers, they are represented by a byte - Immediate values are 64 bit +- Program is by spec required to be terminated with 12 zero bytes ### Instruction encoding - Instruction parameters are packed (no alignment) @@ -34,9 +35,10 @@ ## No-op - N type -| Opcode | Name | Action | -|:------:|:----:|:----------:| -| 0 | NOP | Do nothing | +| Opcode | Name | Action | +|:------:|:----:|:-----------------------------:| +| 0 | UN | Trigger unreachable code trap | +| 1 | NOP | Do nothing | ## Integer binary ops. - BBB type @@ -44,21 +46,21 @@ | Opcode | Name | Action | |:------:|:----:|:-----------------------:| -| 1 | ADD | Wrapping addition | -| 2 | SUB | Wrapping subtraction | -| 3 | MUL | Wrapping multiplication | -| 4 | AND | Bitand | -| 5 | OR | Bitor | -| 6 | XOR | Bitxor | -| 7 | SL | Unsigned left bitshift | -| 8 | SR | Unsigned right bitshift | -| 9 | SRS | Signed right bitshift | +| 2 | ADD | Wrapping addition | +| 3 | SUB | Wrapping subtraction | +| 4 | MUL | Wrapping multiplication | +| 5 | AND | Bitand | +| 6 | OR | Bitor | +| 7 | XOR | Bitxor | +| 8 | SL | Unsigned left bitshift | +| 9 | SR | Unsigned right bitshift | +| 10 | SRS | Signed right bitshift | ### Comparsion | Opcode | Name | Action | |:------:|:----:|:-------------------:| -| 10 | CMP | Signed comparsion | -| 11 | CMPU | Unsigned comparsion | +| 11 | CMP | Signed comparsion | +| 12 | CMPU | Unsigned comparsion | #### Comparsion table | #1 *op* #2 | Result | @@ -75,7 +77,7 @@ | Opcode | Name | Action | |:------:|:----:|:-------------------------------:| -| 12 | DIR | Divide and remainder combinated | +| 13 | DIR | Divide and remainder combinated | ### Negations - Type BB @@ -83,8 +85,8 @@ | Opcode | Name | Action | |:------:|:----:|:----------------:| -| 13 | NEG | Bit negation | -| 14 | NOT | Logical negation | +| 14 | NEG | Bit negation | +| 15 | NOT | Logical negation | ## Integer immediate binary ops. - Type BBD @@ -92,22 +94,22 @@ | Opcode | Name | Action | |:------:|:----:|:-----------------------:| -| 15 | ADDI | Wrapping addition | -| 16 | MULI | Wrapping subtraction | -| 17 | ANDI | Bitand | -| 18 | ORI | Bitor | -| 19 | XORI | Bitxor | -| 20 | SLI | Unsigned left bitshift | -| 21 | SRI | Unsigned right bitshift | -| 22 | SRSI | Signed right bitshift | +| 16 | ADDI | Wrapping addition | +| 17 | MULI | Wrapping subtraction | +| 18 | ANDI | Bitand | +| 19 | ORI | Bitor | +| 20 | XORI | Bitxor | +| 21 | SLI | Unsigned left bitshift | +| 22 | SRI | Unsigned right bitshift | +| 23 | SRSI | Signed right bitshift | ### Comparsion - Comparsion is the same as when RRR type | Opcode | Name | Action | |:------:|:-----:|:-------------------:| -| 23 | CMPI | Signed comparsion | -| 24 | CMPUI | Unsigned comparsion | +| 24 | CMPI | Signed comparsion | +| 25 | CMPUI | Unsigned comparsion | ## Register value set / copy @@ -117,7 +119,7 @@ | Opcode | Name | Action | |:------:|:----:|:------:| -| 25 | CP | Copy | +| 26 | CP | Copy | ### Swap - Type BB @@ -125,7 +127,7 @@ | Opcode | Name | Action | |:------:|:----:|:------:| -| 26 | SWA | Swap | +| 27 | SWA | Swap | ### Load immediate - Type BD @@ -133,7 +135,7 @@ | Opcode | Name | Action | |:------:|:----:|:--------------:| -| 27 | LI | Load immediate | +| 28 | LI | Load immediate | ## Memory operations - Type BBDH @@ -142,8 +144,8 @@ ### Load / Store | Opcode | Name | Action | |:------:|:----:|:---------------------------------------:| -| 28 | LD | `#0 ← [#1 + imm #3], copy imm #4 bytes` | -| 29 | ST | `[#1 + imm #3] ← #0, copy imm #4 bytes` | +| 29 | LD | `#0 ← [#1 + imm #3], copy imm #4 bytes` | +| 30 | ST | `[#1 + imm #3] ← #0, copy imm #4 bytes` | ## Block copy - Block copy source and target can overlap @@ -153,7 +155,7 @@ | Opcode | Name | Action | |:------:|:----:|:--------------------------------:| -| 30 | BMC | `[#1] ← [#0], copy imm #2 bytes` | +| 31 | BMC | `[#1] ← [#0], copy imm #2 bytes` | ### Register copy - Type BBB @@ -161,7 +163,7 @@ | Opcode | Name | Action | |:------:|:----:|:--------------------------------:| -| 31 | BRC | `#1 ← #0, copy imm #2 registers` | +| 32 | BRC | `#1 ← #0, copy imm #2 registers` | ## Control flow @@ -170,7 +172,7 @@ | Opcode | Name | Action | |:------:|:----:|:-------------------------------------------------:| -| 32 | JAL | Save current PC to `#0` and jump at `#1 + imm #2` | +| 33 | JAL | Save current PC to `#0` and jump at `#1 + imm #2` | ### Conditional jumps - Type BBD @@ -178,19 +180,19 @@ | Opcode | Name | Comparsion | |:------:|:----:|:------------:| -| 33 | JEQ | = | -| 34 | JNE | ≠ | -| 35 | JLT | < (signed) | -| 36 | JGT | > (signed) | -| 37 | JLTU | < (unsigned) | -| 38 | JGTU | > (unsigned) | +| 34 | JEQ | = | +| 35 | JNE | ≠ | +| 36 | JLT | < (signed) | +| 37 | JGT | > (signed) | +| 38 | JLTU | < (unsigned) | +| 39 | JGTU | > (unsigned) | ### Environment call - Type N | Opcode | Name | Action | |:------:|:-----:|:-------------------------------------:| -| 39 | ECALL | Cause an trap to the host environment | +| 40 | ECALL | Cause an trap to the host environment | ## Floating point operations - Type BBB @@ -198,29 +200,29 @@ | Opcode | Name | Action | |:------:|:----:|:--------------:| -| 40 | ADDF | Addition | -| 41 | SUBF | Subtraction | -| 42 | MULF | Multiplication | +| 41 | ADDF | Addition | +| 42 | SUBF | Subtraction | +| 43 | MULF | Multiplication | ### Division-remainder - Type BBBB | Opcode | Name | Action | |:------:|:----:|:-------------------------:| -| 43 | DIRF | Same as for integer `DIR` | +| 44 | DIRF | Same as for integer `DIR` | ### Fused Multiply-Add - Type BBBB | Opcode | Name | Action | |:------:|:----:|:---------------------:| -| 44 | FMAF | `#0 ← (#1 * #2) + #3` | +| 45 | FMAF | `#0 ← (#1 * #2) + #3` | ### Negation - Type BB | Opcode | Name | Action | |:------:|:----:|:----------:| -| 45 | NEGF | `#0 ← -#1` | +| 46 | NEGF | `#0 ← -#1` | ### Conversion - Type BB @@ -229,8 +231,8 @@ | Opcode | Name | Action | |:------:|:----:|:------------:| -| 46 | ITF | Int to Float | -| 47 | FTI | Float to Int | +| 47 | ITF | Int to Float | +| 48 | FTI | Float to Int | ## Floating point immediate operations - Type BBD @@ -238,8 +240,8 @@ | Opcode | Name | Action | |:------:|:-----:|:--------------:| -| 48 | ADDFI | Addition | -| 49 | MULFI | Multiplication | +| 49 | ADDFI | Addition | +| 50 | MULFI | Multiplication | # Registers - There is 255 registers + one zero register (with index 0)