From 74f98f610c83588111e28666365a301744b2b664 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 25 Jul 2023 23:43:06 +0200 Subject: [PATCH] Valider is now generated from macro (not done yet) --- hbasm/src/lib.rs | 20 +---- hbasm/src/macros/mod.rs | 16 ++-- hbbytecode/src/gen_valider.rs | 158 ++++++++++++++++++++++++++++++++++ hbbytecode/src/lib.rs | 30 +++++++ hbvm/src/lib.rs | 1 - hbvm/src/main.rs | 3 +- hbvm/src/validate.rs | 98 --------------------- hbvm/src/vm/mod.rs | 10 ++- 8 files changed, 205 insertions(+), 131 deletions(-) create mode 100644 hbbytecode/src/gen_valider.rs delete mode 100644 hbvm/src/validate.rs diff --git a/hbasm/src/lib.rs b/hbasm/src/lib.rs index 62d13717..11b1937d 100644 --- a/hbasm/src/lib.rs +++ b/hbasm/src/lib.rs @@ -19,25 +19,7 @@ pub struct Assembler { // Implement both assembler and generate module for text-code-based one -macros::impl_both!( - bbbb(p0: R, p1: R, p2: R, p3: R) - => [DIR, DIRF, FMAF], - bbb(p0: R, p1: R, p2: R) - => [ADD, SUB, MUL, AND, OR, XOR, SL, SR, SRS, CMP, CMPU, /*BRC,*/ ADDF, SUBF, MULF], - bbdh(p0: R, p1: R, p2: I, p3: L) - => [LD, ST], - bbd(p0: R, p1: R, p2: I) - => [ADDI, MULI, ANDI, ORI, XORI, CMPI, CMPUI, BMC, JAL, JEQ, JNE, JLT, JGT, JLTU, - JGTU, ADDFI, MULFI], - bbw(p0: R, p1: R, p2: u32) - => [SLI, SRI, SRSI], - bb(p0: R, p1: R) - => [NEG, NOT, CP, SWA, NEGF, ITF, FTI], - bd(p0: R, p1: I) - => [LI], - n() - => [UN, NOP, ECALL], -); +hbbytecode::invoke_with_def!(macros::impl_all); impl Assembler { // Special-cased for text-assembler diff --git a/hbasm/src/macros/mod.rs b/hbasm/src/macros/mod.rs index 55d770b7..db4ca008 100644 --- a/hbasm/src/macros/mod.rs +++ b/hbasm/src/macros/mod.rs @@ -1,5 +1,5 @@ //! And here the land of macros begin. -//! +//! //! They do not bite, really. Have you seen what Yandros is writing? pub mod asm; @@ -8,16 +8,16 @@ pub mod text; #[allow(rustdoc::invalid_rust_codeblocks)] /// Generate code for both programmatic-interface assembler and /// textural interface. -/// +/// /// Some people claim: /// > Write programs to handle text streams, because that is a universal interface. -/// +/// /// We at AbleCorp believe that nice programatic API is nicer than piping some text /// into a program. It's less error-prone and faster. -/// +/// /// # Syntax /// ```no_run -/// impl_both!( +/// impl_all!( /// INSTRUCTION_TYPE(p0: TYPE, p1: TYPE, …) /// => [INSTRUCTION_A, INSTRUCTION_B, …], /// … @@ -30,7 +30,7 @@ pub mod text; /// - R: Register (u8) /// - I: Immediate (implements [`crate::Imm`] trait) /// - Other types are identity-mapped -/// +/// /// # Text assembler /// Text assembler generated simply calls methods in the [`crate::Assembler`] type. /// # Syntax @@ -45,7 +45,7 @@ pub mod text; /// - Labels are defined by their names followed by colon `label:` /// - Labels are referenced simply by their names /// - Immediates are numbers, can be negative, floats are not yet supported -macro_rules! impl_both { +macro_rules! impl_all { ($($tt:tt)*) => { impl Assembler { $crate::macros::asm::impl_asm!($($tt)*); @@ -55,4 +55,4 @@ macro_rules! impl_both { }; } -pub(crate) use impl_both; +pub(crate) use impl_all; diff --git a/hbbytecode/src/gen_valider.rs b/hbbytecode/src/gen_valider.rs new file mode 100644 index 00000000..42e053f5 --- /dev/null +++ b/hbbytecode/src/gen_valider.rs @@ -0,0 +1,158 @@ +//! Generate HoleyBytes code validator + +macro_rules! gen_valider { + ( + $( + $ityn:ident + ($($param_i:ident: $param_ty:ident),* $(,)?) + => [$($opcode:ident),* $(,)?], + )* + ) => { + #[allow(unreachable_code)] + pub mod valider { + //! Validate if program is sound to execute + + /// Program validation error kind + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub enum ErrorKind { + /// Unknown opcode + InvalidInstruction, + /// VM doesn't implement this valid opcode + Unimplemented, + /// Attempted to copy over register boundary + RegisterArrayOverflow, + /// Program is not validly terminated + InvalidEnd, + } + + /// Error + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct Error { + /// Kind + pub kind: ErrorKind, + /// Location in bytecode + pub index: usize, + } + + /// Perform bytecode validation. If it passes, the program should be + /// sound to execute. + pub fn validate(mut program: &[u8]) -> Result<(), Error> { + // Program has to end with 12 zeroes, if there is less than + // 12 bytes, program is invalid. + if program.len() < 12 { + return Err(Error { + kind: ErrorKind::InvalidEnd, + index: 0, + }); + } + + // Verify that program ends with 12 zeroes + for (index, item) in program.iter().enumerate().skip(program.len() - 12) { + if *item != 0 { + return Err(Error { + kind: ErrorKind::InvalidEnd, + index, + }); + } + } + + let start = program; + loop { + use crate::opcode::*; + program = match program { + // End of program + [] => return Ok(()), + + // Memory load/store cannot go out-of-bounds register array + [LD..=ST, reg, _, _, _, _, _, _, _, _, count_0, count_1, ..] + if usize::from(*reg) * 8 + + usize::from(u16::from_le_bytes([*count_1, *count_0])) + > 2048 => + { + return Err(Error { + kind: ErrorKind::RegisterArrayOverflow, + index: (program.as_ptr() as usize) - (start.as_ptr() as usize), + }); + } + + // Block register copy cannot go out-of-bounds register array + [BRC, src, dst, count, ..] + if src.checked_add(*count).is_none() + || dst.checked_add(*count).is_none() => + { + return Err(Error { + kind: ErrorKind::RegisterArrayOverflow, + index: (program.as_ptr() as usize) - (start.as_ptr() as usize), + }); + } + + $( + $crate::gen_valider::inst_chk!( + rest, $ityn, $($opcode),* + ) + )|* => rest, + + // The plebs + _ => { + return Err(Error { + kind: ErrorKind::InvalidInstruction, + index: (program.as_ptr() as usize) - (start.as_ptr() as usize), + }) + } + } + } + } + } + }; +} + +/// Generate instruction check pattern +macro_rules! inst_chk { + // Sadly this has hardcoded instruction types, + // as I cannot generate parts of patterns+ + + ($rest:ident, bbbb, $($opcode:ident),*) => { + // B B B B + [$($opcode)|*, _, _, _, _, $rest @ ..] + }; + + ($rest:ident, bbb, $($opcode:ident),*) => { + // B B B + [$($opcode)|*, _, _, _, $rest @ ..] + }; + + ($rest:ident, bbdh, $($opcode:ident),*) => { + // B B D1 D2 D3 D4 D5 D6 D7 D8 H1 H2 + [$($opcode)|*, _, _, _, _, _, _, _, _, _, _, _, _, $rest @ ..] + }; + + ($rest:ident, bbd, $($opcode:ident),*) => { + // B B D1 D2 D3 D4 D5 D6 D7 D8 + [$($opcode)|*, _, _, _, _, _, _, _, _, _, _, $rest @ ..] + }; + + ($rest:ident, bbw, $($opcode:ident),*) => { + // B B W1 W2 W3 W4 + [$($opcode)|*, _, _, _, _, _, _, $rest @ ..] + }; + + ($rest:ident, bb, $($opcode:ident),*) => { + // B B + [$($opcode)|*, _, _, $rest @ ..] + }; + + ($rest:ident, bd, $($opcode:ident),*) => { + // B D1 D2 D3 D4 D5 D6 D7 D8 + [$($opcode)|*, _, _, _, _, _, _, _, _, _, $rest @ ..] + }; + + ($rest:ident, n, $($opcode:ident),*) => { + [$($opcode)|*, $rest @ ..] + }; + + ($_0:ident, $($_1:ident),*) => { + compile_error!("Invalid instruction type"); + } +} + +pub(crate) use {gen_valider, inst_chk}; diff --git a/hbbytecode/src/lib.rs b/hbbytecode/src/lib.rs index 1cf0b21d..08cb345a 100644 --- a/hbbytecode/src/lib.rs +++ b/hbbytecode/src/lib.rs @@ -1,5 +1,7 @@ #![no_std] +mod gen_valider; + macro_rules! constmod { ($vis:vis $mname:ident($repr:ty) { $(#![doc = $mdoc:literal])? @@ -15,6 +17,34 @@ macro_rules! constmod { }; } +/// Invoke macro with bytecode definition format +#[macro_export] +macro_rules! invoke_with_def { + ($macro:path) => { + $macro!( + bbbb(p0: R, p1: R, p2: R, p3: R) + => [DIR, DIRF, FMAF], + bbb(p0: R, p1: R, p2: R) + => [ADD, SUB, MUL, AND, OR, XOR, SL, SR, SRS, CMP, CMPU, /*BRC,*/ ADDF, SUBF, MULF], + bbdh(p0: R, p1: R, p2: I, p3: L) + => [LD, ST], + bbd(p0: R, p1: R, p2: I) + => [ADDI, MULI, ANDI, ORI, XORI, CMPI, CMPUI, BMC, JAL, JEQ, JNE, JLT, JGT, JLTU, + JGTU, ADDFI, MULFI], + bbw(p0: R, p1: R, p2: u32) + => [SLI, SRI, SRSI], + bb(p0: R, p1: R) + => [NEG, NOT, CP, SWA, NEGF, ITF, FTI], + bd(p0: R, p1: I) + => [LI], + n() + => [UN, NOP, ECALL], + ); + }; +} + +invoke_with_def!(gen_valider::gen_valider); + constmod!(pub opcode(u8) { //! Opcode constant module diff --git a/hbvm/src/lib.rs b/hbvm/src/lib.rs index 82559847..1feaa738 100644 --- a/hbvm/src/lib.rs +++ b/hbvm/src/lib.rs @@ -3,5 +3,4 @@ extern crate alloc; -pub mod validate; pub mod vm; diff --git a/hbvm/src/main.rs b/hbvm/src/main.rs index 28ec8ced..b9d7a241 100644 --- a/hbvm/src/main.rs +++ b/hbvm/src/main.rs @@ -1,7 +1,8 @@ use hbvm::vm::mem::{HandlePageFault, Memory, MemoryAccessReason, PageSize}; use { - hbvm::{validate::validate, vm::Vm}, + hbbytecode::valider::validate, + hbvm::vm::Vm, std::io::{stdin, Read}, }; diff --git a/hbvm/src/validate.rs b/hbvm/src/validate.rs deleted file mode 100644 index 3bc7d81d..00000000 --- a/hbvm/src/validate.rs +++ /dev/null @@ -1,98 +0,0 @@ -//! Validate if program is sound to execute - -/// Program validation error kind -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum ErrorKind { - /// Unknown opcode - InvalidInstruction, - /// VM doesn't implement this valid opcode - Unimplemented, - /// Attempted to copy over register boundary - RegisterArrayOverflow, - /// Program is not validly terminated - InvalidEnd, -} - -/// Error -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct Error { - /// Kind - pub kind: ErrorKind, - /// Location in bytecode - pub index: usize, -} - -/// Perform bytecode validation. If it passes, the program should be -/// sound to execute. -pub fn validate(mut program: &[u8]) -> Result<(), Error> { - // Program has to end with 12 zeroes, if there is less than - // 12 bytes, program is invalid. - if program.len() < 12 { - return Err(Error { - kind: ErrorKind::InvalidEnd, - index: 0, - }); - } - - // Verify that program ends with 12 zeroes - for (index, item) in program.iter().enumerate().skip(program.len() - 12) { - if *item != 0 { - return Err(Error { - kind: ErrorKind::InvalidEnd, - index, - }); - } - } - - let start = program; - loop { - use hbbytecode::opcode::*; - // Match on instruction types and perform necessary checks - program = match program { - // End of program - [] => return Ok(()), - - // Memory load/store cannot go out-of-bounds register array - [LD..=ST, reg, _, _, _, _, _, _, _, _, count_0, count_1, ..] - if usize::from(*reg) * 8 - + usize::from(u16::from_le_bytes([*count_1, *count_0])) - > 2048 => - { - return Err(Error { - kind: ErrorKind::RegisterArrayOverflow, - index: (program.as_ptr() as usize) - (start.as_ptr() as usize), - }) - } - - // Block register copy cannot go out-of-bounds register array - [BRC, src, dst, count, ..] - if src.checked_add(*count).is_none() || dst.checked_add(*count).is_none() => - { - return Err(Error { - kind: ErrorKind::RegisterArrayOverflow, - index: (program.as_ptr() as usize) - (start.as_ptr() as usize), - }) - } - - // Valid instructions - [DIR | DIRF | FMAF, _, _, _, _, rest @ ..] // BBBB - | [ADD | SUB | MUL | AND | OR | XOR | SL | SR | SRS | CMP | CMPU | BRC | ADDF | SUBF | MULF, _, _, _, rest @ ..] - | [LD | ST, _, _, _, _, _, _, _, _, _, _, _, rest @ ..] // BBDH - | [ - ADDI | MULI | ANDI | ORI | XORI | CMPI | CMPUI | BMC | JAL | JEQ | JNE | JLT | JGT | JLTU | JGTU | ADDFI | MULFI, _, _, _, _, _, _, _, _, _, _, rest @ ..] // BBD - | [SLI | SRI | SRSI, _, _, _, _, _, _, rest @ ..] // BBW - | [NEG | NOT | CP | SWA | NEGF | ITF | FTI, _, _, rest @ ..] // BB - | [LI, _, _, _, _, _, _, _, _, _, rest @ ..] // BD - | [UN | NOP | ECALL, rest @ ..] // N - => rest, - - // The rest - _ => { - return Err(Error { - kind: ErrorKind::InvalidInstruction, - index: (program.as_ptr() as usize) - (start.as_ptr() as usize), - }) - } - } - } -} diff --git a/hbvm/src/vm/mod.rs b/hbvm/src/vm/mod.rs index 797d396f..1da55c95 100644 --- a/hbvm/src/vm/mod.rs +++ b/hbvm/src/vm/mod.rs @@ -12,9 +12,10 @@ pub mod value; use { self::{mem::HandlePageFault, value::ValueVariant}, - crate::validate, core::{cmp::Ordering, ops}, - hbbytecode::{OpParam, ParamBB, ParamBBB, ParamBBBB, ParamBBD, ParamBBDH, ParamBBW, ParamBD}, + hbbytecode::{ + valider, OpParam, ParamBB, ParamBBB, ParamBBBB, ParamBBD, ParamBBDH, ParamBBW, ParamBD, + }, mem::Memory, value::Value, }; @@ -66,8 +67,8 @@ impl<'a, PfHandler: HandlePageFault, const TIMER_QUOTIENT: usize> } /// Create a new VM with program and trap handler only if it passes validation - pub fn new_validated(program: &'a [u8], traph: PfHandler) -> Result { - validate::validate(program)?; + pub fn new_validated(program: &'a [u8], traph: PfHandler) -> Result { + valider::validate(program)?; Ok(unsafe { Self::new_unchecked(program, traph) }) } @@ -249,6 +250,7 @@ impl<'a, PfHandler: HandlePageFault, const TIMER_QUOTIENT: usize> BRC => { // Block register copy let ParamBBB(src, dst, count) = self.decode(); + extern crate std; core::ptr::copy( self.registers.get_unchecked(usize::from(src)), self.registers.get_unchecked_mut(usize::from(dst)),