More comments

This commit is contained in:
Erin 2023-07-22 02:26:03 +02:00 committed by ondra05
parent 29084d7e55
commit 89c08a8602
7 changed files with 290 additions and 65 deletions

View file

@ -6,12 +6,19 @@ mod macros;
use {alloc::vec::Vec, hashbrown::HashSet}; use {alloc::vec::Vec, hashbrown::HashSet};
/// Assembler
///
/// - Opcode-generic, instruction-type-specific methods are named `i_param_<type>`
/// - You likely won't need to use them, but they are here, just in case :)
/// - Instruction-specific methods are named `i_<instruction>`
#[derive(Default)] #[derive(Default)]
pub struct Assembler { pub struct Assembler {
pub buf: Vec<u8>, pub buf: Vec<u8>,
pub sub: HashSet<usize>, pub sub: HashSet<usize>,
} }
// Implement both assembler and generate module for text-code-based one
macros::impl_both!( macros::impl_both!(
bbbb(p0: R, p1: R, p2: R, p3: R) bbbb(p0: R, p1: R, p2: R, p3: R)
=> [DIR, DIRF, FMAF], => [DIR, DIRF, FMAF],
@ -31,7 +38,9 @@ macros::impl_both!(
); );
impl Assembler { impl Assembler {
// Special-cased // Special-cased for text-assembler
//
// `p2` is not a register, but the instruction is still BBB
#[inline(always)] #[inline(always)]
pub fn i_brc(&mut self, p0: u8, p1: u8, p2: u8) { pub fn i_brc(&mut self, p0: u8, p1: u8, p2: u8) {
self.i_param_bbb(hbbytecode::opcode::BRC, p0, p1, p2) self.i_param_bbb(hbbytecode::opcode::BRC, p0, p1, p2)
@ -39,20 +48,49 @@ impl Assembler {
/// Append 12 zeroes (UN) at the end /// Append 12 zeroes (UN) at the end
pub fn finalise(&mut self) { pub fn finalise(&mut self) {
// HBVM lore:
//
// In reference HBVM implementation checks are done in
// a separate phase before execution.
//
// This way execution will be much faster as they have to
// be done only once.
//
// There was an issue. You cannot statically check register values, and the
// `JAL` instruction could hop at the end of the program to some byte, which
// would be interpreted as an opcode; the VM, in an attempt to decode the
// instruction, would perform an out-of-bounds read, which is undefined behaviour.
//
// Several options were considered to overcome this; the one chosen was to insert
// some data at the program's end which, when executed, leads to undesired
// behaviour, though not undefined behaviour.
//
// Newly created `UN` (as UNreachable) was chosen as
// - It was a good idea to add some equivalent to `ud2` anyways
// - It's zeroes
// - If you somehow reach that code, it will appropriately bail :)
self.buf.extend([0; 12]); self.buf.extend([0; 12]);
} }
} }
/// Immediate value
///
/// # Implementor notice
/// It should insert exactly 8 bytes, otherwise output will be malformed.
/// This is not checked in any way
pub trait Imm { pub trait Imm {
/// Insert immediate value
fn insert(&self, asm: &mut Assembler); fn insert(&self, asm: &mut Assembler);
} }
/// Implement immediate values
macro_rules! impl_imm_le_bytes { macro_rules! impl_imm_le_bytes {
($($ty:ty),* $(,)?) => { ($($ty:ty),* $(,)?) => {
$( $(
impl Imm for $ty { impl Imm for $ty {
#[inline(always)] #[inline(always)]
fn insert(&self, asm: &mut Assembler) { fn insert(&self, asm: &mut Assembler) {
// Convert to little-endian bytes, insert.
asm.buf.extend(self.to_le_bytes()); asm.buf.extend(self.to_le_bytes());
} }
} }

View file

@ -1,5 +1,9 @@
//! Macros to generate [`crate::Assembler`]
/// Incremental token-tree muncher to implement specific instruction
/// functions based on generic function for instruction type
macro_rules! impl_asm_opcodes { macro_rules! impl_asm_opcodes {
( ( // End case
$generic:ident $generic:ident
($($param_i:ident: $param_ty:ty),*) ($($param_i:ident: $param_ty:ty),*)
=> [] => []
@ -10,6 +14,7 @@ macro_rules! impl_asm_opcodes {
($($param_i:ident: $param_ty:ty),*) ($($param_i:ident: $param_ty:ty),*)
=> [$opcode:ident, $($rest:tt)*] => [$opcode:ident, $($rest:tt)*]
) => { ) => {
// Instruction-specific function
paste::paste! { paste::paste! {
#[inline(always)] #[inline(always)]
pub fn [<i_ $opcode:lower>](&mut self, $($param_i: $param_ty),*) { pub fn [<i_ $opcode:lower>](&mut self, $($param_i: $param_ty),*) {
@ -17,6 +22,7 @@ macro_rules! impl_asm_opcodes {
} }
} }
// And recurse!
macros::asm::impl_asm_opcodes!( macros::asm::impl_asm_opcodes!(
$generic($($param_i: $param_ty),*) $generic($($param_i: $param_ty),*)
=> [$($rest)*] => [$($rest)*]
@ -24,16 +30,21 @@ macro_rules! impl_asm_opcodes {
}; };
} }
/// Numeric value insert
macro_rules! impl_asm_insert { macro_rules! impl_asm_insert {
// Immediate - this is trait-based,
// the insertion is delegated to its implementation
($self:expr, $id:ident, I) => { ($self:expr, $id:ident, I) => {
Imm::insert(&$id, $self) Imm::insert(&$id, $self)
}; };
// Other numbers, just insert their bytes, little endian
($self:expr, $id:ident, $_:ident) => { ($self:expr, $id:ident, $_:ident) => {
$self.buf.extend($id.to_le_bytes()) $self.buf.extend($id.to_le_bytes())
}; };
} }
/// Implement assembler
macro_rules! impl_asm { macro_rules! impl_asm {
( (
$( $(
@ -44,11 +55,13 @@ macro_rules! impl_asm {
) => { ) => {
paste::paste! { paste::paste! {
$( $(
fn [<i_param_ $ityn>](&mut self, opcode: u8, $($param_i: macros::asm::ident_map_ty!($param_ty)),*) { // Opcode-generic functions specific for instruction types
pub fn [<i_param_ $ityn>](&mut self, opcode: u8, $($param_i: macros::asm::ident_map_ty!($param_ty)),*) {
self.buf.push(opcode); self.buf.push(opcode);
$(macros::asm::impl_asm_insert!(self, $param_i, $param_ty);)* $(macros::asm::impl_asm_insert!(self, $param_i, $param_ty);)*
} }
// Generate opcode-specific functions calling the opcode-generic ones
macros::asm::impl_asm_opcodes!( macros::asm::impl_asm_opcodes!(
[<i_param_ $ityn>]($($param_i: macros::asm::ident_map_ty!($param_ty)),*) [<i_param_ $ityn>]($($param_i: macros::asm::ident_map_ty!($param_ty)),*)
=> [$($opcode,)*] => [$($opcode,)*]
@ -58,14 +71,12 @@ macro_rules! impl_asm {
}; };
} }
/// Map operand type to Rust type
#[rustfmt::skip] #[rustfmt::skip]
macro_rules! ident_map_ty { macro_rules! ident_map_ty {
(R) => { u8 }; (R) => { u8 }; // Register is just u8
(I) => { impl Imm }; (I) => { impl Imm }; // Immediate is anything implementing the trait
($id:ident) => { $id }; ($id:ident) => { $id }; // Anything else → identity map
} }
pub(crate) use {ident_map_ty, impl_asm, impl_asm_opcodes}; pub(crate) use {ident_map_ty, impl_asm, impl_asm_insert, impl_asm_opcodes};
#[allow(clippy::single_component_path_imports)]
pub(crate) use impl_asm_insert;

View file

@ -1,6 +1,50 @@
//! And here the land of macros begins.
//!
//! They do not bite, really. Have you seen what Yandros is writing?
pub mod asm; pub mod asm;
pub mod text; pub mod text;
#[allow(rustdoc::invalid_rust_codeblocks)]
/// Generate code for both programmatic-interface assembler and
/// textual interface.
///
/// Some people claim:
/// > Write programs to handle text streams, because that is a universal interface.
///
/// We at AbleCorp believe that a nice programmatic API is nicer than piping some text
/// into a program. It's less error-prone and faster.
///
/// # Syntax
/// ```no_run
/// impl_both!(
/// INSTRUCTION_TYPE(p0: TYPE, p1: TYPE, …)
/// => [INSTRUCTION_A, INSTRUCTION_B, …],
/// …
/// );
/// ```
/// - Instruction type determines opcode-generic, instruction-type-specific
/// function. Name: `i_param_INSTRUCTION_TYPE`
/// - Per-instructions there will be generated opcode-specific functions calling the generic ones
/// - Operand types
/// - R: Register (u8)
/// - I: Immediate (implements [`crate::Imm`] trait)
/// - Other types are identity-mapped
///
/// # Text assembler
/// Text assembler generated simply calls methods in the [`crate::Assembler`] type.
/// # Syntax
/// ```text
/// instruction op1, op2, …
/// …
/// ```
/// - Opcode names are lowercase
/// - Registers are prefixed with `r` followed by number
/// - Operands are separated by `,`
/// - Instructions are separated by either line feed or `;` (αυτό δεν είναι ερωτηματικό!)
/// - Labels are defined by their names followed by colon `label:`
/// - Labels are referenced simply by their names
/// - Immediates are numbers, can be negative, floats are not yet supported
macro_rules! impl_both { macro_rules! impl_both {
($($tt:tt)*) => { ($($tt:tt)*) => {
impl Assembler { impl Assembler {

View file

@ -1,3 +1,7 @@
//! Macros to generate text-code assembler at [`crate::text`]
// Referring in module which generates a module to that module — is that even legal? :D
/// Generate text code based assembler
macro_rules! gen_text { macro_rules! gen_text {
( (
$( $(
@ -6,6 +10,7 @@ macro_rules! gen_text {
=> [$($opcode:ident),* $(,)?], => [$($opcode:ident),* $(,)?],
)* )*
) => { ) => {
/// Text code based assembler
pub mod text { pub mod text {
use { use {
crate::{ crate::{
@ -18,6 +23,7 @@ macro_rules! gen_text {
}; };
paste::paste!(literify::literify! { paste::paste!(literify::literify! {
/// Assembly token
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
#[logos(extras = Rodeo)] #[logos(extras = Rodeo)]
#[logos(skip r"[ \t\t]+")] #[logos(skip r"[ \t\t]+")]
@ -59,6 +65,7 @@ macro_rules! gen_text {
} }
}); });
/// Type of error
#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ErrorKind { pub enum ErrorKind {
UnexpectedToken, UnexpectedToken,
@ -67,12 +74,14 @@ macro_rules! gen_text {
InvalidSymbol, InvalidSymbol,
} }
/// Text assembly error
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Error { pub struct Error {
pub kind: ErrorKind, pub kind: ErrorKind,
pub span: Span, pub span: Span,
} }
/// Parse code and insert instructions
pub fn assemble(asm: &mut Assembler, code: &str) -> Result<(), Error> { pub fn assemble(asm: &mut Assembler, code: &str) -> Result<(), Error> {
pub struct TextAsm<'a> { pub struct TextAsm<'a> {
asm: &'a mut Assembler, asm: &'a mut Assembler,
@ -93,8 +102,10 @@ macro_rules! gen_text {
fn run(&mut self) -> Result<(), ErrorKind> { fn run(&mut self) -> Result<(), ErrorKind> {
loop { loop {
match self.lexer.next() { match self.lexer.next() {
// Got an opcode
Some(Ok(Token::Opcode(op))) => { Some(Ok(Token::Opcode(op))) => {
match op { match op {
// Take all the opcodes and match them to their corresponding functions
$( $(
$(hbbytecode::opcode::$opcode)|* => paste::paste!({ $(hbbytecode::opcode::$opcode)|* => paste::paste!({
param_extract_itm!(self, $($param_i: $param_ty),*); param_extract_itm!(self, $($param_i: $param_ty),*);
@ -112,12 +123,16 @@ macro_rules! gen_text {
self.asm.i_param_bbb(op, p0, p1, p2); self.asm.i_param_bbb(op, p0, p1, p2);
} }
// Already matched in Logos, should not be able to obtain
// invalid opcode.
_ => unreachable!(), _ => unreachable!(),
} }
} }
// Insert label to table
Some(Ok(Token::Label(lbl))) => { Some(Ok(Token::Label(lbl))) => {
self.symloc.insert(lbl, self.asm.buf.len()); self.symloc.insert(lbl, self.asm.buf.len());
} }
// Instruction separator (LF, ;)
Some(Ok(Token::ISep)) => (), Some(Ok(Token::ISep)) => (),
Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken), Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken),
Some(Err(())) => return Err(ErrorKind::InvalidToken), Some(Err(())) => return Err(ErrorKind::InvalidToken),
@ -136,15 +151,20 @@ macro_rules! gen_text {
asm.run() asm.run()
.map_err(|kind| Error { kind, span: asm.lexer.span() })?; .map_err(|kind| Error { kind, span: asm.lexer.span() })?;
// Walk table and substitute labels
// for their addresses
for &loc in &asm.asm.sub { for &loc in &asm.asm.sub {
// Extract indices from the code and get addresses from table
let val = asm.symloc let val = asm.symloc
.get( .get(
&Spur::try_from_usize(bytemuck::pod_read_unaligned::<u64>(&asm.asm.buf[loc..loc+core::mem::size_of::<u64>()]) as _) &Spur::try_from_usize(bytemuck::pod_read_unaligned::<u64>(
.unwrap() &asm.asm.buf[loc..loc + core::mem::size_of::<u64>()]) as _
).unwrap()
) )
.ok_or(Error { kind: ErrorKind::InvalidSymbol, span: 0..0 })? .ok_or(Error { kind: ErrorKind::InvalidSymbol, span: 0..0 })?
.to_le_bytes(); .to_le_bytes();
// New address
asm.asm.buf[loc..] asm.asm.buf[loc..]
.iter_mut() .iter_mut()
.zip(val) .zip(val)
@ -154,6 +174,13 @@ macro_rules! gen_text {
Ok(()) Ok(())
} }
// Fun fact: this is a little hack
// It may slow the things a little bit down, but
// it made the macro to be made pretty nice.
//
// If you have any idea how to get rid of this,
// contributions are welcome :)
// I *likely* won't try anymore.
enum InternalImm { enum InternalImm {
Const(u64), Const(u64),
Named(Spur), Named(Spur),
@ -163,9 +190,14 @@ macro_rules! gen_text {
#[inline] #[inline]
fn insert(&self, asm: &mut Assembler) { fn insert(&self, asm: &mut Assembler) {
match self { match self {
// Constant immediate, just put it in
Self::Const(a) => a.insert(asm), Self::Const(a) => a.insert(asm),
// Label
Self::Named(a) => { Self::Named(a) => {
// Insert to the sub table that substitution will be
// requested
asm.sub.insert(asm.buf.len()); asm.sub.insert(asm.buf.len());
// Insert value from interner in place
asm.buf.extend((a.into_usize() as u64).to_le_bytes()); asm.buf.extend((a.into_usize() as u64).to_le_bytes());
}, },
} }
@ -175,42 +207,57 @@ macro_rules! gen_text {
}; };
} }
/// Extract item by pattern, otherwise return [`ErrorKind::UnexpectedToken`]
macro_rules! extract_pat { macro_rules! extract_pat {
($self:expr, $pat:pat) => { ($self:expr, $pat:pat) => {
let $pat = $self.next()? let $pat = $self.next()?
else { return Err(ErrorKind::UnexpectedToken) }; else { return Err(ErrorKind::UnexpectedToken) };
}; };
} }
/// Extract operand from code
macro_rules! extract { macro_rules! extract {
// Register (require prefixing with r)
($self:expr, R, $id:ident) => { ($self:expr, R, $id:ident) => {
extract_pat!($self, Token::Register($id)); extract_pat!($self, Token::Register($id));
}; };
// Immediate
($self:expr, I, $id:ident) => { ($self:expr, I, $id:ident) => {
let $id = match $self.next()? { let $id = match $self.next()? {
// Either straight up integer
Token::Integer(a) => InternalImm::Const(a), Token::Integer(a) => InternalImm::Const(a),
// …or a label
Token::Symbol(a) => InternalImm::Named(a), Token::Symbol(a) => InternalImm::Named(a),
_ => return Err(ErrorKind::UnexpectedToken), _ => return Err(ErrorKind::UnexpectedToken),
}; };
}; };
// Get u8, if not fitting, the token is claimed invalid
($self:expr, u8, $id:ident) => { ($self:expr, u8, $id:ident) => {
extract_pat!($self, Token::Integer($id)); extract_pat!($self, Token::Integer($id));
let $id = u8::try_from($id).map_err(|_| ErrorKind::InvalidToken)?; let $id = u8::try_from($id).map_err(|_| ErrorKind::InvalidToken)?;
}; };
// Get u16, if not fitting, the token is claimed invalid
($self:expr, u16, $id:ident) => { ($self:expr, u16, $id:ident) => {
extract_pat!($self, Token::Integer($id)); extract_pat!($self, Token::Integer($id));
let $id = u16::try_from($id).map_err(|_| ErrorKind::InvalidToken)?; let $id = u16::try_from($id).map_err(|_| ErrorKind::InvalidToken)?;
}; };
} }
/// Parameter extract incremental token-tree muncher
///
/// What else would it mean?
macro_rules! param_extract_itm { macro_rules! param_extract_itm {
($self:expr, $($id:ident: $ty:ident)? $(, $($tt:tt)*)?) => { ($self:expr, $($id:ident: $ty:ident)? $(, $($tt:tt)*)?) => {
// Extract pattern
$(extract!($self, $ty, $id);)? $(extract!($self, $ty, $id);)?
$( $(
// Require operand separator
extract_pat!($self, Token::PSep); extract_pat!($self, Token::PSep);
// And go to the next (recursive)
// …munch munch… yummy token trees.
param_extract_itm!($self, $($tt)*); param_extract_itm!($self, $($tt)*);
)? )?
}; };

View file

@ -39,7 +39,9 @@ impl Memory {
/// Maps host's memory into VM's memory /// Maps host's memory into VM's memory
/// ///
/// # Safety /// # Safety
/// Who knows. /// - Your faith in the gods of UB
/// - Addr-san claims it's fine but who knows if she isn't lying :ferrisSus:
/// - Alright, Miri-sama is also fine with this, who knows why
pub unsafe fn map( pub unsafe fn map(
&mut self, &mut self,
host: *mut u8, host: *mut u8,
@ -49,13 +51,14 @@ impl Memory {
) -> Result<(), MapError> { ) -> Result<(), MapError> {
let mut current_pt = self.root_pt; let mut current_pt = self.root_pt;
// Decide on what level depth are we going
let lookup_depth = match pagesize { let lookup_depth = match pagesize {
PageSize::Size4K => 4, PageSize::Size4K => 4,
PageSize::Size2M => 3, PageSize::Size2M => 3,
PageSize::Size1G => 2, PageSize::Size1G => 2,
}; };
// Lookup pagetable above // Walk pagetable levels
for lvl in (0..lookup_depth).rev() { for lvl in (0..lookup_depth).rev() {
let entry = (*current_pt) let entry = (*current_pt)
.table .table
@ -63,8 +66,12 @@ impl Memory {
let ptr = entry.ptr(); let ptr = entry.ptr();
match entry.permission() { match entry.permission() {
// Still not on target and already seeing empty entry?
// No worries! Let's create one (allocates).
Permission::Empty => { Permission::Empty => {
// Increase children count
(*current_pt).childen += 1; (*current_pt).childen += 1;
let table = Box::into_raw(Box::new(paging::PtPointedData { let table = Box::into_raw(Box::new(paging::PtPointedData {
pt: PageTable::default(), pt: PageTable::default(),
})); }));
@ -72,28 +79,39 @@ impl Memory {
core::ptr::write(entry, PtEntry::new(table, Permission::Node)); core::ptr::write(entry, PtEntry::new(table, Permission::Node));
current_pt = table as _; current_pt = table as _;
} }
// Continue walking
Permission::Node => current_pt = ptr as _, Permission::Node => current_pt = ptr as _,
_ => return Err(MapError::AlreadyMapped),
// There is some entry on place of node
_ => return Err(MapError::PageOnNode),
} }
} }
let node = (*current_pt)
.table
.get_unchecked_mut(addr_extract_index(target, 4 - lookup_depth));
// Check if node is not mapped
if node.permission() != Permission::Empty {
return Err(MapError::AlreadyMapped);
}
// Write entry // Write entry
(*current_pt).childen += 1; (*current_pt).childen += 1;
core::ptr::write( core::ptr::write(node, PtEntry::new(host.cast(), perm));
(*current_pt)
.table
.get_unchecked_mut(addr_extract_index(target, 4 - lookup_depth)),
PtEntry::new(host.cast(), perm),
);
Ok(()) Ok(())
} }
/// Unmaps pages from VM's memory /// Unmaps pages from VM's memory
///
/// If it errors, it only means there is no entry to unmap and in most cases
/// it should just be ignored.
pub fn unmap(&mut self, addr: u64) -> Result<(), NothingToUnmap> { pub fn unmap(&mut self, addr: u64) -> Result<(), NothingToUnmap> {
let mut current_pt = self.root_pt; let mut current_pt = self.root_pt;
let mut page_tables = [core::ptr::null_mut(); 5]; let mut page_tables = [core::ptr::null_mut(); 5];
// Walk page table in reverse
for lvl in (0..5).rev() { for lvl in (0..5).rev() {
let entry = unsafe { let entry = unsafe {
(*current_pt) (*current_pt)
@ -103,30 +121,42 @@ impl Memory {
let ptr = entry.ptr(); let ptr = entry.ptr();
match entry.permission() { match entry.permission() {
// Nothing is there, throw an error, not critical!
Permission::Empty => return Err(NothingToUnmap), Permission::Empty => return Err(NothingToUnmap),
// Node Save to visited pagetables and continue walking
Permission::Node => { Permission::Node => {
page_tables[lvl as usize] = entry; page_tables[lvl as usize] = entry;
current_pt = ptr as _ current_pt = ptr as _
} }
// Page entry zero it out!
// Zero page entry is completely valid entry with
// empty permission - no UB here!
_ => unsafe { _ => unsafe {
core::ptr::write(entry, Default::default()); core::ptr::write_bytes(entry, 0, 1);
}, },
} }
} }
// Now walk in order visited page tables
for entry in page_tables.into_iter() { for entry in page_tables.into_iter() {
// Level not visited, skip.
if entry.is_null() { if entry.is_null() {
continue; continue;
} }
unsafe { unsafe {
let children = &mut (*(*entry).ptr()).pt.childen; let children = &mut (*(*entry).ptr()).pt.childen;
*children -= 1;
if *children == 0 {
core::mem::drop(Box::from_raw((*entry).ptr() as *mut PageTable));
}
core::ptr::write(entry, Default::default()); // Decrease children count
*children -= 1;
// If there are no children, deallocate.
if *children == 0 {
let _ = Box::from_raw((*entry).ptr() as *mut PageTable);
// Zero visited entry
core::ptr::write_bytes(entry, 0, 1);
}
} }
} }
@ -149,12 +179,7 @@ impl Memory {
addr, addr,
target, target,
count, count,
|perm| { perm_check::readable,
matches!(
perm,
Permission::Readonly | Permission::Write | Permission::Exec
)
},
|src, dst, count| core::ptr::copy_nonoverlapping(src, dst, count), |src, dst, count| core::ptr::copy_nonoverlapping(src, dst, count),
traph, traph,
) )
@ -177,7 +202,7 @@ impl Memory {
addr, addr,
source.cast_mut(), source.cast_mut(),
count, count,
|perm| perm == Permission::Write, perm_check::writable,
|dst, src, count| core::ptr::copy_nonoverlapping(src, dst, count), |dst, src, count| core::ptr::copy_nonoverlapping(src, dst, count),
traph, traph,
) )
@ -188,8 +213,7 @@ impl Memory {
/// ///
/// # Safety /// # Safety
/// - Same as for [`Self::load`] and [`Self::store`] /// - Same as for [`Self::load`] and [`Self::store`]
/// - Your faith in the gods of UB /// - This function has been rewritten and is now pretty much boring
/// - Addr-san claims it's fine but who knows is she isn't lying :ferrisSus:
pub unsafe fn block_copy( pub unsafe fn block_copy(
&mut self, &mut self,
mut src: u64, mut src: u64,
@ -209,17 +233,13 @@ impl Memory {
count: usize, count: usize,
traph: &mut impl HandlePageFault, traph: &mut impl HandlePageFault,
) -> Result<(), BlkCopyError> { ) -> Result<(), BlkCopyError> {
// Load to buffer
self.memory_access( self.memory_access(
MemoryAccessReason::Load, MemoryAccessReason::Load,
src, src,
buf, buf,
STACK_BUFFER_SIZE, count,
|perm| { perm_check::readable,
matches!(
perm,
Permission::Readonly | Permission::Write | Permission::Exec
)
},
|src, dst, count| core::ptr::copy(src, dst, count), |src, dst, count| core::ptr::copy(src, dst, count),
traph, traph,
) )
@ -228,12 +248,13 @@ impl Memory {
addr, addr,
})?; })?;
// Store from buffer
self.memory_access( self.memory_access(
MemoryAccessReason::Store, MemoryAccessReason::Store,
dst, dst,
buf, buf,
count, count,
|perm| perm == Permission::Write, perm_check::writable,
|dst, src, count| core::ptr::copy(src, dst, count), |dst, src, count| core::ptr::copy(src, dst, count),
traph, traph,
) )
@ -246,24 +267,37 @@ impl Memory {
} }
} }
const STACK_BUFFER_SIZE: usize = 4096; // Buffer size (defaults to 4 KiB, a smallest page size on most platforms)
const BUF_SIZE: usize = 4096;
// Decide if to use stack-allocated buffer or to heap allocate // This should be equal to `BUF_SIZE`
// Deallocation is again decided on size at the end of the function #[repr(align(4096))]
let mut buf = MaybeUninit::<[u8; STACK_BUFFER_SIZE]>::uninit(); struct AlignedBuf([MaybeUninit<u8>; BUF_SIZE]);
let n_buffers = count / STACK_BUFFER_SIZE; // Safety: Assuming uninit of array of MaybeUninit is sound
let rem = count % STACK_BUFFER_SIZE; let mut buf = AlignedBuf(MaybeUninit::uninit().assume_init());
// Calculate how many times we need to copy buffer-sized blocks if any and the rest.
let n_buffers = count / BUF_SIZE;
let rem = count % BUF_SIZE;
// Copy buffer-sized blocks
for _ in 0..n_buffers { for _ in 0..n_buffers {
self.act(src, dst, buf.as_mut_ptr().cast(), STACK_BUFFER_SIZE, traph)?; self.act(src, dst, buf.0.as_mut_ptr().cast(), BUF_SIZE, traph)?;
src += STACK_BUFFER_SIZE as u64; src += BUF_SIZE as u64;
dst += STACK_BUFFER_SIZE as u64; dst += BUF_SIZE as u64;
} }
self.act(src, dst, buf.as_mut_ptr().cast(), rem, traph) // Copy the rest (if any)
if rem != 0 {
self.act(src, dst, buf.0.as_mut_ptr().cast(), rem, traph)?;
}
Ok(())
} }
// Everyone behold, the holy function, the god of HBVM memory accesses!
/// Split address to pages, check their permissions and feed pointers with offset /// Split address to pages, check their permissions and feed pointers with offset
/// to a specified function. /// to a specified function.
/// ///
@ -279,10 +313,11 @@ impl Memory {
action: fn(*mut u8, *mut u8, usize), action: fn(*mut u8, *mut u8, usize),
traph: &mut impl HandlePageFault, traph: &mut impl HandlePageFault,
) -> Result<(), u64> { ) -> Result<(), u64> {
// Create new splitter
let mut pspl = AddrPageLookuper::new(src, len, self.root_pt); let mut pspl = AddrPageLookuper::new(src, len, self.root_pt);
loop { loop {
match pspl.next() { match pspl.next() {
// Page found // Page is found
Some(Ok(AddrPageLookupOk { Some(Ok(AddrPageLookupOk {
vaddr, vaddr,
ptr, ptr,
@ -293,12 +328,13 @@ impl Memory {
return Err(vaddr); return Err(vaddr);
} }
// Perform memory action and bump dst pointer // Perform specified memory action and bump destination pointer
action(ptr, dst, size); action(ptr, dst, size);
dst = unsafe { dst.add(size) }; dst = unsafe { dst.add(size) };
} }
// No page found
Some(Err(AddrPageLookupError { addr, size })) => { Some(Err(AddrPageLookupError { addr, size })) => {
// Execute page fault handler // Attempt to execute page fault handler
if traph.page_fault(reason, self, addr, size, dst) { if traph.page_fault(reason, self, addr, size, dst) {
// Shift the splitter address // Shift the splitter address
pspl.bump(size); pspl.bump(size);
@ -306,16 +342,17 @@ impl Memory {
// Bump dst pointer // Bump dst pointer
dst = unsafe { dst.add(size as _) }; dst = unsafe { dst.add(size as _) };
} else { } else {
return Err(addr); // Unhandleable return Err(addr); // Unhandleable, VM will yield.
} }
} }
// No remaining pages, we are done!
None => return Ok(()), None => return Ok(()),
} }
} }
} }
} }
/// Result from address split /// Good result from address split
struct AddrPageLookupOk { struct AddrPageLookupOk {
/// Virtual address /// Virtual address
vaddr: u64, vaddr: u64,
@ -330,6 +367,7 @@ struct AddrPageLookupOk {
perm: Permission, perm: Permission,
} }
/// Erroneous address split result
struct AddrPageLookupError { struct AddrPageLookupError {
/// Address of failure /// Address of failure
addr: u64, addr: u64,
@ -351,7 +389,7 @@ struct AddrPageLookuper {
} }
impl AddrPageLookuper { impl AddrPageLookuper {
/// Create a new page splitter /// Create a new page lookuper
pub const fn new(addr: u64, size: usize, pagetable: *const PageTable) -> Self { pub const fn new(addr: u64, size: usize, pagetable: *const PageTable) -> Self {
Self { Self {
addr, addr,
@ -430,7 +468,11 @@ impl Iterator for AddrPageLookuper {
} }
} }
fn addr_extract_index(addr: u64, lvl: u8) -> usize { /// Extract index in page table on specified level
///
/// The level shall not be larger than 4, otherwise
/// the output of the function is unspecified (yes, it can also panic :)
pub fn addr_extract_index(addr: u64, lvl: u8) -> usize {
debug_assert!(lvl <= 4); debug_assert!(lvl <= 4);
usize::try_from((addr >> (lvl * 9 + 12)) & ((1 << 9) - 1)).expect("?conradluget a better CPU") usize::try_from((addr >> (lvl * 9 + 12)) & ((1 << 9) - 1)).expect("?conradluget a better CPU")
} }
@ -462,24 +504,36 @@ impl PageSize {
/// Unhandled load access trap /// Unhandled load access trap
#[derive(Clone, Copy, Display, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Display, Debug, PartialEq, Eq)]
#[display(fmt = "Load access error at address {_0:#x}")]
pub struct LoadError(u64); pub struct LoadError(u64);
/// Unhandled store access trap /// Unhandled store access trap
#[derive(Clone, Copy, Display, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Display, Debug, PartialEq, Eq)]
#[display(fmt = "Store access error at address {_0:#x}")]
pub struct StoreError(u64); pub struct StoreError(u64);
/// There was no entry in page table to unmap
///
/// No worry, don't panic, nothing bad has happened,
/// but if you are 120% sure there should be something,
/// double-check your addresses.
#[derive(Clone, Copy, Display, Debug)] #[derive(Clone, Copy, Display, Debug)]
#[display(fmt = "There was no entry to unmap")]
pub struct NothingToUnmap; pub struct NothingToUnmap;
/// Reason to access memory
#[derive(Clone, Copy, Display, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Display, Debug, PartialEq, Eq)]
pub enum MemoryAccessReason { pub enum MemoryAccessReason {
Load, Load,
Store, Store,
} }
/// Error occurred when copying a block of memory
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BlkCopyError { pub struct BlkCopyError {
/// Kind of access
access_reason: MemoryAccessReason, access_reason: MemoryAccessReason,
/// VM Address
addr: u64, addr: u64,
} }
@ -504,7 +558,34 @@ impl From<StoreError> for VmRunError {
} }
} }
/// Error mapping
#[derive(Clone, Copy, Display, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Display, Debug, PartialEq, Eq)]
pub enum MapError { pub enum MapError {
/// Entry was already mapped
#[display(fmt = "There is already a page mapped on specified address")]
AlreadyMapped, AlreadyMapped,
/// When walking, a page entry was
/// encountered.
#[display(fmt = "There was a page mapped on the way instead of node")]
PageOnNode,
}
/// Permission checks
pub mod perm_check {
use super::paging::Permission;
/// Page is readable
#[inline(always)]
pub fn readable(perm: Permission) -> bool {
matches!(
perm,
Permission::Readonly | Permission::Write | Permission::Exec
)
}
/// Page is writable
#[inline(always)]
pub fn writable(perm: Permission) -> bool {
perm == Permission::Write
}
} }

View file

@ -5,6 +5,10 @@ use super::{Memory, MemoryAccessReason, PageSize};
/// Handle VM traps /// Handle VM traps
pub trait HandlePageFault { pub trait HandlePageFault {
/// Handle page fault /// Handle page fault
///
/// Return true if handling was successful,
/// otherwise the program will be interrupted and will
/// yield an error.
fn page_fault( fn page_fault(
&mut self, &mut self,
reason: MemoryAccessReason, reason: MemoryAccessReason,

View file

@ -173,9 +173,9 @@
### Unconditional jump ### Unconditional jump
- Type BBD - Type BBD
| Opcode | Name | Action | | Opcode | Name | Action |
|:------:|:----:|:-------------------------------------------------:| |:------:|:----:|:--------------------------------------------------:|
| 33 | JAL | Save current PC to `#0` and jump at `#1 + imm #2` | | 33 | JAL | Save PC past JAL to `#0` and jump at `#1 + imm #2` |
### Conditional jumps ### Conditional jumps
- Type BBD - Type BBD