From 0070016f74fc21606375bda56b78d3997251fea5 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 22 Feb 2024 23:57:29 +0100 Subject: [PATCH] Documented hbasm --- hbasm/src/data.rs | 20 ++++++++- hbasm/src/ins.rs | 107 +++++++++++++++++++++++++++++++++++++++----- hbasm/src/label.rs | 58 ++++++++++++++++++------ hbasm/src/linker.rs | 12 +++++ hbasm/src/object.rs | 17 +++++++ 5 files changed, 187 insertions(+), 27 deletions(-) diff --git a/hbasm/src/data.rs b/hbasm/src/data.rs index e7c9052..16e00f4 100644 --- a/hbasm/src/data.rs +++ b/hbasm/src/data.rs @@ -1,14 +1,21 @@ -use rhai::{CustomType, Engine, FuncRegistration, ImmutableString}; +//! Data section inserts use { crate::{object::SymbolRef, SharedObject}, - rhai::Module, + rhai::{CustomType, Engine, FuncRegistration, ImmutableString, Module}, }; +/// Generate insertions for data types +/// +/// `gen_data_insertions!($module, $obj, [$type, …]);` +/// - `$module`: Rhai module +/// - `$obj`: Code object +/// - `$type`: Type of single array item macro_rules! gen_data_insertions { ($module:expr, $obj:expr, [$($ty:ident),* $(,)?] $(,)?) => {{ let (module, obj) = ($module, $obj); $({ + // Clone object to each function let obj = ::std::rc::Rc::clone(obj); FuncRegistration::new(stringify!($ty)) @@ -17,17 +24,23 @@ macro_rules! gen_data_insertions { let obj = &mut *obj.borrow_mut(); let symbol = obj.symbol($crate::object::Section::Data); + // Reserve space for object so we don't resize it + // all the time obj.sections .data .reserve(arr.len() * ::std::mem::size_of::<$ty>()); + // For every item… for item in arr { + // … try to convert from i64 to the desired type + // and insert it. obj.sections.data.extend( match item.as_int() { Ok(num) => $ty::try_from(num).map_err(|_| "i64".to_owned()), Err(ty) => Err(ty.to_owned()), } .map_err(|err| { + ::rhai::EvalAltResult::ErrorMismatchDataType( stringify!($ty).to_owned(), err, @@ -47,6 +60,7 @@ macro_rules! 
gen_data_insertions { }}; } +/// Reference to entry in data section #[derive(Clone, Copy, Debug)] pub struct DataRef { pub symbol: SymbolRef, @@ -67,6 +81,8 @@ pub fn module(engine: &mut Engine, obj: SharedObject) -> Module { gen_data_insertions!(&mut module, &obj, [i8, i16, i32, i64]); + // Specialisation for strings, they should be + // inserted as plain UTF-8 arrays FuncRegistration::new("str") .with_namespace(rhai::FnNamespace::Global) .set_into_module::<_, 1, false, _, true, _>(&mut module, move |s: ImmutableString| { diff --git a/hbasm/src/ins.rs b/hbasm/src/ins.rs index 7dca48a..ea43559 100644 --- a/hbasm/src/ins.rs +++ b/hbasm/src/ins.rs @@ -1,9 +1,20 @@ +//! Functions for inserting instructions +//! +//! Most of the code you see is just metaprogramming stuff. +//! This ensures that adding new instructions won't need any +//! specific changes and consistent behaviour. +//! +//! > I tried to comment stuff here, but I meanwhile forgor how it works. +//! +//! — Erin + use { crate::object::Object, rhai::{FuncRegistration, Module}, std::{cell::RefCell, rc::Rc}, }; +/// Operand types and their insertions pub mod optypes { use { crate::{ @@ -13,6 +24,7 @@ pub mod optypes { rhai::{Dynamic, EvalAltResult, ImmutableString, Position}, }; + // These types represent operand types to be inserted pub type R = u8; pub type B = i8; pub type H = i16; @@ -23,12 +35,18 @@ pub mod optypes { pub type O = Dynamic; pub type P = Dynamic; + /// Insert relocation into code + /// + /// - If integer, just write it to the code + /// - Otherwise insert entry into relocation table + /// and fill zeroes pub fn insert_reloc( obj: &mut Object, ty: RelocType, val: &Dynamic, ) -> Result<(), EvalAltResult> { match () { + // Direct references – insert directly to table _ if val.is::() => { obj.relocation(RelocKey::Symbol(val.clone_cast::().0), ty) } @@ -38,9 +56,13 @@ pub mod optypes { _ if val.is::() => { obj.relocation(RelocKey::Symbol(val.clone_cast::().symbol.0), ty) } + + // String 
(indirect) reference _ if val.is_string() => { obj.relocation(RelocKey::Label(val.clone_cast::()), ty) } + + // Manual offset _ if val.is_int() => { let int = val.clone_cast::(); match ty { @@ -49,9 +71,10 @@ pub mod optypes { RelocType::Abs64 => obj.sections.text.extend(int.to_le_bytes()), } } + _ => { return Err(EvalAltResult::ErrorMismatchDataType( - "SybolRef, UnboundLabel, String or Int".to_owned(), + "SymbolRef, UnboundLabel, String or Int".to_owned(), val.type_name().to_owned(), Position::NONE, )) @@ -61,8 +84,23 @@ pub mod optypes { Ok(()) } + /// Generate macro for inserting item into the output object + /// + /// Pre-defines inserts for absolute address and relative offsets. + /// These are inserted with function [`insert_reloc`] + /// # le_bytes + /// `gen_insert!(le_bytes: [B, …]);` + /// + /// Takes sequence of operand types which should be inserted + /// by invoking `to_le_bytes` method on it. macro_rules! gen_insert { (le_bytes: [$($lety:ident),* $(,)?]) => { + /// `insert!($thing, $obj, $type)` where + /// - `$thing`: Value you want to insert + /// - `$obj`: Code object + /// - `$type`: Type of inserted value + /// + /// Eg. `insert!(69_u8, obj, B);` macro_rules! insert { $(($thing:expr, $obj: expr, $lety) => { $obj.sections.text.extend($thing.to_le_bytes()); @@ -101,6 +139,7 @@ pub mod optypes { use crate::data::DataRef; } +/// Rhai Types (types for function parameters as Rhai uses only 64bit signed integers) pub mod rity { pub use super::optypes::{A, O, P, R}; pub type B = i64; @@ -109,6 +148,7 @@ pub mod rity { pub type D = i64; } +/// Generic instruction (instruction of certain operands type) inserts pub mod generic { use {crate::object::Object, rhai::EvalAltResult}; @@ -122,29 +162,48 @@ pub mod generic { }) } + /// Generate opcode-generic instruction insert macro macro_rules! gen_ins { ($($($name:ident : $ty:ty),*;)*) => { paste::paste! 
{ - $(#[inline] - pub fn [<$($ty:lower)*>]( - obj: &mut Object, - opcode: u8, - $($name: $crate::ins::optypes::$ty),*, - ) -> Result<(), EvalAltResult> { - obj.sections.text.push(opcode); - $($crate::ins::optypes::insert!(&$name, obj, $ty);)* - Ok(()) - })* + $( + /// Instruction-generic opcode insertion function + /// - `obj`: Code object + /// - `opcode`: opcode, not checked if valid for instruction type + /// - … for operands + #[inline] + pub fn [<$($ty:lower)*>]( + obj: &mut Object, + opcode: u8, + $($name: $crate::ins::optypes::$ty),*, + ) -> Result<(), EvalAltResult> { + // Push opcode + obj.sections.text.push(opcode); + // Insert based on type + $($crate::ins::optypes::insert!(&$name, obj, $ty);)* + Ok(()) + } + )* + + /// Generate Rhai opcode-specific instruction insertion functions + /// + /// `gen_ins_fn!($obj, $opcode, $optype);` where: + /// - `$obj`: Code object + /// - `$opcode`: Opcode value macro_rules! gen_ins_fn { $( ($obj:expr, $opcode:expr, [<$($ty)*>]) => { + // Opcode-specific insertion function + // - Parameters = operands move |$($name: $crate::ins::rity::$ty),*| { + // Invoke generic function $crate::ins::generic::[<$($ty:lower)*>]( &mut *$obj.borrow_mut(), $opcode, $( - $crate::ins::generic::convert_op::< + // Convert to desired type (from Rhai-provided values) + $crate::ins::generic::convert_op::< _, $crate::ins::optypes::$ty >($name)? 
@@ -154,11 +213,13 @@ pub mod generic { } }; + // Internal-use: count args (@arg_count [<$($ty)*>]) => { { ["", $(stringify!($ty)),*].len() - 1 } }; )* + // Specialisation for no-operand instructions ($obj:expr, $opcode:expr, N) => { move || { $crate::ins::generic::n(&mut *$obj.borrow_mut(), $opcode); @@ -166,6 +227,7 @@ pub mod generic { } }; + // Internal-use specialisation: no-operand instructions (@arg_count N) => { { 0 } }; @@ -174,11 +236,16 @@ pub mod generic { }; } + /// Specialisation for no-operand instructions – simply pushes the opcode #[inline] pub fn n(obj: &mut Object, opcode: u8) { obj.sections.text.push(opcode); } + // Generate opcode-generic instruction inserters + // (operand identifiers are arbitrary) + // + // New instruction types have to be added manually here gen_ins! { o0: R, o1: R; o0: R, o1: R, o2: R; @@ -205,6 +272,17 @@ pub mod generic { pub(super) use gen_ins_fn; } +/// Generate instructions from instruction table +/// +/// ```ignore +/// instructions!(($module, $obj) { +/// // Data from instruction table +/// $opcode, $mnemonic, $opty, $doc; +/// … +/// }); +/// ``` +/// - `$module`: Rhai module +/// - `$obj`: Code object macro_rules! instructions { ( ($module:expr, $obj:expr $(,)?) @@ -212,7 +290,10 @@ macro_rules! instructions { ) => {{ let (module, obj) = ($module, $obj); $({ + // Object is shared across all functions let obj = Rc::clone(&obj); + + // Register newly generated function for each instruction FuncRegistration::new(stringify!([<$mnemonic:lower>])) .with_namespace(rhai::FnNamespace::Global) .set_into_module::<_, { generic::gen_ins_fn!(@arg_count $ops) }, false, _, true, _>( @@ -227,7 +308,9 @@ macro_rules! instructions { }}; } +/// Setup instruction inserters pub fn setup(module: &mut Module, obj: Rc>) { + // Import instructions table and use it for generation with_builtin_macros::with_builtin! 
{ let $spec = include_from_root!("../hbbytecode/instructions.in") in { instructions!((module, obj) { $spec }); diff --git a/hbasm/src/label.rs b/hbasm/src/label.rs index 72287c2..86142df 100644 --- a/hbasm/src/label.rs +++ b/hbasm/src/label.rs @@ -1,8 +1,31 @@ +//! Stuff related to labels + use { crate::SharedObject, rhai::{Engine, FuncRegistration, ImmutableString, Module}, }; +/// Macro for creating functions for Rhai which +/// is bit more friendly +/// +/// ```ignore +/// shdm_fns!{ +/// module: $module; +/// shared: $shared => $shname; +/// +/// $vis fn $name($param_name: $param_ty, …) -> $ret { … } +/// … +/// } +/// ``` +/// - `$module`: Rhai module +/// - `$shared`: Data to be shared across the functions +/// - `$shname`: The binding name inside functions +/// - `$vis`: Function visibility for Rhai +/// - Lowercased [`rhai::FnNamespace`] variants +/// - `$name`: Function name +/// - `$param_name`: Parameter name +/// - `$param_ty`: Rust parameter type +/// - `$ret`: Optional return type (otherwise infer) macro_rules! shdm_fns { ( module: $module:expr; @@ -14,23 +37,26 @@ macro_rules! shdm_fns { ) => {{ let module = $module; let shared = $shared; - $({ + paste::paste! { + $({ - let $shname = SharedObject::clone(&shared); - - FuncRegistration::new(stringify!($name)) - .with_namespace(rhai::FnNamespace::Global) - .set_into_module::<_, { ["", $(stringify!($param_name)),*].len() - 1 }, false, _, true, _>( - module, - move |$($param_name: $param_ty),*| $(-> $ret)? { - let mut $shname = $shname.borrow_mut(); - $blk - } - ); - })* + let $shname = SharedObject::clone(&shared); + + FuncRegistration::new(stringify!($name)) + .with_namespace(rhai::FnNamespace::[<$vis:camel>]) + .set_into_module::<_, { ["", $(stringify!($param_name)),*].len() - 1 }, false, _, true, _>( + module, + move |$($param_name: $param_ty),*| $(-> $ret)? 
{ + let mut $shname = $shname.borrow_mut(); + $blk + } + ); + })* + } }}; } +/// Label without any place bound #[derive(Clone, Copy, Debug)] pub struct UnboundLabel(pub usize); @@ -39,11 +65,13 @@ pub fn setup(engine: &mut Engine, module: &mut Module, object: SharedObject) { module: module; shared: object => obj; + // Insert unnamed label global fn label() { let symbol = obj.symbol(crate::object::Section::Text); Ok(symbol) } + // Insert string-labeled label global fn label(label: ImmutableString) { let symbol = obj.symbol(crate::object::Section::Text); obj.labels.insert(label, symbol.0); @@ -51,6 +79,7 @@ pub fn setup(engine: &mut Engine, module: &mut Module, object: SharedObject) { Ok(symbol) } + // Declare unbound label (to be bound later) global fn declabel() { let index = obj.symbols.len(); obj.symbols.push(None); @@ -58,6 +87,8 @@ pub fn setup(engine: &mut Engine, module: &mut Module, object: SharedObject) { Ok(UnboundLabel(index)) } + // Declare unbound label (to be bound later) + // with string label global fn declabel(label: ImmutableString) { let index = obj.symbols.len(); obj.symbols.push(None); @@ -66,6 +97,7 @@ pub fn setup(engine: &mut Engine, module: &mut Module, object: SharedObject) { Ok(UnboundLabel(index)) } + // Set location for unbound label global fn here(label: UnboundLabel) { obj.symbols[label.0] = Some(crate::object::SymbolEntry { location: crate::object::Section::Text, diff --git a/hbasm/src/linker.rs b/hbasm/src/linker.rs index d31c73b..8096884 100644 --- a/hbasm/src/linker.rs +++ b/hbasm/src/linker.rs @@ -1,3 +1,5 @@ +//! 
Simple flat-bytecode linker + use { crate::{ object::{RelocKey, RelocType, Section}, @@ -8,18 +10,27 @@ use { pub fn link(object: SharedObject, out: &mut impl Write) -> std::io::Result<()> { let obj = &mut *object.borrow_mut(); + + // Walk relocation table entries for (&loc, entry) in &obj.relocs { let value = match &entry.key { + // Symbol – direct reference RelocKey::Symbol(sym) => obj.symbols[*sym], + + // Label – indirect label reference RelocKey::Label(label) => obj.symbols[obj.labels[label]], } .ok_or_else(|| std::io::Error::other("Invalid symbol"))?; let offset = match value.location { + // Text section is at the beginning Section::Text => value.offset, + + // Data section follows text section immediately Section::Data => value.offset + obj.sections.text.len(), }; + // Insert address or calculate relative offset match entry.ty { RelocType::Rel32 => obj.sections.text[loc..loc + 4] .copy_from_slice(&((offset as isize - loc as isize) as i32).to_le_bytes()), @@ -30,6 +41,7 @@ pub fn link(object: SharedObject, out: &mut impl Write) -> std::io::Result<()> { } } + // Write to output out.write_all(&obj.sections.text)?; out.write_all(&obj.sections.data) } diff --git a/hbasm/src/object.rs b/hbasm/src/object.rs index bae43df..073a808 100644 --- a/hbasm/src/object.rs +++ b/hbasm/src/object.rs @@ -1,23 +1,31 @@ +//! 
Code object + use {rhai::ImmutableString, std::collections::HashMap}; +/// Section table #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Section { Text, Data, } +/// Symbol entry (in what section, where) #[derive(Clone, Copy, Debug)] pub struct SymbolEntry { pub location: Section, pub offset: usize, } +/// Relocation table key #[derive(Clone, Debug)] pub enum RelocKey { + /// Direct reference Symbol(usize), + /// Indirect reference Label(ImmutableString), } +/// Relocation type #[derive(Clone, Copy, Debug)] pub enum RelocType { Rel32, @@ -25,23 +33,30 @@ pub enum RelocType { Abs64, } +/// Relocation table entry #[derive(Clone, Debug)] pub struct RelocEntry { pub key: RelocKey, pub ty: RelocType, } +/// Object code #[derive(Clone, Debug, Default)] pub struct Sections { pub text: Vec, pub data: Vec, } +/// Object #[derive(Clone, Debug, Default)] pub struct Object { + /// Vectors with sections pub sections: Sections, + /// Symbol table pub symbols: Vec>, + /// Labels to symbols table pub labels: HashMap, + /// Relocation table pub relocs: HashMap, } @@ -50,6 +65,7 @@ pub struct Object { pub struct SymbolRef(pub usize); impl Object { + /// Insert symbol at current location in specified section pub fn symbol(&mut self, section: Section) -> SymbolRef { let section_buf = match section { Section::Text => &mut self.sections.text, @@ -64,6 +80,7 @@ impl Object { SymbolRef(self.symbols.len() - 1) } + /// Insert to relocation table and write zeroes to code pub fn relocation(&mut self, key: RelocKey, ty: RelocType) { self.relocs .insert(self.sections.text.len(), RelocEntry { key, ty });