Documented hbasm

This commit is contained in:
Erin 2024-02-22 23:57:29 +01:00
parent 6c6e29479f
commit 0070016f74
5 changed files with 187 additions and 27 deletions

View file

@ -1,14 +1,21 @@
use rhai::{CustomType, Engine, FuncRegistration, ImmutableString}; //! Data section inserts
use { use {
crate::{object::SymbolRef, SharedObject}, crate::{object::SymbolRef, SharedObject},
rhai::Module, rhai::{CustomType, Engine, FuncRegistration, ImmutableString, Module},
}; };
/// Generate insertions for data types
///
/// `gen_data_insertions!($module, $obj, [$type, …]);`
/// - `$module`: Rhai module
/// - `$obj`: Code object
/// - `$type`: Type of single array item
macro_rules! gen_data_insertions { macro_rules! gen_data_insertions {
($module:expr, $obj:expr, [$($ty:ident),* $(,)?] $(,)?) => {{ ($module:expr, $obj:expr, [$($ty:ident),* $(,)?] $(,)?) => {{
let (module, obj) = ($module, $obj); let (module, obj) = ($module, $obj);
$({ $({
// Clone object to each function
let obj = ::std::rc::Rc::clone(obj); let obj = ::std::rc::Rc::clone(obj);
FuncRegistration::new(stringify!($ty)) FuncRegistration::new(stringify!($ty))
@ -17,17 +24,23 @@ macro_rules! gen_data_insertions {
let obj = &mut *obj.borrow_mut(); let obj = &mut *obj.borrow_mut();
let symbol = obj.symbol($crate::object::Section::Data); let symbol = obj.symbol($crate::object::Section::Data);
// Reserve space for object so we don't resize it
// all the time
obj.sections obj.sections
.data .data
.reserve(arr.len() * ::std::mem::size_of::<$ty>()); .reserve(arr.len() * ::std::mem::size_of::<$ty>());
// For every item…
for item in arr { for item in arr {
// … try to convert it from the Rhai integer to the desired type
// and insert it.
obj.sections.data.extend( obj.sections.data.extend(
match item.as_int() { match item.as_int() {
Ok(num) => $ty::try_from(num).map_err(|_| "i64".to_owned()), Ok(num) => $ty::try_from(num).map_err(|_| "i64".to_owned()),
Err(ty) => Err(ty.to_owned()), Err(ty) => Err(ty.to_owned()),
} }
.map_err(|err| { .map_err(|err| {
::rhai::EvalAltResult::ErrorMismatchDataType( ::rhai::EvalAltResult::ErrorMismatchDataType(
stringify!($ty).to_owned(), stringify!($ty).to_owned(),
err, err,
@ -47,6 +60,7 @@ macro_rules! gen_data_insertions {
}}; }};
} }
/// Reference to entry in data section
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
pub struct DataRef { pub struct DataRef {
pub symbol: SymbolRef, pub symbol: SymbolRef,
@ -67,6 +81,8 @@ pub fn module(engine: &mut Engine, obj: SharedObject) -> Module {
gen_data_insertions!(&mut module, &obj, [i8, i16, i32, i64]); gen_data_insertions!(&mut module, &obj, [i8, i16, i32, i64]);
// Specialisation for strings, they should be
// inserted as plain UTF-8 arrays
FuncRegistration::new("str") FuncRegistration::new("str")
.with_namespace(rhai::FnNamespace::Global) .with_namespace(rhai::FnNamespace::Global)
.set_into_module::<_, 1, false, _, true, _>(&mut module, move |s: ImmutableString| { .set_into_module::<_, 1, false, _, true, _>(&mut module, move |s: ImmutableString| {

View file

@ -1,9 +1,20 @@
//! Functions for inserting instructions
//!
//! Most of the code you see is just metaprogramming stuff.
//! This ensures that adding new instructions won't need any
//! specific changes and consistent behaviour.
//!
//! > I tried to comment stuff here, but I meanwhile forgor how it works.
//!
//! — Erin
use { use {
crate::object::Object, crate::object::Object,
rhai::{FuncRegistration, Module}, rhai::{FuncRegistration, Module},
std::{cell::RefCell, rc::Rc}, std::{cell::RefCell, rc::Rc},
}; };
/// Operand types and their insertions
pub mod optypes { pub mod optypes {
use { use {
crate::{ crate::{
@ -13,6 +24,7 @@ pub mod optypes {
rhai::{Dynamic, EvalAltResult, ImmutableString, Position}, rhai::{Dynamic, EvalAltResult, ImmutableString, Position},
}; };
// These types represent operand types to be inserted
pub type R = u8; pub type R = u8;
pub type B = i8; pub type B = i8;
pub type H = i16; pub type H = i16;
@ -23,12 +35,18 @@ pub mod optypes {
pub type O = Dynamic; pub type O = Dynamic;
pub type P = Dynamic; pub type P = Dynamic;
/// Insert relocation into code
///
/// - If integer, just write it to the code
/// - Otherwise insert entry into relocation table
/// and fill zeroes
pub fn insert_reloc( pub fn insert_reloc(
obj: &mut Object, obj: &mut Object,
ty: RelocType, ty: RelocType,
val: &Dynamic, val: &Dynamic,
) -> Result<(), EvalAltResult> { ) -> Result<(), EvalAltResult> {
match () { match () {
// Direct references insert directly to table
_ if val.is::<SymbolRef>() => { _ if val.is::<SymbolRef>() => {
obj.relocation(RelocKey::Symbol(val.clone_cast::<SymbolRef>().0), ty) obj.relocation(RelocKey::Symbol(val.clone_cast::<SymbolRef>().0), ty)
} }
@ -38,9 +56,13 @@ pub mod optypes {
_ if val.is::<DataRef>() => { _ if val.is::<DataRef>() => {
obj.relocation(RelocKey::Symbol(val.clone_cast::<DataRef>().symbol.0), ty) obj.relocation(RelocKey::Symbol(val.clone_cast::<DataRef>().symbol.0), ty)
} }
// String (indirect) reference
_ if val.is_string() => { _ if val.is_string() => {
obj.relocation(RelocKey::Label(val.clone_cast::<ImmutableString>()), ty) obj.relocation(RelocKey::Label(val.clone_cast::<ImmutableString>()), ty)
} }
// Manual offset
_ if val.is_int() => { _ if val.is_int() => {
let int = val.clone_cast::<i64>(); let int = val.clone_cast::<i64>();
match ty { match ty {
@ -49,9 +71,10 @@ pub mod optypes {
RelocType::Abs64 => obj.sections.text.extend(int.to_le_bytes()), RelocType::Abs64 => obj.sections.text.extend(int.to_le_bytes()),
} }
} }
_ => { _ => {
return Err(EvalAltResult::ErrorMismatchDataType( return Err(EvalAltResult::ErrorMismatchDataType(
"SybolRef, UnboundLabel, String or Int".to_owned(), "SymbolRef, UnboundLabel, String or Int".to_owned(),
val.type_name().to_owned(), val.type_name().to_owned(),
Position::NONE, Position::NONE,
)) ))
@ -61,8 +84,23 @@ pub mod optypes {
Ok(()) Ok(())
} }
/// Generate macro for inserting item into the output object
///
/// Pre-defines inserts for absolute address and relative offsets.
/// These are inserted with function [`insert_reloc`]
/// # le_bytes
/// `gen_insert!(le_bytes: [B, …]);`
///
/// Takes sequence of operand types which should be inserted
/// by invoking `to_le_bytes` method on it.
macro_rules! gen_insert { macro_rules! gen_insert {
(le_bytes: [$($lety:ident),* $(,)?]) => { (le_bytes: [$($lety:ident),* $(,)?]) => {
/// `insert!($thing, $obj, $type)` where
/// - `$thing`: Value you want to insert
/// - `$obj`: Code object
/// - `$type`: Type of inserted value
///
/// Eg. `insert!(69_u8, obj, B);`
macro_rules! insert { macro_rules! insert {
$(($thing:expr, $obj: expr, $lety) => { $(($thing:expr, $obj: expr, $lety) => {
$obj.sections.text.extend($thing.to_le_bytes()); $obj.sections.text.extend($thing.to_le_bytes());
@ -101,6 +139,7 @@ pub mod optypes {
use crate::data::DataRef; use crate::data::DataRef;
} }
/// Rhai Types (types for function parameters as Rhai uses only 64bit signed integers)
pub mod rity { pub mod rity {
pub use super::optypes::{A, O, P, R}; pub use super::optypes::{A, O, P, R};
pub type B = i64; pub type B = i64;
@ -109,6 +148,7 @@ pub mod rity {
pub type D = i64; pub type D = i64;
} }
/// Generic instruction (instruction of certain operands type) inserts
pub mod generic { pub mod generic {
use {crate::object::Object, rhai::EvalAltResult}; use {crate::object::Object, rhai::EvalAltResult};
@ -122,28 +162,47 @@ pub mod generic {
}) })
} }
/// Generate opcode-generic instruction insert macro
macro_rules! gen_ins { macro_rules! gen_ins {
($($($name:ident : $ty:ty),*;)*) => { ($($($name:ident : $ty:ty),*;)*) => {
paste::paste! { paste::paste! {
$(#[inline] $(
pub fn [<$($ty:lower)*>]( /// Instruction-generic opcode insertion function
obj: &mut Object, /// - `obj`: Code object
opcode: u8, /// - `opcode`: opcode, not checked if valid for instruction type
$($name: $crate::ins::optypes::$ty),*, /// - … for operands
) -> Result<(), EvalAltResult> { #[inline]
obj.sections.text.push(opcode); pub fn [<$($ty:lower)*>](
$($crate::ins::optypes::insert!(&$name, obj, $ty);)* obj: &mut Object,
Ok(()) opcode: u8,
})* $($name: $crate::ins::optypes::$ty),*,
) -> Result<(), EvalAltResult> {
// Push opcode
obj.sections.text.push(opcode);
// Insert based on type
$($crate::ins::optypes::insert!(&$name, obj, $ty);)*
Ok(())
}
)*
/// Generate Rhai opcode-specific instruction insertion functions
///
/// `gen_ins_fn!($obj, $opcode, $optype);` where:
/// - `$obj`: Code object
/// - `$opcode`: Opcode value
/// - `$optype`: Operand type signature (`N` for no-operand instructions)
macro_rules! gen_ins_fn { macro_rules! gen_ins_fn {
$( $(
($obj:expr, $opcode:expr, [<$($ty)*>]) => { ($obj:expr, $opcode:expr, [<$($ty)*>]) => {
// Opcode-specific insertion function
// - Parameters = operands
move |$($name: $crate::ins::rity::$ty),*| { move |$($name: $crate::ins::rity::$ty),*| {
// Invoke generic function
$crate::ins::generic::[<$($ty:lower)*>]( $crate::ins::generic::[<$($ty:lower)*>](
&mut *$obj.borrow_mut(), &mut *$obj.borrow_mut(),
$opcode, $opcode,
$( $(
// Convert to desired type (from Rhai-provided values)
$crate::ins::generic::convert_op::< $crate::ins::generic::convert_op::<
_, _,
$crate::ins::optypes::$ty $crate::ins::optypes::$ty
@ -154,11 +213,13 @@ pub mod generic {
} }
}; };
// Internal-use: count args
(@arg_count [<$($ty)*>]) => { (@arg_count [<$($ty)*>]) => {
{ ["", $(stringify!($ty)),*].len() - 1 } { ["", $(stringify!($ty)),*].len() - 1 }
}; };
)* )*
// Specialisation for no-operand instructions
($obj:expr, $opcode:expr, N) => { ($obj:expr, $opcode:expr, N) => {
move || { move || {
$crate::ins::generic::n(&mut *$obj.borrow_mut(), $opcode); $crate::ins::generic::n(&mut *$obj.borrow_mut(), $opcode);
@ -166,6 +227,7 @@ pub mod generic {
} }
}; };
// Internal-use specialisation: no-operand instructions
(@arg_count N) => { (@arg_count N) => {
{ 0 } { 0 }
}; };
@ -174,11 +236,16 @@ pub mod generic {
}; };
} }
/// Specialisation for no-operand instructions.
///
/// Only pushes the opcode byte to the text section.
/// - `obj`: Code object
/// - `opcode`: Opcode byte to push
#[inline] #[inline]
pub fn n(obj: &mut Object, opcode: u8) { pub fn n(obj: &mut Object, opcode: u8) {
obj.sections.text.push(opcode); obj.sections.text.push(opcode);
} }
// Generate opcode-generic instruction inserters
// (operand identifiers are arbitrary)
//
// New instruction types have to be added manually here
gen_ins! { gen_ins! {
o0: R, o1: R; o0: R, o1: R;
o0: R, o1: R, o2: R; o0: R, o1: R, o2: R;
@ -205,6 +272,17 @@ pub mod generic {
pub(super) use gen_ins_fn; pub(super) use gen_ins_fn;
} }
/// Generate instructions from instruction table
///
/// ```ignore
/// instructions!(($module, $obj) {
/// // Data from instruction table
/// $opcode, $mnemonic, $opty, $doc;
/// …
/// });
/// ```
/// - `$module`: Rhai module
/// - `$obj`: Code object
macro_rules! instructions { macro_rules! instructions {
( (
($module:expr, $obj:expr $(,)?) ($module:expr, $obj:expr $(,)?)
@ -212,7 +290,10 @@ macro_rules! instructions {
) => {{ ) => {{
let (module, obj) = ($module, $obj); let (module, obj) = ($module, $obj);
$({ $({
// Object is shared across all functions
let obj = Rc::clone(&obj); let obj = Rc::clone(&obj);
// Register newly generated function for each instruction
FuncRegistration::new(stringify!([<$mnemonic:lower>])) FuncRegistration::new(stringify!([<$mnemonic:lower>]))
.with_namespace(rhai::FnNamespace::Global) .with_namespace(rhai::FnNamespace::Global)
.set_into_module::<_, { generic::gen_ins_fn!(@arg_count $ops) }, false, _, true, _>( .set_into_module::<_, { generic::gen_ins_fn!(@arg_count $ops) }, false, _, true, _>(
@ -227,7 +308,9 @@ macro_rules! instructions {
}}; }};
} }
/// Setup instruction inserters
pub fn setup(module: &mut Module, obj: Rc<RefCell<Object>>) { pub fn setup(module: &mut Module, obj: Rc<RefCell<Object>>) {
// Import instructions table and use it for generation
with_builtin_macros::with_builtin! { with_builtin_macros::with_builtin! {
let $spec = include_from_root!("../hbbytecode/instructions.in") in { let $spec = include_from_root!("../hbbytecode/instructions.in") in {
instructions!((module, obj) { $spec }); instructions!((module, obj) { $spec });

View file

@ -1,8 +1,31 @@
//! Stuff related to labels
use { use {
crate::SharedObject, crate::SharedObject,
rhai::{Engine, FuncRegistration, ImmutableString, Module}, rhai::{Engine, FuncRegistration, ImmutableString, Module},
}; };
/// Macro which makes creating functions for Rhai
/// a bit more friendly
///
/// ```ignore
/// shdm_fns!{
/// module: $module;
/// shared: $shared => $shname;
///
/// $vis fn $name($param_name: $param_ty, …) -> $ret { … }
/// …
/// }
/// ```
/// - `$module`: Rhai module
/// - `$shared`: Data to be shared across the functions
/// - `$shname`: The binding name inside functions
/// - `$vis`: Function visibility for Rhai
/// - Lowercased [`rhai::FnNamespace`] variants
/// - `$name`: Function name
/// - `$param_name`: Parameter name
/// - `$param_ty`: Rust parameter type
/// - `$ret`: Optional return type (otherwise infer)
macro_rules! shdm_fns { macro_rules! shdm_fns {
( (
module: $module:expr; module: $module:expr;
@ -14,23 +37,26 @@ macro_rules! shdm_fns {
) => {{ ) => {{
let module = $module; let module = $module;
let shared = $shared; let shared = $shared;
$({ paste::paste! {
$({
let $shname = SharedObject::clone(&shared); let $shname = SharedObject::clone(&shared);
FuncRegistration::new(stringify!($name)) FuncRegistration::new(stringify!($name))
.with_namespace(rhai::FnNamespace::Global) .with_namespace(rhai::FnNamespace::[<$vis:camel>])
.set_into_module::<_, { ["", $(stringify!($param_name)),*].len() - 1 }, false, _, true, _>( .set_into_module::<_, { ["", $(stringify!($param_name)),*].len() - 1 }, false, _, true, _>(
module, module,
move |$($param_name: $param_ty),*| $(-> $ret)? { move |$($param_name: $param_ty),*| $(-> $ret)? {
let mut $shname = $shname.borrow_mut(); let mut $shname = $shname.borrow_mut();
$blk $blk
} }
); );
})* })*
}
}}; }};
} }
/// Label which is not bound to any location yet
///
/// Wraps the index of the label's slot in the symbol table.
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
pub struct UnboundLabel(pub usize); pub struct UnboundLabel(pub usize);
@ -39,11 +65,13 @@ pub fn setup(engine: &mut Engine, module: &mut Module, object: SharedObject) {
module: module; module: module;
shared: object => obj; shared: object => obj;
// Insert unnamed label
global fn label() { global fn label() {
let symbol = obj.symbol(crate::object::Section::Text); let symbol = obj.symbol(crate::object::Section::Text);
Ok(symbol) Ok(symbol)
} }
// Insert string-labeled label
global fn label(label: ImmutableString) { global fn label(label: ImmutableString) {
let symbol = obj.symbol(crate::object::Section::Text); let symbol = obj.symbol(crate::object::Section::Text);
obj.labels.insert(label, symbol.0); obj.labels.insert(label, symbol.0);
@ -51,6 +79,7 @@ pub fn setup(engine: &mut Engine, module: &mut Module, object: SharedObject) {
Ok(symbol) Ok(symbol)
} }
// Declare unbound label (to be bound later)
global fn declabel() { global fn declabel() {
let index = obj.symbols.len(); let index = obj.symbols.len();
obj.symbols.push(None); obj.symbols.push(None);
@ -58,6 +87,8 @@ pub fn setup(engine: &mut Engine, module: &mut Module, object: SharedObject) {
Ok(UnboundLabel(index)) Ok(UnboundLabel(index))
} }
// Declare unbound label (to be bound later)
// with string label
global fn declabel(label: ImmutableString) { global fn declabel(label: ImmutableString) {
let index = obj.symbols.len(); let index = obj.symbols.len();
obj.symbols.push(None); obj.symbols.push(None);
@ -66,6 +97,7 @@ pub fn setup(engine: &mut Engine, module: &mut Module, object: SharedObject) {
Ok(UnboundLabel(index)) Ok(UnboundLabel(index))
} }
// Set location for unbound label
global fn here(label: UnboundLabel) { global fn here(label: UnboundLabel) {
obj.symbols[label.0] = Some(crate::object::SymbolEntry { obj.symbols[label.0] = Some(crate::object::SymbolEntry {
location: crate::object::Section::Text, location: crate::object::Section::Text,

View file

@ -1,3 +1,5 @@
//! Simple flat-bytecode linker
use { use {
crate::{ crate::{
object::{RelocKey, RelocType, Section}, object::{RelocKey, RelocType, Section},
@ -8,18 +10,27 @@ use {
pub fn link(object: SharedObject, out: &mut impl Write) -> std::io::Result<()> { pub fn link(object: SharedObject, out: &mut impl Write) -> std::io::Result<()> {
let obj = &mut *object.borrow_mut(); let obj = &mut *object.borrow_mut();
// Walk relocation table entries
for (&loc, entry) in &obj.relocs { for (&loc, entry) in &obj.relocs {
let value = match &entry.key { let value = match &entry.key {
// Symbol direct reference
RelocKey::Symbol(sym) => obj.symbols[*sym], RelocKey::Symbol(sym) => obj.symbols[*sym],
// Indirect reference through a label
RelocKey::Label(label) => obj.symbols[obj.labels[label]], RelocKey::Label(label) => obj.symbols[obj.labels[label]],
} }
.ok_or_else(|| std::io::Error::other("Invalid symbol"))?; .ok_or_else(|| std::io::Error::other("Invalid symbol"))?;
let offset = match value.location { let offset = match value.location {
// Text section is at the beginning
Section::Text => value.offset, Section::Text => value.offset,
// Data section follows text section immediately
Section::Data => value.offset + obj.sections.text.len(), Section::Data => value.offset + obj.sections.text.len(),
}; };
// Insert address or calculate relative offset
match entry.ty { match entry.ty {
RelocType::Rel32 => obj.sections.text[loc..loc + 4] RelocType::Rel32 => obj.sections.text[loc..loc + 4]
.copy_from_slice(&((offset as isize - loc as isize) as i32).to_le_bytes()), .copy_from_slice(&((offset as isize - loc as isize) as i32).to_le_bytes()),
@ -30,6 +41,7 @@ pub fn link(object: SharedObject, out: &mut impl Write) -> std::io::Result<()> {
} }
} }
// Write to output
out.write_all(&obj.sections.text)?; out.write_all(&obj.sections.text)?;
out.write_all(&obj.sections.data) out.write_all(&obj.sections.data)
} }

View file

@ -1,23 +1,31 @@
//! Code object
use {rhai::ImmutableString, std::collections::HashMap}; use {rhai::ImmutableString, std::collections::HashMap};
/// Section of the object in which a symbol can be located
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Section { pub enum Section {
/// Text section (instruction bytes)
Text, Text,
/// Data section (inserted data bytes)
Data, Data,
} }
/// Symbol entry (in what section, where)
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
pub struct SymbolEntry { pub struct SymbolEntry {
/// Section the symbol is located in
pub location: Section, pub location: Section,
/// Offset of the symbol; for data-section symbols the linker
/// adds the length of the text section to it
pub offset: usize, pub offset: usize,
} }
/// Relocation table key (what the relocation refers to)
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum RelocKey { pub enum RelocKey {
/// Direct reference (index into the symbol table)
Symbol(usize), Symbol(usize),
/// Indirect reference (label name, resolved to a symbol
/// through the labels table when linking)
Label(ImmutableString), Label(ImmutableString),
} }
/// Relocation type
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
pub enum RelocType { pub enum RelocType {
Rel32, Rel32,
@ -25,23 +33,30 @@ pub enum RelocType {
Abs64, Abs64,
} }
/// Relocation table entry
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct RelocEntry { pub struct RelocEntry {
/// What is being referenced (symbol or label)
pub key: RelocKey, pub key: RelocKey,
/// Kind of relocation to apply
pub ty: RelocType, pub ty: RelocType,
} }
/// Object code (raw bytes of each section)
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct Sections { pub struct Sections {
/// Text section bytes (opcodes and operands are pushed here)
pub text: Vec<u8>, pub text: Vec<u8>,
/// Data section bytes
pub data: Vec<u8>, pub data: Vec<u8>,
} }
/// Code object: sections together with symbol, label and relocation tables
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct Object { pub struct Object {
/// Vectors with sections
pub sections: Sections, pub sections: Sections,
/// Symbol table (`None` marks a symbol which has no location assigned)
pub symbols: Vec<Option<SymbolEntry>>, pub symbols: Vec<Option<SymbolEntry>>,
/// Labels to symbols table (label name → index into `symbols`)
pub labels: HashMap<ImmutableString, usize>, pub labels: HashMap<ImmutableString, usize>,
/// Relocation table (keyed by position in the text section)
pub relocs: HashMap<usize, RelocEntry>, pub relocs: HashMap<usize, RelocEntry>,
} }
@ -50,6 +65,7 @@ pub struct Object {
pub struct SymbolRef(pub usize); pub struct SymbolRef(pub usize);
impl Object { impl Object {
/// Insert symbol at current location in specified section
pub fn symbol(&mut self, section: Section) -> SymbolRef { pub fn symbol(&mut self, section: Section) -> SymbolRef {
let section_buf = match section { let section_buf = match section {
Section::Text => &mut self.sections.text, Section::Text => &mut self.sections.text,
@ -64,6 +80,7 @@ impl Object {
SymbolRef(self.symbols.len() - 1) SymbolRef(self.symbols.len() - 1)
} }
/// Insert to relocation table and write zeroes to code
pub fn relocation(&mut self, key: RelocKey, ty: RelocType) { pub fn relocation(&mut self, key: RelocKey, ty: RelocType) {
self.relocs self.relocs
.insert(self.sections.text.len(), RelocEntry { key, ty }); .insert(self.sections.text.len(), RelocEntry { key, ty });