diff --git a/.gitignore b/.gitignore index 499fa03..c73e0df 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,4 @@ /target -/hbbytecode/src/opcode.rs -/hbbytecode/src/ops.rs -/hblang/src/instrs.rs -/hblang/src/disasm.rs +/hbbytecode/src/instrs.rs /.rgignore -rust-ice-* +rustc-ice-* diff --git a/Cargo.lock b/Cargo.lock index a7d03f3..49297bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,7 @@ version = "0.1.0" name = "hblang" version = "0.1.0" dependencies = [ + "hbbytecode", "hbvm", "regalloc2", ] diff --git a/hbbytecode/Cargo.toml b/hbbytecode/Cargo.toml index 1b50f51..a542869 100644 --- a/hbbytecode/Cargo.toml +++ b/hbbytecode/Cargo.toml @@ -2,3 +2,8 @@ name = "hbbytecode" version = "0.1.0" edition = "2018" + +[features] +default = ["disasm"] +std = [] +disasm = ["std"] diff --git a/hbbytecode/build.rs b/hbbytecode/build.rs index 9ac45d2..6b549c6 100644 --- a/hbbytecode/build.rs +++ b/hbbytecode/build.rs @@ -1,58 +1,208 @@ #![feature(iter_next_chunk)] +use std::{collections::HashSet, fmt::Write}; + fn main() -> Result<(), Box> { println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rerun-if-changed=instructions.in"); let mut generated = String::new(); - gen_op_structs(&mut generated)?; - std::fs::write("src/ops.rs", generated)?; - - let mut generated = String::new(); - gen_op_codes(&mut generated)?; - std::fs::write("src/opcode.rs", generated)?; + gen_instrs(&mut generated)?; + std::fs::write("src/instrs.rs", generated)?; Ok(()) } -fn gen_op_structs(generated: &mut String) -> std::fmt::Result { - use std::fmt::Write; - let mut seen = std::collections::HashSet::new(); +fn gen_instrs(generated: &mut String) -> Result<(), Box> { + writeln!(generated, "#![allow(dead_code)] #![allow(clippy::upper_case_acronyms)]")?; writeln!(generated, "use crate::*;")?; - for [.., args, _] in instructions() { - if !seen.insert(args) { - continue; - } - writeln!(generated, "#[derive(Clone, Copy, Debug)]")?; - writeln!(generated, "#[repr(packed)]")?; - write!(generated, "pub struct Ops{args}(")?; - let mut first = true; - for ch in args.chars().filter(|&ch| ch != 'N') { - if !std::mem::take(&mut first) { - write!(generated, ",")?; + '_opcode_structs: { + let mut seen = HashSet::new(); + for [.., args, _] in instructions() { + if !seen.insert(args) { + continue; } - write!(generated, "pub Op{ch}")?; + + writeln!(generated, "#[derive(Clone, Copy, Debug)]")?; + writeln!(generated, "#[repr(packed)]")?; + write!(generated, "pub struct Ops{args}(")?; + let mut first = true; + for ch in args.chars().filter(|&ch| ch != 'N') { + if !std::mem::take(&mut first) { + write!(generated, ",")?; + } + write!(generated, "pub Op{ch}")?; + } + writeln!(generated, ");")?; + writeln!(generated, "unsafe impl BytecodeItem for Ops{args} {{}}")?; } - writeln!(generated, ");")?; - writeln!(generated, "unsafe impl BytecodeItem for Ops{args} {{}}")?; } + '_max_size: { + let max = instructions() + .map( + |[_, _, ty, _]| { + if ty == "N" { + 1 + } else { + iter_args(ty).map(arg_to_width).sum::() + 1 + } + }, + ) + .max() + .unwrap(); + + writeln!(generated, "pub const MAX_SIZE: usize = {max};")?; + } + + '_encoders: { + for [op, name, ty, doc] in instructions() { + writeln!(generated, "/// {}", doc.trim_matches('"'))?; + let name = name.to_lowercase(); + let args = comma_sep( + iter_args(ty) + .enumerate() + .map(|(i, c)| format!("{}{i}: {}", arg_to_name(c), arg_to_type(c))), + ); + writeln!(generated, "pub fn {name}({args}) -> (usize, [u8; MAX_SIZE]) {{")?; + let arg_names = + comma_sep(iter_args(ty).enumerate().map(|(i, c)| format!("{}{i}", arg_to_name(c)))); + writeln!(generated, " unsafe {{ crate::encode({ty}({op}, {arg_names})) }}")?; + writeln!(generated, "}}")?; + } + } + + '_structs: { + let mut seen = std::collections::HashSet::new(); + for [_, _, ty, _] in instructions() { + if !seen.insert(ty) { + continue; + } + let types = comma_sep(iter_args(ty).map(arg_to_type).map(|s| s.to_string())); + writeln!(generated, "#[repr(packed)] pub struct {ty}(u8, {types});")?; + } + } + + '_name_list: { + writeln!(generated, "pub const NAMES: [&str; {}] = [", instructions().count())?; + for [_, name, _, _] in instructions() { + writeln!(generated, " \"{}\",", name.to_lowercase())?; + } + writeln!(generated, "];")?; + } + + let instr = "Instr"; + let oper = "Oper"; + + '_instr_enum: { + writeln!(generated, "#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u8)]")?; + writeln!(generated, "pub enum {instr} {{")?; + for [id, name, ..] in instructions() { + writeln!(generated, " {name} = {id},")?; + } + writeln!(generated, "}}")?; + } + + '_arg_kind: { + writeln!(generated, "#[derive(Debug, Clone, Copy, PartialEq, Eq)]")?; + writeln!(generated, "pub enum {oper} {{")?; + let mut seen = HashSet::new(); + for ty in instructions().flat_map(|[.., ty, _]| iter_args(ty)) { + if !seen.insert(ty) { + continue; + } + writeln!(generated, " {ty}({}),", arg_to_type(ty))?; + } + writeln!(generated, "}}")?; + } + + '_parse_opers: { + writeln!( + generated, + "/// This assumes the instruction byte is still at the beginning of the buffer" + )?; + writeln!(generated, "#[cfg(feature = \"disasm\")]")?; + writeln!(generated, "pub fn parse_args(bytes: &mut &[u8], kind: {instr}, buf: &mut std::vec::Vec<{oper}>) -> Option<()> {{")?; + writeln!(generated, " match kind {{")?; + let mut instrs = instructions().collect::>(); + instrs.sort_unstable_by_key(|&[.., ty, _]| ty); + for group in instrs.chunk_by(|[.., a, _], [.., b, _]| a == b) { + let ty = group[0][2]; + for &[_, name, ..] in group { + writeln!(generated, " | {instr}::{name}")?; + } + generated.pop(); + writeln!(generated, " => {{")?; + if iter_args(ty).count() != 0 { + writeln!(generated, " let data = crate::decode::<{ty}>(bytes)?;")?; + writeln!( + generated, + " buf.extend([{}]);", + comma_sep( + iter_args(ty).zip(1u32..).map(|(t, i)| format!("{oper}::{t}(data.{i})")) + ) + )?; + } else { + writeln!(generated, " crate::decode::<{ty}>(bytes)?;")?; + } + + writeln!(generated, " }}")?; + } + writeln!(generated, " }}")?; + writeln!(generated, " Some(())")?; + writeln!(generated, "}}")?; + } + + std::fs::write("src/instrs.rs", generated)?; Ok(()) } -fn gen_op_codes(generated: &mut String) -> std::fmt::Result { - use std::fmt::Write; - for [op, name, _, comment] in instructions() { - writeln!(generated, "#[doc = {comment}]")?; - writeln!(generated, "pub const {name}: u8 = {op};")?; - } - Ok(()) +fn comma_sep(items: impl Iterator) -> String { + items.map(|item| item.to_string()).collect::>().join(", ") } fn instructions() -> impl Iterator { include_str!("../hbbytecode/instructions.in") .lines() - .map(|line| line.strip_suffix(';').unwrap()) + .filter_map(|line| line.strip_suffix(';')) .map(|line| line.splitn(4, ',').map(str::trim).next_chunk().unwrap()) } + +fn arg_to_type(arg: char) -> &'static str { + match arg { + 'R' | 'B' => "u8", + 'H' => "u16", + 'W' => "u32", + 'D' | 'A' => "u64", + 'P' => "i16", + 'O' => "i32", + _ => panic!("unknown type: {}", arg), + } +} + +fn arg_to_width(arg: char) -> usize { + match arg { + 'R' | 'B' => 1, + 'H' => 2, + 'W' => 4, + 'D' | 'A' => 8, + 'P' => 2, + 'O' => 4, + _ => panic!("unknown type: {}", arg), + } +} + +fn arg_to_name(arg: char) -> &'static str { + match arg { + 'R' => "reg", + 'B' | 'H' | 'W' | 'D' => "imm", + 'P' | 'O' => "offset", + 'A' => "addr", + _ => panic!("unknown type: {}", arg), + } +} + +fn iter_args(ty: &'static str) -> impl Iterator { + ty.chars().filter(|c| *c != 'N') +} diff --git a/hbbytecode/src/lib.rs b/hbbytecode/src/lib.rs index bcebd5b..3a45841 100644 --- a/hbbytecode/src/lib.rs +++ b/hbbytecode/src/lib.rs @@ -1,10 +1,12 @@ #![no_std] -pub use crate::ops::*; +#[cfg(feature = "std")] +extern crate std; + +pub use crate::instrs::*; use core::convert::TryFrom; -pub mod opcode; -mod ops; +mod instrs; type OpR = u8; @@ -22,6 +24,38 @@ type OpD = u64; pub unsafe trait BytecodeItem {} unsafe impl BytecodeItem for u8 {} +impl TryFrom for Instr { + type Error = u8; + + #[inline] + fn try_from(value: u8) -> Result { + #[cold] + fn failed(value: u8) -> Result { + Err(value) + } + + if value < NAMES.len() as u8 { + unsafe { Ok(std::mem::transmute::(value)) } + } else { + failed(value) + } + } +} + +#[inline] +unsafe fn encode(instr: T) -> (usize, [u8; instrs::MAX_SIZE]) { + let mut buf = [0; instrs::MAX_SIZE]; + core::ptr::write(buf.as_mut_ptr() as *mut T, instr); + (core::mem::size_of::(), buf) +} + +#[inline] +fn decode(binary: &mut &[u8]) -> Option { + let (front, rest) = std::mem::take(binary).split_at_checked(core::mem::size_of::())?; + *binary = rest; + unsafe { Some(core::ptr::read(front.as_ptr() as *const T)) } +} + /// Rounding mode #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[repr(u8)] @@ -39,3 +73,181 @@ impl TryFrom for RoundingMode { (value <= 3).then(|| unsafe { core::mem::transmute(value) }).ok_or(()) } } + +#[cfg(feature = "disasm")] +#[derive(Clone, Copy)] +pub enum DisasmItem { + Func, + Global, +} + +#[cfg(feature = "disasm")] +pub fn disasm( + binary: &mut &[u8], + functions: &std::collections::BTreeMap, + out: &mut impl std::io::Write, + mut eca_handler: impl FnMut(&mut &[u8]), +) -> std::io::Result<()> { + use { + self::instrs::Instr, + std::{ + collections::{hash_map::Entry, HashMap}, + convert::TryInto, + vec::Vec, + }, + }; + + fn instr_from_byte(b: u8) -> std::io::Result { + if b as usize >= instrs::NAMES.len() { + return Err(std::io::ErrorKind::InvalidData.into()); + } + Ok(unsafe { std::mem::transmute::(b) }) + } + + let mut labels = HashMap::::default(); + let mut buf = Vec::::new(); + let mut has_cycle = false; + let mut has_oob = false; + + '_offset_pass: for (&off, &(_name, len, kind)) in functions.iter() { + if matches!(kind, DisasmItem::Global) { + continue; + } + + let prev = *binary; + + *binary = &binary[..off as usize]; + + let mut label_count = 0; + while let Some(&byte) = binary.first() { + let offset: i32 = (prev.len() - binary.len()).try_into().unwrap(); + if offset as u32 == off + len { + break; + } + let Ok(inst) = instr_from_byte(byte) else { break }; + instrs::parse_args(binary, inst, &mut buf).ok_or(std::io::ErrorKind::OutOfMemory)?; + + for op in buf.drain(..) { + let rel = match op { + instrs::Oper::O(rel) => rel, + instrs::Oper::P(rel) => rel.into(), + _ => continue, + }; + + has_cycle |= rel == 0; + + let global_offset: u32 = (offset + rel).try_into().unwrap(); + if functions.get(&global_offset).is_some() { + continue; + } + label_count += match labels.entry(global_offset) { + Entry::Occupied(_) => 0, + Entry::Vacant(entry) => { + entry.insert(label_count); + 1 + } + } + } + + if matches!(inst, Instr::ECA) { + eca_handler(binary); + } + } + + *binary = prev; + } + + let mut ordered = functions.iter().collect::>(); + ordered.sort_unstable_by_key(|(_, (name, _, _))| name); + + '_dump: for (&off, &(name, len, kind)) in ordered { + if matches!(kind, DisasmItem::Global) { + continue; + } + let prev = *binary; + + writeln!(out, "{name}:")?; + + *binary = &binary[..off as usize]; + while let Some(&byte) = binary.first() { + let offset: i32 = (prev.len() - binary.len()).try_into().unwrap(); + if offset as u32 == off + len { + break; + } + let Ok(inst) = instr_from_byte(byte) else { + writeln!(out, "invalid instr {byte}")?; + break; + }; + instrs::parse_args(binary, inst, &mut buf).unwrap(); + + if let Some(label) = labels.get(&offset.try_into().unwrap()) { + write!(out, "{:>2}: ", label)?; + } else { + write!(out, " ")?; + } + + write!(out, "{inst:<8?} ")?; + + 'a: for (i, op) in buf.drain(..).enumerate() { + if i != 0 { + write!(out, ", ")?; + } + + let rel = 'b: { + match op { + instrs::Oper::O(rel) => break 'b rel, + instrs::Oper::P(rel) => break 'b rel.into(), + instrs::Oper::R(r) => write!(out, "r{r}")?, + instrs::Oper::B(b) => write!(out, "{b}b")?, + instrs::Oper::H(h) => write!(out, "{h}h")?, + instrs::Oper::W(w) => write!(out, "{w}w")?, + instrs::Oper::D(d) if (d as i64) < 0 => write!(out, "{}d", d as i64)?, + instrs::Oper::D(d) => write!(out, "{d}d")?, + instrs::Oper::A(a) => write!(out, "{a}a")?, + } + + continue 'a; + }; + + let global_offset: u32 = (offset + rel).try_into().unwrap(); + if let Some(&(name, ..)) = functions.get(&global_offset) { + if name.contains('\0') { + write!(out, ":{name:?}")?; + } else { + write!(out, ":{name}")?; + } + } else { + let local_has_oob = global_offset < off + || global_offset > off + len + || instr_from_byte(prev[global_offset as usize]).is_err() + || prev[global_offset as usize] == 0; + has_oob |= local_has_oob; + let label = labels.get(&global_offset).unwrap(); + if local_has_oob { + write!(out, "!!!!!!!!!{rel}")?; + } else { + write!(out, ":{label}")?; + } + } + } + + writeln!(out)?; + + if matches!(inst, Instr::ECA) { + eca_handler(binary); + } + } + + *binary = prev; + } + + if has_oob { + return Err(std::io::ErrorKind::InvalidInput.into()); + } + + if has_cycle { + return Err(std::io::ErrorKind::TimedOut.into()); + } + + Ok(()) +} diff --git a/hblang/Cargo.toml b/hblang/Cargo.toml index 90a0c45..a2f7887 100644 --- a/hblang/Cargo.toml +++ b/hblang/Cargo.toml @@ -8,5 +8,6 @@ name = "hbc" path = "src/main.rs" [dependencies] +hbbytecode = { version = "0.1.0", path = "../hbbytecode" } hbvm = { path = "../hbvm", features = ["nightly"] } regalloc2 = { git = "https://github.com/jakubDoka/regalloc2" } diff --git a/hblang/build.rs b/hblang/build.rs deleted file mode 100644 index 1598082..0000000 --- a/hblang/build.rs +++ /dev/null @@ -1,183 +0,0 @@ -#![feature(iter_next_chunk)] -use std::{collections::HashSet, fmt::Write}; - -fn main() -> Result<(), Box> { - println!("cargo:rerun-if-changed=build.rs"); - println!("cargo:rerun-if-changed=../hbbytecode/instructions.in"); - - gen_instrs()?; - - Ok(()) -} - -fn gen_instrs() -> Result<(), Box> { - let mut generated = String::new(); - - writeln!(generated, "#![allow(dead_code)] #![allow(clippy::upper_case_acronyms)]")?; - - '_max_size: { - let max = instructions() - .map( - |[_, _, ty, _]| { - if ty == "N" { - 1 - } else { - iter_args(ty).map(arg_to_width).sum::() + 1 - } - }, - ) - .max() - .unwrap(); - - writeln!(generated, "pub const MAX_SIZE: usize = {max};")?; - } - - '_encoders: { - for [op, name, ty, doc] in instructions() { - writeln!(generated, "/// {}", doc.trim_matches('"'))?; - let name = name.to_lowercase(); - let args = comma_sep( - iter_args(ty) - .enumerate() - .map(|(i, c)| format!("{}{i}: {}", arg_to_name(c), arg_to_type(c))), - ); - writeln!(generated, "pub fn {name}({args}) -> (usize, [u8; MAX_SIZE]) {{")?; - let arg_names = - comma_sep(iter_args(ty).enumerate().map(|(i, c)| format!("{}{i}", arg_to_name(c)))); - writeln!(generated, " unsafe {{ crate::encode({ty}({op}, {arg_names})) }}")?; - writeln!(generated, "}}")?; - } - } - - '_structs: { - let mut seen = std::collections::HashSet::new(); - for [_, _, ty, _] in instructions() { - if !seen.insert(ty) { - continue; - } - let types = comma_sep(iter_args(ty).map(arg_to_type).map(|s| s.to_string())); - writeln!(generated, "#[repr(packed)] pub struct {ty}(u8, {types});")?; - } - } - - '_name_list: { - writeln!(generated, "pub const NAMES: [&str; {}] = [", instructions().count())?; - for [_, name, _, _] in instructions() { - writeln!(generated, " \"{}\",", name.to_lowercase())?; - } - writeln!(generated, "];")?; - } - - let instr = "Instr"; - let oper = "Oper"; - - '_instr_enum: { - writeln!(generated, "#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u8)]")?; - writeln!(generated, "pub enum {instr} {{")?; - for [id, name, ..] in instructions() { - writeln!(generated, " {name} = {id},")?; - } - writeln!(generated, "}}")?; - } - - '_arg_kind: { - writeln!(generated, "#[derive(Debug, Clone, Copy, PartialEq, Eq)]")?; - writeln!(generated, "pub enum {oper} {{")?; - let mut seen = HashSet::new(); - for ty in instructions().flat_map(|[.., ty, _]| iter_args(ty)) { - if !seen.insert(ty) { - continue; - } - writeln!(generated, " {ty}({}),", arg_to_type(ty))?; - } - writeln!(generated, "}}")?; - } - - '_parse_opers: { - writeln!( - generated, - "/// This assumes the instruction byte is still at the beginning of the buffer" - )?; - writeln!(generated, "pub fn parse_args(bytes: &mut &[u8], kind: {instr}, buf: &mut Vec<{oper}>) -> Option<()> {{")?; - writeln!(generated, " match kind {{")?; - let mut instrs = instructions().collect::>(); - instrs.sort_unstable_by_key(|&[.., ty, _]| ty); - for group in instrs.chunk_by(|[.., a, _], [.., b, _]| a == b) { - let ty = group[0][2]; - for &[_, name, ..] in group { - writeln!(generated, " | {instr}::{name}")?; - } - generated.pop(); - writeln!(generated, " => {{")?; - if iter_args(ty).count() != 0 { - writeln!(generated, " let data = crate::decode::<{ty}>(bytes)?;")?; - writeln!( - generated, - " buf.extend([{}]);", - comma_sep( - iter_args(ty).zip(1u32..).map(|(t, i)| format!("{oper}::{t}(data.{i})")) - ) - )?; - } else { - writeln!(generated, " crate::decode::<{ty}>(bytes)?;")?; - } - - writeln!(generated, " }}")?; - } - writeln!(generated, " }}")?; - writeln!(generated, " Some(())")?; - writeln!(generated, "}}")?; - } - - std::fs::write("src/instrs.rs", generated)?; - Ok(()) -} - -fn comma_sep(items: impl Iterator) -> String { - items.map(|item| item.to_string()).collect::>().join(", ") -} - -fn instructions() -> impl Iterator { - include_str!("../hbbytecode/instructions.in") - .lines() - .filter_map(|line| line.strip_suffix(';')) - .map(|line| line.splitn(4, ',').map(str::trim).next_chunk().unwrap()) -} - -fn arg_to_type(arg: char) -> &'static str { - match arg { - 'R' | 'B' => "u8", - 'H' => "u16", - 'W' => "u32", - 'D' | 'A' => "u64", - 'P' => "i16", - 'O' => "i32", - _ => panic!("unknown type: {}", arg), - } -} - -fn arg_to_width(arg: char) -> usize { - match arg { - 'R' | 'B' => 1, - 'H' => 2, - 'W' => 4, - 'D' | 'A' => 8, - 'P' => 2, - 'O' => 4, - _ => panic!("unknown type: {}", arg), - } -} - -fn arg_to_name(arg: char) -> &'static str { - match arg { - 'R' => "reg", - 'B' | 'H' | 'W' | 'D' => "imm", - 'P' | 'O' => "offset", - 'A' => "addr", - _ => panic!("unknown type: {}", arg), - } -} - -fn iter_args(ty: &'static str) -> impl Iterator { - ty.chars().filter(|c| *c != 'N') -} diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index 4bc6835..d305722 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -27,6 +27,7 @@ use { son::reg, ty::ArrayLen, }, + hbbytecode as instrs, parser::Ast, std::{ collections::{hash_map, BTreeMap, VecDeque}, @@ -52,7 +53,6 @@ pub mod codegen; pub mod parser; pub mod son; -mod instrs; mod lexer; mod task { @@ -715,7 +715,7 @@ impl Types { output: &mut impl std::io::Write, eca_handler: impl FnMut(&mut &[u8]), ) -> std::io::Result<()> { - use crate::DisasmItem; + use instrs::DisasmItem; let functions = self .funcs .iter() @@ -744,7 +744,7 @@ impl Types { (g.offset, (name, g.data.len() as Size, DisasmItem::Global)) })) .collect::>(); - crate::disasm(&mut sluce, &functions, output, eca_handler) + instrs::disasm(&mut sluce, &functions, output, eca_handler) } fn parama(&self, ret: impl Into) -> ParamAlloc { @@ -857,181 +857,6 @@ impl Types { } } -#[inline] -unsafe fn encode(instr: T) -> (usize, [u8; instrs::MAX_SIZE]) { - let mut buf = [0; instrs::MAX_SIZE]; - std::ptr::write(buf.as_mut_ptr() as *mut T, instr); - (std::mem::size_of::(), buf) -} - -#[inline] -fn decode(binary: &mut &[u8]) -> Option { - unsafe { Some(std::ptr::read(binary.take(..std::mem::size_of::())?.as_ptr() as *const T)) } -} - -#[derive(Clone, Copy)] -enum DisasmItem { - Func, - Global, -} - -fn disasm( - binary: &mut &[u8], - functions: &BTreeMap, - out: &mut impl std::io::Write, - mut eca_handler: impl FnMut(&mut &[u8]), -) -> std::io::Result<()> { - use self::instrs::Instr; - - fn instr_from_byte(b: u8) -> std::io::Result { - if b as usize >= instrs::NAMES.len() { - return Err(std::io::ErrorKind::InvalidData.into()); - } - Ok(unsafe { std::mem::transmute::(b) }) - } - - let mut labels = HashMap::::default(); - let mut buf = Vec::::new(); - let mut has_cycle = false; - let mut has_oob = false; - - '_offset_pass: for (&off, &(_name, len, kind)) in functions.iter() { - if matches!(kind, DisasmItem::Global) { - continue; - } - - let prev = *binary; - - binary.take(..off as usize).unwrap(); - - let mut label_count = 0; - while let Some(&byte) = binary.first() { - let offset: i32 = (prev.len() - binary.len()).try_into().unwrap(); - if offset as u32 == off + len { - break; - } - let Ok(inst) = instr_from_byte(byte) else { break }; - instrs::parse_args(binary, inst, &mut buf).ok_or(std::io::ErrorKind::OutOfMemory)?; - - for op in buf.drain(..) { - let rel = match op { - instrs::Oper::O(rel) => rel, - instrs::Oper::P(rel) => rel.into(), - _ => continue, - }; - - has_cycle |= rel == 0; - - let global_offset: u32 = (offset + rel).try_into().unwrap(); - if functions.get(&global_offset).is_some() { - continue; - } - label_count += labels.try_insert(global_offset, label_count).is_ok() as u32; - } - - if matches!(inst, Instr::ECA) { - eca_handler(binary); - } - } - - *binary = prev; - } - - let mut ordered = functions.iter().collect::>(); - ordered.sort_unstable_by_key(|(_, (name, _, _))| name); - - '_dump: for (&off, &(name, len, kind)) in ordered { - if matches!(kind, DisasmItem::Global) { - continue; - } - let prev = *binary; - - writeln!(out, "{name}:")?; - - binary.take(..off as usize).unwrap(); - while let Some(&byte) = binary.first() { - let offset: i32 = (prev.len() - binary.len()).try_into().unwrap(); - if offset as u32 == off + len { - break; - } - let Ok(inst) = instr_from_byte(byte) else { - writeln!(out, "invalid instr {byte}")?; - break; - }; - instrs::parse_args(binary, inst, &mut buf).unwrap(); - - if let Some(label) = labels.get(&offset.try_into().unwrap()) { - write!(out, "{:>2}: ", label)?; - } else { - write!(out, " ")?; - } - - write!(out, "{inst:<8?} ")?; - - 'a: for (i, op) in buf.drain(..).enumerate() { - if i != 0 { - write!(out, ", ")?; - } - - let rel = 'b: { - match op { - instrs::Oper::O(rel) => break 'b rel, - instrs::Oper::P(rel) => break 'b rel.into(), - instrs::Oper::R(r) => write!(out, "r{r}")?, - instrs::Oper::B(b) => write!(out, "{b}b")?, - instrs::Oper::H(h) => write!(out, "{h}h")?, - instrs::Oper::W(w) => write!(out, "{w}w")?, - instrs::Oper::D(d) if (d as i64) < 0 => write!(out, "{}d", d as i64)?, - instrs::Oper::D(d) => write!(out, "{d}d")?, - instrs::Oper::A(a) => write!(out, "{a}a")?, - } - - continue 'a; - }; - - let global_offset: u32 = (offset + rel).try_into().unwrap(); - if let Some(&(name, ..)) = functions.get(&global_offset) { - if name.contains('\0') { - write!(out, ":{name:?}")?; - } else { - write!(out, ":{name}")?; - } - } else { - let local_has_oob = global_offset < off - || global_offset > off + len - || instr_from_byte(prev[global_offset as usize]).is_err() - || prev[global_offset as usize] == 0; - has_oob |= local_has_oob; - let label = labels.get(&global_offset).unwrap(); - if local_has_oob { - write!(out, "!!!!!!!!!{rel}")?; - } else { - write!(out, ":{label}")?; - } - } - } - - writeln!(out)?; - - if matches!(inst, Instr::ECA) { - eca_handler(binary); - } - } - - *binary = prev; - } - - if has_oob { - return Err(std::io::ErrorKind::InvalidInput.into()); - } - - if has_cycle { - return Err(std::io::ErrorKind::TimedOut.into()); - } - - Ok(()) -} - struct TaskQueue { inner: Mutex>, } diff --git a/hbvm/Cargo.toml b/hbvm/Cargo.toml index 447ddf2..a6e25b0 100644 --- a/hbvm/Cargo.toml +++ b/hbvm/Cargo.toml @@ -9,4 +9,4 @@ alloc = [] nightly = [] [dependencies] -hbbytecode = { path = "../hbbytecode" } +hbbytecode = { path = "../hbbytecode", default-features = false } diff --git a/hbvm/src/vmrun.rs b/hbvm/src/vmrun.rs index e2821f0..8d302d4 100644 --- a/hbvm/src/vmrun.rs +++ b/hbvm/src/vmrun.rs @@ -34,7 +34,7 @@ where /// Program can return [`VmRunError`] if a trap handling failed #[cfg_attr(feature = "nightly", repr(align(4096)))] pub fn run(&mut self) -> Result { - use hbbytecode::opcode::*; + use hbbytecode::Instr as I; loop { // Big match // @@ -56,105 +56,112 @@ where // - Yes, we assume you run 64 bit CPU. Else ?conradluget a better CPU // sorry 8 bit fans, HBVM won't run on your Speccy :( unsafe { - match self.memory.prog_read::(self.pc as _) { - UN => { + match self + .memory + .prog_read::(self.pc as _) + .try_into() + .map_err(VmRunError::InvalidOpcode)? + { + I::UN => { self.bump_pc::(); return Err(VmRunError::Unreachable); } - TX => { + I::TX => { self.bump_pc::(); return Ok(VmRunOk::End); } - NOP => handler!(self, |OpsN()| ()), - ADD8 => self.binary_op(u8::wrapping_add), - ADD16 => self.binary_op(u16::wrapping_add), - ADD32 => self.binary_op(u32::wrapping_add), - ADD64 => self.binary_op(u64::wrapping_add), - SUB8 => self.binary_op(u8::wrapping_sub), - SUB16 => self.binary_op(u16::wrapping_sub), - SUB32 => self.binary_op(u32::wrapping_sub), - SUB64 => self.binary_op(u64::wrapping_sub), - MUL8 => self.binary_op(u8::wrapping_mul), - MUL16 => self.binary_op(u16::wrapping_mul), - MUL32 => self.binary_op(u32::wrapping_mul), - MUL64 => self.binary_op(u64::wrapping_mul), - AND => self.binary_op::(ops::BitAnd::bitand), - OR => self.binary_op::(ops::BitOr::bitor), - XOR => self.binary_op::(ops::BitXor::bitxor), - SLU8 => self.binary_op_shift::(u8::wrapping_shl), - SLU16 => self.binary_op_shift::(u16::wrapping_shl), - SLU32 => self.binary_op_shift::(u32::wrapping_shl), - SLU64 => self.binary_op_shift::(u64::wrapping_shl), - SRU8 => self.binary_op_shift::(u8::wrapping_shr), - SRU16 => self.binary_op_shift::(u16::wrapping_shr), - SRU32 => self.binary_op_shift::(u32::wrapping_shr), - SRU64 => self.binary_op_shift::(u64::wrapping_shr), - SRS8 => self.binary_op_shift::(i8::wrapping_shr), - SRS16 => self.binary_op_shift::(i16::wrapping_shr), - SRS32 => self.binary_op_shift::(i32::wrapping_shr), - SRS64 => self.binary_op_shift::(i64::wrapping_shr), - CMPU => handler!(self, |OpsRRR(tg, a0, a1)| self.cmp( + I::NOP => handler!(self, |OpsN()| ()), + I::ADD8 => self.binary_op(u8::wrapping_add), + I::ADD16 => self.binary_op(u16::wrapping_add), + I::ADD32 => self.binary_op(u32::wrapping_add), + I::ADD64 => self.binary_op(u64::wrapping_add), + I::SUB8 => self.binary_op(u8::wrapping_sub), + I::SUB16 => self.binary_op(u16::wrapping_sub), + I::SUB32 => self.binary_op(u32::wrapping_sub), + I::SUB64 => self.binary_op(u64::wrapping_sub), + I::MUL8 => self.binary_op(u8::wrapping_mul), + I::MUL16 => self.binary_op(u16::wrapping_mul), + I::MUL32 => self.binary_op(u32::wrapping_mul), + I::MUL64 => self.binary_op(u64::wrapping_mul), + I::AND => self.binary_op::(ops::BitAnd::bitand), + I::OR => self.binary_op::(ops::BitOr::bitor), + I::XOR => self.binary_op::(ops::BitXor::bitxor), + I::SLU8 => self.binary_op_shift::(u8::wrapping_shl), + I::SLU16 => self.binary_op_shift::(u16::wrapping_shl), + I::SLU32 => self.binary_op_shift::(u32::wrapping_shl), + I::SLU64 => self.binary_op_shift::(u64::wrapping_shl), + I::SRU8 => self.binary_op_shift::(u8::wrapping_shr), + I::SRU16 => self.binary_op_shift::(u16::wrapping_shr), + I::SRU32 => self.binary_op_shift::(u32::wrapping_shr), + I::SRU64 => self.binary_op_shift::(u64::wrapping_shr), + I::SRS8 => self.binary_op_shift::(i8::wrapping_shr), + I::SRS16 => self.binary_op_shift::(i16::wrapping_shr), + I::SRS32 => self.binary_op_shift::(i32::wrapping_shr), + I::SRS64 => self.binary_op_shift::(i64::wrapping_shr), + I::CMPU => handler!(self, |OpsRRR(tg, a0, a1)| self.cmp( tg, a0, self.read_reg(a1).cast::() )), - CMPS => handler!(self, |OpsRRR(tg, a0, a1)| self.cmp( + I::CMPS => handler!(self, |OpsRRR(tg, a0, a1)| self.cmp( tg, a0, self.read_reg(a1).cast::() )), - DIRU8 => self.dir::(), - DIRU16 => self.dir::(), - DIRU32 => self.dir::(), - DIRU64 => self.dir::(), - DIRS8 => self.dir::(), - DIRS16 => self.dir::(), - DIRS32 => self.dir::(), - DIRS64 => self.dir::(), - NEG => handler!(self, |OpsRR(tg, a0)| { + I::DIRU8 => self.dir::(), + I::DIRU16 => self.dir::(), + I::DIRU32 => self.dir::(), + I::DIRU64 => self.dir::(), + I::DIRS8 => self.dir::(), + I::DIRS16 => self.dir::(), + I::DIRS32 => self.dir::(), + I::DIRS64 => self.dir::(), + I::NEG => handler!(self, |OpsRR(tg, a0)| { // Bit negation self.write_reg(tg, self.read_reg(a0).cast::().wrapping_neg()) }), - NOT => handler!(self, |OpsRR(tg, a0)| { + I::NOT => handler!(self, |OpsRR(tg, a0)| { // Logical negation self.write_reg(tg, u64::from(self.read_reg(a0).cast::() == 0)); }), - SXT8 => handler!(self, |OpsRR(tg, a0)| { + I::SXT8 => handler!(self, |OpsRR(tg, a0)| { self.write_reg(tg, self.read_reg(a0).cast::() as i64) }), - SXT16 => handler!(self, |OpsRR(tg, a0)| { + I::SXT16 => handler!(self, |OpsRR(tg, a0)| { self.write_reg(tg, self.read_reg(a0).cast::() as i64) }), - SXT32 => handler!(self, |OpsRR(tg, a0)| { + I::SXT32 => handler!(self, |OpsRR(tg, a0)| { self.write_reg(tg, self.read_reg(a0).cast::() as i64) }), - ADDI8 => self.binary_op_imm(u8::wrapping_add), - ADDI16 => self.binary_op_imm(u16::wrapping_add), - ADDI32 => self.binary_op_imm(u32::wrapping_add), - ADDI64 => self.binary_op_imm(u64::wrapping_add), - MULI8 => self.binary_op_imm(u8::wrapping_mul), - MULI16 => self.binary_op_imm(u16::wrapping_mul), - MULI32 => self.binary_op_imm(u32::wrapping_mul), - MULI64 => self.binary_op_imm(u64::wrapping_mul), - ANDI => self.binary_op_imm::(ops::BitAnd::bitand), - ORI => self.binary_op_imm::(ops::BitOr::bitor), - XORI => self.binary_op_imm::(ops::BitXor::bitxor), - SLUI8 => self.binary_op_ims::(u8::wrapping_shl), - SLUI16 => self.binary_op_ims::(u16::wrapping_shl), - SLUI32 => self.binary_op_ims::(u32::wrapping_shl), - SLUI64 => self.binary_op_ims::(u64::wrapping_shl), - SRUI8 => self.binary_op_ims::(u8::wrapping_shr), - SRUI16 => self.binary_op_ims::(u16::wrapping_shr), - SRUI32 => self.binary_op_ims::(u32::wrapping_shr), - SRUI64 => self.binary_op_ims::(u64::wrapping_shr), - SRSI8 => self.binary_op_ims::(i8::wrapping_shr), - SRSI16 => self.binary_op_ims::(i16::wrapping_shr), - SRSI32 => self.binary_op_ims::(i32::wrapping_shr), - SRSI64 => self.binary_op_ims::(i64::wrapping_shr), - CMPUI => handler!(self, |OpsRRD(tg, a0, imm)| { self.cmp(tg, a0, imm) }), - CMPSI => handler!(self, |OpsRRD(tg, a0, imm)| { self.cmp(tg, a0, imm as i64) }), - CP => handler!(self, |OpsRR(tg, a0)| self.write_reg(tg, self.read_reg(a0))), - SWA => handler!(self, |OpsRR(r0, r1)| { + I::ADDI8 => self.binary_op_imm(u8::wrapping_add), + I::ADDI16 => self.binary_op_imm(u16::wrapping_add), + I::ADDI32 => self.binary_op_imm(u32::wrapping_add), + I::ADDI64 => self.binary_op_imm(u64::wrapping_add), + I::MULI8 => self.binary_op_imm(u8::wrapping_mul), + I::MULI16 => self.binary_op_imm(u16::wrapping_mul), + I::MULI32 => self.binary_op_imm(u32::wrapping_mul), + I::MULI64 => self.binary_op_imm(u64::wrapping_mul), + I::ANDI => self.binary_op_imm::(ops::BitAnd::bitand), + I::ORI => self.binary_op_imm::(ops::BitOr::bitor), + I::XORI => self.binary_op_imm::(ops::BitXor::bitxor), + I::SLUI8 => self.binary_op_ims::(u8::wrapping_shl), + I::SLUI16 => self.binary_op_ims::(u16::wrapping_shl), + I::SLUI32 => self.binary_op_ims::(u32::wrapping_shl), + I::SLUI64 => self.binary_op_ims::(u64::wrapping_shl), + I::SRUI8 => self.binary_op_ims::(u8::wrapping_shr), + I::SRUI16 => self.binary_op_ims::(u16::wrapping_shr), + I::SRUI32 => self.binary_op_ims::(u32::wrapping_shr), + I::SRUI64 => self.binary_op_ims::(u64::wrapping_shr), + I::SRSI8 => self.binary_op_ims::(i8::wrapping_shr), + I::SRSI16 => self.binary_op_ims::(i16::wrapping_shr), + I::SRSI32 => self.binary_op_ims::(i32::wrapping_shr), + I::SRSI64 => self.binary_op_ims::(i64::wrapping_shr), + I::CMPUI => handler!(self, |OpsRRD(tg, a0, imm)| { self.cmp(tg, a0, imm) }), + I::CMPSI => { + handler!(self, |OpsRRD(tg, a0, imm)| { self.cmp(tg, a0, imm as i64) }) + } + I::CP => handler!(self, |OpsRR(tg, a0)| self.write_reg(tg, self.read_reg(a0))), + I::SWA => handler!(self, |OpsRR(r0, r1)| { // Swap registers match (r0, r1) { (0, 0) => (), @@ -167,33 +174,33 @@ where } } }), - LI8 => handler!(self, |OpsRB(tg, imm)| self.write_reg(tg, imm)), - LI16 => handler!(self, |OpsRH(tg, imm)| self.write_reg(tg, imm)), - LI32 => handler!(self, |OpsRW(tg, imm)| self.write_reg(tg, imm)), - LI64 => handler!(self, |OpsRD(tg, imm)| self.write_reg(tg, imm)), - LRA => handler!(self, |OpsRRO(tg, reg, off)| self.write_reg( + I::LI8 => handler!(self, |OpsRB(tg, imm)| self.write_reg(tg, imm)), + I::LI16 => handler!(self, |OpsRH(tg, imm)| self.write_reg(tg, imm)), + I::LI32 => handler!(self, |OpsRW(tg, imm)| self.write_reg(tg, imm)), + I::LI64 => handler!(self, |OpsRD(tg, imm)| self.write_reg(tg, imm)), + I::LRA => handler!(self, |OpsRRO(tg, reg, off)| self.write_reg( tg, self.pcrel(off).wrapping_add(self.read_reg(reg).cast::()).get(), )), // Load. If loading more than register size, continue on adjecent registers - LD => handler!(self, |OpsRRAH(dst, base, off, count)| self + I::LD => handler!(self, |OpsRRAH(dst, base, off, count)| self .load(dst, base, off, count)?), // Store. Same rules apply as to LD - ST => handler!(self, |OpsRRAH(dst, base, off, count)| self + I::ST => handler!(self, |OpsRRAH(dst, base, off, count)| self .store(dst, base, off, count)?), - LDR => handler!(self, |OpsRROH(dst, base, off, count)| self.load( + I::LDR => handler!(self, |OpsRROH(dst, base, off, count)| self.load( dst, base, self.pcrel(off).get(), count )?), - STR => handler!(self, |OpsRROH(dst, base, off, count)| self.store( + I::STR => handler!(self, |OpsRROH(dst, base, off, count)| self.store( dst, base, self.pcrel(off).get(), count )?), - BMC => { + I::BMC => { // Block memory copy match if let Some(copier) = &mut self.copier { // There is some copier, poll. @@ -227,7 +234,7 @@ where core::task::Poll::Pending => (), } } - BRC => handler!(self, |OpsRRB(src, dst, count)| { + I::BRC => handler!(self, |OpsRRB(src, dst, count)| { // Block register copy if src.checked_add(count).is_none() || dst.checked_add(count).is_none() { return Err(VmRunError::RegOutOfBounds); @@ -239,11 +246,11 @@ where usize::from(count), ); }), - JMP => { + I::JMP => { let OpsO(off) = self.decode(); self.pc = self.pc.wrapping_add(off); } - JAL => { + I::JAL => { // Jump and link. Save PC after this instruction to // specified register and jump to reg + relative offset. let OpsRRO(save, reg, offset) = self.decode(); @@ -251,7 +258,7 @@ where self.write_reg(save, self.pc.next::()); self.pc = self.pcrel(offset).wrapping_add(self.read_reg(reg).cast::()); } - JALA => { + I::JALA => { // Jump and link. Save PC after this instruction to // specified register and jump to reg let OpsRRA(save, reg, offset) = self.decode(); @@ -261,8 +268,8 @@ where Address::new(self.read_reg(reg).cast::().wrapping_add(offset)); } // Conditional jumps, jump only to immediates - JEQ => self.cond_jmp::(Ordering::Equal), - JNE => { + I::JEQ => self.cond_jmp::(Ordering::Equal), + I::JNE => { let OpsRRP(a0, a1, ja) = self.decode(); if self.read_reg(a0).cast::() != self.read_reg(a1).cast::() { self.pc = self.pcrel(ja); @@ -270,11 +277,11 @@ where self.bump_pc::(); } } - JLTS => self.cond_jmp::(Ordering::Less), - JGTS => self.cond_jmp::(Ordering::Greater), - JLTU => self.cond_jmp::(Ordering::Less), - JGTU => self.cond_jmp::(Ordering::Greater), - ECA => { + I::JLTS => self.cond_jmp::(Ordering::Less), + I::JGTS => self.cond_jmp::(Ordering::Greater), + I::JLTU => self.cond_jmp::(Ordering::Less), + I::JGTU => self.cond_jmp::(Ordering::Greater), + I::ECA => { // So we don't get timer interrupt after ECALL if TIMER_QUOTIENT != 0 { self.timer = self.timer.wrapping_add(1); @@ -283,33 +290,33 @@ where self.bump_pc::(); return Ok(VmRunOk::Ecall); } - EBP => { + I::EBP => { self.bump_pc::(); return Ok(VmRunOk::Breakpoint); } - FADD32 => self.binary_op::(ops::Add::add), - FADD64 => self.binary_op::(ops::Add::add), - FSUB32 => self.binary_op::(ops::Sub::sub), - FSUB64 => self.binary_op::(ops::Sub::sub), - FMUL32 => self.binary_op::(ops::Mul::mul), - FMUL64 => self.binary_op::(ops::Mul::mul), - FDIV32 => self.binary_op::(ops::Div::div), - FDIV64 => self.binary_op::(ops::Div::div), - FMA32 => self.fma::(), - FMA64 => self.fma::(), - FINV32 => handler!(self, |OpsRR(tg, reg)| self + I::FADD32 => self.binary_op::(ops::Add::add), + I::FADD64 => self.binary_op::(ops::Add::add), + I::FSUB32 => self.binary_op::(ops::Sub::sub), + I::FSUB64 => self.binary_op::(ops::Sub::sub), + I::FMUL32 => self.binary_op::(ops::Mul::mul), + I::FMUL64 => self.binary_op::(ops::Mul::mul), + I::FDIV32 => self.binary_op::(ops::Div::div), + I::FDIV64 => self.binary_op::(ops::Div::div), + I::FMA32 => self.fma::(), + I::FMA64 => self.fma::(), + I::FINV32 => handler!(self, |OpsRR(tg, reg)| self .write_reg(tg, 1. / self.read_reg(reg).cast::())), - FINV64 => handler!(self, |OpsRR(tg, reg)| self + I::FINV64 => handler!(self, |OpsRR(tg, reg)| self .write_reg(tg, 1. / self.read_reg(reg).cast::())), - FCMPLT32 => self.fcmp::(Ordering::Less), - FCMPLT64 => self.fcmp::(Ordering::Less), - FCMPGT32 => self.fcmp::(Ordering::Greater), - FCMPGT64 => self.fcmp::(Ordering::Greater), - ITF32 => handler!(self, |OpsRR(tg, reg)| self + I::FCMPLT32 => self.fcmp::(Ordering::Less), + I::FCMPLT64 => self.fcmp::(Ordering::Less), + I::FCMPGT32 => self.fcmp::(Ordering::Greater), + I::FCMPGT64 => self.fcmp::(Ordering::Greater), + I::ITF32 => handler!(self, |OpsRR(tg, reg)| self .write_reg(tg, self.read_reg(reg).cast::() as f32)), - ITF64 => handler!(self, |OpsRR(tg, reg)| self + I::ITF64 => handler!(self, |OpsRR(tg, reg)| self .write_reg(tg, self.read_reg(reg).cast::() as f64)), - FTI32 => handler!(self, |OpsRRB(tg, reg, mode)| self.write_reg( + I::FTI32 => handler!(self, |OpsRRB(tg, reg, mode)| self.write_reg( tg, crate::float::f32toint( self.read_reg(reg).cast::(), @@ -317,7 +324,7 @@ where .map_err(|()| VmRunError::InvalidOperand)?, ), )), - FTI64 => handler!(self, |OpsRRB(tg, reg, mode)| self.write_reg( + I::FTI64 => handler!(self, |OpsRRB(tg, reg, mode)| self.write_reg( tg, crate::float::f64toint( self.read_reg(reg).cast::(), @@ -325,9 +332,9 @@ where .map_err(|()| VmRunError::InvalidOperand)?, ), )), - FC32T64 => handler!(self, |OpsRR(tg, reg)| self + I::FC32T64 => handler!(self, |OpsRR(tg, reg)| self .write_reg(tg, self.read_reg(reg).cast::() as f64)), - FC64T32 => handler!(self, |OpsRRB(tg, reg, mode)| self.write_reg( + I::FC64T32 => handler!(self, |OpsRRB(tg, reg, mode)| self.write_reg( tg, crate::float::conv64to32( self.read_reg(reg).cast(), @@ -335,27 +342,26 @@ where .map_err(|()| VmRunError::InvalidOperand)?, ), )), - LRA16 => handler!(self, |OpsRRP(tg, reg, imm)| self.write_reg( + I::LRA16 => handler!(self, |OpsRRP(tg, reg, imm)| self.write_reg( tg, (self.pc + self.read_reg(reg).cast::() + imm + 3_u16).get(), )), - LDR16 => handler!(self, |OpsRRPH(dst, base, off, count)| self.load( + I::LDR16 => handler!(self, |OpsRRPH(dst, base, off, count)| self.load( dst, base, self.pcrel(off).get(), count )?), - STR16 => handler!(self, |OpsRRPH(dst, base, off, count)| self.store( + I::STR16 => handler!(self, |OpsRRPH(dst, base, off, count)| self.store( dst, base, self.pcrel(off).get(), count )?), - JMP16 => { + I::JMP16 => { let OpsP(off) = self.decode(); self.pc = self.pcrel(off); } - op => return Err(VmRunError::InvalidOpcode(op)), } }