Rewritten assembler

This commit is contained in:
Erin 2023-07-12 02:16:23 +02:00 committed by ondra05
parent 3fc6bb9171
commit 271ab5a953
8 changed files with 312 additions and 344 deletions

28
Cargo.lock generated
View file

@ -35,6 +35,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "bytemuck"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea"
[[package]]
name = "cfg-if"
version = "1.0.0"
@ -101,9 +107,11 @@ name = "hbasm"
version = "0.1.0"
dependencies = [
"ariadne",
"bytemuck",
"hashbrown 0.14.0",
"hbbytecode",
"lasso",
"literify",
"logos",
"paste",
]
@ -135,6 +143,26 @@ dependencies = [
"hashbrown 0.13.2",
]
[[package]]
name = "literify"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54e4d365df794ed78b4ce1061886f82eae7afa8e3a98ce4c4b0bfd0c777b1175"
dependencies = [
"litrs",
"proc-macro2",
"quote",
]
[[package]]
name = "litrs"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f17c3668f3cc1132437cdadc93dab05e52d592f06948d3f64828430c36e4a70"
dependencies = [
"proc-macro2",
]
[[package]]
name = "log"
version = "0.4.17"

View file

@ -4,10 +4,12 @@ version = "0.1.0"
edition = "2021"
[dependencies]
ariadne = "0.3"
bytemuck = "1.13"
hashbrown = "0.14"
hbbytecode = { path = "../hbbytecode" }
literify = "0.1"
paste = "1.0"
hashbrown = "0.14.0"
ariadne = "0.3.0"
[dependencies.lasso]
version = "0.7"

View file

@ -1,10 +1,7 @@
#![no_std]
#![feature(error_in_core)]
extern crate alloc;
pub mod text;
mod macros;
use {alloc::vec::Vec, hashbrown::HashSet};
@ -12,28 +9,26 @@ use {alloc::vec::Vec, hashbrown::HashSet};
#[derive(Default)]
pub struct Assembler {
pub buf: Vec<u8>,
sub: HashSet<usize>,
pub sub: HashSet<usize>,
}
impl Assembler {
macros::impl_asm!(
bbbb(p0: u8, p1: u8, p2: u8, p3: u8)
macros::impl_both!(
bbbb(p0: R, p1: R, p2: R, p3: R)
=> [DIR, DIRF, FMAF],
bbb(p0: u8, p1: u8, p2: u8)
bbb(p0: R, p1: R, p2: R)
=> [ADD, SUB, MUL, AND, OR, XOR, SL, SR, SRS, CMP, CMPU, BRC, ADDF, SUBF, MULF],
bbdh(p0: u8, p1: u8, p2: impl Imm, p3: u16)
bbdh(p0: R, p1: R, p2: I, p3: u16)
=> [LD, ST],
bbd(p0: u8, p1: u8, p2: impl Imm)
bbd(p0: R, p1: R, p2: I)
=> [ADDI, MULI, ANDI, ORI, XORI, SLI, SRI, SRSI, CMPI, CMPUI,
BMC, JEQ, JNE, JLT, JGT, JLTU, JGTU, ADDFI, MULFI],
bb(p0: u8, p1: u8)
bb(p0: R, p1: R)
=> [NEG, NOT, CP, SWA, NEGF, ITF, FTI],
bd(p0: u8, p1: impl Imm)
bd(p0: R, p1: I)
=> [LI, JMP],
n()
=> [NOP, ECALL],
);
}
);
pub trait Imm {
fn insert(&self, asm: &mut Assembler);
@ -53,13 +48,3 @@ macro_rules! impl_imm_le_bytes {
}
impl_imm_le_bytes!(u64, i64, f64);
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Symbol(pub u64);
impl Imm for Symbol {
#[inline(always)]
fn insert(&self, asm: &mut Assembler) {
asm.sub.insert(asm.buf.len());
asm.buf.extend(self.0.to_le_bytes());
}
}

View file

@ -18,47 +18,41 @@ macro_rules! impl_asm_opcodes {
}
}
macros::impl_asm_opcodes!(
macros::asm::impl_asm_opcodes!(
$generic($($param_i: $param_ty),*)
=> [$($rest)*]
);
};
}
macro_rules! gen_impl_asm_insert {
($($ty:ident),* $(,)?) => {
macro_rules! impl_asm_insert {
$(($self:expr, $id:ident, $ty) => {
$self.buf.extend($id.to_le_bytes())
};)*
($self:expr, $id:ident, $_:ty) => {
macro_rules! impl_asm_insert {
($self:expr, $id:ident, I) => {
Imm::insert(&$id, $self)
};
}
($self:expr, $id:ident, $_:ident) => {
$self.buf.extend($id.to_le_bytes())
};
}
gen_impl_asm_insert!(u8, u16, u64);
macro_rules! impl_asm {
(
$(
$ityn:ident
($($param_i:ident: $param_ty:ty),* $(,)?)
($($param_i:ident: $param_ty:ident),* $(,)?)
=> [$($opcode:ident),* $(,)?],
)*
) => {
paste::paste! {
$(
#[allow(dead_code)]
fn [<i_param_ $ityn>](&mut self, opcode: u8, $($param_i: $param_ty),*) {
fn [<i_param_ $ityn>](&mut self, opcode: u8, $($param_i: macros::asm::ident_map_ty!($param_ty)),*) {
self.buf.push(opcode);
$(macros::impl_asm_insert!(self, $param_i, $param_ty);)*
$(macros::asm::impl_asm_insert!(self, $param_i, $param_ty);)*
}
macros::impl_asm_opcodes!(
[<i_param_ $ityn>]($($param_i: $param_ty),*)
macros::asm::impl_asm_opcodes!(
[<i_param_ $ityn>]($($param_i: macros::asm::ident_map_ty!($param_ty)),*)
=> [$($opcode,)*]
);
)*
@ -66,7 +60,14 @@ macro_rules! impl_asm {
};
}
pub(super) use {impl_asm, impl_asm_opcodes};
#[rustfmt::skip]
macro_rules! ident_map_ty {
(R) => { u8 };
(I) => { impl Imm };
($id:ident) => { $id };
}
pub(crate) use {ident_map_ty, impl_asm, impl_asm_opcodes};
#[allow(clippy::single_component_path_imports)]
pub(super) use impl_asm_insert;
pub(crate) use impl_asm_insert;

14
hbasm/src/macros/mod.rs Normal file
View file

@ -0,0 +1,14 @@
pub mod asm;
pub mod text;
macro_rules! impl_both {
($($tt:tt)*) => {
impl Assembler {
$crate::macros::asm::impl_asm!($($tt)*);
}
$crate::macros::text::gen_text!($($tt)*);
};
}
pub(crate) use impl_both;

202
hbasm/src/macros/text.rs Normal file
View file

@ -0,0 +1,202 @@
macro_rules! gen_text {
(
$(
$ityn:ident
($($param_i:ident: $param_ty:ident),* $(,)?)
=> [$($opcode:ident),* $(,)?],
)*
) => {
pub mod text {
use {
crate::{
Assembler,
macros::text::*,
},
hashbrown::HashMap,
lasso::{Key, Rodeo, Spur},
logos::{Lexer, Logos, Span},
};
paste::paste!(literify::literify! {
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
#[logos(extras = Rodeo)]
#[logos(skip r"[ \t\t]+")]
#[logos(skip r"-- .*")]
pub enum Token {
$($(#[token(~([<$opcode:lower>]), |_| hbbytecode::opcode::[<$opcode:upper>])])*)*
Opcode(u8),
#[regex("[0-9]+", |l| l.slice().parse().ok())]
#[regex(
"-[0-9]+",
|lexer| {
Some(u64::from_ne_bytes(lexer.slice().parse::<i64>().ok()?.to_ne_bytes()))
},
)] Integer(u64),
#[regex(
"r[0-9]+",
|lexer| match lexer.slice()[1..].parse() {
Ok(n) => Some(n),
_ => None
},
)] Register(u8),
#[regex(
r"\p{XID_Start}\p{XID_Continue}*:",
|lexer| lexer.extras.get_or_intern(&lexer.slice()[..lexer.slice().len() - 1]),
)] Label(Spur),
#[regex(
r"\p{XID_Start}\p{XID_Continue}*",
|lexer| lexer.extras.get_or_intern(lexer.slice()),
)] Symbol(Spur),
#[token("\n")]
#[token(";")] ISep,
#[token(",")] PSep,
}
});
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ErrorKind {
UnexpectedToken,
InvalidToken,
UnexpectedEnd,
InvalidSymbol,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Error {
pub kind: ErrorKind,
pub span: Span,
}
pub fn assemble(asm: &mut Assembler, code: &str) -> Result<(), Error> {
pub struct TextAsm<'a> {
asm: &'a mut Assembler,
lexer: Lexer<'a, Token>,
symloc: HashMap<Spur, usize>,
}
impl<'a> TextAsm<'a> {
fn next(&mut self) -> Result<Token, ErrorKind> {
match self.lexer.next() {
Some(Ok(t)) => Ok(t),
Some(Err(())) => Err(ErrorKind::InvalidToken),
None => Err(ErrorKind::UnexpectedEnd),
}
}
#[inline(always)]
fn run(&mut self) -> Result<(), ErrorKind> {
loop {
match self.lexer.next() {
Some(Ok(Token::Opcode(op))) => {
match op {
$(
$(hbbytecode::opcode::$opcode)|* => paste::paste!({
param_extract_itm!(self, $($param_i: $param_ty),*);
self.asm.[<i_param_ $ityn>](op, $($param_i),*);
}),
)*
_ => unreachable!(),
}
}
Some(Ok(Token::Label(lbl))) => {
self.symloc.insert(lbl, self.asm.buf.len());
}
Some(Ok(Token::ISep)) => (),
Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken),
Some(Err(())) => return Err(ErrorKind::InvalidToken),
None => return Ok(()),
}
}
}
}
let mut asm = TextAsm {
asm,
lexer: Token::lexer(code),
symloc: HashMap::default(),
};
asm.run()
.map_err(|kind| Error { kind, span: asm.lexer.span() })?;
for &loc in &asm.asm.sub {
let val = asm.symloc
.get(
&Spur::try_from_usize(bytemuck::pod_read_unaligned::<u64>(&asm.asm.buf[loc..loc+core::mem::size_of::<u64>()]) as _)
.unwrap()
)
.ok_or(Error { kind: ErrorKind::InvalidSymbol, span: 0..0 })?
.to_le_bytes();
asm.asm.buf[loc..]
.iter_mut()
.zip(val)
.for_each(|(dst, src)| *dst = src);
}
Ok(())
}
enum InternalImm {
Const(u64),
Named(Spur),
}
impl $crate::Imm for InternalImm {
#[inline]
fn insert(&self, asm: &mut Assembler) {
match self {
Self::Const(a) => a.insert(asm),
Self::Named(a) => {
asm.sub.insert(asm.buf.len());
asm.buf.extend((a.into_usize() as u64).to_le_bytes());
},
}
}
}
}
};
}
macro_rules! extract_pat {
($self:expr, $pat:pat) => {
let $pat = $self.next()?
else { return Err(ErrorKind::UnexpectedToken) };
};
}
macro_rules! extract {
($self:expr, R, $id:ident) => {
extract_pat!($self, Token::Register($id));
};
($self:expr, I, $id:ident) => {
let $id = match $self.next()? {
Token::Integer(a) => InternalImm::Const(a),
Token::Symbol(a) => InternalImm::Named(a),
_ => return Err(ErrorKind::UnexpectedToken),
};
};
($self:expr, u16, $id:ident) => {
extract_pat!($self, Token::Integer($id));
let $id = u16::try_from($id).map_err(|_| ErrorKind::InvalidToken)?;
};
}
macro_rules! param_extract_itm {
($self:expr, $($id:ident: $ty:ident)? $(, $($tt:tt)*)?) => {
$(extract!($self, $ty, $id);)?
$(
extract_pat!($self, Token::PSep);
param_extract_itm!($self, $($tt)*);
)?
};
}
pub(crate) use {extract, extract_pat, gen_text, param_extract_itm};

View file

@ -1,3 +1,7 @@
use std::io::Write;
use hbasm::Assembler;
use {
ariadne::{ColorGenerator, Label, Report, ReportKind, Source},
std::{
@ -10,9 +14,8 @@ fn main() -> Result<(), Box<dyn Error>> {
let mut code = String::new();
stdin().read_to_string(&mut code)?;
let mut buf = vec![];
if let Err(e) = hbasm::text::assembly(&code, &mut buf) {
let mut assembler = Assembler::default();
if let Err(e) = hbasm::text::assemble(&mut assembler, &code) {
let mut colors = ColorGenerator::new();
let e_code = match e.kind {
@ -24,8 +27,12 @@ fn main() -> Result<(), Box<dyn Error>> {
let message = match e.kind {
hbasm::text::ErrorKind::UnexpectedToken => "This token is not expected!",
hbasm::text::ErrorKind::InvalidToken => "The token is not valid!",
hbasm::text::ErrorKind::UnexpectedEnd => "The assembler reached the end of input unexpectedly!",
hbasm::text::ErrorKind::InvalidSymbol => "This referenced symbol doesn't have a corresponding label!",
hbasm::text::ErrorKind::UnexpectedEnd => {
"The assembler reached the end of input unexpectedly!"
}
hbasm::text::ErrorKind::InvalidSymbol => {
"This referenced symbol doesn't have a corresponding label!"
}
};
let a = colors.next();
@ -40,6 +47,8 @@ fn main() -> Result<(), Box<dyn Error>> {
.finish()
.eprint(("engine_internal", Source::from(&code)))
.unwrap();
} else {
std::io::stdout().lock().write_all(&assembler.buf).unwrap();
}
Ok(())

View file

@ -1,273 +0,0 @@
extern crate alloc;
use alloc::vec::Vec;
use {
core::fmt::{Display, Formatter},
hashbrown::HashMap,
lasso::{Rodeo, Spur},
logos::{Lexer, Logos, Span},
};
macro_rules! tokendef {
($($opcode:literal),* $(,)?) => {
paste::paste! {
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
#[logos(extras = Rodeo)]
#[logos(skip r"[ \t\f]+")]
#[logos(skip r"-- .*")]
pub enum Token {
$(#[token($opcode, |_| hbbytecode::opcode::[<$opcode:upper>])])*
OpCode(u8),
#[regex("[0-9]+", |l| l.slice().parse().ok())]
#[regex(
"-[0-9]+",
|lexer| {
Some(u64::from_ne_bytes(lexer.slice().parse::<i64>().ok()?.to_ne_bytes()))
},
)] Integer(u64),
#[regex(
"r[0-9]+",
|lexer| match lexer.slice()[1..].parse() {
Ok(n) => Some(n),
_ => None
},
)] Register(u8),
#[regex(
r"\p{XID_Start}\p{XID_Continue}*:",
|lexer| lexer.extras.get_or_intern(&lexer.slice()[..lexer.slice().len() - 1]),
)] Label(Spur),
#[regex(
r"\p{XID_Start}\p{XID_Continue}*",
|lexer| lexer.extras.get_or_intern(lexer.slice()),
)] Symbol(Spur),
#[token("\n")]
#[token(";")] ISep,
#[token(",")] PSep,
}
}
};
}
#[rustfmt::skip]
tokendef![
"nop", "add", "sub", "mul", "and", "or", "xor", "sl", "sr", "srs", "cmp", "cmpu",
"dir", "neg", "not", "addi", "muli", "andi", "ori", "xori", "sli", "sri", "srsi",
"cmpi", "cmpui", "cp", "swa", "li", "ld", "st", "bmc", "brc", "jmp", "jeq", "jne",
"jlt", "jgt", "jltu", "jgtu", "ecall", "addf", "subf", "mulf", "dirf", "fmaf", "negf",
"itf", "fti", "addfi", "mulfi",
];
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ErrorKind {
UnexpectedToken,
InvalidToken,
UnexpectedEnd,
InvalidSymbol,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Error {
pub kind: ErrorKind,
pub span: Span,
}
impl Display for Error {
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
write!(f, "Error {:?} at {:?}", self.kind, self.span)
}
}
impl core::error::Error for Error {}
macro_rules! expect_matches {
($self:expr, $($pat:pat),* $(,)?) => {$(
let $pat = $self.next()?
else { return Err(ErrorKind::UnexpectedToken) };
)*}
}
pub fn assembly(code: &str, buf: &mut Vec<u8>) -> Result<(), Error> {
struct Assembler<'a> {
lexer: Lexer<'a, Token>,
buf: &'a mut Vec<u8>,
label_map: HashMap<Spur, u64>,
to_sub_label: HashMap<usize, Spur>,
}
impl<'a> Assembler<'a> {
fn next(&mut self) -> Result<Token, ErrorKind> {
match self.lexer.next() {
Some(Ok(t)) => Ok(t),
Some(Err(())) => Err(ErrorKind::InvalidToken),
None => Err(ErrorKind::UnexpectedEnd),
}
}
fn assemble(&mut self) -> Result<(), ErrorKind> {
use hbbytecode::opcode::*;
loop {
match self.lexer.next() {
Some(Ok(Token::OpCode(op))) => {
self.buf.push(op);
match op {
NOP | ECALL => Ok(()),
DIR | DIRF => {
expect_matches!(
self,
Token::Register(r0),
Token::PSep,
Token::Register(r1),
Token::PSep,
Token::Register(r2),
Token::PSep,
Token::Register(r3),
);
self.buf.extend([r0, r1, r2, r3]);
Ok(())
}
ADD..=CMPU | ADDF..=MULF => {
expect_matches!(
self,
Token::Register(r0),
Token::PSep,
Token::Register(r1),
Token::PSep,
Token::Register(r2),
);
self.buf.extend([r0, r1, r2]);
Ok(())
}
BRC => {
expect_matches!(
self,
Token::Register(r0),
Token::PSep,
Token::Register(r1),
Token::PSep,
Token::Integer(count),
);
self.buf.extend([
r0,
r1,
u8::try_from(count).map_err(|_| ErrorKind::UnexpectedToken)?,
]);
Ok(())
}
NEG..=NOT | CP..=SWA | NEGF..=FTI => {
expect_matches!(
self,
Token::Register(r0),
Token::PSep,
Token::Register(r1),
);
self.buf.extend([r0, r1]);
Ok(())
}
LI | JMP => {
expect_matches!(self, Token::Register(r0), Token::PSep);
self.buf.push(r0);
self.insert_imm()?;
Ok(())
}
ADDI..=CMPUI | BMC | JEQ..=JGTU | ADDFI..=MULFI => {
expect_matches!(
self,
Token::Register(r0),
Token::PSep,
Token::Register(r1),
Token::PSep,
);
self.buf.extend([r0, r1]);
self.insert_imm()?;
Ok(())
}
LD..=ST => {
expect_matches!(
self,
Token::Register(r0),
Token::PSep,
Token::Register(r1),
Token::PSep,
Token::Integer(offset),
Token::PSep,
Token::Integer(len),
);
self.buf.extend([r0, r1]);
self.buf.extend(offset.to_le_bytes());
self.buf.extend(
u16::try_from(len)
.map_err(|_| ErrorKind::InvalidToken)?
.to_le_bytes(),
);
Ok(())
}
_ => unreachable!(),
}?;
match self.next() {
Ok(Token::ISep) => (),
Ok(_) => return Err(ErrorKind::UnexpectedToken),
Err(ErrorKind::UnexpectedEnd) => return Ok(()),
Err(e) => return Err(e),
}
}
Some(Ok(Token::Label(lbl))) => {
self.label_map.insert(lbl, self.buf.len() as u64);
}
Some(Ok(Token::ISep)) => (),
Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken),
Some(Err(())) => return Err(ErrorKind::InvalidToken),
None => return Ok(()),
}
}
}
fn link_local_syms(&mut self) -> Result<(), ErrorKind> {
for (ix, sym) in &self.to_sub_label {
self.label_map
.get(sym)
.ok_or(ErrorKind::InvalidSymbol)?
.to_le_bytes()
.iter()
.enumerate()
.for_each(|(i, b)| {
self.buf[ix + i] = *b;
});
}
Ok(())
}
fn insert_imm(&mut self) -> Result<(), ErrorKind> {
let imm = match self.next()? {
Token::Integer(i) => i.to_le_bytes(),
Token::Symbol(s) => {
self.to_sub_label.insert(self.buf.len(), s);
[0; 8]
}
_ => return Err(ErrorKind::UnexpectedToken),
};
self.buf.extend(imm);
Ok(())
}
}
let mut asm = Assembler {
lexer: Token::lexer(code),
label_map: Default::default(),
to_sub_label: Default::default(),
buf,
};
asm.assemble().map_err(|kind| Error {
kind,
span: asm.lexer.span(),
})?;
asm.link_local_syms()
.map_err(|kind| Error { kind, span: 0..0 })
}