diff --git a/hbasm/src/lib.rs b/hbasm/src/lib.rs index e2201b8a..6568916f 100644 --- a/hbasm/src/lib.rs +++ b/hbasm/src/lib.rs @@ -1,277 +1,57 @@ #![no_std] #![feature(error_in_core)] -// use std::collections::HashMap; extern crate alloc; -use alloc::vec::Vec; -use { - core::fmt::{Display, Formatter}, - hashbrown::HashMap, - lasso::{Rodeo, Spur}, - logos::{Lexer, Logos, Span}, -}; +pub mod text; -macro_rules! tokendef { - ($($opcode:literal),* $(,)?) => { - paste::paste! { - #[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)] - #[logos(extras = Rodeo)] - #[logos(skip r"[ \t\f]+")] - #[logos(skip r"-- .*")] - pub enum Token { - $(#[token($opcode, |_| hbbytecode::opcode::[<$opcode:upper>])])* - OpCode(u8), +mod macros; - #[regex("[0-9]+", |l| l.slice().parse().ok())] - #[regex( - "-[0-9]+", - |lexer| { - Some(u64::from_ne_bytes(lexer.slice().parse::().ok()?.to_ne_bytes())) - }, - )] Integer(u64), +use {alloc::vec::Vec, hashbrown::HashSet}; - #[regex( - "r[0-9]+", - |lexer| match lexer.slice()[1..].parse() { - Ok(n) => Some(n), - _ => None - }, - )] Register(u8), - - #[regex( - r"\p{XID_Start}\p{XID_Continue}*:", - |lexer| lexer.extras.get_or_intern(&lexer.slice()[..lexer.slice().len() - 1]), - )] Label(Spur), - - #[regex( - r"\p{XID_Start}\p{XID_Continue}*", - |lexer| lexer.extras.get_or_intern(lexer.slice()), - )] Symbol(Spur), - - #[token("\n")] - #[token(";")] ISep, - #[token(",")] PSep, - } - } - }; +#[derive(Default)] +pub struct Assembler { + pub buf: Vec, + sub: HashSet, } -#[rustfmt::skip] -tokendef![ - "nop", "add", "sub", "mul", "and", "or", "xor", "sl", "sr", "srs", "cmp", "cmpu", - "dir", "neg", "not", "addi", "muli", "andi", "ori", "xori", "sli", "sri", "srsi", - "cmpi", "cmpui", "cp", "swa", "li", "ld", "st", "bmc", "brc", "jmp", "jeq", "jne", - "jlt", "jgt", "jltu", "jgtu", "ecall", "addf", "subf", "mulf", "dirf", "fmaf", "negf", - "itf", "fti", "addfi", "mulfi", -]; - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum ErrorKind { - UnexpectedToken, - InvalidToken, - UnexpectedEnd, - InvalidSymbol, +impl Assembler { + macros::impl_asm!( + bbbb(p0: u8, p1: u8, p2: u8, p3: u8) + => [DIR, DIRF], + bbb(p0: u8, p1: u8, p2: u8) + => [ADD, SUB, MUL, AND, OR, XOR, SL, SRS, CMP, CMPU, BRC, ADDF, MULF], + bbdh(p0: u8, p1: u8, p2: impl Imm, p3: u16) + => [LD, ST], + bbd(p0: u8, p1: u8, p2: impl Imm) + => [ADDI, MULI, ANDI, ORI, XORI, SLI, SRI, SRSI, CMPI, CMPUI, + JEQ, JNE, JLT, JGT, JLTU, JGTU, ADDFI, MULFI], + bb(p0: u8, p1: u8) + => [NEG, NOT, CP, SWA], + bd(p0: u8, p1: impl Imm) + => [LI, JMP], + n() + => [NOP, ECALL], + ); } -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Error { - pub kind: ErrorKind, - pub span: Span, +pub trait Imm { + fn insert(self, asm: &mut Assembler); } -impl Display for Error { - fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { - write!(f, "Error {:?} at {:?}", self.kind, self.span) +impl Imm for u64 { + #[inline(always)] + fn insert(self, asm: &mut Assembler) { + asm.buf.extend(self.to_le_bytes()); } } -impl core::error::Error for Error {} - -macro_rules! expect_matches { - ($self:expr, $($pat:pat),* $(,)?) => {$( - let $pat = $self.next()? - else { return Err(ErrorKind::UnexpectedToken) }; - )*} -} - -pub fn assembly(code: &str, buf: &mut Vec) -> Result<(), Error> { - struct Assembler<'a> { - lexer: Lexer<'a, Token>, - buf: &'a mut Vec, - label_map: HashMap, - to_sub_label: HashMap, +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Symbol(pub u64); +impl Imm for Symbol { + #[inline(always)] + fn insert(self, asm: &mut Assembler) { + asm.sub.insert(asm.buf.len()); + asm.buf.extend(self.0.to_le_bytes()); } - - impl<'a> Assembler<'a> { - fn next(&mut self) -> Result { - match self.lexer.next() { - Some(Ok(t)) => Ok(t), - Some(Err(())) => Err(ErrorKind::InvalidToken), - None => Err(ErrorKind::UnexpectedEnd), - } - } - - fn assemble(&mut self) -> Result<(), ErrorKind> { - use hbbytecode::opcode::*; - loop { - match self.lexer.next() { - Some(Ok(Token::OpCode(op))) => { - self.buf.push(op); - match op { - NOP | ECALL => Ok(()), - DIR | DIRF => { - expect_matches!( - self, - Token::Register(r0), - Token::PSep, - Token::Register(r1), - Token::PSep, - Token::Register(r2), - Token::PSep, - Token::Register(r3), - ); - self.buf.extend([r0, r1, r2, r3]); - Ok(()) - } - ADD..=CMPU | ADDF..=MULF => { - expect_matches!( - self, - Token::Register(r0), - Token::PSep, - Token::Register(r1), - Token::PSep, - Token::Register(r2), - ); - self.buf.extend([r0, r1, r2]); - Ok(()) - } - BRC => { - expect_matches!( - self, - Token::Register(r0), - Token::PSep, - Token::Register(r1), - Token::PSep, - Token::Integer(count), - ); - self.buf.extend([ - r0, - r1, - u8::try_from(count).map_err(|_| ErrorKind::UnexpectedToken)?, - ]); - Ok(()) - } - NEG..=NOT | CP..=SWA | NEGF..=FTI => { - expect_matches!( - self, - Token::Register(r0), - Token::PSep, - Token::Register(r1), - ); - self.buf.extend([r0, r1]); - Ok(()) - } - LI | JMP => { - expect_matches!(self, Token::Register(r0), Token::PSep); - self.buf.push(r0); - self.insert_imm()?; - Ok(()) - } - ADDI..=CMPUI | BMC | JEQ..=JGTU | ADDFI..=MULFI => { - expect_matches!( - self, - Token::Register(r0), - Token::PSep, - Token::Register(r1), - Token::PSep, - ); - self.buf.extend([r0, r1]); - self.insert_imm()?; - Ok(()) - } - LD..=ST => { - expect_matches!( - self, - Token::Register(r0), - Token::PSep, - Token::Register(r1), - Token::PSep, - Token::Integer(offset), - Token::PSep, - Token::Integer(len), - ); - self.buf.extend([r0, r1]); - self.buf.extend(offset.to_le_bytes()); - self.buf.extend( - u16::try_from(len) - .map_err(|_| ErrorKind::InvalidToken)? - .to_le_bytes(), - ); - Ok(()) - } - _ => unreachable!(), - }?; - match self.next() { - Ok(Token::ISep) => (), - Ok(_) => return Err(ErrorKind::UnexpectedToken), - Err(ErrorKind::UnexpectedEnd) => return Ok(()), - Err(e) => return Err(e), - } - } - Some(Ok(Token::Label(lbl))) => { - self.label_map.insert(lbl, self.buf.len() as u64); - } - Some(Ok(Token::ISep)) => (), - Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken), - Some(Err(())) => return Err(ErrorKind::InvalidToken), - None => return Ok(()), - } - } - } - - fn link_local_syms(&mut self) -> Result<(), ErrorKind> { - for (ix, sym) in &self.to_sub_label { - self.label_map - .get(sym) - .ok_or(ErrorKind::InvalidSymbol)? - .to_le_bytes() - .iter() - .enumerate() - .for_each(|(i, b)| { - self.buf[ix + i] = *b; - }); - } - - Ok(()) - } - - fn insert_imm(&mut self) -> Result<(), ErrorKind> { - let imm = match self.next()? { - Token::Integer(i) => i.to_le_bytes(), - Token::Symbol(s) => { - self.to_sub_label.insert(self.buf.len(), s); - [0; 8] - } - _ => return Err(ErrorKind::UnexpectedToken), - }; - self.buf.extend(imm); - Ok(()) - } - } - - let mut asm = Assembler { - lexer: Token::lexer(code), - label_map: Default::default(), - to_sub_label: Default::default(), - buf, - }; - - asm.assemble().map_err(|kind| Error { - kind, - span: asm.lexer.span(), - })?; - - asm.link_local_syms() - .map_err(|kind| Error { kind, span: 0..0 }) } diff --git a/hbasm/src/macros.rs b/hbasm/src/macros.rs new file mode 100644 index 00000000..b6557ff3 --- /dev/null +++ b/hbasm/src/macros.rs @@ -0,0 +1,73 @@ +macro_rules! impl_asm_opcodes { + ( + $generic:ident + ($($param_i:ident: $param_ty:ty),*) + => [] + ) => {}; + + ( + $generic:ident + ($($param_i:ident: $param_ty:ty),*) + => [$opcode:ident, $($rest:tt)*] + ) => { + paste::paste! { + #[allow(dead_code)] + #[inline(always)] + pub fn [](&mut self, $($param_i: $param_ty),*) { + self.$generic(hbbytecode::opcode::$opcode, $($param_i),*) + } + } + + macros::impl_asm_opcodes!( + $generic($($param_i: $param_ty),*) + => [$($rest)*] + ); + }; +} + +macro_rules! gen_impl_asm_insert { + ($($ty:ident),* $(,)?) => { + macro_rules! impl_asm_insert { + $(($self:expr, $id:ident, $ty) => { + $self.buf.extend($id.to_le_bytes()) + };)* + + ($self:expr, $id:ident, $_:ty) => { + Imm::insert($id, $self) + }; + } + }; +} + +gen_impl_asm_insert!(u8, u16, u64); + +macro_rules! impl_asm { + ( + $( + $ityn:ident + ($($param_i:ident: $param_ty:ty),* $(,)?) + => [$($opcode:ident),* $(,)?], + )* + ) => { + paste::paste! { + $( + #[allow(dead_code)] + fn [](&mut self, opcode: u8, $($param_i: $param_ty),*) { + self.buf.push(opcode); + $(macros::impl_asm_insert!(self, $param_i, $param_ty);)* + } + + macros::impl_asm_opcodes!( + []($($param_i: $param_ty),*) + => [$($opcode,)*] + ); + )* + } + }; +} + + +pub(super) use {impl_asm, impl_asm_opcodes}; + +#[allow(clippy::single_component_path_imports)] +pub(super) use impl_asm_insert; diff --git a/hbasm/src/main.rs b/hbasm/src/main.rs index 6eae52fd..046e92f5 100644 --- a/hbasm/src/main.rs +++ b/hbasm/src/main.rs @@ -8,7 +8,7 @@ fn main() -> Result<(), Box> { stdin().read_to_string(&mut code)?; let mut buf = vec![]; - if let Err(e) = hbasm::assembly(&code, &mut buf) { + if let Err(e) = hbasm::text::assembly(&code, &mut buf) { eprintln!( "Error {:?} at {:?} (`{}`)", e.kind, diff --git a/hbasm/src/text.rs b/hbasm/src/text.rs new file mode 100644 index 00000000..d4a3a9ec --- /dev/null +++ b/hbasm/src/text.rs @@ -0,0 +1,276 @@ + + +// use std::collections::HashMap; +extern crate alloc; +use alloc::vec::Vec; + +use { + core::fmt::{Display, Formatter}, + hashbrown::HashMap, + lasso::{Rodeo, Spur}, + logos::{Lexer, Logos, Span}, +}; + +macro_rules! tokendef { + ($($opcode:literal),* $(,)?) => { + paste::paste! { + #[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)] + #[logos(extras = Rodeo)] + #[logos(skip r"[ \t\f]+")] + #[logos(skip r"-- .*")] + pub enum Token { + $(#[token($opcode, |_| hbbytecode::opcode::[<$opcode:upper>])])* + OpCode(u8), + + #[regex("[0-9]+", |l| l.slice().parse().ok())] + #[regex( + "-[0-9]+", + |lexer| { + Some(u64::from_ne_bytes(lexer.slice().parse::().ok()?.to_ne_bytes())) + }, + )] Integer(u64), + + #[regex( + "r[0-9]+", + |lexer| match lexer.slice()[1..].parse() { + Ok(n) => Some(n), + _ => None + }, + )] Register(u8), + + #[regex( + r"\p{XID_Start}\p{XID_Continue}*:", + |lexer| lexer.extras.get_or_intern(&lexer.slice()[..lexer.slice().len() - 1]), + )] Label(Spur), + + #[regex( + r"\p{XID_Start}\p{XID_Continue}*", + |lexer| lexer.extras.get_or_intern(lexer.slice()), + )] Symbol(Spur), + + #[token("\n")] + #[token(";")] ISep, + #[token(",")] PSep, + } + } + }; +} + +#[rustfmt::skip] +tokendef![ + "nop", "add", "sub", "mul", "and", "or", "xor", "sl", "sr", "srs", "cmp", "cmpu", + "dir", "neg", "not", "addi", "muli", "andi", "ori", "xori", "sli", "sri", "srsi", + "cmpi", "cmpui", "cp", "swa", "li", "ld", "st", "bmc", "brc", "jmp", "jeq", "jne", + "jlt", "jgt", "jltu", "jgtu", "ecall", "addf", "subf", "mulf", "dirf", "fmaf", "negf", + "itf", "fti", "addfi", "mulfi", +]; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ErrorKind { + UnexpectedToken, + InvalidToken, + UnexpectedEnd, + InvalidSymbol, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Error { + pub kind: ErrorKind, + pub span: Span, +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "Error {:?} at {:?}", self.kind, self.span) + } +} + +impl core::error::Error for Error {} + +macro_rules! expect_matches { + ($self:expr, $($pat:pat),* $(,)?) => {$( + let $pat = $self.next()? + else { return Err(ErrorKind::UnexpectedToken) }; + )*} +} + +pub fn assembly(code: &str, buf: &mut Vec) -> Result<(), Error> { + struct Assembler<'a> { + lexer: Lexer<'a, Token>, + buf: &'a mut Vec, + label_map: HashMap, + to_sub_label: HashMap, + } + + impl<'a> Assembler<'a> { + fn next(&mut self) -> Result { + match self.lexer.next() { + Some(Ok(t)) => Ok(t), + Some(Err(())) => Err(ErrorKind::InvalidToken), + None => Err(ErrorKind::UnexpectedEnd), + } + } + + fn assemble(&mut self) -> Result<(), ErrorKind> { + use hbbytecode::opcode::*; + loop { + match self.lexer.next() { + Some(Ok(Token::OpCode(op))) => { + self.buf.push(op); + match op { + NOP | ECALL => Ok(()), + DIR | DIRF => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + Token::PSep, + Token::Register(r2), + Token::PSep, + Token::Register(r3), + ); + self.buf.extend([r0, r1, r2, r3]); + Ok(()) + } + ADD..=CMPU | ADDF..=MULF => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + Token::PSep, + Token::Register(r2), + ); + self.buf.extend([r0, r1, r2]); + Ok(()) + } + BRC => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + Token::PSep, + Token::Integer(count), + ); + self.buf.extend([ + r0, + r1, + u8::try_from(count).map_err(|_| ErrorKind::UnexpectedToken)?, + ]); + Ok(()) + } + NEG..=NOT | CP..=SWA | NEGF..=FTI => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + ); + self.buf.extend([r0, r1]); + Ok(()) + } + LI | JMP => { + expect_matches!(self, Token::Register(r0), Token::PSep); + self.buf.push(r0); + self.insert_imm()?; + Ok(()) + } + ADDI..=CMPUI | BMC | JEQ..=JGTU | ADDFI..=MULFI => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + Token::PSep, + ); + self.buf.extend([r0, r1]); + self.insert_imm()?; + Ok(()) + } + LD..=ST => { + expect_matches!( + self, + Token::Register(r0), + Token::PSep, + Token::Register(r1), + Token::PSep, + Token::Integer(offset), + Token::PSep, + Token::Integer(len), + ); + self.buf.extend([r0, r1]); + self.buf.extend(offset.to_le_bytes()); + self.buf.extend( + u16::try_from(len) + .map_err(|_| ErrorKind::InvalidToken)? + .to_le_bytes(), + ); + Ok(()) + } + _ => unreachable!(), + }?; + match self.next() { + Ok(Token::ISep) => (), + Ok(_) => return Err(ErrorKind::UnexpectedToken), + Err(ErrorKind::UnexpectedEnd) => return Ok(()), + Err(e) => return Err(e), + } + } + Some(Ok(Token::Label(lbl))) => { + self.label_map.insert(lbl, self.buf.len() as u64); + } + Some(Ok(Token::ISep)) => (), + Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken), + Some(Err(())) => return Err(ErrorKind::InvalidToken), + None => return Ok(()), + } + } + } + + fn link_local_syms(&mut self) -> Result<(), ErrorKind> { + for (ix, sym) in &self.to_sub_label { + self.label_map + .get(sym) + .ok_or(ErrorKind::InvalidSymbol)? + .to_le_bytes() + .iter() + .enumerate() + .for_each(|(i, b)| { + self.buf[ix + i] = *b; + }); + } + + Ok(()) + } + + fn insert_imm(&mut self) -> Result<(), ErrorKind> { + let imm = match self.next()? { + Token::Integer(i) => i.to_le_bytes(), + Token::Symbol(s) => { + self.to_sub_label.insert(self.buf.len(), s); + [0; 8] + } + _ => return Err(ErrorKind::UnexpectedToken), + }; + self.buf.extend(imm); + Ok(()) + } + } + + let mut asm = Assembler { + lexer: Token::lexer(code), + label_map: Default::default(), + to_sub_label: Default::default(), + buf, + }; + + asm.assemble().map_err(|kind| Error { + kind, + span: asm.lexer.span(), + })?; + + asm.link_local_syms() + .map_err(|kind| Error { kind, span: 0..0 }) +}