holey-bytes/hbasm/src/lib.rs

181 lines
5.3 KiB
Rust

use {
logos::{Lexer, Logos, Span},
std::{
fmt::{Display, Formatter},
ops::Range,
str::FromStr,
},
};
// Generates the `Token` enum that the assembler's lexer produces.
//
// Input: a comma-separated list of mnemonic string literals (trailing comma
// allowed). Each mnemonic becomes one `#[token]` attribute on `OpCode`, so the
// lexer recognises exactly the mnemonics passed to the macro.
macro_rules! tokendef {
($($opcode:literal),* $(,)?) => {
paste::paste! {
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
// Spaces, tabs and form feeds are skipped between tokens. Newlines are NOT
// skipped: "\n" is itself a token (`ISep`).
#[logos(skip r"[ \t\f]+")]
pub enum Token {
// One attribute per mnemonic; the callback ignores the lexer and yields the
// constant `hbbytecode::opcode::<mnemonic upper-cased>`, which is stored in
// the `u8` payload.
$(#[token($opcode, |_| hbbytecode::opcode::[<$opcode:upper>])])*
OpCode(u8),
// Unsigned decimal literal; a value that does not parse into the `u64`
// payload makes the callback return `None`.
#[regex("[0-9]+", |l| l.slice().parse().ok())]
// Negative decimal literal: parsed as `i64`, then its bytes are reinterpreted
// as `u64` so both forms share the same payload type.
#[regex(
"-[0-9]+",
|lexer| {
Some(u64::from_ne_bytes(lexer.slice().parse::<i64>().ok()?.to_ne_bytes()))
},
)] Integer(u64),
// Register operand written as `r<N>`. The leading `r` is sliced off before
// parsing; only indices up to and including 59 are accepted, anything else
// makes the callback return `None`.
#[regex(
"r[0-9]+",
|lexer| match lexer.slice()[1..].parse() {
Ok(n) if n <= 59 => Some(n),
_ => None
},
)] Register(u8),
// Instruction separator: a newline or `;`.
#[token("\n")]
#[token(";")] ISep,
// Operand (parameter) separator: `,`.
#[token(",")] PSep,
}
}
};
}
// Instantiates `Token` with the full mnemonic table. Each string is
// upper-cased by `tokendef!` and looked up as a constant in
// `hbbytecode::opcode`, so every entry here must have a matching constant
// there. `rustfmt::skip` keeps the hand-wrapped table layout intact.
#[rustfmt::skip]
tokendef![
"nop", "add", "sub", "mul", "rem", "and", "or", "xor", "sl", "sr", "srs",
"not", "addf", "subf", "mulf", "divf", "addi", "muli", "remi", "andi",
"ori", "xori", "sli", "sri", "srsi", "addfi", "mulfi", "cp", "li", "lb",
"ld", "lq", "lo", "sb", "sd", "sq", "so", "jmp", "jmpcond", "ret", "ecall",
];
/// Category of failure reported while assembling source text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// A well-formed token appeared where a different one was required.
    UnexpectedToken,
    /// The lexer could not turn the input at this position into a token.
    InvalidToken,
    /// The input ended while another token was still required.
    UnexpectedEnd,
}
/// An assembly failure: what went wrong and where the lexer was when it
/// was detected.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Error {
    /// Category of the failure.
    kind: ErrorKind,
    /// Lexer span captured at the moment the failure was reported.
    span: Span,
}

impl Error {
    /// Returns the category of this failure, so callers can branch on it
    /// instead of parsing the `Display`/`Debug` output.
    #[must_use]
    pub fn kind(&self) -> ErrorKind {
        self.kind
    }

    /// Returns the lexer span associated with this failure.
    #[must_use]
    pub fn span(&self) -> Span {
        self.span.clone()
    }
}
/// Renders the error as `Error <kind> at <span>`, using the `Debug`
/// representation of both fields.
impl Display for Error {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { kind, span } = self;
        write!(f, "Error {:?} at {:?}", kind, span)
    }
}
// Marker impl so `Error` can be used wherever `std::error::Error` is required;
// the trait's default methods are sufficient, hence the empty body.
impl std::error::Error for Error {}
// Pulls one token per pattern from `$source.next()` and binds each pattern's
// variables in the caller's scope.
//
// Must be used inside a function returning `Result<_, ErrorKind>`: an error
// from `next()` is propagated with `?`, and a token that does not match its
// pattern makes the enclosing function return `ErrorKind::UnexpectedToken`.
macro_rules! expect_matches {
    ($source:expr, $($expected:pat),* $(,)?) => {
        $(
            let $expected = $source.next()? else {
                return Err(ErrorKind::UnexpectedToken);
            };
        )*
    };
}
/// Assembles textual assembly `code` into bytecode, appending it to `buf`.
///
/// Instructions are separated by a newline or `;`, operands by `,`. Empty
/// statements (blank lines, leading separators, `;;`) are skipped. Each
/// instruction is emitted as its opcode byte, followed by one byte per
/// register operand and eight little-endian bytes per integer operand.
///
/// # Errors
///
/// Returns an [`Error`] holding the failure kind and the lexer span at the
/// point of failure:
///
/// * [`ErrorKind::InvalidToken`] – the lexer could not tokenize the input.
/// * [`ErrorKind::UnexpectedToken`] – a token appeared where another was
///   required, or a mnemonic was used for which no operand layout is
///   implemented.
/// * [`ErrorKind::UnexpectedEnd`] – the input ended in the middle of an
///   instruction's operands.
///
/// Bytes emitted before the failure are left in `buf`.
pub fn assembly(code: &str, buf: &mut Vec<u8>) -> Result<(), Error> {
    /// Lexer and output buffer bundled together so the operand parsers
    /// can share them.
    struct Assembler<'a> {
        lexer: Lexer<'a, Token>,
        buf: &'a mut Vec<u8>,
    }

    impl<'a> Assembler<'a> {
        /// Pulls the next token, mapping a lexer failure to `InvalidToken`
        /// and end of input to `UnexpectedEnd`.
        fn next(&mut self) -> Result<Token, ErrorKind> {
            match self.lexer.next() {
                Some(Ok(t)) => Ok(t),
                Some(Err(())) => Err(ErrorKind::InvalidToken),
                None => Err(ErrorKind::UnexpectedEnd),
            }
        }

        /// Main loop: one instruction per iteration until the input ends.
        fn assemble(&mut self) -> Result<(), ErrorKind> {
            use hbbytecode::opcode::*;
            loop {
                match self.lexer.next() {
                    Some(Ok(Token::OpCode(op))) => {
                        self.buf.push(op);
                        // Dispatch on the operand layout of the opcode.
                        match op {
                            NOP | RET | ECALL => Ok(()),
                            ADD..=SRS | ADDF..=DIVF => self.rrr(),
                            NOT | CP => self.rr(),
                            LI | JMP => self.ri(),
                            ADDI..=MULFI | LB..=SO => self.rri(),
                            // The lexer accepts every mnemonic in the token
                            // table, but not every one has an operand layout
                            // here (`jmpcond` is not named by any arm above).
                            // That is reachable from user input, so report it
                            // as an error instead of panicking.
                            _ => Err(ErrorKind::UnexpectedToken),
                        }?;
                        // An instruction must be followed by a separator or
                        // by the end of the input.
                        match self.next() {
                            Ok(Token::ISep) => (),
                            Ok(_) => return Err(ErrorKind::UnexpectedToken),
                            Err(ErrorKind::UnexpectedEnd) => return Ok(()),
                            Err(e) => return Err(e),
                        }
                    }
                    // Empty statement: a blank line, a leading newline or a
                    // doubled `;`. Nothing to emit, keep going.
                    Some(Ok(Token::ISep)) => (),
                    Some(Ok(_)) => return Err(ErrorKind::UnexpectedToken),
                    Some(Err(())) => return Err(ErrorKind::InvalidToken),
                    None => return Ok(()),
                }
            }
        }

        /// Operands: register, register, register.
        fn rrr(&mut self) -> Result<(), ErrorKind> {
            expect_matches!(
                self,
                Token::Register(r0),
                Token::PSep,
                Token::Register(r1),
                Token::PSep,
                Token::Register(r2)
            );
            self.buf.extend([r0, r1, r2]);
            Ok(())
        }

        /// Operands: register, register.
        fn rr(&mut self) -> Result<(), ErrorKind> {
            expect_matches!(self, Token::Register(r0), Token::PSep, Token::Register(r1),);
            self.buf.extend([r0, r1]);
            Ok(())
        }

        /// Operands: register, integer (emitted as 8 little-endian bytes).
        fn ri(&mut self) -> Result<(), ErrorKind> {
            expect_matches!(self, Token::Register(r0), Token::PSep, Token::Integer(imm),);
            self.buf.push(r0);
            self.buf.extend(imm.to_le_bytes());
            Ok(())
        }

        /// Operands: register, register, integer (emitted as 8 little-endian
        /// bytes).
        fn rri(&mut self) -> Result<(), ErrorKind> {
            expect_matches!(
                self,
                Token::Register(r0),
                Token::PSep,
                Token::Register(r1),
                Token::PSep,
                Token::Integer(imm),
            );
            self.buf.extend([r0, r1]);
            self.buf.extend(imm.to_le_bytes());
            Ok(())
        }

        /// Operand: a single integer (emitted as 8 little-endian bytes).
        // No opcode dispatches here yet; kept so the layout is available once
        // one does, hence the lint suppression.
        #[allow(dead_code)]
        fn i(&mut self) -> Result<(), ErrorKind> {
            expect_matches!(self, Token::Integer(imm),);
            self.buf.extend(imm.to_le_bytes());
            Ok(())
        }
    }

    let mut asm = Assembler {
        lexer: Token::lexer(code),
        buf,
    };

    // On failure, attach the span the lexer is currently positioned at.
    asm.assemble().map_err(|kind| Error {
        kind,
        span: asm.lexer.span(),
    })
}