ableos/hblang/src/codegen.rs

403 lines
11 KiB
Rust
Raw Normal View History

2024-05-10 14:33:42 -05:00
use {
2024-05-10 15:54:12 -05:00
crate::{
lexer,
parser::{self, Expr},
},
2024-05-10 14:33:42 -05:00
std::rc::Rc,
};
2024-05-09 11:16:01 -05:00
2024-05-10 14:33:42 -05:00
/// Index of a label inside the code generator's label table.
type LabelId = u32;
/// Number of a machine register.
type Reg = u8;
/// Word type of a register bit mask.
// NOTE(review): only used to derive `ELEM_WIDTH` in the code visible here; presumably
// reserved for the bit-mask register set mentioned in `RegAlloc`'s TODO. Confirm.
type MaskElem = u64;
/// Register that `Func::push` / `Func::pop` use as the stack pointer.
const STACK_PTR: Reg = 254;
/// Register 0. Presumably the hard-wired zero register of the target; confirm against the ISA.
const ZERO: Reg = 0;
/// Register named by `Func::call` and `Func::ret` for the return address.
const RET_ADDR: Reg = 31;
/// Number of bits in one `MaskElem`.
const ELEM_WIDTH: usize = std::mem::size_of::<MaskElem>() * 8;
2024-05-10 14:33:42 -05:00
/// Snapshot taken by `Codegen::add_label` when a function starts being emitted.
struct Frame {
    /// Label assigned to the function.
    label: LabelId,
    /// Number of relocations already recorded in the main code buffer at that point.
    prev_relocs: usize,
    /// Byte offset in the main code buffer at which the function begins.
    offset: u32,
}
/// A spot in a code buffer that `Func::relocate` still has to patch.
struct Reloc {
    /// Label the relocation refers to (index into the label table).
    id: LabelId,
    /// Byte offset of the patch site inside the code buffer.
    offset: u32,
    /// Width of the patch site in bytes; `Func::relocate` accepts 2, 4 and 8.
    size: u16,
}
/// A buffer of emitted machine code together with its unresolved relocations.
#[derive(Default)]
pub struct Func {
    /// Raw encoded bytes.
    code: Vec<u8>,
    /// Relocations recorded against `code` that have not been patched yet.
    relocs: Vec<Reloc>,
}
impl Func {
pub fn extend(&mut self, bytes: &[u8]) {
self.code.extend_from_slice(bytes);
}
pub fn offset(&mut self, id: LabelId, offset: u32, size: u16) {
self.relocs.push(Reloc {
id,
offset: self.code.len() as u32 + offset,
size,
});
}
fn push(&mut self, value: Reg, size: usize) {
self.st(value, STACK_PTR, 0, size as _);
self.addi64(STACK_PTR, STACK_PTR, size as _);
}
fn pop(&mut self, value: Reg, size: usize) {
self.addi64(STACK_PTR, STACK_PTR, (size as u64).wrapping_neg());
self.ld(value, STACK_PTR, 0, size as _);
}
fn call(&mut self, func: LabelId) {
self.jal(RET_ADDR, ZERO, func);
}
fn ret(&mut self) {
self.jala(ZERO, RET_ADDR, 0);
}
fn prelude(&mut self, entry: LabelId) {
self.call(entry);
self.tx();
}
2024-05-10 15:54:12 -05:00
fn div64(&mut self, reg0: Reg, reg1: Reg, reg2: Reg) {
self.diru64(reg0, ZERO, reg1, reg2);
}
2024-05-10 14:33:42 -05:00
fn relocate(&mut self, labels: &[Label], shift: i64) {
for reloc in self.relocs.drain(..) {
let label = &labels[reloc.id as usize];
let offset = if reloc.size == 8 {
reloc.offset as i64
} else {
label.offset as i64 - reloc.offset as i64
} + shift;
let dest = &mut self.code[reloc.offset as usize..][..reloc.size as usize];
match reloc.size {
2 => dest.copy_from_slice(&(offset as i16).to_le_bytes()),
4 => dest.copy_from_slice(&(offset as i32).to_le_bytes()),
8 => dest.copy_from_slice(&(offset as i64).to_le_bytes()),
_ => unreachable!(),
};
}
}
}
/// Very small register allocator: a stack of free registers plus a record of every
/// register that has been handed out since the last reset.
#[derive(Default)]
pub struct RegAlloc {
    /// Registers currently available; `allocate` pops from the end.
    free: Vec<Reg>,
    // TODO:use 256 bit mask instead
    /// Every register handed out since the last reset, kept sorted from the highest
    /// register number to the lowest and free of duplicates.
    used: Vec<Reg>,
}

impl RegAlloc {
    /// Resets the allocator and makes registers 1 through 31 available.
    fn init_caller(&mut self) {
        self.clear();
        self.free.extend(1..=31);
    }

    /// Forgets all free and used registers.
    fn clear(&mut self) {
        self.free.clear();
        self.used.clear();
    }

    /// Hands out the most recently freed (or highest remaining) register and records it
    /// in `used`.
    ///
    /// # Panics
    /// Panics when no register is free; spilling is not implemented.
    fn allocate(&mut self) -> Reg {
        let reg = self.free.pop().expect("TODO: we need to spill");
        // The lookup is a binary search over the bit-inverted register numbers, which is
        // only valid while `used` is sorted in descending register order. Inserting at the
        // slot the search reports keeps that order no matter which order registers are
        // handed out in; a plain `push` would only keep it by coincidence.
        if let Err(slot) = self.used.binary_search_by_key(&!reg, |&r| !r) {
            self.used.insert(slot, reg);
        }
        reg
    }

    /// Returns `reg` to the pool. The register stays recorded in `used`.
    fn free(&mut self, reg: Reg) {
        self.free.push(reg);
    }
}
2024-05-09 11:16:01 -05:00
2024-05-10 14:33:42 -05:00
/// A named position in the main code buffer.
struct Label {
    /// Byte offset of the label in the main code buffer (0 while only reserved).
    offset: u32,
    // TODO: use a different style of identifier that does not allocate, e.g. index + length
    // into a file
    /// Name the label is looked up by.
    name: Rc<str>,
}
2024-05-09 16:41:59 -05:00
pub struct Codegen<'a> {
2024-05-10 14:33:42 -05:00
path: &'a std::path::Path,
ret: Expr<'a>,
gpa: RegAlloc,
code: Func,
temp: Func,
labels: Vec<Label>,
2024-05-09 11:16:01 -05:00
}
2024-05-09 16:41:59 -05:00
impl<'a> Codegen<'a> {
pub fn new() -> Self {
2024-05-09 11:16:01 -05:00
Self {
2024-05-10 14:33:42 -05:00
path: std::path::Path::new(""),
ret: Expr::Return { val: None },
gpa: Default::default(),
code: Default::default(),
temp: Default::default(),
labels: Default::default(),
2024-05-09 11:16:01 -05:00
}
}
2024-05-10 14:33:42 -05:00
/// Generates code for every top-level expression of one source file.
///
/// `path` is remembered as the current file. The returned result is always `Ok(())`;
/// unsupported constructs panic inside `expr` instead of being reported here.
pub fn file(
    &mut self,
    path: &'a std::path::Path,
    exprs: &'a [parser::Expr<'a>],
) -> std::fmt::Result {
    self.path = path;
    exprs.iter().for_each(|top_level| {
        self.expr(top_level, None);
    });
    Ok(())
}
2024-05-10 14:33:42 -05:00
fn expr(&mut self, expr: &'a parser::Expr<'a>, expeted: Option<Expr<'a>>) -> Option<Value<'a>> {
2024-05-10 15:54:12 -05:00
use {lexer::TokenKind as T, parser::Expr as E};
match *expr {
2024-05-09 16:41:59 -05:00
E::Decl {
name,
2024-05-10 14:33:42 -05:00
val: E::Closure { ret, body },
2024-05-09 16:41:59 -05:00
} => {
2024-05-10 14:33:42 -05:00
let frame = self.add_label(name);
2024-05-10 15:54:12 -05:00
self.gpa.init_caller();
2024-05-10 14:33:42 -05:00
self.ret = **ret;
self.expr(body, None);
self.write_fn_prelude(frame);
None
}
E::Return { val } => {
if let Some(val) = val {
let val = self.expr(val, Some(self.ret)).unwrap();
if val.ty != self.ret {
panic!("expected {:?}, got {:?}", self.ret, val.ty);
}
match val.loc {
Loc::Reg(reg) => self.code.cp(1, reg),
Loc::Imm(imm) => self.code.li64(1, imm),
}
}
self.ret();
None
2024-05-09 11:16:01 -05:00
}
2024-05-09 16:41:59 -05:00
E::Block { stmts } => {
for stmt in stmts {
2024-05-10 14:33:42 -05:00
self.expr(stmt, None);
2024-05-09 11:16:01 -05:00
}
2024-05-10 14:33:42 -05:00
None
2024-05-09 11:16:01 -05:00
}
2024-05-10 14:33:42 -05:00
E::Number { value } => Some(Value {
ty: expeted.unwrap_or(Expr::Ident { name: "int" }),
loc: Loc::Imm(value),
}),
2024-05-10 15:54:12 -05:00
E::BinOp { left, op, right } => {
let left = self.expr(left, expeted).unwrap();
let right = self.expr(right, Some(left.ty)).unwrap();
type Op = fn(&mut Func, u8, u8, u8);
type ImmOp = fn(&mut Func, u8, u8, u64);
let op = match op {
T::Plus => Func::add64 as Op,
T::Minus => Func::sub64 as Op,
T::Star => Func::mul64 as Op,
T::FSlash => Func::div64 as Op,
_ => unimplemented!("{:#?}", op),
};
let lhs = match left.loc {
Loc::Reg(reg) => reg,
Loc::Imm(imm) => {
let reg = self.gpa.allocate();
self.code.li64(reg, imm);
reg
}
};
let rhs = match right.loc {
Loc::Reg(reg) => reg,
Loc::Imm(imm) => {
let reg = self.gpa.allocate();
self.code.li64(reg, imm);
reg
}
};
op(&mut self.code, lhs, lhs, rhs);
self.gpa.free(rhs);
Some(Value {
ty: left.ty,
loc: Loc::Reg(lhs),
})
}
ast => unimplemented!("{:#?}", ast),
2024-05-09 11:16:01 -05:00
}
}
2024-05-10 14:33:42 -05:00
fn get_or_reserve_label(&mut self, name: &str) -> LabelId {
if let Some(label) = self.labels.iter().position(|l| l.name.as_ref() == name) {
label as u32
} else {
self.labels.push(Label {
offset: 0,
name: name.into(),
});
self.labels.len() as u32 - 1
}
2024-05-09 11:16:01 -05:00
}
2024-05-10 14:33:42 -05:00
fn add_label(&mut self, name: &str) -> Frame {
let offset = self.code.code.len() as u32;
let label = if let Some(label) = self.labels.iter().position(|l| l.name.as_ref() == name) {
self.labels[label].offset = offset;
label as u32
} else {
self.labels.push(Label {
offset,
name: name.into(),
});
self.labels.len() as u32 - 1
};
Frame {
label,
prev_relocs: self.code.relocs.len(),
offset,
}
2024-05-09 11:16:01 -05:00
}
2024-05-10 14:33:42 -05:00
/// Returns the id of the label called `name`.
///
/// # Panics
/// Panics if no label with that name exists.
fn get_label(&self, name: &str) -> LabelId {
    let index = self
        .labels
        .iter()
        .position(|candidate| candidate.name.as_ref() == name)
        .unwrap();
    index as _
}
2024-05-10 14:33:42 -05:00
/// Inserts the register-saving code at the start of the function described by `frame`.
///
/// One 8-byte push is emitted for every register the allocator has recorded as used. The
/// code is assembled in `temp`, the relocations recorded since the function started are
/// moved forward by its length, and it is then spliced into the main buffer at the
/// function's start offset. `temp`'s code is left empty afterwards.
fn write_fn_prelude(&mut self, frame: Frame) {
    // `gpa` and `temp` are distinct fields, so the used list can be borrowed directly
    // instead of being cloned.
    for &reg in &self.gpa.used {
        self.temp.push(reg, 8);
    }

    let shift = self.temp.code.len() as u32;
    for reloc in &mut self.code.relocs[frame.prev_relocs..] {
        reloc.offset += shift;
    }

    let start = frame.offset as usize;
    self.code.code.splice(start..start, self.temp.code.drain(..));
}
2024-05-10 14:33:42 -05:00
/// Emits a function return: restores every register recorded as used so far, in the
/// reverse of the order `write_fn_prelude` saves them, then emits the return instruction.
fn ret(&mut self) {
    // `gpa` and `code` are distinct fields, so no clone of the used list is needed.
    for &reg in self.gpa.used.iter().rev() {
        self.code.pop(reg, 8);
    }
    self.code.ret();
}
/// Finalizes the program and writes it to `out`.
///
/// The output is the entry stub (a call to `main` followed by `tx`) and then all generated
/// code. The stub's relocations are shifted by the stub's own length because it is placed
/// in front of the code the labels refer to.
///
/// # Errors
/// Returns any error reported by `out`.
///
/// # Panics
/// Panics if no `main` label has been defined.
pub fn dump(mut self, out: &mut impl std::io::Write) -> std::io::Result<()> {
    let entry = self.get_label("main");
    self.temp.prelude(entry);

    let stub_len = self.temp.code.len() as i64;
    self.temp.relocate(&self.labels, stub_len);
    self.code.relocate(&self.labels, 0);

    out.write_all(&self.temp.code)?;
    out.write_all(&self.code.code)
}
}
2024-05-10 14:33:42 -05:00
/// Result of generating code for an expression that produces a value.
pub struct Value<'a> {
    /// Type of the value, represented by a type expression.
    ty: Expr<'a>,
    /// Where the value currently lives.
    loc: Loc,
}
2024-05-10 14:33:42 -05:00
/// Location of a computed value.
pub enum Loc {
    /// The value is held in the given register.
    Reg(Reg),
    /// The value is a constant that has not been loaded into a register yet.
    Imm(u64),
}
2024-05-09 16:41:59 -05:00
#[cfg(test)]
mod tests {
    /// Memory backend for the test VM that treats every VM address as a raw host pointer.
    struct TestMem;

    impl hbvm::mem::Memory for TestMem {
        #[inline]
        unsafe fn load(
            &mut self,
            addr: hbvm::mem::Address,
            target: *mut u8,
            count: usize,
        ) -> Result<(), hbvm::mem::LoadError> {
            // SAFETY: relies on the caller passing an address and a target that are both
            // valid for `count` bytes; nothing is checked here.
            unsafe { core::ptr::copy(addr.get() as *const u8, target, count) }
            Ok(())
        }

        #[inline]
        unsafe fn store(
            &mut self,
            addr: hbvm::mem::Address,
            source: *const u8,
            count: usize,
        ) -> Result<(), hbvm::mem::StoreError> {
            // SAFETY: relies on the caller passing a source and an address that are both
            // valid for `count` bytes; nothing is checked here.
            unsafe { core::ptr::copy(source, addr.get() as *mut u8, count) }
            Ok(())
        }

        #[inline]
        unsafe fn prog_read<T: Copy>(&mut self, addr: hbvm::mem::Address) -> T {
            // SAFETY: relies on the caller passing an address that points at a readable `T`.
            unsafe { core::ptr::read(addr.get() as *const T) }
        }
    }

    /// Compiles `input`, runs the result in the VM and appends every VM event, the final
    /// content of register 1 and the exit status to `output`.
    fn generate(input: &'static str, output: &mut String) {
        use std::fmt::Write;

        let path = std::path::Path::new("test");
        let arena = crate::parser::Arena::default();
        let mut buffer = Vec::new();
        let mut parser = super::parser::Parser::new(input, path, &arena, &mut buffer);
        let exprs = parser.file();

        let mut codegen = super::Codegen::new();
        codegen.file(path, &exprs).unwrap();

        let mut out = Vec::new();
        codegen.dump(&mut out).unwrap();

        // Backing storage for the program's stack; it has to outlive the VM run below.
        let mut stack = [0_u64; 1024];

        // The program is executed in place: the VM starts at the address of `out`.
        let mut vm = unsafe {
            hbvm::Vm::<TestMem, 0>::new(TestMem, hbvm::mem::Address::new(out.as_ptr() as u64))
        };
        vm.write_reg(super::STACK_PTR, stack.as_mut_ptr() as u64);

        let status = loop {
            match vm.run() {
                Ok(hbvm::VmRunOk::End) => break Ok(()),
                Ok(event) => writeln!(output, "ev: {:?}", event).unwrap(),
                Err(err) => break Err(err),
            }
        };

        writeln!(output, "ret: {:?}", vm.read_reg(1)).unwrap();
        writeln!(output, "status: {:?}", status).unwrap();
    }

    crate::run_tests! { generate:
        example => include_str!("../examples/main_fn.hb");
        arithmetic => include_str!("../examples/arithmetic.hb");
    }
}