From 7435218999af438ed7a5a8061ba669ae2b851f31 Mon Sep 17 00:00:00 2001 From: mlokr Date: Sat, 11 May 2024 12:51:32 +0200 Subject: [PATCH] fixing relative jumps to not offset from immidiate adress but from instruction adress --- hblang/build.rs | 207 +++--- hblang/examples/variables.hb | 6 + hblang/src/codegen.rs | 225 ++++-- hblang/src/instrs.rs | 640 +++++++++++------- hblang/src/lexer.rs | 14 +- hblang/src/lib.rs | 13 +- hblang/src/parser.rs | 4 +- .../hblang::codegen::tests::variables.txt | 0 hbvm/src/vmrun.rs | 22 +- 9 files changed, 711 insertions(+), 420 deletions(-) create mode 100644 hblang/examples/variables.hb create mode 100644 hblang/tests/hblang::codegen::tests::variables.txt diff --git a/hblang/build.rs b/hblang/build.rs index 9f72d18..f61acbb 100644 --- a/hblang/build.rs +++ b/hblang/build.rs @@ -1,95 +1,136 @@ #![feature(iter_next_chunk)] +use std::fmt::Write; + fn main() -> Result<(), Box> { println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rerun-if-changed=../hbbytecode/instructions.in"); - let instructions = include_str!("../hbbytecode/instructions.in"); - let mut generated = String::new(); - use std::fmt::Write; - writeln!(&mut generated, "impl crate::codegen::Func {{")?; - - for line in instructions.lines() { - let line = line.strip_suffix(";").unwrap(); - let [opcode, name, ty, doc] = line.splitn(4, ',').map(str::trim).next_chunk().unwrap(); - - writeln!(&mut generated, "/// {}", doc.trim_matches('"'))?; - write!(&mut generated, "pub fn {}(&mut self", name.to_lowercase())?; - for (i, c) in ty.chars().enumerate() { - let (name, ty) = match c { - 'N' => continue, - 'R' => ("reg", "u8"), - 'B' => ("imm", "u8"), - 'H' => ("imm", "u16"), - 'W' => ("imm", "u32"), - 'D' => ("imm", "u64"), - 'P' => ("offset", "u32"), - 'O' => ("offset", "u32"), - 'A' => ("addr", "u64"), - _ => panic!("unknown type: {}", c), - }; - write!(&mut generated, ", {name}{i}: {ty}")?; - } - writeln!(&mut generated, ") {{")?; - - let mut offset = 1; - for (i, c) in ty.chars().enumerate() { - let width = match c { - 'N' => 0, - 'R' => 1, - 'B' => 1, - 'H' => 2, - 'W' => 4, - 'D' => 8, - 'A' => 8, - 'P' => 2, - 'O' => 4, - _ => panic!("unknown type: {}", c), - }; - - if matches!(c, 'P' | 'O') { - writeln!( - &mut generated, - " self.offset(offset{i}, {offset}, {width});", - )?; - } - - offset += width; - } - - write!( - &mut generated, - " self.extend(crate::as_bytes(&crate::Args({opcode}" - )?; - for (i, c) in ty.chars().enumerate() { - let name = match c { - 'N' => continue, - 'R' => "reg", - 'B' | 'H' | 'W' | 'D' => "imm", - 'P' => "0u16", - 'O' => "0u32", - 'A' => "addr", - _ => panic!("unknown type: {}", c), - }; - - if matches!(c, 'P' | 'O') { - write!(&mut generated, ", {name}")?; - } else { - write!(&mut generated, ", {name}{i}")?; - } - } - for _ in ty.len() - (ty == "N") as usize..4 { - write!(&mut generated, ", ()")?; - } - writeln!(&mut generated, ")));")?; - - writeln!(&mut generated, "}}")?; - } - - writeln!(&mut generated, "}}")?; + gen_max_size(&mut generated)?; + gen_encodes(&mut generated)?; + gen_structs(&mut generated)?; + gen_name_list(&mut generated)?; std::fs::write("src/instrs.rs", generated)?; Ok(()) } + +fn gen_name_list(generated: &mut String) -> Result<(), Box> { + writeln!( + generated, + "pub const NAMES: [&str; {}] = [", + instructions().count() + )?; + for [_, name, _, _] in instructions() { + writeln!(generated, " \"{}\",", name.to_lowercase())?; + } + writeln!(generated, "];")?; + + Ok(()) +} + +fn gen_max_size(generated: &mut String) -> Result<(), Box> { + let max = instructions() + .map(|[_, _, ty, _]| { + if ty == "N" { + 1 + } else { + iter_args(ty).map(|(_, c)| arg_to_width(c)).sum::() + 1 + } + }) + .max() + .unwrap(); + + writeln!(generated, "pub const MAX_SIZE: usize = {};", max)?; + + Ok(()) +} + +fn gen_encodes(generated: &mut String) -> Result<(), Box> { + for [op, name, ty, doc] in instructions() { + writeln!(generated, "/// {}", doc.trim_matches('"'))?; + let name = name.to_lowercase(); + let args = comma_sep( + iter_args(ty).map(|(i, c)| format!("{}{i}: {}", arg_to_name(c), arg_to_type(c))), + ); + writeln!( + generated, + "pub fn {name}({args}) -> (usize, [u8; MAX_SIZE]) {{" + )?; + let arg_names = comma_sep(iter_args(ty).map(|(i, c)| format!("{}{i}", arg_to_name(c)))); + writeln!( + generated, + " unsafe {{ crate::encode({ty}({op}, {arg_names})) }}" + )?; + writeln!(generated, "}}")?; + } + + Ok(()) +} + +fn gen_structs(generated: &mut String) -> Result<(), Box> { + let mut seen = std::collections::HashSet::new(); + for [_, _, ty, _] in instructions() { + if !seen.insert(ty) { + continue; + } + let types = comma_sep(iter_args(ty).map(|(_, c)| arg_to_type(c).to_string())); + writeln!(generated, "#[repr(packed)] pub struct {ty}(u8, {types});")?; + } + + Ok(()) +} + +fn comma_sep(items: impl Iterator) -> String { + items + .map(|item| item.to_string()) + .collect::>() + .join(", ") +} + +fn instructions() -> impl Iterator { + include_str!("../hbbytecode/instructions.in") + .lines() + .filter_map(|line| line.strip_suffix(';')) + .map(|line| line.splitn(4, ',').map(str::trim).next_chunk().unwrap()) +} + +fn arg_to_type(arg: char) -> &'static str { + match arg { + 'R' | 'B' => "u8", + 'H' => "u16", + 'W' => "u32", + 'D' | 'A' => "u64", + 'P' => "i16", + 'O' => "i32", + _ => panic!("unknown type: {}", arg), + } +} + +fn arg_to_width(arg: char) -> usize { + match arg { + 'R' | 'B' => 1, + 'H' => 2, + 'W' => 4, + 'D' | 'A' => 8, + 'P' => 2, + 'O' => 4, + _ => panic!("unknown type: {}", arg), + } +} + +fn arg_to_name(arg: char) -> &'static str { + match arg { + 'R' => "reg", + 'B' | 'H' | 'W' | 'D' => "imm", + 'P' | 'O' => "offset", + 'A' => "addr", + _ => panic!("unknown type: {}", arg), + } +} + +fn iter_args(ty: &'static str) -> impl Iterator { + ty.chars().enumerate().filter(|(_, c)| *c != 'N') +} diff --git a/hblang/examples/variables.hb b/hblang/examples/variables.hb new file mode 100644 index 0000000..fc4afe4 --- /dev/null +++ b/hblang/examples/variables.hb @@ -0,0 +1,6 @@ +main := ||: int { + a := 1; + b := 2; + a = a + 1; + return a - b; +} diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index 9abc906..dc33360 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -1,6 +1,6 @@ use { crate::{ - lexer, + instrs, lexer, parser::{self, Expr}, }, std::rc::Rc, @@ -27,6 +27,11 @@ struct Reloc { size: u16, } +struct StackReloc { + offset: u32, + size: u16, +} + #[derive(Default)] pub struct Func { code: Vec, @@ -46,31 +51,48 @@ impl Func { }); } + fn encode(&mut self, (len, instr): (usize, [u8; instrs::MAX_SIZE])) { + let name = instrs::NAMES[instr[0] as usize]; + println!( + "{}: {}", + name, + instr + .iter() + .take(len) + .skip(1) + .map(|b| format!("{:02x}", b)) + .collect::() + ); + self.code.extend_from_slice(&instr[..len]); + } + fn push(&mut self, value: Reg, size: usize) { - self.st(value, STACK_PTR, 0, size as _); - self.addi64(STACK_PTR, STACK_PTR, size as _); + self.subi64(STACK_PTR, STACK_PTR, size as _); + self.encode(instrs::st(value, STACK_PTR, 0, size as _)); } fn pop(&mut self, value: Reg, size: usize) { - self.addi64(STACK_PTR, STACK_PTR, (size as u64).wrapping_neg()); - self.ld(value, STACK_PTR, 0, size as _); + self.encode(instrs::ld(value, STACK_PTR, 0, size as _)); + self.encode(instrs::addi64(STACK_PTR, STACK_PTR, size as _)); + } + + fn subi64(&mut self, dest: Reg, src: Reg, imm: u64) { + self.encode(instrs::addi64(dest, src, imm.wrapping_neg())); } fn call(&mut self, func: LabelId) { - self.jal(RET_ADDR, ZERO, func); + self.offset(func, 3, 4); + self.encode(instrs::jal(RET_ADDR, ZERO, 0)); } fn ret(&mut self) { - self.jala(ZERO, RET_ADDR, 0); + self.pop(RET_ADDR, 8); + self.encode(instrs::jala(ZERO, RET_ADDR, 0)); } fn prelude(&mut self, entry: LabelId) { self.call(entry); - self.tx(); - } - - fn div64(&mut self, reg0: Reg, reg1: Reg, reg2: Reg) { - self.diru64(reg0, ZERO, reg1, reg2); + self.encode(instrs::tx()); } fn relocate(&mut self, labels: &[Label], shift: i64) { @@ -131,24 +153,38 @@ struct Label { name: Rc, } +struct Variable<'a> { + name: Rc, + offset: u64, + ty: Expr<'a>, +} + pub struct Codegen<'a> { - path: &'a std::path::Path, - ret: Expr<'a>, - gpa: RegAlloc, - code: Func, - temp: Func, - labels: Vec