fixing JALA and JAL saving self reference instead of reference to the next instruction

This commit is contained in:
mlokr 2024-05-10 15:29:11 +02:00
parent b80528bfd7
commit aa77a2f822
13 changed files with 457 additions and 94 deletions

3
Cargo.lock generated
View file

@ -204,6 +204,9 @@ dependencies = [
[[package]]
name = "hblang"
version = "0.1.0"
dependencies = [
"hbvm",
]
[[package]]
name = "hbvm"

View file

@ -4,3 +4,6 @@ version = "0.1.0"
edition = "2021"
[dependencies]
[dev-dependencies]
hbvm = { path = "../hbvm", features = ["nightly"] }

View file

@ -1,25 +1,62 @@
use {crate::parser, std::fmt::Write};
const STACK_PTR: &str = "r254";
const ZERO: &str = "r0";
const RET_ADDR: &str = "r31";
type Reg = u8;
type MaskElem = u64;
pub struct Codegen<'a> {
path: &'a std::path::Path,
code: String,
data: String,
const STACK_PTR: Reg = 254;
const ZERO: Reg = 0;
const RET_ADDR: Reg = 31;
const ELEM_WIDTH: usize = std::mem::size_of::<MaskElem>() * 8;
/// Register allocator state: which registers can still be handed out and which
/// ones have been handed out at least once.
#[derive(Default)]
pub struct RegAlloc {
/// Registers currently available; `allocate` pops from the end, `free` pushes back.
free: Vec<Reg>,
// TODO:use 256 bit mask instead
/// Every register `allocate` has returned so far, wrapped in `Reverse`;
/// `allocate` looks entries up with `binary_search`.
used: Vec<std::cmp::Reverse<Reg>>,
}
impl<'a> Codegen<'a> {
pub fn new(path: &'a std::path::Path) -> Self {
impl RegAlloc {
fn callee_general_purpose() -> Self {
Self {
path,
code: String::new(),
data: String::new(),
free: (32..=253).collect(),
used: Vec::new(),
}
}
pub fn file(&mut self, exprs: &[parser::Expr]) -> std::fmt::Result {
/// Hands out the register on top of the free list and records it in `used`.
///
/// `used` is probed with `binary_search`, which only gives meaningful answers on
/// a sorted vector. A register that is not recorded yet is therefore inserted at
/// the index the search reports, so `used` stays sorted (by `Reverse<Reg>`) no
/// matter in which order registers were freed and re-allocated.
///
/// # Panics
/// Panics when no free register is left (spilling is not implemented yet).
fn allocate(&mut self) -> Reg {
    let reg = self.free.pop().expect("TODO: we need to spill");
    let key = std::cmp::Reverse(reg);
    // `Err(slot)` is the position that keeps the vector ordered.
    if let Err(slot) = self.used.binary_search(&key) {
        self.used.insert(slot, key);
    }
    reg
}
/// Returns `reg` to the free list so that a later `allocate` can hand it out again.
/// The register stays recorded in `used`.
fn free(&mut self, reg: Reg) {
self.free.push(reg);
}
}
pub struct Codegen<'a> {
path: &'a std::path::Path,
gpa: RegAlloc,
code: String,
data: String,
prelude_buf: String,
}
impl<'a> Codegen<'a> {
pub fn new() -> Self {
Self {
path: std::path::Path::new(""),
gpa: RegAlloc::callee_general_purpose(),
code: String::new(),
data: String::new(),
prelude_buf: String::new(),
}
}
pub fn file(&mut self, path: &'a std::path::Path, exprs: &[parser::Expr]) -> std::fmt::Result {
self.path = path;
for expr in exprs {
self.expr(expr)?;
}
@ -28,7 +65,7 @@ impl<'a> Codegen<'a> {
fn expr(&mut self, expr: &parser::Expr) -> std::fmt::Result {
use parser::Expr as E;
match expr {
match *expr {
E::Decl {
name,
val:
@ -38,7 +75,9 @@ impl<'a> Codegen<'a> {
},
} => {
writeln!(self.code, "{name}:")?;
self.expr(body)
let fn_start = self.code.len();
self.expr(body)?;
self.write_fn_prelude(fn_start)
}
E::Return { val: None } => self.ret(),
E::Block { stmts } => {
@ -51,55 +90,163 @@ impl<'a> Codegen<'a> {
}
}
fn stack_push(&mut self, value: impl std::fmt::Display, size: usize) -> std::fmt::Result {
writeln!(self.code, " st {value}, {STACK_PTR}, {ZERO}, {size}")?;
writeln!(
self.code,
" addi{} {STACK_PTR}, {STACK_PTR}, {size}",
size * 8
)
}
fn write_fn_prelude(&mut self, fn_start: usize) -> std::fmt::Result {
self.prelude_buf.clear();
// TODO: avoid clone here
for reg in self.gpa.used.clone().iter() {
stack_push(&mut self.prelude_buf, reg.0, 8)?;
}
fn stack_pop(&mut self, value: impl std::fmt::Display, size: usize) -> std::fmt::Result {
writeln!(
self.code,
" subi{} {STACK_PTR}, {STACK_PTR}, {size}",
size * 8
)?;
writeln!(self.code, " ld {value}, {STACK_PTR}, {ZERO}, {size}")
}
self.code.insert_str(fn_start, &self.prelude_buf);
self.gpa = RegAlloc::callee_general_purpose();
fn call(&mut self, func: impl std::fmt::Display) -> std::fmt::Result {
self.stack_push(&func, 8)?;
self.global_jump(func)
Ok(())
}
fn ret(&mut self) -> std::fmt::Result {
self.stack_pop(RET_ADDR, 8)?;
self.global_jump(RET_ADDR)
for reg in self.gpa.used.clone().iter().rev() {
stack_pop(&mut self.code, reg.0, 8)?;
}
ret(&mut self.code)
}
fn global_jump(&mut self, label: impl std::fmt::Display) -> std::fmt::Result {
writeln!(self.code, " jala {ZERO}, {label}, 0")
}
pub fn dump(&mut self, mut out: impl std::fmt::Write) -> std::fmt::Result {
writeln!(out, "start:")?;
writeln!(out, " jala {ZERO}, main, 0")?;
writeln!(out, " tx")?;
/// Writes the whole program to `out`: the start-up prelude first, then the
/// generated code section and the data section, each terminated by a newline.
pub fn dump(self, mut out: impl std::fmt::Write) -> std::fmt::Result {
    prelude(&mut out)?;
    for section in [&self.code, &self.data] {
        writeln!(out, "{section}")?;
    }
    Ok(())
}
}
/// Emits the two instructions that push register `value` onto the stack:
/// a `st` of `size` bytes at the stack pointer, then an `addi` (mnemonic
/// suffixed with `size * 8`) that moves the stack pointer up by `size`.
fn stack_push(out: &mut impl std::fmt::Write, value: Reg, size: usize) -> std::fmt::Result {
    writeln!(out, " st r{value}, r{STACK_PTR}, r{ZERO}, {size}")?;
    let bits = size * 8;
    writeln!(out, " addi{bits} r{STACK_PTR}, r{STACK_PTR}, {size}")
}
/// Emits the two instructions that pop the top of the stack into register
/// `value`: a `subi` (mnemonic suffixed with `size * 8`) that moves the stack
/// pointer down by `size`, then an `ld` of `size` bytes from the new position.
fn stack_pop(out: &mut impl std::fmt::Write, value: Reg, size: usize) -> std::fmt::Result {
    let bits = size * 8;
    writeln!(out, " subi{bits} r{STACK_PTR}, r{STACK_PTR}, {size}")?;
    writeln!(out, " ld r{value}, r{STACK_PTR}, r{ZERO}, {size}")
}
/// Emits a call to the label `func`: the current return-address register is
/// pushed, the jump to `func` is emitted, and the return-address register is
/// popped again after the jump instruction.
fn call(out: &mut impl std::fmt::Write, func: &str) -> std::fmt::Result {
    // Save the caller's own return address around the nested call.
    stack_push(out, RET_ADDR, 8)?;
    jump_label(out, func)?;
    stack_pop(out, RET_ADDR, 8)?;
    Ok(())
}
/// Emits the function return: a `jala` whose operands are the zero register,
/// the return-address register and a literal offset of 0.
fn ret(out: &mut impl std::fmt::Write) -> std::fmt::Result {
    out.write_fmt(format_args!(" jala r{ZERO}, r{RET_ADDR}, 0\n"))
}
/// Emits a `jal` to `label`, with the return-address register as the first
/// operand and the zero register as the second.
fn jump_label(out: &mut impl std::fmt::Write, label: &str) -> std::fmt::Result {
    let (link, base) = (RET_ADDR, ZERO);
    writeln!(out, " jal r{link}, r{base}, {label}")
}
/// Emits the program entry stub: the `start` label, a `jal` to `main` that uses
/// the return-address register as its first operand, and a trailing `tx`.
fn prelude(out: &mut impl std::fmt::Write) -> std::fmt::Result {
    out.write_str("start:\n")?;
    writeln!(out, " jal r{RET_ADDR}, r{ZERO}, main")?;
    out.write_str(" tx\n")
}
#[cfg(test)]
mod tests {
use std::io::Write;
/// Memory backend used by the tests: VM addresses are cast straight to host
/// pointers, so no translation or bounds checking takes place.
struct TestMem;
impl hbvm::mem::Memory for TestMem {
/// Copies `count` bytes from the address `addr` into `target`.
/// Always returns `Ok(())`.
#[inline]
unsafe fn load(
&mut self,
addr: hbvm::mem::Address,
target: *mut u8,
count: usize,
) -> Result<(), hbvm::mem::LoadError> {
// The VM address is used as a raw host pointer.
unsafe { core::ptr::copy(addr.get() as *const u8, target, count) }
Ok(())
}
/// Copies `count` bytes from `source` to the address `addr`.
/// Always returns `Ok(())`.
#[inline]
unsafe fn store(
&mut self,
addr: hbvm::mem::Address,
source: *const u8,
count: usize,
) -> Result<(), hbvm::mem::StoreError> {
// The VM address is used as a raw host pointer.
unsafe { core::ptr::copy(source, addr.get() as *mut u8, count) }
Ok(())
}
/// Reads a value of type `T` from the address `addr`, treating it as a host pointer.
#[inline]
unsafe fn prog_read<T: Copy>(&mut self, addr: hbvm::mem::Address) -> T {
unsafe { core::ptr::read(addr.get() as *const T) }
}
}
fn generate(input: &'static str, output: &mut String) {
let mut parser = super::parser::Parser::new(input, std::path::Path::new("test"));
let path = std::path::Path::new("test");
let arena = crate::parser::Arena::default();
let mut buffer = Vec::new();
let mut parser = super::parser::Parser::new(input, path, &arena, &mut buffer);
let exprs = parser.file();
let mut codegen = super::Codegen::new(std::path::Path::new("test"));
codegen.file(&exprs).unwrap();
codegen.dump(output).unwrap();
let mut codegen = super::Codegen::new();
codegen.file(path, &exprs).unwrap();
codegen.dump(&mut *output).unwrap();
let mut proc = std::process::Command::new("/usr/bin/hbas")
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.spawn()
.unwrap();
proc.stdin
.as_mut()
.unwrap()
.write_all(output.as_bytes())
.unwrap();
let out = proc.wait_with_output().unwrap();
if !out.status.success() {
panic!(
"hbas failed with status: {}\n{}",
out.status,
String::from_utf8_lossy(&out.stderr)
);
} else {
use std::fmt::Write;
let mut stack = [0_u64; 1024];
for b in &out.stdout {
writeln!(output, "{:02x}", b).unwrap();
}
let mut vm = unsafe {
hbvm::Vm::<TestMem, 0>::new(
TestMem,
hbvm::mem::Address::new(out.stdout.as_ptr() as u64),
)
};
vm.write_reg(super::STACK_PTR, stack.as_mut_ptr() as u64);
let stat = loop {
match vm.run() {
Ok(hbvm::VmRunOk::End) => break Ok(()),
Ok(ev) => writeln!(output, "ev: {:?}", ev).unwrap(),
Err(e) => break Err(e),
}
};
writeln!(output, "ret: {:?}", vm.read_reg(0)).unwrap();
writeln!(output, "status: {:?}", stat).unwrap();
}
}
crate::run_tests! { generate:

View file

@ -1,5 +1,3 @@
use std::{iter::Peekable, str::Chars};
#[derive(Debug, PartialEq)]
pub struct Token {
pub kind: TokenKind,

View file

@ -1,4 +1,8 @@
#![feature(noop_waker)]
#![feature(non_null_convenience)]
#![allow(dead_code)]
#![feature(const_mut_refs)]
#[macro_export]
macro_rules! run_tests {
($runner:path: $($name:ident => $input:expr;)*) => {$(

View file

@ -1,43 +1,54 @@
use std::{cell::Cell, ops::Not};
use std::{cell::Cell, ptr::NonNull};
use crate::lexer::{Lexer, Token, TokenKind};
type Ptr<T> = &'static T;
type Ptr<'a, T> = &'a T;
type Slice<'a, T> = &'a [T];
fn ptr<T>(val: T) -> Ptr<T> {
Box::leak(Box::new(val))
pub struct Parser<'a, 'b> {
path: &'a std::path::Path,
lexer: Lexer<'a>,
arena: &'b Arena<'a>,
expr_buf: &'b mut Vec<Expr<'a>>,
token: Token,
}
pub struct Parser<'a> {
path: &'a std::path::Path,
lexer: Lexer<'a>,
token: Token,
}
impl<'a> Parser<'a> {
pub fn new(input: &'a str, path: &'a std::path::Path) -> Self {
impl<'a, 'b> Parser<'a, 'b> {
pub fn new(
input: &'a str,
path: &'a std::path::Path,
arena: &'b Arena<'a>,
expr_buf: &'b mut Vec<Expr<'static>>,
) -> Self {
let mut lexer = Lexer::new(input);
let token = lexer.next();
Self { lexer, token, path }
Self {
lexer,
token,
path,
arena,
// we ensure it's empty before returning from parse
expr_buf: unsafe { std::mem::transmute(expr_buf) },
}
}
/// Parses expressions until the current token is `Eof` and returns them as a
/// slice allocated in the arena.
pub fn file(&mut self) -> Slice<'a, Expr<'a>> {
self.collect(|s| (s.token.kind != TokenKind::Eof).then(|| s.expr()))
}
/// Pulls the next token from the lexer into `self.token` and returns the token
/// that was current before the call.
fn next(&mut self) -> Token {
std::mem::replace(&mut self.token, self.lexer.next())
}
pub fn file(&mut self) -> Vec<Expr> {
std::iter::from_fn(|| (self.token.kind != TokenKind::Eof).then(|| self.expr())).collect()
/// Parses one expression and moves it into the arena, returning a reference to it.
fn ptr_expr(&mut self) -> Ptr<'a, Expr<'a>> {
self.arena.alloc(self.expr())
}
fn ptr_expr(&mut self) -> Ptr<Expr> {
ptr(self.expr())
}
pub fn expr(&mut self) -> Expr {
fn expr(&mut self) -> Expr<'a> {
let token = self.next();
let expr = match token.kind {
TokenKind::Ident => {
let name = self.lexer.slice(token).to_owned().leak();
let name = self.arena.alloc_str(self.lexer.slice(token));
if self.advance_if(TokenKind::Decl) {
let val = self.ptr_expr();
Expr::Decl { name, val }
@ -55,12 +66,7 @@ impl<'a> Parser<'a> {
Expr::Closure { ret, body }
}
TokenKind::LBrace => Expr::Block {
stmts: std::iter::from_fn(|| {
self.advance_if(TokenKind::RBrace)
.not()
.then(|| self.expr())
})
.collect::<Vec<_>>(),
stmts: self.collect(|s| (!s.advance_if(TokenKind::RBrace)).then(|| s.expr())),
},
TokenKind::Number => Expr::Number {
value: match self.lexer.slice(token).parse() {
@ -68,7 +74,7 @@ impl<'a> Parser<'a> {
Err(e) => self.report(format_args!("invalid number: {e}")),
},
},
tok => self.report(format_args!("unexpected token: {:?}", tok)),
tok => self.report(format_args!("unexpected token: {tok:?}")),
};
self.advance_if(TokenKind::Semi);
@ -76,6 +82,16 @@ impl<'a> Parser<'a> {
expr
}
/// Repeatedly calls `f` until it yields `None`, gathering the produced
/// expressions, and returns them as one slice copied into the arena.
///
/// The shared `expr_buf` serves as scratch space: items are appended past its
/// current length and the buffer is cut back to that length before returning.
fn collect(&mut self, mut f: impl FnMut(&mut Self) -> Option<Expr<'a>>) -> Slice<'a, Expr<'a>> {
    let start = self.expr_buf.len();
    loop {
        match f(self) {
            Some(expr) => self.expr_buf.push(expr),
            None => break,
        }
    }
    let collected = self.arena.alloc_slice(&self.expr_buf[start..]);
    self.expr_buf.truncate(start);
    collected
}
fn advance_if(&mut self, kind: TokenKind) -> bool {
if self.token.kind == kind {
self.next();
@ -102,17 +118,31 @@ impl<'a> Parser<'a> {
}
}
#[derive(Debug)]
pub enum Expr {
Decl { name: Ptr<str>, val: Ptr<Expr> },
Closure { ret: Ptr<Expr>, body: Ptr<Expr> },
Return { val: Option<Ptr<Expr>> },
Ident { name: Ptr<str> },
Block { stmts: Vec<Expr> },
Number { value: u64 },
/// Syntax tree node produced by the parser. Names and child nodes are borrowed
/// for `'a`, which keeps the whole enum `Copy`.
#[derive(Debug, Clone, Copy)]
pub enum Expr<'a> {
/// An identifier followed by a declaration token and the expression bound to it.
Decl {
name: Ptr<'a, str>,
val: Ptr<'a, Expr<'a>>,
},
/// A closure made of a `ret` expression (presumably the return type — confirm
/// against the parser) and a `body` expression.
Closure {
ret: Ptr<'a, Expr<'a>>,
body: Ptr<'a, Expr<'a>>,
},
/// A return statement with an optional value.
Return {
val: Option<Ptr<'a, Expr<'a>>>,
},
/// A bare identifier.
Ident {
name: Ptr<'a, str>,
},
/// A brace-delimited sequence of statements.
Block {
stmts: Slice<'a, Expr<'a>>,
},
/// A number literal, parsed from the token text into a `u64`.
Number {
value: u64,
},
}
impl std::fmt::Display for Expr {
impl<'a> std::fmt::Display for Expr<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
thread_local! {
static INDENT: Cell<usize> = Cell::new(0);
@ -128,7 +158,7 @@ impl std::fmt::Display for Expr {
writeln!(f, "{{")?;
INDENT.with(|i| i.set(i.get() + 1));
let res = crate::try_block(|| {
for stmt in stmts {
for stmt in *stmts {
for _ in 0..INDENT.with(|i| i.get()) {
write!(f, " ")?;
}
@ -145,14 +175,187 @@ impl std::fmt::Display for Expr {
}
}
/// Bump allocator that hands out references living for `'a`.
///
/// Memory is obtained in fixed-size chunks of `ArenaChunk::CHUNK_SIZE` bytes.
/// The chunks form a doubly linked list and are only given back to the system
/// allocator when the arena is dropped. Values placed in the arena are never
/// dropped themselves.
///
/// NOTE(review): `'a` is not tied to a borrow of the arena, so the compiler does
/// not prevent a returned reference from being used after `clear` or after the
/// arena is gone; callers have to uphold that themselves.
#[derive(Default)]
pub struct Arena<'a> {
    /// Chunk currently being allocated from; its `base` is null until the first
    /// allocation happens.
    chunk: Cell<ArenaChunk>,
    ph: std::marker::PhantomData<&'a ()>,
}

impl<'a> Arena<'a> {
    /// Copies `token` into the arena and returns the copy.
    pub fn alloc_str(&self, token: &str) -> &'a str {
        let ptr = self.alloc_slice(token.as_bytes());
        // SAFETY: `ptr` holds a byte-for-byte copy of a valid `str`.
        unsafe { std::str::from_utf8_unchecked_mut(ptr) }
    }

    /// Moves `value` into the arena and returns a reference to it.
    ///
    /// # Panics
    /// Panics if `T` needs a stricter alignment than `ArenaChunk::ALIGN` or does
    /// not fit into a single chunk.
    pub fn alloc<T>(&self, value: T) -> &'a mut T {
        let layout = std::alloc::Layout::new::<T>();
        let ptr = self.alloc_low(layout).as_ptr().cast::<T>();
        // SAFETY: `alloc_low` returned a non-null pointer to `layout.size()` unused
        // bytes aligned for `T`; nothing else refers to them yet.
        unsafe {
            ptr.write(value);
            &mut *ptr
        }
    }

    /// Copies `slice` into the arena and returns the copy. Works for empty
    /// slices too, including on an arena that has not allocated anything yet.
    ///
    /// # Panics
    /// Panics under the same conditions as [`Arena::alloc`].
    pub fn alloc_slice<T: Copy>(&self, slice: &[T]) -> &'a mut [T] {
        let layout = std::alloc::Layout::array::<T>(slice.len()).unwrap();
        let ptr = self.alloc_low(layout).as_ptr().cast::<T>();
        // SAFETY: the destination is freshly reserved, suitably aligned and big
        // enough for `slice.len()` elements, so it cannot overlap `slice`.
        unsafe {
            ptr.copy_from_nonoverlapping(slice.as_ptr(), slice.len());
            std::slice::from_raw_parts_mut(ptr, slice.len())
        }
    }

    /// Forgets every allocation while keeping the chunks for reuse.
    ///
    /// The arena rewinds to its oldest chunk; later allocations walk forward
    /// through the already existing chunks before any new chunk is requested.
    pub fn clear(&mut self) {
        let chunk = self.chunk.get_mut();
        if chunk.base.is_null() {
            return;
        }

        // `next` links lead towards older chunks; follow them to the first one.
        loop {
            let older = ArenaChunk::next(chunk.base);
            if older.is_null() {
                break;
            }
            chunk.base = older;
        }

        // SAFETY: `base` points to a live chunk of `CHUNK_SIZE` bytes.
        chunk.end = unsafe { chunk.base.add(ArenaChunk::PREV_OFFSET) };
    }

    /// Runs `f` on a copy of the current chunk descriptor and stores the copy back.
    fn with_chunk<R>(&self, f: impl FnOnce(&mut ArenaChunk) -> R) -> R {
        let mut chunk = self.chunk.get();
        let r = f(&mut chunk);
        self.chunk.set(chunk);
        r
    }

    /// Reserves `layout.size()` bytes aligned to `layout.align()`.
    ///
    /// When the current chunk is exhausted, the next already existing (newer)
    /// chunk is reused if there is one; otherwise a fresh chunk is allocated.
    fn alloc_low(&self, layout: std::alloc::Layout) -> NonNull<u8> {
        assert!(layout.align() <= ArenaChunk::ALIGN);
        // The link slots at the tail of a chunk are not available to callers.
        assert!(layout.size() <= ArenaChunk::PREV_OFFSET);

        self.with_chunk(|chunk| {
            if let Some(ptr) = chunk.alloc(layout) {
                return ptr;
            }

            *chunk = match ArenaChunk::reset(ArenaChunk::prev(chunk.base)) {
                Some(newer) => newer,
                None => ArenaChunk::new(chunk.base),
            };

            chunk.alloc(layout).unwrap()
        })
    }
}

impl<'a> Drop for Arena<'a> {
    /// Frees every chunk exactly once: first the chunks newer than the current
    /// one (reachable through `prev`), then the current chunk and all older
    /// ones (reachable through `next`).
    fn drop(&mut self) {
        let current = self.chunk.get().base;
        if current.is_null() {
            return;
        }

        let mut newer = ArenaChunk::prev(current);
        while !newer.is_null() {
            // Read the link before the chunk that stores it is released.
            let following = ArenaChunk::prev(newer);
            // SAFETY: every chunk was allocated with `ArenaChunk::LAYOUT`.
            unsafe { std::alloc::dealloc(newer, ArenaChunk::LAYOUT) };
            newer = following;
        }

        let mut chunk = current;
        while !chunk.is_null() {
            let older = ArenaChunk::next(chunk);
            // SAFETY: every chunk was allocated with `ArenaChunk::LAYOUT`.
            unsafe { std::alloc::dealloc(chunk, ArenaChunk::LAYOUT) };
            chunk = older;
        }
    }
}

/// Descriptor of one arena chunk. Allocation proceeds downwards from `end`
/// towards `base`; the last two pointer-sized slots of the chunk store the
/// links to the neighbouring chunks.
#[derive(Clone, Copy)]
struct ArenaChunk {
    /// Start of the chunk's memory, or null when there is no chunk.
    base: *mut u8,
    /// Lowest address handed out so far (one past the free region).
    end: *mut u8,
}

impl Default for ArenaChunk {
    fn default() -> Self {
        Self {
            base: std::ptr::null_mut(),
            end: std::ptr::null_mut(),
        }
    }
}

impl ArenaChunk {
    /// Total size of a chunk in bytes, link slots included.
    const CHUNK_SIZE: usize = 1 << 16;
    /// Alignment of a chunk and therefore the strictest alignment it can serve.
    const ALIGN: usize = std::mem::align_of::<Self>();
    /// Offset of the link to the older chunk.
    const NEXT_OFFSET: usize = Self::CHUNK_SIZE - std::mem::size_of::<*mut u8>();
    /// Offset of the link to the newer chunk; also the usable capacity.
    const PREV_OFFSET: usize = Self::NEXT_OFFSET - std::mem::size_of::<*mut u8>();
    // SAFETY: `CHUNK_SIZE` is non-zero and `ALIGN` is a power of two.
    const LAYOUT: std::alloc::Layout =
        unsafe { std::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) };

    /// Allocates a fresh chunk and links it in as the newer neighbour of `next`
    /// (which may be null). Aborts through `handle_alloc_error` on exhaustion.
    fn new(next: *mut u8) -> Self {
        // SAFETY: `LAYOUT` has a non-zero size.
        let base = unsafe { std::alloc::alloc(Self::LAYOUT) };
        if base.is_null() {
            std::alloc::handle_alloc_error(Self::LAYOUT);
        }
        // SAFETY: `PREV_OFFSET` lies inside the chunk that was just allocated.
        let end = unsafe { base.add(Self::PREV_OFFSET) };

        if !next.is_null() {
            Self::set_prev(next, base);
        }
        Self::set_next(base, next);
        Self::set_prev(base, std::ptr::null_mut());

        Self { base, end }
    }

    /// Stores the link to the older chunk; `curr` must be a live chunk base.
    fn set_next(curr: *mut u8, next: *mut u8) {
        unsafe { curr.add(Self::NEXT_OFFSET).cast::<*mut u8>().write(next) };
    }

    /// Stores the link to the newer chunk; `curr` must be a live chunk base.
    fn set_prev(curr: *mut u8, prev: *mut u8) {
        unsafe { curr.add(Self::PREV_OFFSET).cast::<*mut u8>().write(prev) };
    }

    /// Reads the link to the older chunk; `curr` must be a live chunk base.
    fn next(curr: *mut u8) -> *mut u8 {
        unsafe { curr.add(Self::NEXT_OFFSET).cast::<*mut u8>().read() }
    }

    /// Reads the link to the newer chunk, or returns null when `curr` is null.
    fn prev(curr: *mut u8) -> *mut u8 {
        if curr.is_null() {
            return std::ptr::null_mut();
        }
        unsafe { curr.add(Self::PREV_OFFSET).cast::<*mut u8>().read() }
    }

    /// Builds an empty descriptor for the existing chunk at `prev`, or `None`
    /// when `prev` is null. The free region ends at the link slots so that
    /// reusing the chunk never overwrites them.
    fn reset(prev: *mut u8) -> Option<Self> {
        if prev.is_null() {
            return None;
        }

        Some(Self {
            base: prev,
            // SAFETY: `prev` is a live chunk base of `CHUNK_SIZE` bytes.
            end: unsafe { prev.add(Self::PREV_OFFSET) },
        })
    }

    /// Carves `layout` out of the free region, or returns `None` when there is
    /// no chunk yet or the request does not fit.
    fn alloc(&mut self, layout: std::alloc::Layout) -> Option<NonNull<u8>> {
        // A descriptor without memory cannot serve anything, not even a
        // zero-sized request (that would hand out a null pointer).
        if self.base.is_null() {
            return None;
        }

        let end = self.end as usize;
        let available = end - self.base as usize;
        let unaligned = end.checked_sub(layout.size())?;
        // Extra bytes needed to round the result down to the alignment.
        let padding = unaligned & (layout.align() - 1);
        let size = layout.size() + padding;
        if size > available {
            return None;
        }

        // SAFETY: `size <= available`, so the result stays inside the chunk.
        self.end = unsafe { self.end.sub(size) };
        NonNull::new(self.end)
    }
}
#[cfg(test)]
mod tests {
fn parse(input: &'static str, output: &mut String) {
use std::fmt::Write;
let mut parser = super::Parser::new(input, std::path::Path::new("test"));
let mut arena = super::Arena::default();
let mut buffer = Vec::new();
let mut parser =
super::Parser::new(input, std::path::Path::new("test"), &arena, &mut buffer);
for expr in parser.file() {
writeln!(output, "{}", expr).unwrap();
}
arena.clear();
}
crate::run_tests! { parse:

View file

@ -1,9 +1,8 @@
start:
jala r0, main, 0
addi64 r254, r254, 8
jal r0, r0, main
tx
main:
subi64 r254, r254, 8
ld r31, r254, r0, 8
jala r0, r31, 0

View file

@ -47,6 +47,12 @@ impl Address {
self.0
}
/// Get the address of the instruction that follows the current one.
///
/// `A` is the operand type of the current instruction. The result is this
/// address advanced, with wrap-around, by `size_of::<A>()` plus one more byte
/// (presumably the opcode byte in front of the operands — confirm against the
/// instruction encoding).
#[inline(always)]
pub fn next<A>(self) -> u64 {
    let instruction_len = core::mem::size_of::<A>() as u64 + 1;
    self.0.wrapping_add(instruction_len)
}
/// Construct new address
#[inline(always)]
pub fn new(val: u64) -> Self {

View file

@ -250,7 +250,7 @@ where
// specified register and jump to reg + relative offset.
let OpsRRO(save, reg, offset) = self.decode();
self.write_reg(save, self.pc.get());
self.write_reg(save, self.pc.next::<OpsRRO>());
self.pc = self
.pcrel(offset, 3)
.wrapping_add(self.read_reg(reg).cast::<i64>());
@ -260,7 +260,7 @@ where
// specified register and jump to reg
let OpsRRA(save, reg, offset) = self.decode();
self.write_reg(save, self.pc.get());
self.write_reg(save, self.pc.next::<OpsRRA>());
self.pc =
Address::new(self.read_reg(reg).cast::<u64>().wrapping_add(offset));
}

View file

View file

View file