making the functions kind of walk

This commit is contained in:
mlokr 2024-05-11 16:04:13 +02:00
parent 7435218999
commit 1d74f27b0e
7 changed files with 231 additions and 57 deletions

View file

@ -0,0 +1,14 @@
main := ||: int {
return add_one(10) + add_two(20);
}
add_two := |x: int|: int {
return x + 2;
}
add_one := |x: int|: int {
return x + 1;
}

View file

@ -24,6 +24,7 @@ struct Frame {
struct Reloc { struct Reloc {
id: LabelId, id: LabelId,
offset: u32, offset: u32,
instr_offset: u16,
size: u16, size: u16,
} }
@ -43,26 +44,27 @@ impl Func {
self.code.extend_from_slice(bytes); self.code.extend_from_slice(bytes);
} }
pub fn offset(&mut self, id: LabelId, offset: u32, size: u16) { pub fn offset(&mut self, id: LabelId, instr_offset: u16, size: u16) {
self.relocs.push(Reloc { self.relocs.push(Reloc {
id, id,
offset: self.code.len() as u32 + offset, offset: self.code.len() as u32,
instr_offset,
size, size,
}); });
} }
fn encode(&mut self, (len, instr): (usize, [u8; instrs::MAX_SIZE])) { fn encode(&mut self, (len, instr): (usize, [u8; instrs::MAX_SIZE])) {
let name = instrs::NAMES[instr[0] as usize]; // let name = instrs::NAMES[instr[0] as usize];
println!( // println!(
"{}: {}", // "{}: {}",
name, // name,
instr // instr
.iter() // .iter()
.take(len) // .take(len)
.skip(1) // .skip(1)
.map(|b| format!("{:02x}", b)) // .map(|b| format!("{:02x}", b))
.collect::<String>() // .collect::<String>()
); // );
self.code.extend_from_slice(&instr[..len]); self.code.extend_from_slice(&instr[..len]);
} }
@ -95,7 +97,7 @@ impl Func {
self.encode(instrs::tx()); self.encode(instrs::tx());
} }
fn relocate(&mut self, labels: &[Label], shift: i64) { fn relocate(&mut self, labels: &[FnLabel], shift: i64) {
for reloc in self.relocs.drain(..) { for reloc in self.relocs.drain(..) {
let label = &labels[reloc.id as usize]; let label = &labels[reloc.id as usize];
let offset = if reloc.size == 8 { let offset = if reloc.size == 8 {
@ -104,7 +106,18 @@ impl Func {
label.offset as i64 - reloc.offset as i64 label.offset as i64 - reloc.offset as i64
} + shift; } + shift;
let dest = &mut self.code[reloc.offset as usize..][..reloc.size as usize]; dbg!(
label.name.as_ref(),
offset,
reloc.size,
reloc.instr_offset,
reloc.offset,
shift,
label.offset
);
let dest = &mut self.code[reloc.offset as usize + reloc.instr_offset as usize..]
[..reloc.size as usize];
match reloc.size { match reloc.size {
2 => dest.copy_from_slice(&(offset as i16).to_le_bytes()), 2 => dest.copy_from_slice(&(offset as i16).to_le_bytes()),
4 => dest.copy_from_slice(&(offset as i32).to_le_bytes()), 4 => dest.copy_from_slice(&(offset as i32).to_le_bytes()),
@ -123,9 +136,9 @@ pub struct RegAlloc {
} }
impl RegAlloc { impl RegAlloc {
fn init_caller(&mut self) { fn init_callee(&mut self) {
self.clear(); self.clear();
self.free.extend(1..=31); self.free.extend(32..=253);
} }
fn clear(&mut self) { fn clear(&mut self) {
@ -146,7 +159,7 @@ impl RegAlloc {
} }
} }
struct Label { struct FnLabel {
offset: u32, offset: u32,
// TODO: use different style of identifier that does not allocate, e.g. index + length into a // TODO: use different style of identifier that does not allocate, e.g. index + length into a
// file // file
@ -159,17 +172,23 @@ struct Variable<'a> {
ty: Expr<'a>, ty: Expr<'a>,
} }
/// A pending patch for a placeholder jump emitted by a `return` statement.
///
/// The jump is first encoded with a zero operand; `reloc_rets` later overwrites that
/// operand with the distance from the jump instruction to the end of the function body.
struct RetReloc {
/// Byte position in the code buffer at which the jump instruction starts.
offset: u32,
/// Number of bytes from the start of the instruction to the operand being patched.
instr_offset: u16,
/// Width of the patched operand in bytes.
size: u16,
}
pub struct Codegen<'a> { pub struct Codegen<'a> {
path: &'a std::path::Path, path: &'a std::path::Path,
ret: Expr<'a>, ret: Expr<'a>,
gpa: RegAlloc, gpa: RegAlloc,
code: Func, code: Func,
temp: Func, temp: Func,
labels: Vec<Label>, labels: Vec<FnLabel>,
stack_size: u64, stack_size: u64,
vars: Vec<Variable<'a>>, vars: Vec<Variable<'a>>,
stack_relocs: Vec<StackReloc>, stack_relocs: Vec<StackReloc>,
ret_relocs: Vec<RetReloc>,
} }
impl<'a> Codegen<'a> { impl<'a> Codegen<'a> {
@ -185,6 +204,7 @@ impl<'a> Codegen<'a> {
vars: Default::default(), vars: Default::default(),
stack_relocs: Default::default(), stack_relocs: Default::default(),
ret_relocs: Default::default(),
} }
} }
@ -247,22 +267,59 @@ impl<'a> Codegen<'a> {
} }
} }
/// Patches every pending `return` jump so that it targets the current end of the code
/// buffer, then clears the list of pending return relocations.
///
/// Each recorded `RetReloc` points at a jump whose operand was emitted as zero. The
/// operand is overwritten with the distance from the start of that jump instruction to
/// the end of the code emitted so far. The caller is expected to emit the function
/// epilogue immediately afterwards, so all returns share that single epilogue.
///
/// # Panics
///
/// Panics if a relocation's `size` is not 4, or if its operand lies outside the code
/// buffer. In debug builds it also panics if the operand is not still zeroed.
fn reloc_rets(&mut self) {
    // Jump target: the first byte after everything emitted so far.
    let len = self.code.code.len() as i32;
    for reloc in self.ret_relocs.drain(..) {
        let operand_start = reloc.offset as usize + reloc.instr_offset as usize;
        let dest = &mut self.code.code[operand_start..][..reloc.size as usize];
        // A non-zero operand would mean the placeholder was already patched or the
        // recorded offset has drifted.
        debug_assert!(dest.iter().all(|&b| b == 0));
        // The jump is relative to the start of the jump instruction, not to its operand.
        let offset = len - reloc.offset as i32;
        // Operands are little-endian everywhere else in the emitter (see `relocate`);
        // native-endian bytes would be wrong on a big-endian host.
        dest.copy_from_slice(&offset.to_le_bytes());
    }
}
fn expr(&mut self, expr: &'a parser::Expr<'a>, expeted: Option<Expr<'a>>) -> Option<Value<'a>> { fn expr(&mut self, expr: &'a parser::Expr<'a>, expeted: Option<Expr<'a>>) -> Option<Value<'a>> {
use {lexer::TokenKind as T, parser::Expr as E}; use {lexer::TokenKind as T, parser::Expr as E};
match *expr { match *expr {
E::Decl { E::Decl {
name, name,
val: E::Closure { ret, body }, val: E::Closure { ret, body, args },
} => { } => {
let frame = self.add_label(name); let frame = self.add_label(name);
self.gpa.init_caller(); for (i, &(name, ty)) in args.iter().enumerate() {
let offset = self.alloc_stack(8);
self.decl_var(name, offset, ty);
self.store_stack(i as Reg + 2, offset, 8);
}
self.gpa.init_callee();
self.ret = **ret; self.ret = **ret;
self.expr(body, None); self.expr(body, None);
self.vars.clear();
let stack = std::mem::take(&mut self.stack_size); let stack = std::mem::take(&mut self.stack_size);
self.reloc_stack(stack); self.reloc_stack(stack);
self.write_fn_prelude(frame); self.write_fn_prelude(frame);
self.reloc_rets();
self.ret();
None None
} }
E::Call {
func: E::Ident { name },
args,
} => {
for (i, arg) in args.iter().enumerate() {
let arg = self.expr(arg, None).unwrap();
let reg = self.loc_to_reg(arg.loc);
self.code.encode(instrs::cp(i as Reg + 2, reg));
}
let func = self.get_or_reserve_label(name);
self.code.call(func);
let reg = self.gpa.allocate();
self.code.encode(instrs::cp(reg, 1));
Some(Value {
ty: self.ret,
loc: Loc::Reg(reg),
})
}
E::Decl { name, val } => { E::Decl { name, val } => {
let val = self.expr(val, None).unwrap(); let val = self.expr(val, None).unwrap();
let reg = self.loc_to_reg(val.loc); let reg = self.loc_to_reg(val.loc);
@ -292,7 +349,12 @@ impl<'a> Codegen<'a> {
val, val,
); );
} }
self.ret(); self.ret_relocs.push(RetReloc {
offset: self.code.code.len() as u32,
instr_offset: 1,
size: 4,
});
self.code.encode(instrs::jmp(0));
None None
} }
E::Block { stmts } => { E::Block { stmts } => {
@ -348,7 +410,7 @@ impl<'a> Codegen<'a> {
if let Some(label) = self.labels.iter().position(|l| l.name.as_ref() == name) { if let Some(label) = self.labels.iter().position(|l| l.name.as_ref() == name) {
label as u32 label as u32
} else { } else {
self.labels.push(Label { self.labels.push(FnLabel {
offset: 0, offset: 0,
name: name.into(), name: name.into(),
}); });
@ -362,7 +424,7 @@ impl<'a> Codegen<'a> {
self.labels[label].offset = offset; self.labels[label].offset = offset;
label as u32 label as u32
} else { } else {
self.labels.push(Label { self.labels.push(FnLabel {
offset, offset,
name: name.into(), name: name.into(),
}); });
@ -394,6 +456,10 @@ impl<'a> Codegen<'a> {
reloc.offset += self.temp.code.len() as u32; reloc.offset += self.temp.code.len() as u32;
} }
for reloc in &mut self.ret_relocs {
reloc.offset += self.temp.code.len() as u32;
}
self.code.code.splice( self.code.code.splice(
frame.offset as usize..frame.offset as usize, frame.offset as usize..frame.offset as usize,
self.temp.code.drain(..), self.temp.code.drain(..),
@ -401,11 +467,8 @@ impl<'a> Codegen<'a> {
} }
fn ret(&mut self) { fn ret(&mut self) {
self.stack_relocs.push(StackReloc { self.code
offset: self.code.code.len() as u32 + 3, .encode(instrs::addi64(STACK_PTR, STACK_PTR, self.stack_size as _));
size: 8,
});
self.code.encode(instrs::addi64(STACK_PTR, STACK_PTR, 0));
for reg in self.gpa.used.clone().iter().rev() { for reg in self.gpa.used.clone().iter().rev() {
self.code.pop(*reg, 8); self.code.pop(*reg, 8);
} }
@ -413,6 +476,8 @@ impl<'a> Codegen<'a> {
} }
pub fn dump(mut self, out: &mut impl std::io::Write) -> std::io::Result<()> { pub fn dump(mut self, out: &mut impl std::io::Write) -> std::io::Result<()> {
assert!(self.labels.iter().filter(|l| l.offset == 0).count() == 1);
self.temp.prelude(self.get_label("main")); self.temp.prelude(self.get_label("main"));
self.temp self.temp
.relocate(&self.labels, self.temp.code.len() as i64); .relocate(&self.labels, self.temp.code.len() as i64);
@ -444,6 +509,8 @@ pub enum Loc {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::instrs;
struct TestMem; struct TestMem;
impl hbvm::mem::Memory for TestMem { impl hbvm::mem::Memory for TestMem {
@ -454,6 +521,17 @@ mod tests {
target: *mut u8, target: *mut u8,
count: usize, count: usize,
) -> Result<(), hbvm::mem::LoadError> { ) -> Result<(), hbvm::mem::LoadError> {
println!(
"read: {:x} {} {:?}",
addr.get(),
count,
core::slice::from_raw_parts(target, count)
.iter()
.rev()
.skip_while(|&&b| b == 0)
.map(|&b| format!("{:02x}", b))
.collect::<String>()
);
unsafe { core::ptr::copy(addr.get() as *const u8, target, count) } unsafe { core::ptr::copy(addr.get() as *const u8, target, count) }
Ok(()) Ok(())
} }
@ -465,12 +543,26 @@ mod tests {
source: *const u8, source: *const u8,
count: usize, count: usize,
) -> Result<(), hbvm::mem::StoreError> { ) -> Result<(), hbvm::mem::StoreError> {
println!("write: {:x} {}", addr.get(), count);
unsafe { core::ptr::copy(source, addr.get() as *mut u8, count) } unsafe { core::ptr::copy(source, addr.get() as *mut u8, count) }
Ok(()) Ok(())
} }
#[inline] #[inline]
unsafe fn prog_read<T: Copy>(&mut self, addr: hbvm::mem::Address) -> T { unsafe fn prog_read<T: Copy>(&mut self, addr: hbvm::mem::Address) -> T {
println!(
"read-typed: {:x} {} {:?}",
addr.get(),
std::any::type_name::<T>(),
if core::mem::size_of::<T>() == 1 {
instrs::NAMES[std::ptr::read(addr.get() as *const u8) as usize].to_string()
} else {
core::slice::from_raw_parts(addr.get() as *const u8, core::mem::size_of::<T>())
.iter()
.map(|&b| format!("{:02x}", b))
.collect::<String>()
}
);
unsafe { core::ptr::read(addr.get() as *const T) } unsafe { core::ptr::read(addr.get() as *const T) }
} }
} }
@ -507,7 +599,7 @@ mod tests {
} }
}; };
writeln!(output, "ret: {:?}", vm.read_reg(1)).unwrap(); writeln!(output, "ret: {:?}", vm.read_reg(1).0).unwrap();
writeln!(output, "status: {:?}", stat).unwrap(); writeln!(output, "status: {:?}", stat).unwrap();
} }
@ -515,5 +607,6 @@ mod tests {
example => include_str!("../examples/main_fn.hb"); example => include_str!("../examples/main_fn.hb");
arithmetic => include_str!("../examples/arithmetic.hb"); arithmetic => include_str!("../examples/arithmetic.hb");
variables => include_str!("../examples/variables.hb"); variables => include_str!("../examples/variables.hb");
functions => include_str!("../examples/functions.hb");
} }
} }

View file

@ -1,4 +1,4 @@
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct Token { pub struct Token {
pub kind: TokenKind, pub kind: TokenKind,
pub start: u32, pub start: u32,
@ -27,9 +27,11 @@ pub enum TokenKind {
Minus, Minus,
Star, Star,
FSlash, FSlash,
Bor,
Or, Or,
Semi, Semi,
Colon, Colon,
Comma,
Return, Return,
Eof, Eof,
Error, Error,
@ -53,9 +55,11 @@ impl std::fmt::Display for TokenKind {
T::Minus => "-", T::Minus => "-",
T::Star => "*", T::Star => "*",
T::FSlash => "/", T::FSlash => "/",
T::Bor => "|",
T::Or => "||", T::Or => "||",
T::Semi => ";", T::Semi => ";",
T::Colon => ":", T::Colon => ":",
T::Comma => ",",
T::Return => "return", T::Return => "return",
T::Eof => "<eof>", T::Eof => "<eof>",
T::Error => "<error>", T::Error => "<error>",
@ -164,6 +168,7 @@ impl<'a> Iterator for Lexer<'a> {
true => T::Decl, true => T::Decl,
false => T::Colon, false => T::Colon,
}, },
b',' => T::Comma,
b';' => T::Semi, b';' => T::Semi,
b'=' => T::Assign, b'=' => T::Assign,
b'+' => T::Plus, b'+' => T::Plus,
@ -172,7 +177,7 @@ impl<'a> Iterator for Lexer<'a> {
b'/' => T::FSlash, b'/' => T::FSlash,
b'|' => match self.advance_if(b'|') { b'|' => match self.advance_if(b'|') {
true => T::Or, true => T::Or,
false => T::Error, false => T::Bor,
}, },
b'(' => T::LParen, b'(' => T::LParen,
b')' => T::RParen, b')' => T::RParen,

View file

@ -1,4 +1,4 @@
use std::{cell::Cell, ptr::NonNull}; use std::{cell::Cell, ops::Not, ptr::NonNull};
use crate::lexer::{Lexer, Token, TokenKind}; use crate::lexer::{Lexer, Token, TokenKind};
@ -74,7 +74,7 @@ impl<'a, 'b> Parser<'a, 'b> {
fn unit_expr(&mut self) -> Expr<'a> { fn unit_expr(&mut self) -> Expr<'a> {
let token = self.next(); let token = self.next();
let expr = match token.kind { let mut expr = match token.kind {
TokenKind::Ident => { TokenKind::Ident => {
let name = self.arena.alloc_str(self.lexer.slice(token)); let name = self.arena.alloc_str(self.lexer.slice(token));
if self.advance_if(TokenKind::Decl) { if self.advance_if(TokenKind::Decl) {
@ -91,7 +91,27 @@ impl<'a, 'b> Parser<'a, 'b> {
self.expect_advance(TokenKind::Colon); self.expect_advance(TokenKind::Colon);
let ret = self.ptr_expr(); let ret = self.ptr_expr();
let body = self.ptr_expr(); let body = self.ptr_expr();
Expr::Closure { ret, body } Expr::Closure {
ret,
body,
args: &[],
}
}
TokenKind::Bor => {
let args = self.collect(|s| {
s.advance_if(TokenKind::Bor).not().then(|| {
let name = s.expect_advance(TokenKind::Ident);
let name = s.arena.alloc_str(s.lexer.slice(name));
s.expect_advance(TokenKind::Colon);
let val = s.expr();
s.advance_if(TokenKind::Comma);
(name, val)
})
});
self.expect_advance(TokenKind::Colon);
let ret = self.ptr_expr();
let body = self.ptr_expr();
Expr::Closure { args, ret, body }
} }
TokenKind::LBrace => Expr::Block { TokenKind::LBrace => Expr::Block {
stmts: self.collect(|s| (!s.advance_if(TokenKind::RBrace)).then(|| s.expr())), stmts: self.collect(|s| (!s.advance_if(TokenKind::RBrace)).then(|| s.expr())),
@ -110,19 +130,33 @@ impl<'a, 'b> Parser<'a, 'b> {
tok => self.report(format_args!("unexpected token: {tok:?}")), tok => self.report(format_args!("unexpected token: {tok:?}")),
}; };
loop {
expr = match self.token.kind {
TokenKind::LParen => {
self.next();
Expr::Call {
func: self.arena.alloc(expr),
args: self.collect(|s| {
s.advance_if(TokenKind::RParen).not().then(|| {
let arg = s.expr();
s.advance_if(TokenKind::Comma);
arg
})
}),
}
}
_ => break,
}
}
self.advance_if(TokenKind::Semi); self.advance_if(TokenKind::Semi);
expr expr
} }
fn collect(&mut self, mut f: impl FnMut(&mut Self) -> Option<Expr<'a>>) -> Slice<'a, Expr<'a>> { fn collect<T: Copy>(&mut self, mut f: impl FnMut(&mut Self) -> Option<T>) -> Slice<'a, T> {
let prev_len = self.expr_buf.len(); let vec = std::iter::from_fn(|| f(self)).collect::<Vec<_>>();
while let Some(v) = f(self) { self.arena.alloc_slice(&vec)
self.expr_buf.push(v);
}
let sl = self.arena.alloc_slice(&self.expr_buf[prev_len..]);
self.expr_buf.truncate(prev_len);
sl
} }
fn advance_if(&mut self, kind: TokenKind) -> bool { fn advance_if(&mut self, kind: TokenKind) -> bool {
@ -134,14 +168,14 @@ impl<'a, 'b> Parser<'a, 'b> {
} }
} }
fn expect_advance(&mut self, kind: TokenKind) { fn expect_advance(&mut self, kind: TokenKind) -> Token {
if self.token.kind != kind { if self.token.kind != kind {
self.report(format_args!( self.report(format_args!(
"expected {:?}, found {:?}", "expected {:?}, found {:?}",
kind, self.token.kind kind, self.token.kind
)); ));
} }
self.next(); self.next()
} }
fn report(&self, msg: impl std::fmt::Display) -> ! { fn report(&self, msg: impl std::fmt::Display) -> ! {
@ -158,9 +192,14 @@ pub enum Expr<'a> {
val: Ptr<'a, Expr<'a>>, val: Ptr<'a, Expr<'a>>,
}, },
Closure { Closure {
args: Slice<'a, (Ptr<'a, str>, Expr<'a>)>,
ret: Ptr<'a, Expr<'a>>, ret: Ptr<'a, Expr<'a>>,
body: Ptr<'a, Expr<'a>>, body: Ptr<'a, Expr<'a>>,
}, },
Call {
func: Ptr<'a, Expr<'a>>,
args: Slice<'a, Expr<'a>>,
},
Return { Return {
val: Option<Ptr<'a, Expr<'a>>>, val: Option<Ptr<'a, Expr<'a>>>,
}, },
@ -188,7 +227,28 @@ impl<'a> std::fmt::Display for Expr<'a> {
match *self { match *self {
Self::Decl { name, val } => write!(f, "{} := {}", name, val), Self::Decl { name, val } => write!(f, "{} := {}", name, val),
Self::Closure { ret, body } => write!(f, "||: {} {}", ret, body), Self::Closure { ret, body, args } => {
write!(f, "|")?;
let first = &mut true;
for (name, val) in args {
if !std::mem::take(first) {
write!(f, ", ")?;
}
write!(f, "{}: {}", name, val)?;
}
write!(f, "|: {} {}", ret, body)
}
Self::Call { func, args } => {
write!(f, "{}(", func)?;
let first = &mut true;
for arg in args {
if !std::mem::take(first) {
write!(f, ", ")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
Self::Return { val: Some(val) } => write!(f, "return {};", val), Self::Return { val: Some(val) } => write!(f, "return {};", val),
Self::Return { val: None } => write!(f, "return;"), Self::Return { val: None } => write!(f, "return;"),
Self::Ident { name } => write!(f, "{}", name), Self::Ident { name } => write!(f, "{}", name),

View file

@ -0,0 +1,2 @@
ret: 0
status: Ok(())

View file

@ -243,7 +243,7 @@ where
}), }),
JMP => { JMP => {
let OpsO(off) = self.decode(); let OpsO(off) = self.decode();
self.pc = self.pc.wrapping_add(off).wrapping_add(1); self.pc = self.pc.wrapping_add(off);
} }
JAL => { JAL => {
// Jump and link. Save PC after this instruction to // Jump and link. Save PC after this instruction to