diff --git a/Cargo.lock b/Cargo.lock index 98417cbb..1c70201c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -204,6 +204,9 @@ dependencies = [ [[package]] name = "hblang" version = "0.1.0" +dependencies = [ + "hbvm", +] [[package]] name = "hbvm" diff --git a/hblang/Cargo.toml b/hblang/Cargo.toml index 4c70389a..3cd92339 100644 --- a/hblang/Cargo.toml +++ b/hblang/Cargo.toml @@ -4,3 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] + +[dev-dependencies] +hbvm = { path = "../hbvm", features = ["nightly"] } diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index d5f41f14..7dd1b621 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -1,25 +1,62 @@ use {crate::parser, std::fmt::Write}; -const STACK_PTR: &str = "r254"; -const ZERO: &str = "r0"; -const RET_ADDR: &str = "r31"; +type Reg = u8; +type MaskElem = u64; -pub struct Codegen<'a> { - path: &'a std::path::Path, - code: String, - data: String, +const STACK_PTR: Reg = 254; +const ZERO: Reg = 0; +const RET_ADDR: Reg = 31; +const ELEM_WIDTH: usize = std::mem::size_of::() * 8; + +#[derive(Default)] +pub struct RegAlloc { + free: Vec, + // TODO:use 256 bit mask instead + used: Vec>, } -impl<'a> Codegen<'a> { - pub fn new(path: &'a std::path::Path) -> Self { +impl RegAlloc { + fn callee_general_purpose() -> Self { Self { - path, - code: String::new(), - data: String::new(), + free: (32..=253).collect(), + used: Vec::new(), } } - pub fn file(&mut self, exprs: &[parser::Expr]) -> std::fmt::Result { + fn allocate(&mut self) -> Reg { + let reg = self.free.pop().expect("TODO: we need to spill"); + if self.used.binary_search(&std::cmp::Reverse(reg)).is_err() { + self.used.push(std::cmp::Reverse(reg)); + } + reg + } + + fn free(&mut self, reg: Reg) { + self.free.push(reg); + } +} + +pub struct Codegen<'a> { + path: &'a std::path::Path, + gpa: RegAlloc, + code: String, + data: String, + prelude_buf: String, +} + +impl<'a> Codegen<'a> { + pub fn new() -> Self { + Self { + path: std::path::Path::new(""), + gpa: RegAlloc::callee_general_purpose(), + code: String::new(), + data: String::new(), + prelude_buf: String::new(), + } + } + + pub fn file(&mut self, path: &'a std::path::Path, exprs: &[parser::Expr]) -> std::fmt::Result { + self.path = path; for expr in exprs { self.expr(expr)?; } @@ -28,7 +65,7 @@ impl<'a> Codegen<'a> { fn expr(&mut self, expr: &parser::Expr) -> std::fmt::Result { use parser::Expr as E; - match expr { + match *expr { E::Decl { name, val: @@ -38,7 +75,9 @@ impl<'a> Codegen<'a> { }, } => { writeln!(self.code, "{name}:")?; - self.expr(body) + let fn_start = self.code.len(); + self.expr(body)?; + self.write_fn_prelude(fn_start) } E::Return { val: None } => self.ret(), E::Block { stmts } => { @@ -51,55 +90,163 @@ impl<'a> Codegen<'a> { } } - fn stack_push(&mut self, value: impl std::fmt::Display, size: usize) -> std::fmt::Result { - writeln!(self.code, " st {value}, {STACK_PTR}, {ZERO}, {size}")?; - writeln!( - self.code, - " addi{} {STACK_PTR}, {STACK_PTR}, {size}", - size * 8 - ) - } + fn write_fn_prelude(&mut self, fn_start: usize) -> std::fmt::Result { + self.prelude_buf.clear(); + // TODO: avoid clone here + for reg in self.gpa.used.clone().iter() { + stack_push(&mut self.prelude_buf, reg.0, 8)?; + } - fn stack_pop(&mut self, value: impl std::fmt::Display, size: usize) -> std::fmt::Result { - writeln!( - self.code, - " subi{} {STACK_PTR}, {STACK_PTR}, {size}", - size * 8 - )?; - writeln!(self.code, " ld {value}, {STACK_PTR}, {ZERO}, {size}") - } + self.code.insert_str(fn_start, &self.prelude_buf); + self.gpa = RegAlloc::callee_general_purpose(); - fn call(&mut self, func: impl std::fmt::Display) -> std::fmt::Result { - self.stack_push(&func, 8)?; - self.global_jump(func) + Ok(()) } fn ret(&mut self) -> std::fmt::Result { - self.stack_pop(RET_ADDR, 8)?; - self.global_jump(RET_ADDR) + for reg in self.gpa.used.clone().iter().rev() { + stack_pop(&mut self.code, reg.0, 8)?; + } + ret(&mut self.code) } - fn global_jump(&mut self, label: impl std::fmt::Display) -> std::fmt::Result { - writeln!(self.code, " jala {ZERO}, {label}, 0") - } - - pub fn dump(&mut self, mut out: impl std::fmt::Write) -> std::fmt::Result { - writeln!(out, "start:")?; - writeln!(out, " jala {ZERO}, main, 0")?; - writeln!(out, " tx")?; + pub fn dump(self, mut out: impl std::fmt::Write) -> std::fmt::Result { + prelude(&mut out)?; writeln!(out, "{}", self.code)?; writeln!(out, "{}", self.data) } } +fn stack_push(out: &mut impl std::fmt::Write, value: Reg, size: usize) -> std::fmt::Result { + writeln!(out, " st r{value}, r{STACK_PTR}, r{ZERO}, {size}")?; + writeln!( + out, + " addi{} r{STACK_PTR}, r{STACK_PTR}, {size}", + size * 8 + ) +} + +fn stack_pop(out: &mut impl std::fmt::Write, value: Reg, size: usize) -> std::fmt::Result { + writeln!( + out, + " subi{} r{STACK_PTR}, r{STACK_PTR}, {size}", + size * 8 + )?; + writeln!(out, " ld r{value}, r{STACK_PTR}, r{ZERO}, {size}") +} + +fn call(out: &mut impl std::fmt::Write, func: &str) -> std::fmt::Result { + stack_push(out, RET_ADDR, 8)?; + jump_label(out, func)?; + stack_pop(out, RET_ADDR, 8) +} + +fn ret(out: &mut impl std::fmt::Write) -> std::fmt::Result { + writeln!(out, " jala r{ZERO}, r{RET_ADDR}, 0") +} + +fn jump_label(out: &mut impl std::fmt::Write, label: &str) -> std::fmt::Result { + writeln!(out, " jal r{RET_ADDR}, r{ZERO}, {label}") +} + +fn prelude(out: &mut impl std::fmt::Write) -> std::fmt::Result { + writeln!(out, "start:")?; + writeln!(out, " jal r{RET_ADDR}, r{ZERO}, main")?; + writeln!(out, " tx") +} + #[cfg(test)] mod tests { + use std::io::Write; + + struct TestMem; + + impl hbvm::mem::Memory for TestMem { + #[inline] + unsafe fn load( + &mut self, + addr: hbvm::mem::Address, + target: *mut u8, + count: usize, + ) -> Result<(), hbvm::mem::LoadError> { + unsafe { core::ptr::copy(addr.get() as *const u8, target, count) } + Ok(()) + } + + #[inline] + unsafe fn store( + &mut self, + addr: hbvm::mem::Address, + source: *const u8, + count: usize, + ) -> Result<(), hbvm::mem::StoreError> { + unsafe { core::ptr::copy(source, addr.get() as *mut u8, count) } + Ok(()) + } + + #[inline] + unsafe fn prog_read(&mut self, addr: hbvm::mem::Address) -> T { + unsafe { core::ptr::read(addr.get() as *const T) } + } + } + fn generate(input: &'static str, output: &mut String) { - let mut parser = super::parser::Parser::new(input, std::path::Path::new("test")); + let path = std::path::Path::new("test"); + let arena = crate::parser::Arena::default(); + let mut buffer = Vec::new(); + let mut parser = super::parser::Parser::new(input, path, &arena, &mut buffer); let exprs = parser.file(); - let mut codegen = super::Codegen::new(std::path::Path::new("test")); - codegen.file(&exprs).unwrap(); - codegen.dump(output).unwrap(); + let mut codegen = super::Codegen::new(); + codegen.file(path, &exprs).unwrap(); + codegen.dump(&mut *output).unwrap(); + + let mut proc = std::process::Command::new("/usr/bin/hbas") + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .spawn() + .unwrap(); + proc.stdin + .as_mut() + .unwrap() + .write_all(output.as_bytes()) + .unwrap(); + let out = proc.wait_with_output().unwrap(); + + if !out.status.success() { + panic!( + "hbas failed with status: {}\n{}", + out.status, + String::from_utf8_lossy(&out.stderr) + ); + } else { + use std::fmt::Write; + + let mut stack = [0_u64; 1024]; + + for b in &out.stdout { + writeln!(output, "{:02x}", b).unwrap(); + } + + let mut vm = unsafe { + hbvm::Vm::::new( + TestMem, + hbvm::mem::Address::new(out.stdout.as_ptr() as u64), + ) + }; + + vm.write_reg(super::STACK_PTR, stack.as_mut_ptr() as u64); + + let stat = loop { + match vm.run() { + Ok(hbvm::VmRunOk::End) => break Ok(()), + Ok(ev) => writeln!(output, "ev: {:?}", ev).unwrap(), + Err(e) => break Err(e), + } + }; + + writeln!(output, "ret: {:?}", vm.read_reg(0)).unwrap(); + writeln!(output, "status: {:?}", stat).unwrap(); + } } crate::run_tests! { generate: diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs index 221c0646..1bf68dc9 100644 --- a/hblang/src/lexer.rs +++ b/hblang/src/lexer.rs @@ -1,5 +1,3 @@ -use std::{iter::Peekable, str::Chars}; - #[derive(Debug, PartialEq)] pub struct Token { pub kind: TokenKind, diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index ca0c1cf7..cdf49574 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -1,4 +1,8 @@ #![feature(noop_waker)] +#![feature(non_null_convenience)] +#![allow(dead_code)] +#![feature(const_mut_refs)] + #[macro_export] macro_rules! run_tests { ($runner:path: $($name:ident => $input:expr;)*) => {$( diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs index 900eda07..de0b35e4 100644 --- a/hblang/src/parser.rs +++ b/hblang/src/parser.rs @@ -1,43 +1,54 @@ -use std::{cell::Cell, ops::Not}; +use std::{cell::Cell, ptr::NonNull}; use crate::lexer::{Lexer, Token, TokenKind}; -type Ptr = &'static T; +type Ptr<'a, T> = &'a T; +type Slice<'a, T> = &'a [T]; -fn ptr(val: T) -> Ptr { - Box::leak(Box::new(val)) +pub struct Parser<'a, 'b> { + path: &'a std::path::Path, + lexer: Lexer<'a>, + arena: &'b Arena<'a>, + expr_buf: &'b mut Vec>, + token: Token, } -pub struct Parser<'a> { - path: &'a std::path::Path, - lexer: Lexer<'a>, - token: Token, -} - -impl<'a> Parser<'a> { - pub fn new(input: &'a str, path: &'a std::path::Path) -> Self { +impl<'a, 'b> Parser<'a, 'b> { + pub fn new( + input: &'a str, + path: &'a std::path::Path, + arena: &'b Arena<'a>, + expr_buf: &'b mut Vec>, + ) -> Self { let mut lexer = Lexer::new(input); let token = lexer.next(); - Self { lexer, token, path } + Self { + lexer, + token, + path, + arena, + // we ensure its empty before returning form parse + expr_buf: unsafe { std::mem::transmute(expr_buf) }, + } + } + + pub fn file(&mut self) -> Slice<'a, Expr<'a>> { + self.collect(|s| (s.token.kind != TokenKind::Eof).then(|| s.expr())) } fn next(&mut self) -> Token { std::mem::replace(&mut self.token, self.lexer.next()) } - pub fn file(&mut self) -> Vec { - std::iter::from_fn(|| (self.token.kind != TokenKind::Eof).then(|| self.expr())).collect() + fn ptr_expr(&mut self) -> Ptr<'a, Expr<'a>> { + self.arena.alloc(self.expr()) } - fn ptr_expr(&mut self) -> Ptr { - ptr(self.expr()) - } - - pub fn expr(&mut self) -> Expr { + fn expr(&mut self) -> Expr<'a> { let token = self.next(); let expr = match token.kind { TokenKind::Ident => { - let name = self.lexer.slice(token).to_owned().leak(); + let name = self.arena.alloc_str(self.lexer.slice(token)); if self.advance_if(TokenKind::Decl) { let val = self.ptr_expr(); Expr::Decl { name, val } @@ -55,12 +66,7 @@ impl<'a> Parser<'a> { Expr::Closure { ret, body } } TokenKind::LBrace => Expr::Block { - stmts: std::iter::from_fn(|| { - self.advance_if(TokenKind::RBrace) - .not() - .then(|| self.expr()) - }) - .collect::>(), + stmts: self.collect(|s| (!s.advance_if(TokenKind::RBrace)).then(|| s.expr())), }, TokenKind::Number => Expr::Number { value: match self.lexer.slice(token).parse() { @@ -68,7 +74,7 @@ impl<'a> Parser<'a> { Err(e) => self.report(format_args!("invalid number: {e}")), }, }, - tok => self.report(format_args!("unexpected token: {:?}", tok)), + tok => self.report(format_args!("unexpected token: {tok:?}")), }; self.advance_if(TokenKind::Semi); @@ -76,6 +82,16 @@ impl<'a> Parser<'a> { expr } + fn collect(&mut self, mut f: impl FnMut(&mut Self) -> Option>) -> Slice<'a, Expr<'a>> { + let prev_len = self.expr_buf.len(); + while let Some(v) = f(self) { + self.expr_buf.push(v); + } + let sl = self.arena.alloc_slice(&self.expr_buf[prev_len..]); + self.expr_buf.truncate(prev_len); + sl + } + fn advance_if(&mut self, kind: TokenKind) -> bool { if self.token.kind == kind { self.next(); @@ -102,17 +118,31 @@ impl<'a> Parser<'a> { } } -#[derive(Debug)] -pub enum Expr { - Decl { name: Ptr, val: Ptr }, - Closure { ret: Ptr, body: Ptr }, - Return { val: Option> }, - Ident { name: Ptr }, - Block { stmts: Vec }, - Number { value: u64 }, +#[derive(Debug, Clone, Copy)] +pub enum Expr<'a> { + Decl { + name: Ptr<'a, str>, + val: Ptr<'a, Expr<'a>>, + }, + Closure { + ret: Ptr<'a, Expr<'a>>, + body: Ptr<'a, Expr<'a>>, + }, + Return { + val: Option>>, + }, + Ident { + name: Ptr<'a, str>, + }, + Block { + stmts: Slice<'a, Expr<'a>>, + }, + Number { + value: u64, + }, } -impl std::fmt::Display for Expr { +impl<'a> std::fmt::Display for Expr<'a> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { thread_local! { static INDENT: Cell = Cell::new(0); @@ -128,7 +158,7 @@ impl std::fmt::Display for Expr { writeln!(f, "{{")?; INDENT.with(|i| i.set(i.get() + 1)); let res = crate::try_block(|| { - for stmt in stmts { + for stmt in *stmts { for _ in 0..INDENT.with(|i| i.get()) { write!(f, " ")?; } @@ -145,14 +175,187 @@ impl std::fmt::Display for Expr { } } +#[derive(Default)] +pub struct Arena<'a> { + chunk: Cell, + ph: std::marker::PhantomData<&'a ()>, +} + +impl<'a> Arena<'a> { + pub fn alloc_str(&self, token: &str) -> &'a str { + let ptr = self.alloc_slice(token.as_bytes()); + unsafe { std::str::from_utf8_unchecked_mut(ptr) } + } + + pub fn alloc(&self, value: T) -> &'a mut T { + let layout = std::alloc::Layout::new::(); + let ptr = self.alloc_low(layout); + unsafe { ptr.cast::().write(value) }; + unsafe { ptr.cast::().as_mut() } + } + + pub fn alloc_slice(&self, slice: &[T]) -> &'a mut [T] { + let layout = std::alloc::Layout::array::(slice.len()).unwrap(); + let ptr = self.alloc_low(layout); + unsafe { + ptr.as_ptr() + .cast::() + .copy_from_nonoverlapping(slice.as_ptr(), slice.len()) + }; + unsafe { std::slice::from_raw_parts_mut(ptr.as_ptr() as _, slice.len()) } + } + + pub fn clear(&mut self) { + let chunk = self.chunk.get_mut(); + if chunk.base.is_null() { + return; + } + + loop { + let prev = ArenaChunk::prev(chunk.base); + if prev.is_null() { + break; + } + chunk.base = prev; + } + + chunk.end = unsafe { chunk.base.add(ArenaChunk::PREV_OFFSET) }; + } + + fn with_chunk(&self, f: impl FnOnce(&mut ArenaChunk) -> R) -> R { + let mut chunk = self.chunk.get(); + let r = f(&mut chunk); + self.chunk.set(chunk); + r + } + + fn alloc_low(&self, layout: std::alloc::Layout) -> NonNull { + assert!(layout.align() <= ArenaChunk::ALIGN); + assert!(layout.size() <= ArenaChunk::CHUNK_SIZE); + self.with_chunk(|chunk| { + if let Some(ptr) = chunk.alloc(layout) { + return ptr; + } + + if let Some(prev) = ArenaChunk::reset(ArenaChunk::prev(chunk.base)) { + *chunk = prev; + } else { + *chunk = ArenaChunk::new(chunk.base); + } + + chunk.alloc(layout).unwrap() + }) + } +} + +impl<'a> Drop for Arena<'a> { + fn drop(&mut self) { + use ArenaChunk as AC; + + let mut current = self.chunk.get().base; + + let mut prev = AC::prev(current); + while !prev.is_null() { + let next = AC::next(prev); + unsafe { std::alloc::dealloc(prev, AC::LAYOUT) }; + prev = next; + } + + while !current.is_null() { + let next = AC::next(current); + unsafe { std::alloc::dealloc(current, AC::LAYOUT) }; + current = next; + } + } +} + +#[derive(Clone, Copy)] +struct ArenaChunk { + base: *mut u8, + end: *mut u8, +} + +impl Default for ArenaChunk { + fn default() -> Self { + Self { + base: std::ptr::null_mut(), + end: std::ptr::null_mut(), + } + } +} + +impl ArenaChunk { + const CHUNK_SIZE: usize = 1 << 16; + const ALIGN: usize = std::mem::align_of::(); + const NEXT_OFFSET: usize = Self::CHUNK_SIZE - std::mem::size_of::<*mut u8>(); + const PREV_OFFSET: usize = Self::NEXT_OFFSET - std::mem::size_of::<*mut u8>(); + const LAYOUT: std::alloc::Layout = + unsafe { std::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) }; + + fn new(next: *mut u8) -> Self { + let base = unsafe { std::alloc::alloc(Self::LAYOUT) }; + let end = unsafe { base.add(Self::PREV_OFFSET) }; + if !next.is_null() { + Self::set_prev(next, base); + } + Self::set_next(base, next); + Self::set_prev(base, std::ptr::null_mut()); + Self { base, end } + } + + fn set_next(curr: *mut u8, next: *mut u8) { + unsafe { std::ptr::write(curr.add(Self::NEXT_OFFSET) as *mut _, next) }; + } + + fn set_prev(curr: *mut u8, prev: *mut u8) { + unsafe { std::ptr::write(curr.add(Self::PREV_OFFSET) as *mut _, prev) }; + } + + fn next(curr: *mut u8) -> *mut u8 { + unsafe { std::ptr::read(curr.add(Self::NEXT_OFFSET) as *mut _) } + } + + fn prev(curr: *mut u8) -> *mut u8 { + if curr.is_null() { + return std::ptr::null_mut(); + } + unsafe { std::ptr::read(curr.add(Self::PREV_OFFSET) as *mut _) } + } + + fn reset(prev: *mut u8) -> Option { + if prev.is_null() { + return None; + } + + Some(Self { + base: prev, + end: unsafe { prev.add(Self::CHUNK_SIZE) }, + }) + } + + fn alloc(&mut self, layout: std::alloc::Layout) -> Option> { + let padding = self.end as usize - (self.end as usize & !(layout.align() - 1)); + let size = layout.size() + padding; + if size > self.end as usize - self.base as usize { + return None; + } + unsafe { self.end = self.end.sub(size) }; + unsafe { Some(NonNull::new_unchecked(self.end)) } + } +} + #[cfg(test)] mod tests { fn parse(input: &'static str, output: &mut String) { use std::fmt::Write; - let mut parser = super::Parser::new(input, std::path::Path::new("test")); + let mut arena = super::Arena::default(); + let mut buffer = Vec::new(); + let mut parser = + super::Parser::new(input, std::path::Path::new("test"), &arena, &mut buffer); for expr in parser.file() { writeln!(output, "{}", expr).unwrap(); } + arena.clear(); } crate::run_tests! { parse: diff --git a/hblang/tests/hblang::codegen::tests::example.txt b/hblang/tests/hblang::codegen::tests::example.txt index 3e43936a..def855cf 100644 --- a/hblang/tests/hblang::codegen::tests::example.txt +++ b/hblang/tests/hblang::codegen::tests::example.txt @@ -1,9 +1,8 @@ start: - jala r0, main, 0 + addi64 r254, r254, 8 + jal r0, r0, main tx main: - subi64 r254, r254, 8 - ld r31, r254, r0, 8 jala r0, r31, 0 diff --git a/hbvm/src/mem/addr.rs b/hbvm/src/mem/addr.rs index 2a162afc..d994a070 100644 --- a/hbvm/src/mem/addr.rs +++ b/hbvm/src/mem/addr.rs @@ -47,6 +47,12 @@ impl Address { self.0 } + /// Get ptr to the next instruction + #[inline(always)] + pub fn next(self) -> u64 { + self.0.wrapping_add(core::mem::size_of::() as u64 + 1) + } + /// Construct new address #[inline(always)] pub fn new(val: u64) -> Self { diff --git a/hbvm/src/vmrun.rs b/hbvm/src/vmrun.rs index 506c0698..c8141515 100644 --- a/hbvm/src/vmrun.rs +++ b/hbvm/src/vmrun.rs @@ -250,7 +250,7 @@ where // specified register and jump to reg + relative offset. let OpsRRO(save, reg, offset) = self.decode(); - self.write_reg(save, self.pc.get()); + self.write_reg(save, self.pc.next::()); self.pc = self .pcrel(offset, 3) .wrapping_add(self.read_reg(reg).cast::()); @@ -260,7 +260,7 @@ where // specified register and jump to reg let OpsRRA(save, reg, offset) = self.decode(); - self.write_reg(save, self.pc.get()); + self.write_reg(save, self.pc.next::()); self.pc = Address::new(self.read_reg(reg).cast::().wrapping_add(offset)); } diff --git a/tests/hblang::lexer::tests::empty.txt b/tests/hblang::lexer::tests::empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/hblang::lexer::tests::examples.txt b/tests/hblang::lexer::tests::examples.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/hblang::lexer::tests::whitespace.txt b/tests/hblang::lexer::tests::whitespace.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/hblang::parser::tests::example.txt b/tests/hblang::parser::tests::example.txt new file mode 100644 index 00000000..e69de29b