identifiers are now properly checked

This commit is contained in:
mlokr 2024-05-12 11:52:58 +02:00
parent 465b185452
commit 86013a50a4
10 changed files with 462 additions and 172 deletions

View file

@ -1,33 +1,31 @@
arm_fb_ptr := ||:int return 100; arm_fb_ptr := fn(): int return 100;
x86_fb_ptr := ||:int return 100; x86_fb_ptr := fn(): int return 100;
check_platform:= ||: int { check_platform := fn(): int {
return x86_fb_ptr(); return x86_fb_ptr();
} }
set_pixel := |x: int, y: int, r: u8, g: u8, b: u8|: int := { set_pixel := fn(x: int, y: int, width: int): int {
pix_offset := y * width + x; pix_offset := y * width + x;
return 0; return 0;
} }
main := ||: int { main := fn(): int {
fb_ptr := check_platform(); fb_ptr := check_platform();
width := 100;
width := 1024; height := 30;
height := 768;
x:= 0; x:= 0;
y:= 0; y:= 0;
loop { loop {
if x <= height + 1 { if x <= height + 1 {
set_pixel(x,y,100,100,100); set_pixel(x,y,width);
x= x + 1; x = x + 1;
} else { } else {
set_pixel(x,y,100,100,100); set_pixel(x,y,width);
x := 0; x = 0;
y = y + 1; y = y + 1;
} }
if y == width { if y == width {

View file

@ -0,0 +1,6 @@
main := fn(): int {
a := 1;
b := &a;
*b = 2;
return a - 2;
}

5
hblang/foo.b Normal file
View file

@ -0,0 +1,5 @@
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 14 filtered out; finished in 0.00s

View file

@ -1,17 +1,33 @@
use crate::ident::Ident;
use { use {
crate::{ crate::{
instrs, lexer, log, instrs, lexer, log,
parser::{self, Expr}, parser::{self},
}, },
std::rc::Rc, std::rc::Rc,
}; };
use {lexer::TokenKind as T, parser::Expr as E};
type LabelId = u32; type LabelId = u32;
type Reg = u8; type Reg = u8;
type MaskElem = u64; type MaskElem = u64;
type Type = u32; type Type = u32;
mod bt { #[derive(Debug)]
struct LinReg(Reg);
#[cfg(debug_assertions)]
impl Drop for LinReg {
    /// Leak detector, compiled only when debug assertions are enabled.
    ///
    /// Dropping a `LinReg` panics with "reg leaked". The drop is only tolerated
    /// while the thread is already panicking, so the original panic is not
    /// turned into a double panic.
    fn drop(&mut self) {
        if std::thread::panicking() {
            return;
        }
        panic!("reg leaked");
    }
}
pub mod bt {
use super::*; use super::*;
const fn builtin_type(id: u32) -> Type { const fn builtin_type(id: u32) -> Type {
@ -20,30 +36,44 @@ mod bt {
macro_rules! builtin_type { macro_rules! builtin_type {
($($name:ident;)*) => {$( ($($name:ident;)*) => {$(
pub const $name: Type = builtin_type(${index(0)}); pub const $name: Type = TypeKind::Builtin(${index(0)}).encode();
)*}; )*};
} }
builtin_type! { builtin_type! {
VOID;
UNREACHABLE;
INT; INT;
BOOL; BOOL;
MAX;
} }
} }
#[derive(Debug)]
enum TypeKind { enum TypeKind {
Builtin(Type), Builtin(Type),
Struct(Type), Struct(Type),
Pointer(Type),
} }
impl TypeKind { impl TypeKind {
fn from_ty(ty: Type) -> Self { const fn from_ty(ty: Type) -> Self {
if ty > bt::MAX { let (flag, index) = (ty & 0b11, ty >> 2);
Self::Builtin(ty) match flag {
} else { 0 => Self::Builtin(index),
Self::Struct(ty) 1 => Self::Pointer(index),
2 => Self::Struct(index),
_ => unreachable!(),
} }
} }
/// Packs this kind into a raw `Type`: the payload index is shifted left by
/// two bits and the low two bits carry the tag (0 = builtin, 1 = pointer,
/// 2 = struct).
const fn encode(self) -> Type {
    match self {
        Self::Builtin(index) => index << 2,
        Self::Pointer(index) => index << 2 | 1,
        Self::Struct(index) => index << 2 | 2,
    }
}
} }
const STACK_PTR: Reg = 254; const STACK_PTR: Reg = 254;
@ -144,7 +174,7 @@ impl Func {
} + shift; } + shift;
log::dbg!( log::dbg!(
label.name.as_ref(), label.name,
offset, offset,
reloc.size, reloc.size,
reloc.instr_offset, reloc.instr_offset,
@ -183,16 +213,17 @@ impl RegAlloc {
self.used.clear(); self.used.clear();
} }
fn allocate(&mut self) -> Reg { fn allocate(&mut self) -> LinReg {
let reg = self.free.pop().expect("TODO: we need to spill"); let reg = self.free.pop().expect("TODO: we need to spill");
if self.used.binary_search_by_key(&!reg, |&r| !r).is_err() { if self.used.binary_search_by_key(&!reg, |&r| !r).is_err() {
self.used.push(reg); self.used.push(reg);
} }
reg LinReg(reg)
} }
fn free(&mut self, reg: Reg) { fn free(&mut self, reg: LinReg) {
self.free.push(reg); self.free.push(reg.0);
std::mem::forget(reg);
} }
} }
@ -200,13 +231,12 @@ struct FnLabel {
offset: u32, offset: u32,
// TODO: use different style of identifier that does not allocate, eg. index + length into a // TODO: use different style of identifier that does not allocate, eg. index + length into a
// file // file
name: Rc<str>, name: Ident,
} }
struct Variable<'a> { struct Variable {
name: Rc<str>, id: Ident,
offset: u64, value: Value,
ty: Expr<'a>,
} }
struct RetReloc { struct RetReloc {
@ -227,35 +257,38 @@ struct Struct {
pub struct Codegen<'a> { pub struct Codegen<'a> {
path: &'a std::path::Path, path: &'a std::path::Path,
ret: Expr<'a>, ret: Type,
gpa: RegAlloc, gpa: RegAlloc,
code: Func, code: Func,
temp: Func, temp: Func,
labels: Vec<FnLabel>, labels: Vec<FnLabel>,
stack_size: u64, stack_size: u64,
vars: Vec<Variable<'a>>, vars: Vec<Variable>,
stack_relocs: Vec<StackReloc>, stack_relocs: Vec<StackReloc>,
ret_relocs: Vec<RetReloc>, ret_relocs: Vec<RetReloc>,
loops: Vec<Loop>, loops: Vec<Loop>,
records: Vec<Struct>, records: Vec<Struct>,
pointers: Vec<Type>,
main: Option<LabelId>,
} }
impl<'a> Codegen<'a> { impl<'a> Codegen<'a> {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
path: std::path::Path::new(""), path: std::path::Path::new(""),
ret: Expr::Return { val: None, pos: 0 }, ret: bt::VOID,
gpa: Default::default(), gpa: Default::default(),
code: Default::default(), code: Default::default(),
temp: Default::default(), temp: Default::default(),
labels: Default::default(), labels: Default::default(),
stack_size: 0, stack_size: 0,
vars: Default::default(), vars: Default::default(),
stack_relocs: Default::default(), stack_relocs: Default::default(),
ret_relocs: Default::default(), ret_relocs: Default::default(),
loops: Default::default(), loops: Default::default(),
records: Default::default(), records: Default::default(),
pointers: Default::default(),
main: None,
} }
} }
@ -271,17 +304,42 @@ impl<'a> Codegen<'a> {
Ok(()) Ok(())
} }
fn loc_to_reg(&mut self, loc: Loc) -> Reg { fn size_of(&self, ty: Type) -> u64 {
// TODO: proper alignment
match TypeKind::from_ty(ty) {
TypeKind::Pointer(_) | TypeKind::Builtin(bt::INT) => 8,
TypeKind::Builtin(bt::BOOL) => 1,
TypeKind::Builtin(_) => unreachable!(),
TypeKind::Struct(ty) => self.records[ty as usize]
.fields
.iter()
.map(|(_, ty)| self.size_of(*ty))
.sum(),
}
}
fn loc_to_reg(&mut self, loc: Loc) -> LinReg {
match loc { match loc {
Loc::RegRef(rr) => {
let reg = self.gpa.allocate();
self.code.encode(instrs::cp(reg.0, rr));
reg
}
Loc::Reg(reg) => reg, Loc::Reg(reg) => reg,
Loc::Deref(dreg) => {
let reg = self.gpa.allocate();
self.code.encode(instrs::ld(reg.0, dreg.0, 0, 8));
self.gpa.free(dreg);
reg
}
Loc::Imm(imm) => { Loc::Imm(imm) => {
let reg = self.gpa.allocate(); let reg = self.gpa.allocate();
self.code.encode(instrs::li64(reg, imm)); self.code.encode(instrs::li64(reg.0, imm));
reg reg
} }
Loc::Stack(offset) => { Loc::Stack(offset) => {
let reg = self.gpa.allocate(); let reg = self.gpa.allocate();
self.load_stack(reg, offset, 8); self.load_stack(reg.0, offset, 8);
reg reg
} }
} }
@ -309,11 +367,11 @@ impl<'a> Codegen<'a> {
self.code.encode(instrs::ld(reg, STACK_PTR, offset, size)); self.code.encode(instrs::ld(reg, STACK_PTR, offset, size));
} }
fn reloc_stack(&mut self, stack_height: u64) { fn reloc_stack(&mut self) {
for reloc in self.stack_relocs.drain(..) { for reloc in self.stack_relocs.drain(..) {
let dest = &mut self.code.code[reloc.offset as usize..][..reloc.size as usize]; let dest = &mut self.code.code[reloc.offset as usize..][..reloc.size as usize];
let value = u64::from_ne_bytes(dest.try_into().unwrap()); let value = u64::from_ne_bytes(dest.try_into().unwrap());
let offset = stack_height - value; let offset = self.stack_size - value;
dest.copy_from_slice(&offset.to_ne_bytes()); dest.copy_from_slice(&offset.to_ne_bytes());
} }
} }
@ -329,80 +387,149 @@ impl<'a> Codegen<'a> {
} }
} }
fn expr(&mut self, expr: &'a parser::Expr<'a>, expeted: Option<Expr<'a>>) -> Option<Value<'a>> { fn ty(&mut self, expr: &parser::Expr<'a>) -> Type {
use {lexer::TokenKind as T, parser::Expr as E};
match *expr { match *expr {
E::Ident { name: "int", .. } => bt::INT,
E::Ident { name: "bool", .. } => bt::BOOL,
expr => unimplemented!("type: {:#?}", expr),
}
}
fn expr(&mut self, expr: &'a parser::Expr<'a>, expeted: Option<Type>) -> Option<Value> {
match *expr {
E::UnOp {
op: T::Amp, val, ..
} => {
let val = self.expr(val, None).unwrap();
match val.loc {
Loc::Stack(off) => {
let reg = self.gpa.allocate();
self.stack_relocs.push(StackReloc {
offset: self.code.code.len() as u32 + 3,
size: 8,
});
self.code.encode(instrs::addi64(reg.0, STACK_PTR, off));
Some(Value {
ty: self.alloc_pointer(val.ty),
loc: Loc::Reg(reg),
})
}
l => panic!("cant take pointer of {:?}", l),
}
}
E::UnOp {
op: T::Star, val, ..
} => {
let val = self.expr(val, None).unwrap();
let reg = self.loc_to_reg(val.loc);
match TypeKind::from_ty(val.ty) {
TypeKind::Pointer(ty) => Some(Value {
ty: self.pointers[ty as usize],
loc: Loc::Deref(reg),
}),
_ => panic!("cant deref {:?}", val.ty),
}
}
E::BinOp { E::BinOp {
left: E::Ident { name, .. }, left: E::Ident { name, id, .. },
op: T::Decl, op: T::Decl,
right: E::Closure { right: E::Closure {
ret, body, args, .. ret, body, args, ..
}, },
} => { } => {
let frame = self.add_label(name); log::dbg!("fn: {}", name);
for (i, &(name, ty)) in args.iter().enumerate() { let frame = self.add_label(*id);
if *name == "main" {
self.main = Some(frame.label);
}
log::dbg!("fn-args");
for (i, arg) in args.iter().enumerate() {
let offset = self.alloc_stack(8); let offset = self.alloc_stack(8);
self.decl_var(name, offset, ty); let ty = self.ty(&arg.ty);
self.vars.push(Variable {
id: arg.id,
value: Value {
ty,
loc: Loc::Stack(offset),
},
});
self.store_stack(i as Reg + 2, offset, 8); self.store_stack(i as Reg + 2, offset, 8);
} }
self.gpa.init_callee(); self.gpa.init_callee();
self.ret = **ret; self.ret = self.ty(ret);
log::dbg!("fn-body");
self.expr(body, None); self.expr(body, None);
self.vars.clear(); self.vars.clear();
let stack = std::mem::take(&mut self.stack_size); log::dbg!("fn-relocs");
self.reloc_stack(stack); self.reloc_stack();
log::dbg!("fn-prelude");
self.write_fn_prelude(frame); self.write_fn_prelude(frame);
log::dbg!("fn-ret");
self.reloc_rets(); self.reloc_rets();
self.ret(); self.ret();
self.stack_size = 0;
self.vars.clear();
None None
} }
E::BinOp { E::BinOp {
left: E::Ident { name, .. }, left: E::Ident { id, .. },
op: T::Decl, op: T::Decl,
right, right,
} => { } => {
let val = self.expr(right, None).unwrap(); let val = self.expr(right, None).unwrap();
let reg = self.loc_to_reg(val.loc); let reg = self.loc_to_reg(val.loc);
let offset = self.alloc_stack(8); let offset = self.alloc_stack(8);
self.decl_var(name, offset, val.ty); self.vars.push(Variable {
self.store_stack(reg, offset, 8); id: *id,
value: Value {
ty: val.ty,
loc: Loc::Stack(offset),
},
});
self.store_stack(reg.0, offset, 8);
self.gpa.free(reg);
None None
} }
E::Call { E::Call {
func: E::Ident { name, .. }, func: E::Ident { id, .. },
args, args,
} => { } => {
for (i, arg) in args.iter().enumerate() { for (i, arg) in args.iter().enumerate() {
let arg = self.expr(arg, None).unwrap(); let arg = self.expr(arg, None).unwrap();
let reg = self.loc_to_reg(arg.loc); let reg = self.loc_to_reg(arg.loc);
self.code.encode(instrs::cp(i as Reg + 2, reg)); self.code.encode(instrs::cp(i as Reg + 2, reg.0));
self.gpa.free(reg);
} }
let func = self.get_or_reserve_label(name); let func = self.get_or_reserve_label(*id);
self.code.call(func); self.code.call(func);
let reg = self.gpa.allocate(); let reg = self.gpa.allocate();
self.code.encode(instrs::cp(reg, 1)); self.code.encode(instrs::cp(reg.0, 1));
Some(Value { Some(Value {
ty: self.ret, ty: self.ret,
loc: Loc::Reg(reg), loc: Loc::Reg(reg),
}) })
} }
E::Ident { name, .. } => { E::Ident { name, id, .. } => {
let var = self.vars.iter().find(|v| v.name.as_ref() == name).unwrap(); let var = self
.vars
.iter()
.find(|v| v.id == id)
.unwrap_or_else(|| panic!("variable not found: {:?}", name));
Some(Value { Some(Value {
ty: var.ty, ty: var.value.ty,
loc: Loc::Stack(var.offset), loc: var.value.loc.take_ref(),
}) })
} }
E::Return { val, .. } => { E::Return { val, .. } => {
if let Some(val) = val { if let Some(val) = val {
let val = self.expr(val, Some(self.ret)).unwrap(); let val = self.expr(val, Some(self.ret)).unwrap();
if val.ty != self.ret { if val.ty != self.ret {
//panic!("expected {:?}, got {:?}", self.ret, val.ty); panic!("expected {:?}, got {:?}", self.ret, val.ty);
} }
self.assign( self.assign(
Value { Value {
ty: self.ret, ty: self.ret,
loc: Loc::Reg(1), loc: Loc::RegRef(1),
}, },
val, val,
); );
@ -417,39 +544,33 @@ impl<'a> Codegen<'a> {
} }
E::Block { stmts, .. } => { E::Block { stmts, .. } => {
for stmt in stmts { for stmt in stmts {
self.expr(stmt, None); if let Some(Loc::Reg(reg)) = self.expr(stmt, None).map(|v| v.loc) {
self.gpa.free(reg);
}
} }
None None
} }
E::Number { value, .. } => Some(Value { E::Number { value, .. } => Some(Value {
ty: expeted.unwrap_or(Expr::Ident { ty: expeted.unwrap_or(bt::INT),
name: "int",
pos: 0,
}),
loc: Loc::Imm(value), loc: Loc::Imm(value),
}), }),
E::If { E::If {
cond, then, else_, .. cond, then, else_, ..
} => { } => {
let cond = self log::dbg!("if-cond");
.expr( let cond = self.expr(cond, Some(bt::BOOL)).unwrap();
cond,
Some(Expr::Ident {
name: "bool",
pos: 0,
}),
)
.unwrap();
let reg = self.loc_to_reg(cond.loc); let reg = self.loc_to_reg(cond.loc);
let jump_offset = self.code.code.len() as u32; let jump_offset = self.code.code.len() as u32;
self.code.encode(instrs::jeq(reg, 0, 0)); self.code.encode(instrs::jeq(reg.0, 0, 0));
self.gpa.free(reg); self.gpa.free(reg);
log::dbg!("if-then");
self.expr(then, None); self.expr(then, None);
let jump; let jump;
if let Some(else_) = else_ { if let Some(else_) = else_ {
log::dbg!("if-else");
let else_jump_offset = self.code.code.len() as u32; let else_jump_offset = self.code.code.len() as u32;
self.code.encode(instrs::jmp(0)); self.code.encode(instrs::jmp(0));
@ -458,18 +579,21 @@ impl<'a> Codegen<'a> {
self.expr(else_, None); self.expr(else_, None);
let jump = self.code.code.len() as i32 - else_jump_offset as i32; let jump = self.code.code.len() as i32 - else_jump_offset as i32;
log::dbg!("if-else-jump: {}", jump);
self.code.code[else_jump_offset as usize + 1..][..4] self.code.code[else_jump_offset as usize + 1..][..4]
.copy_from_slice(&jump.to_ne_bytes()); .copy_from_slice(&jump.to_ne_bytes());
} else { } else {
jump = self.code.code.len() as i16 - jump_offset as i16; jump = self.code.code.len() as i16 - jump_offset as i16;
} }
log::dbg!("if-then-jump: {}", jump);
self.code.code[jump_offset as usize + 3..][..2] self.code.code[jump_offset as usize + 3..][..2]
.copy_from_slice(&jump.to_ne_bytes()); .copy_from_slice(&jump.to_ne_bytes());
None None
} }
E::Loop { body, .. } => { E::Loop { body, .. } => {
log::dbg!("loop");
let loop_start = self.code.code.len() as u32; let loop_start = self.code.code.len() as u32;
self.loops.push(Loop { self.loops.push(Loop {
offset: loop_start, offset: loop_start,
@ -477,6 +601,7 @@ impl<'a> Codegen<'a> {
}); });
self.expr(body, None); self.expr(body, None);
log::dbg!("loop-end");
let loop_end = self.code.code.len(); let loop_end = self.code.code.len();
self.code self.code
.encode(instrs::jmp(loop_start as i32 - loop_end as i32)); .encode(instrs::jmp(loop_start as i32 - loop_end as i32));
@ -514,6 +639,9 @@ impl<'a> Codegen<'a> {
E::BinOp { left, op, right } => { E::BinOp { left, op, right } => {
let left = self.expr(left, expeted).unwrap(); let left = self.expr(left, expeted).unwrap();
let right = self.expr(right, Some(left.ty)).unwrap(); let right = self.expr(right, Some(left.ty)).unwrap();
if op == T::Assign {
return self.assign(left, right);
}
let lhs = self.loc_to_reg(left.loc); let lhs = self.loc_to_reg(left.loc);
let rhs = self.loc_to_reg(right.loc); let rhs = self.loc_to_reg(right.loc);
@ -523,36 +651,29 @@ impl<'a> Codegen<'a> {
T::Minus => instrs::sub64, T::Minus => instrs::sub64,
T::Star => instrs::mul64, T::Star => instrs::mul64,
T::Le => { T::Le => {
self.code.encode(instrs::cmpu(lhs, lhs, rhs)); self.code.encode(instrs::cmpu(lhs.0, lhs.0, rhs.0));
self.gpa.free(rhs); self.gpa.free(rhs);
self.code.encode(instrs::cmpui(lhs, lhs, 1)); self.code.encode(instrs::cmpui(lhs.0, lhs.0, 1));
return Some(Value { return Some(Value {
ty: Expr::Ident { ty: bt::BOOL,
name: "bool",
pos: 0,
},
loc: Loc::Reg(lhs), loc: Loc::Reg(lhs),
}); });
} }
T::Eq => { T::Eq => {
self.code.encode(instrs::cmpu(lhs, lhs, rhs)); self.code.encode(instrs::cmpu(lhs.0, lhs.0, rhs.0));
self.gpa.free(rhs); self.gpa.free(rhs);
self.code.encode(instrs::cmpui(lhs, lhs, 0)); self.code.encode(instrs::cmpui(lhs.0, lhs.0, 0));
self.code.encode(instrs::not(lhs, lhs)); self.code.encode(instrs::not(lhs.0, lhs.0));
return Some(Value { return Some(Value {
ty: Expr::Ident { ty: bt::BOOL,
name: "bool",
pos: 0,
},
loc: Loc::Reg(lhs), loc: Loc::Reg(lhs),
}); });
} }
T::FSlash => |reg0, reg1, reg2| instrs::diru64(reg0, ZERO, reg1, reg2), T::FSlash => |reg0, reg1, reg2| instrs::diru64(reg0, ZERO, reg1, reg2),
T::Assign => return self.assign(left, right),
_ => unimplemented!("{:#?}", op), _ => unimplemented!("{:#?}", op),
}; };
self.code.encode(op(lhs, lhs, rhs)); self.code.encode(op(lhs.0, lhs.0, rhs.0));
self.gpa.free(rhs); self.gpa.free(rhs);
Some(Value { Some(Value {
@ -564,32 +685,33 @@ impl<'a> Codegen<'a> {
} }
} }
fn assign(&mut self, left: Value<'a>, right: Value<'a>) -> Option<Value<'a>> { fn assign(&mut self, left: Value, right: Value) -> Option<Value> {
let rhs = self.loc_to_reg(right.loc); let rhs = self.loc_to_reg(right.loc);
match left.loc { match left.loc {
Loc::Reg(reg) => self.code.encode(instrs::cp(reg, rhs)), Loc::Deref(reg) => {
Loc::Stack(offset) => self.store_stack(rhs, offset, 8), self.code.encode(instrs::st(rhs.0, reg.0, 0, 8));
self.gpa.free(reg);
}
Loc::RegRef(reg) => self.code.encode(instrs::cp(reg, rhs.0)),
Loc::Stack(offset) => self.store_stack(rhs.0, offset, 8),
_ => unimplemented!(), _ => unimplemented!(),
} }
self.gpa.free(rhs); self.gpa.free(rhs);
Some(left) None
} }
fn get_or_reserve_label(&mut self, name: &str) -> LabelId { fn get_or_reserve_label(&mut self, name: Ident) -> LabelId {
if let Some(label) = self.labels.iter().position(|l| l.name.as_ref() == name) { if let Some(label) = self.labels.iter().position(|l| l.name == name) {
label as u32 label as u32
} else { } else {
self.labels.push(FnLabel { self.labels.push(FnLabel { offset: 0, name });
offset: 0,
name: name.into(),
});
self.labels.len() as u32 - 1 self.labels.len() as u32 - 1
} }
} }
fn add_label(&mut self, name: &str) -> Frame { fn add_label(&mut self, name: Ident) -> Frame {
let offset = self.code.code.len() as u32; let offset = self.code.code.len() as u32;
let label = if let Some(label) = self.labels.iter().position(|l| l.name.as_ref() == name) { let label = if let Some(label) = self.labels.iter().position(|l| l.name == name) {
self.labels[label].offset = offset; self.labels[label].offset = offset;
label as u32 label as u32
} else { } else {
@ -607,11 +729,8 @@ impl<'a> Codegen<'a> {
} }
} }
fn get_label(&self, name: &str) -> LabelId { fn get_label(&self, name: Ident) -> LabelId {
self.labels self.labels.iter().position(|l| l.name == name).unwrap() as _
.iter()
.position(|l| l.name.as_ref() == name)
.unwrap() as _
} }
fn write_fn_prelude(&mut self, frame: Frame) { fn write_fn_prelude(&mut self, frame: Frame) {
@ -619,7 +738,7 @@ impl<'a> Codegen<'a> {
for &reg in self.gpa.used.clone().iter() { for &reg in self.gpa.used.clone().iter() {
self.temp.push(reg, 8); self.temp.push(reg, 8);
} }
self.temp.subi64(STACK_PTR, STACK_PTR, self.stack_size as _); self.temp.subi64(STACK_PTR, STACK_PTR, self.stack_size);
for reloc in &mut self.code.relocs[frame.prev_relocs..] { for reloc in &mut self.code.relocs[frame.prev_relocs..] {
reloc.offset += self.temp.code.len() as u32; reloc.offset += self.temp.code.len() as u32;
@ -637,7 +756,7 @@ impl<'a> Codegen<'a> {
fn ret(&mut self) { fn ret(&mut self) {
self.code self.code
.encode(instrs::addi64(STACK_PTR, STACK_PTR, self.stack_size as _)); .encode(instrs::addi64(STACK_PTR, STACK_PTR, self.stack_size));
for reg in self.gpa.used.clone().iter().rev() { for reg in self.gpa.used.clone().iter().rev() {
self.code.pop(*reg, 8); self.code.pop(*reg, 8);
} }
@ -645,7 +764,7 @@ impl<'a> Codegen<'a> {
} }
pub fn dump(mut self, out: &mut impl std::io::Write) -> std::io::Result<()> { pub fn dump(mut self, out: &mut impl std::io::Write) -> std::io::Result<()> {
self.temp.prelude(self.get_label("main")); self.temp.prelude(self.main.unwrap());
self.temp self.temp
.relocate(&self.labels, self.temp.code.len() as i64); .relocate(&self.labels, self.temp.code.len() as i64);
@ -654,26 +773,42 @@ impl<'a> Codegen<'a> {
out.write_all(&self.code.code) out.write_all(&self.code.code)
} }
fn decl_var(&mut self, name: &str, offset: u64, ty: Expr<'a>) { fn alloc_pointer(&mut self, ty: Type) -> Type {
self.vars.push(Variable { let ty = self
name: name.into(), .pointers
offset, .iter()
ty, .position(|&p| p == ty)
.unwrap_or_else(|| {
self.pointers.push(ty);
self.pointers.len() - 1
}); });
TypeKind::Pointer(ty as Type).encode()
} }
} }
pub struct Value<'a> { pub struct Value {
ty: Expr<'a>, ty: Type,
loc: Loc, loc: Loc,
} }
#[derive(Clone, Copy)] #[derive(Debug)]
pub enum Loc { enum Loc {
Reg(Reg), Reg(LinReg),
RegRef(Reg),
Deref(LinReg),
Imm(u64), Imm(u64),
Stack(u64), Stack(u64),
} }
impl Loc {
    /// Builds a location that refers to the same storage as `self`.
    ///
    /// * `Stack(off)` yields another `Stack` with the same offset.
    /// * `Reg(reg)` yields `RegRef` carrying the register number of `reg`.
    ///
    /// Any other variant hits `unreachable!()`.
    fn take_ref(&self) -> Loc {
        if let Self::Stack(offset) = self {
            return Self::Stack(*offset);
        }
        if let Self::Reg(owned) = self {
            return Self::RegRef(owned.0);
        }
        unreachable!()
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
@ -738,8 +873,7 @@ mod tests {
fn generate(input: &'static str, output: &mut String) { fn generate(input: &'static str, output: &mut String) {
let path = std::path::Path::new("test"); let path = std::path::Path::new("test");
let arena = crate::parser::Arena::default(); let arena = crate::parser::Arena::default();
let mut buffer = Vec::new(); let mut parser = super::parser::Parser::new(input, path, &arena);
let mut parser = super::parser::Parser::new(input, path, &arena, &mut buffer);
let exprs = parser.file(); let exprs = parser.file();
let mut codegen = super::Codegen::new(); let mut codegen = super::Codegen::new();
codegen.file(path, &exprs).unwrap(); codegen.file(path, &exprs).unwrap();
@ -749,7 +883,7 @@ mod tests {
std::fs::write("test.bin", &out).unwrap(); std::fs::write("test.bin", &out).unwrap();
use std::fmt::Write; use std::fmt::Write;
let mut stack = [0_u64; 1024]; let mut stack = [0_u64; 128];
let mut vm = unsafe { let mut vm = unsafe {
hbvm::Vm::<TestMem, 0>::new(TestMem, hbvm::mem::Address::new(out.as_ptr() as u64)) hbvm::Vm::<TestMem, 0>::new(TestMem, hbvm::mem::Address::new(out.as_ptr() as u64))
@ -779,5 +913,7 @@ mod tests {
functions => include_str!("../examples/functions.hb"); functions => include_str!("../examples/functions.hb");
if_statements => include_str!("../examples/if_statement.hb"); if_statements => include_str!("../examples/if_statement.hb");
loops => include_str!("../examples/loops.hb"); loops => include_str!("../examples/loops.hb");
fb_driver => include_str!("../examples/fb_driver.hb");
pointers => include_str!("../examples/pointers.hb");
} }
} }

View file

@ -0,0 +1,21 @@
/// Compact identifier handle: a source position and a length packed into a
/// single `u32`.
///
/// The low [`LEN_BITS`] bits hold the length; the remaining high bits hold the
/// position.
pub type Ident = u32;

/// Number of low bits of an [`Ident`] reserved for the length.
const LEN_BITS: u32 = 6;

/// Mask selecting the length bits; also the largest encodable length.
const LEN_MASK: u32 = (1 << LEN_BITS) - 1;

/// Number of high bits left over for the position.
const POS_BITS: u32 = u32::BITS - LEN_BITS;

/// Returns the length stored in `ident`.
pub fn len(ident: Ident) -> u32 {
    ident & LEN_MASK
}

/// Returns the position stored in `ident`.
pub fn pos(ident: Ident) -> u32 {
    ident >> LEN_BITS
}

/// Packs `pos` and `len` into an [`Ident`].
///
/// # Panics
///
/// Panics if `len` does not fit in [`LEN_BITS`] bits or `pos` does not fit in
/// the remaining bits. These are hard assertions in every build profile:
/// letting either value overflow would silently produce an id that decodes to
/// a different position/length and may collide with another identifier.
pub fn new(pos: u32, len: u32) -> Ident {
    assert!(
        len <= LEN_MASK,
        "identifier length {len} does not fit in {LEN_BITS} bits"
    );
    assert!(
        pos >> POS_BITS == 0,
        "identifier position {pos} does not fit in {POS_BITS} bits"
    );
    (pos << LEN_BITS) | len
}

/// Returns the half-open range `pos..pos + len` described by `ident`.
pub fn range(ident: Ident) -> std::ops::Range<usize> {
    let start = pos(ident) as usize;
    start..start + len(ident) as usize
}

View file

@ -9,6 +9,10 @@ impl Token {
pub fn range(&self) -> std::ops::Range<usize> { pub fn range(&self) -> std::ops::Range<usize> {
self.start as usize..self.end as usize self.start as usize..self.end as usize
} }
/// Length of the token's span, computed as `end - start`.
pub fn len(&self) -> u32 {
self.end - self.start
}
} }
macro_rules! gen_token_kind { macro_rules! gen_token_kind {
@ -102,6 +106,8 @@ gen_token_kind! {
#[prec = 21] #[prec = 21]
Le = "<=", Le = "<=",
Eq = "==", Eq = "==",
#[prec = 22]
Amp = "&",
#[prec = 23] #[prec = 23]
Plus = "+", Plus = "+",
Minus = "-", Minus = "-",
@ -124,8 +130,8 @@ impl<'a> Lexer<'a> {
} }
} }
pub fn slice(&self, tok: Token) -> &'a str { pub fn slice(&self, tok: std::ops::Range<usize>) -> &'a str {
unsafe { std::str::from_utf8_unchecked(&self.bytes[tok.range()]) } unsafe { std::str::from_utf8_unchecked(&self.bytes[tok]) }
} }
fn peek(&self) -> Option<u8> { fn peek(&self) -> Option<u8> {
@ -204,6 +210,7 @@ impl<'a> Iterator for Lexer<'a> {
b'-' => T::Minus, b'-' => T::Minus,
b'*' => T::Star, b'*' => T::Star,
b'/' => T::FSlash, b'/' => T::FSlash,
b'&' => T::Amp,
b'(' => T::LParen, b'(' => T::LParen,
b')' => T::RParen, b')' => T::RParen,
b'{' => T::LBrace, b'{' => T::LBrace,

View file

@ -1,22 +1,27 @@
use std::{cell::Cell, ops::Not, ptr::NonNull}; use std::{cell::Cell, ops::Not, ptr::NonNull};
use crate::lexer::{Lexer, Token, TokenKind}; use crate::{
codegen::bt,
ident::{self, Ident},
lexer::{Lexer, Token, TokenKind},
};
/// Bookkeeping entry for one identifier the parser is currently tracking.
struct ScopeIdent<'a> {
// Packed id of the identifier (see the `ident` module).
ident: Ident,
// Becomes true once the identifier has been resolved with `decl == true`.
declared: bool,
// Flag cell handed out by the most recent `resolve_ident` call for this
// identifier; `pop_scope` sets it to true when the entry is removed.
last: &'a Cell<bool>,
}
pub struct Parser<'a, 'b> { pub struct Parser<'a, 'b> {
path: &'a std::path::Path, path: &'a std::path::Path,
lexer: Lexer<'a>, lexer: Lexer<'a>,
arena: &'b Arena<'a>, arena: &'b Arena<'a>,
expr_buf: &'b mut Vec<Expr<'a>>,
token: Token, token: Token,
idents: Vec<ScopeIdent<'a>>,
} }
impl<'a, 'b> Parser<'a, 'b> { impl<'a, 'b> Parser<'a, 'b> {
pub fn new( pub fn new(input: &'a str, path: &'a std::path::Path, arena: &'b Arena<'a>) -> Self {
input: &'a str,
path: &'a std::path::Path,
arena: &'b Arena<'a>,
expr_buf: &'b mut Vec<Expr<'static>>,
) -> Self {
let mut lexer = Lexer::new(input); let mut lexer = Lexer::new(input);
let token = lexer.next(); let token = lexer.next();
Self { Self {
@ -24,13 +29,30 @@ impl<'a, 'b> Parser<'a, 'b> {
token, token,
path, path,
arena, arena,
// we ensure its empty before returning form parse idents: Vec::new(),
expr_buf: unsafe { std::mem::transmute(expr_buf) },
} }
} }
pub fn file(&mut self) -> &'a [Expr<'a>] { pub fn file(&mut self) -> &'a [Expr<'a>] {
self.collect(|s| (s.token.kind != TokenKind::Eof).then(|| s.expr())) let f = self.collect(|s| (s.token.kind != TokenKind::Eof).then(|| s.expr()));
self.pop_scope(0);
let has_undeclared = !self.idents.is_empty();
for id in self.idents.drain(..) {
let (line, col) = self.lexer.line_col(ident::pos(id.ident));
eprintln!(
"{}:{}:{} => undeclared identifier: {}",
self.path.display(),
line,
col,
self.lexer.slice(ident::range(id.ident))
);
}
if has_undeclared {
unreachable!();
}
f
} }
fn next(&mut self) -> Token { fn next(&mut self) -> Token {
@ -69,14 +91,59 @@ impl<'a, 'b> Parser<'a, 'b> {
left left
} }
/// Maps the name of a builtin type to its id, or `None` when `name` is not a
/// builtin.
fn try_resolve_builtin(name: &str) -> Option<Ident> {
    // FIXME: we actually do this the second time in the codegen
    match name {
        "int" => Some(bt::INT),
        "bool" => Some(bt::BOOL),
        _ => None,
    }
}
/// Resolves an identifier token to its `Ident` id.
///
/// Builtin names return the builtin's id and no flag cell. For any other
/// name the tracked identifiers are searched from the most recent entry
/// backwards for one with the same source text; if none exists a new,
/// undeclared entry is created from the token's start and the name's length.
///
/// `decl` states whether this occurrence declares the identifier. Every
/// non-builtin call allocates a fresh `Cell<bool>` (initially false), stores
/// it as the entry's `last` flag and returns it alongside the id.
fn resolve_ident(&mut self, token: Token, decl: bool) -> (Ident, Option<&'a Cell<bool>>) {
let name = self.lexer.slice(token.range());
if let Some(builtin) = Self::try_resolve_builtin(name) {
return (builtin, None);
}
// One flag per occurrence; the entry always points at the newest one.
let last = self.arena.alloc(Cell::new(false));
let id = match self
.idents
.iter_mut()
.rfind(|elem| self.lexer.slice(ident::range(elem.ident)) == name)
{
// Declaring a name whose entry is already declared is an error.
// NOTE(review): `report` presumably never returns — confirm.
Some(elem) if decl && elem.declared => {
self.report(format_args!("redeclaration of identifier: {name}"))
}
Some(elem) => elem,
None => {
// First occurrence of this name: register it as not yet declared.
let id = ident::new(token.start, name.len() as _);
self.idents.push(ScopeIdent {
ident: id,
declared: false,
last,
});
self.idents.last_mut().unwrap()
}
};
id.last = last;
id.declared |= decl;
(id.ident, Some(last))
}
fn unit_expr(&mut self) -> Expr<'a> { fn unit_expr(&mut self) -> Expr<'a> {
use {Expr as E, TokenKind as T}; use {Expr as E, TokenKind as T};
let frame = self.idents.len();
let token = self.next(); let token = self.next();
let mut expr = match token.kind { let mut expr = match token.kind {
T::Ident => E::Ident { T::Ident => {
pos: token.start, let (id, last) = self.resolve_ident(token, self.token.kind == T::Decl);
name: self.arena.alloc_str(self.lexer.slice(token)), let name = self.lexer.slice(token.range());
}, E::Ident { name, id, last }
}
T::If => E::If { T::If => E::If {
pos: token.start, pos: token.start,
cond: self.ptr_expr(), cond: self.ptr_expr(),
@ -99,10 +166,14 @@ impl<'a, 'b> Parser<'a, 'b> {
self.expect_advance(T::LParen); self.expect_advance(T::LParen);
self.collect_list(T::Comma, T::RParen, |s| { self.collect_list(T::Comma, T::RParen, |s| {
let name = s.expect_advance(T::Ident); let name = s.expect_advance(T::Ident);
let name = s.arena.alloc_str(s.lexer.slice(name)); let (id, last) = s.resolve_ident(name, true);
s.expect_advance(T::Colon); s.expect_advance(T::Colon);
let val = s.expr(); Arg {
(name, val) name: s.lexer.slice(name.range()),
id,
last,
ty: s.expr(),
}
}) })
}, },
ret: { ret: {
@ -111,13 +182,18 @@ impl<'a, 'b> Parser<'a, 'b> {
}, },
body: self.ptr_expr(), body: self.ptr_expr(),
}, },
T::Amp | T::Star => E::UnOp {
pos: token.start,
op: token.kind,
val: self.ptr_unit_expr(),
},
T::LBrace => E::Block { T::LBrace => E::Block {
pos: token.start, pos: token.start,
stmts: self.collect_list(T::Semi, T::RBrace, Self::expr), stmts: self.collect_list(T::Semi, T::RBrace, Self::expr),
}, },
T::Number => E::Number { T::Number => E::Number {
pos: token.start, pos: token.start,
value: match self.lexer.slice(token).parse() { value: match self.lexer.slice(token.range()).parse() {
Ok(value) => value, Ok(value) => value,
Err(e) => self.report(format_args!("invalid number: {e}")), Err(e) => self.report(format_args!("invalid number: {e}")),
}, },
@ -132,22 +208,46 @@ impl<'a, 'b> Parser<'a, 'b> {
loop { loop {
expr = match self.token.kind { expr = match self.token.kind {
TokenKind::LParen => { T::LParen => {
self.next(); self.next();
Expr::Call { Expr::Call {
func: self.arena.alloc(expr), func: self.arena.alloc(expr),
args: self.collect_list(TokenKind::Comma, TokenKind::RParen, Self::expr), args: self.collect_list(T::Comma, T::RParen, Self::expr),
} }
} }
_ => break, _ => break,
} }
} }
self.advance_if(TokenKind::Semi); if matches!(token.kind, T::Return) {
self.expect_advance(T::Semi);
}
if matches!(token.kind, T::Loop | T::LBrace | T::Fn) {
self.pop_scope(frame);
}
expr expr
} }
/// Closes the scope whose tracked identifiers start at index `frame`.
///
/// Entries from `frame` onward that are still undeclared are compacted to the
/// front of that region and kept. The remaining (declared) entries are
/// removed, and the `last` flag of each removed entry is set to true.
fn pop_scope(&mut self, frame: usize) {
    let total = self.idents.len();
    // Everything in `frame..keep` is an undeclared entry that stays tracked.
    let mut keep = frame;
    let mut cursor = frame;
    while cursor < total {
        if !self.idents[cursor].declared {
            self.idents.swap(cursor, keep);
            keep += 1;
        }
        cursor += 1;
    }

    self.idents
        .drain(keep..)
        .for_each(|scoped| scoped.last.set(true));
}
/// Parses one unit expression and stores it in the arena, returning the
/// arena reference.
fn ptr_unit_expr(&mut self) -> &'a Expr<'a> {
    let unit = self.unit_expr();
    self.arena.alloc(unit)
}
fn collect_list<T: Copy>( fn collect_list<T: Copy>(
&mut self, &mut self,
delim: TokenKind, delim: TokenKind,
@ -194,6 +294,14 @@ impl<'a, 'b> Parser<'a, 'b> {
} }
} }
/// One parameter of a function (`Expr::Closure`) as produced by the parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Arg<'a> {
// Source text of the parameter name.
pub name: &'a str,
// Id the parser resolved for the parameter name.
pub id: Ident,
// Flag cell returned by identifier resolution for this occurrence.
pub last: Option<&'a Cell<bool>>,
// Expression parsed after the `:` — the parameter's type.
pub ty: Expr<'a>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Expr<'a> { pub enum Expr<'a> {
Break { Break {
@ -204,7 +312,7 @@ pub enum Expr<'a> {
}, },
Closure { Closure {
pos: u32, pos: u32,
args: &'a [(&'a str, Expr<'a>)], args: &'a [Arg<'a>],
ret: &'a Expr<'a>, ret: &'a Expr<'a>,
body: &'a Expr<'a>, body: &'a Expr<'a>,
}, },
@ -217,8 +325,9 @@ pub enum Expr<'a> {
val: Option<&'a Expr<'a>>, val: Option<&'a Expr<'a>>,
}, },
Ident { Ident {
pos: u32,
name: &'a str, name: &'a str,
id: Ident,
last: Option<&'a Cell<bool>>,
}, },
Block { Block {
pos: u32, pos: u32,
@ -243,6 +352,11 @@ pub enum Expr<'a> {
pos: u32, pos: u32,
body: &'a Expr<'a>, body: &'a Expr<'a>,
}, },
UnOp {
pos: u32,
op: TokenKind,
val: &'a Expr<'a>,
},
} }
impl<'a> std::fmt::Display for Expr<'a> { impl<'a> std::fmt::Display for Expr<'a> {
@ -252,6 +366,7 @@ impl<'a> std::fmt::Display for Expr<'a> {
} }
match *self { match *self {
Self::UnOp { op, val, .. } => write!(f, "{}{}", op, val),
Self::Break { .. } => write!(f, "break;"), Self::Break { .. } => write!(f, "break;"),
Self::Continue { .. } => write!(f, "continue;"), Self::Continue { .. } => write!(f, "continue;"),
Self::If { Self::If {
@ -269,11 +384,11 @@ impl<'a> std::fmt::Display for Expr<'a> {
} => { } => {
write!(f, "|")?; write!(f, "|")?;
let first = &mut true; let first = &mut true;
for (name, val) in args { for arg in args {
if !std::mem::take(first) { if !std::mem::take(first) {
write!(f, ", ")?; write!(f, ", ")?;
} }
write!(f, "{}: {}", name, val)?; write!(f, "{}: {}", arg.name, arg.ty)?;
} }
write!(f, "|: {} {}", ret, body) write!(f, "|: {} {}", ret, body)
} }
@ -501,9 +616,7 @@ mod tests {
fn parse(input: &'static str, output: &mut String) { fn parse(input: &'static str, output: &mut String) {
use std::fmt::Write; use std::fmt::Write;
let mut arena = super::Arena::default(); let mut arena = super::Arena::default();
let mut buffer = Vec::new(); let mut parser = super::Parser::new(input, std::path::Path::new("test"), &arena);
let mut parser =
super::Parser::new(input, std::path::Path::new("test"), &arena, &mut buffer);
for expr in parser.file() { for expr in parser.file() {
writeln!(output, "{}", expr).unwrap(); writeln!(output, "{}", expr).unwrap();
} }

Binary file not shown.

View file

@ -0,0 +1,2 @@
ret: 0
status: Ok(())

View file

@ -0,0 +1,2 @@
ret: 0
status: Ok(())