From 86013a50a43d196adba0af5857504452716b58b6 Mon Sep 17 00:00:00 2001 From: mlokr Date: Sun, 12 May 2024 11:52:58 +0200 Subject: [PATCH] identifiers are now properly checked --- hblang/examples/fb_driver.hb | 26 +- hblang/examples/pointers.hb | 6 + hblang/foo.b | 5 + hblang/src/codegen.rs | 382 ++++++++++++------ hblang/src/ident.rs | 21 + hblang/src/lexer.rs | 11 +- hblang/src/parser.rs | 179 ++++++-- hblang/test.bin | Bin 466 -> 1396 bytes .../tests/hblang_codegen_tests_fb_driver.txt | 2 + .../tests/hblang_codegen_tests_pointers.txt | 2 + 10 files changed, 462 insertions(+), 172 deletions(-) create mode 100644 hblang/examples/pointers.hb create mode 100644 hblang/foo.b create mode 100644 hblang/tests/hblang_codegen_tests_fb_driver.txt create mode 100644 hblang/tests/hblang_codegen_tests_pointers.txt diff --git a/hblang/examples/fb_driver.hb b/hblang/examples/fb_driver.hb index ce3cdaf..bcbd5df 100644 --- a/hblang/examples/fb_driver.hb +++ b/hblang/examples/fb_driver.hb @@ -1,33 +1,31 @@ -arm_fb_ptr := ||:int return 100; -x86_fb_ptr := ||:int return 100; +arm_fb_ptr := fn(): int return 100; +x86_fb_ptr := fn(): int return 100; -check_platform:= ||: int { - +check_platform := fn(): int { return x86_fb_ptr(); } -set_pixel := |x: int, y: int, r: u8, g: u8, b: u8|: int := { +set_pixel := fn(x: int, y: int, width: int): int { pix_offset := y * width + x; return 0; } -main := ||: int { - fb_ptr := check_platform(); - - width := 1024; - height := 768; +main := fn(): int { + fb_ptr := check_platform(); + width := 100; + height := 30; x:= 0; y:= 0; loop { if x <= height + 1 { - set_pixel(x,y,100,100,100); - x= x + 1; + set_pixel(x,y,width); + x = x + 1; } else { - set_pixel(x,y,100,100,100); - x := 0; + set_pixel(x,y,width); + x = 0; y = y + 1; } if y == width { diff --git a/hblang/examples/pointers.hb b/hblang/examples/pointers.hb new file mode 100644 index 0000000..0a4b7d0 --- /dev/null +++ b/hblang/examples/pointers.hb @@ -0,0 +1,6 @@ +main := fn(): int { + a := 1; + b := &a; + *b = 2; + return a - 2; +} diff --git a/hblang/foo.b b/hblang/foo.b new file mode 100644 index 0000000..44d91e8 --- /dev/null +++ b/hblang/foo.b @@ -0,0 +1,5 @@ + +running 0 tests + +test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 14 filtered out; finished in 0.00s + diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index 5db0a6d..129b582 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -1,17 +1,33 @@ +use crate::ident::Ident; + use { crate::{ instrs, lexer, log, - parser::{self, Expr}, + parser::{self}, }, std::rc::Rc, }; +use {lexer::TokenKind as T, parser::Expr as E}; + type LabelId = u32; type Reg = u8; type MaskElem = u64; type Type = u32; -mod bt { +#[derive(Debug)] +struct LinReg(Reg); + +#[cfg(debug_assertions)] +impl Drop for LinReg { + fn drop(&mut self) { + if !std::thread::panicking() { + panic!("reg leaked"); + } + } +} + +pub mod bt { use super::*; const fn builtin_type(id: u32) -> Type { @@ -20,30 +36,44 @@ mod bt { macro_rules! builtin_type { ($($name:ident;)*) => {$( - pub const $name: Type = builtin_type(${index(0)}); + pub const $name: Type = TypeKind::Builtin(${index(0)}).encode(); )*}; } builtin_type! { + VOID; + UNREACHABLE; INT; BOOL; - MAX; } } +#[derive(Debug)] enum TypeKind { Builtin(Type), Struct(Type), + Pointer(Type), } impl TypeKind { - fn from_ty(ty: Type) -> Self { - if ty > bt::MAX { - Self::Builtin(ty) - } else { - Self::Struct(ty) + const fn from_ty(ty: Type) -> Self { + let (flag, index) = (ty & 0b11, ty >> 2); + match flag { + 0 => Self::Builtin(index), + 1 => Self::Pointer(index), + 2 => Self::Struct(index), + _ => unreachable!(), } } + + const fn encode(self) -> Type { + let (index, flag) = match self { + Self::Builtin(index) => (index, 0), + Self::Pointer(index) => (index, 1), + Self::Struct(index) => (index, 2), + }; + index << 2 | flag + } } const STACK_PTR: Reg = 254; @@ -144,7 +174,7 @@ impl Func { } + shift; log::dbg!( - label.name.as_ref(), + label.name, offset, reloc.size, reloc.instr_offset, @@ -183,16 +213,17 @@ impl RegAlloc { self.used.clear(); } - fn allocate(&mut self) -> Reg { + fn allocate(&mut self) -> LinReg { let reg = self.free.pop().expect("TODO: we need to spill"); if self.used.binary_search_by_key(&!reg, |&r| !r).is_err() { self.used.push(reg); } - reg + LinReg(reg) } - fn free(&mut self, reg: Reg) { - self.free.push(reg); + fn free(&mut self, reg: LinReg) { + self.free.push(reg.0); + std::mem::forget(reg); } } @@ -200,13 +231,12 @@ struct FnLabel { offset: u32, // TODO: use different stile of identifier that does not allocate, eg. index + length into a // file - name: Rc, + name: Ident, } -struct Variable<'a> { - name: Rc, - offset: u64, - ty: Expr<'a>, +struct Variable { + id: Ident, + value: Value, } struct RetReloc { @@ -227,35 +257,38 @@ struct Struct { pub struct Codegen<'a> { path: &'a std::path::Path, - ret: Expr<'a>, + ret: Type, gpa: RegAlloc, code: Func, temp: Func, labels: Vec, stack_size: u64, - vars: Vec>, + vars: Vec, stack_relocs: Vec, ret_relocs: Vec, loops: Vec, records: Vec, + pointers: Vec, + main: Option, } impl<'a> Codegen<'a> { pub fn new() -> Self { Self { - path: std::path::Path::new(""), - ret: Expr::Return { val: None, pos: 0 }, - gpa: Default::default(), - code: Default::default(), - temp: Default::default(), - labels: Default::default(), + path: std::path::Path::new(""), + ret: bt::VOID, + gpa: Default::default(), + code: Default::default(), + temp: Default::default(), + labels: Default::default(), stack_size: 0, - vars: Default::default(), - + vars: Default::default(), stack_relocs: Default::default(), - ret_relocs: Default::default(), - loops: Default::default(), - records: Default::default(), + ret_relocs: Default::default(), + loops: Default::default(), + records: Default::default(), + pointers: Default::default(), + main: None, } } @@ -271,17 +304,42 @@ impl<'a> Codegen<'a> { Ok(()) } - fn loc_to_reg(&mut self, loc: Loc) -> Reg { + fn size_of(&self, ty: Type) -> u64 { + // TODO: proper alignment + match TypeKind::from_ty(ty) { + TypeKind::Pointer(_) | TypeKind::Builtin(bt::INT) => 8, + TypeKind::Builtin(bt::BOOL) => 1, + TypeKind::Builtin(_) => unreachable!(), + TypeKind::Struct(ty) => self.records[ty as usize] + .fields + .iter() + .map(|(_, ty)| self.size_of(*ty)) + .sum(), + } + } + + fn loc_to_reg(&mut self, loc: Loc) -> LinReg { match loc { + Loc::RegRef(rr) => { + let reg = self.gpa.allocate(); + self.code.encode(instrs::cp(reg.0, rr)); + reg + } Loc::Reg(reg) => reg, + Loc::Deref(dreg) => { + let reg = self.gpa.allocate(); + self.code.encode(instrs::ld(reg.0, dreg.0, 0, 8)); + self.gpa.free(dreg); + reg + } Loc::Imm(imm) => { let reg = self.gpa.allocate(); - self.code.encode(instrs::li64(reg, imm)); + self.code.encode(instrs::li64(reg.0, imm)); reg } Loc::Stack(offset) => { let reg = self.gpa.allocate(); - self.load_stack(reg, offset, 8); + self.load_stack(reg.0, offset, 8); reg } } @@ -309,11 +367,11 @@ impl<'a> Codegen<'a> { self.code.encode(instrs::ld(reg, STACK_PTR, offset, size)); } - fn reloc_stack(&mut self, stack_height: u64) { + fn reloc_stack(&mut self) { for reloc in self.stack_relocs.drain(..) { let dest = &mut self.code.code[reloc.offset as usize..][..reloc.size as usize]; let value = u64::from_ne_bytes(dest.try_into().unwrap()); - let offset = stack_height - value; + let offset = self.stack_size - value; dest.copy_from_slice(&offset.to_ne_bytes()); } } @@ -329,80 +387,149 @@ impl<'a> Codegen<'a> { } } - fn expr(&mut self, expr: &'a parser::Expr<'a>, expeted: Option>) -> Option> { - use {lexer::TokenKind as T, parser::Expr as E}; + fn ty(&mut self, expr: &parser::Expr<'a>) -> Type { match *expr { + E::Ident { name: "int", .. } => bt::INT, + E::Ident { name: "bool", .. } => bt::BOOL, + expr => unimplemented!("type: {:#?}", expr), + } + } + + fn expr(&mut self, expr: &'a parser::Expr<'a>, expeted: Option) -> Option { + match *expr { + E::UnOp { + op: T::Amp, val, .. + } => { + let val = self.expr(val, None).unwrap(); + match val.loc { + Loc::Stack(off) => { + let reg = self.gpa.allocate(); + self.stack_relocs.push(StackReloc { + offset: self.code.code.len() as u32 + 3, + size: 8, + }); + self.code.encode(instrs::addi64(reg.0, STACK_PTR, off)); + Some(Value { + ty: self.alloc_pointer(val.ty), + loc: Loc::Reg(reg), + }) + } + l => panic!("cant take pointer of {:?}", l), + } + } + E::UnOp { + op: T::Star, val, .. + } => { + let val = self.expr(val, None).unwrap(); + let reg = self.loc_to_reg(val.loc); + match TypeKind::from_ty(val.ty) { + TypeKind::Pointer(ty) => Some(Value { + ty: self.pointers[ty as usize], + loc: Loc::Deref(reg), + }), + _ => panic!("cant deref {:?}", val.ty), + } + } E::BinOp { - left: E::Ident { name, .. }, + left: E::Ident { name, id, .. }, op: T::Decl, right: E::Closure { ret, body, args, .. }, } => { - let frame = self.add_label(name); - for (i, &(name, ty)) in args.iter().enumerate() { + log::dbg!("fn: {}", name); + let frame = self.add_label(*id); + if *name == "main" { + self.main = Some(frame.label); + } + log::dbg!("fn-args"); + for (i, arg) in args.iter().enumerate() { let offset = self.alloc_stack(8); - self.decl_var(name, offset, ty); + let ty = self.ty(&arg.ty); + self.vars.push(Variable { + id: arg.id, + value: Value { + ty, + loc: Loc::Stack(offset), + }, + }); self.store_stack(i as Reg + 2, offset, 8); } self.gpa.init_callee(); - self.ret = **ret; + self.ret = self.ty(ret); + log::dbg!("fn-body"); self.expr(body, None); self.vars.clear(); - let stack = std::mem::take(&mut self.stack_size); - self.reloc_stack(stack); + log::dbg!("fn-relocs"); + self.reloc_stack(); + log::dbg!("fn-prelude"); self.write_fn_prelude(frame); + log::dbg!("fn-ret"); self.reloc_rets(); self.ret(); + self.stack_size = 0; + self.vars.clear(); None } E::BinOp { - left: E::Ident { name, .. }, + left: E::Ident { id, .. }, op: T::Decl, right, } => { let val = self.expr(right, None).unwrap(); let reg = self.loc_to_reg(val.loc); let offset = self.alloc_stack(8); - self.decl_var(name, offset, val.ty); - self.store_stack(reg, offset, 8); + self.vars.push(Variable { + id: *id, + value: Value { + ty: val.ty, + loc: Loc::Stack(offset), + }, + }); + self.store_stack(reg.0, offset, 8); + self.gpa.free(reg); None } E::Call { - func: E::Ident { name, .. }, + func: E::Ident { id, .. }, args, } => { for (i, arg) in args.iter().enumerate() { let arg = self.expr(arg, None).unwrap(); let reg = self.loc_to_reg(arg.loc); - self.code.encode(instrs::cp(i as Reg + 2, reg)); + self.code.encode(instrs::cp(i as Reg + 2, reg.0)); + self.gpa.free(reg); } - let func = self.get_or_reserve_label(name); + let func = self.get_or_reserve_label(*id); self.code.call(func); let reg = self.gpa.allocate(); - self.code.encode(instrs::cp(reg, 1)); + self.code.encode(instrs::cp(reg.0, 1)); Some(Value { ty: self.ret, loc: Loc::Reg(reg), }) } - E::Ident { name, .. } => { - let var = self.vars.iter().find(|v| v.name.as_ref() == name).unwrap(); + E::Ident { name, id, .. } => { + let var = self + .vars + .iter() + .find(|v| v.id == id) + .unwrap_or_else(|| panic!("variable not found: {:?}", name)); Some(Value { - ty: var.ty, - loc: Loc::Stack(var.offset), + ty: var.value.ty, + loc: var.value.loc.take_ref(), }) } E::Return { val, .. } => { if let Some(val) = val { let val = self.expr(val, Some(self.ret)).unwrap(); if val.ty != self.ret { - //panic!("expected {:?}, got {:?}", self.ret, val.ty); + panic!("expected {:?}, got {:?}", self.ret, val.ty); } self.assign( Value { ty: self.ret, - loc: Loc::Reg(1), + loc: Loc::RegRef(1), }, val, ); @@ -417,39 +544,33 @@ impl<'a> Codegen<'a> { } E::Block { stmts, .. } => { for stmt in stmts { - self.expr(stmt, None); + if let Some(Loc::Reg(reg)) = self.expr(stmt, None).map(|v| v.loc) { + self.gpa.free(reg); + } } None } E::Number { value, .. } => Some(Value { - ty: expeted.unwrap_or(Expr::Ident { - name: "int", - pos: 0, - }), + ty: expeted.unwrap_or(bt::INT), loc: Loc::Imm(value), }), E::If { cond, then, else_, .. } => { - let cond = self - .expr( - cond, - Some(Expr::Ident { - name: "bool", - pos: 0, - }), - ) - .unwrap(); + log::dbg!("if-cond"); + let cond = self.expr(cond, Some(bt::BOOL)).unwrap(); let reg = self.loc_to_reg(cond.loc); let jump_offset = self.code.code.len() as u32; - self.code.encode(instrs::jeq(reg, 0, 0)); + self.code.encode(instrs::jeq(reg.0, 0, 0)); self.gpa.free(reg); + log::dbg!("if-then"); self.expr(then, None); let jump; if let Some(else_) = else_ { + log::dbg!("if-else"); let else_jump_offset = self.code.code.len() as u32; self.code.encode(instrs::jmp(0)); @@ -458,18 +579,21 @@ impl<'a> Codegen<'a> { self.expr(else_, None); let jump = self.code.code.len() as i32 - else_jump_offset as i32; + log::dbg!("if-else-jump: {}", jump); self.code.code[else_jump_offset as usize + 1..][..4] .copy_from_slice(&jump.to_ne_bytes()); } else { jump = self.code.code.len() as i16 - jump_offset as i16; } + log::dbg!("if-then-jump: {}", jump); self.code.code[jump_offset as usize + 3..][..2] .copy_from_slice(&jump.to_ne_bytes()); None } E::Loop { body, .. } => { + log::dbg!("loop"); let loop_start = self.code.code.len() as u32; self.loops.push(Loop { offset: loop_start, @@ -477,6 +601,7 @@ impl<'a> Codegen<'a> { }); self.expr(body, None); + log::dbg!("loop-end"); let loop_end = self.code.code.len(); self.code .encode(instrs::jmp(loop_start as i32 - loop_end as i32)); @@ -514,6 +639,9 @@ impl<'a> Codegen<'a> { E::BinOp { left, op, right } => { let left = self.expr(left, expeted).unwrap(); let right = self.expr(right, Some(left.ty)).unwrap(); + if op == T::Assign { + return self.assign(left, right); + } let lhs = self.loc_to_reg(left.loc); let rhs = self.loc_to_reg(right.loc); @@ -523,36 +651,29 @@ impl<'a> Codegen<'a> { T::Minus => instrs::sub64, T::Star => instrs::mul64, T::Le => { - self.code.encode(instrs::cmpu(lhs, lhs, rhs)); + self.code.encode(instrs::cmpu(lhs.0, lhs.0, rhs.0)); self.gpa.free(rhs); - self.code.encode(instrs::cmpui(lhs, lhs, 1)); + self.code.encode(instrs::cmpui(lhs.0, lhs.0, 1)); return Some(Value { - ty: Expr::Ident { - name: "bool", - pos: 0, - }, + ty: bt::BOOL, loc: Loc::Reg(lhs), }); } T::Eq => { - self.code.encode(instrs::cmpu(lhs, lhs, rhs)); + self.code.encode(instrs::cmpu(lhs.0, lhs.0, rhs.0)); self.gpa.free(rhs); - self.code.encode(instrs::cmpui(lhs, lhs, 0)); - self.code.encode(instrs::not(lhs, lhs)); + self.code.encode(instrs::cmpui(lhs.0, lhs.0, 0)); + self.code.encode(instrs::not(lhs.0, lhs.0)); return Some(Value { - ty: Expr::Ident { - name: "bool", - pos: 0, - }, + ty: bt::BOOL, loc: Loc::Reg(lhs), }); } T::FSlash => |reg0, reg1, reg2| instrs::diru64(reg0, ZERO, reg1, reg2), - T::Assign => return self.assign(left, right), _ => unimplemented!("{:#?}", op), }; - self.code.encode(op(lhs, lhs, rhs)); + self.code.encode(op(lhs.0, lhs.0, rhs.0)); self.gpa.free(rhs); Some(Value { @@ -564,32 +685,33 @@ impl<'a> Codegen<'a> { } } - fn assign(&mut self, left: Value<'a>, right: Value<'a>) -> Option> { + fn assign(&mut self, left: Value, right: Value) -> Option { let rhs = self.loc_to_reg(right.loc); match left.loc { - Loc::Reg(reg) => self.code.encode(instrs::cp(reg, rhs)), - Loc::Stack(offset) => self.store_stack(rhs, offset, 8), + Loc::Deref(reg) => { + self.code.encode(instrs::st(rhs.0, reg.0, 0, 8)); + self.gpa.free(reg); + } + Loc::RegRef(reg) => self.code.encode(instrs::cp(reg, rhs.0)), + Loc::Stack(offset) => self.store_stack(rhs.0, offset, 8), _ => unimplemented!(), } self.gpa.free(rhs); - Some(left) + None } - fn get_or_reserve_label(&mut self, name: &str) -> LabelId { - if let Some(label) = self.labels.iter().position(|l| l.name.as_ref() == name) { + fn get_or_reserve_label(&mut self, name: Ident) -> LabelId { + if let Some(label) = self.labels.iter().position(|l| l.name == name) { label as u32 } else { - self.labels.push(FnLabel { - offset: 0, - name: name.into(), - }); + self.labels.push(FnLabel { offset: 0, name }); self.labels.len() as u32 - 1 } } - fn add_label(&mut self, name: &str) -> Frame { + fn add_label(&mut self, name: Ident) -> Frame { let offset = self.code.code.len() as u32; - let label = if let Some(label) = self.labels.iter().position(|l| l.name.as_ref() == name) { + let label = if let Some(label) = self.labels.iter().position(|l| l.name == name) { self.labels[label].offset = offset; label as u32 } else { @@ -607,11 +729,8 @@ impl<'a> Codegen<'a> { } } - fn get_label(&self, name: &str) -> LabelId { - self.labels - .iter() - .position(|l| l.name.as_ref() == name) - .unwrap() as _ + fn get_label(&self, name: Ident) -> LabelId { + self.labels.iter().position(|l| l.name == name).unwrap() as _ } fn write_fn_prelude(&mut self, frame: Frame) { @@ -619,7 +738,7 @@ impl<'a> Codegen<'a> { for ® in self.gpa.used.clone().iter() { self.temp.push(reg, 8); } - self.temp.subi64(STACK_PTR, STACK_PTR, self.stack_size as _); + self.temp.subi64(STACK_PTR, STACK_PTR, self.stack_size); for reloc in &mut self.code.relocs[frame.prev_relocs..] { reloc.offset += self.temp.code.len() as u32; @@ -637,7 +756,7 @@ impl<'a> Codegen<'a> { fn ret(&mut self) { self.code - .encode(instrs::addi64(STACK_PTR, STACK_PTR, self.stack_size as _)); + .encode(instrs::addi64(STACK_PTR, STACK_PTR, self.stack_size)); for reg in self.gpa.used.clone().iter().rev() { self.code.pop(*reg, 8); } @@ -645,7 +764,7 @@ impl<'a> Codegen<'a> { } pub fn dump(mut self, out: &mut impl std::io::Write) -> std::io::Result<()> { - self.temp.prelude(self.get_label("main")); + self.temp.prelude(self.main.unwrap()); self.temp .relocate(&self.labels, self.temp.code.len() as i64); @@ -654,26 +773,42 @@ impl<'a> Codegen<'a> { out.write_all(&self.code.code) } - fn decl_var(&mut self, name: &str, offset: u64, ty: Expr<'a>) { - self.vars.push(Variable { - name: name.into(), - offset, - ty, - }); + fn alloc_pointer(&mut self, ty: Type) -> Type { + let ty = self + .pointers + .iter() + .position(|&p| p == ty) + .unwrap_or_else(|| { + self.pointers.push(ty); + self.pointers.len() - 1 + }); + + TypeKind::Pointer(ty as Type).encode() } } -pub struct Value<'a> { - ty: Expr<'a>, +pub struct Value { + ty: Type, loc: Loc, } -#[derive(Clone, Copy)] -pub enum Loc { - Reg(Reg), +#[derive(Debug)] +enum Loc { + Reg(LinReg), + RegRef(Reg), + Deref(LinReg), Imm(u64), Stack(u64), } +impl Loc { + fn take_ref(&self) -> Loc { + match self { + Self::Reg(reg) => Self::RegRef(reg.0), + Self::Stack(off) => Self::Stack(*off), + _ => unreachable!(), + } + } +} #[cfg(test)] mod tests { @@ -738,8 +873,7 @@ mod tests { fn generate(input: &'static str, output: &mut String) { let path = std::path::Path::new("test"); let arena = crate::parser::Arena::default(); - let mut buffer = Vec::new(); - let mut parser = super::parser::Parser::new(input, path, &arena, &mut buffer); + let mut parser = super::parser::Parser::new(input, path, &arena); let exprs = parser.file(); let mut codegen = super::Codegen::new(); codegen.file(path, &exprs).unwrap(); @@ -749,7 +883,7 @@ mod tests { std::fs::write("test.bin", &out).unwrap(); use std::fmt::Write; - let mut stack = [0_u64; 1024]; + let mut stack = [0_u64; 128]; let mut vm = unsafe { hbvm::Vm::::new(TestMem, hbvm::mem::Address::new(out.as_ptr() as u64)) @@ -779,5 +913,7 @@ mod tests { functions => include_str!("../examples/functions.hb"); if_statements => include_str!("../examples/if_statement.hb"); loops => include_str!("../examples/loops.hb"); + fb_driver => include_str!("../examples/fb_driver.hb"); + pointers => include_str!("../examples/pointers.hb"); } } diff --git a/hblang/src/ident.rs b/hblang/src/ident.rs index e69de29..7b8b2cd 100644 --- a/hblang/src/ident.rs +++ b/hblang/src/ident.rs @@ -0,0 +1,21 @@ +pub type Ident = u32; + +const LEN_BITS: u32 = 6; + +pub fn len(ident: Ident) -> u32 { + ident & ((1 << LEN_BITS) - 1) +} + +pub fn pos(ident: Ident) -> u32 { + ident >> LEN_BITS +} + +pub fn new(pos: u32, len: u32) -> Ident { + debug_assert!(len < (1 << LEN_BITS)); + (pos << LEN_BITS) | len +} + +pub fn range(ident: Ident) -> std::ops::Range { + let (len, pos) = (len(ident) as usize, pos(ident) as usize); + pos..pos + len +} diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs index 625b92e..3698108 100644 --- a/hblang/src/lexer.rs +++ b/hblang/src/lexer.rs @@ -9,6 +9,10 @@ impl Token { pub fn range(&self) -> std::ops::Range { self.start as usize..self.end as usize } + + pub fn len(&self) -> u32 { + self.end - self.start + } } macro_rules! gen_token_kind { @@ -102,6 +106,8 @@ gen_token_kind! { #[prec = 21] Le = "<=", Eq = "==", + #[prec = 22] + Amp = "&", #[prec = 23] Plus = "+", Minus = "-", @@ -124,8 +130,8 @@ impl<'a> Lexer<'a> { } } - pub fn slice(&self, tok: Token) -> &'a str { - unsafe { std::str::from_utf8_unchecked(&self.bytes[tok.range()]) } + pub fn slice(&self, tok: std::ops::Range) -> &'a str { + unsafe { std::str::from_utf8_unchecked(&self.bytes[tok]) } } fn peek(&self) -> Option { @@ -204,6 +210,7 @@ impl<'a> Iterator for Lexer<'a> { b'-' => T::Minus, b'*' => T::Star, b'/' => T::FSlash, + b'&' => T::Amp, b'(' => T::LParen, b')' => T::RParen, b'{' => T::LBrace, diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs index fa3b319..8d0a769 100644 --- a/hblang/src/parser.rs +++ b/hblang/src/parser.rs @@ -1,22 +1,27 @@ use std::{cell::Cell, ops::Not, ptr::NonNull}; -use crate::lexer::{Lexer, Token, TokenKind}; +use crate::{ + codegen::bt, + ident::{self, Ident}, + lexer::{Lexer, Token, TokenKind}, +}; + +struct ScopeIdent<'a> { + ident: Ident, + declared: bool, + last: &'a Cell, +} pub struct Parser<'a, 'b> { - path: &'a std::path::Path, - lexer: Lexer<'a>, - arena: &'b Arena<'a>, - expr_buf: &'b mut Vec>, - token: Token, + path: &'a std::path::Path, + lexer: Lexer<'a>, + arena: &'b Arena<'a>, + token: Token, + idents: Vec>, } impl<'a, 'b> Parser<'a, 'b> { - pub fn new( - input: &'a str, - path: &'a std::path::Path, - arena: &'b Arena<'a>, - expr_buf: &'b mut Vec>, - ) -> Self { + pub fn new(input: &'a str, path: &'a std::path::Path, arena: &'b Arena<'a>) -> Self { let mut lexer = Lexer::new(input); let token = lexer.next(); Self { @@ -24,13 +29,30 @@ impl<'a, 'b> Parser<'a, 'b> { token, path, arena, - // we ensure its empty before returning form parse - expr_buf: unsafe { std::mem::transmute(expr_buf) }, + idents: Vec::new(), } } pub fn file(&mut self) -> &'a [Expr<'a>] { - self.collect(|s| (s.token.kind != TokenKind::Eof).then(|| s.expr())) + let f = self.collect(|s| (s.token.kind != TokenKind::Eof).then(|| s.expr())); + self.pop_scope(0); + let has_undeclared = !self.idents.is_empty(); + for id in self.idents.drain(..) { + let (line, col) = self.lexer.line_col(ident::pos(id.ident)); + eprintln!( + "{}:{}:{} => undeclared identifier: {}", + self.path.display(), + line, + col, + self.lexer.slice(ident::range(id.ident)) + ); + } + + if has_undeclared { + unreachable!(); + } + + f } fn next(&mut self) -> Token { @@ -69,14 +91,59 @@ impl<'a, 'b> Parser<'a, 'b> { left } + fn try_resolve_builtin(name: &str) -> Option { + // FIXME: we actually do this the second time in the codegen + Some(match name { + "int" => bt::INT, + "bool" => bt::BOOL, + _ => return None, + }) + } + + fn resolve_ident(&mut self, token: Token, decl: bool) -> (Ident, Option<&'a Cell>) { + let name = self.lexer.slice(token.range()); + + if let Some(builtin) = Self::try_resolve_builtin(name) { + return (builtin, None); + } + + let last = self.arena.alloc(Cell::new(false)); + let id = match self + .idents + .iter_mut() + .rfind(|elem| self.lexer.slice(ident::range(elem.ident)) == name) + { + Some(elem) if decl && elem.declared => { + self.report(format_args!("redeclaration of identifier: {name}")) + } + Some(elem) => elem, + None => { + let id = ident::new(token.start, name.len() as _); + self.idents.push(ScopeIdent { + ident: id, + declared: false, + last, + }); + self.idents.last_mut().unwrap() + } + }; + + id.last = last; + id.declared |= decl; + + (id.ident, Some(last)) + } + fn unit_expr(&mut self) -> Expr<'a> { use {Expr as E, TokenKind as T}; + let frame = self.idents.len(); let token = self.next(); let mut expr = match token.kind { - T::Ident => E::Ident { - pos: token.start, - name: self.arena.alloc_str(self.lexer.slice(token)), - }, + T::Ident => { + let (id, last) = self.resolve_ident(token, self.token.kind == T::Decl); + let name = self.lexer.slice(token.range()); + E::Ident { name, id, last } + } T::If => E::If { pos: token.start, cond: self.ptr_expr(), @@ -99,10 +166,14 @@ impl<'a, 'b> Parser<'a, 'b> { self.expect_advance(T::LParen); self.collect_list(T::Comma, T::RParen, |s| { let name = s.expect_advance(T::Ident); - let name = s.arena.alloc_str(s.lexer.slice(name)); + let (id, last) = s.resolve_ident(name, true); s.expect_advance(T::Colon); - let val = s.expr(); - (name, val) + Arg { + name: s.lexer.slice(name.range()), + id, + last, + ty: s.expr(), + } }) }, ret: { @@ -111,13 +182,18 @@ impl<'a, 'b> Parser<'a, 'b> { }, body: self.ptr_expr(), }, + T::Amp | T::Star => E::UnOp { + pos: token.start, + op: token.kind, + val: self.ptr_unit_expr(), + }, T::LBrace => E::Block { pos: token.start, stmts: self.collect_list(T::Semi, T::RBrace, Self::expr), }, T::Number => E::Number { pos: token.start, - value: match self.lexer.slice(token).parse() { + value: match self.lexer.slice(token.range()).parse() { Ok(value) => value, Err(e) => self.report(format_args!("invalid number: {e}")), }, @@ -132,22 +208,46 @@ impl<'a, 'b> Parser<'a, 'b> { loop { expr = match self.token.kind { - TokenKind::LParen => { + T::LParen => { self.next(); Expr::Call { func: self.arena.alloc(expr), - args: self.collect_list(TokenKind::Comma, TokenKind::RParen, Self::expr), + args: self.collect_list(T::Comma, T::RParen, Self::expr), } } _ => break, } } - self.advance_if(TokenKind::Semi); + if matches!(token.kind, T::Return) { + self.expect_advance(T::Semi); + } + + if matches!(token.kind, T::Loop | T::LBrace | T::Fn) { + self.pop_scope(frame); + } expr } + fn pop_scope(&mut self, frame: usize) { + let mut undeclared_count = frame; + for i in frame..self.idents.len() { + if !self.idents[i].declared { + self.idents.swap(i, undeclared_count); + undeclared_count += 1; + } + } + + for id in self.idents.drain(undeclared_count..) { + id.last.set(true); + } + } + + fn ptr_unit_expr(&mut self) -> &'a Expr<'a> { + self.arena.alloc(self.unit_expr()) + } + fn collect_list( &mut self, delim: TokenKind, @@ -194,6 +294,14 @@ impl<'a, 'b> Parser<'a, 'b> { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Arg<'a> { + pub name: &'a str, + pub id: Ident, + pub last: Option<&'a Cell>, + pub ty: Expr<'a>, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Expr<'a> { Break { @@ -204,7 +312,7 @@ pub enum Expr<'a> { }, Closure { pos: u32, - args: &'a [(&'a str, Expr<'a>)], + args: &'a [Arg<'a>], ret: &'a Expr<'a>, body: &'a Expr<'a>, }, @@ -217,8 +325,9 @@ pub enum Expr<'a> { val: Option<&'a Expr<'a>>, }, Ident { - pos: u32, name: &'a str, + id: Ident, + last: Option<&'a Cell>, }, Block { pos: u32, @@ -243,6 +352,11 @@ pub enum Expr<'a> { pos: u32, body: &'a Expr<'a>, }, + UnOp { + pos: u32, + op: TokenKind, + val: &'a Expr<'a>, + }, } impl<'a> std::fmt::Display for Expr<'a> { @@ -252,6 +366,7 @@ impl<'a> std::fmt::Display for Expr<'a> { } match *self { + Self::UnOp { op, val, .. } => write!(f, "{}{}", op, val), Self::Break { .. } => write!(f, "break;"), Self::Continue { .. } => write!(f, "continue;"), Self::If { @@ -269,11 +384,11 @@ impl<'a> std::fmt::Display for Expr<'a> { } => { write!(f, "|")?; let first = &mut true; - for (name, val) in args { + for arg in args { if !std::mem::take(first) { write!(f, ", ")?; } - write!(f, "{}: {}", name, val)?; + write!(f, "{}: {}", arg.name, arg.ty)?; } write!(f, "|: {} {}", ret, body) } @@ -501,9 +616,7 @@ mod tests { fn parse(input: &'static str, output: &mut String) { use std::fmt::Write; let mut arena = super::Arena::default(); - let mut buffer = Vec::new(); - let mut parser = - super::Parser::new(input, std::path::Path::new("test"), &arena, &mut buffer); + let mut parser = super::Parser::new(input, std::path::Path::new("test"), &arena); for expr in parser.file() { writeln!(output, "{}", expr).unwrap(); } diff --git a/hblang/test.bin b/hblang/test.bin index 760c53371e52eb3440d9a1d74aac980c3c7fbff3..2e76a9a05b3092378bf7f2f7e5964553ad206fc8 100644 GIT binary patch literal 1396 zcmd5+F;2ul4D{v>w7LROQeCd(I>Zl<&^A%hQDI0MH*~pMP35!tcl`x zP-|(~dX}9=ov~}_KOAXS!!_$0$o;uWNj*$9O-3K|0#tD$N_yxP@gl!0<9T{vYjdQ< zB63EA3}lj120DsrvWJF*1`0JapG!45&820(dzZda=|xmxL*HmJlsCJSfyd4CYhYM6 z8sW91Fq~z}Ixw82HL$Ml!6@FK6UI?pqyHL_ZZXC05gx%Lgla6v#9?9m3QOemp2qdi znaYMzS8mdkuJ?PM$0UER&CRh*gR%}7Rx{_P?s)~1$3ks{&4Z|+>{~<@WCi_-k^VUj TL+%_L>JLmEWnQiPsmu5QOC5r< literal 466 zcmZ{h!41P83`I?)R;Z_JzzUtBmk2IN_Fto5aO;p5IjH!6tjEV6KC1%@0Om#X`5qM< zF{x!i^D^Lhz;VFSfX6GBOU%WbZ74LIt%W(?1ZUdcu$KjoT9f4H12Orewu07O4*96Y rJrU#E;Bx2ToJBR^g*DRFjKrTU{_`%2rth+Ayvw@r?r(%|;KcU_t