diff --git a/hblang/examples/global_variables.hb b/hblang/examples/global_variables.hb index 503d6641..58e3d6c4 100644 --- a/hblang/examples/global_variables.hb +++ b/hblang/examples/global_variables.hb @@ -3,7 +3,7 @@ global_var := 10; complex_global_var := fib(global_var) - 5; fib := fn(n: int): int { - if n <= 2 { + if 2 > n { return n; } return fib(n - 1) + fib(n - 2); @@ -12,3 +12,4 @@ fib := fn(n: int): int { main := fn(): int { return complex_global_var; } + diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index 2a53b975..1511d79a 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -7,7 +7,7 @@ use hbvm::Vm; use crate::{ ident::{self, Ident}, - parser::Symbols, + parser::ExprRef, }; use { @@ -24,18 +24,29 @@ type LabelId = u32; type Reg = u8; type MaskElem = u64; type Type = u32; +type GlobalId = u32; fn align_up(value: u64, align: u64) -> u64 { (value + align - 1) & !(align - 1) } +enum Signature { + Global(Type), + Function(Box<[Type]>, Type), +} + +struct ItemId { + file: parser::FileId, + expr: parser::ExprRef, +} + #[derive(Debug, PartialEq, Eq)] -struct LinReg(Reg, Option>>); +struct LinReg(Reg, Rc>); #[cfg(debug_assertions)] impl Drop for LinReg { fn drop(&mut self) { - self.1.take().map(|b| b.borrow_mut().free(self.0)); + self.1.borrow_mut().free(self.0) } } @@ -70,10 +81,9 @@ impl Stack { impl Drop for Stack { fn drop(&mut self) { - self.alloc - .get_mut() - .as_mut() - .map(|f| f.borrow_mut().free(self.offset, self.size)); + if let Some(f) = self.alloc.get_mut().as_mut() { + f.borrow_mut().free(self.offset, self.size) + } } } @@ -134,7 +144,7 @@ enum Ctx { None, Inferred(Type), Dest(Value), - DestUntyped(Loc, u64), + DestUntyped(Loc), } impl Ctx { @@ -165,6 +175,7 @@ pub mod bt { } builtin_type! { + UNDECLARED; NEVER; VOID; BOOL; @@ -179,11 +190,11 @@ pub mod bt { } pub fn is_signed(ty: Type) -> bool { - ty >= I8 && ty <= INT + (I8..=INT).contains(&ty) } pub fn is_unsigned(ty: Type) -> bool { - ty >= U8 && ty <= UINT + (U8..=UINT).contains(&ty) } pub fn strip_pointer(ty: Type) -> Type { @@ -253,7 +264,7 @@ struct Frame { } struct Reloc { - id: LabelId, + id: Result, offset: u32, instr_offset: u16, size: u16, @@ -265,19 +276,19 @@ struct StackReloc { } #[derive(Default)] -pub struct Func { +pub struct CodeBlock { code: Vec, relocs: Vec, } -impl Func { +impl CodeBlock { pub fn extend(&mut self, bytes: &[u8]) { self.code.extend_from_slice(bytes); } pub fn offset(&mut self, id: LabelId, instr_offset: u16, size: u16) { self.relocs.push(Reloc { - id, + id: Ok(id), offset: self.code.len() as u32, instr_offset, size, @@ -307,22 +318,18 @@ impl Func { fn pop(&mut self, value: Reg, size: usize) { self.encode(instrs::ld(value, STACK_PTR, 0, size as _)); - self.encode(instrs::addi64(STACK_PTR, STACK_PTR, size as _)); + self.addi64(STACK_PTR, STACK_PTR, size as _); } fn short_cut_bin_op(&mut self, dest: Reg, src: Reg, imm: u64) -> bool { - if imm == 0 { - if dest != src { - self.encode(instrs::cp(dest, src)); - } + if imm == 0 && dest != src { + self.encode(instrs::cp(dest, src)); } imm != 0 } fn subi64(&mut self, dest: Reg, src: Reg, imm: u64) { - if self.short_cut_bin_op(dest, src, imm) { - self.encode(instrs::addi64(dest, src, imm.wrapping_neg())); - } + self.addi64(dest, src, imm.wrapping_neg()); } fn addi64(&mut self, dest: Reg, src: Reg, imm: u64) { @@ -345,35 +352,38 @@ impl Func { self.encode(instrs::tx()); } - fn relocate(&mut self, labels: &[FnLabel], shift: i64) { + fn relocate(&mut self, labels: &[FnLabel], globals: &[Global], shift: i64) { for reloc in self.relocs.drain(..) { - let label = &labels[reloc.id as usize]; - let offset = if reloc.size == 8 { + let offset = match reloc.id { + Ok(id) => labels[id as usize].offset, + Err(id) => globals[id as usize].offset, + }; + let offset = if reloc.size == 8 && reloc.id.is_ok() { reloc.offset as i64 } else { - label.offset as i64 - reloc.offset as i64 + offset as i64 - reloc.offset as i64 } + shift; - log::dbg!( - label.name, - offset, - reloc.size, - reloc.instr_offset, - reloc.offset, - shift, - label.offset - ); - let dest = &mut self.code[reloc.offset as usize + reloc.instr_offset as usize..] [..reloc.size as usize]; + debug_assert!(dest.iter().all(|&b| b == 0)); match reloc.size { 2 => dest.copy_from_slice(&(offset as i16).to_le_bytes()), 4 => dest.copy_from_slice(&(offset as i32).to_le_bytes()), - 8 => dest.copy_from_slice(&(offset as i64).to_le_bytes()), + 8 => dest.copy_from_slice(&offset.to_le_bytes()), _ => unreachable!(), }; } } + + fn append(&mut self, data: &mut CodeBlock, code_offset: usize, reloc_offset: usize) { + for reloc in &mut data.relocs[reloc_offset..] { + reloc.offset += self.code.len() as u32; + reloc.offset -= code_offset as u32; + } + self.relocs.extend(data.relocs.drain(reloc_offset..)); + self.code.extend(data.code.drain(code_offset..)); + } } #[derive(Default, PartialEq, Eq)] @@ -442,12 +452,12 @@ struct Struct { } struct TypeDisplay<'a> { - codegen: &'a Codegen<'a>, + codegen: &'a Codegen, ty: Type, } impl<'a> TypeDisplay<'a> { - fn new(codegen: &'a Codegen<'a>, ty: Type) -> Self { + fn new(codegen: &'a Codegen, ty: Type) -> Self { Self { codegen, ty } } @@ -460,6 +470,7 @@ impl<'a> std::fmt::Display for TypeDisplay<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use TypeKind as TK; let str = match TK::from_ty(self.ty) { + TK::Builtin(bt::UNDECLARED) => "undeclared", TK::Builtin(bt::VOID) => "void", TK::Builtin(bt::NEVER) => "never", TK::Builtin(bt::INT) => "int", @@ -484,187 +495,112 @@ impl<'a> std::fmt::Display for TypeDisplay<'a> { struct Global { id: Ident, - offset: u64, + code: u32, + offset: u32, + dep: GlobalId, ty: Type, } -struct CompileMem { - code: *mut u8, - mem: Vec, -} - -impl Default for CompileMem { - fn default() -> Self { - Self { - code: std::ptr::null_mut(), - mem: Vec::new(), - } - } -} - -impl hbvm::mem::Memory for CompileMem { - unsafe fn load( - &mut self, - addr: hbvm::mem::Address, - target: *mut u8, - count: usize, - ) -> Result<(), hbvm::mem::LoadError> { - let sub = self - .mem - .get(addr.get() as usize..addr.get() as usize + count) - .ok_or(hbvm::mem::LoadError(addr))?; - - target.copy_from(sub.as_ptr(), count); - - Ok(()) - } - - unsafe fn store( - &mut self, - addr: hbvm::mem::Address, - source: *const u8, - count: usize, - ) -> Result<(), hbvm::mem::StoreError> { - self.mem - .get_mut(addr.get() as usize..addr.get() as usize + count) - .ok_or(hbvm::mem::StoreError(addr))? - .as_mut_ptr() - .copy_from(source, count); - - Ok(()) - } - - unsafe fn prog_read(&mut self, addr: hbvm::mem::Address) -> T { - debug_assert!(std::mem::align_of::() == 1); - *(self.code.add(addr.get() as usize) as *const T) - } -} - #[derive(Default)] -pub struct Codegen<'a> { - path: &'a str, - input: &'a [u8], +pub struct Codegen { + cf: parser::Ast, + cf_id: parser::FileId, + + ret: Type, + ret_reg: Option, + cur_global: GlobalId, + main: Option, + to_generate: Vec, - ret: Type, gpa: Rc>, sa: Rc>, - code: Func, - temp: Func, - labels: Vec, - vars: Vec, ret_relocs: Vec, loops: Vec, - records: Vec, - pointers: Vec, - globals: Vec, - main: Option, - pub symbols: Symbols, + code: CodeBlock, + data: CodeBlock, + temp: CodeBlock, - vm: Vm, + labels: Vec, + globals: Vec, + vars: Vec, + records: Vec, + pointers: Vec, + + pub files: Vec, + + vm: Vm, } -impl<'a> Codegen<'a> { - pub fn file(&mut self, path: &'a str, input: &'a [u8], exprs: &'a [parser::Expr<'a>]) { - self.path = path; - self.input = input; - - for expr in exprs { - let E::BinOp { - left: &E::Ident { id, name, .. }, - op: T::Decl, - right, - } = expr - else { - self.report(expr.pos(), format_args!("expected declaration")); - }; - - match right { - E::Closure { args, ret, .. } => { - let args = args.iter().map(|arg| self.ty(&arg.ty)).collect::>(); - let ret = self.ty(ret); - self.declare_fn_label(id, args, ret); - } - E::Struct { .. } => { - self.records.push(Struct { - id, - name: (*name).into(), - fields: Rc::from([]), - }); - } - _ => { - self.globals.push(Global { - id, - offset: 0, - ty: bt::NEVER, - }); - } - } +impl Codegen { + pub fn generate(&mut self) { + self.cur_global = GlobalId::MAX; + self.find_and_declare(0, Err("main")); + while let Some(item) = self.to_generate.pop() { + self.generate_item(item); } + } - for expr in exprs { - let E::BinOp { - left: E::Ident { id, name, .. }, + fn generate_item(&mut self, item: ItemId) { + let ast = self.files[item.file as usize].clone(); + let expr = item.expr.get(&ast).unwrap(); + + self.cf = ast.clone(); + self.cf_id = item.file; + + match expr { + E::BinOp { + left: E::Ident { name, id, .. }, op: T::Decl, - right, - } = expr - else { - self.report(expr.pos(), format_args!("expected declaration")); - }; - - match right { - E::Struct { fields, .. } => { - let fields = fields - .iter() - .map(|&(name, ty)| (name.into(), self.ty(&ty))) - .collect(); - self.records - .iter_mut() - .find(|r| r.id == *id) - .unwrap() - .fields = fields; + right: E::Closure { body, args, .. }, + } => { + log::dbg!("fn: {}", name); + let frame = self.define_fn_label(*id); + if *name == "main" { + self.main = Some(frame.label); } - E::Closure { body, args, .. } => { - log::dbg!("fn: {}", name); - let frame = self.define_fn_label(*id); - if *name == "main" { - self.main = Some(frame.label); - } - let fn_label = self.labels[frame.label as usize].clone(); - self.gpa.borrow_mut().init_callee(); + let fn_label = self.labels[frame.label as usize].clone(); + self.gpa.borrow_mut().init_callee(); - log::dbg!("fn-args"); - let mut parama = self.param_alloc(fn_label.ret); - for (arg, &ty) in args.iter().zip(fn_label.args.iter()) { - let sym = parser::find_symbol(&self.symbols, arg.id); - let loc = self.load_arg(sym.flags, ty, &mut parama); - self.vars.push(Variable { - id: arg.id, - value: Value { ty, loc }, - }); - } - - self.ret = fn_label.ret; - - log::dbg!("fn-body"); - if self.expr(body).is_some() { - self.report(body.pos(), "expected all paths in the fucntion to return"); - } - self.vars.clear(); - - log::dbg!("fn-prelude, stack: {:x}", self.sa.borrow().height); - - log::dbg!("fn-relocs"); - self.write_fn_prelude(frame); - - log::dbg!("fn-ret"); - self.reloc_rets(); - self.ret(); - self.sa.borrow_mut().clear(); + log::dbg!("fn-args"); + let mut parama = self.param_alloc(fn_label.ret); + for (arg, &ty) in args.iter().zip(fn_label.args.iter()) { + let sym = parser::find_symbol(&self.cf.symbols, arg.id); + let loc = self.load_arg(sym.flags, ty, &mut parama); + self.vars.push(Variable { + id: arg.id, + value: Value { ty, loc }, + }); } - value => todo!(), + + if self.size_of(fn_label.ret) > 16 { + let reg = self.gpa.borrow_mut().allocate(); + self.code.encode(instrs::cp(reg, 1)); + self.ret_reg = Some(reg); + } else { + self.ret_reg = None; + } + + self.ret = fn_label.ret; + + log::dbg!("fn-body"); + if self.expr(body).is_some() { + self.report(body.pos(), "expected all paths in the fucntion to return"); + } + self.vars.clear(); + + log::dbg!("fn-prelude, stack: {:x}", self.sa.borrow().height); + + log::dbg!("fn-relocs"); + self.write_fn_prelude(frame); + + log::dbg!("fn-ret"); + self.reloc_rets(); + self.ret(); + self.sa.borrow_mut().clear(); } + value => todo!(), } } @@ -739,7 +675,7 @@ impl<'a> Codegen<'a> { } fn alloc_reg(&mut self) -> LinReg { - LinReg(self.gpa.borrow_mut().allocate(), Some(self.gpa.clone())).into() + LinReg(self.gpa.borrow_mut().allocate(), self.gpa.clone()) } fn alloc_stack(&mut self, size: u64) -> Rc { @@ -803,7 +739,7 @@ impl<'a> Codegen<'a> { } } - fn ty(&mut self, expr: &parser::Expr<'a>) -> Type { + fn ty(&mut self, expr: &parser::Expr) -> Type { match *expr { E::Ident { id, .. } if ident::is_null(id) => id, E::UnOp { @@ -812,21 +748,22 @@ impl<'a> Codegen<'a> { let ty = self.ty(val); self.alloc_pointer(ty) } - E::Ident { id, name, .. } => { - let Some(index) = self.records.iter().position(|r| r.id == id) else { - self.report(expr.pos(), format_args!("unknown type: {}", name)) + E::Ident { id, .. } => { + let index = match self.records.iter().position(|r| r.id == id) { + Some(index) => index as Type, + None => self.find_and_declare(0, Ok(id)), }; - TypeKind::Struct(index as Type).encode() + TypeKind::Struct(index).encode() } expr => unimplemented!("type: {:#?}", expr), } } - fn expr(&mut self, expr: &'a parser::Expr<'a>) -> Option { + fn expr(&mut self, expr: &parser::Expr) -> Option { self.expr_ctx(expr, Ctx::default()) } - fn expr_ctx(&mut self, expr: &'a parser::Expr<'a>, mut ctx: Ctx) -> Option { + fn expr_ctx(&mut self, expr: &parser::Expr, mut ctx: Ctx) -> Option { use instrs as i; let value = match *expr { E::Directive { @@ -912,7 +849,7 @@ impl<'a> Codegen<'a> { ctx = match ctx { Ctx::Dest(Value { loc, .. }) | Ctx::DestUntyped(loc, ..) => { - Ctx::DestUntyped(loc, size as _) + Ctx::DestUntyped(loc) } _ => Ctx::None, }; @@ -943,9 +880,7 @@ impl<'a> Codegen<'a> { let ty = self.ty(ty); let ctx = match ctx { Ctx::Dest(dest) => Ctx::Dest(dest), - Ctx::DestUntyped(loc, size) if self.size_of(ty) == size => { - Ctx::Dest(Value { ty, loc }) - } + Ctx::DestUntyped(loc) => Ctx::Dest(Value { ty, loc }), _ => Ctx::Inferred(ty), }; return self.expr_ctx(val, ctx); @@ -957,7 +892,7 @@ impl<'a> Codegen<'a> { E::Ctor { pos, ty, fields, .. } => { - let Some(ty) = ty.map(|ty| self.ty(&ty)).or(ctx.ty()) else { + let Some(ty) = ty.map(|ty| self.ty(ty)).or(ctx.ty()) else { self.report(pos, "expected type, (it cannot be inferred)"); }; let size = self.size_of(ty); @@ -1009,12 +944,16 @@ impl<'a> Codegen<'a> { let val = self.expr(val)?; let loc = match val.loc { Loc::Deref(r, stack, off) => { - stack.map(|stack| stack.leak()); + if let Some(stack) = stack { + stack.leak() + } self.code.addi64(r.0, r.0, off); Loc::Reg(r) } Loc::DerefRef(r, stack, off) => { - stack.map(|stack| stack.leak()); + if let Some(stack) = stack { + stack.leak() + } let reg = self.alloc_reg(); self.code.addi64(reg.0, r, off); Loc::Reg(reg) @@ -1060,9 +999,9 @@ impl<'a> Codegen<'a> { } => { let val = self.expr(right)?; let loc = self.make_loc_owned(val.loc, val.ty); - let sym = parser::find_symbol(&self.symbols, *id); + let sym = parser::find_symbol(&self.cf.symbols, *id); let loc = match loc { - Loc::Reg(r) if sym.flags & parser::REFERENCED == 0 => { + Loc::Reg(r) if sym.flags & parser::REFERENCED != 0 => { let size = self.size_of(val.ty); let stack = self.alloc_stack(size); self.store_stack(r.0, stack.offset, size as _); @@ -1077,10 +1016,13 @@ impl<'a> Codegen<'a> { Some(Value::VOID) } E::Call { - func: E::Ident { id, .. }, + func: &E::Ident { id, .. }, args, } => { - let func = self.get_label(*id); + let func = match self.get_label(id) { + Some(func) => func, + None => self.find_and_declare(0, Ok(id)), + }; let fn_label = self.labels[func as usize].clone(); let mut parama = self.param_alloc(fn_label.ret); @@ -1104,14 +1046,11 @@ impl<'a> Codegen<'a> { loc, }); } - E::Ident { name, id, index } => { - let Some((var_index, var)) = - self.vars.iter_mut().enumerate().find(|(_, v)| v.id == id) - else { - self.report(expr.pos(), format_args!("unknown variable: {}", name)) - }; - - let sym = parser::find_symbol(&self.symbols, id); + E::Ident { id, index, .. } + if let Some((var_index, var)) = + self.vars.iter_mut().enumerate().find(|(_, v)| v.id == id) => + { + let sym = parser::find_symbol(&self.cf.symbols, id); let loc = match parser::ident_flag_index(sym.flags) == index && !self.loops.last().is_some_and(|l| l.var_count > var_index) @@ -1125,6 +1064,31 @@ impl<'a> Codegen<'a> { loc, }) } + E::Ident { id, .. } => { + let id = match self.globals.iter().position(|g| g.id == id) { + Some(id) => id as GlobalId, + None => self.find_and_declare(0, Ok(id)), + }; + let ptr = self.alloc_reg(); + + let global = &mut self.globals[id as usize]; + if self.cur_global != GlobalId::MAX { + global.dep = global.dep.max(id); + } + + self.code.relocs.push(Reloc { + id: Err(id), + offset: self.code.code.len() as u32, + instr_offset: 3, + size: 4, + }); + self.code.encode(i::lra(ptr.0, 0, 0)); + + Some(Value { + ty: global.ty, + loc: Loc::Deref(ptr, None, 0), + }) + } E::Return { val, .. } => { if let Some(val) = val { let size = self.size_of(self.ret); @@ -1226,7 +1190,7 @@ impl<'a> Codegen<'a> { dest.copy_from_slice(&offset.to_ne_bytes()); } - self.vars.drain(loop_.var_count as usize..); + self.vars.drain(loop_.var_count..); if is_unreachable { log::dbg!("infinite loop"); @@ -1294,13 +1258,20 @@ impl<'a> Codegen<'a> { let lsize = self.size_of(left.ty); let ty = ctx.ty().unwrap_or(left.ty); - let lhs = match std::mem::take(&mut ctx).loc() { - Some(Loc::RegRef(reg)) if Loc::RegRef(reg) == left.loc => LinReg(reg, None), - Some(loc) => { - ctx = Ctx::Dest(Value { ty, loc }); - self.loc_to_reg(left.loc, lsize) + let (lhs, loc) = match std::mem::take(&mut ctx).loc() { + Some(Loc::RegRef(reg)) if Loc::RegRef(reg) == left.loc && reg != 1 => { + (reg, Loc::RegRef(reg)) + } + Some(loc) => { + debug_assert!(!matches!(loc, Loc::Reg(LinReg(RET_ADDR, ..)))); + ctx = Ctx::Dest(Value { ty, loc }); + let reg = self.loc_to_reg(left.loc, lsize); + (reg.0, Loc::Reg(reg)) + } + None => { + let reg = self.loc_to_reg(left.loc, lsize); + (reg.0, Loc::Reg(reg)) } - None => self.loc_to_reg(left.loc, lsize), }; let right = self.expr_ctx(right, Ctx::Inferred(left.ty))?; let rsize = self.size_of(right.ty); @@ -1319,26 +1290,16 @@ impl<'a> Codegen<'a> { imm *= size; } - self.code.encode(oper(lhs.0, lhs.0, imm)); - break 'ops Some(Value { - ty, - loc: Loc::Reg(lhs), - }); + self.code.encode(oper(lhs, lhs, imm)); + break 'ops Some(Value { ty, loc }); } let rhs = self.loc_to_reg(right.loc, rsize); - log::dbg!( - "binop: {} {} {}", - self.display_ty(ty), - self.display_ty(left.ty), - self.display_ty(right.ty) - ); - if matches!(op, T::Add | T::Sub) { let min_size = lsize.min(rsize); if bt::is_signed(ty) && min_size < size { - let operand = if lsize < rsize { lhs.0 } else { rhs.0 }; + let operand = if lsize < rsize { lhs } else { rhs.0 }; let op = [i::sxt8, i::sxt16, i::sxt32][min_size.ilog2() as usize]; self.code.encode(op(operand, operand)); } @@ -1347,7 +1308,7 @@ impl<'a> Codegen<'a> { let (offset, ty) = if bt::is_pointer(left.ty) { (rhs.0, left.ty) } else { - (lhs.0, right.ty) + (lhs, right.ty) }; let TypeKind::Pointer(ty) = TypeKind::from_ty(ty) else { @@ -1360,33 +1321,26 @@ impl<'a> Codegen<'a> { } if let Some(op) = Self::math_op(op, signed, size) { - self.code.encode(op(lhs.0, lhs.0, rhs.0)); - - break 'ops Some(Value { - ty, - loc: Loc::Reg(lhs), - }); + self.code.encode(op(lhs, lhs, rhs.0)); + break 'ops Some(Value { ty, loc }); } 'cmp: { let against = match op { - T::Le | T::Lt => 1, + T::Le | T::Gt => 1, T::Ne | T::Eq => 0, - T::Ge | T::Gt => (-1i64) as _, + T::Ge | T::Lt => (-1i64) as _, _ => break 'cmp, }; let op_fn = if signed { i::cmps } else { i::cmpu }; - self.code.encode(op_fn(lhs.0, lhs.0, rhs.0)); - self.code.encode(i::cmpui(lhs.0, lhs.0, against)); + self.code.encode(op_fn(lhs, lhs, rhs.0)); + self.code.encode(i::cmpui(lhs, lhs, against)); if matches!(op, T::Eq | T::Lt | T::Gt) { - self.code.encode(i::not(lhs.0, lhs.0)); + self.code.encode(i::not(lhs, lhs)); } - break 'ops Some(Value { - ty: bt::BOOL, - loc: Loc::Reg(lhs), - }); + break 'ops Some(Value { ty: bt::BOOL, loc }); } unimplemented!("{:#?}", op) @@ -1398,12 +1352,18 @@ impl<'a> Codegen<'a> { Ctx::Dest(dest) => { _ = self.assert_ty(expr.pos(), dest.ty, value.ty); self.assign(dest.ty, dest.loc, value.loc)?; - Some(Value::VOID) + Some(Value { + ty: dest.ty, + loc: Loc::Imm(0), + }) } - Ctx::DestUntyped(loc, size) => { + Ctx::DestUntyped(loc) => { // Wo dont check since bitcast does - self.assign_opaque(size, loc, value.loc); - Some(Value::VOID) + self.assign(value.ty, loc, value.loc); + Some(Value { + ty: value.ty, + loc: Loc::Imm(0), + }) } _ => Some(value), } @@ -1645,6 +1605,84 @@ impl<'a> Codegen<'a> { } } + fn find_and_declare(&mut self, file: parser::FileId, name: Result) -> LabelId { + let f = self.files[file as usize].clone(); + let expr = f.find_decl(name).expect("TODO: error"); + match expr { + E::BinOp { + left: &E::Ident { id, .. }, + op: T::Decl, + right: E::Closure { args, ret, .. }, + } => { + let args = args.iter().map(|arg| self.ty(&arg.ty)).collect::>(); + let ret = self.ty(ret); + self.to_generate.push(ItemId { + file, + expr: ExprRef::new(expr), + }); + self.declare_fn_label(id, args.into(), ret) + } + E::BinOp { + left: &E::Ident { id, name, .. }, + op: T::Decl, + right: E::Struct { fields, .. }, + } => { + let fields = fields + .iter() + .map(|&(name, ty)| (name.into(), self.ty(&ty))) + .collect(); + self.records.push(Struct { + id, + fields, + name: name.into(), + }); + self.records.len() as u32 - 1 + } + E::BinOp { + left: &E::Ident { id, .. }, + op: T::Decl, + right, + } => { + let gid = self.globals.len() as GlobalId; + + let prev_in_global = std::mem::replace(&mut self.cur_global, gid); + let prev_gpa = std::mem::replace(&mut *self.gpa.borrow_mut(), Default::default()); + let prev_sa = std::mem::replace(&mut *self.sa.borrow_mut(), Default::default()); + + let offset = self.code.code.len(); + let reloc_count = self.code.relocs.len(); + self.globals.push(Global { + id, + ty: bt::UNDECLARED, + code: 0, + dep: 0, + offset: u32::MAX, + }); + + self.gpa.borrow_mut().init_callee(); + let ret = self.gpa.borrow_mut().allocate(); + // TODO: detect is constant does not call anything + self.code.encode(instrs::cp(ret, 1)); + + let ret = self + .expr_ctx(right, Ctx::DestUntyped(Loc::DerefRef(ret, None, 0))) + .expect("TODO: unreachable constant/global"); + self.code.encode(instrs::tx()); + self.globals[gid as usize].ty = ret.ty; + + self.globals[gid as usize].code = self.data.code.len() as u32; + self.data.append(&mut self.code, offset, reloc_count); + + *self.sa.borrow_mut() = prev_sa; + *self.gpa.borrow_mut() = prev_gpa; + self.cur_global = prev_in_global; + + gid + } + e => unimplemented!("{e:#?}"), + } + } + fn declare_fn_label(&mut self, name: Ident, args: Rc<[Type]>, ret: Type) -> LabelId { self.labels.push(FnLabel { offset: 0, @@ -1657,7 +1695,7 @@ impl<'a> Codegen<'a> { fn define_fn_label(&mut self, name: Ident) -> Frame { let offset = self.code.code.len() as u32; - let label = self.get_label(name); + let label = self.get_label(name).unwrap(); self.labels[label as usize].offset = offset; Frame { label, @@ -1666,8 +1704,11 @@ impl<'a> Codegen<'a> { } } - fn get_label(&self, name: Ident) -> LabelId { - self.labels.iter().position(|l| l.name == name).unwrap() as _ + fn get_label(&self, name: Ident) -> Option { + self.labels + .iter() + .position(|l| l.name == name) + .map(|l| l as _) } fn write_fn_prelude(&mut self, frame: Frame) { @@ -1702,9 +1743,48 @@ impl<'a> Codegen<'a> { pub fn dump(mut self, out: &mut impl std::io::Write) -> std::io::Result<()> { self.temp.prelude(self.main.unwrap()); self.temp - .relocate(&self.labels, self.temp.code.len() as i64); + .relocate(&self.labels, &self.globals, self.temp.code.len() as i64); + + let mut globals = std::mem::take(&mut self.globals); + for global in globals.iter_mut() { + let size = self.size_of(global.ty); + global.offset = self.code.code.len() as u32; + self.code.code.extend(std::iter::repeat(0).take(size as _)); + } + self.globals = globals; + + let prev_len = self.code.code.len(); + self.code.append(&mut self.data, 0, 0); + + self.code.relocate(&self.labels, &self.globals, 0); + + { + let mut var_order = self + .globals + .iter() + .map(|g| g.dep) + .zip(0u32..) + .collect::>(); + var_order.sort_unstable(); + + let stack_size = 1024 * 1024 * 2; + let mut stack = Vec::::with_capacity(stack_size); + for (_, glob_id) in var_order.into_iter().rev() { + let global = &self.globals[glob_id as usize]; + self.vm.pc = hbvm::mem::Address::new( + &mut self.code.code[global.code as usize + prev_len] as *mut _ as u64, + ); + self.vm + .write_reg(254, unsafe { stack.as_mut_ptr().add(stack_size) } as u64); + self.vm.write_reg( + 1, + &mut self.code.code[global.offset as usize] as *mut _ as u64, + ); + self.vm.run().unwrap(); + } + } + self.code.code.truncate(prev_len); - self.code.relocate(&self.labels, 0); out.write_all(&self.temp.code)?; out.write_all(&self.code.code) } @@ -1755,24 +1835,16 @@ impl<'a> Codegen<'a> { } match value.loc { - Loc::Reg(ref reg) => { - self.code.encode(instrs::cp(p, reg.0)); - } - Loc::RegRef(reg) => { - self.code.encode(instrs::cp(p, reg)); - } - Loc::Deref(ref reg, .., off) => { - self.code.encode(instrs::ld(p, reg.0, off, size as _)); - } - Loc::DerefRef(reg, .., off) => { + Loc::Reg(LinReg(reg, ..)) | Loc::RegRef(reg) => self.code.encode(instrs::cp(p, reg)), + Loc::Deref(LinReg(reg, ..), .., off) | Loc::DerefRef(reg, .., off) => { self.code.encode(instrs::ld(p, reg, off, size as _)); } - Loc::Imm(imm) => { - self.code.encode(instrs::li64(p, imm)); - } + Loc::Imm(imm) => self.code.encode(instrs::li64(p, imm)), Loc::Stack(ref stack, off) => { self.load_stack(p, stack.offset + off, size as _); - self.load_stack(parama.next().unwrap(), stack.offset + off + 8, size as _); + if size > 8 { + parama.next().unwrap(); + } } } } @@ -1828,8 +1900,8 @@ impl<'a> Codegen<'a> { } fn report(&self, pos: parser::Pos, msg: impl std::fmt::Display) -> ! { - let (line, col) = lexer::line_col(self.input, pos); - println!("{}:{}:{}: {}", self.path, line, col, msg); + let (line, col) = self.cf.nlines.line_col(pos); + println!("{}:{}:{}: {}", self.cf.path, line, col, msg); unreachable!(); } @@ -1939,86 +2011,67 @@ impl Loc { } } +#[derive(Default)] +pub struct LoggedMem { + pub mem: hbvm::mem::HostMemory, +} + +impl hbvm::mem::Memory for LoggedMem { + unsafe fn load( + &mut self, + addr: hbvm::mem::Address, + target: *mut u8, + count: usize, + ) -> Result<(), hbvm::mem::LoadError> { + log::dbg!( + "load: {:x} {:?}", + addr.get(), + core::slice::from_raw_parts(addr.get() as *const u8, count) + ); + self.mem.load(addr, target, count) + } + + unsafe fn store( + &mut self, + addr: hbvm::mem::Address, + source: *const u8, + count: usize, + ) -> Result<(), hbvm::mem::StoreError> { + log::dbg!( + "store: {:x} {:?}", + addr.get(), + core::slice::from_raw_parts(source, count) + ); + self.mem.store(addr, source, count) + } + + unsafe fn prog_read(&mut self, addr: hbvm::mem::Address) -> T { + log::dbg!( + "read-typed: {:x} {} {:?}", + addr.get(), + std::any::type_name::(), + if core::mem::size_of::() == 1 { + instrs::NAMES[std::ptr::read(addr.get() as *const u8) as usize].to_string() + } else { + core::slice::from_raw_parts(addr.get() as *const u8, core::mem::size_of::()) + .iter() + .map(|&b| format!("{:02x}", b)) + .collect::() + } + ); + self.mem.prog_read(addr) + } +} + #[cfg(test)] mod tests { - use crate::{instrs, log}; - use super::parser; - struct TestMem; - - impl hbvm::mem::Memory for TestMem { - #[inline] - unsafe fn load( - &mut self, - addr: hbvm::mem::Address, - target: *mut u8, - count: usize, - ) -> Result<(), hbvm::mem::LoadError> { - log::dbg!( - "read: {:x} {} {:?}", - addr.get(), - count, - core::slice::from_raw_parts(addr.get() as *const u8, count) - .iter() - .rev() - .skip_while(|&&b| b == 0) - .map(|&b| format!("{:02x}", b)) - .collect::() - ); - unsafe { core::ptr::copy(addr.get() as *const u8, target, count) } - Ok(()) - } - - #[inline] - unsafe fn store( - &mut self, - addr: hbvm::mem::Address, - source: *const u8, - count: usize, - ) -> Result<(), hbvm::mem::StoreError> { - log::dbg!( - "write: {:x} {} {:?}", - addr.get(), - count, - core::slice::from_raw_parts(source, count) - .iter() - .rev() - .skip_while(|&&b| b == 0) - .map(|&b| format!("{:02x}", b)) - .collect::() - ); - unsafe { core::ptr::copy(source, addr.get() as *mut u8, count) } - Ok(()) - } - - #[inline] - unsafe fn prog_read(&mut self, addr: hbvm::mem::Address) -> T { - log::dbg!( - "read-typed: {:x} {} {:?}", - addr.get(), - std::any::type_name::(), - if core::mem::size_of::() == 1 { - instrs::NAMES[std::ptr::read(addr.get() as *const u8) as usize].to_string() - } else { - core::slice::from_raw_parts(addr.get() as *const u8, core::mem::size_of::()) - .iter() - .map(|&b| format!("{:02x}", b)) - .collect::() - } - ); - unsafe { core::ptr::read(addr.get() as *const T) } - } - } - fn generate(input: &'static str, output: &mut String) { let path = "test"; - let arena = crate::parser::Arena::default(); - let mut symbols = crate::parser::Symbols::new(); - let mut parser = parser::Parser::new(&arena, &mut symbols, &parser::no_loader); - let exprs = parser.file(input, path); let mut codegen = super::Codegen::default(); - codegen.file(path, input.as_bytes(), &exprs); + codegen.files = vec![parser::Ast::new(path, input, &parser::no_loader)]; + codegen.generate(); let mut out = Vec::new(); codegen.dump(&mut out).unwrap(); @@ -2027,7 +2080,10 @@ mod tests { let mut stack = [0_u64; 128]; let mut vm = unsafe { - hbvm::Vm::::new(TestMem, hbvm::mem::Address::new(out.as_ptr() as u64)) + hbvm::Vm::<_, 0>::new( + hbvm::mem::HostMemory, + hbvm::mem::Address::new(out.as_ptr() as u64), + ) }; vm.write_reg( diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs index df2f9188..a81a2999 100644 --- a/hblang/src/lexer.rs +++ b/hblang/src/lexer.rs @@ -1,3 +1,5 @@ +use std::simd::cmp::SimdPartialEq; + #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct Token { pub kind: TokenKind, @@ -290,85 +292,117 @@ impl<'a> Lexer<'a> { } } -pub fn line_col(bytes: &[u8], mut start: u32) -> (usize, usize) { - bytes +pub fn line_col(bytes: &[u8], pos: u32) -> (usize, usize) { + bytes[..pos as usize] .split(|&b| b == b'\n') + .map(<[u8]>::len) .enumerate() - .find_map(|(i, line)| { - if start < line.len() as u32 { - return Some((i + 1, start as usize + 1)); - } - start -= line.len() as u32 + 1; - None - }) + .last() + .map(|(line, col)| (line + 1, col + 1)) .unwrap_or((1, 1)) } -impl<'a> Iterator for Lexer<'a> { - type Item = Token; +pub struct LineMap { + lines: Box<[u8]>, +} - fn next(&mut self) -> Option { - use TokenKind as T; - loop { - let mut start = self.pos; - let kind = match self.advance()? { - b'\n' | b'\r' | b'\t' | b' ' => continue, - b'0'..=b'9' => { - while let Some(b'0'..=b'9') = self.peek() { - self.advance(); - } - T::Number +impl LineMap { + pub fn line_col(&self, mut pos: u32) -> (usize, usize) { + let mut line = 1; + + let mut iter = self.lines.iter().copied(); + + while let Some(mut len) = iter.next() { + let mut acc = 0; + while len & 0x80 != 0 { + acc = (acc << 7) | (len & 0x7F) as u32; + len = iter.next().unwrap(); + } + acc += len as u32; + + if pos < acc { + break; + } + pos = pos.saturating_sub(acc); + line += 1; + } + + (line, pos as usize + 1) + } + + pub fn new(input: &str) -> Self { + let bytes = input.as_bytes(); + let (start, simd_mid, end) = bytes.as_simd::<16>(); + + let query = std::simd::u8x16::splat(b'\n'); + + let nl_count = start.iter().map(|&b| (b == b'\n') as usize).sum::() + + simd_mid + .iter() + .map(|s| s.simd_eq(query).to_bitmask().count_ones()) + .sum::() as usize + + end.iter().map(|&b| (b == b'\n') as usize).sum::(); + + let mut lines = Vec::with_capacity(nl_count); + let mut last_nl = 0; + + let handle_rem = |offset: usize, bytes: &[u8], last_nl: &mut usize, lines: &mut Vec| { + bytes + .iter() + .copied() + .enumerate() + .filter_map(|(i, b)| (b == b'\n').then_some(i + offset)) + .for_each(|i| { + lines.push((i - *last_nl + 1) as u8); + *last_nl = i + 1; + }); + }; + + handle_rem(0, start, &mut last_nl, &mut lines); + + for (i, simd) in simd_mid.iter().enumerate() { + let mask = simd.simd_eq(query); + let mut mask = mask.to_bitmask(); + while mask != 0 { + let idx = mask.trailing_zeros() as usize + i * 16 + start.len(); + let mut len = idx - last_nl + 1; + while len >= 0x80 { + lines.push((0x80 | (len & 0x7F)) as u8); + len >>= 7; } - c @ (b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'@') => { - while let Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_') = self.peek() { - self.advance(); - } + lines.push(len as u8); + last_nl = idx + 1; + mask &= mask - 1; + } + } - if c == b'@' { - start += 1; - T::Driective - } else { - let ident = &self.bytes[start as usize..self.pos as usize]; - T::from_ident(ident) - } - } - b':' if self.advance_if(b'=') => T::Decl, - b':' => T::Colon, - b',' => T::Comma, - b'.' if self.advance_if(b'{') => T::Ctor, - b'.' if self.advance_if(b'(') => T::Tupl, - b'.' => T::Dot, - b';' => T::Semi, - b'!' if self.advance_if(b'=') => T::Ne, - b'=' if self.advance_if(b'=') => T::Eq, - b'=' => T::Assign, - b'<' if self.advance_if(b'=') => T::Le, - b'<' => T::Lt, - b'>' if self.advance_if(b'=') => T::Ge, - b'>' => T::Gt, - b'+' => T::Add, - b'-' => T::Sub, - b'*' => T::Mul, - b'/' => T::Div, - b'&' => T::Band, - b'(' => T::LParen, - b')' => T::RParen, - b'{' => T::LBrace, - b'}' => T::RBrace, - _ => T::Error, - }; + handle_rem(bytes.len() - end.len(), end, &mut last_nl, &mut lines); - return Some(Token { - kind, - start, - end: self.pos, - }); + Self { + lines: Box::from(lines), } } } #[cfg(test)] mod tests { + fn map_lines(input: &'static str, _: &mut String) { + let line_map = super::LineMap::new(input); + for i in 0..input.len() { + assert_eq!( + line_map.line_col(i as u32), + //line_map.line_col(i as u32), + super::line_col(input.as_bytes(), i as u32) + ); + } + } + + crate::run_tests! { map_lines: + empty_file => ""; + log_line => " ".repeat(1000).leak(); + this_file => &include_str!("parser.rs")[..1000]; + } + fn lex(input: &'static str, output: &mut String) { use { super::{Lexer, TokenKind as T}, diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index bcefba40..3cb9b322 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -1,4 +1,6 @@ +#![feature(if_let_guard)] #![feature(noop_waker)] +#![feature(portable_simd)] #![feature(iter_collect_into)] #![feature(macro_metavar_expr)] #![feature(let_chains)] @@ -7,11 +9,7 @@ #![feature(slice_ptr_get)] #![allow(dead_code)] -use std::{ - collections::{HashSet, VecDeque}, - io, - sync::{mpsc, Arc, Mutex}, -}; +use std::{collections::VecDeque, sync::Mutex}; #[macro_export] macro_rules! run_tests { diff --git a/hblang/src/main.rs b/hblang/src/main.rs index 5dc28a62..e0177a69 100644 --- a/hblang/src/main.rs +++ b/hblang/src/main.rs @@ -14,13 +14,11 @@ fn main() -> io::Result<()> { .skip(1) .map(|path| std::fs::read_to_string(&path).map(|src| (path, src))) .collect::>>()?; - let mut arena = parser::Arena::default(); + let mut codegen = codegen::Codegen::default(); for (path, content) in files.iter() { - let mut parser = parser::Parser::new(&arena, &mut codegen.symbols, &parser::no_loader); - let file = parser.file(&path, content.as_str()); - codegen.file(path, content.as_bytes(), file); - arena.clear(); + codegen.files = vec![parser::Ast::new(&path, &content, &parser::no_loader)]; + codegen.generate(); } codegen.dump(&mut std::io::stdout()) } diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs index 1a1b761b..30134db8 100644 --- a/hblang/src/parser.rs +++ b/hblang/src/parser.rs @@ -1,20 +1,17 @@ use std::{ cell::{Cell, UnsafeCell}, - collections::{HashMap, HashSet}, + collections::HashMap, io::{self, Read}, - ops::Not, + ops::{Deref, Not}, path::{Path, PathBuf}, ptr::NonNull, - sync::{ - atomic::{AtomicU32, AtomicUsize}, - Mutex, - }, + sync::{atomic::AtomicUsize, Mutex}, }; use crate::{ codegen::bt, ident::{self, Ident}, - lexer::{Lexer, Token, TokenKind}, + lexer::{Lexer, LineMap, Token, TokenKind}, TaskQueue, }; @@ -22,13 +19,13 @@ pub type Pos = u32; pub type IdentFlags = u32; pub type Symbols = Vec; pub type FileId = u32; -pub type Loader<'a> = &'a (dyn Fn(&str, &str) -> io::Result> + 'a); +pub type Loader<'a> = &'a (dyn Fn(&str, &str) -> io::Result + 'a); -pub const MUTABLE: IdentFlags = 1 << std::mem::size_of::() * 8 - 1; -pub const REFERENCED: IdentFlags = 1 << std::mem::size_of::() * 8 - 2; +pub const MUTABLE: IdentFlags = 1 << (std::mem::size_of::() * 8 - 1); +pub const REFERENCED: IdentFlags = 1 << (std::mem::size_of::() * 8 - 2); const GIT_DEPS_DIR: &str = "git-deps"; -pub fn parse_all(root: &str, threads: usize) -> io::Result> { +pub fn parse_all(threads: usize) -> io::Result> { enum ImportPath<'a> { Root { path: &'a str, @@ -86,9 +83,9 @@ pub fn parse_all(root: &str, threads: usize) -> io::Result> { } impl<'a> ImportPath<'a> { - fn resolve(&self, from: &str, root: &str) -> Result { + fn resolve(&self, from: &str) -> Result { match self { - Self::Root { path } => Ok(PathBuf::from_iter([root, path])), + Self::Root { path } => Ok(Path::new(path).to_owned()), Self::Rel { path } => { let path = PathBuf::from_iter([from, path]); match path.canonicalize() { @@ -98,7 +95,7 @@ pub fn parse_all(root: &str, threads: usize) -> io::Result> { } Self::Git { path, link, .. } => { let link = preprocess_git(link); - Ok(PathBuf::from_iter([root, GIT_DEPS_DIR, link, path])) + Ok(PathBuf::from_iter([GIT_DEPS_DIR, link, path])) } } } @@ -179,17 +176,7 @@ pub fn parse_all(root: &str, threads: usize) -> io::Result> { } } - enum Task { - LoadFile { - id: FileId, - physiscal_path: PathBuf, - }, - FetchGit { - id: FileId, - physiscal_path: PathBuf, - command: std::process::Command, - }, - } + type Task = (FileId, PathBuf, Option); let seen = Mutex::new(HashMap::::new()); let tasks = TaskQueue::::new(threads); @@ -198,14 +185,14 @@ pub fn parse_all(root: &str, threads: usize) -> io::Result> { let loader = |path: &str, from: &str| { let path = ImportPath::try_from(path)?; - let physiscal_path = path.resolve(from, root)?; + let physiscal_path = path.resolve(from)?; let id = { let mut seen = seen.lock().unwrap(); let len = seen.len(); match seen.entry(physiscal_path.clone()) { std::collections::hash_map::Entry::Occupied(entry) => { - return Ok(Some(*entry.get())); + return Ok(*entry.get()); } std::collections::hash_map::Entry::Vacant(entry) => { entry.insert(len as _); @@ -214,91 +201,74 @@ pub fn parse_all(root: &str, threads: usize) -> io::Result> { } }; - if physiscal_path.exists() { - tasks.push(Task::LoadFile { id, physiscal_path }); - return Ok(Some(id)); - } + let command = if !physiscal_path.exists() { + let ImportPath::Git { + link, + branch, + rev, + tag, + .. + } = path + else { + return Err(io::Error::new( + io::ErrorKind::NotFound, + format!("can't find file: {}", physiscal_path.display()), + )); + }; - let ImportPath::Git { - link, - path, - branch, - rev, - tag, - } = path - else { - return Err(io::Error::new( - io::ErrorKind::NotFound, - format!("can't find file: {}", physiscal_path.display()), - )); + let root = PathBuf::from_iter([GIT_DEPS_DIR, preprocess_git(link)]); + + let mut command = std::process::Command::new("git"); + command + .args(["clone", "--depth", "1"]) + .args(branch.map(|b| ["--branch", b]).into_iter().flatten()) + .args(tag.map(|t| ["--tag", t]).into_iter().flatten()) + .args(rev.map(|r| ["--rev", r]).into_iter().flatten()) + .arg(link) + .arg(root); + Some(command) + } else { + None }; - let root = PathBuf::from_iter([root, GIT_DEPS_DIR, preprocess_git(link)]); - - let mut command = std::process::Command::new("git"); - command - .args(["clone", "--depth", "1"]) - .args(branch.map(|b| ["--branch", b]).into_iter().flatten()) - .args(tag.map(|t| ["--tag", t]).into_iter().flatten()) - .args(rev.map(|r| ["--rev", r]).into_iter().flatten()) - .arg(link) - .arg(root); - - tasks.push(Task::FetchGit { - id, - physiscal_path, - command, - }); - - Ok(Some(id)) + tasks.push((id, physiscal_path, command)); + Ok(id) }; - let load_from_path = |path: &Path, buffer: &mut Vec| -> io::Result { - let path = path.to_str().ok_or_else(|| { - io::Error::new( - io::ErrorKind::InvalidData, - "path contains invalid characters", - ) - })?; - let mut file = std::fs::File::open(&path)?; - file.read_to_end(buffer)?; - let src = std::str::from_utf8(buffer).map_err(InvalidFileData)?; - Ok(Ast::new(&path, src, &loader)) - }; - - let execute_task = |task: Task, buffer: &mut Vec| match task { - Task::LoadFile { id, physiscal_path } => (id, load_from_path(&physiscal_path, buffer)), - Task::FetchGit { - id, - physiscal_path, - mut command, - } => { - let output = match command.output() { - Ok(output) => output, - Err(e) => return (id, Err(e)), - }; + let execute_task = |(_, path, command): Task, buffer: &mut Vec| { + if let Some(mut command) = command { + let output = command.output()?; if !output.status.success() { let msg = format!( "git command failed: {}", String::from_utf8_lossy(&output.stderr) ); - return (id, Err(io::Error::new(io::ErrorKind::Other, msg))); + return Err(io::Error::new(io::ErrorKind::Other, msg)); } - (id, load_from_path(&physiscal_path, buffer)) } + + let path = path.to_str().ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("path contains invalid characters: {}", path.display()), + ) + })?; + let mut file = std::fs::File::open(path)?; + file.read_to_end(buffer)?; + let src = std::str::from_utf8(buffer).map_err(InvalidFileData)?; + Ok(Ast::new(path, src, &loader)) }; let thread = || { let mut buffer = Vec::new(); - while let Some(task) = tasks.pop() { - let (indx, res) = execute_task(task, &mut buffer); + while let Some(task @ (indx, ..)) = tasks.pop() { + let res = execute_task(task, &mut buffer); + buffer.clear(); let mut ast = ast.lock().unwrap(); let len = ast.len().max(indx as usize + 1); ast.resize_with(len, || Err(io::ErrorKind::InvalidData.into())); ast[indx as usize] = res; - - buffer.clear(); } }; @@ -314,10 +284,11 @@ pub fn ident_flag_index(flag: IdentFlags) -> u32 { flag & !(MUTABLE | REFERENCED) } -pub fn no_loader(_: &str, _: &str) -> io::Result> { - Ok(None) +pub fn no_loader(_: &str, _: &str) -> io::Result { + Err(io::ErrorKind::NotFound.into()) } +#[derive(Debug)] pub struct Symbol { pub name: Ident, pub flags: IdentFlags, @@ -490,6 +461,21 @@ impl<'a, 'b> Parser<'a, 'b> { let frame = self.idents.len(); let token = self.next(); let mut expr = match token.kind { + T::Driective if self.lexer.slice(token.range()) == "use" => { + self.expect_advance(TokenKind::LParen); + let str = self.expect_advance(TokenKind::String); + self.expect_advance(TokenKind::RParen); + let path = self.lexer.slice(str.range()).trim_matches('"'); + + E::Mod { + pos: token.start, + path: self.arena.alloc_str(path), + id: match (self.loader)(path, self.path) { + Ok(id) => id, + Err(e) => self.report(format_args!("error loading dependency: {e:#?}")), + }, + } + } T::Driective => E::Directive { pos: token.start, name: self.move_str(token), @@ -562,7 +548,9 @@ impl<'a, 'b> Parser<'a, 'b> { op: token.kind, val: { let expr = self.ptr_unit_expr(); - self.flag_idents(*expr, REFERENCED); + if token.kind == T::Band { + self.flag_idents(*expr, REFERENCED); + } expr }, }, @@ -808,6 +796,11 @@ pub enum Expr<'a> { name: &'a str, args: &'a [Self], }, + Mod { + pos: Pos, + id: FileId, + path: &'a str, + }, } impl<'a> Expr<'a> { @@ -816,6 +809,7 @@ impl<'a> Expr<'a> { Self::Call { func, .. } => func.pos(), Self::Ident { id, .. } => ident::pos(*id), Self::Break { pos } + | Self::Mod { pos, .. } | Self::Directive { pos, .. } | Self::Continue { pos } | Self::Closure { pos, .. } @@ -837,13 +831,13 @@ impl<'a> Expr<'a> { impl<'a> std::fmt::Display for Expr<'a> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { thread_local! { - static INDENT: Cell = Cell::new(0); + static INDENT: Cell = const { Cell::new(0) }; } - fn fmt_list<'a, T>( + fn fmt_list( f: &mut std::fmt::Formatter, end: &str, - list: &'a [T], + list: &[T], fmt: impl Fn(&T, &mut std::fmt::Formatter) -> std::fmt::Result, ) -> std::fmt::Result { let first = &mut true; @@ -881,9 +875,8 @@ impl<'a> std::fmt::Display for Expr<'a> { } match *self { - Self::Field { target, field } => { - write!(f, "{}.{field}", Postfix(target)) - } + Self::Mod { path, .. } => write!(f, "@mod(\"{path}\")"), + Self::Field { target, field } => write!(f, "{}.{field}", Postfix(target)), Self::Directive { name, args, .. } => { write!(f, "@{name}(")?; fmt_list(f, ")", args, std::fmt::Display::fmt) @@ -980,12 +973,14 @@ impl<'a> std::fmt::Display for Expr<'a> { } #[repr(C)] -struct AstInner { +pub struct AstInner { ref_count: AtomicUsize, mem: ArenaChunk, exprs: *const [Expr<'static>], - path: String, - symbols: T, + + pub path: Box, + pub nlines: LineMap, + pub symbols: T, } impl AstInner<[Symbol]> { @@ -1006,19 +1001,25 @@ impl AstInner<[Symbol]> { let layout = Self::layout(syms.len()); - let ptr = unsafe { std::alloc::alloc(layout) }; - let inner: *mut Self = std::ptr::from_raw_parts_mut(ptr as *mut _, syms.len()); unsafe { - *(inner as *mut AstInner<()>) = AstInner { - ref_count: AtomicUsize::new(1), - mem: ArenaChunk::default(), - exprs, - path: path.to_owned(), - symbols: (), - }; + let ptr = std::alloc::alloc(layout); + let inner: *mut Self = std::ptr::from_raw_parts_mut(ptr as *mut _, syms.len()); + + std::ptr::write( + inner as *mut AstInner<()>, + AstInner { + ref_count: AtomicUsize::new(1), + mem: arena.chunk.into_inner(), + exprs, + path: path.into(), + nlines: LineMap::new(content), + symbols: (), + }, + ); std::ptr::addr_of_mut!((*inner).symbols) .as_mut_ptr() .copy_from_nonoverlapping(syms.as_ptr(), syms.len()); + NonNull::new_unchecked(inner) } } @@ -1036,17 +1037,44 @@ impl Ast { unsafe { &*self.inner().exprs } } - pub fn symbols(&self) -> &[Symbol] { - &self.inner().symbols - } - - pub fn path(&self) -> &str { - &self.inner().path - } - fn inner(&self) -> &AstInner<[Symbol]> { unsafe { self.0.as_ref() } } + + pub fn find_decl(&self, id: Result) -> Option<&Expr> { + self.exprs().iter().find_map(|expr| match expr { + Expr::BinOp { + left: &Expr::Ident { id: iden, name, .. }, + op: TokenKind::Decl, + .. + } if Ok(iden) == id || Err(name) == id => Some(expr), + _ => None, + }) + } +} + +impl Default for Ast { + fn default() -> Self { + Self(AstInner::new("", "", &no_loader)) + } +} + +#[derive(Clone, Copy)] +#[repr(packed)] +pub struct ExprRef(NonNull>); + +impl ExprRef { + pub fn new(expr: &Expr) -> Self { + Self(NonNull::from(expr).cast()) + } + + pub fn get<'a>(&self, from: &'a Ast) -> Option<&'a Expr<'a>> { + // SAFETY: the pointer is or was a valid reference in the past, if it points within one of + // arenas regions, it muts be walid, since arena does not give invalid pointers to its + // allocations + ArenaChunk::contains(from.mem.base, self.0.as_ptr() as _) + .then(|| unsafe { { self.0 }.as_ref() }) + } } unsafe impl Send for Ast {} @@ -1079,6 +1107,14 @@ impl Drop for Ast { } } +impl Deref for Ast { + type Target = AstInner<[Symbol]>; + + fn deref(&self) -> &Self::Target { + self.inner() + } +} + #[derive(Default)] pub struct Arena<'a> { chunk: UnsafeCell, @@ -1092,6 +1128,10 @@ impl<'a> Arena<'a> { } pub fn alloc(&self, value: T) -> &'a mut T { + if std::mem::size_of::() == 0 { + return unsafe { NonNull::dangling().as_mut() }; + } + let layout = std::alloc::Layout::new::(); let ptr = self.alloc_low(layout); unsafe { ptr.cast::().write(value) }; @@ -1099,6 +1139,10 @@ impl<'a> Arena<'a> { } pub fn alloc_slice(&self, slice: &[T]) -> &'a mut [T] { + if slice.is_empty() || std::mem::size_of::() == 0 { + return &mut []; + } + let layout = std::alloc::Layout::array::(slice.len()).unwrap(); let ptr = self.alloc_low(layout); unsafe { @@ -1109,23 +1153,6 @@ impl<'a> Arena<'a> { unsafe { std::slice::from_raw_parts_mut(ptr.as_ptr() as _, slice.len()) } } - pub fn clear(&mut self) { - let chunk = self.chunk.get_mut(); - if chunk.base.is_null() { - return; - } - - loop { - let prev = ArenaChunk::prev(chunk.base); - if prev.is_null() { - break; - } - chunk.base = prev; - } - - chunk.end = unsafe { chunk.base.add(ArenaChunk::PREV_OFFSET) }; - } - fn alloc_low(&self, layout: std::alloc::Layout) -> NonNull { assert!(layout.align() <= ArenaChunk::ALIGN); assert!(layout.size() <= ArenaChunk::CHUNK_SIZE); @@ -1136,10 +1163,8 @@ impl<'a> Arena<'a> { return ptr; } - if let Some(prev) = ArenaChunk::reset(ArenaChunk::prev(chunk.base)) { - *chunk = prev; - } else { - *chunk = ArenaChunk::new(chunk.base); + unsafe { + std::ptr::write(chunk, ArenaChunk::new(chunk.base)); } chunk.alloc(layout).unwrap() @@ -1164,18 +1189,13 @@ impl ArenaChunk { const CHUNK_SIZE: usize = 1 << 16; const ALIGN: usize = std::mem::align_of::(); const NEXT_OFFSET: usize = Self::CHUNK_SIZE - std::mem::size_of::<*mut u8>(); - const PREV_OFFSET: usize = Self::NEXT_OFFSET - std::mem::size_of::<*mut u8>(); const LAYOUT: std::alloc::Layout = unsafe { std::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) }; fn new(next: *mut u8) -> Self { let base = unsafe { std::alloc::alloc(Self::LAYOUT) }; - let end = unsafe { base.add(Self::PREV_OFFSET) }; - if !next.is_null() { - Self::set_prev(next, base); - } + let end = unsafe { base.add(Self::NEXT_OFFSET) }; Self::set_next(base, next); - Self::set_prev(base, std::ptr::null_mut()); Self { base, end } } @@ -1183,21 +1203,10 @@ impl ArenaChunk { unsafe { std::ptr::write(curr.add(Self::NEXT_OFFSET) as *mut _, next) }; } - fn set_prev(curr: *mut u8, prev: *mut u8) { - unsafe { std::ptr::write(curr.add(Self::PREV_OFFSET) as *mut _, prev) }; - } - fn next(curr: *mut u8) -> *mut u8 { unsafe { std::ptr::read(curr.add(Self::NEXT_OFFSET) as *mut _) } } - fn prev(curr: *mut u8) -> *mut u8 { - if curr.is_null() { - return std::ptr::null_mut(); - } - unsafe { std::ptr::read(curr.add(Self::PREV_OFFSET) as *mut _) } - } - fn reset(prev: *mut u8) -> Option { if prev.is_null() { return None; @@ -1218,19 +1227,17 @@ impl ArenaChunk { unsafe { self.end = self.end.sub(size) }; unsafe { Some(NonNull::new_unchecked(self.end)) } } + + fn contains(base: *mut u8, arg: *mut u8) -> bool { + !base.is_null() + && ((unsafe { base.add(Self::CHUNK_SIZE) } > arg && base <= arg) + || Self::contains(Self::next(base), arg)) + } } impl Drop for ArenaChunk { fn drop(&mut self) { let mut current = self.base; - - let mut prev = Self::prev(current); - while !prev.is_null() { - let next = Self::prev(prev); - unsafe { std::alloc::dealloc(prev, Self::LAYOUT) }; - prev = next; - } - while !current.is_null() { let next = Self::next(current); unsafe { std::alloc::dealloc(current, Self::LAYOUT) }; @@ -1241,16 +1248,14 @@ impl Drop for ArenaChunk { #[cfg(test)] mod tests { - fn parse(input: &'static str, output: &mut String) { use std::fmt::Write; - let mut arena = super::Arena::default(); + let arena = super::Arena::default(); let mut symbols = Vec::new(); let mut parser = super::Parser::new(&arena, &mut symbols, &super::no_loader); for expr in parser.file(input, "test") { writeln!(output, "{}", expr).unwrap(); } - arena.clear(); } crate::run_tests! { parse: diff --git a/hblang/tests/codegen_tests_directives.txt b/hblang/tests/codegen_tests_directives.txt index 1322855d..544a0859 100644 --- a/hblang/tests/codegen_tests_directives.txt +++ b/hblang/tests/codegen_tests_directives.txt @@ -1,4 +1,4 @@ ev: Ecall -code size: 217 +code size: 204 ret: 0 status: Ok(()) diff --git a/hblang/tests/codegen_tests_fb_driver.txt b/hblang/tests/codegen_tests_fb_driver.txt index 18370656..1722fdef 100644 --- a/hblang/tests/codegen_tests_fb_driver.txt +++ b/hblang/tests/codegen_tests_fb_driver.txt @@ -1,3 +1,3 @@ -code size: 657 +code size: 569 ret: 0 status: Ok(()) diff --git a/hblang/tests/codegen_tests_functions.txt b/hblang/tests/codegen_tests_functions.txt index 31385596..5c507354 100644 --- a/hblang/tests/codegen_tests_functions.txt +++ b/hblang/tests/codegen_tests_functions.txt @@ -1,3 +1,3 @@ -code size: 308 -ret: 44 +code size: 314 +ret: 33 status: Ok(()) diff --git a/hblang/tests/codegen_tests_global_variables.txt b/hblang/tests/codegen_tests_global_variables.txt new file mode 100644 index 00000000..ec0ebe78 --- /dev/null +++ b/hblang/tests/codegen_tests_global_variables.txt @@ -0,0 +1,3 @@ +code size: 305 +ret: 50 +status: Ok(()) diff --git a/hblang/tests/codegen_tests_if_statements.txt b/hblang/tests/codegen_tests_if_statements.txt index d33af68e..a26ad5eb 100644 --- a/hblang/tests/codegen_tests_if_statements.txt +++ b/hblang/tests/codegen_tests_if_statements.txt @@ -1,3 +1,3 @@ -code size: 281 -ret: 16 +code size: 287 +ret: 55 status: Ok(()) diff --git a/hblang/tests/codegen_tests_pointers.txt b/hblang/tests/codegen_tests_pointers.txt index b2c64186..337f642f 100644 --- a/hblang/tests/codegen_tests_pointers.txt +++ b/hblang/tests/codegen_tests_pointers.txt @@ -1,3 +1,3 @@ -code size: 379 +code size: 366 ret: 0 status: Ok(()) diff --git a/hblang/tests/codegen_tests_structs.txt b/hblang/tests/codegen_tests_structs.txt index 00a0fbe4..1141089d 100644 --- a/hblang/tests/codegen_tests_structs.txt +++ b/hblang/tests/codegen_tests_structs.txt @@ -1,3 +1,3 @@ -code size: 461 +code size: 464 ret: 3 status: Ok(()) diff --git a/hbvm/src/mem/mod.rs b/hbvm/src/mem/mod.rs index 7387abee..34c8af6a 100644 --- a/hbvm/src/mem/mod.rs +++ b/hbvm/src/mem/mod.rs @@ -78,3 +78,37 @@ impl From for crate::VmRunError { Self::StoreAccessEx(value.0) } } + +#[derive(Default)] +pub struct HostMemory; +impl Memory for HostMemory { + #[inline] + unsafe fn load( + &mut self, + addr: Address, + target: *mut u8, + count: usize, + ) -> Result<(), LoadError> { + unsafe { core::ptr::copy(addr.get() as *const u8, target, count) } + Ok(()) + } + + #[inline] + unsafe fn store( + &mut self, + addr: Address, + source: *const u8, + count: usize, + ) -> Result<(), StoreError> { + debug_assert!(addr.get() != 0); + debug_assert!(source != core::ptr::null()); + unsafe { core::ptr::copy(source, addr.get() as *mut u8, count) } + Ok(()) + } + + #[inline] + unsafe fn prog_read(&mut self, addr: Address) -> T { + debug_assert!(addr.get() != 0); + unsafe { core::ptr::read(addr.get() as *const T) } + } +} diff --git a/hbxrt/src/main.rs b/hbxrt/src/main.rs index 6f72abec..2c0dde06 100644 --- a/hbxrt/src/main.rs +++ b/hbxrt/src/main.rs @@ -54,7 +54,7 @@ fn main() -> Result<(), Box> { let mut vm = unsafe { Vm::<_, 0>::new( - mem::HostMemory, + hbvm::mem::HostMemory, Address::new(mmap.as_ptr().add(stack.len()) as u64), ) }; diff --git a/hbxrt/src/mem.rs b/hbxrt/src/mem.rs index 323e9254..b5062cba 100644 --- a/hbxrt/src/mem.rs +++ b/hbxrt/src/mem.rs @@ -1,37 +1,5 @@ use std::alloc::Layout; -use hbvm::mem::{Address, LoadError, Memory, StoreError}; - -pub struct HostMemory; -impl Memory for HostMemory { - #[inline] - unsafe fn load( - &mut self, - addr: Address, - target: *mut u8, - count: usize, - ) -> Result<(), LoadError> { - unsafe { core::ptr::copy(addr.get() as *const u8, target, count) } - Ok(()) - } - - #[inline] - unsafe fn store( - &mut self, - addr: Address, - source: *const u8, - count: usize, - ) -> Result<(), StoreError> { - unsafe { core::ptr::copy(source, addr.get() as *mut u8, count) } - Ok(()) - } - - #[inline] - unsafe fn prog_read(&mut self, addr: Address) -> T { - unsafe { core::ptr::read(addr.get() as *const T) } - } -} - const STACK_SIZE: usize = 2; // MiB type Stack = [u8; 1024 * 1024 * STACK_SIZE]; diff --git a/tests/codegen_tests_arithmetic.txt b/tests/codegen_tests_arithmetic.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/codegen_tests_directives.txt b/tests/codegen_tests_directives.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/codegen_tests_example.txt b/tests/codegen_tests_example.txt new file mode 100644 index 00000000..e69de29b