From bcbe47bcd67e9340c654a83e4675f8c98945e485 Mon Sep 17 00:00:00 2001 From: mlokr Date: Sat, 3 Feb 2024 21:43:30 +0100 Subject: [PATCH] some more progress on codegen --- hbbytecode/src/lib.rs | 56 +++--- hblang/src/codegen.rs | 397 ++++++++++++++++++++++++++++++++++++++++-- hblang/src/lexer.rs | 1 + hblang/src/lib.rs | 2 + hblang/src/parser.rs | 38 ++-- 5 files changed, 434 insertions(+), 60 deletions(-) diff --git a/hbbytecode/src/lib.rs b/hbbytecode/src/lib.rs index 6a91fbcb..a6ccc5ae 100644 --- a/hbbytecode/src/lib.rs +++ b/hbbytecode/src/lib.rs @@ -17,7 +17,7 @@ type OpD = u64; /// Has to be valid to be decoded from bytecode. pub unsafe trait BytecodeItem {} macro_rules! define_items { - ($($name:ident ($($item:ident),* $(,)?)),* $(,)?) => { + ($($name:ident ($($nm:ident: $item:ident),* $(,)?)),* $(,)?) => { $( #[derive(Clone, Copy, Debug)] #[repr(packed)] @@ -25,13 +25,13 @@ macro_rules! define_items { unsafe impl BytecodeItem for $name {} impl Encodable for $name { - fn encode(self, buffer: &mut impl Buffer) { - let array = unsafe { - core::mem::transmute::()]>(self) - }; - for byte in array { - unsafe { buffer.write(byte) }; - } + fn encode(self, _buffer: &mut impl Buffer) { + let Self($($nm),*) = self; + $( + for byte in $nm.to_le_bytes() { + unsafe { _buffer.write(byte) }; + } + )* } fn encode_len(self) -> usize { @@ -43,26 +43,26 @@ macro_rules! define_items { } define_items! { - OpsRR (OpR, OpR ), - OpsRRR (OpR, OpR, OpR ), - OpsRRRR (OpR, OpR, OpR, OpR), - OpsRRB (OpR, OpR, OpB ), - OpsRRH (OpR, OpR, OpH ), - OpsRRW (OpR, OpR, OpW ), - OpsRRD (OpR, OpR, OpD ), - OpsRB (OpR, OpB ), - OpsRH (OpR, OpH ), - OpsRW (OpR, OpW ), - OpsRD (OpR, OpD ), - OpsRRA (OpR, OpR, OpA ), - OpsRRAH (OpR, OpR, OpA, OpH), - OpsRROH (OpR, OpR, OpO, OpH), - OpsRRPH (OpR, OpR, OpP, OpH), - OpsRRO (OpR, OpR, OpO ), - OpsRRP (OpR, OpR, OpP ), - OpsO (OpO, ), - OpsP (OpP, ), - OpsN ( ), + OpsRR (a: OpR, b: OpR ), + OpsRRR (a: OpR, b: OpR, c: OpR ), + OpsRRRR (a: OpR, b: OpR, c: OpR, d: OpR), + OpsRRB (a: OpR, b: OpR, c: OpB ), + OpsRRH (a: OpR, b: OpR, c: OpH ), + OpsRRW (a: OpR, b: OpR, c: OpW ), + OpsRRD (a: OpR, b: OpR, c: OpD ), + OpsRB (a: OpR, b: OpB ), + OpsRH (a: OpR, b: OpH ), + OpsRW (a: OpR, b: OpW ), + OpsRD (a: OpR, b: OpD ), + OpsRRA (a: OpR, b: OpR, c: OpA ), + OpsRRAH (a: OpR, b: OpR, c: OpA, d: OpH), + OpsRROH (a: OpR, b: OpR, c: OpO, d: OpH), + OpsRRPH (a: OpR, b: OpR, c: OpP, d: OpH), + OpsRRO (a: OpR, b: OpR, c: OpO ), + OpsRRP (a: OpR, b: OpR, c: OpP ), + OpsO (a: OpO, ), + OpsP (a: OpP, ), + OpsN ( ), } unsafe impl BytecodeItem for u8 {} diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index 42e5b799..df705440 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -1,40 +1,403 @@ -use crate::parser::Type; +use std::{iter::Cycle, ops::Range}; + +use crate::{ + lexer::Ty, + parser::{Exp, Function, Item, Literal, Struct, Type}, + typechk::Type, +}; + +//| Register | Description | Saver | +//|:-----------|:--------------------|:-------| +//| r0 | Hard-wired zero | N/A | +//| r1 - r2 | Return values | Caller | +//| r2 - r11 | Function parameters | Caller | +//| r12 - r30 | General purpose | Caller | +//| r31 | Return address | Caller | +//| r32 - r253 | General purpose | Callee | +//| r254 | Stack pointer | Callee | +//| r255 | Thread pointer | N/A | struct RegAlloc { - pub regs: Box<[Option; 256]>, + pub regs: Box<[Option; 256]>, + pub used: Box<[bool; 256]>, + pub spill_cycle: Cycle>, +} + +impl RegAlloc { + fn alloc_regurn(&mut self, slot: SlotId) -> Option { + self.regs[1..2] + .iter_mut() + .position(|reg| { + if reg.is_none() { + *reg = Some(slot); + true + } else { + false + } + }) + .map(|reg| reg as Reg + 1) + } + + fn alloc_general(&mut self, slot: SlotId) -> Option { + self.regs[32..254] + .iter_mut() + .zip(&mut self.used[32..254]) + .position(|(reg, used)| { + if reg.is_none() { + *reg = Some(slot); + *used = true; + true + } else { + false + } + }) + .map(|reg| reg as Reg + 32) + } + + fn free(&mut self, reg: Reg) { + assert!(self.regs[reg as usize].take().is_some()); + } + + fn spill(&mut self, for_slot: SlotId) -> (Reg, SlotId) { + let to_spill = self.spill_cycle.next().unwrap(); + let slot = self.regs[to_spill].replace(for_slot).unwrap(); + (to_spill as Reg + 32, slot) + } + + fn restore(&mut self, reg: Reg, slot: SlotId) -> SlotId { + self.regs[reg as usize].replace(slot).unwrap() + } +} + +pub struct ParamAlloc { + reg_range: Range, + stack: Offset, +} + +impl ParamAlloc { + fn new(reg_range: Range) -> Self { + Self { + stack: 16, + reg_range, + } + } + + fn alloc(&mut self, mut size: usize) -> Value { + match self.try_alloc_regs(size) { + Some(reg) => reg, + None => panic!("Too many arguments o7"), + } + } + + fn try_alloc_regs(&mut self, size: usize) -> Option { + let mut needed = size.div_ceil(8); + if needed > 2 { + needed = 1; // passed by ref + } + + if self.reg_range.len() < needed { + return None; + } + + match needed { + 1 => { + let reg = self.reg_range.start; + self.reg_range.start += 1; + Some(Value::Reg(reg)) + } + 2 => { + let reg = self.reg_range.start; + self.reg_range.start += 2; + Some(Value::Pair(reg, reg + 1)) + } + _ => unreachable!(), + } + } +} + +impl Default for RegAlloc { + fn default() -> Self { + Self { + regs: Box::new([None; 256]), + used: Box::new([false; 256]), + spill_cycle: (32..254).cycle(), + } + } } struct Variable { name: String, - location: usize, + location: SlotId, } -enum Symbol { - Type(String, Type), - Func(String, Vec, Type), -} +type SlotId = usize; struct Slot { ty: Type, value: Value, } +#[repr(transparent)] +struct InstBuffer { + buffer: Vec, +} + +impl InstBuffer { + fn new(vec: &mut Vec) -> &mut Self { + unsafe { &mut *(vec as *mut Vec as *mut Self) } + } +} + +impl hbbytecode::Buffer for InstBuffer { + fn reserve(&mut self, bytes: usize) { + self.buffer.reserve(bytes); + } + + unsafe fn write(&mut self, byte: u8) { + self.buffer.push(byte); + } +} + +type Reg = u8; +type Offset = i32; + enum Value { - Reg(u8), - Stack(i32), + Pair(Reg, Reg), + Reg(Reg), + Stack(Offset), Imm(u64), + Spilled(Reg, SlotId), + DoubleSpilled(SlotId, Offset), } type Label = usize; +type Data = usize; -pub struct Generator { - regs: RegAlloc, - symbols: Vec, - variables: Vec, - slots: Vec, - relocations: Vec<(Label, usize)>, +pub struct LabelReloc { + pub label: Label, + pub offset: usize, } -impl Generator { - pub fn gen(); +pub struct DataReloc { + pub data: Data, + pub offset: usize, +} + +#[must_use] +pub struct Frame { + pub slot_count: usize, + pub var_count: usize, +} + +#[derive(Default)] +pub struct Generator<'a> { + ast: &'a [Item], + + func_labels: Vec<(String, Label)>, + + regs: RegAlloc, + variables: Vec, + slots: Vec, + + labels: Vec>, + label_relocs: Vec, + + data: Vec>, + data_relocs: Vec, + + code_section: Vec, + data_section: Vec, +} + +impl<'a> Generator<'a> { + fn generate(mut self) -> Vec { + for item in self.ast { + let Item::Function(f) = item else { continue }; + self.generate_function(f); + } + + self.link() + } + + fn generate_function(&mut self, f: &Function) { + let frame = self.push_frame(); + + let mut param_alloc = ParamAlloc::new(2..12); + + for param in f.args.iter() { + let param_size = self.size_of(¶m.ty); + let slot = self.add_slot(param.ty.clone(), param_alloc.alloc(param_size)); + self.add_variable(param.name.clone(), slot); + } + + for stmt in f.body.iter() { + assert!(self + .generate_expr(Some(Type::Builtin(Ty::Void)), stmt) + .is_none()); + } + + self.pop_frame(frame); + } + + fn generate_expr(&mut self, expected: Option, expr: &Exp) -> Option { + let value = match expr { + Exp::Literal(lit) => match lit { + Literal::Int(i) => self.add_slot(expected.unwrap(), Value::Imm(*i)), + Literal::Bool(b) => self.add_slot(Type::Builtin(Ty::Bool), Value::Imm(*b as u64)), + }, + Exp::Variable(ident) => self.lookup_variable(ident).unwrap().location, + Exp::Call { name, args } => todo!(), + Exp::Ctor { name, fields } => todo!(), + Exp::Index { base, index } => todo!(), + Exp::Field { base, field } => todo!(), + Exp::Unary { op, exp } => todo!(), + Exp::Binary { op, left, right } => todo!(), + Exp::If { cond, then, else_ } => todo!(), + Exp::Let { name, ty, value } => todo!(), + Exp::For { + init, + cond, + step, + block, + } => todo!(), + Exp::Block(_) => todo!(), + Exp::Return(_) => todo!(), + Exp::Break => todo!(), + Exp::Continue => todo!(), + }; + + if let Some(expected) = expected { + let actual = self.slots[value].ty.clone(); + assert_eq!(expected, actual); + } + + Some(value) + } + + fn size_of(&self, ty: &Type) -> usize { + match ty { + Type::Builtin(ty) => match ty { + Ty::U8 | Ty::I8 | Ty::Bool => 1, + Ty::U16 | Ty::I16 => 2, + Ty::U32 | Ty::I32 => 4, + Ty::U64 | Ty::I64 => 8, + Ty::Void => 0, + }, + Type::Struct(name) => self + .lookup_struct(name) + .fields + .iter() + .map(|field| self.size_of(&field.ty)) + .sum(), + Type::Pinter(_) => 8, + } + } + + fn add_variable(&mut self, name: String, location: SlotId) { + self.variables.push(Variable { name, location }); + } + + fn add_slot(&mut self, ty: Type, value: Value) -> SlotId { + let slot = self.slots.len(); + self.slots.push(Slot { ty, value }); + slot + } + + fn link(mut self) -> Vec { + for reloc in self.label_relocs { + let label = self.labels[reloc.label].unwrap(); + let offset = reloc.offset; + let target = label - offset; + let target_bytes = u64::to_le_bytes(target as u64); + self.code_section[offset..offset + 8].copy_from_slice(&target_bytes); + } + + for reloc in self.data_relocs { + let data = self.data[reloc.data].unwrap(); + let offset = reloc.offset; + let target = data; + let target_bytes = u64::to_le_bytes((target + self.code_section.len()) as u64); + self.data_section[offset..offset + 8].copy_from_slice(&target_bytes); + } + + self.code_section.extend_from_slice(&self.data_section); + self.code_section + } + + fn lookup_func_label(&mut self, name: &str) -> Label { + if let Some(label) = self.func_labels.iter().find(|(n, _)| n == name) { + return label.1; + } + + panic!("Function not found: {}", name); + } + + fn declare_label(&mut self) -> Label { + self.labels.push(None); + self.labels.len() - 1 + } + + fn define_label(&mut self, label: Label) { + self.labels[label] = Some(self.code_section.len()); + } + + fn declare_data(&mut self) -> Data { + self.data.push(None); + self.data.len() - 1 + } + + fn define_data(&mut self, data: Data, bytes: &[u8]) { + self.data[data] = Some(self.data.len()); + self.data_section.extend_from_slice(bytes); + } + + fn lookup_struct(&self, name: &str) -> &Struct { + self.lookup_item(name) + .and_then(|item| match item { + Item::Struct(s) => Some(s), + _ => panic!("Not a struct: {}", name), + }) + .expect("Struct not found") + } + + fn lookup_function(&self, name: &str) -> &Function { + self.lookup_item(name) + .and_then(|item| match item { + Item::Function(f) => Some(f), + _ => panic!("Not a function: {}", name), + }) + .expect("Function not found") + } + + fn lookup_item(&self, name: &str) -> Option<&Item> { + self.ast.iter().find(|item| match item { + Item::Import(_) => false, + Item::Struct(s) => s.name == name, + Item::Function(f) => f.name == name, + }) + } + + fn lookup_variable(&self, name: &str) -> Option<&Variable> { + self.variables.iter().find(|variable| variable.name == name) + } + + fn push_frame(&mut self) -> Frame { + Frame { + slot_count: self.slots.len(), + var_count: self.variables.len(), + } + } + + fn pop_frame(&mut self, frame: Frame) { + self.slots.truncate(frame.slot_count); + self.variables.truncate(frame.var_count); + } +} + +pub fn generate(ast: &[Item]) -> Vec { + Generator { + ast, + ..Default::default() + } + .generate() } diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs index d04b97c3..11ee2669 100644 --- a/hblang/src/lexer.rs +++ b/hblang/src/lexer.rs @@ -144,6 +144,7 @@ gen_token! { }, regexes: { Ident = "[a-zA-Z_][a-zA-Z0-9_]*", + String = r#""([^"\\]|\\.)*""#, Number = "[0-9]+", }, } diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index 17e1be45..04776791 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + mod codegen; mod lexer; mod parser; diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs index 799ef5b6..4bb0282a 100644 --- a/hblang/src/parser.rs +++ b/hblang/src/parser.rs @@ -13,7 +13,7 @@ pub enum Item { Function(Function), } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum Type { Builtin(Ty), Struct(String), @@ -22,6 +22,7 @@ pub enum Type { #[derive(Clone, Debug)] pub struct Struct { + pub name: String, pub fields: Vec, } @@ -104,7 +105,7 @@ pub enum Exp { #[derive(Clone, Debug)] pub enum Literal { - Int(i64), + Int(u64), Bool(bool), } @@ -197,6 +198,7 @@ impl<'a> Parser<'a> { match token.kind { TokenKind::Struct => Some(self.parse_struct()), TokenKind::Fn => Some(self.parse_function()), + TokenKind::Use => Some(Item::Import(self.expect(TokenKind::String).value)), tkn => { let (line, col) = self.pos_to_line_col(token.span.start); panic!("Unexpected {:?} at {}:{}", tkn, line, col) @@ -301,7 +303,6 @@ impl<'a> Parser<'a> { let value = token.value.parse().unwrap(); Exp::Literal(Literal::Int(value)) } - TokenKind::Fn => todo!(), TokenKind::Let => { let name = self.expect(TokenKind::Ident).value; let ty = self.try_advance(TokenKind::Colon).then(|| self.type_()); @@ -332,7 +333,6 @@ impl<'a> Parser<'a> { else_, } } - TokenKind::Else => todo!(), TokenKind::For => { let params = self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr); @@ -385,21 +385,10 @@ impl<'a> Parser<'a> { .then(|| Box::new(self.parse_expr())); Exp::Return(value) } - TokenKind::Break => todo!(), - TokenKind::Continue => todo!(), - TokenKind::Struct => todo!(), - TokenKind::RBrace => todo!(), - TokenKind::RParen => todo!(), - TokenKind::LBracket => todo!(), - TokenKind::RBracket => todo!(), - TokenKind::Colon => todo!(), - TokenKind::Semicolon => todo!(), - TokenKind::Comma => todo!(), TokenKind::Op(op) => Exp::Unary { op, exp: Box::new(self.parse_expr()), }, - TokenKind::Ty(_) => todo!(), TokenKind::Dot => { let token = self.expect_any(); match token.kind { @@ -417,6 +406,25 @@ impl<'a> Parser<'a> { } } } + + TokenKind::Ty(_) + | TokenKind::String + | TokenKind::Use + | TokenKind::Break + | TokenKind::Continue + | TokenKind::Struct + | TokenKind::RBrace + | TokenKind::RParen + | TokenKind::LBracket + | TokenKind::RBracket + | TokenKind::Colon + | TokenKind::Semicolon + | TokenKind::Comma + | TokenKind::Fn + | TokenKind::Else => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Unexpected {:?} at {}:{}", token.kind, line, col) + } }; loop {