some more progress on codegen

This commit is contained in:
mlokr 2024-02-03 21:43:30 +01:00
parent 30ee6c84fc
commit bcbe47bcd6
5 changed files with 434 additions and 60 deletions

View file

@ -17,7 +17,7 @@ type OpD = u64;
/// Has to be valid to be decoded from bytecode.
pub unsafe trait BytecodeItem {}
macro_rules! define_items {
($($name:ident ($($item:ident),* $(,)?)),* $(,)?) => {
($($name:ident ($($nm:ident: $item:ident),* $(,)?)),* $(,)?) => {
$(
#[derive(Clone, Copy, Debug)]
#[repr(packed)]
@ -25,13 +25,13 @@ macro_rules! define_items {
unsafe impl BytecodeItem for $name {}
impl Encodable for $name {
fn encode(self, buffer: &mut impl Buffer) {
let array = unsafe {
core::mem::transmute::<Self, [u8; core::mem::size_of::<Self>()]>(self)
};
for byte in array {
unsafe { buffer.write(byte) };
}
fn encode(self, _buffer: &mut impl Buffer) {
let Self($($nm),*) = self;
$(
for byte in $nm.to_le_bytes() {
unsafe { _buffer.write(byte) };
}
)*
}
fn encode_len(self) -> usize {
@ -43,26 +43,26 @@ macro_rules! define_items {
}
// Every operand field is named (`a`, `b`, ...) because the macro pattern is
// `$nm:ident: $item:ident`; `encode` destructures the tuple struct by those names.
// The older positional entries (`OpsRR (OpR, OpR)`) no longer match that pattern and
// would define every struct a second time, so only the named form is kept.
define_items! {
    OpsRR   (a: OpR, b: OpR                ),
    OpsRRR  (a: OpR, b: OpR, c: OpR        ),
    OpsRRRR (a: OpR, b: OpR, c: OpR, d: OpR),
    OpsRRB  (a: OpR, b: OpR, c: OpB        ),
    OpsRRH  (a: OpR, b: OpR, c: OpH        ),
    OpsRRW  (a: OpR, b: OpR, c: OpW        ),
    OpsRRD  (a: OpR, b: OpR, c: OpD        ),
    OpsRB   (a: OpR, b: OpB                ),
    OpsRH   (a: OpR, b: OpH                ),
    OpsRW   (a: OpR, b: OpW                ),
    OpsRD   (a: OpR, b: OpD                ),
    OpsRRA  (a: OpR, b: OpR, c: OpA        ),
    OpsRRAH (a: OpR, b: OpR, c: OpA, d: OpH),
    OpsRROH (a: OpR, b: OpR, c: OpO, d: OpH),
    OpsRRPH (a: OpR, b: OpR, c: OpP, d: OpH),
    OpsRRO  (a: OpR, b: OpR, c: OpO        ),
    OpsRRP  (a: OpR, b: OpR, c: OpP        ),
    OpsO    (a: OpO,                       ),
    OpsP    (a: OpP,                       ),
    OpsN    (                              ),
}
// SAFETY: `u8` has no invalid bit patterns, so any byte taken from bytecode decodes to a
// valid `u8`, which is what the `BytecodeItem` contract asks for.
unsafe impl BytecodeItem for u8 {}

View file

@ -1,40 +1,403 @@
use crate::parser::Type;
use std::{iter::Cycle, ops::Range};
use crate::{
lexer::Ty,
parser::{Exp, Function, Item, Literal, Struct, Type},
typechk::Type,
};
//| Register | Description | Saver |
//|:-----------|:--------------------|:-------|
//| r0 | Hard-wired zero | N/A |
//| r1 - r2 | Return values | Caller |
//| r2 - r11 | Function parameters | Caller |
//| r12 - r30 | General purpose | Caller |
//| r31 | Return address | Caller |
//| r32 - r253 | General purpose | Callee |
//| r254 | Stack pointer | Callee |
//| r255 | Thread pointer | N/A |
/// Bookkeeping for which slot, if any, currently lives in each of the 256 registers.
struct RegAlloc {
    /// `regs[r]` is the slot held in register `r`, or `None` while the register is free.
    pub regs: Box<[Option<usize>; 256]>,
    /// `used[r]` is set when `alloc_general` hands out register `r`; the visible code
    /// never clears it again.
    pub used: Box<[bool; 256]>,
    /// Endless round-robin over the register indices that `spill` evicts from.
    pub spill_cycle: Cycle<Range<usize>>,
}
impl RegAlloc {
    /// Claims a free return-value register for `slot`.
    ///
    /// Returns the register number, or `None` when none is free.
    ///
    /// NOTE(review): the searched range is `1..2`, i.e. only r1, while the ABI table
    /// lists r1 - r2 as return registers — confirm whether r2 should be included.
    fn alloc_regurn(&mut self, slot: SlotId) -> Option<Reg> {
        const FIRST: usize = 1;
        const END: usize = 2;

        let index = self.regs[FIRST..END].iter().position(Option::is_none)?;
        self.regs[FIRST + index] = Some(slot);
        Some((FIRST + index) as Reg)
    }

    /// Claims the lowest free register in r32..=r253 for `slot` and marks it as used.
    ///
    /// Returns the register number, or `None` when every register in that range is
    /// occupied.
    fn alloc_general(&mut self, slot: SlotId) -> Option<Reg> {
        const FIRST: usize = 32;
        const END: usize = 254;

        let index = self.regs[FIRST..END].iter().position(Option::is_none)?;
        self.regs[FIRST + index] = Some(slot);
        self.used[FIRST + index] = true;
        // FIRST + index is at most 253, so the cast cannot truncate.
        Some((FIRST + index) as Reg)
    }

    /// Releases `reg`.
    ///
    /// # Panics
    /// Panics if `reg` was not holding a slot.
    fn free(&mut self, reg: Reg) {
        let previous = self.regs[usize::from(reg)].take();
        assert!(
            previous.is_some(),
            "freeing register r{reg} which holds no slot"
        );
    }

    /// Evicts the next register in the spill cycle and gives it to `for_slot`.
    ///
    /// Returns the evicted register together with the slot that used to live in it.
    ///
    /// # Panics
    /// Panics if the chosen register was free.
    fn spill(&mut self, for_slot: SlotId) -> (Reg, SlotId) {
        // The cycle built in `Default` iterates 32..254, so it already yields absolute
        // register indices. Adding 32 again would name the wrong register and overflow
        // `u8` for indices >= 224.
        let to_spill = self
            .spill_cycle
            .next()
            .expect("spill cycle never terminates");
        let slot = self.regs[to_spill]
            .replace(for_slot)
            .expect("register chosen for spilling holds no slot");
        (to_spill as Reg, slot)
    }

    /// Puts `slot` back into `reg` and returns the slot that occupied it meanwhile.
    ///
    /// # Panics
    /// Panics if `reg` was free.
    fn restore(&mut self, reg: Reg, slot: SlotId) -> SlotId {
        self.regs[usize::from(reg)].replace(slot).unwrap()
    }
}
/// Hands out argument locations for one function signature.
pub struct ParamAlloc {
    /// Registers still available for parameters; `start` advances as they are taken.
    reg_range: Range<Reg>,
    /// Set to 16 by `new`; nothing in the visible code reads it yet.
    stack: Offset,
}
impl ParamAlloc {
    /// Creates an allocator that hands out the registers in `reg_range`.
    fn new(reg_range: Range<Reg>) -> Self {
        Self {
            reg_range,
            stack: 16,
        }
    }

    /// Allocates a location for a parameter of `size` bytes.
    ///
    /// # Panics
    /// Panics when no registers are left for it.
    fn alloc(&mut self, size: usize) -> Value {
        self.try_alloc_regs(size)
            .unwrap_or_else(|| panic!("Too many arguments o7"))
    }

    /// Tries to place a parameter of `size` bytes in registers.
    ///
    /// Up to 8 bytes take one register, up to 16 bytes take a pair, and anything larger
    /// takes a single register because it is passed by reference. Returns `None` when
    /// too few registers remain.
    fn try_alloc_regs(&mut self, size: usize) -> Option<Value> {
        let needed = match size.div_ceil(8) {
            words if words > 2 => 1, // passed by ref
            words => words,
        };
        if self.reg_range.len() < needed {
            return None;
        }

        let first = self.reg_range.start;
        let value = match needed {
            1 => Value::Reg(first),
            2 => Value::Pair(first, first + 1),
            _ => unreachable!(),
        };
        self.reg_range.start += needed as Reg;
        Some(value)
    }
}
impl Default for RegAlloc {
    /// Starts with every register free and unused, and a spill cycle over 32..254.
    fn default() -> Self {
        let regs = Box::new([None; 256]);
        let used = Box::new([false; 256]);
        let spill_cycle = (32..254).cycle();
        Self {
            regs,
            used,
            spill_cycle,
        }
    }
}
struct Variable {
name: String,
location: usize,
location: SlotId,
}
/// A named top-level entity.
///
/// NOTE(review): nothing in the visible code constructs or reads `Symbol`; the field
/// meanings below are inferred from the shapes only — confirm.
enum Symbol {
    /// Presumably a type name together with its definition.
    Type(String, Type),
    /// Presumably a function name, its parameter types and its return type.
    Func(String, Vec<Type>, Type),
}
/// Index of a `Slot` inside `Generator::slots`, as returned by `add_slot`.
type SlotId = usize;
/// A typed value tracked by the generator, addressed through a `SlotId`.
struct Slot {
    /// Static type of the value; `generate_expr` compares it with the expected type.
    ty: Type,
    /// Where the value currently lives.
    value: Value,
}
/// Byte sink for encoded instructions.
///
/// `#[repr(transparent)]` gives it the layout of its only field; `InstBuffer::new`
/// depends on that to view a `Vec<u8>` as an `InstBuffer` in place.
#[repr(transparent)]
struct InstBuffer {
    /// The bytes written so far.
    buffer: Vec<u8>,
}
impl InstBuffer {
    /// Views `vec` as an `InstBuffer` without copying; writes go straight into `vec`.
    fn new(vec: &mut Vec<u8>) -> &mut Self {
        let raw: *mut Vec<u8> = vec;
        // SAFETY: `InstBuffer` is `#[repr(transparent)]` over `Vec<u8>`, so both types
        // share layout. `raw` comes from a live, exclusive borrow.
        unsafe { &mut *raw.cast::<Self>() }
    }
}
impl hbbytecode::Buffer for InstBuffer {
    /// Reserves room for at least `bytes` more bytes.
    fn reserve(&mut self, bytes: usize) {
        Vec::reserve(&mut self.buffer, bytes);
    }

    /// Appends one byte.
    ///
    /// The trait declares this `unsafe`; this implementation only performs a safe
    /// `push`, which grows the vector on demand.
    unsafe fn write(&mut self, byte: u8) {
        Vec::push(&mut self.buffer, byte);
    }
}
/// Register number; indexes the 256-entry tables in `RegAlloc`.
type Reg = u8;
/// Signed offset used by `Value::Stack` and `ParamAlloc::stack`.
type Offset = i32;
enum Value {
Reg(u8),
Stack(i32),
Pair(Reg, Reg),
Reg(Reg),
Stack(Offset),
Imm(u64),
Spilled(Reg, SlotId),
DoubleSpilled(SlotId, Offset),
}
/// Index into `Generator::labels`, as returned by `declare_label`.
type Label = usize;
/// Index into `Generator::data`, as returned by `declare_data`.
type Data = usize;
pub struct Generator {
regs: RegAlloc,
symbols: Vec<Symbol>,
variables: Vec<Variable>,
slots: Vec<Slot>,
relocations: Vec<(Label, usize)>,
pub struct LabelReloc {
pub label: Label,
pub offset: usize,
}
impl Generator {
pub fn gen();
pub struct DataReloc {
pub data: Data,
pub offset: usize,
}
/// Snapshot of the generator's scope state, taken by `push_frame`.
///
/// `#[must_use]` because a frame that is dropped instead of being handed to
/// `pop_frame` leaves its slots and variables in place.
#[must_use]
pub struct Frame {
    /// Length of `Generator::slots` when the frame was opened.
    pub slot_count: usize,
    /// Length of `Generator::variables` when the frame was opened.
    pub var_count: usize,
}
/// State of one code-generation run over a parsed program.
#[derive(Default)]
pub struct Generator<'a> {
    /// Top-level items being compiled.
    ast: &'a [Item],
    /// Function name paired with its label; searched by `lookup_func_label`.
    func_labels: Vec<(String, Label)>,
    /// Register bookkeeping.
    regs: RegAlloc,
    /// Variables currently in scope; truncated by `pop_frame`.
    variables: Vec<Variable>,
    /// Every live slot, indexed by `SlotId`; truncated by `pop_frame`.
    slots: Vec<Slot>,
    /// Per label: `None` once declared, `Some(code offset)` once defined.
    labels: Vec<Option<usize>>,
    /// Label patches that `link` applies to the code section.
    label_relocs: Vec<LabelReloc>,
    /// Per data entry: `None` once declared, `Some(..)` once defined.
    data: Vec<Option<usize>>,
    /// Data patches that `link` applies.
    data_relocs: Vec<DataReloc>,
    /// Bytes of emitted code.
    code_section: Vec<u8>,
    /// Bytes of emitted data; `link` appends them after the code.
    data_section: Vec<u8>,
}
impl<'a> Generator<'a> {
fn generate(mut self) -> Vec<u8> {
for item in self.ast {
let Item::Function(f) = item else { continue };
self.generate_function(f);
}
self.link()
}
fn generate_function(&mut self, f: &Function) {
let frame = self.push_frame();
let mut param_alloc = ParamAlloc::new(2..12);
for param in f.args.iter() {
let param_size = self.size_of(&param.ty);
let slot = self.add_slot(param.ty.clone(), param_alloc.alloc(param_size));
self.add_variable(param.name.clone(), slot);
}
for stmt in f.body.iter() {
assert!(self
.generate_expr(Some(Type::Builtin(Ty::Void)), stmt)
.is_none());
}
self.pop_frame(frame);
}
fn generate_expr(&mut self, expected: Option<Type>, expr: &Exp) -> Option<SlotId> {
let value = match expr {
Exp::Literal(lit) => match lit {
Literal::Int(i) => self.add_slot(expected.unwrap(), Value::Imm(*i)),
Literal::Bool(b) => self.add_slot(Type::Builtin(Ty::Bool), Value::Imm(*b as u64)),
},
Exp::Variable(ident) => self.lookup_variable(ident).unwrap().location,
Exp::Call { name, args } => todo!(),
Exp::Ctor { name, fields } => todo!(),
Exp::Index { base, index } => todo!(),
Exp::Field { base, field } => todo!(),
Exp::Unary { op, exp } => todo!(),
Exp::Binary { op, left, right } => todo!(),
Exp::If { cond, then, else_ } => todo!(),
Exp::Let { name, ty, value } => todo!(),
Exp::For {
init,
cond,
step,
block,
} => todo!(),
Exp::Block(_) => todo!(),
Exp::Return(_) => todo!(),
Exp::Break => todo!(),
Exp::Continue => todo!(),
};
if let Some(expected) = expected {
let actual = self.slots[value].ty.clone();
assert_eq!(expected, actual);
}
Some(value)
}
fn size_of(&self, ty: &Type) -> usize {
match ty {
Type::Builtin(ty) => match ty {
Ty::U8 | Ty::I8 | Ty::Bool => 1,
Ty::U16 | Ty::I16 => 2,
Ty::U32 | Ty::I32 => 4,
Ty::U64 | Ty::I64 => 8,
Ty::Void => 0,
},
Type::Struct(name) => self
.lookup_struct(name)
.fields
.iter()
.map(|field| self.size_of(&field.ty))
.sum(),
Type::Pinter(_) => 8,
}
}
fn add_variable(&mut self, name: String, location: SlotId) {
self.variables.push(Variable { name, location });
}
fn add_slot(&mut self, ty: Type, value: Value) -> SlotId {
let slot = self.slots.len();
self.slots.push(Slot { ty, value });
slot
}
fn link(mut self) -> Vec<u8> {
for reloc in self.label_relocs {
let label = self.labels[reloc.label].unwrap();
let offset = reloc.offset;
let target = label - offset;
let target_bytes = u64::to_le_bytes(target as u64);
self.code_section[offset..offset + 8].copy_from_slice(&target_bytes);
}
for reloc in self.data_relocs {
let data = self.data[reloc.data].unwrap();
let offset = reloc.offset;
let target = data;
let target_bytes = u64::to_le_bytes((target + self.code_section.len()) as u64);
self.data_section[offset..offset + 8].copy_from_slice(&target_bytes);
}
self.code_section.extend_from_slice(&self.data_section);
self.code_section
}
fn lookup_func_label(&mut self, name: &str) -> Label {
if let Some(label) = self.func_labels.iter().find(|(n, _)| n == name) {
return label.1;
}
panic!("Function not found: {}", name);
}
fn declare_label(&mut self) -> Label {
self.labels.push(None);
self.labels.len() - 1
}
fn define_label(&mut self, label: Label) {
self.labels[label] = Some(self.code_section.len());
}
fn declare_data(&mut self) -> Data {
self.data.push(None);
self.data.len() - 1
}
fn define_data(&mut self, data: Data, bytes: &[u8]) {
self.data[data] = Some(self.data.len());
self.data_section.extend_from_slice(bytes);
}
fn lookup_struct(&self, name: &str) -> &Struct {
self.lookup_item(name)
.and_then(|item| match item {
Item::Struct(s) => Some(s),
_ => panic!("Not a struct: {}", name),
})
.expect("Struct not found")
}
fn lookup_function(&self, name: &str) -> &Function {
self.lookup_item(name)
.and_then(|item| match item {
Item::Function(f) => Some(f),
_ => panic!("Not a function: {}", name),
})
.expect("Function not found")
}
fn lookup_item(&self, name: &str) -> Option<&Item> {
self.ast.iter().find(|item| match item {
Item::Import(_) => false,
Item::Struct(s) => s.name == name,
Item::Function(f) => f.name == name,
})
}
fn lookup_variable(&self, name: &str) -> Option<&Variable> {
self.variables.iter().find(|variable| variable.name == name)
}
fn push_frame(&mut self) -> Frame {
Frame {
slot_count: self.slots.len(),
var_count: self.variables.len(),
}
}
fn pop_frame(&mut self, frame: Frame) {
self.slots.truncate(frame.slot_count);
self.variables.truncate(frame.var_count);
}
}
/// Compiles `ast` and returns the linked byte image.
pub fn generate(ast: &[Item]) -> Vec<u8> {
    let generator = Generator {
        ast,
        ..Default::default()
    };
    generator.generate()
}

View file

@ -144,6 +144,7 @@ gen_token! {
},
regexes: {
Ident = "[a-zA-Z_][a-zA-Z0-9_]*",
String = r#""([^"\\]|\\.)*""#,
Number = "[0-9]+",
},
}

View file

@ -1,3 +1,5 @@
#![allow(dead_code)]
mod codegen;
mod lexer;
mod parser;

View file

@ -13,7 +13,7 @@ pub enum Item {
Function(Function),
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Type {
Builtin(Ty),
Struct(String),
@ -22,6 +22,7 @@ pub enum Type {
/// A parsed struct declaration.
#[derive(Clone, Debug)]
pub struct Struct {
    /// Name the struct is looked up by.
    pub name: String,
    /// Fields of the struct, as a list.
    pub fields: Vec<Field>,
}
@ -104,7 +105,7 @@ pub enum Exp {
/// A literal value appearing in source code.
#[derive(Clone, Debug)]
pub enum Literal {
    /// Integer literal. Stored unsigned so it can be used directly as a `u64`
    /// immediate by the code generator.
    Int(u64),
    /// `true` or `false`.
    Bool(bool),
}
@ -197,6 +198,7 @@ impl<'a> Parser<'a> {
match token.kind {
TokenKind::Struct => Some(self.parse_struct()),
TokenKind::Fn => Some(self.parse_function()),
TokenKind::Use => Some(Item::Import(self.expect(TokenKind::String).value)),
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
@ -301,7 +303,6 @@ impl<'a> Parser<'a> {
let value = token.value.parse().unwrap();
Exp::Literal(Literal::Int(value))
}
TokenKind::Fn => todo!(),
TokenKind::Let => {
let name = self.expect(TokenKind::Ident).value;
let ty = self.try_advance(TokenKind::Colon).then(|| self.type_());
@ -332,7 +333,6 @@ impl<'a> Parser<'a> {
else_,
}
}
TokenKind::Else => todo!(),
TokenKind::For => {
let params =
self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr);
@ -385,21 +385,10 @@ impl<'a> Parser<'a> {
.then(|| Box::new(self.parse_expr()));
Exp::Return(value)
}
TokenKind::Break => todo!(),
TokenKind::Continue => todo!(),
TokenKind::Struct => todo!(),
TokenKind::RBrace => todo!(),
TokenKind::RParen => todo!(),
TokenKind::LBracket => todo!(),
TokenKind::RBracket => todo!(),
TokenKind::Colon => todo!(),
TokenKind::Semicolon => todo!(),
TokenKind::Comma => todo!(),
TokenKind::Op(op) => Exp::Unary {
op,
exp: Box::new(self.parse_expr()),
},
TokenKind::Ty(_) => todo!(),
TokenKind::Dot => {
let token = self.expect_any();
match token.kind {
@ -417,6 +406,25 @@ impl<'a> Parser<'a> {
}
}
}
TokenKind::Ty(_)
| TokenKind::String
| TokenKind::Use
| TokenKind::Break
| TokenKind::Continue
| TokenKind::Struct
| TokenKind::RBrace
| TokenKind::RParen
| TokenKind::LBracket
| TokenKind::RBracket
| TokenKind::Colon
| TokenKind::Semicolon
| TokenKind::Comma
| TokenKind::Fn
| TokenKind::Else => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", token.kind, line, col)
}
};
loop {