forked from AbleOS/holey-bytes
some more progress on codegen
This commit is contained in:
parent
30ee6c84fc
commit
bcbe47bcd6
|
@ -17,7 +17,7 @@ type OpD = u64;
|
|||
/// Has to be valid to be decoded from bytecode.
|
||||
pub unsafe trait BytecodeItem {}
|
||||
macro_rules! define_items {
|
||||
($($name:ident ($($item:ident),* $(,)?)),* $(,)?) => {
|
||||
($($name:ident ($($nm:ident: $item:ident),* $(,)?)),* $(,)?) => {
|
||||
$(
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[repr(packed)]
|
||||
|
@ -25,13 +25,13 @@ macro_rules! define_items {
|
|||
unsafe impl BytecodeItem for $name {}
|
||||
|
||||
impl Encodable for $name {
|
||||
fn encode(self, buffer: &mut impl Buffer) {
|
||||
let array = unsafe {
|
||||
core::mem::transmute::<Self, [u8; core::mem::size_of::<Self>()]>(self)
|
||||
};
|
||||
for byte in array {
|
||||
unsafe { buffer.write(byte) };
|
||||
}
|
||||
fn encode(self, _buffer: &mut impl Buffer) {
|
||||
let Self($($nm),*) = self;
|
||||
$(
|
||||
for byte in $nm.to_le_bytes() {
|
||||
unsafe { _buffer.write(byte) };
|
||||
}
|
||||
)*
|
||||
}
|
||||
|
||||
fn encode_len(self) -> usize {
|
||||
|
@ -43,26 +43,26 @@ macro_rules! define_items {
|
|||
}
|
||||
|
||||
define_items! {
|
||||
OpsRR (OpR, OpR ),
|
||||
OpsRRR (OpR, OpR, OpR ),
|
||||
OpsRRRR (OpR, OpR, OpR, OpR),
|
||||
OpsRRB (OpR, OpR, OpB ),
|
||||
OpsRRH (OpR, OpR, OpH ),
|
||||
OpsRRW (OpR, OpR, OpW ),
|
||||
OpsRRD (OpR, OpR, OpD ),
|
||||
OpsRB (OpR, OpB ),
|
||||
OpsRH (OpR, OpH ),
|
||||
OpsRW (OpR, OpW ),
|
||||
OpsRD (OpR, OpD ),
|
||||
OpsRRA (OpR, OpR, OpA ),
|
||||
OpsRRAH (OpR, OpR, OpA, OpH),
|
||||
OpsRROH (OpR, OpR, OpO, OpH),
|
||||
OpsRRPH (OpR, OpR, OpP, OpH),
|
||||
OpsRRO (OpR, OpR, OpO ),
|
||||
OpsRRP (OpR, OpR, OpP ),
|
||||
OpsO (OpO, ),
|
||||
OpsP (OpP, ),
|
||||
OpsN ( ),
|
||||
OpsRR (a: OpR, b: OpR ),
|
||||
OpsRRR (a: OpR, b: OpR, c: OpR ),
|
||||
OpsRRRR (a: OpR, b: OpR, c: OpR, d: OpR),
|
||||
OpsRRB (a: OpR, b: OpR, c: OpB ),
|
||||
OpsRRH (a: OpR, b: OpR, c: OpH ),
|
||||
OpsRRW (a: OpR, b: OpR, c: OpW ),
|
||||
OpsRRD (a: OpR, b: OpR, c: OpD ),
|
||||
OpsRB (a: OpR, b: OpB ),
|
||||
OpsRH (a: OpR, b: OpH ),
|
||||
OpsRW (a: OpR, b: OpW ),
|
||||
OpsRD (a: OpR, b: OpD ),
|
||||
OpsRRA (a: OpR, b: OpR, c: OpA ),
|
||||
OpsRRAH (a: OpR, b: OpR, c: OpA, d: OpH),
|
||||
OpsRROH (a: OpR, b: OpR, c: OpO, d: OpH),
|
||||
OpsRRPH (a: OpR, b: OpR, c: OpP, d: OpH),
|
||||
OpsRRO (a: OpR, b: OpR, c: OpO ),
|
||||
OpsRRP (a: OpR, b: OpR, c: OpP ),
|
||||
OpsO (a: OpO, ),
|
||||
OpsP (a: OpP, ),
|
||||
OpsN ( ),
|
||||
}
|
||||
|
||||
unsafe impl BytecodeItem for u8 {}
|
||||
|
|
|
@ -1,40 +1,403 @@
|
|||
use crate::parser::Type;
|
||||
use std::{iter::Cycle, ops::Range};
|
||||
|
||||
use crate::{
|
||||
lexer::Ty,
|
||||
parser::{Exp, Function, Item, Literal, Struct, Type},
|
||||
typechk::Type,
|
||||
};
|
||||
|
||||
//| Register | Description | Saver |
|
||||
//|:-----------|:--------------------|:-------|
|
||||
//| r0 | Hard-wired zero | N/A |
|
||||
//| r1 - r2 | Return values | Caller |
|
||||
//| r2 - r11 | Function parameters | Caller |
|
||||
//| r12 - r30 | General purpose | Caller |
|
||||
//| r31 | Return address | Caller |
|
||||
//| r32 - r253 | General purpose | Callee |
|
||||
//| r254 | Stack pointer | Callee |
|
||||
//| r255 | Thread pointer | N/A |
|
||||
|
||||
struct RegAlloc {
|
||||
pub regs: Box<[Option<usize>; 256]>,
|
||||
pub regs: Box<[Option<usize>; 256]>,
|
||||
pub used: Box<[bool; 256]>,
|
||||
pub spill_cycle: Cycle<Range<usize>>,
|
||||
}
|
||||
|
||||
impl RegAlloc {
|
||||
fn alloc_regurn(&mut self, slot: SlotId) -> Option<Reg> {
|
||||
self.regs[1..2]
|
||||
.iter_mut()
|
||||
.position(|reg| {
|
||||
if reg.is_none() {
|
||||
*reg = Some(slot);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
.map(|reg| reg as Reg + 1)
|
||||
}
|
||||
|
||||
fn alloc_general(&mut self, slot: SlotId) -> Option<Reg> {
|
||||
self.regs[32..254]
|
||||
.iter_mut()
|
||||
.zip(&mut self.used[32..254])
|
||||
.position(|(reg, used)| {
|
||||
if reg.is_none() {
|
||||
*reg = Some(slot);
|
||||
*used = true;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
.map(|reg| reg as Reg + 32)
|
||||
}
|
||||
|
||||
fn free(&mut self, reg: Reg) {
|
||||
assert!(self.regs[reg as usize].take().is_some());
|
||||
}
|
||||
|
||||
fn spill(&mut self, for_slot: SlotId) -> (Reg, SlotId) {
|
||||
let to_spill = self.spill_cycle.next().unwrap();
|
||||
let slot = self.regs[to_spill].replace(for_slot).unwrap();
|
||||
(to_spill as Reg + 32, slot)
|
||||
}
|
||||
|
||||
fn restore(&mut self, reg: Reg, slot: SlotId) -> SlotId {
|
||||
self.regs[reg as usize].replace(slot).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ParamAlloc {
|
||||
reg_range: Range<Reg>,
|
||||
stack: Offset,
|
||||
}
|
||||
|
||||
impl ParamAlloc {
|
||||
fn new(reg_range: Range<Reg>) -> Self {
|
||||
Self {
|
||||
stack: 16,
|
||||
reg_range,
|
||||
}
|
||||
}
|
||||
|
||||
fn alloc(&mut self, mut size: usize) -> Value {
|
||||
match self.try_alloc_regs(size) {
|
||||
Some(reg) => reg,
|
||||
None => panic!("Too many arguments o7"),
|
||||
}
|
||||
}
|
||||
|
||||
fn try_alloc_regs(&mut self, size: usize) -> Option<Value> {
|
||||
let mut needed = size.div_ceil(8);
|
||||
if needed > 2 {
|
||||
needed = 1; // passed by ref
|
||||
}
|
||||
|
||||
if self.reg_range.len() < needed {
|
||||
return None;
|
||||
}
|
||||
|
||||
match needed {
|
||||
1 => {
|
||||
let reg = self.reg_range.start;
|
||||
self.reg_range.start += 1;
|
||||
Some(Value::Reg(reg))
|
||||
}
|
||||
2 => {
|
||||
let reg = self.reg_range.start;
|
||||
self.reg_range.start += 2;
|
||||
Some(Value::Pair(reg, reg + 1))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RegAlloc {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
regs: Box::new([None; 256]),
|
||||
used: Box::new([false; 256]),
|
||||
spill_cycle: (32..254).cycle(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Variable {
|
||||
name: String,
|
||||
location: usize,
|
||||
location: SlotId,
|
||||
}
|
||||
|
||||
enum Symbol {
|
||||
Type(String, Type),
|
||||
Func(String, Vec<Type>, Type),
|
||||
}
|
||||
type SlotId = usize;
|
||||
|
||||
struct Slot {
|
||||
ty: Type,
|
||||
value: Value,
|
||||
}
|
||||
|
||||
/// Thin wrapper around a byte vector.
///
/// `#[repr(transparent)]` guarantees the same layout as `Vec<u8>`, which is
/// what `InstBuffer::new` relies on.
#[repr(transparent)]
struct InstBuffer {
    buffer: Vec<u8>,
}

impl InstBuffer {
    /// Reinterprets a mutable `Vec<u8>` as an `InstBuffer` without copying.
    fn new(vec: &mut Vec<u8>) -> &mut Self {
        let raw: *mut Vec<u8> = vec;
        // SAFETY: `InstBuffer` is `#[repr(transparent)]` over `Vec<u8>`, so the
        // pointer cast preserves layout and validity; the returned reference
        // inherits the lifetime and exclusivity of `vec`.
        unsafe { &mut *raw.cast::<Self>() }
    }
}
|
||||
|
||||
impl hbbytecode::Buffer for InstBuffer {
    /// Reserves capacity for at least `bytes` more bytes in the wrapped vector.
    fn reserve(&mut self, bytes: usize) {
        let Self { buffer } = self;
        buffer.reserve(bytes);
    }

    /// Appends `byte` to the wrapped vector.
    ///
    /// # Safety
    /// This implementation only performs a safe `Vec::push`; the `unsafe`
    /// marker comes from the trait signature.
    unsafe fn write(&mut self, byte: u8) {
        let Self { buffer } = self;
        buffer.push(byte);
    }
}
|
||||
|
||||
type Reg = u8;
|
||||
type Offset = i32;
|
||||
|
||||
enum Value {
|
||||
Reg(u8),
|
||||
Stack(i32),
|
||||
Pair(Reg, Reg),
|
||||
Reg(Reg),
|
||||
Stack(Offset),
|
||||
Imm(u64),
|
||||
Spilled(Reg, SlotId),
|
||||
DoubleSpilled(SlotId, Offset),
|
||||
}
|
||||
|
||||
type Label = usize;
|
||||
type Data = usize;
|
||||
|
||||
pub struct Generator {
|
||||
regs: RegAlloc,
|
||||
symbols: Vec<Symbol>,
|
||||
variables: Vec<Variable>,
|
||||
slots: Vec<Slot>,
|
||||
relocations: Vec<(Label, usize)>,
|
||||
pub struct LabelReloc {
|
||||
pub label: Label,
|
||||
pub offset: usize,
|
||||
}
|
||||
|
||||
impl Generator {
|
||||
pub fn gen();
|
||||
pub struct DataReloc {
|
||||
pub data: Data,
|
||||
pub offset: usize,
|
||||
}
|
||||
|
||||
/// Snapshot of the generator's slot and variable counts, taken by
/// `push_frame` and consumed by `pop_frame` to discard everything added since.
#[must_use]
pub struct Frame {
    /// Number of slots that existed when the frame was pushed.
    pub slot_count: usize,
    /// Number of variables that existed when the frame was pushed.
    pub var_count: usize,
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Generator<'a> {
|
||||
ast: &'a [Item],
|
||||
|
||||
func_labels: Vec<(String, Label)>,
|
||||
|
||||
regs: RegAlloc,
|
||||
variables: Vec<Variable>,
|
||||
slots: Vec<Slot>,
|
||||
|
||||
labels: Vec<Option<usize>>,
|
||||
label_relocs: Vec<LabelReloc>,
|
||||
|
||||
data: Vec<Option<usize>>,
|
||||
data_relocs: Vec<DataReloc>,
|
||||
|
||||
code_section: Vec<u8>,
|
||||
data_section: Vec<u8>,
|
||||
}
|
||||
|
||||
impl<'a> Generator<'a> {
|
||||
fn generate(mut self) -> Vec<u8> {
|
||||
for item in self.ast {
|
||||
let Item::Function(f) = item else { continue };
|
||||
self.generate_function(f);
|
||||
}
|
||||
|
||||
self.link()
|
||||
}
|
||||
|
||||
fn generate_function(&mut self, f: &Function) {
|
||||
let frame = self.push_frame();
|
||||
|
||||
let mut param_alloc = ParamAlloc::new(2..12);
|
||||
|
||||
for param in f.args.iter() {
|
||||
let param_size = self.size_of(¶m.ty);
|
||||
let slot = self.add_slot(param.ty.clone(), param_alloc.alloc(param_size));
|
||||
self.add_variable(param.name.clone(), slot);
|
||||
}
|
||||
|
||||
for stmt in f.body.iter() {
|
||||
assert!(self
|
||||
.generate_expr(Some(Type::Builtin(Ty::Void)), stmt)
|
||||
.is_none());
|
||||
}
|
||||
|
||||
self.pop_frame(frame);
|
||||
}
|
||||
|
||||
fn generate_expr(&mut self, expected: Option<Type>, expr: &Exp) -> Option<SlotId> {
|
||||
let value = match expr {
|
||||
Exp::Literal(lit) => match lit {
|
||||
Literal::Int(i) => self.add_slot(expected.unwrap(), Value::Imm(*i)),
|
||||
Literal::Bool(b) => self.add_slot(Type::Builtin(Ty::Bool), Value::Imm(*b as u64)),
|
||||
},
|
||||
Exp::Variable(ident) => self.lookup_variable(ident).unwrap().location,
|
||||
Exp::Call { name, args } => todo!(),
|
||||
Exp::Ctor { name, fields } => todo!(),
|
||||
Exp::Index { base, index } => todo!(),
|
||||
Exp::Field { base, field } => todo!(),
|
||||
Exp::Unary { op, exp } => todo!(),
|
||||
Exp::Binary { op, left, right } => todo!(),
|
||||
Exp::If { cond, then, else_ } => todo!(),
|
||||
Exp::Let { name, ty, value } => todo!(),
|
||||
Exp::For {
|
||||
init,
|
||||
cond,
|
||||
step,
|
||||
block,
|
||||
} => todo!(),
|
||||
Exp::Block(_) => todo!(),
|
||||
Exp::Return(_) => todo!(),
|
||||
Exp::Break => todo!(),
|
||||
Exp::Continue => todo!(),
|
||||
};
|
||||
|
||||
if let Some(expected) = expected {
|
||||
let actual = self.slots[value].ty.clone();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
Some(value)
|
||||
}
|
||||
|
||||
fn size_of(&self, ty: &Type) -> usize {
|
||||
match ty {
|
||||
Type::Builtin(ty) => match ty {
|
||||
Ty::U8 | Ty::I8 | Ty::Bool => 1,
|
||||
Ty::U16 | Ty::I16 => 2,
|
||||
Ty::U32 | Ty::I32 => 4,
|
||||
Ty::U64 | Ty::I64 => 8,
|
||||
Ty::Void => 0,
|
||||
},
|
||||
Type::Struct(name) => self
|
||||
.lookup_struct(name)
|
||||
.fields
|
||||
.iter()
|
||||
.map(|field| self.size_of(&field.ty))
|
||||
.sum(),
|
||||
Type::Pinter(_) => 8,
|
||||
}
|
||||
}
|
||||
|
||||
fn add_variable(&mut self, name: String, location: SlotId) {
|
||||
self.variables.push(Variable { name, location });
|
||||
}
|
||||
|
||||
fn add_slot(&mut self, ty: Type, value: Value) -> SlotId {
|
||||
let slot = self.slots.len();
|
||||
self.slots.push(Slot { ty, value });
|
||||
slot
|
||||
}
|
||||
|
||||
fn link(mut self) -> Vec<u8> {
|
||||
for reloc in self.label_relocs {
|
||||
let label = self.labels[reloc.label].unwrap();
|
||||
let offset = reloc.offset;
|
||||
let target = label - offset;
|
||||
let target_bytes = u64::to_le_bytes(target as u64);
|
||||
self.code_section[offset..offset + 8].copy_from_slice(&target_bytes);
|
||||
}
|
||||
|
||||
for reloc in self.data_relocs {
|
||||
let data = self.data[reloc.data].unwrap();
|
||||
let offset = reloc.offset;
|
||||
let target = data;
|
||||
let target_bytes = u64::to_le_bytes((target + self.code_section.len()) as u64);
|
||||
self.data_section[offset..offset + 8].copy_from_slice(&target_bytes);
|
||||
}
|
||||
|
||||
self.code_section.extend_from_slice(&self.data_section);
|
||||
self.code_section
|
||||
}
|
||||
|
||||
fn lookup_func_label(&mut self, name: &str) -> Label {
|
||||
if let Some(label) = self.func_labels.iter().find(|(n, _)| n == name) {
|
||||
return label.1;
|
||||
}
|
||||
|
||||
panic!("Function not found: {}", name);
|
||||
}
|
||||
|
||||
fn declare_label(&mut self) -> Label {
|
||||
self.labels.push(None);
|
||||
self.labels.len() - 1
|
||||
}
|
||||
|
||||
fn define_label(&mut self, label: Label) {
|
||||
self.labels[label] = Some(self.code_section.len());
|
||||
}
|
||||
|
||||
fn declare_data(&mut self) -> Data {
|
||||
self.data.push(None);
|
||||
self.data.len() - 1
|
||||
}
|
||||
|
||||
fn define_data(&mut self, data: Data, bytes: &[u8]) {
|
||||
self.data[data] = Some(self.data.len());
|
||||
self.data_section.extend_from_slice(bytes);
|
||||
}
|
||||
|
||||
fn lookup_struct(&self, name: &str) -> &Struct {
|
||||
self.lookup_item(name)
|
||||
.and_then(|item| match item {
|
||||
Item::Struct(s) => Some(s),
|
||||
_ => panic!("Not a struct: {}", name),
|
||||
})
|
||||
.expect("Struct not found")
|
||||
}
|
||||
|
||||
fn lookup_function(&self, name: &str) -> &Function {
|
||||
self.lookup_item(name)
|
||||
.and_then(|item| match item {
|
||||
Item::Function(f) => Some(f),
|
||||
_ => panic!("Not a function: {}", name),
|
||||
})
|
||||
.expect("Function not found")
|
||||
}
|
||||
|
||||
fn lookup_item(&self, name: &str) -> Option<&Item> {
|
||||
self.ast.iter().find(|item| match item {
|
||||
Item::Import(_) => false,
|
||||
Item::Struct(s) => s.name == name,
|
||||
Item::Function(f) => f.name == name,
|
||||
})
|
||||
}
|
||||
|
||||
fn lookup_variable(&self, name: &str) -> Option<&Variable> {
|
||||
self.variables.iter().find(|variable| variable.name == name)
|
||||
}
|
||||
|
||||
fn push_frame(&mut self) -> Frame {
|
||||
Frame {
|
||||
slot_count: self.slots.len(),
|
||||
var_count: self.variables.len(),
|
||||
}
|
||||
}
|
||||
|
||||
fn pop_frame(&mut self, frame: Frame) {
|
||||
self.slots.truncate(frame.slot_count);
|
||||
self.variables.truncate(frame.var_count);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate(ast: &[Item]) -> Vec<u8> {
|
||||
Generator {
|
||||
ast,
|
||||
..Default::default()
|
||||
}
|
||||
.generate()
|
||||
}
|
||||
|
|
|
@ -144,6 +144,7 @@ gen_token! {
|
|||
},
|
||||
regexes: {
|
||||
Ident = "[a-zA-Z_][a-zA-Z0-9_]*",
|
||||
String = r#""([^"\\]|\\.)*""#,
|
||||
Number = "[0-9]+",
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
mod codegen;
|
||||
mod lexer;
|
||||
mod parser;
|
||||
|
|
|
@ -13,7 +13,7 @@ pub enum Item {
|
|||
Function(Function),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum Type {
|
||||
Builtin(Ty),
|
||||
Struct(String),
|
||||
|
@ -22,6 +22,7 @@ pub enum Type {
|
|||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Struct {
|
||||
pub name: String,
|
||||
pub fields: Vec<Field>,
|
||||
}
|
||||
|
||||
|
@ -104,7 +105,7 @@ pub enum Exp {
|
|||
|
||||
/// A literal value appearing in an expression.
#[derive(Clone, Debug)]
pub enum Literal {
    /// Unsigned 64-bit integer literal.
    Int(u64),
    /// Boolean literal.
    Bool(bool),
}
|
||||
|
||||
|
@ -197,6 +198,7 @@ impl<'a> Parser<'a> {
|
|||
match token.kind {
|
||||
TokenKind::Struct => Some(self.parse_struct()),
|
||||
TokenKind::Fn => Some(self.parse_function()),
|
||||
TokenKind::Use => Some(Item::Import(self.expect(TokenKind::String).value)),
|
||||
tkn => {
|
||||
let (line, col) = self.pos_to_line_col(token.span.start);
|
||||
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
|
||||
|
@ -301,7 +303,6 @@ impl<'a> Parser<'a> {
|
|||
let value = token.value.parse().unwrap();
|
||||
Exp::Literal(Literal::Int(value))
|
||||
}
|
||||
TokenKind::Fn => todo!(),
|
||||
TokenKind::Let => {
|
||||
let name = self.expect(TokenKind::Ident).value;
|
||||
let ty = self.try_advance(TokenKind::Colon).then(|| self.type_());
|
||||
|
@ -332,7 +333,6 @@ impl<'a> Parser<'a> {
|
|||
else_,
|
||||
}
|
||||
}
|
||||
TokenKind::Else => todo!(),
|
||||
TokenKind::For => {
|
||||
let params =
|
||||
self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr);
|
||||
|
@ -385,21 +385,10 @@ impl<'a> Parser<'a> {
|
|||
.then(|| Box::new(self.parse_expr()));
|
||||
Exp::Return(value)
|
||||
}
|
||||
TokenKind::Break => todo!(),
|
||||
TokenKind::Continue => todo!(),
|
||||
TokenKind::Struct => todo!(),
|
||||
TokenKind::RBrace => todo!(),
|
||||
TokenKind::RParen => todo!(),
|
||||
TokenKind::LBracket => todo!(),
|
||||
TokenKind::RBracket => todo!(),
|
||||
TokenKind::Colon => todo!(),
|
||||
TokenKind::Semicolon => todo!(),
|
||||
TokenKind::Comma => todo!(),
|
||||
TokenKind::Op(op) => Exp::Unary {
|
||||
op,
|
||||
exp: Box::new(self.parse_expr()),
|
||||
},
|
||||
TokenKind::Ty(_) => todo!(),
|
||||
TokenKind::Dot => {
|
||||
let token = self.expect_any();
|
||||
match token.kind {
|
||||
|
@ -417,6 +406,25 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
TokenKind::Ty(_)
|
||||
| TokenKind::String
|
||||
| TokenKind::Use
|
||||
| TokenKind::Break
|
||||
| TokenKind::Continue
|
||||
| TokenKind::Struct
|
||||
| TokenKind::RBrace
|
||||
| TokenKind::RParen
|
||||
| TokenKind::LBracket
|
||||
| TokenKind::RBracket
|
||||
| TokenKind::Colon
|
||||
| TokenKind::Semicolon
|
||||
| TokenKind::Comma
|
||||
| TokenKind::Fn
|
||||
| TokenKind::Else => {
|
||||
let (line, col) = self.pos_to_line_col(token.span.start);
|
||||
panic!("Unexpected {:?} at {}:{}", token.kind, line, col)
|
||||
}
|
||||
};
|
||||
|
||||
loop {
|
||||
|
|
Loading…
Reference in a new issue