starting from zero again
This commit is contained in:
parent
774735b515
commit
1c08148dc9
6
hblang/Cargo.toml
Normal file
6
hblang/Cargo.toml
Normal file
|
@ -0,0 +1,6 @@
|
|||
[package]
|
||||
name = "hblang"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
3
hblang/examples/main_fn.hb
Normal file
3
hblang/examples/main_fn.hb
Normal file
|
@ -0,0 +1,3 @@
|
|||
main := ||: void {
|
||||
return;
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
[package]
|
||||
name = "hblang"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
hbbytecode = { version = "0.1.0", path = "../hbbytecode" }
|
||||
logos = "0.13.0"
|
||||
|
|
@ -1,595 +0,0 @@
|
|||
use std::{iter::Cycle, ops::Range, usize};
|
||||
|
||||
use crate::{
|
||||
lexer::{self, Ty},
|
||||
parser::{Exp, Function, Item, Literal, Struct, Type},
|
||||
};
|
||||
|
||||
/// Number of a machine register; the register file has 256 entries.
type Reg = u8;
/// Signed byte offset (stack positions, displacements inside a value).
type Offset = i32;
/// Flag carried next to a stack offset in `ValueStore::Stack`.
type Pushed = bool;
/// Index into the generator's slot stack.
type SlotIndex = usize;
/// Index into the generator's label table.
type Label = usize;
/// Index into the generator's data table.
type Data = usize;
/// Size of a value in bytes.
type Size = usize;
|
||||
|
||||
//| Register | Description | Saver |
|
||||
//|:-----------|:--------------------|:-------|
|
||||
//| r0 | Hard-wired zero | N/A |
|
||||
//| r1 - r2 | Return values | Caller |
|
||||
//| r2 - r11 | Function parameters | Caller |
|
||||
//| r12 - r30 | General purpose | Caller |
|
||||
//| r31 | Return address | Caller |
|
||||
//| r32 - r253 | General purpose | Callee |
|
||||
//| r254 | Stack pointer | Callee |
|
||||
//| r255 | Thread pointer | N/A |
|
||||
|
||||
struct RegAlloc {
|
||||
pub regs: Box<[Option<usize>; 256]>,
|
||||
pub used: Box<[bool; 256]>,
|
||||
pub spill_cycle: Cycle<Range<u8>>,
|
||||
}
|
||||
|
||||
impl RegAlloc {
|
||||
const STACK_POINTER: Reg = 254;
|
||||
const ZERO: Reg = 0;
|
||||
const RETURN_ADDRESS: Reg = 31;
|
||||
|
||||
fn alloc_return(&mut self, slot: usize) -> Option<Reg> {
|
||||
self.regs[1..2]
|
||||
.iter_mut()
|
||||
.position(|reg| {
|
||||
if reg.is_none() {
|
||||
*reg = Some(slot);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
.map(|reg| reg as Reg + 1)
|
||||
}
|
||||
|
||||
fn alloc_general(&mut self, slot: usize) -> Option<Reg> {
|
||||
self.regs[32..254]
|
||||
.iter_mut()
|
||||
.zip(&mut self.used[32..254])
|
||||
.position(|(reg, used)| {
|
||||
if reg.is_none() {
|
||||
*reg = Some(slot);
|
||||
*used = true;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
.map(|reg| reg as Reg + 32)
|
||||
}
|
||||
|
||||
fn free(&mut self, reg: Reg) {
|
||||
assert!(self.regs[reg as usize].take().is_some());
|
||||
}
|
||||
|
||||
fn is_used(&self, reg: Reg) -> bool {
|
||||
self.regs[reg as usize].is_some()
|
||||
}
|
||||
|
||||
fn spill(&mut self, for_slot: usize) -> (Reg, Option<usize>) {
|
||||
let to_spill = self.spill_cycle.next().unwrap();
|
||||
let slot = self.spill_specific(to_spill, for_slot);
|
||||
(to_spill as Reg + 32, slot)
|
||||
}
|
||||
|
||||
fn spill_specific(&mut self, reg: Reg, for_slot: usize) -> Option<usize> {
|
||||
self.regs[reg as usize].replace(for_slot)
|
||||
}
|
||||
|
||||
fn restore(&mut self, reg: Reg, slot: usize) -> usize {
|
||||
self.regs[reg as usize].replace(slot).unwrap()
|
||||
}
|
||||
|
||||
fn alloc_specific(&mut self, reg: u8, to: usize) {
|
||||
assert!(self.regs[reg as usize].replace(to).is_none());
|
||||
}
|
||||
|
||||
fn alloc_specific_in_reg(&mut self, reg: InReg, to: usize) {
|
||||
match reg {
|
||||
InReg::Single(r) => self.alloc_specific(r, to),
|
||||
InReg::Pair(r1, r2) => {
|
||||
self.alloc_specific(r1, to);
|
||||
self.alloc_specific(r2, to);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ParamAlloc {
|
||||
reg_range: Range<Reg>,
|
||||
stack: Offset,
|
||||
}
|
||||
|
||||
impl ParamAlloc {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
stack: 8, // return adress is in callers stack frame
|
||||
reg_range: 2..12,
|
||||
}
|
||||
}
|
||||
|
||||
fn alloc(&mut self, size: usize) -> SlotValue {
|
||||
match self.try_alloc_regs(size) {
|
||||
Some(reg) => reg,
|
||||
None => {
|
||||
let stack = self.stack;
|
||||
self.stack += size as Offset;
|
||||
SlotValue::Stack(stack)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn try_alloc_regs(&mut self, size: usize) -> Option<SlotValue> {
|
||||
let mut needed = size.div_ceil(8);
|
||||
if needed > 2 {
|
||||
needed = 1; // passed by ref
|
||||
}
|
||||
|
||||
if self.reg_range.len() < needed {
|
||||
return None;
|
||||
}
|
||||
|
||||
match needed {
|
||||
1 => {
|
||||
let reg = self.reg_range.start;
|
||||
self.reg_range.start += 1;
|
||||
Some(SlotValue::Reg(InReg::Single(reg)))
|
||||
}
|
||||
2 => {
|
||||
let reg = self.reg_range.start;
|
||||
self.reg_range.start += 2;
|
||||
Some(SlotValue::Reg(InReg::Pair(reg, reg + 1)))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RegAlloc {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
regs: Box::new([None; 256]),
|
||||
used: Box::new([false; 256]),
|
||||
spill_cycle: (32..254).cycle(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A named variable visible to the code generator.
struct Variable {
    /// Source-level identifier.
    name: String,
    /// Index of the slot that holds the variable.
    location: usize,
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct SlotId {
|
||||
// index into slot stack
|
||||
index: SlotIndex,
|
||||
// temorary offset carried over when eg. accessing fields
|
||||
offset: Offset,
|
||||
// this means we can mutate the value as part of computation
|
||||
owned: bool,
|
||||
}
|
||||
|
||||
impl SlotId {
|
||||
fn base(location: usize) -> Self {
|
||||
Self {
|
||||
index: location,
|
||||
offset: 0,
|
||||
owned: true,
|
||||
}
|
||||
}
|
||||
|
||||
fn borrowed(self) -> Self {
|
||||
Self {
|
||||
owned: false,
|
||||
..self
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Slot {
|
||||
ty: Type,
|
||||
value: SlotValue,
|
||||
}
|
||||
|
||||
/// Thin wrapper that lets a plain `Vec<u8>` be used as an instruction buffer.
#[repr(transparent)]
struct InstBuffer {
    buffer: Vec<u8>,
}

impl InstBuffer {
    /// Reinterprets `vec` as an `InstBuffer` without copying it.
    fn new(vec: &mut Vec<u8>) -> &mut Self {
        let raw: *mut Vec<u8> = vec;
        // SAFETY: `InstBuffer` is `#[repr(transparent)]` over its single
        // `Vec<u8>` field, so both types share layout, and the returned
        // reference inherits the lifetime and exclusivity of `vec`.
        unsafe { &mut *(raw as *mut Self) }
    }
}
|
||||
|
||||
impl hbbytecode::Buffer for InstBuffer {
    /// Reserves room for at least `bytes` more bytes.
    fn reserve(&mut self, bytes: usize) {
        Vec::reserve(&mut self.buffer, bytes);
    }

    /// Appends `byte`.
    ///
    /// # Safety
    /// This implementation performs a plain, safe `Vec::push`; the `unsafe`
    /// qualifier comes from the trait signature.
    unsafe fn write(&mut self, byte: u8) {
        Vec::push(&mut self.buffer, byte);
    }
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum InReg {
|
||||
Single(Reg),
|
||||
// if one of the registes is allocated, the other is too, ALWAYS
|
||||
// with the same slot
|
||||
Pair(Reg, Reg),
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum Spill {
|
||||
Reg(InReg),
|
||||
Stack(Offset), // relative to frame end (rsp if nothing was pushed)
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum SlotValue {
|
||||
Reg(InReg),
|
||||
Stack(Offset), // relative to frame start (rbp)
|
||||
Imm(u64),
|
||||
Spilled(Spill, SlotIndex),
|
||||
}
|
||||
|
||||
pub struct Value {
|
||||
store: ValueStore,
|
||||
offset: Offset,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum ValueStore {
|
||||
Reg(InReg),
|
||||
Stack(Offset, Pushed),
|
||||
Imm(u64),
|
||||
}
|
||||
|
||||
impl From<SlotValue> for ValueStore {
|
||||
fn from(value: SlotValue) -> Self {
|
||||
match value {
|
||||
SlotValue::Reg(reg) => ValueStore::Reg(reg),
|
||||
SlotValue::Stack(offset) => ValueStore::Stack(offset, false),
|
||||
SlotValue::Imm(imm) => ValueStore::Imm(imm),
|
||||
SlotValue::Spilled(spill, _) => match spill {
|
||||
Spill::Reg(reg) => ValueStore::Reg(reg),
|
||||
Spill::Stack(offset) => ValueStore::Stack(offset, true),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LabelReloc {
|
||||
pub label: Label,
|
||||
pub offset: usize,
|
||||
}
|
||||
|
||||
pub struct DataReloc {
|
||||
pub data: Data,
|
||||
pub offset: usize,
|
||||
}
|
||||
|
||||
/// Snapshot of the slot and variable stack heights, taken by `push_frame`
/// and consumed by `pop_frame` to discard everything added in between.
#[must_use]
pub struct Frame {
    /// Number of slots at the time of the snapshot.
    pub slot_count: usize,
    /// Number of variables at the time of the snapshot.
    pub var_count: usize,
}
|
||||
|
||||
enum Instr {
|
||||
BinOp(lexer::Op, Value, Value),
|
||||
Move(Size, Value, Value),
|
||||
Push(Reg),
|
||||
Jump(Label),
|
||||
Call(String),
|
||||
JumpIfZero(Value, Label),
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Generator<'a> {
|
||||
ast: &'a [Item],
|
||||
|
||||
func_labels: Vec<(String, Label)>,
|
||||
|
||||
stack_size: Offset,
|
||||
pushed_size: Offset,
|
||||
|
||||
regs: RegAlloc,
|
||||
variables: Vec<Variable>,
|
||||
slots: Vec<Slot>,
|
||||
|
||||
labels: Vec<Option<usize>>,
|
||||
label_relocs: Vec<LabelReloc>,
|
||||
|
||||
data: Vec<Option<usize>>,
|
||||
data_relocs: Vec<DataReloc>,
|
||||
|
||||
code_section: Vec<u8>,
|
||||
data_section: Vec<u8>,
|
||||
|
||||
instrs: Vec<Instr>,
|
||||
}
|
||||
|
||||
impl<'a> Generator<'a> {
|
||||
fn generate(mut self) -> Vec<u8> {
|
||||
for item in self.ast {
|
||||
let Item::Function(f) = item else { continue };
|
||||
self.generate_function(f);
|
||||
}
|
||||
|
||||
self.link()
|
||||
}
|
||||
|
||||
fn generate_function(&mut self, f: &Function) {
|
||||
let frame = self.push_frame();
|
||||
|
||||
let mut param_alloc = ParamAlloc::new();
|
||||
|
||||
for param in f.args.iter() {
|
||||
let param_size = self.size_of(¶m.ty);
|
||||
let value = param_alloc.alloc(param_size);
|
||||
let slot = self.add_slot(param.ty.clone(), value);
|
||||
if let SlotValue::Reg(reg) = value {
|
||||
self.regs.alloc_specific_in_reg(reg, slot);
|
||||
}
|
||||
self.add_variable(param.name.clone(), slot);
|
||||
}
|
||||
|
||||
for stmt in f.body.iter() {
|
||||
assert!(self
|
||||
.generate_expr(Some(Type::Builtin(Ty::Void)), stmt)
|
||||
.is_none());
|
||||
}
|
||||
|
||||
self.pop_frame(frame);
|
||||
}
|
||||
|
||||
fn generate_expr(&mut self, expected: Option<Type>, expr: &Exp) -> Option<SlotId> {
|
||||
let value = match expr {
|
||||
Exp::Literal(lit) => SlotId::base(match lit {
|
||||
Literal::Int(i) => self.add_slot(expected.clone().unwrap(), SlotValue::Imm(*i)),
|
||||
Literal::Bool(b) => {
|
||||
self.add_slot(Type::Builtin(Ty::Bool), SlotValue::Imm(*b as u64))
|
||||
}
|
||||
}),
|
||||
Exp::Variable(ident) => {
|
||||
SlotId::base(self.lookup_variable(ident).unwrap().location).borrowed()
|
||||
}
|
||||
Exp::Call { name, args } => self.generate_call(expected.clone(), name, args),
|
||||
Exp::Ctor { name, fields } => todo!(),
|
||||
Exp::Index { base, index } => todo!(),
|
||||
Exp::Field { base, field } => todo!(),
|
||||
Exp::Unary { op, exp } => todo!(),
|
||||
Exp::Binary { op, left, right } => todo!(),
|
||||
Exp::If { cond, then, else_ } => todo!(),
|
||||
Exp::Let { name, ty, value } => todo!(),
|
||||
Exp::For {
|
||||
init,
|
||||
cond,
|
||||
step,
|
||||
block,
|
||||
} => todo!(),
|
||||
Exp::Block(_) => todo!(),
|
||||
Exp::Return(_) => todo!(),
|
||||
Exp::Break => todo!(),
|
||||
Exp::Continue => todo!(),
|
||||
};
|
||||
|
||||
if let Some(expected) = expected {
|
||||
let actual = self.slots[value.index].ty.clone();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
Some(value)
|
||||
}
|
||||
|
||||
fn generate_call(&mut self, expected: Option<Type>, name: &str, args: &[Exp]) -> SlotId {
|
||||
let frame = self.push_frame();
|
||||
let func = self.lookup_function(name);
|
||||
|
||||
let mut arg_alloc = ParamAlloc::new();
|
||||
for (arg, param) in args.iter().zip(&func.args) {
|
||||
let arg_slot = self.generate_expr(Some(param.ty.clone()), arg).unwrap();
|
||||
let arg_size = self.size_of(¶m.ty);
|
||||
let param_slot = arg_alloc.alloc(arg_size);
|
||||
self.set_temporarly(arg_slot, param_slot);
|
||||
}
|
||||
|
||||
self.instrs.push(Instr::Call(name.to_owned()));
|
||||
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn set_temporarly(&mut self, from: SlotId, to: SlotValue) {
|
||||
let to = self.make_mutable(to, from.index);
|
||||
let to_slot = self.add_slot(self.slots[from.index].ty.clone(), to);
|
||||
self.emit_move(from, SlotId::base(to_slot));
|
||||
}
|
||||
|
||||
fn make_mutable(&mut self, target: SlotValue, by: SlotIndex) -> SlotValue {
|
||||
match target {
|
||||
SlotValue::Reg(in_reg) => {
|
||||
self.regs.alloc_specific_in_reg(in_reg, by);
|
||||
target
|
||||
}
|
||||
SlotValue::Spilled(Spill::Reg(in_reg), slot) => {
|
||||
let new_val = SlotValue::Spilled(
|
||||
match in_reg {
|
||||
InReg::Single(reg) => Spill::Stack(self.emmit_push(reg)),
|
||||
InReg::Pair(r1, r2) => {
|
||||
self.emmit_push(r2);
|
||||
Spill::Stack(self.emmit_push(r1))
|
||||
}
|
||||
},
|
||||
slot,
|
||||
);
|
||||
let new_slot = self.add_slot(self.slots[slot].ty.clone(), new_val);
|
||||
SlotValue::Spilled(Spill::Reg(in_reg), new_slot)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn emmit_push(&mut self, reg: Reg) -> Offset {
|
||||
self.pushed_size += 8;
|
||||
self.instrs.push(Instr::Push(reg));
|
||||
self.pushed_size
|
||||
}
|
||||
|
||||
fn emit_move(&mut self, from: SlotId, to: SlotId) {
|
||||
let size = self.size_of(&self.slots[from.index].ty);
|
||||
let other_size = self.size_of(&self.slots[to.index].ty);
|
||||
assert_eq!(size, other_size);
|
||||
|
||||
self.instrs.push(Instr::Move(
|
||||
size,
|
||||
self.slot_to_value(from),
|
||||
self.slot_to_value(to),
|
||||
));
|
||||
}
|
||||
|
||||
fn slot_to_value(&self, slot: SlotId) -> Value {
|
||||
let slot_val = &self.slots[slot.index];
|
||||
Value {
|
||||
store: slot_val.value.into(),
|
||||
offset: slot.offset,
|
||||
}
|
||||
}
|
||||
|
||||
fn size_of(&self, ty: &Type) -> Size {
|
||||
match ty {
|
||||
Type::Builtin(ty) => match ty {
|
||||
Ty::U8 | Ty::I8 | Ty::Bool => 1,
|
||||
Ty::U16 | Ty::I16 => 2,
|
||||
Ty::U32 | Ty::I32 => 4,
|
||||
Ty::U64 | Ty::I64 => 8,
|
||||
Ty::Void => 0,
|
||||
},
|
||||
Type::Struct(name) => self
|
||||
.lookup_struct(name)
|
||||
.fields
|
||||
.iter()
|
||||
.map(|field| self.size_of(&field.ty))
|
||||
.sum(),
|
||||
Type::Pinter(_) => 8,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Generator<'a> {
|
||||
fn add_variable(&mut self, name: String, location: usize) {
|
||||
self.variables.push(Variable { name, location });
|
||||
}
|
||||
|
||||
fn add_slot(&mut self, ty: Type, value: SlotValue) -> usize {
|
||||
let slot = self.slots.len();
|
||||
self.slots.push(Slot { ty, value });
|
||||
slot
|
||||
}
|
||||
|
||||
fn link(mut self) -> Vec<u8> {
|
||||
for reloc in self.label_relocs {
|
||||
let label = self.labels[reloc.label].unwrap();
|
||||
let offset = reloc.offset;
|
||||
let target = label - offset;
|
||||
let target_bytes = u64::to_le_bytes(target as u64);
|
||||
self.code_section[offset..offset + 8].copy_from_slice(&target_bytes);
|
||||
}
|
||||
|
||||
for reloc in self.data_relocs {
|
||||
let data = self.data[reloc.data].unwrap();
|
||||
let offset = reloc.offset;
|
||||
let target = data;
|
||||
let target_bytes = u64::to_le_bytes((target + self.code_section.len()) as u64);
|
||||
self.data_section[offset..offset + 8].copy_from_slice(&target_bytes);
|
||||
}
|
||||
|
||||
self.code_section.extend_from_slice(&self.data_section);
|
||||
self.code_section
|
||||
}
|
||||
|
||||
fn lookup_func_label(&mut self, name: &str) -> Label {
|
||||
if let Some(label) = self.func_labels.iter().find(|(n, _)| n == name) {
|
||||
return label.1;
|
||||
}
|
||||
|
||||
panic!("Function not found: {}", name);
|
||||
}
|
||||
|
||||
fn declare_label(&mut self) -> Label {
|
||||
self.labels.push(None);
|
||||
self.labels.len() - 1
|
||||
}
|
||||
|
||||
fn define_label(&mut self, label: Label) {
|
||||
self.labels[label] = Some(self.code_section.len());
|
||||
}
|
||||
|
||||
fn declare_data(&mut self) -> Data {
|
||||
self.data.push(None);
|
||||
self.data.len() - 1
|
||||
}
|
||||
|
||||
fn define_data(&mut self, data: Data, bytes: &[u8]) {
|
||||
self.data[data] = Some(self.data.len());
|
||||
self.data_section.extend_from_slice(bytes);
|
||||
}
|
||||
|
||||
fn lookup_struct(&self, name: &str) -> &Struct {
|
||||
self.lookup_item(name)
|
||||
.map(|item| match item {
|
||||
Item::Struct(s) => s,
|
||||
_ => panic!("Not a struct: {}", name),
|
||||
})
|
||||
.expect("Struct not found")
|
||||
}
|
||||
|
||||
fn lookup_function(&self, name: &str) -> &'a Function {
|
||||
self.lookup_item(name)
|
||||
.map(|item| match item {
|
||||
Item::Function(f) => f,
|
||||
_ => panic!("Not a function: {}", name),
|
||||
})
|
||||
.expect("Function not found")
|
||||
}
|
||||
|
||||
fn lookup_item(&self, name: &str) -> Option<&'a Item> {
|
||||
self.ast.iter().find(|item| match item {
|
||||
Item::Import(_) => false,
|
||||
Item::Struct(s) => s.name == name,
|
||||
Item::Function(f) => f.name == name,
|
||||
})
|
||||
}
|
||||
|
||||
fn lookup_variable(&self, name: &str) -> Option<&Variable> {
|
||||
self.variables.iter().find(|variable| variable.name == name)
|
||||
}
|
||||
|
||||
fn push_frame(&mut self) -> Frame {
|
||||
Frame {
|
||||
slot_count: self.slots.len(),
|
||||
var_count: self.variables.len(),
|
||||
}
|
||||
}
|
||||
|
||||
fn pop_frame(&mut self, frame: Frame) {
|
||||
self.slots.truncate(frame.slot_count);
|
||||
self.variables.truncate(frame.var_count);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate(ast: &[Item]) -> Vec<u8> {
|
||||
Generator {
|
||||
ast,
|
||||
..Default::default()
|
||||
}
|
||||
.generate()
|
||||
}
|
|
@ -1,151 +0,0 @@
|
|||
use logos::Logos;
|
||||
|
||||
/// Generates the token enum together with its operator and type enums.
///
/// * `keywords` become unit variants matched by their literal.
/// * `operators` are grouped by precedence; all of them lex into the single
///   `Op(..)` variant, and the operator enum gets a `prec()` method that
///   returns the group's number.
/// * `types` lex into the single `Ty(..)` variant.
/// * `regexes` become unit variants matched by a regular expression.
macro_rules! gen_token {
    ($name:ident {
        keywords: {
            $($keyword:ident = $lit:literal,)*
        },
        operators: $op_name:ident {
            $($prec:literal: {$(
                $op:ident = $op_lit:literal,
            )*},)*
        },
        types: $ty_type:ident {
            $($ty:ident = $ty_lit:literal,)*
        },
        regexes: {
            $($regex:ident = $regex_lit:literal,)*
        },
    }) => {
        #[derive(Debug, Clone, PartialEq, Eq, Copy, Logos)]
        #[logos(skip "[ \t\n]+")]
        pub enum $name {
            $(#[token($lit)] $keyword,)*
            $($(#[token($op_lit, |_| $op_name::$op)])*)*
            Op($op_name),
            $(#[token($ty_lit, |_| $ty_type::$ty)])*
            Ty($ty_type),
            $(#[regex($regex_lit)] $regex,)*
        }

        #[derive(Debug, Clone, PartialEq, Eq, Copy)]
        pub enum $op_name {
            $($($op,)*)*
        }

        #[derive(Debug, Clone, PartialEq, Eq, Copy)]
        pub enum $ty_type {
            $($ty,)*
        }

        impl $op_name {
            pub fn prec(&self) -> u8 {
                match self {
                    $($($op_name::$op => $prec,)*)*
                }
            }
        }
    };
}
|
||||
|
||||
// Token set of the language. Operator groups are keyed by precedence: the
// parser treats a smaller number as binding tighter.
gen_token! {
    TokenKind {
        keywords: {
            Use = "use",
            Fn = "fn",
            Let = "let",
            If = "if",
            Else = "else",
            For = "for",
            Return = "return",
            Break = "break",
            Continue = "continue",
            Struct = "struct",

            True = "true",
            False = "false",

            LBrace = "{",
            RBrace = "}",
            LParen = "(",
            RParen = ")",
            LBracket = "[",
            RBracket = "]",

            Colon = ":",
            Semicolon = ";",
            Comma = ",",
            Dot = ".",
        },
        operators: Op {
            14: {
                Assign = "=",
                AddAssign = "+=",
                SubAssign = "-=",
                MulAssign = "*=",
                DivAssign = "/=",
                ModAssign = "%=",
                AndAssign = "&=",
                OrAssign = "|=",
                XorAssign = "^=",
                ShlAssign = "<<=",
                ShrAssign = ">>=",
            },
            12: {
                Or = "||",
            },
            11: {
                And = "&&",
            },
            10: {
                Bor = "|",
            },
            9: {
                Xor = "^",
            },
            8: {
                Band = "&",
            },
            7: {
                Eq = "==",
                Neq = "!=",
            },
            6: {
                Lt = "<",
                Gt = ">",
                Le = "<=",
                Ge = ">=",
            },
            5: {
                Shl = "<<",
                Shr = ">>",
            },
            4: {
                Add = "+",
                Sub = "-",
            },
            3: {
                Mul = "*",
                Div = "/",
                Mod = "%",
            },
        },
        types: Ty {
            U8 = "u8",
            U16 = "u16",
            U32 = "u32",
            U64 = "u64",
            I8 = "i8",
            I16 = "i16",
            I32 = "i32",
            I64 = "i64",
            Bool = "bool",
            Void = "void",
        },
        regexes: {
            Ident = "[a-zA-Z_][a-zA-Z0-9_]*",
            String = r#""([^"\\]|\\.)*""#,
            Number = "[0-9]+",
        },
    }
}
|
|
@ -1,6 +0,0 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
mod codegen;
|
||||
mod lexer;
|
||||
mod parser;
|
||||
mod typechk;
|
|
@ -1,566 +0,0 @@
|
|||
use {core::panic, std::iter};
|
||||
|
||||
use std::array;
|
||||
|
||||
use logos::{Lexer, Logos};
|
||||
|
||||
use crate::lexer::{Op, TokenKind, Ty};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Item {
|
||||
Import(String),
|
||||
Struct(Struct),
|
||||
Function(Function),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum Type {
|
||||
Builtin(Ty),
|
||||
Struct(String),
|
||||
Pinter(Box<Type>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Struct {
|
||||
pub name: String,
|
||||
pub fields: Vec<Field>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Field {
|
||||
pub name: String,
|
||||
pub ty: Type,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Function {
|
||||
pub name: String,
|
||||
pub args: Vec<Arg>,
|
||||
pub ret: Type,
|
||||
pub body: Vec<Exp>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Arg {
|
||||
pub name: String,
|
||||
pub ty: Type,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CtorField {
|
||||
pub name: String,
|
||||
pub value: Exp,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Exp {
|
||||
Literal(Literal),
|
||||
Variable(String),
|
||||
Call {
|
||||
name: String,
|
||||
args: Vec<Exp>,
|
||||
},
|
||||
Ctor {
|
||||
name: Option<Box<Exp>>,
|
||||
fields: Vec<CtorField>,
|
||||
},
|
||||
Index {
|
||||
base: Box<Exp>,
|
||||
index: Box<Exp>,
|
||||
},
|
||||
Field {
|
||||
base: Box<Exp>,
|
||||
field: String,
|
||||
},
|
||||
Unary {
|
||||
op: Op,
|
||||
exp: Box<Exp>,
|
||||
},
|
||||
Binary {
|
||||
op: Op,
|
||||
left: Box<Exp>,
|
||||
right: Box<Exp>,
|
||||
},
|
||||
If {
|
||||
cond: Box<Exp>,
|
||||
then: Box<Exp>,
|
||||
else_: Option<Box<Exp>>,
|
||||
},
|
||||
Let {
|
||||
name: String,
|
||||
ty: Option<Type>,
|
||||
value: Box<Exp>,
|
||||
},
|
||||
For {
|
||||
init: Option<Box<Exp>>,
|
||||
cond: Option<Box<Exp>>,
|
||||
step: Option<Box<Exp>>,
|
||||
block: Box<Exp>,
|
||||
},
|
||||
Block(Vec<Exp>),
|
||||
Return(Option<Box<Exp>>),
|
||||
Break,
|
||||
Continue,
|
||||
}
|
||||
|
||||
/// Literal value.
#[derive(Clone, Debug)]
pub enum Literal {
    /// Integer literal, parsed from a `Number` token.
    Int(u64),
    /// `true` or `false`.
    Bool(bool),
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct Token {
|
||||
pub kind: TokenKind,
|
||||
pub span: std::ops::Range<usize>,
|
||||
pub value: String,
|
||||
}
|
||||
|
||||
struct Parser<'a> {
|
||||
next_token: Option<Token>,
|
||||
lexer: logos::Lexer<'a, TokenKind>,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub fn new(input: &'a str) -> Self {
|
||||
let mut lexer = TokenKind::lexer(input);
|
||||
let next_token = Self::next_token(&mut lexer);
|
||||
Self { next_token, lexer }
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Option<Token> {
|
||||
let token = self.next_token.clone();
|
||||
self.next_token = Self::next_token(&mut self.lexer);
|
||||
token
|
||||
}
|
||||
|
||||
pub fn next_token(lexer: &mut Lexer<TokenKind>) -> Option<Token> {
|
||||
lexer.next().map(|r| {
|
||||
r.map(|e| Token {
|
||||
kind: e,
|
||||
span: lexer.span(),
|
||||
value: lexer.slice().to_owned(),
|
||||
})
|
||||
.unwrap_or_else(|e| {
|
||||
let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start);
|
||||
println!("Lexer error: {}:{}: {:?}", line, col, e);
|
||||
std::process::exit(1);
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
pub fn pos_to_line_col(&self, pos: usize) -> (usize, usize) {
|
||||
Self::pos_to_line_col_low(self.lexer.source(), pos)
|
||||
}
|
||||
|
||||
/// Converts byte position `pos` of `source` into a `(line, column)` pair.
///
/// Lines are counted from 1; the column is the number of bytes between the
/// start of the line and `pos` (so the first byte of a line has column 0).
///
/// # Panics
/// Panics if `pos` is past the end of `source` or not on a character
/// boundary.
pub fn pos_to_line_col_low(source: &str, pos: usize) -> (usize, usize) {
    let prefix = &source[..pos];
    // Counting newlines (rather than `lines()`) keeps the result right when
    // `pos` sits directly after a line break or at the very start, where
    // `lines()` yields no item for the current, still empty line.
    let line = prefix.bytes().filter(|&byte| byte == b'\n').count() + 1;
    let col = match prefix.rfind('\n') {
        Some(newline) => pos - newline - 1,
        None => pos,
    };
    (line, col)
}
|
||||
|
||||
/// Consumes the next token and returns it if it is of kind `kind`.
///
/// # Panics
/// Panics at end of input or when the token has a different kind.
pub fn expect(&mut self, kind: TokenKind) -> Token {
    let token = self.expect_any();
    if token.kind != kind {
        let (line, col) = self.pos_to_line_col(token.span.start);
        panic!(
            "Expected {:?} at {}:{}, found {:?}",
            kind, line, col, token.kind
        );
    }
    token
}
|
||||
|
||||
/// Consumes and returns the next token.
///
/// # Panics
/// Panics at end of input.
pub fn expect_any(&mut self) -> Token {
    match self.next() {
        Some(token) => token,
        None => panic!("Unexpected EOF"),
    }
}
|
||||
|
||||
/// Returns the lookahead token without consuming it.
pub fn peek(&self) -> Option<&Token> {
    Option::as_ref(&self.next_token)
}
|
||||
|
||||
/// Consumes the next token if it is of kind `kind`; reports whether it did.
pub fn try_advance(&mut self, kind: TokenKind) -> bool {
    let found = matches!(self.peek(), Some(token) if token.kind == kind);
    if found {
        self.next();
    }
    found
}
|
||||
|
||||
pub fn parse(&mut self) -> Vec<Item> {
|
||||
iter::from_fn(|| self.parse_item()).collect()
|
||||
}
|
||||
|
||||
fn parse_item(&mut self) -> Option<Item> {
|
||||
let token = self.next()?;
|
||||
match token.kind {
|
||||
TokenKind::Struct => Some(self.parse_struct()),
|
||||
TokenKind::Fn => Some(self.parse_function()),
|
||||
TokenKind::Use => Some(Item::Import(self.expect(TokenKind::String).value)),
|
||||
tkn => {
|
||||
let (line, col) = self.pos_to_line_col(token.span.start);
|
||||
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_struct(&mut self) -> Item {
|
||||
let name = self.expect(TokenKind::Ident).value;
|
||||
self.expect(TokenKind::LBrace);
|
||||
let fields = self.sequence(TokenKind::Comma, TokenKind::RBrace, Self::parse_field);
|
||||
Item::Struct(Struct { name, fields })
|
||||
}
|
||||
|
||||
fn parse_field(&mut self) -> Field {
|
||||
let name = self.expect(TokenKind::Ident).value;
|
||||
self.expect(TokenKind::Colon);
|
||||
let ty = self.type_();
|
||||
|
||||
Field { name, ty }
|
||||
}
|
||||
|
||||
fn type_(&mut self) -> Type {
|
||||
let token = self.next().unwrap();
|
||||
match token.kind {
|
||||
TokenKind::Ty(ty) => Type::Builtin(ty),
|
||||
TokenKind::Ident => Type::Struct(token.value),
|
||||
TokenKind::Op(Op::Band) => {
|
||||
let ty = self.type_();
|
||||
Type::Pinter(Box::new(ty))
|
||||
}
|
||||
tkn => {
|
||||
let (line, col) = self.pos_to_line_col(token.span.start);
|
||||
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_function(&mut self) -> Item {
|
||||
let name = self.expect(TokenKind::Ident).value;
|
||||
self.expect(TokenKind::LParen);
|
||||
let args = self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_arg);
|
||||
self.expect(TokenKind::Colon);
|
||||
let ret = self.type_();
|
||||
Item::Function(Function {
|
||||
name,
|
||||
args,
|
||||
ret,
|
||||
body: self.parse_block(),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_arg(&mut self) -> Arg {
|
||||
let name = self.expect(TokenKind::Ident).value;
|
||||
self.expect(TokenKind::Colon);
|
||||
let ty = self.type_();
|
||||
self.try_advance(TokenKind::Comma);
|
||||
Arg { name, ty }
|
||||
}
|
||||
|
||||
/// Parses a full expression, accepting operators of every precedence.
fn parse_expr(&mut self) -> Exp {
    self.parse_binary_expr(u8::MAX)
}
|
||||
|
||||
fn parse_binary_expr(&mut self, min_prec: u8) -> Exp {
|
||||
let mut lhs = self.parse_unit_expr();
|
||||
|
||||
while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) {
|
||||
let prec = op.prec();
|
||||
if prec > min_prec {
|
||||
break;
|
||||
}
|
||||
|
||||
self.next();
|
||||
let rhs = self.parse_binary_expr(prec);
|
||||
|
||||
lhs = Exp::Binary {
|
||||
op,
|
||||
left: Box::new(lhs),
|
||||
right: Box::new(rhs),
|
||||
};
|
||||
}
|
||||
|
||||
lhs
|
||||
}
|
||||
|
||||
fn parse_unit_expr(&mut self) -> Exp {
|
||||
let token = self.next().unwrap();
|
||||
let mut expr = match token.kind {
|
||||
TokenKind::True => Exp::Literal(Literal::Bool(true)),
|
||||
TokenKind::False => Exp::Literal(Literal::Bool(false)),
|
||||
TokenKind::Ident => Exp::Variable(token.value),
|
||||
TokenKind::LBrace => {
|
||||
Exp::Block(self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr))
|
||||
}
|
||||
TokenKind::LParen => {
|
||||
let expr = self.parse_expr();
|
||||
self.expect(TokenKind::RParen);
|
||||
expr
|
||||
}
|
||||
TokenKind::Number => {
|
||||
let value = token.value.parse().unwrap();
|
||||
Exp::Literal(Literal::Int(value))
|
||||
}
|
||||
TokenKind::Let => {
|
||||
let name = self.expect(TokenKind::Ident).value;
|
||||
let ty = self.try_advance(TokenKind::Colon).then(|| self.type_());
|
||||
self.expect(TokenKind::Op(Op::Assign));
|
||||
let value = self.parse_expr();
|
||||
Exp::Let {
|
||||
name,
|
||||
ty,
|
||||
value: Box::new(value),
|
||||
}
|
||||
}
|
||||
TokenKind::If => {
|
||||
let cond = self.parse_expr();
|
||||
let then = Exp::Block(self.parse_block());
|
||||
let else_ = self
|
||||
.try_advance(TokenKind::Else)
|
||||
.then(|| {
|
||||
if self.peek().is_some_and(|t| t.kind == TokenKind::If) {
|
||||
self.parse_expr()
|
||||
} else {
|
||||
Exp::Block(self.parse_block())
|
||||
}
|
||||
})
|
||||
.map(Box::new);
|
||||
Exp::If {
|
||||
cond: Box::new(cond),
|
||||
then: Box::new(then),
|
||||
else_,
|
||||
}
|
||||
}
|
||||
TokenKind::For => {
|
||||
let params =
|
||||
self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr);
|
||||
let mut exprs = Vec::new();
|
||||
while !self.try_advance(TokenKind::RBrace) {
|
||||
exprs.push(self.parse_expr());
|
||||
self.try_advance(TokenKind::Semicolon);
|
||||
}
|
||||
let block = Exp::Block(exprs);
|
||||
let len = params.len();
|
||||
let mut exprs = params.into_iter();
|
||||
let [init, consd, step] = array::from_fn(|_| exprs.next());
|
||||
match len {
|
||||
0 => Exp::For {
|
||||
init: None,
|
||||
cond: None,
|
||||
step: None,
|
||||
block: Box::new(block),
|
||||
},
|
||||
1 => Exp::For {
|
||||
init: None,
|
||||
cond: init.map(Box::new),
|
||||
step: None,
|
||||
block: Box::new(block),
|
||||
},
|
||||
3 => Exp::For {
|
||||
init: init.map(Box::new),
|
||||
cond: consd.map(Box::new),
|
||||
step: step.map(Box::new),
|
||||
block: Box::new(block),
|
||||
},
|
||||
_ => {
|
||||
let (line, col) = self.pos_to_line_col(token.span.start);
|
||||
panic!("Invalid loop syntax at {}:{}, loop accepts 1 (while), 0 (loop), or 3 (for) statements separated by semicolon", line, col)
|
||||
}
|
||||
}
|
||||
}
|
||||
TokenKind::Return => {
|
||||
let value = self
|
||||
.peek()
|
||||
.is_some_and(|t| {
|
||||
!matches!(
|
||||
t.kind,
|
||||
TokenKind::Semicolon
|
||||
| TokenKind::RBrace
|
||||
| TokenKind::RParen
|
||||
| TokenKind::Comma
|
||||
)
|
||||
})
|
||||
.then(|| Box::new(self.parse_expr()));
|
||||
Exp::Return(value)
|
||||
}
|
||||
TokenKind::Op(op) => Exp::Unary {
|
||||
op,
|
||||
exp: Box::new(self.parse_expr()),
|
||||
},
|
||||
TokenKind::Dot => {
|
||||
let token = self.expect_any();
|
||||
match token.kind {
|
||||
TokenKind::LBrace => {
|
||||
let fields = self.sequence(
|
||||
TokenKind::Comma,
|
||||
TokenKind::RBrace,
|
||||
Self::parse_ctor_field,
|
||||
);
|
||||
Exp::Ctor { name: None, fields }
|
||||
}
|
||||
tkn => {
|
||||
let (line, col) = self.pos_to_line_col(token.span.start);
|
||||
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TokenKind::Ty(_)
|
||||
| TokenKind::String
|
||||
| TokenKind::Use
|
||||
| TokenKind::Break
|
||||
| TokenKind::Continue
|
||||
| TokenKind::Struct
|
||||
| TokenKind::RBrace
|
||||
| TokenKind::RParen
|
||||
| TokenKind::LBracket
|
||||
| TokenKind::RBracket
|
||||
| TokenKind::Colon
|
||||
| TokenKind::Semicolon
|
||||
| TokenKind::Comma
|
||||
| TokenKind::Fn
|
||||
| TokenKind::Else => {
|
||||
let (line, col) = self.pos_to_line_col(token.span.start);
|
||||
panic!("Unexpected {:?} at {}:{}", token.kind, line, col)
|
||||
}
|
||||
};
|
||||
|
||||
loop {
|
||||
match self.peek().map(|t| t.kind) {
|
||||
Some(TokenKind::LParen) => {
|
||||
self.next();
|
||||
expr = Exp::Call {
|
||||
name: match expr {
|
||||
Exp::Variable(name) => name,
|
||||
_ => {
|
||||
let (line, col) = self.pos_to_line_col(token.span.start);
|
||||
panic!("Expected function name at {}:{}", line, col)
|
||||
}
|
||||
},
|
||||
args: self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_expr),
|
||||
};
|
||||
}
|
||||
Some(TokenKind::LBracket) => {
|
||||
self.next();
|
||||
let index = self.parse_expr();
|
||||
self.expect(TokenKind::RBracket);
|
||||
expr = Exp::Index {
|
||||
base: Box::new(expr),
|
||||
index: Box::new(index),
|
||||
};
|
||||
}
|
||||
Some(TokenKind::Dot) => {
|
||||
self.next();
|
||||
|
||||
let token = self.expect_any();
|
||||
match token.kind {
|
||||
TokenKind::Ident => {
|
||||
expr = Exp::Field {
|
||||
base: Box::new(expr),
|
||||
field: token.value,
|
||||
};
|
||||
}
|
||||
TokenKind::LBrace => {
|
||||
let fields = self.sequence(
|
||||
TokenKind::Comma,
|
||||
TokenKind::RBrace,
|
||||
Self::parse_ctor_field,
|
||||
);
|
||||
expr = Exp::Ctor {
|
||||
name: Some(Box::new(expr)),
|
||||
fields,
|
||||
};
|
||||
}
|
||||
tkn => {
|
||||
let (line, col) = self.pos_to_line_col(token.span.start);
|
||||
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => break expr,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_ctor_field(&mut self) -> CtorField {
|
||||
let name = self.expect(TokenKind::Ident).value;
|
||||
self.expect(TokenKind::Colon);
|
||||
let value = self.parse_expr();
|
||||
CtorField { name, value }
|
||||
}
|
||||
|
||||
pub fn parse_block(&mut self) -> Vec<Exp> {
|
||||
self.expect(TokenKind::LBrace);
|
||||
let mut exprs = Vec::new();
|
||||
while !self.try_advance(TokenKind::RBrace) {
|
||||
exprs.push(self.parse_expr());
|
||||
self.try_advance(TokenKind::Semicolon);
|
||||
}
|
||||
exprs
|
||||
}
|
||||
|
||||
pub fn sequence<T>(
|
||||
&mut self,
|
||||
sep: TokenKind,
|
||||
term: TokenKind,
|
||||
mut parser: impl FnMut(&mut Self) -> T,
|
||||
) -> Vec<T> {
|
||||
let mut items = Vec::new();
|
||||
while !self.try_advance(term) {
|
||||
items.push(parser(self));
|
||||
if self.try_advance(term) {
|
||||
break;
|
||||
}
|
||||
self.expect(sep);
|
||||
}
|
||||
items
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(input: &str) -> Vec<Item> {
|
||||
Parser::new(input).parse()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
#[test]
|
||||
fn sanity() {
|
||||
let input = r#"
|
||||
struct Foo {
|
||||
x: i32,
|
||||
y: i32,
|
||||
}
|
||||
|
||||
fn main(): void {
|
||||
let foo = Foo.{ x: 1, y: 2 };
|
||||
if foo.x > 0 {
|
||||
return foo.x;
|
||||
} else {
|
||||
return foo.y;
|
||||
}
|
||||
for i < 10 {
|
||||
i = i + 1;
|
||||
}
|
||||
for let i = 0; i < 10; i = i + 1 {
|
||||
i = i + 1;
|
||||
}
|
||||
i + 1 * 3 / 4 % 5 == 2 + 3 - 4 * 5 / 6 % 7;
|
||||
fomething();
|
||||
pahum(&foo);
|
||||
lupa(*soo);
|
||||
return foo.x + foo.y;
|
||||
}
|
||||
|
||||
fn lupa(x: i32): i32 {
|
||||
return x;
|
||||
}
|
||||
|
||||
fn pahum(x: &Foo): void {
|
||||
return;
|
||||
}
|
||||
"#;
|
||||
let _ = super::parse(input);
|
||||
}
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
use crate::lexer::Ty;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Type {
|
||||
Builtin(Ty),
|
||||
Struct(StructType),
|
||||
Pointer(Box<Type>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct StructType {
|
||||
pub name: String,
|
||||
pub fields: Vec<Field>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Field {
|
||||
pub name: String,
|
||||
pub ty: Type,
|
||||
}
|
|
@ -1,592 +1,108 @@
|
|||
use std::{iter::Cycle, ops::Range, usize};
|
||||
use {crate::parser, std::fmt::Write};
|
||||
|
||||
use crate::{
|
||||
lexer::{self, Ty},
|
||||
parser::{Exp, Function, Item, Literal, Struct, Type},
|
||||
};
|
||||
const STACK_PTR: &str = "r254";
|
||||
const ZERO: &str = "r0";
|
||||
const RET_ADDR: &str = "r31";
|
||||
|
||||
type Reg = u8;
|
||||
type Offset = i32;
|
||||
type Pushed = bool;
|
||||
type SlotIndex = usize;
|
||||
type Label = usize;
|
||||
type Data = usize;
|
||||
type Size = usize;
|
||||
|
||||
//| Register | Description | Saver |
|
||||
//|:-----------|:--------------------|:-------|
|
||||
//| r0 | Hard-wired zero | N/A |
|
||||
//| r1 - r2 | Return values | Caller |
|
||||
//| r2 - r11 | Function parameters | Caller |
|
||||
//| r12 - r30 | General purpose | Caller |
|
||||
//| r31 | Return address | Caller |
|
||||
//| r32 - r253 | General purpose | Callee |
|
||||
//| r254 | Stack pointer | Callee |
|
||||
//| r255 | Thread pointer | N/A |
|
||||
|
||||
struct RegAlloc {
|
||||
pub regs: Box<[Option<usize>; 256]>,
|
||||
pub used: Box<[bool; 256]>,
|
||||
pub spill_cycle: Cycle<Range<u8>>,
|
||||
pub struct Codegen<'a> {
|
||||
path: &'a std::path::Path,
|
||||
code: String,
|
||||
data: String,
|
||||
}
|
||||
|
||||
impl RegAlloc {
|
||||
const STACK_POINTER: Reg = 254;
|
||||
const ZERO: Reg = 0;
|
||||
const RETURN_ADDRESS: Reg = 31;
|
||||
|
||||
fn alloc_general(&mut self, slot: usize) -> Option<Reg> {
|
||||
self.regs[32..254]
|
||||
.iter_mut()
|
||||
.zip(&mut self.used[32..254])
|
||||
.position(|(reg, used)| {
|
||||
if reg.is_none() {
|
||||
*reg = Some(slot);
|
||||
*used = true;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
.map(|reg| reg as Reg + 32)
|
||||
}
|
||||
|
||||
fn free(&mut self, reg: Reg) {
|
||||
assert!(self.regs[reg as usize].take().is_some());
|
||||
}
|
||||
|
||||
fn is_used(&self, reg: Reg) -> bool {
|
||||
self.regs[reg as usize].is_some()
|
||||
}
|
||||
|
||||
fn spill(&mut self, for_slot: usize) -> (Reg, Option<usize>) {
|
||||
let to_spill = self.spill_cycle.next().unwrap();
|
||||
let slot = self.spill_specific(to_spill, for_slot);
|
||||
(to_spill as Reg + 32, slot)
|
||||
}
|
||||
|
||||
fn spill_specific(&mut self, reg: Reg, for_slot: usize) -> Option<usize> {
|
||||
self.regs[reg as usize].replace(for_slot)
|
||||
}
|
||||
|
||||
fn restore(&mut self, reg: Reg, slot: usize) -> usize {
|
||||
self.regs[reg as usize].replace(slot).unwrap()
|
||||
}
|
||||
|
||||
fn alloc_specific(&mut self, reg: u8, to: usize) {
|
||||
assert!(self.regs[reg as usize].replace(to).is_none());
|
||||
}
|
||||
|
||||
fn alloc_specific_in_reg(&mut self, reg: InReg, to: usize) {
|
||||
match reg {
|
||||
InReg::Single(r) => self.alloc_specific(r, to),
|
||||
InReg::Pair(r1, r2) => {
|
||||
self.alloc_specific(r1, to);
|
||||
self.alloc_specific(r2, to);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ParamAlloc {
|
||||
reg_range: Range<Reg>,
|
||||
stack: Offset,
|
||||
}
|
||||
|
||||
impl ParamAlloc {
|
||||
fn for_params() -> Self {
|
||||
impl<'a> Codegen<'a> {
|
||||
pub fn new(path: &'a std::path::Path) -> Self {
|
||||
Self {
|
||||
stack: 8, // return adress is in callers stack frame
|
||||
reg_range: 2..12,
|
||||
path,
|
||||
code: String::new(),
|
||||
data: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn for_returns() -> Self {
|
||||
Self {
|
||||
stack: 0,
|
||||
reg_range: 0..2,
|
||||
pub fn file(&mut self, exprs: &[parser::Expr]) -> std::fmt::Result {
|
||||
for expr in exprs {
|
||||
self.expr(expr)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn alloc(&mut self, size: usize) -> SlotValue {
|
||||
match self.try_alloc_regs(size) {
|
||||
Some(reg) => reg,
|
||||
None => {
|
||||
let stack = self.stack;
|
||||
self.stack += size as Offset;
|
||||
SlotValue::Stack(stack)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn try_alloc_regs(&mut self, size: usize) -> Option<SlotValue> {
|
||||
let mut needed = size.div_ceil(8);
|
||||
if needed > 2 {
|
||||
needed = 1; // passed by ref
|
||||
}
|
||||
|
||||
if self.reg_range.len() < needed {
|
||||
return None;
|
||||
}
|
||||
|
||||
match needed {
|
||||
1 => {
|
||||
let reg = self.reg_range.start;
|
||||
self.reg_range.start += 1;
|
||||
Some(SlotValue::Reg(InReg::Single(reg)))
|
||||
}
|
||||
2 => {
|
||||
let reg = self.reg_range.start;
|
||||
self.reg_range.start += 2;
|
||||
Some(SlotValue::Reg(InReg::Pair(reg, reg + 1)))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RegAlloc {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
regs: Box::new([None; 256]),
|
||||
used: Box::new([false; 256]),
|
||||
spill_cycle: (32..254).cycle(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Variable {
|
||||
name: String,
|
||||
location: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct SlotId {
|
||||
// index into slot stack
|
||||
index: SlotIndex,
|
||||
// temporary offset carried over when e.g. accessing fields
|
||||
offset: Offset,
|
||||
// this means we can mutate the value as part of computation
|
||||
owned: bool,
|
||||
}
|
||||
|
||||
impl SlotId {
|
||||
fn base(location: usize) -> Self {
|
||||
Self {
|
||||
index: location,
|
||||
offset: 0,
|
||||
owned: true,
|
||||
}
|
||||
}
|
||||
|
||||
fn borrowed(self) -> Self {
|
||||
Self {
|
||||
owned: false,
|
||||
..self
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Slot {
|
||||
ty: Type,
|
||||
value: SlotValue,
|
||||
}
|
||||
|
||||
#[repr(transparent)]
|
||||
struct InstBuffer {
|
||||
buffer: Vec<u8>,
|
||||
}
|
||||
|
||||
impl InstBuffer {
|
||||
fn new(vec: &mut Vec<u8>) -> &mut Self {
|
||||
unsafe { &mut *(vec as *mut Vec<u8> as *mut Self) }
|
||||
}
|
||||
}
|
||||
|
||||
impl hbbytecode::Buffer for InstBuffer {
|
||||
fn reserve(&mut self, bytes: usize) {
|
||||
self.buffer.reserve(bytes);
|
||||
}
|
||||
|
||||
unsafe fn write(&mut self, byte: u8) {
|
||||
self.buffer.push(byte);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum InReg {
|
||||
Single(Reg),
|
||||
// if one of the registers is allocated, the other is too, ALWAYS
|
||||
// with the same slot
|
||||
Pair(Reg, Reg),
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum Spill {
|
||||
Reg(InReg),
|
||||
Stack(Offset), // relative to frame end (rsp if nothing was pushed)
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum SlotValue {
|
||||
Reg(InReg),
|
||||
Stack(Offset), // relative to frame start (rbp)
|
||||
Imm(u64),
|
||||
Spilled(Spill, SlotIndex),
|
||||
}
|
||||
|
||||
pub struct Value {
|
||||
store: ValueStore,
|
||||
offset: Offset,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum ValueStore {
|
||||
Reg(InReg),
|
||||
Stack(Offset, Pushed),
|
||||
Imm(u64),
|
||||
}
|
||||
|
||||
impl From<SlotValue> for ValueStore {
|
||||
fn from(value: SlotValue) -> Self {
|
||||
match value {
|
||||
SlotValue::Reg(reg) => ValueStore::Reg(reg),
|
||||
SlotValue::Stack(offset) => ValueStore::Stack(offset, false),
|
||||
SlotValue::Imm(imm) => ValueStore::Imm(imm),
|
||||
SlotValue::Spilled(spill, _) => match spill {
|
||||
Spill::Reg(reg) => ValueStore::Reg(reg),
|
||||
Spill::Stack(offset) => ValueStore::Stack(offset, true),
|
||||
fn expr(&mut self, expr: &parser::Expr) -> std::fmt::Result {
|
||||
use parser::Expr as E;
|
||||
match expr {
|
||||
E::Decl {
|
||||
name,
|
||||
val:
|
||||
E::Closure {
|
||||
ret: E::Ident { name: "void" },
|
||||
body,
|
||||
},
|
||||
} => {
|
||||
writeln!(self.code, "{name}:")?;
|
||||
self.expr(body)
|
||||
}
|
||||
E::Return { val: None } => self.ret(),
|
||||
E::Block { stmts } => {
|
||||
for stmt in stmts {
|
||||
self.expr(stmt)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub struct LabelReloc {
|
||||
pub label: Label,
|
||||
pub offset: usize,
|
||||
}
|
||||
|
||||
pub struct DataReloc {
|
||||
pub data: Data,
|
||||
pub offset: usize,
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub struct Frame {
|
||||
pub slot_count: usize,
|
||||
pub var_count: usize,
|
||||
}
|
||||
|
||||
enum Instr {
|
||||
BinOp(lexer::Op, Value, Value),
|
||||
Move(Size, Value, Value),
|
||||
Push(Reg),
|
||||
Jump(Label),
|
||||
Call(String),
|
||||
JumpIfZero(Value, Label),
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Generator<'a> {
|
||||
ast: &'a [Item],
|
||||
|
||||
func_labels: Vec<(String, Label)>,
|
||||
|
||||
stack_size: Offset,
|
||||
pushed_size: Offset,
|
||||
|
||||
regs: RegAlloc,
|
||||
variables: Vec<Variable>,
|
||||
slots: Vec<Slot>,
|
||||
|
||||
labels: Vec<Option<usize>>,
|
||||
label_relocs: Vec<LabelReloc>,
|
||||
|
||||
data: Vec<Option<usize>>,
|
||||
data_relocs: Vec<DataReloc>,
|
||||
|
||||
code_section: Vec<u8>,
|
||||
data_section: Vec<u8>,
|
||||
|
||||
instrs: Vec<Instr>,
|
||||
}
|
||||
|
||||
impl<'a> Generator<'a> {
|
||||
fn generate(mut self) -> Vec<u8> {
|
||||
for item in self.ast {
|
||||
let Item::Function(f) = item else { continue };
|
||||
self.generate_function(f);
|
||||
}
|
||||
|
||||
self.link()
|
||||
}
|
||||
|
||||
fn generate_function(&mut self, f: &Function) {
|
||||
let frame = self.push_frame();
|
||||
|
||||
let mut param_alloc = ParamAlloc::for_params();
|
||||
|
||||
for param in f.args.iter() {
|
||||
let param_size = self.size_of(¶m.ty);
|
||||
let value = param_alloc.alloc(param_size);
|
||||
let slot = self.add_slot(param.ty.clone(), value);
|
||||
if let SlotValue::Reg(reg) = value {
|
||||
self.regs.alloc_specific_in_reg(reg, slot);
|
||||
}
|
||||
self.add_variable(param.name.clone(), slot);
|
||||
}
|
||||
|
||||
for stmt in f.body.iter() {
|
||||
assert!(self
|
||||
.generate_expr(Some(Type::Builtin(Ty::Void)), stmt)
|
||||
.is_none());
|
||||
}
|
||||
|
||||
self.pop_frame(frame);
|
||||
}
|
||||
|
||||
fn generate_expr(&mut self, expected: Option<Type>, expr: &Exp) -> Option<SlotId> {
|
||||
let value = match expr {
|
||||
Exp::Literal(lit) => SlotId::base(match lit {
|
||||
Literal::Int(i) => self.add_slot(expected.clone().unwrap(), SlotValue::Imm(*i)),
|
||||
Literal::Bool(b) => {
|
||||
self.add_slot(Type::Builtin(Ty::Bool), SlotValue::Imm(*b as u64))
|
||||
}
|
||||
}),
|
||||
Exp::Variable(ident) => {
|
||||
SlotId::base(self.lookup_variable(ident).unwrap().location).borrowed()
|
||||
}
|
||||
Exp::Call { name, args } => self.generate_call(expected.clone(), name, args),
|
||||
Exp::Ctor { name, fields } => todo!(),
|
||||
Exp::Index { base, index } => todo!(),
|
||||
Exp::Field { base, field } => todo!(),
|
||||
Exp::Unary { op, exp } => todo!(),
|
||||
Exp::Binary { op, left, right } => todo!(),
|
||||
Exp::If { cond, then, else_ } => todo!(),
|
||||
Exp::Let { name, ty, value } => todo!(),
|
||||
Exp::For {
|
||||
init,
|
||||
cond,
|
||||
step,
|
||||
block,
|
||||
} => todo!(),
|
||||
Exp::Block(_) => todo!(),
|
||||
Exp::Return(_) => todo!(),
|
||||
Exp::Break => todo!(),
|
||||
Exp::Continue => todo!(),
|
||||
};
|
||||
|
||||
if let Some(expected) = expected {
|
||||
let actual = self.slots[value.index].ty.clone();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
Some(value)
|
||||
}
|
||||
|
||||
fn generate_call(&mut self, expected: Option<Type>, name: &str, args: &[Exp]) -> SlotId {
|
||||
let frame = self.push_frame();
|
||||
let func = self.lookup_function(name);
|
||||
|
||||
let mut ret_alloc = ParamAlloc::for_returns();
|
||||
let ret_size = self.size_of(&func.ret);
|
||||
let ret_slot = ret_alloc.alloc(ret_size);
|
||||
|
||||
let mut arg_alloc = ParamAlloc::for_params();
|
||||
for (arg, param) in args.iter().zip(&func.args) {
|
||||
let arg_slot = self.generate_expr(Some(param.ty.clone()), arg).unwrap();
|
||||
let arg_size = self.size_of(¶m.ty);
|
||||
let param_slot = arg_alloc.alloc(arg_size);
|
||||
self.set_temporarly(arg_slot, param_slot);
|
||||
}
|
||||
|
||||
self.instrs.push(Instr::Call(name.to_owned()));
|
||||
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn set_temporarly(&mut self, from: SlotId, to: SlotValue) {
|
||||
let to = self.make_mutable(to, from.index);
|
||||
let to_slot = self.add_slot(self.slots[from.index].ty.clone(), to);
|
||||
self.emit_move(from, SlotId::base(to_slot));
|
||||
}
|
||||
|
||||
fn make_mutable(&mut self, target: SlotValue, by: SlotIndex) -> SlotValue {
|
||||
match target {
|
||||
SlotValue::Reg(in_reg) => {
|
||||
self.regs.alloc_specific_in_reg(in_reg, by);
|
||||
target
|
||||
}
|
||||
SlotValue::Spilled(Spill::Reg(in_reg), slot) => {
|
||||
let new_val = SlotValue::Spilled(
|
||||
match in_reg {
|
||||
InReg::Single(reg) => Spill::Stack(self.emmit_push(reg)),
|
||||
InReg::Pair(r1, r2) => {
|
||||
self.emmit_push(r2);
|
||||
Spill::Stack(self.emmit_push(r1))
|
||||
}
|
||||
},
|
||||
slot,
|
||||
);
|
||||
let new_slot = self.add_slot(self.slots[slot].ty.clone(), new_val);
|
||||
SlotValue::Spilled(Spill::Reg(in_reg), new_slot)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
ast => unimplemented!("{:?}", ast),
|
||||
}
|
||||
}
|
||||
|
||||
fn emmit_push(&mut self, reg: Reg) -> Offset {
|
||||
self.pushed_size += 8;
|
||||
self.instrs.push(Instr::Push(reg));
|
||||
self.pushed_size
|
||||
fn stack_push(&mut self, value: impl std::fmt::Display, size: usize) -> std::fmt::Result {
|
||||
writeln!(self.code, " st {value}, {STACK_PTR}, {ZERO}, {size}")?;
|
||||
writeln!(
|
||||
self.code,
|
||||
" addi{} {STACK_PTR}, {STACK_PTR}, {size}",
|
||||
size * 8
|
||||
)
|
||||
}
|
||||
|
||||
fn emit_move(&mut self, from: SlotId, to: SlotId) {
|
||||
let size = self.size_of(&self.slots[from.index].ty);
|
||||
let other_size = self.size_of(&self.slots[to.index].ty);
|
||||
assert_eq!(size, other_size);
|
||||
|
||||
self.instrs.push(Instr::Move(
|
||||
size,
|
||||
self.slot_to_value(from),
|
||||
self.slot_to_value(to),
|
||||
));
|
||||
fn stack_pop(&mut self, value: impl std::fmt::Display, size: usize) -> std::fmt::Result {
|
||||
writeln!(
|
||||
self.code,
|
||||
" subi{} {STACK_PTR}, {STACK_PTR}, {size}",
|
||||
size * 8
|
||||
)?;
|
||||
writeln!(self.code, " ld {value}, {STACK_PTR}, {ZERO}, {size}")
|
||||
}
|
||||
|
||||
fn slot_to_value(&self, slot: SlotId) -> Value {
|
||||
let slot_val = &self.slots[slot.index];
|
||||
Value {
|
||||
store: slot_val.value.into(),
|
||||
offset: slot.offset,
|
||||
fn call(&mut self, func: impl std::fmt::Display) -> std::fmt::Result {
|
||||
self.stack_push(&func, 8)?;
|
||||
self.global_jump(func)
|
||||
}
|
||||
|
||||
fn ret(&mut self) -> std::fmt::Result {
|
||||
self.stack_pop(RET_ADDR, 8)?;
|
||||
self.global_jump(RET_ADDR)
|
||||
}
|
||||
|
||||
fn global_jump(&mut self, label: impl std::fmt::Display) -> std::fmt::Result {
|
||||
writeln!(self.code, " jala {ZERO}, {label}, 0")
|
||||
}
|
||||
|
||||
pub fn dump(&mut self, mut out: impl std::fmt::Write) -> std::fmt::Result {
|
||||
writeln!(out, "start:")?;
|
||||
writeln!(out, " jala {ZERO}, main, 0")?;
|
||||
writeln!(out, " tx")?;
|
||||
writeln!(out, "{}", self.code)?;
|
||||
writeln!(out, "{}", self.data)
|
||||
}
|
||||
}
|
||||
|
||||
fn size_of(&self, ty: &Type) -> Size {
|
||||
match ty {
|
||||
Type::Builtin(ty) => match ty {
|
||||
Ty::U8 | Ty::I8 | Ty::Bool => 1,
|
||||
Ty::U16 | Ty::I16 => 2,
|
||||
Ty::U32 | Ty::I32 => 4,
|
||||
Ty::U64 | Ty::I64 => 8,
|
||||
Ty::Void => 0,
|
||||
},
|
||||
Type::Struct(name) => self
|
||||
.lookup_struct(name)
|
||||
.fields
|
||||
.iter()
|
||||
.map(|field| self.size_of(&field.ty))
|
||||
.sum(),
|
||||
Type::Pinter(_) => 8,
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
fn generate(input: &'static str, output: &mut String) {
|
||||
let mut parser = super::parser::Parser::new(input, std::path::Path::new("test"));
|
||||
let exprs = parser.file();
|
||||
let mut codegen = super::Codegen::new(std::path::Path::new("test"));
|
||||
codegen.file(&exprs).unwrap();
|
||||
codegen.dump(output).unwrap();
|
||||
}
|
||||
|
||||
impl<'a> Generator<'a> {
|
||||
fn add_variable(&mut self, name: String, location: usize) {
|
||||
self.variables.push(Variable { name, location });
|
||||
}
|
||||
|
||||
fn add_slot(&mut self, ty: Type, value: SlotValue) -> usize {
|
||||
let slot = self.slots.len();
|
||||
self.slots.push(Slot { ty, value });
|
||||
slot
|
||||
}
|
||||
|
||||
fn link(mut self) -> Vec<u8> {
|
||||
for reloc in self.label_relocs {
|
||||
let label = self.labels[reloc.label].unwrap();
|
||||
let offset = reloc.offset;
|
||||
let target = label - offset;
|
||||
let target_bytes = u64::to_le_bytes(target as u64);
|
||||
self.code_section[offset..offset + 8].copy_from_slice(&target_bytes);
|
||||
}
|
||||
|
||||
for reloc in self.data_relocs {
|
||||
let data = self.data[reloc.data].unwrap();
|
||||
let offset = reloc.offset;
|
||||
let target = data;
|
||||
let target_bytes = u64::to_le_bytes((target + self.code_section.len()) as u64);
|
||||
self.data_section[offset..offset + 8].copy_from_slice(&target_bytes);
|
||||
}
|
||||
|
||||
self.code_section.extend_from_slice(&self.data_section);
|
||||
self.code_section
|
||||
}
|
||||
|
||||
fn lookup_func_label(&mut self, name: &str) -> Label {
|
||||
if let Some(label) = self.func_labels.iter().find(|(n, _)| n == name) {
|
||||
return label.1;
|
||||
}
|
||||
|
||||
panic!("Function not found: {}", name);
|
||||
}
|
||||
|
||||
fn declare_label(&mut self) -> Label {
|
||||
self.labels.push(None);
|
||||
self.labels.len() - 1
|
||||
}
|
||||
|
||||
fn define_label(&mut self, label: Label) {
|
||||
self.labels[label] = Some(self.code_section.len());
|
||||
}
|
||||
|
||||
fn declare_data(&mut self) -> Data {
|
||||
self.data.push(None);
|
||||
self.data.len() - 1
|
||||
}
|
||||
|
||||
/// Defines the previously declared blob `data` by appending `bytes` to the data section.
///
/// The recorded value is the byte offset at which the blob starts inside the data section,
/// in the same way `define_label` records an offset into the code section. The linker later
/// adds the code-section length to it.
fn define_data(&mut self, data: Data, bytes: &[u8]) {
    // `self.data.len()` would be the number of declared blobs, not a byte offset
    self.data[data] = Some(self.data_section.len());
    self.data_section.extend_from_slice(bytes);
}
|
||||
|
||||
fn lookup_struct(&self, name: &str) -> &Struct {
|
||||
self.lookup_item(name)
|
||||
.map(|item| match item {
|
||||
Item::Struct(s) => s,
|
||||
_ => panic!("Not a struct: {}", name),
|
||||
})
|
||||
.expect("Struct not found")
|
||||
}
|
||||
|
||||
fn lookup_function(&self, name: &str) -> &'a Function {
|
||||
self.lookup_item(name)
|
||||
.map(|item| match item {
|
||||
Item::Function(f) => f,
|
||||
_ => panic!("Not a function: {}", name),
|
||||
})
|
||||
.expect("Function not found")
|
||||
}
|
||||
|
||||
fn lookup_item(&self, name: &str) -> Option<&'a Item> {
|
||||
self.ast.iter().find(|item| match item {
|
||||
Item::Import(_) => false,
|
||||
Item::Struct(s) => s.name == name,
|
||||
Item::Function(f) => f.name == name,
|
||||
})
|
||||
}
|
||||
|
||||
fn lookup_variable(&self, name: &str) -> Option<&Variable> {
|
||||
self.variables.iter().find(|variable| variable.name == name)
|
||||
}
|
||||
|
||||
fn push_frame(&mut self) -> Frame {
|
||||
Frame {
|
||||
slot_count: self.slots.len(),
|
||||
var_count: self.variables.len(),
|
||||
crate::run_tests! { generate:
|
||||
example => include_str!("../examples/main_fn.hb");
|
||||
}
|
||||
}
|
||||
|
||||
fn pop_frame(&mut self, frame: Frame) {
|
||||
self.slots.truncate(frame.slot_count);
|
||||
self.variables.truncate(frame.var_count);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate(ast: &[Item]) -> Vec<u8> {
|
||||
Generator {
|
||||
ast,
|
||||
..Default::default()
|
||||
}
|
||||
.generate()
|
||||
}
|
||||
|
|
169
hblang/src/lexer.rs
Normal file
169
hblang/src/lexer.rs
Normal file
|
@ -0,0 +1,169 @@
|
|||
use std::{iter::Peekable, str::Chars};
|
||||
|
||||
/// A lexed token: its kind plus the half-open byte range `start..end` it covers in the source.
///
/// Tokens are small plain data, so they are `Copy`; passing one by value (for example to
/// `Lexer::slice`) does not consume it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Token {
    pub kind: TokenKind,
    pub start: u32,
    pub end: u32,
}

impl Token {
    /// Byte range of the token, suitable for slicing the source text.
    pub fn range(&self) -> std::ops::Range<usize> {
        self.start as usize..self.end as usize
    }
}

/// The syntactic category of a [`Token`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// Identifier: a letter or `_` followed by letters, digits or `_`.
    Ident,
    /// A run of ASCII digits.
    Number,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `[`
    LBrack,
    /// `]`
    RBrack,
    /// `:=`
    Decl,
    /// `||`
    Or,
    /// `;`
    Semi,
    /// `:`
    Colon,
    /// The `return` keyword.
    Return,
    /// End of input.
    Eof,
    /// Input the lexer does not recognise (including a lone `|`).
    Error,
}
|
||||
|
||||
pub struct Lexer<'a> {
|
||||
pos: u32,
|
||||
bytes: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(input: &'a str) -> Self {
|
||||
Self {
|
||||
pos: 0,
|
||||
bytes: input.as_bytes(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn slice(&self, tok: Token) -> &'a str {
|
||||
unsafe { std::str::from_utf8_unchecked(&self.bytes[tok.range()]) }
|
||||
}
|
||||
|
||||
fn peek(&self) -> Option<u8> {
|
||||
self.bytes.get(self.pos as usize).copied()
|
||||
}
|
||||
|
||||
fn advance(&mut self) -> Option<u8> {
|
||||
let c = self.peek()?;
|
||||
self.pos += 1;
|
||||
Some(c)
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Token {
|
||||
Iterator::next(self).unwrap_or(Token {
|
||||
kind: TokenKind::Eof,
|
||||
start: self.pos,
|
||||
end: self.pos,
|
||||
})
|
||||
}
|
||||
|
||||
fn advance_if(&mut self, arg: u8) -> bool {
|
||||
if self.peek() == Some(arg) {
|
||||
self.advance();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn line_col(&self, mut start: u32) -> (usize, usize) {
|
||||
self.bytes
|
||||
.split(|&b| b == b'\n')
|
||||
.enumerate()
|
||||
.find_map(|(i, line)| {
|
||||
if start < line.len() as u32 {
|
||||
return Some((i + 1, start as usize + 1));
|
||||
}
|
||||
start -= line.len() as u32 + 1;
|
||||
None
|
||||
})
|
||||
.unwrap_or((1, 1))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Lexer<'a> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
use TokenKind as T;
|
||||
loop {
|
||||
let start = self.pos;
|
||||
let kind = match self.advance()? {
|
||||
b'\n' | b'\r' | b'\t' | b' ' => continue,
|
||||
b'0'..=b'9' => {
|
||||
while let Some(b'0'..=b'9') = self.peek() {
|
||||
self.advance();
|
||||
}
|
||||
T::Number
|
||||
}
|
||||
b'a'..=b'z' | b'A'..=b'Z' | b'_' => {
|
||||
while let Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_') = self.peek() {
|
||||
self.advance();
|
||||
}
|
||||
|
||||
let ident = &self.bytes[start as usize..self.pos as usize];
|
||||
match ident {
|
||||
b"return" => T::Return,
|
||||
_ => T::Ident,
|
||||
}
|
||||
}
|
||||
b':' => match self.advance_if(b'=') {
|
||||
true => T::Decl,
|
||||
false => T::Colon,
|
||||
},
|
||||
b';' => T::Semi,
|
||||
b'|' => match self.advance_if(b'|') {
|
||||
true => T::Or,
|
||||
false => T::Error,
|
||||
},
|
||||
b'(' => T::LParen,
|
||||
b')' => T::RParen,
|
||||
b'{' => T::LBrace,
|
||||
b'}' => T::RBrace,
|
||||
b'[' => T::LBrack,
|
||||
b']' => T::RBrack,
|
||||
_ => T::Error,
|
||||
};
|
||||
|
||||
return Some(Token {
|
||||
kind,
|
||||
start,
|
||||
end: self.pos,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    /// Test driver: lexes `input` and writes one `Kind "text"` line per token, up to and
    /// including the `Eof` token.
    fn lex(input: &'static str, output: &mut String) {
        use super::{Lexer, TokenKind};
        use std::fmt::Write;

        let mut lexer = Lexer::new(input);
        loop {
            let token = lexer.next();
            let text = &input[token.range()];
            writeln!(output, "{:?} {:?}", token.kind, text).unwrap();
            if matches!(token.kind, TokenKind::Eof) {
                break;
            }
        }
    }

    crate::run_tests! { lex:
        empty => "";
        whitespace => " \t\n\r";
        examples => include_str!("../examples/main_fn.hb");
    }
}
|
20
hblang/src/lib.rs
Normal file
20
hblang/src/lib.rs
Normal file
|
@ -0,0 +1,20 @@
|
|||
#![feature(noop_waker)]
|
||||
/// Generates one `#[test]` function per `name => input;` entry.
///
/// Each generated test passes its own type name (obtained with
/// `std::any::type_name_of_val`), the input expression and the driver function to
/// `$crate::tests::run_test`.
#[macro_export]
macro_rules! run_tests {
    ($driver:path: $($case:ident => $source:expr;)*) => {
        $(
            #[test]
            fn $case() {
                let case_name = std::any::type_name_of_val(&$case);
                $crate::tests::run_test(case_name, $source, $driver);
            }
        )*
    };
}
|
||||
|
||||
mod codegen;
|
||||
mod lexer;
|
||||
mod parser;
|
||||
mod tests;
|
||||
mod typechk;
|
||||
|
||||
/// Runs `f` immediately and returns whatever it returns.
///
/// Useful as a scope for `?`: an early exit inside the closure leaves only the closure,
/// not the calling function.
pub fn try_block<R>(f: impl FnOnce() -> R) -> R {
    let outcome = f();
    outcome
}
|
161
hblang/src/parser.rs
Normal file
161
hblang/src/parser.rs
Normal file
|
@ -0,0 +1,161 @@
|
|||
use std::{cell::Cell, ops::Not};
|
||||
|
||||
use crate::lexer::{Lexer, Token, TokenKind};
|
||||
|
||||
/// Shared reference to a value that lives for the rest of the program.
type Ptr<T> = &'static T;

/// Moves `val` onto the heap and leaks it, yielding a [`Ptr`] to it.
///
/// The allocation is intentionally never freed.
fn ptr<T>(val: T) -> Ptr<T> {
    let boxed = Box::new(val);
    Box::leak(boxed)
}
|
||||
|
||||
pub struct Parser<'a> {
|
||||
path: &'a std::path::Path,
|
||||
lexer: Lexer<'a>,
|
||||
token: Token,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub fn new(input: &'a str, path: &'a std::path::Path) -> Self {
|
||||
let mut lexer = Lexer::new(input);
|
||||
let token = lexer.next();
|
||||
Self { lexer, token, path }
|
||||
}
|
||||
|
||||
fn next(&mut self) -> Token {
|
||||
std::mem::replace(&mut self.token, self.lexer.next())
|
||||
}
|
||||
|
||||
pub fn file(&mut self) -> Vec<Expr> {
|
||||
std::iter::from_fn(|| (self.token.kind != TokenKind::Eof).then(|| self.expr())).collect()
|
||||
}
|
||||
|
||||
fn ptr_expr(&mut self) -> Ptr<Expr> {
|
||||
ptr(self.expr())
|
||||
}
|
||||
|
||||
pub fn expr(&mut self) -> Expr {
|
||||
let token = self.next();
|
||||
let expr = match token.kind {
|
||||
TokenKind::Ident => {
|
||||
let name = self.lexer.slice(token).to_owned().leak();
|
||||
if self.advance_if(TokenKind::Decl) {
|
||||
let val = self.ptr_expr();
|
||||
Expr::Decl { name, val }
|
||||
} else {
|
||||
Expr::Ident { name }
|
||||
}
|
||||
}
|
||||
TokenKind::Return => Expr::Return {
|
||||
val: (self.token.kind != TokenKind::Semi).then(|| self.ptr_expr()),
|
||||
},
|
||||
TokenKind::Or => {
|
||||
self.expect_advance(TokenKind::Colon);
|
||||
let ret = self.ptr_expr();
|
||||
let body = self.ptr_expr();
|
||||
Expr::Closure { ret, body }
|
||||
}
|
||||
TokenKind::LBrace => Expr::Block {
|
||||
stmts: std::iter::from_fn(|| {
|
||||
self.advance_if(TokenKind::RBrace)
|
||||
.not()
|
||||
.then(|| self.expr())
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
},
|
||||
TokenKind::Number => Expr::Number {
|
||||
value: match self.lexer.slice(token).parse() {
|
||||
Ok(value) => value,
|
||||
Err(e) => self.report(format_args!("invalid number: {e}")),
|
||||
},
|
||||
},
|
||||
tok => self.report(format_args!("unexpected token: {:?}", tok)),
|
||||
};
|
||||
|
||||
self.advance_if(TokenKind::Semi);
|
||||
|
||||
expr
|
||||
}
|
||||
|
||||
fn advance_if(&mut self, kind: TokenKind) -> bool {
|
||||
if self.token.kind == kind {
|
||||
self.next();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn expect_advance(&mut self, kind: TokenKind) {
|
||||
if self.token.kind != kind {
|
||||
self.report(format_args!(
|
||||
"expected {:?}, found {:?}",
|
||||
kind, self.token.kind
|
||||
));
|
||||
}
|
||||
self.next();
|
||||
}
|
||||
|
||||
fn report(&self, msg: impl std::fmt::Display) -> ! {
|
||||
let (line, col) = self.lexer.line_col(self.token.start);
|
||||
eprintln!("{}:{}:{} => {}", self.path.display(), line, col, msg);
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Expr {
|
||||
Decl { name: Ptr<str>, val: Ptr<Expr> },
|
||||
Closure { ret: Ptr<Expr>, body: Ptr<Expr> },
|
||||
Return { val: Option<Ptr<Expr>> },
|
||||
Ident { name: Ptr<str> },
|
||||
Block { stmts: Vec<Expr> },
|
||||
Number { value: u64 },
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Expr {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
thread_local! {
|
||||
static INDENT: Cell<usize> = Cell::new(0);
|
||||
}
|
||||
|
||||
match self {
|
||||
Self::Decl { name, val } => write!(f, "{} := {}", name, val),
|
||||
Self::Closure { ret, body } => write!(f, "||: {} {}", ret, body),
|
||||
Self::Return { val: Some(val) } => write!(f, "return {};", val),
|
||||
Self::Return { val: None } => write!(f, "return;"),
|
||||
Self::Ident { name } => write!(f, "{}", name),
|
||||
Self::Block { stmts } => {
|
||||
writeln!(f, "{{")?;
|
||||
INDENT.with(|i| i.set(i.get() + 1));
|
||||
let res = crate::try_block(|| {
|
||||
for stmt in stmts {
|
||||
for _ in 0..INDENT.with(|i| i.get()) {
|
||||
write!(f, " ")?;
|
||||
}
|
||||
writeln!(f, "{}", stmt)?;
|
||||
}
|
||||
Ok(())
|
||||
});
|
||||
INDENT.with(|i| i.set(i.get() - 1));
|
||||
write!(f, "}}")?;
|
||||
res
|
||||
}
|
||||
Self::Number { value } => write!(f, "{}", value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    /// Test driver: parses `input` and writes every top-level expression on its own line.
    fn parse(input: &'static str, output: &mut String) {
        use std::fmt::Write;

        let path = std::path::Path::new("test");
        let mut parser = super::Parser::new(input, path);
        parser
            .file()
            .iter()
            .for_each(|expr| writeln!(output, "{}", expr).unwrap());
    }

    crate::run_tests! { parse:
        example => include_str!("../examples/main_fn.hb");
    }
}
|
53
hblang/src/tests.rs
Normal file
53
hblang/src/tests.rs
Normal file
|
@ -0,0 +1,53 @@
|
|||
#![cfg(test)]
|
||||
|
||||
/// Runs one snapshot test.
///
/// `test` is called with `input` and fills a string with its output, which is
/// then compared against the snapshot file `<root>/<name>.txt`. A missing
/// snapshot file is treated as empty.
///
/// Environment variables:
/// - `PT_FILTER`: when non-empty, tests whose `name` does not contain it are
///   skipped.
/// - `PT_TEST_ROOT`: snapshot directory, `tests` when unset.
/// - `PT_UPDATE`: when set, a mismatching snapshot is overwritten with the new
///   output instead of failing the test.
///
/// # Panics
///
/// Panics when the output differs from the snapshot and `PT_UPDATE` is not
/// set (after printing a unified diff via the external `diff` tool), or when
/// one of the required file or process operations fails.
pub fn run_test(name: &'static str, input: &'static str, test: fn(&'static str, &mut String)) {
    use std::{io::Write, path::PathBuf};

    let filter = std::env::var("PT_FILTER").unwrap_or_default();
    if !filter.is_empty() && !name.contains(filter.as_str()) {
        return;
    }

    let mut output = String::new();
    test(input, &mut output);

    let mut root =
        PathBuf::from(std::env::var("PT_TEST_ROOT").unwrap_or_else(|_| "tests".to_string()));
    root.push(name);
    root.set_extension("txt");

    let expected = std::fs::read_to_string(&root).unwrap_or_default();
    if output == expected {
        return;
    }

    // Both the update path and the diff path below need the snapshot
    // directory to exist, so create it once up front.
    if let Some(dir) = root.parent() {
        std::fs::create_dir_all(dir).expect("failed to create the snapshot directory");
    }

    if std::env::var("PT_UPDATE").is_ok() {
        std::fs::write(&root, output).expect("failed to update the snapshot file");
        return;
    }

    // `diff` needs a file to compare against, so materialize an empty one.
    if !root.exists() {
        std::fs::write(&root, "").expect("failed to create an empty snapshot file");
    }

    let mut proc = std::process::Command::new("diff")
        .arg("-u")
        .arg("--color")
        .arg(&root)
        .arg("-")
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::inherit())
        .spawn()
        .expect("failed to spawn `diff`");

    // Taking the handle drops it at the end of the statement, which closes the
    // pipe so `diff` sees end-of-input.
    proc.stdin
        .take()
        .expect("stdin was configured as piped")
        .write_all(output.as_bytes())
        .expect("failed to send the output to `diff`");

    proc.wait().expect("failed to wait for `diff`");

    panic!("output of test `{}` does not match snapshot {}", name, root.display());
}
|
0
hblang/src/typechk.rs
Normal file
0
hblang/src/typechk.rs
Normal file
9
hblang/tests/hblang::codegen::tests::example.txt
Normal file
9
hblang/tests/hblang::codegen::tests::example.txt
Normal file
|
@ -0,0 +1,9 @@
|
|||
start:
|
||||
jala r0, main, 0
|
||||
tx
|
||||
main:
|
||||
subi64 r254, r254, 8
|
||||
ld r31, r254, r0, 8
|
||||
jala r0, r31, 0
|
||||
|
||||
|
1
hblang/tests/hblang::lexer::tests::empty.txt
Normal file
1
hblang/tests/hblang::lexer::tests::empty.txt
Normal file
|
@ -0,0 +1 @@
|
|||
Eof ""
|
10
hblang/tests/hblang::lexer::tests::examples.txt
Normal file
10
hblang/tests/hblang::lexer::tests::examples.txt
Normal file
|
@ -0,0 +1,10 @@
|
|||
Ident "main"
|
||||
Decl ":="
|
||||
Or "||"
|
||||
Colon ":"
|
||||
Ident "void"
|
||||
LBrace "{"
|
||||
Return "return"
|
||||
Semi ";"
|
||||
RBrace "}"
|
||||
Eof ""
|
1
hblang/tests/hblang::lexer::tests::whitespace.txt
Normal file
1
hblang/tests/hblang::lexer::tests::whitespace.txt
Normal file
|
@ -0,0 +1 @@
|
|||
Eof ""
|
3
hblang/tests/hblang::parser::tests::example.txt
Normal file
3
hblang/tests/hblang::parser::tests::example.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
main := ||: void {
|
||||
return;
|
||||
}
|
Loading…
Reference in a new issue