starting from zero again

This commit is contained in:
mlokr 2024-05-09 23:41:59 +02:00
parent 774735b515
commit 1c08148dc9
19 changed files with 515 additions and 1912 deletions

6
hblang/Cargo.toml Normal file
View file

@ -0,0 +1,6 @@
[package]
name = "hblang"
version = "0.1.0"
edition = "2021"
[dependencies]

View file

@ -0,0 +1,3 @@
main := ||: void {
return;
}

View file

@ -1,11 +0,0 @@
[package]
name = "hblang"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
hbbytecode = { version = "0.1.0", path = "../hbbytecode" }
logos = "0.13.0"

View file

@ -1,595 +0,0 @@
use std::{iter::Cycle, ops::Range, usize};
use crate::{
lexer::{self, Ty},
parser::{Exp, Function, Item, Literal, Struct, Type},
};
type Reg = u8;
type Offset = i32;
type Pushed = bool;
type SlotIndex = usize;
type Label = usize;
type Data = usize;
type Size = usize;
//| Register | Description | Saver |
//|:-----------|:--------------------|:-------|
//| r0 | Hard-wired zero | N/A |
//| r1 - r2 | Return values | Caller |
//| r2 - r11 | Function parameters | Caller |
//| r12 - r30 | General purpose | Caller |
//| r31 | Return address | Caller |
//| r32 - r253 | General purpose | Callee |
//| r254 | Stack pointer | Callee |
//| r255 | Thread pointer | N/A |
/// Bookkeeping for the 256 machine registers described in the table above.
struct RegAlloc {
/// `regs[r]` is `Some(slot)` while register `r` is assigned to that slot index, `None` when free.
pub regs: Box<[Option<usize>; 256]>,
/// `used[r]` is set to `true` by `alloc_general` when it hands out register `r`;
/// nothing in the visible code resets it.
pub used: Box<[bool; 256]>,
/// Endless iterator yielding the next register number to spill
/// (`Default` builds it as a cycle over `32..254`).
pub spill_cycle: Cycle<Range<u8>>,
}
impl RegAlloc {
    /// Register holding the stack pointer.
    const STACK_POINTER: Reg = 254;
    /// Register that is hard-wired to zero.
    const ZERO: Reg = 0;
    /// Register holding the return address.
    const RETURN_ADDRESS: Reg = 31;

    /// Claims the first free return-value register (r1 or r2) for `slot`.
    ///
    /// Returns the claimed register, or `None` when both are occupied.
    fn alloc_return(&mut self, slot: usize) -> Option<Reg> {
        // The ABI table lists r1 *and* r2 as return registers, so the scan
        // has to cover both of them (inclusive range).
        let index = self.regs[1..=2].iter().position(Option::is_none)? + 1;
        self.regs[index] = Some(slot);
        Some(index as Reg)
    }

    /// Claims the first free callee-saved general purpose register
    /// (r32 - r253) for `slot` and marks it as used.
    ///
    /// Returns the claimed register, or `None` when all of them are occupied.
    fn alloc_general(&mut self, slot: usize) -> Option<Reg> {
        let index = self.regs[32..254].iter().position(Option::is_none)? + 32;
        self.regs[index] = Some(slot);
        self.used[index] = true;
        // `index` is at most 253, so the narrowing cast cannot truncate.
        Some(index as Reg)
    }

    /// Releases `reg`.
    ///
    /// # Panics
    /// Panics if the register was not allocated.
    fn free(&mut self, reg: Reg) {
        assert!(self.regs[reg as usize].take().is_some());
    }

    /// Tells whether `reg` currently holds a slot.
    fn is_used(&self, reg: Reg) -> bool {
        self.regs[reg as usize].is_some()
    }

    /// Picks the next register of the spill cycle and hands it to `for_slot`.
    ///
    /// Returns the chosen register together with the slot that occupied it
    /// before (if any).
    fn spill(&mut self, for_slot: usize) -> (Reg, Option<usize>) {
        // The cycle already yields absolute register numbers (32..254), so
        // the value must be returned as is; offsetting it again would name a
        // different register than the one recorded in `regs` and overflow
        // `u8` for the upper part of the range.
        let to_spill = self
            .spill_cycle
            .next()
            .expect("cycling over a non-empty range never ends");
        let slot = self.spill_specific(to_spill, for_slot);
        (to_spill, slot)
    }

    /// Assigns `reg` to `for_slot`, returning the slot it held before.
    fn spill_specific(&mut self, reg: Reg, for_slot: usize) -> Option<usize> {
        self.regs[reg as usize].replace(for_slot)
    }

    /// Assigns `reg` back to `slot` and returns the slot that was occupying it.
    ///
    /// # Panics
    /// Panics if the register was free.
    fn restore(&mut self, reg: Reg, slot: usize) -> usize {
        self.regs[reg as usize].replace(slot).unwrap()
    }

    /// Assigns the free register `reg` to slot `to`.
    ///
    /// # Panics
    /// Panics if the register is already occupied.
    fn alloc_specific(&mut self, reg: u8, to: usize) {
        assert!(self.regs[reg as usize].replace(to).is_none());
    }

    /// Same as [`Self::alloc_specific`] but covers both halves of a register pair.
    fn alloc_specific_in_reg(&mut self, reg: InReg, to: usize) {
        match reg {
            InReg::Single(r) => self.alloc_specific(r, to),
            InReg::Pair(r1, r2) => {
                self.alloc_specific(r1, to);
                self.alloc_specific(r2, to);
            }
        }
    }
}
/// Hands out locations (registers first, then stack) for function parameters.
pub struct ParamAlloc {
// registers not handed out yet; `start` advances with every allocation
reg_range: Range<Reg>,
// stack offset given to the next parameter that does not fit into registers
stack: Offset,
}
impl ParamAlloc {
fn new() -> Self {
Self {
stack: 8, // return adress is in callers stack frame
reg_range: 2..12,
}
}
fn alloc(&mut self, size: usize) -> SlotValue {
match self.try_alloc_regs(size) {
Some(reg) => reg,
None => {
let stack = self.stack;
self.stack += size as Offset;
SlotValue::Stack(stack)
}
}
}
fn try_alloc_regs(&mut self, size: usize) -> Option<SlotValue> {
let mut needed = size.div_ceil(8);
if needed > 2 {
needed = 1; // passed by ref
}
if self.reg_range.len() < needed {
return None;
}
match needed {
1 => {
let reg = self.reg_range.start;
self.reg_range.start += 1;
Some(SlotValue::Reg(InReg::Single(reg)))
}
2 => {
let reg = self.reg_range.start;
self.reg_range.start += 2;
Some(SlotValue::Reg(InReg::Pair(reg, reg + 1)))
}
_ => unreachable!(),
}
}
}
impl Default for RegAlloc {
    /// Starts with every register free and unused; spilling cycles through
    /// registers 32..254.
    fn default() -> Self {
        let regs = Box::new([None; 256]);
        let used = Box::new([false; 256]);
        let spill_cycle = (32..254).cycle();
        Self {
            regs,
            used,
            spill_cycle,
        }
    }
}
struct Variable {
name: String,
location: usize,
}
/// Handle referring to an entry of the generator's slot stack.
#[derive(Clone, Copy)]
struct SlotId {
// index into the slot stack
index: SlotIndex,
// temporary offset carried over when e.g. accessing fields
offset: Offset,
// when set, the value may be mutated as part of a computation
owned: bool,
}
impl SlotId {
    /// Owned handle to the slot at `location` with no extra offset.
    fn base(location: usize) -> Self {
        Self {
            owned: true,
            offset: 0,
            index: location,
        }
    }

    /// Same handle with ownership dropped.
    fn borrowed(mut self) -> Self {
        self.owned = false;
        self
    }
}
struct Slot {
ty: Type,
value: SlotValue,
}
#[repr(transparent)]
struct InstBuffer {
buffer: Vec<u8>,
}
impl InstBuffer {
/// Reinterprets a byte vector as an `InstBuffer` without copying it.
fn new(vec: &mut Vec<u8>) -> &mut Self {
// SAFETY: `InstBuffer` is declared `#[repr(transparent)]` with a single
// `Vec<u8>` field, so both types share one layout; the returned reference
// reuses the lifetime of `vec`, so exclusivity of the borrow is kept.
unsafe { &mut *(vec as *mut Vec<u8> as *mut Self) }
}
}
impl hbbytecode::Buffer for InstBuffer {
fn reserve(&mut self, bytes: usize) {
self.buffer.reserve(bytes);
}
unsafe fn write(&mut self, byte: u8) {
self.buffer.push(byte);
}
}
/// Register location of a value: one register or two of them.
#[derive(Clone, Copy)]
enum InReg {
Single(Reg),
// if one of the registers is allocated, the other one is too, ALWAYS
// with the same slot
Pair(Reg, Reg),
}
#[derive(Clone, Copy)]
enum Spill {
Reg(InReg),
Stack(Offset), // relative to frame end (rsp if nothing was pushed)
}
#[derive(Clone, Copy)]
enum SlotValue {
Reg(InReg),
Stack(Offset), // relative to frame start (rbp)
Imm(u64),
Spilled(Spill, SlotIndex),
}
pub struct Value {
store: ValueStore,
offset: Offset,
}
#[derive(Clone, Copy)]
enum ValueStore {
Reg(InReg),
Stack(Offset, Pushed),
Imm(u64),
}
impl From<SlotValue> for ValueStore {
    /// Drops the slot bookkeeping from a `SlotValue`: a spilled value maps
    /// to the place it was spilled to, and stack locations produced by a
    /// spill carry the `Pushed` flag set.
    fn from(value: SlotValue) -> Self {
        match value {
            SlotValue::Reg(reg) | SlotValue::Spilled(Spill::Reg(reg), _) => Self::Reg(reg),
            SlotValue::Stack(offset) => Self::Stack(offset, false),
            SlotValue::Spilled(Spill::Stack(offset), _) => Self::Stack(offset, true),
            SlotValue::Imm(imm) => Self::Imm(imm),
        }
    }
}
pub struct LabelReloc {
pub label: Label,
pub offset: usize,
}
pub struct DataReloc {
pub data: Data,
pub offset: usize,
}
#[must_use]
pub struct Frame {
pub slot_count: usize,
pub var_count: usize,
}
enum Instr {
BinOp(lexer::Op, Value, Value),
Move(Size, Value, Value),
Push(Reg),
Jump(Label),
Call(String),
JumpIfZero(Value, Label),
}
#[derive(Default)]
pub struct Generator<'a> {
ast: &'a [Item],
func_labels: Vec<(String, Label)>,
stack_size: Offset,
pushed_size: Offset,
regs: RegAlloc,
variables: Vec<Variable>,
slots: Vec<Slot>,
labels: Vec<Option<usize>>,
label_relocs: Vec<LabelReloc>,
data: Vec<Option<usize>>,
data_relocs: Vec<DataReloc>,
code_section: Vec<u8>,
data_section: Vec<u8>,
instrs: Vec<Instr>,
}
impl<'a> Generator<'a> {
fn generate(mut self) -> Vec<u8> {
for item in self.ast {
let Item::Function(f) = item else { continue };
self.generate_function(f);
}
self.link()
}
/// Generates code for one function: registers every parameter as a slot and
/// a variable, then generates each statement of the body inside a fresh frame.
fn generate_function(&mut self, f: &Function) {
let frame = self.push_frame();
let mut param_alloc = ParamAlloc::new();
for param in f.args.iter() {
let param_size = self.size_of(&param.ty);
let value = param_alloc.alloc(param_size);
let slot = self.add_slot(param.ty.clone(), value);
// parameters living in registers must also be marked as occupied
if let SlotValue::Reg(reg) = value {
self.regs.alloc_specific_in_reg(reg, slot);
}
self.add_variable(param.name.clone(), slot);
}
for stmt in f.body.iter() {
// NOTE(review): `generate_expr` as written returns `Some(..)` on every
// path that does not hit `todo!()`, so this assertion cannot hold yet.
assert!(self
.generate_expr(Some(Type::Builtin(Ty::Void)), stmt)
.is_none());
}
// drops the slots and variables introduced by this function
self.pop_frame(frame);
}
fn generate_expr(&mut self, expected: Option<Type>, expr: &Exp) -> Option<SlotId> {
let value = match expr {
Exp::Literal(lit) => SlotId::base(match lit {
Literal::Int(i) => self.add_slot(expected.clone().unwrap(), SlotValue::Imm(*i)),
Literal::Bool(b) => {
self.add_slot(Type::Builtin(Ty::Bool), SlotValue::Imm(*b as u64))
}
}),
Exp::Variable(ident) => {
SlotId::base(self.lookup_variable(ident).unwrap().location).borrowed()
}
Exp::Call { name, args } => self.generate_call(expected.clone(), name, args),
Exp::Ctor { name, fields } => todo!(),
Exp::Index { base, index } => todo!(),
Exp::Field { base, field } => todo!(),
Exp::Unary { op, exp } => todo!(),
Exp::Binary { op, left, right } => todo!(),
Exp::If { cond, then, else_ } => todo!(),
Exp::Let { name, ty, value } => todo!(),
Exp::For {
init,
cond,
step,
block,
} => todo!(),
Exp::Block(_) => todo!(),
Exp::Return(_) => todo!(),
Exp::Break => todo!(),
Exp::Continue => todo!(),
};
if let Some(expected) = expected {
let actual = self.slots[value.index].ty.clone();
assert_eq!(expected, actual);
}
Some(value)
}
fn generate_call(&mut self, expected: Option<Type>, name: &str, args: &[Exp]) -> SlotId {
let frame = self.push_frame();
let func = self.lookup_function(name);
let mut arg_alloc = ParamAlloc::new();
for (arg, param) in args.iter().zip(&func.args) {
let arg_slot = self.generate_expr(Some(param.ty.clone()), arg).unwrap();
let arg_size = self.size_of(&param.ty);
let param_slot = arg_alloc.alloc(arg_size);
self.set_temporarly(arg_slot, param_slot);
}
self.instrs.push(Instr::Call(name.to_owned()));
todo!()
}
fn set_temporarly(&mut self, from: SlotId, to: SlotValue) {
let to = self.make_mutable(to, from.index);
let to_slot = self.add_slot(self.slots[from.index].ty.clone(), to);
self.emit_move(from, SlotId::base(to_slot));
}
fn make_mutable(&mut self, target: SlotValue, by: SlotIndex) -> SlotValue {
match target {
SlotValue::Reg(in_reg) => {
self.regs.alloc_specific_in_reg(in_reg, by);
target
}
SlotValue::Spilled(Spill::Reg(in_reg), slot) => {
let new_val = SlotValue::Spilled(
match in_reg {
InReg::Single(reg) => Spill::Stack(self.emmit_push(reg)),
InReg::Pair(r1, r2) => {
self.emmit_push(r2);
Spill::Stack(self.emmit_push(r1))
}
},
slot,
);
let new_slot = self.add_slot(self.slots[slot].ty.clone(), new_val);
SlotValue::Spilled(Spill::Reg(in_reg), new_slot)
}
_ => unreachable!(),
}
}
fn emmit_push(&mut self, reg: Reg) -> Offset {
self.pushed_size += 8;
self.instrs.push(Instr::Push(reg));
self.pushed_size
}
fn emit_move(&mut self, from: SlotId, to: SlotId) {
let size = self.size_of(&self.slots[from.index].ty);
let other_size = self.size_of(&self.slots[to.index].ty);
assert_eq!(size, other_size);
self.instrs.push(Instr::Move(
size,
self.slot_to_value(from),
self.slot_to_value(to),
));
}
fn slot_to_value(&self, slot: SlotId) -> Value {
let slot_val = &self.slots[slot.index];
Value {
store: slot_val.value.into(),
offset: slot.offset,
}
}
fn size_of(&self, ty: &Type) -> Size {
match ty {
Type::Builtin(ty) => match ty {
Ty::U8 | Ty::I8 | Ty::Bool => 1,
Ty::U16 | Ty::I16 => 2,
Ty::U32 | Ty::I32 => 4,
Ty::U64 | Ty::I64 => 8,
Ty::Void => 0,
},
Type::Struct(name) => self
.lookup_struct(name)
.fields
.iter()
.map(|field| self.size_of(&field.ty))
.sum(),
Type::Pinter(_) => 8,
}
}
}
impl<'a> Generator<'a> {
fn add_variable(&mut self, name: String, location: usize) {
self.variables.push(Variable { name, location });
}
fn add_slot(&mut self, ty: Type, value: SlotValue) -> usize {
let slot = self.slots.len();
self.slots.push(Slot { ty, value });
slot
}
/// Resolves all recorded relocations and returns the final image: the code
/// section immediately followed by the data section.
///
/// # Panics
/// Panics if a relocation refers to a label or data entry that was declared
/// but never defined, or if a relocation offset lies outside its section.
fn link(mut self) -> Vec<u8> {
    for reloc in self.label_relocs {
        let label = self.labels[reloc.label]
            .expect("label referenced by a relocation was never defined");
        let offset = reloc.offset;
        // Displacement relative to the patched location. A label placed
        // before the relocation gives a negative distance, so wrapping
        // arithmetic is used to obtain its two's complement encoding instead
        // of overflowing `usize`.
        let target = (label as u64).wrapping_sub(offset as u64);
        self.code_section[offset..offset + 8].copy_from_slice(&target.to_le_bytes());
    }
    for reloc in self.data_relocs {
        let data = self.data[reloc.data]
            .expect("data referenced by a relocation was never defined");
        let offset = reloc.offset;
        // data is appended right after the code, hence the shift by the code size
        let target = (data + self.code_section.len()) as u64;
        // NOTE(review): the patch is written into the data section; confirm
        // that data relocations are not meant to patch the code section.
        self.data_section[offset..offset + 8].copy_from_slice(&target.to_le_bytes());
    }
    self.code_section.extend_from_slice(&self.data_section);
    self.code_section
}
/// Returns the label registered for the function called `name`.
///
/// # Panics
/// Panics when no such function has a label.
fn lookup_func_label(&mut self, name: &str) -> Label {
    match self.func_labels.iter().find(|(n, _)| n == name) {
        Some((_, label)) => *label,
        None => panic!("Function not found: {}", name),
    }
}
fn declare_label(&mut self) -> Label {
self.labels.push(None);
self.labels.len() - 1
}
fn define_label(&mut self, label: Label) {
self.labels[label] = Some(self.code_section.len());
}
fn declare_data(&mut self) -> Data {
self.data.push(None);
self.data.len() - 1
}
/// Binds the previously declared `data` entry to the current end of the data
/// section and appends `bytes` there.
fn define_data(&mut self, data: Data, bytes: &[u8]) {
    // The entry stores a byte offset into the data section (`link` adds the
    // code size to it), mirroring how `define_label` records the code offset.
    self.data[data] = Some(self.data_section.len());
    self.data_section.extend_from_slice(bytes);
}
fn lookup_struct(&self, name: &str) -> &Struct {
self.lookup_item(name)
.map(|item| match item {
Item::Struct(s) => s,
_ => panic!("Not a struct: {}", name),
})
.expect("Struct not found")
}
fn lookup_function(&self, name: &str) -> &'a Function {
self.lookup_item(name)
.map(|item| match item {
Item::Function(f) => f,
_ => panic!("Not a function: {}", name),
})
.expect("Function not found")
}
fn lookup_item(&self, name: &str) -> Option<&'a Item> {
self.ast.iter().find(|item| match item {
Item::Import(_) => false,
Item::Struct(s) => s.name == name,
Item::Function(f) => f.name == name,
})
}
fn lookup_variable(&self, name: &str) -> Option<&Variable> {
self.variables.iter().find(|variable| variable.name == name)
}
fn push_frame(&mut self) -> Frame {
Frame {
slot_count: self.slots.len(),
var_count: self.variables.len(),
}
}
fn pop_frame(&mut self, frame: Frame) {
self.slots.truncate(frame.slot_count);
self.variables.truncate(frame.var_count);
}
}
pub fn generate(ast: &[Item]) -> Vec<u8> {
Generator {
ast,
..Default::default()
}
.generate()
}

View file

@ -1,151 +0,0 @@
use logos::Logos;
// Generates the lexer types from one declarative table:
//  * the token enum `$name` (deriving `Logos`, skipping spaces, tabs and
//    newlines) with one unit variant per keyword and per regex, plus the
//    payload variants `Op(..)` and `Ty(..)`,
//  * the operator enum `$op_name` with a `prec()` method returning the
//    precedence literal the operator was listed under,
//  * the builtin type enum `$ty_type`.
macro_rules! gen_token {
($name:ident {
keywords: {
$($keyword:ident = $lit:literal,)*
},
operators: $op_name:ident {
$($prec:literal: {$(
$op:ident = $op_lit:literal,
)*},)*
},
types: $ty_type:ident {
$($ty:ident = $ty_lit:literal,)*
},
regexes: {
$($regex:ident = $regex_lit:literal,)*
},
}) => {
#[derive(Debug, Clone, PartialEq, Eq, Copy, Logos)]
#[logos(skip "[ \t\n]+")]
pub enum $name {
$(#[token($lit)] $keyword,)*
$($(#[token($op_lit, |_| $op_name::$op)])*)*
Op($op_name),
$(#[token($ty_lit, |_| $ty_type::$ty)])*
Ty($ty_type),
$(#[regex($regex_lit)] $regex,)*
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum $op_name {
$($($op,)*)*
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum $ty_type {
$($ty,)*
}
impl $op_name {
pub fn prec(&self) -> u8 {
match self {
$($($op_name::$op => $prec,)*)*
}
}
}
};
}
gen_token! {
TokenKind {
keywords: {
Use = "use",
Fn = "fn",
Let = "let",
If = "if",
Else = "else",
For = "for",
Return = "return",
Break = "break",
Continue = "continue",
Struct = "struct",
True = "true",
False = "false",
LBrace = "{",
RBrace = "}",
LParen = "(",
RParen = ")",
LBracket = "[",
RBracket = "]",
Colon = ":",
Semicolon = ";",
Comma = ",",
Dot = ".",
},
operators: Op {
14: {
Assign = "=",
AddAssign = "+=",
SubAssign = "-=",
MulAssign = "*=",
DivAssign = "/=",
ModAssign = "%=",
AndAssign = "&=",
OrAssign = "|=",
XorAssign = "^=",
ShlAssign = "<<=",
ShrAssign = ">>=",
},
12: {
Or = "||",
},
11: {
And = "&&",
},
10: {
Bor = "|",
},
9: {
Xor = "^",
},
8: {
Band = "&",
},
7: {
Eq = "==",
Neq = "!=",
},
6: {
Lt = "<",
Gt = ">",
Le = "<=",
Ge = ">=",
},
5: {
Shl = "<<",
Shr = ">>",
},
4: {
Add = "+",
Sub = "-",
},
3: {
Mul = "*",
Div = "/",
Mod = "%",
},
},
types: Ty {
U8 = "u8",
U16 = "u16",
U32 = "u32",
U64 = "u64",
I8 = "i8",
I16 = "i16",
I32 = "i32",
I64 = "i64",
Bool = "bool",
Void = "void",
},
regexes: {
Ident = "[a-zA-Z_][a-zA-Z0-9_]*",
String = r#""([^"\\]|\\.)*""#,
Number = "[0-9]+",
},
}
}

View file

@ -1,6 +0,0 @@
#![allow(dead_code)]
mod codegen;
mod lexer;
mod parser;
mod typechk;

View file

@ -1,566 +0,0 @@
use {core::panic, std::iter};
use std::array;
use logos::{Lexer, Logos};
use crate::lexer::{Op, TokenKind, Ty};
#[derive(Clone, Debug)]
pub enum Item {
Import(String),
Struct(Struct),
Function(Function),
}
/// Type as written in the source code.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Type {
/// One of the builtin types recognised by the lexer.
Builtin(Ty),
/// User defined struct, referred to by name.
Struct(String),
/// Pointer to the boxed type, produced for `&T` (the variant name is spelled
/// `Pinter` throughout the crate).
Pinter(Box<Type>),
}
#[derive(Clone, Debug)]
pub struct Struct {
pub name: String,
pub fields: Vec<Field>,
}
#[derive(Clone, Debug)]
pub struct Field {
pub name: String,
pub ty: Type,
}
#[derive(Clone, Debug)]
pub struct Function {
pub name: String,
pub args: Vec<Arg>,
pub ret: Type,
pub body: Vec<Exp>,
}
#[derive(Clone, Debug)]
pub struct Arg {
pub name: String,
pub ty: Type,
}
#[derive(Clone, Debug)]
pub struct CtorField {
pub name: String,
pub value: Exp,
}
#[derive(Clone, Debug)]
pub enum Exp {
Literal(Literal),
Variable(String),
Call {
name: String,
args: Vec<Exp>,
},
Ctor {
name: Option<Box<Exp>>,
fields: Vec<CtorField>,
},
Index {
base: Box<Exp>,
index: Box<Exp>,
},
Field {
base: Box<Exp>,
field: String,
},
Unary {
op: Op,
exp: Box<Exp>,
},
Binary {
op: Op,
left: Box<Exp>,
right: Box<Exp>,
},
If {
cond: Box<Exp>,
then: Box<Exp>,
else_: Option<Box<Exp>>,
},
Let {
name: String,
ty: Option<Type>,
value: Box<Exp>,
},
For {
init: Option<Box<Exp>>,
cond: Option<Box<Exp>>,
step: Option<Box<Exp>>,
block: Box<Exp>,
},
Block(Vec<Exp>),
Return(Option<Box<Exp>>),
Break,
Continue,
}
#[derive(Clone, Debug)]
pub enum Literal {
Int(u64),
Bool(bool),
}
#[derive(Debug, PartialEq, Clone)]
pub struct Token {
pub kind: TokenKind,
pub span: std::ops::Range<usize>,
pub value: String,
}
struct Parser<'a> {
next_token: Option<Token>,
lexer: logos::Lexer<'a, TokenKind>,
}
impl<'a> Parser<'a> {
pub fn new(input: &'a str) -> Self {
let mut lexer = TokenKind::lexer(input);
let next_token = Self::next_token(&mut lexer);
Self { next_token, lexer }
}
pub fn next(&mut self) -> Option<Token> {
let token = self.next_token.clone();
self.next_token = Self::next_token(&mut self.lexer);
token
}
pub fn next_token(lexer: &mut Lexer<TokenKind>) -> Option<Token> {
lexer.next().map(|r| {
r.map(|e| Token {
kind: e,
span: lexer.span(),
value: lexer.slice().to_owned(),
})
.unwrap_or_else(|e| {
let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start);
println!("Lexer error: {}:{}: {:?}", line, col, e);
std::process::exit(1);
})
})
}
pub fn pos_to_line_col(&self, pos: usize) -> (usize, usize) {
Self::pos_to_line_col_low(self.lexer.source(), pos)
}
/// Converts the byte offset `pos` inside `source` into a `(line, column)`
/// pair; lines are counted from 1, columns are byte offsets from the start
/// of the line (starting at 0).
///
/// # Panics
/// Panics if `pos` is past the end of `source` or not on a character boundary.
pub fn pos_to_line_col_low(source: &str, pos: usize) -> (usize, usize) {
    let prefix = &source[..pos];
    // `str::lines` does not report the empty line after a trailing newline,
    // which would attribute a position at the start of a line to the previous
    // one, so newlines are counted directly.
    let line = prefix.bytes().filter(|&byte| byte == b'\n').count() + 1;
    let col = prefix.rfind('\n').map_or(pos, |newline| pos - newline - 1);
    (line, col)
}
/// Consumes the next token and returns it, insisting that it is of `kind`.
///
/// # Panics
/// Panics on end of input or when the token is of a different kind.
pub fn expect(&mut self, kind: TokenKind) -> Token {
    let token = self.expect_any();
    if token.kind != kind {
        let (line, col) = self.pos_to_line_col(token.span.start);
        panic!(
            "Expected {:?} at {}:{}, found {:?}",
            kind, line, col, token.kind
        );
    }
    token
}
pub fn expect_any(&mut self) -> Token {
self.next().unwrap_or_else(|| panic!("Unexpected EOF"))
}
pub fn peek(&self) -> Option<&Token> {
self.next_token.as_ref()
}
/// Consumes the next token only if it is of `kind`; tells whether it did.
pub fn try_advance(&mut self, kind: TokenKind) -> bool {
    let matched = matches!(self.peek(), Some(token) if token.kind == kind);
    if matched {
        self.next();
    }
    matched
}
pub fn parse(&mut self) -> Vec<Item> {
iter::from_fn(|| self.parse_item()).collect()
}
fn parse_item(&mut self) -> Option<Item> {
let token = self.next()?;
match token.kind {
TokenKind::Struct => Some(self.parse_struct()),
TokenKind::Fn => Some(self.parse_function()),
TokenKind::Use => Some(Item::Import(self.expect(TokenKind::String).value)),
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
}
fn parse_struct(&mut self) -> Item {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::LBrace);
let fields = self.sequence(TokenKind::Comma, TokenKind::RBrace, Self::parse_field);
Item::Struct(Struct { name, fields })
}
fn parse_field(&mut self) -> Field {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon);
let ty = self.type_();
Field { name, ty }
}
fn type_(&mut self) -> Type {
let token = self.next().unwrap();
match token.kind {
TokenKind::Ty(ty) => Type::Builtin(ty),
TokenKind::Ident => Type::Struct(token.value),
TokenKind::Op(Op::Band) => {
let ty = self.type_();
Type::Pinter(Box::new(ty))
}
tkn => {
let (line, col) = self.pos_to_line_col(token.span.start);
panic!("Unexpected {:?} at {}:{}", tkn, line, col)
}
}
}
fn parse_function(&mut self) -> Item {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::LParen);
let args = self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_arg);
self.expect(TokenKind::Colon);
let ret = self.type_();
Item::Function(Function {
name,
args,
ret,
body: self.parse_block(),
})
}
/// Parses one function parameter of the form `name: type`.
///
/// The separating comma is deliberately left in the token stream: the
/// caller (`sequence`) consumes separators itself, and eating the comma
/// here made it reject every parameter list with more than one entry.
fn parse_arg(&mut self) -> Arg {
    let name = self.expect(TokenKind::Ident).value;
    self.expect(TokenKind::Colon);
    let ty = self.type_();
    Arg { name, ty }
}
fn parse_expr(&mut self) -> Exp {
self.parse_binary_expr(255)
}
/// Parses a chain of binary operators by precedence climbing.
///
/// Only operators whose precedence number is strictly smaller than
/// `min_prec` (smaller number = binds tighter) are consumed at this level.
/// All operators group to the left (`a - b - c` is `(a - b) - c`), except
/// the assignment family which groups to the right (`a = b = c` is
/// `a = (b = c)`).
fn parse_binary_expr(&mut self, min_prec: u8) -> Exp {
    // precedence level shared by `=` and the compound assignments
    const ASSIGN_PREC: u8 = 14;

    let mut lhs = self.parse_unit_expr();
    while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) {
        let prec = op.prec();
        if prec >= min_prec {
            break;
        }
        self.next();
        // Letting the right operand accept the same level again is what
        // makes assignments right-associative; every other operator stops
        // at its own level so that the loop above folds to the left.
        let rhs_min_prec = if prec == ASSIGN_PREC { prec + 1 } else { prec };
        let rhs = self.parse_binary_expr(rhs_min_prec);
        lhs = Exp::Binary {
            op,
            left: Box::new(lhs),
            right: Box::new(rhs),
        };
    }
    lhs
}
/// Parses an expression that contains no binary operator at its top level:
/// a literal, variable, block, parenthesised expression, `let`, `if`,
/// `for`, `return`, `break`, `continue`, unary operation or anonymous
/// constructor, followed by any number of postfix forms (call, index,
/// field access, named constructor).
///
/// # Panics
/// Panics on end of input and on any token that cannot start or continue an
/// expression; messages carry the position of the offending token.
fn parse_unit_expr(&mut self) -> Exp {
    let token = self.expect_any();
    let mut expr = match token.kind {
        TokenKind::True => Exp::Literal(Literal::Bool(true)),
        TokenKind::False => Exp::Literal(Literal::Bool(false)),
        TokenKind::Ident => Exp::Variable(token.value),
        TokenKind::Break => Exp::Break,
        TokenKind::Continue => Exp::Continue,
        TokenKind::LBrace => {
            // the opening brace is already consumed, the block runs up to
            // the matching closing brace
            Exp::Block(self.sequence(TokenKind::Semicolon, TokenKind::RBrace, Self::parse_expr))
        }
        TokenKind::LParen => {
            let expr = self.parse_expr();
            self.expect(TokenKind::RParen);
            expr
        }
        TokenKind::Number => {
            // the lexer guarantees digits only, so the sole failure is overflow
            let value = token.value.parse::<u64>().unwrap_or_else(|err| {
                let (line, col) = self.pos_to_line_col(token.span.start);
                panic!(
                    "Invalid number {:?} at {}:{}: {}",
                    token.value, line, col, err
                )
            });
            Exp::Literal(Literal::Int(value))
        }
        TokenKind::Let => {
            let name = self.expect(TokenKind::Ident).value;
            let ty = self.try_advance(TokenKind::Colon).then(|| self.type_());
            self.expect(TokenKind::Op(Op::Assign));
            let value = self.parse_expr();
            Exp::Let {
                name,
                ty,
                value: Box::new(value),
            }
        }
        TokenKind::If => {
            let cond = self.parse_expr();
            let then = Exp::Block(self.parse_block());
            let else_ = self
                .try_advance(TokenKind::Else)
                .then(|| {
                    // `else if` chains are parsed as a nested `if` expression
                    if self.peek().is_some_and(|t| t.kind == TokenKind::If) {
                        self.parse_expr()
                    } else {
                        Exp::Block(self.parse_block())
                    }
                })
                .map(Box::new);
            Exp::If {
                cond: Box::new(cond),
                then: Box::new(then),
                else_,
            }
        }
        TokenKind::For => {
            // the loop header is a `;` separated list ended by the `{` of the body
            let params =
                self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr);
            let mut exprs = Vec::new();
            while !self.try_advance(TokenKind::RBrace) {
                exprs.push(self.parse_expr());
                self.try_advance(TokenKind::Semicolon);
            }
            let block = Exp::Block(exprs);
            let len = params.len();
            let mut exprs = params.into_iter();
            let [init, consd, step] = array::from_fn(|_| exprs.next());
            match len {
                0 => Exp::For {
                    init: None,
                    cond: None,
                    step: None,
                    block: Box::new(block),
                },
                // a single header expression is the condition (while loop)
                1 => Exp::For {
                    init: None,
                    cond: init.map(Box::new),
                    step: None,
                    block: Box::new(block),
                },
                3 => Exp::For {
                    init: init.map(Box::new),
                    cond: consd.map(Box::new),
                    step: step.map(Box::new),
                    block: Box::new(block),
                },
                _ => {
                    let (line, col) = self.pos_to_line_col(token.span.start);
                    panic!("Invalid loop syntax at {}:{}, loop accepts 1 (while), 0 (loop), or 3 (for) statements separated by semicolon", line, col)
                }
            }
        }
        TokenKind::Return => {
            // a value follows unless the statement ends right away
            let value = self
                .peek()
                .is_some_and(|t| {
                    !matches!(
                        t.kind,
                        TokenKind::Semicolon
                            | TokenKind::RBrace
                            | TokenKind::RParen
                            | TokenKind::Comma
                    )
                })
                .then(|| Box::new(self.parse_expr()));
            Exp::Return(value)
        }
        TokenKind::Op(op) => Exp::Unary {
            op,
            // A prefix operator applies to the following unit only, so that
            // `-a + b` is `(-a) + b`; postfix forms of the operand are still
            // consumed by the nested call.
            exp: Box::new(self.parse_unit_expr()),
        },
        TokenKind::Dot => {
            let token = self.expect_any();
            match token.kind {
                TokenKind::LBrace => {
                    let fields = self.sequence(
                        TokenKind::Comma,
                        TokenKind::RBrace,
                        Self::parse_ctor_field,
                    );
                    Exp::Ctor { name: None, fields }
                }
                tkn => {
                    let (line, col) = self.pos_to_line_col(token.span.start);
                    panic!("Unexpected {:?} at {}:{}", tkn, line, col)
                }
            }
        }
        TokenKind::Ty(_)
        | TokenKind::String
        | TokenKind::Use
        | TokenKind::Struct
        | TokenKind::RBrace
        | TokenKind::RParen
        | TokenKind::LBracket
        | TokenKind::RBracket
        | TokenKind::Colon
        | TokenKind::Semicolon
        | TokenKind::Comma
        | TokenKind::Fn
        | TokenKind::Else => {
            let (line, col) = self.pos_to_line_col(token.span.start);
            panic!("Unexpected {:?} at {}:{}", token.kind, line, col)
        }
    };
    // postfix forms: call, indexing, field access and named constructor
    loop {
        match self.peek().map(|t| t.kind) {
            Some(TokenKind::LParen) => {
                self.next();
                expr = Exp::Call {
                    name: match expr {
                        Exp::Variable(name) => name,
                        _ => {
                            let (line, col) = self.pos_to_line_col(token.span.start);
                            panic!("Expected function name at {}:{}", line, col)
                        }
                    },
                    args: self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_expr),
                };
            }
            Some(TokenKind::LBracket) => {
                self.next();
                let index = self.parse_expr();
                self.expect(TokenKind::RBracket);
                expr = Exp::Index {
                    base: Box::new(expr),
                    index: Box::new(index),
                };
            }
            Some(TokenKind::Dot) => {
                self.next();
                let token = self.expect_any();
                match token.kind {
                    TokenKind::Ident => {
                        expr = Exp::Field {
                            base: Box::new(expr),
                            field: token.value,
                        };
                    }
                    TokenKind::LBrace => {
                        let fields = self.sequence(
                            TokenKind::Comma,
                            TokenKind::RBrace,
                            Self::parse_ctor_field,
                        );
                        expr = Exp::Ctor {
                            name: Some(Box::new(expr)),
                            fields,
                        };
                    }
                    tkn => {
                        let (line, col) = self.pos_to_line_col(token.span.start);
                        panic!("Unexpected {:?} at {}:{}", tkn, line, col)
                    }
                }
            }
            _ => break expr,
        }
    }
}
pub fn parse_ctor_field(&mut self) -> CtorField {
let name = self.expect(TokenKind::Ident).value;
self.expect(TokenKind::Colon);
let value = self.parse_expr();
CtorField { name, value }
}
/// Parses `{ expr; expr; ... }` and returns the contained expressions; the
/// semicolon after each expression is optional.
pub fn parse_block(&mut self) -> Vec<Exp> {
    self.expect(TokenKind::LBrace);
    let mut body = Vec::new();
    loop {
        if self.try_advance(TokenKind::RBrace) {
            break body;
        }
        body.push(self.parse_expr());
        self.try_advance(TokenKind::Semicolon);
    }
}
/// Parses items produced by `parser`, separated by `sep`, until `term` is
/// consumed. A separator right before `term` is accepted and so is an empty
/// list; the opening delimiter must already be consumed by the caller.
pub fn sequence<T>(
    &mut self,
    sep: TokenKind,
    term: TokenKind,
    mut parser: impl FnMut(&mut Self) -> T,
) -> Vec<T> {
    let mut collected = Vec::new();
    loop {
        // terminator directly at the start or after a separator
        if self.try_advance(term) {
            return collected;
        }
        collected.push(parser(self));
        // terminator directly after an item
        if self.try_advance(term) {
            return collected;
        }
        self.expect(sep);
    }
}
}
/// Parses the whole `input` into its top level items.
pub fn parse(input: &str) -> Vec<Item> {
    let mut parser = Parser::new(input);
    parser.parse()
}
#[cfg(test)]
mod test {
#[test]
fn sanity() {
let input = r#"
struct Foo {
x: i32,
y: i32,
}
fn main(): void {
let foo = Foo.{ x: 1, y: 2 };
if foo.x > 0 {
return foo.x;
} else {
return foo.y;
}
for i < 10 {
i = i + 1;
}
for let i = 0; i < 10; i = i + 1 {
i = i + 1;
}
i + 1 * 3 / 4 % 5 == 2 + 3 - 4 * 5 / 6 % 7;
fomething();
pahum(&foo);
lupa(*soo);
return foo.x + foo.y;
}
fn lupa(x: i32): i32 {
return x;
}
fn pahum(x: &Foo): void {
return;
}
"#;
let _ = super::parse(input);
}
}

View file

@ -1,20 +0,0 @@
use crate::lexer::Ty;
#[derive(Clone, Debug)]
pub enum Type {
Builtin(Ty),
Struct(StructType),
Pointer(Box<Type>),
}
#[derive(Clone, Debug)]
pub struct StructType {
pub name: String,
pub fields: Vec<Field>,
}
#[derive(Clone, Debug)]
pub struct Field {
pub name: String,
pub ty: Type,
}

View file

@ -1,592 +1,108 @@
use std::{iter::Cycle, ops::Range, usize};
use {crate::parser, std::fmt::Write};
use crate::{
lexer::{self, Ty},
parser::{Exp, Function, Item, Literal, Struct, Type},
};
const STACK_PTR: &str = "r254";
const ZERO: &str = "r0";
const RET_ADDR: &str = "r31";
type Reg = u8;
type Offset = i32;
type Pushed = bool;
type SlotIndex = usize;
type Label = usize;
type Data = usize;
type Size = usize;
//| Register | Description | Saver |
//|:-----------|:--------------------|:-------|
//| r0 | Hard-wired zero | N/A |
//| r1 - r2 | Return values | Caller |
//| r2 - r11 | Function parameters | Caller |
//| r12 - r30 | General purpose | Caller |
//| r31 | Return address | Caller |
//| r32 - r253 | General purpose | Callee |
//| r254 | Stack pointer | Callee |
//| r255 | Thread pointer | N/A |
struct RegAlloc {
pub regs: Box<[Option<usize>; 256]>,
pub used: Box<[bool; 256]>,
pub spill_cycle: Cycle<Range<u8>>,
pub struct Codegen<'a> {
path: &'a std::path::Path,
code: String,
data: String,
}
impl RegAlloc {
const STACK_POINTER: Reg = 254;
const ZERO: Reg = 0;
const RETURN_ADDRESS: Reg = 31;
fn alloc_general(&mut self, slot: usize) -> Option<Reg> {
self.regs[32..254]
.iter_mut()
.zip(&mut self.used[32..254])
.position(|(reg, used)| {
if reg.is_none() {
*reg = Some(slot);
*used = true;
true
} else {
false
}
})
.map(|reg| reg as Reg + 32)
}
fn free(&mut self, reg: Reg) {
assert!(self.regs[reg as usize].take().is_some());
}
fn is_used(&self, reg: Reg) -> bool {
self.regs[reg as usize].is_some()
}
fn spill(&mut self, for_slot: usize) -> (Reg, Option<usize>) {
let to_spill = self.spill_cycle.next().unwrap();
let slot = self.spill_specific(to_spill, for_slot);
(to_spill as Reg + 32, slot)
}
fn spill_specific(&mut self, reg: Reg, for_slot: usize) -> Option<usize> {
self.regs[reg as usize].replace(for_slot)
}
fn restore(&mut self, reg: Reg, slot: usize) -> usize {
self.regs[reg as usize].replace(slot).unwrap()
}
fn alloc_specific(&mut self, reg: u8, to: usize) {
assert!(self.regs[reg as usize].replace(to).is_none());
}
fn alloc_specific_in_reg(&mut self, reg: InReg, to: usize) {
match reg {
InReg::Single(r) => self.alloc_specific(r, to),
InReg::Pair(r1, r2) => {
self.alloc_specific(r1, to);
self.alloc_specific(r2, to);
}
}
}
}
pub struct ParamAlloc {
reg_range: Range<Reg>,
stack: Offset,
}
impl ParamAlloc {
fn for_params() -> Self {
impl<'a> Codegen<'a> {
pub fn new(path: &'a std::path::Path) -> Self {
Self {
stack: 8, // return adress is in callers stack frame
reg_range: 2..12,
path,
code: String::new(),
data: String::new(),
}
}
fn for_returns() -> Self {
Self {
stack: 0,
reg_range: 0..2,
pub fn file(&mut self, exprs: &[parser::Expr]) -> std::fmt::Result {
for expr in exprs {
self.expr(expr)?;
}
Ok(())
}
fn alloc(&mut self, size: usize) -> SlotValue {
match self.try_alloc_regs(size) {
Some(reg) => reg,
None => {
let stack = self.stack;
self.stack += size as Offset;
SlotValue::Stack(stack)
}
}
}
fn try_alloc_regs(&mut self, size: usize) -> Option<SlotValue> {
let mut needed = size.div_ceil(8);
if needed > 2 {
needed = 1; // passed by ref
}
if self.reg_range.len() < needed {
return None;
}
match needed {
1 => {
let reg = self.reg_range.start;
self.reg_range.start += 1;
Some(SlotValue::Reg(InReg::Single(reg)))
}
2 => {
let reg = self.reg_range.start;
self.reg_range.start += 2;
Some(SlotValue::Reg(InReg::Pair(reg, reg + 1)))
}
_ => unreachable!(),
}
}
}
impl Default for RegAlloc {
fn default() -> Self {
Self {
regs: Box::new([None; 256]),
used: Box::new([false; 256]),
spill_cycle: (32..254).cycle(),
}
}
}
struct Variable {
name: String,
location: usize,
}
#[derive(Clone, Copy)]
struct SlotId {
// index into slot stack
index: SlotIndex,
// temorary offset carried over when eg. accessing fields
offset: Offset,
// this means we can mutate the value as part of computation
owned: bool,
}
impl SlotId {
fn base(location: usize) -> Self {
Self {
index: location,
offset: 0,
owned: true,
}
}
fn borrowed(self) -> Self {
Self {
owned: false,
..self
}
}
}
struct Slot {
ty: Type,
value: SlotValue,
}
#[repr(transparent)]
struct InstBuffer {
buffer: Vec<u8>,
}
impl InstBuffer {
fn new(vec: &mut Vec<u8>) -> &mut Self {
unsafe { &mut *(vec as *mut Vec<u8> as *mut Self) }
}
}
impl hbbytecode::Buffer for InstBuffer {
fn reserve(&mut self, bytes: usize) {
self.buffer.reserve(bytes);
}
unsafe fn write(&mut self, byte: u8) {
self.buffer.push(byte);
}
}
#[derive(Clone, Copy)]
enum InReg {
Single(Reg),
// if one of the registes is allocated, the other is too, ALWAYS
// with the same slot
Pair(Reg, Reg),
}
#[derive(Clone, Copy)]
enum Spill {
Reg(InReg),
Stack(Offset), // relative to frame end (rsp if nothing was pushed)
}
#[derive(Clone, Copy)]
enum SlotValue {
Reg(InReg),
Stack(Offset), // relative to frame start (rbp)
Imm(u64),
Spilled(Spill, SlotIndex),
}
pub struct Value {
store: ValueStore,
offset: Offset,
}
#[derive(Clone, Copy)]
enum ValueStore {
Reg(InReg),
Stack(Offset, Pushed),
Imm(u64),
}
impl From<SlotValue> for ValueStore {
fn from(value: SlotValue) -> Self {
match value {
SlotValue::Reg(reg) => ValueStore::Reg(reg),
SlotValue::Stack(offset) => ValueStore::Stack(offset, false),
SlotValue::Imm(imm) => ValueStore::Imm(imm),
SlotValue::Spilled(spill, _) => match spill {
Spill::Reg(reg) => ValueStore::Reg(reg),
Spill::Stack(offset) => ValueStore::Stack(offset, true),
fn expr(&mut self, expr: &parser::Expr) -> std::fmt::Result {
use parser::Expr as E;
match expr {
E::Decl {
name,
val:
E::Closure {
ret: E::Ident { name: "void" },
body,
},
} => {
writeln!(self.code, "{name}:")?;
self.expr(body)
}
E::Return { val: None } => self.ret(),
E::Block { stmts } => {
for stmt in stmts {
self.expr(stmt)?;
}
Ok(())
}
ast => unimplemented!("{:?}", ast),
}
}
/// Emits a store of `value` (`size` bytes) at the stack pointer, followed by
/// an `addi` that advances the stack pointer by `size`.
fn stack_push(&mut self, value: impl std::fmt::Display, size: usize) -> std::fmt::Result {
    writeln!(self.code, " st {value}, {STACK_PTR}, {ZERO}, {size}")?;
    // the mnemonic suffix is the operand width in bits
    let bits = size * 8;
    writeln!(
        self.code,
        " addi{} {STACK_PTR}, {STACK_PTR}, {size}",
        bits
    )
}
/// Emits a `subi` that moves the stack pointer back by `size`, followed by a
/// load of `size` bytes from the stack pointer into `value`.
fn stack_pop(&mut self, value: impl std::fmt::Display, size: usize) -> std::fmt::Result {
    // the mnemonic suffix is the operand width in bits
    let bits = size * 8;
    writeln!(
        self.code,
        " subi{} {STACK_PTR}, {STACK_PTR}, {size}",
        bits
    )?;
    writeln!(self.code, " ld {value}, {STACK_PTR}, {ZERO}, {size}")
}
/// Emits a call sequence: pushes `func` (8 bytes) onto the stack, then jumps to it.
fn call(&mut self, func: impl std::fmt::Display) -> std::fmt::Result {
    self.stack_push(&func, 8)
        .and_then(|()| self.global_jump(func))
}
/// Emits a return sequence: pops 8 bytes into `RET_ADDR`, then jumps to it.
fn ret(&mut self) -> std::fmt::Result {
    self.stack_pop(RET_ADDR, 8)
        .and_then(|()| self.global_jump(RET_ADDR))
}
// Emits a `jala` instruction targeting `label`, with `ZERO` as the first
// operand and a constant third operand of 0.
fn global_jump(&mut self, label: impl std::fmt::Display) -> std::fmt::Result {
writeln!(self.code, " jala {ZERO}, {label}, 0")
}
// Writes the complete assembly listing to `out`: a `start` stub that jumps
// to `main` and then executes `tx`, followed by the accumulated code and
// data text. Returns the first formatting error, if any.
pub fn dump(&mut self, mut out: impl std::fmt::Write) -> std::fmt::Result {
writeln!(out, "start:")?;
writeln!(out, " jala {ZERO}, main, 0")?;
writeln!(out, " tx")?;
writeln!(out, "{}", self.code)?;
writeln!(out, "{}", self.data)
}
}
/// A pending patch in the code section that refers to a label; resolved in `link`.
pub struct LabelReloc {
// index into the generator's label table
pub label: Label,
// byte offset in the code section where the 8-byte value is written
pub offset: usize,
}
/// A pending patch that refers to a data entry; resolved in `link`.
pub struct DataReloc {
// index into the generator's data table
pub data: Data,
// byte offset at which the 8-byte value is written
pub offset: usize,
}
/// Snapshot of the slot and variable counts taken by `push_frame`;
/// `pop_frame` truncates both tables back to these lengths.
#[must_use]
pub struct Frame {
// number of slots when the frame was opened
pub slot_count: usize,
// number of variables when the frame was opened
pub var_count: usize,
}
/// Intermediate instruction recorded by the generator before encoding.
enum Instr {
// binary operation on two operands
BinOp(lexer::Op, Value, Value),
// move of `Size` bytes; `emit_move` pushes it as (size, from, to)
Move(Size, Value, Value),
// push a register onto the stack
Push(Reg),
// jump to a label
Jump(Label),
// call a function by name
Call(String),
// jump to the label depending on the operand (presumably when it is zero — confirm)
JumpIfZero(Value, Label),
}
/// Code generator state for one compilation.
#[derive(Default)]
pub struct Generator<'a> {
// items being compiled
ast: &'a [Item],
// function name -> label, searched by `lookup_func_label`
func_labels: Vec<(String, Label)>,
stack_size: Offset,
// running total of pushed bytes; `emmit_push` adds 8 per push
pushed_size: Offset,
// register allocator
regs: RegAlloc,
// variables in scope; truncated by `pop_frame`
variables: Vec<Variable>,
// slot table; truncated by `pop_frame`
slots: Vec<Slot>,
// label table: `None` until `define_label` records the code offset
labels: Vec<Option<usize>>,
label_relocs: Vec<LabelReloc>,
// data table: `None` until `define_data` fills it in
data: Vec<Option<usize>>,
data_relocs: Vec<DataReloc>,
// emitted code bytes
code_section: Vec<u8>,
// emitted data bytes; `link` appends them after the code
data_section: Vec<u8>,
// instructions recorded so far
instrs: Vec<Instr>,
}
impl<'a> Generator<'a> {
fn generate(mut self) -> Vec<u8> {
for item in self.ast {
let Item::Function(f) = item else { continue };
self.generate_function(f);
#[cfg(test)]
mod tests {
/// Test runner: parses `input`, generates code for it and writes the
/// resulting listing into `output`.
fn generate(input: &'static str, output: &mut String) {
    let path = std::path::Path::new("test");
    let exprs = super::parser::Parser::new(input, path).file();
    let mut codegen = super::Codegen::new(path);
    codegen.file(&exprs).unwrap();
    codegen.dump(output).unwrap();
}
self.link()
}
// Generates code for one function: binds every parameter to a slot and a
// variable, generates each body statement, then drops the slots and
// variables introduced while doing so.
fn generate_function(&mut self, f: &Function) {
let frame = self.push_frame();
let mut param_alloc = ParamAlloc::for_params();
for param in f.args.iter() {
let param_size = self.size_of(&param.ty);
let value = param_alloc.alloc(param_size);
let slot = self.add_slot(param.ty.clone(), value);
// parameters that arrive in registers must be marked as taken in the allocator
if let SlotValue::Reg(reg) = value {
self.regs.alloc_specific_in_reg(reg, slot);
}
self.add_variable(param.name.clone(), slot);
}
for stmt in f.body.iter() {
// statements are generated with an expected type of `void` and must not yield a slot
// NOTE(review): `generate_expr` currently ends in `Some(value)` on every
// non-`todo!` path, so this assertion looks like it cannot hold yet — confirm
assert!(self
.generate_expr(Some(Type::Builtin(Ty::Void)), stmt)
.is_none());
}
self.pop_frame(frame);
}
// Generates code for `expr` and returns the id of the slot holding its result.
// When `expected` is given, the resulting slot's type must equal it.
// Only literals, variables and calls are implemented; everything else is `todo!()`.
//
// Panics if an integer literal is generated without an expected type, if a
// variable is unknown, or if the result type differs from `expected`.
fn generate_expr(&mut self, expected: Option<Type>, expr: &Exp) -> Option<SlotId> {
let value = match expr {
Exp::Literal(lit) => SlotId::base(match lit {
// integer literals take their type from the expectation
Literal::Int(i) => self.add_slot(expected.clone().unwrap(), SlotValue::Imm(*i)),
Literal::Bool(b) => {
self.add_slot(Type::Builtin(Ty::Bool), SlotValue::Imm(*b as u64))
}
}),
Exp::Variable(ident) => {
// variables are referenced, not owned, by the expression
SlotId::base(self.lookup_variable(ident).unwrap().location).borrowed()
}
Exp::Call { name, args } => self.generate_call(expected.clone(), name, args),
Exp::Ctor { name, fields } => todo!(),
Exp::Index { base, index } => todo!(),
Exp::Field { base, field } => todo!(),
Exp::Unary { op, exp } => todo!(),
Exp::Binary { op, left, right } => todo!(),
Exp::If { cond, then, else_ } => todo!(),
Exp::Let { name, ty, value } => todo!(),
Exp::For {
init,
cond,
step,
block,
} => todo!(),
Exp::Block(_) => todo!(),
Exp::Return(_) => todo!(),
Exp::Break => todo!(),
Exp::Continue => todo!(),
};
if let Some(expected) = expected {
let actual = self.slots[value.index].ty.clone();
assert_eq!(expected, actual);
}
Some(value)
}
// Generates a call to the function `name`: evaluates each argument against
// the matching parameter type, moves it into the location assigned to that
// parameter and records a `Call` instruction.
// Unfinished: the body ends in `todo!()`, so `frame`, `ret_slot` and
// `expected` are not used yet.
fn generate_call(&mut self, expected: Option<Type>, name: &str, args: &[Exp]) -> SlotId {
let frame = self.push_frame();
let func = self.lookup_function(name);
let mut ret_alloc = ParamAlloc::for_returns();
let ret_size = self.size_of(&func.ret);
let ret_slot = ret_alloc.alloc(ret_size);
let mut arg_alloc = ParamAlloc::for_params();
// `zip` stops at the shorter of the two lists; an argument-count mismatch is not reported here
for (arg, param) in args.iter().zip(&func.args) {
let arg_slot = self.generate_expr(Some(param.ty.clone()), arg).unwrap();
let arg_size = self.size_of(&param.ty);
let param_slot = arg_alloc.alloc(arg_size);
self.set_temporarly(arg_slot, param_slot);
}
self.instrs.push(Instr::Call(name.to_owned()));
todo!()
}
// Makes the location `to` writable on behalf of slot `from`, registers it as
// a new slot with the same type as `from`, and emits a move from `from` into it.
fn set_temporarly(&mut self, from: SlotId, to: SlotValue) {
let to = self.make_mutable(to, from.index);
let to_slot = self.add_slot(self.slots[from.index].ty.clone(), to);
self.emit_move(from, SlotId::base(to_slot));
}
// Prepares `target` so it can be overwritten on behalf of slot `by`.
// - A plain register location is claimed in the allocator for `by`.
// - A register location that holds a spilled value has its register(s)
//   pushed to the stack first; the saved copy is recorded in a new slot
//   and the returned value refers to that slot.
// Any other kind of location panics via `unreachable!()`.
fn make_mutable(&mut self, target: SlotValue, by: SlotIndex) -> SlotValue {
match target {
SlotValue::Reg(in_reg) => {
self.regs.alloc_specific_in_reg(in_reg, by);
target
}
SlotValue::Spilled(Spill::Reg(in_reg), slot) => {
let new_val = SlotValue::Spilled(
match in_reg {
InReg::Single(reg) => Spill::Stack(self.emmit_push(reg)),
InReg::Pair(r1, r2) => {
// second register is pushed first; the recorded offset is the one returned for `r1`
self.emmit_push(r2);
Spill::Stack(self.emmit_push(r1))
}
},
slot,
);
let new_slot = self.add_slot(self.slots[slot].ty.clone(), new_val);
SlotValue::Spilled(Spill::Reg(in_reg), new_slot)
}
_ => unreachable!(),
}
}
// Records a push of `reg`, grows `pushed_size` by 8 and returns the new
// total, which callers use as the stack offset of the pushed value.
fn emmit_push(&mut self, reg: Reg) -> Offset {
self.pushed_size += 8;
self.instrs.push(Instr::Push(reg));
self.pushed_size
}
/// Records a move of the value in slot `from` into slot `to`.
///
/// Panics if the types of the two slots differ in size.
fn emit_move(&mut self, from: SlotId, to: SlotId) {
    let src_size = self.size_of(&self.slots[from.index].ty);
    let dst_size = self.size_of(&self.slots[to.index].ty);
    assert_eq!(src_size, dst_size);

    let src = self.slot_to_value(from);
    let dst = self.slot_to_value(to);
    self.instrs.push(Instr::Move(src_size, src, dst));
}
fn slot_to_value(&self, slot: SlotId) -> Value {
let slot_val = &self.slots[slot.index];
Value {
store: slot_val.value.into(),
offset: slot.offset,
}
}
// Returns the size of `ty` in bytes.
// Builtins have fixed sizes (`void` is 0), a struct is the plain sum of its
// field sizes (no padding is added here), and pointers are 8 bytes.
// Panics (through `lookup_struct`) when a named struct does not exist.
fn size_of(&self, ty: &Type) -> Size {
match ty {
Type::Builtin(ty) => match ty {
Ty::U8 | Ty::I8 | Ty::Bool => 1,
Ty::U16 | Ty::I16 => 2,
Ty::U32 | Ty::I32 => 4,
Ty::U64 | Ty::I64 => 8,
Ty::Void => 0,
},
Type::Struct(name) => self
.lookup_struct(name)
.fields
.iter()
.map(|field| self.size_of(&field.ty))
.sum(),
Type::Pinter(_) => 8,
}
crate::run_tests! { generate:
example => include_str!("../examples/main_fn.hb");
}
}
// Bookkeeping helpers of the generator: variables, slots, labels, data
// entries, item lookup, scoping frames and final linking.
impl<'a> Generator<'a> {
    /// Registers variable `name` as living in the slot with index `location`.
    fn add_variable(&mut self, name: String, location: usize) {
        self.variables.push(Variable { name, location });
    }

    /// Appends a slot to the slot table and returns its index.
    fn add_slot(&mut self, ty: Type, value: SlotValue) -> usize {
        let slot = self.slots.len();
        self.slots.push(Slot { ty, value });
        slot
    }

    /// Resolves all relocations and returns the code section followed by the
    /// data section.
    ///
    /// # Panics
    /// Panics if a relocation refers to a label or data entry that was
    /// declared but never defined, or if a patch site has no room for 8 bytes.
    fn link(mut self) -> Vec<u8> {
        for reloc in self.label_relocs {
            let label = self.labels[reloc.label].unwrap();
            let offset = reloc.offset;
            // The stored value is the distance from the patch site to the
            // label. A label defined before the patch site gives a negative
            // distance, so the subtraction is done in signed arithmetic
            // instead of underflowing `usize`.
            let target = label as i64 - offset as i64;
            self.code_section[offset..offset + 8].copy_from_slice(&target.to_le_bytes());
        }
        for reloc in self.data_relocs {
            let data = self.data[reloc.data].unwrap();
            let offset = reloc.offset;
            // data is placed right after the code in the final image
            let target = (data + self.code_section.len()) as u64;
            // NOTE(review): the patch is applied to the data section, as in
            // the original; confirm that data relocation offsets are meant
            // to index the data section rather than the code section.
            self.data_section[offset..offset + 8].copy_from_slice(&target.to_le_bytes());
        }
        self.code_section.extend_from_slice(&self.data_section);
        self.code_section
    }

    /// Returns the label registered for function `name`.
    ///
    /// # Panics
    /// Panics if no such function label exists.
    fn lookup_func_label(&mut self, name: &str) -> Label {
        if let Some(label) = self.func_labels.iter().find(|(n, _)| n == name) {
            return label.1;
        }
        panic!("Function not found: {}", name);
    }

    /// Reserves a new, not yet defined label and returns its index.
    fn declare_label(&mut self) -> Label {
        self.labels.push(None);
        self.labels.len() - 1
    }

    /// Binds `label` to the current end of the code section.
    fn define_label(&mut self, label: Label) {
        self.labels[label] = Some(self.code_section.len());
    }

    /// Reserves a new, not yet defined data entry and returns its index.
    fn declare_data(&mut self) -> Data {
        self.data.push(None);
        self.data.len() - 1
    }

    /// Binds `data` to the current end of the data section and appends `bytes` there.
    fn define_data(&mut self, data: Data, bytes: &[u8]) {
        // Record the byte offset inside the data section (mirroring
        // `define_label`); `link` adds the code length to this value. The
        // number of declared entries would not be a valid offset.
        self.data[data] = Some(self.data_section.len());
        self.data_section.extend_from_slice(bytes);
    }

    /// Finds the struct named `name`.
    ///
    /// # Panics
    /// Panics if the item does not exist or is not a struct.
    fn lookup_struct(&self, name: &str) -> &Struct {
        self.lookup_item(name)
            .map(|item| match item {
                Item::Struct(s) => s,
                _ => panic!("Not a struct: {}", name),
            })
            .expect("Struct not found")
    }

    /// Finds the function named `name`.
    ///
    /// # Panics
    /// Panics if the item does not exist or is not a function.
    fn lookup_function(&self, name: &str) -> &'a Function {
        self.lookup_item(name)
            .map(|item| match item {
                Item::Function(f) => f,
                _ => panic!("Not a function: {}", name),
            })
            .expect("Function not found")
    }

    /// Returns the first struct or function whose name is `name`; imports never match.
    fn lookup_item(&self, name: &str) -> Option<&'a Item> {
        self.ast.iter().find(|item| match item {
            Item::Import(_) => false,
            Item::Struct(s) => s.name == name,
            Item::Function(f) => f.name == name,
        })
    }

    /// Returns the first registered variable called `name`, if any.
    fn lookup_variable(&self, name: &str) -> Option<&Variable> {
        self.variables.iter().find(|variable| variable.name == name)
    }

    /// Captures the current slot and variable counts so they can be restored by `pop_frame`.
    fn push_frame(&mut self) -> Frame {
        Frame {
            slot_count: self.slots.len(),
            var_count: self.variables.len(),
        }
    }

    /// Drops every slot and variable added since `frame` was captured.
    fn pop_frame(&mut self, frame: Frame) {
        self.slots.truncate(frame.slot_count);
        self.variables.truncate(frame.var_count);
    }
}
/// Compiles `ast` with a freshly initialised generator and returns the
/// produced bytes.
pub fn generate(ast: &[Item]) -> Vec<u8> {
    let generator = Generator {
        ast,
        ..Default::default()
    };
    generator.generate()
}

169
hblang/src/lexer.rs Normal file
View file

@ -0,0 +1,169 @@
use std::{iter::Peekable, str::Chars};
/// A lexed token: its kind and the byte range it covers in the input.
#[derive(Debug, PartialEq)]
pub struct Token {
pub kind: TokenKind,
// byte offset of the first byte of the token
pub start: u32,
// byte offset one past the last byte of the token
pub end: u32,
}
impl Token {
    /// Byte range of the token, usable for indexing the source text.
    pub fn range(&self) -> std::ops::Range<usize> {
        let (from, to) = (self.start as usize, self.end as usize);
        from..to
    }
}
/// Kinds of tokens produced by the lexer.
#[derive(Debug, PartialEq)]
pub enum TokenKind {
// identifier: letters, digits and `_`, not starting with a digit
Ident,
// run of decimal digits
Number,
// `(`
LParen,
// `)`
RParen,
// `{`
LBrace,
// `}`
RBrace,
// `[`
LBrack,
// `]`
RBrack,
// `:=`
Decl,
// `||`
Or,
// `;`
Semi,
// `:`
Colon,
// keyword `return`
Return,
// end of input
Eof,
// anything the lexer does not recognise
Error,
}
/// Byte-oriented lexer over a source string.
pub struct Lexer<'a> {
// byte offset of the next unread byte
pos: u32,
// the input, taken from a `&str` in `new`
bytes: &'a [u8],
}
impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at the start of `input`.
    pub fn new(input: &'a str) -> Self {
        Self {
            pos: 0,
            bytes: input.as_bytes(),
        }
    }

    /// Returns the source text covered by `tok`.
    ///
    /// # Panics
    /// Panics if the token's range is out of bounds or does not lie on UTF-8
    /// character boundaries. The range comes from the caller, so it is
    /// validated instead of being trusted blindly.
    pub fn slice(&self, tok: Token) -> &'a str {
        std::str::from_utf8(&self.bytes[tok.range()])
            .expect("token range must lie on UTF-8 character boundaries")
    }

    /// Returns the next unread byte without consuming it.
    fn peek(&self) -> Option<u8> {
        self.bytes.get(self.pos as usize).copied()
    }

    /// Consumes and returns the next byte, or `None` at end of input.
    fn advance(&mut self) -> Option<u8> {
        let c = self.peek()?;
        self.pos += 1;
        Some(c)
    }

    /// Returns the next token; at end of input an empty `Eof` token is
    /// returned, again and again.
    pub fn next(&mut self) -> Token {
        Iterator::next(self).unwrap_or(Token {
            kind: TokenKind::Eof,
            start: self.pos,
            end: self.pos,
        })
    }

    /// Consumes the next byte if it equals `arg` and reports whether it did.
    fn advance_if(&mut self, arg: u8) -> bool {
        if self.peek() == Some(arg) {
            self.advance();
            true
        } else {
            false
        }
    }

    /// Converts the byte offset `start` into a 1-based `(line, column)` pair.
    ///
    /// An offset that points at a line terminator or at the very end of the
    /// input is reported as the column just past the end of that line.
    /// Offsets beyond the input yield `(1, 1)`.
    pub fn line_col(&self, mut start: u32) -> (usize, usize) {
        self.bytes
            .split(|&b| b == b'\n')
            .enumerate()
            .find_map(|(i, line)| {
                let len = line.len() as u32;
                // `<=` keeps an offset on the newline (or at end of input)
                // on this line; it also guarantees `start > len` below, so
                // the subtraction cannot underflow.
                if start <= len {
                    return Some((i + 1, start as usize + 1));
                }
                start -= len + 1;
                None
            })
            .unwrap_or((1, 1))
    }
}
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    /// Produces the next token, skipping whitespace, or `None` once the
    /// input is exhausted.
    ///
    /// Unrecognised input yields an `Error` token. A non-ASCII character is
    /// reported as a single `Error` token covering all of its bytes, so token
    /// ranges always fall on UTF-8 character boundaries.
    fn next(&mut self) -> Option<Self::Item> {
        use TokenKind as T;
        loop {
            let start = self.pos;
            let kind = match self.advance()? {
                b'\n' | b'\r' | b'\t' | b' ' => continue,
                b'0'..=b'9' => {
                    while let Some(b'0'..=b'9') = self.peek() {
                        self.advance();
                    }
                    T::Number
                }
                b'a'..=b'z' | b'A'..=b'Z' | b'_' => {
                    while let Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_') = self.peek() {
                        self.advance();
                    }
                    // keywords are identifiers with a reserved spelling
                    let ident = &self.bytes[start as usize..self.pos as usize];
                    match ident {
                        b"return" => T::Return,
                        _ => T::Ident,
                    }
                }
                b':' => match self.advance_if(b'=') {
                    true => T::Decl,
                    false => T::Colon,
                },
                b';' => T::Semi,
                b'|' => match self.advance_if(b'|') {
                    true => T::Or,
                    false => T::Error,
                },
                b'(' => T::LParen,
                b')' => T::RParen,
                b'{' => T::LBrace,
                b'}' => T::RBrace,
                b'[' => T::LBrack,
                b']' => T::RBrack,
                _ => {
                    // The input originates from a `&str`, so a multi-byte
                    // character is a lead byte followed by continuation bytes
                    // (0x80..=0xBF). Swallow them so the error token does not
                    // end in the middle of a character.
                    while let Some(0x80..=0xBF) = self.peek() {
                        self.advance();
                    }
                    T::Error
                }
            };
            return Some(Token {
                kind,
                start,
                end: self.pos,
            });
        }
    }
}
#[cfg(test)]
mod tests {
    /// Test runner: writes one line per token (kind and source text) into
    /// `output`, up to and including the `Eof` token.
    fn lex(input: &'static str, output: &mut String) {
        use {
            super::{Lexer, TokenKind as T},
            std::fmt::Write,
        };
        let mut lexer = Lexer::new(input);
        let mut reached_eof = false;
        while !reached_eof {
            let token = lexer.next();
            let text = &input[token.range()];
            writeln!(output, "{:?} {:?}", token.kind, text).unwrap();
            reached_eof = token.kind == T::Eof;
        }
    }

    crate::run_tests! { lex:
        empty => "";
        whitespace => " \t\n\r";
        examples => include_str!("../examples/main_fn.hb");
    }
}

20
hblang/src/lib.rs Normal file
View file

@ -0,0 +1,20 @@
#![feature(noop_waker)]
// Generates one `#[test]` function per `name => input;` entry.
// Each generated test calls `$crate::tests::run_test` with the type name of
// the generated function (used as the test's name), the input expression and
// the runner given before the colon.
#[macro_export]
macro_rules! run_tests {
($runner:path: $($name:ident => $input:expr;)*) => {$(
#[test]
fn $name() {
// `type_name_of_val(&$name)` yields the path of this test function
$crate::tests::run_test(std::any::type_name_of_val(&$name), $input, $runner);
}
)*};
}
mod codegen;
mod lexer;
mod parser;
mod tests;
mod typechk;
/// Calls `f` immediately and returns its result.
///
/// Wrapping code in a closure gives `?` a boundary to return to, so a caller
/// can still run cleanup code after a fallible section (see the block
/// formatting in the parser's `Display` impl).
pub fn try_block<R>(f: impl FnOnce() -> R) -> R {
f()
}

161
hblang/src/parser.rs Normal file
View file

@ -0,0 +1,161 @@
use std::{cell::Cell, ops::Not};
use crate::lexer::{Lexer, Token, TokenKind};
/// Shared reference that lives for the rest of the program.
type Ptr<T> = &'static T;

/// Moves `val` to the heap and leaks it, yielding a `'static` reference.
/// The allocation is never freed.
fn ptr<T>(val: T) -> Ptr<T> {
    let boxed = Box::new(val);
    Box::leak(boxed)
}
/// Parser with one token of lookahead.
pub struct Parser<'a> {
// path shown in error messages
path: &'a std::path::Path,
lexer: Lexer<'a>,
// lookahead: the next token that has not been consumed yet
token: Token,
}
impl<'a> Parser<'a> {
    /// Creates a parser over `input`; `path` is only used in error messages.
    pub fn new(input: &'a str, path: &'a std::path::Path) -> Self {
        let mut lexer = Lexer::new(input);
        let token = lexer.next();
        Self { lexer, token, path }
    }

    /// Consumes the lookahead token and returns it, fetching a new lookahead.
    fn next(&mut self) -> Token {
        std::mem::replace(&mut self.token, self.lexer.next())
    }

    /// Parses expressions until end of input.
    pub fn file(&mut self) -> Vec<Expr> {
        std::iter::from_fn(|| (self.token.kind != TokenKind::Eof).then(|| self.expr())).collect()
    }

    /// Parses one expression and leaks it to obtain a `'static` reference.
    fn ptr_expr(&mut self) -> Ptr<Expr> {
        ptr(self.expr())
    }

    /// Parses a single expression, consuming one trailing `;` if present.
    ///
    /// # Panics
    /// Reports the problem and panics on an unexpected token or an
    /// out-of-range number literal.
    pub fn expr(&mut self) -> Expr {
        let token = self.next();
        // `self.token` is already the lookahead, so keep the position of the
        // token being parsed for error messages about it.
        let start = token.start;
        let expr = match token.kind {
            TokenKind::Ident => {
                let name = self.lexer.slice(token).to_owned().leak();
                if self.advance_if(TokenKind::Decl) {
                    let val = self.ptr_expr();
                    Expr::Decl { name, val }
                } else {
                    Expr::Ident { name }
                }
            }
            TokenKind::Return => Expr::Return {
                // `return;` carries no value
                val: (self.token.kind != TokenKind::Semi).then(|| self.ptr_expr()),
            },
            TokenKind::Or => {
                self.expect_advance(TokenKind::Colon);
                let ret = self.ptr_expr();
                let body = self.ptr_expr();
                Expr::Closure { ret, body }
            }
            TokenKind::LBrace => Expr::Block {
                stmts: std::iter::from_fn(|| {
                    self.advance_if(TokenKind::RBrace)
                        .not()
                        .then(|| self.expr())
                })
                .collect::<Vec<_>>(),
            },
            TokenKind::Number => Expr::Number {
                value: match self.lexer.slice(token).parse() {
                    Ok(value) => value,
                    Err(e) => self.report_at(start, format_args!("invalid number: {e}")),
                },
            },
            tok => self.report_at(start, format_args!("unexpected token: {:?}", tok)),
        };
        self.advance_if(TokenKind::Semi);
        expr
    }

    /// Consumes the lookahead token if it is of `kind` and reports whether it did.
    fn advance_if(&mut self, kind: TokenKind) -> bool {
        if self.token.kind == kind {
            self.next();
            true
        } else {
            false
        }
    }

    /// Consumes the lookahead token, which must be of `kind`; reports an
    /// error and panics otherwise.
    fn expect_advance(&mut self, kind: TokenKind) {
        if self.token.kind != kind {
            self.report(format_args!(
                "expected {:?}, found {:?}",
                kind, self.token.kind
            ));
        }
        self.next();
    }

    /// Reports `msg` at the position of the lookahead token and panics.
    fn report(&self, msg: impl std::fmt::Display) -> ! {
        self.report_at(self.token.start, msg)
    }

    /// Prints `path:line:col => msg` for byte offset `pos` to stderr and panics.
    fn report_at(&self, pos: u32, msg: impl std::fmt::Display) -> ! {
        let (line, col) = self.lexer.line_col(pos);
        eprintln!("{}:{}:{} => {}", self.path.display(), line, col, msg);
        // This point is reached on every syntax error, so it is an ordinary
        // failure rather than "unreachable code".
        panic!("aborting after parse error");
    }
}
/// Syntax tree node produced by the parser.
#[derive(Debug)]
pub enum Expr {
// `name := val`
Decl { name: Ptr<str>, val: Ptr<Expr> },
// `||: ret body`
Closure { ret: Ptr<Expr>, body: Ptr<Expr> },
// `return;` or `return val;`
Return { val: Option<Ptr<Expr>> },
// bare identifier
Ident { name: Ptr<str> },
// `{ stmts }`
Block { stmts: Vec<Expr> },
// decimal number literal
Number { value: u64 },
}
impl std::fmt::Display for Expr {
    /// Pretty-prints the expression as source text.
    ///
    /// Block nesting depth is tracked in a thread-local counter because
    /// `Display::fmt` has no way to pass extra state to nested calls.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        thread_local! {
            static INDENT: Cell<usize> = Cell::new(0);
        }
        match self {
            Self::Decl { name, val } => write!(f, "{} := {}", name, val),
            Self::Closure { ret, body } => write!(f, "||: {} {}", ret, body),
            Self::Return { val: Some(val) } => write!(f, "return {};", val),
            Self::Return { val: None } => write!(f, "return;"),
            Self::Ident { name } => write!(f, "{}", name),
            Self::Block { stmts } => {
                writeln!(f, "{{")?;
                INDENT.with(|i| i.set(i.get() + 1));
                // Run the fallible part inside `try_block` so the depth
                // counter is restored even when writing fails.
                let res: std::fmt::Result = crate::try_block(|| {
                    for stmt in stmts {
                        for _ in 0..INDENT.with(|i| i.get()) {
                            write!(f, " ")?;
                        }
                        writeln!(f, "{}", stmt)?;
                    }
                    Ok(())
                });
                INDENT.with(|i| i.set(i.get() - 1));
                res?;
                // The closing brace belongs to the enclosing depth; without
                // this a nested block would end in column zero.
                for _ in 0..INDENT.with(|i| i.get()) {
                    write!(f, " ")?;
                }
                write!(f, "}}")
            }
            Self::Number { value } => write!(f, "{}", value),
        }
    }
}
#[cfg(test)]
mod tests {
    /// Test runner: parses `input` and writes each top-level expression,
    /// pretty-printed, on its own line into `output`.
    fn parse(input: &'static str, output: &mut String) {
        use std::fmt::Write;
        let path = std::path::Path::new("test");
        let exprs = super::Parser::new(input, path).file();
        exprs
            .iter()
            .for_each(|expr| writeln!(output, "{}", expr).unwrap());
    }

    crate::run_tests! { parse:
        example => include_str!("../examples/main_fn.hb");
    }
}

53
hblang/src/tests.rs Normal file
View file

@ -0,0 +1,53 @@
#![cfg(test)]
/// Runs one golden-file test.
///
/// `test` is called with `input` and fills a string, which is compared with
/// the contents of `<root>/<name>.txt` (a missing file counts as empty).
///
/// Environment variables:
/// - `PT_FILTER`: when non-empty, tests whose name does not contain it are skipped.
/// - `PT_TEST_ROOT`: directory holding the golden files (default `tests`).
/// - `PT_UPDATE`: when set, a differing golden file is overwritten instead of failing.
///
/// # Panics
/// Panics when the output differs from the golden file (after showing a
/// `diff`), or when a file-system or process operation fails.
pub fn run_test(name: &'static str, input: &'static str, test: fn(&'static str, &mut String)) {
    use std::{io::Write, path::PathBuf};

    let filter = std::env::var("PT_FILTER").unwrap_or_default();
    if !filter.is_empty() && !name.contains(&filter) {
        return;
    }

    let mut output = String::new();
    test(input, &mut output);

    let mut root =
        PathBuf::from(std::env::var("PT_TEST_ROOT").unwrap_or_else(|_| "tests".to_string()));
    root.push(name);
    root.set_extension("txt");

    let expected = std::fs::read_to_string(&root).unwrap_or_default();
    if output == expected {
        return;
    }

    // Both branches below write into the golden directory, so make sure it
    // exists first; previously update mode failed on a fresh checkout.
    if let Some(dir) = root.parent() {
        std::fs::create_dir_all(dir).unwrap();
    }

    if std::env::var("PT_UPDATE").is_ok() {
        std::fs::write(&root, output).unwrap();
        return;
    }

    // `diff` needs an existing file to compare against
    if !root.exists() {
        std::fs::write(&root, vec![]).unwrap();
    }

    let mut proc = std::process::Command::new("diff")
        .arg("-u")
        .arg("--color")
        .arg(&root)
        .arg("-")
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::inherit())
        .spawn()
        .unwrap();

    proc.stdin
        .as_mut()
        .unwrap()
        .write_all(output.as_bytes())
        .unwrap();

    // `wait` closes the child's stdin before waiting, which lets `diff` finish
    proc.wait().unwrap();

    panic!(
        "output of test `{}` differs from {}",
        name,
        root.display()
    );
}

0
hblang/src/typechk.rs Normal file
View file

View file

@ -0,0 +1,9 @@
start:
jala r0, main, 0
tx
main:
subi64 r254, r254, 8
ld r31, r254, r0, 8
jala r0, r31, 0

View file

@ -0,0 +1 @@
Eof ""

View file

@ -0,0 +1,10 @@
Ident "main"
Decl ":="
Or "||"
Colon ":"
Ident "void"
LBrace "{"
Return "return"
Semi ";"
RBrace "}"
Eof ""

View file

@ -0,0 +1 @@
Eof ""

View file

@ -0,0 +1,3 @@
main := ||: void {
return;
}