From 1c08148dc91e47cde5d1fa19cc88dc0b7c59389f Mon Sep 17 00:00:00 2001 From: mlokr Date: Thu, 9 May 2024 23:41:59 +0200 Subject: [PATCH] starting from zero again --- hblang/Cargo.toml | 6 + hblang/examples/main_fn.hb | 3 + hblang/hblang-f/Cargo.toml | 11 - hblang/hblang-f/src/codegen.rs | 595 ---------------- hblang/hblang-f/src/lexer.rs | 151 ---- hblang/hblang-f/src/lib.rs | 6 - hblang/hblang-f/src/parser.rs | 566 --------------- hblang/hblang-f/src/typechk.rs | 20 - hblang/src/codegen.rs | 642 +++--------------- hblang/src/lexer.rs | 169 +++++ hblang/src/lib.rs | 20 + hblang/src/parser.rs | 161 +++++ hblang/src/tests.rs | 53 ++ hblang/src/typechk.rs | 0 .../tests/hblang::codegen::tests::example.txt | 9 + hblang/tests/hblang::lexer::tests::empty.txt | 1 + .../tests/hblang::lexer::tests::examples.txt | 10 + .../hblang::lexer::tests::whitespace.txt | 1 + .../tests/hblang::parser::tests::example.txt | 3 + 19 files changed, 515 insertions(+), 1912 deletions(-) create mode 100644 hblang/Cargo.toml create mode 100644 hblang/examples/main_fn.hb delete mode 100644 hblang/hblang-f/Cargo.toml delete mode 100644 hblang/hblang-f/src/codegen.rs delete mode 100644 hblang/hblang-f/src/lexer.rs delete mode 100644 hblang/hblang-f/src/lib.rs delete mode 100644 hblang/hblang-f/src/parser.rs delete mode 100644 hblang/hblang-f/src/typechk.rs create mode 100644 hblang/src/lexer.rs create mode 100644 hblang/src/lib.rs create mode 100644 hblang/src/parser.rs create mode 100644 hblang/src/tests.rs create mode 100644 hblang/src/typechk.rs create mode 100644 hblang/tests/hblang::codegen::tests::example.txt create mode 100644 hblang/tests/hblang::lexer::tests::empty.txt create mode 100644 hblang/tests/hblang::lexer::tests::examples.txt create mode 100644 hblang/tests/hblang::lexer::tests::whitespace.txt create mode 100644 hblang/tests/hblang::parser::tests::example.txt diff --git a/hblang/Cargo.toml b/hblang/Cargo.toml new file mode 100644 index 00000000..4c70389a --- /dev/null +++ b/hblang/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "hblang" +version = "0.1.0" +edition = "2021" + +[dependencies] diff --git a/hblang/examples/main_fn.hb b/hblang/examples/main_fn.hb new file mode 100644 index 00000000..2a60bce3 --- /dev/null +++ b/hblang/examples/main_fn.hb @@ -0,0 +1,3 @@ +main := ||: void { + return; +} diff --git a/hblang/hblang-f/Cargo.toml b/hblang/hblang-f/Cargo.toml deleted file mode 100644 index 0e19b10a..00000000 --- a/hblang/hblang-f/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] -name = "hblang" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -hbbytecode = { version = "0.1.0", path = "../hbbytecode" } -logos = "0.13.0" - diff --git a/hblang/hblang-f/src/codegen.rs b/hblang/hblang-f/src/codegen.rs deleted file mode 100644 index fdd8a367..00000000 --- a/hblang/hblang-f/src/codegen.rs +++ /dev/null @@ -1,595 +0,0 @@ -use std::{iter::Cycle, ops::Range, usize}; - -use crate::{ - lexer::{self, Ty}, - parser::{Exp, Function, Item, Literal, Struct, Type}, -}; - -type Reg = u8; -type Offset = i32; -type Pushed = bool; -type SlotIndex = usize; -type Label = usize; -type Data = usize; -type Size = usize; - -//| Register | Description | Saver | -//|:-----------|:--------------------|:-------| -//| r0 | Hard-wired zero | N/A | -//| r1 - r2 | Return values | Caller | -//| r2 - r11 | Function parameters | Caller | -//| r12 - r30 | General purpose | Caller | -//| r31 | Return address | Caller | -//| r32 - r253 | General purpose | Callee | -//| r254 | Stack pointer | Callee | -//| r255 | Thread pointer | N/A | - -struct RegAlloc { - pub regs: Box<[Option; 256]>, - pub used: Box<[bool; 256]>, - pub spill_cycle: Cycle>, -} - -impl RegAlloc { - const STACK_POINTER: Reg = 254; - const ZERO: Reg = 0; - const RETURN_ADDRESS: Reg = 31; - - fn alloc_return(&mut self, slot: usize) -> Option { - self.regs[1..2] - .iter_mut() - .position(|reg| { - if reg.is_none() { - *reg = Some(slot); - true - } else { - false - } - }) - .map(|reg| reg as Reg + 1) - } - - fn alloc_general(&mut self, slot: usize) -> Option { - self.regs[32..254] - .iter_mut() - .zip(&mut self.used[32..254]) - .position(|(reg, used)| { - if reg.is_none() { - *reg = Some(slot); - *used = true; - true - } else { - false - } - }) - .map(|reg| reg as Reg + 32) - } - - fn free(&mut self, reg: Reg) { - assert!(self.regs[reg as usize].take().is_some()); - } - - fn is_used(&self, reg: Reg) -> bool { - self.regs[reg as usize].is_some() - } - - fn spill(&mut self, for_slot: usize) -> (Reg, Option) { - let to_spill = self.spill_cycle.next().unwrap(); - let slot = self.spill_specific(to_spill, for_slot); - (to_spill as Reg + 32, slot) - } - - fn spill_specific(&mut self, reg: Reg, for_slot: usize) -> Option { - self.regs[reg as usize].replace(for_slot) - } - - fn restore(&mut self, reg: Reg, slot: usize) -> usize { - self.regs[reg as usize].replace(slot).unwrap() - } - - fn alloc_specific(&mut self, reg: u8, to: usize) { - assert!(self.regs[reg as usize].replace(to).is_none()); - } - - fn alloc_specific_in_reg(&mut self, reg: InReg, to: usize) { - match reg { - InReg::Single(r) => self.alloc_specific(r, to), - InReg::Pair(r1, r2) => { - self.alloc_specific(r1, to); - self.alloc_specific(r2, to); - } - } - } -} - -pub struct ParamAlloc { - reg_range: Range, - stack: Offset, -} - -impl ParamAlloc { - fn new() -> Self { - Self { - stack: 8, // return adress is in callers stack frame - reg_range: 2..12, - } - } - - fn alloc(&mut self, size: usize) -> SlotValue { - match self.try_alloc_regs(size) { - Some(reg) => reg, - None => { - let stack = self.stack; - self.stack += size as Offset; - SlotValue::Stack(stack) - } - } - } - - fn try_alloc_regs(&mut self, size: usize) -> Option { - let mut needed = size.div_ceil(8); - if needed > 2 { - needed = 1; // passed by ref - } - - if self.reg_range.len() < needed { - return None; - } - - match needed { - 1 => { - let reg = self.reg_range.start; - self.reg_range.start += 1; - Some(SlotValue::Reg(InReg::Single(reg))) - } - 2 => { - let reg = self.reg_range.start; - self.reg_range.start += 2; - Some(SlotValue::Reg(InReg::Pair(reg, reg + 1))) - } - _ => unreachable!(), - } - } -} - -impl Default for RegAlloc { - fn default() -> Self { - Self { - regs: Box::new([None; 256]), - used: Box::new([false; 256]), - spill_cycle: (32..254).cycle(), - } - } -} - -struct Variable { - name: String, - location: usize, -} - -#[derive(Clone, Copy)] -struct SlotId { - // index into slot stack - index: SlotIndex, - // temorary offset carried over when eg. accessing fields - offset: Offset, - // this means we can mutate the value as part of computation - owned: bool, -} - -impl SlotId { - fn base(location: usize) -> Self { - Self { - index: location, - offset: 0, - owned: true, - } - } - - fn borrowed(self) -> Self { - Self { - owned: false, - ..self - } - } -} - -struct Slot { - ty: Type, - value: SlotValue, -} - -#[repr(transparent)] -struct InstBuffer { - buffer: Vec, -} - -impl InstBuffer { - fn new(vec: &mut Vec) -> &mut Self { - unsafe { &mut *(vec as *mut Vec as *mut Self) } - } -} - -impl hbbytecode::Buffer for InstBuffer { - fn reserve(&mut self, bytes: usize) { - self.buffer.reserve(bytes); - } - - unsafe fn write(&mut self, byte: u8) { - self.buffer.push(byte); - } -} - -#[derive(Clone, Copy)] -enum InReg { - Single(Reg), - // if one of the registes is allocated, the other is too, ALWAYS - // with the same slot - Pair(Reg, Reg), -} - -#[derive(Clone, Copy)] -enum Spill { - Reg(InReg), - Stack(Offset), // relative to frame end (rsp if nothing was pushed) -} - -#[derive(Clone, Copy)] -enum SlotValue { - Reg(InReg), - Stack(Offset), // relative to frame start (rbp) - Imm(u64), - Spilled(Spill, SlotIndex), -} - -pub struct Value { - store: ValueStore, - offset: Offset, -} - -#[derive(Clone, Copy)] -enum ValueStore { - Reg(InReg), - Stack(Offset, Pushed), - Imm(u64), -} - -impl From for ValueStore { - fn from(value: SlotValue) -> Self { - match value { - SlotValue::Reg(reg) => ValueStore::Reg(reg), - SlotValue::Stack(offset) => ValueStore::Stack(offset, false), - SlotValue::Imm(imm) => ValueStore::Imm(imm), - SlotValue::Spilled(spill, _) => match spill { - Spill::Reg(reg) => ValueStore::Reg(reg), - Spill::Stack(offset) => ValueStore::Stack(offset, true), - }, - } - } -} - -pub struct LabelReloc { - pub label: Label, - pub offset: usize, -} - -pub struct DataReloc { - pub data: Data, - pub offset: usize, -} - -#[must_use] -pub struct Frame { - pub slot_count: usize, - pub var_count: usize, -} - -enum Instr { - BinOp(lexer::Op, Value, Value), - Move(Size, Value, Value), - Push(Reg), - Jump(Label), - Call(String), - JumpIfZero(Value, Label), -} - -#[derive(Default)] -pub struct Generator<'a> { - ast: &'a [Item], - - func_labels: Vec<(String, Label)>, - - stack_size: Offset, - pushed_size: Offset, - - regs: RegAlloc, - variables: Vec, - slots: Vec, - - labels: Vec>, - label_relocs: Vec, - - data: Vec>, - data_relocs: Vec, - - code_section: Vec, - data_section: Vec, - - instrs: Vec, -} - -impl<'a> Generator<'a> { - fn generate(mut self) -> Vec { - for item in self.ast { - let Item::Function(f) = item else { continue }; - self.generate_function(f); - } - - self.link() - } - - fn generate_function(&mut self, f: &Function) { - let frame = self.push_frame(); - - let mut param_alloc = ParamAlloc::new(); - - for param in f.args.iter() { - let param_size = self.size_of(¶m.ty); - let value = param_alloc.alloc(param_size); - let slot = self.add_slot(param.ty.clone(), value); - if let SlotValue::Reg(reg) = value { - self.regs.alloc_specific_in_reg(reg, slot); - } - self.add_variable(param.name.clone(), slot); - } - - for stmt in f.body.iter() { - assert!(self - .generate_expr(Some(Type::Builtin(Ty::Void)), stmt) - .is_none()); - } - - self.pop_frame(frame); - } - - fn generate_expr(&mut self, expected: Option, expr: &Exp) -> Option { - let value = match expr { - Exp::Literal(lit) => SlotId::base(match lit { - Literal::Int(i) => self.add_slot(expected.clone().unwrap(), SlotValue::Imm(*i)), - Literal::Bool(b) => { - self.add_slot(Type::Builtin(Ty::Bool), SlotValue::Imm(*b as u64)) - } - }), - Exp::Variable(ident) => { - SlotId::base(self.lookup_variable(ident).unwrap().location).borrowed() - } - Exp::Call { name, args } => self.generate_call(expected.clone(), name, args), - Exp::Ctor { name, fields } => todo!(), - Exp::Index { base, index } => todo!(), - Exp::Field { base, field } => todo!(), - Exp::Unary { op, exp } => todo!(), - Exp::Binary { op, left, right } => todo!(), - Exp::If { cond, then, else_ } => todo!(), - Exp::Let { name, ty, value } => todo!(), - Exp::For { - init, - cond, - step, - block, - } => todo!(), - Exp::Block(_) => todo!(), - Exp::Return(_) => todo!(), - Exp::Break => todo!(), - Exp::Continue => todo!(), - }; - - if let Some(expected) = expected { - let actual = self.slots[value.index].ty.clone(); - assert_eq!(expected, actual); - } - - Some(value) - } - - fn generate_call(&mut self, expected: Option, name: &str, args: &[Exp]) -> SlotId { - let frame = self.push_frame(); - let func = self.lookup_function(name); - - let mut arg_alloc = ParamAlloc::new(); - for (arg, param) in args.iter().zip(&func.args) { - let arg_slot = self.generate_expr(Some(param.ty.clone()), arg).unwrap(); - let arg_size = self.size_of(¶m.ty); - let param_slot = arg_alloc.alloc(arg_size); - self.set_temporarly(arg_slot, param_slot); - } - - self.instrs.push(Instr::Call(name.to_owned())); - - todo!() - } - - fn set_temporarly(&mut self, from: SlotId, to: SlotValue) { - let to = self.make_mutable(to, from.index); - let to_slot = self.add_slot(self.slots[from.index].ty.clone(), to); - self.emit_move(from, SlotId::base(to_slot)); - } - - fn make_mutable(&mut self, target: SlotValue, by: SlotIndex) -> SlotValue { - match target { - SlotValue::Reg(in_reg) => { - self.regs.alloc_specific_in_reg(in_reg, by); - target - } - SlotValue::Spilled(Spill::Reg(in_reg), slot) => { - let new_val = SlotValue::Spilled( - match in_reg { - InReg::Single(reg) => Spill::Stack(self.emmit_push(reg)), - InReg::Pair(r1, r2) => { - self.emmit_push(r2); - Spill::Stack(self.emmit_push(r1)) - } - }, - slot, - ); - let new_slot = self.add_slot(self.slots[slot].ty.clone(), new_val); - SlotValue::Spilled(Spill::Reg(in_reg), new_slot) - } - _ => unreachable!(), - } - } - - fn emmit_push(&mut self, reg: Reg) -> Offset { - self.pushed_size += 8; - self.instrs.push(Instr::Push(reg)); - self.pushed_size - } - - fn emit_move(&mut self, from: SlotId, to: SlotId) { - let size = self.size_of(&self.slots[from.index].ty); - let other_size = self.size_of(&self.slots[to.index].ty); - assert_eq!(size, other_size); - - self.instrs.push(Instr::Move( - size, - self.slot_to_value(from), - self.slot_to_value(to), - )); - } - - fn slot_to_value(&self, slot: SlotId) -> Value { - let slot_val = &self.slots[slot.index]; - Value { - store: slot_val.value.into(), - offset: slot.offset, - } - } - - fn size_of(&self, ty: &Type) -> Size { - match ty { - Type::Builtin(ty) => match ty { - Ty::U8 | Ty::I8 | Ty::Bool => 1, - Ty::U16 | Ty::I16 => 2, - Ty::U32 | Ty::I32 => 4, - Ty::U64 | Ty::I64 => 8, - Ty::Void => 0, - }, - Type::Struct(name) => self - .lookup_struct(name) - .fields - .iter() - .map(|field| self.size_of(&field.ty)) - .sum(), - Type::Pinter(_) => 8, - } - } -} - -impl<'a> Generator<'a> { - fn add_variable(&mut self, name: String, location: usize) { - self.variables.push(Variable { name, location }); - } - - fn add_slot(&mut self, ty: Type, value: SlotValue) -> usize { - let slot = self.slots.len(); - self.slots.push(Slot { ty, value }); - slot - } - - fn link(mut self) -> Vec { - for reloc in self.label_relocs { - let label = self.labels[reloc.label].unwrap(); - let offset = reloc.offset; - let target = label - offset; - let target_bytes = u64::to_le_bytes(target as u64); - self.code_section[offset..offset + 8].copy_from_slice(&target_bytes); - } - - for reloc in self.data_relocs { - let data = self.data[reloc.data].unwrap(); - let offset = reloc.offset; - let target = data; - let target_bytes = u64::to_le_bytes((target + self.code_section.len()) as u64); - self.data_section[offset..offset + 8].copy_from_slice(&target_bytes); - } - - self.code_section.extend_from_slice(&self.data_section); - self.code_section - } - - fn lookup_func_label(&mut self, name: &str) -> Label { - if let Some(label) = self.func_labels.iter().find(|(n, _)| n == name) { - return label.1; - } - - panic!("Function not found: {}", name); - } - - fn declare_label(&mut self) -> Label { - self.labels.push(None); - self.labels.len() - 1 - } - - fn define_label(&mut self, label: Label) { - self.labels[label] = Some(self.code_section.len()); - } - - fn declare_data(&mut self) -> Data { - self.data.push(None); - self.data.len() - 1 - } - - fn define_data(&mut self, data: Data, bytes: &[u8]) { - self.data[data] = Some(self.data.len()); - self.data_section.extend_from_slice(bytes); - } - - fn lookup_struct(&self, name: &str) -> &Struct { - self.lookup_item(name) - .map(|item| match item { - Item::Struct(s) => s, - _ => panic!("Not a struct: {}", name), - }) - .expect("Struct not found") - } - - fn lookup_function(&self, name: &str) -> &'a Function { - self.lookup_item(name) - .map(|item| match item { - Item::Function(f) => f, - _ => panic!("Not a function: {}", name), - }) - .expect("Function not found") - } - - fn lookup_item(&self, name: &str) -> Option<&'a Item> { - self.ast.iter().find(|item| match item { - Item::Import(_) => false, - Item::Struct(s) => s.name == name, - Item::Function(f) => f.name == name, - }) - } - - fn lookup_variable(&self, name: &str) -> Option<&Variable> { - self.variables.iter().find(|variable| variable.name == name) - } - - fn push_frame(&mut self) -> Frame { - Frame { - slot_count: self.slots.len(), - var_count: self.variables.len(), - } - } - - fn pop_frame(&mut self, frame: Frame) { - self.slots.truncate(frame.slot_count); - self.variables.truncate(frame.var_count); - } -} - -pub fn generate(ast: &[Item]) -> Vec { - Generator { - ast, - ..Default::default() - } - .generate() -} diff --git a/hblang/hblang-f/src/lexer.rs b/hblang/hblang-f/src/lexer.rs deleted file mode 100644 index 11ee2669..00000000 --- a/hblang/hblang-f/src/lexer.rs +++ /dev/null @@ -1,151 +0,0 @@ -use logos::Logos; - -macro_rules! gen_token { - ($name:ident { - keywords: { - $($keyword:ident = $lit:literal,)* - }, - operators: $op_name:ident { - $($prec:literal: {$( - $op:ident = $op_lit:literal, - )*},)* - }, - types: $ty_type:ident { - $($ty:ident = $ty_lit:literal,)* - }, - regexes: { - $($regex:ident = $regex_lit:literal,)* - }, - }) => { - #[derive(Debug, Clone, PartialEq, Eq, Copy, Logos)] - #[logos(skip "[ \t\n]+")] - pub enum $name { - $(#[token($lit)] $keyword,)* - $($(#[token($op_lit, |_| $op_name::$op)])*)* - Op($op_name), - $(#[token($ty_lit, |_| $ty_type::$ty)])* - Ty($ty_type), - $(#[regex($regex_lit)] $regex,)* - } - - #[derive(Debug, Clone, PartialEq, Eq, Copy)] - pub enum $op_name { - $($($op,)*)* - } - - #[derive(Debug, Clone, PartialEq, Eq, Copy)] - pub enum $ty_type { - $($ty,)* - } - - impl $op_name { - pub fn prec(&self) -> u8 { - match self { - $($($op_name::$op => $prec,)*)* - } - } - } - }; -} - -gen_token! { - TokenKind { - keywords: { - Use = "use", - Fn = "fn", - Let = "let", - If = "if", - Else = "else", - For = "for", - Return = "return", - Break = "break", - Continue = "continue", - Struct = "struct", - - True = "true", - False = "false", - - LBrace = "{", - RBrace = "}", - LParen = "(", - RParen = ")", - LBracket = "[", - RBracket = "]", - - Colon = ":", - Semicolon = ";", - Comma = ",", - Dot = ".", - }, - operators: Op { - 14: { - Assign = "=", - AddAssign = "+=", - SubAssign = "-=", - MulAssign = "*=", - DivAssign = "/=", - ModAssign = "%=", - AndAssign = "&=", - OrAssign = "|=", - XorAssign = "^=", - ShlAssign = "<<=", - ShrAssign = ">>=", - }, - 12: { - Or = "||", - }, - 11: { - And = "&&", - }, - 10: { - Bor = "|", - }, - 9: { - Xor = "^", - }, - 8: { - Band = "&", - }, - 7: { - Eq = "==", - Neq = "!=", - }, - 6: { - Lt = "<", - Gt = ">", - Le = "<=", - Ge = ">=", - }, - 5: { - Shl = "<<", - Shr = ">>", - }, - 4: { - Add = "+", - Sub = "-", - }, - 3: { - Mul = "*", - Div = "/", - Mod = "%", - }, - }, - types: Ty { - U8 = "u8", - U16 = "u16", - U32 = "u32", - U64 = "u64", - I8 = "i8", - I16 = "i16", - I32 = "i32", - I64 = "i64", - Bool = "bool", - Void = "void", - }, - regexes: { - Ident = "[a-zA-Z_][a-zA-Z0-9_]*", - String = r#""([^"\\]|\\.)*""#, - Number = "[0-9]+", - }, - } -} diff --git a/hblang/hblang-f/src/lib.rs b/hblang/hblang-f/src/lib.rs deleted file mode 100644 index 04776791..00000000 --- a/hblang/hblang-f/src/lib.rs +++ /dev/null @@ -1,6 +0,0 @@ -#![allow(dead_code)] - -mod codegen; -mod lexer; -mod parser; -mod typechk; diff --git a/hblang/hblang-f/src/parser.rs b/hblang/hblang-f/src/parser.rs deleted file mode 100644 index e3ed3982..00000000 --- a/hblang/hblang-f/src/parser.rs +++ /dev/null @@ -1,566 +0,0 @@ -use {core::panic, std::iter}; - -use std::array; - -use logos::{Lexer, Logos}; - -use crate::lexer::{Op, TokenKind, Ty}; - -#[derive(Clone, Debug)] -pub enum Item { - Import(String), - Struct(Struct), - Function(Function), -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Type { - Builtin(Ty), - Struct(String), - Pinter(Box), -} - -#[derive(Clone, Debug)] -pub struct Struct { - pub name: String, - pub fields: Vec, -} - -#[derive(Clone, Debug)] -pub struct Field { - pub name: String, - pub ty: Type, -} - -#[derive(Clone, Debug)] -pub struct Function { - pub name: String, - pub args: Vec, - pub ret: Type, - pub body: Vec, -} - -#[derive(Clone, Debug)] -pub struct Arg { - pub name: String, - pub ty: Type, -} - -#[derive(Clone, Debug)] -pub struct CtorField { - pub name: String, - pub value: Exp, -} - -#[derive(Clone, Debug)] -pub enum Exp { - Literal(Literal), - Variable(String), - Call { - name: String, - args: Vec, - }, - Ctor { - name: Option>, - fields: Vec, - }, - Index { - base: Box, - index: Box, - }, - Field { - base: Box, - field: String, - }, - Unary { - op: Op, - exp: Box, - }, - Binary { - op: Op, - left: Box, - right: Box, - }, - If { - cond: Box, - then: Box, - else_: Option>, - }, - Let { - name: String, - ty: Option, - value: Box, - }, - For { - init: Option>, - cond: Option>, - step: Option>, - block: Box, - }, - Block(Vec), - Return(Option>), - Break, - Continue, -} - -#[derive(Clone, Debug)] -pub enum Literal { - Int(u64), - Bool(bool), -} - -#[derive(Debug, PartialEq, Clone)] -pub struct Token { - pub kind: TokenKind, - pub span: std::ops::Range, - pub value: String, -} - -struct Parser<'a> { - next_token: Option, - lexer: logos::Lexer<'a, TokenKind>, -} - -impl<'a> Parser<'a> { - pub fn new(input: &'a str) -> Self { - let mut lexer = TokenKind::lexer(input); - let next_token = Self::next_token(&mut lexer); - Self { next_token, lexer } - } - - pub fn next(&mut self) -> Option { - let token = self.next_token.clone(); - self.next_token = Self::next_token(&mut self.lexer); - token - } - - pub fn next_token(lexer: &mut Lexer) -> Option { - lexer.next().map(|r| { - r.map(|e| Token { - kind: e, - span: lexer.span(), - value: lexer.slice().to_owned(), - }) - .unwrap_or_else(|e| { - let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start); - println!("Lexer error: {}:{}: {:?}", line, col, e); - std::process::exit(1); - }) - }) - } - - pub fn pos_to_line_col(&self, pos: usize) -> (usize, usize) { - Self::pos_to_line_col_low(self.lexer.source(), pos) - } - - pub fn pos_to_line_col_low(source: &str, pos: usize) -> (usize, usize) { - let line = source[..pos].lines().count(); - let col = source[..pos].lines().last().map(|l| l.len()).unwrap_or(0); - (line, col) - } - - pub fn expect(&mut self, kind: TokenKind) -> Token { - let token = self.expect_any(); - if token.kind == kind { - token - } else { - let (line, col) = self.pos_to_line_col(token.span.start); - panic!( - "Expected {:?} at {}:{}, found {:?}", - kind, line, col, token.kind - ) - } - } - - pub fn expect_any(&mut self) -> Token { - self.next().unwrap_or_else(|| panic!("Unexpected EOF")) - } - - pub fn peek(&self) -> Option<&Token> { - self.next_token.as_ref() - } - - pub fn try_advance(&mut self, kind: TokenKind) -> bool { - if self.peek().is_some_and(|t| t.kind == kind) { - self.next(); - true - } else { - false - } - } - - pub fn parse(&mut self) -> Vec { - iter::from_fn(|| self.parse_item()).collect() - } - - fn parse_item(&mut self) -> Option { - let token = self.next()?; - match token.kind { - TokenKind::Struct => Some(self.parse_struct()), - TokenKind::Fn => Some(self.parse_function()), - TokenKind::Use => Some(Item::Import(self.expect(TokenKind::String).value)), - tkn => { - let (line, col) = self.pos_to_line_col(token.span.start); - panic!("Unexpected {:?} at {}:{}", tkn, line, col) - } - } - } - - fn parse_struct(&mut self) -> Item { - let name = self.expect(TokenKind::Ident).value; - self.expect(TokenKind::LBrace); - let fields = self.sequence(TokenKind::Comma, TokenKind::RBrace, Self::parse_field); - Item::Struct(Struct { name, fields }) - } - - fn parse_field(&mut self) -> Field { - let name = self.expect(TokenKind::Ident).value; - self.expect(TokenKind::Colon); - let ty = self.type_(); - - Field { name, ty } - } - - fn type_(&mut self) -> Type { - let token = self.next().unwrap(); - match token.kind { - TokenKind::Ty(ty) => Type::Builtin(ty), - TokenKind::Ident => Type::Struct(token.value), - TokenKind::Op(Op::Band) => { - let ty = self.type_(); - Type::Pinter(Box::new(ty)) - } - tkn => { - let (line, col) = self.pos_to_line_col(token.span.start); - panic!("Unexpected {:?} at {}:{}", tkn, line, col) - } - } - } - - fn parse_function(&mut self) -> Item { - let name = self.expect(TokenKind::Ident).value; - self.expect(TokenKind::LParen); - let args = self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_arg); - self.expect(TokenKind::Colon); - let ret = self.type_(); - Item::Function(Function { - name, - args, - ret, - body: self.parse_block(), - }) - } - - fn parse_arg(&mut self) -> Arg { - let name = self.expect(TokenKind::Ident).value; - self.expect(TokenKind::Colon); - let ty = self.type_(); - self.try_advance(TokenKind::Comma); - Arg { name, ty } - } - - fn parse_expr(&mut self) -> Exp { - self.parse_binary_expr(255) - } - - fn parse_binary_expr(&mut self, min_prec: u8) -> Exp { - let mut lhs = self.parse_unit_expr(); - - while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) { - let prec = op.prec(); - if prec > min_prec { - break; - } - - self.next(); - let rhs = self.parse_binary_expr(prec); - - lhs = Exp::Binary { - op, - left: Box::new(lhs), - right: Box::new(rhs), - }; - } - - lhs - } - - fn parse_unit_expr(&mut self) -> Exp { - let token = self.next().unwrap(); - let mut expr = match token.kind { - TokenKind::True => Exp::Literal(Literal::Bool(true)), - TokenKind::False => Exp::Literal(Literal::Bool(false)), - TokenKind::Ident => Exp::Variable(token.value), - TokenKind::LBrace => { - Exp::Block(self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr)) - } - TokenKind::LParen => { - let expr = self.parse_expr(); - self.expect(TokenKind::RParen); - expr - } - TokenKind::Number => { - let value = token.value.parse().unwrap(); - Exp::Literal(Literal::Int(value)) - } - TokenKind::Let => { - let name = self.expect(TokenKind::Ident).value; - let ty = self.try_advance(TokenKind::Colon).then(|| self.type_()); - self.expect(TokenKind::Op(Op::Assign)); - let value = self.parse_expr(); - Exp::Let { - name, - ty, - value: Box::new(value), - } - } - TokenKind::If => { - let cond = self.parse_expr(); - let then = Exp::Block(self.parse_block()); - let else_ = self - .try_advance(TokenKind::Else) - .then(|| { - if self.peek().is_some_and(|t| t.kind == TokenKind::If) { - self.parse_expr() - } else { - Exp::Block(self.parse_block()) - } - }) - .map(Box::new); - Exp::If { - cond: Box::new(cond), - then: Box::new(then), - else_, - } - } - TokenKind::For => { - let params = - self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr); - let mut exprs = Vec::new(); - while !self.try_advance(TokenKind::RBrace) { - exprs.push(self.parse_expr()); - self.try_advance(TokenKind::Semicolon); - } - let block = Exp::Block(exprs); - let len = params.len(); - let mut exprs = params.into_iter(); - let [init, consd, step] = array::from_fn(|_| exprs.next()); - match len { - 0 => Exp::For { - init: None, - cond: None, - step: None, - block: Box::new(block), - }, - 1 => Exp::For { - init: None, - cond: init.map(Box::new), - step: None, - block: Box::new(block), - }, - 3 => Exp::For { - init: init.map(Box::new), - cond: consd.map(Box::new), - step: step.map(Box::new), - block: Box::new(block), - }, - _ => { - let (line, col) = self.pos_to_line_col(token.span.start); - panic!("Invalid loop syntax at {}:{}, loop accepts 1 (while), 0 (loop), or 3 (for) statements separated by semicolon", line, col) - } - } - } - TokenKind::Return => { - let value = self - .peek() - .is_some_and(|t| { - !matches!( - t.kind, - TokenKind::Semicolon - | TokenKind::RBrace - | TokenKind::RParen - | TokenKind::Comma - ) - }) - .then(|| Box::new(self.parse_expr())); - Exp::Return(value) - } - TokenKind::Op(op) => Exp::Unary { - op, - exp: Box::new(self.parse_expr()), - }, - TokenKind::Dot => { - let token = self.expect_any(); - match token.kind { - TokenKind::LBrace => { - let fields = self.sequence( - TokenKind::Comma, - TokenKind::RBrace, - Self::parse_ctor_field, - ); - Exp::Ctor { name: None, fields } - } - tkn => { - let (line, col) = self.pos_to_line_col(token.span.start); - panic!("Unexpected {:?} at {}:{}", tkn, line, col) - } - } - } - - TokenKind::Ty(_) - | TokenKind::String - | TokenKind::Use - | TokenKind::Break - | TokenKind::Continue - | TokenKind::Struct - | TokenKind::RBrace - | TokenKind::RParen - | TokenKind::LBracket - | TokenKind::RBracket - | TokenKind::Colon - | TokenKind::Semicolon - | TokenKind::Comma - | TokenKind::Fn - | TokenKind::Else => { - let (line, col) = self.pos_to_line_col(token.span.start); - panic!("Unexpected {:?} at {}:{}", token.kind, line, col) - } - }; - - loop { - match self.peek().map(|t| t.kind) { - Some(TokenKind::LParen) => { - self.next(); - expr = Exp::Call { - name: match expr { - Exp::Variable(name) => name, - _ => { - let (line, col) = self.pos_to_line_col(token.span.start); - panic!("Expected function name at {}:{}", line, col) - } - }, - args: self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_expr), - }; - } - Some(TokenKind::LBracket) => { - self.next(); - let index = self.parse_expr(); - self.expect(TokenKind::RBracket); - expr = Exp::Index { - base: Box::new(expr), - index: Box::new(index), - }; - } - Some(TokenKind::Dot) => { - self.next(); - - let token = self.expect_any(); - match token.kind { - TokenKind::Ident => { - expr = Exp::Field { - base: Box::new(expr), - field: token.value, - }; - } - TokenKind::LBrace => { - let fields = self.sequence( - TokenKind::Comma, - TokenKind::RBrace, - Self::parse_ctor_field, - ); - expr = Exp::Ctor { - name: Some(Box::new(expr)), - fields, - }; - } - tkn => { - let (line, col) = self.pos_to_line_col(token.span.start); - panic!("Unexpected {:?} at {}:{}", tkn, line, col) - } - } - } - _ => break expr, - } - } - } - - pub fn parse_ctor_field(&mut self) -> CtorField { - let name = self.expect(TokenKind::Ident).value; - self.expect(TokenKind::Colon); - let value = self.parse_expr(); - CtorField { name, value } - } - - pub fn parse_block(&mut self) -> Vec { - self.expect(TokenKind::LBrace); - let mut exprs = Vec::new(); - while !self.try_advance(TokenKind::RBrace) { - exprs.push(self.parse_expr()); - self.try_advance(TokenKind::Semicolon); - } - exprs - } - - pub fn sequence( - &mut self, - sep: TokenKind, - term: TokenKind, - mut parser: impl FnMut(&mut Self) -> T, - ) -> Vec { - let mut items = Vec::new(); - while !self.try_advance(term) { - items.push(parser(self)); - if self.try_advance(term) { - break; - } - self.expect(sep); - } - items - } -} - -pub fn parse(input: &str) -> Vec { - Parser::new(input).parse() -} - -#[cfg(test)] -mod test { - #[test] - fn sanity() { - let input = r#" - struct Foo { - x: i32, - y: i32, - } - - fn main(): void { - let foo = Foo.{ x: 1, y: 2 }; - if foo.x > 0 { - return foo.x; - } else { - return foo.y; - } - for i < 10 { - i = i + 1; - } - for let i = 0; i < 10; i = i + 1 { - i = i + 1; - } - i + 1 * 3 / 4 % 5 == 2 + 3 - 4 * 5 / 6 % 7; - fomething(); - pahum(&foo); - lupa(*soo); - return foo.x + foo.y; - } - - fn lupa(x: i32): i32 { - return x; - } - - fn pahum(x: &Foo): void { - return; - } - "#; - let _ = super::parse(input); - } -} diff --git a/hblang/hblang-f/src/typechk.rs b/hblang/hblang-f/src/typechk.rs deleted file mode 100644 index d2f95bc8..00000000 --- a/hblang/hblang-f/src/typechk.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::lexer::Ty; - -#[derive(Clone, Debug)] -pub enum Type { - Builtin(Ty), - Struct(StructType), - Pointer(Box), -} - -#[derive(Clone, Debug)] -pub struct StructType { - pub name: String, - pub fields: Vec, -} - -#[derive(Clone, Debug)] -pub struct Field { - pub name: String, - pub ty: Type, -} diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index e00b2525..d5f41f14 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -1,592 +1,108 @@ -use std::{iter::Cycle, ops::Range, usize}; +use {crate::parser, std::fmt::Write}; -use crate::{ - lexer::{self, Ty}, - parser::{Exp, Function, Item, Literal, Struct, Type}, -}; +const STACK_PTR: &str = "r254"; +const ZERO: &str = "r0"; +const RET_ADDR: &str = "r31"; -type Reg = u8; -type Offset = i32; -type Pushed = bool; -type SlotIndex = usize; -type Label = usize; -type Data = usize; -type Size = usize; - -//| Register | Description | Saver | -//|:-----------|:--------------------|:-------| -//| r0 | Hard-wired zero | N/A | -//| r1 - r2 | Return values | Caller | -//| r2 - r11 | Function parameters | Caller | -//| r12 - r30 | General purpose | Caller | -//| r31 | Return address | Caller | -//| r32 - r253 | General purpose | Callee | -//| r254 | Stack pointer | Callee | -//| r255 | Thread pointer | N/A | - -struct RegAlloc { - pub regs: Box<[Option; 256]>, - pub used: Box<[bool; 256]>, - pub spill_cycle: Cycle>, +pub struct Codegen<'a> { + path: &'a std::path::Path, + code: String, + data: String, } -impl RegAlloc { - const STACK_POINTER: Reg = 254; - const ZERO: Reg = 0; - const RETURN_ADDRESS: Reg = 31; - - fn alloc_general(&mut self, slot: usize) -> Option { - self.regs[32..254] - .iter_mut() - .zip(&mut self.used[32..254]) - .position(|(reg, used)| { - if reg.is_none() { - *reg = Some(slot); - *used = true; - true - } else { - false - } - }) - .map(|reg| reg as Reg + 32) - } - - fn free(&mut self, reg: Reg) { - assert!(self.regs[reg as usize].take().is_some()); - } - - fn is_used(&self, reg: Reg) -> bool { - self.regs[reg as usize].is_some() - } - - fn spill(&mut self, for_slot: usize) -> (Reg, Option) { - let to_spill = self.spill_cycle.next().unwrap(); - let slot = self.spill_specific(to_spill, for_slot); - (to_spill as Reg + 32, slot) - } - - fn spill_specific(&mut self, reg: Reg, for_slot: usize) -> Option { - self.regs[reg as usize].replace(for_slot) - } - - fn restore(&mut self, reg: Reg, slot: usize) -> usize { - self.regs[reg as usize].replace(slot).unwrap() - } - - fn alloc_specific(&mut self, reg: u8, to: usize) { - assert!(self.regs[reg as usize].replace(to).is_none()); - } - - fn alloc_specific_in_reg(&mut self, reg: InReg, to: usize) { - match reg { - InReg::Single(r) => self.alloc_specific(r, to), - InReg::Pair(r1, r2) => { - self.alloc_specific(r1, to); - self.alloc_specific(r2, to); - } - } - } -} - -pub struct ParamAlloc { - reg_range: Range, - stack: Offset, -} - -impl ParamAlloc { - fn for_params() -> Self { +impl<'a> Codegen<'a> { + pub fn new(path: &'a std::path::Path) -> Self { Self { - stack: 8, // return adress is in callers stack frame - reg_range: 2..12, + path, + code: String::new(), + data: String::new(), } } - fn for_returns() -> Self { - Self { - stack: 0, - reg_range: 0..2, + pub fn file(&mut self, exprs: &[parser::Expr]) -> std::fmt::Result { + for expr in exprs { + self.expr(expr)?; } + Ok(()) } - fn alloc(&mut self, size: usize) -> SlotValue { - match self.try_alloc_regs(size) { - Some(reg) => reg, - None => { - let stack = self.stack; - self.stack += size as Offset; - SlotValue::Stack(stack) - } - } - } - - fn try_alloc_regs(&mut self, size: usize) -> Option { - let mut needed = size.div_ceil(8); - if needed > 2 { - needed = 1; // passed by ref - } - - if self.reg_range.len() < needed { - return None; - } - - match needed { - 1 => { - let reg = self.reg_range.start; - self.reg_range.start += 1; - Some(SlotValue::Reg(InReg::Single(reg))) - } - 2 => { - let reg = self.reg_range.start; - self.reg_range.start += 2; - Some(SlotValue::Reg(InReg::Pair(reg, reg + 1))) - } - _ => unreachable!(), - } - } -} - -impl Default for RegAlloc { - fn default() -> Self { - Self { - regs: Box::new([None; 256]), - used: Box::new([false; 256]), - spill_cycle: (32..254).cycle(), - } - } -} - -struct Variable { - name: String, - location: usize, -} - -#[derive(Clone, Copy)] -struct SlotId { - // index into slot stack - index: SlotIndex, - // temorary offset carried over when eg. accessing fields - offset: Offset, - // this means we can mutate the value as part of computation - owned: bool, -} - -impl SlotId { - fn base(location: usize) -> Self { - Self { - index: location, - offset: 0, - owned: true, - } - } - - fn borrowed(self) -> Self { - Self { - owned: false, - ..self - } - } -} - -struct Slot { - ty: Type, - value: SlotValue, -} - -#[repr(transparent)] -struct InstBuffer { - buffer: Vec, -} - -impl InstBuffer { - fn new(vec: &mut Vec) -> &mut Self { - unsafe { &mut *(vec as *mut Vec as *mut Self) } - } -} - -impl hbbytecode::Buffer for InstBuffer { - fn reserve(&mut self, bytes: usize) { - self.buffer.reserve(bytes); - } - - unsafe fn write(&mut self, byte: u8) { - self.buffer.push(byte); - } -} - -#[derive(Clone, Copy)] -enum InReg { - Single(Reg), - // if one of the registes is allocated, the other is too, ALWAYS - // with the same slot - Pair(Reg, Reg), -} - -#[derive(Clone, Copy)] -enum Spill { - Reg(InReg), - Stack(Offset), // relative to frame end (rsp if nothing was pushed) -} - -#[derive(Clone, Copy)] -enum SlotValue { - Reg(InReg), - Stack(Offset), // relative to frame start (rbp) - Imm(u64), - Spilled(Spill, SlotIndex), -} - -pub struct Value { - store: ValueStore, - offset: Offset, -} - -#[derive(Clone, Copy)] -enum ValueStore { - Reg(InReg), - Stack(Offset, Pushed), - Imm(u64), -} - -impl From for ValueStore { - fn from(value: SlotValue) -> Self { - match value { - SlotValue::Reg(reg) => ValueStore::Reg(reg), - SlotValue::Stack(offset) => ValueStore::Stack(offset, false), - SlotValue::Imm(imm) => ValueStore::Imm(imm), - SlotValue::Spilled(spill, _) => match spill { - Spill::Reg(reg) => ValueStore::Reg(reg), - Spill::Stack(offset) => ValueStore::Stack(offset, true), - }, - } - } -} - -pub struct LabelReloc { - pub label: Label, - pub offset: usize, -} - -pub struct DataReloc { - pub data: Data, - pub offset: usize, -} - -#[must_use] -pub struct Frame { - pub slot_count: usize, - pub var_count: usize, -} - -enum Instr { - BinOp(lexer::Op, Value, Value), - Move(Size, Value, Value), - Push(Reg), - Jump(Label), - Call(String), - JumpIfZero(Value, Label), -} - -#[derive(Default)] -pub struct Generator<'a> { - ast: &'a [Item], - - func_labels: Vec<(String, Label)>, - - stack_size: Offset, - pushed_size: Offset, - - regs: RegAlloc, - variables: Vec, - slots: Vec, - - labels: Vec>, - label_relocs: Vec, - - data: Vec>, - data_relocs: Vec, - - code_section: Vec, - data_section: Vec, - - instrs: Vec, -} - -impl<'a> Generator<'a> { - fn generate(mut self) -> Vec { - for item in self.ast { - let Item::Function(f) = item else { continue }; - self.generate_function(f); - } - - self.link() - } - - fn generate_function(&mut self, f: &Function) { - let frame = self.push_frame(); - - let mut param_alloc = ParamAlloc::for_params(); - - for param in f.args.iter() { - let param_size = self.size_of(¶m.ty); - let value = param_alloc.alloc(param_size); - let slot = self.add_slot(param.ty.clone(), value); - if let SlotValue::Reg(reg) = value { - self.regs.alloc_specific_in_reg(reg, slot); - } - self.add_variable(param.name.clone(), slot); - } - - for stmt in f.body.iter() { - assert!(self - .generate_expr(Some(Type::Builtin(Ty::Void)), stmt) - .is_none()); - } - - self.pop_frame(frame); - } - - fn generate_expr(&mut self, expected: Option, expr: &Exp) -> Option { - let value = match expr { - Exp::Literal(lit) => SlotId::base(match lit { - Literal::Int(i) => self.add_slot(expected.clone().unwrap(), SlotValue::Imm(*i)), - Literal::Bool(b) => { - self.add_slot(Type::Builtin(Ty::Bool), SlotValue::Imm(*b as u64)) - } - }), - Exp::Variable(ident) => { - SlotId::base(self.lookup_variable(ident).unwrap().location).borrowed() - } - Exp::Call { name, args } => self.generate_call(expected.clone(), name, args), - Exp::Ctor { name, fields } => todo!(), - Exp::Index { base, index } => todo!(), - Exp::Field { base, field } => todo!(), - Exp::Unary { op, exp } => todo!(), - Exp::Binary { op, left, right } => todo!(), - Exp::If { cond, then, else_ } => todo!(), - Exp::Let { name, ty, value } => todo!(), - Exp::For { - init, - cond, - step, - block, - } => todo!(), - Exp::Block(_) => todo!(), - Exp::Return(_) => todo!(), - Exp::Break => todo!(), - Exp::Continue => todo!(), - }; - - if let Some(expected) = expected { - let actual = self.slots[value.index].ty.clone(); - assert_eq!(expected, actual); - } - - Some(value) - } - - fn generate_call(&mut self, expected: Option, name: &str, args: &[Exp]) -> SlotId { - let frame = self.push_frame(); - let func = self.lookup_function(name); - - let mut ret_alloc = ParamAlloc::for_returns(); - let ret_size = self.size_of(&func.ret); - let ret_slot = ret_alloc.alloc(ret_size); - - let mut arg_alloc = ParamAlloc::for_params(); - for (arg, param) in args.iter().zip(&func.args) { - let arg_slot = self.generate_expr(Some(param.ty.clone()), arg).unwrap(); - let arg_size = self.size_of(¶m.ty); - let param_slot = arg_alloc.alloc(arg_size); - self.set_temporarly(arg_slot, param_slot); - } - - self.instrs.push(Instr::Call(name.to_owned())); - - todo!() - } - - fn set_temporarly(&mut self, from: SlotId, to: SlotValue) { - let to = self.make_mutable(to, from.index); - let to_slot = self.add_slot(self.slots[from.index].ty.clone(), to); - self.emit_move(from, SlotId::base(to_slot)); - } - - fn make_mutable(&mut self, target: SlotValue, by: SlotIndex) -> SlotValue { - match target { - SlotValue::Reg(in_reg) => { - self.regs.alloc_specific_in_reg(in_reg, by); - target - } - SlotValue::Spilled(Spill::Reg(in_reg), slot) => { - let new_val = SlotValue::Spilled( - match in_reg { - InReg::Single(reg) => Spill::Stack(self.emmit_push(reg)), - InReg::Pair(r1, r2) => { - self.emmit_push(r2); - Spill::Stack(self.emmit_push(r1)) - } + fn expr(&mut self, expr: &parser::Expr) -> std::fmt::Result { + use parser::Expr as E; + match expr { + E::Decl { + name, + val: + E::Closure { + ret: E::Ident { name: "void" }, + body, }, - slot, - ); - let new_slot = self.add_slot(self.slots[slot].ty.clone(), new_val); - SlotValue::Spilled(Spill::Reg(in_reg), new_slot) + } => { + writeln!(self.code, "{name}:")?; + self.expr(body) } - _ => unreachable!(), + E::Return { val: None } => self.ret(), + E::Block { stmts } => { + for stmt in stmts { + self.expr(stmt)?; + } + Ok(()) + } + ast => unimplemented!("{:?}", ast), } } - fn emmit_push(&mut self, reg: Reg) -> Offset { - self.pushed_size += 8; - self.instrs.push(Instr::Push(reg)); - self.pushed_size + fn stack_push(&mut self, value: impl std::fmt::Display, size: usize) -> std::fmt::Result { + writeln!(self.code, " st {value}, {STACK_PTR}, {ZERO}, {size}")?; + writeln!( + self.code, + " addi{} {STACK_PTR}, {STACK_PTR}, {size}", + size * 8 + ) } - fn emit_move(&mut self, from: SlotId, to: SlotId) { - let size = self.size_of(&self.slots[from.index].ty); - let other_size = self.size_of(&self.slots[to.index].ty); - assert_eq!(size, other_size); - - self.instrs.push(Instr::Move( - size, - self.slot_to_value(from), - self.slot_to_value(to), - )); + fn stack_pop(&mut self, value: impl std::fmt::Display, size: usize) -> std::fmt::Result { + writeln!( + self.code, + " subi{} {STACK_PTR}, {STACK_PTR}, {size}", + size * 8 + )?; + writeln!(self.code, " ld {value}, {STACK_PTR}, {ZERO}, {size}") } - fn slot_to_value(&self, slot: SlotId) -> Value { - let slot_val = &self.slots[slot.index]; - Value { - store: slot_val.value.into(), - offset: slot.offset, - } + fn call(&mut self, func: impl std::fmt::Display) -> std::fmt::Result { + self.stack_push(&func, 8)?; + self.global_jump(func) } - fn size_of(&self, ty: &Type) -> Size { - match ty { - Type::Builtin(ty) => match ty { - Ty::U8 | Ty::I8 | Ty::Bool => 1, - Ty::U16 | Ty::I16 => 2, - Ty::U32 | Ty::I32 => 4, - Ty::U64 | Ty::I64 => 8, - Ty::Void => 0, - }, - Type::Struct(name) => self - .lookup_struct(name) - .fields - .iter() - .map(|field| self.size_of(&field.ty)) - .sum(), - Type::Pinter(_) => 8, - } + fn ret(&mut self) -> std::fmt::Result { + self.stack_pop(RET_ADDR, 8)?; + self.global_jump(RET_ADDR) + } + + fn global_jump(&mut self, label: impl std::fmt::Display) -> std::fmt::Result { + writeln!(self.code, " jala {ZERO}, {label}, 0") + } + + pub fn dump(&mut self, mut out: impl std::fmt::Write) -> std::fmt::Result { + writeln!(out, "start:")?; + writeln!(out, " jala {ZERO}, main, 0")?; + writeln!(out, " tx")?; + writeln!(out, "{}", self.code)?; + writeln!(out, "{}", self.data) } } -impl<'a> Generator<'a> { - fn add_variable(&mut self, name: String, location: usize) { - self.variables.push(Variable { name, location }); +#[cfg(test)] +mod tests { + fn generate(input: &'static str, output: &mut String) { + let mut parser = super::parser::Parser::new(input, std::path::Path::new("test")); + let exprs = parser.file(); + let mut codegen = super::Codegen::new(std::path::Path::new("test")); + codegen.file(&exprs).unwrap(); + codegen.dump(output).unwrap(); } - fn add_slot(&mut self, ty: Type, value: SlotValue) -> usize { - let slot = self.slots.len(); - self.slots.push(Slot { ty, value }); - slot - } - - fn link(mut self) -> Vec { - for reloc in self.label_relocs { - let label = self.labels[reloc.label].unwrap(); - let offset = reloc.offset; - let target = label - offset; - let target_bytes = u64::to_le_bytes(target as u64); - self.code_section[offset..offset + 8].copy_from_slice(&target_bytes); - } - - for reloc in self.data_relocs { - let data = self.data[reloc.data].unwrap(); - let offset = reloc.offset; - let target = data; - let target_bytes = u64::to_le_bytes((target + self.code_section.len()) as u64); - self.data_section[offset..offset + 8].copy_from_slice(&target_bytes); - } - - self.code_section.extend_from_slice(&self.data_section); - self.code_section - } - - fn lookup_func_label(&mut self, name: &str) -> Label { - if let Some(label) = self.func_labels.iter().find(|(n, _)| n == name) { - return label.1; - } - - panic!("Function not found: {}", name); - } - - fn declare_label(&mut self) -> Label { - self.labels.push(None); - self.labels.len() - 1 - } - - fn define_label(&mut self, label: Label) { - self.labels[label] = Some(self.code_section.len()); - } - - fn declare_data(&mut self) -> Data { - self.data.push(None); - self.data.len() - 1 - } - - fn define_data(&mut self, data: Data, bytes: &[u8]) { - self.data[data] = Some(self.data.len()); - self.data_section.extend_from_slice(bytes); - } - - fn lookup_struct(&self, name: &str) -> &Struct { - self.lookup_item(name) - .map(|item| match item { - Item::Struct(s) => s, - _ => panic!("Not a struct: {}", name), - }) - .expect("Struct not found") - } - - fn lookup_function(&self, name: &str) -> &'a Function { - self.lookup_item(name) - .map(|item| match item { - Item::Function(f) => f, - _ => panic!("Not a function: {}", name), - }) - .expect("Function not found") - } - - fn lookup_item(&self, name: &str) -> Option<&'a Item> { - self.ast.iter().find(|item| match item { - Item::Import(_) => false, - Item::Struct(s) => s.name == name, - Item::Function(f) => f.name == name, - }) - } - - fn lookup_variable(&self, name: &str) -> Option<&Variable> { - self.variables.iter().find(|variable| variable.name == name) - } - - fn push_frame(&mut self) -> Frame { - Frame { - slot_count: self.slots.len(), - var_count: self.variables.len(), - } - } - - fn pop_frame(&mut self, frame: Frame) { - self.slots.truncate(frame.slot_count); - self.variables.truncate(frame.var_count); + crate::run_tests! { generate: + example => include_str!("../examples/main_fn.hb"); } } - -pub fn generate(ast: &[Item]) -> Vec { - Generator { - ast, - ..Default::default() - } - .generate() -} diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs new file mode 100644 index 00000000..221c0646 --- /dev/null +++ b/hblang/src/lexer.rs @@ -0,0 +1,169 @@ +use std::{iter::Peekable, str::Chars}; + +#[derive(Debug, PartialEq)] +pub struct Token { + pub kind: TokenKind, + pub start: u32, + pub end: u32, +} + +impl Token { + pub fn range(&self) -> std::ops::Range { + self.start as usize..self.end as usize + } +} + +#[derive(Debug, PartialEq)] +pub enum TokenKind { + Ident, + Number, + LParen, + RParen, + LBrace, + RBrace, + LBrack, + RBrack, + Decl, + Or, + Semi, + Colon, + Return, + Eof, + Error, +} + +pub struct Lexer<'a> { + pos: u32, + bytes: &'a [u8], +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Self { + Self { + pos: 0, + bytes: input.as_bytes(), + } + } + + pub fn slice(&self, tok: Token) -> &'a str { + unsafe { std::str::from_utf8_unchecked(&self.bytes[tok.range()]) } + } + + fn peek(&self) -> Option { + self.bytes.get(self.pos as usize).copied() + } + + fn advance(&mut self) -> Option { + let c = self.peek()?; + self.pos += 1; + Some(c) + } + + pub fn next(&mut self) -> Token { + Iterator::next(self).unwrap_or(Token { + kind: TokenKind::Eof, + start: self.pos, + end: self.pos, + }) + } + + fn advance_if(&mut self, arg: u8) -> bool { + if self.peek() == Some(arg) { + self.advance(); + true + } else { + false + } + } + + pub fn line_col(&self, mut start: u32) -> (usize, usize) { + self.bytes + .split(|&b| b == b'\n') + .enumerate() + .find_map(|(i, line)| { + if start < line.len() as u32 { + return Some((i + 1, start as usize + 1)); + } + start -= line.len() as u32 + 1; + None + }) + .unwrap_or((1, 1)) + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Token; + + fn next(&mut self) -> Option { + use TokenKind as T; + loop { + let start = self.pos; + let kind = match self.advance()? { + b'\n' | b'\r' | b'\t' | b' ' => continue, + b'0'..=b'9' => { + while let Some(b'0'..=b'9') = self.peek() { + self.advance(); + } + T::Number + } + b'a'..=b'z' | b'A'..=b'Z' | b'_' => { + while let Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_') = self.peek() { + self.advance(); + } + + let ident = &self.bytes[start as usize..self.pos as usize]; + match ident { + b"return" => T::Return, + _ => T::Ident, + } + } + b':' => match self.advance_if(b'=') { + true => T::Decl, + false => T::Colon, + }, + b';' => T::Semi, + b'|' => match self.advance_if(b'|') { + true => T::Or, + false => T::Error, + }, + b'(' => T::LParen, + b')' => T::RParen, + b'{' => T::LBrace, + b'}' => T::RBrace, + b'[' => T::LBrack, + b']' => T::RBrack, + _ => T::Error, + }; + + return Some(Token { + kind, + start, + end: self.pos, + }); + } + } +} + +#[cfg(test)] +mod tests { + fn lex(input: &'static str, output: &mut String) { + use { + super::{Lexer, TokenKind as T}, + std::fmt::Write, + }; + let mut lexer = Lexer::new(input); + loop { + let token = lexer.next(); + writeln!(output, "{:?} {:?}", token.kind, &input[token.range()],).unwrap(); + if token.kind == T::Eof { + break; + } + } + } + + crate::run_tests! { lex: + empty => ""; + whitespace => " \t\n\r"; + examples => include_str!("../examples/main_fn.hb"); + } +} diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs new file mode 100644 index 00000000..ca0c1cf7 --- /dev/null +++ b/hblang/src/lib.rs @@ -0,0 +1,20 @@ +#![feature(noop_waker)] +#[macro_export] +macro_rules! run_tests { + ($runner:path: $($name:ident => $input:expr;)*) => {$( + #[test] + fn $name() { + $crate::tests::run_test(std::any::type_name_of_val(&$name), $input, $runner); + } + )*}; +} + +mod codegen; +mod lexer; +mod parser; +mod tests; +mod typechk; + +pub fn try_block(f: impl FnOnce() -> R) -> R { + f() +} diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs new file mode 100644 index 00000000..900eda07 --- /dev/null +++ b/hblang/src/parser.rs @@ -0,0 +1,161 @@ +use std::{cell::Cell, ops::Not}; + +use crate::lexer::{Lexer, Token, TokenKind}; + +type Ptr = &'static T; + +fn ptr(val: T) -> Ptr { + Box::leak(Box::new(val)) +} + +pub struct Parser<'a> { + path: &'a std::path::Path, + lexer: Lexer<'a>, + token: Token, +} + +impl<'a> Parser<'a> { + pub fn new(input: &'a str, path: &'a std::path::Path) -> Self { + let mut lexer = Lexer::new(input); + let token = lexer.next(); + Self { lexer, token, path } + } + + fn next(&mut self) -> Token { + std::mem::replace(&mut self.token, self.lexer.next()) + } + + pub fn file(&mut self) -> Vec { + std::iter::from_fn(|| (self.token.kind != TokenKind::Eof).then(|| self.expr())).collect() + } + + fn ptr_expr(&mut self) -> Ptr { + ptr(self.expr()) + } + + pub fn expr(&mut self) -> Expr { + let token = self.next(); + let expr = match token.kind { + TokenKind::Ident => { + let name = self.lexer.slice(token).to_owned().leak(); + if self.advance_if(TokenKind::Decl) { + let val = self.ptr_expr(); + Expr::Decl { name, val } + } else { + Expr::Ident { name } + } + } + TokenKind::Return => Expr::Return { + val: (self.token.kind != TokenKind::Semi).then(|| self.ptr_expr()), + }, + TokenKind::Or => { + self.expect_advance(TokenKind::Colon); + let ret = self.ptr_expr(); + let body = self.ptr_expr(); + Expr::Closure { ret, body } + } + TokenKind::LBrace => Expr::Block { + stmts: std::iter::from_fn(|| { + self.advance_if(TokenKind::RBrace) + .not() + .then(|| self.expr()) + }) + .collect::>(), + }, + TokenKind::Number => Expr::Number { + value: match self.lexer.slice(token).parse() { + Ok(value) => value, + Err(e) => self.report(format_args!("invalid number: {e}")), + }, + }, + tok => self.report(format_args!("unexpected token: {:?}", tok)), + }; + + self.advance_if(TokenKind::Semi); + + expr + } + + fn advance_if(&mut self, kind: TokenKind) -> bool { + if self.token.kind == kind { + self.next(); + true + } else { + false + } + } + + fn expect_advance(&mut self, kind: TokenKind) { + if self.token.kind != kind { + self.report(format_args!( + "expected {:?}, found {:?}", + kind, self.token.kind + )); + } + self.next(); + } + + fn report(&self, msg: impl std::fmt::Display) -> ! { + let (line, col) = self.lexer.line_col(self.token.start); + eprintln!("{}:{}:{} => {}", self.path.display(), line, col, msg); + unreachable!(); + } +} + +#[derive(Debug)] +pub enum Expr { + Decl { name: Ptr, val: Ptr }, + Closure { ret: Ptr, body: Ptr }, + Return { val: Option> }, + Ident { name: Ptr }, + Block { stmts: Vec }, + Number { value: u64 }, +} + +impl std::fmt::Display for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + thread_local! { + static INDENT: Cell = Cell::new(0); + } + + match self { + Self::Decl { name, val } => write!(f, "{} := {}", name, val), + Self::Closure { ret, body } => write!(f, "||: {} {}", ret, body), + Self::Return { val: Some(val) } => write!(f, "return {};", val), + Self::Return { val: None } => write!(f, "return;"), + Self::Ident { name } => write!(f, "{}", name), + Self::Block { stmts } => { + writeln!(f, "{{")?; + INDENT.with(|i| i.set(i.get() + 1)); + let res = crate::try_block(|| { + for stmt in stmts { + for _ in 0..INDENT.with(|i| i.get()) { + write!(f, " ")?; + } + writeln!(f, "{}", stmt)?; + } + Ok(()) + }); + INDENT.with(|i| i.set(i.get() - 1)); + write!(f, "}}")?; + res + } + Self::Number { value } => write!(f, "{}", value), + } + } +} + +#[cfg(test)] +mod tests { + fn parse(input: &'static str, output: &mut String) { + use std::fmt::Write; + let mut parser = super::Parser::new(input, std::path::Path::new("test")); + for expr in parser.file() { + writeln!(output, "{}", expr).unwrap(); + } + } + + crate::run_tests! { parse: + example => include_str!("../examples/main_fn.hb"); + } +} diff --git a/hblang/src/tests.rs b/hblang/src/tests.rs new file mode 100644 index 00000000..0faaa292 --- /dev/null +++ b/hblang/src/tests.rs @@ -0,0 +1,53 @@ +#![cfg(test)] + +pub fn run_test(name: &'static str, input: &'static str, test: fn(&'static str, &mut String)) { + use std::{io::Write, path::PathBuf}; + + let filter = std::env::var("PT_FILTER").unwrap_or_default(); + if !filter.is_empty() && !name.contains(&filter) { + return; + } + + let mut output = String::new(); + test(input, &mut output); + + let mut root = PathBuf::from(std::env::var("PT_TEST_ROOT").unwrap_or("tests".to_string())); + root.push(name); + root.set_extension("txt"); + + let expected = std::fs::read_to_string(&root).unwrap_or_default(); + + if output == expected { + return; + } + + if std::env::var("PT_UPDATE").is_ok() { + std::fs::write(&root, output).unwrap(); + return; + } + + if !root.exists() { + std::fs::create_dir_all(root.parent().unwrap()).unwrap(); + std::fs::write(&root, vec![]).unwrap(); + } + + let mut proc = std::process::Command::new("diff") + .arg("-u") + .arg("--color") + .arg(&root) + .arg("-") + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::inherit()) + .spawn() + .unwrap(); + + proc.stdin + .as_mut() + .unwrap() + .write_all(output.as_bytes()) + .unwrap(); + + proc.wait().unwrap(); + + panic!(); +} diff --git a/hblang/src/typechk.rs b/hblang/src/typechk.rs new file mode 100644 index 00000000..e69de29b diff --git a/hblang/tests/hblang::codegen::tests::example.txt b/hblang/tests/hblang::codegen::tests::example.txt new file mode 100644 index 00000000..3e43936a --- /dev/null +++ b/hblang/tests/hblang::codegen::tests::example.txt @@ -0,0 +1,9 @@ +start: + jala r0, main, 0 + tx +main: + subi64 r254, r254, 8 + ld r31, r254, r0, 8 + jala r0, r31, 0 + + diff --git a/hblang/tests/hblang::lexer::tests::empty.txt b/hblang/tests/hblang::lexer::tests::empty.txt new file mode 100644 index 00000000..3f671079 --- /dev/null +++ b/hblang/tests/hblang::lexer::tests::empty.txt @@ -0,0 +1 @@ +Eof "" diff --git a/hblang/tests/hblang::lexer::tests::examples.txt b/hblang/tests/hblang::lexer::tests::examples.txt new file mode 100644 index 00000000..d06509f9 --- /dev/null +++ b/hblang/tests/hblang::lexer::tests::examples.txt @@ -0,0 +1,10 @@ +Ident "main" +Decl ":=" +Or "||" +Colon ":" +Ident "void" +LBrace "{" +Return "return" +Semi ";" +RBrace "}" +Eof "" diff --git a/hblang/tests/hblang::lexer::tests::whitespace.txt b/hblang/tests/hblang::lexer::tests::whitespace.txt new file mode 100644 index 00000000..3f671079 --- /dev/null +++ b/hblang/tests/hblang::lexer::tests::whitespace.txt @@ -0,0 +1 @@ +Eof "" diff --git a/hblang/tests/hblang::parser::tests::example.txt b/hblang/tests/hblang::parser::tests::example.txt new file mode 100644 index 00000000..4e57f17d --- /dev/null +++ b/hblang/tests/hblang::parser::tests::example.txt @@ -0,0 +1,3 @@ +main := ||: void { + return; +}