From 870c1f47188f79f1befe5565d5e37418b22fb5d9 Mon Sep 17 00:00:00 2001 From: mlokr Date: Thu, 9 May 2024 18:22:31 +0200 Subject: [PATCH] blah --- hblang/hblang-f/Cargo.toml | 11 + hblang/hblang-f/src/codegen.rs | 595 +++++++++++++++++++++++++++++++++ hblang/hblang-f/src/lexer.rs | 151 +++++++++ hblang/hblang-f/src/lib.rs | 6 + hblang/hblang-f/src/parser.rs | 566 +++++++++++++++++++++++++++++++ hblang/hblang-f/src/typechk.rs | 20 ++ 6 files changed, 1349 insertions(+) create mode 100644 hblang/hblang-f/Cargo.toml create mode 100644 hblang/hblang-f/src/codegen.rs create mode 100644 hblang/hblang-f/src/lexer.rs create mode 100644 hblang/hblang-f/src/lib.rs create mode 100644 hblang/hblang-f/src/parser.rs create mode 100644 hblang/hblang-f/src/typechk.rs diff --git a/hblang/hblang-f/Cargo.toml b/hblang/hblang-f/Cargo.toml new file mode 100644 index 0000000..0e19b10 --- /dev/null +++ b/hblang/hblang-f/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "hblang" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +hbbytecode = { version = "0.1.0", path = "../hbbytecode" } +logos = "0.13.0" + diff --git a/hblang/hblang-f/src/codegen.rs b/hblang/hblang-f/src/codegen.rs new file mode 100644 index 0000000..fdd8a36 --- /dev/null +++ b/hblang/hblang-f/src/codegen.rs @@ -0,0 +1,595 @@ +use std::{iter::Cycle, ops::Range, usize}; + +use crate::{ + lexer::{self, Ty}, + parser::{Exp, Function, Item, Literal, Struct, Type}, +}; + +type Reg = u8; +type Offset = i32; +type Pushed = bool; +type SlotIndex = usize; +type Label = usize; +type Data = usize; +type Size = usize; + +//| Register | Description | Saver | +//|:-----------|:--------------------|:-------| +//| r0 | Hard-wired zero | N/A | +//| r1 - r2 | Return values | Caller | +//| r2 - r11 | Function parameters | Caller | +//| r12 - r30 | General purpose | Caller | +//| r31 | Return address | Caller | +//| r32 - r253 | General 
purpose | Callee | +//| r254 | Stack pointer | Callee | +//| r255 | Thread pointer | N/A | + +struct RegAlloc { + pub regs: Box<[Option; 256]>, + pub used: Box<[bool; 256]>, + pub spill_cycle: Cycle>, +} + +impl RegAlloc { + const STACK_POINTER: Reg = 254; + const ZERO: Reg = 0; + const RETURN_ADDRESS: Reg = 31; + + fn alloc_return(&mut self, slot: usize) -> Option { + self.regs[1..2] + .iter_mut() + .position(|reg| { + if reg.is_none() { + *reg = Some(slot); + true + } else { + false + } + }) + .map(|reg| reg as Reg + 1) + } + + fn alloc_general(&mut self, slot: usize) -> Option { + self.regs[32..254] + .iter_mut() + .zip(&mut self.used[32..254]) + .position(|(reg, used)| { + if reg.is_none() { + *reg = Some(slot); + *used = true; + true + } else { + false + } + }) + .map(|reg| reg as Reg + 32) + } + + fn free(&mut self, reg: Reg) { + assert!(self.regs[reg as usize].take().is_some()); + } + + fn is_used(&self, reg: Reg) -> bool { + self.regs[reg as usize].is_some() + } + + fn spill(&mut self, for_slot: usize) -> (Reg, Option) { + let to_spill = self.spill_cycle.next().unwrap(); + let slot = self.spill_specific(to_spill, for_slot); + (to_spill, slot) + } + + fn spill_specific(&mut self, reg: Reg, for_slot: usize) -> Option { + self.regs[reg as usize].replace(for_slot) + } + + fn restore(&mut self, reg: Reg, slot: usize) -> usize { + self.regs[reg as usize].replace(slot).unwrap() + } + + fn alloc_specific(&mut self, reg: u8, to: usize) { + assert!(self.regs[reg as usize].replace(to).is_none()); + } + + fn alloc_specific_in_reg(&mut self, reg: InReg, to: usize) { + match reg { + InReg::Single(r) => self.alloc_specific(r, to), + InReg::Pair(r1, r2) => { + self.alloc_specific(r1, to); + self.alloc_specific(r2, to); + } + } + } +} + +pub struct ParamAlloc { + reg_range: Range, + stack: Offset, +} + +impl ParamAlloc { + fn new() -> Self { + Self { + stack: 8, // return address is in the caller's stack frame + reg_range: 2..12, + } + } + + fn alloc(&mut self, size: 
usize) -> SlotValue { + match self.try_alloc_regs(size) { + Some(reg) => reg, + None => { + let stack = self.stack; + self.stack += size as Offset; + SlotValue::Stack(stack) + } + } + } + + fn try_alloc_regs(&mut self, size: usize) -> Option { + let mut needed = size.div_ceil(8); + if needed > 2 { + needed = 1; // passed by ref + } + + if self.reg_range.len() < needed { + return None; + } + + match needed { + 1 => { + let reg = self.reg_range.start; + self.reg_range.start += 1; + Some(SlotValue::Reg(InReg::Single(reg))) + } + 2 => { + let reg = self.reg_range.start; + self.reg_range.start += 2; + Some(SlotValue::Reg(InReg::Pair(reg, reg + 1))) + } + _ => unreachable!(), + } + } +} + +impl Default for RegAlloc { + fn default() -> Self { + Self { + regs: Box::new([None; 256]), + used: Box::new([false; 256]), + spill_cycle: (32..254).cycle(), + } + } +} + +struct Variable { + name: String, + location: usize, +} + +#[derive(Clone, Copy)] +struct SlotId { + // index into slot stack + index: SlotIndex, + // temorary offset carried over when eg. 
accessing fields + offset: Offset, + // this means we can mutate the value as part of computation + owned: bool, +} + +impl SlotId { + fn base(location: usize) -> Self { + Self { + index: location, + offset: 0, + owned: true, + } + } + + fn borrowed(self) -> Self { + Self { + owned: false, + ..self + } + } +} + +struct Slot { + ty: Type, + value: SlotValue, +} + +#[repr(transparent)] +struct InstBuffer { + buffer: Vec, +} + +impl InstBuffer { + fn new(vec: &mut Vec) -> &mut Self { + unsafe { &mut *(vec as *mut Vec as *mut Self) } + } +} + +impl hbbytecode::Buffer for InstBuffer { + fn reserve(&mut self, bytes: usize) { + self.buffer.reserve(bytes); + } + + unsafe fn write(&mut self, byte: u8) { + self.buffer.push(byte); + } +} + +#[derive(Clone, Copy)] +enum InReg { + Single(Reg), + // if one of the registes is allocated, the other is too, ALWAYS + // with the same slot + Pair(Reg, Reg), +} + +#[derive(Clone, Copy)] +enum Spill { + Reg(InReg), + Stack(Offset), // relative to frame end (rsp if nothing was pushed) +} + +#[derive(Clone, Copy)] +enum SlotValue { + Reg(InReg), + Stack(Offset), // relative to frame start (rbp) + Imm(u64), + Spilled(Spill, SlotIndex), +} + +pub struct Value { + store: ValueStore, + offset: Offset, +} + +#[derive(Clone, Copy)] +enum ValueStore { + Reg(InReg), + Stack(Offset, Pushed), + Imm(u64), +} + +impl From for ValueStore { + fn from(value: SlotValue) -> Self { + match value { + SlotValue::Reg(reg) => ValueStore::Reg(reg), + SlotValue::Stack(offset) => ValueStore::Stack(offset, false), + SlotValue::Imm(imm) => ValueStore::Imm(imm), + SlotValue::Spilled(spill, _) => match spill { + Spill::Reg(reg) => ValueStore::Reg(reg), + Spill::Stack(offset) => ValueStore::Stack(offset, true), + }, + } + } +} + +pub struct LabelReloc { + pub label: Label, + pub offset: usize, +} + +pub struct DataReloc { + pub data: Data, + pub offset: usize, +} + +#[must_use] +pub struct Frame { + pub slot_count: usize, + pub var_count: usize, +} + +enum Instr { + 
BinOp(lexer::Op, Value, Value), + Move(Size, Value, Value), + Push(Reg), + Jump(Label), + Call(String), + JumpIfZero(Value, Label), +} + +#[derive(Default)] +pub struct Generator<'a> { + ast: &'a [Item], + + func_labels: Vec<(String, Label)>, + + stack_size: Offset, + pushed_size: Offset, + + regs: RegAlloc, + variables: Vec, + slots: Vec, + + labels: Vec>, + label_relocs: Vec, + + data: Vec>, + data_relocs: Vec, + + code_section: Vec, + data_section: Vec, + + instrs: Vec, +} + +impl<'a> Generator<'a> { + fn generate(mut self) -> Vec { + for item in self.ast { + let Item::Function(f) = item else { continue }; + self.generate_function(f); + } + + self.link() + } + + fn generate_function(&mut self, f: &Function) { + let frame = self.push_frame(); + + let mut param_alloc = ParamAlloc::new(); + + for param in f.args.iter() { + let param_size = self.size_of(&param.ty); + let value = param_alloc.alloc(param_size); + let slot = self.add_slot(param.ty.clone(), value); + if let SlotValue::Reg(reg) = value { + self.regs.alloc_specific_in_reg(reg, slot); + } + self.add_variable(param.name.clone(), slot); + } + + for stmt in f.body.iter() { + assert!(self + .generate_expr(Some(Type::Builtin(Ty::Void)), stmt) + .is_none()); + } + + self.pop_frame(frame); + } + + fn generate_expr(&mut self, expected: Option, expr: &Exp) -> Option { + let value = match expr { + Exp::Literal(lit) => SlotId::base(match lit { + Literal::Int(i) => self.add_slot(expected.clone().unwrap(), SlotValue::Imm(*i)), + Literal::Bool(b) => { + self.add_slot(Type::Builtin(Ty::Bool), SlotValue::Imm(*b as u64)) + } + }), + Exp::Variable(ident) => { + SlotId::base(self.lookup_variable(ident).unwrap().location).borrowed() + } + Exp::Call { name, args } => self.generate_call(expected.clone(), name, args), + Exp::Ctor { name, fields } => todo!(), + Exp::Index { base, index } => todo!(), + Exp::Field { base, field } => todo!(), + Exp::Unary { op, exp } => todo!(), + Exp::Binary { op, left, right } => todo!(), + Exp::If { 
cond, then, else_ } => todo!(), + Exp::Let { name, ty, value } => todo!(), + Exp::For { + init, + cond, + step, + block, + } => todo!(), + Exp::Block(_) => todo!(), + Exp::Return(_) => todo!(), + Exp::Break => todo!(), + Exp::Continue => todo!(), + }; + + if let Some(expected) = expected { + let actual = self.slots[value.index].ty.clone(); + assert_eq!(expected, actual); + } + + Some(value) + } + + fn generate_call(&mut self, expected: Option, name: &str, args: &[Exp]) -> SlotId { + let frame = self.push_frame(); + let func = self.lookup_function(name); + + let mut arg_alloc = ParamAlloc::new(); + for (arg, param) in args.iter().zip(&func.args) { + let arg_slot = self.generate_expr(Some(param.ty.clone()), arg).unwrap(); + let arg_size = self.size_of(&param.ty); + let param_slot = arg_alloc.alloc(arg_size); + self.set_temporarly(arg_slot, param_slot); + } + + self.instrs.push(Instr::Call(name.to_owned())); + + todo!() + } + + fn set_temporarly(&mut self, from: SlotId, to: SlotValue) { + let to = self.make_mutable(to, from.index); + let to_slot = self.add_slot(self.slots[from.index].ty.clone(), to); + self.emit_move(from, SlotId::base(to_slot)); + } + + fn make_mutable(&mut self, target: SlotValue, by: SlotIndex) -> SlotValue { + match target { + SlotValue::Reg(in_reg) => { + self.regs.alloc_specific_in_reg(in_reg, by); + target + } + SlotValue::Spilled(Spill::Reg(in_reg), slot) => { + let new_val = SlotValue::Spilled( + match in_reg { + InReg::Single(reg) => Spill::Stack(self.emmit_push(reg)), + InReg::Pair(r1, r2) => { + self.emmit_push(r2); + Spill::Stack(self.emmit_push(r1)) + } + }, + slot, + ); + let new_slot = self.add_slot(self.slots[slot].ty.clone(), new_val); + SlotValue::Spilled(Spill::Reg(in_reg), new_slot) + } + _ => unreachable!(), + } + } + + fn emmit_push(&mut self, reg: Reg) -> Offset { + self.pushed_size += 8; + self.instrs.push(Instr::Push(reg)); + self.pushed_size + } + + fn emit_move(&mut self, from: SlotId, to: SlotId) { + let size = 
self.size_of(&self.slots[from.index].ty); + let other_size = self.size_of(&self.slots[to.index].ty); + assert_eq!(size, other_size); + + self.instrs.push(Instr::Move( + size, + self.slot_to_value(from), + self.slot_to_value(to), + )); + } + + fn slot_to_value(&self, slot: SlotId) -> Value { + let slot_val = &self.slots[slot.index]; + Value { + store: slot_val.value.into(), + offset: slot.offset, + } + } + + fn size_of(&self, ty: &Type) -> Size { + match ty { + Type::Builtin(ty) => match ty { + Ty::U8 | Ty::I8 | Ty::Bool => 1, + Ty::U16 | Ty::I16 => 2, + Ty::U32 | Ty::I32 => 4, + Ty::U64 | Ty::I64 => 8, + Ty::Void => 0, + }, + Type::Struct(name) => self + .lookup_struct(name) + .fields + .iter() + .map(|field| self.size_of(&field.ty)) + .sum(), + Type::Pinter(_) => 8, + } + } +} + +impl<'a> Generator<'a> { + fn add_variable(&mut self, name: String, location: usize) { + self.variables.push(Variable { name, location }); + } + + fn add_slot(&mut self, ty: Type, value: SlotValue) -> usize { + let slot = self.slots.len(); + self.slots.push(Slot { ty, value }); + slot + } + + fn link(mut self) -> Vec { + for reloc in self.label_relocs { + let label = self.labels[reloc.label].unwrap(); + let offset = reloc.offset; + let target = label - offset; + let target_bytes = u64::to_le_bytes(target as u64); + self.code_section[offset..offset + 8].copy_from_slice(&target_bytes); + } + + for reloc in self.data_relocs { + let data = self.data[reloc.data].unwrap(); + let offset = reloc.offset; + let target = data; + let target_bytes = u64::to_le_bytes((target + self.code_section.len()) as u64); + self.data_section[offset..offset + 8].copy_from_slice(&target_bytes); + } + + self.code_section.extend_from_slice(&self.data_section); + self.code_section + } + + fn lookup_func_label(&mut self, name: &str) -> Label { + if let Some(label) = self.func_labels.iter().find(|(n, _)| n == name) { + return label.1; + } + + panic!("Function not found: {}", name); + } + + fn declare_label(&mut self) -> 
Label { + self.labels.push(None); + self.labels.len() - 1 + } + + fn define_label(&mut self, label: Label) { + self.labels[label] = Some(self.code_section.len()); + } + + fn declare_data(&mut self) -> Data { + self.data.push(None); + self.data.len() - 1 + } + + fn define_data(&mut self, data: Data, bytes: &[u8]) { + self.data[data] = Some(self.data_section.len()); + self.data_section.extend_from_slice(bytes); + } + + fn lookup_struct(&self, name: &str) -> &Struct { + self.lookup_item(name) + .map(|item| match item { + Item::Struct(s) => s, + _ => panic!("Not a struct: {}", name), + }) + .expect("Struct not found") + } + + fn lookup_function(&self, name: &str) -> &'a Function { + self.lookup_item(name) + .map(|item| match item { + Item::Function(f) => f, + _ => panic!("Not a function: {}", name), + }) + .expect("Function not found") + } + + fn lookup_item(&self, name: &str) -> Option<&'a Item> { + self.ast.iter().find(|item| match item { + Item::Import(_) => false, + Item::Struct(s) => s.name == name, + Item::Function(f) => f.name == name, + }) + } + + fn lookup_variable(&self, name: &str) -> Option<&Variable> { + self.variables.iter().find(|variable| variable.name == name) + } + + fn push_frame(&mut self) -> Frame { + Frame { + slot_count: self.slots.len(), + var_count: self.variables.len(), + } + } + + fn pop_frame(&mut self, frame: Frame) { + self.slots.truncate(frame.slot_count); + self.variables.truncate(frame.var_count); + } +} + +pub fn generate(ast: &[Item]) -> Vec { + Generator { + ast, + ..Default::default() + } + .generate() +} diff --git a/hblang/hblang-f/src/lexer.rs b/hblang/hblang-f/src/lexer.rs new file mode 100644 index 0000000..11ee266 --- /dev/null +++ b/hblang/hblang-f/src/lexer.rs @@ -0,0 +1,151 @@ +use logos::Logos; + +macro_rules! 
gen_token { + ($name:ident { + keywords: { + $($keyword:ident = $lit:literal,)* + }, + operators: $op_name:ident { + $($prec:literal: {$( + $op:ident = $op_lit:literal, + )*},)* + }, + types: $ty_type:ident { + $($ty:ident = $ty_lit:literal,)* + }, + regexes: { + $($regex:ident = $regex_lit:literal,)* + }, + }) => { + #[derive(Debug, Clone, PartialEq, Eq, Copy, Logos)] + #[logos(skip "[ \t\n]+")] + pub enum $name { + $(#[token($lit)] $keyword,)* + $($(#[token($op_lit, |_| $op_name::$op)])*)* + Op($op_name), + $(#[token($ty_lit, |_| $ty_type::$ty)])* + Ty($ty_type), + $(#[regex($regex_lit)] $regex,)* + } + + #[derive(Debug, Clone, PartialEq, Eq, Copy)] + pub enum $op_name { + $($($op,)*)* + } + + #[derive(Debug, Clone, PartialEq, Eq, Copy)] + pub enum $ty_type { + $($ty,)* + } + + impl $op_name { + pub fn prec(&self) -> u8 { + match self { + $($($op_name::$op => $prec,)*)* + } + } + } + }; +} + +gen_token! { + TokenKind { + keywords: { + Use = "use", + Fn = "fn", + Let = "let", + If = "if", + Else = "else", + For = "for", + Return = "return", + Break = "break", + Continue = "continue", + Struct = "struct", + + True = "true", + False = "false", + + LBrace = "{", + RBrace = "}", + LParen = "(", + RParen = ")", + LBracket = "[", + RBracket = "]", + + Colon = ":", + Semicolon = ";", + Comma = ",", + Dot = ".", + }, + operators: Op { + 14: { + Assign = "=", + AddAssign = "+=", + SubAssign = "-=", + MulAssign = "*=", + DivAssign = "/=", + ModAssign = "%=", + AndAssign = "&=", + OrAssign = "|=", + XorAssign = "^=", + ShlAssign = "<<=", + ShrAssign = ">>=", + }, + 12: { + Or = "||", + }, + 11: { + And = "&&", + }, + 10: { + Bor = "|", + }, + 9: { + Xor = "^", + }, + 8: { + Band = "&", + }, + 7: { + Eq = "==", + Neq = "!=", + }, + 6: { + Lt = "<", + Gt = ">", + Le = "<=", + Ge = ">=", + }, + 5: { + Shl = "<<", + Shr = ">>", + }, + 4: { + Add = "+", + Sub = "-", + }, + 3: { + Mul = "*", + Div = "/", + Mod = "%", + }, + }, + types: Ty { + U8 = "u8", + U16 = "u16", + U32 = 
"u32", + U64 = "u64", + I8 = "i8", + I16 = "i16", + I32 = "i32", + I64 = "i64", + Bool = "bool", + Void = "void", + }, + regexes: { + Ident = "[a-zA-Z_][a-zA-Z0-9_]*", + String = r#""([^"\\]|\\.)*""#, + Number = "[0-9]+", + }, + } +} diff --git a/hblang/hblang-f/src/lib.rs b/hblang/hblang-f/src/lib.rs new file mode 100644 index 0000000..0477679 --- /dev/null +++ b/hblang/hblang-f/src/lib.rs @@ -0,0 +1,6 @@ +#![allow(dead_code)] + +mod codegen; +mod lexer; +mod parser; +mod typechk; diff --git a/hblang/hblang-f/src/parser.rs b/hblang/hblang-f/src/parser.rs new file mode 100644 index 0000000..e3ed398 --- /dev/null +++ b/hblang/hblang-f/src/parser.rs @@ -0,0 +1,566 @@ +use {core::panic, std::iter}; + +use std::array; + +use logos::{Lexer, Logos}; + +use crate::lexer::{Op, TokenKind, Ty}; + +#[derive(Clone, Debug)] +pub enum Item { + Import(String), + Struct(Struct), + Function(Function), +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Type { + Builtin(Ty), + Struct(String), + Pinter(Box), +} + +#[derive(Clone, Debug)] +pub struct Struct { + pub name: String, + pub fields: Vec, +} + +#[derive(Clone, Debug)] +pub struct Field { + pub name: String, + pub ty: Type, +} + +#[derive(Clone, Debug)] +pub struct Function { + pub name: String, + pub args: Vec, + pub ret: Type, + pub body: Vec, +} + +#[derive(Clone, Debug)] +pub struct Arg { + pub name: String, + pub ty: Type, +} + +#[derive(Clone, Debug)] +pub struct CtorField { + pub name: String, + pub value: Exp, +} + +#[derive(Clone, Debug)] +pub enum Exp { + Literal(Literal), + Variable(String), + Call { + name: String, + args: Vec, + }, + Ctor { + name: Option>, + fields: Vec, + }, + Index { + base: Box, + index: Box, + }, + Field { + base: Box, + field: String, + }, + Unary { + op: Op, + exp: Box, + }, + Binary { + op: Op, + left: Box, + right: Box, + }, + If { + cond: Box, + then: Box, + else_: Option>, + }, + Let { + name: String, + ty: Option, + value: Box, + }, + For { + init: Option>, + cond: Option>, + step: 
Option>, + block: Box, + }, + Block(Vec), + Return(Option>), + Break, + Continue, +} + +#[derive(Clone, Debug)] +pub enum Literal { + Int(u64), + Bool(bool), +} + +#[derive(Debug, PartialEq, Clone)] +pub struct Token { + pub kind: TokenKind, + pub span: std::ops::Range, + pub value: String, +} + +struct Parser<'a> { + next_token: Option, + lexer: logos::Lexer<'a, TokenKind>, +} + +impl<'a> Parser<'a> { + pub fn new(input: &'a str) -> Self { + let mut lexer = TokenKind::lexer(input); + let next_token = Self::next_token(&mut lexer); + Self { next_token, lexer } + } + + pub fn next(&mut self) -> Option { + let token = self.next_token.clone(); + self.next_token = Self::next_token(&mut self.lexer); + token + } + + pub fn next_token(lexer: &mut Lexer) -> Option { + lexer.next().map(|r| { + r.map(|e| Token { + kind: e, + span: lexer.span(), + value: lexer.slice().to_owned(), + }) + .unwrap_or_else(|e| { + let (line, col) = Self::pos_to_line_col_low(lexer.source(), lexer.span().start); + println!("Lexer error: {}:{}: {:?}", line, col, e); + std::process::exit(1); + }) + }) + } + + pub fn pos_to_line_col(&self, pos: usize) -> (usize, usize) { + Self::pos_to_line_col_low(self.lexer.source(), pos) + } + + pub fn pos_to_line_col_low(source: &str, pos: usize) -> (usize, usize) { + let line = source[..pos].lines().count(); + let col = source[..pos].lines().last().map(|l| l.len()).unwrap_or(0); + (line, col) + } + + pub fn expect(&mut self, kind: TokenKind) -> Token { + let token = self.expect_any(); + if token.kind == kind { + token + } else { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!( + "Expected {:?} at {}:{}, found {:?}", + kind, line, col, token.kind + ) + } + } + + pub fn expect_any(&mut self) -> Token { + self.next().unwrap_or_else(|| panic!("Unexpected EOF")) + } + + pub fn peek(&self) -> Option<&Token> { + self.next_token.as_ref() + } + + pub fn try_advance(&mut self, kind: TokenKind) -> bool { + if self.peek().is_some_and(|t| t.kind == kind) { 
+ self.next(); + true + } else { + false + } + } + + pub fn parse(&mut self) -> Vec { + iter::from_fn(|| self.parse_item()).collect() + } + + fn parse_item(&mut self) -> Option { + let token = self.next()?; + match token.kind { + TokenKind::Struct => Some(self.parse_struct()), + TokenKind::Fn => Some(self.parse_function()), + TokenKind::Use => Some(Item::Import(self.expect(TokenKind::String).value)), + tkn => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Unexpected {:?} at {}:{}", tkn, line, col) + } + } + } + + fn parse_struct(&mut self) -> Item { + let name = self.expect(TokenKind::Ident).value; + self.expect(TokenKind::LBrace); + let fields = self.sequence(TokenKind::Comma, TokenKind::RBrace, Self::parse_field); + Item::Struct(Struct { name, fields }) + } + + fn parse_field(&mut self) -> Field { + let name = self.expect(TokenKind::Ident).value; + self.expect(TokenKind::Colon); + let ty = self.type_(); + + Field { name, ty } + } + + fn type_(&mut self) -> Type { + let token = self.next().unwrap(); + match token.kind { + TokenKind::Ty(ty) => Type::Builtin(ty), + TokenKind::Ident => Type::Struct(token.value), + TokenKind::Op(Op::Band) => { + let ty = self.type_(); + Type::Pinter(Box::new(ty)) + } + tkn => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Unexpected {:?} at {}:{}", tkn, line, col) + } + } + } + + fn parse_function(&mut self) -> Item { + let name = self.expect(TokenKind::Ident).value; + self.expect(TokenKind::LParen); + let args = self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_arg); + self.expect(TokenKind::Colon); + let ret = self.type_(); + Item::Function(Function { + name, + args, + ret, + body: self.parse_block(), + }) + } + + fn parse_arg(&mut self) -> Arg { + let name = self.expect(TokenKind::Ident).value; + self.expect(TokenKind::Colon); + let ty = self.type_(); + // the separating comma is consumed by `sequence`, not here + Arg { name, ty } + } + + fn parse_expr(&mut self) -> Exp { + 
self.parse_binary_expr(255) + } + + fn parse_binary_expr(&mut self, min_prec: u8) -> Exp { + let mut lhs = self.parse_unit_expr(); + + while let Some(TokenKind::Op(op)) = self.peek().map(|t| t.kind) { + let prec = op.prec(); + if prec > min_prec { + break; + } + + self.next(); + let rhs = self.parse_binary_expr(prec); + + lhs = Exp::Binary { + op, + left: Box::new(lhs), + right: Box::new(rhs), + }; + } + + lhs + } + + fn parse_unit_expr(&mut self) -> Exp { + let token = self.next().unwrap(); + let mut expr = match token.kind { + TokenKind::True => Exp::Literal(Literal::Bool(true)), + TokenKind::False => Exp::Literal(Literal::Bool(false)), + TokenKind::Ident => Exp::Variable(token.value), + TokenKind::LBrace => { + Exp::Block(self.sequence(TokenKind::Semicolon, TokenKind::RBrace, Self::parse_expr)) + } + TokenKind::LParen => { + let expr = self.parse_expr(); + self.expect(TokenKind::RParen); + expr + } + TokenKind::Number => { + let value = token.value.parse().unwrap(); + Exp::Literal(Literal::Int(value)) + } + TokenKind::Let => { + let name = self.expect(TokenKind::Ident).value; + let ty = self.try_advance(TokenKind::Colon).then(|| self.type_()); + self.expect(TokenKind::Op(Op::Assign)); + let value = self.parse_expr(); + Exp::Let { + name, + ty, + value: Box::new(value), + } + } + TokenKind::If => { + let cond = self.parse_expr(); + let then = Exp::Block(self.parse_block()); + let else_ = self + .try_advance(TokenKind::Else) + .then(|| { + if self.peek().is_some_and(|t| t.kind == TokenKind::If) { + self.parse_expr() + } else { + Exp::Block(self.parse_block()) + } + }) + .map(Box::new); + Exp::If { + cond: Box::new(cond), + then: Box::new(then), + else_, + } + } + TokenKind::For => { + let params = + self.sequence(TokenKind::Semicolon, TokenKind::LBrace, Self::parse_expr); + let mut exprs = Vec::new(); + while !self.try_advance(TokenKind::RBrace) { + exprs.push(self.parse_expr()); + self.try_advance(TokenKind::Semicolon); + } + let block = Exp::Block(exprs); + let 
len = params.len(); + let mut exprs = params.into_iter(); + let [init, consd, step] = array::from_fn(|_| exprs.next()); + match len { + 0 => Exp::For { + init: None, + cond: None, + step: None, + block: Box::new(block), + }, + 1 => Exp::For { + init: None, + cond: init.map(Box::new), + step: None, + block: Box::new(block), + }, + 3 => Exp::For { + init: init.map(Box::new), + cond: consd.map(Box::new), + step: step.map(Box::new), + block: Box::new(block), + }, + _ => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Invalid loop syntax at {}:{}, loop accepts 1 (while), 0 (loop), or 3 (for) statements separated by semicolon", line, col) + } + } + } + TokenKind::Return => { + let value = self + .peek() + .is_some_and(|t| { + !matches!( + t.kind, + TokenKind::Semicolon + | TokenKind::RBrace + | TokenKind::RParen + | TokenKind::Comma + ) + }) + .then(|| Box::new(self.parse_expr())); + Exp::Return(value) + } + TokenKind::Op(op) => Exp::Unary { + op, + exp: Box::new(self.parse_expr()), + }, + TokenKind::Dot => { + let token = self.expect_any(); + match token.kind { + TokenKind::LBrace => { + let fields = self.sequence( + TokenKind::Comma, + TokenKind::RBrace, + Self::parse_ctor_field, + ); + Exp::Ctor { name: None, fields } + } + tkn => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Unexpected {:?} at {}:{}", tkn, line, col) + } + } + } + + TokenKind::Ty(_) + | TokenKind::String + | TokenKind::Use + | TokenKind::Break + | TokenKind::Continue + | TokenKind::Struct + | TokenKind::RBrace + | TokenKind::RParen + | TokenKind::LBracket + | TokenKind::RBracket + | TokenKind::Colon + | TokenKind::Semicolon + | TokenKind::Comma + | TokenKind::Fn + | TokenKind::Else => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Unexpected {:?} at {}:{}", token.kind, line, col) + } + }; + + loop { + match self.peek().map(|t| t.kind) { + Some(TokenKind::LParen) => { + self.next(); + expr = Exp::Call { + name: match expr { + 
Exp::Variable(name) => name, + _ => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Expected function name at {}:{}", line, col) + } + }, + args: self.sequence(TokenKind::Comma, TokenKind::RParen, Self::parse_expr), + }; + } + Some(TokenKind::LBracket) => { + self.next(); + let index = self.parse_expr(); + self.expect(TokenKind::RBracket); + expr = Exp::Index { + base: Box::new(expr), + index: Box::new(index), + }; + } + Some(TokenKind::Dot) => { + self.next(); + + let token = self.expect_any(); + match token.kind { + TokenKind::Ident => { + expr = Exp::Field { + base: Box::new(expr), + field: token.value, + }; + } + TokenKind::LBrace => { + let fields = self.sequence( + TokenKind::Comma, + TokenKind::RBrace, + Self::parse_ctor_field, + ); + expr = Exp::Ctor { + name: Some(Box::new(expr)), + fields, + }; + } + tkn => { + let (line, col) = self.pos_to_line_col(token.span.start); + panic!("Unexpected {:?} at {}:{}", tkn, line, col) + } + } + } + _ => break expr, + } + } + } + + pub fn parse_ctor_field(&mut self) -> CtorField { + let name = self.expect(TokenKind::Ident).value; + self.expect(TokenKind::Colon); + let value = self.parse_expr(); + CtorField { name, value } + } + + pub fn parse_block(&mut self) -> Vec { + self.expect(TokenKind::LBrace); + let mut exprs = Vec::new(); + while !self.try_advance(TokenKind::RBrace) { + exprs.push(self.parse_expr()); + self.try_advance(TokenKind::Semicolon); + } + exprs + } + + pub fn sequence( + &mut self, + sep: TokenKind, + term: TokenKind, + mut parser: impl FnMut(&mut Self) -> T, + ) -> Vec { + let mut items = Vec::new(); + while !self.try_advance(term) { + items.push(parser(self)); + if self.try_advance(term) { + break; + } + self.expect(sep); + } + items + } +} + +pub fn parse(input: &str) -> Vec { + Parser::new(input).parse() +} + +#[cfg(test)] +mod test { + #[test] + fn sanity() { + let input = r#" + struct Foo { + x: i32, + y: i32, + } + + fn main(): void { + let foo = Foo.{ x: 1, y: 2 }; + if 
foo.x > 0 { + return foo.x; + } else { + return foo.y; + } + for i < 10 { + i = i + 1; + } + for let i = 0; i < 10; i = i + 1 { + i = i + 1; + } + i + 1 * 3 / 4 % 5 == 2 + 3 - 4 * 5 / 6 % 7; + fomething(); + pahum(&foo); + lupa(*soo); + return foo.x + foo.y; + } + + fn lupa(x: i32): i32 { + return x; + } + + fn pahum(x: &Foo): void { + return; + } + "#; + let _ = super::parse(input); + } +} diff --git a/hblang/hblang-f/src/typechk.rs b/hblang/hblang-f/src/typechk.rs new file mode 100644 index 0000000..d2f95bc --- /dev/null +++ b/hblang/hblang-f/src/typechk.rs @@ -0,0 +1,20 @@ +use crate::lexer::Ty; + +#[derive(Clone, Debug)] +pub enum Type { + Builtin(Ty), + Struct(StructType), + Pointer(Box), +} + +#[derive(Clone, Debug)] +pub struct StructType { + pub name: String, + pub fields: Vec, +} + +#[derive(Clone, Debug)] +pub struct Field { + pub name: String, + pub ty: Type, +}