From b794fa7c3caa664156364bae8cb0b9fdcd08fd94 Mon Sep 17 00:00:00 2001 From: mlokr Date: Sat, 11 May 2024 22:22:08 +0200 Subject: [PATCH] foo bar --- hbbytecode/src/lib.rs | 2 +- hblang/examples/arithmetic.hb | 2 +- hblang/examples/functions.hb | 6 +- hblang/examples/if_statement.hb | 4 +- hblang/examples/loops.hb | 4 +- hblang/examples/main_fn.hb | 2 +- hblang/examples/variables.hb | 2 +- hblang/src/codegen.rs | 137 +++++++++--- hblang/src/lexer.rs | 206 +++++++++--------- hblang/src/lib.rs | 2 + hblang/src/log.rs | 48 ++++ hblang/src/parser.rs | 151 +++++++------ hblang/test.bin | Bin 1118 -> 660 bytes .../tests/hblang_lexer_tests_arithmetic.txt | 4 +- hblang/tests/hblang_lexer_tests_example.txt | 4 +- spec.md | 84 +++---- 16 files changed, 392 insertions(+), 266 deletions(-) create mode 100644 hblang/src/log.rs diff --git a/hbbytecode/src/lib.rs b/hbbytecode/src/lib.rs index fcd3103d..e979c322 100644 --- a/hbbytecode/src/lib.rs +++ b/hbbytecode/src/lib.rs @@ -80,7 +80,7 @@ unsafe impl BytecodeItem for u8 {} /// ```text /// Types consist of letters meaning a single field /// | Type | Size (B) | Meaning | - /// |:-----|:---------|:------------------------| + /// fn():------------------------| /// | N | 0 | Empty | /// | R | 1 | Register | /// | A | 8 | Absolute address | diff --git a/hblang/examples/arithmetic.hb b/hblang/examples/arithmetic.hb index 6a9289ba..ecf81ffd 100644 --- a/hblang/examples/arithmetic.hb +++ b/hblang/examples/arithmetic.hb @@ -1,3 +1,3 @@ -main := ||: int { +main := fn(): int { return 10 - 20 / 2 + 4 * (2 + 2) - 4 * 4 + 1; } diff --git a/hblang/examples/functions.hb b/hblang/examples/functions.hb index c2229717..272d88e5 100644 --- a/hblang/examples/functions.hb +++ b/hblang/examples/functions.hb @@ -1,13 +1,13 @@ -main := ||: int { +main := fn(): int { return add_one(10) + add_two(20); } -add_two := |x: int|: int { +add_two := fn(x: int): int { return x + 2; } -add_one := |x: int|: int { +add_one := fn(x: int): int { return x + 1; } diff --git a/hblang/examples/if_statement.hb b/hblang/examples/if_statement.hb index 022f4951..b4ea2ecb 100644 --- a/hblang/examples/if_statement.hb +++ b/hblang/examples/if_statement.hb @@ -1,9 +1,9 @@ -main := ||: int { +main := fn(): int { return fib(10); } -fib := |x: int|: int { +fib := fn(x: int): int { if x <= 2 { return 1; } else { diff --git a/hblang/examples/loops.hb b/hblang/examples/loops.hb index dbc9b207..e0f3ff79 100644 --- a/hblang/examples/loops.hb +++ b/hblang/examples/loops.hb @@ -1,8 +1,8 @@ -main := ||: int { +main := fn(): int { return fib(10); } -fib := |n: int|: int { +fib := fn(n: int): int { a := 0; b := 1; loop { diff --git a/hblang/examples/main_fn.hb b/hblang/examples/main_fn.hb index d6d26356..b9659ba9 100644 --- a/hblang/examples/main_fn.hb +++ b/hblang/examples/main_fn.hb @@ -1,3 +1,3 @@ -main := ||: int { +main := fn(): int { return 1; } diff --git a/hblang/examples/variables.hb b/hblang/examples/variables.hb index fc4afe4c..42b63f73 100644 --- a/hblang/examples/variables.hb +++ b/hblang/examples/variables.hb @@ -1,4 +1,4 @@ -main := ||: int { +main := fn(): int { a := 1; b := 2; a = a + 1; diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index d18d21eb..3cfcff55 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -1,6 +1,6 @@ use { crate::{ - instrs, lexer, + instrs, lexer, log, parser::{self, Expr}, }, std::rc::Rc, @@ -9,6 +9,42 @@ use { type LabelId = u32; type Reg = u8; type MaskElem = u64; +type Type = u32; + +mod bt { + use super::*; + + const fn builtin_type(id: u32) -> Type { + Type::MAX - id + } + + macro_rules! builtin_type { + ($($name:ident;)*) => {$( + pub const $name: Type = builtin_type(${index(0)}); + )*}; + } + + builtin_type! { + INT; + BOOL; + MAX; + } +} + +enum TypeKind { + Builtin(Type), + Struct(Type), +} + +impl TypeKind { + fn from_ty(ty: Type) -> Self { + if ty > bt::MAX { + Self::Builtin(ty) + } else { + Self::Struct(ty) + } + } +} const STACK_PTR: Reg = 254; const ZERO: Reg = 0; @@ -55,7 +91,7 @@ impl Func { fn encode(&mut self, (len, instr): (usize, [u8; instrs::MAX_SIZE])) { let name = instrs::NAMES[instr[0] as usize]; - println!( + log::dbg!( "{:08x}: {}: {}", self.code.len(), name, @@ -107,7 +143,7 @@ impl Func { label.offset as i64 - reloc.offset as i64 } + shift; - dbg!( + log::dbg!( label.name.as_ref(), offset, reloc.size, @@ -184,6 +220,11 @@ struct Loop { relocs: Vec, } +struct Struct { + name: Rc, + fields: Vec<(Rc, Type)>, +} + pub struct Codegen<'a> { path: &'a std::path::Path, ret: Expr<'a>, @@ -196,13 +237,14 @@ pub struct Codegen<'a> { stack_relocs: Vec, ret_relocs: Vec, loops: Vec, + records: Vec, } impl<'a> Codegen<'a> { pub fn new() -> Self { Self { path: std::path::Path::new(""), - ret: Expr::Return { val: None }, + ret: Expr::Return { val: None, pos: 0 }, gpa: Default::default(), code: Default::default(), temp: Default::default(), @@ -213,6 +255,7 @@ impl<'a> Codegen<'a> { stack_relocs: Default::default(), ret_relocs: Default::default(), loops: Default::default(), + records: Default::default(), } } @@ -289,9 +332,12 @@ impl<'a> Codegen<'a> { fn expr(&mut self, expr: &'a parser::Expr<'a>, expeted: Option>) -> Option> { use {lexer::TokenKind as T, parser::Expr as E}; match *expr { - E::Decl { - name, - val: E::Closure { ret, body, args }, + E::BinOp { + left: E::Ident { name, .. }, + op: T::Decl, + right: E::Closure { + ret, body, args, .. + }, } => { let frame = self.add_label(name); for (i, &(name, ty)) in args.iter().enumerate() { @@ -310,8 +356,20 @@ impl<'a> Codegen<'a> { self.ret(); None } + E::BinOp { + left: E::Ident { name, .. }, + op: T::Decl, + right, + } => { + let val = self.expr(right, None).unwrap(); + let reg = self.loc_to_reg(val.loc); + let offset = self.alloc_stack(8); + self.decl_var(name, offset, val.ty); + self.store_stack(reg, offset, 8); + None + } E::Call { - func: E::Ident { name }, + func: E::Ident { name, .. }, args, } => { for (i, arg) in args.iter().enumerate() { @@ -328,22 +386,14 @@ impl<'a> Codegen<'a> { loc: Loc::Reg(reg), }) } - E::Decl { name, val } => { - let val = self.expr(val, None).unwrap(); - let reg = self.loc_to_reg(val.loc); - let offset = self.alloc_stack(8); - self.decl_var(name, offset, val.ty); - self.store_stack(reg, offset, 8); - None - } - E::Ident { name } => { + E::Ident { name, .. } => { let var = self.vars.iter().find(|v| v.name.as_ref() == name).unwrap(); Some(Value { ty: var.ty, loc: Loc::Stack(var.offset), }) } - E::Return { val } => { + E::Return { val, .. } => { if let Some(val) = val { let val = self.expr(val, Some(self.ret)).unwrap(); if val.ty != self.ret { @@ -365,21 +415,33 @@ impl<'a> Codegen<'a> { self.code.encode(instrs::jmp(0)); None } - E::Block { stmts } => { + E::Block { stmts, .. } => { for stmt in stmts { self.expr(stmt, None); } None } - E::Number { value } => Some(Value { - ty: expeted.unwrap_or(Expr::Ident { name: "int" }), + E::Number { value, .. } => Some(Value { + ty: expeted.unwrap_or(Expr::Ident { + name: "int", + pos: 0, + }), loc: Loc::Imm(value), }), - E::If { cond, then, else_ } => { - let cond = self.expr(cond, Some(Expr::Ident { name: "bool" })).unwrap(); + E::If { + cond, then, else_, .. + } => { + let cond = self + .expr( + cond, + Some(Expr::Ident { + name: "bool", + pos: 0, + }), + ) + .unwrap(); let reg = self.loc_to_reg(cond.loc); let jump_offset = self.code.code.len() as u32; - println!("jump_offset: {:02x}", jump_offset); self.code.encode(instrs::jeq(reg, 0, 0)); self.gpa.free(reg); @@ -389,7 +451,6 @@ impl<'a> Codegen<'a> { if let Some(else_) = else_ { let else_jump_offset = self.code.code.len() as u32; - println!("jump_offset: {:02x}", jump_offset); self.code.encode(instrs::jmp(0)); jump = self.code.code.len() as i16 - jump_offset as i16; @@ -397,20 +458,18 @@ impl<'a> Codegen<'a> { self.expr(else_, None); let jump = self.code.code.len() as i32 - else_jump_offset as i32; - println!("jump: {:02x}", jump); self.code.code[else_jump_offset as usize + 1..][..4] .copy_from_slice(&jump.to_ne_bytes()); } else { jump = self.code.code.len() as i16 - jump_offset as i16; } - println!("jump: {:02x}", jump); self.code.code[jump_offset as usize + 3..][..2] .copy_from_slice(&jump.to_ne_bytes()); None } - E::Loop { body } => { + E::Loop { body, .. } => { let loop_start = self.code.code.len() as u32; self.loops.push(Loop { offset: loop_start, @@ -434,7 +493,7 @@ impl<'a> Codegen<'a> { None } - E::Break => { + E::Break { .. } => { let loop_ = self.loops.last_mut().unwrap(); let offset = self.code.code.len() as u32; self.code.encode(instrs::jmp(0)); @@ -445,7 +504,7 @@ impl<'a> Codegen<'a> { }); None } - E::Continue => { + E::Continue { .. } => { let loop_ = self.loops.last().unwrap(); let offset = self.code.code.len() as u32; self.code @@ -468,7 +527,10 @@ impl<'a> Codegen<'a> { self.gpa.free(rhs); self.code.encode(instrs::cmpui(lhs, lhs, 1)); return Some(Value { - ty: Expr::Ident { name: "bool" }, + ty: Expr::Ident { + name: "bool", + pos: 0, + }, loc: Loc::Reg(lhs), }); } @@ -478,7 +540,10 @@ impl<'a> Codegen<'a> { self.code.encode(instrs::cmpui(lhs, lhs, 0)); self.code.encode(instrs::not(lhs, lhs)); return Some(Value { - ty: Expr::Ident { name: "bool" }, + ty: Expr::Ident { + name: "bool", + pos: 0, + }, loc: Loc::Reg(lhs), }); } @@ -612,7 +677,7 @@ pub enum Loc { #[cfg(test)] mod tests { - use crate::instrs; + use crate::{instrs, log}; struct TestMem; @@ -624,7 +689,7 @@ mod tests { target: *mut u8, count: usize, ) -> Result<(), hbvm::mem::LoadError> { - println!( + log::dbg!( "read: {:x} {} {:?}", addr.get(), count, @@ -646,14 +711,14 @@ mod tests { source: *const u8, count: usize, ) -> Result<(), hbvm::mem::StoreError> { - println!("write: {:x} {}", addr.get(), count); + log::dbg!("write: {:x} {}", addr.get(), count); unsafe { core::ptr::copy(source, addr.get() as *mut u8, count) } Ok(()) } #[inline] unsafe fn prog_read(&mut self, addr: hbvm::mem::Address) -> T { - println!( + log::dbg!( "read-typed: {:x} {} {:?}", addr.get(), std::any::type_name::(), diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs index 5c9d1748..625b92e3 100644 --- a/hblang/src/lexer.rs +++ b/hblang/src/lexer.rs @@ -11,86 +11,103 @@ impl Token { } } -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub enum TokenKind { - Ident, - Number, - LParen, - RParen, - LBrace, - RBrace, - LBrack, - RBrack, - Decl, - Assign, - Plus, - Minus, - Star, - FSlash, - Bor, - Or, - Le, - Eq, - Semi, - Colon, - Comma, - Return, - If, - Else, - Loop, - Break, - Continue, - Eof, - Error, +macro_rules! gen_token_kind { + ($( + #[$atts:meta])* + $vis:vis enum $name:ident { + #[patterns] $( + $pattern:ident, + )* + #[keywords] $( + $keyword:ident = $keyword_lit:literal, + )* + #[punkt] $( + $punkt:ident = $punkt_lit:literal, + )* + #[ops] $( + #[prec = $prec:literal] $( + $op:ident = $op_lit:literal, + )* + )* + } + ) => { + impl std::fmt::Display for $name { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let s = match *self { + $( Self::$pattern => concat!('<', stringify!($pattern), '>'), )* + + $( Self::$keyword => stringify!($keyword_lit), )* + $( Self::$punkt => stringify!($punkt_lit), )* + $($( Self::$op => $op_lit, )*)* + }; + f.write_str(s) + } + } + + impl $name { + #[inline(always)] + pub fn precedence(&self) -> Option { + Some(match self { + $($(Self::$op)|* => $prec,)* + _ => return None, + }) + } + + #[inline(always)] + fn from_ident(ident: &[u8]) -> Self { + match ident { + $($keyword_lit => Self::$keyword,)* + _ => Self::Ident, + } + } + } + + #[derive(Debug, PartialEq, Eq, Clone, Copy)] + $vis enum $name { + $( $pattern, )* + $( $keyword, )* + $( $punkt, )* + $($( $op, )*)* + } + }; } -impl std::fmt::Display for TokenKind { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - use TokenKind as T; - let s = match self { - T::Ident => "", - T::Number => "", - T::LParen => "(", - T::RParen => ")", - T::LBrace => "{", - T::RBrace => "}", - T::LBrack => "[", - T::RBrack => "]", - T::Decl => ":=", - T::Assign => "=", - T::Plus => "+", - T::Minus => "-", - T::Star => "*", - T::FSlash => "/", - T::Bor => "|", - T::Or => "||", - T::Le => "<=", - T::Eq => "==", - T::Semi => ";", - T::Colon => ":", - T::Comma => ",", - T::Return => "return", - T::If => "if", - T::Else => "else", - T::Loop => "loop", - T::Break => "break", - T::Continue => "continue", - T::Eof => "", - T::Error => "", - }; - write!(f, "{}", s) - } -} - -impl TokenKind { - pub fn precedence(&self) -> Option { - Some(match self { - Self::Assign => 1, - Self::Le | Self::Eq => 21, - Self::Plus | Self::Minus => 23, - Self::Star | Self::FSlash => 24, - _ => return None, - }) +gen_token_kind! { + pub enum TokenKind { + #[patterns] + Ident, + Number, + Eof, + Error, + #[keywords] + Return = b"return", + If = b"if", + Else = b"else", + Loop = b"loop", + Break = b"break", + Continue = b"continue", + Fn = b"fn", + #[punkt] + LParen = b'(', + RParen = b')', + LBrace = b'{', + RBrace = b'}', + Semi = b';', + Colon = b':', + Comma = b',', + #[ops] + #[prec = 1] + Decl = ":=", + Assign = "=", + #[prec = 21] + Le = "<=", + Eq = "==", + #[prec = 23] + Plus = "+", + Minus = "-", + #[prec = 24] + Star = "*", + FSlash = "/", } } @@ -174,44 +191,23 @@ impl<'a> Iterator for Lexer<'a> { } let ident = &self.bytes[start as usize..self.pos as usize]; - match ident { - b"return" => T::Return, - b"if" => T::If, - b"else" => T::Else, - b"loop" => T::Loop, - b"break" => T::Break, - b"continue" => T::Continue, - _ => T::Ident, - } + T::from_ident(ident) } - b':' => match self.advance_if(b'=') { - true => T::Decl, - false => T::Colon, - }, + b':' if self.advance_if(b'=') => T::Decl, + b':' => T::Colon, b',' => T::Comma, b';' => T::Semi, - b'=' => match self.advance_if(b'=') { - true => T::Eq, - false => T::Assign, - }, - b'<' => match self.advance_if(b'=') { - true => T::Le, - false => T::Error, - }, + b'=' if self.advance_if(b'=') => T::Eq, + b'=' => T::Assign, + b'<' if self.advance_if(b'=') => T::Le, b'+' => T::Plus, b'-' => T::Minus, b'*' => T::Star, b'/' => T::FSlash, - b'|' => match self.advance_if(b'|') { - true => T::Or, - false => T::Bor, - }, b'(' => T::LParen, b')' => T::RParen, b'{' => T::LBrace, b'}' => T::RBrace, - b'[' => T::LBrack, - b']' => T::RBrack, _ => T::Error, }; diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index 51b21e89..909fd2c0 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -1,4 +1,5 @@ #![feature(noop_waker)] +#![feature(macro_metavar_expr)] #![feature(let_chains)] #![feature(non_null_convenience)] #![allow(dead_code)] @@ -18,6 +19,7 @@ mod codegen; mod ident; mod instrs; mod lexer; +mod log; mod parser; mod tests; mod typechk; diff --git a/hblang/src/log.rs b/hblang/src/log.rs new file mode 100644 index 00000000..92b6b0fe --- /dev/null +++ b/hblang/src/log.rs @@ -0,0 +1,48 @@ +#![allow(unused_macros)] + +#[derive(PartialOrd, PartialEq, Ord, Eq, Debug)] +pub enum Level { + Err, + Wrn, + Inf, + Dbg, +} + +pub const LOG_LEVEL: Level = match option_env!("LOG_LEVEL") { + Some(val) => match val.as_bytes()[0] { + b'e' => Level::Err, + b'w' => Level::Wrn, + b'i' => Level::Inf, + b'd' => Level::Dbg, + _ => panic!("Invalid log level."), + }, + None => { + if cfg!(debug_assertions) { + Level::Dbg + } else { + Level::Err + } + } +}; + +macro_rules! log { + ($level:expr, $fmt:literal $($expr:tt)*) => { + if $level <= $crate::log::LOG_LEVEL { + println!("{:?}: {}", $level, format_args!($fmt $($expr)*)); + } + }; + + ($level:expr, $($arg:expr),*) => { + if $level <= $crate::log::LOG_LEVEL { + $(println!("[{}{}{}][{:?}]: {} = {:?}", line!(), column!(), file!(), $level, stringify!($arg), $arg);)* + } + }; +} + +macro_rules! err { ($($arg:tt)*) => { $crate::log::log!($crate::log::Level::Err, $($arg)*) }; } +macro_rules! wrn { ($($arg:tt)*) => { $crate::log::log!($crate::log::Level::Wrn, $($arg)*) }; } +macro_rules! inf { ($($arg:tt)*) => { $crate::log::log!($crate::log::Level::Inf, $($arg)*) }; } +macro_rules! dbg { ($($arg:tt)*) => { $crate::log::log!($crate::log::Level::Dbg, $($arg)*) }; } + +#[allow(unused_imports)] +pub(crate) use {dbg, err, inf, log, wrn}; diff --git a/hblang/src/parser.rs b/hblang/src/parser.rs index 23a652be..fa3b3197 100644 --- a/hblang/src/parser.rs +++ b/hblang/src/parser.rs @@ -70,69 +70,61 @@ impl<'a, 'b> Parser<'a, 'b> { } fn unit_expr(&mut self) -> Expr<'a> { + use {Expr as E, TokenKind as T}; let token = self.next(); let mut expr = match token.kind { - TokenKind::Ident => { - let name = self.arena.alloc_str(self.lexer.slice(token)); - if self.advance_if(TokenKind::Decl) { - let val = self.ptr_expr(); - Expr::Decl { name, val } - } else { - Expr::Ident { name } - } - } - TokenKind::If => { - let cond = self.ptr_expr(); - let then = self.ptr_expr(); - let else_ = self.advance_if(TokenKind::Else).then(|| self.ptr_expr()); - Expr::If { cond, then, else_ } - } - TokenKind::Loop => Expr::Loop { + T::Ident => E::Ident { + pos: token.start, + name: self.arena.alloc_str(self.lexer.slice(token)), + }, + T::If => E::If { + pos: token.start, + cond: self.ptr_expr(), + then: self.ptr_expr(), + else_: self.advance_if(T::Else).then(|| self.ptr_expr()), + }, + T::Loop => E::Loop { + pos: token.start, body: self.ptr_expr(), }, - TokenKind::Break => Expr::Break, - TokenKind::Continue => Expr::Continue, - TokenKind::Return => Expr::Return { - val: (self.token.kind != TokenKind::Semi).then(|| self.ptr_expr()), + T::Break => E::Break { pos: token.start }, + T::Continue => E::Continue { pos: token.start }, + T::Return => E::Return { + pos: token.start, + val: (self.token.kind != T::Semi).then(|| self.ptr_expr()), }, - TokenKind::Or => { - self.expect_advance(TokenKind::Colon); - let ret = self.ptr_expr(); - let body = self.ptr_expr(); - Expr::Closure { - ret, - body, - args: &[], - } - } - TokenKind::Bor => { - let args = self.collect(|s| { - s.advance_if(TokenKind::Bor).not().then(|| { - let name = s.expect_advance(TokenKind::Ident); + T::Fn => E::Closure { + pos: token.start, + args: { + self.expect_advance(T::LParen); + self.collect_list(T::Comma, T::RParen, |s| { + let name = s.expect_advance(T::Ident); let name = s.arena.alloc_str(s.lexer.slice(name)); - s.expect_advance(TokenKind::Colon); + s.expect_advance(T::Colon); let val = s.expr(); - s.advance_if(TokenKind::Comma); (name, val) }) - }); - self.expect_advance(TokenKind::Colon); - let ret = self.ptr_expr(); - let body = self.ptr_expr(); - Expr::Closure { args, ret, body } - } - TokenKind::LBrace => Expr::Block { - stmts: self.collect(|s| (!s.advance_if(TokenKind::RBrace)).then(|| s.expr())), + }, + ret: { + self.expect_advance(T::Colon); + self.ptr_expr() + }, + body: self.ptr_expr(), }, - TokenKind::Number => Expr::Number { + T::LBrace => E::Block { + pos: token.start, + stmts: self.collect_list(T::Semi, T::RBrace, Self::expr), + }, + T::Number => E::Number { + pos: token.start, value: match self.lexer.slice(token).parse() { Ok(value) => value, Err(e) => self.report(format_args!("invalid number: {e}")), }, }, - TokenKind::LParen => { + T::LParen => { let expr = self.expr(); - self.expect_advance(TokenKind::RParen); + self.expect_advance(T::RParen); expr } tok => self.report(format_args!("unexpected token: {tok:?}")), @@ -144,13 +136,7 @@ impl<'a, 'b> Parser<'a, 'b> { self.next(); Expr::Call { func: self.arena.alloc(expr), - args: self.collect(|s| { - s.advance_if(TokenKind::RParen).not().then(|| { - let arg = s.expr(); - s.advance_if(TokenKind::Comma); - arg - }) - }), + args: self.collect_list(TokenKind::Comma, TokenKind::RParen, Self::expr), } } _ => break, @@ -162,6 +148,21 @@ impl<'a, 'b> Parser<'a, 'b> { expr } + fn collect_list( + &mut self, + delim: TokenKind, + end: TokenKind, + mut f: impl FnMut(&mut Self) -> T, + ) -> &'a [T] { + self.collect(|s| { + s.advance_if(end).not().then(|| { + let val = f(s); + s.advance_if(delim); + val + }) + }) + } + fn collect(&mut self, mut f: impl FnMut(&mut Self) -> Option) -> &'a [T] { let vec = std::iter::from_fn(|| f(self)).collect::>(); self.arena.alloc_slice(&vec) @@ -195,13 +196,14 @@ impl<'a, 'b> Parser<'a, 'b> { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Expr<'a> { - Break, - Continue, - Decl { - name: &'a str, - val: &'a Expr<'a>, + Break { + pos: u32, + }, + Continue { + pos: u32, }, Closure { + pos: u32, args: &'a [(&'a str, Expr<'a>)], ret: &'a Expr<'a>, body: &'a Expr<'a>, @@ -211,15 +213,19 @@ pub enum Expr<'a> { args: &'a [Expr<'a>], }, Return { + pos: u32, val: Option<&'a Expr<'a>>, }, Ident { + pos: u32, name: &'a str, }, Block { + pos: u32, stmts: &'a [Expr<'a>], }, Number { + pos: u32, value: u64, }, BinOp { @@ -228,11 +234,13 @@ pub enum Expr<'a> { right: &'a Expr<'a>, }, If { + pos: u32, cond: &'a Expr<'a>, then: &'a Expr<'a>, else_: Option<&'a Expr<'a>>, }, Loop { + pos: u32, body: &'a Expr<'a>, }, } @@ -244,18 +252,21 @@ impl<'a> std::fmt::Display for Expr<'a> { } match *self { - Self::Break => write!(f, "break;"), - Self::Continue => write!(f, "continue;"), - Self::If { cond, then, else_ } => { + Self::Break { .. } => write!(f, "break;"), + Self::Continue { .. } => write!(f, "continue;"), + Self::If { + cond, then, else_, .. + } => { write!(f, "if {} {}", cond, then)?; if let Some(else_) = else_ { write!(f, " else {}", else_)?; } Ok(()) } - Self::Loop { body } => write!(f, "loop {}", body), - Self::Decl { name, val } => write!(f, "{} := {}", name, val), - Self::Closure { ret, body, args } => { + Self::Loop { body, .. } => write!(f, "loop {}", body), + Self::Closure { + ret, body, args, .. + } => { write!(f, "|")?; let first = &mut true; for (name, val) in args { @@ -277,10 +288,10 @@ impl<'a> std::fmt::Display for Expr<'a> { } write!(f, ")") } - Self::Return { val: Some(val) } => write!(f, "return {};", val), - Self::Return { val: None } => write!(f, "return;"), - Self::Ident { name } => write!(f, "{}", name), - Self::Block { stmts } => { + Self::Return { val: Some(val), .. } => write!(f, "return {};", val), + Self::Return { val: None, .. } => write!(f, "return;"), + Self::Ident { name, .. } => write!(f, "{}", name), + Self::Block { stmts, .. } => { writeln!(f, "{{")?; INDENT.with(|i| i.set(i.get() + 1)); let res = (|| { @@ -296,7 +307,7 @@ impl<'a> std::fmt::Display for Expr<'a> { write!(f, "}}")?; res } - Self::Number { value } => write!(f, "{}", value), + Self::Number { value, .. } => write!(f, "{}", value), Self::BinOp { left, right, op } => { let display_branch = |f: &mut std::fmt::Formatter, expr: &Self| { if let Self::BinOp { op: lop, .. } = expr diff --git a/hblang/test.bin b/hblang/test.bin index 47de0c8a127f15117a6e301c2ac7d9a2d37627c2..50f07108781966eba61495b681a727715fdea8b7 100644 GIT binary patch literal 660 zcmb_aF%H5o40Ixas!lurkuUHFl%;By7Iqe9p3M_b5(lnr9NGvrJVf!uKA-IjhAJXb z#~7cl**X(}Ojgkn=O1`jkIvqt=HMa6`nxIqAfjOL*RdO#(dup<4756th^;aAPgQV18 vnCH90`Y_+tFDp5zRD18N_SnNCBF?F82=3c1if%+TXp=~M80;?N|KHskgLcB- literal 1118 zcmb`Gy>f#v49Df%rN^O|(M z!py&*xo=$5AJq@*crJKP}u0gJMmF_&E9w+H~6b zkWM$O9NrLzPsHrLBYq>)&_5IDkMt^fpbwq6b=DgEH^u7S^PNyRS$UZo=IXyPS98Ez U%?)!kXUx@HGFNlV{PC~w4}e)hKmY&$ diff --git a/hblang/tests/hblang_lexer_tests_arithmetic.txt b/hblang/tests/hblang_lexer_tests_arithmetic.txt index e44cf7dc..6e255bf0 100644 --- a/hblang/tests/hblang_lexer_tests_arithmetic.txt +++ b/hblang/tests/hblang_lexer_tests_arithmetic.txt @@ -1,6 +1,8 @@ Ident "main" Decl ":=" -Or "||" +Fn "fn" +LParen "(" +RParen ")" Colon ":" Ident "int" LBrace "{" diff --git a/hblang/tests/hblang_lexer_tests_example.txt b/hblang/tests/hblang_lexer_tests_example.txt index 0d80f72c..7df9f67d 100644 --- a/hblang/tests/hblang_lexer_tests_example.txt +++ b/hblang/tests/hblang_lexer_tests_example.txt @@ -1,6 +1,8 @@ Ident "main" Decl ":=" -Or "||" +Fn "fn" +LParen "(" +RParen ")" Colon ":" Ident "int" LBrace "{" diff --git a/spec.md b/spec.md index df411699..86e14cde 100644 --- a/spec.md +++ b/spec.md @@ -50,7 +50,7 @@ of offset in the code. Not from the beginning of current or following instructio ## Rounding modes | Rounding mode | Value | -|:-------------------------|:------| +fn():------| | To nearest, ties to even | 0b00 | | Towards 0 (truncate) | 0b01 | | Towards +∞ (up) | 0b10 | @@ -89,7 +89,7 @@ Program counter stays on the currently executed instruction - Type `N` | Opcode | Mnemonic | Action | -|:-------|:---------|:--------------------------------------------| +fn():--------------------------------------------| | 0x00 | UN | Throw unreachable code exception | | 0x01 | TX | Terminate execution (eg. on end of program) | | 0x02 | NOP | Do nothing | @@ -100,7 +100,7 @@ Program counter stays on the currently executed instruction ## Addition (`+`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x03 | ADD8 | Xi8 | | 0x04 | ADD16 | Xi16 | | 0x05 | ADD32 | Xi32 | @@ -108,7 +108,7 @@ Program counter stays on the currently executed instruction ## Subtraction (`-`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x07 | SUB8 | Xi8 | | 0x08 | SUB16 | Xi16 | | 0x09 | SUB32 | Xi32 | @@ -116,7 +116,7 @@ Program counter stays on the currently executed instruction ## Multiplication (`*`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x0B | MUL8 | Xi8 | | 0x0C | MUL16 | Xi16 | | 0x0D | MUL32 | Xi32 | @@ -124,14 +124,14 @@ Program counter stays on the currently executed instruction ## Bitwise ops (type: Xi64) | Opcode | Mnemonic | Operation | -|:-------|:---------|:--------------------| +fn():--------------------| | 0x0F | AND | Conjunction (&) | | 0x10 | OR | Disjunction (\|) | | 0x11 | XOR | Non-equivalence (^) | ## Unsigned left bitshift (`<<`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x12 | SLU8 | Ui8 | | 0x13 | SLU16 | Ui16 | | 0x14 | SLU32 | Ui32 | @@ -139,7 +139,7 @@ Program counter stays on the currently executed instruction ## Unsigned right bitshift (`>>`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x16 | SRU8 | Ui8 | | 0x17 | SRU16 | Ui16 | | 0x18 | SRU32 | Ui32 | @@ -147,7 +147,7 @@ Program counter stays on the currently executed instruction ## Signed right bitshift (`>>`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x1A | SRS8 | Si8 | | 0x1B | SRS16 | Si16 | | 0x1C | SRS32 | Si32 | @@ -158,13 +158,13 @@ Program counter stays on the currently executed instruction - Operation: `#0 ← #1 <=> #2` | Ordering | Number | -|:---------|:-------| +fn():-------| | < | -1 | | = | 0 | | > | 1 | | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x1E | CMPU | Ui64 | | 0x1F | CMPS | Si64 | @@ -179,7 +179,7 @@ Program counter stays on the currently executed instruction - `#1 ← #2` | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x20 | DIRU8 | Ui8 | | 0x21 | DIRU16 | Ui16 | | 0x22 | DIRU32 | Ui32 | @@ -194,7 +194,7 @@ Program counter stays on the currently executed instruction - Operation: `#0 ← #1` | Opcode | Mnemonic | Operation | -|:-------|:---------|:-------------------------| +fn():-------------------------| | 0x28 | NEG | Bitwise complement (`~`) | | 0x29 | NOT | Logical negation (`!`) | @@ -202,7 +202,7 @@ Program counter stays on the currently executed instruction - Operation: `#0 ← Si64(#1)` | Opcode | Mnemonic | Source type | -|:-------|:---------|:------------| +fn():------------| | 0x2A | SXT8 | Si8 | | 0x2B | SXT16 | Si16 | | 0x2C | SXT32 | Si32 | @@ -213,7 +213,7 @@ Program counter stays on the currently executed instruction ## Addition (`+`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x2D | ADDI8 | Xi8 | | 0x2E | ADDI16 | Xi16 | | 0x2F | ADDI32 | Xi32 | @@ -221,7 +221,7 @@ Program counter stays on the currently executed instruction ## Multiplication (`*`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x31 | MULI8 | Xi8 | | 0x32 | MULI16 | Xi16 | | 0x33 | MULI32 | Xi32 | @@ -229,7 +229,7 @@ Program counter stays on the currently executed instruction ## Bitwise ops (type: Xi64) | Opcode | Mnemonic | Operation | -|:-------|:---------|:--------------------| +fn():--------------------| | 0x35 | ANDI | Conjunction (&) | | 0x36 | ORI | Disjunction (\|) | | 0x37 | XORI | Non-equivalence (^) | @@ -240,7 +240,7 @@ Program counter stays on the currently executed instruction ## Unsigned left bitshift (`<<`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x38 | SLUI8 | Ui8 | | 0x39 | SLUI16 | Ui16 | | 0x3A | SLUI32 | Ui32 | @@ -248,7 +248,7 @@ Program counter stays on the currently executed instruction ## Unsigned right bitshift (`>>`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x3C | SRUI8 | Ui8 | | 0x3D | SRUI16 | Ui16 | | 0x3E | SRUI32 | Ui32 | @@ -256,7 +256,7 @@ Program counter stays on the currently executed instruction ## Signed right bitshift (`>>`) | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x40 | SRSI8 | Si8 | | 0x41 | SRSI16 | Si16 | | 0x42 | SRSI32 | Si32 | @@ -268,7 +268,7 @@ Program counter stays on the currently executed instruction - Comparsion table same for register-register one | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x44 | CMPUI | Ui64 | | 0x45 | CMPSI | Si64 | @@ -276,7 +276,7 @@ Program counter stays on the currently executed instruction - Type: `RR` | Opcode | Mnemonic | Operation | -|:-------|:---------|:---------------------------------| +fn():---------------------------------| | 0x46 | CP | Copy register value (`#0 ← #1`) | | 0x47 | SWA | Swap register values (`#0 ⇆ #1`) | @@ -286,7 +286,7 @@ Program counter stays on the currently executed instruction - Operation: `#0 ← $1` | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x48 | LI8 | Xi8 | | 0x49 | LI16 | Xi16 | | 0x4A | Li32 | Xi32 | @@ -298,7 +298,7 @@ Program counter stays on the currently executed instruction - Operation: `#0 ← pc + #1 + $2` | Opcode | Mnemonic | -|:-------|:---------| +fn():---------| | 0x4C | LRA | # Memory access operations @@ -313,7 +313,7 @@ Program counter stays on the currently executed instruction - Computes address from base register and absolute offset | Opcode | Mnemonic | Operation | -|:-------|:---------|:-------------------| +fn():-------------------| | 0x4D | LD | `#0 ← $3[#1 + $2]` | | 0x4E | ST | `$3[#1 + $2] ← #0` | @@ -322,7 +322,7 @@ Program counter stays on the currently executed instruction - Computes address from register and offset from program counter | Opcode | Mnemonic | Operation | -|:-------|:---------|:------------------------| +fn():------------------------| | 0x4F | LDR | `#0 ← $3[pc + #1 + $2]` | | 0x50 | STR | `$3[pc + #1 + $2] ← #0` | @@ -331,7 +331,7 @@ Program counter stays on the currently executed instruction - Copies block of `$3` bytes from memory location on address on `#0` to `#1` | Opcode | Mnemonic | Operation | -|:-------|:---------|:------------------| +fn():------------------| | 0x51 | BMC | `$3[#1] ← $3[x0]` | # Block register copy @@ -340,14 +340,14 @@ Program counter stays on the currently executed instruction - Copying over the 256 registers causes an exception | Opcode | Mnemonic | Operation | -|:-------|:---------|:--------------| +fn():--------------| | 0x52 | BRC | `$3#1 ← $3#0` | # Relative jump - Type: `O` | Opcode | Mnemonic | Operation | -|:-------|:---------|:---------------| +fn():---------------| | 0x53 | JMP | `pc ← pc + $0` | # Linking jump @@ -357,7 +357,7 @@ Program counter stays on the currently executed instruction - Jump to specified address | Opcode | Mnemonic | Instruction type | Address | -|:-------|:---------|:------------------|:-------------------------| +fn():-------------------------| | 0x54 | JAL | RRO (size = 6 B) | Relative, `pc + #1 + $2` | | 0x55 | JALA | RRA (size = 10 B) | Absolute, `#1 + $2` | @@ -367,7 +367,7 @@ Program counter stays on the currently executed instruction - Operation: `if #0 #1 { pc ← pc + $2 }` | Opcode | Mnemonic | Condition | Type | -|:-------|:---------|:-------------------|:-----| +fn():-----| | 0x56 | JEQ | Equals (`=`) | Xi64 | | 0x57 | JNE | Not-equals (`≠`) | Xi64 | | 0x58 | JLTU | Less-than (`<`) | Ui64 | @@ -380,7 +380,7 @@ Program counter stays on the currently executed instruction - Type: `N` | Opcode | Mnemonic | Trap type | -|:-------|:---------|:-----------------| +fn():-----------------| | 0x5C | ECA | Environment call | | 0x5D | EBP | Breakpoint | @@ -389,7 +389,7 @@ Program counter stays on the currently executed instruction - Operation: `#0 ← #1 #2` | Opcode | Mnemonic | Operation | Type | -|:-------|:---------|:---------------------|:-----| +fn():-----| | 0x5E | FADD32 | Addition (`+`) | Fl32 | | 0x5F | FADD64 | Addition (`+`) | Fl64 | | 0x60 | FSUB32 | Subtraction (`-`) | Fl32 | @@ -404,7 +404,7 @@ Program counter stays on the currently executed instruction - Operation: `#0 ← (#1 * #2) + #3` | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x66 | FMA32 | Fl32 | | 0x67 | FMA64 | Fl64 | @@ -415,7 +415,7 @@ Program counter stays on the currently executed instruction - NaN is less-than/greater-than depends on variant | Opcode | Mnemonic | Type | NaN is | -|:-------|:---------|:-----|:-------| +fn():-------| | 0x6A | FCMPLT32 | Fl32 | < | | 0x6B | FCMPLT64 | Fl64 | < | | 0x6C | FCMPGT32 | Fl32 | > | @@ -427,7 +427,7 @@ Program counter stays on the currently executed instruction - Operation: `#0 ← Fl(#1)` | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x6E | ITF32 | Fl32 | | 0x6F | ITF64 | Fl64 | @@ -437,7 +437,7 @@ Program counter stays on the currently executed instruction - Immediate `$2` specifies rounding mode | Opcode | Mnemonic | Type | -|:-------|:---------|:-----| +fn():-----| | 0x70 | FTI32 | Fl32 | | 0x71 | FTI64 | Fl64 | @@ -446,7 +446,7 @@ Program counter stays on the currently executed instruction - Operation: `#0 ← Fl64(#1)` | Opcode | Mnemonic | -|:-------|:---------| +fn():---------| | 0x72 | FC32T64 | # Fl64 to Fl32 @@ -455,13 +455,13 @@ Program counter stays on the currently executed instruction - Immediate `$2` specified rounding mode | Opcode | Mnemonic | -|:-------|:---------| +fn():---------| | 0x73 | FC64T32 | # 16-bit relative address instruction variants | Opcode | Mnemonic | Type | Variant of | -|:-------|:---------|:-----|:-----------| +fn():-----------| | 0x74 | LRA16 | RRP | LRA | | 0x75 | LDR16 | RRPH | LDR | | 0x76 | STR16 | RRPH | STR | @@ -472,7 +472,7 @@ Program counter stays on the currently executed instruction - One byte is 8 bits | C Type | Description | Byte sizes | -|:------------|:-------------------------|:-----------| +fn():-----------| | char | Character / byte | 1 | | short | Short integer | 2 | | int | Integer | 4 | @@ -491,7 +491,7 @@ Program counter stays on the currently executed instruction - Registers r32 – r255 are callee saved | Register | Description | Saver | -|:-----------|:--------------------|:-------| +fn():-------| | r0 | Hard-wired zero | N/A | | r1 - r2 | Return values | Caller | | r2 - r11 | Function parameters | Caller |