From af19f4e30db9c89d3b97c958e1c162860f47efc8 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Sat, 21 Dec 2024 14:21:58 +0100 Subject: [PATCH] making the identifiers accessible if they are captured Signed-off-by: Jakub Doka --- depell/src/static-pages/developing-hblang.md | 4 +- lang/README.md | 19 +++++ lang/src/parser.rs | 34 +++++--- lang/src/son.rs | 84 +++++++++++++------ lang/src/ty.rs | 8 +- .../son_tests_proper_ident_propagation.txt | 6 ++ 6 files changed, 115 insertions(+), 40 deletions(-) create mode 100644 lang/tests/son_tests_proper_ident_propagation.txt diff --git a/depell/src/static-pages/developing-hblang.md b/depell/src/static-pages/developing-hblang.md index 15b45700..0e2022f5 100644 --- a/depell/src/static-pages/developing-hblang.md +++ b/depell/src/static-pages/developing-hblang.md @@ -1,4 +1,4 @@ -# The journey to optimizing compiler +# The journey to an optimizing compiler It's been years since I was continuously trying to make a compiler to implement language of my dreams. Problem was tho that I wanted something similar to Rust, which if you did not know, `rustc` far exceeded the one million lines of code mark some time ago, so implementing such language would take me years if not decades, but I still tired it. @@ -58,4 +58,4 @@ Its stupid but its the world we live in, optimizers are usually a black box you But wait its worse! Since optimizers wont ever share the fact you are stupid, we end up with other people painstakingly writing complex linters, that will do a shitty job detecting things that matter, and instead whine about style and other bullcrap (and they suck even at that). If the people who write linters and people who write optimizers swapped the roles, I would be ranting about optimizers instead. -And so, this is the area where I want to innovate, lets report the dead code to the frontend, and let the compiler frontend filter out the noise and show relevant information in the diagnostics. 
Refuse to compile the program if you `i /= 0`. Refuse to compile if you `arr[arr.len]`. This is the level of stupid optimizer sees, once it normalizes your code, but proceeds to protect your feeling. And hblang will relay this to you as much as possible. If we can query for optimizations, we can query for bugs too. +And so, this is the area where I want to innovate, lets report the dead code to the frontend, and let the compiler frontend filter out the noise and show relevant information in the diagnostics. Refuse to compile the program if you `i /= 0`. Refuse to compile if you `arr[arr.len]`. This is the level of stupid optimizer sees, once it normalizes your code, but proceeds to protect your feelings. My goal is for hblang to relay this to you as much as possible. If we can query for optimizations, we can query for bugs too. diff --git a/lang/README.md b/lang/README.md index e12d0625..cf3e655b 100644 --- a/lang/README.md +++ b/lang/README.md @@ -774,6 +774,25 @@ main := fn(): uint { ### Purely Testing Examples +#### proper_ident_propagation +```hb +A := fn($T: type): type return struct {a: T} + +$make_a := fn(a: @Any()): A(@TypeOf(a)) { + return .(a) +} + +$make_b := fn(a: @Any()): struct {b: @TypeOf(a)} { + return .(a) +} + +main := fn(): uint { + a := make_a(100) + b := make_b(100) + return a.a - b.b +} +``` + #### method_receiver_by_value ```hb $log := fn(ptr: ^u8): void return @eca(37, ptr) diff --git a/lang/src/parser.rs b/lang/src/parser.rs index 38371ab6..56e13f71 100644 --- a/lang/src/parser.rs +++ b/lang/src/parser.rs @@ -80,6 +80,7 @@ struct ScopeIdent { declared: bool, ordered: bool, used: bool, + is_ct: bool, flags: IdentFlags, } @@ -196,8 +197,8 @@ impl<'a, 'b> Parser<'a, 'b> { fn declare_rec(&mut self, expr: &Expr, top_level: bool) { match *expr { - Expr::Ident { pos, id, is_first, .. } => { - self.declare(pos, id, !top_level, is_first || top_level) + Expr::Ident { pos, id, is_first, is_ct, .. 
} => { + self.declare(pos, id, !top_level, is_first || top_level, is_ct) } Expr::Ctor { fields, .. } => { for CtorField { value, .. } in fields { @@ -208,7 +209,7 @@ impl<'a, 'b> Parser<'a, 'b> { } } - fn declare(&mut self, pos: Pos, id: Ident, ordered: bool, valid_order: bool) { + fn declare(&mut self, pos: Pos, id: Ident, ordered: bool, valid_order: bool, is_ct: bool) { if !valid_order { self.report( pos, @@ -230,7 +231,7 @@ impl<'a, 'b> Parser<'a, 'b> { ); return; } - + self.ctx.idents[index].is_ct = is_ct; self.ctx.idents[index].ordered = ordered; } @@ -267,6 +268,7 @@ impl<'a, 'b> Parser<'a, 'b> { declared: false, used: false, ordered: false, + is_ct: false, flags: 0, }); (self.ctx.idents.len() - 1, self.ctx.idents.last_mut().unwrap(), true) @@ -276,7 +278,7 @@ impl<'a, 'b> Parser<'a, 'b> { id.flags |= idfl::COMPTIME * is_ct as u32; if id.declared && id.ordered && self.ns_bound > i { id.flags |= idfl::COMPTIME; - self.ctx.captured.push(id.ident); + self.ctx.captured.push(CapturedIdent { id: id.ident, is_ct: id.is_ct }); } (id.ident, bl) @@ -472,7 +474,7 @@ impl<'a, 'b> Parser<'a, 'b> { self.collect_list(T::Comma, T::RParen, |s| { let name = s.advance_ident()?; let (id, _) = s.resolve_ident(name); - s.declare(name.start, id, true, true); + s.declare(name.start, id, true, true, name.kind == T::CtIdent); s.expect_advance(T::Colon)?; Some(Arg { pos: name.start, @@ -666,7 +668,11 @@ impl<'a, 'b> Parser<'a, 'b> { })) } - fn collect_captures(&mut self, prev_captured: usize, prev_boundary: usize) -> &'a [Ident] { + fn collect_captures( + &mut self, + prev_captured: usize, + prev_boundary: usize, + ) -> &'a [CapturedIdent] { self.ns_bound = prev_boundary; let captured = &mut self.ctx.captured[prev_captured..]; crate::quad_sort(captured, core::cmp::Ord::cmp); @@ -1029,7 +1035,7 @@ generate_expr! 
{ Struct { pos: Pos, fields: FieldList<'a, StructField<'a>>, - captured: &'a [Ident], + captured: &'a [CapturedIdent], trailing_comma: bool, packed: bool, }, @@ -1037,14 +1043,14 @@ generate_expr! { Union { pos: Pos, fields: FieldList<'a, UnionField<'a>>, - captured: &'a [Ident], + captured: &'a [CapturedIdent], trailing_comma: bool, }, /// `'enum' LIST('{', ',', '}', Ident)` Enum { pos: Pos, variants: FieldList<'a, EnumField<'a>>, - captured: &'a [Ident], + captured: &'a [CapturedIdent], trailing_comma: bool, }, /// `[Expr] LIST('.{', ',', '}', Ident [':' Expr])` @@ -1118,6 +1124,12 @@ generate_expr! { } } +#[derive(Clone, Copy, PartialEq, Eq, Debug, PartialOrd, Ord)] +pub struct CapturedIdent { + pub id: Ident, + pub is_ct: bool, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ListKind { Tuple, @@ -1320,7 +1332,7 @@ pub struct Ctx { symbols: Vec, stack: StackAlloc, idents: Vec, - captured: Vec, + captured: Vec, } impl Ctx { diff --git a/lang/src/son.rs b/lang/src/son.rs index 862166d6..1c933744 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -11,7 +11,8 @@ use { parser::{ self, idfl::{self}, - CommentOr, CtorField, DeclId, Expr, ExprRef, FieldList, ListKind, MatchBranch, Pos, + CapturedIdent, CommentOr, CtorField, DeclId, Expr, ExprRef, FieldList, ListKind, + MatchBranch, Pos, }, ty::{ self, Arg, ArrayLen, CompState, ConstData, EnumData, EnumField, FTask, FuncData, @@ -30,6 +31,7 @@ use { format_args as fa, mem, }, hbbytecode::DisasmError, + std::panic, }; const DEFAULT_ACLASS: usize = 0; @@ -841,6 +843,31 @@ impl<'a> Codegen<'a> { Some(Value::var(index).ty(var.ty)) } + Expr::Ident { id, .. 
} + if let Some(vl) = { + let mut piter = self.ci.parent; + let f = self.file(); + loop { + if let Some((captures, capture_tuple)) = self.tys.captures_of(piter, f) + && let Some(idx) = captures.iter().position(|&cid| cid.id == id) + { + let ty = if captures[idx].is_ct { + ty::Id::TYPE + } else { + self.tys.ins.args[capture_tuple.range().start + idx] + }; + break Some(Value::new(NEVER).ty(ty)); + } + + piter = match self.tys.parent_of(piter) { + Some(p) => p, + None => break None, + }; + } + } => + { + Some(vl) + } Expr::Ident { id, pos, .. } => self.find_type_as_value(pos, self.ci.parent, id, ctx), Expr::Comment { .. } => Some(Value::VOID), Expr::Char { pos, literal } | Expr::String { pos, literal } => { @@ -3053,7 +3080,7 @@ impl<'a> Codegen<'a> { self.tys.tmp.args.push(ty); let sym = parser::find_symbol(&fast.symbols, carg.id); - let ty = if ty == ty::Id::ANY_TYPE { + if ty == ty::Id::ANY_TYPE { let ty = self.infer_type(arg); *self.tys.tmp.args.last_mut().unwrap() = ty; self.ci.scope.vars.push(Variable::new( @@ -3063,10 +3090,14 @@ impl<'a> Codegen<'a> { NEVER, &mut self.ci.nodes, )); - continue; } else if sym.flags & idfl::COMPTIME == 0 { - // FIXME: could fuck us - continue; + self.ci.scope.vars.push(Variable::new( + carg.id, + ty, + false, + NEVER, + &mut self.ci.nodes, + )); } else { if ty != ty::Id::TYPE { self.error( @@ -3082,16 +3113,14 @@ impl<'a> Codegen<'a> { } let ty = self.ty(arg); self.tys.tmp.args.push(ty); - ty - }; - - self.ci.scope.vars.push(Variable::new( - carg.id, - ty::Id::TYPE, - false, - self.ci.nodes.new_const(ty::Id::TYPE, ty), - &mut self.ci.nodes, - )); + self.ci.scope.vars.push(Variable::new( + carg.id, + ty::Id::TYPE, + false, + self.ci.nodes.new_const(ty::Id::TYPE, ty), + &mut self.ci.nodes, + )); + } } let Some(args) = self.tys.pack_args(arg_base) else { @@ -3806,13 +3835,13 @@ impl<'a> Codegen<'a> { ty::Id::NEVER } - fn find_local_ty(&mut self, ident: Ident) -> Option { + fn find_local_ty(&mut self, ident: CapturedIdent) -> Option 
{ self.ci .scope .vars .iter() - .rfind(|v| (v.id == ident && v.ty == ty::Id::TYPE)) - .map(|v| self.ci.nodes.as_ty(v.value())) + .rfind(|v| v.id == ident.id && (!ident.is_ct || v.ty == ty::Id::TYPE)) + .map(|v| if ident.is_ct { self.ci.nodes.as_ty(v.value()) } else { v.ty }) } fn find_type_as_value( @@ -3847,7 +3876,7 @@ impl<'a> Codegen<'a> { }; let ty = if let DeclId::Ident(id) = id - && let Some(ty) = self.find_local_ty(id) + && let Some(ty) = self.find_local_ty(CapturedIdent { id, is_ct: true }) { ty } else if let DeclId::Ident(id) = id @@ -3867,7 +3896,8 @@ impl<'a> Codegen<'a> { } if let Some((captures, capture_tuple)) = self.tys.captures_of(piter, f) - && let Some(idx) = captures.iter().position(|&cid| DeclId::Ident(cid) == id) + && let Some(idx) = + captures.iter().position(|&cid| cid.is_ct && DeclId::Ident(cid.id) == id) { return self.tys.ins.args[capture_tuple.range().start + idx]; } @@ -3875,9 +3905,6 @@ impl<'a> Codegen<'a> { piter = match self.tys.parent_of(piter) { Some(p) => p, None => { - if let ty::Kind::Struct(_) = piter.expand() { - panic!(); - } break None; } }; @@ -4135,7 +4162,13 @@ impl<'a> Codegen<'a> { self.eval_global(sc.file, sc.parent, name, expr) } _ if sc.alloc_const => { - ty::Id::from(self.eval_const(sc.file, sc.parent, expr, ty::Id::TYPE)) + let prev_file = mem::replace(&mut self.ci.file, sc.file); + let prev_parent = mem::replace(&mut self.ci.parent, sc.parent); + let id = self.expr(expr).unwrap().id; + self.ci.file = prev_file; + self.ci.parent = prev_parent; + + self.ci.nodes.as_ty(id) } ref e => { self.error_unhandled_ast(e, "bruh"); @@ -4149,7 +4182,7 @@ impl<'a> Codegen<'a> { &mut self, pos: Pos, expr: &Expr, - captured: &[Ident], + captured: &[CapturedIdent], fields: FieldList, sc: TyScope, get_fields: impl Fn(&mut Types) -> [&mut Vec; 2], @@ -4314,6 +4347,7 @@ mod tests { fb_driver; // Purely Testing Examples; + proper_ident_propagation; method_receiver_by_value; comparing_floating_points; pointer_comparison; diff --git 
a/lang/src/ty.rs b/lang/src/ty.rs index 91bfa359..d919d42a 100644 --- a/lang/src/ty.rs +++ b/lang/src/ty.rs @@ -2,7 +2,7 @@ use { crate::{ ctx_map, lexer::TokenKind, - parser::{self, CommentOr, Expr, ExprRef, Pos}, + parser::{self, CapturedIdent, CommentOr, Expr, ExprRef, Pos}, utils::{self, Ent, EntSlice, EntVec}, Ident, }, @@ -1243,7 +1243,11 @@ impl Types { self.type_base_of(ty).map(|b| b.parent) } - pub fn captures_of<'a>(&self, ty: Id, file: &'a parser::Ast) -> Option<(&'a [Ident], List)> { + pub fn captures_of<'a>( + &self, + ty: Id, + file: &'a parser::Ast, + ) -> Option<(&'a [CapturedIdent], List)> { let base = self.type_base_of(ty)?; let (Expr::Struct { captured, .. } diff --git a/lang/tests/son_tests_proper_ident_propagation.txt b/lang/tests/son_tests_proper_ident_propagation.txt new file mode 100644 index 00000000..5aa16aa1 --- /dev/null +++ b/lang/tests/son_tests_proper_ident_propagation.txt @@ -0,0 +1,6 @@ +main: + CP r1, r0 + JALA r0, r31, 0a +code size: 22 +ret: 0 +status: Ok(())