From 9de631234dfe9aa13211a678e21453e96f7eda4d Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Sun, 3 Nov 2024 10:15:03 +0100 Subject: [PATCH] adding unreachable --- lang/README.md | 25 +++++-- lang/src/fmt.rs | 1 + lang/src/lexer.rs | 2 + lang/src/lib.rs | 8 +-- lang/src/parser.rs | 5 ++ lang/src/son.rs | 92 +++++++++++++++++++------- lang/src/son/hbvm.rs | 13 +++- lang/tests/son_tests_die.txt | 5 ++ lang/tests/son_tests_generic_types.txt | 71 ++++++++++---------- 9 files changed, 152 insertions(+), 70 deletions(-) create mode 100644 lang/tests/son_tests_die.txt diff --git a/lang/README.md b/lang/README.md index ab999b12..eb117c0b 100644 --- a/lang/README.md +++ b/lang/README.md @@ -190,6 +190,11 @@ main := fn(): uint { if bar != null return 420 + g := @as(?^uint, null) + g = a + + _rd := *g + return d - *f.a } @@ -458,6 +463,16 @@ main := fn(): uint { } ``` +#### die +```hb +main := fn(): never { + // simply emmits 'un' instruction that immediately terminates the execution + // the expression evaluates into `never` type that can coerce into any other + // type + die +} +``` + ### Incomplete Examples #### comptime_pointers @@ -478,7 +493,7 @@ modify := fn($num: ^uint): void { MALLOC_SYS_CALL := 69 FREE_SYS_CALL := 96 -malloc := fn(size: uint, align: uint): ^void return @eca(MALLOC_SYS_CALL, size, align) +malloc := fn(size: uint, align: uint): ?^void return @eca(MALLOC_SYS_CALL, size, align) free := fn(ptr: ^void, size: uint, align: uint): void return @eca(FREE_SYS_CALL, ptr, size, align) Vec := fn($Elem: type): type { @@ -497,7 +512,7 @@ deinit := fn($Elem: type, vec: ^Vec(Elem)): void { return } -push := fn($Elem: type, vec: ^Vec(Elem), value: Elem): ^Elem { +push := fn($Elem: type, vec: ^Vec(Elem), value: Elem): ?^Elem { if vec.len == vec.cap { if vec.cap == 0 { vec.cap = 1 @@ -505,11 +520,11 @@ push := fn($Elem: type, vec: ^Vec(Elem), value: Elem): ^Elem { vec.cap *= 2 } - new_alloc := @as(^Elem, @bitcast(malloc(vec.cap * @sizeof(Elem), @alignof(Elem)))) - if new_alloc == 0 return @bitcast(0) + new_alloc := @as(?^Elem, @bitcast(malloc(vec.cap * @sizeof(Elem), @alignof(Elem)))) + if new_alloc == null return null src_cursor := vec.data - dst_cursor := new_alloc + dst_cursor := @as(^Elem, new_alloc) end := vec.data + vec.len loop if src_cursor == end break else { diff --git a/lang/src/fmt.rs b/lang/src/fmt.rs index fb371f71..57a2dcb5 100644 --- a/lang/src/fmt.rs +++ b/lang/src/fmt.rs @@ -371,6 +371,7 @@ impl<'a> Formatter<'a> { } Expr::Bool { value, .. } => f.write_str(if value { "true" } else { "false" }), Expr::Idk { .. } => f.write_str("idk"), + Expr::Die { .. } => f.write_str("die"), Expr::Null { .. } => f.write_str("null"), Expr::BinOp { left, diff --git a/lang/src/lexer.rs b/lang/src/lexer.rs index 674ab2da..99963571 100644 --- a/lang/src/lexer.rs +++ b/lang/src/lexer.rs @@ -134,6 +134,7 @@ pub enum TokenKind { False, Null, Idk, + Die, Ctor, Tupl, @@ -306,6 +307,7 @@ gen_token_kind! { False = b"false", Null = b"null", Idk = b"idk", + Die = b"die", #[punkt] Ctor = ".{", Tupl = ".(", diff --git a/lang/src/lib.rs b/lang/src/lib.rs index a06763a4..5d03b584 100644 --- a/lang/src/lib.rs +++ b/lang/src/lib.rs @@ -1023,16 +1023,14 @@ trait TypeParser { let Some((Expr::BinOp { left, right, .. }, name)) = f.find_decl(id) else { return match id { - Ok(name) => { - let name = files[from_file as usize].ident_str(name); - self.report(from_file, pos, format_args!("undefined indentifier: {name}")) - } + Ok(_) => ty::Id::NEVER, Err("main") => self.report( from_file, pos, format_args!( "missing main function in '{}', compiler can't \ - emmit libraries since such concept is not defined", + emmit libraries since such concept is not defined \ + (minimal main function: `main := fn(): void {{}}`)", f.path ), ), diff --git a/lang/src/parser.rs b/lang/src/parser.rs index 998e7e38..e8c9e571 100644 --- a/lang/src/parser.rs +++ b/lang/src/parser.rs @@ -337,6 +337,7 @@ impl<'a, 'b> Parser<'a, 'b> { T::False => E::Bool { pos, value: false }, T::Null => E::Null { pos }, T::Idk => E::Idk { pos }, + T::Die => E::Die { pos }, T::DQuote => E::String { pos, literal: self.tok_str(token) }, T::Packed => { self.packed = true; @@ -903,6 +904,10 @@ generate_expr! { Idk { pos: Pos, }, + /// `'die'` + Die { + pos: Pos, + }, /// `'@' Ident List('(', ',', ')', Expr)` Directive { pos: Pos, diff --git a/lang/src/son.rs b/lang/src/son.rs index 5d01c3db..fb8617f4 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -138,7 +138,7 @@ impl Nodes { } depth } - Kind::Start | Kind::End => 1, + Kind::Start | Kind::End | Kind::Die => 1, u => unreachable!("{u:?}"), }; @@ -805,6 +805,12 @@ impl Nodes { if let K::CInt { value } = self[rhs].kind { match (op, value) { + (T::Eq, 0) if self[lhs].ty.is_pointer() || self[lhs].kind == Kind::Stck => { + return Some(self.new_const(ty::Id::BOOL, 0)); + } + (T::Ne, 0) if self[lhs].ty.is_pointer() || self[lhs].kind == Kind::Stck => { + return Some(self.new_const(ty::Id::BOOL, 1)); + } (T::Add | T::Sub | T::Shl, 0) | (T::Mul | T::Div, 1) => return Some(lhs), (T::Mul, 0) => return Some(rhs), _ => {} @@ -1294,6 +1300,7 @@ impl Nodes { Kind::If => write!(out, " if: "), Kind::Region | Kind::Loop => writeln!(out, " goto: {node}"), Kind::Return => write!(out, " ret: "), + Kind::Die => write!(out, " die: "), Kind::CInt { value } => write!(out, "cint: #{value:<4}"), Kind::Phi => write!(out, " phi: "), Kind::Arg => write!( @@ -1380,7 +1387,7 @@ impl Nodes { } node = cfg_index; } - Kind::Return => { + Kind::Return | Kind::Die => { node = self[node].outputs[0]; } Kind::Then | Kind::Else | Kind::Entry => { @@ -1578,6 +1585,8 @@ pub enum Kind { // [ctrl, ?value] Return, // [ctrl] + Die, + // [ctrl] CInt { value: i64, }, @@ -1620,6 +1629,7 @@ impl Kind { Self::Start | Self::End | Self::Return + | Self::Die | Self::Entry | Self::Then | Self::Else @@ -1631,7 +1641,7 @@ impl Kind { } fn ends_basic_block(&self) -> bool { - matches!(self, Self::Return | Self::If | Self::End) + matches!(self, Self::Return | Self::If | Self::End | Self::Die) } fn is_peeped(&self) -> bool { @@ -2478,6 +2488,16 @@ impl<'a> Codegen<'a> { None } + Expr::Die { .. } => { + self.ci.ctrl.set( + self.ci.nodes.new_node_nop(ty::Id::VOID, Kind::Die, [self.ci.ctrl.get()]), + &mut self.ci.nodes, + ); + + self.ci.nodes[NEVER].inputs.push(self.ci.ctrl.get()); + self.ci.nodes[self.ci.ctrl.get()].outputs.push(NEVER); + None + } Expr::Field { target, name, pos } => { let mut vtarget = self.raw_expr(target)?; self.strip_var(&mut vtarget); @@ -2553,20 +2573,20 @@ impl<'a> Codegen<'a> { } Expr::UnOp { op: TokenKind::Mul, val, pos } => { let ctx = Ctx { ty: ctx.ty.map(|ty| self.tys.make_ptr(ty)) }; - let mut val = self.expr_ctx(val, ctx)?; + let mut vl = self.expr_ctx(val, ctx)?; - self.unwrap_opt(pos, &mut val); + self.unwrap_opt(val.pos(), &mut vl); - let Some(base) = self.tys.base_of(val.ty) else { + let Some(base) = self.tys.base_of(vl.ty) else { self.report( pos, - fa!("the '{}' can not be dereferneced", self.ty_display(val.ty)), + fa!("the '{}' can not be dereferneced", self.ty_display(vl.ty)), ); return Value::NEVER; }; - val.ptr = true; - val.ty = base; - Some(val) + vl.ptr = true; + vl.ty = base; + Some(vl) } Expr::UnOp { pos, op: op @ TokenKind::Sub, val } => { let val = @@ -2784,6 +2804,25 @@ impl<'a> Codegen<'a> { val.ty = ty; Some(val) } + Expr::Directive { name: "unwrap", args: [expr], .. } => { + let mut val = self.raw_expr(expr)?; + self.strip_var(&mut val); + + let Some(ty) = self.tys.inner_of(val.ty) else { + self.report( + expr.pos(), + fa!( + "only optional types can be unwrapped ('{}' is not optional)", + self.ty_display(val.ty) + ), + ); + return Value::NEVER; + }; + + self.unwrap_opt_unchecked(ty, val.ty, &mut val); + val.ty = ty; + Some(val) + } Expr::Directive { name: "intcast", args: [expr], pos } => { let mut val = self.expr(expr)?; @@ -4063,18 +4102,7 @@ impl<'a> Codegen<'a> { let oty = mem::replace(&mut opt.ty, ty); match ctrl_ty { ty::Id::LEFT_UNREACHABLE => { - if self.tys.nieche_of(ty).is_some() { - return; - } - - let OptLayout { payload_offset, .. } = self.tys.opt_layout(ty); - - match oty.loc(self.tys) { - Loc::Reg => {} - Loc::Stack => { - opt.id = self.offset(opt.id, payload_offset); - } - } + self.unwrap_opt_unchecked(ty, oty, opt); } ty::Id::RIGHT_UNREACHABLE => { self.report(pos, "the value is always null, some checks might need to be inverted"); @@ -4083,12 +4111,29 @@ impl<'a> Codegen<'a> { self.report( pos, "can't prove the value is not 'null', \ - there is not nice syntax for bypassing this, sorry", + use '@unwrap()' if you believe compiler is stupid, \ + or explicitly check for null and handle it \ + ('if == null { /* handle */ } else { /* use opt */ }')", ); } } } + fn unwrap_opt_unchecked(&mut self, ty: ty::Id, oty: ty::Id, opt: &mut Value) { + if self.tys.nieche_of(ty).is_some() { + return; + } + + let OptLayout { payload_offset, .. } = self.tys.opt_layout(ty); + + match oty.loc(self.tys) { + Loc::Reg => {} + Loc::Stack => { + opt.id = self.offset(opt.id, payload_offset); + } + } + } + fn gen_null_check(&mut self, mut cmped: Value, ty: ty::Id, op: TokenKind) -> Nid { let OptLayout { flag_ty, flag_offset, .. } = self.tys.opt_layout(ty); @@ -4335,6 +4380,7 @@ mod tests { inline; idk; generic_functions; + die; // Incomplete Examples; //comptime_pointers; diff --git a/lang/src/son/hbvm.rs b/lang/src/son/hbvm.rs index cdb38236..4411cd4c 100644 --- a/lang/src/son/hbvm.rs +++ b/lang/src/son/hbvm.rs @@ -308,6 +308,9 @@ impl ItemCtx { self.emit(instrs::jmp(0)); } } + Kind::Die => { + self.emit(instrs::un()); + } Kind::CInt { value } if node.ty.is_float() => { self.emit(match node.ty { ty::Id::F32 => instrs::li32( @@ -617,7 +620,9 @@ impl ItemCtx { self.emit(instrs::addi64(reg::STACK_PTR, reg::STACK_PTR, (pushed + stack) as _)); } self.relocs.iter_mut().for_each(|r| r.reloc.offset -= stripped_prelude_size as u32); - self.emit(instrs::jala(reg::ZERO, reg::RET_ADDR, 0)); + if sig.ret != ty::Id::NEVER { + self.emit(instrs::jala(reg::ZERO, reg::RET_ADDR, 0)); + } } } @@ -822,6 +827,10 @@ impl<'a> Function<'a> { self.add_instr(nid, ops); self.emit_node(node.outputs[0], nid); } + Kind::Die => { + self.add_instr(nid, vec![]); + self.emit_node(node.outputs[0], nid); + } Kind::CInt { .. } if node.outputs.iter().all(|&o| { let ond = &self.nodes[o]; @@ -1167,7 +1176,7 @@ impl regalloc2::Function for Function<'_> { } fn is_ret(&self, insn: regalloc2::Inst) -> bool { - self.nodes[self.instrs[insn.index()].nid].kind == Kind::Return + matches!(self.nodes[self.instrs[insn.index()].nid].kind, Kind::Return | Kind::Die) } fn is_branch(&self, insn: regalloc2::Inst) -> bool { diff --git a/lang/tests/son_tests_die.txt b/lang/tests/son_tests_die.txt new file mode 100644 index 00000000..253b7cdc --- /dev/null +++ b/lang/tests/son_tests_die.txt @@ -0,0 +1,5 @@ +main: + UN +code size: 9 +ret: 0 +status: Err(Unreachable) diff --git a/lang/tests/son_tests_generic_types.txt b/lang/tests/son_tests_generic_types.txt index 76612bf4..755dcb17 100644 --- a/lang/tests/son_tests_generic_types.txt +++ b/lang/tests/son_tests_generic_types.txt @@ -75,52 +75,53 @@ push: MUL64 r2, r36, r37 CP r3, r37 JAL r31, r0, :malloc - CP r38, r34 - ST r36, r38, 16a, 8h - JNE r1, r35, :3 - CP r1, r35 + CP r38, r1 + CP r39, r34 + ST r36, r39, 16a, 8h + LI64 r1, 0d + CP r7, r38 + JNE r7, r1, :3 JMP :4 - 3: CP r39, r1 - CP r1, r35 - LD r6, r38, 8a, 8h - MULI64 r8, r6, 8d - LD r12, r38, 0a, 8h - ADD64 r11, r12, r8 - CP r3, r39 - 9: LD r2, r38, 0a, 8h - LD r8, r38, 8a, 8h - JNE r11, r12, :5 - JEQ r8, r1, :6 + 3: CP r38, r7 + LD r8, r39, 8a, 8h + MULI64 r10, r8, 8d + LD r3, r39, 0a, 8h + ADD64 r7, r3, r10 + CP r5, r38 + 9: LD r2, r39, 0a, 8h + LD r10, r39, 8a, 8h + JNE r7, r3, :5 + JEQ r10, r35, :6 CP r4, r37 - MUL64 r3, r8, r4 + MUL64 r3, r10, r4 JAL r31, r0, :free - CP r5, r39 + CP r6, r38 JMP :7 - 6: CP r5, r39 - 7: ST r5, r38, 0a, 8h + 6: CP r6, r38 + 7: ST r6, r39, 0a, 8h JMP :8 5: CP r4, r37 - CP r5, r39 - ADDI64 r6, r3, 8d - ADDI64 r7, r12, 8d - LD r8, r12, 0a, 8h - ST r8, r3, 0a, 8h - CP r3, r6 - CP r12, r7 + CP r6, r38 + ADDI64 r8, r5, 8d + ADDI64 r9, r3, 8d + LD r10, r3, 0a, 8h + ST r10, r5, 0a, 8h + CP r3, r9 + CP r5, r8 JMP :9 - 0: CP r38, r34 - 8: LD r3, r38, 8a, 8h - MULI64 r5, r3, 8d - LD r4, r38, 0a, 8h - ADD64 r1, r4, r5 + 0: CP r39, r34 + 8: LD r5, r39, 8a, 8h + MULI64 r7, r5, 8d + LD r6, r39, 0a, 8h + ADD64 r1, r6, r7 CP r3, r32 ST r3, r1, 0a, 8h - LD r11, r38, 8a, 8h - ADD64 r2, r11, r33 - ST r2, r38, 8a, 8h + LD r2, r39, 8a, 8h + ADD64 r3, r2, r33 + ST r3, r39, 8a, 8h 4: LD r31, r254, 0a, 72h ADDI64 r254, r254, 72d JALA r0, r31, 0a -code size: 945 +code size: 955 ret: 69 status: Ok(())