From b2be007ef0c5d9b8d8f8111f1ff868698af589a4 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Mon, 16 Dec 2024 13:20:47 +0100 Subject: [PATCH] adding unrolled loops, struct indexing and `@len` directive Signed-off-by: Jakub Doka --- lang/README.md | 16 +- lang/src/fmt.rs | 8 +- lang/src/lexer.rs | 69 ++++-- lang/src/parser.rs | 4 +- lang/src/son.rs | 310 +++++++++++++++++------- lang/src/ty.rs | 8 + lang/tests/son_tests_unrolled_loops.txt | 6 + 7 files changed, 304 insertions(+), 117 deletions(-) create mode 100644 lang/tests/son_tests_unrolled_loops.txt diff --git a/lang/README.md b/lang/README.md index 97e592e65..14a4f0cf5 100644 --- a/lang/README.md +++ b/lang/README.md @@ -568,7 +568,6 @@ main := fn(): uint { return big_array[42] } ``` -note: this does not work on scalar values #### generic_functions ```hb @@ -678,6 +677,21 @@ main := fn(): uint { } ``` +#### unrolled_loops +```hb +Nums := struct {a: uint, b: u32, c: u16, d: u8} + +main := fn(): uint { + nums := Nums.(1, 2, 3, 4) + i := 0 + sum := 0 + $loop if i == @len(Nums) break else { + sum += nums[i] + i += 1 + } + return sum - 10 +} +``` ### Incomplete Examples diff --git a/lang/src/fmt.rs b/lang/src/fmt.rs index 210197b1b..9ca9c45f7 100644 --- a/lang/src/fmt.rs +++ b/lang/src/fmt.rs @@ -70,7 +70,7 @@ fn token_group(kind: TokenKind) -> TokenGroup { | ShrAss | ShlAss => TG::Assign, DQuote | Quote => TG::String, Slf | Defer | Return | If | Else | Loop | Break | Continue | Fn | Idk | Die | Struct - | Packed | True | False | Null | Match | Enum | Union => TG::Keyword, + | Packed | True | False | Null | Match | Enum | Union | CtLoop => TG::Keyword, } } @@ -107,7 +107,7 @@ pub fn minify(source: &mut str) -> usize { let mut token = lexer::Lexer::new(reader).eat(); match token.kind { TokenKind::Eof => break, - TokenKind::CtIdent | TokenKind::Directive => token.start -= 1, + TokenKind::CtIdent | TokenKind::CtLoop | TokenKind::Directive => token.start -= 1, _ => {} } @@ -448,8 +448,8 @@ impl<'a> Formatter<'a> { s.fmt(&br.body, f) }) } - Expr::Loop { body, .. } => { - f.write_str("loop ")?; + Expr::Loop { body, unrolled, .. } => { + f.write_str(if unrolled { "$loop " } else { "loop " })?; self.fmt(body, f) } Expr::Closure { ret, body, args, .. } => { diff --git a/lang/src/lexer.rs b/lang/src/lexer.rs index 0c621bb65..1130cd8ec 100644 --- a/lang/src/lexer.rs +++ b/lang/src/lexer.rs @@ -32,6 +32,9 @@ macro_rules! gen_token_kind { #[keywords] $( $keyword:ident = $keyword_lit:literal, )* + #[const_keywords] $( + $const_keyword:ident = $const_keyword_lit:literal, + )* #[punkt] $( $punkt:ident = $punkt_lit:literal, )* @@ -56,6 +59,7 @@ macro_rules! gen_token_kind { match *self { $( Self::$pattern => concat!('<', stringify!($pattern), '>'), )* $( Self::$keyword => stringify!($keyword_lit), )* + $( Self::$const_keyword => concat!('$', $const_keyword_lit), )* $( Self::$punkt => stringify!($punkt_lit), )* $($( Self::$op => $op_lit, $(Self::$assign => concat!($op_lit, "="),)?)*)* @@ -72,12 +76,23 @@ macro_rules! gen_token_kind { } + 1) } + #[allow(non_upper_case_globals)] fn from_ident(ident: &[u8]) -> Self { + $(const $keyword: &[u8] = $keyword_lit.as_bytes();)* match ident { - $($keyword_lit => Self::$keyword,)* + $($keyword => Self::$keyword,)* _ => Self::Ident, } } + + #[allow(non_upper_case_globals)] + fn from_ct_ident(ident: &[u8]) -> Self { + $(const $const_keyword: &[u8] = $const_keyword_lit.as_bytes();)* + match ident { + $($const_keyword => Self::$const_keyword,)* + _ => Self::CtIdent, + } + } } }; } @@ -156,6 +171,8 @@ pub enum TokenKind { Die, Defer, + CtLoop, + // Unused = a-z LBrace = b'{', Bor = b'|', @@ -305,26 +322,28 @@ gen_token_kind! { Eof, Directive, #[keywords] - Slf = b"Self", - Return = b"return", - If = b"if", - Match = b"match", - Else = b"else", - Loop = b"loop", - Break = b"break", - Continue = b"continue", - Fn = b"fn", - Struct = b"struct", - Packed = b"packed", - Enum = b"enum", - Union = b"union", - True = b"true", - False = b"false", - Null = b"null", - Idk = b"idk", - Die = b"die", - Defer = b"defer", - Under = b"_", + Slf = "Self", + Return = "return", + If = "if", + Match = "match", + Else = "else", + Loop = "loop", + Break = "break", + Continue = "continue", + Fn = "fn", + Struct = "struct", + Packed = "packed", + Enum = "enum", + Union = "union", + True = "true", + False = "false", + Null = "null", + Idk = "idk", + Die = "die", + Defer = "defer", + Under = "_", + #[const_keywords] + CtLoop = "loop", #[punkt] Ctor = ".{", Tupl = ".(", @@ -535,11 +554,17 @@ impl<'a> Lexer<'a> { b'&' if self.advance_if(b'&') => T::And, b'|' if self.advance_if(b'|') => T::Or, b'$' if self.advance_if(b':') => T::Ct, - b'@' | b'$' => { + b'@' => { start += 1; advance_ident(self); identity(c) } + b'$' => { + start += 1; + advance_ident(self); + let ident = &self.source[start as usize..self.pos as usize]; + T::from_ct_ident(ident) + } b'<' | b'>' if self.advance_if(c) => { identity(c - 5 + 128 * self.advance_if(b'=') as u8) } diff --git a/lang/src/parser.rs b/lang/src/parser.rs index 8d64302a2..091f414a2 100644 --- a/lang/src/parser.rs +++ b/lang/src/parser.rs @@ -447,7 +447,8 @@ impl<'a, 'b> Parser<'a, 'b> { }) }, }, - T::Loop => E::Loop { pos, body: self.ptr_expr()? }, + T::Loop => E::Loop { pos, unrolled: false, body: self.ptr_expr()? }, + T::CtLoop => E::Loop { pos, unrolled: true, body: self.ptr_expr()? }, T::Break => E::Break { pos }, T::Continue => E::Continue { pos }, T::Return => E::Return { @@ -984,6 +985,7 @@ generate_expr! { /// `'loop' Expr` Loop { pos: Pos, + unrolled: bool, body: &'a Self, }, /// `('&' | '*' | '^') Expr` diff --git a/lang/src/son.rs b/lang/src/son.rs index 1a85bc287..8960c3300 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -2010,8 +2010,11 @@ impl Nodes { debug_assert!(!var.ptr); let [loops @ .., loob] = loops else { unreachable!() }; - let node = loob.node; - let lvar = &mut loob.scope.vars[index]; + let &mut Loop::Runtime { node, ref mut scope, .. } = loob else { + self.load_loop_var(index, var, loops); + return; + }; + let lvar = &mut scope.vars[index]; debug_assert!(!lvar.ptr); @@ -2033,8 +2036,11 @@ impl Nodes { } let [loops @ .., loob] = loops else { unreachable!() }; - let node = loob.node; - let lvar = &mut loob.scope.aclasses[index]; + let &mut Loop::Runtime { node, ref mut scope, .. } = loob else { + self.load_loop_aclass(index, aclass, loops); + return; + }; + let lvar = &mut scope.aclasses[index]; self.load_loop_aclass(index, lvar, loops); @@ -2312,13 +2318,24 @@ type LoopDepth = u16; type LockRc = u16; type IDomDepth = u16; +#[derive(Clone, Copy)] +pub enum CtLoopState { + Terminated, + Continued, +} + #[derive(Clone)] -struct Loop { - node: Nid, - ctrl: [StrongRef; 2], - ctrl_scope: [Scope; 2], - scope: Scope, - defer_base: usize, +enum Loop { + Comptime { + state: Option<(CtLoopState, Pos)>, + }, + Runtime { + node: Nid, + ctrl: [StrongRef; 2], + ctrl_scope: [Scope; 2], + scope: Scope, + defer_base: usize, + }, } mod strong_ref { @@ -2628,14 +2645,16 @@ impl Pool { fn restore_ci(&mut self, dst: &mut ItemCtx) { self.used_cis -= 1; dst.scope.clear(&mut dst.nodes); - dst.loops.drain(..).for_each(|mut l| { - l.ctrl.map(|c| { - if c.is_live() { - c.remove(&mut dst.nodes); - } - }); - l.scope.clear(&mut dst.nodes); - l.ctrl_scope.map(|mut s| s.clear(&mut dst.nodes)); + dst.loops.drain(..).for_each(|l| { + if let Loop::Runtime { ctrl, ctrl_scope, mut scope, .. } = l { + ctrl.map(|c| { + if c.is_live() { + c.remove(&mut dst.nodes); + } + }); + scope.clear(&mut dst.nodes); + ctrl_scope.map(|mut s| s.clear(&mut dst.nodes)); + } }); mem::take(&mut dst.ctrl).remove(&mut dst.nodes); *dst = mem::take(&mut self.cis[self.used_cis]); @@ -3399,38 +3418,67 @@ impl<'a> Codegen<'a> { bs.ty = base; } - let ty::Kind::Slice(s) = bs.ty.expand() else { - return self.error( + let mut idx = self.expr_ctx(index, Ctx::default().with_ty(ty::Id::DINT))?; + + match bs.ty.expand() { + ty::Kind::Slice(s) => { + let elem = self.tys.ins.slices[s].elem; + let size = self.ci.nodes.new_const(ty::Id::INT, self.tys.size_of(elem)); + self.assert_ty(index.pos(), &mut idx, ty::Id::DINT, "subscript"); + let inps = [VOID, idx.id, size]; + let offset = self.ci.nodes.new_node( + ty::Id::INT, + Kind::BinOp { op: TokenKind::Mul }, + inps, + self.tys, + ); + let aclass = self.ci.nodes.aclass_index(bs.id).1; + let inps = [VOID, bs.id, offset]; + let ptr = self.ci.nodes.new_node( + ty::Id::INT, + Kind::BinOp { op: TokenKind::Add }, + inps, + self.tys, + ); + self.ci.nodes.pass_aclass(aclass, ptr); + + Some(Value::ptr(ptr).ty(elem)) + } + ty::Kind::Struct(s) => { + let Kind::CInt { value: idx } = self.ci.nodes[idx.id].kind else { + return self.error( + index.pos(), + "field index needs to be known at compile time", + ); + }; + + let Some((f, offset)) = OffsetIter::new(s, self.tys) + .into_iter(self.tys) + .nth(idx as _) + .map(|(f, off)| (f.ty, off)) + else { + return self.error( + index.pos(), + fa!( + "struct '{}' has only `{}' fields, \ + but index was '{}'", + self.ty_display(bs.ty), + self.tys.struct_fields(s).len(), + idx + ), + ); + }; + + Some(Value::ptr(self.offset(bs.id, offset)).ty(f)) + } + _ => self.error( base.pos(), fa!( "cant index into '{}' which is not array nor slice", self.ty_display(bs.ty) ), - ); - }; - - let elem = self.tys.ins.slices[s].elem; - let mut idx = self.expr_ctx(index, Ctx::default().with_ty(ty::Id::DINT))?; - self.assert_ty(index.pos(), &mut idx, ty::Id::DINT, "subscript"); - let size = self.ci.nodes.new_const(ty::Id::INT, self.tys.size_of(elem)); - let inps = [VOID, idx.id, size]; - let offset = self.ci.nodes.new_node( - ty::Id::INT, - Kind::BinOp { op: TokenKind::Mul }, - inps, - self.tys, - ); - let aclass = self.ci.nodes.aclass_index(bs.id).1; - let inps = [VOID, bs.id, offset]; - let ptr = self.ci.nodes.new_node( - ty::Id::INT, - Kind::BinOp { op: TokenKind::Add }, - inps, - self.tys, - ); - self.ci.nodes.pass_aclass(aclass, ptr); - - Some(Value::ptr(ptr).ty(elem)) + ), + } } Expr::Embed { id, .. } => { let glob = &self.tys.ins.globals[id]; @@ -3447,6 +3495,20 @@ impl<'a> Codegen<'a> { let align = self.tys.align_of(ty); self.gen_inferred_const(ctx, ty::Id::DINT, align, ty::Id::is_integer) } + Expr::Directive { name: "len", args: [ety], .. } => { + let ty = self.ty(ety); + let Some(len) = self.tys.len_of(ty) else { + return self.error( + ety.pos(), + fa!( + "'@len' only supports structs and arrays, \ + '{}' is neither", + self.ty_display(ty) + ), + ); + }; + self.gen_inferred_const(ctx, ty::Id::DINT, len, ty::Id::is_integer) + } Expr::Directive { name: "bitcast", args: [val], pos } => { let mut val = self.raw_expr(val)?; self.strip_var(&mut val); @@ -3886,7 +3948,47 @@ impl<'a> Codegen<'a> { ret } - Expr::Loop { body, .. } => { + Expr::Loop { unrolled: true, body, pos } => { + let mut loop_fuel = 100; + + self.ci.loops.push(Loop::Comptime { state: None }); + + loop { + if loop_fuel == 0 { + return self.error( + pos, + "unrolled loop exceeded 100 iterations, use normal loop instead, TODO: add escape hatch", + ); + } + loop_fuel -= 1; + + let terminated = self.expr(body).is_none(); + + let Some(&Loop::Comptime { state }) = self.ci.loops.last() else { + unreachable!() + }; + + if !terminated && let Some((_, prev)) = state { + self.error( + pos, + "reached a constrol flow keyword inside an unrolled loop, \ + as well ast the end of the loop, make sure control flow is \ + not dependant on a runtime value", + ); + return self.error(prev, "previous reachable control flow found here"); + } + + match state { + Some((CtLoopState::Terminated, _)) => break, + Some((CtLoopState::Continued, _)) | None => {} + } + } + + self.ci.loops.pop().unwrap(); + + Some(Value::VOID) + } + Expr::Loop { body, unrolled: false, .. } => { self.ci.ctrl.set( self.ci.nodes.new_node( ty::Id::VOID, @@ -3897,7 +3999,7 @@ impl<'a> Codegen<'a> { &mut self.ci.nodes, ); - self.ci.loops.push(Loop { + self.ci.loops.push(Loop::Runtime { node: self.ci.ctrl.get(), ctrl: [StrongRef::DEFAULT; 2], ctrl_scope: core::array::from_fn(|_| Default::default()), @@ -3920,8 +4022,11 @@ impl<'a> Codegen<'a> { self.expr(body); - let Loop { ctrl: [con, ..], ctrl_scope: [cons, ..], .. } = - self.ci.loops.last_mut().unwrap(); + let Some(Loop::Runtime { ctrl: [con, ..], ctrl_scope: [cons, ..], .. }) = + self.ci.loops.last_mut() + else { + unreachable!() + }; let mut cons = mem::take(cons); if let Some(con) = mem::take(con).unwrap(&mut self.ci.nodes) { @@ -3944,13 +4049,16 @@ impl<'a> Codegen<'a> { cons.clear(&mut self.ci.nodes); } - let Loop { + let Some(Loop::Runtime { node, ctrl: [.., bre], ctrl_scope: [.., mut bres], mut scope, defer_base, - } = self.ci.loops.pop().unwrap(); + }) = self.ci.loops.pop() + else { + unreachable!() + }; self.gen_defers(defer_base); self.ci.defers.truncate(defer_base); @@ -5006,52 +5114,75 @@ impl<'a> Codegen<'a> { } fn jump_to(&mut self, pos: Pos, id: usize) -> Option { - let Some(&mut Loop { defer_base, .. }) = self.ci.loops.last_mut() else { + let Some(loob) = self.ci.loops.last_mut() else { self.error(pos, "break outside a loop"); return None; }; - self.gen_defers(defer_base)?; + match loob { + &mut Loop::Comptime { state: Some((_, prev)) } => { + self.error( + pos, + "reached multiple control flow keywords inside an unrolled loop, \ + make sure control flow is not dependant on a runtime value", + ); + self.error(prev, "previous reachable control flow found here"); + } + Loop::Comptime { state: state @ None } => { + *state = Some(([CtLoopState::Continued, CtLoopState::Terminated][id], pos)) + } + &mut Loop::Runtime { defer_base, .. } => { + self.gen_defers(defer_base)?; - let mut loob = self.ci.loops.last_mut().unwrap(); + let Loop::Runtime { ctrl: lctrl, ctrl_scope, scope, .. } = + self.ci.loops.last_mut().unwrap() + else { + unreachable!() + }; - if loob.ctrl[id].is_live() { - loob.ctrl[id].set( - self.ci.nodes.new_node( - ty::Id::VOID, - Kind::Region, - [self.ci.ctrl.get(), loob.ctrl[id].get()], - self.tys, - ), - &mut self.ci.nodes, - ); - let mut scope = mem::take(&mut loob.ctrl_scope[id]); - let ctrl = mem::take(&mut loob.ctrl[id]); + if lctrl[id].is_live() { + lctrl[id].set( + self.ci.nodes.new_node( + ty::Id::VOID, + Kind::Region, + [self.ci.ctrl.get(), lctrl[id].get()], + self.tys, + ), + &mut self.ci.nodes, + ); + let mut scope = mem::take(&mut ctrl_scope[id]); + let ctrl = mem::take(&mut lctrl[id]); - self.ci.nodes.merge_scopes( - &mut self.ci.loops, - &ctrl, - &mut scope, - &mut self.ci.scope, - self.tys, - ); + self.ci.nodes.merge_scopes( + &mut self.ci.loops, + &ctrl, + &mut scope, + &mut self.ci.scope, + self.tys, + ); - loob = self.ci.loops.last_mut().unwrap(); - loob.ctrl_scope[id] = scope; - loob.ctrl[id] = ctrl; - self.ci.ctrl.set(NEVER, &mut self.ci.nodes); - } else { - let term = StrongRef::new(NEVER, &mut self.ci.nodes); - loob.ctrl[id] = mem::replace(&mut self.ci.ctrl, term); - loob.ctrl_scope[id] = self.ci.scope.dup(&mut self.ci.nodes); - loob.ctrl_scope[id] - .vars - .drain(loob.scope.vars.len()..) - .for_each(|v| v.remove(&mut self.ci.nodes)); - loob.ctrl_scope[id] - .aclasses - .drain(loob.scope.aclasses.len()..) - .for_each(|v| v.remove(&mut self.ci.nodes)); + let Loop::Runtime { ctrl: lctrl, ctrl_scope, .. } = + self.ci.loops.last_mut().unwrap() + else { + unreachable!() + }; + ctrl_scope[id] = scope; + lctrl[id] = ctrl; + self.ci.ctrl.set(NEVER, &mut self.ci.nodes); + } else { + let term = StrongRef::new(NEVER, &mut self.ci.nodes); + lctrl[id] = mem::replace(&mut self.ci.ctrl, term); + ctrl_scope[id] = self.ci.scope.dup(&mut self.ci.nodes); + ctrl_scope[id] + .vars + .drain(scope.vars.len()..) + .for_each(|v| v.remove(&mut self.ci.nodes)); + ctrl_scope[id] + .aclasses + .drain(scope.aclasses.len()..) + .for_each(|v| v.remove(&mut self.ci.nodes)); + } + } } None @@ -6024,6 +6155,7 @@ mod tests { generic_functions; die; defer; + unrolled_loops; // Incomplete Examples; //comptime_pointers; diff --git a/lang/src/ty.rs b/lang/src/ty.rs index e2a08784e..6dfb1adb1 100644 --- a/lang/src/ty.rs +++ b/lang/src/ty.rs @@ -1176,6 +1176,14 @@ impl Types { debug_assert_eq!(captured.len(), base.captured.len()); Some((captured, base.captured)) } + + pub fn len_of(&self, ty: Id) -> Option { + Some(match ty.expand() { + Kind::Struct(s) => self.struct_field_range(s).len() as _, + Kind::Slice(s) => self.ins.slices[s].len()? as _, + _ => return None, + }) + } } pub struct OptLayout { diff --git a/lang/tests/son_tests_unrolled_loops.txt b/lang/tests/son_tests_unrolled_loops.txt new file mode 100644 index 000000000..5aa16aa1f --- /dev/null +++ b/lang/tests/son_tests_unrolled_loops.txt @@ -0,0 +1,6 @@ +main: + CP r1, r0 + JALA r0, r31, 0a +code size: 22 +ret: 0 +status: Ok(())