diff --git a/lang/README.md b/lang/README.md index 800fc92a9..8a46cd3fd 100644 --- a/lang/README.md +++ b/lang/README.md @@ -798,6 +798,29 @@ main := fn(): uint { ### Purely Testing Examples +#### len_never_goes_down +```hb +chars := fn(iter: []u8): struct { + str: []u8, + + next := fn(self: ^Self): ?u8 { + if self.str.len == 0 return null + self.str = self.str[1..] + return self.str[0] + } +} { + return .(iter) +} + +main := fn(): void { + a := chars("Hello, World!") + loop { + char := a.next() + if char == null break + } +} +``` + #### slice_to_global_pointer ```hb a := @as(^u8, @bitcast(0)) diff --git a/lang/src/nodes.rs b/lang/src/nodes.rs index 594e8739f..311bb6908 100644 --- a/lang/src/nodes.rs +++ b/lang/src/nodes.rs @@ -4,7 +4,7 @@ use { debug, lexer::{self, TokenKind}, parser::Pos, - ty::{self, Loc, Types}, + ty::{self, Loc, Offset, Types}, utils::{BitSet, Vc}, }, alloc::{string::String, vec::Vec}, @@ -712,6 +712,7 @@ impl Nodes { if let Some((entry, hash)) = lookup_meta { entry.insert(crate::ctx_map::Key { value: free, hash }, ()); } + free } @@ -804,17 +805,28 @@ impl Nodes { self.iter() .filter_map(|(id, node)| node.kind.is_peeped().then_some(id)) .collect_into(stack); - stack.iter().for_each(|&s| self.lock(s)); + stack.iter().for_each(|&s| { + debug_assert!(self.is_unlocked(s)); + self.lock(s) + }); while fuel != 0 && let Some(node) = stack.pop() { fuel -= 1; + if self[node].outputs.is_empty() { + self.push_adjacent_nodes(node, stack); + } + + debug_assert_eq!(self[node].lock_rc.get(), 1, "{:?} {}", self[node], node); + if self.unlock_remove(node) { continue; } + debug_assert!(!self[node].outputs.is_empty(), "{:?} {}", self[node], node); + if let Some(new) = self.peephole(node, tys) { self.replace(node, new); self.push_adjacent_nodes(new, stack); @@ -830,7 +842,6 @@ impl Nodes { } debug_assert!(self.queued_peeps.is_empty()); - stack.drain(..).for_each(|s| _ = self.unlock_remove(s)); } @@ -851,7 +862,19 @@ impl Nodes { } self[of].peep_triggers = Vc::default(); - stack.iter().skip(prev_len).for_each(|&n| self.lock(n)); + let mut i = 0; + stack.retain(|&n| { + if i < prev_len { + i += 1; + return true; + } + if self.is_unlocked(n) { + self.lock(n); + true + } else { + false + } + }); } pub fn aclass_index(&self, region: Nid) -> (usize, Nid) { @@ -1153,10 +1176,15 @@ impl Nodes { continue; } - if let Some(&load) = - self[n].outputs.iter().find(|&&n| self[n].kind == Kind::Load) - { - self.add_trigger(load, target); + let mut broken = false; + for o in self[n].outputs.clone() { + if o != target && !matches!(self[o].kind, Kind::Return { .. }) { + self.add_trigger(o, target); + broken = true; + } + } + if broken { + new_inps.push(n); continue; } @@ -1306,9 +1334,9 @@ impl Nodes { cursor = next_store; } - 'eliminate: { + 'forward_store: { if self[target].outputs.is_empty() { - break 'eliminate; + break 'forward_store; } if self[value].kind != Kind::Load @@ -1317,106 +1345,121 @@ impl Nodes { for &ele in self[value].outputs.clone().iter().filter(|&&n| n != target) { self.add_trigger(ele, target); } - break 'eliminate; + break 'forward_store; } let &[_, stack, last_store] = self[value].inputs.as_slice() else { unreachable!() }; + // TODO: count othe loads to determine wether this transformation is worth it + + // might be overly restricitive + // but for now, just check we are copiing the full stack allocation if self[stack].ty != self[value].ty || self[stack].kind != Kind::Stck { - break 'eliminate; + break 'forward_store; } - let mut unidentifed = self[stack].outputs.clone(); - let load_idx = unidentifed.iter().position(|&n| n == value).unwrap(); - unidentifed.swap_remove(load_idx); + // pessimistic + // allocation is most likely used in a loop or something so we cant get rid ot it + if last_store != MEM + && self[last_store] + .outputs + .iter() + .any(|&n| !matches!(self[n].kind, Kind::Load | Kind::Return { .. })) + { + break 'forward_store; + } - let mut saved = Vc::default(); - let mut cursor = last_store; - let mut first_store = last_store; - while cursor != MEM && self[cursor].kind == Kind::Stre { - let mut contact_point = cursor; - let mut region = self[cursor].inputs[2]; - if let Kind::BinOp { op } = self[region].kind { - debug_assert_matches!(op, TokenKind::Add | TokenKind::Sub); - contact_point = region; - region = self[region].inputs[1] + let mut store_count = 0; + let [mut cursor, mut first_store] = [last_store; 2]; + while cursor != MEM { + debug_assert_eq!(self[cursor].kind, Kind::Stre); + + // pessimistic + // the offset must only be used for this store + if self[cursor].inputs[2] != stack + && self[self[cursor].inputs[2]].outputs.as_slice() != [cursor] + { + break 'forward_store; } - if region != stack { - break; + // pessimistic + // we load from the store, this might be because the load spans multiple + // stores + if self[cursor].inputs[3] != MEM + && self[self[cursor].inputs[3]].outputs.as_slice() != [cursor] + { + break 'forward_store; } - let Some(index) = unidentifed.iter().position(|&n| n == contact_point) - else { - break 'eliminate; - }; + if self[self[cursor].inputs[1]].kind == Kind::Load && self[value].outputs.iter().any(|&n| { self.aclass_index(self[self[cursor].inputs[1]].inputs[1]).0 == self.aclass_index(self[n].inputs[2]).0 }) { - break 'eliminate; + break 'forward_store; } - unidentifed.remove(index); - saved.push(contact_point); + first_store = cursor; - cursor = *self[cursor].inputs.get(3).unwrap_or(&MEM); - - if unidentifed.is_empty() { - break; - } + cursor = self[cursor].inputs[3]; + store_count += 1; } - if !unidentifed.is_empty() { - break 'eliminate; + if store_count + 1 != self[stack].outputs.len() { + debug_assert!(store_count + 1 < self[stack].outputs.len()); + + break 'forward_store; } - debug_assert_matches!( - self[last_store].kind, - Kind::Stre | Kind::Mem, - "{:?}", - self[last_store] - ); - debug_assert_matches!( - self[first_store].kind, - Kind::Stre | Kind::Mem, - "{:?}", - self[first_store] - ); + // at this potint we know the stack was initialized just to be moved into + // different location so create new stores that store directly to the + // destination and remove the final load from this stack, that shoucl cause + // this stack allocation to be eliminated - // FIXME: when the loads and stores become parallel we will need to get saved - // differently - let mut prev_store = store; - for mut oper in saved.into_iter().rev() { - let mut region = region; - if let Kind::BinOp { op } = self[oper].kind { - debug_assert_eq!(self[oper].outputs.len(), 1); - debug_assert_eq!(self[self[oper].outputs[0]].kind, Kind::Stre); - let new_region = self.new_node( - self[oper].ty, - Kind::BinOp { op }, - [VOID, region, self[oper].inputs[2]], - tys, - ); - self.pass_aclass(self.aclass_index(region).1, new_region); - region = new_region; - oper = self[oper].outputs[0]; + let mut base_store = store; + if first_store != MEM { + debug_assert_ne!(last_store, MEM); + let mut cursor = first_store; + loop { + let mut inps = self[cursor].inputs.clone(); + inps[2] = if inps[2] == stack { + region + } else { + let new_region = self.new_node( + self[inps[2]].ty, + self[inps[2]].kind, + [VOID, region, self[inps[2]].inputs[2]], + tys, + ); + self.pass_aclass(self.aclass_index(region).1, new_region); + new_region + }; + inps[3] = base_store; + base_store = self.new_node(self[cursor].ty, Kind::Stre, inps, tys); + + if self.is_unlocked(base_store) { + self.lock(base_store); + self.queued_peeps.push(base_store); + } + + if cursor == last_store { + break; + } + cursor = self[cursor].outputs[0]; } - let mut inps = self[oper].inputs.clone(); - debug_assert_eq!(inps.len(), 4); - inps[2] = region; - inps[3] = prev_store; - prev_store = self.new_node_nop(self[oper].ty, Kind::Stre, inps); - if self.is_unlocked(prev_store) { - self.lock(prev_store); - self.queued_peeps.push(prev_store); + for o in self[last_store].outputs.clone() { + if matches!(self[o].kind, Kind::Return { .. }) && self.is_unlocked(o) { + self.queued_peeps.push(o); + } } + } else { + debug_assert_eq!(last_store, MEM); } - return Some(prev_store); + return Some(base_store); } if let Some(&load) = @@ -1525,12 +1568,6 @@ impl Nodes { self.remove(prev); self.unlock(o); - for o in self[o].outputs.clone() { - if self.is_unlocked(o) { - self.lock(o); - self.queued_peeps.push(o); - } - } self.replace(o, self[o].inputs[1]); } } @@ -2210,6 +2247,7 @@ impl fmt::Display for Kind { } } +#[derive(Debug)] pub enum CondOptRes { Unknown, Known { value: bool, pin: Option }, diff --git a/lang/src/son.rs b/lang/src/son.rs index d7c4fe267..10d23d1df 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -940,7 +940,7 @@ impl<'a> Codegen<'a> { if self.ci.inline_depth == 0 { debug_assert_ne!(self.ci.ctrl.get(), VOID); let mut inps = Vc::from([self.ci.ctrl.get(), value.id]); - for (i, aclass) in self.ci.scope.aclasses.iter_mut().enumerate() { + for (i, aclass) in self.ci.scope.aclasses.iter_mut().enumerate().take(2) { self.ci.nodes.load_loop_aclass(i, aclass, &mut self.ci.loops); if aclass.last_store.get() != MEM { inps.push(aclass.last_store.get()); @@ -1283,7 +1283,6 @@ impl<'a> Codegen<'a> { self.ci.nodes.new_const(ty::Id::UINT, len as i64) } ty::Kind::Slice(_) => { - // Might change let off = self.offset(bs.id, SLICE_LEN_OFF); self.load_mem(off, ty::Id::UINT) } @@ -1303,6 +1302,7 @@ impl<'a> Codegen<'a> { self.tys, ); self.ci.nodes.lock(len.id); + self.ci.nodes.unlock_remove(end); let elem = match bs.ty.expand() { ty::Kind::Slice(s) => self.tys.ins.slices[s].elem, @@ -1333,6 +1333,7 @@ impl<'a> Codegen<'a> { }; ptr.id = self.offset_ptr(ptr.id, elem, start).id; ptr.ty = self.tys.make_ptr(elem); + self.ci.nodes.unlock_remove(start); self.ci.nodes.lock(ptr.id); @@ -1348,10 +1349,8 @@ impl<'a> Codegen<'a> { let region = self.offset(mem, off); self.store_mem(region, value.ty, value.id); - self.ci.nodes.unlock(start); - self.ci.nodes.unlock(len.id); - self.ci.nodes.unlock(end); - self.ci.nodes.unlock(ptr.id); + self.ci.nodes.unlock_remove(len.id); + self.ci.nodes.unlock_remove(ptr.id); Some(Value::ptr(mem).ty(ty)) } Expr::Index { base, index } => { @@ -3550,7 +3549,6 @@ impl<'a> Codegen<'a> { let res = self.ci.nodes.try_match_cond(id); - // TODO: highlight the pin position let msg = match (kind, res) { (AK::UnwrapCheck, CR::Known { value: false, .. }) => { "unwrap is not needed since the value is (provably) never null, \ @@ -3569,7 +3567,7 @@ impl<'a> Codegen<'a> { or explicitly check for null and handle it \ ('if == null { /* handle */ } else { /* use opt */ }')" } - _ => unreachable!(), + v => unreachable!("{v:?} {id}"), }; self.error(pos, msg); } @@ -3729,7 +3727,6 @@ impl<'a> Codegen<'a> { let oty = mem::replace(&mut opt.ty, ty); self.unwrap_opt_unchecked(ty, oty, opt); - // TODO: extract the if check int a fucntion let ass = self.ci.nodes.new_node_nop(oty, Kind::Assert { kind, pos }, [ self.ci.ctrl.get(), null_check, @@ -4465,6 +4462,7 @@ mod tests { fb_driver; // Purely Testing Examples; + len_never_goes_down; slice_to_global_pointer; subsclice_bug; string_array; diff --git a/lang/tests/son_tests_different_function_destinations.txt b/lang/tests/son_tests_different_function_destinations.txt index 169ad9138..377cad330 100644 --- a/lang/tests/son_tests_different_function_destinations.txt +++ b/lang/tests/son_tests_different_function_destinations.txt @@ -12,6 +12,7 @@ main: 0: ST r0, r32, 0a, 8h LD r33, r32, 0a, 8h JEQ r33, r0, :2 + ST r0, r32, 8a, 8h LI64 r32, 200d CP r1, r32 JMP :1 @@ -66,6 +67,6 @@ new_stru: LD r1, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 655 +code size: 668 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_len_never_goes_down.txt b/lang/tests/son_tests_len_never_goes_down.txt new file mode 100644 index 000000000..95d20e305 --- /dev/null +++ b/lang/tests/son_tests_len_never_goes_down.txt @@ -0,0 +1,49 @@ +chars: + ADDI64 r254, r254, -32d + ST r3, r254, 16a, 16h + ADDI64 r3, r254, 16d + CP r13, r3 + ADDI64 r14, r254, 0d + BMC r13, r14, 16h + LD r1, r14, 0a, 16h + ADDI64 r254, r254, 32d + JALA r0, r31, 0a +main: + ADDI64 r254, r254, -56d + ST r31, r254, 32a, 24h + LRA r32, r0, :Hello, World! + ST r32, r254, 16a, 8h + LI64 r32, 13d + ST r32, r254, 24a, 8h + ADDI64 r32, r254, 0d + LD r3, r254, 16a, 16h + JAL r31, r0, :chars + ST r1, r32, 0a, 16h + 2: CP r2, r32 + JAL r31, r0, :next + CP r33, r1 + ANDI r33, r33, 65535d + JNE r33, r0, :0 + JMP :1 + 0: JMP :2 + 1: LD r31, r254, 32a, 24h + ADDI64 r254, r254, 56d + JALA r0, r31, 0a +next: + CP r13, r2 + LD r14, r13, 8a, 8h + JNE r14, r0, :0 + CP r1, r0 + JMP :1 + 0: LD r15, r13, 0a, 8h + ADDI64 r15, r15, 1d + ST r15, r13, 0a, 8h + ADDI64 r14, r14, -1d + LD r15, r15, 0a, 1h + ST r14, r13, 8a, 8h + ORI r13, r15, 32768d + CP r1, r13 + 1: JALA r0, r31, 0a +code size: 423 +ret: 0 +status: Ok(()) diff --git a/lang/tests/son_tests_nullable_types.txt b/lang/tests/son_tests_nullable_types.txt index b5cc9310f..e9e068403 100644 --- a/lang/tests/son_tests_nullable_types.txt +++ b/lang/tests/son_tests_nullable_types.txt @@ -3,8 +3,8 @@ decide: CP r1, r13 JALA r0, r31, 0a main: - ADDI64 r254, r254, -144d - ST r31, r254, 96a, 48h + ADDI64 r254, r254, -136d + ST r31, r254, 96a, 40h JAL r31, r0, :decide CP r33, r0 ADDI64 r34, r254, 88d @@ -14,9 +14,7 @@ main: CP r32, r33 JMP :1 0: CP r32, r34 - 1: LI64 r35, 1d - ST r35, r254, 88a, 8h - JNE r32, r33, :2 + 1: JNE r32, r33, :2 LI64 r32, 9001d CP r1, r32 JMP :3 @@ -53,15 +51,15 @@ main: 9: ADDI64 r33, r254, 56d JAL r31, r0, :new_foo ST r1, r33, 0a, 16h - LD r36, r254, 56a, 8h - JNE r36, r0, :10 + LD r35, r254, 56a, 8h + JNE r35, r0, :10 LI64 r32, 999d CP r1, r32 JMP :3 -10: LRA r36, r0, :"foo\0" - ST r36, r254, 40a, 8h - LI64 r36, 4d - ST r36, r254, 48a, 8h +10: LRA r35, r0, :"foo\0" + ST r35, r254, 40a, 8h + LI64 r35, 4d + ST r35, r254, 48a, 8h LD r2, r33, 0a, 16h LD r4, r254, 40a, 16h JAL r31, r0, :use_foo @@ -69,12 +67,14 @@ main: JAL r31, r0, :no_foo ST r1, r33, 0a, 16h JAL r31, r0, :decide - CP r36, r1 - ANDI r36, r36, 255d - JNE r36, r0, :11 + CP r35, r1 + ANDI r35, r35, 255d + JNE r35, r0, :11 JMP :12 11: ST r34, r254, 0a, 8h + LI64 r35, 1d ST r35, r254, 8a, 8h + ST r35, r254, 88a, 8h 12: LD r35, r254, 0a, 8h JNE r35, r0, :13 LI64 r32, 34d @@ -101,8 +101,8 @@ main: ANDI r32, r32, 65535d SUB64 r32, r32, r33 CP r1, r32 - 3: LD r31, r254, 96a, 48h - ADDI64 r254, r254, 144d + 3: LD r31, r254, 96a, 40h + ADDI64 r254, r254, 136d JALA r0, r31, 0a new_bar: ADDI64 r254, r254, -24d