i am not useless after all, the invalid store elimination removed

Signed-off-by: Jakub Doka <jakub.doka2@gmail.com>
This commit is contained in:
Jakub Doka 2024-12-25 20:27:16 +01:00
parent 3491814b4f
commit 5c8f7c9c79
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
6 changed files with 220 additions and 111 deletions

View file

@ -798,6 +798,29 @@ main := fn(): uint {
### Purely Testing Examples ### Purely Testing Examples
#### len_never_goes_down
```hb
chars := fn(iter: []u8): struct {
str: []u8,
next := fn(self: ^Self): ?u8 {
if self.str.len == 0 return null
self.str = self.str[1..]
return self.str[0]
}
} {
return .(iter)
}
main := fn(): void {
a := chars("Hello, World!")
loop {
char := a.next()
if char == null break
}
}
```
#### slice_to_global_pointer #### slice_to_global_pointer
```hb ```hb
a := @as(^u8, @bitcast(0)) a := @as(^u8, @bitcast(0))

View file

@ -4,7 +4,7 @@ use {
debug, debug,
lexer::{self, TokenKind}, lexer::{self, TokenKind},
parser::Pos, parser::Pos,
ty::{self, Loc, Types}, ty::{self, Loc, Offset, Types},
utils::{BitSet, Vc}, utils::{BitSet, Vc},
}, },
alloc::{string::String, vec::Vec}, alloc::{string::String, vec::Vec},
@ -712,6 +712,7 @@ impl Nodes {
if let Some((entry, hash)) = lookup_meta { if let Some((entry, hash)) = lookup_meta {
entry.insert(crate::ctx_map::Key { value: free, hash }, ()); entry.insert(crate::ctx_map::Key { value: free, hash }, ());
} }
free free
} }
@ -804,17 +805,28 @@ impl Nodes {
self.iter() self.iter()
.filter_map(|(id, node)| node.kind.is_peeped().then_some(id)) .filter_map(|(id, node)| node.kind.is_peeped().then_some(id))
.collect_into(stack); .collect_into(stack);
stack.iter().for_each(|&s| self.lock(s)); stack.iter().for_each(|&s| {
debug_assert!(self.is_unlocked(s));
self.lock(s)
});
while fuel != 0 while fuel != 0
&& let Some(node) = stack.pop() && let Some(node) = stack.pop()
{ {
fuel -= 1; fuel -= 1;
if self[node].outputs.is_empty() {
self.push_adjacent_nodes(node, stack);
}
debug_assert_eq!(self[node].lock_rc.get(), 1, "{:?} {}", self[node], node);
if self.unlock_remove(node) { if self.unlock_remove(node) {
continue; continue;
} }
debug_assert!(!self[node].outputs.is_empty(), "{:?} {}", self[node], node);
if let Some(new) = self.peephole(node, tys) { if let Some(new) = self.peephole(node, tys) {
self.replace(node, new); self.replace(node, new);
self.push_adjacent_nodes(new, stack); self.push_adjacent_nodes(new, stack);
@ -830,7 +842,6 @@ impl Nodes {
} }
debug_assert!(self.queued_peeps.is_empty()); debug_assert!(self.queued_peeps.is_empty());
stack.drain(..).for_each(|s| _ = self.unlock_remove(s)); stack.drain(..).for_each(|s| _ = self.unlock_remove(s));
} }
@ -851,7 +862,19 @@ impl Nodes {
} }
self[of].peep_triggers = Vc::default(); self[of].peep_triggers = Vc::default();
stack.iter().skip(prev_len).for_each(|&n| self.lock(n)); let mut i = 0;
stack.retain(|&n| {
if i < prev_len {
i += 1;
return true;
}
if self.is_unlocked(n) {
self.lock(n);
true
} else {
false
}
});
} }
pub fn aclass_index(&self, region: Nid) -> (usize, Nid) { pub fn aclass_index(&self, region: Nid) -> (usize, Nid) {
@ -1153,10 +1176,15 @@ impl Nodes {
continue; continue;
} }
if let Some(&load) = let mut broken = false;
self[n].outputs.iter().find(|&&n| self[n].kind == Kind::Load) for o in self[n].outputs.clone() {
{ if o != target && !matches!(self[o].kind, Kind::Return { .. }) {
self.add_trigger(load, target); self.add_trigger(o, target);
broken = true;
}
}
if broken {
new_inps.push(n);
continue; continue;
} }
@ -1306,9 +1334,9 @@ impl Nodes {
cursor = next_store; cursor = next_store;
} }
'eliminate: { 'forward_store: {
if self[target].outputs.is_empty() { if self[target].outputs.is_empty() {
break 'eliminate; break 'forward_store;
} }
if self[value].kind != Kind::Load if self[value].kind != Kind::Load
@ -1317,106 +1345,121 @@ impl Nodes {
for &ele in self[value].outputs.clone().iter().filter(|&&n| n != target) { for &ele in self[value].outputs.clone().iter().filter(|&&n| n != target) {
self.add_trigger(ele, target); self.add_trigger(ele, target);
} }
break 'eliminate; break 'forward_store;
} }
let &[_, stack, last_store] = self[value].inputs.as_slice() else { let &[_, stack, last_store] = self[value].inputs.as_slice() else {
unreachable!() unreachable!()
}; };
// TODO: count othe loads to determine wether this transformation is worth it
// might be overly restricitive
// but for now, just check we are copiing the full stack allocation
if self[stack].ty != self[value].ty || self[stack].kind != Kind::Stck { if self[stack].ty != self[value].ty || self[stack].kind != Kind::Stck {
break 'eliminate; break 'forward_store;
} }
let mut unidentifed = self[stack].outputs.clone(); // pessimistic
let load_idx = unidentifed.iter().position(|&n| n == value).unwrap(); // allocation is most likely used in a loop or something so we cant get rid ot it
unidentifed.swap_remove(load_idx); if last_store != MEM
&& self[last_store]
let mut saved = Vc::default(); .outputs
let mut cursor = last_store; .iter()
let mut first_store = last_store; .any(|&n| !matches!(self[n].kind, Kind::Load | Kind::Return { .. }))
while cursor != MEM && self[cursor].kind == Kind::Stre { {
let mut contact_point = cursor; break 'forward_store;
let mut region = self[cursor].inputs[2];
if let Kind::BinOp { op } = self[region].kind {
debug_assert_matches!(op, TokenKind::Add | TokenKind::Sub);
contact_point = region;
region = self[region].inputs[1]
} }
if region != stack { let mut store_count = 0;
break; let [mut cursor, mut first_store] = [last_store; 2];
while cursor != MEM {
debug_assert_eq!(self[cursor].kind, Kind::Stre);
// pessimistic
// the offset must only be used for this store
if self[cursor].inputs[2] != stack
&& self[self[cursor].inputs[2]].outputs.as_slice() != [cursor]
{
break 'forward_store;
} }
let Some(index) = unidentifed.iter().position(|&n| n == contact_point)
else { // pessimistic
break 'eliminate; // we load from the store, this might be because the load spans multiple
}; // stores
if self[cursor].inputs[3] != MEM
&& self[self[cursor].inputs[3]].outputs.as_slice() != [cursor]
{
break 'forward_store;
}
if self[self[cursor].inputs[1]].kind == Kind::Load if self[self[cursor].inputs[1]].kind == Kind::Load
&& self[value].outputs.iter().any(|&n| { && self[value].outputs.iter().any(|&n| {
self.aclass_index(self[self[cursor].inputs[1]].inputs[1]).0 self.aclass_index(self[self[cursor].inputs[1]].inputs[1]).0
== self.aclass_index(self[n].inputs[2]).0 == self.aclass_index(self[n].inputs[2]).0
}) })
{ {
break 'eliminate; break 'forward_store;
} }
unidentifed.remove(index);
saved.push(contact_point);
first_store = cursor; first_store = cursor;
cursor = *self[cursor].inputs.get(3).unwrap_or(&MEM); cursor = self[cursor].inputs[3];
store_count += 1;
if unidentifed.is_empty() {
break;
}
} }
if !unidentifed.is_empty() { if store_count + 1 != self[stack].outputs.len() {
break 'eliminate; debug_assert!(store_count + 1 < self[stack].outputs.len());
break 'forward_store;
} }
debug_assert_matches!( // at this potint we know the stack was initialized just to be moved into
self[last_store].kind, // different location so create new stores that store directly to the
Kind::Stre | Kind::Mem, // destination and remove the final load from this stack, that shoucl cause
"{:?}", // this stack allocation to be eliminated
self[last_store]
);
debug_assert_matches!(
self[first_store].kind,
Kind::Stre | Kind::Mem,
"{:?}",
self[first_store]
);
// FIXME: when the loads and stores become parallel we will need to get saved let mut base_store = store;
// differently if first_store != MEM {
let mut prev_store = store; debug_assert_ne!(last_store, MEM);
for mut oper in saved.into_iter().rev() { let mut cursor = first_store;
let mut region = region; loop {
if let Kind::BinOp { op } = self[oper].kind { let mut inps = self[cursor].inputs.clone();
debug_assert_eq!(self[oper].outputs.len(), 1); inps[2] = if inps[2] == stack {
debug_assert_eq!(self[self[oper].outputs[0]].kind, Kind::Stre); region
} else {
let new_region = self.new_node( let new_region = self.new_node(
self[oper].ty, self[inps[2]].ty,
Kind::BinOp { op }, self[inps[2]].kind,
[VOID, region, self[oper].inputs[2]], [VOID, region, self[inps[2]].inputs[2]],
tys, tys,
); );
self.pass_aclass(self.aclass_index(region).1, new_region); self.pass_aclass(self.aclass_index(region).1, new_region);
region = new_region; new_region
oper = self[oper].outputs[0]; };
inps[3] = base_store;
base_store = self.new_node(self[cursor].ty, Kind::Stre, inps, tys);
if self.is_unlocked(base_store) {
self.lock(base_store);
self.queued_peeps.push(base_store);
} }
let mut inps = self[oper].inputs.clone(); if cursor == last_store {
debug_assert_eq!(inps.len(), 4); break;
inps[2] = region;
inps[3] = prev_store;
prev_store = self.new_node_nop(self[oper].ty, Kind::Stre, inps);
if self.is_unlocked(prev_store) {
self.lock(prev_store);
self.queued_peeps.push(prev_store);
} }
cursor = self[cursor].outputs[0];
} }
return Some(prev_store); for o in self[last_store].outputs.clone() {
if matches!(self[o].kind, Kind::Return { .. }) && self.is_unlocked(o) {
self.queued_peeps.push(o);
}
}
} else {
debug_assert_eq!(last_store, MEM);
}
return Some(base_store);
} }
if let Some(&load) = if let Some(&load) =
@ -1525,12 +1568,6 @@ impl Nodes {
self.remove(prev); self.remove(prev);
self.unlock(o); self.unlock(o);
for o in self[o].outputs.clone() {
if self.is_unlocked(o) {
self.lock(o);
self.queued_peeps.push(o);
}
}
self.replace(o, self[o].inputs[1]); self.replace(o, self[o].inputs[1]);
} }
} }
@ -2210,6 +2247,7 @@ impl fmt::Display for Kind {
} }
} }
#[derive(Debug)]
pub enum CondOptRes { pub enum CondOptRes {
Unknown, Unknown,
Known { value: bool, pin: Option<Nid> }, Known { value: bool, pin: Option<Nid> },

View file

@ -940,7 +940,7 @@ impl<'a> Codegen<'a> {
if self.ci.inline_depth == 0 { if self.ci.inline_depth == 0 {
debug_assert_ne!(self.ci.ctrl.get(), VOID); debug_assert_ne!(self.ci.ctrl.get(), VOID);
let mut inps = Vc::from([self.ci.ctrl.get(), value.id]); let mut inps = Vc::from([self.ci.ctrl.get(), value.id]);
for (i, aclass) in self.ci.scope.aclasses.iter_mut().enumerate() { for (i, aclass) in self.ci.scope.aclasses.iter_mut().enumerate().take(2) {
self.ci.nodes.load_loop_aclass(i, aclass, &mut self.ci.loops); self.ci.nodes.load_loop_aclass(i, aclass, &mut self.ci.loops);
if aclass.last_store.get() != MEM { if aclass.last_store.get() != MEM {
inps.push(aclass.last_store.get()); inps.push(aclass.last_store.get());
@ -1283,7 +1283,6 @@ impl<'a> Codegen<'a> {
self.ci.nodes.new_const(ty::Id::UINT, len as i64) self.ci.nodes.new_const(ty::Id::UINT, len as i64)
} }
ty::Kind::Slice(_) => { ty::Kind::Slice(_) => {
// Might change
let off = self.offset(bs.id, SLICE_LEN_OFF); let off = self.offset(bs.id, SLICE_LEN_OFF);
self.load_mem(off, ty::Id::UINT) self.load_mem(off, ty::Id::UINT)
} }
@ -1303,6 +1302,7 @@ impl<'a> Codegen<'a> {
self.tys, self.tys,
); );
self.ci.nodes.lock(len.id); self.ci.nodes.lock(len.id);
self.ci.nodes.unlock_remove(end);
let elem = match bs.ty.expand() { let elem = match bs.ty.expand() {
ty::Kind::Slice(s) => self.tys.ins.slices[s].elem, ty::Kind::Slice(s) => self.tys.ins.slices[s].elem,
@ -1333,6 +1333,7 @@ impl<'a> Codegen<'a> {
}; };
ptr.id = self.offset_ptr(ptr.id, elem, start).id; ptr.id = self.offset_ptr(ptr.id, elem, start).id;
ptr.ty = self.tys.make_ptr(elem); ptr.ty = self.tys.make_ptr(elem);
self.ci.nodes.unlock_remove(start);
self.ci.nodes.lock(ptr.id); self.ci.nodes.lock(ptr.id);
@ -1348,10 +1349,8 @@ impl<'a> Codegen<'a> {
let region = self.offset(mem, off); let region = self.offset(mem, off);
self.store_mem(region, value.ty, value.id); self.store_mem(region, value.ty, value.id);
self.ci.nodes.unlock(start); self.ci.nodes.unlock_remove(len.id);
self.ci.nodes.unlock(len.id); self.ci.nodes.unlock_remove(ptr.id);
self.ci.nodes.unlock(end);
self.ci.nodes.unlock(ptr.id);
Some(Value::ptr(mem).ty(ty)) Some(Value::ptr(mem).ty(ty))
} }
Expr::Index { base, index } => { Expr::Index { base, index } => {
@ -3550,7 +3549,6 @@ impl<'a> Codegen<'a> {
let res = self.ci.nodes.try_match_cond(id); let res = self.ci.nodes.try_match_cond(id);
// TODO: highlight the pin position
let msg = match (kind, res) { let msg = match (kind, res) {
(AK::UnwrapCheck, CR::Known { value: false, .. }) => { (AK::UnwrapCheck, CR::Known { value: false, .. }) => {
"unwrap is not needed since the value is (provably) never null, \ "unwrap is not needed since the value is (provably) never null, \
@ -3569,7 +3567,7 @@ impl<'a> Codegen<'a> {
or explicitly check for null and handle it \ or explicitly check for null and handle it \
('if <opt> == null { /* handle */ } else { /* use opt */ }')" ('if <opt> == null { /* handle */ } else { /* use opt */ }')"
} }
_ => unreachable!(), v => unreachable!("{v:?} {id}"),
}; };
self.error(pos, msg); self.error(pos, msg);
} }
@ -3729,7 +3727,6 @@ impl<'a> Codegen<'a> {
let oty = mem::replace(&mut opt.ty, ty); let oty = mem::replace(&mut opt.ty, ty);
self.unwrap_opt_unchecked(ty, oty, opt); self.unwrap_opt_unchecked(ty, oty, opt);
// TODO: extract the if check int a fucntion
let ass = self.ci.nodes.new_node_nop(oty, Kind::Assert { kind, pos }, [ let ass = self.ci.nodes.new_node_nop(oty, Kind::Assert { kind, pos }, [
self.ci.ctrl.get(), self.ci.ctrl.get(),
null_check, null_check,
@ -4465,6 +4462,7 @@ mod tests {
fb_driver; fb_driver;
// Purely Testing Examples; // Purely Testing Examples;
len_never_goes_down;
slice_to_global_pointer; slice_to_global_pointer;
subsclice_bug; subsclice_bug;
string_array; string_array;

View file

@ -12,6 +12,7 @@ main:
0: ST r0, r32, 0a, 8h 0: ST r0, r32, 0a, 8h
LD r33, r32, 0a, 8h LD r33, r32, 0a, 8h
JEQ r33, r0, :2 JEQ r33, r0, :2
ST r0, r32, 8a, 8h
LI64 r32, 200d LI64 r32, 200d
CP r1, r32 CP r1, r32
JMP :1 JMP :1
@ -66,6 +67,6 @@ new_stru:
LD r1, r254, 0a, 16h LD r1, r254, 0a, 16h
ADDI64 r254, r254, 16d ADDI64 r254, r254, 16d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 655 code size: 668
ret: 0 ret: 0
status: Ok(()) status: Ok(())

View file

@ -0,0 +1,49 @@
chars:
ADDI64 r254, r254, -32d
ST r3, r254, 16a, 16h
ADDI64 r3, r254, 16d
CP r13, r3
ADDI64 r14, r254, 0d
BMC r13, r14, 16h
LD r1, r14, 0a, 16h
ADDI64 r254, r254, 32d
JALA r0, r31, 0a
main:
ADDI64 r254, r254, -56d
ST r31, r254, 32a, 24h
LRA r32, r0, :Hello, World!
ST r32, r254, 16a, 8h
LI64 r32, 13d
ST r32, r254, 24a, 8h
ADDI64 r32, r254, 0d
LD r3, r254, 16a, 16h
JAL r31, r0, :chars
ST r1, r32, 0a, 16h
2: CP r2, r32
JAL r31, r0, :next
CP r33, r1
ANDI r33, r33, 65535d
JNE r33, r0, :0
JMP :1
0: JMP :2
1: LD r31, r254, 32a, 24h
ADDI64 r254, r254, 56d
JALA r0, r31, 0a
next:
CP r13, r2
LD r14, r13, 8a, 8h
JNE r14, r0, :0
CP r1, r0
JMP :1
0: LD r15, r13, 0a, 8h
ADDI64 r15, r15, 1d
ST r15, r13, 0a, 8h
ADDI64 r14, r14, -1d
LD r15, r15, 0a, 1h
ST r14, r13, 8a, 8h
ORI r13, r15, 32768d
CP r1, r13
1: JALA r0, r31, 0a
code size: 423
ret: 0
status: Ok(())

View file

@ -3,8 +3,8 @@ decide:
CP r1, r13 CP r1, r13
JALA r0, r31, 0a JALA r0, r31, 0a
main: main:
ADDI64 r254, r254, -144d ADDI64 r254, r254, -136d
ST r31, r254, 96a, 48h ST r31, r254, 96a, 40h
JAL r31, r0, :decide JAL r31, r0, :decide
CP r33, r0 CP r33, r0
ADDI64 r34, r254, 88d ADDI64 r34, r254, 88d
@ -14,9 +14,7 @@ main:
CP r32, r33 CP r32, r33
JMP :1 JMP :1
0: CP r32, r34 0: CP r32, r34
1: LI64 r35, 1d 1: JNE r32, r33, :2
ST r35, r254, 88a, 8h
JNE r32, r33, :2
LI64 r32, 9001d LI64 r32, 9001d
CP r1, r32 CP r1, r32
JMP :3 JMP :3
@ -53,15 +51,15 @@ main:
9: ADDI64 r33, r254, 56d 9: ADDI64 r33, r254, 56d
JAL r31, r0, :new_foo JAL r31, r0, :new_foo
ST r1, r33, 0a, 16h ST r1, r33, 0a, 16h
LD r36, r254, 56a, 8h LD r35, r254, 56a, 8h
JNE r36, r0, :10 JNE r35, r0, :10
LI64 r32, 999d LI64 r32, 999d
CP r1, r32 CP r1, r32
JMP :3 JMP :3
10: LRA r36, r0, :"foo\0" 10: LRA r35, r0, :"foo\0"
ST r36, r254, 40a, 8h ST r35, r254, 40a, 8h
LI64 r36, 4d LI64 r35, 4d
ST r36, r254, 48a, 8h ST r35, r254, 48a, 8h
LD r2, r33, 0a, 16h LD r2, r33, 0a, 16h
LD r4, r254, 40a, 16h LD r4, r254, 40a, 16h
JAL r31, r0, :use_foo JAL r31, r0, :use_foo
@ -69,12 +67,14 @@ main:
JAL r31, r0, :no_foo JAL r31, r0, :no_foo
ST r1, r33, 0a, 16h ST r1, r33, 0a, 16h
JAL r31, r0, :decide JAL r31, r0, :decide
CP r36, r1 CP r35, r1
ANDI r36, r36, 255d ANDI r35, r35, 255d
JNE r36, r0, :11 JNE r35, r0, :11
JMP :12 JMP :12
11: ST r34, r254, 0a, 8h 11: ST r34, r254, 0a, 8h
LI64 r35, 1d
ST r35, r254, 8a, 8h ST r35, r254, 8a, 8h
ST r35, r254, 88a, 8h
12: LD r35, r254, 0a, 8h 12: LD r35, r254, 0a, 8h
JNE r35, r0, :13 JNE r35, r0, :13
LI64 r32, 34d LI64 r32, 34d
@ -101,8 +101,8 @@ main:
ANDI r32, r32, 65535d ANDI r32, r32, 65535d
SUB64 r32, r32, r33 SUB64 r32, r32, r33
CP r1, r32 CP r1, r32
3: LD r31, r254, 96a, 48h 3: LD r31, r254, 96a, 40h
ADDI64 r254, r254, 144d ADDI64 r254, r254, 136d
JALA r0, r31, 0a JALA r0, r31, 0a
new_bar: new_bar:
ADDI64 r254, r254, -24d ADDI64 r254, r254, -24d