i am not useless after all, removed the invalid store elimination

Signed-off-by: Jakub Doka <jakub.doka2@gmail.com>
Jakub Doka 2024-12-25 20:27:16 +01:00
parent 3491814b4f
commit 5c8f7c9c79
6 changed files with 220 additions and 111 deletions


@ -798,6 +798,29 @@ main := fn(): uint {
### Purely Testing Examples
#### len_never_goes_down
```hb
chars := fn(iter: []u8): struct {
str: []u8,
next := fn(self: ^Self): ?u8 {
if self.str.len == 0 return null
self.str = self.str[1..]
return self.str[0]
}
} {
return .(iter)
}
main := fn(): void {
a := chars("Hello, World!")
loop {
char := a.next()
if char == null break
}
}
```
#### slice_to_global_pointer
```hb
a := @as(^u8, @bitcast(0))


@ -4,7 +4,7 @@ use {
debug,
lexer::{self, TokenKind},
parser::Pos,
ty::{self, Loc, Types},
ty::{self, Loc, Offset, Types},
utils::{BitSet, Vc},
},
alloc::{string::String, vec::Vec},
@ -712,6 +712,7 @@ impl Nodes {
if let Some((entry, hash)) = lookup_meta {
entry.insert(crate::ctx_map::Key { value: free, hash }, ());
}
free
}
@ -804,17 +805,28 @@ impl Nodes {
self.iter()
.filter_map(|(id, node)| node.kind.is_peeped().then_some(id))
.collect_into(stack);
stack.iter().for_each(|&s| self.lock(s));
stack.iter().for_each(|&s| {
debug_assert!(self.is_unlocked(s));
self.lock(s)
});
while fuel != 0
&& let Some(node) = stack.pop()
{
fuel -= 1;
if self[node].outputs.is_empty() {
self.push_adjacent_nodes(node, stack);
}
debug_assert_eq!(self[node].lock_rc.get(), 1, "{:?} {}", self[node], node);
if self.unlock_remove(node) {
continue;
}
debug_assert!(!self[node].outputs.is_empty(), "{:?} {}", self[node], node);
if let Some(new) = self.peephole(node, tys) {
self.replace(node, new);
self.push_adjacent_nodes(new, stack);
@ -830,7 +842,6 @@ impl Nodes {
}
debug_assert!(self.queued_peeps.is_empty());
stack.drain(..).for_each(|s| _ = self.unlock_remove(s));
}
@ -851,7 +862,19 @@ impl Nodes {
}
self[of].peep_triggers = Vc::default();
stack.iter().skip(prev_len).for_each(|&n| self.lock(n));
let mut i = 0;
stack.retain(|&n| {
if i < prev_len {
i += 1;
return true;
}
if self.is_unlocked(n) {
self.lock(n);
true
} else {
false
}
});
}
pub fn aclass_index(&self, region: Nid) -> (usize, Nid) {
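The two hunks above tighten the peephole worklist: nodes are asserted unlocked before being locked onto the stack, and trigger nodes that are already queued are dropped via `retain`, so each node sits on the worklist at most once. A minimal stand-alone sketch of that de-duplication pattern (the `Worklist` type and its fields are illustrative, not the compiler's actual node arena):

```rust
// Sketch of a de-duplicated peephole worklist: a node is pushed only if it is
// not already "locked" (i.e. not already queued), so it appears on the stack
// at most once.
struct Worklist {
    stack: Vec<usize>,
    locked: Vec<bool>, // stand-in for lock_rc: true while the node is queued
}

impl Worklist {
    fn new(node_count: usize) -> Self {
        Self { stack: Vec::new(), locked: vec![false; node_count] }
    }

    fn push(&mut self, node: usize) {
        if !self.locked[node] {
            self.locked[node] = true;
            self.stack.push(node);
        }
    }

    fn pop(&mut self) -> Option<usize> {
        let node = self.stack.pop()?;
        self.locked[node] = false;
        Some(node)
    }
}

fn main() {
    let mut wl = Worklist::new(4);
    wl.push(2);
    wl.push(2); // duplicate push is ignored while the node is queued
    assert_eq!(wl.pop(), Some(2));
    assert_eq!(wl.pop(), None);
}
```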
@ -1153,10 +1176,15 @@ impl Nodes {
continue;
}
if let Some(&load) =
self[n].outputs.iter().find(|&&n| self[n].kind == Kind::Load)
{
self.add_trigger(load, target);
let mut broken = false;
for o in self[n].outputs.clone() {
if o != target && !matches!(self[o].kind, Kind::Return { .. }) {
self.add_trigger(o, target);
broken = true;
}
}
if broken {
new_inps.push(n);
continue;
}
@ -1306,9 +1334,9 @@ impl Nodes {
cursor = next_store;
}
'eliminate: {
'forward_store: {
if self[target].outputs.is_empty() {
break 'eliminate;
break 'forward_store;
}
if self[value].kind != Kind::Load
@ -1317,106 +1345,121 @@ impl Nodes {
for &ele in self[value].outputs.clone().iter().filter(|&&n| n != target) {
self.add_trigger(ele, target);
}
break 'eliminate;
break 'forward_store;
}
let &[_, stack, last_store] = self[value].inputs.as_slice() else {
unreachable!()
};
// TODO: count other loads to determine whether this transformation is worth it
// might be overly restrictive
// but for now, just check we are copying the full stack allocation
if self[stack].ty != self[value].ty || self[stack].kind != Kind::Stck {
break 'eliminate;
break 'forward_store;
}
let mut unidentifed = self[stack].outputs.clone();
let load_idx = unidentifed.iter().position(|&n| n == value).unwrap();
unidentifed.swap_remove(load_idx);
let mut saved = Vc::default();
let mut cursor = last_store;
let mut first_store = last_store;
while cursor != MEM && self[cursor].kind == Kind::Stre {
let mut contact_point = cursor;
let mut region = self[cursor].inputs[2];
if let Kind::BinOp { op } = self[region].kind {
debug_assert_matches!(op, TokenKind::Add | TokenKind::Sub);
contact_point = region;
region = self[region].inputs[1]
// pessimistic
// allocation is most likely used in a loop or something so we can't get rid of it
if last_store != MEM
&& self[last_store]
.outputs
.iter()
.any(|&n| !matches!(self[n].kind, Kind::Load | Kind::Return { .. }))
{
break 'forward_store;
}
if region != stack {
break;
let mut store_count = 0;
let [mut cursor, mut first_store] = [last_store; 2];
while cursor != MEM {
debug_assert_eq!(self[cursor].kind, Kind::Stre);
// pessimistic
// the offset must only be used for this store
if self[cursor].inputs[2] != stack
&& self[self[cursor].inputs[2]].outputs.as_slice() != [cursor]
{
break 'forward_store;
}
let Some(index) = unidentifed.iter().position(|&n| n == contact_point)
else {
break 'eliminate;
};
// pessimistic
// we load from the store, this might be because the load spans multiple
// stores
if self[cursor].inputs[3] != MEM
&& self[self[cursor].inputs[3]].outputs.as_slice() != [cursor]
{
break 'forward_store;
}
if self[self[cursor].inputs[1]].kind == Kind::Load
&& self[value].outputs.iter().any(|&n| {
self.aclass_index(self[self[cursor].inputs[1]].inputs[1]).0
== self.aclass_index(self[n].inputs[2]).0
})
{
break 'eliminate;
break 'forward_store;
}
unidentifed.remove(index);
saved.push(contact_point);
first_store = cursor;
cursor = *self[cursor].inputs.get(3).unwrap_or(&MEM);
if unidentifed.is_empty() {
break;
}
cursor = self[cursor].inputs[3];
store_count += 1;
}
if !unidentifed.is_empty() {
break 'eliminate;
if store_count + 1 != self[stack].outputs.len() {
debug_assert!(store_count + 1 < self[stack].outputs.len());
break 'forward_store;
}
debug_assert_matches!(
self[last_store].kind,
Kind::Stre | Kind::Mem,
"{:?}",
self[last_store]
);
debug_assert_matches!(
self[first_store].kind,
Kind::Stre | Kind::Mem,
"{:?}",
self[first_store]
);
// at this point we know the stack was initialized just to be moved into a
// different location, so create new stores that write directly to the
// destination and remove the final load from this stack, which should cause
// this stack allocation to be eliminated
// FIXME: when the loads and stores become parallel we will need to gather
// `saved` differently
let mut prev_store = store;
for mut oper in saved.into_iter().rev() {
let mut region = region;
if let Kind::BinOp { op } = self[oper].kind {
debug_assert_eq!(self[oper].outputs.len(), 1);
debug_assert_eq!(self[self[oper].outputs[0]].kind, Kind::Stre);
let mut base_store = store;
if first_store != MEM {
debug_assert_ne!(last_store, MEM);
let mut cursor = first_store;
loop {
let mut inps = self[cursor].inputs.clone();
inps[2] = if inps[2] == stack {
region
} else {
let new_region = self.new_node(
self[oper].ty,
Kind::BinOp { op },
[VOID, region, self[oper].inputs[2]],
self[inps[2]].ty,
self[inps[2]].kind,
[VOID, region, self[inps[2]].inputs[2]],
tys,
);
self.pass_aclass(self.aclass_index(region).1, new_region);
region = new_region;
oper = self[oper].outputs[0];
new_region
};
inps[3] = base_store;
base_store = self.new_node(self[cursor].ty, Kind::Stre, inps, tys);
if self.is_unlocked(base_store) {
self.lock(base_store);
self.queued_peeps.push(base_store);
}
let mut inps = self[oper].inputs.clone();
debug_assert_eq!(inps.len(), 4);
inps[2] = region;
inps[3] = prev_store;
prev_store = self.new_node_nop(self[oper].ty, Kind::Stre, inps);
if self.is_unlocked(prev_store) {
self.lock(prev_store);
self.queued_peeps.push(prev_store);
if cursor == last_store {
break;
}
cursor = self[cursor].outputs[0];
}
return Some(prev_store);
for o in self[last_store].outputs.clone() {
if matches!(self[o].kind, Kind::Return { .. }) && self.is_unlocked(o) {
self.queued_peeps.push(o);
}
}
} else {
debug_assert_eq!(last_store, MEM);
}
return Some(base_store);
}
if let Some(&load) =
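The hunk above reworks the store-forwarding peephole: when a `Stck` allocation is written by a plain chain of `Stre` nodes and then loaded only to be stored somewhere else, the stores can be re-issued against the final destination and the temporary allocation becomes dead. A small self-contained sketch of the same idea on a toy instruction list, assuming a single full copy and no other uses of the temporary (names such as `Op` and `forward_stores` are illustrative, not the compiler's API):

```rust
// Toy illustration of store forwarding: a chain of stores into a temporary
// buffer followed by a full copy is rewritten into stores that hit the
// destination directly, letting the temporary be eliminated.
#[derive(Clone, Debug, PartialEq)]
enum Op {
    // Store `value` at `offset` inside the temporary buffer.
    StoreTmp { offset: usize, value: u64 },
    // Copy the whole temporary buffer into the destination.
    CopyTmpToDst,
    // Store `value` at `offset` inside the destination.
    StoreDst { offset: usize, value: u64 },
}

// Forward stores only when every write to the temporary is followed by a
// single full copy; anything else keeps the original sequence, mirroring the
// "pessimistic" bail-outs in the real pass.
fn forward_stores(ops: &[Op]) -> Vec<Op> {
    let Some(copy_at) = ops.iter().position(|o| *o == Op::CopyTmpToDst) else {
        return ops.to_vec();
    };
    if ops[..copy_at].iter().any(|o| !matches!(o, Op::StoreTmp { .. })) {
        return ops.to_vec();
    }
    ops[..copy_at]
        .iter()
        .map(|o| match o {
            Op::StoreTmp { offset, value } => Op::StoreDst { offset: *offset, value: *value },
            _ => unreachable!(),
        })
        .chain(ops[copy_at + 1..].iter().cloned())
        .collect()
}

fn main() {
    let ops = vec![
        Op::StoreTmp { offset: 0, value: 1 },
        Op::StoreTmp { offset: 8, value: 2 },
        Op::CopyTmpToDst,
    ];
    // Both stores now target the destination; the temporary is dead.
    println!("{:?}", forward_stores(&ops));
}
```

The early returns stand in for the pass's pessimistic checks: any read or partial use of the temporary keeps the original sequence unchanged.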
@ -1525,12 +1568,6 @@ impl Nodes {
self.remove(prev);
self.unlock(o);
for o in self[o].outputs.clone() {
if self.is_unlocked(o) {
self.lock(o);
self.queued_peeps.push(o);
}
}
self.replace(o, self[o].inputs[1]);
}
}
@ -2210,6 +2247,7 @@ impl fmt::Display for Kind {
}
}
#[derive(Debug)]
pub enum CondOptRes {
Unknown,
Known { value: bool, pin: Option<Nid> },


@ -940,7 +940,7 @@ impl<'a> Codegen<'a> {
if self.ci.inline_depth == 0 {
debug_assert_ne!(self.ci.ctrl.get(), VOID);
let mut inps = Vc::from([self.ci.ctrl.get(), value.id]);
for (i, aclass) in self.ci.scope.aclasses.iter_mut().enumerate() {
for (i, aclass) in self.ci.scope.aclasses.iter_mut().enumerate().take(2) {
self.ci.nodes.load_loop_aclass(i, aclass, &mut self.ci.loops);
if aclass.last_store.get() != MEM {
inps.push(aclass.last_store.get());
@ -1283,7 +1283,6 @@ impl<'a> Codegen<'a> {
self.ci.nodes.new_const(ty::Id::UINT, len as i64)
}
ty::Kind::Slice(_) => {
// Might change
let off = self.offset(bs.id, SLICE_LEN_OFF);
self.load_mem(off, ty::Id::UINT)
}
@ -1303,6 +1302,7 @@ impl<'a> Codegen<'a> {
self.tys,
);
self.ci.nodes.lock(len.id);
self.ci.nodes.unlock_remove(end);
let elem = match bs.ty.expand() {
ty::Kind::Slice(s) => self.tys.ins.slices[s].elem,
@ -1333,6 +1333,7 @@ impl<'a> Codegen<'a> {
};
ptr.id = self.offset_ptr(ptr.id, elem, start).id;
ptr.ty = self.tys.make_ptr(elem);
self.ci.nodes.unlock_remove(start);
self.ci.nodes.lock(ptr.id);
@ -1348,10 +1349,8 @@ impl<'a> Codegen<'a> {
let region = self.offset(mem, off);
self.store_mem(region, value.ty, value.id);
self.ci.nodes.unlock(start);
self.ci.nodes.unlock(len.id);
self.ci.nodes.unlock(end);
self.ci.nodes.unlock(ptr.id);
self.ci.nodes.unlock_remove(len.id);
self.ci.nodes.unlock_remove(ptr.id);
Some(Value::ptr(mem).ty(ty))
}
Expr::Index { base, index } => {
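Several of the codegen changes above swap `unlock` for `unlock_remove` on temporaries such as `end`, `start`, `len.id`, and `ptr.id`, so they are reclaimed once nothing references them instead of lingering in the graph. A rough sketch of that lock/remove discipline on a toy arena, assuming a node is freed only when both its lock count and user count reach zero (the types here are illustrative, not the compiler's actual structures):

```rust
// Sketch of the lock/unlock-remove discipline on a node arena: `lock` bumps a
// keep-alive count, `unlock` only drops it, while `unlock_remove` additionally
// frees the node once nothing (no lock, no user) keeps it alive.
struct Node {
    lock_rc: u32,
    users: u32,
    dead: bool,
}

struct Arena {
    nodes: Vec<Node>,
}

impl Arena {
    fn lock(&mut self, id: usize) {
        self.nodes[id].lock_rc += 1;
    }

    fn unlock(&mut self, id: usize) {
        self.nodes[id].lock_rc -= 1;
    }

    // Drop the lock and reclaim the node if it is now completely unused.
    // Returns true when the node was removed.
    fn unlock_remove(&mut self, id: usize) -> bool {
        self.unlock(id);
        let node = &mut self.nodes[id];
        if node.lock_rc == 0 && node.users == 0 {
            node.dead = true;
            return true;
        }
        false
    }
}

fn main() {
    let mut arena = Arena { nodes: vec![Node { lock_rc: 0, users: 0, dead: false }] };
    arena.lock(0);
    // With no other users, dropping the last lock reclaims the node.
    assert!(arena.unlock_remove(0));
}
```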
@ -3550,7 +3549,6 @@ impl<'a> Codegen<'a> {
let res = self.ci.nodes.try_match_cond(id);
// TODO: highlight the pin position
let msg = match (kind, res) {
(AK::UnwrapCheck, CR::Known { value: false, .. }) => {
"unwrap is not needed since the value is (provably) never null, \
@ -3569,7 +3567,7 @@ impl<'a> Codegen<'a> {
or explicitly check for null and handle it \
('if <opt> == null { /* handle */ } else { /* use opt */ }')"
}
_ => unreachable!(),
v => unreachable!("{v:?} {id}"),
};
self.error(pos, msg);
}
@ -3729,7 +3727,6 @@ impl<'a> Codegen<'a> {
let oty = mem::replace(&mut opt.ty, ty);
self.unwrap_opt_unchecked(ty, oty, opt);
// TODO: extract the if check into a function
let ass = self.ci.nodes.new_node_nop(oty, Kind::Assert { kind, pos }, [
self.ci.ctrl.get(),
null_check,
@ -4465,6 +4462,7 @@ mod tests {
fb_driver;
// Purely Testing Examples;
len_never_goes_down;
slice_to_global_pointer;
subsclice_bug;
string_array;


@ -12,6 +12,7 @@ main:
0: ST r0, r32, 0a, 8h
LD r33, r32, 0a, 8h
JEQ r33, r0, :2
ST r0, r32, 8a, 8h
LI64 r32, 200d
CP r1, r32
JMP :1
@ -66,6 +67,6 @@ new_stru:
LD r1, r254, 0a, 16h
ADDI64 r254, r254, 16d
JALA r0, r31, 0a
code size: 655
code size: 668
ret: 0
status: Ok(())


@ -0,0 +1,49 @@
chars:
ADDI64 r254, r254, -32d
ST r3, r254, 16a, 16h
ADDI64 r3, r254, 16d
CP r13, r3
ADDI64 r14, r254, 0d
BMC r13, r14, 16h
LD r1, r14, 0a, 16h
ADDI64 r254, r254, 32d
JALA r0, r31, 0a
main:
ADDI64 r254, r254, -56d
ST r31, r254, 32a, 24h
LRA r32, r0, :Hello, World!
ST r32, r254, 16a, 8h
LI64 r32, 13d
ST r32, r254, 24a, 8h
ADDI64 r32, r254, 0d
LD r3, r254, 16a, 16h
JAL r31, r0, :chars
ST r1, r32, 0a, 16h
2: CP r2, r32
JAL r31, r0, :next
CP r33, r1
ANDI r33, r33, 65535d
JNE r33, r0, :0
JMP :1
0: JMP :2
1: LD r31, r254, 32a, 24h
ADDI64 r254, r254, 56d
JALA r0, r31, 0a
next:
CP r13, r2
LD r14, r13, 8a, 8h
JNE r14, r0, :0
CP r1, r0
JMP :1
0: LD r15, r13, 0a, 8h
ADDI64 r15, r15, 1d
ST r15, r13, 0a, 8h
ADDI64 r14, r14, -1d
LD r15, r15, 0a, 1h
ST r14, r13, 8a, 8h
ORI r13, r15, 32768d
CP r1, r13
1: JALA r0, r31, 0a
code size: 423
ret: 0
status: Ok(())


@ -3,8 +3,8 @@ decide:
CP r1, r13
JALA r0, r31, 0a
main:
ADDI64 r254, r254, -144d
ST r31, r254, 96a, 48h
ADDI64 r254, r254, -136d
ST r31, r254, 96a, 40h
JAL r31, r0, :decide
CP r33, r0
ADDI64 r34, r254, 88d
@ -14,9 +14,7 @@ main:
CP r32, r33
JMP :1
0: CP r32, r34
1: LI64 r35, 1d
ST r35, r254, 88a, 8h
JNE r32, r33, :2
1: JNE r32, r33, :2
LI64 r32, 9001d
CP r1, r32
JMP :3
@ -53,15 +51,15 @@ main:
9: ADDI64 r33, r254, 56d
JAL r31, r0, :new_foo
ST r1, r33, 0a, 16h
LD r36, r254, 56a, 8h
JNE r36, r0, :10
LD r35, r254, 56a, 8h
JNE r35, r0, :10
LI64 r32, 999d
CP r1, r32
JMP :3
10: LRA r36, r0, :"foo\0"
ST r36, r254, 40a, 8h
LI64 r36, 4d
ST r36, r254, 48a, 8h
10: LRA r35, r0, :"foo\0"
ST r35, r254, 40a, 8h
LI64 r35, 4d
ST r35, r254, 48a, 8h
LD r2, r33, 0a, 16h
LD r4, r254, 40a, 16h
JAL r31, r0, :use_foo
@ -69,12 +67,14 @@ main:
JAL r31, r0, :no_foo
ST r1, r33, 0a, 16h
JAL r31, r0, :decide
CP r36, r1
ANDI r36, r36, 255d
JNE r36, r0, :11
CP r35, r1
ANDI r35, r35, 255d
JNE r35, r0, :11
JMP :12
11: ST r34, r254, 0a, 8h
LI64 r35, 1d
ST r35, r254, 8a, 8h
ST r35, r254, 88a, 8h
12: LD r35, r254, 0a, 8h
JNE r35, r0, :13
LI64 r32, 34d
@ -101,8 +101,8 @@ main:
ANDI r32, r32, 65535d
SUB64 r32, r32, r33
CP r1, r32
3: LD r31, r254, 96a, 48h
ADDI64 r254, r254, 144d
3: LD r31, r254, 96a, 40h
ADDI64 r254, r254, 136d
JALA r0, r31, 0a
new_bar:
ADDI64 r254, r254, -24d