implementing the loop iteration optimization

the multiplication and addition applied to a pointer are replaced with
simply incrementing the pointer itself

Signed-off-by: Jakub Doka <jakub.doka2@gmail.com>
This commit is contained in:
Jakub Doka 2024-12-15 19:37:37 +01:00
parent 5a7a01ca02
commit 7837eeb90d
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
3 changed files with 115 additions and 89 deletions

View file

@ -1068,47 +1068,72 @@ impl Nodes {
return Some(self.new_node(ty, K::BinOp { op }, [ctrl, a, new_rhs], tys));
}
//if op == T::Add
// && self[rhs].kind == (K::BinOp { op: T::Mul })
// && let &[_, mul_lhs, mul_rhs] = self[rhs].inputs.as_slice()
// && self[mul_lhs].kind == K::Phi
// && let &[phi_ctrl, phi_lhs, phi_rhs] = self[mul_lhs].inputs.as_slice()
// && phi_rhs != VOID
// && self[phi_ctrl].kind == K::Loop
// && self[phi_rhs].kind == (K::BinOp { op: T::Add })
// && self[phi_rhs].inputs[1] == mul_lhs
//{
// debug_assert_eq!(
// self[mul_lhs].outputs.len(),
// 1,
// "{:?}",
// self[mul_lhs]
// .outputs
// .iter()
// .map(|&n| (
// &self[n].kind,
// self[n].inputs.iter().position(|&n| n == mul_lhs)
// ))
// .collect::<Vec<_>>()
// );
if op == T::Add
&& self[rhs].kind == (K::BinOp { op: T::Mul })
&& let &[_, index, step] = self[rhs].inputs.as_slice()
&& self[index].kind == K::Phi
&& let &[iter_loop, index_init, new_index] = self[index].inputs.as_slice()
&& new_index != VOID
&& self[iter_loop].kind == K::Loop
&& self[new_index].kind == (K::BinOp { op: T::Add })
&& self[new_index].inputs[1] == index
&& let Some(&iter_cond) = self[index].outputs.iter().find(
|&&n| matches!(self[n].kind, Kind::BinOp { op } if op.is_compatison()),
)
&& self[index].outputs.iter().all(|n| [iter_cond, rhs, new_index].contains(n))
{
// arr := @as([u32; 10], idk)
//
// i := 0
// loop if i == 10 break else {
// arr[i] = 0
// i += 1
// }
//
// |||||
// VVVVV
//
// cursor := &arr[0] + 0
// end := &arr[0] + 10
// loop if cursor == end break else {
// *cursor = 0
// cursor += 1
// }
// let init_shift = self.new_node(
// self[rhs].ty,
// K::BinOp { op: T::Mul },
// [ctrl, phi_lhs, mul_rhs],
// tys,
// );
// let init =
// self.new_node(ty, K::BinOp { op: T::Add }, [ctrl, lhs, init_shift], tys);
// let new_value = self.new_node_nop(ty, K::Phi, [phi_ctrl, init, 0]);
// let next = self.new_node(
// ty,
// Kind::BinOp { op: T::Add },
// [ctrl, new_value, mul_rhs],
// tys,
// );
// return Some(self.modify_input(new_value, 2, next));
//}
debug_assert!(self[iter_cond].inputs.contains(&index));
let iter_bound_index =
self[iter_cond].inputs.iter().rposition(|&n| n != index).unwrap();
debug_assert_ne!(iter_bound_index, 0);
let end_shift = self.new_node(
self[rhs].ty,
K::BinOp { op: T::Mul },
[ctrl, self[iter_cond].inputs[iter_bound_index], step],
tys,
);
let end =
self.new_node(ty, K::BinOp { op: T::Add }, [ctrl, lhs, end_shift], tys);
let init_shift = self.new_node(
self[rhs].ty,
K::BinOp { op: T::Mul },
[ctrl, index_init, step],
tys,
);
let init =
self.new_node(ty, K::BinOp { op: T::Add }, [ctrl, lhs, init_shift], tys);
let new_value = self.new_node_nop(ty, K::Phi, [iter_loop, init, 0]);
let next =
self.new_node(ty, Kind::BinOp { op: T::Add }, [ctrl, new_value, step], tys);
let mut new_cond_inputs = self[iter_cond].inputs.clone();
new_cond_inputs[iter_bound_index] = end;
new_cond_inputs[3 - iter_bound_index] = new_value;
let new_cond = self.new_node(ty, self[iter_cond].kind, new_cond_inputs, tys);
self.replace(iter_cond, new_cond);
return Some(self.modify_input(new_value, 2, next));
}
if changed {
return Some(self.new_node(ty, self[target].kind, [ctrl, lhs, rhs], tys));
@ -1268,6 +1293,14 @@ impl Nodes {
return Some(lhs);
}
// TODO: traverse the graph downward and check if this phi is only consumed by
// itself
if self[target].outputs.as_slice() == [rhs]
&& self[rhs].outputs.as_slice() == [target]
{
return Some(lhs);
}
if self[lhs].kind == Kind::Stre
&& self[rhs].kind == Kind::Stre
&& self[lhs].ty == self[rhs].ty

View file

@ -1,36 +1,35 @@
main:
ADDI64 r254, r254, -104d
ST r31, r254, 48a, 56h
ADDI64 r254, r254, -88d
ST r31, r254, 48a, 40h
LRA r32, r0, :glob_stru
JAL r31, r0, :new_stru
ST r1, r32, 0a, 16h
CP r33, r0
LD r34, r32, 0a, 8h
JEQ r34, r33, :0
LD r33, r32, 0a, 8h
JEQ r33, r0, :0
LI64 r32, 300d
CP r1, r32
JMP :1
0: ST r33, r32, 0a, 8h
LD r34, r32, 0a, 8h
JEQ r34, r33, :2
0: ST r0, r32, 0a, 8h
LD r33, r32, 0a, 8h
JEQ r33, r0, :2
LI64 r32, 200d
CP r1, r32
JMP :1
2: LI64 r34, 1d
ST r34, r32, 0a, 8h
ST r34, r32, 8a, 8h
ADDI64 r35, r254, 0d
ADDI64 r33, r254, 0d
ST r34, r254, 0a, 8h
ST r34, r254, 8a, 8h
ST r34, r254, 16a, 8h
ST r34, r254, 24a, 8h
ST r34, r254, 32a, 8h
ST r34, r254, 40a, 8h
LI64 r36, 3d
ADDI64 r35, r33, 48d
CP r32, r33
8: JNE r32, r36, :3
8: JNE r35, r32, :3
LD r32, r254, 32a, 8h
JEQ r32, r33, :4
JEQ r32, r0, :4
LI64 r32, 100d
CP r1, r32
JMP :1
@ -41,28 +40,25 @@ main:
ST r34, r254, 32a, 8h
ST r34, r254, 40a, 8h
CP r32, r33
7: LD r37, r254, 32a, 8h
JNE r32, r36, :5
JEQ r37, r33, :6
7: LD r34, r254, 32a, 8h
JNE r35, r32, :5
JEQ r34, r0, :6
LI64 r32, 10d
CP r1, r32
JMP :1
6: CP r1, r33
6: CP r1, r0
JMP :1
5: MULI64 r37, r32, 16d
ADD64 r37, r35, r37
ST r33, r37, 0a, 8h
ST r33, r37, 8a, 8h
ADD64 r32, r32, r34
5: ADDI64 r34, r32, 16d
ST r0, r32, 0a, 8h
ST r0, r32, 8a, 8h
CP r32, r34
JMP :7
3: MULI64 r37, r32, 16d
ADD64 r37, r35, r37
JAL r31, r0, :new_stru
ST r1, r37, 0a, 16h
ADD64 r32, r32, r34
3: JAL r31, r0, :new_stru
ST r1, r32, 0a, 16h
ADDI64 r32, r32, 16d
JMP :8
1: LD r31, r254, 48a, 56h
ADDI64 r254, r254, 104d
1: LD r31, r254, 48a, 40h
ADDI64 r254, r254, 88d
JALA r0, r31, 0a
new_stru:
ADDI64 r254, r254, -16d
@ -71,6 +67,6 @@ new_stru:
LD r1, r254, 0a, 16h
ADDI64 r254, r254, 16d
JALA r0, r31, 0a
code size: 673
code size: 658
ret: 0
status: Ok(())

View file

@ -1,31 +1,28 @@
main:
ADDI64 r254, r254, -10240d
LI64 r14, 1d
LI8 r16, 64b
LI64 r17, 1024d
LI8 r15, 64b
LI64 r16, 1024d
CP r13, r0
ADDI64 r18, r254, 0d
4: JLTU r13, r17, :0
LI64 r16, 10d
CP r13, r14
3: LD r15, r254, 2048a, 1h
JLTU r13, r16, :1
ANDI r13, r15, 255d
ADDI64 r17, r254, 0d
4: JLTU r13, r16, :0
ADDI64 r13, r17, 1024d
ADDI64 r15, r17, 10240d
3: LD r14, r254, 2048a, 1h
JLTU r13, r15, :1
ANDI r13, r14, 255d
CP r1, r13
JMP :2
1: ADD64 r15, r13, r14
MULI64 r13, r13, 1024d
ADD64 r13, r18, r13
BMC r18, r13, 1024h
CP r13, r15
1: ADDI64 r14, r13, 1024d
BMC r17, r13, 1024h
CP r13, r14
JMP :3
0: ADD64 r15, r13, r14
ADD64 r13, r18, r13
ST r16, r13, 0a, 1h
CP r13, r15
0: ADDI64 r14, r13, 1d
ADD64 r13, r17, r13
ST r15, r13, 0a, 1h
CP r13, r14
JMP :4
2: ADDI64 r254, r254, 10240d
JALA r0, r31, 0a
code size: 194
code size: 192
ret: 64
status: Ok(())