applying late peepholes

This commit is contained in:
Jakub Doka 2024-10-26 10:45:50 +02:00
parent b62413046d
commit 9095af6d84
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
7 changed files with 149 additions and 120 deletions

View file

@ -23,10 +23,10 @@ use {
cell::RefCell, cell::RefCell,
fmt::{self, Debug, Display, Write}, fmt::{self, Debug, Display, Write},
format_args as fa, mem, format_args as fa, mem,
ops::{self, Deref}, ops::{self, Deref, Not},
}, },
hashbrown::hash_map, hashbrown::hash_map,
hbbytecode::DisasmError, hbbytecode::{st, DisasmError},
regalloc2::VReg, regalloc2::VReg,
}; };
@ -279,6 +279,36 @@ impl Nodes {
true true
} }
/// Tries a single peephole rewrite on `target`.
///
/// When `peephole` yields a replacement node, `target` is replaced by it in
/// the graph and the replacement id is returned. Returns `None` (and leaves
/// the graph untouched by this function) when `peephole` has nothing to offer.
fn late_peephole(&mut self, target: Nid) -> Option<Nid> {
    let replacement = self.peephole(target)?;
    self.replace(target, replacement);
    Some(replacement)
}
fn iter_peeps(&mut self, mut fuel: usize) {
let mut in_stack = BitSet::default();
in_stack.clear(self.values.len());
let mut stack =
self.iter().map(|(id, ..)| id).inspect(|&id| _ = in_stack.set(id)).collect::<Vec<_>>();
while fuel != 0
&& let Some(node) = stack.pop()
{
fuel -= 1;
in_stack.unset(node);
let new = self.late_peephole(node);
if let Some(new) = new {
for &i in self[new].outputs.iter().chain(self[new].inputs.iter()) {
if in_stack.set(i) {
stack.push(i)
}
}
}
}
}
fn peephole(&mut self, target: Nid) -> Option<Nid> { fn peephole(&mut self, target: Nid) -> Option<Nid> {
use {Kind as K, TokenKind as T}; use {Kind as K, TokenKind as T};
match self[target].kind { match self[target].kind {
@ -703,14 +733,6 @@ impl Nodes {
} }
} }
/// Tries a single peephole rewrite on `target` and returns the node that
/// should be used from now on.
///
/// If `peephole` yields a replacement, `target` is replaced by it in the graph
/// and the replacement id is returned; otherwise `target` itself is returned.
fn late_peephole(&mut self, target: Nid) -> Nid {
    match self.peephole(target) {
        Some(replacement) => {
            self.replace(target, replacement);
            replacement
        }
        None => target,
    }
}
fn load_loop_var(&mut self, index: usize, value: &mut Variable, loops: &mut [Loop]) { fn load_loop_var(&mut self, index: usize, value: &mut Variable, loops: &mut [Loop]) {
self.load_loop_value(&mut |l| l.scope.iter_mut().nth(index).unwrap(), value, loops); self.load_loop_value(&mut |l| l.scope.iter_mut().nth(index).unwrap(), value, loops);
} }
@ -1182,6 +1204,7 @@ impl ItemCtx {
self.nodes.unlock(ENTRY); self.nodes.unlock(ENTRY);
self.nodes.unlock(MEM); self.nodes.unlock(MEM);
self.nodes.eliminate_stack_temporaries(); self.nodes.eliminate_stack_temporaries();
self.nodes.iter_peeps(1000);
} }
fn emit(&mut self, instr: (usize, [u8; instrs::MAX_SIZE])) { fn emit(&mut self, instr: (usize, [u8; instrs::MAX_SIZE])) {
@ -1892,7 +1915,7 @@ impl<'a> Codegen<'a> {
} }
let store = self.ci.nodes.new_node_nop(ty, Kind::Stre, vc); let store = self.ci.nodes.new_node_nop(ty, Kind::Stre, vc);
self.ci.scope.store.set_value(store, &mut self.ci.nodes); self.ci.scope.store.set_value(store, &mut self.ci.nodes);
let opted = self.ci.nodes.late_peephole(store); let opted = self.ci.nodes.late_peephole(store).unwrap_or(store);
self.ci.scope.store.set_value_remove(opted, &mut self.ci.nodes); self.ci.scope.store.set_value_remove(opted, &mut self.ci.nodes);
opted opted
} }
@ -2954,7 +2977,7 @@ impl<'a> Codegen<'a> {
self.ci.nodes.unlock(self.ci.ctrl); self.ci.nodes.unlock(self.ci.ctrl);
self.ci.nodes.unlock(node); self.ci.nodes.unlock(node);
let rpl = self.ci.nodes.late_peephole(node); let rpl = self.ci.nodes.late_peephole(node).unwrap_or(node);
if self.ci.ctrl == node { if self.ci.ctrl == node {
self.ci.ctrl = rpl; self.ci.ctrl = rpl;
} }

View file

@ -287,6 +287,15 @@ impl BitSet {
self.data.resize(new_len, 0); self.data.resize(new_len, 0);
} }
/// Clears the bit for `idx` and reports whether it was set beforehand.
///
/// Panics if the word holding `idx` lies beyond the end of `self.data`
/// (plain slice indexing is used).
pub fn unset(&mut self, idx: Nid) -> bool {
    let bit = idx as usize;
    let (word_idx, shift) = (bit / Self::ELEM_SIZE, bit % Self::ELEM_SIZE);
    let word = &mut self.data[word_idx];
    let was_set = (*word & (1 << shift)) != 0;
    *word &= !(1 << shift);
    was_set
}
pub fn set(&mut self, idx: Nid) -> bool { pub fn set(&mut self, idx: Nid) -> bool {
let idx = idx as usize; let idx = idx as usize;
let data_idx = idx / Self::ELEM_SIZE; let data_idx = idx / Self::ELEM_SIZE;

View file

@ -1,17 +1,17 @@
main: main:
ADDI64 r254, r254, -128d ADDI64 r254, r254, -128d
LI8 r7, 69b LI8 r7, 69b
LI64 r5, 128d LI64 r6, 128d
LI64 r6, 0d LI64 r8, 0d
ADDI64 r4, r254, 0d ADDI64 r4, r254, 0d
2: JLTU r6, r5, :0 2: JLTU r8, r6, :0
LD r3, r254, 42a, 1h LD r3, r254, 42a, 1h
ANDI r1, r3, 255d ANDI r1, r3, 255d
JMP :1 JMP :1
0: ADDI64 r8, r6, 1d 0: ADDI64 r5, r8, 1d
ADD64 r6, r6, r4 ADD64 r12, r4, r8
ST r7, r6, 0a, 1h ST r7, r12, 0a, 1h
CP r6, r8 CP r8, r5
JMP :2 JMP :2
1: ADDI64 r254, r254, 128d 1: ADDI64 r254, r254, 128d
JALA r0, r31, 0a JALA r0, r31, 0a

View file

@ -23,19 +23,19 @@ main:
MUL64 r11, r11, r6 MUL64 r11, r11, r6
MULI64 r9, r9, 8d MULI64 r9, r9, 8d
ADD64 r11, r11, r10 ADD64 r11, r11, r10
ADD64 r9, r9, r5 ADD64 r9, r5, r9
MULI64 r11, r11, 8d MULI64 r11, r11, 8d
ADDI64 r10, r254, 32d ADDI64 r10, r254, 32d
ADD64 r12, r11, r5 ADD64 r11, r5, r11
BMC r9, r10, 8h BMC r9, r10, 8h
BMC r12, r9, 8h BMC r11, r9, 8h
BMC r10, r12, 8h BMC r10, r11, 8h
CP r10, r3 CP r10, r3
JMP :5 JMP :5
0: ADD64 r2, r9, r8 0: ADD64 r2, r9, r8
MULI64 r12, r9, 8d MULI64 r12, r9, 8d
ADD64 r3, r12, r5 ADD64 r7, r5, r12
ST r9, r3, 0a, 8h ST r9, r7, 0a, 8h
CP r9, r2 CP r9, r2
JMP :6 JMP :6
2: ADDI64 r254, r254, 40d 2: ADDI64 r254, r254, 40d

View file

@ -32,89 +32,87 @@ main:
LD r11, r254, 150a, 1h LD r11, r254, 150a, 1h
ADD8 r1, r11, r10 ADD8 r1, r11, r10
LD r3, r254, 148a, 1h LD r3, r254, 148a, 1h
ADD8 r6, r3, r1 ADD8 r7, r3, r1
LD r7, r254, 151a, 1h LI8 r8, 4b
LI8 r9, 4b ADD8 r7, r7, r6
ADD8 r11, r7, r6 ANDI r7, r7, 255d
ANDI r11, r11, 255d ANDI r8, r8, 255d
ANDI r9, r9, 255d JEQ r7, r8, :0
JEQ r11, r9, :0
LI64 r1, 1008d LI64 r1, 1008d
JMP :1 JMP :1
0: LI64 r5, 1d 0: LI64 r3, 1d
ADDI64 r8, r254, 80d ADDI64 r6, r254, 80d
ST r5, r254, 80a, 8h ST r3, r254, 80a, 8h
LI64 r9, 2d LI64 r7, 2d
ST r9, r254, 88a, 8h ST r7, r254, 88a, 8h
LI64 r1, 3d LI64 r11, 3d
ADDI64 r4, r254, 32d ADDI64 r2, r254, 32d
ST r1, r254, 32a, 8h ST r11, r254, 32a, 8h
LI64 r5, 4d LI64 r3, 4d
ST r5, r254, 40a, 8h ST r3, r254, 40a, 8h
LD r12, r254, 32a, 8h LD r10, r254, 32a, 8h
LD r1, r254, 80a, 8h LD r11, r254, 80a, 8h
ADDI64 r11, r254, 0d ADDI64 r9, r254, 0d
ADD64 r3, r12, r1
ST r3, r254, 0a, 8h
LD r7, r254, 40a, 8h
LD r9, r254, 88a, 8h
ADD64 r10, r7, r9
ST r10, r254, 8a, 8h
LD r2, r254, 80a, 8h
LD r3, r254, 32a, 8h
SUB64 r5, r3, r2
ST r5, r254, 16a, 8h
LD r9, r254, 88a, 8h
LD r10, r254, 40a, 8h
SUB64 r12, r10, r9
ST r12, r254, 24a, 8h
ADDI64 r3, r254, 112d
BMC r11, r3, 32h
LI64 r6, 0d
ADDI64 r9, r254, 96d
ST r6, r254, 96a, 8h
ST r6, r254, 104a, 8h
LD r1, r254, 32a, 8h
LD r2, r254, 96a, 8h
ADDI64 r9, r254, 48d
SUB64 r5, r2, r1
ST r5, r254, 48a, 8h
LD r10, r254, 40a, 8h
LD r11, r254, 104a, 8h
SUB64 r12, r11, r10
ST r12, r254, 56a, 8h
ADDI64 r10, r9, 16d
BMC r8, r10, 16h
LD r7, r254, 112a, 8h
LD r8, r254, 48a, 8h
ADD64 r10, r8, r7
ST r10, r254, 48a, 8h
LD r2, r254, 120a, 8h
LD r3, r254, 56a, 8h
ADD64 r5, r2, r3
ST r5, r254, 56a, 8h
LD r10, r254, 128a, 8h
LD r11, r254, 64a, 8h
ADD64 r1, r10, r11 ADD64 r1, r10, r11
ST r1, r254, 64a, 8h ST r1, r254, 0a, 8h
LD r5, r254, 136a, 8h LD r5, r254, 40a, 8h
LD r6, r254, 72a, 8h LD r7, r254, 88a, 8h
ADD64 r8, r5, r6 ADD64 r8, r5, r7
ST r8, r254, 72a, 8h ST r8, r254, 8a, 8h
LD r12, r254, 64a, 8h LD r12, r254, 80a, 8h
LD r1, r254, 48a, 8h LD r1, r254, 32a, 8h
ADDI64 r7, r254, 152d SUB64 r3, r1, r12
ADD64 r4, r12, r1 ST r3, r254, 16a, 8h
ST r4, r254, 152a, 8h LD r7, r254, 88a, 8h
LD r8, r254, 72a, 8h LD r8, r254, 40a, 8h
LD r9, r254, 56a, 8h SUB64 r10, r8, r7
ST r10, r254, 24a, 8h
ADDI64 r1, r254, 112d
BMC r9, r1, 32h
LI64 r4, 0d
ADDI64 r7, r254, 96d
ST r4, r254, 96a, 8h
ST r4, r254, 104a, 8h
LD r11, r254, 32a, 8h
LD r12, r254, 96a, 8h
ADDI64 r7, r254, 48d
SUB64 r3, r12, r11
ST r3, r254, 48a, 8h
LD r8, r254, 40a, 8h
LD r9, r254, 104a, 8h
SUB64 r10, r9, r8
ST r10, r254, 56a, 8h
ADDI64 r8, r7, 16d
BMC r6, r8, 16h
LD r5, r254, 112a, 8h
LD r6, r254, 48a, 8h
ADD64 r8, r6, r5
ST r8, r254, 48a, 8h
LD r12, r254, 120a, 8h
LD r1, r254, 56a, 8h
ADD64 r3, r12, r1
ST r3, r254, 56a, 8h
LD r8, r254, 128a, 8h
LD r9, r254, 64a, 8h
ADD64 r11, r8, r9 ADD64 r11, r8, r9
ST r11, r254, 160a, 8h ST r11, r254, 64a, 8h
LD r3, r254, 152a, 8h LD r3, r254, 136a, 8h
LD r5, r254, 160a, 8h LD r4, r254, 72a, 8h
ADD64 r1, r5, r3 ADD64 r6, r3, r4
ST r6, r254, 72a, 8h
LD r10, r254, 64a, 8h
LD r11, r254, 48a, 8h
ADDI64 r5, r254, 152d
ADD64 r2, r10, r11
ST r2, r254, 152a, 8h
LD r6, r254, 72a, 8h
LD r7, r254, 56a, 8h
ADD64 r9, r6, r7
ST r9, r254, 160a, 8h
LD r1, r254, 152a, 8h
ADD64 r1, r1, r9
1: ADDI64 r254, r254, 168d 1: ADDI64 r254, r254, 168d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 1226 code size: 1200
ret: 10 ret: 10
status: Ok(()) status: Ok(())

View file

@ -32,24 +32,23 @@ fib_iter:
JMP :2 JMP :2
1: JALA r0, r31, 0a 1: JALA r0, r31, 0a
main: main:
ADDI64 r254, r254, -18d ADDI64 r254, r254, -26d
ST r31, r254, 2a, 16h ST r31, r254, 2a, 24h
LI8 r1, 10b LI8 r32, 10b
ADDI64 r3, r254, 0d ADDI64 r3, r254, 0d
ST r1, r254, 0a, 1h ST r32, r254, 0a, 1h
ST r1, r254, 1a, 1h ST r32, r254, 1a, 1h
LD r7, r254, 0a, 1h LD r7, r254, 0a, 1h
ANDI r2, r7, 255d ANDI r2, r7, 255d
JAL r31, r0, :fib JAL r31, r0, :fib
CP r32, r1 CP r33, r1
LD r1, r254, 1a, 1h ANDI r2, r32, 255d
ANDI r2, r1, 255d
JAL r31, r0, :fib_iter JAL r31, r0, :fib_iter
CP r8, r32 CP r6, r33
SUB64 r1, r8, r1 SUB64 r1, r6, r1
LD r31, r254, 2a, 16h LD r31, r254, 2a, 24h
ADDI64 r254, r254, 18d ADDI64 r254, r254, 26d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 375 code size: 362
ret: 0 ret: 0
status: Ok(()) status: Ok(())

View file

@ -14,13 +14,13 @@ main:
JMP :2 JMP :2
1: ADD64 r2, r7, r9 1: ADD64 r2, r7, r9
MULI64 r1, r7, 1024d MULI64 r1, r7, 1024d
ADD64 r3, r1, r5 ADD64 r7, r5, r1
BMC r5, r3, 1024h BMC r5, r7, 1024h
CP r7, r2 CP r7, r2
JMP :3 JMP :3
0: ADD64 r2, r8, r9 0: ADD64 r2, r8, r9
ADD64 r12, r8, r5 ADD64 r8, r5, r8
ST r6, r12, 0a, 1h ST r6, r8, 0a, 1h
CP r8, r2 CP r8, r2
JMP :4 JMP :4
2: ADDI64 r254, r254, 10240d 2: ADDI64 r254, r254, 10240d