Remove needless copies to the zero register for unused values

This commit is contained in:
Jakub Doka 2024-11-16 13:42:17 +01:00
parent e6df9b6b01
commit e5d6b35f66
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
18 changed files with 138 additions and 118 deletions

View file

@ -2066,6 +2066,11 @@ impl Node {
/// Returns `true` when this node is a `Kind::Phi` whose type is anything
/// other than `ty::Id::VOID`.
fn is_data_phi(&self) -> bool {
    let is_phi = self.kind == Kind::Phi;
    let has_type = self.ty != ty::Id::VOID;
    is_phi && has_type
}
/// Returns `true` when this node does not yield a value.
///
/// That is the case for:
/// - `Kind::Stre` nodes,
/// - control-flow nodes that are not calls,
/// - call nodes whose type is `ty::Id::VOID`.
fn has_no_value(&self) -> bool {
    if matches!(self.kind, Kind::Stre) {
        return true;
    }
    if !self.kind.is_cfg() {
        return false;
    }
    // Among control-flow nodes only a non-void call carries a result.
    !self.kind.is_call() || self.ty == ty::Id::VOID
}
}
type LoopDepth = u16;

View file

@ -75,7 +75,7 @@ impl HbvmBackend {
PLoc::WideReg(rg, size) => (rg, size),
PLoc::Reg(rg, size) if ty.loc(tys) == Loc::Stack => (rg, size),
PLoc::Reg(r, ..) | PLoc::Ref(r, ..) => {
self.emit(instrs::cp(atr(arg), r));
self.emit_cp(atr(arg), r);
continue;
}
};
@ -83,13 +83,13 @@ impl HbvmBackend {
if nodes.is_unlocked(arg) {
self.emit(instrs::addi64(rg, reg::STACK_PTR, self.offsets[arg as usize] as _));
}
self.emit(instrs::cp(atr(arg), rg));
self.emit_cp(atr(arg), rg);
}
let mut alloc_buf = vec![];
for (i, block) in res.blocks.iter().enumerate() {
self.offsets[block.entry as usize] = self.code.len() as _;
for &nid in &res.instrs[block.range.clone()] {
for &nid in &res.instrs[block.range()] {
if nid == VOID {
continue;
}
@ -289,7 +289,7 @@ impl HbvmBackend {
match ret {
Some(PLoc::WideReg(..)) => {}
Some(PLoc::Reg(..)) if node.ty.loc(tys) == Loc::Stack => {}
Some(PLoc::Reg(r, ..)) => self.emit(instrs::cp(atr(nid), r)),
Some(PLoc::Reg(r, ..)) => self.emit_cp(atr(nid), r),
None | Some(PLoc::Ref(..)) => {}
}
}
@ -309,6 +309,12 @@ impl HbvmBackend {
tail,
)
}
/// Emits a `cp dst, src` instruction unless `dst` is register 0.
///
/// A copy whose destination is register 0 is skipped entirely, so values
/// that were assigned register 0 cost no instruction.
fn emit_cp(&mut self, dst: Reg, src: Reg) {
    if dst == 0 {
        // Nothing to do: the destination is register 0.
        return;
    }
    self.emit(instrs::cp(dst, src));
}
}
struct Function<'a> {
@ -323,7 +329,7 @@ impl core::fmt::Debug for Function<'_> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
for block in &self.func.blocks {
writeln!(f, "{:?}", self.nodes[block.entry].kind)?;
for &instr in &self.func.instrs[block.range.clone()] {
for &instr in &self.func.instrs[block.range()] {
writeln!(f, "{:?}", self.nodes[instr].kind)?;
}
}
@ -344,9 +350,11 @@ impl<'a> Function<'a> {
}
fn add_block(&mut self, entry: Nid) {
self.func
.blocks
.push(Block { range: self.func.instrs.len()..self.func.instrs.len(), entry });
self.func.blocks.push(Block {
start: self.func.instrs.len() as _,
end: self.func.instrs.len() as _,
entry,
});
self.func.backrefs[entry as usize] = self.func.blocks.len() as u16 - 1;
}
@ -357,7 +365,7 @@ impl<'a> Function<'a> {
self.func.instrs.push(exit);
}
let prev = self.func.blocks.last_mut().unwrap();
prev.range.end = self.func.instrs.len();
prev.end = self.func.instrs.len() as _;
}
fn add_instr(&mut self, nid: Nid) {
@ -562,6 +570,8 @@ impl<'a> Regalloc<'a> {
fn run_low(&mut self) {
self.res.bundles.clear();
self.res.node_to_reg.clear();
#[cfg(debug_assertions)]
self.res.marked.clear();
self.res.node_to_reg.resize(self.nodes.vreg_count(), 0);
debug_assert!(self.res.dfs_buf.is_empty());
@ -591,7 +601,7 @@ impl<'a> Regalloc<'a> {
let instrs = mem::take(&mut self.res.instrs);
for &inst in &instrs {
if self.res.visited.get(inst) || inst == 0 {
if self.nodes[inst].has_no_value() || self.res.visited.get(inst) || inst == 0 {
continue;
}
self.append_bundle(inst, &mut bundle, None);
@ -599,13 +609,18 @@ impl<'a> Regalloc<'a> {
self.res.instrs = instrs;
}
fn append_bundle(&mut self, inst: Nid, bundle: &mut Bundle, prefered: Option<usize>) {
fn collect_bundle(&mut self, inst: Nid, into: &mut Bundle) {
let dom = self.nodes.idom_of(inst);
self.res.dfs_seem.clear(self.nodes.values.len());
for (cursor, uinst) in self.nodes.uses_of(inst) {
if !self.res.dfs_seem.set(uinst) {
continue;
}
#[cfg(debug_assertions)]
self.res.marked.insert((inst, uinst));
debug_assert!(self.res.marked.insert((inst, uinst)));
self.reverse_cfg_dfs(cursor, dom, |s, n, b| {
let mut range = b.range.clone();
let mut range = b.range();
debug_assert!(range.start < range.end);
range.start = range.start.max(s.instr_of(inst).map_or(0, |n| n + 1) as usize);
debug_assert!(range.start < range.end, "{:?}", range);
@ -621,35 +636,40 @@ impl<'a> Regalloc<'a> {
range.end = new;
debug_assert!(range.start < range.end, "{:?} {inst} {uinst}", range);
bundle.add(range);
into.add(range);
});
}
}
if !bundle.taken.contains(&true) {
fn append_bundle(&mut self, inst: Nid, tmp: &mut Bundle, prefered: Option<usize>) {
self.collect_bundle(inst, tmp);
if tmp.is_empty() {
self.res.node_to_reg[inst as usize] = u8::MAX;
return;
}
if let Some(prefered) = prefered
&& !self.res.bundles[prefered].overlaps(bundle)
&& !self.res.bundles[prefered].overlaps(tmp)
{
self.res.bundles[prefered].merge(bundle);
bundle.clear();
self.res.bundles[prefered].merge(tmp);
tmp.clear();
self.res.node_to_reg[inst as usize] = prefered as Reg + 1;
} else {
match self.res.bundles.iter_mut().enumerate().find(|(_, b)| !b.overlaps(bundle)) {
return;
}
match self.res.bundles.iter_mut().enumerate().find(|(_, b)| !b.overlaps(tmp)) {
Some((i, other)) => {
other.merge(bundle);
bundle.clear();
other.merge(tmp);
tmp.clear();
self.res.node_to_reg[inst as usize] = i as Reg + 1;
}
None => {
self.res.bundles.push(mem::replace(bundle, Bundle::new(bundle.taken.len())));
self.res.bundles.push(tmp.take());
self.res.node_to_reg[inst as usize] = self.res.bundles.len() as Reg;
}
}
}
}
fn reverse_cfg_dfs(
&mut self,
@ -659,13 +679,12 @@ impl<'a> Regalloc<'a> {
) {
debug_assert!(self.res.dfs_buf.is_empty());
self.res.dfs_buf.push(from);
self.res.dfs_seem.clear(self.nodes.values.len());
debug_assert!(self.nodes.dominates(until, from));
while let Some(nid) = self.res.dfs_buf.pop() {
debug_assert!(self.nodes.dominates(until, nid), "{until} {:?}", self.nodes[until]);
each(self, nid, self.res.blocks[self.block_of(nid) as usize].clone());
each(self, nid, self.res.blocks[self.block_of(nid) as usize]);
if nid == until {
continue;
}
@ -729,10 +748,25 @@ impl Bundle {
/// Resets every entry of `taken` to `false`, keeping its length.
fn clear(&mut self) {
    for slot in self.taken.iter_mut() {
        *slot = false;
    }
}
/// Returns `true` when no entry of `taken` is set.
fn is_empty(&self) -> bool {
    !self.taken.iter().any(|&slot| slot)
}
/// Moves the current contents out and returns them, leaving in place a new
/// bundle built by `Self::new` with the same `taken` length.
fn take(&mut self) -> Self {
    let replacement = Self::new(self.taken.len());
    mem::replace(self, replacement)
}
}
#[derive(Clone)]
#[derive(Clone, Copy)]
struct Block {
range: Range<usize>,
start: u16,
end: u16,
entry: Nid,
}
impl Block {
    /// Returns the half-open span `start..end` of this block, with both
    /// bounds widened to `usize` so the result can index a slice directly.
    pub fn range(&self) -> Range<usize> {
        let first = usize::from(self.start);
        let past_last = usize::from(self.end);
        first..past_last
    }
}

View file

@ -1,7 +1,6 @@
main:
CP r0, r2
CP r1, r0
JALA r0, r31, 0a
code size: 25
code size: 22
ret: 0
status: Ok(())

View file

@ -1,6 +1,6 @@
main:
ADDI64 r254, r254, -160d
ST r31, r254, 80a, 80h
ADDI64 r254, r254, -152d
ST r31, r254, 80a, 72h
LRA r32, r0, :glob_stru
JAL r31, r0, :new_stru
ST r1, r32, 0a, 16h
@ -62,14 +62,14 @@ main:
ST r33, r32, 8a, 8h
CP r32, r34
JMP :7
3: MULI64 r39, r32, 16d
ADD64 r39, r37, r39
3: MULI64 r34, r32, 16d
ADD64 r34, r37, r34
JAL r31, r0, :new_stru
ST r1, r39, 0a, 16h
ST r1, r34, 0a, 16h
ADD64 r32, r32, r35
JMP :8
1: LD r31, r254, 80a, 80h
ADDI64 r254, r254, 160d
1: LD r31, r254, 80a, 72h
ADDI64 r254, r254, 152d
JALA r0, r31, 0a
new_stru:
ADDI64 r254, r254, -24d

View file

@ -14,12 +14,11 @@ main:
CP r6, r13
LD r3, r14, 0a, 16h
ECA
CP r0, r1
CP r1, r0
LD r32, r254, 16a, 32h
ADDI64 r254, r254, 48d
JALA r0, r31, 0a
ev: Ecall
code size: 183
code size: 180
ret: 0
status: Ok(())

View file

@ -11,7 +11,6 @@ main:
ADDI64 r254, r254, -64d
ST r31, r254, 0a, 64h
JAL r31, r0, :check_platform
CP r0, r1
CP r33, r0
LI64 r36, 30d
LI64 r37, 100d
@ -59,6 +58,6 @@ x86_fb_ptr:
LD r32, r254, 0a, 8h
ADDI64 r254, r254, 8d
JALA r0, r31, 0a
code size: 428
code size: 425
ret: 3000
status: Ok(())

View file

@ -6,11 +6,10 @@ main:
ADDI64 r254, r254, 16d
JALA r0, r31, 0a
opaque:
CP r0, r2
JALA r0, r31, 0a
process:
ADDI64 r254, r254, -64d
ST r31, r254, 16a, 48h
ADDI64 r254, r254, -56d
ST r31, r254, 16a, 40h
LI64 r32, 1000d
ADDI64 r33, r254, 0d
ST r0, r254, 0a, 1h
@ -18,16 +17,16 @@ process:
JMP :1
0: CP r2, r33
JAL r31, r0, :opaque
LD r35, r254, 0a, 1h
ANDI r35, r35, 255d
JEQ r35, r0, :2
LD r34, r254, 0a, 1h
ANDI r34, r34, 255d
JEQ r34, r0, :2
JMP :3
2: ADDI64 r32, r32, -1d
1: JMP :4
3: LD r31, r254, 16a, 48h
ADDI64 r254, r254, 64d
3: LD r31, r254, 16a, 40h
ADDI64 r254, r254, 56d
JALA r0, r31, 0a
timed out
code size: 251
code size: 248
ret: 0
status: Ok(())

View file

@ -1,12 +1,12 @@
main:
ADDI64 r254, r254, -24d
ST r32, r254, 0a, 24h
ADDI64 r254, r254, -16d
ST r32, r254, 0a, 16h
LI64 r13, 8d
CP r2, r13
ECA
LI64 r14, 6d
LRA r15, r0, :gb
LD r13, r15, 0a, 8h
LRA r13, r0, :gb
LD r13, r13, 0a, 8h
CMPU r13, r13, r0
CMPUI r13, r13, 0d
OR r13, r13, r0
@ -17,8 +17,8 @@ main:
0: LI64 r13, 1d
1: SUB64 r13, r13, r14
CP r1, r13
LD r32, r254, 0a, 24h
ADDI64 r254, r254, 24d
LD r32, r254, 0a, 16h
ADDI64 r254, r254, 16d
JALA r0, r31, 0a
code size: 179
ret: 0

View file

@ -1,31 +1,29 @@
main:
ADDI64 r254, r254, -72d
ST r31, r254, 16a, 56h
ADDI64 r254, r254, -56d
ST r31, r254, 16a, 40h
ADDI64 r32, r254, 0d
ADDI64 r33, r254, 8d
ST r0, r254, 0a, 8h
ST r0, r254, 8a, 8h
LI64 r36, 1024d
LI64 r34, 1024d
CP r2, r33
CP r3, r32
CP r4, r36
CP r4, r34
JAL r31, r0, :set
CP r32, r1
ANDI r32, r32, 4294967295d
CP r1, r32
LD r31, r254, 16a, 56h
ADDI64 r254, r254, 72d
LD r31, r254, 16a, 40h
ADDI64 r254, r254, 56d
JALA r0, r31, 0a
set:
ADDI64 r254, r254, -8d
ST r32, r254, 0a, 8h
CP r0, r2
CP r0, r3
CP r13, r4
CP r1, r13
LD r32, r254, 0a, 8h
ADDI64 r254, r254, 8d
JALA r0, r31, 0a
code size: 229
code size: 223
ret: 1024
status: Ok(())

View file

@ -24,10 +24,9 @@ main:
CP r2, r0
CP r3, r32
JAL r31, r0, :integer_range
CP r0, r1
LD r31, r254, 0a, 24h
ADDI64 r254, r254, 24d
JALA r0, r31, 0a
code size: 215
code size: 212
ret: 42
status: Ok(())

View file

@ -20,8 +20,8 @@ main:
JAL r31, r0, :decide
ADDI64 r34, r254, 24d
BMC r32, r34, 24h
LI64 r36, 1d
CP r2, r36
LI64 r35, 1d
CP r2, r35
CP r1, r34
JAL r31, r0, :decide
ADDI64 r36, r254, 0d

View file

@ -4,7 +4,6 @@ main:
JAL r31, r0, :opaque
CP r33, r1
JAL r31, r0, :opaque
CP r0, r1
JNE r33, r0, :0
CP r32, r0
JMP :1
@ -19,6 +18,6 @@ main:
opaque:
CP r1, r0
JALA r0, r31, 0a
code size: 153
code size: 150
ret: 0
status: Ok(())

View file

@ -7,8 +7,8 @@ decide:
ADDI64 r254, r254, 8d
JALA r0, r31, 0a
main:
ADDI64 r254, r254, -144d
ST r31, r254, 80a, 64h
ADDI64 r254, r254, -136d
ST r31, r254, 80a, 56h
JAL r31, r0, :decide
CP r32, r1
CP r33, r0
@ -57,22 +57,22 @@ main:
9: ADDI64 r33, r254, 40d
JAL r31, r0, :new_foo
ST r1, r33, 0a, 16h
LD r37, r254, 40a, 8h
JNE r37, r0, :10
LD r36, r254, 40a, 8h
JNE r36, r0, :10
LI64 r32, 999d
CP r1, r32
JMP :3
10: LRA r37, r0, :"foo\0"
CP r4, r37
10: LRA r36, r0, :"foo\0"
CP r4, r36
LD r2, r33, 0a, 16h
JAL r31, r0, :use_foo
ADDI64 r33, r254, 0d
JAL r31, r0, :no_foo
ST r1, r33, 0a, 16h
JAL r31, r0, :decide
CP r37, r1
ANDI r37, r37, 255d
JNE r37, r0, :11
CP r36, r1
ANDI r36, r36, 255d
JNE r36, r0, :11
JMP :12
11: ST r34, r254, 0a, 8h
ST r35, r254, 8a, 8h
@ -102,8 +102,8 @@ main:
ANDI r32, r32, 65535d
SUB64 r32, r32, r33
CP r1, r32
3: LD r31, r254, 80a, 64h
ADDI64 r254, r254, 144d
3: LD r31, r254, 80a, 56h
ADDI64 r254, r254, 136d
JALA r0, r31, 0a
new_bar:
ADDI64 r254, r254, -48d
@ -144,10 +144,8 @@ use_foo:
ADDI64 r254, r254, -16d
ST r2, r254, 0a, 16h
ADDI64 r2, r254, 0d
CP r0, r2
CP r0, r4
ADDI64 r254, r254, 16d
JALA r0, r31, 0a
code size: 1246
code size: 1240
ret: 0
status: Ok(())

View file

@ -32,9 +32,7 @@ main:
ADDI64 r254, r254, 32d
JALA r0, r31, 0a
outb:
CP r0, r2
CP r0, r3
JALA r0, r31, 0a
code size: 257
code size: 251
ret: 1
status: Ok(())

View file

@ -1,9 +1,8 @@
drop:
CP r0, r2
JALA r0, r31, 0a
main:
ADDI64 r254, r254, -48d
ST r31, r254, 8a, 40h
ADDI64 r254, r254, -40d
ST r31, r254, 8a, 32h
ADDI64 r32, r254, 0d
LI64 r33, 1d
ST r33, r254, 0a, 8h
@ -14,8 +13,8 @@ main:
LD r32, r254, 0a, 8h
ADDI64 r32, r32, -2d
CP r1, r32
LD r31, r254, 8a, 40h
ADDI64 r254, r254, 48d
LD r31, r254, 8a, 32h
ADDI64 r254, r254, 40d
JALA r0, r31, 0a
modify:
ADDI64 r254, r254, -16d
@ -26,6 +25,6 @@ modify:
LD r32, r254, 0a, 16h
ADDI64 r254, r254, 16d
JALA r0, r31, 0a
code size: 247
code size: 244
ret: 0
status: Ok(())

View file

@ -20,11 +20,9 @@ create_back_buffer:
2: JLTS r32, r33, :3
CP r2, r34
JAL r31, r0, :request_page
CP r0, r1
JMP :4
3: CP r2, r32
JAL r31, r0, :request_page
CP r0, r1
4: SUB64 r32, r32, r33
JMP :5
1: LD r31, r254, 0a, 48h
@ -36,29 +34,28 @@ main:
LI64 r32, 400d
CP r2, r32
JAL r31, r0, :create_back_buffer
CP r0, r1
LD r31, r254, 0a, 24h
ADDI64 r254, r254, 24d
JALA r0, r31, 0a
request_page:
ADDI64 r254, r254, -40d
ST r32, r254, 0a, 40h
ADDI64 r254, r254, -32d
ST r32, r254, 0a, 32h
CP r13, r2
LRA r14, r0, :"\0\u{1}xxxxxxxx\0"
ST r13, r14, 1a, 1h
LI64 r15, 12d
LI64 r16, 2d
LI64 r17, 3d
CP r2, r17
CP r3, r16
LI64 r13, 12d
LI64 r15, 2d
LI64 r16, 3d
CP r2, r16
CP r3, r15
CP r4, r14
CP r5, r15
CP r5, r13
ECA
CP r13, r1
CP r1, r13
LD r32, r254, 0a, 40h
ADDI64 r254, r254, 40d
LD r32, r254, 0a, 32h
ADDI64 r254, r254, 32d
JALA r0, r31, 0a
code size: 408
code size: 399
ret: 42
status: Ok(())

View file

@ -7,7 +7,6 @@ do_stuff:
ADDI64 r254, r254, 8d
JALA r0, r31, 0a
just_read:
CP r0, r2
JALA r0, r31, 0a
main:
ADDI64 r254, r254, -88d
@ -75,6 +74,6 @@ optionala:
LD r32, r254, 48a, 32h
ADDI64 r254, r254, 80d
JALA r0, r31, 0a
code size: 670
code size: 667
ret: 100
status: Ok(())

View file

@ -17,8 +17,6 @@ main:
maina:
ADDI64 r254, r254, -68d
ST r31, r254, 20a, 48h
CP r0, r3
CP r0, r4
ADDI64 r32, r254, 16d
JAL r31, r0, :small_struct
ST r1, r32, 0a, 4h
@ -55,6 +53,6 @@ small_struct:
LD r32, r254, 4a, 8h
ADDI64 r254, r254, 12d
JALA r0, r31, 0a
code size: 591
code size: 585
ret: 2
status: Ok(())