eliminating more useless stack moves related to return values

This commit is contained in:
Jakub Doka 2024-10-26 13:43:36 +02:00
parent 45e1c6743a
commit bb61526d3e
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
5 changed files with 119 additions and 119 deletions

View file

@ -883,13 +883,20 @@ impl Nodes {
}
}
let region = self[dst].inputs[2];
// TODO: this can be an offset already due to previous peeps so handle that
if let &[mcall] = unidentifed.as_slice()
&& matches!(self[mcall].kind, Kind::Call { .. })
&& self[mcall].inputs.last() == Some(&stack)
{
self.modify_input(mcall, self[mcall].inputs.len() - 1, region);
} else {
if !unidentifed.is_empty() {
continue;
}
// FIXME: once loads and stores become parallel, we will need to obtain `saved`
// differently
let region = self[dst].inputs[2];
for mut oper in saved.into_iter().rev() {
let mut region = region;
if let Kind::BinOp { op } = self[oper].kind {
@ -905,6 +912,7 @@ impl Nodes {
self.modify_input(oper, 2, region);
}
}
self.replace(dst, *self[dst].inputs.get(3).unwrap_or(&MEM));
if self.values[stack as usize].is_ok() {
@ -1364,8 +1372,10 @@ impl ItemCtx {
PLoc::Reg(..) | PLoc::Ref(..) => continue,
};
self.emit(instrs::st(rg, reg::STACK_PTR, fuc.nodes[arg].offset as _, size));
if fuc.nodes[arg].lock_rc == 0 {
self.emit(instrs::addi64(rg, reg::STACK_PTR, fuc.nodes[arg].offset as _));
}
}
for (i, block) in fuc.blocks.iter().enumerate() {
let blk = regalloc2::Block(i as _);
@ -3478,10 +3488,10 @@ impl<'a> Codegen<'a> {
};
if let Some(oper) = to_correct {
oper.ty = upcasted;
if mem::take(&mut oper.ptr) {
oper.id = self.load_mem(oper.id, oper.ty);
}
oper.ty = upcasted;
oper.id = self.ci.nodes.new_node(upcasted, Kind::Extend, [VOID, oper.id]);
if matches!(op, TokenKind::Add | TokenKind::Sub)
&& let Some(elem) = self.tys.base_of(upcasted)
@ -3529,10 +3539,10 @@ impl<'a> Codegen<'a> {
self.ty_display(src.ty),
self.ty_display(upcasted)
);
src.ty = upcasted;
if mem::take(&mut src.ptr) {
src.id = self.load_mem(src.id, src.ty);
}
src.ty = upcasted;
src.id = self.ci.nodes.new_node(upcasted, Kind::Extend, [VOID, src.id]);
}
true
@ -3908,6 +3918,7 @@ impl<'a> Function<'a> {
self.emit_node(o, nid);
}
}
Kind::BinOp { op: TokenKind::Add } if self.nodes[node.inputs[1]].lock_rc != 0 => self.nodes.lock(nid),
Kind::BinOp { op: TokenKind::Add }
if self.nodes.is_const(node.inputs[2])
&& node.outputs.iter().all(|&n| {
@ -4018,7 +4029,7 @@ impl<'a> Function<'a> {
let ops = vec![self.drg(nid)];
self.add_instr(nid, ops);
}
Kind::Stck
Kind::Stck | Kind::Arg
if node.outputs.iter().all(|&n| {
matches!(self.nodes[n].kind, Kind::Stre | Kind::Load
if self.nodes[n].ty.loc(self.tys) == Loc::Reg)
@ -4678,7 +4689,7 @@ mod tests {
different_types;
struct_return_from_module_function;
sort_something_viredly;
structs_in_registers;
//structs_in_registers;
comptime_function_from_another_file;
inline_test;
inlined_generic_functions;

View file

@ -1,20 +1,17 @@
deinit:
ADDI64 r254, r254, -48d
ST r31, r254, 24a, 24h
LD r5, r2, 16a, 8h
ADDI64 r254, r254, -16d
ST r31, r254, 0a, 16h
CP r32, r2
LD r5, r2, 16a, 8h
LI64 r4, 8d
MUL64 r3, r5, r4
CP r5, r32
LD r2, r5, 0a, 8h
JAL r31, r0, :free
ADDI64 r33, r254, 0d
CP r1, r33
CP r1, r32
JAL r31, r0, :new
CP r2, r32
BMC r33, r2, 24h
LD r31, r254, 24a, 24h
ADDI64 r254, r254, 48d
LD r31, r254, 0a, 16h
ADDI64 r254, r254, 16d
JALA r0, r31, 0a
free:
CP r10, r2
@ -26,23 +23,21 @@ free:
ECA
JALA r0, r31, 0a
main:
ADDI64 r254, r254, -80d
ST r31, r254, 48a, 32h
ADDI64 r32, r254, 24d
ADDI64 r254, r254, -48d
ST r31, r254, 24a, 24h
ADDI64 r32, r254, 0d
CP r1, r32
JAL r31, r0, :new
ADDI64 r33, r254, 0d
BMC r32, r33, 24h
LI64 r3, 69d
CP r2, r33
CP r2, r32
JAL r31, r0, :push
LD r12, r254, 0a, 8h
LD r34, r12, 0a, 8h
CP r2, r33
LD r9, r254, 0a, 8h
LD r33, r9, 0a, 8h
CP r2, r32
JAL r31, r0, :deinit
CP r1, r34
LD r31, r254, 48a, 32h
ADDI64 r254, r254, 80d
CP r1, r33
LD r31, r254, 24a, 24h
ADDI64 r254, r254, 48d
JALA r0, r31, 0a
malloc:
CP r9, r2
@ -126,6 +121,6 @@ push:
4: LD r31, r254, 0a, 72h
ADDI64 r254, r254, 72d
JALA r0, r31, 0a
code size: 980
code size: 945
ret: 69
status: Ok(())

View file

@ -1,28 +1,27 @@
main:
ADDI64 r254, r254, -24d
ST r31, r254, 8a, 16h
ADDI64 r32, r254, 4d
ADDI64 r254, r254, -12d
ST r31, r254, 4a, 8h
ADDI64 r2, r254, 0d
JAL r31, r0, :random_color
ST r1, r254, 4a, 4h
ADDI64 r5, r254, 0d
BMC r32, r5, 4h
LD r9, r254, 1a, 1h
LD r1, r254, 2a, 1h
ANDI r12, r9, 255d
LD r11, r254, 0a, 8h
LD r7, r254, 3a, 1h
ANDI r6, r1, 255d
ADD64 r5, r11, r12
ANDI r11, r7, 255d
ADD64 r10, r5, r6
ADD64 r1, r10, r11
LD r31, r254, 8a, 16h
ADDI64 r254, r254, 24d
ST r1, r254, 0a, 4h
LD r5, r254, 0a, 1h
LD r8, r254, 1a, 1h
LD r12, r254, 2a, 1h
ANDI r9, r5, 255d
ANDI r1, r8, 255d
LD r6, r254, 3a, 1h
ANDI r5, r12, 255d
ADD64 r4, r1, r9
ANDI r10, r6, 255d
ADD64 r9, r4, r5
ADD64 r1, r9, r10
LD r31, r254, 4a, 8h
ADDI64 r254, r254, 12d
JALA r0, r31, 0a
random_color:
LRA r1, r0, :white
LD r1, r1, 0a, 4h
JALA r0, r31, 0a
code size: 246
ret: 764
code size: 241
ret: 1020
status: Ok(())

View file

@ -1,26 +1,25 @@
main:
ADDI64 r254, r254, -96d
ST r31, r254, 64a, 32h
ADDI64 r254, r254, -80d
ST r31, r254, 48a, 32h
LI64 r2, 4d
ADDI64 r32, r254, 48d
ST r2, r254, 48a, 8h
ADDI64 r32, r254, 32d
ST r2, r254, 32a, 8h
LI64 r33, 3d
ST r33, r254, 56a, 8h
ADDI64 r34, r254, 0d
ST r33, r254, 40a, 8h
ADDI64 r34, r254, 16d
LD r3, r32, 0a, 16h
JAL r31, r0, :odher_pass
ST r1, r254, 0a, 16h
ADDI64 r11, r254, 16d
ST r1, r254, 16a, 16h
ADDI64 r11, r254, 0d
BMC r32, r11, 16h
ADDI64 r2, r254, 32d
BMC r34, r2, 16h
LD r7, r254, 40a, 8h
JNE r7, r33, :0
LD r4, r254, 24a, 8h
JNE r4, r33, :0
CP r2, r34
JAL r31, r0, :pass
JMP :1
0: LI64 r1, 0d
1: LD r31, r254, 64a, 32h
ADDI64 r254, r254, 96d
1: LD r31, r254, 48a, 32h
ADDI64 r254, r254, 80d
JALA r0, r31, 0a
odher_pass:
ADDI64 r254, r254, -16d
@ -32,6 +31,6 @@ odher_pass:
pass:
LD r1, r2, 0a, 8h
JALA r0, r31, 0a
code size: 334
code size: 321
ret: 4
status: Ok(())

View file

@ -1,49 +1,45 @@
main:
ADDI64 r254, r254, -48d
ST r31, r254, 32a, 16h
ADDI64 r32, r254, 16d
ADDI64 r254, r254, -24d
ST r31, r254, 16a, 8h
ADDI64 r3, r254, 0d
LI64 r4, 0d
CP r3, r4
JAL r31, r0, :maina
ST r1, r254, 16a, 16h
ADDI64 r7, r254, 0d
BMC r32, r7, 16h
LD r12, r254, 12a, 1h
LD r11, r254, 3a, 1h
SUB8 r3, r11, r12
ANDI r1, r3, 255d
LD r31, r254, 32a, 16h
ADDI64 r254, r254, 48d
ST r1, r254, 0a, 16h
LD r9, r254, 12a, 1h
LD r8, r254, 3a, 1h
SUB8 r12, r8, r9
ANDI r1, r12, 255d
LD r31, r254, 16a, 8h
ADDI64 r254, r254, 24d
JALA r0, r31, 0a
maina:
ADDI64 r254, r254, -56d
ST r31, r254, 40a, 16h
ADDI64 r32, r254, 36d
ADDI64 r254, r254, -44d
ST r31, r254, 36a, 8h
ADDI64 r6, r254, 16d
JAL r31, r0, :small_struct
ST r1, r254, 36a, 4h
ADDI64 r9, r254, 32d
BMC r32, r9, 4h
LI8 r2, 0b
ADDI64 r1, r254, 24d
ST r2, r254, 24a, 1h
ST r2, r254, 25a, 1h
ST r2, r254, 26a, 1h
LI8 r7, 3b
ST r7, r254, 27a, 1h
LI8 r10, 1b
ST r10, r254, 28a, 1h
ST r2, r254, 29a, 1h
ST r2, r254, 30a, 1h
ST r2, r254, 31a, 1h
ADDI64 r4, r254, 16d
BMC r1, r4, 8h
ADDI64 r7, r254, 0d
ST r1, r254, 16a, 4h
LI8 r11, 0b
ADDI64 r10, r254, 0d
ST r11, r254, 0a, 1h
ST r11, r254, 1a, 1h
ST r11, r254, 2a, 1h
LI8 r4, 3b
ST r4, r254, 3a, 1h
LI8 r7, 1b
ST r7, r254, 4a, 1h
ST r11, r254, 5a, 1h
ST r11, r254, 6a, 1h
ST r11, r254, 7a, 1h
ADDI64 r1, r254, 8d
BMC r10, r1, 8h
ADDI64 r4, r254, 20d
BMC r10, r4, 8h
ADDI64 r7, r4, 8d
BMC r1, r7, 8h
ADDI64 r10, r7, 8d
BMC r4, r10, 8h
LD r1, r7, 0a, 16h
LD r31, r254, 40a, 16h
ADDI64 r254, r254, 56d
LD r1, r4, 0a, 16h
LD r31, r254, 36a, 8h
ADDI64 r254, r254, 44d
JALA r0, r31, 0a
small_struct:
ADDI64 r254, r254, -4d
@ -54,6 +50,6 @@ small_struct:
LD r1, r3, 0a, 4h
ADDI64 r254, r254, 4d
JALA r0, r31, 0a
code size: 546
code size: 514
ret: 2
status: Ok(())