make the instruction scheduling smarter

instructions that are depended on only by phis are pushed to the end of
the block, which usually saves copy instructions

Signed-off-by: Jakub Doka <jakub.doka2@gmail.com>
This commit is contained in:
Jakub Doka 2024-12-19 11:00:19 +01:00
parent a8aba7e7c2
commit cfd3eac0a8
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
11 changed files with 132 additions and 141 deletions

View file

@ -295,9 +295,7 @@ impl Nodes {
continue;
}
let mut cursor = buf.len();
for &o in outputs.iter().filter(|&&n| n == o) {
buf.push(o);
}
buf.push(o);
while let Some(&n) = buf.get(cursor) {
for &i in &self[n].inputs[1..] {
if fromc == self[i].inputs.first()
@ -308,15 +306,17 @@ impl Nodes {
})
&& seen.set(i)
{
for &o in outputs.iter().filter(|&&n| n == i) {
buf.push(o);
}
buf.push(i);
}
}
cursor += 1;
}
}
buf[1..].sort_by_key(|&n| {
self[n].has_no_value() || !self[n].outputs.iter().all(|&o| self[o].kind == Kind::Phi)
});
debug_assert_eq!(
outputs.iter().filter(|&&n| !seen.get(n)).copied().collect::<Vec<_>>(),
vec![],

View file

@ -48,10 +48,9 @@ main:
JMP :1
6: CP r1, r0
JMP :1
5: ADDI64 r34, r32, 16d
ST r0, r32, 0a, 8h
5: ST r0, r32, 0a, 8h
ST r0, r32, 8a, 8h
CP r32, r34
ADDI64 r32, r32, 16d
JMP :7
3: JAL r31, r0, :new_stru
ST r1, r32, 0a, 16h
@ -67,6 +66,6 @@ new_stru:
LD r1, r254, 0a, 16h
ADDI64 r254, r254, 16d
JALA r0, r31, 0a
code size: 658
code size: 655
ret: 0
status: Ok(())

View file

@ -1,10 +1,10 @@
continue_and_state_change:
CP r13, r2
CP r15, r0
LI64 r16, 3d
LI64 r14, 4d
LI64 r17, 2d
LI64 r18, 10d
CP r15, r0
LI64 r14, 4d
6: JLTU r13, r18, :0
JMP :1
0: JNE r13, r17, :2

View file

@ -112,51 +112,49 @@ new:
push:
ADDI64 r254, r254, -80d
ST r31, r254, 0a, 80h
CP r38, r2
CP r39, r3
LI64 r37, 1d
LD r33, r38, 8a, 8h
LD r32, r38, 16a, 8h
CP r36, r2
CP r37, r3
LI64 r35, 1d
LD r33, r36, 8a, 8h
LD r32, r36, 16a, 8h
JNE r32, r33, :0
JNE r32, r0, :1
CP r32, r37
CP r32, r35
JMP :2
1: MULI64 r32, r32, 2d
2: CP r2, r32
CP r3, r37
CP r3, r35
JAL r31, r0, :malloc
ST r32, r38, 16a, 8h
CP r35, r1
JNE r35, r0, :3
ST r32, r36, 16a, 8h
CP r34, r1
JNE r34, r0, :3
CP r1, r0
JMP :4
3: LD r32, r38, 0a, 8h
ADD64 r40, r33, r32
CP r34, r35
7: LD r33, r38, 0a, 8h
LD r36, r38, 8a, 8h
JNE r40, r32, :5
JEQ r36, r0, :6
CP r2, r33
CP r3, r36
CP r4, r37
3: LD r32, r36, 0a, 8h
ADD64 r38, r33, r32
CP r33, r34
7: LD r39, r36, 0a, 8h
LD r40, r36, 8a, 8h
JNE r38, r32, :5
JEQ r40, r0, :6
CP r2, r39
CP r3, r40
CP r4, r35
JAL r31, r0, :free
JMP :6
6: ST r35, r38, 0a, 8h
6: ST r34, r36, 0a, 8h
JMP :0
5: ADDI64 r36, r34, 1d
ADDI64 r33, r32, 1d
LD r32, r32, 0a, 1h
ST r32, r34, 0a, 1h
CP r32, r33
CP r34, r36
JMP :7
0: LD r32, r38, 8a, 8h
LD r33, r38, 0a, 8h
ADD64 r33, r32, r33
5: LD r39, r32, 0a, 1h
ST r39, r33, 0a, 1h
ADD64 r32, r32, r37
ST r32, r38, 8a, 8h
ADDI64 r33, r33, 1d
ADDI64 r32, r32, 1d
JMP :7
0: LD r32, r36, 8a, 8h
LD r33, r36, 0a, 8h
ADD64 r33, r32, r33
ST r37, r33, 0a, 1h
ADD64 r32, r32, r35
ST r32, r36, 8a, 8h
CP r1, r33
4: LD r31, r254, 0a, 80h
ADDI64 r254, r254, 80d
@ -164,60 +162,58 @@ push:
push:
ADDI64 r254, r254, -88d
ST r31, r254, 0a, 88h
CP r38, r2
CP r39, r3
LI64 r37, 1d
LD r33, r38, 8a, 8h
LD r32, r38, 16a, 8h
CP r36, r2
CP r37, r3
LI64 r35, 1d
LD r33, r36, 8a, 8h
LD r32, r36, 16a, 8h
JNE r32, r33, :0
JNE r32, r0, :1
CP r32, r37
CP r32, r35
JMP :2
1: MULI64 r32, r32, 2d
2: LI64 r40, 8d
MUL64 r34, r32, r40
2: LI64 r38, 8d
MUL64 r34, r32, r38
CP r2, r34
CP r3, r40
CP r3, r38
JAL r31, r0, :malloc
ST r32, r38, 16a, 8h
CP r35, r1
JNE r35, r0, :3
ST r32, r36, 16a, 8h
CP r34, r1
JNE r34, r0, :3
CP r1, r0
JMP :4
3: MULI64 r33, r33, 8d
LD r32, r38, 0a, 8h
ADD64 r41, r32, r33
CP r34, r35
7: LD r33, r38, 0a, 8h
LD r36, r38, 8a, 8h
JNE r41, r32, :5
JEQ r36, r0, :6
MUL64 r32, r36, r40
CP r2, r33
LD r32, r36, 0a, 8h
ADD64 r39, r32, r33
CP r33, r34
7: LD r40, r36, 0a, 8h
LD r41, r36, 8a, 8h
JNE r39, r32, :5
JEQ r41, r0, :6
MUL64 r32, r41, r38
CP r2, r40
CP r3, r32
CP r4, r40
CP r4, r38
JAL r31, r0, :free
JMP :6
6: ST r35, r38, 0a, 8h
6: ST r34, r36, 0a, 8h
JMP :0
5: ADDI64 r36, r34, 8d
ADDI64 r33, r32, 8d
LD r32, r32, 0a, 8h
ST r32, r34, 0a, 8h
CP r32, r33
CP r34, r36
5: LD r40, r32, 0a, 8h
ST r40, r33, 0a, 8h
ADDI64 r33, r33, 8d
ADDI64 r32, r32, 8d
JMP :7
0: LD r32, r38, 8a, 8h
0: LD r32, r36, 8a, 8h
MULI64 r33, r32, 8d
LD r34, r38, 0a, 8h
LD r34, r36, 0a, 8h
ADD64 r33, r34, r33
ST r39, r33, 0a, 8h
ADD64 r32, r32, r37
ST r32, r38, 8a, 8h
ST r37, r33, 0a, 8h
ADD64 r32, r32, r35
ST r32, r36, 8a, 8h
CP r1, r33
4: LD r31, r254, 0a, 88h
ADDI64 r254, r254, 88d
JALA r0, r31, 0a
code size: 1635
code size: 1623
ret: 69
status: Ok(())

View file

@ -1,21 +1,20 @@
main:
ADDI64 r254, r254, -128d
ADDI64 r15, r254, 0d
LI8 r16, 69b
LI64 r17, 128d
ADDI64 r14, r254, 0d
LI8 r15, 69b
LI64 r16, 128d
CP r13, r0
2: LD r14, r254, 42a, 1h
JLTU r13, r17, :0
ANDI r13, r14, 255d
2: LD r17, r254, 42a, 1h
JLTU r13, r16, :0
ANDI r13, r17, 255d
CP r1, r13
JMP :1
0: ADDI64 r14, r13, 1d
ADD64 r13, r15, r13
ST r16, r13, 0a, 1h
CP r13, r14
0: ADD64 r17, r14, r13
ST r15, r17, 0a, 1h
ADDI64 r13, r13, 1d
JMP :2
1: ADDI64 r254, r254, 128d
JALA r0, r31, 0a
code size: 141
code size: 138
ret: 69
status: Ok(())

View file

@ -59,9 +59,9 @@ put_filled_rect:
LD r14, r14, 0a, 8h
ADD64 r26, r14, r26
LD r28, r15, 0a, 8h
MUL64 r15, r27, r25
ADD64 r14, r14, r15
ADD64 r15, r28, r26
MUL64 r25, r27, r25
ADD64 r14, r14, r25
ADD64 r14, r28, r14
3: JGTU r13, r20, :0
JNE r13, r20, :1

View file

@ -12,8 +12,8 @@ create_back_buffer:
0: LI8 r34, 255b
CP r2, r34
JAL r31, r0, :request_page
SUB64 r32, r32, r33
CP r35, r1
SUB64 r32, r32, r33
5: JGTS r32, r0, :2
CP r1, r35
JMP :1

View file

@ -11,9 +11,9 @@ main:
JALA r0, r31, 0a
sqrt:
CP r14, r2
CP r17, r0
LI64 r16, 15d
LI64 r15, 32768d
CP r17, r0
CP r13, r17
3: JNE r15, r17, :0
CP r1, r13

View file

@ -1,45 +1,44 @@
main:
ADDI64 r254, r254, -40d
LI64 r17, 1d
LI64 r16, 1d
LI64 r15, 4d
ADDI64 r17, r254, 0d
CP r14, r0
ADDI64 r18, r254, 0d
CP r13, r14
6: JNE r13, r15, :0
ADDI64 r19, r254, 32d
LI64 r20, 2d
ADDI64 r18, r254, 32d
LI64 r19, 2d
CP r13, r14
4: LD r15, r254, 16a, 8h
JNE r13, r17, :1
JNE r13, r16, :1
CP r1, r15
JMP :2
1: ADD64 r16, r13, r17
SUB64 r15, r20, r16
MUL64 r21, r15, r20
MUL64 r22, r13, r20
1: ADD64 r15, r13, r16
SUB64 r20, r19, r15
MUL64 r20, r20, r19
MUL64 r21, r13, r19
CP r13, r14
5: JNE r13, r20, :3
CP r13, r16
JMP :4
3: ADD64 r15, r13, r17
ADD64 r23, r22, r13
ADD64 r13, r21, r13
MULI64 r23, r23, 8d
MULI64 r13, r13, 8d
ADD64 r23, r18, r23
ADD64 r13, r18, r13
BMC r23, r19, 8h
BMC r13, r23, 8h
BMC r19, r13, 8h
5: JNE r13, r19, :3
CP r13, r15
JMP :4
3: ADD64 r22, r21, r13
ADD64 r23, r20, r13
MULI64 r22, r22, 8d
MULI64 r23, r23, 8d
ADD64 r22, r17, r22
ADD64 r23, r17, r23
BMC r22, r18, 8h
BMC r23, r22, 8h
BMC r18, r23, 8h
ADD64 r13, r13, r16
JMP :5
0: MULI64 r16, r13, 8d
ADD64 r16, r18, r16
ST r13, r16, 0a, 8h
ADD64 r13, r13, r17
0: MULI64 r18, r13, 8d
ADD64 r18, r17, r18
ST r13, r18, 0a, 8h
ADD64 r13, r13, r16
JMP :6
2: ADDI64 r254, r254, 40d
JALA r0, r31, 0a
code size: 267
code size: 264
ret: 0
status: Ok(())

View file

@ -1,28 +1,26 @@
main:
ADDI64 r254, r254, -10240d
LI8 r15, 64b
LI64 r16, 1024d
LI8 r14, 64b
LI64 r15, 1024d
ADDI64 r16, r254, 0d
CP r13, r0
ADDI64 r17, r254, 0d
4: JLTU r13, r16, :0
ADDI64 r13, r17, 1024d
ADDI64 r15, r17, 10240d
3: LD r14, r254, 2048a, 1h
JLTU r13, r15, :1
ANDI r13, r14, 255d
4: JLTU r13, r15, :0
ADDI64 r14, r16, 10240d
ADDI64 r13, r16, 1024d
3: LD r15, r254, 2048a, 1h
JLTU r13, r14, :1
ANDI r13, r15, 255d
CP r1, r13
JMP :2
1: ADDI64 r14, r13, 1024d
BMC r17, r13, 1024h
CP r13, r14
1: BMC r16, r13, 1024h
ADDI64 r13, r13, 1024d
JMP :3
0: ADDI64 r14, r13, 1d
ADD64 r13, r17, r13
ST r15, r13, 0a, 1h
CP r13, r14
0: ADD64 r17, r16, r13
ST r14, r17, 0a, 1h
ADDI64 r13, r13, 1d
JMP :4
2: ADDI64 r254, r254, 10240d
JALA r0, r31, 0a
code size: 192
code size: 186
ret: 64
status: Ok(())

View file

@ -1,10 +1,10 @@
main:
ADDI64 r254, r254, -64d
ST r31, r254, 0a, 64h
CP r34, r0
LI64 r37, 65536d
LI8 r35, 1b
CP r36, r0
CP r34, r0
LI8 r35, 1b
CP r32, r36
7: JAL r31, r0, :opaque
CP r33, r1