making the instruction scheduling smarter

instructions whose results are used only by phis are pushed to the end of
the block, which usually saves copy instructions

Signed-off-by: Jakub Doka <jakub.doka2@gmail.com>
This commit is contained in:
Jakub Doka 2024-12-19 11:00:19 +01:00
parent a8aba7e7c2
commit cfd3eac0a8
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
11 changed files with 132 additions and 141 deletions

View file

@ -295,9 +295,7 @@ impl Nodes {
continue; continue;
} }
let mut cursor = buf.len(); let mut cursor = buf.len();
for &o in outputs.iter().filter(|&&n| n == o) { buf.push(o);
buf.push(o);
}
while let Some(&n) = buf.get(cursor) { while let Some(&n) = buf.get(cursor) {
for &i in &self[n].inputs[1..] { for &i in &self[n].inputs[1..] {
if fromc == self[i].inputs.first() if fromc == self[i].inputs.first()
@ -308,15 +306,17 @@ impl Nodes {
}) })
&& seen.set(i) && seen.set(i)
{ {
for &o in outputs.iter().filter(|&&n| n == i) { buf.push(i);
buf.push(o);
}
} }
} }
cursor += 1; cursor += 1;
} }
} }
buf[1..].sort_by_key(|&n| {
self[n].has_no_value() || !self[n].outputs.iter().all(|&o| self[o].kind == Kind::Phi)
});
debug_assert_eq!( debug_assert_eq!(
outputs.iter().filter(|&&n| !seen.get(n)).copied().collect::<Vec<_>>(), outputs.iter().filter(|&&n| !seen.get(n)).copied().collect::<Vec<_>>(),
vec![], vec![],

View file

@ -48,10 +48,9 @@ main:
JMP :1 JMP :1
6: CP r1, r0 6: CP r1, r0
JMP :1 JMP :1
5: ADDI64 r34, r32, 16d 5: ST r0, r32, 0a, 8h
ST r0, r32, 0a, 8h
ST r0, r32, 8a, 8h ST r0, r32, 8a, 8h
CP r32, r34 ADDI64 r32, r32, 16d
JMP :7 JMP :7
3: JAL r31, r0, :new_stru 3: JAL r31, r0, :new_stru
ST r1, r32, 0a, 16h ST r1, r32, 0a, 16h
@ -67,6 +66,6 @@ new_stru:
LD r1, r254, 0a, 16h LD r1, r254, 0a, 16h
ADDI64 r254, r254, 16d ADDI64 r254, r254, 16d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 658 code size: 655
ret: 0 ret: 0
status: Ok(()) status: Ok(())

View file

@ -1,10 +1,10 @@
continue_and_state_change: continue_and_state_change:
CP r13, r2 CP r13, r2
CP r15, r0
LI64 r16, 3d LI64 r16, 3d
LI64 r14, 4d
LI64 r17, 2d LI64 r17, 2d
LI64 r18, 10d LI64 r18, 10d
CP r15, r0
LI64 r14, 4d
6: JLTU r13, r18, :0 6: JLTU r13, r18, :0
JMP :1 JMP :1
0: JNE r13, r17, :2 0: JNE r13, r17, :2

View file

@ -112,51 +112,49 @@ new:
push: push:
ADDI64 r254, r254, -80d ADDI64 r254, r254, -80d
ST r31, r254, 0a, 80h ST r31, r254, 0a, 80h
CP r38, r2 CP r36, r2
CP r39, r3 CP r37, r3
LI64 r37, 1d LI64 r35, 1d
LD r33, r38, 8a, 8h LD r33, r36, 8a, 8h
LD r32, r38, 16a, 8h LD r32, r36, 16a, 8h
JNE r32, r33, :0 JNE r32, r33, :0
JNE r32, r0, :1 JNE r32, r0, :1
CP r32, r37 CP r32, r35
JMP :2 JMP :2
1: MULI64 r32, r32, 2d 1: MULI64 r32, r32, 2d
2: CP r2, r32 2: CP r2, r32
CP r3, r37 CP r3, r35
JAL r31, r0, :malloc JAL r31, r0, :malloc
ST r32, r38, 16a, 8h ST r32, r36, 16a, 8h
CP r35, r1 CP r34, r1
JNE r35, r0, :3 JNE r34, r0, :3
CP r1, r0 CP r1, r0
JMP :4 JMP :4
3: LD r32, r38, 0a, 8h 3: LD r32, r36, 0a, 8h
ADD64 r40, r33, r32 ADD64 r38, r33, r32
CP r34, r35 CP r33, r34
7: LD r33, r38, 0a, 8h 7: LD r39, r36, 0a, 8h
LD r36, r38, 8a, 8h LD r40, r36, 8a, 8h
JNE r40, r32, :5 JNE r38, r32, :5
JEQ r36, r0, :6 JEQ r40, r0, :6
CP r2, r33 CP r2, r39
CP r3, r36 CP r3, r40
CP r4, r37 CP r4, r35
JAL r31, r0, :free JAL r31, r0, :free
JMP :6 JMP :6
6: ST r35, r38, 0a, 8h 6: ST r34, r36, 0a, 8h
JMP :0 JMP :0
5: ADDI64 r36, r34, 1d 5: LD r39, r32, 0a, 1h
ADDI64 r33, r32, 1d
LD r32, r32, 0a, 1h
ST r32, r34, 0a, 1h
CP r32, r33
CP r34, r36
JMP :7
0: LD r32, r38, 8a, 8h
LD r33, r38, 0a, 8h
ADD64 r33, r32, r33
ST r39, r33, 0a, 1h ST r39, r33, 0a, 1h
ADD64 r32, r32, r37 ADDI64 r33, r33, 1d
ST r32, r38, 8a, 8h ADDI64 r32, r32, 1d
JMP :7
0: LD r32, r36, 8a, 8h
LD r33, r36, 0a, 8h
ADD64 r33, r32, r33
ST r37, r33, 0a, 1h
ADD64 r32, r32, r35
ST r32, r36, 8a, 8h
CP r1, r33 CP r1, r33
4: LD r31, r254, 0a, 80h 4: LD r31, r254, 0a, 80h
ADDI64 r254, r254, 80d ADDI64 r254, r254, 80d
@ -164,60 +162,58 @@ push:
push: push:
ADDI64 r254, r254, -88d ADDI64 r254, r254, -88d
ST r31, r254, 0a, 88h ST r31, r254, 0a, 88h
CP r38, r2 CP r36, r2
CP r39, r3 CP r37, r3
LI64 r37, 1d LI64 r35, 1d
LD r33, r38, 8a, 8h LD r33, r36, 8a, 8h
LD r32, r38, 16a, 8h LD r32, r36, 16a, 8h
JNE r32, r33, :0 JNE r32, r33, :0
JNE r32, r0, :1 JNE r32, r0, :1
CP r32, r37 CP r32, r35
JMP :2 JMP :2
1: MULI64 r32, r32, 2d 1: MULI64 r32, r32, 2d
2: LI64 r40, 8d 2: LI64 r38, 8d
MUL64 r34, r32, r40 MUL64 r34, r32, r38
CP r2, r34 CP r2, r34
CP r3, r40 CP r3, r38
JAL r31, r0, :malloc JAL r31, r0, :malloc
ST r32, r38, 16a, 8h ST r32, r36, 16a, 8h
CP r35, r1 CP r34, r1
JNE r35, r0, :3 JNE r34, r0, :3
CP r1, r0 CP r1, r0
JMP :4 JMP :4
3: MULI64 r33, r33, 8d 3: MULI64 r33, r33, 8d
LD r32, r38, 0a, 8h LD r32, r36, 0a, 8h
ADD64 r41, r32, r33 ADD64 r39, r32, r33
CP r34, r35 CP r33, r34
7: LD r33, r38, 0a, 8h 7: LD r40, r36, 0a, 8h
LD r36, r38, 8a, 8h LD r41, r36, 8a, 8h
JNE r41, r32, :5 JNE r39, r32, :5
JEQ r36, r0, :6 JEQ r41, r0, :6
MUL64 r32, r36, r40 MUL64 r32, r41, r38
CP r2, r33 CP r2, r40
CP r3, r32 CP r3, r32
CP r4, r40 CP r4, r38
JAL r31, r0, :free JAL r31, r0, :free
JMP :6 JMP :6
6: ST r35, r38, 0a, 8h 6: ST r34, r36, 0a, 8h
JMP :0 JMP :0
5: ADDI64 r36, r34, 8d 5: LD r40, r32, 0a, 8h
ADDI64 r33, r32, 8d ST r40, r33, 0a, 8h
LD r32, r32, 0a, 8h ADDI64 r33, r33, 8d
ST r32, r34, 0a, 8h ADDI64 r32, r32, 8d
CP r32, r33
CP r34, r36
JMP :7 JMP :7
0: LD r32, r38, 8a, 8h 0: LD r32, r36, 8a, 8h
MULI64 r33, r32, 8d MULI64 r33, r32, 8d
LD r34, r38, 0a, 8h LD r34, r36, 0a, 8h
ADD64 r33, r34, r33 ADD64 r33, r34, r33
ST r39, r33, 0a, 8h ST r37, r33, 0a, 8h
ADD64 r32, r32, r37 ADD64 r32, r32, r35
ST r32, r38, 8a, 8h ST r32, r36, 8a, 8h
CP r1, r33 CP r1, r33
4: LD r31, r254, 0a, 88h 4: LD r31, r254, 0a, 88h
ADDI64 r254, r254, 88d ADDI64 r254, r254, 88d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 1635 code size: 1623
ret: 69 ret: 69
status: Ok(()) status: Ok(())

View file

@ -1,21 +1,20 @@
main: main:
ADDI64 r254, r254, -128d ADDI64 r254, r254, -128d
ADDI64 r15, r254, 0d ADDI64 r14, r254, 0d
LI8 r16, 69b LI8 r15, 69b
LI64 r17, 128d LI64 r16, 128d
CP r13, r0 CP r13, r0
2: LD r14, r254, 42a, 1h 2: LD r17, r254, 42a, 1h
JLTU r13, r17, :0 JLTU r13, r16, :0
ANDI r13, r14, 255d ANDI r13, r17, 255d
CP r1, r13 CP r1, r13
JMP :1 JMP :1
0: ADDI64 r14, r13, 1d 0: ADD64 r17, r14, r13
ADD64 r13, r15, r13 ST r15, r17, 0a, 1h
ST r16, r13, 0a, 1h ADDI64 r13, r13, 1d
CP r13, r14
JMP :2 JMP :2
1: ADDI64 r254, r254, 128d 1: ADDI64 r254, r254, 128d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 141 code size: 138
ret: 69 ret: 69
status: Ok(()) status: Ok(())

View file

@ -59,9 +59,9 @@ put_filled_rect:
LD r14, r14, 0a, 8h LD r14, r14, 0a, 8h
ADD64 r26, r14, r26 ADD64 r26, r14, r26
LD r28, r15, 0a, 8h LD r28, r15, 0a, 8h
MUL64 r15, r27, r25
ADD64 r14, r14, r15
ADD64 r15, r28, r26 ADD64 r15, r28, r26
MUL64 r25, r27, r25
ADD64 r14, r14, r25
ADD64 r14, r28, r14 ADD64 r14, r28, r14
3: JGTU r13, r20, :0 3: JGTU r13, r20, :0
JNE r13, r20, :1 JNE r13, r20, :1

View file

@ -12,8 +12,8 @@ create_back_buffer:
0: LI8 r34, 255b 0: LI8 r34, 255b
CP r2, r34 CP r2, r34
JAL r31, r0, :request_page JAL r31, r0, :request_page
SUB64 r32, r32, r33
CP r35, r1 CP r35, r1
SUB64 r32, r32, r33
5: JGTS r32, r0, :2 5: JGTS r32, r0, :2
CP r1, r35 CP r1, r35
JMP :1 JMP :1

View file

@ -11,9 +11,9 @@ main:
JALA r0, r31, 0a JALA r0, r31, 0a
sqrt: sqrt:
CP r14, r2 CP r14, r2
CP r17, r0
LI64 r16, 15d LI64 r16, 15d
LI64 r15, 32768d LI64 r15, 32768d
CP r17, r0
CP r13, r17 CP r13, r17
3: JNE r15, r17, :0 3: JNE r15, r17, :0
CP r1, r13 CP r1, r13

View file

@ -1,45 +1,44 @@
main: main:
ADDI64 r254, r254, -40d ADDI64 r254, r254, -40d
LI64 r17, 1d LI64 r16, 1d
LI64 r15, 4d LI64 r15, 4d
ADDI64 r17, r254, 0d
CP r14, r0 CP r14, r0
ADDI64 r18, r254, 0d
CP r13, r14 CP r13, r14
6: JNE r13, r15, :0 6: JNE r13, r15, :0
ADDI64 r19, r254, 32d ADDI64 r18, r254, 32d
LI64 r20, 2d LI64 r19, 2d
CP r13, r14 CP r13, r14
4: LD r15, r254, 16a, 8h 4: LD r15, r254, 16a, 8h
JNE r13, r17, :1 JNE r13, r16, :1
CP r1, r15 CP r1, r15
JMP :2 JMP :2
1: ADD64 r16, r13, r17 1: ADD64 r15, r13, r16
SUB64 r15, r20, r16 SUB64 r20, r19, r15
MUL64 r21, r15, r20 MUL64 r20, r20, r19
MUL64 r22, r13, r20 MUL64 r21, r13, r19
CP r13, r14 CP r13, r14
5: JNE r13, r20, :3 5: JNE r13, r19, :3
CP r13, r16
JMP :4
3: ADD64 r15, r13, r17
ADD64 r23, r22, r13
ADD64 r13, r21, r13
MULI64 r23, r23, 8d
MULI64 r13, r13, 8d
ADD64 r23, r18, r23
ADD64 r13, r18, r13
BMC r23, r19, 8h
BMC r13, r23, 8h
BMC r19, r13, 8h
CP r13, r15 CP r13, r15
JMP :4
3: ADD64 r22, r21, r13
ADD64 r23, r20, r13
MULI64 r22, r22, 8d
MULI64 r23, r23, 8d
ADD64 r22, r17, r22
ADD64 r23, r17, r23
BMC r22, r18, 8h
BMC r23, r22, 8h
BMC r18, r23, 8h
ADD64 r13, r13, r16
JMP :5 JMP :5
0: MULI64 r16, r13, 8d 0: MULI64 r18, r13, 8d
ADD64 r16, r18, r16 ADD64 r18, r17, r18
ST r13, r16, 0a, 8h ST r13, r18, 0a, 8h
ADD64 r13, r13, r17 ADD64 r13, r13, r16
JMP :6 JMP :6
2: ADDI64 r254, r254, 40d 2: ADDI64 r254, r254, 40d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 267 code size: 264
ret: 0 ret: 0
status: Ok(()) status: Ok(())

View file

@ -1,28 +1,26 @@
main: main:
ADDI64 r254, r254, -10240d ADDI64 r254, r254, -10240d
LI8 r15, 64b LI8 r14, 64b
LI64 r16, 1024d LI64 r15, 1024d
ADDI64 r16, r254, 0d
CP r13, r0 CP r13, r0
ADDI64 r17, r254, 0d 4: JLTU r13, r15, :0
4: JLTU r13, r16, :0 ADDI64 r14, r16, 10240d
ADDI64 r13, r17, 1024d ADDI64 r13, r16, 1024d
ADDI64 r15, r17, 10240d 3: LD r15, r254, 2048a, 1h
3: LD r14, r254, 2048a, 1h JLTU r13, r14, :1
JLTU r13, r15, :1 ANDI r13, r15, 255d
ANDI r13, r14, 255d
CP r1, r13 CP r1, r13
JMP :2 JMP :2
1: ADDI64 r14, r13, 1024d 1: BMC r16, r13, 1024h
BMC r17, r13, 1024h ADDI64 r13, r13, 1024d
CP r13, r14
JMP :3 JMP :3
0: ADDI64 r14, r13, 1d 0: ADD64 r17, r16, r13
ADD64 r13, r17, r13 ST r14, r17, 0a, 1h
ST r15, r13, 0a, 1h ADDI64 r13, r13, 1d
CP r13, r14
JMP :4 JMP :4
2: ADDI64 r254, r254, 10240d 2: ADDI64 r254, r254, 10240d
JALA r0, r31, 0a JALA r0, r31, 0a
code size: 192 code size: 186
ret: 64 ret: 64
status: Ok(()) status: Ok(())

View file

@ -1,10 +1,10 @@
main: main:
ADDI64 r254, r254, -64d ADDI64 r254, r254, -64d
ST r31, r254, 0a, 64h ST r31, r254, 0a, 64h
CP r34, r0
LI64 r37, 65536d LI64 r37, 65536d
LI8 r35, 1b
CP r36, r0 CP r36, r0
CP r34, r0
LI8 r35, 1b
CP r32, r36 CP r32, r36
7: JAL r31, r0, :opaque 7: JAL r31, r0, :opaque
CP r33, r1 CP r33, r1