diff --git a/lang/src/son.rs b/lang/src/son.rs index b038b09..87df191 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -23,10 +23,10 @@ use { cell::RefCell, fmt::{self, Debug, Display, Write}, format_args as fa, mem, - ops::{self, Deref}, + ops::{self, Deref, Not}, }, hashbrown::hash_map, - hbbytecode::DisasmError, + hbbytecode::{st, DisasmError}, regalloc2::VReg, }; @@ -279,6 +279,36 @@ impl Nodes { true } + fn late_peephole(&mut self, target: Nid) -> Option { + if let Some(id) = self.peephole(target) { + self.replace(target, id); + return Some(id); + } + None + } + + fn iter_peeps(&mut self, mut fuel: usize) { + let mut in_stack = BitSet::default(); + in_stack.clear(self.values.len()); + let mut stack = + self.iter().map(|(id, ..)| id).inspect(|&id| _ = in_stack.set(id)).collect::>(); + + while fuel != 0 + && let Some(node) = stack.pop() + { + fuel -= 1; + in_stack.unset(node); + let new = self.late_peephole(node); + if let Some(new) = new { + for &i in self[new].outputs.iter().chain(self[new].inputs.iter()) { + if in_stack.set(i) { + stack.push(i) + } + } + } + } + } + fn peephole(&mut self, target: Nid) -> Option { use {Kind as K, TokenKind as T}; match self[target].kind { @@ -703,14 +733,6 @@ impl Nodes { } } - fn late_peephole(&mut self, target: Nid) -> Nid { - if let Some(id) = self.peephole(target) { - self.replace(target, id); - return id; - } - target - } - fn load_loop_var(&mut self, index: usize, value: &mut Variable, loops: &mut [Loop]) { self.load_loop_value(&mut |l| l.scope.iter_mut().nth(index).unwrap(), value, loops); } @@ -1182,6 +1204,7 @@ impl ItemCtx { self.nodes.unlock(ENTRY); self.nodes.unlock(MEM); self.nodes.eliminate_stack_temporaries(); + self.nodes.iter_peeps(1000); } fn emit(&mut self, instr: (usize, [u8; instrs::MAX_SIZE])) { @@ -1892,7 +1915,7 @@ impl<'a> Codegen<'a> { } let store = self.ci.nodes.new_node_nop(ty, Kind::Stre, vc); self.ci.scope.store.set_value(store, &mut self.ci.nodes); - let opted = self.ci.nodes.late_peephole(store); + let opted = self.ci.nodes.late_peephole(store).unwrap_or(store); self.ci.scope.store.set_value_remove(opted, &mut self.ci.nodes); opted } @@ -2954,7 +2977,7 @@ impl<'a> Codegen<'a> { self.ci.nodes.unlock(self.ci.ctrl); self.ci.nodes.unlock(node); - let rpl = self.ci.nodes.late_peephole(node); + let rpl = self.ci.nodes.late_peephole(node).unwrap_or(node); if self.ci.ctrl == node { self.ci.ctrl = rpl; } diff --git a/lang/src/vc.rs b/lang/src/vc.rs index cacf136..0f7cfeb 100644 --- a/lang/src/vc.rs +++ b/lang/src/vc.rs @@ -287,6 +287,15 @@ impl BitSet { self.data.resize(new_len, 0); } + pub fn unset(&mut self, idx: Nid) -> bool { + let idx = idx as usize; + let data_idx = idx / Self::ELEM_SIZE; + let sub_idx = idx % Self::ELEM_SIZE; + let prev = self.data[data_idx] & (1 << sub_idx); + self.data[data_idx] &= !(1 << sub_idx); + prev != 0 + } + pub fn set(&mut self, idx: Nid) -> bool { let idx = idx as usize; let data_idx = idx / Self::ELEM_SIZE; diff --git a/lang/tests/son_tests_idk.txt b/lang/tests/son_tests_idk.txt index f9b3b0f..f73cb5b 100644 --- a/lang/tests/son_tests_idk.txt +++ b/lang/tests/son_tests_idk.txt @@ -1,17 +1,17 @@ main: ADDI64 r254, r254, -128d LI8 r7, 69b - LI64 r5, 128d - LI64 r6, 0d + LI64 r6, 128d + LI64 r8, 0d ADDI64 r4, r254, 0d - 2: JLTU r6, r5, :0 + 2: JLTU r8, r6, :0 LD r3, r254, 42a, 1h ANDI r1, r3, 255d JMP :1 - 0: ADDI64 r8, r6, 1d - ADD64 r6, r6, r4 - ST r7, r6, 0a, 1h - CP r6, r8 + 0: ADDI64 r5, r8, 1d + ADD64 r12, r4, r8 + ST r7, r12, 0a, 1h + CP r8, r5 JMP :2 1: ADDI64 r254, r254, 128d JALA r0, r31, 0a diff --git a/lang/tests/son_tests_string_flip.txt b/lang/tests/son_tests_string_flip.txt index 517026c..e836521 100644 --- a/lang/tests/son_tests_string_flip.txt +++ b/lang/tests/son_tests_string_flip.txt @@ -23,19 +23,19 @@ main: MUL64 r11, r11, r6 MULI64 r9, r9, 8d ADD64 r11, r11, r10 - ADD64 r9, r9, r5 + ADD64 r9, r5, r9 MULI64 r11, r11, 8d ADDI64 r10, r254, 32d - ADD64 r12, r11, r5 + ADD64 r11, r5, r11 BMC r9, r10, 8h - BMC r12, r9, 8h - BMC r10, r12, 8h + BMC r11, r9, 8h + BMC r10, r11, 8h CP r10, r3 JMP :5 0: ADD64 r2, r9, r8 MULI64 r12, r9, 8d - ADD64 r3, r12, r5 - ST r9, r3, 0a, 8h + ADD64 r7, r5, r12 + ST r9, r7, 0a, 8h CP r9, r2 JMP :6 2: ADDI64 r254, r254, 40d diff --git a/lang/tests/son_tests_struct_operators.txt b/lang/tests/son_tests_struct_operators.txt index 8f384d9..013694e 100644 --- a/lang/tests/son_tests_struct_operators.txt +++ b/lang/tests/son_tests_struct_operators.txt @@ -32,89 +32,87 @@ main: LD r11, r254, 150a, 1h ADD8 r1, r11, r10 LD r3, r254, 148a, 1h - ADD8 r6, r3, r1 - LD r7, r254, 151a, 1h - LI8 r9, 4b - ADD8 r11, r7, r6 - ANDI r11, r11, 255d - ANDI r9, r9, 255d - JEQ r11, r9, :0 + ADD8 r7, r3, r1 + LI8 r8, 4b + ADD8 r7, r7, r6 + ANDI r7, r7, 255d + ANDI r8, r8, 255d + JEQ r7, r8, :0 LI64 r1, 1008d JMP :1 - 0: LI64 r5, 1d - ADDI64 r8, r254, 80d - ST r5, r254, 80a, 8h - LI64 r9, 2d - ST r9, r254, 88a, 8h - LI64 r1, 3d - ADDI64 r4, r254, 32d - ST r1, r254, 32a, 8h - LI64 r5, 4d - ST r5, r254, 40a, 8h - LD r12, r254, 32a, 8h - LD r1, r254, 80a, 8h - ADDI64 r11, r254, 0d - ADD64 r3, r12, r1 - ST r3, r254, 0a, 8h - LD r7, r254, 40a, 8h - LD r9, r254, 88a, 8h - ADD64 r10, r7, r9 - ST r10, r254, 8a, 8h - LD r2, r254, 80a, 8h - LD r3, r254, 32a, 8h - SUB64 r5, r3, r2 - ST r5, r254, 16a, 8h - LD r9, r254, 88a, 8h - LD r10, r254, 40a, 8h - SUB64 r12, r10, r9 - ST r12, r254, 24a, 8h - ADDI64 r3, r254, 112d - BMC r11, r3, 32h - LI64 r6, 0d - ADDI64 r9, r254, 96d - ST r6, r254, 96a, 8h - ST r6, r254, 104a, 8h - LD r1, r254, 32a, 8h - LD r2, r254, 96a, 8h - ADDI64 r9, r254, 48d - SUB64 r5, r2, r1 - ST r5, r254, 48a, 8h - LD r10, r254, 40a, 8h - LD r11, r254, 104a, 8h - SUB64 r12, r11, r10 - ST r12, r254, 56a, 8h - ADDI64 r10, r9, 16d - BMC r8, r10, 16h - LD r7, r254, 112a, 8h - LD r8, r254, 48a, 8h - ADD64 r10, r8, r7 - ST r10, r254, 48a, 8h - LD r2, r254, 120a, 8h - LD r3, r254, 56a, 8h - ADD64 r5, r2, r3 - ST r5, r254, 56a, 8h - LD r10, r254, 128a, 8h - LD r11, r254, 64a, 8h + 0: LI64 r3, 1d + ADDI64 r6, r254, 80d + ST r3, r254, 80a, 8h + LI64 r7, 2d + ST r7, r254, 88a, 8h + LI64 r11, 3d + ADDI64 r2, r254, 32d + ST r11, r254, 32a, 8h + LI64 r3, 4d + ST r3, r254, 40a, 8h + LD r10, r254, 32a, 8h + LD r11, r254, 80a, 8h + ADDI64 r9, r254, 0d ADD64 r1, r10, r11 - ST r1, r254, 64a, 8h - LD r5, r254, 136a, 8h - LD r6, r254, 72a, 8h - ADD64 r8, r5, r6 - ST r8, r254, 72a, 8h - LD r12, r254, 64a, 8h - LD r1, r254, 48a, 8h - ADDI64 r7, r254, 152d - ADD64 r4, r12, r1 - ST r4, r254, 152a, 8h - LD r8, r254, 72a, 8h - LD r9, r254, 56a, 8h + ST r1, r254, 0a, 8h + LD r5, r254, 40a, 8h + LD r7, r254, 88a, 8h + ADD64 r8, r5, r7 + ST r8, r254, 8a, 8h + LD r12, r254, 80a, 8h + LD r1, r254, 32a, 8h + SUB64 r3, r1, r12 + ST r3, r254, 16a, 8h + LD r7, r254, 88a, 8h + LD r8, r254, 40a, 8h + SUB64 r10, r8, r7 + ST r10, r254, 24a, 8h + ADDI64 r1, r254, 112d + BMC r9, r1, 32h + LI64 r4, 0d + ADDI64 r7, r254, 96d + ST r4, r254, 96a, 8h + ST r4, r254, 104a, 8h + LD r11, r254, 32a, 8h + LD r12, r254, 96a, 8h + ADDI64 r7, r254, 48d + SUB64 r3, r12, r11 + ST r3, r254, 48a, 8h + LD r8, r254, 40a, 8h + LD r9, r254, 104a, 8h + SUB64 r10, r9, r8 + ST r10, r254, 56a, 8h + ADDI64 r8, r7, 16d + BMC r6, r8, 16h + LD r5, r254, 112a, 8h + LD r6, r254, 48a, 8h + ADD64 r8, r6, r5 + ST r8, r254, 48a, 8h + LD r12, r254, 120a, 8h + LD r1, r254, 56a, 8h + ADD64 r3, r12, r1 + ST r3, r254, 56a, 8h + LD r8, r254, 128a, 8h + LD r9, r254, 64a, 8h ADD64 r11, r8, r9 - ST r11, r254, 160a, 8h - LD r3, r254, 152a, 8h - LD r5, r254, 160a, 8h - ADD64 r1, r5, r3 + ST r11, r254, 64a, 8h + LD r3, r254, 136a, 8h + LD r4, r254, 72a, 8h + ADD64 r6, r3, r4 + ST r6, r254, 72a, 8h + LD r10, r254, 64a, 8h + LD r11, r254, 48a, 8h + ADDI64 r5, r254, 152d + ADD64 r2, r10, r11 + ST r2, r254, 152a, 8h + LD r6, r254, 72a, 8h + LD r7, r254, 56a, 8h + ADD64 r9, r6, r7 + ST r9, r254, 160a, 8h + LD r1, r254, 152a, 8h + ADD64 r1, r1, r9 1: ADDI64 r254, r254, 168d JALA r0, r31, 0a -code size: 1226 +code size: 1200 ret: 10 status: Ok(()) diff --git a/lang/tests/son_tests_struct_patterns.txt b/lang/tests/son_tests_struct_patterns.txt index 7b19b93..ca6aa7f 100644 --- a/lang/tests/son_tests_struct_patterns.txt +++ b/lang/tests/son_tests_struct_patterns.txt @@ -32,24 +32,23 @@ fib_iter: JMP :2 1: JALA r0, r31, 0a main: - ADDI64 r254, r254, -18d - ST r31, r254, 2a, 16h - LI8 r1, 10b + ADDI64 r254, r254, -26d + ST r31, r254, 2a, 24h + LI8 r32, 10b ADDI64 r3, r254, 0d - ST r1, r254, 0a, 1h - ST r1, r254, 1a, 1h + ST r32, r254, 0a, 1h + ST r32, r254, 1a, 1h LD r7, r254, 0a, 1h ANDI r2, r7, 255d JAL r31, r0, :fib - CP r32, r1 - LD r1, r254, 1a, 1h - ANDI r2, r1, 255d + CP r33, r1 + ANDI r2, r32, 255d JAL r31, r0, :fib_iter - CP r8, r32 - SUB64 r1, r8, r1 - LD r31, r254, 2a, 16h - ADDI64 r254, r254, 18d + CP r6, r33 + SUB64 r1, r6, r1 + LD r31, r254, 2a, 24h + ADDI64 r254, r254, 26d JALA r0, r31, 0a -code size: 375 +code size: 362 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt b/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt index ce6a245..4fc5304 100644 --- a/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt +++ b/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt @@ -14,13 +14,13 @@ main: JMP :2 1: ADD64 r2, r7, r9 MULI64 r1, r7, 1024d - ADD64 r3, r1, r5 - BMC r5, r3, 1024h + ADD64 r7, r5, r1 + BMC r5, r7, 1024h CP r7, r2 JMP :3 0: ADD64 r2, r8, r9 - ADD64 r12, r8, r5 - ST r6, r12, 0a, 1h + ADD64 r8, r5, r8 + ST r6, r8, 0a, 1h CP r8, r2 JMP :4 2: ADDI64 r254, r254, 10240d