diff --git a/lang/src/son.rs b/lang/src/son.rs index 2e4e262..9208ed0 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -2066,6 +2066,11 @@ impl Node { fn is_data_phi(&self) -> bool { self.kind == Kind::Phi && self.ty != ty::Id::VOID } + + fn has_no_value(&self) -> bool { + (self.kind.is_cfg() && (!self.kind.is_call() || self.ty == ty::Id::VOID)) + || matches!(self.kind, Kind::Stre) + } } type LoopDepth = u16; diff --git a/lang/src/son/hbvm/regalloc.rs b/lang/src/son/hbvm/regalloc.rs index 7e7c253..832a6ab 100644 --- a/lang/src/son/hbvm/regalloc.rs +++ b/lang/src/son/hbvm/regalloc.rs @@ -75,7 +75,7 @@ impl HbvmBackend { PLoc::WideReg(rg, size) => (rg, size), PLoc::Reg(rg, size) if ty.loc(tys) == Loc::Stack => (rg, size), PLoc::Reg(r, ..) | PLoc::Ref(r, ..) => { - self.emit(instrs::cp(atr(arg), r)); + self.emit_cp(atr(arg), r); continue; } }; @@ -83,13 +83,13 @@ impl HbvmBackend { if nodes.is_unlocked(arg) { self.emit(instrs::addi64(rg, reg::STACK_PTR, self.offsets[arg as usize] as _)); } - self.emit(instrs::cp(atr(arg), rg)); + self.emit_cp(atr(arg), rg); } let mut alloc_buf = vec![]; for (i, block) in res.blocks.iter().enumerate() { self.offsets[block.entry as usize] = self.code.len() as _; - for &nid in &res.instrs[block.range.clone()] { + for &nid in &res.instrs[block.range()] { if nid == VOID { continue; } @@ -289,7 +289,7 @@ impl HbvmBackend { match ret { Some(PLoc::WideReg(..)) => {} Some(PLoc::Reg(..)) if node.ty.loc(tys) == Loc::Stack => {} - Some(PLoc::Reg(r, ..)) => self.emit(instrs::cp(atr(nid), r)), + Some(PLoc::Reg(r, ..)) => self.emit_cp(atr(nid), r), None | Some(PLoc::Ref(..)) => {} } } @@ -309,6 +309,12 @@ impl HbvmBackend { tail, ) } + + fn emit_cp(&mut self, dst: Reg, src: Reg) { + if dst != 0 { + self.emit(instrs::cp(dst, src)); + } + } } struct Function<'a> { @@ -323,7 +329,7 @@ impl core::fmt::Debug for Function<'_> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { for block in &self.func.blocks { writeln!(f, "{:?}", self.nodes[block.entry].kind)?; - for &instr in &self.func.instrs[block.range.clone()] { + for &instr in &self.func.instrs[block.range()] { writeln!(f, "{:?}", self.nodes[instr].kind)?; } } @@ -344,9 +350,11 @@ impl<'a> Function<'a> { } fn add_block(&mut self, entry: Nid) { - self.func - .blocks - .push(Block { range: self.func.instrs.len()..self.func.instrs.len(), entry }); + self.func.blocks.push(Block { + start: self.func.instrs.len() as _, + end: self.func.instrs.len() as _, + entry, + }); self.func.backrefs[entry as usize] = self.func.blocks.len() as u16 - 1; } @@ -357,7 +365,7 @@ impl<'a> Function<'a> { self.func.instrs.push(exit); } let prev = self.func.blocks.last_mut().unwrap(); - prev.range.end = self.func.instrs.len(); + prev.end = self.func.instrs.len() as _; } fn add_instr(&mut self, nid: Nid) { @@ -562,6 +570,8 @@ impl<'a> Regalloc<'a> { fn run_low(&mut self) { self.res.bundles.clear(); self.res.node_to_reg.clear(); + #[cfg(debug_assertions)] + self.res.marked.clear(); self.res.node_to_reg.resize(self.nodes.vreg_count(), 0); debug_assert!(self.res.dfs_buf.is_empty()); @@ -591,7 +601,7 @@ impl<'a> Regalloc<'a> { let instrs = mem::take(&mut self.res.instrs); for &inst in &instrs { - if self.res.visited.get(inst) || inst == 0 { + if self.nodes[inst].has_no_value() || self.res.visited.get(inst) || inst == 0 { continue; } self.append_bundle(inst, &mut bundle, None); @@ -599,13 +609,18 @@ impl<'a> Regalloc<'a> { self.res.instrs = instrs; } - fn append_bundle(&mut self, inst: Nid, bundle: &mut Bundle, prefered: Option) { + fn collect_bundle(&mut self, inst: Nid, into: &mut Bundle) { let dom = self.nodes.idom_of(inst); + self.res.dfs_seem.clear(self.nodes.values.len()); for (cursor, uinst) in self.nodes.uses_of(inst) { + if !self.res.dfs_seem.set(uinst) { + continue; + } #[cfg(debug_assertions)] - self.res.marked.insert((inst, uinst)); + debug_assert!(self.res.marked.insert((inst, uinst))); + self.reverse_cfg_dfs(cursor, dom, |s, n, b| { - let mut range = b.range.clone(); + let mut range = b.range(); debug_assert!(range.start < range.end); range.start = range.start.max(s.instr_of(inst).map_or(0, |n| n + 1) as usize); debug_assert!(range.start < range.end, "{:?}", range); @@ -621,32 +636,37 @@ impl<'a> Regalloc<'a> { range.end = new; debug_assert!(range.start < range.end, "{:?} {inst} {uinst}", range); - bundle.add(range); + into.add(range); }); } + } - if !bundle.taken.contains(&true) { + fn append_bundle(&mut self, inst: Nid, tmp: &mut Bundle, prefered: Option) { + self.collect_bundle(inst, tmp); + + if tmp.is_empty() { self.res.node_to_reg[inst as usize] = u8::MAX; return; } if let Some(prefered) = prefered - && !self.res.bundles[prefered].overlaps(bundle) + && !self.res.bundles[prefered].overlaps(tmp) { - self.res.bundles[prefered].merge(bundle); - bundle.clear(); + self.res.bundles[prefered].merge(tmp); + tmp.clear(); self.res.node_to_reg[inst as usize] = prefered as Reg + 1; - } else { - match self.res.bundles.iter_mut().enumerate().find(|(_, b)| !b.overlaps(bundle)) { - Some((i, other)) => { - other.merge(bundle); - bundle.clear(); - self.res.node_to_reg[inst as usize] = i as Reg + 1; - } - None => { - self.res.bundles.push(mem::replace(bundle, Bundle::new(bundle.taken.len()))); - self.res.node_to_reg[inst as usize] = self.res.bundles.len() as Reg; - } + return; + } + + match self.res.bundles.iter_mut().enumerate().find(|(_, b)| !b.overlaps(tmp)) { + Some((i, other)) => { + other.merge(tmp); + tmp.clear(); + self.res.node_to_reg[inst as usize] = i as Reg + 1; + } + None => { + self.res.bundles.push(tmp.take()); + self.res.node_to_reg[inst as usize] = self.res.bundles.len() as Reg; } } } @@ -659,13 +679,12 @@ impl<'a> Regalloc<'a> { ) { debug_assert!(self.res.dfs_buf.is_empty()); self.res.dfs_buf.push(from); - self.res.dfs_seem.clear(self.nodes.values.len()); debug_assert!(self.nodes.dominates(until, from)); while let Some(nid) = self.res.dfs_buf.pop() { debug_assert!(self.nodes.dominates(until, nid), "{until} {:?}", self.nodes[until]); - each(self, nid, self.res.blocks[self.block_of(nid) as usize].clone()); + each(self, nid, self.res.blocks[self.block_of(nid) as usize]); if nid == until { continue; } @@ -729,10 +748,25 @@ impl Bundle { fn clear(&mut self) { self.taken.fill(false); } + + fn is_empty(&self) -> bool { + !self.taken.contains(&true) + } + + fn take(&mut self) -> Self { + mem::replace(self, Self::new(self.taken.len())) + } } -#[derive(Clone)] +#[derive(Clone, Copy)] struct Block { - range: Range, + start: u16, + end: u16, entry: Nid, } + +impl Block { + pub fn range(&self) -> Range { + self.start as usize..self.end as usize + } +} diff --git a/lang/tests/son_tests_const_folding_with_arg.txt b/lang/tests/son_tests_const_folding_with_arg.txt index 8723147..5aa16aa 100644 --- a/lang/tests/son_tests_const_folding_with_arg.txt +++ b/lang/tests/son_tests_const_folding_with_arg.txt @@ -1,7 +1,6 @@ main: - CP r0, r2 CP r1, r0 JALA r0, r31, 0a -code size: 25 +code size: 22 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_different_function_destinations.txt b/lang/tests/son_tests_different_function_destinations.txt index 5e6a775..18f9921 100644 --- a/lang/tests/son_tests_different_function_destinations.txt +++ b/lang/tests/son_tests_different_function_destinations.txt @@ -1,6 +1,6 @@ main: - ADDI64 r254, r254, -160d - ST r31, r254, 80a, 80h + ADDI64 r254, r254, -152d + ST r31, r254, 80a, 72h LRA r32, r0, :glob_stru JAL r31, r0, :new_stru ST r1, r32, 0a, 16h @@ -62,14 +62,14 @@ main: ST r33, r32, 8a, 8h CP r32, r34 JMP :7 - 3: MULI64 r39, r32, 16d - ADD64 r39, r37, r39 + 3: MULI64 r34, r32, 16d + ADD64 r34, r37, r34 JAL r31, r0, :new_stru - ST r1, r39, 0a, 16h + ST r1, r34, 0a, 16h ADD64 r32, r32, r35 JMP :8 - 1: LD r31, r254, 80a, 80h - ADDI64 r254, r254, 160d + 1: LD r31, r254, 80a, 72h + ADDI64 r254, r254, 152d JALA r0, r31, 0a new_stru: ADDI64 r254, r254, -24d diff --git a/lang/tests/son_tests_directives.txt b/lang/tests/son_tests_directives.txt index 7dca40b..a29e1a8 100644 --- a/lang/tests/son_tests_directives.txt +++ b/lang/tests/son_tests_directives.txt @@ -14,12 +14,11 @@ main: CP r6, r13 LD r3, r14, 0a, 16h ECA - CP r0, r1 CP r1, r0 LD r32, r254, 16a, 32h ADDI64 r254, r254, 48d JALA r0, r31, 0a ev: Ecall -code size: 183 +code size: 180 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_fb_driver.txt b/lang/tests/son_tests_fb_driver.txt index 06347e7..c3f362b 100644 --- a/lang/tests/son_tests_fb_driver.txt +++ b/lang/tests/son_tests_fb_driver.txt @@ -11,7 +11,6 @@ main: ADDI64 r254, r254, -64d ST r31, r254, 0a, 64h JAL r31, r0, :check_platform - CP r0, r1 CP r33, r0 LI64 r36, 30d LI64 r37, 100d @@ -59,6 +58,6 @@ x86_fb_ptr: LD r32, r254, 0a, 8h ADDI64 r254, r254, 8d JALA r0, r31, 0a -code size: 428 +code size: 425 ret: 3000 status: Ok(()) diff --git a/lang/tests/son_tests_generic_type_mishap.txt b/lang/tests/son_tests_generic_type_mishap.txt index 6037f7e..cbdeecb 100644 --- a/lang/tests/son_tests_generic_type_mishap.txt +++ b/lang/tests/son_tests_generic_type_mishap.txt @@ -6,11 +6,10 @@ main: ADDI64 r254, r254, 16d JALA r0, r31, 0a opaque: - CP r0, r2 JALA r0, r31, 0a process: - ADDI64 r254, r254, -64d - ST r31, r254, 16a, 48h + ADDI64 r254, r254, -56d + ST r31, r254, 16a, 40h LI64 r32, 1000d ADDI64 r33, r254, 0d ST r0, r254, 0a, 1h @@ -18,16 +17,16 @@ process: JMP :1 0: CP r2, r33 JAL r31, r0, :opaque - LD r35, r254, 0a, 1h - ANDI r35, r35, 255d - JEQ r35, r0, :2 + LD r34, r254, 0a, 1h + ANDI r34, r34, 255d + JEQ r34, r0, :2 JMP :3 2: ADDI64 r32, r32, -1d 1: JMP :4 - 3: LD r31, r254, 16a, 48h - ADDI64 r254, r254, 64d + 3: LD r31, r254, 16a, 40h + ADDI64 r254, r254, 56d JALA r0, r31, 0a timed out -code size: 251 +code size: 248 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_inline.txt b/lang/tests/son_tests_inline.txt index 753a25b..c40a6f5 100644 --- a/lang/tests/son_tests_inline.txt +++ b/lang/tests/son_tests_inline.txt @@ -1,12 +1,12 @@ main: - ADDI64 r254, r254, -24d - ST r32, r254, 0a, 24h + ADDI64 r254, r254, -16d + ST r32, r254, 0a, 16h LI64 r13, 8d CP r2, r13 ECA LI64 r14, 6d - LRA r15, r0, :gb - LD r13, r15, 0a, 8h + LRA r13, r0, :gb + LD r13, r13, 0a, 8h CMPU r13, r13, r0 CMPUI r13, r13, 0d OR r13, r13, r0 @@ -17,8 +17,8 @@ main: 0: LI64 r13, 1d 1: SUB64 r13, r13, r14 CP r1, r13 - LD r32, r254, 0a, 24h - ADDI64 r254, r254, 24d + LD r32, r254, 0a, 16h + ADDI64 r254, r254, 16d JALA r0, r31, 0a code size: 179 ret: 0 diff --git a/lang/tests/son_tests_intcast_store.txt b/lang/tests/son_tests_intcast_store.txt index 8639a9e..b9a12d6 100644 --- a/lang/tests/son_tests_intcast_store.txt +++ b/lang/tests/son_tests_intcast_store.txt @@ -1,31 +1,29 @@ main: - ADDI64 r254, r254, -72d - ST r31, r254, 16a, 56h + ADDI64 r254, r254, -56d + ST r31, r254, 16a, 40h ADDI64 r32, r254, 0d ADDI64 r33, r254, 8d ST r0, r254, 0a, 8h ST r0, r254, 8a, 8h - LI64 r36, 1024d + LI64 r34, 1024d CP r2, r33 CP r3, r32 - CP r4, r36 + CP r4, r34 JAL r31, r0, :set CP r32, r1 ANDI r32, r32, 4294967295d CP r1, r32 - LD r31, r254, 16a, 56h - ADDI64 r254, r254, 72d + LD r31, r254, 16a, 40h + ADDI64 r254, r254, 56d JALA r0, r31, 0a set: ADDI64 r254, r254, -8d ST r32, r254, 0a, 8h - CP r0, r2 - CP r0, r3 CP r13, r4 CP r1, r13 LD r32, r254, 0a, 8h ADDI64 r254, r254, 8d JALA r0, r31, 0a -code size: 229 +code size: 223 ret: 1024 status: Ok(()) diff --git a/lang/tests/son_tests_integer_inference_issues.txt b/lang/tests/son_tests_integer_inference_issues.txt index 1c49891..b62454f 100644 --- a/lang/tests/son_tests_integer_inference_issues.txt +++ b/lang/tests/son_tests_integer_inference_issues.txt @@ -24,10 +24,9 @@ main: CP r2, r0 CP r3, r32 JAL r31, r0, :integer_range - CP r0, r1 LD r31, r254, 0a, 24h ADDI64 r254, r254, 24d JALA r0, r31, 0a -code size: 215 +code size: 212 ret: 42 status: Ok(()) diff --git a/lang/tests/son_tests_memory_swap.txt b/lang/tests/son_tests_memory_swap.txt index 02b3dea..0dbc2d9 100644 --- a/lang/tests/son_tests_memory_swap.txt +++ b/lang/tests/son_tests_memory_swap.txt @@ -20,8 +20,8 @@ main: JAL r31, r0, :decide ADDI64 r34, r254, 24d BMC r32, r34, 24h - LI64 r36, 1d - CP r2, r36 + LI64 r35, 1d + CP r2, r35 CP r1, r34 JAL r31, r0, :decide ADDI64 r36, r254, 0d diff --git a/lang/tests/son_tests_more_if_opts.txt b/lang/tests/son_tests_more_if_opts.txt index e6c9653..c53e42d 100644 --- a/lang/tests/son_tests_more_if_opts.txt +++ b/lang/tests/son_tests_more_if_opts.txt @@ -4,7 +4,6 @@ main: JAL r31, r0, :opaque CP r33, r1 JAL r31, r0, :opaque - CP r0, r1 JNE r33, r0, :0 CP r32, r0 JMP :1 @@ -19,6 +18,6 @@ main: opaque: CP r1, r0 JALA r0, r31, 0a -code size: 153 +code size: 150 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_nullable_types.txt b/lang/tests/son_tests_nullable_types.txt index ddfbacd..ad4afa5 100644 --- a/lang/tests/son_tests_nullable_types.txt +++ b/lang/tests/son_tests_nullable_types.txt @@ -7,8 +7,8 @@ decide: ADDI64 r254, r254, 8d JALA r0, r31, 0a main: - ADDI64 r254, r254, -144d - ST r31, r254, 80a, 64h + ADDI64 r254, r254, -136d + ST r31, r254, 80a, 56h JAL r31, r0, :decide CP r32, r1 CP r33, r0 @@ -57,22 +57,22 @@ main: 9: ADDI64 r33, r254, 40d JAL r31, r0, :new_foo ST r1, r33, 0a, 16h - LD r37, r254, 40a, 8h - JNE r37, r0, :10 + LD r36, r254, 40a, 8h + JNE r36, r0, :10 LI64 r32, 999d CP r1, r32 JMP :3 -10: LRA r37, r0, :"foo\0" - CP r4, r37 +10: LRA r36, r0, :"foo\0" + CP r4, r36 LD r2, r33, 0a, 16h JAL r31, r0, :use_foo ADDI64 r33, r254, 0d JAL r31, r0, :no_foo ST r1, r33, 0a, 16h JAL r31, r0, :decide - CP r37, r1 - ANDI r37, r37, 255d - JNE r37, r0, :11 + CP r36, r1 + ANDI r36, r36, 255d + JNE r36, r0, :11 JMP :12 11: ST r34, r254, 0a, 8h ST r35, r254, 8a, 8h @@ -102,8 +102,8 @@ main: ANDI r32, r32, 65535d SUB64 r32, r32, r33 CP r1, r32 - 3: LD r31, r254, 80a, 64h - ADDI64 r254, r254, 144d + 3: LD r31, r254, 80a, 56h + ADDI64 r254, r254, 136d JALA r0, r31, 0a new_bar: ADDI64 r254, r254, -48d @@ -144,10 +144,8 @@ use_foo: ADDI64 r254, r254, -16d ST r2, r254, 0a, 16h ADDI64 r2, r254, 0d - CP r0, r2 - CP r0, r4 ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 1246 +code size: 1240 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_only_break_loop.txt b/lang/tests/son_tests_only_break_loop.txt index 1c74502..b491767 100644 --- a/lang/tests/son_tests_only_break_loop.txt +++ b/lang/tests/son_tests_only_break_loop.txt @@ -32,9 +32,7 @@ main: ADDI64 r254, r254, 32d JALA r0, r31, 0a outb: - CP r0, r2 - CP r0, r3 JALA r0, r31, 0a -code size: 257 +code size: 251 ret: 1 status: Ok(()) diff --git a/lang/tests/son_tests_pointers.txt b/lang/tests/son_tests_pointers.txt index 3ef067a..b74bd3b 100644 --- a/lang/tests/son_tests_pointers.txt +++ b/lang/tests/son_tests_pointers.txt @@ -1,9 +1,8 @@ drop: - CP r0, r2 JALA r0, r31, 0a main: - ADDI64 r254, r254, -48d - ST r31, r254, 8a, 40h + ADDI64 r254, r254, -40d + ST r31, r254, 8a, 32h ADDI64 r32, r254, 0d LI64 r33, 1d ST r33, r254, 0a, 8h @@ -14,8 +13,8 @@ main: LD r32, r254, 0a, 8h ADDI64 r32, r32, -2d CP r1, r32 - LD r31, r254, 8a, 40h - ADDI64 r254, r254, 48d + LD r31, r254, 8a, 32h + ADDI64 r254, r254, 40d JALA r0, r31, 0a modify: ADDI64 r254, r254, -16d @@ -26,6 +25,6 @@ modify: LD r32, r254, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 247 +code size: 244 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_request_page.txt b/lang/tests/son_tests_request_page.txt index 105dd89..9cc0b1a 100644 --- a/lang/tests/son_tests_request_page.txt +++ b/lang/tests/son_tests_request_page.txt @@ -20,11 +20,9 @@ create_back_buffer: 2: JLTS r32, r33, :3 CP r2, r34 JAL r31, r0, :request_page - CP r0, r1 JMP :4 3: CP r2, r32 JAL r31, r0, :request_page - CP r0, r1 4: SUB64 r32, r32, r33 JMP :5 1: LD r31, r254, 0a, 48h @@ -36,29 +34,28 @@ main: LI64 r32, 400d CP r2, r32 JAL r31, r0, :create_back_buffer - CP r0, r1 LD r31, r254, 0a, 24h ADDI64 r254, r254, 24d JALA r0, r31, 0a request_page: - ADDI64 r254, r254, -40d - ST r32, r254, 0a, 40h + ADDI64 r254, r254, -32d + ST r32, r254, 0a, 32h CP r13, r2 LRA r14, r0, :"\0\u{1}xxxxxxxx\0" ST r13, r14, 1a, 1h - LI64 r15, 12d - LI64 r16, 2d - LI64 r17, 3d - CP r2, r17 - CP r3, r16 + LI64 r13, 12d + LI64 r15, 2d + LI64 r16, 3d + CP r2, r16 + CP r3, r15 CP r4, r14 - CP r5, r15 + CP r5, r13 ECA CP r13, r1 CP r1, r13 - LD r32, r254, 0a, 40h - ADDI64 r254, r254, 40d + LD r32, r254, 0a, 32h + ADDI64 r254, r254, 32d JALA r0, r31, 0a -code size: 408 +code size: 399 ret: 42 status: Ok(()) diff --git a/lang/tests/son_tests_storing_into_nullable_struct.txt b/lang/tests/son_tests_storing_into_nullable_struct.txt index 23eccd9..06f1c06 100644 --- a/lang/tests/son_tests_storing_into_nullable_struct.txt +++ b/lang/tests/son_tests_storing_into_nullable_struct.txt @@ -7,7 +7,6 @@ do_stuff: ADDI64 r254, r254, 8d JALA r0, r31, 0a just_read: - CP r0, r2 JALA r0, r31, 0a main: ADDI64 r254, r254, -88d @@ -75,6 +74,6 @@ optionala: LD r32, r254, 48a, 32h ADDI64 r254, r254, 80d JALA r0, r31, 0a -code size: 670 +code size: 667 ret: 100 status: Ok(()) diff --git a/lang/tests/son_tests_wide_ret.txt b/lang/tests/son_tests_wide_ret.txt index 5a354e8..c7b7006 100644 --- a/lang/tests/son_tests_wide_ret.txt +++ b/lang/tests/son_tests_wide_ret.txt @@ -17,8 +17,6 @@ main: maina: ADDI64 r254, r254, -68d ST r31, r254, 20a, 48h - CP r0, r3 - CP r0, r4 ADDI64 r32, r254, 16d JAL r31, r0, :small_struct ST r1, r32, 0a, 4h @@ -55,6 +53,6 @@ small_struct: LD r32, r254, 4a, 8h ADDI64 r254, r254, 12d JALA r0, r31, 0a -code size: 591 +code size: 585 ret: 2 status: Ok(())