From 969ea57e3f677ae12b0b5031ad1da8711c71d634 Mon Sep 17 00:00:00 2001
From: Jakub Doka
Date: Thu, 19 Dec 2024 19:43:30 +0100
Subject: [PATCH] optimizing the bitset used in register allocation also
 fixing an enum bug

Signed-off-by: Jakub Doka
---
 lang/src/backend/hbvm.rs          |  14 ++-
 lang/src/backend/hbvm/regalloc.rs | 183 +++++++++++++++++++++++-------
 lang/src/lib.rs                   |   3 +-
 lang/src/ty.rs                    |   6 +-
 lang/src/utils.rs                 |  81 ++++++++-----
 5 files changed, 211 insertions(+), 76 deletions(-)

diff --git a/lang/src/backend/hbvm.rs b/lang/src/backend/hbvm.rs
index f1a241a..19e40b9 100644
--- a/lang/src/backend/hbvm.rs
+++ b/lang/src/backend/hbvm.rs
@@ -8,7 +8,7 @@ use {
         utils::{EntSlice, EntVec},
     },
     alloc::{boxed::Box, collections::BTreeMap, string::String, vec::Vec},
-    core::{assert_matches::debug_assert_matches, mem, ops::Range},
+    core::{assert_matches::debug_assert_matches, mem, ops::Range, usize},
     hbbytecode::{self as instrs, *},
     reg::Reg,
 };
@@ -606,9 +606,8 @@ impl TokenKind {
         Some(ops[size.ilog2() as usize])
     }
 
-    fn unop(&self, dst: ty::Id, src: ty::Id) -> Option<fn(u8, u8) -> EncodedInstr> {
-        let src_idx =
-            src.simple_size().unwrap_or_else(|| panic!("{:?}", src.expand())).ilog2() as usize;
+    fn unop(&self, dst: ty::Id, src: ty::Id, tys: &Types) -> Option<fn(u8, u8) -> EncodedInstr> {
+        let src_idx = tys.size_of(src).ilog2() as usize;
         Some(match self {
             Self::Sub => [
                 |a, b| sub8(a, reg::ZERO, b),
@@ -654,6 +653,13 @@ enum PLoc {
     WideReg(Reg, u16),
     Ref(Reg, u32),
 }
+impl PLoc {
+    fn reg(self) -> u8 {
+        match self {
+            PLoc::Reg(r, _) | PLoc::WideReg(r, _) | PLoc::Ref(r, _) => r,
+        }
+    }
+}
 
 struct ParamAlloc(Range<u8>);
 
diff --git a/lang/src/backend/hbvm/regalloc.rs b/lang/src/backend/hbvm/regalloc.rs
index 16d2f5f..ac358c8 100644
--- a/lang/src/backend/hbvm/regalloc.rs
+++ b/lang/src/backend/hbvm/regalloc.rs
@@ -1,4 +1,5 @@
 use {
+    super::ParamAlloc,
     crate::{
         backend::hbvm::{
             reg::{self, Reg},
@@ -32,7 +33,9 @@ impl HbvmBackend {
 
         let mut res = mem::take(&mut self.ralloc);
 
-        Regalloc::run(nodes, tys, &mut res);
+        let special_reg_count = 13u8;
+
+        Regalloc::run(nodes, tys, &mut res, special_reg_count as _);
 
         '_open_function: {
             self.emit(instrs::addi64(reg::STACK_PTR, reg::STACK_PTR, 0));
         }
 
         if let Some(PLoc::Ref(..)) = tys.parama(sig.ret).0 {
-            res.node_to_reg[MEM as usize] = res.bundles.len() as u8 + 1;
-            res.bundles.push(Bundle::new(0));
+            res.node_to_reg[MEM as usize] = res.general_bundles.len() as u8 + 1;
+            res.general_bundles.push(Bundle::default());
         }
 
-        let reg_offset = if tail { reg::RET + 12 } else { reg::RET_ADDR + 1 };
-        let bundle_count = res.bundles.len() + (reg_offset as usize);
+        let reg_offset = if tail { special_reg_count } else { reg::RET_ADDR + 1 };
+        let bundle_count = res.general_bundles.len() + (reg_offset as usize);
 
         res.node_to_reg.iter_mut().filter(|r| **r != 0).for_each(|r| {
             if *r == u8::MAX {
@@ -325,6 +328,7 @@ impl HbvmBackend {
                         node.ty,
                         tys.inner_of(nodes[node.inputs[1]].ty)
                             .unwrap_or(nodes[node.inputs[1]].ty),
+                        tys,
                     )
                     .unwrap_or_else(|| {
                         panic!(
@@ -521,7 +525,7 @@ impl HbvmBackend {
                     if tail {
                         bundle_count.saturating_sub(reg::RET_ADDR as _)
                     } else {
-                        self.ralloc.bundles.len()
+                        self.ralloc.general_bundles.len()
                     },
                     tail,
                 )
@@ -770,7 +774,13 @@ impl Nodes {
         nid
     }
 
-    fn uses_of(&self, nid: Nid, types: &Types, stack: &mut Vec<Nid>, buf: &mut Vec<(Nid, Nid)>) {
+    fn uses_of(
+        &self,
+        nid: Nid,
+        types: &Types,
+        stack: &mut Vec<Nid>,
+        buf: &mut Vec<(Nid, Nid, Reg)>,
+    ) {
         debug_assert!(stack.is_empty());
         debug_assert!(buf.is_empty());
 
@@ -786,13 +796,36 @@ impl Nodes {
                 continue;
             }
             if self.is_unlocked(o) {
-                buf.push((self.use_block_of(exp, o), o));
+                buf.push((self.use_block_of(exp, o), o, self.use_reg_of(exp, o)));
             } else {
                 stack.push(o);
             }
         }
     }
+
+    fn init_loc_of(&self, def: Nid, types: &Types) -> Reg {
+        if self[def].kind == Kind::Arg {
+            let mut parama = ParamAlloc(0..11);
+            let (_, ploc) = self[VOID]
+                .outputs
+                .iter()
+                .skip(ARG_START)
+                .map(|&n| (n, parama.next(self[n].ty, types)))
+                .find(|&(n, _)| n == def)
+                .unwrap();
+
+            return ploc.unwrap().reg();
+        }
+
+        255
+    }
+
+    fn use_reg_of(&self, def: Nid, usage: Nid) -> Reg {
+        if matches!(self[usage].kind, Kind::Return { .. }) {}
+
+        255
+    }
 }
 
 struct Regalloc<'a> {
@@ -815,27 +848,35 @@ impl<'a> Regalloc<'a> {
         self.res.backrefs[nid as usize]
     }
 
-    fn run(ctx: &'a Nodes, tys: &'a Types, res: &'a mut Res) {
-        Self { nodes: ctx, tys, res }.run_low();
+    fn run(ctx: &'a Nodes, tys: &'a Types, res: &'a mut Res, special_count: usize) {
+        Self { nodes: ctx, tys, res }.run_low(special_count);
     }
 
-    fn run_low(&mut self) {
-        self.res.bundles.clear();
+    fn run_low(&mut self, special_count: usize) {
+        self.res.general_bundles.clear();
         self.res.node_to_reg.clear();
         #[cfg(debug_assertions)]
         self.res.marked.clear();
         self.res.node_to_reg.resize(self.nodes.vreg_count(), 0);
+        self.res.call_set.clear();
+
+        for (i, &instr) in self.res.instrs.iter().enumerate() {
+            if self.nodes[instr].kind.is_call() {
+                self.res.call_set.add_one(i);
+            }
+        }
 
         debug_assert!(self.res.dfs_buf.is_empty());
 
         let mut uses_buf = Vec::new();
-        let mut bundle = Bundle::new(self.res.instrs.len());
+        let mut range_buf = Vec::new();
+        let mut bundle = Bundle::default();
         self.res.visited.clear(self.nodes.len());
 
         for i in (0..self.res.blocks.len()).rev() {
             for [a, rest @ ..] in self.nodes.phi_inputs_of(self.res.blocks[i].entry) {
                 if self.res.visited.set(a) {
-                    self.append_bundle(a, &mut bundle, None, &mut uses_buf);
+                    self.append_bundle(a, &mut bundle, None, &mut uses_buf, &mut range_buf);
                 }
 
                 for r in rest {
@@ -848,6 +889,7 @@ impl<'a> Regalloc<'a> {
                         &mut bundle,
                         Some(self.res.node_to_reg[a as usize] as usize - 1),
                         &mut uses_buf,
+                        &mut range_buf,
                     );
                 }
             }
@@ -858,7 +900,7 @@ impl<'a> Regalloc<'a> {
             if self.nodes[inst].has_no_value() || self.res.visited.get(inst) || inst == 0 {
                 continue;
             }
-            self.append_bundle(inst, &mut bundle, None, &mut uses_buf);
+            self.append_bundle(inst, &mut bundle, None, &mut uses_buf, &mut range_buf);
         }
         self.res.instrs = instrs;
     }
@@ -868,12 +910,16 @@ impl<'a> Regalloc<'a> {
         inst: Nid,
         tmp: &mut Bundle,
         prefered: Option<usize>,
-        uses_buf: &mut Vec<(Nid, Nid)>,
+        uses_buf: &mut Vec<(Nid, Nid, Reg)>,
+        range_buf: &mut Vec<Range<usize>>,
     ) {
         let dom = self.nodes.idom_of(inst);
         self.res.dfs_seem.clear(self.nodes.len());
         self.nodes.uses_of(inst, self.tys, &mut self.res.dfs_buf, uses_buf);
-        for (cursor, uinst) in uses_buf.drain(..) {
+        let mut prefered_reg = reg::ZERO;
+        for (cursor, uinst, reg) in uses_buf.drain(..) {
+            prefered_reg = prefered_reg.min(reg);
+
            if !self.res.dfs_seem.set(uinst) {
                 continue;
             }
@@ -903,8 +949,22 @@ impl<'a> Regalloc<'a> {
                 range.end = new;
                 debug_assert!(range.start < range.end, "{:?} {inst} {uinst}", range);
 
-                tmp.add(range);
+                range_buf.push(range)
             });
+
+            range_buf.sort_unstable_by_key(|r| r.start);
+            range_buf.dedup_by(|a, b| {
+                if b.end == a.start {
+                    b.end = a.end;
+                    true
+                } else {
+                    false
+                }
+            });
+
+            for range in range_buf.drain(..) {
+                tmp.add(range);
+            }
         }
 
         if tmp.is_empty() {
@@ -913,23 +973,23 @@ impl<'a> Regalloc<'a> {
         }
 
         if let Some(prefered) = prefered
-            && !self.res.bundles[prefered].overlaps(tmp)
+            && !self.res.general_bundles[prefered].overlaps(tmp)
         {
-            self.res.bundles[prefered].merge(tmp);
+            self.res.general_bundles[prefered].merge(tmp);
             tmp.clear();
             self.res.node_to_reg[inst as usize] = prefered as Reg + 1;
             return;
         }
 
-        match self.res.bundles.iter_mut().enumerate().find(|(_, b)| !b.overlaps(tmp)) {
+        match self.res.general_bundles.iter_mut().enumerate().find(|(_, b)| !b.overlaps(tmp)) {
             Some((i, other)) => {
                 other.merge(tmp);
                 tmp.clear();
                 self.res.node_to_reg[inst as usize] = i as Reg + 1;
             }
             None => {
-                self.res.bundles.push(tmp.take());
-                self.res.node_to_reg[inst as usize] = self.res.bundles.len() as Reg;
+                self.res.general_bundles.push(tmp.take());
+                self.res.node_to_reg[inst as usize] = self.res.general_bundles.len() as Reg;
             }
         }
     }
@@ -980,7 +1040,8 @@ pub(super) struct Res {
     instrs: Vec<Nid>,
     backrefs: Vec<Nid>,
 
-    bundles: Vec<Bundle>,
+    general_bundles: Vec<Bundle>,
+    call_set: Bundle,
     node_to_reg: Vec<Reg>,
 
     visited: BitSet,
@@ -991,37 +1052,83 @@ pub(super) struct Res {
 }
 
 struct Bundle {
-    taken: Vec<bool>,
+    start: usize,
+    end: usize,
+    usage: BitSet,
+}
+
+impl Default for Bundle {
+    fn default() -> Self {
+        Self { start: usize::MAX, end: 0, usage: Default::default() }
+    }
 }
 
 impl Bundle {
-    fn new(size: usize) -> Self {
-        Self { taken: vec![false; size] }
-    }
-
     fn add(&mut self, range: Range<usize>) {
-        self.taken[range].fill(true);
+        debug_assert!(!range.is_empty());
+        debug_assert!(range.start / BitSet::UNIT >= self.start || self.start == usize::MAX);
+        self.start = self.start.min(range.start / BitSet::UNIT);
+        self.end = self.end.max(range.end.div_ceil(BitSet::UNIT));
+        let proj_range =
+            range.start - self.start * BitSet::UNIT..range.end - self.start * BitSet::UNIT;
+        self.usage.set_range(proj_range)
     }
 
-    fn overlaps(&self, other: &Self) -> bool {
-        self.taken.iter().zip(other.taken.iter()).any(|(a, b)| a & b)
+    fn overlaps(&self, othr: &Self) -> bool {
+        let overlap = self.start.max(othr.start)..self.end.min(othr.end);
+
+        if overlap.start >= overlap.end {
+            return false;
+        }
+
+        let [mut sslot, mut oslot] = [0, 0];
+        let sunits =
+            &self.usage.units(&mut sslot)[overlap.start - self.start..overlap.end - self.start];
+        let ounits =
+            &othr.usage.units(&mut oslot)[overlap.start - othr.start..overlap.end - othr.start];
+
+        debug_assert_eq!(sunits.len(), ounits.len());
+
+        let res = sunits.iter().zip(ounits).any(|(a, b)| (a & b) != 0);
+        res
     }
 
-    fn merge(&mut self, other: &Self) {
-        debug_assert!(!self.overlaps(other));
-        self.taken.iter_mut().zip(other.taken.iter()).for_each(|(a, b)| *a |= *b);
+    fn merge(&mut self, othr: &Self) {
+        debug_assert!(!self.overlaps(othr));
+        debug_assert!(self.start <= othr.start || self.start == usize::MAX);
+
+        self.usage.reserve((othr.end - self.start) * BitSet::UNIT);
+        self.start = self.start.min(othr.start);
+        self.end = self.end.max(othr.end);
+
+        let sunits =
+            &mut self.usage.units_mut().unwrap()[othr.start - self.start..othr.end - self.start];
+        let mut oslot = 0;
+        let ounits = othr.usage.units(&mut oslot);
+        sunits.iter_mut().zip(ounits).for_each(|(a, b)| *a |= *b);
     }
 
     fn clear(&mut self) {
-        self.taken.fill(false);
+        self.start = usize::MAX;
+        self.end = 0;
+        self.usage.clear_as_is();
     }
 
     fn is_empty(&self) -> bool {
-        !self.taken.contains(&true)
+        self.end == 0
     }
 
     fn take(&mut self) -> Self {
-        mem::replace(self, Self::new(self.taken.len()))
+        let mut new = Self { start: 0, ..Self::default() };
+        new.merge(self);
+        self.clear();
+        new
+    }
+
+    fn add_one(&mut self, i: usize) {
+        self.start = self.start.min(i / BitSet::UNIT);
+        self.end = self.end.max(i.div_ceil(BitSet::UNIT));
+        self.usage.set(i as _);
     }
 }
 
diff --git a/lang/src/lib.rs b/lang/src/lib.rs
index 7e17882..041681d 100644
--- a/lang/src/lib.rs
+++ b/lang/src/lib.rs
@@ -24,7 +24,8 @@
     iter_next_chunk,
     pointer_is_aligned_to,
     maybe_uninit_fill,
-    array_chunks
+    array_chunks,
+    array_windows
 )]
 #![warn(clippy::dbg_macro)]
 #![expect(internal_features)]
diff --git a/lang/src/ty.rs b/lang/src/ty.rs
index af8018e..8262e83 100644
--- a/lang/src/ty.rs
+++ b/lang/src/ty.rs
@@ -184,11 +184,13 @@ impl Id {
     }
 
     pub fn is_unsigned(self) -> bool {
-        matches!(self.repr(), U8..=UINT) || self.is_never()
+        matches!(self.repr(), U8..=UINT)
+            || self.is_never()
+            || matches!(self.expand(), Kind::Enum(_))
     }
 
     pub fn is_integer(self) -> bool {
-        matches!(self.repr(), U8..=INT) || self.is_never()
+        self.is_signed() || self.is_unsigned()
     }
 
     pub fn is_never(self) -> bool {
diff --git a/lang/src/utils.rs b/lang/src/utils.rs
index 86db0dd..2fce52a 100644
--- a/lang/src/utils.rs
+++ b/lang/src/utils.rs
@@ -1,4 +1,3 @@
-#![expect(dead_code)]
 use {
     alloc::alloc,
     core::{
@@ -7,7 +6,7 @@ use {
         hint::unreachable_unchecked,
         marker::PhantomData,
         mem::MaybeUninit,
-        ops::{Deref, DerefMut, Not},
+        ops::{Deref, DerefMut, Not, Range},
         ptr::Unique,
     },
 };
@@ -32,9 +31,10 @@ pub fn is_screaming_case(str: &str) -> Result<(), &'static str> {
 }
 
 type Nid = u16;
+type BitSetUnit = usize;
 
 pub union BitSet {
-    inline: usize,
+    inline: BitSetUnit,
     alloced: Unique<AllocedBitSet>,
 }
 
@@ -78,9 +78,9 @@ impl Default for BitSet {
 }
 
 impl BitSet {
-    const FLAG: usize = 1 << (Self::UNIT - 1);
+    const FLAG: BitSetUnit = 1 << (Self::UNIT - 1);
     const INLINE_ELEMS: usize = Self::UNIT - 1;
-    const UNIT: usize = core::mem::size_of::<usize>() * 8;
+    pub const UNIT: usize = core::mem::size_of::<BitSetUnit>() * 8;
 
     pub fn with_capacity(len: usize) -> Self {
         let mut s = Self::default();
@@ -92,7 +92,7 @@ impl BitSet {
         unsafe { self.inline & Self::FLAG != 0 }
     }
 
-    fn data_and_len(&self) -> (&[usize], usize) {
+    fn data_and_len(&self) -> (&[BitSetUnit], usize) {
         unsafe {
             if self.is_inline() {
                 (core::slice::from_ref(&self.inline), Self::INLINE_ELEMS)
             } else {
                 let small_vec = self.alloced.as_ref();
                 (
                     core::slice::from_raw_parts(
-                        &small_vec.data as *const _ as *const usize,
+                        &small_vec.data as *const _ as *const BitSetUnit,
                         small_vec.cap,
                     ),
-                    small_vec.cap * core::mem::size_of::<usize>() * 8,
+                    small_vec.cap * Self::UNIT,
                 )
             }
         }
     }
 
-    fn data_mut_and_len(&mut self) -> (&mut [usize], usize) {
+    fn data_mut_and_len(&mut self) -> (&mut [BitSetUnit], usize) {
         unsafe {
             if self.is_inline() {
                 (core::slice::from_mut(&mut self.inline), INLINE_ELEMS)
             } else {
                 let small_vec = self.alloced.as_mut();
                 (
                     core::slice::from_raw_parts_mut(
-                        &mut small_vec.data as *mut _ as *mut usize,
+                        &mut small_vec.data as *mut _ as *mut BitSetUnit,
                         small_vec.cap,
                     ),
                     small_vec.cap * Self::UNIT,
         let (ptr, prev_len) = unsafe {
             if self.is_inline() {
                 let ptr = alloc::alloc(layout);
-                *ptr.add(off).cast::<usize>() = self.inline & !Self::FLAG;
+                *ptr.add(off).cast::<BitSetUnit>() = self.inline & !Self::FLAG;
                 (ptr, 1)
             } else {
                 let prev_len = self.alloced.as_ref().cap;
         unsafe {
             MaybeUninit::fill(
                 core::slice::from_raw_parts_mut(
-                    ptr.add(off).cast::<MaybeUninit<usize>>().add(prev_len),
+                    ptr.add(off).cast::<MaybeUninit<BitSetUnit>>().add(prev_len),
                     slot_count - prev_len,
                 ),
                 0,
             );
     fn layout(slot_count: usize) -> (core::alloc::Layout, usize) {
         unsafe {
             core::alloc::Layout::new::<AllocedBitSet>()
-                .extend(Layout::array::<usize>(slot_count).unwrap_unchecked())
+                .extend(Layout::array::<BitSetUnit>(slot_count).unwrap_unchecked())
                 .unwrap_unchecked()
         }
     }
@@ -205,6 +205,10 @@
     pub fn clear(&mut self, len: usize) {
         self.reserve(len);
+        self.clear_as_is();
+    }
+
+    pub fn clear_as_is(&mut self) {
         if self.is_inline() {
             unsafe { self.inline &= Self::FLAG };
         } else {
@@ -212,7 +216,11 @@
         }
     }
 
-    pub fn units<'a>(&'a self, slot: &'a mut usize) -> &'a [usize] {
+    pub fn approx_unit_cap(&self) -> usize {
+        self.data_and_len().0.len()
+    }
+
+    pub fn units<'a>(&'a self, slot: &'a mut BitSetUnit) -> &'a [BitSetUnit] {
         if self.is_inline() {
             *slot = unsafe { self.inline } & !Self::FLAG;
             core::slice::from_ref(slot)
@@ -221,36 +229,47 @@
         }
     }
 
+    pub fn units_mut(&mut self) -> Option<&mut [BitSetUnit]> {
+        self.is_inline().not().then(|| self.data_mut_and_len().0)
+    }
+
     pub fn reserve(&mut self, len: usize) {
         if len > self.data_and_len().1 {
             self.grow(len.next_power_of_two().max(4 * Self::UNIT));
         }
     }
 
-    pub fn units_mut(&mut self) -> Result<&mut [usize], &mut InlineBitSetView> {
-        if self.is_inline() {
-            Err(unsafe {
-                core::mem::transmute::<&mut usize, &mut InlineBitSetView>(&mut self.inline)
-            })
-        } else {
-            Ok(self.data_mut_and_len().0)
+    pub fn set_range(&mut self, proj_range: Range<usize>) {
+        if proj_range.is_empty() {
+            return;
         }
-    }
-}
 
-pub struct InlineBitSetView(usize);
+        self.reserve(proj_range.end);
+        let (units, _) = self.data_mut_and_len();
 
-impl InlineBitSetView {
-    pub(crate) fn add_mask(&mut self, tmp: usize) {
-        debug_assert!(tmp & BitSet::FLAG == 0);
-        self.0 |= tmp;
+        if proj_range.start / Self::UNIT == (proj_range.end - 1) / Self::UNIT {
+            debug_assert!(proj_range.len() <= Self::UNIT);
+            let mask = ((1 << proj_range.len()) - 1) << (proj_range.start % Self::UNIT);
+            units[proj_range.start / Self::UNIT] |= mask;
+        } else {
+            let fill_range = proj_range.start.div_ceil(Self::UNIT)..proj_range.end / Self::UNIT;
+            units[fill_range].fill(BitSetUnit::MAX);
+
+            let prefix_len = Self::UNIT - proj_range.start % Self::UNIT;
+            let prefix_mask = ((1 << prefix_len) - 1) << (proj_range.start % Self::UNIT);
+            units[proj_range.start / Self::UNIT] |= prefix_mask;
+
+            let postfix_len = proj_range.end % Self::UNIT;
+            let postfix_mask = (1 << postfix_len) - 1;
+            units[proj_range.end / Self::UNIT] |= postfix_mask;
+        }
     }
 }
 
 pub struct BitSetIter<'a> {
     index: usize,
-    current: usize,
-    remining: &'a [usize],
+    current: BitSetUnit,
+    remining: &'a [BitSetUnit],
 }
 
 impl Iterator for BitSetIter<'_> {
@@ -270,7 +289,7 @@ impl Iterator for BitSetIter<'_> {
 
 struct AllocedBitSet {
     cap: usize,
-    data: [usize; 0],
+    data: [BitSetUnit; 0],
 }
 
 #[cfg(test)]
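
A note on what the regalloc change buys: the old Bundle kept one bool per
instruction for the whole function and tested interference by zipping two full
Vec<bool>s. The new Bundle is a windowed bitset: a start..end range of
machine-word units plus packed usage bits, so two bundles whose instruction
windows never meet are rejected from the bounds alone, and the rest compare a
word at a time. A minimal standalone sketch of the idea (illustrative names; a
plain Vec<u64> stands in for the crate's inline-optimized BitSet, and bits are
set one at a time where the patch uses set_range masks):

    const UNIT: usize = u64::BITS as usize;

    struct Bundle {
        start: usize,    // first occupied unit (word) index
        end: usize,      // one past the last occupied unit index
        usage: Vec<u64>, // liveness bits for units start..end
    }

    impl Bundle {
        fn new() -> Self {
            Self { start: usize::MAX, end: 0, usage: Vec::new() }
        }

        // Mark instruction indices `range` as live. Like the patched add(),
        // this assumes ranges arrive with non-decreasing start units.
        fn add(&mut self, range: std::ops::Range<usize>) {
            assert!(self.start == usize::MAX || range.start / UNIT >= self.start);
            self.start = self.start.min(range.start / UNIT);
            self.end = self.end.max(range.end.div_ceil(UNIT));
            self.usage.resize(self.end - self.start, 0);
            for i in range {
                self.usage[i / UNIT - self.start] |= 1 << (i % UNIT);
            }
        }

        // A register can be shared iff no word in the window where both
        // bundles coexist has a common bit; disjoint windows are free.
        fn overlaps(&self, other: &Self) -> bool {
            let (lo, hi) = (self.start.max(other.start), self.end.min(other.end));
            lo < hi
                && self.usage[lo - self.start..hi - self.start]
                    .iter()
                    .zip(&other.usage[lo - other.start..hi - other.start])
                    .any(|(a, b)| a & b != 0)
        }
    }

    fn main() {
        let (mut a, mut b) = (Bundle::new(), Bundle::new());
        a.add(3..40);
        b.add(40..90); // ranges are half-open: instruction 40 is only in `b`
        assert!(!a.overlaps(&b)); // the two values can share a register
    }

The monotonic-start assertion mirrors the debug_assert in the patched add():
windows only grow to the right, which is what lets merge() OR the other
bundle's words into an existing prefix without shifting anything.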
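append_bundle feeds that representation: instead of adding each live range as
it is discovered, ranges are now buffered in range_buf, sorted, and
back-to-back ranges are fused before being added, which lines up with add()'s
ascending-start assertion and turns clusters of small set_range calls into
fewer, wider ones. The dedup_by step is the subtle part — Vec::dedup_by keeps
the earlier element and drops the later one whenever the closure answers true.
A self-contained sketch using the same closure as the patch:

    use std::ops::Range;

    // Sort the collected live ranges, then fuse ranges that touch, so each
    // position reaches the bundle once and starts arrive in ascending order.
    fn coalesce(ranges: &mut Vec<Range<usize>>) {
        ranges.sort_unstable_by_key(|r| r.start);
        // `b` is the earlier, retained range; `a` is the later candidate.
        // Extending b.end and returning true fuses the two and removes `a`.
        ranges.dedup_by(|a, b| {
            if b.end == a.start {
                b.end = a.end;
                true
            } else {
                false
            }
        });
    }

    fn main() {
        let mut live = vec![12..14, 3..7, 7..12];
        coalesce(&mut live);
        assert_eq!(live, vec![3..14]); // three touching ranges become one
    }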
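set_range itself fills a bit range with three masks rather than a per-bit
loop: a single mask when the range sits inside one unit, otherwise MAX-filled
middle units plus prefix and postfix masks at the two boundary units. Below is
a standalone cross-check of that arithmetic against a naive loop. The boundary
masks here are written in overflow-safe `usize::MAX >> ...` form — an
assumption of this sketch, not the patch's exact expressions — because
`(1 << len) - 1` shifts by the full unit width whenever a range boundary lands
exactly on a unit edge:

    const UNIT: usize = usize::BITS as usize;

    fn set_range_masked(units: &mut [usize], r: std::ops::Range<usize>) {
        if r.is_empty() {
            return;
        }
        if r.start / UNIT == (r.end - 1) / UNIT {
            // whole range inside one unit: `len` ones shifted to the offset
            let mask = (usize::MAX >> (UNIT - r.len())) << (r.start % UNIT);
            units[r.start / UNIT] |= mask;
        } else {
            // full units strictly between the two boundary units
            for u in &mut units[r.start.div_ceil(UNIT)..r.end / UNIT] {
                *u = usize::MAX;
            }
            // prefix: every bit from the start offset up to the unit's top
            units[r.start / UNIT] |= usize::MAX << (r.start % UNIT);
            // postfix: the low `r.end % UNIT` bits of the final unit, if any
            if r.end % UNIT != 0 {
                units[r.end / UNIT] |= (1usize << (r.end % UNIT)) - 1;
            }
        }
    }

    fn main() {
        for (s, e) in [(3, 9), (60, 70), (0, 64), (64, 129), (32, 192)] {
            let (mut fast, mut slow) = (vec![0usize; 4], vec![0usize; 4]);
            set_range_masked(&mut fast, s..e);
            (s..e).for_each(|i| slow[i / UNIT] |= 1 << (i % UNIT));
            assert_eq!(fast, slow, "{s}..{e}");
        }
    }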