optimizing the bitset used in register allocation

also fixing an enum bug

Signed-off-by: Jakub Doka <jakub.doka2@gmail.com>
This commit is contained in:
Jakub Doka 2024-12-19 19:43:30 +01:00
parent cfd3eac0a8
commit 969ea57e3f
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
5 changed files with 211 additions and 76 deletions

View file

@ -8,7 +8,7 @@ use {
utils::{EntSlice, EntVec},
},
alloc::{boxed::Box, collections::BTreeMap, string::String, vec::Vec},
core::{assert_matches::debug_assert_matches, mem, ops::Range},
core::{assert_matches::debug_assert_matches, mem, ops::Range, usize},
hbbytecode::{self as instrs, *},
reg::Reg,
};
@ -606,9 +606,8 @@ impl TokenKind {
Some(ops[size.ilog2() as usize])
}
fn unop(&self, dst: ty::Id, src: ty::Id) -> Option<fn(u8, u8) -> EncodedInstr> {
let src_idx =
src.simple_size().unwrap_or_else(|| panic!("{:?}", src.expand())).ilog2() as usize;
fn unop(&self, dst: ty::Id, src: ty::Id, tys: &Types) -> Option<fn(u8, u8) -> EncodedInstr> {
let src_idx = tys.size_of(src).ilog2() as usize;
Some(match self {
Self::Sub => [
|a, b| sub8(a, reg::ZERO, b),
@ -654,6 +653,13 @@ enum PLoc {
WideReg(Reg, u16),
Ref(Reg, u32),
}
impl PLoc {
fn reg(self) -> u8 {
match self {
PLoc::Reg(r, _) | PLoc::WideReg(r, _) | PLoc::Ref(r, _) => r,
}
}
}
struct ParamAlloc(Range<Reg>);

View file

@ -1,4 +1,5 @@
use {
super::ParamAlloc,
crate::{
backend::hbvm::{
reg::{self, Reg},
@ -32,7 +33,9 @@ impl HbvmBackend {
let mut res = mem::take(&mut self.ralloc);
Regalloc::run(nodes, tys, &mut res);
let special_reg_count = 13u8;
Regalloc::run(nodes, tys, &mut res, special_reg_count as _);
'_open_function: {
self.emit(instrs::addi64(reg::STACK_PTR, reg::STACK_PTR, 0));
@ -40,12 +43,12 @@ impl HbvmBackend {
}
if let Some(PLoc::Ref(..)) = tys.parama(sig.ret).0 {
res.node_to_reg[MEM as usize] = res.bundles.len() as u8 + 1;
res.bundles.push(Bundle::new(0));
res.node_to_reg[MEM as usize] = res.general_bundles.len() as u8 + 1;
res.general_bundles.push(Bundle::default());
}
let reg_offset = if tail { reg::RET + 12 } else { reg::RET_ADDR + 1 };
let bundle_count = res.bundles.len() + (reg_offset as usize);
let reg_offset = if tail { special_reg_count } else { reg::RET_ADDR + 1 };
let bundle_count = res.general_bundles.len() + (reg_offset as usize);
res.node_to_reg.iter_mut().filter(|r| **r != 0).for_each(|r| {
if *r == u8::MAX {
@ -325,6 +328,7 @@ impl HbvmBackend {
node.ty,
tys.inner_of(nodes[node.inputs[1]].ty)
.unwrap_or(nodes[node.inputs[1]].ty),
tys,
)
.unwrap_or_else(|| {
panic!(
@ -521,7 +525,7 @@ impl HbvmBackend {
if tail {
bundle_count.saturating_sub(reg::RET_ADDR as _)
} else {
self.ralloc.bundles.len()
self.ralloc.general_bundles.len()
},
tail,
)
@ -770,7 +774,13 @@ impl Nodes {
nid
}
fn uses_of(&self, nid: Nid, types: &Types, stack: &mut Vec<Nid>, buf: &mut Vec<(Nid, Nid)>) {
fn uses_of(
&self,
nid: Nid,
types: &Types,
stack: &mut Vec<Nid>,
buf: &mut Vec<(Nid, Nid, Reg)>,
) {
debug_assert!(stack.is_empty());
debug_assert!(buf.is_empty());
@ -786,13 +796,36 @@ impl Nodes {
continue;
}
if self.is_unlocked(o) {
buf.push((self.use_block_of(exp, o), o));
buf.push((self.use_block_of(exp, o), o, self.use_reg_of(exp, o)));
} else {
stack.push(o);
}
}
}
}
fn init_loc_of(&self, def: Nid, types: &Types) -> Reg {
if self[def].kind == Kind::Arg {
let mut parama = ParamAlloc(0..11);
let (_, ploc) = self[VOID]
.outputs
.iter()
.skip(ARG_START)
.map(|&n| (n, parama.next(self[n].ty, types)))
.find(|&(n, _)| n == def)
.unwrap();
return ploc.unwrap().reg();
}
255
}
fn use_reg_of(&self, def: Nid, usage: Nid) -> Reg {
if matches!(self[usage].kind, Kind::Return { .. }) {}
255
}
}
struct Regalloc<'a> {
@ -815,27 +848,35 @@ impl<'a> Regalloc<'a> {
self.res.backrefs[nid as usize]
}
fn run(ctx: &'a Nodes, tys: &'a Types, res: &'a mut Res) {
Self { nodes: ctx, tys, res }.run_low();
fn run(ctx: &'a Nodes, tys: &'a Types, res: &'a mut Res, special_count: usize) {
Self { nodes: ctx, tys, res }.run_low(special_count);
}
fn run_low(&mut self) {
self.res.bundles.clear();
fn run_low(&mut self, special_count: usize) {
self.res.general_bundles.clear();
self.res.node_to_reg.clear();
#[cfg(debug_assertions)]
self.res.marked.clear();
self.res.node_to_reg.resize(self.nodes.vreg_count(), 0);
self.res.call_set.clear();
for (i, &instr) in self.res.instrs.iter().enumerate() {
if self.nodes[instr].kind.is_call() {
self.res.call_set.add_one(i);
}
}
debug_assert!(self.res.dfs_buf.is_empty());
let mut uses_buf = Vec::new();
let mut bundle = Bundle::new(self.res.instrs.len());
let mut range_buf = Vec::new();
let mut bundle = Bundle::default();
self.res.visited.clear(self.nodes.len());
for i in (0..self.res.blocks.len()).rev() {
for [a, rest @ ..] in self.nodes.phi_inputs_of(self.res.blocks[i].entry) {
if self.res.visited.set(a) {
self.append_bundle(a, &mut bundle, None, &mut uses_buf);
self.append_bundle(a, &mut bundle, None, &mut uses_buf, &mut range_buf);
}
for r in rest {
@ -848,6 +889,7 @@ impl<'a> Regalloc<'a> {
&mut bundle,
Some(self.res.node_to_reg[a as usize] as usize - 1),
&mut uses_buf,
&mut range_buf,
);
}
}
@ -858,7 +900,7 @@ impl<'a> Regalloc<'a> {
if self.nodes[inst].has_no_value() || self.res.visited.get(inst) || inst == 0 {
continue;
}
self.append_bundle(inst, &mut bundle, None, &mut uses_buf);
self.append_bundle(inst, &mut bundle, None, &mut uses_buf, &mut range_buf);
}
self.res.instrs = instrs;
}
@ -868,12 +910,16 @@ impl<'a> Regalloc<'a> {
inst: Nid,
tmp: &mut Bundle,
prefered: Option<usize>,
uses_buf: &mut Vec<(Nid, Nid)>,
uses_buf: &mut Vec<(Nid, Nid, Reg)>,
range_buf: &mut Vec<Range<usize>>,
) {
let dom = self.nodes.idom_of(inst);
self.res.dfs_seem.clear(self.nodes.len());
self.nodes.uses_of(inst, self.tys, &mut self.res.dfs_buf, uses_buf);
for (cursor, uinst) in uses_buf.drain(..) {
let mut prefered_reg = reg::ZERO;
for (cursor, uinst, reg) in uses_buf.drain(..) {
prefered_reg = prefered_reg.min(reg);
if !self.res.dfs_seem.set(uinst) {
continue;
}
@ -903,8 +949,22 @@ impl<'a> Regalloc<'a> {
range.end = new;
debug_assert!(range.start < range.end, "{:?} {inst} {uinst}", range);
tmp.add(range);
range_buf.push(range)
});
range_buf.sort_unstable_by_key(|r| r.start);
range_buf.dedup_by(|a, b| {
if b.end == a.start {
b.end = a.end;
true
} else {
false
}
});
for range in range_buf.drain(..) {
tmp.add(range);
}
}
if tmp.is_empty() {
@ -913,23 +973,23 @@ impl<'a> Regalloc<'a> {
}
if let Some(prefered) = prefered
&& !self.res.bundles[prefered].overlaps(tmp)
&& !self.res.general_bundles[prefered].overlaps(tmp)
{
self.res.bundles[prefered].merge(tmp);
self.res.general_bundles[prefered].merge(tmp);
tmp.clear();
self.res.node_to_reg[inst as usize] = prefered as Reg + 1;
return;
}
match self.res.bundles.iter_mut().enumerate().find(|(_, b)| !b.overlaps(tmp)) {
match self.res.general_bundles.iter_mut().enumerate().find(|(_, b)| !b.overlaps(tmp)) {
Some((i, other)) => {
other.merge(tmp);
tmp.clear();
self.res.node_to_reg[inst as usize] = i as Reg + 1;
}
None => {
self.res.bundles.push(tmp.take());
self.res.node_to_reg[inst as usize] = self.res.bundles.len() as Reg;
self.res.general_bundles.push(tmp.take());
self.res.node_to_reg[inst as usize] = self.res.general_bundles.len() as Reg;
}
}
}
@ -980,7 +1040,8 @@ pub(super) struct Res {
instrs: Vec<Nid>,
backrefs: Vec<u16>,
bundles: Vec<Bundle>,
general_bundles: Vec<Bundle>,
call_set: Bundle,
node_to_reg: Vec<Reg>,
visited: BitSet,
@ -991,37 +1052,83 @@ pub(super) struct Res {
}
struct Bundle {
taken: Vec<bool>,
start: usize,
end: usize,
usage: BitSet,
}
impl Default for Bundle {
fn default() -> Self {
Self { start: usize::MAX, end: 0, usage: Default::default() }
}
}
impl Bundle {
fn new(size: usize) -> Self {
Self { taken: vec![false; size] }
}
fn add(&mut self, range: Range<usize>) {
self.taken[range].fill(true);
debug_assert!(!range.is_empty());
debug_assert!(range.start / BitSet::UNIT >= self.start || self.start == usize::MAX);
self.start = self.start.min(range.start / BitSet::UNIT);
self.end = self.end.max(range.end.div_ceil(BitSet::UNIT));
let proj_range =
range.start - self.start * BitSet::UNIT..range.end - self.start * BitSet::UNIT;
self.usage.set_range(proj_range)
}
fn overlaps(&self, other: &Self) -> bool {
self.taken.iter().zip(other.taken.iter()).any(|(a, b)| a & b)
fn overlaps(&self, othr: &Self) -> bool {
let overlap = self.start.max(othr.start)..self.end.min(othr.end);
if overlap.start >= overlap.end {
return false;
}
let [mut sslot, mut oslot] = [0, 0];
let sunits =
&self.usage.units(&mut sslot)[overlap.start - self.start..overlap.end - self.start];
let ounits =
&othr.usage.units(&mut oslot)[overlap.start - othr.start..overlap.end - othr.start];
debug_assert_eq!(sunits.len(), ounits.len());
let res = sunits.iter().zip(ounits).any(|(a, b)| (a & b) != 0);
res
}
fn merge(&mut self, other: &Self) {
debug_assert!(!self.overlaps(other));
self.taken.iter_mut().zip(other.taken.iter()).for_each(|(a, b)| *a |= *b);
fn merge(&mut self, othr: &Self) {
debug_assert!(!self.overlaps(othr));
debug_assert!(self.start <= othr.start || self.start == usize::MAX);
self.usage.reserve((othr.end - self.start) * BitSet::UNIT);
self.start = self.start.min(othr.start);
self.end = self.end.max(othr.end);
let sunits =
&mut self.usage.units_mut().unwrap()[othr.start - self.start..othr.end - self.start];
let mut oslot = 0;
let ounits = othr.usage.units(&mut oslot);
sunits.iter_mut().zip(ounits).for_each(|(a, b)| *a |= *b);
}
fn clear(&mut self) {
self.taken.fill(false);
self.start = usize::MAX;
self.end = 0;
self.usage.clear_as_is();
}
fn is_empty(&self) -> bool {
!self.taken.contains(&true)
self.end == 0
}
fn take(&mut self) -> Self {
mem::replace(self, Self::new(self.taken.len()))
let mut new = Self { start: 0, ..Self::default() };
new.merge(self);
self.clear();
new
}
fn add_one(&mut self, i: usize) {
self.start = self.start.min(i / BitSet::UNIT);
self.end = self.end.max(i.div_ceil(BitSet::UNIT));
self.usage.set(i as _);
}
}

View file

@ -24,7 +24,8 @@
iter_next_chunk,
pointer_is_aligned_to,
maybe_uninit_fill,
array_chunks
array_chunks,
array_windows
)]
#![warn(clippy::dbg_macro)]
#![expect(internal_features)]

View file

@ -184,11 +184,13 @@ impl Id {
}
pub fn is_unsigned(self) -> bool {
matches!(self.repr(), U8..=UINT) || self.is_never()
matches!(self.repr(), U8..=UINT)
|| self.is_never()
|| matches!(self.expand(), Kind::Enum(_))
}
pub fn is_integer(self) -> bool {
matches!(self.repr(), U8..=INT) || self.is_never()
self.is_signed() || self.is_unsigned()
}
pub fn is_never(self) -> bool {

View file

@ -1,4 +1,3 @@
#![expect(dead_code)]
use {
alloc::alloc,
core::{
@ -7,7 +6,7 @@ use {
hint::unreachable_unchecked,
marker::PhantomData,
mem::MaybeUninit,
ops::{Deref, DerefMut, Not},
ops::{Deref, DerefMut, Not, Range},
ptr::Unique,
},
};
@ -32,9 +31,10 @@ pub fn is_screaming_case(str: &str) -> Result<(), &'static str> {
}
type Nid = u16;
type BitSetUnit = usize;
pub union BitSet {
inline: usize,
inline: BitSetUnit,
alloced: Unique<AllocedBitSet>,
}
@ -78,9 +78,9 @@ impl Default for BitSet {
}
impl BitSet {
const FLAG: usize = 1 << (Self::UNIT - 1);
const FLAG: BitSetUnit = 1 << (Self::UNIT - 1);
const INLINE_ELEMS: usize = Self::UNIT - 1;
const UNIT: usize = core::mem::size_of::<usize>() * 8;
pub const UNIT: usize = core::mem::size_of::<BitSetUnit>() * 8;
pub fn with_capacity(len: usize) -> Self {
let mut s = Self::default();
@ -92,7 +92,7 @@ impl BitSet {
unsafe { self.inline & Self::FLAG != 0 }
}
fn data_and_len(&self) -> (&[usize], usize) {
fn data_and_len(&self) -> (&[BitSetUnit], usize) {
unsafe {
if self.is_inline() {
(core::slice::from_ref(&self.inline), Self::INLINE_ELEMS)
@ -100,16 +100,16 @@ impl BitSet {
let small_vec = self.alloced.as_ref();
(
core::slice::from_raw_parts(
&small_vec.data as *const _ as *const usize,
&small_vec.data as *const _ as *const BitSetUnit,
small_vec.cap,
),
small_vec.cap * core::mem::size_of::<usize>() * 8,
small_vec.cap * Self::UNIT,
)
}
}
}
fn data_mut_and_len(&mut self) -> (&mut [usize], usize) {
fn data_mut_and_len(&mut self) -> (&mut [BitSetUnit], usize) {
unsafe {
if self.is_inline() {
(core::slice::from_mut(&mut self.inline), INLINE_ELEMS)
@ -117,7 +117,7 @@ impl BitSet {
let small_vec = self.alloced.as_mut();
(
core::slice::from_raw_parts_mut(
&mut small_vec.data as *mut _ as *mut usize,
&mut small_vec.data as *mut _ as *mut BitSetUnit,
small_vec.cap,
),
small_vec.cap * Self::UNIT,
@ -163,7 +163,7 @@ impl BitSet {
let (ptr, prev_len) = unsafe {
if self.is_inline() {
let ptr = alloc::alloc(layout);
*ptr.add(off).cast::<usize>() = self.inline & !Self::FLAG;
*ptr.add(off).cast::<BitSetUnit>() = self.inline & !Self::FLAG;
(ptr, 1)
} else {
let prev_len = self.alloced.as_ref().cap;
@ -174,7 +174,7 @@ impl BitSet {
unsafe {
MaybeUninit::fill(
core::slice::from_raw_parts_mut(
ptr.add(off).cast::<MaybeUninit<usize>>().add(prev_len),
ptr.add(off).cast::<MaybeUninit<BitSetUnit>>().add(prev_len),
slot_count - prev_len,
),
0,
@ -187,7 +187,7 @@ impl BitSet {
fn layout(slot_count: usize) -> (core::alloc::Layout, usize) {
unsafe {
core::alloc::Layout::new::<AllocedBitSet>()
.extend(Layout::array::<usize>(slot_count).unwrap_unchecked())
.extend(Layout::array::<BitSetUnit>(slot_count).unwrap_unchecked())
.unwrap_unchecked()
}
}
@ -205,6 +205,10 @@ impl BitSet {
pub fn clear(&mut self, len: usize) {
self.reserve(len);
self.clear_as_is();
}
pub fn clear_as_is(&mut self) {
if self.is_inline() {
unsafe { self.inline &= Self::FLAG };
} else {
@ -212,7 +216,11 @@ impl BitSet {
}
}
pub fn units<'a>(&'a self, slot: &'a mut usize) -> &'a [usize] {
pub fn approx_unit_cap(&self) -> usize {
self.data_and_len().0.len()
}
pub fn units<'a>(&'a self, slot: &'a mut BitSetUnit) -> &'a [BitSetUnit] {
if self.is_inline() {
*slot = unsafe { self.inline } & !Self::FLAG;
core::slice::from_ref(slot)
@ -221,36 +229,47 @@ impl BitSet {
}
}
pub fn units_mut(&mut self) -> Option<&mut [BitSetUnit]> {
self.is_inline().not().then(|| self.data_mut_and_len().0)
}
pub fn reserve(&mut self, len: usize) {
if len > self.data_and_len().1 {
self.grow(len.next_power_of_two().max(4 * Self::UNIT));
}
}
pub fn units_mut(&mut self) -> Result<&mut [usize], &mut InlineBitSetView> {
if self.is_inline() {
Err(unsafe {
core::mem::transmute::<&mut usize, &mut InlineBitSetView>(&mut self.inline)
})
} else {
Ok(self.data_mut_and_len().0)
pub fn set_range(&mut self, proj_range: Range<usize>) {
if proj_range.is_empty() {
return;
}
}
}
pub struct InlineBitSetView(usize);
self.reserve(proj_range.end);
let (units, _) = self.data_mut_and_len();
impl InlineBitSetView {
pub(crate) fn add_mask(&mut self, tmp: usize) {
debug_assert!(tmp & BitSet::FLAG == 0);
self.0 |= tmp;
if proj_range.start / Self::UNIT == (proj_range.end - 1) / Self::UNIT {
debug_assert!(proj_range.len() <= Self::UNIT);
let mask = ((1 << proj_range.len()) - 1) << (proj_range.start % Self::UNIT);
units[proj_range.start / Self::UNIT] |= mask;
} else {
let fill_range = proj_range.start.div_ceil(Self::UNIT)..proj_range.end / Self::UNIT;
units[fill_range].fill(BitSetUnit::MAX);
let prefix_len = Self::UNIT - proj_range.start % Self::UNIT;
let prefix_mask = ((1 << prefix_len) - 1) << (proj_range.start % Self::UNIT);
units[proj_range.start / Self::UNIT] |= prefix_mask;
let postfix_len = proj_range.end % Self::UNIT;
let postfix_mask = (1 << postfix_len) - 1;
units[proj_range.end / Self::UNIT] |= postfix_mask;
}
}
}
pub struct BitSetIter<'a> {
index: usize,
current: usize,
remining: &'a [usize],
current: BitSetUnit,
remining: &'a [BitSetUnit],
}
impl Iterator for BitSetIter<'_> {
@ -270,7 +289,7 @@ impl Iterator for BitSetIter<'_> {
struct AllocedBitSet {
cap: usize,
data: [usize; 0],
data: [BitSetUnit; 0],
}
#[cfg(test)]