forked from AbleOS/holey-bytes
optimizing the bitset used in register allocation
also fixing an enum bug

Signed-off-by: Jakub Doka <jakub.doka2@gmail.com>
This commit is contained in:
parent
cfd3eac0a8
commit
969ea57e3f
|
@ -8,7 +8,7 @@ use {
|
||||||
utils::{EntSlice, EntVec},
|
utils::{EntSlice, EntVec},
|
||||||
},
|
},
|
||||||
alloc::{boxed::Box, collections::BTreeMap, string::String, vec::Vec},
|
alloc::{boxed::Box, collections::BTreeMap, string::String, vec::Vec},
|
||||||
core::{assert_matches::debug_assert_matches, mem, ops::Range},
|
core::{assert_matches::debug_assert_matches, mem, ops::Range, usize},
|
||||||
hbbytecode::{self as instrs, *},
|
hbbytecode::{self as instrs, *},
|
||||||
reg::Reg,
|
reg::Reg,
|
||||||
};
|
};
|
||||||
|
@ -606,9 +606,8 @@ impl TokenKind {
|
||||||
Some(ops[size.ilog2() as usize])
|
Some(ops[size.ilog2() as usize])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn unop(&self, dst: ty::Id, src: ty::Id) -> Option<fn(u8, u8) -> EncodedInstr> {
|
fn unop(&self, dst: ty::Id, src: ty::Id, tys: &Types) -> Option<fn(u8, u8) -> EncodedInstr> {
|
||||||
let src_idx =
|
let src_idx = tys.size_of(src).ilog2() as usize;
|
||||||
src.simple_size().unwrap_or_else(|| panic!("{:?}", src.expand())).ilog2() as usize;
|
|
||||||
Some(match self {
|
Some(match self {
|
||||||
Self::Sub => [
|
Self::Sub => [
|
||||||
|a, b| sub8(a, reg::ZERO, b),
|
|a, b| sub8(a, reg::ZERO, b),
|
||||||
|
@ -654,6 +653,13 @@ enum PLoc {
|
||||||
WideReg(Reg, u16),
|
WideReg(Reg, u16),
|
||||||
Ref(Reg, u32),
|
Ref(Reg, u32),
|
||||||
}
|
}
|
||||||
|
impl PLoc {
|
||||||
|
fn reg(self) -> u8 {
|
||||||
|
match self {
|
||||||
|
PLoc::Reg(r, _) | PLoc::WideReg(r, _) | PLoc::Ref(r, _) => r,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct ParamAlloc(Range<Reg>);
|
struct ParamAlloc(Range<Reg>);
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
use {
|
use {
|
||||||
|
super::ParamAlloc,
|
||||||
crate::{
|
crate::{
|
||||||
backend::hbvm::{
|
backend::hbvm::{
|
||||||
reg::{self, Reg},
|
reg::{self, Reg},
|
||||||
|
@ -32,7 +33,9 @@ impl HbvmBackend {
|
||||||
|
|
||||||
let mut res = mem::take(&mut self.ralloc);
|
let mut res = mem::take(&mut self.ralloc);
|
||||||
|
|
||||||
Regalloc::run(nodes, tys, &mut res);
|
let special_reg_count = 13u8;
|
||||||
|
|
||||||
|
Regalloc::run(nodes, tys, &mut res, special_reg_count as _);
|
||||||
|
|
||||||
'_open_function: {
|
'_open_function: {
|
||||||
self.emit(instrs::addi64(reg::STACK_PTR, reg::STACK_PTR, 0));
|
self.emit(instrs::addi64(reg::STACK_PTR, reg::STACK_PTR, 0));
|
||||||
|
@ -40,12 +43,12 @@ impl HbvmBackend {
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(PLoc::Ref(..)) = tys.parama(sig.ret).0 {
|
if let Some(PLoc::Ref(..)) = tys.parama(sig.ret).0 {
|
||||||
res.node_to_reg[MEM as usize] = res.bundles.len() as u8 + 1;
|
res.node_to_reg[MEM as usize] = res.general_bundles.len() as u8 + 1;
|
||||||
res.bundles.push(Bundle::new(0));
|
res.general_bundles.push(Bundle::default());
|
||||||
}
|
}
|
||||||
|
|
||||||
let reg_offset = if tail { reg::RET + 12 } else { reg::RET_ADDR + 1 };
|
let reg_offset = if tail { special_reg_count } else { reg::RET_ADDR + 1 };
|
||||||
let bundle_count = res.bundles.len() + (reg_offset as usize);
|
let bundle_count = res.general_bundles.len() + (reg_offset as usize);
|
||||||
|
|
||||||
res.node_to_reg.iter_mut().filter(|r| **r != 0).for_each(|r| {
|
res.node_to_reg.iter_mut().filter(|r| **r != 0).for_each(|r| {
|
||||||
if *r == u8::MAX {
|
if *r == u8::MAX {
|
||||||
|
@ -325,6 +328,7 @@ impl HbvmBackend {
|
||||||
node.ty,
|
node.ty,
|
||||||
tys.inner_of(nodes[node.inputs[1]].ty)
|
tys.inner_of(nodes[node.inputs[1]].ty)
|
||||||
.unwrap_or(nodes[node.inputs[1]].ty),
|
.unwrap_or(nodes[node.inputs[1]].ty),
|
||||||
|
tys,
|
||||||
)
|
)
|
||||||
.unwrap_or_else(|| {
|
.unwrap_or_else(|| {
|
||||||
panic!(
|
panic!(
|
||||||
|
@ -521,7 +525,7 @@ impl HbvmBackend {
|
||||||
if tail {
|
if tail {
|
||||||
bundle_count.saturating_sub(reg::RET_ADDR as _)
|
bundle_count.saturating_sub(reg::RET_ADDR as _)
|
||||||
} else {
|
} else {
|
||||||
self.ralloc.bundles.len()
|
self.ralloc.general_bundles.len()
|
||||||
},
|
},
|
||||||
tail,
|
tail,
|
||||||
)
|
)
|
||||||
|
@ -770,7 +774,13 @@ impl Nodes {
|
||||||
nid
|
nid
|
||||||
}
|
}
|
||||||
|
|
||||||
fn uses_of(&self, nid: Nid, types: &Types, stack: &mut Vec<Nid>, buf: &mut Vec<(Nid, Nid)>) {
|
fn uses_of(
|
||||||
|
&self,
|
||||||
|
nid: Nid,
|
||||||
|
types: &Types,
|
||||||
|
stack: &mut Vec<Nid>,
|
||||||
|
buf: &mut Vec<(Nid, Nid, Reg)>,
|
||||||
|
) {
|
||||||
debug_assert!(stack.is_empty());
|
debug_assert!(stack.is_empty());
|
||||||
debug_assert!(buf.is_empty());
|
debug_assert!(buf.is_empty());
|
||||||
|
|
||||||
|
@ -786,13 +796,36 @@ impl Nodes {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if self.is_unlocked(o) {
|
if self.is_unlocked(o) {
|
||||||
buf.push((self.use_block_of(exp, o), o));
|
buf.push((self.use_block_of(exp, o), o, self.use_reg_of(exp, o)));
|
||||||
} else {
|
} else {
|
||||||
stack.push(o);
|
stack.push(o);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn init_loc_of(&self, def: Nid, types: &Types) -> Reg {
|
||||||
|
if self[def].kind == Kind::Arg {
|
||||||
|
let mut parama = ParamAlloc(0..11);
|
||||||
|
let (_, ploc) = self[VOID]
|
||||||
|
.outputs
|
||||||
|
.iter()
|
||||||
|
.skip(ARG_START)
|
||||||
|
.map(|&n| (n, parama.next(self[n].ty, types)))
|
||||||
|
.find(|&(n, _)| n == def)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
return ploc.unwrap().reg();
|
||||||
|
}
|
||||||
|
|
||||||
|
255
|
||||||
|
}
|
||||||
|
|
||||||
|
fn use_reg_of(&self, def: Nid, usage: Nid) -> Reg {
|
||||||
|
if matches!(self[usage].kind, Kind::Return { .. }) {}
|
||||||
|
|
||||||
|
255
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Regalloc<'a> {
|
struct Regalloc<'a> {
|
||||||
|
@ -815,27 +848,35 @@ impl<'a> Regalloc<'a> {
|
||||||
self.res.backrefs[nid as usize]
|
self.res.backrefs[nid as usize]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run(ctx: &'a Nodes, tys: &'a Types, res: &'a mut Res) {
|
fn run(ctx: &'a Nodes, tys: &'a Types, res: &'a mut Res, special_count: usize) {
|
||||||
Self { nodes: ctx, tys, res }.run_low();
|
Self { nodes: ctx, tys, res }.run_low(special_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run_low(&mut self) {
|
fn run_low(&mut self, special_count: usize) {
|
||||||
self.res.bundles.clear();
|
self.res.general_bundles.clear();
|
||||||
self.res.node_to_reg.clear();
|
self.res.node_to_reg.clear();
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
self.res.marked.clear();
|
self.res.marked.clear();
|
||||||
self.res.node_to_reg.resize(self.nodes.vreg_count(), 0);
|
self.res.node_to_reg.resize(self.nodes.vreg_count(), 0);
|
||||||
|
self.res.call_set.clear();
|
||||||
|
|
||||||
|
for (i, &instr) in self.res.instrs.iter().enumerate() {
|
||||||
|
if self.nodes[instr].kind.is_call() {
|
||||||
|
self.res.call_set.add_one(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
debug_assert!(self.res.dfs_buf.is_empty());
|
debug_assert!(self.res.dfs_buf.is_empty());
|
||||||
|
|
||||||
let mut uses_buf = Vec::new();
|
let mut uses_buf = Vec::new();
|
||||||
let mut bundle = Bundle::new(self.res.instrs.len());
|
let mut range_buf = Vec::new();
|
||||||
|
let mut bundle = Bundle::default();
|
||||||
self.res.visited.clear(self.nodes.len());
|
self.res.visited.clear(self.nodes.len());
|
||||||
|
|
||||||
for i in (0..self.res.blocks.len()).rev() {
|
for i in (0..self.res.blocks.len()).rev() {
|
||||||
for [a, rest @ ..] in self.nodes.phi_inputs_of(self.res.blocks[i].entry) {
|
for [a, rest @ ..] in self.nodes.phi_inputs_of(self.res.blocks[i].entry) {
|
||||||
if self.res.visited.set(a) {
|
if self.res.visited.set(a) {
|
||||||
self.append_bundle(a, &mut bundle, None, &mut uses_buf);
|
self.append_bundle(a, &mut bundle, None, &mut uses_buf, &mut range_buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
for r in rest {
|
for r in rest {
|
||||||
|
@ -848,6 +889,7 @@ impl<'a> Regalloc<'a> {
|
||||||
&mut bundle,
|
&mut bundle,
|
||||||
Some(self.res.node_to_reg[a as usize] as usize - 1),
|
Some(self.res.node_to_reg[a as usize] as usize - 1),
|
||||||
&mut uses_buf,
|
&mut uses_buf,
|
||||||
|
&mut range_buf,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -858,7 +900,7 @@ impl<'a> Regalloc<'a> {
|
||||||
if self.nodes[inst].has_no_value() || self.res.visited.get(inst) || inst == 0 {
|
if self.nodes[inst].has_no_value() || self.res.visited.get(inst) || inst == 0 {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
self.append_bundle(inst, &mut bundle, None, &mut uses_buf);
|
self.append_bundle(inst, &mut bundle, None, &mut uses_buf, &mut range_buf);
|
||||||
}
|
}
|
||||||
self.res.instrs = instrs;
|
self.res.instrs = instrs;
|
||||||
}
|
}
|
||||||
|
@ -868,12 +910,16 @@ impl<'a> Regalloc<'a> {
|
||||||
inst: Nid,
|
inst: Nid,
|
||||||
tmp: &mut Bundle,
|
tmp: &mut Bundle,
|
||||||
prefered: Option<usize>,
|
prefered: Option<usize>,
|
||||||
uses_buf: &mut Vec<(Nid, Nid)>,
|
uses_buf: &mut Vec<(Nid, Nid, Reg)>,
|
||||||
|
range_buf: &mut Vec<Range<usize>>,
|
||||||
) {
|
) {
|
||||||
let dom = self.nodes.idom_of(inst);
|
let dom = self.nodes.idom_of(inst);
|
||||||
self.res.dfs_seem.clear(self.nodes.len());
|
self.res.dfs_seem.clear(self.nodes.len());
|
||||||
self.nodes.uses_of(inst, self.tys, &mut self.res.dfs_buf, uses_buf);
|
self.nodes.uses_of(inst, self.tys, &mut self.res.dfs_buf, uses_buf);
|
||||||
for (cursor, uinst) in uses_buf.drain(..) {
|
let mut prefered_reg = reg::ZERO;
|
||||||
|
for (cursor, uinst, reg) in uses_buf.drain(..) {
|
||||||
|
prefered_reg = prefered_reg.min(reg);
|
||||||
|
|
||||||
if !self.res.dfs_seem.set(uinst) {
|
if !self.res.dfs_seem.set(uinst) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -903,8 +949,22 @@ impl<'a> Regalloc<'a> {
|
||||||
range.end = new;
|
range.end = new;
|
||||||
debug_assert!(range.start < range.end, "{:?} {inst} {uinst}", range);
|
debug_assert!(range.start < range.end, "{:?} {inst} {uinst}", range);
|
||||||
|
|
||||||
tmp.add(range);
|
range_buf.push(range)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
range_buf.sort_unstable_by_key(|r| r.start);
|
||||||
|
range_buf.dedup_by(|a, b| {
|
||||||
|
if b.end == a.start {
|
||||||
|
b.end = a.end;
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
for range in range_buf.drain(..) {
|
||||||
|
tmp.add(range);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if tmp.is_empty() {
|
if tmp.is_empty() {
|
||||||
|
@ -913,23 +973,23 @@ impl<'a> Regalloc<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(prefered) = prefered
|
if let Some(prefered) = prefered
|
||||||
&& !self.res.bundles[prefered].overlaps(tmp)
|
&& !self.res.general_bundles[prefered].overlaps(tmp)
|
||||||
{
|
{
|
||||||
self.res.bundles[prefered].merge(tmp);
|
self.res.general_bundles[prefered].merge(tmp);
|
||||||
tmp.clear();
|
tmp.clear();
|
||||||
self.res.node_to_reg[inst as usize] = prefered as Reg + 1;
|
self.res.node_to_reg[inst as usize] = prefered as Reg + 1;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
match self.res.bundles.iter_mut().enumerate().find(|(_, b)| !b.overlaps(tmp)) {
|
match self.res.general_bundles.iter_mut().enumerate().find(|(_, b)| !b.overlaps(tmp)) {
|
||||||
Some((i, other)) => {
|
Some((i, other)) => {
|
||||||
other.merge(tmp);
|
other.merge(tmp);
|
||||||
tmp.clear();
|
tmp.clear();
|
||||||
self.res.node_to_reg[inst as usize] = i as Reg + 1;
|
self.res.node_to_reg[inst as usize] = i as Reg + 1;
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
self.res.bundles.push(tmp.take());
|
self.res.general_bundles.push(tmp.take());
|
||||||
self.res.node_to_reg[inst as usize] = self.res.bundles.len() as Reg;
|
self.res.node_to_reg[inst as usize] = self.res.general_bundles.len() as Reg;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -980,7 +1040,8 @@ pub(super) struct Res {
|
||||||
instrs: Vec<Nid>,
|
instrs: Vec<Nid>,
|
||||||
backrefs: Vec<u16>,
|
backrefs: Vec<u16>,
|
||||||
|
|
||||||
bundles: Vec<Bundle>,
|
general_bundles: Vec<Bundle>,
|
||||||
|
call_set: Bundle,
|
||||||
node_to_reg: Vec<Reg>,
|
node_to_reg: Vec<Reg>,
|
||||||
|
|
||||||
visited: BitSet,
|
visited: BitSet,
|
||||||
|
@ -991,37 +1052,83 @@ pub(super) struct Res {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Bundle {
|
struct Bundle {
|
||||||
taken: Vec<bool>,
|
start: usize,
|
||||||
|
end: usize,
|
||||||
|
usage: BitSet,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Bundle {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self { start: usize::MAX, end: 0, usage: Default::default() }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Bundle {
|
impl Bundle {
|
||||||
fn new(size: usize) -> Self {
|
|
||||||
Self { taken: vec![false; size] }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add(&mut self, range: Range<usize>) {
|
fn add(&mut self, range: Range<usize>) {
|
||||||
self.taken[range].fill(true);
|
debug_assert!(!range.is_empty());
|
||||||
|
debug_assert!(range.start / BitSet::UNIT >= self.start || self.start == usize::MAX);
|
||||||
|
self.start = self.start.min(range.start / BitSet::UNIT);
|
||||||
|
self.end = self.end.max(range.end.div_ceil(BitSet::UNIT));
|
||||||
|
let proj_range =
|
||||||
|
range.start - self.start * BitSet::UNIT..range.end - self.start * BitSet::UNIT;
|
||||||
|
self.usage.set_range(proj_range)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn overlaps(&self, other: &Self) -> bool {
|
fn overlaps(&self, othr: &Self) -> bool {
|
||||||
self.taken.iter().zip(other.taken.iter()).any(|(a, b)| a & b)
|
let overlap = self.start.max(othr.start)..self.end.min(othr.end);
|
||||||
|
|
||||||
|
if overlap.start >= overlap.end {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let [mut sslot, mut oslot] = [0, 0];
|
||||||
|
let sunits =
|
||||||
|
&self.usage.units(&mut sslot)[overlap.start - self.start..overlap.end - self.start];
|
||||||
|
let ounits =
|
||||||
|
&othr.usage.units(&mut oslot)[overlap.start - othr.start..overlap.end - othr.start];
|
||||||
|
|
||||||
|
debug_assert_eq!(sunits.len(), ounits.len());
|
||||||
|
|
||||||
|
let res = sunits.iter().zip(ounits).any(|(a, b)| (a & b) != 0);
|
||||||
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge(&mut self, other: &Self) {
|
fn merge(&mut self, othr: &Self) {
|
||||||
debug_assert!(!self.overlaps(other));
|
debug_assert!(!self.overlaps(othr));
|
||||||
self.taken.iter_mut().zip(other.taken.iter()).for_each(|(a, b)| *a |= *b);
|
debug_assert!(self.start <= othr.start || self.start == usize::MAX);
|
||||||
|
|
||||||
|
self.usage.reserve((othr.end - self.start) * BitSet::UNIT);
|
||||||
|
self.start = self.start.min(othr.start);
|
||||||
|
self.end = self.end.max(othr.end);
|
||||||
|
|
||||||
|
let sunits =
|
||||||
|
&mut self.usage.units_mut().unwrap()[othr.start - self.start..othr.end - self.start];
|
||||||
|
let mut oslot = 0;
|
||||||
|
let ounits = othr.usage.units(&mut oslot);
|
||||||
|
sunits.iter_mut().zip(ounits).for_each(|(a, b)| *a |= *b);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn clear(&mut self) {
|
fn clear(&mut self) {
|
||||||
self.taken.fill(false);
|
self.start = usize::MAX;
|
||||||
|
self.end = 0;
|
||||||
|
self.usage.clear_as_is();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_empty(&self) -> bool {
|
fn is_empty(&self) -> bool {
|
||||||
!self.taken.contains(&true)
|
self.end == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
fn take(&mut self) -> Self {
|
fn take(&mut self) -> Self {
|
||||||
mem::replace(self, Self::new(self.taken.len()))
|
let mut new = Self { start: 0, ..Self::default() };
|
||||||
|
new.merge(self);
|
||||||
|
self.clear();
|
||||||
|
new
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_one(&mut self, i: usize) {
|
||||||
|
self.start = self.start.min(i / BitSet::UNIT);
|
||||||
|
self.end = self.end.max(i.div_ceil(BitSet::UNIT));
|
||||||
|
self.usage.set(i as _);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,8 @@
|
||||||
iter_next_chunk,
|
iter_next_chunk,
|
||||||
pointer_is_aligned_to,
|
pointer_is_aligned_to,
|
||||||
maybe_uninit_fill,
|
maybe_uninit_fill,
|
||||||
array_chunks
|
array_chunks,
|
||||||
|
array_windows
|
||||||
)]
|
)]
|
||||||
#![warn(clippy::dbg_macro)]
|
#![warn(clippy::dbg_macro)]
|
||||||
#![expect(internal_features)]
|
#![expect(internal_features)]
|
||||||
|
|
|
@ -184,11 +184,13 @@ impl Id {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_unsigned(self) -> bool {
|
pub fn is_unsigned(self) -> bool {
|
||||||
matches!(self.repr(), U8..=UINT) || self.is_never()
|
matches!(self.repr(), U8..=UINT)
|
||||||
|
|| self.is_never()
|
||||||
|
|| matches!(self.expand(), Kind::Enum(_))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_integer(self) -> bool {
|
pub fn is_integer(self) -> bool {
|
||||||
matches!(self.repr(), U8..=INT) || self.is_never()
|
self.is_signed() || self.is_unsigned()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_never(self) -> bool {
|
pub fn is_never(self) -> bool {
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
#![expect(dead_code)]
|
|
||||||
use {
|
use {
|
||||||
alloc::alloc,
|
alloc::alloc,
|
||||||
core::{
|
core::{
|
||||||
|
@ -7,7 +6,7 @@ use {
|
||||||
hint::unreachable_unchecked,
|
hint::unreachable_unchecked,
|
||||||
marker::PhantomData,
|
marker::PhantomData,
|
||||||
mem::MaybeUninit,
|
mem::MaybeUninit,
|
||||||
ops::{Deref, DerefMut, Not},
|
ops::{Deref, DerefMut, Not, Range},
|
||||||
ptr::Unique,
|
ptr::Unique,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@ -32,9 +31,10 @@ pub fn is_screaming_case(str: &str) -> Result<(), &'static str> {
|
||||||
}
|
}
|
||||||
|
|
||||||
type Nid = u16;
|
type Nid = u16;
|
||||||
|
type BitSetUnit = usize;
|
||||||
|
|
||||||
pub union BitSet {
|
pub union BitSet {
|
||||||
inline: usize,
|
inline: BitSetUnit,
|
||||||
alloced: Unique<AllocedBitSet>,
|
alloced: Unique<AllocedBitSet>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,9 +78,9 @@ impl Default for BitSet {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BitSet {
|
impl BitSet {
|
||||||
const FLAG: usize = 1 << (Self::UNIT - 1);
|
const FLAG: BitSetUnit = 1 << (Self::UNIT - 1);
|
||||||
const INLINE_ELEMS: usize = Self::UNIT - 1;
|
const INLINE_ELEMS: usize = Self::UNIT - 1;
|
||||||
const UNIT: usize = core::mem::size_of::<usize>() * 8;
|
pub const UNIT: usize = core::mem::size_of::<BitSetUnit>() * 8;
|
||||||
|
|
||||||
pub fn with_capacity(len: usize) -> Self {
|
pub fn with_capacity(len: usize) -> Self {
|
||||||
let mut s = Self::default();
|
let mut s = Self::default();
|
||||||
|
@ -92,7 +92,7 @@ impl BitSet {
|
||||||
unsafe { self.inline & Self::FLAG != 0 }
|
unsafe { self.inline & Self::FLAG != 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn data_and_len(&self) -> (&[usize], usize) {
|
fn data_and_len(&self) -> (&[BitSetUnit], usize) {
|
||||||
unsafe {
|
unsafe {
|
||||||
if self.is_inline() {
|
if self.is_inline() {
|
||||||
(core::slice::from_ref(&self.inline), Self::INLINE_ELEMS)
|
(core::slice::from_ref(&self.inline), Self::INLINE_ELEMS)
|
||||||
|
@ -100,16 +100,16 @@ impl BitSet {
|
||||||
let small_vec = self.alloced.as_ref();
|
let small_vec = self.alloced.as_ref();
|
||||||
(
|
(
|
||||||
core::slice::from_raw_parts(
|
core::slice::from_raw_parts(
|
||||||
&small_vec.data as *const _ as *const usize,
|
&small_vec.data as *const _ as *const BitSetUnit,
|
||||||
small_vec.cap,
|
small_vec.cap,
|
||||||
),
|
),
|
||||||
small_vec.cap * core::mem::size_of::<usize>() * 8,
|
small_vec.cap * Self::UNIT,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn data_mut_and_len(&mut self) -> (&mut [usize], usize) {
|
fn data_mut_and_len(&mut self) -> (&mut [BitSetUnit], usize) {
|
||||||
unsafe {
|
unsafe {
|
||||||
if self.is_inline() {
|
if self.is_inline() {
|
||||||
(core::slice::from_mut(&mut self.inline), INLINE_ELEMS)
|
(core::slice::from_mut(&mut self.inline), INLINE_ELEMS)
|
||||||
|
@ -117,7 +117,7 @@ impl BitSet {
|
||||||
let small_vec = self.alloced.as_mut();
|
let small_vec = self.alloced.as_mut();
|
||||||
(
|
(
|
||||||
core::slice::from_raw_parts_mut(
|
core::slice::from_raw_parts_mut(
|
||||||
&mut small_vec.data as *mut _ as *mut usize,
|
&mut small_vec.data as *mut _ as *mut BitSetUnit,
|
||||||
small_vec.cap,
|
small_vec.cap,
|
||||||
),
|
),
|
||||||
small_vec.cap * Self::UNIT,
|
small_vec.cap * Self::UNIT,
|
||||||
|
@ -163,7 +163,7 @@ impl BitSet {
|
||||||
let (ptr, prev_len) = unsafe {
|
let (ptr, prev_len) = unsafe {
|
||||||
if self.is_inline() {
|
if self.is_inline() {
|
||||||
let ptr = alloc::alloc(layout);
|
let ptr = alloc::alloc(layout);
|
||||||
*ptr.add(off).cast::<usize>() = self.inline & !Self::FLAG;
|
*ptr.add(off).cast::<BitSetUnit>() = self.inline & !Self::FLAG;
|
||||||
(ptr, 1)
|
(ptr, 1)
|
||||||
} else {
|
} else {
|
||||||
let prev_len = self.alloced.as_ref().cap;
|
let prev_len = self.alloced.as_ref().cap;
|
||||||
|
@ -174,7 +174,7 @@ impl BitSet {
|
||||||
unsafe {
|
unsafe {
|
||||||
MaybeUninit::fill(
|
MaybeUninit::fill(
|
||||||
core::slice::from_raw_parts_mut(
|
core::slice::from_raw_parts_mut(
|
||||||
ptr.add(off).cast::<MaybeUninit<usize>>().add(prev_len),
|
ptr.add(off).cast::<MaybeUninit<BitSetUnit>>().add(prev_len),
|
||||||
slot_count - prev_len,
|
slot_count - prev_len,
|
||||||
),
|
),
|
||||||
0,
|
0,
|
||||||
|
@ -187,7 +187,7 @@ impl BitSet {
|
||||||
fn layout(slot_count: usize) -> (core::alloc::Layout, usize) {
|
fn layout(slot_count: usize) -> (core::alloc::Layout, usize) {
|
||||||
unsafe {
|
unsafe {
|
||||||
core::alloc::Layout::new::<AllocedBitSet>()
|
core::alloc::Layout::new::<AllocedBitSet>()
|
||||||
.extend(Layout::array::<usize>(slot_count).unwrap_unchecked())
|
.extend(Layout::array::<BitSetUnit>(slot_count).unwrap_unchecked())
|
||||||
.unwrap_unchecked()
|
.unwrap_unchecked()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -205,6 +205,10 @@ impl BitSet {
|
||||||
|
|
||||||
pub fn clear(&mut self, len: usize) {
|
pub fn clear(&mut self, len: usize) {
|
||||||
self.reserve(len);
|
self.reserve(len);
|
||||||
|
self.clear_as_is();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn clear_as_is(&mut self) {
|
||||||
if self.is_inline() {
|
if self.is_inline() {
|
||||||
unsafe { self.inline &= Self::FLAG };
|
unsafe { self.inline &= Self::FLAG };
|
||||||
} else {
|
} else {
|
||||||
|
@ -212,7 +216,11 @@ impl BitSet {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn units<'a>(&'a self, slot: &'a mut usize) -> &'a [usize] {
|
pub fn approx_unit_cap(&self) -> usize {
|
||||||
|
self.data_and_len().0.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn units<'a>(&'a self, slot: &'a mut BitSetUnit) -> &'a [BitSetUnit] {
|
||||||
if self.is_inline() {
|
if self.is_inline() {
|
||||||
*slot = unsafe { self.inline } & !Self::FLAG;
|
*slot = unsafe { self.inline } & !Self::FLAG;
|
||||||
core::slice::from_ref(slot)
|
core::slice::from_ref(slot)
|
||||||
|
@ -221,36 +229,47 @@ impl BitSet {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn units_mut(&mut self) -> Option<&mut [BitSetUnit]> {
|
||||||
|
self.is_inline().not().then(|| self.data_mut_and_len().0)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn reserve(&mut self, len: usize) {
|
pub fn reserve(&mut self, len: usize) {
|
||||||
if len > self.data_and_len().1 {
|
if len > self.data_and_len().1 {
|
||||||
self.grow(len.next_power_of_two().max(4 * Self::UNIT));
|
self.grow(len.next_power_of_two().max(4 * Self::UNIT));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn units_mut(&mut self) -> Result<&mut [usize], &mut InlineBitSetView> {
|
pub fn set_range(&mut self, proj_range: Range<usize>) {
|
||||||
if self.is_inline() {
|
if proj_range.is_empty() {
|
||||||
Err(unsafe {
|
return;
|
||||||
core::mem::transmute::<&mut usize, &mut InlineBitSetView>(&mut self.inline)
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
Ok(self.data_mut_and_len().0)
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct InlineBitSetView(usize);
|
self.reserve(proj_range.end);
|
||||||
|
let (units, _) = self.data_mut_and_len();
|
||||||
|
|
||||||
impl InlineBitSetView {
|
if proj_range.start / Self::UNIT == (proj_range.end - 1) / Self::UNIT {
|
||||||
pub(crate) fn add_mask(&mut self, tmp: usize) {
|
debug_assert!(proj_range.len() <= Self::UNIT);
|
||||||
debug_assert!(tmp & BitSet::FLAG == 0);
|
let mask = ((1 << proj_range.len()) - 1) << (proj_range.start % Self::UNIT);
|
||||||
self.0 |= tmp;
|
units[proj_range.start / Self::UNIT] |= mask;
|
||||||
|
} else {
|
||||||
|
let fill_range = proj_range.start.div_ceil(Self::UNIT)..proj_range.end / Self::UNIT;
|
||||||
|
units[fill_range].fill(BitSetUnit::MAX);
|
||||||
|
|
||||||
|
let prefix_len = Self::UNIT - proj_range.start % Self::UNIT;
|
||||||
|
let prefix_mask = ((1 << prefix_len) - 1) << (proj_range.start % Self::UNIT);
|
||||||
|
units[proj_range.start / Self::UNIT] |= prefix_mask;
|
||||||
|
|
||||||
|
let postfix_len = proj_range.end % Self::UNIT;
|
||||||
|
let postfix_mask = (1 << postfix_len) - 1;
|
||||||
|
units[proj_range.end / Self::UNIT] |= postfix_mask;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct BitSetIter<'a> {
|
pub struct BitSetIter<'a> {
|
||||||
index: usize,
|
index: usize,
|
||||||
current: usize,
|
current: BitSetUnit,
|
||||||
remining: &'a [usize],
|
remining: &'a [BitSetUnit],
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for BitSetIter<'_> {
|
impl Iterator for BitSetIter<'_> {
|
||||||
|
@ -270,7 +289,7 @@ impl Iterator for BitSetIter<'_> {
|
||||||
|
|
||||||
struct AllocedBitSet {
|
struct AllocedBitSet {
|
||||||
cap: usize,
|
cap: usize,
|
||||||
data: [usize; 0],
|
data: [BitSetUnit; 0],
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
Loading…
Reference in a new issue