diff --git a/lang/src/lib.rs b/lang/src/lib.rs index 59995f1..0de1fd8 100644 --- a/lang/src/lib.rs +++ b/lang/src/lib.rs @@ -25,6 +25,7 @@ pointer_is_aligned_to, maybe_uninit_fill )] +#![feature(array_chunks)] #![warn(clippy::dbg_macro)] #![expect(internal_features)] #![no_std] diff --git a/lang/src/son.rs b/lang/src/son.rs index a52d380..a7c02aa 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -124,24 +124,26 @@ impl Default for Nodes { } impl Nodes { - fn loop_depth(&self, target: Nid) -> LoopDepth { + fn loop_depth(&self, target: Nid, scheds: Option<&[Nid]>) -> LoopDepth { self[target].loop_depth.set(match self[target].kind { Kind::Region | Kind::Entry | Kind::Then | Kind::Else | Kind::Call { .. } | Kind::If => { if self[target].loop_depth.get() != 0 { return self[target].loop_depth.get(); } - self.loop_depth(self[target].inputs[0]) + self.loop_depth(self[target].inputs[0], scheds) } Kind::Loop => { - if self[target].loop_depth.get() == self.loop_depth(self[target].inputs[0]) + 1 { + if self[target].loop_depth.get() + == self.loop_depth(self[target].inputs[0], scheds) + 1 + { return self[target].loop_depth.get(); } - let depth = self.loop_depth(self[target].inputs[0]) + 1; + let depth = self.loop_depth(self[target].inputs[0], scheds) + 1; self[target].loop_depth.set(depth); let mut cursor = self[target].inputs[1]; while cursor != target { self[cursor].loop_depth.set(depth); - let next = self.idom(cursor); + let next = self.idom(cursor, scheds); debug_assert_ne!(next, 0); if matches!(self[cursor].kind, Kind::Then | Kind::Else) { debug_assert_eq!(self[next].kind, Kind::If); @@ -159,17 +161,21 @@ impl Nodes { self[target].loop_depth.get() } - fn idepth(&self, target: Nid) -> IDomDepth { + fn idepth(&self, target: Nid, scheds: Option<&[Nid]>) -> IDomDepth { if target == VOID { return 0; } if self[target].depth.get() == 0 { let depth = match self[target].kind { Kind::End | Kind::Start => unreachable!("{:?}", self[target].kind), - Kind::Region => { - self.idepth(self[target].inputs[0]).max(self.idepth(self[target].inputs[1])) + Kind::Region => self + .idepth(self[target].inputs[0], scheds) + .max(self.idepth(self[target].inputs[1], scheds)), + _ if self[target].kind.is_pinned() => self.idepth(self[target].inputs[0], scheds), + _ if let Some(scheds) = scheds => { + self.idepth(scheds[target as usize], Some(scheds)) } - _ => self.idepth(self[target].inputs[0]), + _ => self.idepth(self[target].inputs[0], scheds), } + 1; self[target].depth.set(depth); } @@ -195,15 +201,14 @@ impl Nodes { } } - fn push_up_impl(&mut self, node: Nid, visited: &mut BitSet) { + fn push_up_impl(&self, node: Nid, visited: &mut BitSet, scheds: &mut [Nid]) { if !visited.set(node) { return; } - for i in 1..self[node].inputs.len() { - let inp = self[node].inputs[i]; + for &inp in &self[node].inputs[1..] { if !self[inp].kind.is_pinned() { - self.push_up_impl(inp, visited); + self.push_up_impl(inp, visited, scheds); } } @@ -213,32 +218,17 @@ impl Nodes { let mut deepest = self[node].inputs[0]; for &inp in self[node].inputs[1..].iter() { - if self.idepth(inp) > self.idepth(deepest) { + if self.idepth(inp, Some(scheds)) > self.idepth(deepest, Some(scheds)) { if self[inp].kind.is_call() { deepest = inp; } else { debug_assert!(!self.is_cfg(inp)); - deepest = self.idom(inp); + deepest = self.idom(inp, Some(scheds)); } } } - if deepest == self[node].inputs[0] { - return; - } - - let current = self[node].inputs[0]; - - let index = self[current].outputs.iter().position(|&p| p == node).unwrap(); - self[current].outputs.remove(index); - self[node].inputs[0] = deepest; - debug_assert!( - !self[deepest].outputs.contains(&node) || self[deepest].kind.is_call(), - "{node} {:?} {deepest} {:?}", - self[node], - self[deepest] - ); - self[deepest].outputs.push(node); + scheds[node as usize] = deepest; } fn collect_rpo(&self, node: Nid, rpo: &mut Vec, visited: &mut BitSet) { @@ -253,21 +243,21 @@ impl Nodes { rpo.push(node); } - fn push_up(&mut self, rpo: &mut Vec, visited: &mut BitSet) { + fn push_up(&self, rpo: &mut Vec, visited: &mut BitSet, scheds: &mut [Nid]) { debug_assert!(rpo.is_empty()); self.collect_rpo(VOID, rpo, visited); for &node in rpo.iter().rev() { - self.loop_depth(node); + self.loop_depth(node, Some(scheds)); for i in 0..self[node].inputs.len() { - self.push_up_impl(self[node].inputs[i], visited); + self.push_up_impl(self[node].inputs[i], visited, scheds); } if matches!(self[node].kind, Kind::Loop | Kind::Region) { for i in 0..self[node].outputs.len() { let usage = self[node].outputs[i]; if self[usage].kind == Kind::Phi { - self.push_up_impl(usage, visited); + self.push_up_impl(usage, visited, scheds); } } } @@ -290,14 +280,14 @@ impl Nodes { rpo.clear(); } - fn better(&mut self, is: Nid, then: Nid) -> bool { - debug_assert_ne!(self.idepth(is), self.idepth(then), "{is} {then}"); - self.loop_depth(is) < self.loop_depth(then) - || self.idepth(is) > self.idepth(then) + fn better(&self, is: Nid, then: Nid, scheds: Option<&[Nid]>) -> bool { + debug_assert_ne!(self.idepth(is, scheds), self.idepth(then, scheds), "{is} {then}"); + self.loop_depth(is, scheds) < self.loop_depth(then, scheds) + || self.idepth(is, scheds) > self.idepth(then, scheds) || self[then].kind == Kind::If } - fn is_forward_edge(&mut self, usage: Nid, def: Nid) -> bool { + fn is_forward_edge(&self, usage: Nid, def: Nid) -> bool { match self[usage].kind { Kind::Phi => { self[usage].inputs[2] != def || self[self[usage].inputs[0]].kind != Kind::Loop @@ -307,20 +297,27 @@ impl Nodes { } } - fn push_down(&mut self, node: Nid, visited: &mut BitSet, antideps: &mut [Nid]) { + fn push_down( + &self, + node: Nid, + visited: &mut BitSet, + antideps: &mut [Nid], + scheds: &mut [Nid], + antidep_bounds: &mut Vec, + ) { if !visited.set(node) { return; } for usage in self[node].outputs.clone() { if self.is_forward_edge(usage, node) && self[node].kind == Kind::Stre { - self.push_down(usage, visited, antideps); + self.push_down(usage, visited, antideps, scheds, antidep_bounds); } } for usage in self[node].outputs.clone() { if self.is_forward_edge(usage, node) { - self.push_down(usage, visited, antideps); + self.push_down(usage, visited, antideps, scheds, antidep_bounds); } } @@ -331,56 +328,60 @@ impl Nodes { let mut min = None::; for i in 0..self[node].outputs.len() { let usage = self[node].outputs[i]; - let ub = self.use_block(node, usage); - min = min.map(|m| self.common_dom(ub, m)).or(Some(ub)); + let ub = self.use_block(node, usage, Some(scheds)); + min = min.map(|m| self.common_dom(ub, m, Some(scheds))).or(Some(ub)); } let mut min = min.unwrap(); - debug_assert!(self.dominates(self[node].inputs[0], min)); + debug_assert!(self.dominates(scheds[node as usize], min, Some(scheds))); let mut cursor = min; - while cursor != self[node].inputs[0] { - cursor = self.idom(cursor); - if self.better(cursor, min) { + let mut fuel = self.values.len(); + while cursor != scheds[node as usize] { + debug_assert!(fuel != 0); + fuel -= 1; + cursor = self.idom(cursor, Some(scheds)); + if self.better(cursor, min, Some(scheds)) { min = cursor; } } if self[node].kind == Kind::Load { - min = self.find_antideps(node, min, antideps); - } - - if self[node].kind == Kind::Stre { - antideps[node as usize] = self[node].inputs[0]; + min = self.find_antideps(node, min, antideps, scheds, antidep_bounds); } if self[min].kind.ends_basic_block() { - min = self.idom(min); + min = self.idom(min, Some(scheds)); } - self.assert_dominance(node, min, true); + self.assert_dominance(node, min, true, Some(scheds)); - let prev = self[node].inputs[0]; - debug_assert!(self.idepth(min) >= self.idepth(prev)); - let index = self[prev].outputs.iter().position(|&p| p == node).unwrap(); - self[prev].outputs.remove(index); - self[node].inputs[0] = min; - self[min].outputs.push(node); + debug_assert!( + self.idepth(min, Some(scheds)) >= self.idepth(scheds[node as usize], Some(scheds)) + ); + scheds[node as usize] = min; } - fn find_antideps(&mut self, load: Nid, mut min: Nid, antideps: &mut [Nid]) -> Nid { + fn find_antideps( + &self, + load: Nid, + mut min: Nid, + antideps: &mut [Nid], + scheds: &[Nid], + antidep_bounds: &mut Vec, + ) -> Nid { debug_assert!(self[load].kind == Kind::Load); let (aclass, _) = self.aclass_index(self[load].inputs[1]); let mut cursor = min; - while cursor != self[load].inputs[0] { + while cursor != scheds[load as usize] { antideps[cursor as usize] = load; if self[cursor].clobbers.get(aclass as _) { min = self[cursor].inputs[0]; break; } - cursor = self.idom(cursor); + cursor = self.idom(cursor, Some(scheds)); } if self[load].inputs[2] == MEM { @@ -390,16 +391,19 @@ impl Nodes { for out in self[self[load].inputs[2]].outputs.clone() { match self[out].kind { Kind::Stre => { - let mut cursor = self[out].inputs[0]; - while cursor != antideps[out as usize] { + let mut cursor = scheds[out as usize]; + while cursor != scheds[load as usize] + && self.idepth(cursor, Some(scheds)) + > self.idepth(scheds[load as usize], Some(scheds)) + { if antideps[cursor as usize] == load { - min = self.common_dom(min, cursor); + min = self.common_dom(min, cursor, Some(scheds)); if min == cursor { - self.bind(load, out); + antidep_bounds.extend([load, out]); } break; } - cursor = self.idom(cursor); + cursor = self.idom(cursor, Some(scheds)); } break; } @@ -409,12 +413,15 @@ impl Nodes { .position(|&n| n == self[load].inputs[2]) .unwrap(); let mut cursor = self[self[out].inputs[0]].inputs[n]; - while cursor != antideps[out as usize] { + while cursor != scheds[load as usize] + && self.idepth(cursor, Some(scheds)) + > self.idepth(scheds[load as usize], Some(scheds)) + { if antideps[cursor as usize] == load { - min = self.common_dom(min, cursor); + min = self.common_dom(min, cursor, Some(scheds)); break; } - cursor = self.idom(cursor); + cursor = self.idom(cursor, Some(scheds)); } } _ => {} @@ -431,9 +438,9 @@ impl Nodes { self[to].inputs.push(from); } - fn use_block(&self, target: Nid, from: Nid) -> Nid { + fn use_block(&self, target: Nid, from: Nid, scheds: Option<&[Nid]>) -> Nid { if self[from].kind != Kind::Phi { - return self.idom(from); + return self.idom(from, scheds); } let index = self[from].inputs.iter().position(|&n| n == target).unwrap_or_else(|| { @@ -442,26 +449,28 @@ impl Nodes { self[self[from].inputs[0]].inputs[index - 1] } - fn idom(&self, target: Nid) -> Nid { + fn idom(&self, target: Nid, scheds: Option<&[Nid]>) -> Nid { match self[target].kind { - Kind::Start => VOID, + Kind::Start => unreachable!(), Kind::End => unreachable!(), Kind::Region => { let &[lcfg, rcfg] = self[target].inputs.as_slice() else { unreachable!() }; - self.common_dom(lcfg, rcfg) + self.common_dom(lcfg, rcfg, scheds) } + _ if self[target].kind.is_pinned() => self[target].inputs[0], + _ if let Some(scheds) = scheds => scheds[target as usize], _ => self[target].inputs[0], } } - fn common_dom(&self, mut a: Nid, mut b: Nid) -> Nid { + fn common_dom(&self, mut a: Nid, mut b: Nid, scheds: Option<&[Nid]>) -> Nid { while a != b { - let [ldepth, rdepth] = [self.idepth(a), self.idepth(b)]; + let [ldepth, rdepth] = [self.idepth(a, scheds), self.idepth(b, scheds)]; if ldepth >= rdepth { - a = self.idom(a); + a = self.idom(a, scheds); } if ldepth <= rdepth { - b = self.idom(b); + b = self.idom(b, scheds); } } a @@ -581,16 +590,40 @@ impl Nodes { } } - fn gcm(&mut self, rpo: &mut Vec, visited: &mut BitSet) { + fn gcm(&mut self, scratch: &mut Vec, bind_buf: &mut Vec, visited: &mut BitSet) { visited.clear(self.values.len()); - self.fix_loops(rpo, visited); + self.fix_loops(bind_buf, visited); + debug_assert!(bind_buf.is_empty()); + debug_assert!(scratch.is_empty()); + scratch.resize(self.values.len() * 2, Nid::MAX); + let (antideps, scheds) = scratch.split_at_mut(self.values.len()); visited.clear(self.values.len()); - self.push_up(rpo, visited); + self.push_up(bind_buf, visited, scheds); visited.clear(self.values.len()); - debug_assert!(rpo.is_empty()); - rpo.resize(self.values.len(), VOID); - self.push_down(VOID, visited, rpo); - rpo.clear(); + self.push_down(VOID, visited, antideps, scheds, bind_buf); + + for &[from, to] in bind_buf.array_chunks() { + self.bind(from, to); + } + + bind_buf.clear(); + self[VOID].outputs = + self[VOID].outputs.iter().filter(|&&n| self[n].kind.is_at_start()).copied().collect(); + + for (&shed, n) in scheds.iter().zip(0u16..) { + if shed == Nid::MAX { + continue; + } + + let prev = mem::replace(&mut self[n].inputs[0], shed); + if prev != VOID { + let index = self[prev].outputs.iter().position(|&o| o == n).unwrap(); + self[prev].outputs.swap_remove(index); + } + self[shed].outputs.push(n); + } + + scratch.clear(); } fn clear(&mut self) { @@ -1429,9 +1462,9 @@ impl Nodes { } return Some(self[target].inputs[2]); } - _ if self.is_cfg(target) && self.idom(target) == NEVER => panic!(), - K::Start - | K::Entry + K::Start => {} + _ if self.is_cfg(target) && self.idom(target, None) == NEVER => panic!(), + K::Entry | K::Mem | K::Loops | K::End @@ -1468,7 +1501,7 @@ impl Nodes { } } - cursor = self.idom(cursor); + cursor = self.idom(cursor, None); } CondOptRes::Unknown @@ -1563,7 +1596,7 @@ impl Nodes { } #[expect(clippy::format_in_format_args)] - fn basic_blocks_instr(&mut self, out: &mut String, node: Nid) -> core::fmt::Result { + fn basic_blocks_instr(&self, out: &mut String, node: Nid) -> core::fmt::Result { match self[node].kind { Kind::Assert { .. } | Kind::Start => unreachable!("{} {out}", self[node].kind), Kind::End => return Ok(()), @@ -1599,7 +1632,8 @@ impl Nodes { if self[node].kind != Kind::Loop && self[node].kind != Kind::Region { writeln!( out, - " {:<14} {}", + " {:<3} {:<14} {}", + node, format!("{:?}", self[node].inputs), format!("{:?}", self[node].outputs) )?; @@ -1609,7 +1643,7 @@ impl Nodes { } fn basic_blocks_low( - &mut self, + &self, out: &mut String, mut node: Nid, visited: &mut BitSet, @@ -1709,7 +1743,7 @@ impl Nodes { Ok(()) } - fn basic_blocks(&mut self) { + fn basic_blocks(&self) { let mut out = String::new(); let mut visited = BitSet::default(); self.basic_blocks_low(&mut out, VOID, &mut visited).unwrap(); @@ -1763,12 +1797,12 @@ impl Nodes { let mut stack = vec![self[loob].inputs[1]]; let mut seen = BitSet::default(); seen.set(loob); - let depth = self.loop_depth(loob); + let depth = self.loop_depth(loob, None); while let Some(nid) = stack.pop() { if seen.set(nid) { - if depth > self.loop_depth(nid) { + if depth > self.loop_depth(nid, None) { failed = true; - log::error!("{depth} {} {nid} {:?}", self.loop_depth(nid), self[nid]); + log::error!("{depth} {} {nid} {:?}", self.loop_depth(nid, None), self[nid]); } match self[nid].kind { @@ -1830,25 +1864,25 @@ impl Nodes { aclass.last_store.set(lvar.last_store.get(), self); } - fn assert_dominance(&mut self, nd: Nid, min: Nid, check_outputs: bool) { + fn assert_dominance(&self, nd: Nid, min: Nid, check_outputs: bool, scheds: Option<&[Nid]>) { if !cfg!(debug_assertions) { return; } let node = self[nd].clone(); - for &i in node.inputs.iter() { - let dom = self.idom(i); + for &i in &node.inputs[1..] { + let dom = self.idom(i, scheds); debug_assert!( - self.dominates(dom, min), + self.dominates(dom, min, scheds), "{dom} {min} {node:?} {:?}", self.basic_blocks() ); } if check_outputs { for &o in node.outputs.iter() { - let dom = self.use_block(nd, o); + let dom = self.use_block(nd, o, scheds); debug_assert!( - self.dominates(min, dom), + self.dominates(min, dom, scheds), "{min} {dom} {node:?} {:?}", self.basic_blocks() ); @@ -1856,17 +1890,19 @@ impl Nodes { } } - fn dominates(&self, dominator: Nid, mut dominated: Nid) -> bool { + fn dominates(&self, dominator: Nid, mut dominated: Nid, scheds: Option<&[Nid]>) -> bool { loop { if dominator == dominated { break true; } - if self.idepth(dominator) > self.idepth(dominated) { + debug_assert!(dominated != VOID); + + if self.idepth(dominator, scheds) > self.idepth(dominated, scheds) { break false; } - dominated = self.idom(dominated); + dominated = self.idom(dominated, scheds); } } @@ -2018,8 +2054,11 @@ impl Kind { } fn is_pinned(&self) -> bool { - self.is_cfg() - || matches!(self, Self::Phi | Self::Arg | Self::Mem | Self::Loops | Kind::Assert { .. }) + self.is_cfg() || self.is_at_start() || matches!(self, Self::Phi | Kind::Assert { .. }) + } + + fn is_at_start(&self) -> bool { + matches!(self, Self::Arg | Self::Mem | Self::Loops | Self::Entry) } fn is_cfg(&self) -> bool { @@ -2384,7 +2423,8 @@ impl Ctx { pub struct Pool { cis: Vec, used_cis: usize, - nid_stack: Vec, + scratch1: Vec, + scratch2: Vec, nid_set: BitSet, } @@ -2652,8 +2692,8 @@ impl<'a> Codegen<'a> { if self.ci.nodes[value].kind == Kind::Load { let (lindex, ..) = self.ci.nodes.aclass_index(self.ci.nodes[value].inputs[1]); let clobber = self.ci.scope.aclasses[lindex].clobber.get(); - if self.ci.nodes.idepth(clobber) - > self.ci.nodes.idepth(self.ci.scope.aclasses[index].clobber.get()) + if self.ci.nodes.idepth(clobber, None) + > self.ci.nodes.idepth(self.ci.scope.aclasses[index].clobber.get(), None) { self.ci.scope.aclasses[index].clobber.set(clobber, &mut self.ci.nodes); } @@ -4732,7 +4772,7 @@ impl<'a> Codegen<'a> { fn finalize(&mut self, prev_err_len: usize) -> bool { use {AssertKind as AK, CondOptRes as CR}; - self.ci.finalize(&mut self.pool.nid_stack, self.tys, self.files); + self.ci.finalize(&mut self.pool.scratch1, self.tys, self.files); //let mut to_remove = vec![]; for (id, node) in self.ci.nodes.iter() { @@ -4787,7 +4827,11 @@ impl<'a> Codegen<'a> { if self.errors.borrow().len() == prev_err_len { self.ci.nodes.check_final_integrity(self.ty_display(ty::Id::VOID)); self.ci.nodes.graphviz(self.ty_display(ty::Id::VOID)); - self.ci.nodes.gcm(&mut self.pool.nid_stack, &mut self.pool.nid_set); + self.ci.nodes.gcm( + &mut self.pool.scratch1, + &mut self.pool.scratch2, + &mut self.pool.nid_set, + ); self.ci.nodes.check_loop_depth_integrity(self.ty_display(ty::Id::VOID)); self.ci.nodes.basic_blocks(); self.ci.nodes.graphviz(self.ty_display(ty::Id::VOID)); diff --git a/lang/src/son/hbvm.rs b/lang/src/son/hbvm.rs index c8f1073..368d43c 100644 --- a/lang/src/son/hbvm.rs +++ b/lang/src/son/hbvm.rs @@ -362,6 +362,9 @@ impl Nodes { let mut seen = BitSet::default(); seen.clear(self.values.len()); + let cfg_idx = outputs.iter().position(|&n| self.is_cfg(n)).unwrap(); + outputs.swap(cfg_idx, 0); + for &o in outputs.iter() { if (!self.is_cfg(o) && self[o].outputs.iter().any(|&oi| { @@ -407,6 +410,28 @@ impl Nodes { self[from] ); + let bf = &buf; + debug_assert_eq!( + bf.iter() + .enumerate() + .filter(|(_, &b)| !self[b].kind.is_pinned()) + .flat_map(|(i, &b)| self[b] + .inputs + .iter() + .filter(|&&b| !self[b].kind.is_pinned()) + .filter_map(move |&inp| bf + .iter() + .position(|&n| inp == n) + .filter(|&j| i > j) + .map(|j| (bf[i], bf[j])))) + .collect::>(), + vec![], + "{:?}", + bf + ); + + debug_assert!(self.is_cfg(bf[0]) || self[bf[0]].kind == Kind::Phi, "{:?}", self[bf[0]]); + if outputs.len() != buf.len() { panic!("{:?} {:?}", outputs, buf); } diff --git a/lang/src/son/hbvm/regalloc.rs b/lang/src/son/hbvm/regalloc.rs index eac2e3a..8314320 100644 --- a/lang/src/son/hbvm/regalloc.rs +++ b/lang/src/son/hbvm/regalloc.rs @@ -22,6 +22,7 @@ impl HbvmBackend { files: &[parser::Ast], ) -> (usize, bool) { let tail = Function::build(nodes, tys, &mut self.ralloc, sig); + nodes.basic_blocks(); let strip_load = |value| match nodes[value].kind { Kind::Load { .. } if nodes[value].ty.loc(tys) == Loc::Stack => nodes[value].inputs[1], @@ -366,6 +367,8 @@ impl<'a> Function<'a> { func.visited.clear(nodes.values.len()); let mut s = Self { tail: true, nodes, tys, sig, func }; s.emit_node(VOID); + debug_assert!(s.func.blocks.array_chunks().all(|[a, b]| a.end == b.start)); + log::info!("{s:?}"); s.tail } @@ -513,9 +516,9 @@ impl Nodes { } fn use_block_of(&self, inst: Nid, uinst: Nid) -> Nid { - let mut block = self.use_block(inst, uinst); + let mut block = self.use_block(inst, uinst, None); while !self[block].kind.starts_basic_block() { - block = self.idom(block); + block = self.idom(block, None); } block } @@ -538,7 +541,7 @@ impl Nodes { fn idom_of(&self, mut nid: Nid) -> Nid { while !self[nid].kind.starts_basic_block() { - nid = self.idom(nid); + nid = self.idom(nid, None); } nid } @@ -643,12 +646,18 @@ impl<'a> Regalloc<'a> { let mut range = b.range(); debug_assert!(range.start < range.end); range.start = range.start.max(s.instr_of(inst).map_or(0, |n| n + 1) as usize); - debug_assert!(range.start < range.end, "{:?}", range); + debug_assert!( + range.start < range.end, + "{:?} {:?} {n} {inst}", + range, + self.nodes[inst] + ); let new = range.end.min( s.instr_of(uinst) .filter(|_| { n == cursor - && self.nodes.loop_depth(dom) == self.nodes.loop_depth(cursor) + && self.nodes.loop_depth(dom, None) + == self.nodes.loop_depth(cursor, None) }) .map_or(Nid::MAX, |n| n + 1) as usize, ); @@ -700,10 +709,14 @@ impl<'a> Regalloc<'a> { debug_assert!(self.res.dfs_buf.is_empty()); self.res.dfs_buf.push(from); - debug_assert!(self.nodes.dominates(until, from)); + debug_assert!(self.nodes.dominates(until, from, None)); while let Some(nid) = self.res.dfs_buf.pop() { - debug_assert!(self.nodes.dominates(until, nid), "{until} {:?}", self.nodes[until]); + debug_assert!( + self.nodes.dominates(until, nid, None), + "{until} {:?}", + self.nodes[until] + ); each(self, nid, self.res.blocks[self.block_of(nid) as usize]); if nid == until { continue; diff --git a/lang/tests/son_tests_different_function_destinations.txt b/lang/tests/son_tests_different_function_destinations.txt index adc5687..5919ee1 100644 --- a/lang/tests/son_tests_different_function_destinations.txt +++ b/lang/tests/son_tests_different_function_destinations.txt @@ -1,6 +1,6 @@ main: - ADDI64 r254, r254, -144d - ST r31, r254, 80a, 64h + ADDI64 r254, r254, -136d + ST r31, r254, 80a, 56h LRA r32, r0, :glob_stru JAL r31, r0, :new_stru ST r1, r32, 0a, 16h @@ -16,58 +16,57 @@ main: LI64 r32, 200d CP r1, r32 JMP :1 - 2: LI64 r35, 3d - LI64 r36, 1d - ST r36, r32, 0a, 8h - ST r36, r32, 8a, 8h - ADDI64 r37, r254, 32d - ST r36, r254, 32a, 8h - ST r36, r254, 40a, 8h - ST r36, r254, 48a, 8h - ST r36, r254, 56a, 8h - ST r36, r254, 64a, 8h - ST r36, r254, 72a, 8h + 2: LI64 r34, 1d + ST r34, r32, 0a, 8h + ST r34, r32, 8a, 8h + ADDI64 r35, r254, 32d + ST r34, r254, 32a, 8h + ST r34, r254, 40a, 8h + ST r34, r254, 48a, 8h + ST r34, r254, 56a, 8h + ST r34, r254, 64a, 8h + ST r34, r254, 72a, 8h + LI64 r36, 3d CP r32, r33 - 8: JNE r32, r35, :3 + 8: JNE r32, r36, :3 LD r32, r254, 64a, 8h JEQ r32, r33, :4 LI64 r32, 100d CP r1, r32 JMP :1 - 4: ST r33, r254, 0a, 8h + 4: ST r34, r254, 32a, 8h + ST r34, r254, 40a, 8h + ST r34, r254, 48a, 8h + ST r34, r254, 56a, 8h + ST r34, r254, 64a, 8h + ST r34, r254, 72a, 8h + ST r33, r254, 0a, 8h ST r33, r254, 8a, 8h ST r33, r254, 16a, 8h ST r33, r254, 24a, 8h - ST r36, r254, 32a, 8h - ST r36, r254, 40a, 8h - ST r36, r254, 48a, 8h - ST r36, r254, 56a, 8h - ST r36, r254, 64a, 8h - ST r36, r254, 72a, 8h CP r32, r33 - 7: LD r38, r254, 64a, 8h - JNE r32, r35, :5 - JEQ r38, r33, :6 + 7: LD r37, r254, 64a, 8h + JNE r32, r36, :5 + JEQ r37, r33, :6 LI64 r32, 10d CP r1, r32 JMP :1 6: CP r1, r33 JMP :1 - 5: ADD64 r34, r32, r36 - MULI64 r32, r32, 16d - ADD64 r32, r37, r32 - ST r33, r32, 0a, 8h - ST r33, r32, 8a, 8h - CP r32, r34 + 5: MULI64 r37, r32, 16d + ADD64 r37, r35, r37 + ST r33, r37, 0a, 8h + ST r33, r37, 8a, 8h + ADD64 r32, r32, r34 JMP :7 - 3: MULI64 r34, r32, 16d - ADD64 r34, r37, r34 + 3: MULI64 r37, r32, 16d + ADD64 r37, r35, r37 JAL r31, r0, :new_stru - ST r1, r34, 0a, 16h - ADD64 r32, r32, r36 + ST r1, r37, 0a, 16h + ADD64 r32, r32, r34 JMP :8 - 1: LD r31, r254, 80a, 64h - ADDI64 r254, r254, 144d + 1: LD r31, r254, 80a, 56h + ADDI64 r254, r254, 136d JALA r0, r31, 0a new_stru: ADDI64 r254, r254, -16d @@ -77,6 +76,6 @@ new_stru: LD r1, r13, 0a, 16h ADDI64 r254, r254, 16d JALA r0, r31, 0a -code size: 739 +code size: 736 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_generic_type_mishap.txt b/lang/tests/son_tests_generic_type_mishap.txt index 4841b9f..9028e5c 100644 --- a/lang/tests/son_tests_generic_type_mishap.txt +++ b/lang/tests/son_tests_generic_type_mishap.txt @@ -10,9 +10,9 @@ opaque: process: ADDI64 r254, r254, -48d ST r31, r254, 16a, 32h - LI64 r32, 1000d ADDI64 r33, r254, 0d ST r0, r254, 0a, 1h + LI64 r32, 1000d 4: JGTU r32, r0, :0 JMP :1 0: CP r2, r33 diff --git a/lang/tests/son_tests_generic_types.txt b/lang/tests/son_tests_generic_types.txt index 4b6101d..0c2f673 100644 --- a/lang/tests/son_tests_generic_types.txt +++ b/lang/tests/son_tests_generic_types.txt @@ -69,8 +69,8 @@ new: ADDI64 r254, r254, 24d JALA r0, r31, 0a push: - ADDI64 r254, r254, -104d - ST r31, r254, 0a, 104h + ADDI64 r254, r254, -88d + ST r31, r254, 0a, 88h CP r38, r2 CP r39, r3 LI64 r37, 1d @@ -95,12 +95,12 @@ push: LD r32, r38, 0a, 8h ADD64 r41, r32, r33 CP r34, r35 - 7: LD r42, r38, 0a, 8h - LD r43, r38, 8a, 8h + 7: LD r33, r38, 0a, 8h + LD r36, r38, 8a, 8h JNE r41, r32, :5 - JEQ r43, r0, :6 - MUL64 r32, r43, r40 - CP r2, r42 + JEQ r36, r0, :6 + MUL64 r32, r36, r40 + CP r2, r33 CP r3, r32 CP r4, r40 JAL r31, r0, :free @@ -122,8 +122,8 @@ push: ADD64 r32, r32, r37 ST r32, r38, 8a, 8h CP r1, r33 - 4: LD r31, r254, 0a, 104h - ADDI64 r254, r254, 104d + 4: LD r31, r254, 0a, 88h + ADDI64 r254, r254, 88d JALA r0, r31, 0a code size: 923 ret: 69 diff --git a/lang/tests/son_tests_idk.txt b/lang/tests/son_tests_idk.txt index a58d12f..0f7074b 100644 --- a/lang/tests/son_tests_idk.txt +++ b/lang/tests/son_tests_idk.txt @@ -1,17 +1,17 @@ main: ADDI64 r254, r254, -128d - LI8 r15, 69b - LI64 r16, 128d + ADDI64 r15, r254, 0d + LI8 r16, 69b + LI64 r17, 128d CP r13, r0 - ADDI64 r17, r254, 0d - 2: LD r18, r254, 42a, 1h - JLTU r13, r16, :0 - ANDI r13, r18, 255d + 2: LD r14, r254, 42a, 1h + JLTU r13, r17, :0 + ANDI r13, r14, 255d CP r1, r13 JMP :1 0: ADDI64 r14, r13, 1d - ADD64 r13, r17, r13 - ST r15, r13, 0a, 1h + ADD64 r13, r15, r13 + ST r16, r13, 0a, 1h CP r13, r14 JMP :2 1: ADDI64 r254, r254, 128d diff --git a/lang/tests/son_tests_inlining_issues.txt b/lang/tests/son_tests_inlining_issues.txt index 906ddf0..b6fa85d 100644 --- a/lang/tests/son_tests_inlining_issues.txt +++ b/lang/tests/son_tests_inlining_issues.txt @@ -37,26 +37,26 @@ put_filled_rect: ST r7, r254, 75a, 1h ADDI64 r7, r254, 75d CP r16, r7 - LI64 r17, 25d - LI64 r18, 2d - LI64 r19, 8d - ADDI64 r20, r254, 25d - ADDI64 r21, r254, 50d - LI8 r22, 5b - ST r22, r254, 25a, 1h - LD r23, r13, 0a, 8h - ST r23, r254, 26a, 4h - LI64 r24, 1d - ST r24, r254, 30a, 4h + ADDI64 r17, r254, 25d + LI8 r18, 5b + ST r18, r254, 25a, 1h + LD r19, r13, 0a, 8h + ST r19, r254, 26a, 4h + LI64 r20, 1d + ST r20, r254, 30a, 4h ST r16, r254, 34a, 8h - ST r22, r254, 50a, 1h - ST r23, r254, 51a, 4h - ST r24, r254, 55a, 4h + LI64 r21, 25d + ADDI64 r22, r254, 50d + ST r18, r254, 50a, 1h + ST r19, r254, 51a, 4h + ST r20, r254, 55a, 4h ST r16, r254, 59a, 8h + LI64 r23, 2d + LI64 r24, 8d LD r25, r15, 8a, 8h LD r13, r13, 8a, 8h ADD64 r26, r13, r25 - SUB64 r26, r26, r24 + SUB64 r26, r26, r20 LD r27, r14, 8a, 8h MUL64 r26, r27, r26 LD r14, r14, 0a, 8h @@ -66,36 +66,36 @@ put_filled_rect: MUL64 r25, r27, r25 ADD64 r14, r14, r25 ADD64 r14, r28, r14 - 3: JGTU r13, r24, :0 - JNE r13, r24, :1 + 3: JGTU r13, r20, :0 + JNE r13, r20, :1 ADDI64 r13, r254, 0d - ST r22, r254, 0a, 1h - ST r23, r254, 1a, 4h - ST r24, r254, 5a, 4h + ST r18, r254, 0a, 1h + ST r19, r254, 1a, 4h + ST r20, r254, 5a, 4h ST r16, r254, 9a, 8h ST r14, r254, 17a, 8h - CP r2, r19 - CP r3, r18 + CP r2, r24 + CP r3, r23 CP r4, r13 - CP r5, r17 + CP r5, r21 ECA JMP :1 1: JMP :2 0: ST r14, r254, 67a, 8h - CP r2, r19 - CP r3, r18 - CP r4, r21 - CP r5, r17 + CP r2, r24 + CP r3, r23 + CP r4, r22 + CP r5, r21 ECA ST r15, r254, 42a, 8h - CP r2, r19 - CP r3, r18 - CP r4, r20 - CP r5, r17 + CP r2, r24 + CP r3, r23 + CP r4, r17 + CP r5, r21 ECA + SUB64 r13, r13, r23 SUB64 r15, r15, r27 ADD64 r14, r27, r14 - SUB64 r13, r13, r18 JMP :3 2: ADDI64 r254, r254, 108d JALA r0, r31, 0a diff --git a/lang/tests/son_tests_loops.txt b/lang/tests/son_tests_loops.txt index 7463236..de4541c 100644 --- a/lang/tests/son_tests_loops.txt +++ b/lang/tests/son_tests_loops.txt @@ -7,8 +7,8 @@ fib: 2: JNE r13, r15, :0 CP r1, r14 JMP :1 - 0: ADD64 r14, r16, r14 - SUB64 r13, r13, r17 + 0: SUB64 r13, r13, r17 + ADD64 r14, r16, r14 SWA r14, r16 JMP :2 1: JALA r0, r31, 0a diff --git a/lang/tests/son_tests_sort_something_viredly.txt b/lang/tests/son_tests_sort_something_viredly.txt index 2351a62..b05ab0b 100644 --- a/lang/tests/son_tests_sort_something_viredly.txt +++ b/lang/tests/son_tests_sort_something_viredly.txt @@ -23,8 +23,8 @@ sqrt: ADD64 r18, r18, r15 SLU64 r18, r18, r16 JLTU r13, r18, :2 - ADD64 r14, r15, r14 SUB64 r13, r13, r18 + ADD64 r14, r15, r14 JMP :2 2: SRUI64 r15, r15, 1b JMP :3 diff --git a/lang/tests/son_tests_string_flip.txt b/lang/tests/son_tests_string_flip.txt index 351d7a3..d737d87 100644 --- a/lang/tests/son_tests_string_flip.txt +++ b/lang/tests/son_tests_string_flip.txt @@ -1,46 +1,45 @@ main: ADDI64 r254, r254, -40d LI64 r17, 1d - LI64 r16, 4d + LI64 r15, 4d CP r14, r0 ADDI64 r18, r254, 0d CP r13, r14 - 6: JNE r13, r16, :0 - LI64 r19, 2d - ADDI64 r20, r254, 32d + 6: JNE r13, r15, :0 + ADDI64 r19, r254, 32d + LI64 r20, 2d CP r13, r14 4: LD r15, r254, 16a, 8h JNE r13, r17, :1 CP r1, r15 JMP :2 - 1: MUL64 r21, r13, r19 - ADD64 r16, r13, r17 - SUB64 r13, r19, r16 - MUL64 r22, r13, r19 + 1: ADD64 r16, r13, r17 + SUB64 r15, r20, r16 + MUL64 r21, r15, r20 + MUL64 r22, r13, r20 CP r13, r14 - 5: JNE r13, r19, :3 + 5: JNE r13, r20, :3 CP r13, r16 JMP :4 3: ADD64 r15, r13, r17 - ADD64 r23, r21, r13 - ADD64 r13, r22, r13 + ADD64 r23, r22, r13 + ADD64 r13, r21, r13 MULI64 r23, r23, 8d MULI64 r13, r13, 8d ADD64 r23, r18, r23 ADD64 r13, r18, r13 - BMC r23, r20, 8h + BMC r23, r19, 8h BMC r13, r23, 8h - BMC r20, r13, 8h + BMC r19, r13, 8h CP r13, r15 JMP :5 - 0: ADD64 r15, r13, r17 - MULI64 r19, r13, 8d - ADD64 r19, r18, r19 - ST r13, r19, 0a, 8h - CP r13, r15 + 0: MULI64 r16, r13, 8d + ADD64 r16, r18, r16 + ST r13, r16, 0a, 8h + ADD64 r13, r13, r17 JMP :6 2: ADDI64 r254, r254, 40d JALA r0, r31, 0a -code size: 270 +code size: 267 ret: 0 status: Ok(()) diff --git a/lang/tests/son_tests_struct_patterns.txt b/lang/tests/son_tests_struct_patterns.txt index 1a0cb13..55d641d 100644 --- a/lang/tests/son_tests_struct_patterns.txt +++ b/lang/tests/son_tests_struct_patterns.txt @@ -28,8 +28,8 @@ fib_iter: 2: JNE r13, r15, :0 CP r1, r14 JMP :1 - 0: ADD64 r14, r16, r14 - SUB64 r13, r13, r17 + 0: SUB64 r13, r13, r17 + ADD64 r14, r16, r14 SWA r14, r16 JMP :2 1: JALA r0, r31, 0a diff --git a/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt b/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt index 22ae15a..1848295 100644 --- a/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt +++ b/lang/tests/son_tests_tests_ptr_to_ptr_copy.txt @@ -8,9 +8,9 @@ main: 4: JLTU r13, r17, :0 LI64 r16, 10d CP r13, r14 - 3: LD r17, r254, 2048a, 1h + 3: LD r15, r254, 2048a, 1h JLTU r13, r16, :1 - ANDI r13, r17, 255d + ANDI r13, r15, 255d CP r1, r13 JMP :2 1: ADD64 r15, r13, r14 diff --git a/lang/tests/son_tests_very_nested_loops.txt b/lang/tests/son_tests_very_nested_loops.txt index 246a255..3ba6fd7 100644 --- a/lang/tests/son_tests_very_nested_loops.txt +++ b/lang/tests/son_tests_very_nested_loops.txt @@ -1,24 +1,24 @@ main: - LI64 r16, 4621819117588971520d - LI64 r17, 1d + LI64 r16, 1d + LI64 r17, 4621819117588971520d LI64 r18, 3d CP r14, r0 CP r15, r14 3: JNE r15, r18, :0 - CP r1, r17 + CP r1, r16 JMP :1 0: ITF64 r19, r15 CP r13, r14 5: JNE r13, r18, :2 - ADD64 r15, r15, r17 + ADD64 r15, r15, r16 JMP :3 2: ITF64 r20, r13 FMUL64 r20, r20, r19 - FCMPLT64 r20, r20, r16 + FCMPLT64 r20, r20, r17 NOT r20, r20 ANDI r20, r20, 255d JNE r20, r0, :4 - ADD64 r13, r13, r17 + ADD64 r13, r13, r16 JMP :5 4: CP r1, r14 1: JALA r0, r31, 0a diff --git a/lang/tests/son_tests_wrong_dead_code_elimination.txt b/lang/tests/son_tests_wrong_dead_code_elimination.txt index df3874a..2f8bf09 100644 --- a/lang/tests/son_tests_wrong_dead_code_elimination.txt +++ b/lang/tests/son_tests_wrong_dead_code_elimination.txt @@ -1,8 +1,8 @@ main: ADDI64 r254, r254, -1d + ST r0, r254, 0a, 1h LI64 r14, 255d LI8 r13, 1b - ST r0, r254, 0a, 1h 2: LD r15, r254, 0a, 1h ANDI r16, r15, 255d CMPU r17, r16, r14