From 7719d2617798069ab8b50a39e9a2d1f429647499 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 15 Dec 2021 23:21:24 -0800 Subject: [PATCH] Rewrote wasm region generation/stackifier from scratch starting from a loop-nest analysis --- src/backend/stackify.rs | 2 - src/cfg/mod.rs | 5 + src/cfg/postorder.rs | 5 + src/cfg/structured.rs | 240 ++++++++++++++++++++++++++++++++++++++++ src/ir.rs | 11 +- 5 files changed, 259 insertions(+), 4 deletions(-) create mode 100644 src/cfg/structured.rs diff --git a/src/backend/stackify.rs b/src/backend/stackify.rs index f114115..79d98ba 100644 --- a/src/backend/stackify.rs +++ b/src/backend/stackify.rs @@ -1,8 +1,6 @@ //! Stackifier-like algorithm to recover (or create) structured //! control flow out of a CFG. -use std::collections::BTreeSet; - use crate::{cfg::CFGInfo, ir::*}; #[derive(Clone, Debug)] diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs index 8edf633..619c438 100644 --- a/src/cfg/mod.rs +++ b/src/cfg/mod.rs @@ -8,6 +8,7 @@ use smallvec::SmallVec; pub mod domtree; pub mod postorder; +pub mod structured; #[derive(Clone, Debug)] pub struct CFGInfo { @@ -67,6 +68,10 @@ impl CFGInfo { } } + pub fn len(&self) -> usize { + self.block_succs.len() + } + pub fn dominates(&self, a: BlockId, b: BlockId) -> bool { domtree::dominates(&self.domtree[..], a, b) } diff --git a/src/cfg/postorder.rs b/src/cfg/postorder.rs index 8356274..755aea6 100644 --- a/src/cfg/postorder.rs +++ b/src/cfg/postorder.rs @@ -17,6 +17,7 @@ pub fn calculate<'a, SuccFn: Fn(BlockId) -> &'a [BlockId]>( let mut visited = vec![]; visited.resize(num_blocks, false); + #[derive(Debug)] struct State<'a> { block: BlockId, succs: &'a [BlockId], @@ -32,11 +33,14 @@ pub fn calculate<'a, SuccFn: Fn(BlockId) -> &'a [BlockId]>( }); while let Some(ref mut state) = stack.last_mut() { + log::trace!("postorder: TOS is {:?}", state); // Perform one action: push to new succ, skip an already-visited succ, or pop. if state.next_succ < state.succs.len() { let succ = state.succs[state.next_succ]; + log::trace!(" -> succ {}", succ); state.next_succ += 1; if !visited[succ] { + log::trace!(" -> visiting"); visited[succ] = true; stack.push(State { block: succ, @@ -45,6 +49,7 @@ pub fn calculate<'a, SuccFn: Fn(BlockId) -> &'a [BlockId]>( }); } } else { + log::trace!("retreating from {}", state.block); ret.push(state.block); stack.pop(); } diff --git a/src/cfg/structured.rs b/src/cfg/structured.rs new file mode 100644 index 0000000..f222187 --- /dev/null +++ b/src/cfg/structured.rs @@ -0,0 +1,240 @@ +//! Recovery of structured control flow information. Loop nest +//! computation, block order linearization and loop/block region +//! generation. + +use fxhash::{FxHashMap, FxHashSet}; + +use crate::{cfg::CFGInfo, BlockId}; + +#[derive(Clone, Debug)] +pub enum Node { + Leaf(BlockId), + Loop(BlockId, Vec), +} + +impl Node { + pub fn header(&self) -> BlockId { + match self { + &Node::Leaf(block) => block, + &Node::Loop(block, ..) => block, + } + } + pub fn is_loop(&self) -> bool { + match self { + &Node::Loop(..) => true, + _ => false, + } + } + pub fn is_leaf(&self) -> bool { + match self { + &Node::Leaf(..) => true, + _ => false, + } + } +} + +pub struct LoopNest { + nodes: Vec, +} + +impl LoopNest { + pub fn compute(cfg: &CFGInfo) -> LoopNest { + // Find loop backedges: any successor edge from a higher- to + // lower-numbered block in RPO. + let mut backedges: Vec<(BlockId, BlockId)> = vec![]; + for (block_rpo, &block) in cfg.postorder.iter().rev().enumerate() { + for &succ in &cfg.block_succs[block] { + let succ_po = cfg.postorder_pos[succ] + .expect("Edge from reachable to unreachable block is impossible"); + let succ_rpo = cfg.postorder.len() - 1 - succ_po; + if succ_rpo <= block_rpo { + log::trace!("LoopNest compute: backedge from {} to {}", block, succ); + backedges.push((block, succ)); + } + } + } + + // For each backedge, find the backedge's natural loop and + // accumulate those blocks into the set of blocks in each loop + // body. + let mut loop_bodies: FxHashMap> = FxHashMap::default(); + for &(from, to) in &backedges { + assert!( + cfg.dominates(to, from), + "Irreducible CFG edge from {} to {}", + from, + to + ); + let body = loop_bodies + .entry(to) + .or_insert_with(|| FxHashSet::default()); + Self::collect_loop_body(body, to, cfg); + log::trace!("loop body for header {}: {:?}", to, body); + } + + // Now build the loop nest. + let mut nodes = vec![]; + let mut visited = FxHashSet::default(); + for &block in cfg.postorder.iter().rev() { + if visited.contains(&block) { + continue; + } + if loop_bodies.contains_key(&block) { + nodes.push(Self::loop_node(cfg, block, &loop_bodies, &mut visited)); + } else { + nodes.push(Node::Leaf(block)); + visited.insert(block); + } + } + + log::trace!("loop nest nodes: {:?}", nodes); + LoopNest { nodes } + } + + fn collect_loop_body(blocks: &mut FxHashSet, header: BlockId, cfg: &CFGInfo) { + let mut workset = vec![header]; + while let Some(block) = workset.pop() { + for &pred in &cfg.block_preds[block] { + if blocks.contains(&pred) { + continue; + } + if cfg.dominates(header, pred) { + blocks.insert(pred); + workset.push(pred); + } + } + } + } + + fn loop_node( + cfg: &CFGInfo, + header: BlockId, + loops: &FxHashMap>, + visited: &mut FxHashSet, + ) -> Node { + let mut body_blocks = loops + .get(&header) + .unwrap() + .iter() + .cloned() + .collect::>(); + body_blocks.sort_by_key(|&block| -(cfg.postorder_pos[block].unwrap() as isize)); + + let mut body_nodes = vec![]; + for block in body_blocks { + if visited.contains(&block) { + continue; + } + if block != header && loops.contains_key(&block) { + body_nodes.push(Self::loop_node(cfg, block, loops, visited)); + } else { + body_nodes.push(Node::Leaf(block)); + visited.insert(block); + } + } + + Node::Loop(header, body_nodes) + } +} + +fn compute_forward_edge_targets(cfg: &CFGInfo) -> FxHashSet { + let mut ret = FxHashSet::default(); + for (block_rpo, &block) in cfg.postorder.iter().rev().enumerate() { + for &succ in &cfg.block_succs[block] { + let succ_po = cfg.postorder_pos[succ].unwrap(); + let succ_rpo = cfg.postorder.len() - 1 - succ_po; + if succ_rpo > block_rpo { + ret.insert(succ); + } + } + } + ret +} + +#[derive(Clone, Debug)] +pub enum WasmRegion { + /// Block starting at the first `BlockId`, with a fallthrough/exit + /// label at the second `BlockId`. + Block(BlockId, Option, Vec), + /// Loop with a header at the given `BlockId`. + Loop(BlockId, Vec), + /// An individual basic block, just included inline (with no + /// Wasm-level structure). + Leaf(BlockId), +} + +impl WasmRegion { + pub fn header(&self) -> BlockId { + match self { + &WasmRegion::Block(block, ..) => block, + &WasmRegion::Loop(block, ..) => block, + &WasmRegion::Leaf(block) => block, + } + } + + pub fn compute(cfg: &CFGInfo, loop_nest: &LoopNest) -> WasmRegion { + assert!(!loop_nest.nodes.is_empty()); + assert!(loop_nest.nodes[0].header() == 0); + + let forward_targets = compute_forward_edge_targets(cfg); + log::trace!( + "WasmRegion::compute: forward_targets = {:?}", + forward_targets + ); + + let top = WasmRegion::Block( + 0, + None, + loop_nest + .nodes + .iter() + .map(|node| Self::compute_for_node(cfg, &forward_targets, node)) + .collect::>(), + ); + + log::trace!("Wasm region: {:?}", top); + top + } + + fn compute_for_node( + cfg: &CFGInfo, + forward_targets: &FxHashSet, + node: &Node, + ) -> WasmRegion { + log::trace!("WasmRegion::compute_for_node: node {:?}", node); + match node { + &Node::Leaf(block) => { + log::trace!(" -> leaf {}", block); + WasmRegion::Leaf(block) + } + &Node::Loop(block, ref subnodes) => { + // Scan subnodes and find forward-edge targets that + // are at this level of the loop nest. + let block_targets = subnodes + .iter() + .map(|n| n.header()) + .filter(|n| forward_targets.contains(&n)) + .collect::>(); + log::trace!(" -> block targets are {:?}", block_targets,); + + let mut subregions: Vec = vec![]; + for subnode in subnodes { + if subnode.header() != block && block_targets.contains(&subnode.header()) { + let subsubregions = std::mem::take(&mut subregions); + assert!(!subsubregions.is_empty()); + let first = subsubregions[0].header(); + let enclosing_block = + WasmRegion::Block(first, Some(subnode.header()), subsubregions); + subregions.push(enclosing_block); + } + + let subregion = Self::compute_for_node(cfg, forward_targets, subnode); + subregions.push(subregion); + } + + log::trace!(" -> loop header {} subregions {:?}", block, subregions); + WasmRegion::Loop(block, subregions) + } + } + } +} diff --git a/src/ir.rs b/src/ir.rs index 138c508..ff5971a 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -2,7 +2,13 @@ use std::collections::hash_map::Entry; -use crate::{backend::Shape, cfg::CFGInfo, frontend, Operator}; +use crate::{ + cfg::{ + structured::{LoopNest, WasmRegion}, + CFGInfo, + }, + frontend, Operator, +}; use anyhow::Result; use fxhash::FxHashMap; use wasmparser::{FuncType, Type}; @@ -477,7 +483,8 @@ impl<'a> Module<'a> { match func { &FuncDecl::Body(_, ref body) => { let cfg = CFGInfo::new(body); - let _shape = Shape::compute(body, &cfg); + let loopnest = LoopNest::compute(&cfg); + let _regions = WasmRegion::compute(&cfg, &loopnest); } _ => {} }