From 993aa223797715d9111b771664af4a30463ff764 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 14 Nov 2021 23:56:56 -0800 Subject: [PATCH] WIP. --- Cargo.toml | 1 + src/backend.rs | 83 ++++++++++++------------------ src/cfg/domtree.rs | 118 +++++++++++++++++++++++++++++++++++++++++++ src/cfg/mod.rs | 91 +++++++++++++++++++++++++++++++++ src/cfg/postorder.rs | 54 ++++++++++++++++++++ src/frontend.rs | 22 ++++++-- src/ir.rs | 59 ++++++++++++++++++---- src/lib.rs | 1 + 8 files changed, 365 insertions(+), 64 deletions(-) create mode 100644 src/cfg/domtree.rs create mode 100644 src/cfg/mod.rs create mode 100644 src/cfg/postorder.rs diff --git a/Cargo.toml b/Cargo.toml index e637744..7c79815 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,4 @@ structopt = "0.3" log = "0.4" env_logger = "0.9" fxhash = "0.2" +smallvec = "1.7" \ No newline at end of file diff --git a/src/backend.rs b/src/backend.rs index b079298..aae9aa7 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -1,54 +1,37 @@ //! IR-to-Wasm transform. -use crate::ir::*; -use fxhash::{FxHashMap, FxHashSet}; +use crate::{cfg::CFGInfo, ir::*}; -pub fn treeify_function(func: &mut FunctionBody) -> FxHashSet<(BlockId, InstId)> { - // First, count uses of all values. - let mut uses: FxHashMap<(BlockId, InstId, usize), usize> = FxHashMap::default(); - for block in &func.blocks { - for inst in &block.insts { - for input in &inst.inputs { - match input { - &Operand::Value(value_id) => { - if let ValueKind::Inst(src_block, src_inst, idx) = - &func.values[value_id].kind - { - *uses.entry((*src_block, *src_inst, *idx)).or_insert(0) += 1; - } - } - _ => {} - } - } - } - - for arg in block.terminator.args() { - match arg { - Operand::Value(value_id) => { - if let ValueKind::Inst(src_block, src_inst, idx) = &func.values[value_id].kind { - *uses.entry((*src_block, *src_inst, *idx)).or_insert(0) += 1; - } - } - _ => {} - } - } - } - - // Next, treeify all insts with only one use. - let mut single_use_insts: FxHashSet<(BlockId, InstId)> = FxHashSet::default(); - for (block_idx, block) in func.blocks.iter().enumerate() { - for (inst_idx, inst) in block.insts.iter().enumerate() { - let all_one_use = (0..inst.outputs.len()).all(|output| { - uses.get(&(block_idx, inst_idx, output)) - .cloned() - .unwrap_or(0) - <= 1 - }); - if all_one_use { - single_use_insts.insert((block_idx, inst_idx)); - } - } - } - - single_use_insts +#[derive(Clone, Debug)] +pub enum Shape { + Block { head: BlockId, children: Vec }, + Loop { head: BlockId, children: Vec }, + Leaf { block: BlockId, succs: Vec }, +} + +enum Region { + /// Forward-branch region. Extends from end (just prior to + /// terminator) of first block to just before second block. Can be + /// extended earlier, prior to the beginning, if needed. + Forward(BlockId, BlockId), + + /// Backward-branch region. Extends from start of first block to + /// end (after terminator) of second block. Can be extended past + /// the end if needed. TODO: actually record all jump-points. + Backward(BlockId, BlockId), +} + +impl Shape { + pub fn compute(f: &FunctionBody, cfg: &CFGInfo) -> Self { + // Process all non-contiguous edges in RPO block order. For + // forward and backward edges, emit Regions. + + // Sort regions by start. Then examine adjacent regions to + // resolve nesting. If out-of-order, we can extend a Forward + // region's start backward, or a Backward region's end + // forward. If still out-of-order, drop any conflicting + // Backward; we'll handle by duplication. + + todo!() + } } diff --git a/src/cfg/domtree.rs b/src/cfg/domtree.rs new file mode 100644 index 0000000..8736973 --- /dev/null +++ b/src/cfg/domtree.rs @@ -0,0 +1,118 @@ +/* + * Derives from the dominator tree implementation in regalloc.rs, which is + * licensed under the Apache Public License 2.0 with LLVM Exception. See: + * https://github.com/bytecodealliance/regalloc.rs + */ + +// This is an implementation of the algorithm described in +// +// A Simple, Fast Dominance Algorithm +// Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy +// Department of Computer Science, Rice University, Houston, Texas, USA +// TR-06-33870 +// https://www.cs.rice.edu/~keith/EMBED/dom.pdf + +use crate::ir::{BlockId, INVALID_BLOCK}; + +// Helper +fn merge_sets( + idom: &[BlockId], // map from BlockId to BlockId + block_to_rpo: &[Option], + mut node1: BlockId, + mut node2: BlockId, +) -> BlockId { + while node1 != node2 { + if node1 == INVALID_BLOCK || node2 == INVALID_BLOCK { + return INVALID_BLOCK; + } + let rpo1 = block_to_rpo[node1].unwrap(); + let rpo2 = block_to_rpo[node2].unwrap(); + if rpo1 > rpo2 { + node1 = idom[node1]; + } else if rpo2 > rpo1 { + node2 = idom[node2]; + } + } + assert!(node1 == node2); + node1 +} + +pub fn calculate<'a, PredFn: Fn(BlockId) -> &'a [BlockId]>( + num_blocks: usize, + preds: PredFn, + post_ord: &[BlockId], + start: BlockId, +) -> Vec { + // We have post_ord, which is the postorder sequence. + + // Compute maps from RPO to block number and vice-versa. + let mut block_to_rpo = vec![None; num_blocks]; + block_to_rpo.resize(num_blocks, None); + for (i, rpo_block) in post_ord.iter().rev().enumerate() { + block_to_rpo[*rpo_block] = Some(i as u32); + } + + let mut idom = vec![INVALID_BLOCK; num_blocks]; + + // The start node must have itself as a parent. + idom[start] = start; + + let mut changed = true; + while changed { + changed = false; + // Consider blocks in reverse postorder. Skip any that are unreachable. + for &node in post_ord.iter().rev() { + let rponum = block_to_rpo[node].unwrap(); + + let mut parent = INVALID_BLOCK; + for &pred in preds(node).iter() { + let pred_rpo = match block_to_rpo[pred] { + Some(r) => r, + None => { + // Skip unreachable preds. + continue; + } + }; + if pred_rpo < rponum { + parent = pred; + break; + } + } + + if parent != INVALID_BLOCK { + for &pred in preds(node).iter() { + if pred == parent { + continue; + } + if idom[pred] == INVALID_BLOCK { + continue; + } + parent = merge_sets(&idom, &block_to_rpo[..], parent, pred); + } + } + + if parent != INVALID_BLOCK && parent != idom[node] { + idom[node] = parent; + changed = true; + } + } + } + + // Now set the start node's dominator-tree parent to "invalid"; + // this allows the loop in `dominates` to terminate. + idom[start] = INVALID_BLOCK; + + idom +} + +pub fn dominates(idom: &[BlockId], a: BlockId, mut b: BlockId) -> bool { + loop { + if a == b { + return true; + } + if b == INVALID_BLOCK { + return false; + } + b = idom[b]; + } +} diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs new file mode 100644 index 0000000..898faa8 --- /dev/null +++ b/src/cfg/mod.rs @@ -0,0 +1,91 @@ +//! Lightweight CFG analyses. + +// Borrowed from regalloc2's cfg.rs, which is also Apache-2.0 with +// LLVM exception. + +use crate::ir::{BlockId, FunctionBody, Terminator}; +use smallvec::SmallVec; + +pub mod domtree; +pub mod postorder; + +#[derive(Clone, Debug)] +pub struct CFGInfo { + /// Predecessors for each block. + pub block_preds: Vec>, + /// Successors for each block. + pub block_succs: Vec>, + /// Blocks that end in return. + pub return_blocks: Vec, + /// Postorder traversal of blocks. + pub postorder: Vec, + /// Position of each block in postorder, if reachable. + pub postorder_pos: Vec>, + /// Domtree parents, indexed by block. + pub domtree: Vec, +} + +impl CFGInfo { + pub fn new(f: &FunctionBody) -> CFGInfo { + let mut block_preds = vec![SmallVec::new(); f.blocks.len()]; + let mut block_succs = vec![SmallVec::new(); f.blocks.len()]; + for block in 0..f.blocks.len() { + for succ in f.blocks[block].successors() { + block_preds[succ].push(block); + block_succs[block].push(succ); + } + } + + let mut return_blocks = vec![]; + for block in 0..f.blocks.len() { + if let Terminator::Return { .. } = &f.blocks[block].terminator { + return_blocks.push(block); + } + } + + let postorder = postorder::calculate(f.blocks.len(), 0, |block| &block_succs[block]); + + let mut postorder_pos = vec![None; f.blocks.len()]; + for (i, block) in postorder.iter().enumerate() { + postorder_pos[*block] = Some(i); + } + + let domtree = domtree::calculate( + f.blocks.len(), + |block| &&block_preds[block], + &postorder[..], + 0, + ); + + CFGInfo { + block_preds, + block_succs, + return_blocks, + postorder, + postorder_pos, + domtree, + } + } + + pub fn dominates(&self, a: BlockId, b: BlockId) -> bool { + domtree::dominates(&self.domtree[..], a, b) + } + + pub fn succs(&self, block: BlockId) -> &[BlockId] { + &self.block_succs[block] + } + + pub fn preds(&self, block: BlockId) -> &[BlockId] { + &self.block_preds[block] + } + + pub fn pred_count_with_entry(&self, block: BlockId) -> usize { + let is_entry = block == 0; + self.preds(block).len() + if is_entry { 1 } else { 0 } + } + + pub fn succ_count_with_return(&self, block: BlockId) -> usize { + let is_return = self.return_blocks.binary_search(&block).is_ok(); + self.succs(block).len() + if is_return { 1 } else { 0 } + } +} diff --git a/src/cfg/postorder.rs b/src/cfg/postorder.rs new file mode 100644 index 0000000..8356274 --- /dev/null +++ b/src/cfg/postorder.rs @@ -0,0 +1,54 @@ +//! Fast postorder computation. + +// Borrowed from regalloc2's postorder.rs, which is also Apache-2.0 +// with LLVM-exception. + +use crate::ir::BlockId; +use smallvec::{smallvec, SmallVec}; + +pub fn calculate<'a, SuccFn: Fn(BlockId) -> &'a [BlockId]>( + num_blocks: usize, + entry: BlockId, + succ_blocks: SuccFn, +) -> Vec { + let mut ret = vec![]; + + // State: visited-block map, and explicit DFS stack. + let mut visited = vec![]; + visited.resize(num_blocks, false); + + struct State<'a> { + block: BlockId, + succs: &'a [BlockId], + next_succ: usize, + } + let mut stack: SmallVec<[State; 64]> = smallvec![]; + + visited[entry] = true; + stack.push(State { + block: entry, + succs: succ_blocks(entry), + next_succ: 0, + }); + + while let Some(ref mut state) = stack.last_mut() { + // Perform one action: push to new succ, skip an already-visited succ, or pop. + if state.next_succ < state.succs.len() { + let succ = state.succs[state.next_succ]; + state.next_succ += 1; + if !visited[succ] { + visited[succ] = true; + stack.push(State { + block: succ, + succs: succ_blocks(succ), + next_succ: 0, + }); + } + } else { + ret.push(state.block); + stack.pop(); + } + } + + ret +} diff --git a/src/frontend.rs b/src/frontend.rs index 39867f8..0c2119e 100644 --- a/src/frontend.rs +++ b/src/frontend.rs @@ -2,6 +2,23 @@ #![allow(dead_code)] +/* + +- TODO: better local-variable handling: + - pre-pass to scan for locations of definitions of all locals. for + each frame, record set of vars that are def'd. + - during main pass: + - for an if/else, add blockparams to join block for all vars def'd + in either side. + - for a block, add blockparams to out-block for all vars def'd in + body of block. + - for a loop, add blockparams to header block for all vars def'd + in body. + - when generating a branch to any block, just emit current values + for every local in blockparams. + + */ + use crate::ir::*; use crate::op_traits::{op_inputs, op_outputs}; use anyhow::{bail, Result}; @@ -130,7 +147,6 @@ fn parse_body<'a, 'b>( builder.body.values.push(ValueDef { kind: ValueKind::Arg(arg_idx), ty: arg_ty, - local: Some(local_idx), }); trace!("defining local {} to value {}", local_idx, value); builder.locals.insert(local_idx, (arg_ty, value)); @@ -315,7 +331,6 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { self.body.values.push(ValueDef { ty, kind: ValueKind::Inst(block, inst, 0), - local: Some(*local_index), }); value } else { @@ -888,7 +903,6 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { self.body.values.push(ValueDef { kind: ValueKind::BlockParam(block, block_param_num), ty, - local: None, }); self.op_stack.push((ty, value_id)); block_param_num += 1; @@ -903,7 +917,6 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { self.body.values.push(ValueDef { kind: ValueKind::BlockParam(block, block_param_num), ty, - local: Some(local_id), }); block_param_num += 1; self.locals.insert(local_id, (ty, value_id)); @@ -939,7 +952,6 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { self.body.values.push(ValueDef { kind: ValueKind::Inst(block, inst, i), ty: output_ty, - local: None, }); self.op_stack.push((output_ty, val)); } diff --git a/src/ir.rs b/src/ir.rs index e8cb865..41275d5 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -12,6 +12,7 @@ pub type ValueId = usize; pub type LocalId = u32; pub const NO_VALUE: ValueId = usize::MAX; +pub const INVALID_BLOCK: BlockId = usize::MAX; #[derive(Clone, Debug, Default)] pub struct Module<'a> { @@ -48,7 +49,6 @@ pub struct FunctionBody<'a> { pub struct ValueDef { pub kind: ValueKind, pub ty: Type, - pub local: Option, } #[derive(Clone, Debug)] @@ -65,6 +65,55 @@ pub struct Block<'a> { pub terminator: Terminator, } +impl<'a> Block<'a> { + pub fn successors(&self) -> Vec { + self.terminator.successors() + } + + pub fn values<'b>(&'b self) -> impl Iterator + 'b { + self.insts + .iter() + .map(|inst| inst.outputs.iter().cloned()) + .flatten() + } + + pub fn visit_operands(&self, f: F) { + for inst in &self.insts { + for input in &inst.inputs { + f(input); + } + } + match &self.terminator { + &Terminator::CondBr { ref cond, .. } => f(cond), + &Terminator::Select { ref value, .. } => f(value), + &Terminator::Return { ref values, .. } => { + for value in values { + f(value); + } + } + _ => {} + } + } + + pub fn update_operands(&mut self, f: F) { + for inst in &mut self.insts { + for input in &mut inst.inputs { + f(input); + } + } + match &mut self.terminator { + &mut Terminator::CondBr { ref mut cond, .. } => f(cond), + &mut Terminator::Select { ref mut value, .. } => f(value), + &mut Terminator::Return { ref mut values, .. } => { + for value in values { + f(value); + } + } + _ => {} + } + } +} + #[derive(Clone, Debug)] pub struct Inst<'a> { pub operator: Operator<'a>, @@ -163,14 +212,6 @@ impl<'a> Module<'a> { pub fn to_wasm_bytes(mut self) -> Vec { // TODO - for func in &mut self.funcs { - match func { - &mut FuncDecl::Body(_, ref mut body) => { - let _deleted_insts = backend::treeify_function(body); - } - _ => {} - } - } self.orig_bytes.to_vec() } } diff --git a/src/lib.rs b/src/lib.rs index 24dff61..78dd8f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,5 +12,6 @@ mod dataflow; mod frontend; mod ir; mod op_traits; +mod cfg; pub use ir::*;