diff --git a/src/backend/mod.rs b/src/backend/mod.rs
index edafad3..26f7951 100644
--- a/src/backend/mod.rs
+++ b/src/backend/mod.rs
@@ -1,4 +1,13 @@
 //! Backend: IR to Wasm.
+mod structured;
+pub use structured::*;
+mod use_count;
+pub use use_count::*;
+mod schedule;
+pub use schedule::*;
+mod serialize;
+pub use serialize::*;
+
 mod locations;
diff --git a/src/backend/schedule.rs b/src/backend/schedule.rs
new file mode 100644
index 0000000..bc44cf7
--- /dev/null
+++ b/src/backend/schedule.rs
@@ -0,0 +1,220 @@
+//! Op scheduling.
+
+use fxhash::FxHashMap;
+
+use super::UseCountAnalysis;
+use crate::{cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Value, ValueDef};
+
+#[derive(Clone, Debug, Default)]
+pub struct Schedule {
+    /// Output: location at which to compute each value.
+    pub location: Vec<Location>,
+    /// Output: for each toplevel value, all values that are computed
+    /// after it is.
+    pub compute_after_value: FxHashMap<Value, Vec<Value>>,
+    /// Output: all values ready at the top of a given block.
+    pub compute_at_top_of_block: FxHashMap<BlockId, Vec<Value>>,
+}
+
+pub struct SchedulerContext<'a> {
+    /// The schedule we are constructing.
+    schedule: &'a mut Schedule,
+    /// In-progress state: for each value, the values that have one
+    /// more ready input once that value is computed.
+    waiting_on_value: FxHashMap<Value, Vec<Value>>,
+    /// In-progress state: for each value, how many inputs need to
+    /// become ready.
+    remaining_inputs: FxHashMap<Value, usize>,
+    /// In-progress state: all values that are ready to be scheduled.
+    ready: Vec<Value>,
+    /// Input context: CFG.
+    cfg: &'a CFGInfo,
+    /// Input context: function body.
+    f: &'a FunctionBody,
+}
+
+/// Locations are denoted by top-level values (those in `insts`),
+/// which are those with a side-effect; the sea-of-nodes
+/// representation for all other value nodes allows them to be
+/// computed anywhere dominated by all operands and that dominates all
+/// uses, so we have significant flexibility. We denote a location as
+/// "after a toplevel", then in the second pass where we actually
+/// generate operators according to stack discipline, we resolve the
+/// order for all values at a given toplevel.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum Location {
+    /// At a separate top-level location.
+    Toplevel,
+    /// After a given value.
+    After(Value),
+    /// At the top of a given block.
+    BlockTop(BlockId),
+    /// Not yet scheduled.
+    None,
+}
+
+impl Schedule {
+    pub fn compute(f: &FunctionBody, cfg: &CFGInfo, uses: &UseCountAnalysis) -> Self {
+        let mut schedule = Schedule::default();
+        schedule.location = vec![Location::None; f.values.len()];
+
+        log::trace!("f: {:?}", f);
+        log::trace!("cfg: {:?}", cfg);
+        log::trace!("uses: {:?}", uses);
+
+        let mut ctx = SchedulerContext {
+            schedule: &mut schedule,
+            f,
+            cfg,
+            waiting_on_value: FxHashMap::default(),
+            remaining_inputs: FxHashMap::default(),
+            ready: vec![],
+        };
+
+        // Prepare the "waiting on value", "remaining inputs", and
+        // "ready" vectors.
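+        //
+        // A used, non-toplevel value with no operands is immediately
+        // ready (unless it can simply be rematerialized at each use);
+        // a value with operands records how many inputs are still
+        // outstanding and registers itself with each input so that it
+        // can be woken once that input has been computed.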
+ for (value, value_def) in f.values() { + if uses.use_count[value.index()] == 0 { + continue; + } + if uses.toplevel.contains(&value) { + continue; + } + match value_def { + &ValueDef::Operator(op, ref operands) => { + if operands.len() == 0 { + if !op_rematerialize(&op) { + log::trace!("immediately ready: v{}", value.index()); + ctx.ready.push(value); + } + } else { + log::trace!("v{} waiting on {:?}", value.index(), operands); + ctx.remaining_inputs.insert(value, operands.len()); + for &input in operands { + let input = f.resolve_alias(input); + ctx.waiting_on_value + .entry(input) + .or_insert_with(|| vec![]) + .push(value); + } + } + } + &ValueDef::Alias(v) | &ValueDef::PickOutput(v, _) => { + let v = f.resolve_alias(v); + ctx.remaining_inputs.insert(value, 1); + ctx.waiting_on_value + .entry(v) + .or_insert_with(|| vec![]) + .push(value); + } + _ => {} + } + } + + // Traverse blocks in RPO. When we schedule a given op, we've + // already scheduled all of its operands, so we can find the + // right place for it without any sort of backtracking or + // fixpoint convergence. + // + // - Values in `insts` (toplevel operations) + // are scheduled at their locations. All side-effecting ops + // are in this category, and hence never experience + // code-motion relative to other side-effecting ops or + // control flow. + // + // - Otherwise, values are scheduled after their last operand + // is ready. All operands must have been computed by the + // time we reach a given operator in RPO, and each operand's + // scheduled site must dominate the current location + // (toplevel value). Because the dominance relation forms a + // tree structure (the domtree), for any two operand def + // sites X and Y to the current location L, given X dom L + // and Y dom L, either X dom Y or Y dom X. Thus, consider + // the current-best and each new operand in pairs, and pick + // the one that is dominated by the other. 
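+        //
+        //   For example, if a pure value uses operands computed at
+        //   toplevels X and Y, where X's definition dominates Y's,
+        //   then Y is the operand that becomes ready last in this
+        //   walk, so the value is scheduled After(Y); X dom Y
+        //   guarantees that X's result is still available there.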
+ + for &block in cfg.postorder.iter().rev() { + for &(_, param) in &f.blocks[block].params { + log::trace!("block{}: param v{}", block, param.index()); + ctx.wake_dependents(param); + } + ctx.sched_ready_at_block_top(block); + for &inst in &f.blocks[block].insts { + log::trace!("block{}: toplevel v{}", block, inst.index()); + ctx.sched_toplevel(inst); + ctx.sched_ready_after_value(inst); + } + } + + schedule + } +} + +impl<'a> SchedulerContext<'a> { + fn sched_toplevel(&mut self, v: Value) { + log::trace!("sched_toplevel: v{}", v.index()); + assert_eq!(self.schedule.location[v.index()], Location::None); + self.schedule.location[v.index()] = Location::Toplevel; + self.wake_dependents(v); + } + + fn sched_ready_after_value(&mut self, v: Value) { + log::trace!("sched_ready_after_value: toplevel v{}", v.index()); + while !self.ready.is_empty() { + for ready in std::mem::take(&mut self.ready) { + log::trace!( + "sched_ready_after_value: toplevel v{} -> v{} now ready", + v.index(), + ready.index() + ); + self.schedule.location[ready.index()] = Location::After(v); + self.schedule + .compute_after_value + .entry(v) + .or_insert_with(|| vec![]) + .push(ready); + self.wake_dependents(ready); + } + } + } + + fn sched_ready_at_block_top(&mut self, block: BlockId) { + log::trace!("ready_at_block_top: block{}", block); + while !self.ready.is_empty() { + for ready in std::mem::take(&mut self.ready) { + log::trace!( + "ready_at_block_top: block{} -> ready: v{}", + block, + ready.index() + ); + self.schedule.location[ready.index()] = Location::BlockTop(block); + self.schedule + .compute_at_top_of_block + .entry(block) + .or_insert_with(|| vec![]) + .push(ready); + self.wake_dependents(ready); + } + } + } + + fn wake_dependents(&mut self, v: Value) { + log::trace!("wake_dependents: v{}", v.index()); + let dependents = self.waiting_on_value.remove(&v).unwrap_or_default(); + for dependent in dependents { + let remaining = self.remaining_inputs.get_mut(&dependent).unwrap(); + *remaining -= 1; + log::trace!( + " -> v{} wakes dependent v{}; remaining now {}", + v.index(), + dependent.index(), + *remaining + ); + if *remaining == 0 { + self.remaining_inputs.remove(&dependent); + self.ready.push(dependent); + self.wake_dependents(dependent); + } + } + } +} diff --git a/src/cfg/serialize.rs b/src/backend/serialize.rs similarity index 57% rename from src/cfg/serialize.rs rename to src/backend/serialize.rs index b1fa972..4d0ae2e 100644 --- a/src/cfg/serialize.rs +++ b/src/backend/serialize.rs @@ -3,17 +3,15 @@ //! in Wasm function body. Contains everything needed to emit Wasm //! except for value locations (and corresponding local spill/reloads). 
-use std::collections::VecDeque;
-
-use fxhash::{FxHashMap, FxHashSet};
-
 use super::{
     structured::{BlockOrder, BlockOrderEntry},
-    CFGInfo,
+    Schedule, UseCountAnalysis,
 };
 use crate::{
-    op_traits::op_rematerialize, BlockId, FunctionBody, Operator, Terminator, Value, ValueDef,
+    cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Operator, Terminator, Value,
+    ValueDef,
 };
+use fxhash::FxHashSet;
 
 /// A Wasm function body with a serialized sequence of operators that
 /// mirror Wasm opcodes in every way *except* for locals corresponding
@@ -420,286 +418,3 @@ impl<'a> SerializedBodyContext<'a> {
         }
     }
 }
-
-#[derive(Clone, Debug)]
-pub struct UseCountAnalysis {
-    toplevel: FxHashSet<Value>,
-    use_count: Vec<usize>,
-}
-
-impl UseCountAnalysis {
-    fn compute(f: &FunctionBody) -> UseCountAnalysis {
-        let n_values = f.values.len();
-        let mut counts = UseCountAnalysis {
-            use_count: vec![0; n_values],
-            toplevel: FxHashSet::default(),
-        };
-
-        let mut workqueue = VecDeque::new();
-        let mut workqueue_set = FxHashSet::default();
-        for block in 0..f.blocks.len() {
-            for &value in &f.blocks[block].insts {
-                let value = f.resolve_alias(value);
-                if workqueue_set.insert(value) {
-                    workqueue.push_back(value);
-                }
-                counts.toplevel.insert(value);
-            }
-            f.blocks[block].terminator.visit_uses(|value| {
-                let value = f.resolve_alias(value);
-                if workqueue_set.insert(value) {
-                    workqueue.push_back(value);
-                }
-            });
-
-            while let Some(value) = workqueue.pop_front() {
-                workqueue_set.remove(&value);
-                counts.add(value);
-                match &f.values[value.index()] {
-                    &ValueDef::Alias(..) | &ValueDef::Arg(..) | &ValueDef::BlockParam(..) => {}
-                    &ValueDef::Operator(_op, ref args) => {
-                        for &arg in args {
-                            let arg = f.resolve_alias(arg);
-                            if counts.use_count[arg.index()] == 0 {
-                                if workqueue_set.insert(arg) {
-                                    workqueue.push_back(arg);
-                                }
-                            }
-                        }
-                    }
-                    &ValueDef::PickOutput(value, _) => {
-                        let value = f.resolve_alias(value);
-                        if counts.use_count[value.index()] == 0 {
-                            if workqueue_set.insert(value) {
-                                workqueue.push_back(value);
-                            }
-                        }
-                    }
-                    &ValueDef::Placeholder => {
-                        panic!("Unresolved placeholder for value {}", value);
-                    }
-                }
-            }
-        }
-
-        counts
-    }
-
-    fn add(&mut self, value: Value) {
-        self.use_count[value.index()] += 1;
-    }
-}
-
-#[derive(Clone, Debug, Default)]
-pub struct Schedule {
-    /// Output: location at which to compute each value.
-    pub location: Vec<Location>,
-    /// Output: for each toplevel value, all values that are computed
-    /// after it is.
-    pub compute_after_value: FxHashMap<Value, Vec<Value>>,
-    /// Output: all values ready at the top of a given block.
-    pub compute_at_top_of_block: FxHashMap<BlockId, Vec<Value>>,
-}
-
-pub struct SchedulerContext<'a> {
-    /// The schedule we are constructing.
-    schedule: &'a mut Schedule,
-    /// In-progress state: for each value, the values that have one
-    /// more ready input once that value is computed.
-    waiting_on_value: FxHashMap<Value, Vec<Value>>,
-    /// In-progress state: for each value, how many inputs need to
-    /// become ready.
-    remaining_inputs: FxHashMap<Value, usize>,
-    /// In-progress state: all values that are ready to be scheduled.
-    ready: Vec<Value>,
-    /// Input context: CFG.
-    cfg: &'a CFGInfo,
-    /// Input context: function body.
-    f: &'a FunctionBody,
-}
-
-/// Locations are denoted by top-level values (those in `insts`),
-/// which are those with a side-effect; the sea-of-nodes
-/// representation for all other value nodes allows them to be
-/// computed anywhere dominated by all operands and that dominates all
-/// uses, so we have significant flexibility.
We denote a location as -/// "after a toplevel", then in the second pass where we actually -/// generate operators according to stack discipline, we resolve the -/// order for all values at a given toplevel. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Location { - /// At a separate top-level location. - Toplevel, - /// After a given value. - After(Value), - /// At the top of a given block. - BlockTop(BlockId), - /// Not yet scheduled. - None, -} - -impl Schedule { - pub fn compute(f: &FunctionBody, cfg: &CFGInfo, uses: &UseCountAnalysis) -> Self { - let mut schedule = Schedule::default(); - schedule.location = vec![Location::None; f.values.len()]; - - log::trace!("f: {:?}", f); - log::trace!("cfg: {:?}", cfg); - log::trace!("uses: {:?}", uses); - - let mut ctx = SchedulerContext { - schedule: &mut schedule, - f, - cfg, - waiting_on_value: FxHashMap::default(), - remaining_inputs: FxHashMap::default(), - ready: vec![], - }; - - // Prepare the "waiting on value", "remaining inputs", and - // "ready" vectors. - for (value, value_def) in f.values() { - if uses.use_count[value.index()] == 0 { - continue; - } - if uses.toplevel.contains(&value) { - continue; - } - match value_def { - &ValueDef::Operator(op, ref operands) => { - if operands.len() == 0 { - if !op_rematerialize(&op) { - log::trace!("immediately ready: v{}", value.index()); - ctx.ready.push(value); - } - } else { - log::trace!("v{} waiting on {:?}", value.index(), operands); - ctx.remaining_inputs.insert(value, operands.len()); - for &input in operands { - let input = f.resolve_alias(input); - ctx.waiting_on_value - .entry(input) - .or_insert_with(|| vec![]) - .push(value); - } - } - } - &ValueDef::Alias(v) | &ValueDef::PickOutput(v, _) => { - let v = f.resolve_alias(v); - ctx.remaining_inputs.insert(value, 1); - ctx.waiting_on_value - .entry(v) - .or_insert_with(|| vec![]) - .push(value); - } - _ => {} - } - } - - // Traverse blocks in RPO. When we schedule a given op, we've - // already scheduled all of its operands, so we can find the - // right place for it without any sort of backtracking or - // fixpoint convergence. - // - // - Values in `insts` (toplevel operations) - // are scheduled at their locations. All side-effecting ops - // are in this category, and hence never experience - // code-motion relative to other side-effecting ops or - // control flow. - // - // - Otherwise, values are scheduled after their last operand - // is ready. All operands must have been computed by the - // time we reach a given operator in RPO, and each operand's - // scheduled site must dominate the current location - // (toplevel value). Because the dominance relation forms a - // tree structure (the domtree), for any two operand def - // sites X and Y to the current location L, given X dom L - // and Y dom L, either X dom Y or Y dom X. Thus, consider - // the current-best and each new operand in pairs, and pick - // the one that is dominated by the other. 
- - for &block in cfg.postorder.iter().rev() { - for &(_, param) in &f.blocks[block].params { - log::trace!("block{}: param v{}", block, param.index()); - ctx.wake_dependents(param); - } - ctx.sched_ready_at_block_top(block); - for &inst in &f.blocks[block].insts { - log::trace!("block{}: toplevel v{}", block, inst.index()); - ctx.sched_toplevel(inst); - ctx.sched_ready_after_value(inst); - } - } - - schedule - } -} - -impl<'a> SchedulerContext<'a> { - fn sched_toplevel(&mut self, v: Value) { - log::trace!("sched_toplevel: v{}", v.index()); - assert_eq!(self.schedule.location[v.index()], Location::None); - self.schedule.location[v.index()] = Location::Toplevel; - self.wake_dependents(v); - } - - fn sched_ready_after_value(&mut self, v: Value) { - log::trace!("sched_ready_after_value: toplevel v{}", v.index()); - while !self.ready.is_empty() { - for ready in std::mem::take(&mut self.ready) { - log::trace!( - "sched_ready_after_value: toplevel v{} -> v{} now ready", - v.index(), - ready.index() - ); - self.schedule.location[ready.index()] = Location::After(v); - self.schedule - .compute_after_value - .entry(v) - .or_insert_with(|| vec![]) - .push(ready); - self.wake_dependents(ready); - } - } - } - - fn sched_ready_at_block_top(&mut self, block: BlockId) { - log::trace!("ready_at_block_top: block{}", block); - while !self.ready.is_empty() { - for ready in std::mem::take(&mut self.ready) { - log::trace!( - "ready_at_block_top: block{} -> ready: v{}", - block, - ready.index() - ); - self.schedule.location[ready.index()] = Location::BlockTop(block); - self.schedule - .compute_at_top_of_block - .entry(block) - .or_insert_with(|| vec![]) - .push(ready); - self.wake_dependents(ready); - } - } - } - - fn wake_dependents(&mut self, v: Value) { - log::trace!("wake_dependents: v{}", v.index()); - let dependents = self.waiting_on_value.remove(&v).unwrap_or_default(); - for dependent in dependents { - let remaining = self.remaining_inputs.get_mut(&dependent).unwrap(); - *remaining -= 1; - log::trace!( - " -> v{} wakes dependent v{}; remaining now {}", - v.index(), - dependent.index(), - *remaining - ); - if *remaining == 0 { - self.remaining_inputs.remove(&dependent); - self.ready.push(dependent); - self.wake_dependents(dependent); - } - } - } -} diff --git a/src/cfg/structured.rs b/src/backend/structured.rs similarity index 100% rename from src/cfg/structured.rs rename to src/backend/structured.rs diff --git a/src/backend/use_count.rs b/src/backend/use_count.rs new file mode 100644 index 0000000..f5ecf9e --- /dev/null +++ b/src/backend/use_count.rs @@ -0,0 +1,75 @@ +//! Use-count analysis. 
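+//!
+//! Use counts are seeded from each block's side-effecting toplevel
+//! instructions and terminator operands, then propagated to operands
+//! with a breadth-first worklist. The scheduler in `schedule.rs`
+//! consults this analysis only to ask whether a value is used at all
+//! and whether it is computed as a toplevel.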
+
+use std::collections::VecDeque;
+
+use crate::{Value, FunctionBody, ValueDef};
+use fxhash::FxHashSet;
+
+#[derive(Clone, Debug)]
+pub struct UseCountAnalysis {
+    pub(crate) toplevel: FxHashSet<Value>,
+    pub(crate) use_count: Vec<usize>,
+}
+
+impl UseCountAnalysis {
+    pub(crate) fn compute(f: &FunctionBody) -> UseCountAnalysis {
+        let n_values = f.values.len();
+        let mut counts = UseCountAnalysis {
+            use_count: vec![0; n_values],
+            toplevel: FxHashSet::default(),
+        };
+
+        let mut workqueue = VecDeque::new();
+        let mut workqueue_set = FxHashSet::default();
+        for block in 0..f.blocks.len() {
+            for &value in &f.blocks[block].insts {
+                let value = f.resolve_alias(value);
+                if workqueue_set.insert(value) {
+                    workqueue.push_back(value);
+                }
+                counts.toplevel.insert(value);
+            }
+            f.blocks[block].terminator.visit_uses(|value| {
+                let value = f.resolve_alias(value);
+                if workqueue_set.insert(value) {
+                    workqueue.push_back(value);
+                }
+            });
+
+            while let Some(value) = workqueue.pop_front() {
+                workqueue_set.remove(&value);
+                counts.add(value);
+                match &f.values[value.index()] {
+                    &ValueDef::Alias(..) | &ValueDef::Arg(..) | &ValueDef::BlockParam(..) => {}
+                    &ValueDef::Operator(_op, ref args) => {
+                        for &arg in args {
+                            let arg = f.resolve_alias(arg);
+                            if counts.use_count[arg.index()] == 0 {
+                                if workqueue_set.insert(arg) {
+                                    workqueue.push_back(arg);
+                                }
+                            }
+                        }
+                    }
+                    &ValueDef::PickOutput(value, _) => {
+                        let value = f.resolve_alias(value);
+                        if counts.use_count[value.index()] == 0 {
+                            if workqueue_set.insert(value) {
+                                workqueue.push_back(value);
+                            }
+                        }
+                    }
+                    &ValueDef::Placeholder => {
+                        panic!("Unresolved placeholder for value {}", value);
+                    }
+                }
+            }
+        }
+
+        counts
+    }
+
+    fn add(&mut self, value: Value) {
+        self.use_count[value.index()] += 1;
+    }
+}
diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs
index 452106a..0c3d7b2 100644
--- a/src/cfg/mod.rs
+++ b/src/cfg/mod.rs
@@ -8,8 +8,6 @@ use smallvec::SmallVec;
 
 pub mod domtree;
 pub mod postorder;
-pub mod serialize;
-pub mod structured;
 
 #[derive(Clone, Debug)]
 pub struct CFGInfo {
diff --git a/src/ir.rs b/src/ir.rs
index 2ba2a02..9100280 100644
--- a/src/ir.rs
+++ b/src/ir.rs
@@ -3,11 +3,8 @@
 use std::collections::hash_map::Entry;
 
 use crate::{
-    cfg::{
-        serialize::SerializedBody,
-        structured::{BlockOrder, LoopNest, WasmRegion},
-        CFGInfo,
-    },
+    backend::{BlockOrder, LoopNest, SerializedBody, WasmRegion},
+    cfg::CFGInfo,
     frontend, Operator,
 };
 use anyhow::Result;
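For reference, the wake/ready mechanism that `Schedule::compute` and `wake_dependents` rely on is ordinary dependency counting. The minimal standalone sketch below reproduces that idea outside the patch, using plain `usize` ids and `std::collections::HashMap` in place of `Value` and `FxHashMap`; the names (`MiniScheduler`, `schedule_toplevel`, `placed_after`) are invented for the illustration and do not appear in the codebase.

use std::collections::HashMap;

// Minimal model of the scheduler's wake/ready mechanism: each pure
// value waits on a count of unready inputs; computing a value wakes
// its dependents, and a dependent whose count reaches zero is placed
// "after" the toplevel currently being scheduled.
struct MiniScheduler {
    remaining_inputs: HashMap<usize, usize>,
    waiting_on_value: HashMap<usize, Vec<usize>>,
    ready: Vec<usize>,
    placed_after: HashMap<usize, Vec<usize>>,
}

impl MiniScheduler {
    // `deps` lists each pure value together with the values it uses.
    fn new(deps: &[(usize, Vec<usize>)]) -> Self {
        let mut s = MiniScheduler {
            remaining_inputs: HashMap::new(),
            waiting_on_value: HashMap::new(),
            ready: vec![],
            placed_after: HashMap::new(),
        };
        for &(value, ref inputs) in deps {
            s.remaining_inputs.insert(value, inputs.len());
            for &input in inputs {
                s.waiting_on_value.entry(input).or_default().push(value);
            }
        }
        s
    }

    // Called when `v` has been computed.
    fn wake_dependents(&mut self, v: usize) {
        for dep in self.waiting_on_value.remove(&v).unwrap_or_default() {
            let remaining = self.remaining_inputs.get_mut(&dep).unwrap();
            *remaining -= 1;
            if *remaining == 0 {
                self.remaining_inputs.remove(&dep);
                self.ready.push(dep);
            }
        }
    }

    // Schedule toplevel `v`, then place every now-ready pure value after it.
    fn schedule_toplevel(&mut self, v: usize) {
        self.wake_dependents(v);
        while let Some(r) = self.ready.pop() {
            self.placed_after.entry(v).or_default().push(r);
            self.wake_dependents(r); // a ready value may unblock further values
        }
    }
}

fn main() {
    // Pure value 10 uses toplevels 1 and 2; pure value 11 uses 10.
    let mut s = MiniScheduler::new(&[(10, vec![1, 2]), (11, vec![10])]);
    s.schedule_toplevel(1);
    s.schedule_toplevel(2);
    // Both pure values end up after toplevel 2, the point at which
    // their last operand became available.
    assert_eq!(s.placed_after[&2], vec![10, 11]);
}

The real implementation in the patch additionally handles zero-operand and rematerializable values up front, drains `ready` with `std::mem::take` rather than a stack, and records block-top placements for values that become ready when block parameters are woken.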