cleanup and move code to backend/

This commit is contained in:
Chris Fallin 2021-12-23 20:05:36 -08:00
parent 7735b522d4
commit 57693e592c
7 changed files with 310 additions and 296 deletions

View file

@ -1,4 +1,13 @@
//! Backend: IR to Wasm.
mod structured;
pub use structured::*;
mod use_count;
pub use use_count::*;
mod schedule;
pub use schedule::*;
mod serialize;
pub use serialize::*;
mod locations;

220
src/backend/schedule.rs Normal file
View file

@ -0,0 +1,220 @@
//! Op scheduling.
use fxhash::FxHashMap;
use super::UseCountAnalysis;
use crate::{cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Value, ValueDef};
/// Result of scheduling: where each value of a function body is computed.
///
/// Built by [`Schedule::compute`]; all three fields are outputs.
#[derive(Clone, Debug, Default)]
pub struct Schedule {
    /// Per-value placement, indexed by `Value::index()`. Entries start
    /// out as `Location::None` and are overwritten as values are
    /// scheduled.
    pub location: Vec</* Value, */ Location>,
    /// For a toplevel value, the values placed directly after it, in
    /// the order in which they became ready.
    pub compute_after_value: FxHashMap<Value, Vec<Value>>,
    /// For a block, the values placed at the top of that block, in the
    /// order in which they became ready.
    pub compute_at_top_of_block: FxHashMap<BlockId, Vec<Value>>,
}
/// Working state used while a [`Schedule`] is being built.
pub struct SchedulerContext<'a> {
    /// Schedule under construction; scheduling methods write into it.
    schedule: &'a mut Schedule,
    /// Reverse dependency map: for a value `v`, every value that gains
    /// one more ready input when `v` is computed. A dependent appears
    /// once per operand slot that refers to `v`.
    waiting_on_value: FxHashMap<Value, Vec<Value>>,
    /// Number of inputs of each pending value that are not yet ready.
    /// The entry is removed when the count reaches zero.
    remaining_inputs: FxHashMap<Value, usize>,
    /// Values whose inputs are all ready but which have not yet been
    /// assigned a location.
    ready: Vec<Value>,
    /// CFG of the function being scheduled (input context).
    cfg: &'a CFGInfo,
    /// Function body being scheduled (input context).
    f: &'a FunctionBody,
}
/// Placement of a value in the schedule.
///
/// Toplevel values (those listed in a block's `insts`, i.e. the ones
/// with side effects) anchor the schedule. Every other value is a
/// sea-of-nodes node that may be computed anywhere that is dominated
/// by all of its operands and that dominates all of its uses, which
/// leaves a lot of freedom. A non-toplevel value is therefore placed
/// relative to an anchor ("after this toplevel" or "at the top of this
/// block"); the exact order among the values sharing an anchor is
/// resolved in a second pass that emits operators according to stack
/// discipline.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Location {
    /// The value is itself a toplevel and sits at its own location.
    Toplevel,
    /// The value is computed after the given value.
    After(Value),
    /// The value is computed at the top of the given block.
    BlockTop(BlockId),
    /// The value has not been scheduled (yet).
    None,
}
impl Schedule {
pub fn compute(f: &FunctionBody, cfg: &CFGInfo, uses: &UseCountAnalysis) -> Self {
let mut schedule = Schedule::default();
schedule.location = vec![Location::None; f.values.len()];
log::trace!("f: {:?}", f);
log::trace!("cfg: {:?}", cfg);
log::trace!("uses: {:?}", uses);
let mut ctx = SchedulerContext {
schedule: &mut schedule,
f,
cfg,
waiting_on_value: FxHashMap::default(),
remaining_inputs: FxHashMap::default(),
ready: vec![],
};
// Prepare the "waiting on value", "remaining inputs", and
// "ready" vectors.
for (value, value_def) in f.values() {
if uses.use_count[value.index()] == 0 {
continue;
}
if uses.toplevel.contains(&value) {
continue;
}
match value_def {
&ValueDef::Operator(op, ref operands) => {
if operands.len() == 0 {
if !op_rematerialize(&op) {
log::trace!("immediately ready: v{}", value.index());
ctx.ready.push(value);
}
} else {
log::trace!("v{} waiting on {:?}", value.index(), operands);
ctx.remaining_inputs.insert(value, operands.len());
for &input in operands {
let input = f.resolve_alias(input);
ctx.waiting_on_value
.entry(input)
.or_insert_with(|| vec![])
.push(value);
}
}
}
&ValueDef::Alias(v) | &ValueDef::PickOutput(v, _) => {
let v = f.resolve_alias(v);
ctx.remaining_inputs.insert(value, 1);
ctx.waiting_on_value
.entry(v)
.or_insert_with(|| vec![])
.push(value);
}
_ => {}
}
}
// Traverse blocks in RPO. When we schedule a given op, we've
// already scheduled all of its operands, so we can find the
// right place for it without any sort of backtracking or
// fixpoint convergence.
//
// - Values in `insts` (toplevel operations)
// are scheduled at their locations. All side-effecting ops
// are in this category, and hence never experience
// code-motion relative to other side-effecting ops or
// control flow.
//
// - Otherwise, values are scheduled after their last operand
// is ready. All operands must have been computed by the
// time we reach a given operator in RPO, and each operand's
// scheduled site must dominate the current location
// (toplevel value). Because the dominance relation forms a
// tree structure (the domtree), for any two operand def
// sites X and Y to the current location L, given X dom L
// and Y dom L, either X dom Y or Y dom X. Thus, consider
// the current-best and each new operand in pairs, and pick
// the one that is dominated by the other.
for &block in cfg.postorder.iter().rev() {
for &(_, param) in &f.blocks[block].params {
log::trace!("block{}: param v{}", block, param.index());
ctx.wake_dependents(param);
}
ctx.sched_ready_at_block_top(block);
for &inst in &f.blocks[block].insts {
log::trace!("block{}: toplevel v{}", block, inst.index());
ctx.sched_toplevel(inst);
ctx.sched_ready_after_value(inst);
}
}
schedule
}
}
impl<'a> SchedulerContext<'a> {
    /// Marks the toplevel value `v` as scheduled at its own location
    /// and wakes everything that was waiting on it.
    ///
    /// Panics if `v` already has a location.
    fn sched_toplevel(&mut self, v: Value) {
        log::trace!("sched_toplevel: v{}", v.index());
        let slot = &mut self.schedule.location[v.index()];
        assert_eq!(*slot, Location::None);
        *slot = Location::Toplevel;
        self.wake_dependents(v);
    }

    /// Drains the ready list, placing every ready value after the
    /// toplevel value `v`. Repeats until no further values become
    /// ready as a consequence.
    fn sched_ready_after_value(&mut self, v: Value) {
        log::trace!("sched_ready_after_value: toplevel v{}", v.index());
        loop {
            let batch = std::mem::take(&mut self.ready);
            if batch.is_empty() {
                break;
            }
            for ready in batch {
                log::trace!(
                    "sched_ready_after_value: toplevel v{} -> v{} now ready",
                    v.index(),
                    ready.index()
                );
                self.schedule.location[ready.index()] = Location::After(v);
                let after = self.schedule.compute_after_value.entry(v).or_default();
                after.push(ready);
                self.wake_dependents(ready);
            }
        }
    }

    /// Drains the ready list, placing every ready value at the top of
    /// `block`. Repeats until no further values become ready as a
    /// consequence.
    fn sched_ready_at_block_top(&mut self, block: BlockId) {
        log::trace!("ready_at_block_top: block{}", block);
        loop {
            let batch = std::mem::take(&mut self.ready);
            if batch.is_empty() {
                break;
            }
            for ready in batch {
                log::trace!(
                    "ready_at_block_top: block{} -> ready: v{}",
                    block,
                    ready.index()
                );
                self.schedule.location[ready.index()] = Location::BlockTop(block);
                let at_top = self
                    .schedule
                    .compute_at_top_of_block
                    .entry(block)
                    .or_default();
                at_top.push(ready);
                self.wake_dependents(ready);
            }
        }
    }

    /// Records that `v` is now available: each value waiting on `v`
    /// has its outstanding-input count decremented (once per operand
    /// slot that referred to `v`). A dependent that reaches zero is
    /// appended to the ready list and its own dependents are woken
    /// recursively.
    ///
    /// Panics if a dependent has no entry in `remaining_inputs`.
    fn wake_dependents(&mut self, v: Value) {
        log::trace!("wake_dependents: v{}", v.index());
        // Taking the list out of the map means a second wake of the
        // same value finds nothing to do.
        if let Some(dependents) = self.waiting_on_value.remove(&v) {
            for dependent in dependents {
                let left = {
                    let counter = self.remaining_inputs.get_mut(&dependent).unwrap();
                    *counter -= 1;
                    *counter
                };
                log::trace!(
                    " -> v{} wakes dependent v{}; remaining now {}",
                    v.index(),
                    dependent.index(),
                    left
                );
                if left == 0 {
                    self.remaining_inputs.remove(&dependent);
                    self.ready.push(dependent);
                    self.wake_dependents(dependent);
                }
            }
        }
    }
}

View file

@ -3,17 +3,15 @@
//! in Wasm function body. Contains everything needed to emit Wasm
//! except for value locations (and corresponding local spill/reloads).
use std::collections::VecDeque;
use fxhash::{FxHashMap, FxHashSet};
use super::{
structured::{BlockOrder, BlockOrderEntry},
CFGInfo,
Schedule, UseCountAnalysis,
};
use crate::{
op_traits::op_rematerialize, BlockId, FunctionBody, Operator, Terminator, Value, ValueDef,
cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Operator, Terminator, Value,
ValueDef,
};
use fxhash::FxHashSet;
/// A Wasm function body with a serialized sequence of operators that
/// mirror Wasm opcodes in every way *except* for locals corresponding
@ -420,286 +418,3 @@ impl<'a> SerializedBodyContext<'a> {
}
}
}
#[derive(Clone, Debug)]
pub struct UseCountAnalysis {
toplevel: FxHashSet<Value>,
use_count: Vec</* Value, */ usize>,
}
impl UseCountAnalysis {
fn compute(f: &FunctionBody) -> UseCountAnalysis {
let n_values = f.values.len();
let mut counts = UseCountAnalysis {
use_count: vec![0; n_values],
toplevel: FxHashSet::default(),
};
let mut workqueue = VecDeque::new();
let mut workqueue_set = FxHashSet::default();
for block in 0..f.blocks.len() {
for &value in &f.blocks[block].insts {
let value = f.resolve_alias(value);
if workqueue_set.insert(value) {
workqueue.push_back(value);
}
counts.toplevel.insert(value);
}
f.blocks[block].terminator.visit_uses(|value| {
let value = f.resolve_alias(value);
if workqueue_set.insert(value) {
workqueue.push_back(value);
}
});
while let Some(value) = workqueue.pop_front() {
workqueue_set.remove(&value);
counts.add(value);
match &f.values[value.index()] {
&ValueDef::Alias(..) | &ValueDef::Arg(..) | &ValueDef::BlockParam(..) => {}
&ValueDef::Operator(_op, ref args) => {
for &arg in args {
let arg = f.resolve_alias(arg);
if counts.use_count[arg.index()] == 0 {
if workqueue_set.insert(arg) {
workqueue.push_back(arg);
}
}
}
}
&ValueDef::PickOutput(value, _) => {
let value = f.resolve_alias(value);
if counts.use_count[value.index()] == 0 {
if workqueue_set.insert(value) {
workqueue.push_back(value);
}
}
}
&ValueDef::Placeholder => {
panic!("Unresolved placeholder for value {}", value);
}
}
}
}
counts
}
fn add(&mut self, value: Value) {
self.use_count[value.index()] += 1;
}
}
#[derive(Clone, Debug, Default)]
pub struct Schedule {
/// Output: location at which to compute each value.
pub location: Vec</* Value, */ Location>,
/// Output: for each toplevel value, all values that are computed
/// after it is.
pub compute_after_value: FxHashMap<Value, Vec<Value>>,
/// Output: all values ready at the top of a given block.
pub compute_at_top_of_block: FxHashMap<BlockId, Vec<Value>>,
}
pub struct SchedulerContext<'a> {
/// The schedule we are constructing.
schedule: &'a mut Schedule,
/// In-progress state: for each value, the values that have one
/// more ready input once that value is computed.
waiting_on_value: FxHashMap<Value, Vec<Value>>,
/// In-progress state: for each value, how many inputs need to
/// become ready.
remaining_inputs: FxHashMap<Value, usize>,
/// In-progress state: all values that are ready to be scheduled.
ready: Vec<Value>,
/// Input context: CFG.
cfg: &'a CFGInfo,
/// Input context: function body.
f: &'a FunctionBody,
}
/// Locations are denoted by top-level values (those in `insts`),
/// which are those with a side-effect; the sea-of-nodes
/// representation for all other value nodes allows them to be
/// computed anywhere dominated by all operands and that dominates all
/// uses, so we have significant flexibility. We denote a location as
/// "after a toplevel", then in the second pass where we actually
/// generate operators according to stack discipline, we resolve the
/// order for all values at a given toplevel.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Location {
/// At a separate top-level location.
Toplevel,
/// After a given value.
After(Value),
/// At the top of a given block.
BlockTop(BlockId),
/// Not yet scheduled.
None,
}
impl Schedule {
pub fn compute(f: &FunctionBody, cfg: &CFGInfo, uses: &UseCountAnalysis) -> Self {
let mut schedule = Schedule::default();
schedule.location = vec![Location::None; f.values.len()];
log::trace!("f: {:?}", f);
log::trace!("cfg: {:?}", cfg);
log::trace!("uses: {:?}", uses);
let mut ctx = SchedulerContext {
schedule: &mut schedule,
f,
cfg,
waiting_on_value: FxHashMap::default(),
remaining_inputs: FxHashMap::default(),
ready: vec![],
};
// Prepare the "waiting on value", "remaining inputs", and
// "ready" vectors.
for (value, value_def) in f.values() {
if uses.use_count[value.index()] == 0 {
continue;
}
if uses.toplevel.contains(&value) {
continue;
}
match value_def {
&ValueDef::Operator(op, ref operands) => {
if operands.len() == 0 {
if !op_rematerialize(&op) {
log::trace!("immediately ready: v{}", value.index());
ctx.ready.push(value);
}
} else {
log::trace!("v{} waiting on {:?}", value.index(), operands);
ctx.remaining_inputs.insert(value, operands.len());
for &input in operands {
let input = f.resolve_alias(input);
ctx.waiting_on_value
.entry(input)
.or_insert_with(|| vec![])
.push(value);
}
}
}
&ValueDef::Alias(v) | &ValueDef::PickOutput(v, _) => {
let v = f.resolve_alias(v);
ctx.remaining_inputs.insert(value, 1);
ctx.waiting_on_value
.entry(v)
.or_insert_with(|| vec![])
.push(value);
}
_ => {}
}
}
// Traverse blocks in RPO. When we schedule a given op, we've
// already scheduled all of its operands, so we can find the
// right place for it without any sort of backtracking or
// fixpoint convergence.
//
// - Values in `insts` (toplevel operations)
// are scheduled at their locations. All side-effecting ops
// are in this category, and hence never experience
// code-motion relative to other side-effecting ops or
// control flow.
//
// - Otherwise, values are scheduled after their last operand
// is ready. All operands must have been computed by the
// time we reach a given operator in RPO, and each operand's
// scheduled site must dominate the current location
// (toplevel value). Because the dominance relation forms a
// tree structure (the domtree), for any two operand def
// sites X and Y to the current location L, given X dom L
// and Y dom L, either X dom Y or Y dom X. Thus, consider
// the current-best and each new operand in pairs, and pick
// the one that is dominated by the other.
for &block in cfg.postorder.iter().rev() {
for &(_, param) in &f.blocks[block].params {
log::trace!("block{}: param v{}", block, param.index());
ctx.wake_dependents(param);
}
ctx.sched_ready_at_block_top(block);
for &inst in &f.blocks[block].insts {
log::trace!("block{}: toplevel v{}", block, inst.index());
ctx.sched_toplevel(inst);
ctx.sched_ready_after_value(inst);
}
}
schedule
}
}
impl<'a> SchedulerContext<'a> {
fn sched_toplevel(&mut self, v: Value) {
log::trace!("sched_toplevel: v{}", v.index());
assert_eq!(self.schedule.location[v.index()], Location::None);
self.schedule.location[v.index()] = Location::Toplevel;
self.wake_dependents(v);
}
fn sched_ready_after_value(&mut self, v: Value) {
log::trace!("sched_ready_after_value: toplevel v{}", v.index());
while !self.ready.is_empty() {
for ready in std::mem::take(&mut self.ready) {
log::trace!(
"sched_ready_after_value: toplevel v{} -> v{} now ready",
v.index(),
ready.index()
);
self.schedule.location[ready.index()] = Location::After(v);
self.schedule
.compute_after_value
.entry(v)
.or_insert_with(|| vec![])
.push(ready);
self.wake_dependents(ready);
}
}
}
fn sched_ready_at_block_top(&mut self, block: BlockId) {
log::trace!("ready_at_block_top: block{}", block);
while !self.ready.is_empty() {
for ready in std::mem::take(&mut self.ready) {
log::trace!(
"ready_at_block_top: block{} -> ready: v{}",
block,
ready.index()
);
self.schedule.location[ready.index()] = Location::BlockTop(block);
self.schedule
.compute_at_top_of_block
.entry(block)
.or_insert_with(|| vec![])
.push(ready);
self.wake_dependents(ready);
}
}
}
fn wake_dependents(&mut self, v: Value) {
log::trace!("wake_dependents: v{}", v.index());
let dependents = self.waiting_on_value.remove(&v).unwrap_or_default();
for dependent in dependents {
let remaining = self.remaining_inputs.get_mut(&dependent).unwrap();
*remaining -= 1;
log::trace!(
" -> v{} wakes dependent v{}; remaining now {}",
v.index(),
dependent.index(),
*remaining
);
if *remaining == 0 {
self.remaining_inputs.remove(&dependent);
self.ready.push(dependent);
self.wake_dependents(dependent);
}
}
}
}

75
src/backend/use_count.rs Normal file
View file

@ -0,0 +1,75 @@
//! Use-count analysis.
use std::collections::VecDeque;
use crate::{Value, FunctionBody, ValueDef};
use fxhash::FxHashSet;
/// Result of the use-count analysis over a function body.
#[derive(Clone, Debug)]
pub struct UseCountAnalysis {
    /// Alias-resolved values that appear in some block's `insts`.
    pub(crate) toplevel: FxHashSet<Value>,
    /// Per-value counter, indexed by `Value::index()`. Zero means the
    /// value was never reached by the analysis.
    pub(crate) use_count: Vec</* Value, */ usize>,
}
impl UseCountAnalysis {
pub(crate) fn compute(f: &FunctionBody) -> UseCountAnalysis {
let n_values = f.values.len();
let mut counts = UseCountAnalysis {
use_count: vec![0; n_values],
toplevel: FxHashSet::default(),
};
let mut workqueue = VecDeque::new();
let mut workqueue_set = FxHashSet::default();
for block in 0..f.blocks.len() {
for &value in &f.blocks[block].insts {
let value = f.resolve_alias(value);
if workqueue_set.insert(value) {
workqueue.push_back(value);
}
counts.toplevel.insert(value);
}
f.blocks[block].terminator.visit_uses(|value| {
let value = f.resolve_alias(value);
if workqueue_set.insert(value) {
workqueue.push_back(value);
}
});
while let Some(value) = workqueue.pop_front() {
workqueue_set.remove(&value);
counts.add(value);
match &f.values[value.index()] {
&ValueDef::Alias(..) | &ValueDef::Arg(..) | &ValueDef::BlockParam(..) => {}
&ValueDef::Operator(_op, ref args) => {
for &arg in args {
let arg = f.resolve_alias(arg);
if counts.use_count[arg.index()] == 0 {
if workqueue_set.insert(arg) {
workqueue.push_back(arg);
}
}
}
}
&ValueDef::PickOutput(value, _) => {
let value = f.resolve_alias(value);
if counts.use_count[value.index()] == 0 {
if workqueue_set.insert(value) {
workqueue.push_back(value);
}
}
}
&ValueDef::Placeholder => {
panic!("Unresolved placeholder for value {}", value);
}
}
}
}
counts
}
fn add(&mut self, value: Value) {
self.use_count[value.index()] += 1;
}
}

View file

@ -8,8 +8,6 @@ use smallvec::SmallVec;
pub mod domtree;
pub mod postorder;
pub mod serialize;
pub mod structured;
#[derive(Clone, Debug)]
pub struct CFGInfo {

View file

@ -3,11 +3,8 @@
use std::collections::hash_map::Entry;
use crate::{
cfg::{
serialize::SerializedBody,
structured::{BlockOrder, LoopNest, WasmRegion},
CFGInfo,
},
backend::{BlockOrder, LoopNest, SerializedBody, WasmRegion},
cfg::CFGInfo,
frontend, Operator,
};
use anyhow::Result;