cleanup and move code to backend/
Parent: 7735b522d4
Commit: 57693e592c
@@ -1,4 +1,13 @@
 //! Backend: IR to Wasm.
 
+mod structured;
+pub use structured::*;
+mod use_count;
+pub use use_count::*;
+mod schedule;
+pub use schedule::*;
+mod serialize;
+pub use serialize::*;
+
 mod locations;
 
src/backend/schedule.rs (new file, 220 lines)
@@ -0,0 +1,220 @@
//! Op scheduling.

use fxhash::FxHashMap;

use super::UseCountAnalysis;
use crate::{cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Value, ValueDef};

#[derive(Clone, Debug, Default)]
pub struct Schedule {
    /// Output: location at which to compute each value.
    pub location: Vec</* Value, */ Location>,
    /// Output: for each toplevel value, all values that are computed
    /// after it is.
    pub compute_after_value: FxHashMap<Value, Vec<Value>>,
    /// Output: all values ready at the top of a given block.
    pub compute_at_top_of_block: FxHashMap<BlockId, Vec<Value>>,
}

pub struct SchedulerContext<'a> {
    /// The schedule we are constructing.
    schedule: &'a mut Schedule,
    /// In-progress state: for each value, the values that have one
    /// more ready input once that value is computed.
    waiting_on_value: FxHashMap<Value, Vec<Value>>,
    /// In-progress state: for each value, how many inputs need to
    /// become ready.
    remaining_inputs: FxHashMap<Value, usize>,
    /// In-progress state: all values that are ready to be scheduled.
    ready: Vec<Value>,
    /// Input context: CFG.
    cfg: &'a CFGInfo,
    /// Input context: function body.
    f: &'a FunctionBody,
}

/// Locations are denoted by top-level values (those in `insts`),
/// which are those with a side-effect; the sea-of-nodes
/// representation for all other value nodes allows them to be
/// computed anywhere dominated by all operands and that dominates all
/// uses, so we have significant flexibility. We denote a location as
/// "after a toplevel", then in the second pass where we actually
/// generate operators according to stack discipline, we resolve the
/// order for all values at a given toplevel.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Location {
    /// At a separate top-level location.
    Toplevel,
    /// After a given value.
    After(Value),
    /// At the top of a given block.
    BlockTop(BlockId),
    /// Not yet scheduled.
    None,
}

impl Schedule {
    pub fn compute(f: &FunctionBody, cfg: &CFGInfo, uses: &UseCountAnalysis) -> Self {
        let mut schedule = Schedule::default();
        schedule.location = vec![Location::None; f.values.len()];

        log::trace!("f: {:?}", f);
        log::trace!("cfg: {:?}", cfg);
        log::trace!("uses: {:?}", uses);

        let mut ctx = SchedulerContext {
            schedule: &mut schedule,
            f,
            cfg,
            waiting_on_value: FxHashMap::default(),
            remaining_inputs: FxHashMap::default(),
            ready: vec![],
        };

        // Prepare the "waiting on value", "remaining inputs", and
        // "ready" vectors.
        for (value, value_def) in f.values() {
            if uses.use_count[value.index()] == 0 {
                continue;
            }
            if uses.toplevel.contains(&value) {
                continue;
            }
            match value_def {
                &ValueDef::Operator(op, ref operands) => {
                    if operands.len() == 0 {
                        if !op_rematerialize(&op) {
                            log::trace!("immediately ready: v{}", value.index());
                            ctx.ready.push(value);
                        }
                    } else {
                        log::trace!("v{} waiting on {:?}", value.index(), operands);
                        ctx.remaining_inputs.insert(value, operands.len());
                        for &input in operands {
                            let input = f.resolve_alias(input);
                            ctx.waiting_on_value
                                .entry(input)
                                .or_insert_with(|| vec![])
                                .push(value);
                        }
                    }
                }
                &ValueDef::Alias(v) | &ValueDef::PickOutput(v, _) => {
                    let v = f.resolve_alias(v);
                    ctx.remaining_inputs.insert(value, 1);
                    ctx.waiting_on_value
                        .entry(v)
                        .or_insert_with(|| vec![])
                        .push(value);
                }
                _ => {}
            }
        }

        // Traverse blocks in RPO. When we schedule a given op, we've
        // already scheduled all of its operands, so we can find the
        // right place for it without any sort of backtracking or
        // fixpoint convergence.
        //
        // - Values in `insts` (toplevel operations)
        //   are scheduled at their locations. All side-effecting ops
        //   are in this category, and hence never experience
        //   code-motion relative to other side-effecting ops or
        //   control flow.
        //
        // - Otherwise, values are scheduled after their last operand
        //   is ready. All operands must have been computed by the
        //   time we reach a given operator in RPO, and each operand's
        //   scheduled site must dominate the current location
        //   (toplevel value). Because the dominance relation forms a
        //   tree structure (the domtree), for any two operand def
        //   sites X and Y to the current location L, given X dom L
        //   and Y dom L, either X dom Y or Y dom X. Thus, consider
        //   the current-best and each new operand in pairs, and pick
        //   the one that is dominated by the other.

        for &block in cfg.postorder.iter().rev() {
            for &(_, param) in &f.blocks[block].params {
                log::trace!("block{}: param v{}", block, param.index());
                ctx.wake_dependents(param);
            }
            ctx.sched_ready_at_block_top(block);
            for &inst in &f.blocks[block].insts {
                log::trace!("block{}: toplevel v{}", block, inst.index());
                ctx.sched_toplevel(inst);
                ctx.sched_ready_after_value(inst);
            }
        }

        schedule
    }
}

impl<'a> SchedulerContext<'a> {
    fn sched_toplevel(&mut self, v: Value) {
        log::trace!("sched_toplevel: v{}", v.index());
        assert_eq!(self.schedule.location[v.index()], Location::None);
        self.schedule.location[v.index()] = Location::Toplevel;
        self.wake_dependents(v);
    }

    fn sched_ready_after_value(&mut self, v: Value) {
        log::trace!("sched_ready_after_value: toplevel v{}", v.index());
        while !self.ready.is_empty() {
            for ready in std::mem::take(&mut self.ready) {
                log::trace!(
                    "sched_ready_after_value: toplevel v{} -> v{} now ready",
                    v.index(),
                    ready.index()
                );
                self.schedule.location[ready.index()] = Location::After(v);
                self.schedule
                    .compute_after_value
                    .entry(v)
                    .or_insert_with(|| vec![])
                    .push(ready);
                self.wake_dependents(ready);
            }
        }
    }

    fn sched_ready_at_block_top(&mut self, block: BlockId) {
        log::trace!("ready_at_block_top: block{}", block);
        while !self.ready.is_empty() {
            for ready in std::mem::take(&mut self.ready) {
                log::trace!(
                    "ready_at_block_top: block{} -> ready: v{}",
                    block,
                    ready.index()
                );
                self.schedule.location[ready.index()] = Location::BlockTop(block);
                self.schedule
                    .compute_at_top_of_block
                    .entry(block)
                    .or_insert_with(|| vec![])
                    .push(ready);
                self.wake_dependents(ready);
            }
        }
    }

    fn wake_dependents(&mut self, v: Value) {
        log::trace!("wake_dependents: v{}", v.index());
        let dependents = self.waiting_on_value.remove(&v).unwrap_or_default();
        for dependent in dependents {
            let remaining = self.remaining_inputs.get_mut(&dependent).unwrap();
            *remaining -= 1;
            log::trace!(
                " -> v{} wakes dependent v{}; remaining now {}",
                v.index(),
                dependent.index(),
                *remaining
            );
            if *remaining == 0 {
                self.remaining_inputs.remove(&dependent);
                self.ready.push(dependent);
                self.wake_dependents(dependent);
            }
        }
    }
}
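For orientation, here is a rough consumer-side sketch (not part of this commit) of how the three `Schedule` outputs fit together when walking a block: values in `compute_at_top_of_block` come before the block's toplevel ops, and each toplevel op carries its `compute_after_value` list. The `emit` callback is a hypothetical stand-in for the serializer's lowering, which in reality also resolves operand order under Wasm stack discipline, as the `Location` doc comment above describes.

// Sketch only: assumes the Schedule/FunctionBody/Value/BlockId types from
// this diff; `emit` is a hypothetical callback, not an API of this crate.
fn walk_block_schedule(
    schedule: &Schedule,
    f: &FunctionBody,
    block: BlockId,
    emit: &mut impl FnMut(Value),
) {
    // Values scheduled at the top of the block come first.
    if let Some(vals) = schedule.compute_at_top_of_block.get(&block) {
        for &v in vals {
            emit(v);
        }
    }
    // Then each toplevel op, followed by the pure values scheduled after it.
    for &inst in &f.blocks[block].insts {
        emit(inst);
        if let Some(vals) = schedule.compute_after_value.get(&inst) {
            for &v in vals {
                emit(v);
            }
        }
    }
}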
@@ -3,17 +3,15 @@
 //! in Wasm function body. Contains everything needed to emit Wasm
 //! except for value locations (and corresponding local spill/reloads).
 
-use std::collections::VecDeque;
-
-use fxhash::{FxHashMap, FxHashSet};
-
 use super::{
     structured::{BlockOrder, BlockOrderEntry},
-    CFGInfo,
+    Schedule, UseCountAnalysis,
 };
 use crate::{
-    op_traits::op_rematerialize, BlockId, FunctionBody, Operator, Terminator, Value, ValueDef,
+    cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Operator, Terminator, Value,
+    ValueDef,
 };
+use fxhash::FxHashSet;
 
 /// A Wasm function body with a serialized sequence of operators that
 /// mirror Wasm opcodes in every way *except* for locals corresponding
@@ -420,286 +418,3 @@ impl<'a> SerializedBodyContext<'a> {
         }
     }
 }
[... 283 lines removed: the UseCountAnalysis, Schedule, SchedulerContext, and Location definitions formerly in this file, now split out into src/backend/use_count.rs and src/backend/schedule.rs as shown above ...]
src/backend/use_count.rs (new file, 75 lines)
@@ -0,0 +1,75 @@
//! Use-count analysis.

use std::collections::VecDeque;

use crate::{Value, FunctionBody, ValueDef};
use fxhash::FxHashSet;

#[derive(Clone, Debug)]
pub struct UseCountAnalysis {
    pub(crate) toplevel: FxHashSet<Value>,
    pub(crate) use_count: Vec</* Value, */ usize>,
}

impl UseCountAnalysis {
    pub(crate) fn compute(f: &FunctionBody) -> UseCountAnalysis {
        let n_values = f.values.len();
        let mut counts = UseCountAnalysis {
            use_count: vec![0; n_values],
            toplevel: FxHashSet::default(),
        };

        let mut workqueue = VecDeque::new();
        let mut workqueue_set = FxHashSet::default();
        for block in 0..f.blocks.len() {
            for &value in &f.blocks[block].insts {
                let value = f.resolve_alias(value);
                if workqueue_set.insert(value) {
                    workqueue.push_back(value);
                }
                counts.toplevel.insert(value);
            }
            f.blocks[block].terminator.visit_uses(|value| {
                let value = f.resolve_alias(value);
                if workqueue_set.insert(value) {
                    workqueue.push_back(value);
                }
            });

            while let Some(value) = workqueue.pop_front() {
                workqueue_set.remove(&value);
                counts.add(value);
                match &f.values[value.index()] {
                    &ValueDef::Alias(..) | &ValueDef::Arg(..) | &ValueDef::BlockParam(..) => {}
                    &ValueDef::Operator(_op, ref args) => {
                        for &arg in args {
                            let arg = f.resolve_alias(arg);
                            if counts.use_count[arg.index()] == 0 {
                                if workqueue_set.insert(arg) {
                                    workqueue.push_back(arg);
                                }
                            }
                        }
                    }
                    &ValueDef::PickOutput(value, _) => {
                        let value = f.resolve_alias(value);
                        if counts.use_count[value.index()] == 0 {
                            if workqueue_set.insert(value) {
                                workqueue.push_back(value);
                            }
                        }
                    }
                    &ValueDef::Placeholder => {
                        panic!("Unresolved placeholder for value {}", value);
                    }
                }
            }
        }

        counts
    }

    fn add(&mut self, value: Value) {
        self.use_count[value.index()] += 1;
    }
}
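Taken together with schedule.rs above, the intended pipeline is: count uses, then schedule. Below is a minimal driver sketch (not part of this commit); `CFGInfo::new` is an assumed constructor, since only `&CFGInfo` usage appears in this diff, and `UseCountAnalysis::compute` is `pub(crate)`, so such a driver would have to live inside the crate.

// Sketch only: wiring the two analyses together for one function body.
// CFGInfo::new(f) is assumed; it is not defined in this diff.
fn schedule_function(f: &FunctionBody) -> Schedule {
    let cfg = CFGInfo::new(f);               // assumed CFG construction
    let uses = UseCountAnalysis::compute(f); // use-count analysis (above)
    Schedule::compute(f, &cfg, &uses)        // op scheduling (above)
}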
@@ -8,8 +8,6 @@ use smallvec::SmallVec;
 pub mod domtree;
 pub mod postorder;
-pub mod serialize;
-pub mod structured;
 
 #[derive(Clone, Debug)]
 pub struct CFGInfo {
@@ -3,11 +3,8 @@
 use std::collections::hash_map::Entry;
 
 use crate::{
-    cfg::{
-        serialize::SerializedBody,
-        structured::{BlockOrder, LoopNest, WasmRegion},
-        CFGInfo,
-    },
+    backend::{BlockOrder, LoopNest, SerializedBody, WasmRegion},
+    cfg::CFGInfo,
     frontend, Operator,
 };
 use anyhow::Result;