From 57693e592cb53873ef8c7d366ccca5d2a33110a7 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 23 Dec 2021 20:05:36 -0800 Subject: [PATCH] cleanup and move code to backend/ --- src/backend/mod.rs | 9 + src/backend/schedule.rs | 220 ++++++++++++++++++++++ src/{cfg => backend}/serialize.rs | 293 +---------------------------- src/{cfg => backend}/structured.rs | 0 src/backend/use_count.rs | 75 ++++++++ src/cfg/mod.rs | 2 - src/ir.rs | 7 +- 7 files changed, 310 insertions(+), 296 deletions(-) create mode 100644 src/backend/schedule.rs rename src/{cfg => backend}/serialize.rs (57%) rename src/{cfg => backend}/structured.rs (100%) create mode 100644 src/backend/use_count.rs diff --git a/src/backend/mod.rs b/src/backend/mod.rs index edafad3..26f7951 100644 --- a/src/backend/mod.rs +++ b/src/backend/mod.rs @@ -1,4 +1,13 @@ //! Backend: IR to Wasm. +mod structured; +pub use structured::*; +mod use_count; +pub use use_count::*; +mod schedule; +pub use schedule::*; +mod serialize; +pub use serialize::*; + mod locations; diff --git a/src/backend/schedule.rs b/src/backend/schedule.rs new file mode 100644 index 0000000..bc44cf7 --- /dev/null +++ b/src/backend/schedule.rs @@ -0,0 +1,220 @@ +//! Op scheduling. + +use fxhash::FxHashMap; + +use super::UseCountAnalysis; +use crate::{cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Value, ValueDef}; + +#[derive(Clone, Debug, Default)] +pub struct Schedule { + /// Output: location at which to compute each value. + pub location: Vec, + /// Output: for each toplevel value, all values that are computed + /// after it is. + pub compute_after_value: FxHashMap>, + /// Output: all values ready at the top of a given block. + pub compute_at_top_of_block: FxHashMap>, +} + +pub struct SchedulerContext<'a> { + /// The schedule we are constructing. + schedule: &'a mut Schedule, + /// In-progress state: for each value, the values that have one + /// more ready input once that value is computed. + waiting_on_value: FxHashMap>, + /// In-progress state: for each value, how many inputs need to + /// become ready. + remaining_inputs: FxHashMap, + /// In-progress state: all values that are ready to be scheduled. + ready: Vec, + /// Input context: CFG. + cfg: &'a CFGInfo, + /// Input context: function body. + f: &'a FunctionBody, +} + +/// Locations are denoted by top-level values (those in `insts`), +/// which are those with a side-effect; the sea-of-nodes +/// representation for all other value nodes allows them to be +/// computed anywhere dominated by all operands and that dominates all +/// uses, so we have significant flexibility. We denote a location as +/// "after a toplevel", then in the second pass where we actually +/// generate operators according to stack discipline, we resolve the +/// order for all values at a given toplevel. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Location { + /// At a separate top-level location. + Toplevel, + /// After a given value. + After(Value), + /// At the top of a given block. + BlockTop(BlockId), + /// Not yet scheduled. 
+ None, +} + +impl Schedule { + pub fn compute(f: &FunctionBody, cfg: &CFGInfo, uses: &UseCountAnalysis) -> Self { + let mut schedule = Schedule::default(); + schedule.location = vec![Location::None; f.values.len()]; + + log::trace!("f: {:?}", f); + log::trace!("cfg: {:?}", cfg); + log::trace!("uses: {:?}", uses); + + let mut ctx = SchedulerContext { + schedule: &mut schedule, + f, + cfg, + waiting_on_value: FxHashMap::default(), + remaining_inputs: FxHashMap::default(), + ready: vec![], + }; + + // Prepare the "waiting on value", "remaining inputs", and + // "ready" vectors. + for (value, value_def) in f.values() { + if uses.use_count[value.index()] == 0 { + continue; + } + if uses.toplevel.contains(&value) { + continue; + } + match value_def { + &ValueDef::Operator(op, ref operands) => { + if operands.len() == 0 { + if !op_rematerialize(&op) { + log::trace!("immediately ready: v{}", value.index()); + ctx.ready.push(value); + } + } else { + log::trace!("v{} waiting on {:?}", value.index(), operands); + ctx.remaining_inputs.insert(value, operands.len()); + for &input in operands { + let input = f.resolve_alias(input); + ctx.waiting_on_value + .entry(input) + .or_insert_with(|| vec![]) + .push(value); + } + } + } + &ValueDef::Alias(v) | &ValueDef::PickOutput(v, _) => { + let v = f.resolve_alias(v); + ctx.remaining_inputs.insert(value, 1); + ctx.waiting_on_value + .entry(v) + .or_insert_with(|| vec![]) + .push(value); + } + _ => {} + } + } + + // Traverse blocks in RPO. When we schedule a given op, we've + // already scheduled all of its operands, so we can find the + // right place for it without any sort of backtracking or + // fixpoint convergence. + // + // - Values in `insts` (toplevel operations) + // are scheduled at their locations. All side-effecting ops + // are in this category, and hence never experience + // code-motion relative to other side-effecting ops or + // control flow. + // + // - Otherwise, values are scheduled after their last operand + // is ready. All operands must have been computed by the + // time we reach a given operator in RPO, and each operand's + // scheduled site must dominate the current location + // (toplevel value). Because the dominance relation forms a + // tree structure (the domtree), for any two operand def + // sites X and Y to the current location L, given X dom L + // and Y dom L, either X dom Y or Y dom X. Thus, consider + // the current-best and each new operand in pairs, and pick + // the one that is dominated by the other. 
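The pairwise rule described in the comment above can be sketched concretely. This is an illustration only: it assumes a domtree query `dominates(a, b) -> bool` (the name is not an API this patch defines) and picks, among operand definition sites that all dominate the use, the one dominated by every other candidate, i.e. the latest one. `BlockId` is the crate's block index type.

    // Illustrative sketch; `dominates` stands in for an assumed domtree query.
    fn latest_def_site(
        sites: &[BlockId],
        dominates: impl Fn(BlockId, BlockId) -> bool,
    ) -> Option<BlockId> {
        let mut best: Option<BlockId> = None;
        for &site in sites {
            best = Some(match best {
                None => site,
                // Both sites dominate the use, so one dominates the
                // other; keep the dominated (later) one.
                Some(cur) if dominates(cur, site) => site,
                Some(cur) => cur,
            });
        }
        best
    }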
+ + for &block in cfg.postorder.iter().rev() { + for &(_, param) in &f.blocks[block].params { + log::trace!("block{}: param v{}", block, param.index()); + ctx.wake_dependents(param); + } + ctx.sched_ready_at_block_top(block); + for &inst in &f.blocks[block].insts { + log::trace!("block{}: toplevel v{}", block, inst.index()); + ctx.sched_toplevel(inst); + ctx.sched_ready_after_value(inst); + } + } + + schedule + } +} + +impl<'a> SchedulerContext<'a> { + fn sched_toplevel(&mut self, v: Value) { + log::trace!("sched_toplevel: v{}", v.index()); + assert_eq!(self.schedule.location[v.index()], Location::None); + self.schedule.location[v.index()] = Location::Toplevel; + self.wake_dependents(v); + } + + fn sched_ready_after_value(&mut self, v: Value) { + log::trace!("sched_ready_after_value: toplevel v{}", v.index()); + while !self.ready.is_empty() { + for ready in std::mem::take(&mut self.ready) { + log::trace!( + "sched_ready_after_value: toplevel v{} -> v{} now ready", + v.index(), + ready.index() + ); + self.schedule.location[ready.index()] = Location::After(v); + self.schedule + .compute_after_value + .entry(v) + .or_insert_with(|| vec![]) + .push(ready); + self.wake_dependents(ready); + } + } + } + + fn sched_ready_at_block_top(&mut self, block: BlockId) { + log::trace!("ready_at_block_top: block{}", block); + while !self.ready.is_empty() { + for ready in std::mem::take(&mut self.ready) { + log::trace!( + "ready_at_block_top: block{} -> ready: v{}", + block, + ready.index() + ); + self.schedule.location[ready.index()] = Location::BlockTop(block); + self.schedule + .compute_at_top_of_block + .entry(block) + .or_insert_with(|| vec![]) + .push(ready); + self.wake_dependents(ready); + } + } + } + + fn wake_dependents(&mut self, v: Value) { + log::trace!("wake_dependents: v{}", v.index()); + let dependents = self.waiting_on_value.remove(&v).unwrap_or_default(); + for dependent in dependents { + let remaining = self.remaining_inputs.get_mut(&dependent).unwrap(); + *remaining -= 1; + log::trace!( + " -> v{} wakes dependent v{}; remaining now {}", + v.index(), + dependent.index(), + *remaining + ); + if *remaining == 0 { + self.remaining_inputs.remove(&dependent); + self.ready.push(dependent); + self.wake_dependents(dependent); + } + } + } +} diff --git a/src/cfg/serialize.rs b/src/backend/serialize.rs similarity index 57% rename from src/cfg/serialize.rs rename to src/backend/serialize.rs index b1fa972..4d0ae2e 100644 --- a/src/cfg/serialize.rs +++ b/src/backend/serialize.rs @@ -3,17 +3,15 @@ //! in Wasm function body. Contains everything needed to emit Wasm //! except for value locations (and corresponding local spill/reloads). 
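This pass consumes the `Schedule` built in schedule.rs above. For a concrete sense of its shape, consider a hypothetical block with one parameter v0, toplevel instructions v1 and v3, and one pure value v2; the value numbers are invented for illustration, but the result follows directly from the `Schedule` fields and the RPO walk above.

    // Hypothetical example (value numbers invented for illustration):
    //   block0 params: v0
    //   insts:         v1 = call f(v0)        (toplevel)
    //                  v3 = store(..., v2)    (toplevel)
    //   pure:          v2 = i32.add(v0, v1)
    //
    // Schedule::compute then yields:
    //   location[v1] = Location::Toplevel
    //   location[v3] = Location::Toplevel
    //   location[v2] = Location::After(v1)    // its last operand becomes ready at v1
    //   compute_after_value[v1] = [v2]
    //
    // If v2 instead depended only on the block parameter (say v2 = i32.add(v0, v0)),
    // it would be ready at block entry:
    //   location[v2] = Location::BlockTop(block0)
    //   compute_at_top_of_block[block0] = [v2]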
-use std::collections::VecDeque; - -use fxhash::{FxHashMap, FxHashSet}; - use super::{ structured::{BlockOrder, BlockOrderEntry}, - CFGInfo, + Schedule, UseCountAnalysis, }; use crate::{ - op_traits::op_rematerialize, BlockId, FunctionBody, Operator, Terminator, Value, ValueDef, + cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Operator, Terminator, Value, + ValueDef, }; +use fxhash::FxHashSet; /// A Wasm function body with a serialized sequence of operators that /// mirror Wasm opcodes in every way *except* for locals corresponding @@ -420,286 +418,3 @@ impl<'a> SerializedBodyContext<'a> { } } } - -#[derive(Clone, Debug)] -pub struct UseCountAnalysis { - toplevel: FxHashSet, - use_count: Vec, -} - -impl UseCountAnalysis { - fn compute(f: &FunctionBody) -> UseCountAnalysis { - let n_values = f.values.len(); - let mut counts = UseCountAnalysis { - use_count: vec![0; n_values], - toplevel: FxHashSet::default(), - }; - - let mut workqueue = VecDeque::new(); - let mut workqueue_set = FxHashSet::default(); - for block in 0..f.blocks.len() { - for &value in &f.blocks[block].insts { - let value = f.resolve_alias(value); - if workqueue_set.insert(value) { - workqueue.push_back(value); - } - counts.toplevel.insert(value); - } - f.blocks[block].terminator.visit_uses(|value| { - let value = f.resolve_alias(value); - if workqueue_set.insert(value) { - workqueue.push_back(value); - } - }); - - while let Some(value) = workqueue.pop_front() { - workqueue_set.remove(&value); - counts.add(value); - match &f.values[value.index()] { - &ValueDef::Alias(..) | &ValueDef::Arg(..) | &ValueDef::BlockParam(..) => {} - &ValueDef::Operator(_op, ref args) => { - for &arg in args { - let arg = f.resolve_alias(arg); - if counts.use_count[arg.index()] == 0 { - if workqueue_set.insert(arg) { - workqueue.push_back(arg); - } - } - } - } - &ValueDef::PickOutput(value, _) => { - let value = f.resolve_alias(value); - if counts.use_count[value.index()] == 0 { - if workqueue_set.insert(value) { - workqueue.push_back(value); - } - } - } - &ValueDef::Placeholder => { - panic!("Unresolved placeholder for value {}", value); - } - } - } - } - - counts - } - - fn add(&mut self, value: Value) { - self.use_count[value.index()] += 1; - } -} - -#[derive(Clone, Debug, Default)] -pub struct Schedule { - /// Output: location at which to compute each value. - pub location: Vec, - /// Output: for each toplevel value, all values that are computed - /// after it is. - pub compute_after_value: FxHashMap>, - /// Output: all values ready at the top of a given block. - pub compute_at_top_of_block: FxHashMap>, -} - -pub struct SchedulerContext<'a> { - /// The schedule we are constructing. - schedule: &'a mut Schedule, - /// In-progress state: for each value, the values that have one - /// more ready input once that value is computed. - waiting_on_value: FxHashMap>, - /// In-progress state: for each value, how many inputs need to - /// become ready. - remaining_inputs: FxHashMap, - /// In-progress state: all values that are ready to be scheduled. - ready: Vec, - /// Input context: CFG. - cfg: &'a CFGInfo, - /// Input context: function body. - f: &'a FunctionBody, -} - -/// Locations are denoted by top-level values (those in `insts`), -/// which are those with a side-effect; the sea-of-nodes -/// representation for all other value nodes allows them to be -/// computed anywhere dominated by all operands and that dominates all -/// uses, so we have significant flexibility. 
We denote a location as -/// "after a toplevel", then in the second pass where we actually -/// generate operators according to stack discipline, we resolve the -/// order for all values at a given toplevel. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Location { - /// At a separate top-level location. - Toplevel, - /// After a given value. - After(Value), - /// At the top of a given block. - BlockTop(BlockId), - /// Not yet scheduled. - None, -} - -impl Schedule { - pub fn compute(f: &FunctionBody, cfg: &CFGInfo, uses: &UseCountAnalysis) -> Self { - let mut schedule = Schedule::default(); - schedule.location = vec![Location::None; f.values.len()]; - - log::trace!("f: {:?}", f); - log::trace!("cfg: {:?}", cfg); - log::trace!("uses: {:?}", uses); - - let mut ctx = SchedulerContext { - schedule: &mut schedule, - f, - cfg, - waiting_on_value: FxHashMap::default(), - remaining_inputs: FxHashMap::default(), - ready: vec![], - }; - - // Prepare the "waiting on value", "remaining inputs", and - // "ready" vectors. - for (value, value_def) in f.values() { - if uses.use_count[value.index()] == 0 { - continue; - } - if uses.toplevel.contains(&value) { - continue; - } - match value_def { - &ValueDef::Operator(op, ref operands) => { - if operands.len() == 0 { - if !op_rematerialize(&op) { - log::trace!("immediately ready: v{}", value.index()); - ctx.ready.push(value); - } - } else { - log::trace!("v{} waiting on {:?}", value.index(), operands); - ctx.remaining_inputs.insert(value, operands.len()); - for &input in operands { - let input = f.resolve_alias(input); - ctx.waiting_on_value - .entry(input) - .or_insert_with(|| vec![]) - .push(value); - } - } - } - &ValueDef::Alias(v) | &ValueDef::PickOutput(v, _) => { - let v = f.resolve_alias(v); - ctx.remaining_inputs.insert(value, 1); - ctx.waiting_on_value - .entry(v) - .or_insert_with(|| vec![]) - .push(value); - } - _ => {} - } - } - - // Traverse blocks in RPO. When we schedule a given op, we've - // already scheduled all of its operands, so we can find the - // right place for it without any sort of backtracking or - // fixpoint convergence. - // - // - Values in `insts` (toplevel operations) - // are scheduled at their locations. All side-effecting ops - // are in this category, and hence never experience - // code-motion relative to other side-effecting ops or - // control flow. - // - // - Otherwise, values are scheduled after their last operand - // is ready. All operands must have been computed by the - // time we reach a given operator in RPO, and each operand's - // scheduled site must dominate the current location - // (toplevel value). Because the dominance relation forms a - // tree structure (the domtree), for any two operand def - // sites X and Y to the current location L, given X dom L - // and Y dom L, either X dom Y or Y dom X. Thus, consider - // the current-best and each new operand in pairs, and pick - // the one that is dominated by the other. 
- - for &block in cfg.postorder.iter().rev() { - for &(_, param) in &f.blocks[block].params { - log::trace!("block{}: param v{}", block, param.index()); - ctx.wake_dependents(param); - } - ctx.sched_ready_at_block_top(block); - for &inst in &f.blocks[block].insts { - log::trace!("block{}: toplevel v{}", block, inst.index()); - ctx.sched_toplevel(inst); - ctx.sched_ready_after_value(inst); - } - } - - schedule - } -} - -impl<'a> SchedulerContext<'a> { - fn sched_toplevel(&mut self, v: Value) { - log::trace!("sched_toplevel: v{}", v.index()); - assert_eq!(self.schedule.location[v.index()], Location::None); - self.schedule.location[v.index()] = Location::Toplevel; - self.wake_dependents(v); - } - - fn sched_ready_after_value(&mut self, v: Value) { - log::trace!("sched_ready_after_value: toplevel v{}", v.index()); - while !self.ready.is_empty() { - for ready in std::mem::take(&mut self.ready) { - log::trace!( - "sched_ready_after_value: toplevel v{} -> v{} now ready", - v.index(), - ready.index() - ); - self.schedule.location[ready.index()] = Location::After(v); - self.schedule - .compute_after_value - .entry(v) - .or_insert_with(|| vec![]) - .push(ready); - self.wake_dependents(ready); - } - } - } - - fn sched_ready_at_block_top(&mut self, block: BlockId) { - log::trace!("ready_at_block_top: block{}", block); - while !self.ready.is_empty() { - for ready in std::mem::take(&mut self.ready) { - log::trace!( - "ready_at_block_top: block{} -> ready: v{}", - block, - ready.index() - ); - self.schedule.location[ready.index()] = Location::BlockTop(block); - self.schedule - .compute_at_top_of_block - .entry(block) - .or_insert_with(|| vec![]) - .push(ready); - self.wake_dependents(ready); - } - } - } - - fn wake_dependents(&mut self, v: Value) { - log::trace!("wake_dependents: v{}", v.index()); - let dependents = self.waiting_on_value.remove(&v).unwrap_or_default(); - for dependent in dependents { - let remaining = self.remaining_inputs.get_mut(&dependent).unwrap(); - *remaining -= 1; - log::trace!( - " -> v{} wakes dependent v{}; remaining now {}", - v.index(), - dependent.index(), - *remaining - ); - if *remaining == 0 { - self.remaining_inputs.remove(&dependent); - self.ready.push(dependent); - self.wake_dependents(dependent); - } - } - } -} diff --git a/src/cfg/structured.rs b/src/backend/structured.rs similarity index 100% rename from src/cfg/structured.rs rename to src/backend/structured.rs diff --git a/src/backend/use_count.rs b/src/backend/use_count.rs new file mode 100644 index 0000000..f5ecf9e --- /dev/null +++ b/src/backend/use_count.rs @@ -0,0 +1,75 @@ +//! Use-count analysis. 
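This analysis seeds the rest of the backend: the scheduler skips values whose use count is zero and treats the `toplevel` set as fixed anchor points. A minimal sketch of how the pieces chain together, assuming a `CFGInfo::new` constructor (that name is not shown in this patch); the `compute` signatures are as defined here and in schedule.rs:

    use crate::backend::{Schedule, UseCountAnalysis};
    use crate::cfg::CFGInfo;
    use crate::FunctionBody;

    // Sketch of the backend analysis pipeline; `CFGInfo::new` is an assumed name.
    fn analyze(body: &FunctionBody) -> Schedule {
        let cfg = CFGInfo::new(body);
        let uses = UseCountAnalysis::compute(body);
        Schedule::compute(body, &cfg, &uses)
    }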
+ +use std::collections::VecDeque; + +use crate::{Value, FunctionBody, ValueDef}; +use fxhash::FxHashSet; + +#[derive(Clone, Debug)] +pub struct UseCountAnalysis { + pub(crate) toplevel: FxHashSet, + pub(crate) use_count: Vec, +} + +impl UseCountAnalysis { + pub(crate) fn compute(f: &FunctionBody) -> UseCountAnalysis { + let n_values = f.values.len(); + let mut counts = UseCountAnalysis { + use_count: vec![0; n_values], + toplevel: FxHashSet::default(), + }; + + let mut workqueue = VecDeque::new(); + let mut workqueue_set = FxHashSet::default(); + for block in 0..f.blocks.len() { + for &value in &f.blocks[block].insts { + let value = f.resolve_alias(value); + if workqueue_set.insert(value) { + workqueue.push_back(value); + } + counts.toplevel.insert(value); + } + f.blocks[block].terminator.visit_uses(|value| { + let value = f.resolve_alias(value); + if workqueue_set.insert(value) { + workqueue.push_back(value); + } + }); + + while let Some(value) = workqueue.pop_front() { + workqueue_set.remove(&value); + counts.add(value); + match &f.values[value.index()] { + &ValueDef::Alias(..) | &ValueDef::Arg(..) | &ValueDef::BlockParam(..) => {} + &ValueDef::Operator(_op, ref args) => { + for &arg in args { + let arg = f.resolve_alias(arg); + if counts.use_count[arg.index()] == 0 { + if workqueue_set.insert(arg) { + workqueue.push_back(arg); + } + } + } + } + &ValueDef::PickOutput(value, _) => { + let value = f.resolve_alias(value); + if counts.use_count[value.index()] == 0 { + if workqueue_set.insert(value) { + workqueue.push_back(value); + } + } + } + &ValueDef::Placeholder => { + panic!("Unresolved placeholder for value {}", value); + } + } + } + } + + counts + } + + fn add(&mut self, value: Value) { + self.use_count[value.index()] += 1; + } +} diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs index 452106a..0c3d7b2 100644 --- a/src/cfg/mod.rs +++ b/src/cfg/mod.rs @@ -8,8 +8,6 @@ use smallvec::SmallVec; pub mod domtree; pub mod postorder; -pub mod serialize; -pub mod structured; #[derive(Clone, Debug)] pub struct CFGInfo { diff --git a/src/ir.rs b/src/ir.rs index 2ba2a02..9100280 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -3,11 +3,8 @@ use std::collections::hash_map::Entry; use crate::{ - cfg::{ - serialize::SerializedBody, - structured::{BlockOrder, LoopNest, WasmRegion}, - CFGInfo, - }, + backend::{BlockOrder, LoopNest, SerializedBody, WasmRegion}, + cfg::CFGInfo, frontend, Operator, }; use anyhow::Result;
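After this move, `backend` re-exports everything needed for lowering, so a caller can import, for example:

    // Structured control flow, scheduling, use counts, and serialization
    // now live under `backend`; only the CFG analyses remain under `cfg`.
    use crate::backend::{
        BlockOrder, LoopNest, Schedule, SerializedBody, UseCountAnalysis, WasmRegion,
    };
    use crate::cfg::CFGInfo;

This keeps `cfg` a pure analysis layer (domtree, postorder, `CFGInfo`) while the Wasm-emission machinery sits in one place.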