From f49a7c6b6c3491cac67fa927908b6e9a0fbb0013 Mon Sep 17 00:00:00 2001
From: Chris Fallin <chris@cfallin.org>
Date: Thu, 23 Dec 2021 18:23:42 -0800
Subject: [PATCH] WIP.

---
 src/cfg/serialize.rs | 582 +++++++++++++++++++++++++++++++++++++++++--
 src/frontend.rs      |  22 +-
 src/ir.rs            |  33 ++-
 3 files changed, 587 insertions(+), 50 deletions(-)

diff --git a/src/cfg/serialize.rs b/src/cfg/serialize.rs
index 0232d2a..af03a99 100644
--- a/src/cfg/serialize.rs
+++ b/src/cfg/serialize.rs
@@ -3,12 +3,20 @@
 //! in Wasm function body. Contains everything needed to emit Wasm
 //! except for value locations (and corresponding local spill/reloads).
 
+use std::collections::VecDeque;
+
+use fxhash::{FxHashMap, FxHashSet};
+
 use super::{
     structured::{BlockOrder, BlockOrderEntry, BlockOrderTarget},
     CFGInfo,
 };
-use crate::{BlockId, FunctionBody, Value};
+use crate::{BlockId, FunctionBody, Operator, Terminator, Value, ValueDef};
 
+/// A Wasm function body with a serialized sequence of operators that
+/// mirror Wasm opcodes in every way *except* for locals corresponding
+/// to SSA values. This is a sort of "pre-regalloc" representation of
+/// the final code.
 #[derive(Clone, Debug)]
 pub struct SerializedBody {
     pub(crate) operators: Vec<SerializedOperator>,
@@ -16,8 +24,8 @@ pub struct SerializedBody {
 
 #[derive(Clone, Debug)]
 pub enum SerializedBlockTarget {
-    Fallthrough(Vec<Value>),
-    Branch(usize, Vec<Value>),
+    Fallthrough(Vec<SerializedOperator>),
+    Branch(usize, Vec<SerializedOperator>),
 }
 
 #[derive(Clone, Debug)]
 pub enum SerializedOperator {
@@ -28,7 +36,7 @@ pub enum SerializedOperator {
     },
     StartLoop {
         header: BlockId,
-        param: Vec<(wasmparser::Type, Value)>,
+        params: Vec<(wasmparser::Type, Value)>,
     },
     Br(SerializedBlockTarget),
     BrIf {
@@ -41,43 +49,563 @@ pub enum SerializedOperator {
         targets: Vec<SerializedBlockTarget>,
         default: SerializedBlockTarget,
     },
-    Operator(Value),
+    /// Compute the given value. Stack discipline will be maintained:
+    /// all operands will be computed or fetched via `Get` and all
+    /// produced results will be used directly or stored via `Set`.
+    Operator(Operator),
+    /// Get the given value from the local corresponding to the
+    /// `Value`'s n'th result.
+    Get(Value, usize),
+    /// Set the local corresponding to the `Value`'s n'th result,
+    /// consuming the value on the stack.
+    Set(Value, usize),
+    /// Set the value, like `Set`, but without consuming it from the
+    /// stack.
+    Tee(Value, usize),
+    /// Get the given function argument.
+    GetArg(usize),
     End,
 }
 
+impl SerializedOperator {
+    pub fn visit_value_locals<R: FnMut(Value, usize), W: FnMut(Value, usize)>(
+        &self,
+        mut r: R,
+        mut w: W,
+    ) {
+        match self {
+            &SerializedOperator::Br(ref target) => {
+                target.visit_value_locals(&mut r, &mut w);
+            }
+            &SerializedOperator::BrIf {
+                cond,
+                ref if_true,
+                ref if_false,
+            } => {
+                r(cond, 0);
+                if_true.visit_value_locals(&mut r, &mut w);
+                if_false.visit_value_locals(&mut r, &mut w);
+            }
+            &SerializedOperator::BrTable {
+                index,
+                ref default,
+                ref targets,
+            } => {
+                r(index, 0);
+                default.visit_value_locals(&mut r, &mut w);
+                for target in targets {
+                    target.visit_value_locals(&mut r, &mut w);
+                }
+            }
+            &SerializedOperator::Get(v, i) => {
+                r(v, i);
+            }
+            &SerializedOperator::Set(v, i) | &SerializedOperator::Tee(v, i) => {
+                w(v, i);
+            }
+            &SerializedOperator::StartBlock { ref params, .. }
+            | &SerializedOperator::StartLoop { ref params, .. } => {
+                for &(_, value) in params {
+                    w(value, 0);
+                }
+            }
+            &SerializedOperator::GetArg(..)
+            | &SerializedOperator::Operator(..)
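+            // `Operator` itself names no locals: operand fetches and
+            // result stores appear as explicit `Get`/`Set`/`Tee` ops
+            // around it, and those are visited on their own.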
+            | &SerializedOperator::End => {}
+        }
+    }
+}
+
+impl SerializedBlockTarget {
+    fn visit_value_locals<R: FnMut(Value, usize), W: FnMut(Value, usize)>(
+        &self,
+        r: &mut R,
+        w: &mut W,
+    ) {
+        match self {
+            &SerializedBlockTarget::Branch(_, ref ops)
+            | &SerializedBlockTarget::Fallthrough(ref ops) => {
+                for op in ops {
+                    op.visit_value_locals(|value, i| r(value, i), |value, i| w(value, i));
+                }
+            }
+        }
+    }
+}
+
+struct SerializedBodyContext<'a> {
+    f: &'a FunctionBody,
+    cfg: &'a CFGInfo,
+    uses: &'a UseCountAnalysis,
+    schedule: &'a Schedule,
+    operators: Vec<SerializedOperator>,
+}
+
 impl SerializedBody {
     pub fn compute(f: &FunctionBody, cfg: &CFGInfo, order: &BlockOrder) -> SerializedBody {
-        let mut operators = vec![];
+        let uses = UseCountAnalysis::compute(f);
+        let schedule = Schedule::compute(f, cfg, &uses);
+        let mut ctx = SerializedBodyContext {
+            f,
+            cfg,
+            uses: &uses,
+            schedule: &schedule,
+            operators: vec![],
+        };
         for entry in &order.entries {
-            Self::compute_entry(f, cfg, entry, &mut operators);
+            ctx.compute_entry(entry);
+        }
+        SerializedBody {
+            operators: ctx.operators,
         }
-        SerializedBody { operators }
     }
+}
 
-    fn compute_entry(
-        f: &FunctionBody,
-        cfg: &CFGInfo,
-        entry: &BlockOrderEntry,
-        operators: &mut Vec<SerializedOperator>,
-    ) {
+impl<'a> SerializedBodyContext<'a> {
+    fn compute_entry(&mut self, entry: &BlockOrderEntry) {
         match entry {
-            &BlockOrderEntry::StartBlock(header, ref params) => {
-                operators.push(SerializedOperator::StartBlock {
-                    header,
-                    params: params.clone(),
-                });
-            }
-            &BlockOrderEntry::StartLoop(header, ref params) => {
-                operators.push(SerializedOperator::StartBlock {
-                    header,
-                    params: params.clone(),
-                });
+            &BlockOrderEntry::StartBlock(header, ref params)
+            | &BlockOrderEntry::StartLoop(header, ref params) => {
+                let is_loop = match entry {
+                    &BlockOrderEntry::StartLoop(..) => true,
+                    _ => false,
+                };
+
+                if is_loop {
+                    self.operators.push(SerializedOperator::StartLoop {
+                        header,
+                        params: params.clone(),
+                    });
+                } else {
+                    self.operators.push(SerializedOperator::StartBlock {
+                        header,
+                        params: params.clone(),
+                    });
+                }
+
+                // Save params that are on the stack into
+                // locals. TODO: reuse one or more values immediately
+                // if ready-to-schedule ops can use them.
+                for &(_, value) in params.iter().rev() {
+                    self.operators.push(SerializedOperator::Set(value, 0));
+                }
             }
             &BlockOrderEntry::End => {
-                operators.push(SerializedOperator::End);
+                self.operators.push(SerializedOperator::End);
             }
             &BlockOrderEntry::BasicBlock(block, ref targets) => {
-                todo!()
+                // Schedule ops. First handle the compute-at-top ones.
+                if let Some(compute_at_top) = self.schedule.compute_at_top_of_block.get(&block) {
+                    self.schedule_ops(None, &compute_at_top[..]);
+                }
+
+                // Next, schedule all toplevels, and the values that
+                // are ready to schedule after each one.
+                for &inst in &self.f.blocks[block].insts {
+                    if let Some(after) = self.schedule.compute_after_value.get(&inst) {
+                        self.schedule_ops(Some(inst), &after[..]);
+                    }
+                }
+
+                // For each BlockOrderTarget, compute a SerializedBlockTarget.
+                let targets = targets
+                    .iter()
+                    .map(|target| {
+                        let mut rev_ops = vec![];
+                        for &value in target.args.iter().rev() {
+                            self.push_value(value, &mut rev_ops);
+                        }
+                        rev_ops.reverse();
+                        match target.relative_branch {
+                            Some(branch) => SerializedBlockTarget::Branch(branch, rev_ops),
+                            None => SerializedBlockTarget::Fallthrough(rev_ops),
+                        }
+                    })
+                    .collect::<Vec<_>>();
+
+                // Finally, generate branch ops.
+                match &self.f.blocks[block].terminator {
+                    &Terminator::Br { .. } => {
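+                        // Unconditional branch: a single target, which
+                        // consumes the only entry in `targets`.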
+                        let target = targets.into_iter().next().unwrap();
+                        self.operators.push(SerializedOperator::Br(target));
+                    }
+                    &Terminator::CondBr { cond, .. } => {
+                        let mut iter = targets.into_iter();
+                        let if_true = iter.next().unwrap();
+                        let if_false = iter.next().unwrap();
+                        self.operators.push(SerializedOperator::BrIf {
+                            cond,
+                            if_true,
+                            if_false,
+                        });
+                    }
+                    &Terminator::Select { value, .. } => {
+                        let mut iter = targets.into_iter();
+                        let default = iter.next().unwrap();
+                        let targets = iter.collect::<Vec<_>>();
+                        self.operators.push(SerializedOperator::BrTable {
+                            index: value,
+                            targets,
+                            default,
+                        });
+                    }
+                    &Terminator::Return { ref values, .. } => {
+                        let mut rev_ops = vec![];
+                        for &value in values.iter().rev() {
+                            self.push_value(value, &mut rev_ops);
+                        }
+                        rev_ops.reverse();
+                        self.operators.extend(rev_ops.into_iter());
+                        self.operators
+                            .push(SerializedOperator::Operator(Operator::Return));
+                    }
+                    &Terminator::None => {
+                        self.operators
+                            .push(SerializedOperator::Operator(Operator::Unreachable));
+                    }
+                }
+            }
+        }
+    }
+
+    fn schedule_ops(&mut self, toplevel: Option<Value>, values: &[Value]) {
+        // Work backward, generating values in the appropriate order
+        // on the stack if single-use.
+        let mut rev_ops = vec![];
+        let mut to_compute = values
+            .iter()
+            .chain(toplevel.iter())
+            .cloned()
+            .collect::<FxHashSet<Value>>();
+        for &value in values.iter().rev() {
+            self.schedule_op(
+                value,
+                &mut rev_ops,
+                /* leave_value_on_stack = */ false,
+                &mut to_compute,
+            );
+        }
+        if let Some(toplevel) = toplevel {
+            self.schedule_op(
+                toplevel,
+                &mut rev_ops,
+                /* leave_value_on_stack = */ false,
+                &mut to_compute,
+            );
+        }
+        rev_ops.reverse();
+        self.operators.extend(rev_ops.into_iter());
+    }
+
+    fn push_value(&mut self, v: Value, rev_ops: &mut Vec<SerializedOperator>) {
+        match &self.f.values[v.index()] {
+            &ValueDef::PickOutput(v, i) => {
+                rev_ops.push(SerializedOperator::Get(v, i));
+            }
+            &ValueDef::Arg(i) => {
+                rev_ops.push(SerializedOperator::GetArg(i));
+            }
+            _ => {
+                rev_ops.push(SerializedOperator::Get(v, 0));
+            }
+        }
+    }
+
+    fn schedule_op(
+        &mut self,
+        op: Value,
+        rev_ops: &mut Vec<SerializedOperator>,
+        leave_value_on_stack: bool,
+        to_compute: &mut FxHashSet<Value>,
+    ) {
+        let op = self.f.resolve_alias(op);
+        if !to_compute.remove(&op) {
+            if leave_value_on_stack {
+                self.push_value(op, rev_ops);
+            }
+            return;
+        }
+
+        let (operator, operands) = match &self.f.values[op.index()] {
+            &ValueDef::Operator(op, ref operands) => (op, operands),
+            _ => {
+                return;
+            }
+        };
+
+        // We're generating ops in reverse order, so we must store the
+        // value(s) first.
+        for i in (0..self.f.types[op.index()].len()).rev() {
+            if !leave_value_on_stack {
+                rev_ops.push(SerializedOperator::Set(op, i));
+            } else {
+                assert_eq!(i, 0);
+                if self.uses.use_count[op.index()] > 1 {
+                    rev_ops.push(SerializedOperator::Tee(op, i));
+                }
+            }
+        }
+
+        rev_ops.push(SerializedOperator::Operator(operator));
+
+        // Now push the args, in reverse order.
+        for &arg in operands.iter().rev() {
+            match &self.f.values[arg.index()] {
+                &ValueDef::Operator(..) => {
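+                    // A single-use, single-result operand can be
+                    // computed directly onto the stack at its use
+                    // site; anything else goes through a local.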
+                    if self.uses.use_count[arg.index()] == 1
+                        && self.f.types[arg.index()].len() == 1
+                    {
+                        self.schedule_op(
+                            arg, rev_ops, /* leave_value_on_stack = */ true, to_compute,
+                        );
+                    } else {
+                        self.push_value(arg, rev_ops);
+                    }
+                }
+                _ => {
+                    self.push_value(arg, rev_ops);
+                }
+            }
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct UseCountAnalysis {
+    toplevel: FxHashSet<Value>,
+    use_count: Vec<usize>,
+}
+
+impl UseCountAnalysis {
+    fn compute(f: &FunctionBody) -> UseCountAnalysis {
+        let n_values = f.values.len();
+        let mut counts = UseCountAnalysis {
+            use_count: vec![0; n_values],
+            toplevel: FxHashSet::default(),
+        };
+
+        let mut workqueue = VecDeque::new();
+        let mut workqueue_set = FxHashSet::default();
+        for block in 0..f.blocks.len() {
+            for &value in &f.blocks[block].insts {
+                let value = f.resolve_alias(value);
+                if workqueue_set.insert(value) {
+                    workqueue.push_back(value);
+                }
+                counts.toplevel.insert(value);
+            }
+
+            while let Some(value) = workqueue.pop_front() {
+                workqueue_set.remove(&value);
+                counts.add(value);
+                match &f.values[value.index()] {
+                    &ValueDef::Alias(..) | &ValueDef::Arg(..) | &ValueDef::BlockParam(..) => {}
+                    &ValueDef::Operator(_op, ref args) => {
+                        for &arg in args {
+                            let arg = f.resolve_alias(arg);
+                            if counts.use_count[arg.index()] == 0 {
+                                if workqueue_set.insert(arg) {
+                                    workqueue.push_back(arg);
+                                }
+                            }
+                        }
+                    }
+                    &ValueDef::PickOutput(value, _) => {
+                        let value = f.resolve_alias(value);
+                        if counts.use_count[value.index()] == 0 {
+                            if workqueue_set.insert(value) {
+                                workqueue.push_back(value);
+                            }
+                        }
+                    }
+                    &ValueDef::Placeholder => {
+                        panic!("Unresolved placeholder for value {}", value);
+                    }
+                }
+            }
+        }
+
+        counts
+    }
+
+    fn add(&mut self, value: Value) {
+        self.use_count[value.index()] += 1;
+    }
+}
+
+#[derive(Clone, Debug, Default)]
+pub struct Schedule {
+    /// Output: location at which to compute each value.
+    pub location: Vec<Location>,
+    /// Output: for each toplevel value, all values that are computed
+    /// after it is computed.
+    pub compute_after_value: FxHashMap<Value, Vec<Value>>,
+    /// Output: all values ready at the top of a given block.
+    pub compute_at_top_of_block: FxHashMap<BlockId, Vec<Value>>,
+}
+
+pub struct SchedulerContext<'a> {
+    /// The schedule we are constructing.
+    schedule: &'a mut Schedule,
+    /// In-progress state: for each value, the values that have one
+    /// more ready input once that value is computed.
+    waiting_on_value: FxHashMap<Value, Vec<Value>>,
+    /// In-progress state: for each value, how many inputs need to
+    /// become ready.
+    remaining_inputs: FxHashMap<Value, usize>,
+    /// In-progress state: all values that are ready to be scheduled.
+    ready: Vec<Value>,
+    /// Input context: CFG.
+    cfg: &'a CFGInfo,
+    /// Input context: function body.
+    f: &'a FunctionBody,
+}
+
+/// Locations are denoted by top-level values (those in `insts`),
+/// which are those with a side-effect; the sea-of-nodes
+/// representation for all other value nodes allows them to be
+/// computed anywhere that is dominated by the defs of all operands
+/// and that dominates all uses, so we have significant flexibility.
+/// We denote a location as "after a toplevel"; then, in the second
+/// pass where we actually generate operators according to stack
+/// discipline, we resolve the order for all values at a given
+/// toplevel.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum Location {
+    /// At a separate top-level location.
+    Toplevel,
+    /// After a given value.
+    After(Value),
+    /// At the top of a given block.
+    BlockTop(BlockId),
+    /// Not yet scheduled.
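+    /// (The initial state for every value; `Schedule::compute` fills
+    /// in a real location for each value it visits.)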
+    None,
+}
+
+impl Schedule {
+    pub fn compute(f: &FunctionBody, cfg: &CFGInfo, uses: &UseCountAnalysis) -> Self {
+        let mut schedule = Schedule::default();
+        schedule.location = vec![Location::None; f.values.len()];
+
+        let mut ctx = SchedulerContext {
+            schedule: &mut schedule,
+            f,
+            cfg,
+            waiting_on_value: FxHashMap::default(),
+            remaining_inputs: FxHashMap::default(),
+            ready: vec![],
+        };
+
+        // Prepare the "waiting on value", "remaining inputs", and
+        // "ready" vectors.
+        for (value, value_def) in f.values() {
+            if uses.use_count[value.index()] == 0 {
+                continue;
+            }
+            match value_def {
+                &ValueDef::Operator(_, ref operands) => {
+                    if operands.len() == 0 {
+                        ctx.ready.push(value);
+                    } else {
+                        ctx.remaining_inputs.insert(value, operands.len());
+                        for &input in operands {
+                            let input = f.resolve_alias(input);
+                            ctx.waiting_on_value
+                                .entry(input)
+                                .or_insert_with(|| vec![])
+                                .push(value);
+                        }
+                    }
+                }
+                &ValueDef::Alias(v) | &ValueDef::PickOutput(v, _) => {
+                    let v = f.resolve_alias(v);
+                    ctx.remaining_inputs.insert(value, 1);
+                    ctx.waiting_on_value
+                        .entry(v)
+                        .or_insert_with(|| vec![])
+                        .push(value);
+                }
+                _ => {}
+            }
+        }
+
+        // Traverse blocks in RPO. When we schedule a given op, we've
+        // already scheduled all of its operands, so we can find the
+        // right place for it without any sort of backtracking or
+        // fixpoint convergence.
+        //
+        // - Values in `insts` (toplevel operations) are scheduled at
+        //   their locations. All side-effecting ops are in this
+        //   category, and hence never experience code motion relative
+        //   to other side-effecting ops or to control flow.
+        //
+        // - Otherwise, values are scheduled after their last operand
+        //   becomes ready. All operands must have been computed by
+        //   the time we reach a given operator in RPO, and each
+        //   operand's scheduled site must dominate the current
+        //   location (a toplevel value). Because the dominance
+        //   relation forms a tree structure (the domtree), for any
+        //   two operand def sites X and Y and current location L,
+        //   given X dom L and Y dom L, either X dom Y or Y dom X.
+        //   Thus, we can consider the current-best site and each new
+        //   operand's site in pairs, and pick the one that is
+        //   dominated by the other.
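+        //
+        //   For example, if X dom Y and both dominate L, then Y is
+        //   the later of the two def sites, so a value using both X
+        //   and Y must be anchored at Y (the dominated site) or
+        //   below.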
+
+        for &block in cfg.postorder.iter().rev() {
+            for &(_, param) in &f.blocks[block].params {
+                ctx.wake_dependents(param);
+            }
+            for &inst in &f.blocks[block].insts {
+                ctx.sched_toplevel(inst);
+            }
+        }
+
+        schedule
+    }
+}
+
+impl<'a> SchedulerContext<'a> {
+    fn sched_toplevel(&mut self, v: Value) {
+        assert_eq!(self.schedule.location[v.index()], Location::None);
+        self.schedule.location[v.index()] = Location::Toplevel;
+        self.wake_dependents(v);
+    }
+
+    fn sched_ready_after_value(&mut self, v: Value) {
+        while !self.ready.is_empty() {
+            for ready in std::mem::take(&mut self.ready) {
+                self.schedule.location[ready.index()] = Location::After(v);
+                self.schedule
+                    .compute_after_value
+                    .entry(v)
+                    .or_insert_with(|| vec![])
+                    .push(ready);
+                self.wake_dependents(ready);
+            }
+        }
+    }
+
+    fn sched_ready_at_block_top(&mut self, block: BlockId) {
+        while !self.ready.is_empty() {
+            for ready in std::mem::take(&mut self.ready) {
+                self.schedule.location[ready.index()] = Location::BlockTop(block);
+                self.schedule
+                    .compute_at_top_of_block
+                    .entry(block)
+                    .or_insert_with(|| vec![])
+                    .push(ready);
+                self.wake_dependents(ready);
+            }
+        }
+    }
+
+    fn wake_dependents(&mut self, v: Value) {
+        let dependents = self.waiting_on_value.remove(&v).unwrap_or_default();
+        for dependent in dependents {
+            let remaining = self.remaining_inputs.get_mut(&dependent).unwrap();
+            *remaining -= 1;
+            if *remaining == 0 {
+                self.remaining_inputs.remove(&dependent);
+                self.ready.push(dependent);
+                self.wake_dependents(dependent);
+            }
+        }
+    }
+}
diff --git a/src/frontend.rs b/src/frontend.rs
index 8b5ee23..5c2c018 100644
--- a/src/frontend.rs
+++ b/src/frontend.rs
@@ -131,7 +131,7 @@ fn parse_body<'a>(
 
     for (arg_idx, &arg_ty) in module.signatures[my_sig].params.iter().enumerate() {
         let local_idx = arg_idx as LocalId;
-        let value = builder.body.add_value(ValueDef::Arg(arg_idx), Some(arg_ty));
+        let value = builder.body.add_value(ValueDef::Arg(arg_idx), vec![arg_ty]);
         trace!("defining local {} to value {}", local_idx, value);
         builder.locals.declare(local_idx, arg_ty);
         builder.locals.set(local_idx, value);
@@ -295,11 +295,11 @@ impl LocalTracker {
         match ty {
             Type::I32 => body.add_value(
                 ValueDef::Operator(Operator::I32Const { value: 0 }, vec![]),
-                Some(ty),
+                vec![ty],
             ),
             Type::I64 => body.add_value(
                 ValueDef::Operator(Operator::I64Const { value: 0 }, vec![]),
-                Some(ty),
+                vec![ty],
             ),
             Type::F32 => body.add_value(
                 ValueDef::Operator(
                     Operator::F32Const {
@@ -308,7 +308,7 @@ impl LocalTracker {
                     },
                     vec![],
                 ),
-                Some(ty),
+                vec![ty],
             ),
             Type::F64 => body.add_value(
                 ValueDef::Operator(
                     Operator::F64Const {
@@ -317,7 +317,7 @@ impl LocalTracker {
                    },
                     vec![],
                 ),
-                Some(ty),
+                vec![ty],
             ),
             _ => todo!("unsupported type: {:?}", ty),
         }
@@ -1173,16 +1173,12 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> {
         }
         input_operands.reverse();
         log::trace!("  -> operands: {:?}", input_operands);
+        log::trace!("  -> ty {:?}", outputs);
 
-        let ty = if n_outputs == 1 {
-            Some(outputs[0])
-        } else {
-            None
-        };
         let value = self
             .body
-            .add_value(ValueDef::Operator(op, input_operands), ty);
-        log::trace!("  -> value: {:?} ty {:?}", value, ty);
+            .add_value(ValueDef::Operator(op, input_operands), outputs.clone());
+        log::trace!("  -> value: {:?}", value);
 
         if let Some(block) = self.cur_block {
             if !op_effects(&op).unwrap().is_empty() {
@@ -1197,7 +1193,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> {
             for (i, output_ty) in outputs.into_iter().enumerate() {
                 let pick = self
                     .body
-                    .add_value(ValueDef::PickOutput(value, i), Some(output_ty));
+                    .add_value(ValueDef::PickOutput(value, i), vec![output_ty]);
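+                // One `PickOutput` per result: consumers refer to
+                // each result of a multi-value op as a (value, index)
+                // pair.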
                 self.op_stack.push((output_ty, pick));
                 log::trace!("  -> pick {}: {:?} ty {:?}", i, pick, output_ty);
             }
diff --git a/src/ir.rs b/src/ir.rs
index 6798a12..2ba2a02 100644
--- a/src/ir.rs
+++ b/src/ir.rs
@@ -4,6 +4,7 @@ use std::collections::hash_map::Entry;
 
 use crate::{
     cfg::{
+        serialize::SerializedBody,
         structured::{BlockOrder, LoopNest, WasmRegion},
         CFGInfo,
     },
@@ -55,7 +56,9 @@ pub struct FunctionBody {
     /// Sea-of-nodes representation.
     pub values: Vec<ValueDef>,
     value_dedup: FxHashMap<ValueDef, Value>,
-    pub types: Vec<Option<Type>>,
+    /// A single value can have multiple types if multi-value (e.g. a
+    /// call).
+    pub types: Vec<Vec<Type>>,
 }
 
 impl FunctionBody {
@@ -77,18 +80,19 @@ impl FunctionBody {
         log::trace!("add_edge: from {} to {}", from, to);
     }
 
-    pub fn add_value(&mut self, value: ValueDef, ty: Option<Type>) -> Value {
+    pub fn add_value(&mut self, value: ValueDef, tys: Vec<Type>) -> Value {
+        log::trace!("add_value: def {:?} ty {:?}", value, tys);
         let id = match self.value_dedup.entry(value.clone()) {
             Entry::Occupied(o) => *o.get(),
             Entry::Vacant(v) => {
                 let id = Value(self.values.len() as u32);
                 self.values.push(value.clone());
-                self.types.push(ty);
+                self.types.push(tys);
                 v.insert(id);
                 id
             }
         };
-        log::trace!("add_value: def {:?} ty {:?} -> {:?}", value, ty, id);
+        log::trace!("  -> value {:?}", id);
         id
     }
 
@@ -116,27 +120,27 @@ impl FunctionBody {
         result
     }
 
-    pub fn add_mutable_inst(&mut self, ty: Option<Type>, def: ValueDef) -> Value {
+    pub fn add_mutable_inst(&mut self, tys: Vec<Type>, def: ValueDef) -> Value {
         let value = Value(self.values.len() as u32);
-        self.types.push(ty);
+        self.types.push(tys);
         self.values.push(def);
         value
     }
 
     pub fn add_blockparam(&mut self, block: BlockId, ty: Type) -> Value {
         let index = self.blocks[block].params.len();
-        let value = self.add_value(ValueDef::BlockParam(block, index), Some(ty));
+        let value = self.add_value(ValueDef::BlockParam(block, index), vec![ty]);
         self.blocks[block].params.push((ty, value));
         value
     }
 
     pub fn add_placeholder(&mut self, ty: Type) -> Value {
-        self.add_mutable_inst(Some(ty), ValueDef::Placeholder)
+        self.add_mutable_inst(vec![ty], ValueDef::Placeholder)
     }
 
     pub fn replace_placeholder_with_blockparam(&mut self, block: BlockId, value: Value) {
         assert!(self.values[value.index()] == ValueDef::Placeholder);
-        let ty = self.types[value.index()].unwrap();
+        let ty = self.types[value.index()].get(0).cloned().unwrap();
         let index = self.blocks[block].params.len();
         self.blocks[block].params.push((ty, value));
         self.values[value.index()] = ValueDef::BlockParam(block, index);
@@ -169,6 +173,13 @@ impl FunctionBody {
         self.locals.push(ty);
         id
     }
+
+    pub fn values<'a>(&'a self) -> impl Iterator<Item = (Value, &'a ValueDef)> + 'a {
+        self.values
+            .iter()
+            .enumerate()
+            .map(|(idx, value_def)| (Value(idx as u32), value_def))
+    }
 }
 
 impl std::ops::Index<Value> for FunctionBody {
@@ -481,7 +492,9 @@ impl<'a> Module<'a> {
                 let cfg = CFGInfo::new(body);
                 let loopnest = LoopNest::compute(&cfg);
                 let regions = WasmRegion::compute(&cfg, &loopnest);
-                let _blockorder = BlockOrder::compute(body, &cfg, &regions);
+                let blockorder = BlockOrder::compute(body, &cfg, &regions);
+                let serialized = SerializedBody::compute(body, &cfg, &blockorder);
+                log::trace!("serialized: {:?}", serialized);
             }
             _ => {}
         }
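
Usage sketch (illustrative, not part of the patch): with a parsed
`FunctionBody` in hand, the new pass slots in after block ordering,
exactly as in the `ir.rs` hunk above; the trace loop at the end is
hypothetical.

    let cfg = CFGInfo::new(body);
    let loopnest = LoopNest::compute(&cfg);
    let regions = WasmRegion::compute(&cfg, &loopnest);
    let blockorder = BlockOrder::compute(body, &cfg, &regions);
    let serialized = SerializedBody::compute(body, &cfg, &blockorder);
    for op in &serialized.operators {
        // Every op here is Wasm-shaped except Get/Set/Tee/GetArg,
        // which a later pass would lower to local.get/local.set/
        // local.tee once locals have been assigned to SSA values.
        log::trace!("op: {:?}", op);
    }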