From 62fbb238d789ea68178ac2a6e44dc2de6e7bc86c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 14 Nov 2021 00:00:34 -0800 Subject: [PATCH] WIP. --- fuzz/Cargo.toml | 6 +++ fuzz/fuzz_targets/roundtrip.rs | 11 +++++ src/backend.rs | 54 +++++++++++++++++++++ src/frontend.rs | 7 +-- src/ir.rs | 86 +++++++++++++++++++++++++--------- src/lib.rs | 1 + 6 files changed, 141 insertions(+), 24 deletions(-) create mode 100644 fuzz/fuzz_targets/roundtrip.rs create mode 100644 src/backend.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 04f6384..8446bb3 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -26,3 +26,9 @@ name = "parse_ir" path = "fuzz_targets/parse_ir.rs" test = false doc = false + +[[bin]] +name = "roundtrip" +path = "fuzz_targets/roundtrip.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/roundtrip.rs b/fuzz/fuzz_targets/roundtrip.rs new file mode 100644 index 0000000..eec7311 --- /dev/null +++ b/fuzz/fuzz_targets/roundtrip.rs @@ -0,0 +1,11 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +use waffle::Module; + +fuzz_target!(|module: wasm_smith::Module| { + let _ = env_logger::try_init(); + let orig_bytes = module.to_bytes(); + let parsed_module = Module::from_wasm_bytes(&orig_bytes[..]).unwrap(); + let _ = parsed_module.to_wasm_bytes(); +}); diff --git a/src/backend.rs b/src/backend.rs new file mode 100644 index 0000000..b079298 --- /dev/null +++ b/src/backend.rs @@ -0,0 +1,54 @@ +//! IR-to-Wasm transform. + +use crate::ir::*; +use fxhash::{FxHashMap, FxHashSet}; + +pub fn treeify_function(func: &mut FunctionBody) -> FxHashSet<(BlockId, InstId)> { + // First, count uses of all values. + let mut uses: FxHashMap<(BlockId, InstId, usize), usize> = FxHashMap::default(); + for block in &func.blocks { + for inst in &block.insts { + for input in &inst.inputs { + match input { + &Operand::Value(value_id) => { + if let ValueKind::Inst(src_block, src_inst, idx) = + &func.values[value_id].kind + { + *uses.entry((*src_block, *src_inst, *idx)).or_insert(0) += 1; + } + } + _ => {} + } + } + } + + for arg in block.terminator.args() { + match arg { + Operand::Value(value_id) => { + if let ValueKind::Inst(src_block, src_inst, idx) = &func.values[value_id].kind { + *uses.entry((*src_block, *src_inst, *idx)).or_insert(0) += 1; + } + } + _ => {} + } + } + } + + // Next, treeify all insts with only one use. + let mut single_use_insts: FxHashSet<(BlockId, InstId)> = FxHashSet::default(); + for (block_idx, block) in func.blocks.iter().enumerate() { + for (inst_idx, inst) in block.insts.iter().enumerate() { + let all_one_use = (0..inst.outputs.len()).all(|output| { + uses.get(&(block_idx, inst_idx, output)) + .cloned() + .unwrap_or(0) + <= 1 + }); + if all_one_use { + single_use_insts.insert((block_idx, inst_idx)); + } + } + } + + single_use_insts +} diff --git a/src/frontend.rs b/src/frontend.rs index c3402b7..39867f8 100644 --- a/src/frontend.rs +++ b/src/frontend.rs @@ -14,6 +14,7 @@ use wasmparser::{ pub fn wasm_to_ir(bytes: &[u8]) -> Result> { let mut module = Module::default(); + module.orig_bytes = bytes; let parser = Parser::new(0); let mut next_func = 0; for payload in parser.parse_all(bytes) { @@ -770,7 +771,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { &self.ctrl_stack[self.ctrl_stack.len() - 1 - relative_depth as usize] } - fn fill_block_params_with_locals(&mut self, target: BlockId, args: &mut Vec>) { + fn fill_block_params_with_locals(&mut self, target: BlockId, args: &mut Vec) { if !self.block_param_locals.contains_key(&target) { let mut keys: Vec = self.locals.keys().cloned().collect(); keys.sort(); @@ -789,7 +790,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { fn emit_branch(&mut self, target: BlockId, args: &[ValueId]) { if let Some(block) = self.cur_block { - let mut args: Vec> = args.iter().map(|&val| Operand::value(val)).collect(); + let mut args: Vec = args.iter().map(|&val| Operand::value(val)).collect(); self.fill_block_params_with_locals(target, &mut args); let target = BlockTarget { block: target, @@ -840,7 +841,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { args: &[ValueId], ) { if let Some(block) = self.cur_block { - let args: Vec> = args.iter().map(|&arg| Operand::value(arg)).collect(); + let args: Vec = args.iter().map(|&arg| Operand::value(arg)).collect(); let targets = indexed_targets .iter() .map(|&block| { diff --git a/src/ir.rs b/src/ir.rs index c93491c..e8cb865 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -1,6 +1,6 @@ //! Intermediate representation for Wasm. -use crate::frontend; +use crate::{backend, frontend}; use anyhow::Result; use wasmparser::{FuncType, Operator, Type}; @@ -15,6 +15,7 @@ pub const NO_VALUE: ValueId = usize::MAX; #[derive(Clone, Debug, Default)] pub struct Module<'a> { + pub orig_bytes: &'a [u8], pub funcs: Vec>, pub signatures: Vec, pub globals: Vec, @@ -61,28 +62,26 @@ pub enum ValueKind { pub struct Block<'a> { pub params: Vec, pub insts: Vec>, - pub terminator: Terminator<'a>, + pub terminator: Terminator, } #[derive(Clone, Debug)] pub struct Inst<'a> { pub operator: Operator<'a>, pub outputs: Vec, - pub inputs: Vec>, + pub inputs: Vec, } -#[derive(Clone, Debug)] -pub enum Operand<'a> { +#[derive(Clone, Copy, Debug)] +pub enum Operand { /// An SSA value. Value(ValueId), - /// Tree-ified instructions for Wasm emission. - Sub(Box>), /// Undef values are produced when code is unreachable and thus /// removed/never executed. Undef, } -impl<'a> Operand<'a> { +impl Operand { pub fn value(value: ValueId) -> Self { if value == NO_VALUE { Operand::Undef @@ -93,45 +92,90 @@ impl<'a> Operand<'a> { } #[derive(Clone, Debug)] -pub struct BlockTarget<'a> { +pub struct BlockTarget { pub block: BlockId, - pub args: Vec>, + pub args: Vec, } #[derive(Clone, Debug)] -pub enum Terminator<'a> { +pub enum Terminator { Br { - target: BlockTarget<'a>, + target: BlockTarget, }, CondBr { - cond: Operand<'a>, - if_true: BlockTarget<'a>, - if_false: BlockTarget<'a>, + cond: Operand, + if_true: BlockTarget, + if_false: BlockTarget, }, Select { - value: Operand<'a>, - targets: Vec>, - default: BlockTarget<'a>, + value: Operand, + targets: Vec, + default: BlockTarget, }, Return { - values: Vec>, + values: Vec, }, None, } -impl<'a> std::default::Default for Terminator<'a> { +impl std::default::Default for Terminator { fn default() -> Self { Terminator::None } } +impl Terminator { + pub fn args(&self) -> Vec { + match self { + Terminator::Br { target } => target.args.clone(), + Terminator::CondBr { + cond, + if_true, + if_false, + } => { + let mut ret = vec![*cond]; + ret.extend(if_true.args.iter().cloned()); + ret.extend(if_false.args.iter().cloned()); + ret + } + Terminator::Select { + value, + targets, + default, + } => { + let mut ret = vec![*value]; + for target in targets { + ret.extend(target.args.iter().cloned()); + } + ret.extend(default.args.clone()); + ret + } + Terminator::Return { values } => values.clone(), + Terminator::None => vec![], + } + } +} + impl<'a> Module<'a> { pub fn from_wasm_bytes(bytes: &'a [u8]) -> Result { frontend::wasm_to_ir(bytes) } + + pub fn to_wasm_bytes(mut self) -> Vec { + // TODO + for func in &mut self.funcs { + match func { + &mut FuncDecl::Body(_, ref mut body) => { + let _deleted_insts = backend::treeify_function(body); + } + _ => {} + } + } + self.orig_bytes.to_vec() + } } -impl<'a> Terminator<'a> { +impl Terminator { pub fn successors(&self) -> Vec { match self { Terminator::Return { .. } => vec![], diff --git a/src/lib.rs b/src/lib.rs index 1752de9..24dff61 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,7 @@ pub use wasm_encoder; pub use wasmparser; +mod backend; mod dataflow; mod frontend; mod ir;