From 1e26c0aaa4e887ca9bc5f593ab1ad9d4ce3927c9 Mon Sep 17 00:00:00 2001
From: Chris Fallin
Date: Wed, 26 Oct 2022 22:15:15 -0700
Subject: [PATCH] WIP.

---
 src/backend/binaryen.rs   |  41 ++++
 src/backend/final.rs      | 159 ----------------
 src/backend/locations.rs  | 214 ---------------------
 src/backend/mod.rs        |   4 +-
 src/backend/schedule.rs   | 233 -----------------------
 src/backend/serialize.rs  | 311 ------------------------------
 src/backend/structured.rs | 391 --------------------------------------
 src/bin/waffle-util.rs    |   2 +-
 src/frontend.rs           |  47 ++---
 src/ir.rs                 |  80 +++++++-
 src/op_traits.rs          |  22 +--
 11 files changed, 148 insertions(+), 1356 deletions(-)
 delete mode 100644 src/backend/final.rs
 delete mode 100644 src/backend/locations.rs
 delete mode 100644 src/backend/schedule.rs
 delete mode 100644 src/backend/serialize.rs
 delete mode 100644 src/backend/structured.rs

diff --git a/src/backend/binaryen.rs b/src/backend/binaryen.rs
index 6cb1a33..58290d2 100644
--- a/src/backend/binaryen.rs
+++ b/src/backend/binaryen.rs
@@ -23,6 +23,20 @@ impl Module {
         Ok(Module(ptr))
     }
 
+    pub fn write(&self) -> Result<Vec<u8>> {
+        let result = unsafe { BinaryenModuleAllocateAndWrite(self.0, std::ptr::null()) };
+        if result.binary.is_null() {
+            bail!("Failed to serialize module");
+        }
+        let slice = unsafe {
+            std::slice::from_raw_parts(
+                result.binary as *const c_void as *const u8,
+                result.binary_bytes as usize,
+            )
+        };
+        Ok(slice.to_vec())
+    }
+
     pub fn num_funcs(&self) -> usize {
         unsafe { BinaryenGetNumFunctions(self.0) as usize }
     }
@@ -88,6 +102,12 @@ impl Function {
         }
     }
+    pub fn set_body(&mut self, body: Expression) {
+        unsafe {
+            BinaryenFunctionSetBody(self.0, body.1);
+        }
+    }
+
     pub fn name(&self) -> &str {
         let s = unsafe { CStr::from_ptr(BinaryenFunctionGetName(self.1)) };
         s.to_str().unwrap()
     }
@@ -622,14 +642,35 @@ type BinaryenFunction = *const c_void;
 type BinaryenExpression = *const c_void;
 type BinaryenExport = *const c_void;
 
+#[repr(C)]
+struct BinaryenModuleAllocateAndWriteResult {
+    binary: *mut c_void,
+    binary_bytes: libc::size_t,
+    source_map: *mut c_char,
+}
+
+impl Drop for BinaryenModuleAllocateAndWriteResult {
+    fn drop(&mut self) {
+        unsafe {
+            libc::free(self.binary);
+            libc::free(self.source_map as *mut c_void);
+        }
+    }
+}
+
 #[link(name = "binaryen")]
 extern "C" {
     fn BinaryenModuleRead(data: *const u8, len: usize) -> BinaryenModule;
     fn BinaryenModuleDispose(ptr: BinaryenModule);
+    fn BinaryenModuleAllocateAndWrite(
+        ptr: BinaryenModule,
+        sourceMapUrl: *const c_char,
+    ) -> BinaryenModuleAllocateAndWriteResult;
     fn BinaryenGetNumFunctions(ptr: BinaryenModule) -> u32;
     fn BinaryenGetFunctionByIndex(ptr: BinaryenModule, index: u32) -> BinaryenFunction;
     fn BinaryenGetFunction(ptr: BinaryenModule, name: *const c_char) -> BinaryenFunction;
     fn BinaryenFunctionGetBody(ptr: BinaryenFunction) -> BinaryenExpression;
+    fn BinaryenFunctionSetBody(ptr: BinaryenFunction, body: BinaryenExpression);
     fn BinaryenFunctionGetName(ptr: BinaryenFunction) -> *const c_char;
     fn BinaryenGetExport(ptr: BinaryenModule, name: *const c_char) -> BinaryenExport;
     fn BinaryenGetNumExports(ptr: BinaryenModule) -> u32;
diff --git a/src/backend/final.rs b/src/backend/final.rs
deleted file mode 100644
index 0ed80f8..0000000
--- a/src/backend/final.rs
+++ /dev/null
@@ -1,159 +0,0 @@
-//! Final Wasm operator sequence production.
- -use super::{Locations, SerializedBlockTarget, SerializedBody, SerializedOperator}; -use crate::{ops::ty_to_valty, FunctionBody}; -use std::borrow::Cow; -use wasm_encoder::BlockType; - -#[derive(Clone, Debug)] -pub struct Wasm { - pub operators: Vec>, - pub locals: Vec, -} - -struct WasmContext<'a> { - wasm: &'a mut Wasm, -} - -impl<'a> WasmContext<'a> { - fn translate(&mut self, op: &SerializedOperator, locations: &Locations) { - log::trace!("translate: {:?}", op); - match op { - SerializedOperator::StartBlock { .. } => { - self.wasm - .operators - .push(wasm_encoder::Instruction::Block(BlockType::Empty)); - } - SerializedOperator::StartLoop { .. } => { - self.wasm - .operators - .push(wasm_encoder::Instruction::Loop(BlockType::Empty)); - } - SerializedOperator::End => { - self.wasm.operators.push(wasm_encoder::Instruction::End); - } - SerializedOperator::GetArg(index) => { - self.wasm - .operators - .push(wasm_encoder::Instruction::LocalGet(*index as u32)); - } - SerializedOperator::Operator(op) => { - self.wasm.operators.push(op.clone().into()); - } - SerializedOperator::Br(ref target) => { - self.translate_target(0, target, locations); - } - SerializedOperator::BrIf { - ref if_true, - ref if_false, - } => { - self.wasm - .operators - .push(wasm_encoder::Instruction::If(BlockType::Empty)); - self.translate_target(1, if_true, locations); - self.wasm.operators.push(wasm_encoder::Instruction::Else); - self.translate_target(1, if_false, locations); - self.wasm.operators.push(wasm_encoder::Instruction::End); - } - SerializedOperator::BrTable { - ref index_ops, - ref targets, - ref default, - } => { - for _ in 0..(targets.len() + 2) { - self.wasm.operators.push(wasm_encoder::Instruction::Block( - wasm_encoder::BlockType::Empty, - )); - } - - let br_table_targets = (1..=targets.len()).map(|i| i as u32).collect::>(); - for op in index_ops { - self.translate(op, locations); - } - self.wasm.operators.push(wasm_encoder::Instruction::BrTable( - Cow::Owned(br_table_targets), - 0, - )); - self.wasm.operators.push(wasm_encoder::Instruction::End); - - self.translate_target(targets.len() + 1, default, locations); - self.wasm.operators.push(wasm_encoder::Instruction::End); - - for i in 0..targets.len() { - self.translate_target(targets.len() - i, &targets[i], locations); - self.wasm.operators.push(wasm_encoder::Instruction::End); - } - } - SerializedOperator::Get(v, i) => { - let loc = *locations.locations.get(&(*v, *i)).unwrap(); - self.wasm - .operators - .push(wasm_encoder::Instruction::LocalGet(loc)); - } - SerializedOperator::Set(v, i) => { - let loc = *locations.locations.get(&(*v, *i)).unwrap(); - self.wasm - .operators - .push(wasm_encoder::Instruction::LocalSet(loc)); - } - SerializedOperator::Tee(v, i) => { - let loc = *locations.locations.get(&(*v, *i)).unwrap(); - self.wasm - .operators - .push(wasm_encoder::Instruction::LocalTee(loc)); - } - } - } - - fn translate_target( - &mut self, - extra_blocks: usize, - target: &SerializedBlockTarget, - locations: &Locations, - ) { - log::trace!("translate_target: {:?}", target); - match target { - &SerializedBlockTarget::Fallthrough(ref ops) => { - for op in ops { - self.translate(op, locations); - } - if extra_blocks > 0 { - self.wasm - .operators - .push(wasm_encoder::Instruction::Br((extra_blocks - 1) as u32)); - } - } - &SerializedBlockTarget::Branch(branch, ref ops) => { - for op in ops { - self.translate(op, locations); - } - self.wasm.operators.push(wasm_encoder::Instruction::Br( - (branch + extra_blocks) as u32, - )); - } - } - } -} - -pub 
fn produce_func_wasm(f: &FunctionBody, body: &SerializedBody, locations: &Locations) -> Wasm { - let mut wasm = Wasm { - operators: vec![], - locals: vec![], - }; - wasm.locals - .extend(f.locals.iter().skip(f.n_params).map(|ty| ty_to_valty(*ty))); - wasm.locals - .extend(locations.new_locals.iter().map(|ty| ty_to_valty(*ty))); - - let mut ctx = WasmContext { wasm: &mut wasm }; - for operator in &body.operators { - ctx.translate(operator, locations); - } - // There is always an explicit Return before this point. This - // allows us to avoid matching the return types in our stack - // discipline / outer block type. - wasm.operators.push(wasm_encoder::Instruction::Unreachable); - wasm.operators.push(wasm_encoder::Instruction::End); - - wasm -} diff --git a/src/backend/locations.rs b/src/backend/locations.rs deleted file mode 100644 index 6d332dc..0000000 --- a/src/backend/locations.rs +++ /dev/null @@ -1,214 +0,0 @@ -//! Location assignment (pseudo-regalloc) for SSA values onto -//! locals/operand-stack values. - -use crate::{FunctionBody, LocalId, Value}; -use fxhash::FxHashMap; - -use super::{SerializedBody, SerializedOperator}; - -#[derive(Debug)] -pub struct Locations { - pub locations: FxHashMap<(Value, usize), LocalId>, - pub new_locals: Vec, -} - -#[derive(Debug)] -struct Frame { - is_loop: bool, - start_loc: usize, - use_at_end: Vec<(Value, usize)>, -} - -pub struct Allocator<'a> { - locations: &'a mut Locations, - f: &'a FunctionBody, - active_frames: Vec, - spans: FxHashMap<(Value, usize), ValueSpan>, - starts: Vec, - ends: Vec, - freelist: FxHashMap>, -} - -#[derive(Clone, Copy, Debug)] -pub struct ValueSpan { - value: Value, - multi_value_index: usize, - /// First index in serialized body at which value is live. - start: usize, - /// First index in serialized body at which value is no longer live. - end: usize, -} - -impl ValueSpan { - fn len(&self) -> usize { - self.end - self.start - } -} - -impl Locations { - pub fn compute(f: &FunctionBody, body: &SerializedBody) -> Locations { - let mut locations = Locations { - locations: FxHashMap::default(), - new_locals: vec![], - }; - let mut allocator = Allocator { - locations: &mut locations, - f, - active_frames: vec![], - freelist: FxHashMap::default(), - spans: FxHashMap::default(), - starts: vec![], - ends: vec![], - }; - - allocator.compute_spans(&body.operators[..]); - - locations - } -} - -impl<'a> Allocator<'a> { - fn handle_op(&mut self, location: usize, op: &SerializedOperator) { - let mut reads = vec![]; - let mut writes = vec![]; - - match op { - &SerializedOperator::StartBlock { .. } => { - self.active_frames.push(Frame { - is_loop: false, - start_loc: location, - use_at_end: vec![], - }); - } - &SerializedOperator::StartLoop { .. } => { - self.active_frames.push(Frame { - is_loop: true, - start_loc: location, - use_at_end: vec![], - }); - } - &SerializedOperator::End { .. 
} => { - let frame = self.active_frames.pop().unwrap(); - if frame.is_loop { - reads.extend(frame.use_at_end); - } - } - _ => {} - } - - op.visit_value_locals( - &mut |value, index| { - reads.push((value, index)); - }, - &mut |value, index| { - writes.push((value, index)); - }, - ); - - log::trace!( - "handle_op: at location {} op {:?} reads {:?} writes {:?}", - location, - op, - reads, - writes - ); - - for (value, index) in reads { - let span = match self.spans.get_mut(&(value, index)) { - Some(span) => span, - None => { - panic!("Read before any write to local ({},{})", value, index); - } - }; - span.end = location + 1; - log::trace!(" -> span for {}: {:?}", value, span); - - for frame in &mut self.active_frames { - if frame.is_loop && span.start < frame.start_loc { - frame.use_at_end.push((value, index)); - } - } - } - - for (value, index) in writes { - let span = self.spans.entry((value, index)).or_insert(ValueSpan { - value, - multi_value_index: index, - start: location, - end: location + 1, - }); - span.end = location + 1; - log::trace!(" -> span for {}: {:?}", value, span); - } - } - - fn compute_spans(&mut self, operators: &[SerializedOperator]) { - // For each operator, get the reads and writes and construct spans. - for (index, operator) in operators.iter().enumerate() { - self.handle_op(index, operator); - } - - // Build lists of spans sorted by start and end. - self.starts = self.spans.values().cloned().collect(); - self.ends = self.starts.clone(); - self.starts.sort_unstable_by_key(|span| span.start); - self.ends.sort_unstable_by_key(|span| span.end); - - // Finally, assign locals to (value, index) pairs. - let mut start_idx = 0; - let mut end_idx = 0; - while start_idx < self.starts.len() || end_idx < self.ends.len() { - if start_idx < self.starts.len() && end_idx < self.ends.len() { - if self.ends[end_idx].end <= self.starts[start_idx].start { - let span = self.ends[end_idx]; - end_idx += 1; - self.handle_end(&span); - } else { - let span = self.starts[start_idx]; - start_idx += 1; - self.handle_start(&span); - } - } else if start_idx < self.starts.len() { - let span = self.starts[start_idx]; - start_idx += 1; - self.handle_start(&span); - } else { - let span = self.ends[end_idx]; - end_idx += 1; - self.handle_end(&span); - } - } - } - - fn handle_end(&mut self, span: &ValueSpan) { - let local = self - .locations - .locations - .get(&(span.value, span.multi_value_index)) - .cloned() - .unwrap(); - let ty = self.f.types[span.value.index()][span.multi_value_index]; - self.freelist - .entry(ty) - .or_insert_with(|| vec![]) - .push(local); - } - - fn handle_start(&mut self, span: &ValueSpan) { - let ty = self.f.types[span.value.index()][span.multi_value_index]; - if let Some(list) = self.freelist.get_mut(&ty) { - if let Some(local) = list.pop() { - self.locations - .locations - .insert((span.value, span.multi_value_index), local); - return; - } - } - - let new_local = self.f.locals.len() + self.locations.new_locals.len(); - self.locations.new_locals.push(ty); - self.locations - .locations - .insert((span.value, span.multi_value_index), new_local as u32); - } -} diff --git a/src/backend/mod.rs b/src/backend/mod.rs index 86b4492..d75646a 100644 --- a/src/backend/mod.rs +++ b/src/backend/mod.rs @@ -1,4 +1,4 @@ //! Backend: IR to Wasm. -mod binaryen; -pub use binaryen::*; +pub mod binaryen; +pub mod lower; diff --git a/src/backend/schedule.rs b/src/backend/schedule.rs deleted file mode 100644 index c35bf43..0000000 --- a/src/backend/schedule.rs +++ /dev/null @@ -1,233 +0,0 @@ -//! 
Op scheduling. - -use fxhash::FxHashMap; - -use super::UseCountAnalysis; -use crate::{cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Value, ValueDef}; - -#[derive(Clone, Debug, Default)] -pub struct Schedule { - /// Output: location at which to compute each value. - pub location: Vec, - /// Output: for each toplevel value, all values that are computed - /// after it is. - pub compute_after_value: FxHashMap>, - /// Output: all values ready at the top of a given block. - pub compute_at_top_of_block: FxHashMap>, -} - -pub struct SchedulerContext<'a> { - /// The schedule we are constructing. - schedule: &'a mut Schedule, - /// In-progress state: for each value, the values that have one - /// more ready input once that value is computed. - waiting_on_value: FxHashMap>, - /// In-progress state: for each value, how many inputs need to - /// become ready. - remaining_inputs: FxHashMap, - /// In-progress state: all values that are ready to be scheduled. - ready: Vec, - /// Input context: CFG. - cfg: &'a CFGInfo, - /// Input context: function body. - f: &'a FunctionBody, -} - -/// Locations are denoted by top-level values (those in `insts`), -/// which are those with a side-effect; the sea-of-nodes -/// representation for all other value nodes allows them to be -/// computed anywhere dominated by all operands and that dominates all -/// uses, so we have significant flexibility. We denote a location as -/// "after a toplevel", then in the second pass where we actually -/// generate operators according to stack discipline, we resolve the -/// order for all values at a given toplevel. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Location { - /// At a separate top-level location. - Toplevel, - /// After a given value. - After(Value), - /// At the top of a given block. - BlockTop(BlockId), - /// Not yet scheduled. - None, -} - -impl Schedule { - pub fn compute(f: &FunctionBody, cfg: &CFGInfo, uses: &UseCountAnalysis) -> Self { - let mut schedule = Schedule::default(); - schedule.location = vec![Location::None; f.values.len()]; - - log::trace!("f: {:?}", f); - log::trace!("cfg: {:?}", cfg); - log::trace!("uses: {:?}", uses); - - let mut ctx = SchedulerContext { - schedule: &mut schedule, - f, - cfg, - waiting_on_value: FxHashMap::default(), - remaining_inputs: FxHashMap::default(), - ready: vec![], - }; - - // Prepare the "waiting on value", "remaining inputs", and - // "ready" vectors. - for (value, value_def) in f.values() { - if uses.use_count[value.index()] == 0 { - continue; - } - if uses.toplevel.contains(&value) { - continue; - } - match value_def { - &ValueDef::Operator(op, ref operands) => { - if operands.len() == 0 { - if !op_rematerialize(&op) { - log::trace!("immediately ready: v{}", value.index()); - ctx.ready.push(value); - } - } else { - let mut remaining = 0; - for &input in operands { - let input = f.resolve_alias(input); - - match &f.values[input.index()] { - &ValueDef::Operator(ref op, ..) if op_rematerialize(op) => { - continue; - } - &ValueDef::Arg(..) 
=> { - continue; - } - _ => {} - } - - log::trace!("v{} waiting on v{}", value.index(), input.index()); - ctx.waiting_on_value - .entry(input) - .or_insert_with(|| vec![]) - .push(value); - remaining += 1; - } - if remaining > 0 { - ctx.remaining_inputs.insert(value, remaining); - } else { - ctx.ready.push(value); - } - } - } - &ValueDef::Alias(v) | &ValueDef::PickOutput(v, _) => { - let v = f.resolve_alias(v); - ctx.remaining_inputs.insert(value, 1); - ctx.waiting_on_value - .entry(v) - .or_insert_with(|| vec![]) - .push(value); - } - _ => {} - } - } - - // Traverse blocks in RPO. When we schedule a given op, we've - // already scheduled all of its operands, so we can find the - // right place for it without any sort of backtracking or - // fixpoint convergence. - // - // - Values in `insts` (toplevel operations) - // are scheduled at their locations. All side-effecting ops - // are in this category, and hence never experience - // code-motion relative to other side-effecting ops or - // control flow. - // - // - Otherwise, values are scheduled after their last operand - // is ready. All operands must have been computed by the - // time we reach a given operator in RPO, and each operand's - // scheduled site must dominate the current location - // (toplevel value). Because the dominance relation forms a - // tree structure (the domtree), for any two operand def - // sites X and Y to the current location L, given X dom L - // and Y dom L, either X dom Y or Y dom X. Thus, consider - // the current-best and each new operand in pairs, and pick - // the one that is dominated by the other. - - for &block in cfg.postorder.iter().rev() { - for &(_, param) in &f.blocks[block].params { - log::trace!("block{}: param v{}", block, param.index()); - ctx.wake_dependents(param); - } - ctx.sched_ready_at_block_top(block); - for &inst in &f.blocks[block].insts { - log::trace!("block{}: toplevel v{}", block, inst.index()); - ctx.sched_toplevel(inst); - ctx.sched_ready_after_value(inst); - } - } - - schedule - } -} - -impl<'a> SchedulerContext<'a> { - fn sched_toplevel(&mut self, v: Value) { - log::trace!("sched_toplevel: v{}", v.index()); - assert_eq!(self.schedule.location[v.index()], Location::None); - self.schedule.location[v.index()] = Location::Toplevel; - self.wake_dependents(v); - } - - fn sched_ready_after_value(&mut self, v: Value) { - log::trace!("sched_ready_after_value: toplevel v{}", v.index()); - while !self.ready.is_empty() { - for ready in std::mem::take(&mut self.ready) { - log::trace!( - "sched_ready_after_value: toplevel v{} -> v{} now ready", - v.index(), - ready.index() - ); - self.schedule.location[ready.index()] = Location::After(v); - self.schedule - .compute_after_value - .entry(v) - .or_insert_with(|| vec![]) - .push(ready); - self.wake_dependents(ready); - } - } - } - - fn sched_ready_at_block_top(&mut self, block: BlockId) { - log::trace!("ready_at_block_top: block{}", block); - while !self.ready.is_empty() { - for ready in std::mem::take(&mut self.ready) { - log::trace!( - "ready_at_block_top: block{} -> ready: v{}", - block, - ready.index() - ); - self.schedule.location[ready.index()] = Location::BlockTop(block); - self.schedule - .compute_at_top_of_block - .entry(block) - .or_insert_with(|| vec![]) - .push(ready); - self.wake_dependents(ready); - } - } - } - - fn wake_dependents(&mut self, v: Value) { - log::trace!("wake_dependents: v{}", v.index()); - let dependents = self.waiting_on_value.remove(&v).unwrap_or_default(); - for dependent in dependents { - log::trace!(" -> v{} 
wakes dependent v{}", v.index(), dependent.index(),); - let remaining = self.remaining_inputs.get_mut(&dependent).unwrap(); - *remaining -= 1; - log::trace!(" -> remaining now {}", *remaining); - if *remaining == 0 { - self.remaining_inputs.remove(&dependent); - self.ready.push(dependent); - self.wake_dependents(dependent); - } - } - } -} diff --git a/src/backend/serialize.rs b/src/backend/serialize.rs deleted file mode 100644 index bab2e16..0000000 --- a/src/backend/serialize.rs +++ /dev/null @@ -1,311 +0,0 @@ -//! Serialization of the sea-of-nodes IR using a BlockOrder -//! Wasm-structured-control-flow result into actual order of operators -//! in Wasm function body. Contains everything needed to emit Wasm -//! except for value locations (and corresponding local spill/reloads). - -use super::structured::{BlockOrder, BlockOrderEntry}; -use crate::{ - cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Operator, Terminator, Value, - ValueDef, -}; - -/// A Wasm function body with a serialized sequence of operators that -/// mirror Wasm opcodes in every way *except* for locals corresponding -/// to SSA values. This is a sort of "pre-regalloc" representation of -/// the final code. -#[derive(Clone, Debug)] -pub struct SerializedBody { - pub(crate) operators: Vec, -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum SerializedBlockTarget { - Fallthrough(Vec), - Branch(usize, Vec), -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum SerializedOperator { - StartBlock { - header: BlockId, - }, - StartLoop { - header: BlockId, - }, - Br(SerializedBlockTarget), - BrIf { - if_true: SerializedBlockTarget, - if_false: SerializedBlockTarget, - }, - BrTable { - index_ops: Vec, - targets: Vec, - default: SerializedBlockTarget, - }, - /// Compute the given value. Stack discipline will be maintained: - /// all operands will be computed or fetched via `Get` and all - /// produced results will be used directly or stored via `Set`. - Operator(Operator), - /// Get the given value from the local corresponding to the - /// `Value`'s n'th result. - Get(Value, usize), - /// Set the local corresponding to the `Value`'s n'th result, - /// consuming the value on the stack. - Set(Value, usize), - /// Set the value, like `Set`, but without consuming it from the - /// stack. - Tee(Value, usize), - /// Get the given function argument. - GetArg(usize), - End, -} - -impl SerializedOperator { - pub fn visit_value_locals( - &self, - r: &mut R, - w: &mut W, - ) { - match self { - &SerializedOperator::Br(ref target) => { - target.visit_value_locals(r, w); - } - &SerializedOperator::BrIf { - ref if_true, - ref if_false, - } => { - if_true.visit_value_locals(r, w); - if_false.visit_value_locals(r, w); - } - &SerializedOperator::BrTable { - ref index_ops, - ref default, - ref targets, - } => { - for index_op in index_ops { - index_op.visit_value_locals(r, w); - } - default.visit_value_locals(r, w); - for target in targets { - target.visit_value_locals(r, w); - } - } - &SerializedOperator::Get(v, i) => { - r(v, i); - } - &SerializedOperator::Set(v, i) | &SerializedOperator::Tee(v, i) => { - w(v, i); - } - &SerializedOperator::StartBlock { .. } | &SerializedOperator::StartLoop { .. } => {} - &SerializedOperator::GetArg(..) - | &SerializedOperator::Operator(..) 
- | &SerializedOperator::End => {} - } - } -} - -impl SerializedBlockTarget { - fn visit_value_locals( - &self, - r: &mut R, - w: &mut W, - ) { - match self { - &SerializedBlockTarget::Branch(_, ref ops) - | &SerializedBlockTarget::Fallthrough(ref ops) => { - for op in ops { - op.visit_value_locals(r, w); - } - } - } - } -} - -struct SerializedBodyContext<'a> { - f: &'a FunctionBody, - cfg: &'a CFGInfo, - operators: Vec, -} - -impl SerializedBody { - pub fn compute(f: &FunctionBody, cfg: &CFGInfo, order: &BlockOrder) -> SerializedBody { - if log::log_enabled!(log::Level::Trace) { - log::trace!("values:"); - for value in 0..f.values.len() { - log::trace!(" * v{}: {:?}", value, f.values[value]); - } - - for block in 0..f.blocks.len() { - log::trace!("block{}:", block); - for &inst in &f.blocks[block].insts { - log::trace!(" -> v{}", inst.index()); - } - log::trace!(" -> terminator: {:?}", f.blocks[block].terminator); - } - } - - let mut ctx = SerializedBodyContext { - f, - cfg, - operators: vec![], - }; - for entry in &order.entries { - ctx.compute_entry(entry); - } - SerializedBody { - operators: ctx.operators, - } - } -} - -impl<'a> SerializedBodyContext<'a> { - fn compute_entry(&mut self, entry: &BlockOrderEntry) { - match entry { - &BlockOrderEntry::StartBlock(header) | &BlockOrderEntry::StartLoop(header) => { - let is_loop = match entry { - &BlockOrderEntry::StartLoop(..) => true, - _ => false, - }; - - if is_loop { - self.operators - .push(SerializedOperator::StartLoop { header }); - } else { - self.operators - .push(SerializedOperator::StartBlock { header }); - } - } - &BlockOrderEntry::End => { - self.operators.push(SerializedOperator::End); - } - &BlockOrderEntry::BasicBlock(block, ref targets) => { - log::trace!("BlockOrderEntry: block{}", block); - - // Compute insts' values in sequence. - for &inst in &self.f.blocks[block].insts { - let mut rev_ops = vec![]; - self.emit_inst(inst, &mut rev_ops); - rev_ops.reverse(); - self.operators.extend(rev_ops); - } - - // For each BlockOrderTarget, compute a SerializedBlockTarget. - let targets = targets - .iter() - .map(|target| { - log::trace!("target: {:?}", target); - - let mut rev_ops = vec![]; - - // Store into block param values. - for &(_, value) in &self.f.blocks[target.target].params { - rev_ops.push(SerializedOperator::Set(value, 0)); - } - - // Load from branch operator's args. - for &value in target.args.iter().rev() { - let value = self.f.resolve_alias(value); - self.push_value(value, &mut rev_ops); - } - - rev_ops.reverse(); - log::trace!(" -> ops: {:?}", rev_ops); - - match target.relative_branch { - Some(branch) => SerializedBlockTarget::Branch(branch, rev_ops), - None => SerializedBlockTarget::Fallthrough(rev_ops), - } - }) - .collect::>(); - - // Finally, generate branch ops. - match &self.f.blocks[block].terminator { - &Terminator::Br { .. } => { - let target = targets.into_iter().next().unwrap(); - self.operators.push(SerializedOperator::Br(target)); - } - &Terminator::CondBr { cond, .. } => { - let mut iter = targets.into_iter(); - let if_true = iter.next().unwrap(); - let if_false = iter.next().unwrap(); - let mut rev_ops = vec![]; - let cond = self.f.resolve_alias(cond); - self.push_value(cond, &mut rev_ops); - rev_ops.reverse(); - self.operators.extend(rev_ops); - self.operators - .push(SerializedOperator::BrIf { if_true, if_false }); - } - &Terminator::Select { value, .. 
} => { - let mut iter = targets.into_iter(); - let default = iter.next().unwrap(); - let targets = iter.collect::>(); - let mut rev_ops = vec![]; - let value = self.f.resolve_alias(value); - self.push_value(value, &mut rev_ops); - rev_ops.reverse(); - self.operators.push(SerializedOperator::BrTable { - index_ops: rev_ops, - targets, - default, - }); - } - &Terminator::Return { ref values, .. } => { - let mut rev_ops = vec![]; - for &value in values.iter().rev() { - self.push_value(value, &mut rev_ops); - } - rev_ops.reverse(); - self.operators.extend(rev_ops.into_iter()); - self.operators - .push(SerializedOperator::Operator(Operator::Return)); - } - &Terminator::None => { - self.operators - .push(SerializedOperator::Operator(Operator::Unreachable)); - } - } - } - } - } - fn push_value(&mut self, v: Value, rev_ops: &mut Vec) { - let v = self.f.resolve_alias(v); - match &self.f.values[v.index()] { - &ValueDef::PickOutput(v, i) => { - rev_ops.push(SerializedOperator::Get(v, i)); - } - &ValueDef::Arg(i) => { - rev_ops.push(SerializedOperator::GetArg(i)); - } - &ValueDef::Operator(op, ..) if op_rematerialize(&op) => { - rev_ops.push(SerializedOperator::Operator(op)); - } - _ => { - rev_ops.push(SerializedOperator::Get(v, 0)); - } - } - } - - fn emit_inst(&mut self, inst: Value, rev_ops: &mut Vec) { - let (operator, operands) = match &self.f.values[inst.index()] { - &ValueDef::Operator(op, ref operands) => (op, operands), - _ => { - return; - } - }; - - // We're generating ops in reverse order. So we must first - // store value. - for i in 0..self.f.types[inst.index()].len() { - rev_ops.push(SerializedOperator::Set(inst, i)); - } - - rev_ops.push(SerializedOperator::Operator(operator)); - - // Now push the args in reverse order. - for &arg in operands.iter().rev() { - let arg = self.f.resolve_alias(arg); - self.push_value(arg, rev_ops); - } - } -} diff --git a/src/backend/structured.rs b/src/backend/structured.rs deleted file mode 100644 index ebbd4d6..0000000 --- a/src/backend/structured.rs +++ /dev/null @@ -1,391 +0,0 @@ -//! Recovery of structured control flow information. Loop nest -//! computation, block order linearization and loop/block region -//! generation. - -use fxhash::{FxHashMap, FxHashSet}; - -use crate::{cfg::CFGInfo, BlockId, FunctionBody, Value}; - -#[derive(Clone, Debug)] -pub enum Node { - Leaf(BlockId), - Loop(BlockId, Vec), -} - -impl Node { - pub fn header(&self) -> BlockId { - match self { - &Node::Leaf(block) => block, - &Node::Loop(block, ..) => block, - } - } - pub fn is_loop(&self) -> bool { - match self { - &Node::Loop(..) => true, - _ => false, - } - } - pub fn is_leaf(&self) -> bool { - match self { - &Node::Leaf(..) => true, - _ => false, - } - } -} - -pub struct LoopNest { - nodes: Vec, -} - -impl LoopNest { - pub fn compute(cfg: &CFGInfo) -> LoopNest { - // Find loop backedges: any successor edge from a higher- to - // lower-numbered block in RPO. - let mut backedges: Vec<(BlockId, BlockId)> = vec![]; - for (block_rpo, &block) in cfg.postorder.iter().rev().enumerate() { - for &succ in &cfg.block_succs[block] { - let succ_po = cfg.postorder_pos[succ] - .expect("Edge from reachable to unreachable block is impossible"); - let succ_rpo = cfg.postorder.len() - 1 - succ_po; - if succ_rpo <= block_rpo { - log::trace!("LoopNest compute: backedge from {} to {}", block, succ); - backedges.push((block, succ)); - } - } - } - - // For each backedge, find the backedge's natural loop and - // accumulate those blocks into the set of blocks in each loop - // body. 
- let mut loop_bodies: FxHashMap> = FxHashMap::default(); - for &(from, to) in &backedges { - assert!( - cfg.dominates(to, from), - "Irreducible CFG edge from {} to {}", - from, - to - ); - let body = loop_bodies - .entry(to) - .or_insert_with(|| FxHashSet::default()); - Self::collect_loop_body(body, to, cfg); - log::trace!("loop body for header {}: {:?}", to, body); - } - - // Now build the loop nest. - let mut nodes = vec![]; - let mut visited = FxHashSet::default(); - for &block in cfg.postorder.iter().rev() { - if visited.contains(&block) { - continue; - } - if loop_bodies.contains_key(&block) { - nodes.push(Self::loop_node(cfg, block, &loop_bodies, &mut visited)); - } else { - nodes.push(Node::Leaf(block)); - visited.insert(block); - } - } - - log::trace!("loop nest nodes: {:?}", nodes); - LoopNest { nodes } - } - - fn collect_loop_body(blocks: &mut FxHashSet, header: BlockId, cfg: &CFGInfo) { - let mut workset = vec![header]; - while let Some(block) = workset.pop() { - for &pred in &cfg.block_preds[block] { - if blocks.contains(&pred) { - continue; - } - if cfg.dominates(header, pred) { - blocks.insert(pred); - workset.push(pred); - } - } - } - } - - fn loop_node( - cfg: &CFGInfo, - header: BlockId, - loops: &FxHashMap>, - visited: &mut FxHashSet, - ) -> Node { - let mut body_blocks = loops - .get(&header) - .unwrap() - .iter() - .cloned() - .collect::>(); - body_blocks.sort_by_key(|&block| -(cfg.postorder_pos[block].unwrap() as isize)); - - let mut body_nodes = vec![]; - for block in body_blocks { - if visited.contains(&block) { - continue; - } - if block != header && loops.contains_key(&block) { - body_nodes.push(Self::loop_node(cfg, block, loops, visited)); - } else { - body_nodes.push(Node::Leaf(block)); - visited.insert(block); - } - } - - Node::Loop(header, body_nodes) - } -} - -fn compute_linear_block_pos(cfg: &CFGInfo, nest: &LoopNest) -> Vec> { - let mut next = 0; - let mut positions = vec![None; cfg.len()]; - for node in &nest.nodes { - compute_linear_block_pos_for_node(node, &mut next, &mut positions); - } - positions -} - -fn compute_linear_block_pos_for_node( - node: &Node, - next: &mut usize, - positions: &mut Vec>, -) { - match node { - &Node::Loop(_, ref subnodes) => { - for subnode in subnodes { - compute_linear_block_pos_for_node(subnode, next, positions); - } - } - &Node::Leaf(block) => { - let linear_index = *next; - *next += 1; - positions[block] = Some(linear_index); - } - } -} - -fn compute_forward_edge_targets( - cfg: &CFGInfo, - linear_block_pos: &[Option], -) -> FxHashSet { - let mut ret = FxHashSet::default(); - for block in 0..cfg.len() { - if linear_block_pos[block].is_none() { - continue; - } - let block_pos = linear_block_pos[block].unwrap(); - for &succ in &cfg.block_succs[block] { - let succ_pos = linear_block_pos[succ].unwrap(); - if succ_pos > block_pos + 1 { - ret.insert(succ); - } - } - } - ret -} - -#[derive(Clone, Debug)] -pub enum WasmRegion { - /// Block starting at the first `BlockId`, with a fallthrough/exit - /// label at the second `BlockId`. - Block(BlockId, Option, Vec), - /// Loop with a header at the given `BlockId`. - Loop(BlockId, Vec), - /// An individual basic block, just included inline (with no - /// Wasm-level structure). - Leaf(BlockId), -} - -impl WasmRegion { - pub fn header(&self) -> BlockId { - match self { - &WasmRegion::Block(block, ..) => block, - &WasmRegion::Loop(block, ..) 
=> block, - &WasmRegion::Leaf(block) => block, - } - } - - pub fn compute(cfg: &CFGInfo, loop_nest: &LoopNest) -> WasmRegion { - assert!(!loop_nest.nodes.is_empty()); - assert!(loop_nest.nodes[0].header() == 0); - - let linear_pos = compute_linear_block_pos(cfg, loop_nest); - let forward_targets = compute_forward_edge_targets(cfg, &linear_pos); - log::trace!( - "WasmRegion::compute: forward_targets = {:?}", - forward_targets - ); - - // Enclose loop nest in a virtual loop, to handle forward - // edges in a unified way even outside any loop. - let top = Self::compute_for_node( - cfg, - &forward_targets, - &Node::Loop(BlockId::MAX, loop_nest.nodes.clone()), - ); - let subregions = match top { - WasmRegion::Loop(_, subregions) => subregions, - _ => unreachable!(), - }; - let top = WasmRegion::Block(0, None, subregions); - - log::trace!("Wasm region: {:?}", top); - top - } - - fn compute_for_node( - cfg: &CFGInfo, - forward_targets: &FxHashSet, - node: &Node, - ) -> WasmRegion { - log::trace!("WasmRegion::compute_for_node: node {:?}", node); - match node { - &Node::Leaf(block) => { - log::trace!(" -> leaf {}", block); - WasmRegion::Leaf(block) - } - &Node::Loop(block, ref subnodes) => { - // Scan subnodes and find forward-edge targets that - // are at this level of the loop nest. - let block_targets = subnodes - .iter() - .map(|n| n.header()) - .filter(|n| forward_targets.contains(&n)) - .collect::>(); - log::trace!(" -> block targets are {:?}", block_targets,); - - let mut subregions: Vec = vec![]; - for subnode in subnodes { - if subnode.header() != block && block_targets.contains(&subnode.header()) { - let subsubregions = std::mem::take(&mut subregions); - assert!(!subsubregions.is_empty()); - let first = subsubregions[0].header(); - let enclosing_block = - WasmRegion::Block(first, Some(subnode.header()), subsubregions); - subregions.push(enclosing_block); - } - - let subregion = Self::compute_for_node(cfg, forward_targets, subnode); - subregions.push(subregion); - } - - log::trace!(" -> loop header {} subregions {:?}", block, subregions); - WasmRegion::Loop(block, subregions) - } - } - } -} - -#[derive(Clone, Debug)] -pub struct BlockOrder { - pub entries: Vec, -} - -#[derive(Clone, Debug)] -pub enum BlockOrderEntry { - StartBlock(BlockId), - StartLoop(BlockId), - End, - BasicBlock(BlockId, Vec), -} - -#[derive(Clone, Debug)] -pub struct BlockOrderTarget { - pub target: BlockId, - /// `None` means fallthrough. - pub relative_branch: Option, - pub args: Vec, -} - -impl BlockOrder { - pub fn compute(f: &FunctionBody, cfg: &CFGInfo, wasm_region: &WasmRegion) -> BlockOrder { - let mut target_stack = vec![]; - let mut entries = vec![]; - Self::generate_region(f, cfg, &mut target_stack, &mut entries, wasm_region, None); - log::trace!("entries: {:?}", entries); - BlockOrder { entries } - } - - fn generate_region( - f: &FunctionBody, - cfg: &CFGInfo, - target_stack: &mut Vec, - entries: &mut Vec, - region: &WasmRegion, - fallthrough: Option, - ) { - log::trace!( - "BlockOrder::generate_region: stack {:?} region {:?} fallthrough {:?}", - target_stack, - region, - fallthrough, - ); - match region { - &WasmRegion::Block(header, _, ref subregions, ..) - | &WasmRegion::Loop(header, ref subregions) => { - let (target, is_loop) = match region { - &WasmRegion::Block(_, out, ..) => { - assert!(out.is_some() || target_stack.is_empty()); - (out, false) - } - &WasmRegion::Loop(header, ..) 
=> (Some(header), true), - _ => unreachable!(), - }; - - if let Some(target) = target { - target_stack.push(target); - } - - if is_loop { - entries.push(BlockOrderEntry::StartLoop(header)); - } else { - entries.push(BlockOrderEntry::StartBlock(header)); - } - - for i in 0..subregions.len() { - let subregion = &subregions[i]; - let fallthrough = if i == subregions.len() - 1 { - fallthrough - } else { - Some(subregions[i + 1].header()) - }; - Self::generate_region(f, cfg, target_stack, entries, subregion, fallthrough); - } - - entries.push(BlockOrderEntry::End); - if target.is_some() { - target_stack.pop(); - } - } - - &WasmRegion::Leaf(block) => { - let mut targets = vec![]; - f.blocks[block].terminator.visit_targets(|target| { - log::trace!( - "BlockOrder::generate_region: looking for succ {} in stack {:?} fallthrough {:?}", - target.block, - target_stack, - fallthrough, - ); - let relative_branch = if Some(target.block) == fallthrough { - None - } else { - let pos = target_stack - .iter() - .position(|entry| *entry == target.block) - .expect("Malformed Wasm structured control flow"); - Some(target_stack.len() - 1 - pos) - }; - targets.push(BlockOrderTarget { - target: target.block, - relative_branch, - args: target.args.clone(), - }); - }); - entries.push(BlockOrderEntry::BasicBlock(block, targets)); - } - } - log::trace!("BlockOrder::generate_region: done with region {:?}", region); - } -} diff --git a/src/bin/waffle-util.rs b/src/bin/waffle-util.rs index 976da0a..c409e53 100644 --- a/src/bin/waffle-util.rs +++ b/src/bin/waffle-util.rs @@ -52,7 +52,7 @@ fn main() -> Result<()> { let bytes = std::fs::read(input)?; debug!("Loaded {} bytes of Wasm data", bytes.len()); let module = Module::from_wasm_bytes(&bytes[..])?; - let produced = module.to_wasm_bytes(); + let produced = module.to_wasm_bytes()?; std::fs::write(output, &produced[..])?; } } diff --git a/src/frontend.rs b/src/frontend.rs index 4afb1fc..25ab17e 100644 --- a/src/frontend.rs +++ b/src/frontend.rs @@ -15,8 +15,7 @@ use wasmparser::{ }; pub fn wasm_to_ir(bytes: &[u8]) -> Result> { - let mut module = Module::default(); - module.orig_bytes = bytes; + let mut module = Module::with_orig_bytes(bytes); let parser = Parser::new(0); let mut next_func = 0; for payload in parser.parse_all(bytes) { @@ -38,7 +37,7 @@ fn handle_payload<'a>( for _ in 0..reader.get_count() { let ty = reader.read()?; if let TypeDef::Func(fty) = ty { - module.signatures.push(fty); + module.frontend_add_signature(fty); } } } @@ -46,14 +45,14 @@ fn handle_payload<'a>( for _ in 0..reader.get_count() { match reader.read()?.ty { ImportSectionEntryType::Function(sig_idx) => { - module.funcs.push(FuncDecl::Import(sig_idx as SignatureId)); + module.frontend_add_func(FuncDecl::Import(sig_idx as SignatureId)); *next_func += 1; } ImportSectionEntryType::Global(ty) => { - module.globals.push(ty.content_type); + module.frontend_add_global(ty.content_type); } ImportSectionEntryType::Table(ty) => { - module.tables.push(ty.element_type); + module.frontend_add_table(ty.element_type); } _ => {} } @@ -62,36 +61,30 @@ fn handle_payload<'a>( Payload::GlobalSection(mut reader) => { for _ in 0..reader.get_count() { let global = reader.read()?; - module.globals.push(global.ty.content_type); + module.frontend_add_global(global.ty.content_type); } } Payload::TableSection(mut reader) => { for _ in 0..reader.get_count() { let table = reader.read()?; - module.tables.push(table.element_type); + module.frontend_add_table(table.element_type); } } Payload::FunctionSection(mut reader) => { for 
_ in 0..reader.get_count() { let sig_idx = reader.read()? as SignatureId; - module - .funcs - .push(FuncDecl::Body(sig_idx, FunctionBody::default())); + module.frontend_add_func(FuncDecl::Body(sig_idx, FunctionBody::default())); } } Payload::CodeSectionEntry(body) => { let func_idx = *next_func; *next_func += 1; - let my_sig = module.funcs[func_idx].sig(); + let my_sig = module.func(func_idx).sig(); let body = parse_body(module, my_sig, body)?; - match &mut module.funcs[func_idx] { - FuncDecl::Body(_, ref mut existing_body) => { - *existing_body = body; - } - _ => unreachable!(), - } + let existing_body = module.func_mut(func_idx).body_mut().unwrap(); + *existing_body = body; } _ => {} } @@ -106,11 +99,11 @@ fn parse_body<'a>( ) -> Result { let mut ret: FunctionBody = FunctionBody::default(); - for ¶m in &module.signatures[my_sig].params[..] { + for ¶m in &module.signature(my_sig).params[..] { ret.locals.push(param); } - ret.n_params = module.signatures[my_sig].params.len(); - for &r in &module.signatures[my_sig].returns[..] { + ret.n_params = module.signature(my_sig).params.len(); + for &r in &module.signature(my_sig).returns[..] { ret.rets.push(r); } @@ -126,14 +119,14 @@ fn parse_body<'a>( trace!( "Parsing function body: locals = {:?} sig = {:?}", ret.locals, - module.signatures[my_sig] + module.signature(my_sig) ); let mut builder = FunctionBodyBuilder::new(module, my_sig, &mut ret); builder.locals.seal_block_preds(0, &mut builder.body); builder.locals.start_block(0); - for (arg_idx, &arg_ty) in module.signatures[my_sig].params.iter().enumerate() { + for (arg_idx, &arg_ty) in module.signature(my_sig).params.iter().enumerate() { let local_idx = arg_idx as LocalId; let value = builder.body.add_value(ValueDef::Arg(arg_idx), vec![arg_ty]); trace!("defining local {} to value {}", local_idx, value); @@ -141,7 +134,7 @@ fn parse_body<'a>( builder.locals.set(local_idx, value); } - let n_args = module.signatures[my_sig].params.len(); + let n_args = module.signature(my_sig).params.len(); for (offset, local_ty) in locals.into_iter().enumerate() { let local_idx = (n_args + offset) as u32; builder.locals.declare(local_idx, local_ty); @@ -517,7 +510,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { }; // Push initial implicit Block. - let results = module.signatures[my_sig].returns.to_vec(); + let results = module.signature(my_sig).returns.to_vec(); let out = ret.body.add_block(); ret.add_block_params(out, &results[..]); ret.ctrl_stack.push(Frame::Block { @@ -997,7 +990,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { } wasmparser::Operator::Return => { - let retvals = self.pop_n(self.module.signatures[self.my_sig].returns.len()); + let retvals = self.pop_n(self.module.signature(self.my_sig).returns.len()); self.emit_ret(&retvals[..]); } @@ -1019,7 +1012,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { TypeOrFuncType::Type(Type::EmptyBlockType) => (vec![], vec![]), TypeOrFuncType::Type(ret_ty) => (vec![], vec![ret_ty]), TypeOrFuncType::FuncType(sig_idx) => { - let sig = &self.module.signatures[sig_idx as SignatureId]; + let sig = &self.module.signature(sig_idx as SignatureId); ( Vec::from(sig.params.clone()), Vec::from(sig.returns.clone()), diff --git a/src/ir.rs b/src/ir.rs index 594d437..58e8206 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -1,7 +1,9 @@ //! Intermediate representation for Wasm. 
+use crate::{backend, backend::binaryen}; use crate::{frontend, Operator}; use anyhow::Result; +use fxhash::FxHashSet; use wasmparser::{FuncType, Type}; pub type SignatureId = usize; @@ -17,11 +19,53 @@ pub const INVALID_BLOCK: BlockId = usize::MAX; #[derive(Clone, Debug, Default)] pub struct Module<'a> { - pub orig_bytes: &'a [u8], - pub funcs: Vec, - pub signatures: Vec, - pub globals: Vec, - pub tables: Vec, + orig_bytes: &'a [u8], + funcs: Vec, + signatures: Vec, + globals: Vec, + tables: Vec, + + dirty_funcs: FxHashSet, +} + +impl<'a> Module<'a> { + pub(crate) fn with_orig_bytes(orig_bytes: &'a [u8]) -> Module<'a> { + let mut m = Module::default(); + m.orig_bytes = orig_bytes; + m + } +} + +impl<'a> Module<'a> { + pub fn func<'b>(&'b self, id: FuncId) -> &'b FuncDecl { + &self.funcs[id] + } + pub fn func_mut<'b>(&'b mut self, id: FuncId) -> &'b mut FuncDecl { + self.dirty_funcs.insert(id); + &mut self.funcs[id] + } + pub fn signature<'b>(&'b self, id: SignatureId) -> &'b FuncType { + &self.signatures[id] + } + pub fn global_ty(&self, id: GlobalId) -> Type { + self.globals[id as usize] + } + pub fn table_ty(&self, id: TableId) -> Type { + self.tables[id as usize] + } + + pub(crate) fn frontend_add_signature(&mut self, ty: FuncType) { + self.signatures.push(ty); + } + pub(crate) fn frontend_add_func(&mut self, body: FuncDecl) { + self.funcs.push(body); + } + pub(crate) fn frontend_add_table(&mut self, ty: Type) { + self.tables.push(ty); + } + pub(crate) fn frontend_add_global(&mut self, ty: Type) { + self.globals.push(ty); + } } #[derive(Clone, Debug)] @@ -37,6 +81,20 @@ impl FuncDecl { FuncDecl::Body(sig, ..) => *sig, } } + + pub fn body(&self) -> Option<&FunctionBody> { + match self { + FuncDecl::Body(_, body) => Some(body), + _ => None, + } + } + + pub fn body_mut(&mut self) -> Option<&mut FunctionBody> { + match self { + FuncDecl::Body(_, body) => Some(body), + _ => None, + } + } } #[derive(Clone, Debug, Default)] @@ -457,7 +515,15 @@ impl<'a> Module<'a> { frontend::wasm_to_ir(bytes) } - pub fn to_wasm_bytes(&self) -> Vec { - todo!("use Binaryen") + pub fn to_wasm_bytes(&self) -> Result> { + let binaryen_module = binaryen::Module::read(self.orig_bytes)?; + for &func in &self.dirty_funcs { + if let Some(body) = self.func(func).body() { + let mut binaryen_func = binaryen_module.func(func); + let binaryen_expr = backend::lower::generate_body(self, body); + binaryen_func.set_body(binaryen_expr); + } + } + binaryen_module.write() } } diff --git a/src/op_traits.rs b/src/op_traits.rs index 263d859..51f3c28 100644 --- a/src/op_traits.rs +++ b/src/op_traits.rs @@ -16,15 +16,15 @@ pub fn op_inputs( &Operator::Unreachable | &Operator::Nop => Ok(vec![]), &Operator::Call { function_index } => { - let sig = module.funcs[function_index as usize].sig(); - Ok(Vec::from(module.signatures[sig].params.clone())) + let sig = module.func(function_index).sig(); + Ok(Vec::from(module.signature(sig).params.clone())) } &Operator::CallIndirect { index, .. 
} => { - let mut params = module.signatures[index as usize].params.to_vec(); + let mut params = module.signature(index).params.to_vec(); params.push(Type::I32); Ok(params) } - &Operator::Return => Ok(Vec::from(module.signatures[my_sig].returns.clone())), + &Operator::Return => Ok(Vec::from(module.signature(my_sig).returns.clone())), &Operator::LocalSet { local_index } | &Operator::LocalTee { local_index } => { Ok(vec![my_locals[local_index as usize]]) @@ -38,7 +38,7 @@ pub fn op_inputs( &Operator::TypedSelect { ty } => Ok(vec![ty, ty, Type::I32]), &Operator::GlobalGet { .. } => Ok(vec![]), - &Operator::GlobalSet { global_index } => Ok(vec![module.globals[global_index as usize]]), + &Operator::GlobalSet { global_index } => Ok(vec![module.global_ty(global_index)]), Operator::I32Load { .. } | Operator::I64Load { .. } @@ -216,7 +216,7 @@ pub fn op_inputs( Operator::I32ReinterpretF32 => Ok(vec![Type::F32]), Operator::I64ReinterpretF64 => Ok(vec![Type::F64]), Operator::TableGet { .. } => Ok(vec![Type::I32]), - Operator::TableSet { table } => Ok(vec![Type::I32, module.tables[*table as usize]]), + Operator::TableSet { table } => Ok(vec![Type::I32, module.table_ty(*table)]), Operator::TableGrow { .. } => Ok(vec![Type::I32]), Operator::TableSize { .. } => Ok(vec![]), Operator::MemorySize { .. } => Ok(vec![]), @@ -234,11 +234,11 @@ pub fn op_outputs( &Operator::Unreachable | &Operator::Nop => Ok(vec![]), &Operator::Call { function_index } => { - let sig = module.funcs[function_index as usize].sig(); - Ok(Vec::from(module.signatures[sig].returns.clone())) + let sig = module.func(function_index).sig(); + Ok(Vec::from(module.signature(sig).returns.clone())) } &Operator::CallIndirect { index, .. } => { - Ok(Vec::from(module.signatures[index as usize].returns.clone())) + Ok(Vec::from(module.signature(index).returns.clone())) } &Operator::Return => Ok(vec![]), &Operator::LocalSet { .. } => Ok(vec![]), @@ -251,7 +251,7 @@ pub fn op_outputs( Ok(vec![val_ty]) } &Operator::TypedSelect { ty } => Ok(vec![ty]), - &Operator::GlobalGet { global_index } => Ok(vec![module.globals[global_index as usize]]), + &Operator::GlobalGet { global_index } => Ok(vec![module.global_ty(global_index)]), &Operator::GlobalSet { .. } => Ok(vec![]), Operator::I32Load { .. } @@ -425,7 +425,7 @@ pub fn op_outputs( Operator::F64ReinterpretI64 => Ok(vec![Type::F64]), Operator::I32ReinterpretF32 => Ok(vec![Type::I32]), Operator::I64ReinterpretF64 => Ok(vec![Type::I64]), - Operator::TableGet { table } => Ok(vec![module.tables[*table as usize]]), + Operator::TableGet { table } => Ok(vec![module.table_ty(*table)]), Operator::TableSet { .. } => Ok(vec![]), Operator::TableGrow { .. } => Ok(vec![]), Operator::TableSize { .. } => Ok(vec![Type::I32]),
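
The net effect of this patch is that `Module::to_wasm_bytes` no longer drives the removed schedule/serialize/structured pipeline; it re-reads `orig_bytes` with Binaryen, re-lowers only the functions that were touched through `func_mut` (tracked in `dirty_funcs`), patches those bodies in via `BinaryenFunctionSetBody`, and serializes the result with `BinaryenModuleAllocateAndWrite`. A minimal round-trip sketch of the intended caller-side flow follows; it assumes the crate exports `Module` under the name `waffle` as used by `waffle-util` above, the function index and the in-place edit are placeholders, and `src/backend/lower.rs` (home of `generate_body`) is referenced by the diff but not included in this WIP patch.

use anyhow::Result;
use waffle::Module;

fn roundtrip(wasm: &[u8]) -> Result<Vec<u8>> {
    // Parse the original bytes into IR; the module keeps `orig_bytes` so that
    // untouched functions can be re-emitted verbatim through Binaryen.
    let mut module = Module::from_wasm_bytes(wasm)?;

    // Going through `func_mut` marks the function dirty; only dirty bodies
    // are re-lowered (via `backend::lower::generate_body`) and patched into
    // the Binaryen module.
    if let Some(_body) = module.func_mut(0).body_mut() {
        // ... rewrite the FunctionBody IR in place here ...
    }

    // Re-reads `orig_bytes` with Binaryen, replaces dirty bodies using
    // `BinaryenFunctionSetBody`, and serializes the whole module with
    // `BinaryenModuleAllocateAndWrite`.
    module.to_wasm_bytes()
}

Keeping the original bytes and swapping in only the dirty function bodies is what lets the Binaryen-backed writer avoid re-encoding sections that the IR never touched.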