This commit is contained in:
Chris Fallin 2022-10-26 22:15:15 -07:00
parent 2214c1701b
commit 1e26c0aaa4
11 changed files with 148 additions and 1356 deletions

View file

@ -23,6 +23,20 @@ impl Module {
Ok(Module(ptr)) Ok(Module(ptr))
} }
/// Serializes this module to a Wasm binary and returns the bytes.
///
/// # Errors
///
/// Fails with "Failed to serialize module" when Binaryen hands back a null
/// output buffer.
pub fn write(&self) -> Result<Vec<u8>> {
    // A null source-map URL is passed as the second argument.
    let output = unsafe { BinaryenModuleAllocateAndWrite(self.0, std::ptr::null()) };
    if output.binary.is_null() {
        bail!("Failed to serialize module");
    }

    let data = output.binary as *const c_void as *const u8;
    let len = output.binary_bytes as usize;
    // Copy the bytes into an owned `Vec` while `output` is still alive: its
    // `Drop` impl frees the underlying buffer.
    let bytes = unsafe { std::slice::from_raw_parts(data, len) }.to_vec();
    Ok(bytes)
}
pub fn num_funcs(&self) -> usize { pub fn num_funcs(&self) -> usize {
unsafe { BinaryenGetNumFunctions(self.0) as usize } unsafe { BinaryenGetNumFunctions(self.0) as usize }
} }
@ -88,6 +102,12 @@ impl Function {
} }
} }
/// Replaces the body of this function with `body`.
///
/// The function handle is `self.1`, the same field that `name` passes to
/// `BinaryenFunctionGetName`; `body.1` is passed as the expression handle.
pub fn set_body(&mut self, body: Expression) {
    // `BinaryenFunction` is a plain `*const c_void` alias, so handing the
    // wrong tuple field to the FFI call type-checks silently. The first
    // argument must be the function handle (`self.1`), not `self.0`.
    unsafe {
        BinaryenFunctionSetBody(self.1, body.1);
    }
}
pub fn name(&self) -> &str { pub fn name(&self) -> &str {
let s = unsafe { CStr::from_ptr(BinaryenFunctionGetName(self.1)) }; let s = unsafe { CStr::from_ptr(BinaryenFunctionGetName(self.1)) };
s.to_str().unwrap() s.to_str().unwrap()
@ -622,14 +642,35 @@ type BinaryenFunction = *const c_void;
/// Untyped handle to a Binaryen expression, as passed across the C API.
type BinaryenExpression = *const c_void;
/// Untyped handle to a Binaryen export, as passed across the C API.
type BinaryenExport = *const c_void;
/// Value returned (by value) from `BinaryenModuleAllocateAndWrite`.
///
/// `#[repr(C)]` keeps the fields in declaration order with C layout so the
/// struct can cross the FFI boundary. Both pointers are released in `Drop`.
#[repr(C)]
struct BinaryenModuleAllocateAndWriteResult {
    /// Start of the serialized module bytes; null is treated as failure by
    /// `Module::write`.
    binary: *mut c_void,
    /// Number of bytes readable at `binary`.
    binary_bytes: libc::size_t,
    /// Source-map buffer. `Module::write` passes a null source-map URL and
    /// never reads this field.
    source_map: *mut c_char,
}
impl Drop for BinaryenModuleAllocateAndWriteResult {
    /// Releases both output buffers with `libc::free`.
    fn drop(&mut self) {
        let source_map = self.source_map as *mut c_void;
        // `free` accepts null pointers, so fields that were never filled in
        // are harmless here.
        unsafe {
            libc::free(self.binary);
            libc::free(source_map);
        }
    }
}
#[link(name = "binaryen")] #[link(name = "binaryen")]
extern "C" { extern "C" {
fn BinaryenModuleRead(data: *const u8, len: usize) -> BinaryenModule; fn BinaryenModuleRead(data: *const u8, len: usize) -> BinaryenModule;
fn BinaryenModuleDispose(ptr: BinaryenModule); fn BinaryenModuleDispose(ptr: BinaryenModule);
fn BinaryenModuleAllocateAndWrite(
ptr: BinaryenModule,
sourceMapUrl: *const c_char,
) -> BinaryenModuleAllocateAndWriteResult;
fn BinaryenGetNumFunctions(ptr: BinaryenModule) -> u32; fn BinaryenGetNumFunctions(ptr: BinaryenModule) -> u32;
fn BinaryenGetFunctionByIndex(ptr: BinaryenModule, index: u32) -> BinaryenFunction; fn BinaryenGetFunctionByIndex(ptr: BinaryenModule, index: u32) -> BinaryenFunction;
fn BinaryenGetFunction(ptr: BinaryenModule, name: *const c_char) -> BinaryenFunction; fn BinaryenGetFunction(ptr: BinaryenModule, name: *const c_char) -> BinaryenFunction;
fn BinaryenFunctionGetBody(ptr: BinaryenFunction) -> BinaryenExpression; fn BinaryenFunctionGetBody(ptr: BinaryenFunction) -> BinaryenExpression;
fn BinaryenFunctionSetBody(ptr: BinaryenFunction, body: BinaryenExpression);
fn BinaryenFunctionGetName(ptr: BinaryenFunction) -> *const c_char; fn BinaryenFunctionGetName(ptr: BinaryenFunction) -> *const c_char;
fn BinaryenGetExport(ptr: BinaryenModule, name: *const c_char) -> BinaryenExport; fn BinaryenGetExport(ptr: BinaryenModule, name: *const c_char) -> BinaryenExport;
fn BinaryenGetNumExports(ptr: BinaryenModule) -> u32; fn BinaryenGetNumExports(ptr: BinaryenModule) -> u32;

View file

@ -1,159 +0,0 @@
//! Final Wasm operator sequence production.
use super::{Locations, SerializedBlockTarget, SerializedBody, SerializedOperator};
use crate::{ops::ty_to_valty, FunctionBody};
use std::borrow::Cow;
use wasm_encoder::BlockType;
/// Final form of a function body: a flat Wasm instruction sequence plus the
/// types of its non-parameter locals.
#[derive(Clone, Debug)]
pub struct Wasm {
    /// Instructions in emission order.
    pub operators: Vec<wasm_encoder::Instruction<'static>>,
    /// Local types: the function's own non-parameter locals first, then the
    /// locals introduced by location assignment.
    pub locals: Vec<wasm_encoder::ValType>,
}
/// Translation state: a mutable handle on the `Wasm` output being built.
struct WasmContext<'a> {
    /// Destination that translated instructions are appended to.
    wasm: &'a mut Wasm,
}
impl<'a> WasmContext<'a> {
    /// Appends a single instruction to the output sequence.
    fn emit(&mut self, inst: wasm_encoder::Instruction<'static>) {
        self.wasm.operators.push(inst);
    }

    /// Lowers one serialized operator into Wasm instructions, recursing into
    /// branch targets and `BrTable` index operators.
    ///
    /// Panics if a `Get`/`Set`/`Tee` names a `(value, index)` pair that has no
    /// entry in `locations`.
    fn translate(&mut self, op: &SerializedOperator, locations: &Locations) {
        log::trace!("translate: {:?}", op);
        match op {
            SerializedOperator::StartBlock { .. } => {
                self.emit(wasm_encoder::Instruction::Block(BlockType::Empty));
            }
            SerializedOperator::StartLoop { .. } => {
                self.emit(wasm_encoder::Instruction::Loop(BlockType::Empty));
            }
            SerializedOperator::End => {
                self.emit(wasm_encoder::Instruction::End);
            }
            SerializedOperator::GetArg(index) => {
                self.emit(wasm_encoder::Instruction::LocalGet(*index as u32));
            }
            SerializedOperator::Operator(inner) => {
                self.emit(inner.clone().into());
            }
            SerializedOperator::Br(target) => {
                self.translate_target(0, target, locations);
            }
            SerializedOperator::BrIf { if_true, if_false } => {
                // Both arms sit one level deeper, inside the `if`.
                self.emit(wasm_encoder::Instruction::If(BlockType::Empty));
                self.translate_target(1, if_true, locations);
                self.emit(wasm_encoder::Instruction::Else);
                self.translate_target(1, if_false, locations);
                self.emit(wasm_encoder::Instruction::End);
            }
            SerializedOperator::BrTable {
                index_ops,
                targets,
                default,
            } => {
                let n_targets = targets.len();
                // One block per explicit target, one for the default, and one
                // innermost block that holds the `br_table` itself.
                for _ in 0..(n_targets + 2) {
                    self.emit(wasm_encoder::Instruction::Block(
                        wasm_encoder::BlockType::Empty,
                    ));
                }

                // Label 0 (the innermost block) is the default; explicit
                // target `i` uses label `i + 1`.
                let br_table_targets: Vec<u32> = (1..=n_targets).map(|i| i as u32).collect();
                for index_op in index_ops {
                    self.translate(index_op, locations);
                }
                self.emit(wasm_encoder::Instruction::BrTable(
                    Cow::Owned(br_table_targets),
                    0,
                ));
                self.emit(wasm_encoder::Instruction::End);

                // Default arm: `n_targets + 1` helper blocks are still open.
                self.translate_target(n_targets + 1, default, locations);
                self.emit(wasm_encoder::Instruction::End);

                // Each later arm has one fewer helper block open around it.
                for (i, target) in targets.iter().enumerate() {
                    self.translate_target(n_targets - i, target, locations);
                    self.emit(wasm_encoder::Instruction::End);
                }
            }
            SerializedOperator::Get(v, i) => {
                let local = *locations.locations.get(&(*v, *i)).unwrap();
                self.emit(wasm_encoder::Instruction::LocalGet(local));
            }
            SerializedOperator::Set(v, i) => {
                let local = *locations.locations.get(&(*v, *i)).unwrap();
                self.emit(wasm_encoder::Instruction::LocalSet(local));
            }
            SerializedOperator::Tee(v, i) => {
                let local = *locations.locations.get(&(*v, *i)).unwrap();
                self.emit(wasm_encoder::Instruction::LocalTee(local));
            }
        }
    }

    /// Emits the operators attached to a branch target, followed by the
    /// branch itself.
    ///
    /// `extra_blocks` is the number of helper blocks opened by the caller
    /// around this target. A `Branch` depth is shifted by that amount. A
    /// `Fallthrough` branches out of the helper blocks, or emits no branch at
    /// all when there are none.
    fn translate_target(
        &mut self,
        extra_blocks: usize,
        target: &SerializedBlockTarget,
        locations: &Locations,
    ) {
        log::trace!("translate_target: {:?}", target);
        match target {
            SerializedBlockTarget::Fallthrough(ops) => {
                for op in ops {
                    self.translate(op, locations);
                }
                if extra_blocks > 0 {
                    self.emit(wasm_encoder::Instruction::Br((extra_blocks - 1) as u32));
                }
            }
            SerializedBlockTarget::Branch(branch, ops) => {
                for op in ops {
                    self.translate(op, locations);
                }
                self.emit(wasm_encoder::Instruction::Br(
                    (*branch + extra_blocks) as u32,
                ));
            }
        }
    }
}
/// Produces the final instruction sequence and local list for a function.
///
/// The local list is the function's own non-parameter locals followed by the
/// locals added by `locations`. Every operator in `body` is translated in
/// order, and the sequence is closed with `unreachable; end`.
pub fn produce_func_wasm(f: &FunctionBody, body: &SerializedBody, locations: &Locations) -> Wasm {
    let mut wasm = Wasm {
        operators: vec![],
        locals: vec![],
    };

    // Parameters are skipped: only the locals after them are collected.
    let own_locals = f.locals.iter().skip(f.n_params);
    wasm.locals.extend(own_locals.map(|ty| ty_to_valty(*ty)));
    let allocated_locals = locations.new_locals.iter();
    wasm.locals.extend(allocated_locals.map(|ty| ty_to_valty(*ty)));

    let mut ctx = WasmContext { wasm: &mut wasm };
    body.operators
        .iter()
        .for_each(|operator| ctx.translate(operator, locations));

    // There is always an explicit Return before this point. This
    // allows us to avoid matching the return types in our stack
    // discipline / outer block type.
    wasm.operators.push(wasm_encoder::Instruction::Unreachable);
    wasm.operators.push(wasm_encoder::Instruction::End);
    wasm
}

View file

@ -1,214 +0,0 @@
//! Location assignment (pseudo-regalloc) for SSA values onto
//! locals/operand-stack values.
use crate::{FunctionBody, LocalId, Value};
use fxhash::FxHashMap;
use super::{SerializedBody, SerializedOperator};
/// Result of location assignment: which local holds each SSA value result.
#[derive(Debug)]
pub struct Locations {
    /// Local assigned to each `(value, result index)` pair.
    pub locations: FxHashMap<(Value, usize), LocalId>,
    /// Types of the locals created by the allocator, in creation order. They
    /// are numbered after the function's existing locals.
    pub new_locals: Vec<wasmparser::Type>,
}
/// One open block or loop while scanning the serialized body.
#[derive(Debug)]
struct Frame {
    /// True when the frame was opened by `StartLoop`.
    is_loop: bool,
    /// Index of the operator that opened the frame.
    start_loc: usize,
    /// Pairs read inside a loop frame whose span began before the frame
    /// opened. They are treated as read again at the loop's `End`.
    use_at_end: Vec<(Value, usize)>,
}
/// Working state for computing live spans and assigning locals to them.
pub struct Allocator<'a> {
    /// Output being filled in.
    locations: &'a mut Locations,
    /// Function whose values are being placed; supplies value types and the
    /// count of existing locals.
    f: &'a FunctionBody,
    /// Stack of currently open blocks and loops.
    active_frames: Vec<Frame>,
    /// Live span for each `(value, result index)` pair seen so far.
    spans: FxHashMap<(Value, usize), ValueSpan>,
    /// All spans, sorted by `start`.
    starts: Vec<ValueSpan>,
    /// All spans, sorted by `end`.
    ends: Vec<ValueSpan>,
    /// Locals released by ended spans, available for reuse, keyed by type.
    freelist: FxHashMap<wasmparser::Type, Vec<LocalId>>,
}
/// Half-open live range `[start, end)` of one value result, measured in
/// operator indices of the serialized body.
#[derive(Clone, Copy, Debug)]
pub struct ValueSpan {
    /// Value this span belongs to.
    value: Value,
    /// Which of the value's results this span covers.
    multi_value_index: usize,
    /// First index in serialized body at which value is live.
    start: usize,
    /// First index in serialized body at which value is no longer live.
    end: usize,
}
impl ValueSpan {
fn len(&self) -> usize {
self.end - self.start
}
}
impl Locations {
    /// Assigns a local to every `(value, result index)` pair written in
    /// `body`, reusing locals between non-overlapping spans of the same type.
    pub fn compute(f: &FunctionBody, body: &SerializedBody) -> Locations {
        let mut result = Locations {
            locations: FxHashMap::default(),
            new_locals: Vec::new(),
        };

        // The allocator writes straight into `result`. It is a temporary, so
        // its mutable borrow ends with this statement.
        Allocator {
            locations: &mut result,
            f,
            active_frames: Vec::new(),
            spans: FxHashMap::default(),
            starts: Vec::new(),
            ends: Vec::new(),
            freelist: FxHashMap::default(),
        }
        .compute_spans(&body.operators[..]);

        result
    }
}
impl<'a> Allocator<'a> {
    /// Updates frame and span state for the operator at index `location`.
    ///
    /// Reads extend the span of the pair they touch. Writes create the span
    /// on first sight and extend it afterwards. Closing a loop re-reads every
    /// pair recorded in that loop's `use_at_end`, so those spans reach the end
    /// of the loop.
    ///
    /// Panics on an `End` with no open frame, and on a read of a pair that
    /// was never written.
    fn handle_op(&mut self, location: usize, op: &SerializedOperator) {
        let mut reads = vec![];
        let mut writes = vec![];

        match op {
            SerializedOperator::StartBlock { .. } | SerializedOperator::StartLoop { .. } => {
                self.active_frames.push(Frame {
                    is_loop: matches!(op, SerializedOperator::StartLoop { .. }),
                    start_loc: location,
                    use_at_end: vec![],
                });
            }
            SerializedOperator::End => {
                let closed = self.active_frames.pop().unwrap();
                if closed.is_loop {
                    reads.extend(closed.use_at_end);
                }
            }
            _ => {}
        }

        op.visit_value_locals(
            &mut |value, index| reads.push((value, index)),
            &mut |value, index| writes.push((value, index)),
        );

        log::trace!(
            "handle_op: at location {} op {:?} reads {:?} writes {:?}",
            location,
            op,
            reads,
            writes
        );

        for (value, index) in reads {
            let span = self
                .spans
                .get_mut(&(value, index))
                .unwrap_or_else(|| panic!("Read before any write to local ({},{})", value, index));
            span.end = location + 1;
            log::trace!(" -> span for {}: {:?}", value, span);

            // A value defined before an enclosing loop has to stay live until
            // that loop ends, so record it on every such loop frame.
            let span_start = span.start;
            for frame in self.active_frames.iter_mut() {
                if frame.is_loop && span_start < frame.start_loc {
                    frame.use_at_end.push((value, index));
                }
            }
        }

        for (value, index) in writes {
            let fresh = ValueSpan {
                value,
                multi_value_index: index,
                start: location,
                end: location + 1,
            };
            let span = self.spans.entry((value, index)).or_insert(fresh);
            span.end = location + 1;
            log::trace!(" -> span for {}: {:?}", value, span);
        }
    }

    /// Builds live spans for the whole body, then sweeps span starts and ends
    /// in index order to hand out locals.
    fn compute_spans(&mut self, operators: &[SerializedOperator]) {
        // For each operator, get the reads and writes and construct spans.
        for (index, operator) in operators.iter().enumerate() {
            self.handle_op(index, operator);
        }

        // Build lists of spans sorted by start and end.
        self.starts = self.spans.values().cloned().collect();
        self.ends = self.starts.clone();
        self.starts.sort_unstable_by_key(|span| span.start);
        self.ends.sort_unstable_by_key(|span| span.end);

        // Finally, assign locals to (value, index) pairs. An end at or before
        // the next start is processed first, so its local can be reused.
        let mut start_idx = 0;
        let mut end_idx = 0;
        loop {
            let next_start = self.starts.get(start_idx).copied();
            let next_end = self.ends.get(end_idx).copied();
            match (next_start, next_end) {
                (None, None) => break,
                (Some(starting), Some(ending)) if ending.end <= starting.start => {
                    end_idx += 1;
                    self.handle_end(&ending);
                }
                (Some(starting), _) => {
                    start_idx += 1;
                    self.handle_start(&starting);
                }
                (None, Some(ending)) => {
                    end_idx += 1;
                    self.handle_end(&ending);
                }
            }
        }
    }

    /// Returns the local held by `span` to the free list for its type.
    ///
    /// Panics if the span was never given a local.
    fn handle_end(&mut self, span: &ValueSpan) {
        let key = (span.value, span.multi_value_index);
        let local = *self.locations.locations.get(&key).unwrap();
        let ty = self.f.types[span.value.index()][span.multi_value_index];
        self.freelist.entry(ty).or_default().push(local);
    }

    /// Gives `span` a local: a freed one of the same type when available,
    /// otherwise a new local numbered after all existing ones.
    fn handle_start(&mut self, span: &ValueSpan) {
        let key = (span.value, span.multi_value_index);
        let ty = self.f.types[span.value.index()][span.multi_value_index];

        let reused = self.freelist.get_mut(&ty).and_then(|list| list.pop());
        let local = match reused {
            Some(local) => local,
            None => {
                let new_local = self.f.locals.len() + self.locations.new_locals.len();
                self.locations.new_locals.push(ty);
                new_local as u32
            }
        };
        self.locations.locations.insert(key, local);
    }
}

View file

@ -1,4 +1,4 @@
//! Backend: IR to Wasm. //! Backend: IR to Wasm.
mod binaryen; pub mod binaryen;
pub use binaryen::*; pub mod lower;

View file

@ -1,233 +0,0 @@
//! Op scheduling.
use fxhash::FxHashMap;
use super::UseCountAnalysis;
use crate::{cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Value, ValueDef};
/// Placement decided for every value of a function body.
#[derive(Clone, Debug, Default)]
pub struct Schedule {
    /// Output: location at which to compute each value.
    pub location: Vec</* Value, */ Location>,
    /// Output: for each toplevel value, all values that are computed
    /// after it is.
    pub compute_after_value: FxHashMap<Value, Vec<Value>>,
    /// Output: all values ready at the top of a given block.
    pub compute_at_top_of_block: FxHashMap<BlockId, Vec<Value>>,
}
/// Mutable state used while a `Schedule` is being computed.
pub struct SchedulerContext<'a> {
    /// The schedule we are constructing.
    schedule: &'a mut Schedule,
    /// In-progress state: for each value, the values that have one
    /// more ready input once that value is computed.
    waiting_on_value: FxHashMap<Value, Vec<Value>>,
    /// In-progress state: for each value, how many inputs need to
    /// become ready.
    remaining_inputs: FxHashMap<Value, usize>,
    /// In-progress state: all values that are ready to be scheduled.
    ready: Vec<Value>,
    /// Input context: CFG.
    cfg: &'a CFGInfo,
    /// Input context: function body.
    f: &'a FunctionBody,
}
/// Locations are denoted by top-level values (those in `insts`),
/// which are those with a side-effect; the sea-of-nodes
/// representation for all other value nodes allows them to be
/// computed anywhere dominated by all operands and that dominates all
/// uses, so we have significant flexibility. We denote a location as
/// "after a toplevel", then in the second pass where we actually
/// generate operators according to stack discipline, we resolve the
/// order for all values at a given toplevel.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Location {
    /// At a separate top-level location.
    Toplevel,
    /// After a given value.
    After(Value),
    /// At the top of a given block.
    BlockTop(BlockId),
    /// Not yet scheduled.
    None,
}
impl Schedule {
pub fn compute(f: &FunctionBody, cfg: &CFGInfo, uses: &UseCountAnalysis) -> Self {
let mut schedule = Schedule::default();
schedule.location = vec![Location::None; f.values.len()];
log::trace!("f: {:?}", f);
log::trace!("cfg: {:?}", cfg);
log::trace!("uses: {:?}", uses);
let mut ctx = SchedulerContext {
schedule: &mut schedule,
f,
cfg,
waiting_on_value: FxHashMap::default(),
remaining_inputs: FxHashMap::default(),
ready: vec![],
};
// Prepare the "waiting on value", "remaining inputs", and
// "ready" vectors.
for (value, value_def) in f.values() {
if uses.use_count[value.index()] == 0 {
continue;
}
if uses.toplevel.contains(&value) {
continue;
}
match value_def {
&ValueDef::Operator(op, ref operands) => {
if operands.len() == 0 {
if !op_rematerialize(&op) {
log::trace!("immediately ready: v{}", value.index());
ctx.ready.push(value);
}
} else {
let mut remaining = 0;
for &input in operands {
let input = f.resolve_alias(input);
match &f.values[input.index()] {
&ValueDef::Operator(ref op, ..) if op_rematerialize(op) => {
continue;
}
&ValueDef::Arg(..) => {
continue;
}
_ => {}
}
log::trace!("v{} waiting on v{}", value.index(), input.index());
ctx.waiting_on_value
.entry(input)
.or_insert_with(|| vec![])
.push(value);
remaining += 1;
}
if remaining > 0 {
ctx.remaining_inputs.insert(value, remaining);
} else {
ctx.ready.push(value);
}
}
}
&ValueDef::Alias(v) | &ValueDef::PickOutput(v, _) => {
let v = f.resolve_alias(v);
ctx.remaining_inputs.insert(value, 1);
ctx.waiting_on_value
.entry(v)
.or_insert_with(|| vec![])
.push(value);
}
_ => {}
}
}
// Traverse blocks in RPO. When we schedule a given op, we've
// already scheduled all of its operands, so we can find the
// right place for it without any sort of backtracking or
// fixpoint convergence.
//
// - Values in `insts` (toplevel operations)
// are scheduled at their locations. All side-effecting ops
// are in this category, and hence never experience
// code-motion relative to other side-effecting ops or
// control flow.
//
// - Otherwise, values are scheduled after their last operand
// is ready. All operands must have been computed by the
// time we reach a given operator in RPO, and each operand's
// scheduled site must dominate the current location
// (toplevel value). Because the dominance relation forms a
// tree structure (the domtree), for any two operand def
// sites X and Y to the current location L, given X dom L
// and Y dom L, either X dom Y or Y dom X. Thus, consider
// the current-best and each new operand in pairs, and pick
// the one that is dominated by the other.
for &block in cfg.postorder.iter().rev() {
for &(_, param) in &f.blocks[block].params {
log::trace!("block{}: param v{}", block, param.index());
ctx.wake_dependents(param);
}
ctx.sched_ready_at_block_top(block);
for &inst in &f.blocks[block].insts {
log::trace!("block{}: toplevel v{}", block, inst.index());
ctx.sched_toplevel(inst);
ctx.sched_ready_after_value(inst);
}
}
schedule
}
}
impl<'a> SchedulerContext<'a> {
    /// Marks `v` as scheduled at its own toplevel position and wakes the
    /// values waiting on it.
    ///
    /// Panics if `v` already has a location.
    fn sched_toplevel(&mut self, v: Value) {
        log::trace!("sched_toplevel: v{}", v.index());
        let slot = v.index();
        assert_eq!(self.schedule.location[slot], Location::None);
        self.schedule.location[slot] = Location::Toplevel;
        self.wake_dependents(v);
    }

    /// Drains the ready queue, scheduling every ready value right after
    /// toplevel value `v`. Waking dependents may refill the queue, so the
    /// drain repeats until the queue stays empty.
    fn sched_ready_after_value(&mut self, v: Value) {
        log::trace!("sched_ready_after_value: toplevel v{}", v.index());
        loop {
            let batch = std::mem::take(&mut self.ready);
            if batch.is_empty() {
                break;
            }
            for ready in batch {
                log::trace!(
                    "sched_ready_after_value: toplevel v{} -> v{} now ready",
                    v.index(),
                    ready.index()
                );
                self.schedule.location[ready.index()] = Location::After(v);
                self.schedule
                    .compute_after_value
                    .entry(v)
                    .or_default()
                    .push(ready);
                self.wake_dependents(ready);
            }
        }
    }

    /// Drains the ready queue, scheduling every ready value at the top of
    /// `block`. Repeats until waking dependents produces no new ready values.
    fn sched_ready_at_block_top(&mut self, block: BlockId) {
        log::trace!("ready_at_block_top: block{}", block);
        loop {
            let batch = std::mem::take(&mut self.ready);
            if batch.is_empty() {
                break;
            }
            for ready in batch {
                log::trace!(
                    "ready_at_block_top: block{} -> ready: v{}",
                    block,
                    ready.index()
                );
                self.schedule.location[ready.index()] = Location::BlockTop(block);
                self.schedule
                    .compute_at_top_of_block
                    .entry(block)
                    .or_default()
                    .push(ready);
                self.wake_dependents(ready);
            }
        }
    }

    /// Records that `v` is now available: every value waiting on it has its
    /// outstanding-input count decremented. A value that reaches zero is
    /// queued as ready and has its own dependents woken recursively.
    fn wake_dependents(&mut self, v: Value) {
        log::trace!("wake_dependents: v{}", v.index());
        let dependents = self.waiting_on_value.remove(&v).unwrap_or_default();
        for dependent in dependents {
            log::trace!(" -> v{} wakes dependent v{}", v.index(), dependent.index(),);
            let now_ready = {
                let remaining = self.remaining_inputs.get_mut(&dependent).unwrap();
                *remaining -= 1;
                log::trace!(" -> remaining now {}", *remaining);
                *remaining == 0
            };
            if now_ready {
                self.remaining_inputs.remove(&dependent);
                self.ready.push(dependent);
                self.wake_dependents(dependent);
            }
        }
    }
}

View file

@ -1,311 +0,0 @@
//! Serialization of the sea-of-nodes IR using a BlockOrder
//! Wasm-structured-control-flow result into actual order of operators
//! in Wasm function body. Contains everything needed to emit Wasm
//! except for value locations (and corresponding local spill/reloads).
use super::structured::{BlockOrder, BlockOrderEntry};
use crate::{
cfg::CFGInfo, op_traits::op_rematerialize, BlockId, FunctionBody, Operator, Terminator, Value,
ValueDef,
};
/// A Wasm function body with a serialized sequence of operators that
/// mirror Wasm opcodes in every way *except* for locals corresponding
/// to SSA values. This is a sort of "pre-regalloc" representation of
/// the final code.
#[derive(Clone, Debug)]
pub struct SerializedBody {
    /// Operators in final emission order.
    pub(crate) operators: Vec<SerializedOperator>,
}
/// One outgoing edge of a branch, together with the operators that must run
/// on that edge before control transfers.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SerializedBlockTarget {
    /// Control continues without a branch; carries the edge operators.
    Fallthrough(Vec<SerializedOperator>),
    /// Branch with the given relative depth; carries the edge operators.
    Branch(usize, Vec<SerializedOperator>),
}
/// One element of a serialized function body.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SerializedOperator {
    /// Opens a block; `header` is the block id the region starts at.
    StartBlock {
        header: BlockId,
    },
    /// Opens a loop; `header` is the loop header block id.
    StartLoop {
        header: BlockId,
    },
    /// Unconditional transfer to a single target.
    Br(SerializedBlockTarget),
    /// Two-way conditional transfer.
    BrIf {
        if_true: SerializedBlockTarget,
        if_false: SerializedBlockTarget,
    },
    /// Multi-way transfer: `index_ops` compute the selector, which picks
    /// among `targets`, with `default` as the fallback.
    BrTable {
        index_ops: Vec<SerializedOperator>,
        targets: Vec<SerializedBlockTarget>,
        default: SerializedBlockTarget,
    },
    /// Compute the given value. Stack discipline will be maintained:
    /// all operands will be computed or fetched via `Get` and all
    /// produced results will be used directly or stored via `Set`.
    Operator(Operator),
    /// Get the given value from the local corresponding to the
    /// `Value`'s n'th result.
    Get(Value, usize),
    /// Set the local corresponding to the `Value`'s n'th result,
    /// consuming the value on the stack.
    Set(Value, usize),
    /// Set the value, like `Set`, but without consuming it from the
    /// stack.
    Tee(Value, usize),
    /// Get the given function argument.
    GetArg(usize),
    /// Closes the innermost open block or loop.
    End,
}
impl SerializedOperator {
    /// Reports every value-local access made by this operator, including
    /// those nested inside branch targets.
    ///
    /// `r` is called for each `Get`, and `w` for each `Set` or `Tee`. For a
    /// `BrTable` the visit order is index operators, then the default target,
    /// then the explicit targets.
    pub fn visit_value_locals<R: FnMut(Value, usize), W: FnMut(Value, usize)>(
        &self,
        r: &mut R,
        w: &mut W,
    ) {
        match self {
            SerializedOperator::Get(v, i) => r(*v, *i),
            SerializedOperator::Set(v, i) | SerializedOperator::Tee(v, i) => w(*v, *i),
            SerializedOperator::Br(target) => target.visit_value_locals(r, w),
            SerializedOperator::BrIf { if_true, if_false } => {
                if_true.visit_value_locals(r, w);
                if_false.visit_value_locals(r, w);
            }
            SerializedOperator::BrTable {
                index_ops,
                default,
                targets,
            } => {
                for index_op in index_ops {
                    index_op.visit_value_locals(r, w);
                }
                default.visit_value_locals(r, w);
                for target in targets {
                    target.visit_value_locals(r, w);
                }
            }
            // These variants touch no value locals.
            SerializedOperator::StartBlock { .. }
            | SerializedOperator::StartLoop { .. }
            | SerializedOperator::GetArg(..)
            | SerializedOperator::Operator(..)
            | SerializedOperator::End => {}
        }
    }
}
impl SerializedBlockTarget {
    /// Visits the value-local accesses of every operator attached to this
    /// target, in order, for both `Branch` and `Fallthrough`.
    fn visit_value_locals<R: FnMut(Value, usize), W: FnMut(Value, usize)>(
        &self,
        r: &mut R,
        w: &mut W,
    ) {
        let ops = match self {
            SerializedBlockTarget::Branch(_, ops) | SerializedBlockTarget::Fallthrough(ops) => ops,
        };
        for op in ops {
            op.visit_value_locals(r, w);
        }
    }
}
/// Working state while a `SerializedBody` is being built.
struct SerializedBodyContext<'a> {
    /// Function being serialized.
    f: &'a FunctionBody,
    /// CFG information for `f`.
    cfg: &'a CFGInfo,
    /// Operators emitted so far.
    operators: Vec<SerializedOperator>,
}
impl SerializedBody {
    /// Serializes `f` by walking the entries of `order` in sequence.
    ///
    /// When trace logging is enabled, every value definition and each
    /// block's instruction list and terminator are dumped first.
    pub fn compute(f: &FunctionBody, cfg: &CFGInfo, order: &BlockOrder) -> SerializedBody {
        if log::log_enabled!(log::Level::Trace) {
            log::trace!("values:");
            for value in 0..f.values.len() {
                log::trace!(" * v{}: {:?}", value, f.values[value]);
            }
            for block in 0..f.blocks.len() {
                let block_data = &f.blocks[block];
                log::trace!("block{}:", block);
                for &inst in &block_data.insts {
                    log::trace!(" -> v{}", inst.index());
                }
                log::trace!(" -> terminator: {:?}", block_data.terminator);
            }
        }

        let mut ctx = SerializedBodyContext {
            f,
            cfg,
            operators: vec![],
        };
        order
            .entries
            .iter()
            .for_each(|entry| ctx.compute_entry(entry));

        let SerializedBodyContext { operators, .. } = ctx;
        SerializedBody { operators }
    }
}
impl<'a> SerializedBodyContext<'a> {
    /// Emits the operators for one block-order entry.
    ///
    /// Structural entries map to a single operator each. A basic block emits
    /// its instructions, then an operator for its terminator. Each outgoing
    /// edge carries the loads of the branch arguments followed by the stores
    /// into the destination block's parameters.
    fn compute_entry(&mut self, entry: &BlockOrderEntry) {
        match entry {
            BlockOrderEntry::StartBlock(header) => {
                self.operators
                    .push(SerializedOperator::StartBlock { header: *header });
            }
            BlockOrderEntry::StartLoop(header) => {
                self.operators
                    .push(SerializedOperator::StartLoop { header: *header });
            }
            BlockOrderEntry::End => {
                self.operators.push(SerializedOperator::End);
            }
            BlockOrderEntry::BasicBlock(block, targets) => {
                let block = *block;
                log::trace!("BlockOrderEntry: block{}", block);

                // Compute insts' values in sequence.
                for &inst in &self.f.blocks[block].insts {
                    let mut inst_ops = vec![];
                    self.emit_inst(inst, &mut inst_ops);
                    inst_ops.reverse();
                    self.operators.extend(inst_ops);
                }

                // For each BlockOrderTarget, compute a SerializedBlockTarget.
                // The edges are collected eagerly, then consumed in order by
                // the terminator handling below.
                let mut edges = targets
                    .iter()
                    .map(|target| {
                        log::trace!("target: {:?}", target);
                        let mut rev_ops = vec![];
                        // Store into block param values.
                        for &(_, value) in &self.f.blocks[target.target].params {
                            rev_ops.push(SerializedOperator::Set(value, 0));
                        }
                        // Load from branch operator's args.
                        for &value in target.args.iter().rev() {
                            let value = self.f.resolve_alias(value);
                            self.push_value(value, &mut rev_ops);
                        }
                        rev_ops.reverse();
                        log::trace!(" -> ops: {:?}", rev_ops);
                        match target.relative_branch {
                            Some(branch) => SerializedBlockTarget::Branch(branch, rev_ops),
                            None => SerializedBlockTarget::Fallthrough(rev_ops),
                        }
                    })
                    .collect::<Vec<_>>()
                    .into_iter();

                // Finally, generate branch ops.
                match &self.f.blocks[block].terminator {
                    Terminator::Br { .. } => {
                        let target = edges.next().unwrap();
                        self.operators.push(SerializedOperator::Br(target));
                    }
                    Terminator::CondBr { cond, .. } => {
                        let if_true = edges.next().unwrap();
                        let if_false = edges.next().unwrap();
                        let mut cond_ops = vec![];
                        let cond = self.f.resolve_alias(*cond);
                        self.push_value(cond, &mut cond_ops);
                        cond_ops.reverse();
                        self.operators.extend(cond_ops);
                        self.operators
                            .push(SerializedOperator::BrIf { if_true, if_false });
                    }
                    Terminator::Select { value, .. } => {
                        // The first edge is the default; the rest are the
                        // indexed targets.
                        let default = edges.next().unwrap();
                        let targets = edges.collect::<Vec<_>>();
                        let mut index_ops = vec![];
                        let value = self.f.resolve_alias(*value);
                        self.push_value(value, &mut index_ops);
                        index_ops.reverse();
                        self.operators.push(SerializedOperator::BrTable {
                            index_ops,
                            targets,
                            default,
                        });
                    }
                    Terminator::Return { values, .. } => {
                        let mut ret_ops = vec![];
                        for &value in values.iter().rev() {
                            self.push_value(value, &mut ret_ops);
                        }
                        ret_ops.reverse();
                        self.operators.extend(ret_ops);
                        self.operators
                            .push(SerializedOperator::Operator(Operator::Return));
                    }
                    Terminator::None => {
                        self.operators
                            .push(SerializedOperator::Operator(Operator::Unreachable));
                    }
                }
            }
        }
    }

    /// Appends to `rev_ops` the single operator that puts `v` on the stack.
    ///
    /// After alias resolution, a `PickOutput` reads the picked result's
    /// local, an `Arg` reads the argument, a rematerializable operator is
    /// re-emitted in place, and anything else reads result 0 of `v`.
    fn push_value(&mut self, v: Value, rev_ops: &mut Vec<SerializedOperator>) {
        let v = self.f.resolve_alias(v);
        let op = match &self.f.values[v.index()] {
            ValueDef::PickOutput(source, i) => SerializedOperator::Get(*source, *i),
            ValueDef::Arg(i) => SerializedOperator::GetArg(*i),
            ValueDef::Operator(op, ..) if op_rematerialize(op) => {
                SerializedOperator::Operator(*op)
            }
            _ => SerializedOperator::Get(v, 0),
        };
        rev_ops.push(op);
    }

    /// Appends the operators for instruction `inst` to `rev_ops` in
    /// *reverse* emission order; the caller reverses the list.
    ///
    /// Values that are not operator definitions produce nothing.
    fn emit_inst(&mut self, inst: Value, rev_ops: &mut Vec<SerializedOperator>) {
        let (operator, operands) = match &self.f.values[inst.index()] {
            ValueDef::Operator(op, operands) => (*op, operands),
            _ => return,
        };

        // We're generating ops in reverse order. So we must first
        // store value.
        let n_results = self.f.types[inst.index()].len();
        rev_ops.extend((0..n_results).map(|i| SerializedOperator::Set(inst, i)));
        rev_ops.push(SerializedOperator::Operator(operator));

        // Now push the args in reverse order.
        for &arg in operands.iter().rev() {
            let arg = self.f.resolve_alias(arg);
            self.push_value(arg, rev_ops);
        }
    }
}

View file

@ -1,391 +0,0 @@
//! Recovery of structured control flow information. Loop nest
//! computation, block order linearization and loop/block region
//! generation.
use fxhash::{FxHashMap, FxHashSet};
use crate::{cfg::CFGInfo, BlockId, FunctionBody, Value};
/// Element of the loop nest: either a single block or a loop with a body.
#[derive(Clone, Debug)]
pub enum Node {
    /// A single basic block.
    Leaf(BlockId),
    /// A loop, identified by its header block, containing the given nodes.
    Loop(BlockId, Vec<Node>),
}
impl Node {
    /// Block identifying this node: the block itself for a leaf, the loop
    /// header for a loop.
    pub fn header(&self) -> BlockId {
        match *self {
            Node::Leaf(block) | Node::Loop(block, ..) => block,
        }
    }

    /// True for `Node::Loop`.
    pub fn is_loop(&self) -> bool {
        matches!(self, Node::Loop(..))
    }

    /// True for `Node::Leaf`.
    pub fn is_leaf(&self) -> bool {
        matches!(self, Node::Leaf(..))
    }
}
/// Loop nest of a function: the top-level sequence of leaves and loops.
pub struct LoopNest {
    /// Top-level nodes, in the order they were discovered while walking the
    /// blocks in reverse postorder.
    nodes: Vec<Node>,
}
impl LoopNest {
    /// Builds the loop nest for `cfg`.
    ///
    /// Panics if a backedge's target does not dominate its source (an
    /// irreducible edge), or if a successor has no postorder position.
    pub fn compute(cfg: &CFGInfo) -> LoopNest {
        // Find loop backedges: any successor edge from a higher- to
        // lower-numbered block in RPO.
        let mut backedges: Vec<(BlockId, BlockId)> = vec![];
        for (block_rpo, &block) in cfg.postorder.iter().rev().enumerate() {
            for &succ in &cfg.block_succs[block] {
                let succ_po = cfg.postorder_pos[succ]
                    .expect("Edge from reachable to unreachable block is impossible");
                let succ_rpo = cfg.postorder.len() - 1 - succ_po;
                if succ_rpo <= block_rpo {
                    log::trace!("LoopNest compute: backedge from {} to {}", block, succ);
                    backedges.push((block, succ));
                }
            }
        }

        // For each backedge, find the backedge's natural loop and
        // accumulate those blocks into the set of blocks in each loop
        // body.
        let mut loop_bodies: FxHashMap<BlockId, FxHashSet<BlockId>> = FxHashMap::default();
        for &(from, to) in &backedges {
            assert!(
                cfg.dominates(to, from),
                "Irreducible CFG edge from {} to {}",
                from,
                to
            );
            let body = loop_bodies.entry(to).or_default();
            Self::collect_loop_body(body, to, cfg);
            log::trace!("loop body for header {}: {:?}", to, body);
        }

        // Now build the loop nest.
        let mut nodes = vec![];
        let mut visited = FxHashSet::default();
        for &block in cfg.postorder.iter().rev() {
            if visited.contains(&block) {
                continue;
            }
            let node = if loop_bodies.contains_key(&block) {
                Self::loop_node(cfg, block, &loop_bodies, &mut visited)
            } else {
                visited.insert(block);
                Node::Leaf(block)
            };
            nodes.push(node);
        }

        log::trace!("loop nest nodes: {:?}", nodes);
        LoopNest { nodes }
    }

    /// Adds to `blocks` every block reached by walking predecessor edges
    /// backwards from `header`, following only predecessors that `header`
    /// dominates.
    fn collect_loop_body(blocks: &mut FxHashSet<BlockId>, header: BlockId, cfg: &CFGInfo) {
        let mut workset = vec![header];
        while let Some(block) = workset.pop() {
            for &pred in &cfg.block_preds[block] {
                if !blocks.contains(&pred) && cfg.dominates(header, pred) {
                    blocks.insert(pred);
                    workset.push(pred);
                }
            }
        }
    }

    /// Builds the `Node::Loop` for `header`. Its unvisited body blocks are
    /// taken in reverse postorder, and nested loop headers become nested
    /// loop nodes.
    fn loop_node(
        cfg: &CFGInfo,
        header: BlockId,
        loops: &FxHashMap<BlockId, FxHashSet<BlockId>>,
        visited: &mut FxHashSet<BlockId>,
    ) -> Node {
        let mut body_blocks: Vec<BlockId> = loops.get(&header).unwrap().iter().cloned().collect();
        // Descending postorder position is reverse postorder.
        body_blocks.sort_by_key(|&block| std::cmp::Reverse(cfg.postorder_pos[block].unwrap()));

        let mut body_nodes = vec![];
        for block in body_blocks {
            if visited.contains(&block) {
                continue;
            }
            let is_inner_loop = block != header && loops.contains_key(&block);
            if is_inner_loop {
                body_nodes.push(Self::loop_node(cfg, block, loops, visited));
            } else {
                body_nodes.push(Node::Leaf(block));
                visited.insert(block);
            }
        }

        Node::Loop(header, body_nodes)
    }
}
/// Returns, for each block id, its index in the flattened loop nest, or
/// `None` for blocks that do not appear in the nest.
fn compute_linear_block_pos(cfg: &CFGInfo, nest: &LoopNest) -> Vec<Option<usize>> {
    let mut positions = vec![None; cfg.len()];
    let mut next = 0;
    nest.nodes
        .iter()
        .for_each(|node| compute_linear_block_pos_for_node(node, &mut next, &mut positions));
    positions
}
/// Numbers the leaves under `node` depth-first, starting at `*next`.
///
/// Each leaf's block receives the current counter value in `positions`, and
/// the counter advances by one. Loops contribute only through their subnodes.
fn compute_linear_block_pos_for_node(
    node: &Node,
    next: &mut usize,
    positions: &mut Vec<Option<usize>>,
) {
    match node {
        Node::Leaf(block) => {
            positions[*block] = Some(*next);
            *next += 1;
        }
        Node::Loop(_, subnodes) => {
            for subnode in subnodes {
                compute_linear_block_pos_for_node(subnode, next, positions);
            }
        }
    }
}
/// Collects every successor block that sits more than one position ahead of
/// its predecessor in the linear order.
///
/// Blocks without a linear position are skipped as sources. Panics if a
/// successor of a positioned block has no position.
fn compute_forward_edge_targets(
    cfg: &CFGInfo,
    linear_block_pos: &[Option<usize>],
) -> FxHashSet<BlockId> {
    let mut ret = FxHashSet::default();
    for block in 0..cfg.len() {
        let block_pos = match linear_block_pos[block] {
            Some(pos) => pos,
            None => continue,
        };
        for &succ in &cfg.block_succs[block] {
            let succ_pos = linear_block_pos[succ].unwrap();
            // An edge to the immediately following block is plain
            // fallthrough; a backward edge never satisfies the comparison.
            if succ_pos > block_pos + 1 {
                ret.insert(succ);
            }
        }
    }
    ret
}
/// Tree of Wasm structured-control-flow regions built from the loop nest.
#[derive(Clone, Debug)]
pub enum WasmRegion {
    /// Block starting at the first `BlockId`, with a fallthrough/exit
    /// label at the second `BlockId`.
    Block(BlockId, Option<BlockId>, Vec<WasmRegion>),
    /// Loop with a header at the given `BlockId`.
    Loop(BlockId, Vec<WasmRegion>),
    /// An individual basic block, just included inline (with no
    /// Wasm-level structure).
    Leaf(BlockId),
}
impl WasmRegion {
    /// First block of the region, whichever variant it is.
    pub fn header(&self) -> BlockId {
        match *self {
            WasmRegion::Block(block, ..)
            | WasmRegion::Loop(block, ..)
            | WasmRegion::Leaf(block) => block,
        }
    }

    /// Builds the region tree for the whole function.
    ///
    /// Panics if the loop nest is empty or does not start at block 0.
    pub fn compute(cfg: &CFGInfo, loop_nest: &LoopNest) -> WasmRegion {
        assert!(!loop_nest.nodes.is_empty());
        assert!(loop_nest.nodes[0].header() == 0);

        let linear_pos = compute_linear_block_pos(cfg, loop_nest);
        let forward_targets = compute_forward_edge_targets(cfg, &linear_pos);
        log::trace!(
            "WasmRegion::compute: forward_targets = {:?}",
            forward_targets
        );

        // Enclose loop nest in a virtual loop, to handle forward
        // edges in a unified way even outside any loop.
        let virtual_loop = Node::Loop(BlockId::MAX, loop_nest.nodes.clone());
        let top = match Self::compute_for_node(cfg, &forward_targets, &virtual_loop) {
            // The virtual loop is replaced by an outer block with no exit label.
            WasmRegion::Loop(_, subregions) => WasmRegion::Block(0, None, subregions),
            _ => unreachable!(),
        };

        log::trace!("Wasm region: {:?}", top);
        top
    }

    /// Converts one loop-nest node into a region.
    ///
    /// Inside a loop, whenever a subnode's header is a forward-edge target,
    /// all regions gathered so far at that level are wrapped in a `Block`
    /// whose exit label is that header. The loop's own header is exempt.
    fn compute_for_node(
        cfg: &CFGInfo,
        forward_targets: &FxHashSet<BlockId>,
        node: &Node,
    ) -> WasmRegion {
        log::trace!("WasmRegion::compute_for_node: node {:?}", node);
        match node {
            Node::Leaf(block) => {
                log::trace!(" -> leaf {}", block);
                WasmRegion::Leaf(*block)
            }
            Node::Loop(block, subnodes) => {
                let block = *block;

                // Scan subnodes and find forward-edge targets that
                // are at this level of the loop nest.
                let block_targets = subnodes
                    .iter()
                    .map(|n| n.header())
                    .filter(|header| forward_targets.contains(header))
                    .collect::<FxHashSet<_>>();
                log::trace!(" -> block targets are {:?}", block_targets,);

                let mut subregions: Vec<WasmRegion> = vec![];
                for subnode in subnodes {
                    let sub_header = subnode.header();
                    if sub_header != block && block_targets.contains(&sub_header) {
                        let wrapped = std::mem::take(&mut subregions);
                        assert!(!wrapped.is_empty());
                        let first = wrapped[0].header();
                        subregions.push(WasmRegion::Block(first, Some(sub_header), wrapped));
                    }
                    subregions.push(Self::compute_for_node(cfg, forward_targets, subnode));
                }

                log::trace!(" -> loop header {} subregions {:?}", block, subregions);
                WasmRegion::Loop(block, subregions)
            }
        }
    }
}
/// Flat sequence of entries produced by walking a `WasmRegion` tree in
/// `BlockOrder::compute`.
#[derive(Clone, Debug)]
pub struct BlockOrder {
/// The entries, in emission order.
pub entries: Vec<BlockOrderEntry>,
}
/// One element of a `BlockOrder`.
#[derive(Clone, Debug)]
pub enum BlockOrderEntry {
/// Emitted on entering a `WasmRegion::Block`; carries the region's
/// header block.
StartBlock(BlockId),
/// Emitted on entering a `WasmRegion::Loop`; carries the loop header.
StartLoop(BlockId),
/// Emitted after all subregions of a block or loop region.
End,
/// A basic block together with one `BlockOrderTarget` per target
/// visited on its terminator.
BasicBlock(BlockId, Vec<BlockOrderTarget>),
}
/// Describes how one terminator target of a basic block is reached in the
/// linearized order.
#[derive(Clone, Debug)]
pub struct BlockOrderTarget {
/// The destination block.
pub target: BlockId,
/// `None` means fallthrough. `Some(d)` is the number of labels on the
/// label stack that were pushed after the label matching `target`
/// (so `0` is the most recently pushed label).
pub relative_branch: Option<usize>,
/// Copy of the arguments attached to the terminator target.
pub args: Vec<Value>,
}
impl BlockOrder {
    /// Linearizes `wasm_region` into a flat list of start/end markers and
    /// basic blocks, resolving every terminator target of `f` either to a
    /// fallthrough or to a relative label depth.
    pub fn compute(f: &FunctionBody, cfg: &CFGInfo, wasm_region: &WasmRegion) -> BlockOrder {
        let mut entries = Vec::new();
        let mut label_stack = Vec::new();
        Self::generate_region(f, cfg, &mut label_stack, &mut entries, wasm_region, None);
        log::trace!("entries: {:?}", entries);
        BlockOrder { entries }
    }

    /// Emits the entries for `region` (recursively) onto `entries`.
    ///
    /// `target_stack` holds the labels of the enclosing blocks/loops,
    /// outermost first. `fallthrough` is the block that directly follows
    /// this region, if any; a branch to it needs no label.
    ///
    /// Panics if a non-fallthrough branch target is not on `target_stack`.
    fn generate_region(
        f: &FunctionBody,
        cfg: &CFGInfo,
        target_stack: &mut Vec<BlockId>,
        entries: &mut Vec<BlockOrderEntry>,
        region: &WasmRegion,
        fallthrough: Option<BlockId>,
    ) {
        log::trace!(
            "BlockOrder::generate_region: stack {:?} region {:?} fallthrough {:?}",
            target_stack,
            region,
            fallthrough,
        );

        // Leaves are emitted on the spot; blocks and loops yield a
        // (start marker, optional label, children) triple handled below.
        let nested = match *region {
            WasmRegion::Leaf(block) => {
                let mut branches = Vec::new();
                f.blocks[block].terminator.visit_targets(|succ| {
                    log::trace!(
                        "BlockOrder::generate_region: looking for succ {} in stack {:?} fallthrough {:?}",
                        succ.block,
                        target_stack,
                        fallthrough,
                    );
                    let relative_branch = if fallthrough == Some(succ.block) {
                        None
                    } else {
                        let index_from_outermost = target_stack
                            .iter()
                            .position(|&label| label == succ.block)
                            .expect("Malformed Wasm structured control flow");
                        // Convert to a depth counted from the innermost label.
                        Some(target_stack.len() - 1 - index_from_outermost)
                    };
                    branches.push(BlockOrderTarget {
                        target: succ.block,
                        relative_branch,
                        args: succ.args.clone(),
                    });
                });
                entries.push(BlockOrderEntry::BasicBlock(block, branches));
                None
            }
            WasmRegion::Block(header, exit, ref children) => {
                // Only the outermost block may lack an exit label.
                assert!(exit.is_some() || target_stack.is_empty());
                Some((BlockOrderEntry::StartBlock(header), exit, children))
            }
            WasmRegion::Loop(header, ref children) => {
                Some((BlockOrderEntry::StartLoop(header), Some(header), children))
            }
        };

        if let Some((start_marker, label, children)) = nested {
            if let Some(label) = label {
                target_stack.push(label);
            }
            entries.push(start_marker);
            for (idx, child) in children.iter().enumerate() {
                // The next sibling's header is this child's fallthrough;
                // the last child inherits the fallthrough of the parent.
                let child_fallthrough = children
                    .get(idx + 1)
                    .map(|sibling| sibling.header())
                    .or(fallthrough);
                Self::generate_region(f, cfg, target_stack, entries, child, child_fallthrough);
            }
            entries.push(BlockOrderEntry::End);
            if label.is_some() {
                target_stack.pop();
            }
        }

        log::trace!("BlockOrder::generate_region: done with region {:?}", region);
    }
}

View file

@ -52,7 +52,7 @@ fn main() -> Result<()> {
let bytes = std::fs::read(input)?; let bytes = std::fs::read(input)?;
debug!("Loaded {} bytes of Wasm data", bytes.len()); debug!("Loaded {} bytes of Wasm data", bytes.len());
let module = Module::from_wasm_bytes(&bytes[..])?; let module = Module::from_wasm_bytes(&bytes[..])?;
let produced = module.to_wasm_bytes(); let produced = module.to_wasm_bytes()?;
std::fs::write(output, &produced[..])?; std::fs::write(output, &produced[..])?;
} }
} }

View file

@ -15,8 +15,7 @@ use wasmparser::{
}; };
pub fn wasm_to_ir(bytes: &[u8]) -> Result<Module<'_>> { pub fn wasm_to_ir(bytes: &[u8]) -> Result<Module<'_>> {
let mut module = Module::default(); let mut module = Module::with_orig_bytes(bytes);
module.orig_bytes = bytes;
let parser = Parser::new(0); let parser = Parser::new(0);
let mut next_func = 0; let mut next_func = 0;
for payload in parser.parse_all(bytes) { for payload in parser.parse_all(bytes) {
@ -38,7 +37,7 @@ fn handle_payload<'a>(
for _ in 0..reader.get_count() { for _ in 0..reader.get_count() {
let ty = reader.read()?; let ty = reader.read()?;
if let TypeDef::Func(fty) = ty { if let TypeDef::Func(fty) = ty {
module.signatures.push(fty); module.frontend_add_signature(fty);
} }
} }
} }
@ -46,14 +45,14 @@ fn handle_payload<'a>(
for _ in 0..reader.get_count() { for _ in 0..reader.get_count() {
match reader.read()?.ty { match reader.read()?.ty {
ImportSectionEntryType::Function(sig_idx) => { ImportSectionEntryType::Function(sig_idx) => {
module.funcs.push(FuncDecl::Import(sig_idx as SignatureId)); module.frontend_add_func(FuncDecl::Import(sig_idx as SignatureId));
*next_func += 1; *next_func += 1;
} }
ImportSectionEntryType::Global(ty) => { ImportSectionEntryType::Global(ty) => {
module.globals.push(ty.content_type); module.frontend_add_global(ty.content_type);
} }
ImportSectionEntryType::Table(ty) => { ImportSectionEntryType::Table(ty) => {
module.tables.push(ty.element_type); module.frontend_add_table(ty.element_type);
} }
_ => {} _ => {}
} }
@ -62,36 +61,30 @@ fn handle_payload<'a>(
Payload::GlobalSection(mut reader) => { Payload::GlobalSection(mut reader) => {
for _ in 0..reader.get_count() { for _ in 0..reader.get_count() {
let global = reader.read()?; let global = reader.read()?;
module.globals.push(global.ty.content_type); module.frontend_add_global(global.ty.content_type);
} }
} }
Payload::TableSection(mut reader) => { Payload::TableSection(mut reader) => {
for _ in 0..reader.get_count() { for _ in 0..reader.get_count() {
let table = reader.read()?; let table = reader.read()?;
module.tables.push(table.element_type); module.frontend_add_table(table.element_type);
} }
} }
Payload::FunctionSection(mut reader) => { Payload::FunctionSection(mut reader) => {
for _ in 0..reader.get_count() { for _ in 0..reader.get_count() {
let sig_idx = reader.read()? as SignatureId; let sig_idx = reader.read()? as SignatureId;
module module.frontend_add_func(FuncDecl::Body(sig_idx, FunctionBody::default()));
.funcs
.push(FuncDecl::Body(sig_idx, FunctionBody::default()));
} }
} }
Payload::CodeSectionEntry(body) => { Payload::CodeSectionEntry(body) => {
let func_idx = *next_func; let func_idx = *next_func;
*next_func += 1; *next_func += 1;
let my_sig = module.funcs[func_idx].sig(); let my_sig = module.func(func_idx).sig();
let body = parse_body(module, my_sig, body)?; let body = parse_body(module, my_sig, body)?;
match &mut module.funcs[func_idx] { let existing_body = module.func_mut(func_idx).body_mut().unwrap();
FuncDecl::Body(_, ref mut existing_body) => { *existing_body = body;
*existing_body = body;
}
_ => unreachable!(),
}
} }
_ => {} _ => {}
} }
@ -106,11 +99,11 @@ fn parse_body<'a>(
) -> Result<FunctionBody> { ) -> Result<FunctionBody> {
let mut ret: FunctionBody = FunctionBody::default(); let mut ret: FunctionBody = FunctionBody::default();
for &param in &module.signatures[my_sig].params[..] { for &param in &module.signature(my_sig).params[..] {
ret.locals.push(param); ret.locals.push(param);
} }
ret.n_params = module.signatures[my_sig].params.len(); ret.n_params = module.signature(my_sig).params.len();
for &r in &module.signatures[my_sig].returns[..] { for &r in &module.signature(my_sig).returns[..] {
ret.rets.push(r); ret.rets.push(r);
} }
@ -126,14 +119,14 @@ fn parse_body<'a>(
trace!( trace!(
"Parsing function body: locals = {:?} sig = {:?}", "Parsing function body: locals = {:?} sig = {:?}",
ret.locals, ret.locals,
module.signatures[my_sig] module.signature(my_sig)
); );
let mut builder = FunctionBodyBuilder::new(module, my_sig, &mut ret); let mut builder = FunctionBodyBuilder::new(module, my_sig, &mut ret);
builder.locals.seal_block_preds(0, &mut builder.body); builder.locals.seal_block_preds(0, &mut builder.body);
builder.locals.start_block(0); builder.locals.start_block(0);
for (arg_idx, &arg_ty) in module.signatures[my_sig].params.iter().enumerate() { for (arg_idx, &arg_ty) in module.signature(my_sig).params.iter().enumerate() {
let local_idx = arg_idx as LocalId; let local_idx = arg_idx as LocalId;
let value = builder.body.add_value(ValueDef::Arg(arg_idx), vec![arg_ty]); let value = builder.body.add_value(ValueDef::Arg(arg_idx), vec![arg_ty]);
trace!("defining local {} to value {}", local_idx, value); trace!("defining local {} to value {}", local_idx, value);
@ -141,7 +134,7 @@ fn parse_body<'a>(
builder.locals.set(local_idx, value); builder.locals.set(local_idx, value);
} }
let n_args = module.signatures[my_sig].params.len(); let n_args = module.signature(my_sig).params.len();
for (offset, local_ty) in locals.into_iter().enumerate() { for (offset, local_ty) in locals.into_iter().enumerate() {
let local_idx = (n_args + offset) as u32; let local_idx = (n_args + offset) as u32;
builder.locals.declare(local_idx, local_ty); builder.locals.declare(local_idx, local_ty);
@ -517,7 +510,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> {
}; };
// Push initial implicit Block. // Push initial implicit Block.
let results = module.signatures[my_sig].returns.to_vec(); let results = module.signature(my_sig).returns.to_vec();
let out = ret.body.add_block(); let out = ret.body.add_block();
ret.add_block_params(out, &results[..]); ret.add_block_params(out, &results[..]);
ret.ctrl_stack.push(Frame::Block { ret.ctrl_stack.push(Frame::Block {
@ -997,7 +990,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> {
} }
wasmparser::Operator::Return => { wasmparser::Operator::Return => {
let retvals = self.pop_n(self.module.signatures[self.my_sig].returns.len()); let retvals = self.pop_n(self.module.signature(self.my_sig).returns.len());
self.emit_ret(&retvals[..]); self.emit_ret(&retvals[..]);
} }
@ -1019,7 +1012,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> {
TypeOrFuncType::Type(Type::EmptyBlockType) => (vec![], vec![]), TypeOrFuncType::Type(Type::EmptyBlockType) => (vec![], vec![]),
TypeOrFuncType::Type(ret_ty) => (vec![], vec![ret_ty]), TypeOrFuncType::Type(ret_ty) => (vec![], vec![ret_ty]),
TypeOrFuncType::FuncType(sig_idx) => { TypeOrFuncType::FuncType(sig_idx) => {
let sig = &self.module.signatures[sig_idx as SignatureId]; let sig = &self.module.signature(sig_idx as SignatureId);
( (
Vec::from(sig.params.clone()), Vec::from(sig.params.clone()),
Vec::from(sig.returns.clone()), Vec::from(sig.returns.clone()),

View file

@ -1,7 +1,9 @@
//! Intermediate representation for Wasm. //! Intermediate representation for Wasm.
use crate::{backend, backend::binaryen};
use crate::{frontend, Operator}; use crate::{frontend, Operator};
use anyhow::Result; use anyhow::Result;
use fxhash::FxHashSet;
use wasmparser::{FuncType, Type}; use wasmparser::{FuncType, Type};
pub type SignatureId = usize; pub type SignatureId = usize;
@ -17,11 +19,53 @@ pub const INVALID_BLOCK: BlockId = usize::MAX;
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct Module<'a> { pub struct Module<'a> {
pub orig_bytes: &'a [u8], orig_bytes: &'a [u8],
pub funcs: Vec<FuncDecl>, funcs: Vec<FuncDecl>,
pub signatures: Vec<FuncType>, signatures: Vec<FuncType>,
pub globals: Vec<Type>, globals: Vec<Type>,
pub tables: Vec<Type>, tables: Vec<Type>,
dirty_funcs: FxHashSet<FuncId>,
}
impl<'a> Module<'a> {
pub(crate) fn with_orig_bytes(orig_bytes: &'a [u8]) -> Module<'a> {
let mut m = Module::default();
m.orig_bytes = orig_bytes;
m
}
}
impl<'a> Module<'a> {
pub fn func<'b>(&'b self, id: FuncId) -> &'b FuncDecl {
&self.funcs[id]
}
pub fn func_mut<'b>(&'b mut self, id: FuncId) -> &'b mut FuncDecl {
self.dirty_funcs.insert(id);
&mut self.funcs[id]
}
pub fn signature<'b>(&'b self, id: SignatureId) -> &'b FuncType {
&self.signatures[id]
}
pub fn global_ty(&self, id: GlobalId) -> Type {
self.globals[id as usize]
}
pub fn table_ty(&self, id: TableId) -> Type {
self.tables[id as usize]
}
pub(crate) fn frontend_add_signature(&mut self, ty: FuncType) {
self.signatures.push(ty);
}
pub(crate) fn frontend_add_func(&mut self, body: FuncDecl) {
self.funcs.push(body);
}
pub(crate) fn frontend_add_table(&mut self, ty: Type) {
self.tables.push(ty);
}
pub(crate) fn frontend_add_global(&mut self, ty: Type) {
self.globals.push(ty);
}
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@ -37,6 +81,20 @@ impl FuncDecl {
FuncDecl::Body(sig, ..) => *sig, FuncDecl::Body(sig, ..) => *sig,
} }
} }
pub fn body(&self) -> Option<&FunctionBody> {
match self {
FuncDecl::Body(_, body) => Some(body),
_ => None,
}
}
pub fn body_mut(&mut self) -> Option<&mut FunctionBody> {
match self {
FuncDecl::Body(_, body) => Some(body),
_ => None,
}
}
} }
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
@ -457,7 +515,15 @@ impl<'a> Module<'a> {
frontend::wasm_to_ir(bytes) frontend::wasm_to_ir(bytes)
} }
pub fn to_wasm_bytes(&self) -> Vec<u8> { pub fn to_wasm_bytes(&self) -> Result<Vec<u8>> {
todo!("use Binaryen") let binaryen_module = binaryen::Module::read(self.orig_bytes)?;
for &func in &self.dirty_funcs {
if let Some(body) = self.func(func).body() {
let mut binaryen_func = binaryen_module.func(func);
let binaryen_expr = backend::lower::generate_body(self, body);
binaryen_func.set_body(binaryen_expr);
}
}
binaryen_module.write()
} }
} }

View file

@ -16,15 +16,15 @@ pub fn op_inputs(
&Operator::Unreachable | &Operator::Nop => Ok(vec![]), &Operator::Unreachable | &Operator::Nop => Ok(vec![]),
&Operator::Call { function_index } => { &Operator::Call { function_index } => {
let sig = module.funcs[function_index as usize].sig(); let sig = module.func(function_index).sig();
Ok(Vec::from(module.signatures[sig].params.clone())) Ok(Vec::from(module.signature(sig).params.clone()))
} }
&Operator::CallIndirect { index, .. } => { &Operator::CallIndirect { index, .. } => {
let mut params = module.signatures[index as usize].params.to_vec(); let mut params = module.signature(index).params.to_vec();
params.push(Type::I32); params.push(Type::I32);
Ok(params) Ok(params)
} }
&Operator::Return => Ok(Vec::from(module.signatures[my_sig].returns.clone())), &Operator::Return => Ok(Vec::from(module.signature(my_sig).returns.clone())),
&Operator::LocalSet { local_index } | &Operator::LocalTee { local_index } => { &Operator::LocalSet { local_index } | &Operator::LocalTee { local_index } => {
Ok(vec![my_locals[local_index as usize]]) Ok(vec![my_locals[local_index as usize]])
@ -38,7 +38,7 @@ pub fn op_inputs(
&Operator::TypedSelect { ty } => Ok(vec![ty, ty, Type::I32]), &Operator::TypedSelect { ty } => Ok(vec![ty, ty, Type::I32]),
&Operator::GlobalGet { .. } => Ok(vec![]), &Operator::GlobalGet { .. } => Ok(vec![]),
&Operator::GlobalSet { global_index } => Ok(vec![module.globals[global_index as usize]]), &Operator::GlobalSet { global_index } => Ok(vec![module.global_ty(global_index)]),
Operator::I32Load { .. } Operator::I32Load { .. }
| Operator::I64Load { .. } | Operator::I64Load { .. }
@ -216,7 +216,7 @@ pub fn op_inputs(
Operator::I32ReinterpretF32 => Ok(vec![Type::F32]), Operator::I32ReinterpretF32 => Ok(vec![Type::F32]),
Operator::I64ReinterpretF64 => Ok(vec![Type::F64]), Operator::I64ReinterpretF64 => Ok(vec![Type::F64]),
Operator::TableGet { .. } => Ok(vec![Type::I32]), Operator::TableGet { .. } => Ok(vec![Type::I32]),
Operator::TableSet { table } => Ok(vec![Type::I32, module.tables[*table as usize]]), Operator::TableSet { table } => Ok(vec![Type::I32, module.table_ty(*table)]),
Operator::TableGrow { .. } => Ok(vec![Type::I32]), Operator::TableGrow { .. } => Ok(vec![Type::I32]),
Operator::TableSize { .. } => Ok(vec![]), Operator::TableSize { .. } => Ok(vec![]),
Operator::MemorySize { .. } => Ok(vec![]), Operator::MemorySize { .. } => Ok(vec![]),
@ -234,11 +234,11 @@ pub fn op_outputs(
&Operator::Unreachable | &Operator::Nop => Ok(vec![]), &Operator::Unreachable | &Operator::Nop => Ok(vec![]),
&Operator::Call { function_index } => { &Operator::Call { function_index } => {
let sig = module.funcs[function_index as usize].sig(); let sig = module.func(function_index).sig();
Ok(Vec::from(module.signatures[sig].returns.clone())) Ok(Vec::from(module.signature(sig).returns.clone()))
} }
&Operator::CallIndirect { index, .. } => { &Operator::CallIndirect { index, .. } => {
Ok(Vec::from(module.signatures[index as usize].returns.clone())) Ok(Vec::from(module.signature(index).returns.clone()))
} }
&Operator::Return => Ok(vec![]), &Operator::Return => Ok(vec![]),
&Operator::LocalSet { .. } => Ok(vec![]), &Operator::LocalSet { .. } => Ok(vec![]),
@ -251,7 +251,7 @@ pub fn op_outputs(
Ok(vec![val_ty]) Ok(vec![val_ty])
} }
&Operator::TypedSelect { ty } => Ok(vec![ty]), &Operator::TypedSelect { ty } => Ok(vec![ty]),
&Operator::GlobalGet { global_index } => Ok(vec![module.globals[global_index as usize]]), &Operator::GlobalGet { global_index } => Ok(vec![module.global_ty(global_index)]),
&Operator::GlobalSet { .. } => Ok(vec![]), &Operator::GlobalSet { .. } => Ok(vec![]),
Operator::I32Load { .. } Operator::I32Load { .. }
@ -425,7 +425,7 @@ pub fn op_outputs(
Operator::F64ReinterpretI64 => Ok(vec![Type::F64]), Operator::F64ReinterpretI64 => Ok(vec![Type::F64]),
Operator::I32ReinterpretF32 => Ok(vec![Type::I32]), Operator::I32ReinterpretF32 => Ok(vec![Type::I32]),
Operator::I64ReinterpretF64 => Ok(vec![Type::I64]), Operator::I64ReinterpretF64 => Ok(vec![Type::I64]),
Operator::TableGet { table } => Ok(vec![module.tables[*table as usize]]), Operator::TableGet { table } => Ok(vec![module.table_ty(*table)]),
Operator::TableSet { .. } => Ok(vec![]), Operator::TableSet { .. } => Ok(vec![]),
Operator::TableGrow { .. } => Ok(vec![]), Operator::TableGrow { .. } => Ok(vec![]),
Operator::TableSize { .. } => Ok(vec![Type::I32]), Operator::TableSize { .. } => Ok(vec![Type::I32]),