//! Intermediate representation for Wasm. use crate::{ backend::{ produce_func_wasm, BlockOrder, FuncTypeSink, Locations, LoopNest, SerializedBody, WasmRegion, }, cfg::CFGInfo, frontend, ops::ty_to_valty, Operator, }; use anyhow::Result; use fxhash::FxHashMap; use rayon::prelude::*; use std::collections::hash_map::Entry; use wasmparser::{FuncType, SectionReader, Type}; pub type SignatureId = usize; pub type FuncId = usize; pub type BlockId = usize; pub type InstId = usize; pub type LocalId = u32; pub type GlobalId = u32; pub type TableId = u32; pub type MemoryId = u32; pub const INVALID_BLOCK: BlockId = usize::MAX; #[derive(Clone, Debug, Default)] pub struct Module<'a> { pub orig_bytes: &'a [u8], pub funcs: Vec, pub signatures: Vec, pub globals: Vec, pub tables: Vec, } #[derive(Clone, Debug)] pub enum FuncDecl { Import(SignatureId), Body(SignatureId, FunctionBody), } impl FuncDecl { pub fn sig(&self) -> SignatureId { match self { FuncDecl::Import(sig) => *sig, FuncDecl::Body(sig, ..) => *sig, } } } #[derive(Clone, Debug, Default)] pub struct FunctionBody { pub rets: Vec, pub locals: Vec, pub blocks: Vec, /// Sea-of-nodes representation. pub values: Vec, value_dedup: FxHashMap, /// A single value can have multiple types if multi-value (e.g. a /// call). pub types: Vec>, } impl FunctionBody { pub fn add_block(&mut self) -> BlockId { let id = self.blocks.len(); self.blocks.push(Block::default()); self.blocks[id].id = id; log::trace!("add_block: block {}", id); id } pub fn add_edge(&mut self, from: BlockId, to: BlockId) { let succ_pos = self.blocks[from].succs.len(); let pred_pos = self.blocks[to].preds.len(); self.blocks[from].succs.push(to); self.blocks[to].preds.push(from); self.blocks[from].pos_in_succ_pred.push(pred_pos); self.blocks[to].pos_in_pred_succ.push(succ_pos); log::trace!("add_edge: from {} to {}", from, to); } pub fn add_value(&mut self, value: ValueDef, tys: Vec) -> Value { log::trace!("add_value: def {:?} ty {:?}", value, tys); let id = match self.value_dedup.entry(value.clone()) { Entry::Occupied(o) => *o.get(), Entry::Vacant(v) => { let id = Value(self.values.len() as u32); self.values.push(value.clone()); self.types.push(tys); v.insert(id); id } }; log::trace!(" -> value {:?}", id); id } pub fn set_alias(&mut self, value: Value, to: Value) { log::trace!("set_alias: value {:?} to {:?}", value, to); // Resolve the `to` value through all existing aliases. let to = self.resolve_and_update_alias(to); // Disallow cycles. if to == value { panic!("Cannot create an alias cycle"); } self.values[value.index()] = ValueDef::Alias(to); } pub fn resolve_alias(&self, value: Value) -> Value { let mut result = value; loop { if let &ValueDef::Alias(to) = &self.values[result.index()] { result = to; } else { break; } } result } pub fn add_mutable_inst(&mut self, tys: Vec, def: ValueDef) -> Value { let value = Value(self.values.len() as u32); self.types.push(tys); self.values.push(def); value } pub fn add_blockparam(&mut self, block: BlockId, ty: Type) -> Value { let index = self.blocks[block].params.len(); let value = self.add_value(ValueDef::BlockParam(block, index), vec![ty]); self.blocks[block].params.push((ty, value)); value } pub fn add_placeholder(&mut self, ty: Type) -> Value { self.add_mutable_inst(vec![ty], ValueDef::Placeholder) } pub fn replace_placeholder_with_blockparam(&mut self, block: BlockId, value: Value) { assert!(self.values[value.index()] == ValueDef::Placeholder); let ty = self.types[value.index()].get(0).cloned().unwrap(); let index = self.blocks[block].params.len(); self.blocks[block].params.push((ty, value)); self.values[value.index()] = ValueDef::BlockParam(block, index); } pub fn resolve_and_update_alias(&mut self, value: Value) -> Value { let to = self.resolve_alias(value); // Short-circuit the chain, union-find-style. if let &ValueDef::Alias(orig_to) = &self.values[value.index()] { if orig_to != to { self.values[value.index()] = ValueDef::Alias(to); } } to } pub fn append_to_block(&mut self, block: BlockId, value: Value) { self.blocks[block].insts.push(value); } pub fn end_block(&mut self, block: BlockId, terminator: Terminator) { terminator.visit_successors(|succ| { self.add_edge(block, succ); }); self.blocks[block].terminator = terminator; } pub fn add_local(&mut self, ty: Type) -> LocalId { let id = self.locals.len() as LocalId; self.locals.push(ty); id } pub fn values<'a>(&'a self) -> impl Iterator + 'a { self.values .iter() .enumerate() .map(|(idx, value_def)| (Value(idx as u32), value_def)) } } impl std::ops::Index for FunctionBody { type Output = ValueDef; fn index(&self, index: Value) -> &ValueDef { &self.values[index.0 as usize] } } impl std::ops::IndexMut for FunctionBody { fn index_mut(&mut self, index: Value) -> &mut ValueDef { &mut self.values[index.0 as usize] } } impl std::ops::Index for FunctionBody { type Output = Block; fn index(&self, index: BlockId) -> &Block { &self.blocks[index] } } impl std::ops::IndexMut for FunctionBody { fn index_mut(&mut self, index: BlockId) -> &mut Block { &mut self.blocks[index] } } #[derive(Clone, Debug, Default)] pub struct Block { pub id: BlockId, /// Side-effecting values from the sea-of-nodes that are computed, in order. pub insts: Vec, /// Terminator: branch or return. pub terminator: Terminator, /// Successor blocks. pub succs: Vec, /// For each successor block, our index in its `preds` array. pub pos_in_succ_pred: Vec, /// Predecessor blocks. pub preds: Vec, /// For each predecessor block, our index in its `succs` array. pub pos_in_pred_succ: Vec, /// Type and Value for each blockparam. pub params: Vec<(Type, Value)>, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Value(u32); impl std::fmt::Display for Value { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "v{}", self.0) } } impl Value { pub fn index(self) -> usize { self.0 as usize } pub fn from_index(value: usize) -> Value { Self(value as u32) } } #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum ValueDef { Arg(usize), BlockParam(BlockId, usize), Operator(Operator, Vec), PickOutput(Value, usize), Alias(Value), Placeholder, } impl ValueDef { pub fn visit_uses(&self, mut f: F) { match self { &ValueDef::Arg { .. } => {} &ValueDef::BlockParam { .. } => {} &ValueDef::Operator(_, ref args) => { for &arg in args { f(arg); } } &ValueDef::PickOutput(from, ..) => f(from), &ValueDef::Alias(value) => f(value), &ValueDef::Placeholder => {} } } pub fn update_uses(&mut self, mut f: F) { match self { &mut ValueDef::Arg { .. } => {} &mut ValueDef::BlockParam { .. } => {} &mut ValueDef::Operator(_, ref mut args) => { for arg in args { f(arg); } } &mut ValueDef::PickOutput(ref mut from, ..) => f(from), &mut ValueDef::Alias(ref mut value) => f(value), &mut ValueDef::Placeholder => {} } } } #[derive(Clone, Debug)] pub struct BlockTarget { pub block: BlockId, pub args: Vec, } #[derive(Clone, Debug)] pub enum Terminator { Br { target: BlockTarget, }, CondBr { cond: Value, if_true: BlockTarget, if_false: BlockTarget, }, Select { value: Value, targets: Vec, default: BlockTarget, }, Return { values: Vec, }, None, } impl std::default::Default for Terminator { fn default() -> Self { Terminator::None } } impl Terminator { pub fn visit_targets(&self, mut f: F) { match self { Terminator::Return { .. } => {} Terminator::Br { ref target, .. } => f(target), Terminator::CondBr { ref if_true, ref if_false, .. } => { f(if_true); f(if_false); } Terminator::Select { ref targets, ref default, .. } => { f(default); for target in targets { f(target); } } Terminator::None => {} } } pub fn update_targets(&mut self, mut f: F) { match self { Terminator::Return { .. } => {} Terminator::Br { ref mut target, .. } => f(target), Terminator::CondBr { ref mut if_true, ref mut if_false, .. } => { f(if_true); f(if_false); } Terminator::Select { ref mut targets, ref mut default, .. } => { f(default); for target in targets { f(target); } } Terminator::None => {} } } pub fn visit_target(&self, index: usize, mut f: F) { match (index, self) { (0, Terminator::Br { ref target, .. }) => f(target), (0, Terminator::CondBr { ref if_true, .. }) => { f(if_true); } (1, Terminator::CondBr { ref if_false, .. }) => { f(if_false); } (0, Terminator::Select { ref default, .. }) => { f(default); } (i, Terminator::Select { ref targets, .. }) if i <= targets.len() => { f(&targets[i - 1]); } _ => panic!("out of bounds"), } } pub fn update_target(&mut self, index: usize, mut f: F) { match (index, self) { (0, Terminator::Br { ref mut target, .. }) => f(target), ( 0, Terminator::CondBr { ref mut if_true, .. }, ) => { f(if_true); } ( 1, Terminator::CondBr { ref mut if_false, .. }, ) => { f(if_false); } ( 0, Terminator::Select { ref mut default, .. }, ) => { f(default); } ( i, Terminator::Select { ref mut targets, .. }, ) if i <= targets.len() => { f(&mut targets[i - 1]); } (i, this) => panic!("out of bounds: index {} term {:?}", i, this), } } pub fn visit_successors(&self, mut f: F) { self.visit_targets(|target| f(target.block)); } pub fn visit_uses(&self, mut f: F) { self.visit_targets(|target| { for &arg in &target.args { f(arg); } }); match self { &Terminator::CondBr { cond, .. } => f(cond), &Terminator::Select { value, .. } => f(value), &Terminator::Return { ref values, .. } => { for &value in values { f(value); } } _ => {} } } pub fn update_uses(&mut self, mut f: F) { self.update_targets(|target| { for arg in &mut target.args { f(arg); } }); match self { &mut Terminator::CondBr { ref mut cond, .. } => f(cond), &mut Terminator::Select { ref mut value, .. } => f(value), &mut Terminator::Return { ref mut values, .. } => { for value in values { f(value); } } _ => {} } } } impl<'a> Module<'a> { pub fn from_wasm_bytes(bytes: &'a [u8]) -> Result { frontend::wasm_to_ir(bytes) } pub fn to_wasm_bytes(&self) -> Vec { // Do most of the compilation in parallel: up to the // serialized (pre-regalloc) body and the regalloc // results. Only the "final parts assembly" needs to be // serialized because it can add function signatures. let compiled: Vec<(u32, &FunctionBody, SerializedBody, Locations)> = self .funcs .par_iter() .filter_map(|func| match func { &FuncDecl::Body(sig, ref body) => { let cfg = CFGInfo::new(body); let loopnest = LoopNest::compute(&cfg); let regions = WasmRegion::compute(&cfg, &loopnest); let blockorder = BlockOrder::compute(body, &cfg, ®ions); let serialized = SerializedBody::compute(body, &cfg, &blockorder); log::trace!("serialized: {:?}", serialized); let locations = Locations::compute(body, &serialized); log::trace!("locations: {:?}", locations); Some((sig as u32, body, serialized, locations)) } _ => None, }) .collect(); // Build the final code section and function-type section. let mut signatures = SignatureAdder::new(&self); let mut code_section = wasm_encoder::CodeSection::new(); let mut func_section = wasm_encoder::FunctionSection::new(); for (sig, body, serialized, locations) in compiled { let func_body = produce_func_wasm(body, &serialized, &locations, &mut signatures); log::trace!("body: {:?}", func_body); let mut locals: Vec<(u32, wasm_encoder::ValType)> = vec![]; for local_ty in func_body.locals { if locals.len() > 0 && locals.last().unwrap().1 == local_ty { locals.last_mut().unwrap().0 += 1; } else { locals.push((1, local_ty)); } } let mut func = wasm_encoder::Function::new(locals); for inst in func_body.operators { func.instruction(&inst); } func_section.function(sig); code_section.function(&func); } // Build the final function-signature (type) section. let mut type_section = wasm_encoder::TypeSection::new(); for sig in &signatures.signatures { let params: Vec = sig.params.iter().map(|&ty| ty_to_valty(ty)).collect(); let returns: Vec = sig.returns.iter().map(|&ty| ty_to_valty(ty)).collect(); type_section.function(params, returns); } // Now do a final pass over the original bytes with // wasmparser, replacing the type section, function section, // and code section. (TODO: allow new imports to be added // too?) let parser = wasmparser::Parser::new(0); let mut module = wasm_encoder::Module::new(); for payload in parser.parse_all(self.orig_bytes) { match payload.unwrap() { wasmparser::Payload::TypeSection(..) => { module.section(&type_section); } wasmparser::Payload::FunctionSection(..) => { module.section(&func_section); } wasmparser::Payload::CodeSectionStart { .. } => { module.section(&code_section); } wasmparser::Payload::CodeSectionEntry(..) => {} wasmparser::Payload::ImportSection(reader) => { let range = reader.range(); let bytes = &self.orig_bytes[range.start..range.end]; module.section(&wasm_encoder::RawSection { id: 2, data: bytes }); } wasmparser::Payload::TableSection(reader) => { let range = reader.range(); let bytes = &self.orig_bytes[range.start..range.end]; module.section(&wasm_encoder::RawSection { id: 4, data: bytes }); } wasmparser::Payload::MemorySection(reader) => { let range = reader.range(); let bytes = &self.orig_bytes[range.start..range.end]; module.section(&wasm_encoder::RawSection { id: 5, data: bytes }); } wasmparser::Payload::GlobalSection(reader) => { let range = reader.range(); let bytes = &self.orig_bytes[range.start..range.end]; module.section(&wasm_encoder::RawSection { id: 6, data: bytes }); } wasmparser::Payload::ExportSection(reader) => { let range = reader.range(); let bytes = &self.orig_bytes[range.start..range.end]; module.section(&wasm_encoder::RawSection { id: 7, data: bytes }); } wasmparser::Payload::StartSection { range, .. } => { let bytes = &self.orig_bytes[range.start..range.end]; module.section(&wasm_encoder::RawSection { id: 8, data: bytes }); } wasmparser::Payload::ElementSection(reader) => { let range = reader.range(); let bytes = &self.orig_bytes[range.start..range.end]; module.section(&wasm_encoder::RawSection { id: 9, data: bytes }); } wasmparser::Payload::DataSection(reader) => { let range = reader.range(); let bytes = &self.orig_bytes[range.start..range.end]; module.section(&wasm_encoder::RawSection { id: 11, data: bytes, }); } wasmparser::Payload::DataCountSection { range, .. } => { let bytes = &self.orig_bytes[range.start..range.end]; module.section(&wasm_encoder::RawSection { id: 12, data: bytes, }); } _ => {} } } module.finish() } } struct SignatureAdder { signatures: Vec, signature_dedup: FxHashMap, } impl SignatureAdder { fn new(module: &Module<'_>) -> Self { let signature_dedup: FxHashMap = module .signatures .iter() .enumerate() .map(|(idx, sig)| (sig.clone(), idx as u32)) .collect(); Self { signatures: module.signatures.clone(), signature_dedup, } } } impl FuncTypeSink for SignatureAdder { fn add_signature(&mut self, params: Vec, results: Vec) -> u32 { let ft = wasmparser::FuncType { params: params.into_boxed_slice(), returns: results.into_boxed_slice(), }; match self.signature_dedup.entry(ft.clone()) { Entry::Occupied(o) => *o.get(), Entry::Vacant(v) => { let idx = self.signatures.len() as u32; self.signatures.push(ft); *v.insert(idx) } } } }