From 4585ec48be35cb2bb7d911adc20e86b2ed7a623a Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 24 Dec 2021 13:00:21 -0800 Subject: [PATCH] Final module emission (?) --- Cargo.toml | 1 + src/backend/final.rs | 101 +++++++++++++----------- src/ir.rs | 181 +++++++++++++++++++++++++++++++++++++++---- 3 files changed, 225 insertions(+), 58 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b924ce6..3229165 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,3 +16,4 @@ env_logger = "0.9" fxhash = "0.2" smallvec = "1.7" regalloc2 = { git = 'https://github.com/bytecodealliance/regalloc2', rev = 'c7bc6c941cd81bbd30b95969009b7e61539f2b4c' } +rayon = "1.5" diff --git a/src/backend/final.rs b/src/backend/final.rs index 18bac1e..a06ef09 100644 --- a/src/backend/final.rs +++ b/src/backend/final.rs @@ -4,24 +4,28 @@ use super::{Locations, SerializedBlockTarget, SerializedBody, SerializedOperator use crate::{ops::ty_to_valty, FunctionBody}; use std::borrow::Cow; use wasm_encoder::BlockType; +use wasmparser::Type; #[derive(Clone, Debug)] pub struct Wasm { - operators: Vec>, - locals: Vec, - func_types: Vec<(Vec, Vec)>, + pub operators: Vec>, + pub locals: Vec, } -impl Wasm { - fn create_type( - &mut self, - params: Vec, - results: Vec, - ) -> u32 { - let idx = self.func_types.len() as u32; - self.func_types.push((params, results)); - idx +struct WasmContext<'a, FT: FuncTypeSink> { + wasm: &'a mut Wasm, + func_type_sink: &'a mut FT, +} + +pub trait FuncTypeSink { + fn add_signature(&mut self, params: Vec, results: Vec) -> u32; +} + +impl<'a, FT: FuncTypeSink> WasmContext<'a, FT> { + fn create_type(&mut self, params: Vec, results: Vec) -> u32 { + self.func_type_sink.add_signature(params, results) } + fn translate(&mut self, op: &SerializedOperator, locations: &Locations) { match op { SerializedOperator::StartBlock { @@ -29,36 +33,33 @@ impl Wasm { ref results, .. } => { - let ty = self.create_type( - params.iter().map(|(ty, _)| ty_to_valty(*ty)).collect(), - results.iter().map(|ty| ty_to_valty(*ty)).collect(), - ); - self.operators - .push(wasm_encoder::Instruction::Block(BlockType::FunctionType( - ty, - ))); + let ty = + self.create_type(params.iter().map(|&(ty, _)| ty).collect(), results.clone()); + self.wasm.operators.push(wasm_encoder::Instruction::Block( + BlockType::FunctionType(ty), + )); } SerializedOperator::StartLoop { ref params, ref results, .. } => { - let ty = self.create_type( - params.iter().map(|(ty, _)| ty_to_valty(*ty)).collect(), - results.iter().map(|ty| ty_to_valty(*ty)).collect(), - ); - self.operators + let ty = + self.create_type(params.iter().map(|&(ty, _)| ty).collect(), results.clone()); + self.wasm + .operators .push(wasm_encoder::Instruction::Loop(BlockType::FunctionType(ty))); } SerializedOperator::End => { - self.operators.push(wasm_encoder::Instruction::End); + self.wasm.operators.push(wasm_encoder::Instruction::End); } SerializedOperator::GetArg(index) => { - self.operators + self.wasm + .operators .push(wasm_encoder::Instruction::LocalGet(*index as u32)); } SerializedOperator::Operator(op) => { - self.operators.push(op.clone().into()); + self.wasm.operators.push(op.clone().into()); } SerializedOperator::Br(ref target) => { self.translate_target(0, target, locations); @@ -67,53 +68,56 @@ impl Wasm { ref if_true, ref if_false, } => { - self.operators.push(wasm_encoder::Instruction::If( + self.wasm.operators.push(wasm_encoder::Instruction::If( wasm_encoder::BlockType::Empty, )); self.translate_target(1, if_true, locations); - self.operators.push(wasm_encoder::Instruction::Else); + self.wasm.operators.push(wasm_encoder::Instruction::Else); self.translate_target(1, if_false, locations); - self.operators.push(wasm_encoder::Instruction::End); + self.wasm.operators.push(wasm_encoder::Instruction::End); } SerializedOperator::BrTable { ref targets, ref default, } => { - let ty = self.create_type(vec![wasm_encoder::ValType::I32], vec![]); + let ty = self.create_type(vec![Type::I32], vec![]); for _ in 0..(targets.len() + 2) { - self.operators.push(wasm_encoder::Instruction::Block( + self.wasm.operators.push(wasm_encoder::Instruction::Block( wasm_encoder::BlockType::FunctionType(ty), )); } let br_table_targets = (1..=targets.len()).map(|i| i as u32).collect::>(); - self.operators.push(wasm_encoder::Instruction::BrTable( + self.wasm.operators.push(wasm_encoder::Instruction::BrTable( Cow::Owned(br_table_targets), 0, )); - self.operators.push(wasm_encoder::Instruction::End); + self.wasm.operators.push(wasm_encoder::Instruction::End); self.translate_target(targets.len() + 1, default, locations); - self.operators.push(wasm_encoder::Instruction::End); + self.wasm.operators.push(wasm_encoder::Instruction::End); for i in 0..targets.len() { self.translate_target(targets.len() - i, &targets[i], locations); - self.operators.push(wasm_encoder::Instruction::End); + self.wasm.operators.push(wasm_encoder::Instruction::End); } } SerializedOperator::Get(v, i) => { let loc = *locations.locations.get(&(*v, *i)).unwrap(); - self.operators + self.wasm + .operators .push(wasm_encoder::Instruction::LocalGet(loc)); } SerializedOperator::Set(v, i) => { let loc = *locations.locations.get(&(*v, *i)).unwrap(); - self.operators + self.wasm + .operators .push(wasm_encoder::Instruction::LocalSet(loc)); } SerializedOperator::Tee(v, i) => { let loc = *locations.locations.get(&(*v, *i)).unwrap(); - self.operators + self.wasm + .operators .push(wasm_encoder::Instruction::LocalTee(loc)); } } @@ -135,7 +139,7 @@ impl Wasm { for op in ops { self.translate(op, locations); } - self.operators.push(wasm_encoder::Instruction::Br( + self.wasm.operators.push(wasm_encoder::Instruction::Br( (branch + extra_blocks) as u32, )); } @@ -143,20 +147,27 @@ impl Wasm { } } -pub fn produce_func_wasm(f: &FunctionBody, body: &SerializedBody, locations: &Locations) -> Wasm { +pub fn produce_func_wasm( + f: &FunctionBody, + body: &SerializedBody, + locations: &Locations, + ft: &mut FT, +) -> Wasm { let mut wasm = Wasm { operators: vec![], locals: vec![], - func_types: vec![], }; - wasm.locals .extend(f.locals.iter().map(|ty| ty_to_valty(*ty))); wasm.locals .extend(locations.new_locals.iter().map(|ty| ty_to_valty(*ty))); + let mut ctx = WasmContext { + wasm: &mut wasm, + func_type_sink: ft, + }; for operator in &body.operators { - wasm.translate(operator, locations); + ctx.translate(operator, locations); } wasm diff --git a/src/ir.rs b/src/ir.rs index 96502a1..f1d2892 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -1,15 +1,20 @@ //! Intermediate representation for Wasm. -use std::collections::hash_map::Entry; - use crate::{ - backend::{produce_func_wasm, BlockOrder, Locations, LoopNest, SerializedBody, WasmRegion}, + backend::{ + produce_func_wasm, BlockOrder, FuncTypeSink, Locations, LoopNest, SerializedBody, + WasmRegion, + }, cfg::CFGInfo, - frontend, Operator, + frontend, + ops::ty_to_valty, + Operator, }; use anyhow::Result; use fxhash::FxHashMap; -use wasmparser::{FuncType, Type}; +use rayon::prelude::*; +use std::collections::hash_map::Entry; +use wasmparser::{FuncType, SectionReader, Type}; pub type SignatureId = usize; pub type FuncId = usize; @@ -483,10 +488,16 @@ impl<'a> Module<'a> { frontend::wasm_to_ir(bytes) } - pub fn to_wasm_bytes(self) -> Vec { - for func in &self.funcs { - match func { - &FuncDecl::Body(_, ref body) => { + pub fn to_wasm_bytes(&self) -> Vec { + // Do most of the compilation in parallel: up to the + // serialized (pre-regalloc) body and the regalloc + // results. Only the "final parts assembly" needs to be + // serialized because it can add function signatures. + let compiled: Vec<(u32, &FunctionBody, SerializedBody, Locations)> = self + .funcs + .par_iter() + .filter_map(|func| match func { + &FuncDecl::Body(sig, ref body) => { let cfg = CFGInfo::new(body); let loopnest = LoopNest::compute(&cfg); let regions = WasmRegion::compute(&cfg, &loopnest); @@ -495,13 +506,157 @@ impl<'a> Module<'a> { log::trace!("serialized: {:?}", serialized); let locations = Locations::compute(body, &serialized); log::trace!("locations: {:?}", locations); - let func_body = produce_func_wasm(body, &serialized, &locations); - log::trace!("body: {:?}", func_body); + Some((sig as u32, body, serialized, locations)) + } + _ => None, + }) + .collect(); + + // Build the final code section and function-type section. + let mut signatures = SignatureAdder::new(&self); + let mut code_section = wasm_encoder::CodeSection::new(); + let mut func_section = wasm_encoder::FunctionSection::new(); + for (sig, body, serialized, locations) in compiled { + let func_body = produce_func_wasm(body, &serialized, &locations, &mut signatures); + log::trace!("body: {:?}", func_body); + + let mut locals: Vec<(u32, wasm_encoder::ValType)> = vec![]; + for local_ty in func_body.locals { + if locals.len() > 0 && locals.last().unwrap().1 == local_ty { + locals.last_mut().unwrap().0 += 1; + } else { + locals.push((1, local_ty)); + } + } + let mut func = wasm_encoder::Function::new(locals); + + for inst in func_body.operators { + func.instruction(&inst); + } + + func_section.function(sig); + code_section.function(&func); + } + + // Build the final function-signature (type) section. + let mut type_section = wasm_encoder::TypeSection::new(); + for sig in &signatures.signatures { + let params: Vec = + sig.params.iter().map(|&ty| ty_to_valty(ty)).collect(); + let returns: Vec = + sig.returns.iter().map(|&ty| ty_to_valty(ty)).collect(); + type_section.function(params, returns); + } + + // Now do a final pass over the original bytes with + // wasmparser, replacing the type section, function section, + // and code section. (TODO: allow new imports to be added + // too?) + let parser = wasmparser::Parser::new(0); + let mut module = wasm_encoder::Module::new(); + for payload in parser.parse_all(self.orig_bytes) { + match payload.unwrap() { + wasmparser::Payload::TypeSection(..) => { + module.section(&type_section); + } + wasmparser::Payload::FunctionSection(..) => { + module.section(&func_section); + } + wasmparser::Payload::CodeSectionStart { .. } => { + module.section(&code_section); + } + wasmparser::Payload::CodeSectionEntry(..) => {} + wasmparser::Payload::ImportSection(reader) => { + let range = reader.range(); + let bytes = &self.orig_bytes[range.start..range.end]; + module.section(&wasm_encoder::RawSection { id: 2, data: bytes }); + } + wasmparser::Payload::TableSection(reader) => { + let range = reader.range(); + let bytes = &self.orig_bytes[range.start..range.end]; + module.section(&wasm_encoder::RawSection { id: 4, data: bytes }); + } + wasmparser::Payload::MemorySection(reader) => { + let range = reader.range(); + let bytes = &self.orig_bytes[range.start..range.end]; + module.section(&wasm_encoder::RawSection { id: 5, data: bytes }); + } + wasmparser::Payload::GlobalSection(reader) => { + let range = reader.range(); + let bytes = &self.orig_bytes[range.start..range.end]; + module.section(&wasm_encoder::RawSection { id: 6, data: bytes }); + } + wasmparser::Payload::ExportSection(reader) => { + let range = reader.range(); + let bytes = &self.orig_bytes[range.start..range.end]; + module.section(&wasm_encoder::RawSection { id: 7, data: bytes }); + } + wasmparser::Payload::StartSection { range, .. } => { + let bytes = &self.orig_bytes[range.start..range.end]; + module.section(&wasm_encoder::RawSection { id: 8, data: bytes }); + } + wasmparser::Payload::ElementSection(reader) => { + let range = reader.range(); + let bytes = &self.orig_bytes[range.start..range.end]; + module.section(&wasm_encoder::RawSection { id: 9, data: bytes }); + } + wasmparser::Payload::DataSection(reader) => { + let range = reader.range(); + let bytes = &self.orig_bytes[range.start..range.end]; + module.section(&wasm_encoder::RawSection { + id: 11, + data: bytes, + }); + } + wasmparser::Payload::DataCountSection { range, .. } => { + let bytes = &self.orig_bytes[range.start..range.end]; + module.section(&wasm_encoder::RawSection { + id: 12, + data: bytes, + }); } _ => {} } } - // TODO - self.orig_bytes.to_vec() + + module.finish() + } +} + +struct SignatureAdder { + signatures: Vec, + signature_dedup: FxHashMap, +} + +impl SignatureAdder { + fn new(module: &Module<'_>) -> Self { + let signature_dedup: FxHashMap = module + .signatures + .iter() + .enumerate() + .map(|(idx, sig)| (sig.clone(), idx as u32)) + .collect(); + + Self { + signatures: module.signatures.clone(), + signature_dedup, + } + } +} + +impl FuncTypeSink for SignatureAdder { + fn add_signature(&mut self, params: Vec, results: Vec) -> u32 { + let ft = wasmparser::FuncType { + params: params.into_boxed_slice(), + returns: results.into_boxed_slice(), + }; + match self.signature_dedup.entry(ft.clone()) { + Entry::Occupied(o) => *o.get(), + Entry::Vacant(v) => { + let idx = self.signatures.len() as u32; + self.signatures.push(ft); + *v.insert(idx) + } + } } }