From 5b4279f517e4c433d8ed220acf465b3f49e1d471 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 7 Feb 2023 14:34:59 -0800 Subject: [PATCH] Lazy function parsing and recompilation when roundtripping --- fuzz/fuzz_targets/roundtrip.rs | 1 + src/backend/mod.rs | 36 +++++++++++++++++++++++++++------- src/frontend.rs | 9 +++------ src/ir/display.rs | 4 ++++ src/ir/func.rs | 19 ++++++++++++++++-- src/ir/module.rs | 29 +++++++++++++++++++++------ 6 files changed, 77 insertions(+), 21 deletions(-) diff --git a/fuzz/fuzz_targets/roundtrip.rs b/fuzz/fuzz_targets/roundtrip.rs index 81467f6..66958f7 100644 --- a/fuzz/fuzz_targets/roundtrip.rs +++ b/fuzz/fuzz_targets/roundtrip.rs @@ -24,6 +24,7 @@ fuzz_target!(|module: wasm_smith::Module| { } } }; + parsed_module.expand_all_funcs().unwrap(); parsed_module.optimize(); let _ = parsed_module.to_wasm_bytes(); }); diff --git a/src/backend/mod.rs b/src/backend/mod.rs index a289527..0241422 100644 --- a/src/backend/mod.rs +++ b/src/backend/mod.rs @@ -585,7 +585,7 @@ pub fn compile(module: &Module<'_>) -> anyhow::Result> { for (func, func_decl) in module.funcs().skip(num_func_imports) { match func_decl { FuncDecl::Import(_) => anyhow::bail!("Import comes after func with body: {}", func), - FuncDecl::Body(sig, _) => { + FuncDecl::Lazy(sig, _) | FuncDecl::Body(sig, _) => { funcs.function(sig.index() as u32); } } @@ -689,20 +689,42 @@ pub fn compile(module: &Module<'_>) -> anyhow::Result> { into_mod.section(&elem); let mut code = wasm_encoder::CodeSection::new(); + enum FuncOrRawBytes<'a> { + Func(wasm_encoder::Function), + Raw(&'a [u8]), + } + let bodies = module .funcs() .skip(num_func_imports) .collect::>() .par_iter() - .map(|(func, func_decl)| -> Result { - let body = func_decl.body().unwrap(); - log::debug!("Compiling {}", func); - WasmFuncBackend::new(body)?.compile() + .map(|(func, func_decl)| -> Result { + match func_decl { + FuncDecl::Lazy(_, reader) => { + let data = &module.orig_bytes[reader.range()]; + Ok(FuncOrRawBytes::Raw(data)) + } + FuncDecl::Body(_, body) => { + log::debug!("Compiling {}", func); + WasmFuncBackend::new(body)? + .compile() + .map(|f| FuncOrRawBytes::Func(f)) + } + FuncDecl::Import(_) => unreachable!("Should have skipped imports"), + } }) - .collect::>>()?; + .collect::>>>()?; for body in bodies { - code.function(&body); + match body { + FuncOrRawBytes::Func(f) => { + code.function(&f); + } + FuncOrRawBytes::Raw(bytes) => { + code.raw(bytes); + } + } } into_mod.section(&code); diff --git a/src/frontend.rs b/src/frontend.rs index d16464b..e4825a3 100644 --- a/src/frontend.rs +++ b/src/frontend.rs @@ -143,10 +143,7 @@ fn handle_payload<'a>( *next_func += 1; let my_sig = module.func(func_idx).sig(); - let body = parse_body(module, my_sig, body)?; - - let existing_body = module.func_mut(func_idx).body_mut().unwrap(); - *existing_body = body; + *module.func_mut(func_idx) = FuncDecl::Lazy(my_sig, body); } Payload::ExportSection(reader) => { for export in reader { @@ -267,10 +264,10 @@ fn handle_payload<'a>( Ok(()) } -fn parse_body<'a>( +pub(crate) fn parse_body<'a>( module: &'a Module, my_sig: Signature, - body: wasmparser::FunctionBody, + body: &mut wasmparser::FunctionBody, ) -> Result { let mut ret: FunctionBody = FunctionBody::default(); diff --git a/src/ir/display.rs b/src/ir/display.rs index c3fa77a..1f6a35d 100644 --- a/src/ir/display.rs +++ b/src/ir/display.rs @@ -217,6 +217,10 @@ impl<'a> Display for ModuleDisplay<'a> { writeln!(f, " {}: {} = # {}", func, sig, sig_strs.get(&sig).unwrap())?; writeln!(f, "{}", body.display(" "))?; } + FuncDecl::Lazy(sig, reader) => { + writeln!(f, " {}: {} = # {}", func, sig, sig_strs.get(&sig).unwrap())?; + writeln!(f, " # raw bytes (length {})", reader.range().len())?; + } FuncDecl::Import(sig) => { writeln!(f, " {}: {} # {}", func, sig, sig_strs.get(&sig).unwrap())?; } diff --git a/src/ir/func.rs b/src/ir/func.rs index 183b854..050454c 100644 --- a/src/ir/func.rs +++ b/src/ir/func.rs @@ -1,21 +1,36 @@ use super::{Block, FunctionBodyDisplay, Local, Module, Signature, Type, Value, ValueDef}; use crate::cfg::CFGInfo; use crate::entity::{EntityRef, EntityVec, PerEntity}; +use crate::frontend::parse_body; +use anyhow::Result; #[derive(Clone, Debug)] -pub enum FuncDecl { +pub enum FuncDecl<'a> { Import(Signature), + Lazy(Signature, wasmparser::FunctionBody<'a>), Body(Signature, FunctionBody), } -impl FuncDecl { +impl<'a> FuncDecl<'a> { pub fn sig(&self) -> Signature { match self { FuncDecl::Import(sig) => *sig, + FuncDecl::Lazy(sig, ..) => *sig, FuncDecl::Body(sig, ..) => *sig, } } + pub fn parse(&mut self, module: &Module) -> Result<()> { + match self { + FuncDecl::Lazy(sig, body) => { + let body = parse_body(module, *sig, body)?; + *self = FuncDecl::Body(*sig, body); + Ok(()) + } + _ => Ok(()), + } + } + pub fn body(&self) -> Option<&FunctionBody> { match self { FuncDecl::Body(_, body) => Some(body), diff --git a/src/ir/module.rs b/src/ir/module.rs index b72fbee..aa7adf4 100644 --- a/src/ir/module.rs +++ b/src/ir/module.rs @@ -6,8 +6,8 @@ use anyhow::Result; #[derive(Clone, Debug)] pub struct Module<'a> { - orig_bytes: &'a [u8], - funcs: EntityVec, + pub orig_bytes: &'a [u8], + funcs: EntityVec>, signatures: EntityVec, globals: EntityVec, tables: EntityVec, @@ -142,13 +142,13 @@ impl<'a> Module<'a> { } impl<'a> Module<'a> { - pub fn func<'b>(&'b self, id: Func) -> &'b FuncDecl { + pub fn func<'b>(&'b self, id: Func) -> &'b FuncDecl<'a> { &self.funcs[id] } - pub fn func_mut<'b>(&'b mut self, id: Func) -> &'b mut FuncDecl { + pub fn func_mut<'b>(&'b mut self, id: Func) -> &'b mut FuncDecl<'a> { &mut self.funcs[id] } - pub fn funcs<'b>(&'b self) -> impl Iterator { + pub fn funcs<'b>(&'b self) -> impl Iterator)> { self.funcs.entries() } pub fn signature<'b>(&'b self, id: Signature) -> &'b SignatureData { @@ -192,7 +192,7 @@ impl<'a> Module<'a> { pub(crate) fn frontend_add_signature(&mut self, ty: SignatureData) { self.signatures.push(ty); } - pub(crate) fn frontend_add_func(&mut self, body: FuncDecl) -> Func { + pub(crate) fn frontend_add_func(&mut self, body: FuncDecl<'a>) -> Func { self.funcs.push(body) } pub(crate) fn frontend_add_table(&mut self, ty: Type, max: Option) -> Table { @@ -236,6 +236,23 @@ impl<'a> Module<'a> { } } + pub fn expand_func<'b>(&'b mut self, id: Func) -> Result<&'b FuncDecl<'a>> { + let mut funcs = std::mem::take(&mut self.funcs); + let ret = funcs[id].parse(self); + self.funcs = funcs; + ret.and(Ok(&self.funcs[id])) + } + + pub fn expand_all_funcs(&mut self) -> Result<()> { + let mut funcs = std::mem::take(&mut self.funcs); + let mut ret = Ok(()); + for func_decl in funcs.values_mut() { + ret = ret.and_then(|_| func_decl.parse(self)); + } + self.funcs = funcs; + ret + } + pub fn optimize(&mut self) { self.per_func_body(|body| { let cfg = crate::cfg::CFGInfo::new(body);