//! Frontend: convert Wasm to IR. #![allow(dead_code)] use crate::ir::*; use crate::op_traits::{op_inputs, op_outputs}; use anyhow::{bail, Result}; use log::trace; use wasmparser::{ ImportSectionEntryType, Operator, Parser, Payload, Type, TypeDef, TypeOrFuncType, }; pub fn wasm_to_ir(bytes: &[u8]) -> Result> { let mut module = Module::default(); let parser = Parser::new(0); let mut next_func = 0; for payload in parser.parse_all(bytes) { let payload = payload?; handle_payload(&mut module, payload, &mut next_func)?; } Ok(module) } fn handle_payload<'a>( module: &mut Module<'a>, payload: Payload<'a>, next_func: &mut usize, ) -> Result<()> { trace!("Wasm parser item: {:?}", payload); match payload { Payload::TypeSection(mut reader) => { for _ in 0..reader.get_count() { let ty = reader.read()?; if let TypeDef::Func(fty) = ty { module.signatures.push(fty); } } } Payload::ImportSection(mut reader) => { for _ in 0..reader.get_count() { if let ImportSectionEntryType::Function(sig_idx) = reader.read()?.ty { module.funcs.push(FuncDecl::Import(sig_idx as SignatureId)); *next_func += 1; } } } Payload::FunctionSection(mut reader) => { for _ in 0..reader.get_count() { let sig_idx = reader.read()? as SignatureId; module .funcs .push(FuncDecl::Body(sig_idx, FunctionBody::default())); } } Payload::CodeSectionEntry(body) => { let func_idx = *next_func; *next_func += 1; let my_sig = module.funcs[func_idx].sig(); let body = parse_body(module, my_sig, body)?; match &mut module.funcs[func_idx] { FuncDecl::Body(_, ref mut existing_body) => { *existing_body = body; } _ => unreachable!(), } } _ => {} } Ok(()) } fn parse_body<'a, 'b>( module: &'b Module<'a>, my_sig: SignatureId, body: wasmparser::FunctionBody<'a>, ) -> Result> { let mut ret: FunctionBody<'a> = FunctionBody::default(); let mut locals = body.get_locals_reader()?; for _ in 0..locals.get_count() { let (count, ty) = locals.read()?; for _ in 0..count { ret.locals.push(ty); } } let mut builder = FunctionBodyBuilder::new(module, my_sig, &mut ret); let ops = body.get_operators_reader()?; for op in ops.into_iter() { let op = op?; builder.handle_op(op)?; } Ok(ret) } #[derive(Debug)] struct FunctionBodyBuilder<'a, 'b> { module: &'b Module<'a>, my_sig: SignatureId, body: &'b mut FunctionBody<'a>, cur_block: Option, ctrl_stack: Vec, op_stack: Vec, } #[derive(Clone, Debug)] enum Frame { Block { start_depth: usize, out: BlockId, params: Vec, results: Vec, }, Loop { start_depth: usize, header: BlockId, out: BlockId, params: Vec, results: Vec, }, If { start_depth: usize, out: BlockId, el: BlockId, param_values: Vec, params: Vec, results: Vec, }, Else { start_depth: usize, out: BlockId, params: Vec, results: Vec, }, } impl Frame { fn start_depth(&self) -> usize { match self { Frame::Block { start_depth, .. } | Frame::Loop { start_depth, .. } | Frame::If { start_depth, .. } | Frame::Else { start_depth, .. } => *start_depth, } } fn br_args(&self) -> &[Type] { match self { Frame::Block { results, .. } | Frame::If { results, .. } | Frame::Else { results, .. } => &results[..], Frame::Loop { params, .. } => ¶ms[..], } } fn br_target(&self) -> BlockId { match self { Frame::Block { out, .. } => *out, Frame::Loop { header, .. } => *header, Frame::If { out, .. } | Frame::Else { out, .. } => *out, } } } impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { fn new(module: &'b Module<'a>, my_sig: SignatureId, body: &'b mut FunctionBody<'a>) -> Self { body.blocks.push(Block::default()); Self { module, my_sig, body, ctrl_stack: vec![], op_stack: vec![], cur_block: Some(0), } } fn add_block_params(&mut self, block: BlockId, tys: &[Type]) { self.body.blocks[block].params.extend_from_slice(tys); } fn handle_op(&mut self, op: Operator<'a>) -> Result<()> { match &op { Operator::Unreachable | Operator::Call { .. } | Operator::LocalGet { .. } | Operator::LocalSet { .. } | Operator::LocalTee { .. } | Operator::I32Eqz => self.emit(op.clone())?, Operator::End => match self.ctrl_stack.pop() { None => { self.emit(Operator::Return)?; } Some(Frame::Block { start_depth, out, results, .. }) | Some(Frame::Loop { start_depth, out, results, .. }) => { // Generate a branch to the out-block with // blockparams for the results. let result_values = self.op_stack.split_off(results.len()); self.emit_branch(out, &result_values[..]); assert_eq!(self.op_stack.len(), start_depth); self.cur_block = Some(out); self.push_block_params(&results[..]); } Some(Frame::If { start_depth, out, el, param_values, results, .. }) => { // Generate a branch to the out-block with // blockparams for the results. let result_values = self.op_stack.split_off(results.len()); self.emit_branch(out, &result_values[..]); // No `else`, so we need to generate a trivial // branch in the else-block. If the if-block-type // has results, they must be exactly the params. let else_result_values = param_values; assert_eq!(else_result_values.len(), results.len()); self.emit_branch(el, &else_result_values[..]); assert_eq!(self.op_stack.len(), start_depth); self.cur_block = Some(out); self.push_block_params(&results[..]); } Some(Frame::Else { out, results, start_depth, .. }) => { // Generate a branch to the out-block with // blockparams for the results. let result_values = self.op_stack.split_off(results.len()); assert_eq!(self.op_stack.len(), start_depth); self.emit_branch(out, &result_values[..]); self.cur_block = Some(out); self.push_block_params(&results[..]); } }, Operator::Block { ty } => { let (params, results) = self.block_params_and_results(*ty); let out = self.create_block(); self.add_block_params(out, &results[..]); let start_depth = self.op_stack.len() - params.len(); self.ctrl_stack.push(Frame::Block { start_depth, out, params, results, }); } Operator::Loop { ty } => { let (params, results) = self.block_params_and_results(*ty); let header = self.create_block(); self.add_block_params(header, ¶ms[..]); let initial_args = self.op_stack.split_off(params.len()); let start_depth = self.op_stack.len(); self.emit_branch(header, &initial_args[..]); self.cur_block = Some(header); self.push_block_params(¶ms[..]); let out = self.create_block(); self.ctrl_stack.push(Frame::Loop { start_depth, header, out, params, results, }); } Operator::If { ty } => { let (params, results) = self.block_params_and_results(*ty); let if_true = self.create_block(); let if_false = self.create_block(); let join = self.create_block(); self.add_block_params(join, &results[..]); let cond = self.op_stack.pop().unwrap(); let param_values = self.op_stack[self.op_stack.len() - params.len()..].to_vec(); let start_depth = self.op_stack.len(); self.ctrl_stack.push(Frame::If { start_depth, out: join, el: if_false, param_values, params, results, }); self.cur_block = Some(if_true); self.emit_cond_branch(cond, if_true, &[], if_false, &[]); } Operator::Else => { if let Frame::If { start_depth, out, el, param_values, params, results, } = self.ctrl_stack.pop().unwrap() { let if_results = self.op_stack.split_off(results.len()); self.emit_branch(out, &if_results[..]); self.op_stack.extend(param_values); self.ctrl_stack.push(Frame::Else { start_depth, out, params, results, }); self.cur_block = Some(el); } else { bail!("Else without If on top of frame stack"); } } Operator::Br { relative_depth } | Operator::BrIf { relative_depth } => { let cond = match &op { Operator::Br { .. } => None, Operator::BrIf { .. } => Some(self.op_stack.pop().unwrap()), _ => unreachable!(), }; // Get the frame we're branching to. let frame = self.relative_frame(*relative_depth).clone(); // Get the args off the stack. let args = self.op_stack.split_off(frame.br_args().len()); // Finally, generate the branch itself. match cond { None => { self.emit_branch(frame.br_target(), &args[..]); self.cur_block = None; } Some(cond) => { let cont = self.create_block(); self.emit_cond_branch(cond, frame.br_target(), &args[..], cont, &[]); self.cur_block = Some(cont); } } } Operator::BrTable { table } => { // Get the selector index. let index = self.op_stack.pop().unwrap(); // Get the signature of the default frame; this tells // us the signature of all frames (since wasmparser // validates the input for us). Pop that many args. let default_frame = self.relative_frame(table.default()); let default_term_target = default_frame.br_target(); let arg_len = default_frame.br_args().len(); let args = self.op_stack.split_off(arg_len); // Generate a branch terminator with the same args for // every branch target. let mut term_targets = vec![]; for target in table.targets() { let target = target?; let frame = self.relative_frame(target); assert_eq!(frame.br_args().len(), args.len()); let block = frame.br_target(); term_targets.push(block); } self.emit_br_table(index, default_term_target, &term_targets[..], &args[..]); } _ => bail!("Unsupported operator: {:?}", op), } Ok(()) } fn create_block(&mut self) -> BlockId { let id = self.body.blocks.len() as BlockId; self.body.blocks.push(Block::default()); id } fn block_params_and_results(&self, ty: TypeOrFuncType) -> (Vec, Vec) { match ty { TypeOrFuncType::Type(ret_ty) => (vec![], vec![ret_ty]), TypeOrFuncType::FuncType(sig_idx) => { let sig = &self.module.signatures[sig_idx as SignatureId]; ( Vec::from(sig.params.clone()), Vec::from(sig.returns.clone()), ) } } } fn relative_frame(&self, relative_depth: u32) -> &Frame { &self.ctrl_stack[self.ctrl_stack.len() - 1 - relative_depth as usize] } fn emit_branch(&mut self, target: BlockId, args: &[ValueId]) { if let Some(block) = self.cur_block { let args = args.iter().map(|&val| Operand::value(val)).collect(); let target = BlockTarget { block: target, args, }; self.body.blocks[block].terminator = Terminator::Br { target }; } } fn emit_cond_branch( &mut self, cond: ValueId, if_true: BlockId, if_true_args: &[ValueId], if_false: BlockId, if_false_args: &[ValueId], ) { if let Some(block) = self.cur_block { let if_true_args = if_true_args .iter() .map(|&val| Operand::value(val)) .collect(); let if_false_args = if_false_args .iter() .map(|&val| Operand::value(val)) .collect(); self.body.blocks[block].terminator = Terminator::CondBr { cond: Operand::value(cond), if_true: BlockTarget { block: if_true, args: if_true_args, }, if_false: BlockTarget { block: if_false, args: if_false_args, }, }; } } fn emit_br_table( &mut self, index: ValueId, default_target: BlockId, indexed_targets: &[BlockId], args: &[ValueId], ) { if let Some(block) = self.cur_block { let args: Vec> = args.iter().map(|&arg| Operand::value(arg)).collect(); let targets = indexed_targets .iter() .map(|&block| BlockTarget { block, args: args.clone(), }) .collect(); let default = BlockTarget { block: default_target, args: args.clone(), }; self.body.blocks[block].terminator = Terminator::Select { value: Operand::value(index), targets, default, }; } } fn push_block_params(&mut self, tys: &[Type]) { assert_eq!(tys, self.body.blocks[self.cur_block.unwrap()].params); for (i, &ty) in tys.iter().enumerate() { let value_id = self.body.values.len() as ValueId; self.body.values.push(ValueDef { kind: ValueKind::BlockParam(self.cur_block.unwrap(), i), ty, }); self.op_stack.push(value_id); } } fn emit(&mut self, op: Operator<'a>) -> Result<()> { if let Some(block) = self.cur_block { let inst = self.body.blocks[block].insts.len() as InstId; let mut inputs = vec![]; for input in op_inputs(self.module, self.my_sig, &self.body.locals[..], &op)? .into_iter() .rev() { let stack_top = self.op_stack.pop().unwrap(); assert_eq!(self.body.values[stack_top].ty, input); inputs.push(Operand::value(stack_top)); } inputs.reverse(); let mut outputs = vec![]; for output in op_outputs(self.module, &self.body.locals[..], &op)?.into_iter() { let val = self.body.values.len() as ValueId; outputs.push(val); self.body.values.push(ValueDef { kind: ValueKind::Inst(block, inst), ty: output, }); self.op_stack.push(val); } self.body.blocks[block].insts.push(Inst { operator: op, outputs, inputs, }); } else { let _ = self .op_stack .split_off(op_inputs(self.module, self.my_sig, &self.body.locals[..], &op)?.len()); for _ in 0..op_outputs(self.module, &self.body.locals[..], &op)?.len() { self.op_stack.push(NO_VALUE); } } Ok(()) } }