diff --git a/Cargo.toml b/Cargo.toml
index 756070a..559ba66 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,3 +18,7 @@ smallvec = "1.7"
 rayon = "1.5"
 lazy_static = "1.4"
 libc = "0.2"
+addr2line = "0.19"
+
+[patch.crates-io]
+gimli = { git = "https://github.com/philipc/gimli", branch = "line-tombstone" }
diff --git a/src/frontend.rs b/src/frontend.rs
index 57b4dbb..891dcff 100644
--- a/src/frontend.rs
+++ b/src/frontend.rs
@@ -7,6 +7,7 @@ use crate::errors::FrontendError;
 use crate::ir::*;
 use crate::op_traits::{op_inputs, op_outputs};
 use crate::ops::Operator;
+use addr2line::gimli;
 use anyhow::{bail, Result};
 use fxhash::{FxHashMap, FxHashSet};
 use log::trace;
@@ -19,10 +20,24 @@ pub fn wasm_to_ir(bytes: &[u8]) -> Result<Module<'_>> {
     let mut module = Module::with_orig_bytes(bytes);
     let parser = Parser::new(0);
     let mut next_func = 0;
+    let mut dwarf = gimli::Dwarf::default();
+    let mut extra_sections = ExtraSections::default();
     for payload in parser.parse_all(bytes) {
         let payload = payload?;
-        handle_payload(&mut module, payload, &mut next_func)?;
+        handle_payload(
+            &mut module,
+            payload,
+            &mut next_func,
+            &mut dwarf,
+            &mut extra_sections,
+        )?;
     }
+    dwarf.locations =
+        gimli::LocationLists::new(extra_sections.debug_loc, extra_sections.debug_loclists);
+    dwarf.ranges =
+        gimli::RangeLists::new(extra_sections.debug_ranges, extra_sections.debug_rnglists);
+    let debug_map = DebugMap::from_dwarf(dwarf, &mut module.debug)?;
+    module.debug_map = debug_map;
     Ok(module)
 }
 
@@ -56,7 +71,19 @@ fn parse_init_expr<'a>(init_expr: &wasmparser::ConstExpr<'a>) -> Result
+/// DWARF sections that gimli only accepts in pairs (`LocationLists`,
+/// `RangeLists`); they are stashed here and combined once parsing is done.
+#[derive(Default)]
+struct ExtraSections<'a> {
+    debug_loc: gimli::DebugLoc<gimli::EndianSlice<'a, gimli::LittleEndian>>,
+    debug_loclists: gimli::DebugLocLists<gimli::EndianSlice<'a, gimli::LittleEndian>>,
+    debug_ranges: gimli::DebugRanges<gimli::EndianSlice<'a, gimli::LittleEndian>>,
+    debug_rnglists: gimli::DebugRngLists<gimli::EndianSlice<'a, gimli::LittleEndian>>,
+}
+
 fn handle_payload<'a>(
     module: &mut Module<'a>,
     payload: Payload<'a>,
     next_func: &mut usize,
+    dwarf: &mut gimli::Dwarf<gimli::EndianSlice<'a, gimli::LittleEndian>>,
+    extra_sections: &mut ExtraSections<'a>,
 ) -> Result<()> {
     trace!("Wasm parser item: {:?}", payload);
     match payload {
@@ -213,6 +240,53 @@ fn handle_payload<'a>(
                 }
             }
         }
+        Payload::CustomSection(reader) if reader.name() == ".debug_info" => {
+            dwarf.debug_info = gimli::DebugInfo::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_abbrev" => {
+            dwarf.debug_abbrev = gimli::DebugAbbrev::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_addr" => {
+            dwarf.debug_addr =
+                gimli::DebugAddr::from(gimli::EndianSlice::new(reader.data(), gimli::LittleEndian));
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_aranges" => {
+            dwarf.debug_aranges = gimli::DebugAranges::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_line" => {
+            dwarf.debug_line = gimli::DebugLine::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_line_str" => {
+            dwarf.debug_line_str = gimli::DebugLineStr::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_str" => {
+            dwarf.debug_str = gimli::DebugStr::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_str_offsets" => {
+            dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(gimli::EndianSlice::new(
+                reader.data(),
+                gimli::LittleEndian,
+            ));
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_types" => {
+            dwarf.debug_types = gimli::DebugTypes::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_loc" => {
+            extra_sections.debug_loc = gimli::DebugLoc::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_loclists" => {
+            extra_sections.debug_loclists =
+                gimli::DebugLocLists::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_ranges" => {
+            extra_sections.debug_ranges =
+                gimli::DebugRanges::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(reader) if reader.name() == ".debug_rnglists" => {
+            extra_sections.debug_rnglists =
+                gimli::DebugRngLists::new(reader.data(), gimli::LittleEndian);
+        }
+        Payload::CustomSection(_) => {}
         Payload::CodeSectionStart { .. } => {}
         Payload::Version { .. } => {}
         Payload::ElementSection(reader) => {
@@ -285,6 +359,42 @@ fn handle_payload<'a>(
     Ok(())
 }
 
+/// Cursor over a `DebugMap`'s sorted `(start, len, loc)` ranges, used to
+/// look up the source location of each operator in a function body.
+struct DebugLocReader<'a> {
+    locs: &'a [(u32, u32, SourceLoc)],
+}
+
+impl<'a> DebugLocReader<'a> {
+    /// Positions the cursor using `DebugMap::locs_from_offset(offset)`.
+    fn new(module: &'a Module, offset: usize) -> Self {
+        DebugLocReader {
+            locs: module.debug_map.locs_from_offset(offset),
+        }
+    }
+
+    /// Returns the location whose half-open range `start..start + len`
+    /// contains `offset`, or `SourceLoc::invalid()` if there is none.
+    ///
+    /// Ranges lying entirely before `offset` are discarded, so offsets must
+    /// be queried in non-decreasing order. Panics if `offset` does not fit
+    /// in `u32`.
+    fn get_loc(&mut self, offset: usize) -> SourceLoc {
+        let offset = u32::try_from(offset).unwrap();
+        while let Some(&(start, len, loc)) = self.locs.first() {
+            if offset < start {
+                break;
+            }
+            if offset - start < len {
+                return loc;
+            }
+            // This range ends at or before `offset`: drop it and keep looking.
+            self.locs = &self.locs[1..];
+        }
+        SourceLoc::invalid()
+    }
+}
+
 pub(crate) fn parse_body<'a>(
     module: &'a Module,
     my_sig: Signature,
@@ -292,6 +402,9 @@ pub(crate) fn parse_body<'a>(
 ) -> Result<FunctionBody> {
     let mut ret: FunctionBody = FunctionBody::default();
 
+    let start_offset = body.range().start;
+    let mut debug_locs = DebugLocReader::new(module, start_offset);
+
     for &param in &module.signatures[my_sig].params[..] {
         ret.locals.push(param.into());
     }
@@ -337,17 +450,18 @@ pub(crate) fn parse_body<'a>(
     }
 
     let ops = body.get_operators_reader()?;
-    for op in ops.into_iter() {
-        let op = op?;
+    for item in ops.into_iter_with_offsets() {
+        let (op, offset) = item?;
+        let loc = debug_locs.get_loc(offset);
         if builder.reachable {
-            builder.handle_op(op)?;
+            builder.handle_op(op, loc)?;
         } else {
             builder.handle_op_unreachable(op)?;
         }
     }
 
     if builder.reachable {
-        builder.handle_op(wasmparser::Operator::Return)?;
+        builder.handle_op(wasmparser::Operator::Return, SourceLoc::invalid())?;
     }
 
     for block in builder.body.blocks.iter() {
@@ -806,7 +920,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> {
         }
     }
 
-    fn handle_op(&mut self, op: wasmparser::Operator<'a>) -> Result<()> {
+    fn handle_op(&mut self, op: wasmparser::Operator<'a>, loc: SourceLoc) -> Result<()> {
         trace!("handle_op: {:?}", op);
         trace!("op_stack = {:?}", self.op_stack);
         trace!("ctrl_stack = {:?}", self.ctrl_stack);
@@ -1017,7 +1131,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> {
             | wasmparser::Operator::TableSet { .. }
             | wasmparser::Operator::TableGrow { .. }
             | wasmparser::Operator::TableSize { .. } => {
-                self.emit(Operator::try_from(&op).unwrap())?
+                self.emit(Operator::try_from(&op).unwrap(), loc)?
             }
 
             wasmparser::Operator::Nop => {}
@@ -1538,7 +1652,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> {
         }
     }
 
-    fn emit(&mut self, op: Operator) -> Result<()> {
+    fn emit(&mut self, op: Operator, loc: SourceLoc) -> Result<()> {
         let inputs = op_inputs(self.module, &self.op_stack[..], &op)?;
         let outputs = op_outputs(self.module, &self.op_stack[..], &op)?;
 
@@ -1569,6 +1683,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> {
         if self.reachable {
             self.body.append_to_block(self.cur_block, value);
         }
+        self.body.source_locs[value] = loc;
 
         if n_outputs == 1 {
             let output_ty = outputs[0];
diff --git a/src/ir.rs b/src/ir.rs
index 5fafc10..c7d10ad 100644
--- a/src/ir.rs
+++ b/src/ir.rs
@@ -69,3 +69,5 @@ mod value;
 pub use value::*;
 mod display;
 pub use display::*;
+mod debug;
+pub use debug::*;
diff --git a/src/ir/debug.rs b/src/ir/debug.rs
new file mode 100644
index 0000000..969de8c
--- /dev/null
+++ b/src/ir/debug.rs
@@ -0,0 +1,129 @@
+//! Debug info (currently, source-location maps).
+
+use crate::declare_entity;
+use crate::entity::EntityVec;
+use addr2line::gimli;
+use std::collections::hash_map::Entry as HashEntry;
+use std::collections::HashMap;
+use std::convert::TryFrom;
+
+declare_entity!(SourceFile, "file");
+declare_entity!(SourceLoc, "loc");
+
+/// Interned source files and source locations, each stored once.
+#[derive(Clone, Debug, Default)]
+pub struct Debug {
+    pub source_files: EntityVec<SourceFile, String>,
+    source_file_dedup: HashMap<String, SourceFile>,
+    pub source_locs: EntityVec<SourceLoc, SourceLocData>,
+    source_loc_dedup: HashMap<SourceLocData, SourceLoc>,
+}
+
+/// A file/line/column triple identifying a position in the source.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct SourceLocData {
+    pub file: SourceFile,
+    pub line: u32,
+    pub col: u32,
+}
+
+impl Debug {
+    /// Returns the id of `path`, adding it to `source_files` on first use.
+    pub fn intern_file(&mut self, path: &str) -> SourceFile {
+        if let Some(&id) = self.source_file_dedup.get(path) {
+            return id;
+        }
+        let id = self.source_files.push(path.to_owned());
+        self.source_file_dedup.insert(path.to_owned(), id);
+        id
+    }
+
+    /// Returns the id of this location, adding it to `source_locs` on first
+    /// use.
+    pub fn intern_loc(&mut self, file: SourceFile, line: u32, col: u32) -> SourceLoc {
+        let data = SourceLocData { file, line, col };
+        match self.source_loc_dedup.entry(data) {
+            HashEntry::Vacant(v) => {
+                let id = self.source_locs.push(data);
+                *v.insert(id)
+            }
+            HashEntry::Occupied(o) => *o.get(),
+        }
+    }
+}
+
+/// Sorted, non-overlapping `(start, len, loc)` ranges; each covers the
+/// half-open interval `start..start + len`.
+#[derive(Clone, Debug, Default)]
+pub struct DebugMap {
+    tuples: Vec<(u32, u32, SourceLoc)>,
+}
+
+impl DebugMap {
+    /// Collects every location range `addr2line` reports for `dwarf`,
+    /// interning files and locations into `debug`.
+    ///
+    /// Missing file/line/column default to `""`/`0`/`0`. Ranges that overlap
+    /// an earlier (sorted) range, or whose start or length does not fit in
+    /// `u32`, are dropped.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the DWARF data cannot be loaded or queried.
+    pub(crate) fn from_dwarf<R: gimli::Reader>(
+        dwarf: gimli::Dwarf<R>,
+        debug: &mut Debug,
+    ) -> anyhow::Result<DebugMap> {
+        let ctx = addr2line::Context::from_dwarf(dwarf)?;
+        let mut tuples = vec![];
+
+        // NOTE(review): DWARF for Wasm appears to use code-section-relative
+        // addresses; confirm they match the offsets callers look up.
+        for (start, len, loc) in ctx.find_location_range(0, u64::MAX)? {
+            // A range that does not fit in `u32` cannot be represented here;
+            // skip it rather than silently truncating it with `as`.
+            let (start, len) = match (u32::try_from(start), u32::try_from(len)) {
+                (Ok(start), Ok(len)) => (start, len),
+                _ => continue,
+            };
+            let file = debug.intern_file(loc.file.unwrap_or(""));
+            let loc = debug.intern_loc(file, loc.line.unwrap_or(0), loc.column.unwrap_or(0));
+            tuples.push((start, len, loc));
+        }
+        tuples.sort_unstable();
+
+        let mut last = 0;
+        tuples.retain(|&(start, len, _)| {
+            let retain = start >= last;
+            if retain {
+                last = start.saturating_add(len);
+            }
+            retain
+        });
+
+        Ok(DebugMap { tuples })
+    }
+
+    /// Returns the ranges from the first one that contains or follows
+    /// `offset` to the end of the map.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `offset` does not fit in `u32`.
+    pub(crate) fn locs_from_offset(&self, offset: usize) -> &[(u32, u32, SourceLoc)] {
+        let offset = u32::try_from(offset).unwrap();
+        let first = self
+            .tuples
+            .binary_search_by(|&(start, len, _)| {
+                if offset < start {
+                    std::cmp::Ordering::Greater
+                } else if offset - start >= len {
+                    std::cmp::Ordering::Less
+                } else {
+                    std::cmp::Ordering::Equal
+                }
+            })
+            .unwrap_or_else(|first_after| first_after);
+        &self.tuples[first..]
+    }
+}
diff --git a/src/ir/display.rs b/src/ir/display.rs
index a6f5a54..ef85f0d 100644
--- a/src/ir/display.rs
+++ b/src/ir/display.rs
@@ -123,12 +123,13 @@ impl<'a> Display for FunctionBodyDisplay<'a> {
                         let tys = tys.iter().map(|&ty| format!("{}", ty)).collect::<Vec<_>>();
                         writeln!(
                             f,
-                            "{} {} = {} {} # {}",
+                            "{} {} = {} {} # {} @{}",
                             self.1,
                             inst,
                             op,
                             args.join(", "),
-                            tys.join(", ")
+                            tys.join(", "),
+                            self.0.source_locs[inst],
                         )?;
                     }
                     ValueDef::PickOutput(val, idx, ty) => {
@@ -250,6 +251,16 @@ impl<'a> Display for ModuleDisplay<'a> {
                 }
             }
         }
+        for (loc, loc_data) in self.0.debug.source_locs.entries() {
+            writeln!(
+                f,
+                " {} = {} line {} column {}",
+                loc, loc_data.file, loc_data.line, loc_data.col
+            )?;
+        }
+        for (file, file_name) in self.0.debug.source_files.entries() {
+            writeln!(f, " {} = \"{}\"", file, file_name)?;
+        }
         writeln!(f, "}}")?;
         Ok(())
     }
diff --git a/src/ir/func.rs b/src/ir/func.rs
index a5d638f..c1537d8 100644
--- a/src/ir/func.rs
+++ b/src/ir/func.rs
@@ -2,6 +2,7 @@ use super::{Block, FunctionBodyDisplay, Local, Module, Signature, Type, Value, V
 use crate::cfg::CFGInfo;
 use crate::entity::{EntityRef, EntityVec, PerEntity};
 use crate::frontend::parse_body;
+use crate::ir::SourceLoc;
 use anyhow::Result;
 
 #[derive(Clone, Debug)]
@@ -99,6 +100,8 @@ pub struct FunctionBody {
     pub value_blocks: PerEntity<Value, Block>,
     /// Wasm locals that values correspond to, if any.
     pub value_locals: PerEntity<Value, Option<Local>>,
+    /// Debug source locations of each value.
+    pub source_locs: PerEntity<Value, SourceLoc>,
 }
 
 impl FunctionBody {
@@ -124,6 +127,7 @@ impl FunctionBody {
             values,
             value_blocks,
             value_locals: PerEntity::default(),
+            source_locs: PerEntity::default(),
         }
     }
 
diff --git a/src/ir/module.rs b/src/ir/module.rs
index 077248c..ae2e75c 100644
--- a/src/ir/module.rs
+++ b/src/ir/module.rs
@@ -1,6 +1,6 @@
 use super::{Func, FuncDecl, Global, Memory, ModuleDisplay, Signature, Table, Type};
 use crate::entity::{EntityRef, EntityVec};
-use crate::ir::FunctionBody;
+use crate::ir::{Debug, DebugMap, FunctionBody};
 use crate::{backend, frontend};
 use anyhow::Result;
 
@@ -15,6 +15,8 @@ pub struct Module<'a> {
     pub exports: Vec<Export>,
     pub memories: EntityVec<Memory, MemoryData>,
     pub start_func: Option<Func>,
+    pub debug: Debug,
+    pub debug_map: DebugMap,
 }
 
 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -137,6 +139,8 @@ impl<'a> Module<'a> {
             exports: vec![],
             memories: EntityVec::default(),
             start_func: None,
+            debug: Debug::default(),
+            debug_map: DebugMap::default(),
         }
     }
 }