From ceaa8acac65dee0584bc419a17d719d384ae6b41 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 10 Feb 2023 20:55:45 -0800 Subject: [PATCH 1/6] Add addr2line usage to attempt to get source-loc information --- Cargo.toml | 1 + src/frontend.rs | 39 ++++++++++++++++++++++++++++++++++++++- src/ir.rs | 2 ++ src/ir/module.rs | 6 +++++- 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 756070a..225f23d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,3 +18,4 @@ smallvec = "1.7" rayon = "1.5" lazy_static = "1.4" libc = "0.2" +addr2line = "0.19" diff --git a/src/frontend.rs b/src/frontend.rs index 57b4dbb..7a5bf97 100644 --- a/src/frontend.rs +++ b/src/frontend.rs @@ -7,6 +7,7 @@ use crate::errors::FrontendError; use crate::ir::*; use crate::op_traits::{op_inputs, op_outputs}; use crate::ops::Operator; +use addr2line::gimli; use anyhow::{bail, Result}; use fxhash::{FxHashMap, FxHashSet}; use log::trace; @@ -19,10 +20,13 @@ pub fn wasm_to_ir(bytes: &[u8]) -> Result> { let mut module = Module::with_orig_bytes(bytes); let parser = Parser::new(0); let mut next_func = 0; + let mut dwarf = gimli::Dwarf::default(); for payload in parser.parse_all(bytes) { let payload = payload?; - handle_payload(&mut module, payload, &mut next_func)?; + handle_payload(&mut module, payload, &mut next_func, &mut dwarf)?; } + let debug_map = DebugMap::from_dwarf(dwarf, &mut module.debug); + module.debug_map = debug_map; Ok(module) } @@ -57,6 +61,7 @@ fn handle_payload<'a>( module: &mut Module<'a>, payload: Payload<'a>, next_func: &mut usize, + dwarf: &mut gimli::Dwarf>, ) -> Result<()> { trace!("Wasm parser item: {:?}", payload); match payload { @@ -213,6 +218,38 @@ fn handle_payload<'a>( } } } + Payload::CustomSection(reader) if reader.name() == ".debug_info" => { + dwarf.debug_info = gimli::DebugInfo::new(reader.data(), gimli::LittleEndian); + } + Payload::CustomSection(reader) if reader.name() == ".debug_abbrev" => { + dwarf.debug_abbrev = gimli::DebugAbbrev::new(reader.data(), gimli::LittleEndian); + } + Payload::CustomSection(reader) if reader.name() == ".debug_addr" => { + dwarf.debug_addr = + gimli::DebugAddr::from(gimli::EndianSlice::new(reader.data(), gimli::LittleEndian)); + } + Payload::CustomSection(reader) if reader.name() == ".debug_aranges" => { + dwarf.debug_aranges = gimli::DebugAranges::new(reader.data(), gimli::LittleEndian); + } + Payload::CustomSection(reader) if reader.name() == ".debug_line" => { + dwarf.debug_line = gimli::DebugLine::new(reader.data(), gimli::LittleEndian); + } + Payload::CustomSection(reader) if reader.name() == ".debug_line_str" => { + dwarf.debug_line_str = gimli::DebugLineStr::new(reader.data(), gimli::LittleEndian); + } + Payload::CustomSection(reader) if reader.name() == ".debug_str" => { + dwarf.debug_str = gimli::DebugStr::new(reader.data(), gimli::LittleEndian); + } + Payload::CustomSection(reader) if reader.name() == ".debug_str_offsets" => { + dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(gimli::EndianSlice::new( + reader.data(), + gimli::LittleEndian, + )); + } + Payload::CustomSection(reader) if reader.name() == ".debug_types" => { + dwarf.debug_types = gimli::DebugTypes::new(reader.data(), gimli::LittleEndian); + } + Payload::CustomSection(_) => {} Payload::CodeSectionStart { .. } => {} Payload::Version { .. } => {} Payload::ElementSection(reader) => { diff --git a/src/ir.rs b/src/ir.rs index 5fafc10..c7d10ad 100644 --- a/src/ir.rs +++ b/src/ir.rs @@ -69,3 +69,5 @@ mod value; pub use value::*; mod display; pub use display::*; +mod debug; +pub use debug::*; diff --git a/src/ir/module.rs b/src/ir/module.rs index 077248c..ae2e75c 100644 --- a/src/ir/module.rs +++ b/src/ir/module.rs @@ -1,6 +1,6 @@ use super::{Func, FuncDecl, Global, Memory, ModuleDisplay, Signature, Table, Type}; use crate::entity::{EntityRef, EntityVec}; -use crate::ir::FunctionBody; +use crate::ir::{Debug, DebugMap, FunctionBody}; use crate::{backend, frontend}; use anyhow::Result; @@ -15,6 +15,8 @@ pub struct Module<'a> { pub exports: Vec, pub memories: EntityVec, pub start_func: Option, + pub debug: Debug, + pub debug_map: DebugMap, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -137,6 +139,8 @@ impl<'a> Module<'a> { exports: vec![], memories: EntityVec::default(), start_func: None, + debug: Debug::default(), + debug_map: DebugMap::default(), } } } From 75a6f9a8ce4b1eda1d47c6e2b22deab626820301 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 10 Feb 2023 21:04:01 -0800 Subject: [PATCH 2/6] Add missing ir/debug.rs. --- src/ir/debug.rs | 78 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 src/ir/debug.rs diff --git a/src/ir/debug.rs b/src/ir/debug.rs new file mode 100644 index 0000000..5602cca --- /dev/null +++ b/src/ir/debug.rs @@ -0,0 +1,78 @@ +//! Debug info (currently, source-location maps). + +use crate::declare_entity; +use crate::entity::EntityVec; +use addr2line::gimli; +use std::collections::hash_map::Entry as HashEntry; +use std::collections::HashMap; + +declare_entity!(SourceFile, "file"); +declare_entity!(SourceLoc, "loc"); + +#[derive(Clone, Debug, Default)] +pub struct Debug { + source_files: EntityVec, + source_file_dedup: HashMap, + source_locs: EntityVec, + source_loc_dedup: HashMap, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SourceLocData { + file: SourceFile, + line: u32, + col: u32, +} + +impl Debug { + pub fn intern_file(&mut self, path: &str) -> SourceFile { + if let Some(id) = self.source_file_dedup.get(path) { + return *id; + } + let id = self.source_files.push(path.to_owned()); + self.source_file_dedup.insert(path.to_owned(), id); + id + } + + pub fn intern_loc(&mut self, file: SourceFile, line: u32, col: u32) -> SourceLoc { + let data = SourceLocData { file, line, col }; + match self.source_loc_dedup.entry(data) { + HashEntry::Vacant(v) => { + let id = self.source_locs.push(data); + *v.insert(id) + } + HashEntry::Occupied(o) => *o.get(), + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct DebugMap { + tuples: Vec<(u32, u32, SourceLoc)>, +} + +impl DebugMap { + pub(crate) fn from_dwarf( + dwarf: gimli::Dwarf, + debug: &mut Debug, + ) -> DebugMap { + let ctx = addr2line::Context::from_dwarf(dwarf).unwrap(); + let mut tuples = vec![]; + + let mut locs = ctx.find_location_range(0, u64::MAX).unwrap(); + while let Some((start, end, loc)) = locs.next() { + let file = debug.intern_file(loc.file.unwrap_or("")); + let loc = debug.intern_loc(file, loc.line.unwrap_or(0), loc.column.unwrap_or(0)); + tuples.push((start as u32, end as u32, loc)); + } + + log::trace!("tuples:"); + for &(start, end, loc) in &tuples { + log::trace!(" {:x} - {:x}: {}", start, end, loc); + } + log::trace!("files: {:?}", debug.source_files); + log::trace!("locs: {:?}", debug.source_locs); + + DebugMap { tuples } + } +} From 0e958f5a4ba435a826ca95adc61a79e084e90121 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Sun, 12 Feb 2023 11:37:52 -0800 Subject: [PATCH 3/6] Fix debuginfo parsing: actually supply all needed sections to addr2line. --- src/frontend.rs | 39 +++++++++++++++++++++++++++++++++++++-- src/ir/debug.rs | 14 +++++++------- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/frontend.rs b/src/frontend.rs index 7a5bf97..ee6062f 100644 --- a/src/frontend.rs +++ b/src/frontend.rs @@ -21,11 +21,22 @@ pub fn wasm_to_ir(bytes: &[u8]) -> Result> { let parser = Parser::new(0); let mut next_func = 0; let mut dwarf = gimli::Dwarf::default(); + let mut extra_sections = ExtraSections::default(); for payload in parser.parse_all(bytes) { let payload = payload?; - handle_payload(&mut module, payload, &mut next_func, &mut dwarf)?; + handle_payload( + &mut module, + payload, + &mut next_func, + &mut dwarf, + &mut extra_sections, + )?; } - let debug_map = DebugMap::from_dwarf(dwarf, &mut module.debug); + dwarf.locations = + gimli::LocationLists::new(extra_sections.debug_loc, extra_sections.debug_loclists); + dwarf.ranges = + gimli::RangeLists::new(extra_sections.debug_ranges, extra_sections.debug_rnglists); + let debug_map = DebugMap::from_dwarf(dwarf, &mut module.debug)?; module.debug_map = debug_map; Ok(module) @@ -57,11 +68,20 @@ fn parse_init_expr<'a>(init_expr: &wasmparser::ConstExpr<'a>) -> Result { + debug_loc: gimli::DebugLoc>, + debug_loclists: gimli::DebugLocLists>, + debug_ranges: gimli::DebugRanges>, + debug_rnglists: gimli::DebugRngLists>, +} + fn handle_payload<'a>( module: &mut Module<'a>, payload: Payload<'a>, next_func: &mut usize, dwarf: &mut gimli::Dwarf>, + extra_sections: &mut ExtraSections<'a>, ) -> Result<()> { trace!("Wasm parser item: {:?}", payload); match payload { @@ -249,6 +269,21 @@ fn handle_payload<'a>( Payload::CustomSection(reader) if reader.name() == ".debug_types" => { dwarf.debug_types = gimli::DebugTypes::new(reader.data(), gimli::LittleEndian); } + Payload::CustomSection(reader) if reader.name() == ".debug_loc" => { + extra_sections.debug_loc = gimli::DebugLoc::new(reader.data(), gimli::LittleEndian); + } + Payload::CustomSection(reader) if reader.name() == ".debug_loclists" => { + extra_sections.debug_loclists = + gimli::DebugLocLists::new(reader.data(), gimli::LittleEndian); + } + Payload::CustomSection(reader) if reader.name() == ".debug_ranges" => { + extra_sections.debug_ranges = + gimli::DebugRanges::new(reader.data(), gimli::LittleEndian); + } + Payload::CustomSection(reader) if reader.name() == ".debug_rnglists" => { + extra_sections.debug_rnglists = + gimli::DebugRngLists::new(reader.data(), gimli::LittleEndian); + } Payload::CustomSection(_) => {} Payload::CodeSectionStart { .. } => {} Payload::Version { .. } => {} diff --git a/src/ir/debug.rs b/src/ir/debug.rs index 5602cca..7568243 100644 --- a/src/ir/debug.rs +++ b/src/ir/debug.rs @@ -55,8 +55,8 @@ impl DebugMap { pub(crate) fn from_dwarf( dwarf: gimli::Dwarf, debug: &mut Debug, - ) -> DebugMap { - let ctx = addr2line::Context::from_dwarf(dwarf).unwrap(); + ) -> anyhow::Result { + let ctx = addr2line::Context::from_dwarf(dwarf)?; let mut tuples = vec![]; let mut locs = ctx.find_location_range(0, u64::MAX).unwrap(); @@ -66,13 +66,13 @@ impl DebugMap { tuples.push((start as u32, end as u32, loc)); } - log::trace!("tuples:"); + println!("tuples:"); for &(start, end, loc) in &tuples { - log::trace!(" {:x} - {:x}: {}", start, end, loc); + println!(" {:x} - {:x}: {}", start, end, loc); } - log::trace!("files: {:?}", debug.source_files); - log::trace!("locs: {:?}", debug.source_locs); + println!("files: {:?}", debug.source_files); + println!("locs: {:?}", debug.source_locs); - DebugMap { tuples } + Ok(DebugMap { tuples }) } } From 19fc22d3a30332e60020a12a1aa294d7e43e4d86 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 13 Feb 2023 14:24:33 -0800 Subject: [PATCH 4/6] Use patched version of gimli --- Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 225f23d..559ba66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,3 +19,6 @@ rayon = "1.5" lazy_static = "1.4" libc = "0.2" addr2line = "0.19" + +[patch.crates-io] +gimli = { git = "https://github.com/philipc/gimli", branch = "line-tombstone" } From aac46663f0e11258cae91a6ee6ebee00443f1904 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 13 Feb 2023 14:30:25 -0800 Subject: [PATCH 5/6] Sort debuginfo tuples by starting address, and detect overlapping ranges --- src/ir/debug.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ir/debug.rs b/src/ir/debug.rs index 7568243..2589d97 100644 --- a/src/ir/debug.rs +++ b/src/ir/debug.rs @@ -65,10 +65,16 @@ impl DebugMap { let loc = debug.intern_loc(file, loc.line.unwrap_or(0), loc.column.unwrap_or(0)); tuples.push((start as u32, end as u32, loc)); } + tuples.sort(); println!("tuples:"); - for &(start, end, loc) in &tuples { - println!(" {:x} - {:x}: {}", start, end, loc); + let mut last = 0; + for &(start, len, loc) in &tuples { + if start < last { + println!(" WARNING: OVERLAP"); + } + last = start + len; + println!(" {:x} - {:x}: {}", start, start + len, loc); } println!("files: {:?}", debug.source_files); println!("locs: {:?}", debug.source_locs); From 19392d05b04fe899e106ff2ecccb1419a434bd95 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 13 Feb 2023 16:17:17 -0800 Subject: [PATCH 6/6] Add debug-loc info --- src/frontend.rs | 46 +++++++++++++++++++++++++++++++++++++++------- src/ir/debug.rs | 47 +++++++++++++++++++++++++++++++---------------- src/ir/display.rs | 17 ++++++++++++++--- src/ir/func.rs | 4 ++++ 4 files changed, 88 insertions(+), 26 deletions(-) diff --git a/src/frontend.rs b/src/frontend.rs index ee6062f..891dcff 100644 --- a/src/frontend.rs +++ b/src/frontend.rs @@ -357,6 +357,33 @@ fn handle_payload<'a>( Ok(()) } +struct DebugLocReader<'a> { + locs: &'a [(u32, u32, SourceLoc)], +} + +impl<'a> DebugLocReader<'a> { + fn new(module: &'a Module, offset: usize) -> Self { + DebugLocReader { + locs: module.debug_map.locs_from_offset(offset), + } + } + + fn get_loc(&mut self, offset: usize) -> SourceLoc { + let offset = u32::try_from(offset).unwrap(); + while self.locs.len() > 0 { + let (start, len, loc) = self.locs[0]; + if offset < start { + break; + } + if offset > (start + len) { + self.locs = &self.locs[1..]; + } + return loc; + } + SourceLoc::invalid() + } +} + pub(crate) fn parse_body<'a>( module: &'a Module, my_sig: Signature, @@ -364,6 +391,9 @@ pub(crate) fn parse_body<'a>( ) -> Result { let mut ret: FunctionBody = FunctionBody::default(); + let start_offset = body.range().start; + let mut debug_locs = DebugLocReader::new(module, start_offset); + for ¶m in &module.signatures[my_sig].params[..] { ret.locals.push(param.into()); } @@ -409,17 +439,18 @@ pub(crate) fn parse_body<'a>( } let ops = body.get_operators_reader()?; - for op in ops.into_iter() { - let op = op?; + for item in ops.into_iter_with_offsets() { + let (op, offset) = item?; + let loc = debug_locs.get_loc(offset); if builder.reachable { - builder.handle_op(op)?; + builder.handle_op(op, loc)?; } else { builder.handle_op_unreachable(op)?; } } if builder.reachable { - builder.handle_op(wasmparser::Operator::Return)?; + builder.handle_op(wasmparser::Operator::Return, SourceLoc::invalid())?; } for block in builder.body.blocks.iter() { @@ -878,7 +909,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { } } - fn handle_op(&mut self, op: wasmparser::Operator<'a>) -> Result<()> { + fn handle_op(&mut self, op: wasmparser::Operator<'a>, loc: SourceLoc) -> Result<()> { trace!("handle_op: {:?}", op); trace!("op_stack = {:?}", self.op_stack); trace!("ctrl_stack = {:?}", self.ctrl_stack); @@ -1089,7 +1120,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { | wasmparser::Operator::TableSet { .. } | wasmparser::Operator::TableGrow { .. } | wasmparser::Operator::TableSize { .. } => { - self.emit(Operator::try_from(&op).unwrap())? + self.emit(Operator::try_from(&op).unwrap(), loc)? } wasmparser::Operator::Nop => {} @@ -1610,7 +1641,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { } } - fn emit(&mut self, op: Operator) -> Result<()> { + fn emit(&mut self, op: Operator, loc: SourceLoc) -> Result<()> { let inputs = op_inputs(self.module, &self.op_stack[..], &op)?; let outputs = op_outputs(self.module, &self.op_stack[..], &op)?; @@ -1641,6 +1672,7 @@ impl<'a, 'b> FunctionBodyBuilder<'a, 'b> { if self.reachable { self.body.append_to_block(self.cur_block, value); } + self.body.source_locs[value] = loc; if n_outputs == 1 { let output_ty = outputs[0]; diff --git a/src/ir/debug.rs b/src/ir/debug.rs index 2589d97..969de8c 100644 --- a/src/ir/debug.rs +++ b/src/ir/debug.rs @@ -5,23 +5,24 @@ use crate::entity::EntityVec; use addr2line::gimli; use std::collections::hash_map::Entry as HashEntry; use std::collections::HashMap; +use std::convert::TryFrom; declare_entity!(SourceFile, "file"); declare_entity!(SourceLoc, "loc"); #[derive(Clone, Debug, Default)] pub struct Debug { - source_files: EntityVec, + pub source_files: EntityVec, source_file_dedup: HashMap, - source_locs: EntityVec, + pub source_locs: EntityVec, source_loc_dedup: HashMap, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct SourceLocData { - file: SourceFile, - line: u32, - col: u32, + pub file: SourceFile, + pub line: u32, + pub col: u32, } impl Debug { @@ -60,25 +61,39 @@ impl DebugMap { let mut tuples = vec![]; let mut locs = ctx.find_location_range(0, u64::MAX).unwrap(); - while let Some((start, end, loc)) = locs.next() { + while let Some((start, len, loc)) = locs.next() { let file = debug.intern_file(loc.file.unwrap_or("")); let loc = debug.intern_loc(file, loc.line.unwrap_or(0), loc.column.unwrap_or(0)); - tuples.push((start as u32, end as u32, loc)); + tuples.push((start as u32, len as u32, loc)); } tuples.sort(); - println!("tuples:"); let mut last = 0; - for &(start, len, loc) in &tuples { - if start < last { - println!(" WARNING: OVERLAP"); + tuples.retain(|&(start, len, _)| { + let retain = start >= last; + if retain { + last = start + len; } - last = start + len; - println!(" {:x} - {:x}: {}", start, start + len, loc); - } - println!("files: {:?}", debug.source_files); - println!("locs: {:?}", debug.source_locs); + retain + }); Ok(DebugMap { tuples }) } + + pub(crate) fn locs_from_offset<'a>(&'a self, offset: usize) -> &'a [(u32, u32, SourceLoc)] { + let offset = u32::try_from(offset).unwrap(); + let start = match self.tuples.binary_search_by(|&(start, len, _)| { + if offset < start { + std::cmp::Ordering::Greater + } else if offset >= (start + len) { + std::cmp::Ordering::Less + } else { + std::cmp::Ordering::Equal + } + }) { + Ok(idx) => idx, + Err(first_after) => first_after, + }; + &self.tuples[start..] + } } diff --git a/src/ir/display.rs b/src/ir/display.rs index c717ba0..6a8dbf1 100644 --- a/src/ir/display.rs +++ b/src/ir/display.rs @@ -78,7 +78,7 @@ impl<'a> Display for FunctionBodyDisplay<'a> { .iter() .map(|(ty, val)| format!("{}: {}", val, ty)) .collect::>(); - writeln!(f, "{} {}({}):", self.1, block_id, block_params.join(", "))?; + writeln!(f, "{} {}({}):", self.1, block_id, block_params.join(", "),)?; writeln!( f, "{} # preds: {}", @@ -116,12 +116,13 @@ impl<'a> Display for FunctionBodyDisplay<'a> { let tys = tys.iter().map(|&ty| format!("{}", ty)).collect::>(); writeln!( f, - "{} {} = {} {} # {}", + "{} {} = {} {} # {} @{}", self.1, inst, op, args.join(", "), - tys.join(", ") + tys.join(", "), + self.0.source_locs[inst], )?; } ValueDef::PickOutput(val, idx, ty) => { @@ -243,6 +244,16 @@ impl<'a> Display for ModuleDisplay<'a> { } } } + for (loc, loc_data) in self.0.debug.source_locs.entries() { + writeln!( + f, + " {} = {} line {} column {}", + loc, loc_data.file, loc_data.line, loc_data.col + )?; + } + for (file, file_name) in self.0.debug.source_files.entries() { + writeln!(f, " {} = \"{}\"", file, file_name)?; + } writeln!(f, "}}")?; Ok(()) } diff --git a/src/ir/func.rs b/src/ir/func.rs index cf76637..5aa0be4 100644 --- a/src/ir/func.rs +++ b/src/ir/func.rs @@ -2,6 +2,7 @@ use super::{Block, FunctionBodyDisplay, Local, Module, Signature, Type, Value, V use crate::cfg::CFGInfo; use crate::entity::{EntityRef, EntityVec, PerEntity}; use crate::frontend::parse_body; +use crate::ir::SourceLoc; use anyhow::Result; #[derive(Clone, Debug)] @@ -99,6 +100,8 @@ pub struct FunctionBody { pub value_blocks: PerEntity, /// Wasm locals that values correspond to, if any. pub value_locals: PerEntity>, + /// Debug source locations of each value. + pub source_locs: PerEntity, } impl FunctionBody { @@ -124,6 +127,7 @@ impl FunctionBody { values, value_blocks, value_locals: PerEntity::default(), + source_locs: PerEntity::default(), } }