breakpad/src/common/mac/dump_syms.cc
Ben Hamilton 9cc38fec8b [dump_syms/Mac] New -n MODULE arg to Mac dump_syms
Previously, dump_syms always used the basename of the on-disk file as
the Breakpad module name and required that the on-disk filename of the dSYM and binary file match, or it would exit with an error.

Build automation often uses filenames unrelated to the Breakpad module
name, so this CL adds a new optional "-n MODULE" argument to Mac
dump_syms that allows passing in the Breakpad module name from outside.

In this case, the basename of the on-disk file(s) is ignored and
no longer required to match.

Change-Id: Ic38e8cf762c79bce61d289b397293eff6c0039ce
Bug: b/273531493
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/4338857
Reviewed-by: Robert Sesek <rsesek@chromium.org>
2023-03-20 18:43:06 +00:00

692 lines
24 KiB
C++

// -*- mode: c++ -*-
// Copyright 2011 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
// dump_syms.cc: Create a symbol file for use with minidumps
#ifdef HAVE_CONFIG_H
#include <config.h> // Must come first
#endif
#include "common/mac/dump_syms.h"
#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <mach-o/arch.h>
#include <mach-o/fat.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <ostream>
#include <string>
#include <vector>
#include "common/dwarf/bytereader-inl.h"
#include "common/dwarf/dwarf2reader.h"
#include "common/dwarf_cfi_to_module.h"
#include "common/dwarf_cu_to_module.h"
#include "common/dwarf_line_to_module.h"
#include "common/dwarf_range_list_handler.h"
#include "common/mac/file_id.h"
#include "common/mac/arch_utilities.h"
#include "common/mac/macho_reader.h"
#include "common/module.h"
#include "common/path_helper.h"
#include "common/scoped_ptr.h"
#include "common/stabs_reader.h"
#include "common/stabs_to_module.h"
#include "common/symbol_data.h"
#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
#endif // CPU_TYPE_ARM
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64 (static_cast<cpu_type_t>(16777228))
#endif // CPU_TYPE_ARM64
using google_breakpad::ByteReader;
using google_breakpad::DwarfCUToModule;
using google_breakpad::DwarfLineToModule;
using google_breakpad::DwarfRangeListHandler;
using google_breakpad::mach_o::FatReader;
using google_breakpad::mach_o::FileID;
using google_breakpad::mach_o::Section;
using google_breakpad::mach_o::Segment;
using google_breakpad::Module;
using google_breakpad::StabsReader;
using google_breakpad::StabsToModule;
using google_breakpad::scoped_ptr;
using std::make_pair;
using std::pair;
using std::string;
using std::vector;
namespace {
// Collect the path of every entry found in DIRECTORY, skipping the "."
// and ".." pseudo-entries. Each returned path is DIRECTORY (with a '/'
// appended if it does not already end in one) followed by the entry's
// name. An empty vector is returned when DIRECTORY cannot be opened.
vector<string> list_directory(const string& directory) {
  vector<string> result;
  DIR* handle = opendir(directory.c_str());
  if (handle == NULL)
    return result;

  // Build the prefix once; every entry name is appended to it below.
  string prefix(directory);
  if (prefix[prefix.length() - 1] != '/')
    prefix.push_back('/');

  for (struct dirent* item = readdir(handle); item != NULL;
       item = readdir(handle)) {
    const bool is_self = strcmp(item->d_name, ".") == 0;
    const bool is_parent = strcmp(item->d_name, "..") == 0;
    if (is_self || is_parent)
      continue;
    result.push_back(prefix + item->d_name);
  }

  closedir(handle);
  return result;
}
}
namespace google_breakpad {
// Load the object file named FILENAME into memory and pass it to
// ReadData(). If FILENAME is a directory containing
// Contents/Resources/DWARF (a dSYM bundle), the single file found in that
// directory is loaded instead. Every failure detected in this function is
// reported on stderr and makes it return false; otherwise the result of
// ReadData() is returned.
bool DumpSymbols::Read(const string& filename) {
  // Drop any object file selected earlier.
  selected_object_file_ = nullptr;
  struct stat st;
  if (stat(filename.c_str(), &st) == -1) {
    fprintf(stderr, "Could not access object file %s: %s\n",
            filename.c_str(), strerror(errno));
    return false;
  }

  from_disk_ = true;

  // Does this filename refer to a dSYM bundle?
  string contents_path = filename + "/Contents/Resources/DWARF";
  string object_filename;
  if (S_ISDIR(st.st_mode) &&
      access(contents_path.c_str(), F_OK) == 0) {
    // If there's one file under Contents/Resources/DWARF then use that,
    // otherwise bail out.
    const vector<string> entries = list_directory(contents_path);
    if (entries.size() == 0) {
      fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
              filename.c_str());
      return false;
    }
    if (entries.size() > 1) {
      fprintf(stderr, "Too many DWARF files in bundle: %s\n",
              filename.c_str());
      return false;
    }
    object_filename = entries[0];
  } else {
    object_filename = filename;
  }

  // Read the file's contents into memory.
  bool read_ok = true;
  string error;
  scoped_array<uint8_t> contents;
  off_t total = 0;
  if (stat(object_filename.c_str(), &st) == -1) {
    // The object file could not be examined; report it instead of
    // silently continuing with an empty buffer.
    read_ok = false;
    error = strerror(errno);
  } else {
    FILE* f = fopen(object_filename.c_str(), "rb");
    if (!f) {
      // Opening failed; record the reason and fail the read.
      read_ok = false;
      error = strerror(errno);
    } else {
      contents.reset(new uint8_t[st.st_size]);
      while (total < st.st_size && !feof(f)) {
        size_t read = fread(&contents[0] + total, 1, st.st_size - total, f);
        if (read == 0) {
          // Nothing was read: either a read error or end of file.
          if (ferror(f)) {
            read_ok = false;
            error = strerror(errno);
          }
          break;
        }
        total += read;
      }
      fclose(f);
    }
  }

  if (!read_ok) {
    fprintf(stderr, "Error reading object file: %s: %s\n",
            object_filename.c_str(), error.c_str());
    return false;
  }

  // ReadData() receives the released buffer together with the number of
  // bytes actually read.
  return ReadData(contents.release(), total, object_filename);
}
bool DumpSymbols::ReadData(uint8_t* contents, size_t size,
const std::string& filename) {
contents_.reset(contents);
size_ = size;
object_filename_ = filename;
// Get the list of object files present in the file.
FatReader::Reporter fat_reporter(object_filename_);
FatReader fat_reader(&fat_reporter);
if (!fat_reader.Read(contents_.get(), size)) {
return false;
}
// Get our own copy of fat_reader's object file list.
size_t object_files_count;
const SuperFatArch* object_files =
fat_reader.object_files(&object_files_count);
if (object_files_count == 0) {
fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
object_filename_.c_str());
return false;
}
object_files_.resize(object_files_count);
memcpy(&object_files_[0], object_files,
sizeof(SuperFatArch) * object_files_count);
return true;
}
bool DumpSymbols::SetArchitecture(const ArchInfo& info) {
// Find the best match for the architecture the user requested.
const SuperFatArch* best_match =
FindBestMatchForArchitecture(info.cputype, info.cpusubtype);
if (!best_match) return false;
// Record the selected object file.
selected_object_file_ = best_match;
return true;
}
// Return a pointer to the element of object_files_ that best matches
// CPU_TYPE and CPU_SUBTYPE, or NULL (after printing a diagnostic) when
// there is none. NXFindBestFatArch is consulted first when every object
// file converts to struct fat_arch; an exact comparison of the cpu type
// and the subtype (ignoring the CPU_SUBTYPE_MASK bits) is the fallback.
SuperFatArch* DumpSymbols::FindBestMatchForArchitecture(
    cpu_type_t cpu_type, cpu_subtype_t cpu_subtype) {
  // Convert every object file to struct fat_arch, giving up at the first
  // one that cannot be converted.
  vector<struct fat_arch> converted;
  bool all_converted = true;
  for (size_t i = 0; i < object_files_.size(); ++i) {
    const SuperFatArch& object_file = object_files_[i];
    struct fat_arch arch;
    if (!object_file.ConvertToFatArch(&arch)) {
      all_converted = false;
      break;
    }
    converted.push_back(arch);
  }

  // When the conversion succeeded for all of them, ask NXFindBestFatArch.
  if (all_converted) {
    const struct fat_arch* chosen = NXFindBestFatArch(
        cpu_type, cpu_subtype, &converted[0],
        static_cast<uint32_t>(converted.size()));
    // Map the chosen fat_arch back to the object file at the same index.
    for (size_t i = 0; i < converted.size(); ++i) {
      if (&converted[i] == chosen)
        return &object_files_[i];
    }
    assert(chosen == NULL);
    // Fall through since NXFindBestFatArch can't find arm slices on x86_64
    // macOS 13. See FB11955188.
  }

  // Look for an object file whose cpu type and subtype match exactly.
  for (size_t i = 0; i < object_files_.size(); ++i) {
    SuperFatArch& object_file = object_files_[i];
    if (static_cast<cpu_type_t>(object_file.cputype) != cpu_type)
      continue;
    if ((static_cast<cpu_subtype_t>(object_file.cpusubtype) &
         ~CPU_SUBTYPE_MASK) == (cpu_subtype & ~CPU_SUBTYPE_MASK))
      return &object_file;
  }

  // No exact match found.
  // TODO(erikchen): If it becomes necessary, we can copy the implementation of
  // NXFindBestFatArch, located at
  // http://web.mit.edu/darwin/src/modules/cctools/libmacho/arch.c.
  fprintf(stderr, "Failed to find an exact match for an object file with cpu "
          "type: %d and cpu subtype: %d.\n", cpu_type, cpu_subtype);
  if (!all_converted) {
    fprintf(stderr, "Furthermore, at least one object file is larger "
            "than 2**32.\n");
  }
  return NULL;
}
// Compute the identifier string for the selected object file: the text
// produced by FileID::ConvertIdentifierToString with every '-' removed and
// a "0" appended. If no object file is selected yet, CreateEmptyModule()
// is used to select one. Returns an empty string on failure.
string DumpSymbols::Identifier() {
  // The FileID is built from the file on disk or from the in-memory
  // buffer, depending on from_disk_.
  scoped_ptr<FileID> id_source;
  if (from_disk_)
    id_source.reset(new FileID(object_filename_.c_str()));
  else
    id_source.reset(new FileID(contents_.get(), size_));

  // The module created here is discarded when this function returns; the
  // call is made only when no object file has been selected yet.
  scoped_ptr<Module> scratch_module;
  if (!selected_object_file_ && !CreateEmptyModule(scratch_module))
    return string();

  unsigned char raw_id[16];
  const cpu_type_t cpu_type = selected_object_file_->cputype;
  const cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype;
  if (!id_source->MachoIdentifier(cpu_type, cpu_subtype, raw_id)) {
    fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
            object_filename_.c_str());
    return string();
  }

  char dashed[40];
  FileID::ConvertIdentifierToString(raw_id, dashed, sizeof(dashed));

  // Strip the dashes out of the textual form.
  string compacted(dashed);
  size_t dash = compacted.find('-');
  while (dash != string::npos) {
    compacted.erase(dash, 1);
    dash = compacted.find('-', dash);
  }

  // The pdb for these IDs has an extra byte, so to make everything uniform put
  // a 0 on the end of mac IDs.
  compacted += "0";
  return compacted;
}
// RangesHandler implementation for DwarfCUToModule: each ReadRanges()
// call runs a RangeListReader over the requested range list and lets a
// DwarfRangeListHandler store the results in the caller's range vector.
class DumpSymbols::DumperRangesHandler
    : public DwarfCUToModule::RangesHandler {
 public:
  // READER is the ByteReader handed to every RangeListReader created by
  // this handler.
  DumperRangesHandler(ByteReader* reader) : reader_(reader) {}

  // Read the range list identified by FORM and DATA, using CU_INFO, and
  // populate RANGES. Returns whatever RangeListReader::ReadRanges returns.
  bool ReadRanges(enum DwarfForm form, uint64_t data,
                  RangeListReader::CURangesInfo* cu_info,
                  vector<Module::Range>* ranges) {
    DwarfRangeListHandler sink(ranges);
    RangeListReader list_reader(reader_, cu_info, &sink);
    return list_reader.ReadRanges(form, data);
  }

 private:
  ByteReader* reader_;
};
// LineToModuleHandler implementation for DwarfCUToModule: ReadProgram()
// parses a line number program with LineInfo and lets a DwarfLineToModule
// handler fill in the given Module, line vector and file map.
class DumpSymbols::DumperLineToModule
    : public DwarfCUToModule::LineToModuleHandler {
 public:
  // Create a line-to-module converter that parses with BYTE_READER.
  DumperLineToModule(ByteReader* byte_reader) : byte_reader_(byte_reader) {}

  // Remember COMPILATION_DIR; it is passed to the DwarfLineToModule
  // handler created by subsequent ReadProgram() calls.
  void StartCompilationUnit(const string& compilation_dir) {
    compilation_dir_ = compilation_dir;
  }

  // Parse the LENGTH-byte line number program at PROGRAM, recording the
  // results in MODULE, LINES and FILES. The string-section arguments are
  // not forwarded: LineInfo is given null pointers and zero lengths.
  void ReadProgram(const uint8_t* program,
                   uint64_t length,
                   const uint8_t* string_section,
                   uint64_t string_section_length,
                   const uint8_t* line_string_section,
                   uint64_t line_string_section_length,
                   Module* module,
                   vector<Module::Line>* lines,
                   std::map<uint32_t, Module::File*>* files) {
    DwarfLineToModule line_handler(module, compilation_dir_, lines, files);
    LineInfo line_parser(program, length, byte_reader_,
                         nullptr, 0,
                         nullptr, 0,
                         &line_handler);
    line_parser.Start();
  }

 private:
  string compilation_dir_;
  ByteReader* byte_reader_;  // WEAK
};
// Build an empty Module describing the selected object file and store it
// in MODULE. If no object file has been selected, pick the only one
// present or, failing that, the one matching the local architecture.
// Also sets selected_object_name_, the name used in diagnostics. Returns
// false (leaving MODULE untouched) if no object file can be selected, the
// architecture name is unknown, or no identifier can be computed.
bool DumpSymbols::CreateEmptyModule(scoped_ptr<Module>& module) {
  // Select an object file, if SetArchitecture hasn't been called to set one
  // explicitly.
  if (!selected_object_file_) {
    // If there's only one architecture, that's the one.
    if (object_files_.size() == 1) {
      selected_object_file_ = &object_files_[0];
    } else {
      // Look for an object file whose architecture matches our own.
      ArchInfo local_arch = GetLocalArchInfo();
      if (!SetArchitecture(local_arch)) {
        fprintf(stderr, "%s: object file contains more than one"
                " architecture, none of which match the current"
                " architecture; specify an architecture explicitly"
                " with '-a ARCH' to resolve the ambiguity\n",
                object_filename_.c_str());
        return false;
      }
    }
  }

  assert(selected_object_file_);

  // Find the name of the selected file's architecture, to appear in
  // the MODULE record and in error messages.
  const char* selected_arch_name = GetNameFromCPUType(
      selected_object_file_->cputype, selected_object_file_->cpusubtype);

  // In certain cases, it is possible that architecture info can't be reliably
  // determined, e.g. new architectures that breakpad is unaware of. In that
  // case, avoid crashing and return false instead.
  if (selected_arch_name == kUnknownArchName) {
    return false;
  }

  if (strcmp(selected_arch_name, "i386") == 0)
    selected_arch_name = "x86";

  // Produce a name to use in error messages that includes the
  // filename, and the architecture, if there is more than one.
  selected_object_name_ = object_filename_;
  if (object_files_.size() > 1) {
    selected_object_name_ += ", architecture ";
    // Append in place; a plain '+' would build a temporary and discard it,
    // leaving the architecture out of the name.
    selected_object_name_ += selected_arch_name;
  }

  // Compute a module name, to appear in the MODULE record: the explicitly
  // supplied module_name_ when there is one, otherwise the basename of the
  // object file.
  string module_name;
  if (!module_name_.empty()) {
    module_name = module_name_;
  } else {
    module_name = google_breakpad::BaseName(object_filename_);
  }

  // Choose an identifier string, to appear in the MODULE record.
  string identifier = Identifier();
  if (identifier.empty())
    return false;

  // Create a module to hold the debugging information.
  module.reset(new Module(module_name, "mac", selected_arch_name, identifier,
                          "", enable_multiple_));
  return true;
}
// Parse the DWARF data held in DWARF_SECTIONS and add what is found to
// MODULE. MACHO_READER supplies the byte order; HANDLE_INTER_CU_REFS is
// passed on to the DwarfCUToModule file context. If there is no
// __debug_info section, a message is printed and nothing is added.
void DumpSymbols::ReadDwarf(google_breakpad::Module* module,
                            const mach_o::Reader& macho_reader,
                            const mach_o::SectionMap& dwarf_sections,
                            bool handle_inter_cu_refs) const {
  // The byte reader must agree with the file's endianness.
  ByteReader byte_reader(macho_reader.big_endian() ? ENDIANNESS_BIG
                                                   : ENDIANNESS_LITTLE);

  // Per-file context shared by all compilation units.
  DwarfCUToModule::FileContext file_context(selected_object_name_, module,
                                            handle_inter_cu_refs);

  // Register every Mach-O section with the context's own section map.
  for (const auto& entry : dwarf_sections) {
    file_context.AddSectionToSectionMap(entry.first,
                                        entry.second.contents.start,
                                        entry.second.contents.Size());
  }

  // Without a __debug_info section there is nothing to walk.
  const SectionMap& sections = file_context.section_map();
  SectionMap::const_iterator info_entry = sections.find("__debug_info");
  if (info_entry == sections.end()) {
    fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
            selected_object_name_.c_str());
    return;
  }
  const std::pair<const uint8_t*, uint64_t>& info_section = info_entry->second;

  // Helpers shared by the root handlers created below: one for line
  // programs, one for range lists.
  DumperLineToModule line_to_module(&byte_reader);
  DumperRangesHandler ranges_handler(&byte_reader);

  // Visit the compilation units in __debug_info one after another.
  const uint64_t info_length = info_section.second;
  uint64_t offset = 0;
  while (offset < info_length) {
    // Root DIE handler that populates MODULE with this unit's debug info.
    DwarfCUToModule::WarningReporter reporter(selected_object_name_, offset);
    DwarfCUToModule root_handler(&file_context, &line_to_module,
                                 &ranges_handler, &reporter,
                                 symbol_data_ & INLINES);
    // Dwarf2Handler that drives the DIE handler.
    DIEDispatcher die_dispatcher(&root_handler);
    // DWARF parser for the compilation unit starting at OFFSET.
    CompilationUnit dwarf_reader(selected_object_name_, sections, offset,
                                 &byte_reader, &die_dispatcher);
    // Parse the whole unit; Start() yields the amount to advance by.
    offset += dwarf_reader.Start();
  }
}
// Parse the call frame information stored in SECTION and add it to
// MODULE. EH_FRAME is forwarded to the CallFrameInfo parser. Returns false
// (after printing a message) when MACHO_READER's cpu type has no register
// name table; returns true otherwise, whatever the parser's outcome.
bool DumpSymbols::ReadCFI(google_breakpad::Module* module,
                          const mach_o::Reader& macho_reader,
                          const mach_o::Section& section,
                          bool eh_frame) const {
  // Pick the register name table for this file's architecture.
  vector<string> register_names;
  switch (macho_reader.cpu_type()) {
    case CPU_TYPE_X86:
      register_names = DwarfCFIToModule::RegisterNames::I386();
      break;
    case CPU_TYPE_X86_64:
      register_names = DwarfCFIToModule::RegisterNames::X86_64();
      break;
    case CPU_TYPE_ARM:
      register_names = DwarfCFIToModule::RegisterNames::ARM();
      break;
    case CPU_TYPE_ARM64:
      register_names = DwarfCFIToModule::RegisterNames::ARM64();
      break;
    default: {
      const char* arch_name = GetNameFromCPUType(macho_reader.cpu_type(),
                                                 macho_reader.cpu_subtype());
      fprintf(
          stderr,
          "%s: cannot convert DWARF call frame information for architecture "
          "'%s' (%d, %d) to Breakpad symbol file: no register name table\n",
          selected_object_name_.c_str(), arch_name, macho_reader.cpu_type(),
          macho_reader.cpu_subtype());
      return false;
    }
  }

  // Locate the raw CFI bytes.
  const uint8_t* cfi_start = section.contents.start;
  size_t cfi_length = section.contents.Size();

  // Handler side: turns parsed CFI into Module records.
  DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
                                             section.section_name);
  DwarfCFIToModule handler(module, register_names, &module_reporter);

  // Reader side: byte order and address size come from the Mach-O file.
  ByteReader byte_reader(macho_reader.big_endian() ? ENDIANNESS_BIG
                                                   : ENDIANNESS_LITTLE);
  byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);
  // At the moment, according to folks at Apple and some cursory
  // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so
  // this is the only base address the CFI parser will need.
  byte_reader.SetCFIDataBase(section.address, cfi_start);

  CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
                                         section.section_name);
  CallFrameInfo parser(cfi_start, cfi_length, &byte_reader, &handler,
                       &dwarf_reporter, eh_frame);
  parser.Start();
  return true;
}
// Load command handler used by ReadSymbolData(): it receives the segment
// and symbol table commands of a Mach-O file and feeds the debugging data
// they carry into a Module.
class DumpSymbols::LoadCommandDumper
    : public mach_o::Reader::LoadCommandHandler {
 public:
  // Build a dumper that handles load commands from READER's file on behalf
  // of DUMPER and adds data to MODULE. SYMBOL_DATA selects which kinds of
  // data are read; HANDLE_INTER_CU_REFS is passed through to ReadDwarf().
  LoadCommandDumper(const DumpSymbols& dumper,
                    google_breakpad::Module* module,
                    const mach_o::Reader& reader,
                    SymbolData symbol_data,
                    bool handle_inter_cu_refs)
      : dumper_(dumper),
        module_(module),
        reader_(reader),
        symbol_data_(symbol_data),
        handle_inter_cu_refs_(handle_inter_cu_refs) {}

  // Handlers for segment and symbol table load commands.
  bool SegmentCommand(const mach_o::Segment& segment);
  bool SymtabCommand(const ByteBuffer& entries, const ByteBuffer& strings);

 private:
  const DumpSymbols& dumper_;
  google_breakpad::Module* module_;  // WEAK
  const mach_o::Reader& reader_;
  const SymbolData symbol_data_;
  const bool handle_inter_cu_refs_;
};
bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment& segment) {
mach_o::SectionMap section_map;
if (!reader_.MapSegmentSections(segment, &section_map))
return false;
if (segment.name == "__TEXT") {
module_->SetLoadAddress(segment.vmaddr);
if (symbol_data_ & CFI) {
mach_o::SectionMap::const_iterator eh_frame =
section_map.find("__eh_frame");
if (eh_frame != section_map.end()) {
// If there is a problem reading this, don't treat it as a fatal error.
dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
}
}
return true;
}
if (segment.name == "__DWARF") {
if ((symbol_data_ & SYMBOLS_AND_FILES) || (symbol_data_ & INLINES)) {
dumper_.ReadDwarf(module_, reader_, section_map, handle_inter_cu_refs_);
}
if (symbol_data_ & CFI) {
mach_o::SectionMap::const_iterator debug_frame
= section_map.find("__debug_frame");
if (debug_frame != section_map.end()) {
// If there is a problem reading this, don't treat it as a fatal error.
dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
}
}
}
return true;
}
bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer& entries,
const ByteBuffer& strings) {
StabsToModule stabs_to_module(module_);
// Mac OS X STABS are never "unitized", and the size of the 'value' field
// matches the address size of the executable.
StabsReader stabs_reader(entries.start, entries.Size(),
strings.start, strings.Size(),
reader_.big_endian(),
reader_.bits_64() ? 8 : 4,
true,
&stabs_to_module);
if (!stabs_reader.Process())
return false;
stabs_to_module.Finalize();
return true;
}
// Create a Module for the selected object file, fill it by walking the
// file's load commands, and hand it to the caller through OUT_MODULE
// (released from the local scoped_ptr). Returns false, without writing
// OUT_MODULE, if any step fails.
bool DumpSymbols::ReadSymbolData(Module** out_module) {
  scoped_ptr<Module> module;
  if (!CreateEmptyModule(module))
    return false;

  // Parse the slice of the buffer that holds the selected object file.
  mach_o::Reader::Reporter reporter(selected_object_name_);
  mach_o::Reader reader(&reporter);
  const bool parsed = reader.Read(&contents_[0] + selected_object_file_->offset,
                                  selected_object_file_->size,
                                  selected_object_file_->cputype,
                                  selected_object_file_->cpusubtype);
  if (!parsed)
    return false;

  // Let the load command dumper pull debugging data into the module.
  LoadCommandDumper command_dumper(*this, module.get(), reader, symbol_data_,
                                   handle_inter_cu_refs_);
  const bool walked = reader.WalkLoadCommands(&command_dumper);
  if (!walked)
    return false;

  *out_module = module.release();
  return true;
}
// Read the selected object file's debugging information, and write out the
// header only to |stream|. Return true on success; if an error occurs, report
// it and return false.
bool DumpSymbols::WriteSymbolFileHeader(std::ostream& stream) {
scoped_ptr<Module> module;
if (!CreateEmptyModule(module))
return false;
return module->Write(stream, symbol_data_);
}
} // namespace google_breakpad