breakpad/src/common/mac/dump_syms.cc

// -*- mode: c++ -*-

// Copyright 2011 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// dump_syms.cc: Create a symbol file for use with minidumps

#ifdef HAVE_CONFIG_H
#include <config.h>  // Must come first
#endif

#include "common/mac/dump_syms.h"

#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <mach-o/arch.h>
#include <mach-o/fat.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <ostream>
#include <string>
#include <vector>

#include "common/dwarf/bytereader-inl.h"
#include "common/dwarf/dwarf2reader.h"
#include "common/dwarf_cfi_to_module.h"
#include "common/dwarf_cu_to_module.h"
#include "common/dwarf_line_to_module.h"
#include "common/dwarf_range_list_handler.h"
#include "common/mac/file_id.h"
#include "common/mac/arch_utilities.h"
#include "common/mac/macho_reader.h"
#include "common/module.h"
#include "common/path_helper.h"
#include "common/scoped_ptr.h"
#include "common/stabs_reader.h"
#include "common/stabs_to_module.h"
#include "common/symbol_data.h"

#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
#endif //  CPU_TYPE_ARM

#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64 (static_cast<cpu_type_t>(16777228))
#endif  // CPU_TYPE_ARM64

using google_breakpad::ByteReader;
using google_breakpad::DwarfCUToModule;
using google_breakpad::DwarfLineToModule;
using google_breakpad::DwarfRangeListHandler;
using google_breakpad::mach_o::FatReader;
using google_breakpad::mach_o::FileID;
using google_breakpad::mach_o::Section;
using google_breakpad::mach_o::Segment;
using google_breakpad::Module;
using google_breakpad::StabsReader;
using google_breakpad::StabsToModule;
using google_breakpad::scoped_ptr;
using std::make_pair;
using std::pair;
using std::string;
using std::vector;

namespace {
// Return a vector<string> with absolute paths to all the entries
// in directory (excluding . and ..).
vector<string> list_directory(const string& directory) {
  vector<string> entries;
  DIR* dir = opendir(directory.c_str());
  if (!dir) {
    return entries;
  }

  string path = directory;
  if (path[path.length() - 1] != '/') {
    path += '/';
  }

  struct dirent* entry = NULL;
  while ((entry = readdir(dir))) {
    if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
      entries.push_back(path + entry->d_name);
    }
  }

  closedir(dir);
  return entries;
}
}

namespace google_breakpad {

bool DumpSymbols::Read(const string& filename) {
  selected_object_file_ = nullptr;
  struct stat st;
  if (stat(filename.c_str(), &st) == -1) {
    fprintf(stderr, "Could not access object file %s: %s\n",
            filename.c_str(), strerror(errno));
    return false;
  }

  from_disk_ = true;

  // Does this filename refer to a dSYM bundle?
  string contents_path = filename + "/Contents/Resources/DWARF";
  string object_filename;
  if (S_ISDIR(st.st_mode) &&
      access(contents_path.c_str(), F_OK) == 0) {
    // If there's one file under Contents/Resources/DWARF then use that,
    // otherwise bail out.
    const vector<string> entries = list_directory(contents_path);
    if (entries.size() == 0) {
      fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
              filename.c_str());
      return false;
    }
    if (entries.size() > 1) {
      fprintf(stderr, "Too many DWARF files in bundle: %s\n",
              filename.c_str());
      return false;
    }

    object_filename = entries[0];
  } else {
    object_filename = filename;
  }

  // Read the file's contents into memory.
  bool read_ok = true;
  string error;
  scoped_array<uint8_t> contents;
  off_t total = 0;
  if (stat(object_filename.c_str(), &st) != -1) {
    FILE* f = fopen(object_filename.c_str(), "rb");
    if (f) {
      contents.reset(new uint8_t[st.st_size]);
      while (total < st.st_size && !feof(f)) {
        size_t read = fread(&contents[0] + total, 1, st.st_size - total, f);
        if (read == 0) {
          if (ferror(f)) {
            read_ok = false;
            error = strerror(errno);
          }
          break;
        }
        total += read;
      }
      fclose(f);
    } else {
      error = strerror(errno);
    }
  }

  if (!read_ok) {
    fprintf(stderr, "Error reading object file: %s: %s\n",
            object_filename.c_str(), error.c_str());
    return false;
  }
  return ReadData(contents.release(), total, object_filename);
}

bool DumpSymbols::ReadData(uint8_t* contents, size_t size,
                           const std::string& filename) {
  contents_.reset(contents);
  size_ = size;
  object_filename_ = filename;

  // Get the list of object files present in the file.
  FatReader::Reporter fat_reporter(object_filename_);
  FatReader fat_reader(&fat_reporter);
  if (!fat_reader.Read(contents_.get(), size)) {
    return false;
  }

  // Get our own copy of fat_reader's object file list.
  size_t object_files_count;
  const SuperFatArch* object_files =
    fat_reader.object_files(&object_files_count);
  if (object_files_count == 0) {
    fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
            object_filename_.c_str());
    return false;
  }
  object_files_.resize(object_files_count);
  memcpy(&object_files_[0], object_files,
         sizeof(SuperFatArch) * object_files_count);

  return true;
}

bool DumpSymbols::SetArchitecture(const ArchInfo& info) {
  // Find the best match for the architecture the user requested.
  const SuperFatArch* best_match =
      FindBestMatchForArchitecture(info.cputype, info.cpusubtype);
  if (!best_match) return false;

  // Record the selected object file.
  selected_object_file_ = best_match;
  return true;
}

SuperFatArch* DumpSymbols::FindBestMatchForArchitecture(
    cpu_type_t cpu_type, cpu_subtype_t cpu_subtype) {
  // Check if all the object files can be converted to struct fat_arch.
  bool can_convert_to_fat_arch = true;
  vector<struct fat_arch> fat_arch_vector;
  for (vector<SuperFatArch>::const_iterator it = object_files_.begin();
       it != object_files_.end();
       ++it) {
    struct fat_arch arch;
    bool success = it->ConvertToFatArch(&arch);
    if (!success) {
      can_convert_to_fat_arch = false;
      break;
    }
    fat_arch_vector.push_back(arch);
  }

  // If all the object files can be converted to struct fat_arch, use
  // NXFindBestFatArch.
  if (can_convert_to_fat_arch) {
    const struct fat_arch* best_match
      = NXFindBestFatArch(cpu_type, cpu_subtype, &fat_arch_vector[0],
                          static_cast<uint32_t>(fat_arch_vector.size()));

    for (size_t i = 0; i < fat_arch_vector.size(); ++i) {
      if (best_match == &fat_arch_vector[i])
        return &object_files_[i];
    }
    assert(best_match == NULL);
    // Fall through since NXFindBestFatArch can't find arm slices on x86_64
    // macOS 13. See FB11955188.
  }

  // Check for an exact match with cpu_type and cpu_subtype.
  for (vector<SuperFatArch>::iterator it = object_files_.begin();
       it != object_files_.end();
       ++it) {
    if (static_cast<cpu_type_t>(it->cputype) == cpu_type &&
        (static_cast<cpu_subtype_t>(it->cpusubtype) & ~CPU_SUBTYPE_MASK) ==
            (cpu_subtype & ~CPU_SUBTYPE_MASK))
      return &*it;
  }

  // No exact match found.
  // TODO(erikchen): If it becomes necessary, we can copy the implementation of
  // NXFindBestFatArch, located at
  // http://web.mit.edu/darwin/src/modules/cctools/libmacho/arch.c.
  fprintf(stderr, "Failed to find an exact match for an object file with cpu "
      "type: %d and cpu subtype: %d.\n", cpu_type, cpu_subtype);
  if (!can_convert_to_fat_arch) {
    fprintf(stderr, "Furthermore, at least one object file is larger "
        "than 2**32.\n");
  }
  return NULL;
}

string DumpSymbols::Identifier() {
  scoped_ptr<FileID> file_id;

  if (from_disk_) {
    file_id.reset(new FileID(object_filename_.c_str()));
  } else {
    file_id.reset(new FileID(contents_.get(), size_));
  }
  unsigned char identifier_bytes[16];
  scoped_ptr<Module> module;
  if (!selected_object_file_) {
    if (!CreateEmptyModule(module))
      return string();
  }
  cpu_type_t cpu_type = selected_object_file_->cputype;
  cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype;
  if (!file_id->MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes)) {
    fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
            object_filename_.c_str());
    return "";
  }

  char identifier_string[40];
  FileID::ConvertIdentifierToString(identifier_bytes, identifier_string,
                                    sizeof(identifier_string));

  string compacted(identifier_string);
  for(size_t i = compacted.find('-'); i != string::npos;
      i = compacted.find('-', i))
    compacted.erase(i, 1);

  // The pdb for these IDs has an extra byte, so to make everything uniform put
  // a 0 on the end of mac IDs.
  compacted += "0";

  return compacted;
}

// A range handler that accepts rangelist data parsed by
// RangeListReader and populates a range vector (typically
// owned by a function) with the results.
class DumpSymbols::DumperRangesHandler:
      public DwarfCUToModule::RangesHandler {
 public:
  DumperRangesHandler(ByteReader* reader) :
      reader_(reader) { }

  bool ReadRanges(
      enum DwarfForm form, uint64_t data,
      RangeListReader::CURangesInfo* cu_info,
      vector<Module::Range>* ranges) {
    DwarfRangeListHandler handler(ranges);
    RangeListReader range_list_reader(reader_, cu_info,
                                                    &handler);
    return range_list_reader.ReadRanges(form, data);
  }

 private:
  ByteReader* reader_;
};

// A line-to-module loader that accepts line number info parsed by
// LineInfo and populates a Module and a line vector
// with the results.
class DumpSymbols::DumperLineToModule:
      public DwarfCUToModule::LineToModuleHandler {
 public:
  // Create a line-to-module converter using BYTE_READER.
  DumperLineToModule(ByteReader* byte_reader)
      : byte_reader_(byte_reader) { }

  void StartCompilationUnit(const string& compilation_dir) {
    compilation_dir_ = compilation_dir;
  }

  void ReadProgram(const uint8_t* program,
                   uint64_t length,
                   const uint8_t* string_section,
                   uint64_t string_section_length,
                   const uint8_t* line_string_section,
                   uint64_t line_string_section_length,
                   Module* module,
                   vector<Module::Line>* lines,
                   std::map<uint32_t, Module::File*>* files) {
    DwarfLineToModule handler(module, compilation_dir_, lines, files);
    LineInfo parser(program, length, byte_reader_, nullptr, 0,
                                  nullptr, 0, &handler);
    parser.Start();
  }
 private:
  string compilation_dir_;
  ByteReader* byte_reader_;  // WEAK
};

bool DumpSymbols::CreateEmptyModule(scoped_ptr<Module>& module) {
  // Select an object file, if SetArchitecture hasn't been called to set one
  // explicitly.
  if (!selected_object_file_) {
    // If there's only one architecture, that's the one.
    if (object_files_.size() == 1)
      selected_object_file_ = &object_files_[0];
    else {
      // Look for an object file whose architecture matches our own.
      ArchInfo local_arch = GetLocalArchInfo();
      if (!SetArchitecture(local_arch)) {
        fprintf(stderr, "%s: object file contains more than one"
                " architecture, none of which match the current"
                " architecture; specify an architecture explicitly"
                " with '-a ARCH' to resolve the ambiguity\n",
                object_filename_.c_str());
        return false;
      }
    }
  }

  assert(selected_object_file_);

  // Find the name of the selected file's architecture, to appear in
  // the MODULE record and in error messages.
  const char* selected_arch_name = GetNameFromCPUType(
      selected_object_file_->cputype, selected_object_file_->cpusubtype);

  // In certain cases, it is possible that architecture info can't be reliably
  // determined, e.g. new architectures that breakpad is unware of. In that
  // case, avoid crashing and return false instead.
  if (selected_arch_name == kUnknownArchName) {
    return false;
  }

  if (strcmp(selected_arch_name, "i386") == 0)
    selected_arch_name = "x86";

  // Produce a name to use in error messages that includes the
  // filename, and the architecture, if there is more than one.
  selected_object_name_ = object_filename_;
  if (object_files_.size() > 1) {
    selected_object_name_ += ", architecture ";
    selected_object_name_ + selected_arch_name;
  }

  // Compute a module name, to appear in the MODULE record.
  string module_name;
  if (!module_name_.empty()) {
    module_name = module_name_;
  } else {
    module_name = google_breakpad::BaseName(object_filename_);
  }

  // Choose an identifier string, to appear in the MODULE record.
  string identifier = Identifier();
  if (identifier.empty())
    return false;

  // Create a module to hold the debugging information.
  module.reset(new Module(module_name, "mac", selected_arch_name, identifier,
                          "", enable_multiple_));
  return true;
}

void DumpSymbols::ReadDwarf(google_breakpad::Module* module,
                            const mach_o::Reader& macho_reader,
                            const mach_o::SectionMap& dwarf_sections,
                            bool handle_inter_cu_refs) const {
  // Build a byte reader of the appropriate endianness.
  ByteReader byte_reader(macho_reader.big_endian()
                         ? ENDIANNESS_BIG
                         : ENDIANNESS_LITTLE);

  // Construct a context for this file.
  DwarfCUToModule::FileContext file_context(selected_object_name_,
                                            module,
                                            handle_inter_cu_refs);

  // Build a SectionMap from our mach_o::SectionMap.
  for (mach_o::SectionMap::const_iterator it = dwarf_sections.begin();
       it != dwarf_sections.end(); ++it) {
    file_context.AddSectionToSectionMap(
        it->first,
        it->second.contents.start,
        it->second.contents.Size());
  }

  // Find the __debug_info section.
  SectionMap::const_iterator debug_info_entry =
      file_context.section_map().find("__debug_info");
  // There had better be a __debug_info section!
  if (debug_info_entry == file_context.section_map().end()) {
    fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
            selected_object_name_.c_str());
    return;
  }
  const std::pair<const uint8_t*, uint64_t>& debug_info_section =
      debug_info_entry->second;

  // Build a line-to-module loader for the root handler to use.
  DumperLineToModule line_to_module(&byte_reader);

  // .debug_ranges and .debug_rngslists reader
  DumperRangesHandler ranges_handler(&byte_reader);

  // Walk the __debug_info section, one compilation unit at a time.
  uint64_t debug_info_length = debug_info_section.second;
  for (uint64_t offset = 0; offset < debug_info_length;) {
    // Make a handler for the root DIE that populates MODULE with the
    // debug info.
    DwarfCUToModule::WarningReporter reporter(selected_object_name_,
                                              offset);
    DwarfCUToModule root_handler(&file_context, &line_to_module,
                                 &ranges_handler, &reporter,
                                 symbol_data_ & INLINES);
    // Make a Dwarf2Handler that drives our DIEHandler.
    DIEDispatcher die_dispatcher(&root_handler);
    // Make a DWARF parser for the compilation unit at OFFSET.
    CompilationUnit dwarf_reader(selected_object_name_,
                                               file_context.section_map(),
                                               offset,
                                               &byte_reader,
                                               &die_dispatcher);
    // Process the entire compilation unit; get the offset of the next.
    offset += dwarf_reader.Start();
  }
}

bool DumpSymbols::ReadCFI(google_breakpad::Module* module,
                          const mach_o::Reader& macho_reader,
                          const mach_o::Section& section,
                          bool eh_frame) const {
  // Find the appropriate set of register names for this file's
  // architecture.
  vector<string> register_names;
  switch (macho_reader.cpu_type()) {
    case CPU_TYPE_X86:
      register_names = DwarfCFIToModule::RegisterNames::I386();
      break;
    case CPU_TYPE_X86_64:
      register_names = DwarfCFIToModule::RegisterNames::X86_64();
      break;
    case CPU_TYPE_ARM:
      register_names = DwarfCFIToModule::RegisterNames::ARM();
      break;
    case CPU_TYPE_ARM64:
      register_names = DwarfCFIToModule::RegisterNames::ARM64();
      break;
    default: {
      const char* arch_name = GetNameFromCPUType(macho_reader.cpu_type(),
                                                 macho_reader.cpu_subtype());
      fprintf(
          stderr,
          "%s: cannot convert DWARF call frame information for architecture "
          "'%s' (%d, %d) to Breakpad symbol file: no register name table\n",
          selected_object_name_.c_str(), arch_name, macho_reader.cpu_type(),
          macho_reader.cpu_subtype());
      return false;
    }
  }

  // Find the call frame information and its size.
  const uint8_t* cfi = section.contents.start;
  size_t cfi_size = section.contents.Size();

  // Plug together the parser, handler, and their entourages.
  DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
                                             section.section_name);
  DwarfCFIToModule handler(module, register_names, &module_reporter);
  ByteReader byte_reader(macho_reader.big_endian() ?
                                       ENDIANNESS_BIG :
                                       ENDIANNESS_LITTLE);
  byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);
  // At the moment, according to folks at Apple and some cursory
  // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so
  // this is the only base address the CFI parser will need.
  byte_reader.SetCFIDataBase(section.address, cfi);

  CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
                                                       section.section_name);
  CallFrameInfo parser(cfi, cfi_size,
                                     &byte_reader, &handler, &dwarf_reporter,
                                     eh_frame);
  parser.Start();
  return true;
}

// A LoadCommandHandler that loads whatever debugging data it finds into a
// Module.
class DumpSymbols::LoadCommandDumper:
      public mach_o::Reader::LoadCommandHandler {
 public:
  // Create a load command dumper handling load commands from READER's
  // file, and adding data to MODULE.
  LoadCommandDumper(const DumpSymbols& dumper,
                    google_breakpad::Module* module,
                    const mach_o::Reader& reader,
                    SymbolData symbol_data,
                    bool handle_inter_cu_refs)
      : dumper_(dumper),
        module_(module),
        reader_(reader),
        symbol_data_(symbol_data),
        handle_inter_cu_refs_(handle_inter_cu_refs) { }

  bool SegmentCommand(const mach_o::Segment& segment);
  bool SymtabCommand(const ByteBuffer& entries, const ByteBuffer& strings);

 private:
  const DumpSymbols& dumper_;
  google_breakpad::Module* module_;  // WEAK
  const mach_o::Reader& reader_;
  const SymbolData symbol_data_;
  const bool handle_inter_cu_refs_;
};

bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment& segment) {
  mach_o::SectionMap section_map;
  if (!reader_.MapSegmentSections(segment, &section_map))
    return false;

  if (segment.name == "__TEXT") {
    module_->SetLoadAddress(segment.vmaddr);
    if (symbol_data_ & CFI) {
      mach_o::SectionMap::const_iterator eh_frame =
          section_map.find("__eh_frame");
      if (eh_frame != section_map.end()) {
        // If there is a problem reading this, don't treat it as a fatal error.
        dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
      }
    }
    return true;
  }

  if (segment.name == "__DWARF") {
    if ((symbol_data_ & SYMBOLS_AND_FILES) || (symbol_data_ & INLINES)) {
      dumper_.ReadDwarf(module_, reader_, section_map, handle_inter_cu_refs_);
    }
    if (symbol_data_ & CFI) {
      mach_o::SectionMap::const_iterator debug_frame
          = section_map.find("__debug_frame");
      if (debug_frame != section_map.end()) {
        // If there is a problem reading this, don't treat it as a fatal error.
        dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
      }
    }
  }

  return true;
}

bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer& entries,
                                                   const ByteBuffer& strings) {
  StabsToModule stabs_to_module(module_);
  // Mac OS X STABS are never "unitized", and the size of the 'value' field
  // matches the address size of the executable.
  StabsReader stabs_reader(entries.start, entries.Size(),
                           strings.start, strings.Size(),
                           reader_.big_endian(),
                           reader_.bits_64() ? 8 : 4,
                           true,
                           &stabs_to_module);
  if (!stabs_reader.Process())
    return false;
  stabs_to_module.Finalize();
  return true;
}

bool DumpSymbols::ReadSymbolData(Module** out_module) {
  scoped_ptr<Module> module;
  if (!CreateEmptyModule(module))
    return false;

  // Parse the selected object file.
  mach_o::Reader::Reporter reporter(selected_object_name_);
  mach_o::Reader reader(&reporter);
  if (!reader.Read(&contents_[0]
                   + selected_object_file_->offset,
                   selected_object_file_->size,
                   selected_object_file_->cputype,
                   selected_object_file_->cpusubtype))
    return false;

  // Walk its load commands, and deal with whatever is there.
  LoadCommandDumper load_command_dumper(*this, module.get(), reader,
                                        symbol_data_, handle_inter_cu_refs_);
  if (!reader.WalkLoadCommands(&load_command_dumper))
    return false;

  *out_module = module.release();

  return true;
}

// Read the selected object file's debugging information, and write out the
// header only to |stream|. Return true on success; if an error occurs, report
// it and return false.
bool DumpSymbols::WriteSymbolFileHeader(std::ostream& stream) {
  scoped_ptr<Module> module;
  if (!CreateEmptyModule(module))
    return false;

  return module->Write(stream, symbol_data_);
}

}  // namespace google_breakpad