// Copyright (c) 2022, Google LLC // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google LLC nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // disassembler_objdump.: Disassembler that invokes objdump for disassembly. // // Author: Mark Brand #include "processor/disassembler_objdump.h" #include #include #include #include #include #include #include #include "processor/logging.h" namespace google_breakpad { namespace { const size_t kMaxX86InstructionLength = 15; // Small RAII wrapper for temporary files. // // Example: // ScopedTmpFile tmp("/tmp/tmpfile-XXXX"); // if (tmp.Create()) { // std::cerr << tmp.path() << std::endl; // } class ScopedTmpFile { public: // Initialize the ScopedTmpFile object - this does not create the temporary // file yet. ScopedTmpFile(const char* path_format); ~ScopedTmpFile(); // Creates the temporary file, returns true on success. bool Create(); // Writes bytes to the temporary file, returns true on success. bool Write(const uint8_t* bytes, unsigned int bytes_len); // Returns the path of the temporary file. string path() const { return path_; } private: int fd_; string path_; }; ScopedTmpFile::ScopedTmpFile(const char* path_format) : path_(path_format) {} ScopedTmpFile::~ScopedTmpFile() { if (fd_) { close(fd_); unlink(path_.c_str()); } } bool ScopedTmpFile::Create() { fd_ = mkstemp(path_.data()); if (fd_ < 0) { unlink(path_.c_str()); fd_ = 0; path_ = ""; return false; } return true; } bool ScopedTmpFile::Write(const uint8_t* bytes, unsigned int bytes_len) { if (fd_) { do { ssize_t result = write(fd_, bytes, bytes_len); if (result < 0) { break; } bytes += result; bytes_len -= result; } while (bytes_len); } return bytes_len == 0; } bool IsInstructionPrefix(const string& token) { if (token == "lock" || token == "rep" || token == "repz" || token == "repnz") { return true; } return false; } bool IsOperandSize(const string& token) { if (token == "BYTE" || token == "WORD" || token == "DWORD" || token == "QWORD" || token == "PTR") { return true; } return false; } bool GetSegmentAddressX86(const DumpContext& context, string segment_name, uint64_t& address) { if (segment_name == "ds") { address = context.GetContextX86()->ds; } else if (segment_name == "es") { address = context.GetContextX86()->es; } else if (segment_name == "fs") { address = context.GetContextX86()->fs; } else if (segment_name == "gs") { address = context.GetContextX86()->gs; } else { BPLOG(ERROR) << "Unsupported segment register: " << segment_name; return false; } return true; } bool GetSegmentAddressAMD64(const DumpContext& context, string segment_name, uint64_t& address) { if (segment_name == "ds") { address = 0; } else if (segment_name == "es") { address = 0; } else { BPLOG(ERROR) << "Unsupported segment register: " << segment_name; return false; } return true; } bool GetSegmentAddress(const DumpContext& context, string segment_name, uint64_t& address) { if (context.GetContextCPU() == MD_CONTEXT_X86) { return GetSegmentAddressX86(context, segment_name, address); } else if (context.GetContextCPU() == MD_CONTEXT_AMD64) { return GetSegmentAddressAMD64(context, segment_name, address); } else { BPLOG(ERROR) << "Unsupported architecture for GetSegmentAddress\n"; return false; } } bool GetRegisterValueX86(const DumpContext& context, string register_name, uint64_t& value) { if (register_name == "eax") { value = context.GetContextX86()->eax; } else if (register_name == "ebx") { value = context.GetContextX86()->ebx; } else if (register_name == "ecx") { value = context.GetContextX86()->ecx; } else if (register_name == "edx") { value = context.GetContextX86()->edx; } else if (register_name == "edi") { value = context.GetContextX86()->edi; } else if (register_name == "esi") { value = context.GetContextX86()->esi; } else if (register_name == "ebp") { value = context.GetContextX86()->ebp; } else if (register_name == "esp") { value = context.GetContextX86()->esp; } else if (register_name == "eip") { value = context.GetContextX86()->eip; } else { BPLOG(ERROR) << "Unsupported register: " << register_name; return false; } return true; } bool GetRegisterValueAMD64(const DumpContext& context, string register_name, uint64_t& value) { if (register_name == "rax") { value = context.GetContextAMD64()->rax; } else if (register_name == "rbx") { value = context.GetContextAMD64()->rbx; } else if (register_name == "rcx") { value = context.GetContextAMD64()->rcx; } else if (register_name == "rdx") { value = context.GetContextAMD64()->rdx; } else if (register_name == "rdi") { value = context.GetContextAMD64()->rdi; } else if (register_name == "rsi") { value = context.GetContextAMD64()->rsi; } else if (register_name == "rbp") { value = context.GetContextAMD64()->rbp; } else if (register_name == "rsp") { value = context.GetContextAMD64()->rsp; } else if (register_name == "r8") { value = context.GetContextAMD64()->r8; } else if (register_name == "r9") { value = context.GetContextAMD64()->r9; } else if (register_name == "r10") { value = context.GetContextAMD64()->r10; } else if (register_name == "r11") { value = context.GetContextAMD64()->r11; } else if (register_name == "r12") { value = context.GetContextAMD64()->r12; } else if (register_name == "r13") { value = context.GetContextAMD64()->r13; } else if (register_name == "r14") { value = context.GetContextAMD64()->r14; } else if (register_name == "r15") { value = context.GetContextAMD64()->r15; } else if (register_name == "rip") { value = context.GetContextAMD64()->rip; } else { BPLOG(ERROR) << "Unsupported register: " << register_name; return false; } return true; } // Lookup the value of `register_name` in `context`, store it into `value` on // success. // Support for non-full-size registers not implemented, since we're only using // this to evaluate address expressions. bool GetRegisterValue(const DumpContext& context, string register_name, uint64_t& value) { if (context.GetContextCPU() == MD_CONTEXT_X86) { return GetRegisterValueX86(context, register_name, value); } else if (context.GetContextCPU() == MD_CONTEXT_AMD64) { return GetRegisterValueAMD64(context, register_name, value); } else { BPLOG(ERROR) << "Unsupported architecture for GetRegisterValue\n"; return false; } } } // namespace // static bool DisassemblerObjdump::DisassembleInstruction(uint32_t cpu, const uint8_t* raw_bytes, unsigned int raw_bytes_len, string& instruction) { // Always initialize outputs instruction = ""; if (!raw_bytes || raw_bytes_len == 0) { // There's no need to perform any operation in this case, as there's // clearly no instruction there. return false; } string architecture; if (cpu == MD_CONTEXT_X86) { architecture = "i386"; } else if (cpu == MD_CONTEXT_AMD64) { architecture = "i386:x86-64"; } else { BPLOG(ERROR) << "Unsupported architecture."; return false; } // Create two temporary files, one for the raw instruction bytes to pass to // objdump, and one for the output, and write the bytes to the input file. ScopedTmpFile raw_bytes_file("/tmp/breakpad_mem_region-raw_bytes-XXXXXX"); ScopedTmpFile disassembly_file("/tmp/breakpad_mem_region-disassembly-XXXXXX"); if (!raw_bytes_file.Create() || !disassembly_file.Create() || !raw_bytes_file.Write(raw_bytes, raw_bytes_len)) { BPLOG(ERROR) << "Failed creating temporary files."; return false; } char cmd[1024] = {0}; snprintf(cmd, 1024, "objdump -D --no-show-raw-insn -b binary -M intel -m %s %s > %s", architecture.c_str(), raw_bytes_file.path().c_str(), disassembly_file.path().c_str()); if (system(cmd)) { BPLOG(ERROR) << "Failed to call objdump."; return false; } // Pipe each output line into the string until the string contains the first // instruction from objdump. std::ifstream objdump_stream(disassembly_file.path()); // Match the instruction line, from: // 0: lock cmpxchg DWORD PTR [esi+0x10],eax // extract the string "lock cmpxchg DWORD PTR [esi+0x10],eax" std::regex instruction_regex( "^\\s+[0-9a-f]+:\\s+" // " 0:" "((?:\\s*\\S*)+)$"); // "lock cmpxchg..." std::string line; std::smatch match; do { if (!getline(objdump_stream, line)) { BPLOG(INFO) << "Failed to find instruction in objdump output."; return false; } } while (!std::regex_match(line, match, instruction_regex)); instruction = match[1].str(); return true; } // static bool DisassemblerObjdump::TokenizeInstruction(const string& instruction, string& operation, string& dest, string& src) { // Always initialize outputs. operation = ""; dest = ""; src = ""; // Split the instruction into tokens by either whitespace or comma. std::regex token_regex("((?:[^\\s,]+)|,)(?:\\s)*"); std::sregex_iterator tokens_begin(instruction.begin(), instruction.end(), token_regex); bool found_comma = false; for (auto tokens_iter = tokens_begin; tokens_iter != std::sregex_iterator(); ++tokens_iter) { auto token = (*tokens_iter)[1].str(); if (operation.size() == 0) { if (IsInstructionPrefix(token)) continue; operation = token; } else if (dest.size() == 0) { if (IsOperandSize(token)) continue; dest = token; } else if (!found_comma) { if (token == ",") { found_comma = true; } else { BPLOG(ERROR) << "Failed to parse operands from objdump output, expected" " comma but found \"" << token << "\""; return false; } } else if (src.size() == 0) { if (IsOperandSize(token)) continue; src = token; } else { if (token == ",") { BPLOG(ERROR) << "Failed to parse operands from objdump output, found " "unexpected comma after last operand."; return false; } else { // We just ignore other junk after the last operand unless it's a // comma, which would indicate we're probably still in the middle // of the operands and something has gone wrong } } } if (found_comma && src.size() == 0) { BPLOG(ERROR) << "Failed to parse operands from objdump output, found comma " "but no src operand."; return false; } return true; } // static bool DisassemblerObjdump::CalculateAddress(const DumpContext& context, const string& expression, uint64_t& address) { address = 0; // Extract the components of the expression. // fs:[esi+edi*4+0x80] -> ["fs", "esi", "edi", "4", "-", "0x80"] std::regex expression_regex( "^(?:(\\ws):)?" // "fs:" "\\[(\\w+)" // "[esi" "(?:\\+(\\w+)(?:\\*(\\d+)))?" // "+edi*4" "(?:([\\+-])(0x[0-9a-f]+))?" // "-0x80" "\\]$"); // "]" std::smatch match; if (!std::regex_match(expression, match, expression_regex) || match.size() != 7) { return false; } string segment_name = match[1].str(); string register_name = match[2].str(); string index_name = match[3].str(); string index_stride = match[4].str(); string offset_sign = match[5].str(); string offset = match[6].str(); uint64_t segment_address = 0; uint64_t register_value = 0; uint64_t index_value = 0; uint64_t index_stride_value = 1; uint64_t offset_value = 0; if (segment_name.size() && !GetSegmentAddress(context, segment_name, segment_address)) { return false; } if (!GetRegisterValue(context, register_name, register_value)) { return false; } if (index_name.size() && !GetRegisterValue(context, index_name, index_value)) { return false; } if (index_stride.size()) { index_stride_value = strtoull(index_stride.c_str(), nullptr, 0); } if (offset.size()) { offset_value = strtoull(offset.c_str(), nullptr, 0); } address = segment_address + register_value + (index_value * index_stride_value); if (offset_sign == "+") { address += offset_value; } else if (offset_sign == "-") { address -= offset_value; } return true; } DisassemblerObjdump::DisassemblerObjdump(const uint32_t cpu, const MemoryRegion* memory_region, uint64_t address) { if (address < memory_region->GetBase() || memory_region->GetBase() + memory_region->GetSize() <= address) { return; } uint8_t ip_bytes[kMaxX86InstructionLength] = {0}; size_t ip_bytes_length; for (ip_bytes_length = 0; ip_bytes_length < kMaxX86InstructionLength; ++ip_bytes_length) { // We have to read byte-by-byte here, since we still want to try and // disassemble an instruction even if we don't have enough bytes. if (!memory_region->GetMemoryAtAddress(address + ip_bytes_length, &ip_bytes[ip_bytes_length])) { break; } } string instruction; if (!DisassembleInstruction(cpu, ip_bytes, kMaxX86InstructionLength, instruction)) { return; } if (!TokenizeInstruction(instruction, operation_, dest_, src_)) { return; } } bool DisassemblerObjdump::CalculateSrcAddress(const DumpContext& context, uint64_t& address) { return CalculateAddress(context, src_, address); } bool DisassemblerObjdump::CalculateDestAddress(const DumpContext& context, uint64_t& address) { return CalculateAddress(context, dest_, address); } } // namespace google_breakpad