def calculateComplexDerefOpAddress(complexDerefOp, registerMap):
    '''
    Calculate the address referenced by a complex dereference operand of the
    form offset(%regA,%regB,mult), computed as regA + offset + regB * mult

    @type complexDerefOp: string
    @param complexDerefOp: Operand string, e.g. -0x69(%rdx,%rbx,8)

    @type registerMap: Map from string to long
    @param registerMap: Map of register names to values

    @rtype: tuple
    @return: (address, None) on success or (None, failure message) if the
             operand cannot be decoded or a register value is missing
    '''
    match = re.match("((?:\\-?0x[0-9a-f]+)?)\\(%([a-z0-9]+),%([a-z0-9]+),([0-9]+)\\)", complexDerefOp)
    if match is None:
        return (None, "Unknown failure.")

    # The offset is optional; when present it is a (possibly negative) hex number
    offset = long(0)
    if match.group(1):
        offset = long(match.group(1), 16)

    regA = RegisterHelper.getRegisterValue(match.group(2), registerMap)
    regB = RegisterHelper.getRegisterValue(match.group(3), registerMap)

    # The multiplier is matched as a plain decimal number ([0-9]+), so it must
    # be parsed as decimal. Parsing it as hex only happened to work for the
    # single-digit values.
    mult = long(match.group(4), 10)

    # If we're missing any of the two register values, report which one
    # (the first register takes precedence if both are missing)
    if regA is None:
        return (None, "Missing value for register %s" % match.group(2))
    if regB is None:
        return (None, "Missing value for register %s" % match.group(3))

    # Each value is converted to the unsigned and then the signed type of the
    # register width before the arithmetic is performed
    if RegisterHelper.getBitWidth(registerMap) == 32:
        val = int32(uint32(regA)) + int32(uint32(offset)) + (int32(uint32(regB)) * int32(uint32(mult)))
    else:
        # Assume 64 bit width
        val = int64(uint64(regA)) + int64(uint64(offset)) + (int64(uint64(regB)) * int64(uint64(mult)))

    return (long(val), None)
def calculateComplexDerefOpAddress(complexDerefOp, registerMap):
    '''
    Calculate the address referenced by a complex dereference operand of the
    form offset(%regA,%regB,mult), computed as regA + offset + regB * mult

    @type complexDerefOp: string
    @param complexDerefOp: Operand string, e.g. -0x69(%rdx,%rbx,8)

    @type registerMap: Map from string to long
    @param registerMap: Map of register names to values

    @rtype: tuple
    @return: (address, None) on success or (None, failure message) if the
             operand cannot be decoded or a register value is missing
    '''
    match = re.match(
        "((?:\\-?0x[0-9a-f]+)?)\\(%([a-z0-9]+),%([a-z0-9]+),([0-9]+)\\)",
        complexDerefOp)

    if match is not None:
        # The offset is optional; when present it is a (possibly negative) hex number
        offset = long(0)
        if match.group(1):
            offset = long(match.group(1), 16)

        regA = RegisterHelper.getRegisterValue(match.group(2), registerMap)
        regB = RegisterHelper.getRegisterValue(match.group(3), registerMap)

        # The pattern only accepts decimal digits for the multiplier, so parse
        # it as decimal rather than as hex
        mult = long(match.group(4), 10)

        # If we're missing any of the two register values, report which one
        # (the first register takes precedence if both are missing)
        if regA is None:
            return (None, "Missing value for register %s" % match.group(2))
        if regB is None:
            return (None, "Missing value for register %s" % match.group(3))

        # Each value is converted to the unsigned and then the signed type of
        # the register width before the arithmetic is performed
        if RegisterHelper.getBitWidth(registerMap) == 32:
            val = int32(uint32(regA)) + int32(uint32(offset)) + (
                int32(uint32(regB)) * int32(uint32(mult)))
        else:
            # Assume 64 bit width
            val = int64(uint64(regA)) + int64(uint64(offset)) + (
                int64(uint64(regB)) * int64(uint64(mult)))

        return (long(val), None)

    return (None, "Unknown failure.")
def runTest(self):
    '''
    Check that RegisterHelper.getRegisterValue yields the expected values for
    rax/rbx and for their 32 bit, 16 bit, high byte and low byte sub-registers
    when the register map only contains the full rax and rbx values
    '''
    # long() keeps every value a long, just like an L-suffixed literal would
    registerMap = {
        "rax": long(0xfffffffffffffe00),
        "rbx": long(0x7ffff79a7640),
    }

    # (register name, expected value) pairs, checked in exactly this order
    expectations = (
        ("rax", 0xfffffffffffffe00),
        ("eax", 0xfffffe00),
        ("ax", 0xfe00),
        ("ah", 0xfe),
        ("al", 0x0),
        ("rbx", 0x7ffff79a7640),
        ("ebx", 0xf79a7640),
        ("bx", 0x7640),
        ("bh", 0x76),
        ("bl", 0x40),
    )

    for (registerName, expectedValue) in expectations:
        actualValue = RegisterHelper.getRegisterValue(registerName, registerMap)
        self.assertEqual(actualValue, long(expectedValue))
def calculateCrashAddress(crashInstruction, registerMap):
    '''
    Calculate the crash address given the crash instruction and register contents

    @type crashInstruction: string
    @param crashInstruction: Crash instruction string as provided by GDB
    @type registerMap: Map from string to long
    @param registerMap: Map of register names to values

    @rtype: long
    @return The calculated crash address

    On error, a string containing the failure message is returned instead.
    For an instruction with two dereferencing operands where the second one
    uses a segment prefix (e.g. movsq %ds:(%rsi),%es:(%rdi)), None is returned.

    A RuntimeError is raised if the instruction cannot be split into
    instruction and operands, if the instruction name is invalid, if it is
    an unknown instruction without operands, or if the operands follow an
    unexpected pattern.
    '''

    if (len(crashInstruction) == 0):
        # GDB shows us no instruction, so the memory at the instruction
        # pointer address must be inaccessible and we should assume
        # that this caused our crash.
        return RegisterHelper.getInstructionPointer(registerMap)

    parts = crashInstruction.split(None, 1)

    if len(parts) == 1:
        # Single instruction without any operands?
        # Only accept those that we explicitly know so far.
        instruction = parts[0]

        if instruction == "ret":
            # If ret is crashing, it's most likely due to the stack pointer
            # pointing somewhere where it shouldn't point, so use that as
            # the crash address.
            return RegisterHelper.getStackPointer(registerMap)
        elif instruction == "ud2":
            # ud2 - Raise invalid opcode exception
            # We treat this like invalid instruction
            return RegisterHelper.getInstructionPointer(registerMap)
        else:
            raise RuntimeError("Unsupported non-operand instruction: %s" % instruction)

    if len(parts) != 2:
        raise RuntimeError("Failed to split instruction and operands apart: %s" % crashInstruction)

    instruction = parts[0]
    operands = parts[1]

    if not re.match("[a-z\\.]+", instruction):
        raise RuntimeError("Invalid instruction: %s" % instruction)

    parts = operands.split(",")

    # We now have four possibilities:
    # 1. Length of parts is 1, that means we have one operand
    # 2. Length of parts is 2, that means we have two simple operands
    # 3. Length of parts is 4 and
    #  a) First part contains '(' but not ')', meaning the first operand is complex
    #  b) First part contains no '(' or ')', meaning the last operand is complex
    #    e.g. mov    %ecx,0x500094(%r15,%rdx,4)
    #
    # 4. Length of parts is 3, just one complex operand.
    #   e.g. shrb   -0x69(%rdx,%rbx,8)

    # When we fail, try storing a reason here
    failureReason = "Unknown failure."

    if RegisterHelper.isX86Compatible(registerMap):
        if len(parts) == 1:
            if instruction == "callq" or instruction == "call" or instruction == "push" or instruction == "pop":
                return RegisterHelper.getStackPointer(registerMap)
            else:
                failureReason = "Unsupported single-operand instruction."
        elif len(parts) == 2:
            failureReason = "Unknown failure with two-operand instruction."
            derefOp = None
            if "(" in parts[0] and ")" in parts[0]:
                derefOp = parts[0]

            if "(" in parts[1] and ")" in parts[1]:
                if derefOp is not None:
                    if ":(" in parts[1]:
                        # This can be an instruction using multiple segments, like:
                        #
                        #   movsq  %ds:(%rsi),%es:(%rdi)
                        #
                        #  (gdb) p $_siginfo._sifields._sigfault.si_addr
                        #    $1 = (void *) 0x7ff846e64d28
                        #    (gdb) x /i $pc
                        #    => 0x876b40 <js::ArgumentsObject::create<CopyFrameArgs>(JSContext*, JS::HandleScript, JS::HandleFunction, unsigned int, CopyFrameArgs&)+528>:   movsq  %ds:(%rsi),%es:(%rdi)
                        #    (gdb) info reg $ds
                        #    ds             0x0      0
                        #    (gdb) info reg $es
                        #    es             0x0      0
                        #    (gdb) info reg $rsi
                        #    rsi            0x7ff846e64d28   140704318115112
                        #    (gdb) info reg $rdi
                        #    rdi            0x7fff27fac030   140733864132656
                        #
                        #
                        # We don't support this right now, so return None.
                        #
                        return None

                    raise RuntimeError("Instruction operands have multiple loads? %s" % crashInstruction)

                derefOp = parts[1]

            if derefOp is not None:
                match = re.match("((?:\\-?0x[0-9a-f]+)?)\\(%([a-z0-9]+)\\)", derefOp)
                if match is not None:
                    # The offset is optional; when present it is a hex number
                    offset = long(0)
                    if match.group(1):
                        offset = long(match.group(1), 16)

                    val = RegisterHelper.getRegisterValue(match.group(2), registerMap)

                    # If we don't have the value, record the reason and fall
                    # through to the failure reporting at the end
                    if val is None:
                        failureReason = "Missing value for register %s " % match.group(2)
                    else:
                        if RegisterHelper.getBitWidth(registerMap) == 32:
                            return long(int32(uint32(offset)) + int32(uint32(val)))
                        else:
                            # Assume 64 bit width
                            return long(int64(uint64(offset)) + int64(uint64(val)))
            else:
                failureReason = "Failed to decode two-operand instruction: No dereference operation or hardcoded address detected."
                # We might still be reading from/writing to a hardcoded address.
                # Note that it's not possible to have two hardcoded addresses
                # in one instruction, one operand must be a register or immediate
                # constant (denoted by leading $). In some cases, like a movabs
                # instruction, the immediate constant however is dereferenced
                # and is the first operator. So we first check parts[1] then
                # parts[0] in case it's a dereferencing operation.
                for x in (parts[1], parts[0]):
                    result = re.match("\\$?(\\-?0x[0-9a-f]+)", x)
                    if result is not None:
                        return long(result.group(1), 16)
        elif len(parts) == 3:
            # Example instruction: shrb   -0x69(%rdx,%rbx,8)
            if "(" in parts[0] and ")" in parts[2]:
                complexDerefOp = parts[0] + "," + parts[1] + "," + parts[2]

                (result, reason) = GDBCrashInfo.calculateComplexDerefOpAddress(complexDerefOp, registerMap)

                if result is None:
                    failureReason = reason
                else:
                    return result
            else:
                raise RuntimeError("Unexpected instruction pattern: %s" % crashInstruction)
        elif len(parts) == 4:
            if "(" in parts[0] and ")" not in parts[0]:
                complexDerefOp = parts[0] + "," + parts[1] + "," + parts[2]
            elif "(" not in parts[0] and ")" not in parts[0]:
                complexDerefOp = parts[1] + "," + parts[2] + "," + parts[3]
            else:
                # The first part is neither the start of a complex operand nor
                # a simple operand (e.g. it closes a parenthesis). Without
                # this branch complexDerefOp would be unbound below.
                raise RuntimeError("Unexpected instruction pattern: %s" % crashInstruction)

            (result, reason) = GDBCrashInfo.calculateComplexDerefOpAddress(complexDerefOp, registerMap)

            if result is None:
                failureReason = reason
            else:
                return result
        else:
            raise RuntimeError("Unexpected length after splitting operands of this instruction: %s" % crashInstruction)
    else:
        failureReason = "Architecture is not supported."

    print("Unable to calculate crash address from instruction: %s " % crashInstruction, file=sys.stderr)
    print("Reason: %s" % failureReason, file=sys.stderr)

    return failureReason
def calculateCrashAddress(crashInstruction, registerMap):
    '''
    Calculate the crash address given the crash instruction and register contents

    @type crashInstruction: string
    @param crashInstruction: Crash instruction string as provided by GDB
    @type registerMap: Map from string to long
    @param registerMap: Map of register names to values

    @rtype: long
    @return The calculated crash address

    On error, a string containing the failure message is returned instead.
    For an instruction with two dereferencing operands where the second one
    uses a segment prefix (e.g. movsq %ds:(%rsi),%es:(%rdi)), None is returned.

    A RuntimeError is raised if the instruction cannot be split into
    instruction and operands, if the instruction name is invalid, or if the
    operands follow an unexpected pattern.
    '''

    if (len(crashInstruction) == 0):
        # GDB shows us no instruction, so the memory at the instruction
        # pointer address must be inaccessible and we should assume
        # that this caused our crash.
        return RegisterHelper.getInstructionPointer(registerMap)

    parts = crashInstruction.split(None, 1)
    if len(parts) != 2:
        raise RuntimeError(
            "Failed to split instruction and operands apart: %s" %
            crashInstruction)

    instruction = parts[0]
    operands = parts[1]

    if not re.match("[a-z\\.]+", instruction):
        raise RuntimeError("Invalid instruction: %s" % instruction)

    parts = operands.split(",")

    # We now have four possibilities:
    # 1. Length of parts is 1, that means we have one operand
    # 2. Length of parts is 2, that means we have two simple operands
    # 3. Length of parts is 4 and
    #  a) First part contains '(' but not ')', meaning the first operand is complex
    #  b) First part contains no '(' or ')', meaning the last operand is complex
    #    e.g. mov    %ecx,0x500094(%r15,%rdx,4)
    #
    # 4. Length of parts is 3, just one complex operand.
    #   e.g. shrb   -0x69(%rdx,%rbx,8)

    # When we fail, try storing a reason here
    failureReason = "Unknown failure."

    if RegisterHelper.isX86Compatible(registerMap):
        if len(parts) == 1:
            if instruction == "callq" or instruction == "push" or instruction == "pop":
                return RegisterHelper.getStackPointer(registerMap)
            else:
                failureReason = "Unsupported single-operand instruction."
        elif len(parts) == 2:
            failureReason = "Unknown failure with two-operand instruction."
            derefOp = None
            if "(" in parts[0] and ")" in parts[0]:
                derefOp = parts[0]

            if "(" in parts[1] and ")" in parts[1]:
                if derefOp is not None:
                    if ":(" in parts[1]:
                        # This can be an instruction using multiple segments, like:
                        #
                        #   movsq  %ds:(%rsi),%es:(%rdi)
                        #
                        #  (gdb) p $_siginfo._sifields._sigfault.si_addr
                        #    $1 = (void *) 0x7ff846e64d28
                        #    (gdb) x /i $pc
                        #    => 0x876b40 <js::ArgumentsObject::create<CopyFrameArgs>(JSContext*, JS::HandleScript, JS::HandleFunction, unsigned int, CopyFrameArgs&)+528>:   movsq  %ds:(%rsi),%es:(%rdi)
                        #    (gdb) info reg $ds
                        #    ds             0x0      0
                        #    (gdb) info reg $es
                        #    es             0x0      0
                        #    (gdb) info reg $rsi
                        #    rsi            0x7ff846e64d28   140704318115112
                        #    (gdb) info reg $rdi
                        #    rdi            0x7fff27fac030   140733864132656
                        #
                        #
                        # We don't support this right now, so return None.
                        #
                        return None

                    raise RuntimeError(
                        "Instruction operands have multiple loads? %s" %
                        crashInstruction)

                derefOp = parts[1]

            if derefOp is not None:
                match = re.match(
                    "((?:\\-?0x[0-9a-f]+)?)\\(%([a-z0-9]+)\\)", derefOp)
                if match is not None:
                    # The offset is optional; when present it is a hex number
                    offset = long(0)
                    if match.group(1):
                        offset = long(match.group(1), 16)

                    val = RegisterHelper.getRegisterValue(
                        match.group(2), registerMap)

                    # If we don't have the value, record the reason and fall
                    # through to the failure reporting at the end
                    if val is None:
                        failureReason = "Missing value for register %s " % match.group(2)
                    else:
                        if RegisterHelper.getBitWidth(registerMap) == 32:
                            return long(
                                int32(uint32(offset)) + int32(uint32(val)))
                        else:
                            # Assume 64 bit width
                            return long(
                                int64(uint64(offset)) + int64(uint64(val)))
            else:
                failureReason = "Failed to decode two-operand instruction: No dereference operation or hardcoded address detected."
                # We might still be reading from/writing to a hardcoded address.
                # Note that it's not possible to have two hardcoded addresses
                # in one instruction, one operand must be a register or immediate
                # constant (denoted by leading $). In some cases, like a movabs
                # instruction, the immediate constant however is dereferenced
                # and is the first operator. So we first check parts[1] then
                # parts[0] in case it's a dereferencing operation.
                for x in (parts[1], parts[0]):
                    result = re.match("\\$?(\\-?0x[0-9a-f]+)", x)
                    if result is not None:
                        return long(result.group(1), 16)
        elif len(parts) == 3:
            # Example instruction: shrb   -0x69(%rdx,%rbx,8)
            if "(" in parts[0] and ")" in parts[2]:
                complexDerefOp = parts[0] + "," + parts[1] + "," + parts[2]

                (result, reason) = GDBCrashInfo.calculateComplexDerefOpAddress(
                    complexDerefOp, registerMap)

                if result is None:
                    failureReason = reason
                else:
                    return result
            else:
                raise RuntimeError("Unexpected instruction pattern: %s" %
                                   crashInstruction)
        elif len(parts) == 4:
            if "(" in parts[0] and ")" not in parts[0]:
                complexDerefOp = parts[0] + "," + parts[1] + "," + parts[2]
            elif "(" not in parts[0] and ")" not in parts[0]:
                complexDerefOp = parts[1] + "," + parts[2] + "," + parts[3]
            else:
                # The first part is neither the start of a complex operand nor
                # a simple operand (e.g. it closes a parenthesis). Without
                # this branch complexDerefOp would be unbound below.
                raise RuntimeError("Unexpected instruction pattern: %s" %
                                   crashInstruction)

            (result, reason) = GDBCrashInfo.calculateComplexDerefOpAddress(
                complexDerefOp, registerMap)

            if result is None:
                failureReason = reason
            else:
                return result
        else:
            raise RuntimeError(
                "Unexpected length after splitting operands of this instruction: %s"
                % crashInstruction)
    else:
        failureReason = "Architecture is not supported."

    print("Unable to calculate crash address from instruction: %s " %
          crashInstruction, file=sys.stderr)
    print("Reason: %s" % failureReason, file=sys.stderr)

    return failureReason