def _find_function_start(location):
    '''
    Description:
        If the byte before the location belongs to a function or an align, assume the
        location itself is the start byte. Otherwise, for each prologue opcode
        ("push ebp", "push esp", "push esi", "push edi") search up for the first
        (lowest EA) occurrence that comes before either an align or a function.

    Input:
        location - The EA from which to start looking for a function start

    Output:
        A list of applicable EAs (at most one per opcode) sorted ascending.
        The list is empty if none are found.
    '''
    if idaapi.get_func(location - 1) or idc.isAlign(idc.GetFlags(location - 1)):
        return [location]

    eas = []
    # 55 = push ebp, 54 = push esp, 56 = push esi, 57 = push edi
    for opcode in ['55', '54', '56', '57']:
        ea = location
        function_start = idc.BADADDR
        while True:
            ea = idc.FindBinary(ea - 1, idc.SEARCH_UP, opcode)
            # Running out of matches ends the search, but it must not discard
            # the lowest candidate found so far.
            if ea == idc.BADADDR:
                break
            # A match inside a function or an align marks the search boundary.
            if idaapi.get_func(ea) or idc.isAlign(idc.GetFlags(ea)):
                break
            function_start = ea
        if function_start != idc.BADADDR:
            # _un_nop is defined elsewhere; presumably it steps past nop padding
            # in the NextHead direction -- confirm against its definition.
            eas.append(_un_nop(function_start, idc.NextHead))

    eas.sort()
    return eas
def _find_function_end(location):
    '''
    Description:
        If the byte after the location belongs to a function or an align, assume the
        location is the last byte. Otherwise, for each terminating opcode (retn, then
        jmp) search down for the last (highest EA) occurrence that comes before either
        an align or a function.

    Input:
        location - The EA from which to start looking for a function end.

    Output:
        A list of applicable exclusive end EAs (at most one per opcode) sorted
        descending. The list is empty if none are found.
    '''
    if idaapi.get_func(location + 1) or idc.isAlign(idc.GetFlags(location + 1)):  # This bit is inclusive
        return [location + 1]  # This bit is exclusive

    eas = []
    # CA + CB are retf, but aren't used often; 'EA' is 16 bit; 'FF' jumps are too rare
    for opcode in ['C3', 'C2', 'E9', 'EB']:
        ea = location
        function_end = idc.BADADDR
        while True:
            ea = idc.FindBinary(ea + 1, idc.SEARCH_DOWN, opcode)
            # Running out of matches ends the search, but it must not discard
            # the highest candidate found so far.
            if ea == idc.BADADDR:
                break
            # A match inside a function or an align marks the search boundary.
            if idaapi.get_func(ea) or idc.isAlign(idc.GetFlags(ea)):
                break
            function_end = ea
        if function_end != idc.BADADDR:
            # _un_nop is defined elsewhere; presumably it steps back over nop
            # padding in the PrevHead direction -- confirm against its definition.
            eas.append(_un_nop(function_end, idc.PrevHead) + 1)  # Again, exclusive

    eas.sort(reverse=True)
    return eas
def force_create_function(loc):
    """
    Create a function around ``loc`` using the boundaries chosen by
    ``_force_find_start`` and ``_find_force_end``.

    More hackish than create_function: it assumes the surrounding bytes are
    already defined code (i.e. not obfuscated code). The chosen range is not
    checked against ``loc``, so the created function may not include the
    desired location; this will need to be fixed at a later date.

    :param loc: Location a function is needed at
    :return: True if function is created, False otherwise
    """
    # Sanity check: refuse locations that already belong to a function.
    if idaapi.get_func(loc):
        append_debug('There\'s already a function here!')
        return False

    # Sanity check: refuse aligns, nop instructions and 0x90 data bytes.
    loc_flags = idc.GetFlags(loc)
    is_padding = (
        idc.isAlign(loc_flags)
        or idc.GetMnem(loc) == 'nop'
        or (idaapi.isData(loc_flags) and idc.Byte(loc) == 0x90)
    )
    if is_padding:
        append_debug('Can\'t make a function out of aligns and/or nops!')
        return False

    bounds = (_force_find_start(loc), _find_force_end(loc))
    if not idc.MakeFunction(*bounds):
        append_debug('Failed to create a function 0x%X - 0x%X.' % bounds)
        return False

    append_debug('Created a function 0x%X - 0x%X.' % bounds)
    return True
def ida_make_function(location):
    '''
    Description:
        From the first non-function byte, attempt to make a function. The start is found
        by walking heads backwards from the location until a function or an align is hit.
        A created function is deleted again unless its last instruction is a ret or a jmp.

    Input:
        location - The EA at which IDA should attempt to make a function.

    Output:
        True if it succeeded, False otherwise.
    '''
    function_start = location
    ea = location
    # Stop at BADADDR as well: once PrevHead runs out of heads the walk must not
    # continue, otherwise function_start is overwritten with BADADDR and the
    # loop never reaches a valid boundary.
    while ea != idc.BADADDR and not (idaapi.get_func(ea) or idc.isAlign(idc.GetFlags(ea))):
        function_start = ea
        ea = idc.PrevHead(ea)
    # _un_nop is defined elsewhere; presumably it steps past leading nop padding.
    function_start = _un_nop(function_start, idc.NextHead)

    if not idc.MakeFunction(function_start):
        return False

    # Inspect the instruction that contains the last byte of the new function.
    last_mnem = idc.GetMnem(idc.ItemHead(idaapi.get_func(function_start).endEA - 1))
    if 'ret' not in last_mnem and 'jmp' not in last_mnem:
        idc.DelFunction(function_start)
        append_debug(
            'Created a function at 0x%X, but there wasn\'t a jmp or ret at the end.' % function_start)
        return False

    append_debug('Created a function 0x%X.' % function_start)
    return True
def _force_find_start(loc):
    """
    Locate a possible function start location. Heads are walked backwards from
    ``loc`` until an align, an existing function, or the end of the available
    heads is reached. If "push ebp" is found it is returned immediately.
    Otherwise the lowest EA among the last seen "push esp", "push esi" and
    "push edi" is used.

    :param loc: Location a function is needed at
    :return: A possible function start location, or idc.BADADDR if no
             candidate push instruction was found
    """
    push_esp = idc.BADADDR
    push_esi = idc.BADADDR
    push_edi = idc.BADADDR

    loc = idc.PrevHead(loc)
    # The BADADDR guard stops the walk when PrevHead has no more heads to offer;
    # without it the loop does not stop at the start of the database.
    while (loc != idc.BADADDR
           and not idc.isAlign(idc.GetFlags(loc))
           and not idaapi.get_func(loc)):
        if idc.GetMnem(loc) == "push":
            opnd_0 = idc.GetOpnd(loc, 0)
            if opnd_0 == "ebp":
                return loc
            elif opnd_0 == "esp":
                push_esp = loc
            elif opnd_0 == "esi":
                push_esi = loc
            elif opnd_0 == "edi":
                push_edi = loc
        loc = idc.PrevHead(loc)

    # Unset candidates are still BADADDR, so they lose to any real hit in min().
    return min(push_esp, push_esi, push_edi)
def getOrigDisasm(self):
    # type: () -> str
    """
    Gets the original disassembly without any further applied transformations
    However, the formatting is different from the original and is more convenient for parsing

    Code (and anything not otherwise classified) has comments filtered out and double
    spaces reduced. Structs, aligns and data are delegated to their helper methods.
    ASCII items are rendered as .ascii/.asciz directives, one directive per line of text.
    A comment containing '<force> ' overrides the result with the text following the tag.

    :return: the disassembly
    """
    flags = idc.GetFlags(self.ea)
    if idc.isCode(flags):
        disasm = idc.GetDisasm(self.ea)
        disasm = self._filterComments(disasm)
        # reduce double spaces; replacing a single space with itself did nothing
        disasm = disasm.replace('  ', ' ')
    elif idc.isStruct(flags):
        disasm = self._getStructDisasm()
    elif idc.isAlign(flags):
        disasm = idc.GetDisasm(self.ea)
        disasm = self._convertAlignDisasm(disasm)
    elif idc.isASCII(flags):
        content = self.getContent()
        # newlines still to be emitted; every line but the last uses .ascii
        numNewLines = content.count(0x0A)
        pieces = ['.ascii "' if numNewLines > 1 else '.asciz "']
        for byte in content:
            char = chr(byte)
            if byte == 0x00:
                # terminator closes the string literal
                pieces.append('"')
            elif char == '"':
                pieces.append('\\\"')
            elif char == '\\':
                pieces.append('\\\\')
            elif byte == 0x0A:
                pieces.append('\\n')
                numNewLines -= 1
                # start a new directive for the text after this newline
                if numNewLines > 1:
                    pieces.append('"\n\t.ascii "')
                elif numNewLines == 1:
                    pieces.append('"\n\t.asciz "')
            elif char == ' ':
                pieces.append(' ')
            elif not char.isspace():
                pieces.append(char)
            else:
                # TODO [INVALID] arm-none-eabi doesn't recognize \xXX? \x seems to become a byte.
                pieces.append('\\x%02X' % byte)
        disasm = ''.join(pieces)
    elif idc.isData(flags):
        disasm = self._getDataDisasm()
    else:
        disasm = idc.GetDisasm(self.ea)
        disasm = self._filterComments(disasm)
        # reduce double spaces; replacing a single space with itself did nothing
        disasm = disasm.replace('  ', ' ')

    # parse force command
    # Only a tag followed by a space carries replacement text. A bare '<force>'
    # used to raise ValueError from index(); it is now ignored.
    comment = self.getComment()
    forceTag = '<force> '
    if forceTag in comment:
        disasm = comment[comment.index(forceTag) + len(forceTag):]
    return disasm
def find_function_ends(location, end_mnem_bytes=None):
    """
    Description:
        Collects every candidate function end between the location and the next
        function, Align, or end of the location's segment.

    Input:
        location - The EA to search after
        end_mnem_bytes - Optional instruction bytes to prefer as a function end,
                         given as space separated bytes (i.e. 'C2' for 'retn').
                         They are searched in addition to the defaults.

    Output:
        A list of exclusive end EAs (instruction EA plus its item size) ordered:
        end_mnem_bytes hits in the order found, then C3/C2 hits ascending,
        then E9/EA/EB hits ascending.
    """
    # Walk heads forward until the first boundary (function, Align, or no more heads).
    ea = idc.NextHead(location)
    while not (idaapi.get_func(ea)
               or idc.isAlign(idc.GetFlags(ea))
               or ea == idc.BADADDR):
        ea = idc.NextHead(ea)
    boundary = idaapi.getseg(location).endEA if ea == idc.BADADDR else ea
    max_location = min(boundary, idaapi.getseg(location).endEA)

    opcodes = ['C3', 'C2', 'E9', 'EA', 'EB']
    if end_mnem_bytes:
        opcodes.insert(0, end_mnem_bytes)
    jump_opcodes = opcodes[-3:]

    hits = {}
    for opcode in opcodes:
        found = []
        ea = find_binary_instruction_start(location, idc.SEARCH_DOWN, opcode,
                                           max_location=max_location)
        while ea != idc.BADADDR and ea <= max_location:
            found.append(ea)
            # NOTE(review): the search resumes 11 bytes past each hit rather than
            # at the next byte -- confirm this offset against
            # find_binary_instruction_start before changing it.
            ea = find_binary_instruction_start(ea + 11, idc.SEARCH_DOWN, opcode,
                                               max_location=max_location)
        hits[opcode] = found

    preferred = hits[end_mnem_bytes] if end_mnem_bytes else []
    returns = sorted(hits['C3'] + hits['C2'])
    jumps = sorted(itertools.chain.from_iterable(hits[opcode] for opcode in jump_opcodes))
    return [end + idc.ItemSize(end) for end in preferred + returns + jumps]
def create_function(location, find_start=True):
    '''
    Description:
        First lets IDA try to build the function via ida_make_function. If that fails,
        candidate starts ("push ebp", "push esp", "push esi", "push edi") and candidate
        ends from _find_function_end are combined, and the first start/end pair IDA
        accepts becomes the function. The created range is then passed to split_funcs.

    Input:
        location - An address that should be within a function
        find_start - When False, assume location is the start of the function

    Output:
        True if it made a function, False otherwise.
    '''
    # Sanity check: refuse locations that already belong to a function.
    if idaapi.get_func(location):
        append_debug('There\'s already a function here! (0x%X)' % location)
        return False

    # Sanity check: refuse aligns, nop instructions and 0x90 data bytes.
    flags = idc.GetFlags(location)
    if (idc.isAlign(flags)
            or idc.GetMnem(location) == 'nop'
            or (idaapi.isData(flags) and idc.Byte(location) == 0x90)):
        append_debug('Can\'t make a function out of aligns and/or nops!')
        return False

    # Trace up as far as possible and have IDA do its thing.
    if ida_make_function(location):
        return True

    # Attempt to find the function ourselves.
    if find_start:
        candidate_starts = _find_function_start(location)
    else:
        candidate_starts = [location]
    candidate_ends = _find_function_end(location)

    # product() yields nothing when either candidate list is empty.
    for start_ea, end_ea in itertools.product(candidate_starts, candidate_ends):
        if start_ea >= end_ea:
            continue
        if idc.MakeFunction(start_ea, end_ea):
            append_debug('Created a function 0x%X - 0x%X.' % (start_ea, end_ea))
            # The new range may still have to be split.
            split_funcs(start_ea, end_ea)
            return True
        append_debug(
            'Tried to create a function 0x%X - 0x%X, but IDA wouldn\'t do it.' % (start_ea, end_ea))

    append_debug('Failed to find function based on location 0x%X.' % location)
    return False
def find_function_starts(location, start_mnem_bytes=None):
    """
    Description:
        Collects every candidate function start between the most recent function,
        Align, or start of the location's segment and the location.

    Input:
        location - The EA to search before
        start_mnem_bytes - Optional instruction bytes to prefer as a function start,
                           given as space separated bytes (i.e. '55' for 'push ebp').
                           They are searched in addition to the defaults.

    Output:
        A list of function start EAs ordered: start_mnem_bytes hits in the order
        found, then '55' (push ebp) hits in the order found, then the hits for
        the remaining three opcodes sorted ascending.
    """
    # Walk heads backward until the first boundary (function, Align, or no more heads).
    ea = idc.PrevHead(location)
    while not (idaapi.get_func(ea)
               or idc.isAlign(idc.GetFlags(ea))
               or ea == idc.BADADDR):
        ea = idc.PrevHead(ea)
    boundary = idaapi.getseg(location).startEA if ea == idc.BADADDR else ea
    min_location = max(boundary, idaapi.getseg(location).startEA)

    opcodes = ['55', '54', '56', '57']
    if start_mnem_bytes:
        opcodes.insert(0, start_mnem_bytes)
    other_opcodes = opcodes[-3:]

    hits = {}
    for opcode in opcodes:
        found = []
        ea = find_binary_instruction_start(location - 1, idc.SEARCH_UP, opcode, min_location)
        while ea != idc.BADADDR and ea >= min_location:
            found.append(ea)
            ea = find_binary_instruction_start(ea - 1, idc.SEARCH_UP, opcode, min_location)
        hits[opcode] = found

    preferred = hits[start_mnem_bytes] if start_mnem_bytes else []
    others = sorted(itertools.chain.from_iterable(hits[opcode] for opcode in other_opcodes))
    return preferred + hits['55'] + others
def _find_force_end(loc):
    """
    Locate the first "ret" instruction down from the input location. Heads are
    walked forward until an align, an existing function, or the end of the
    available heads is reached.

    :param loc: Location a function is needed at
    :return: ItemEnd of the return location. If no "ret" is found before the
             boundary, the ItemEnd of the last item before the boundary.
    """
    last_head = loc
    ea = idc.NextHead(loc)
    # The BADADDR guard ends the walk when NextHead has no more heads to offer;
    # without it the loop would never terminate.
    while (ea != idc.BADADDR
           and not idc.isAlign(idc.GetFlags(ea))
           and not idaapi.get_func(ea)):
        if "ret" in idc.GetMnem(ea):
            return idc.ItemEnd(ea)
        last_head = ea
        ea = idc.NextHead(ea)

    if ea == idc.BADADDR:
        # Ran out of heads: end on the last item that was actually visited.
        return idc.ItemEnd(last_head)
    return idc.ItemEnd(idc.PrevHead(ea))
def find_function_ends_near(location, end_mnem_bytes=None):
    """
    Description:
        Identifies the nearest possible function end for each end opcode, searching
        between the location and the next function, Align, or end of the segment.

    Input:
        location - The EA to search after
        end_mnem_bytes - Optional instruction bytes to prefer as a function end,
                         given as space separated bytes (i.e. 'C2' for 'retn').
                         They are searched in addition to the defaults.

    Output:
        A list of exclusive end EAs (instruction EA plus its item size) ordered:
        end_mnem_bytes, C2, C3, then the E9/EA/EB hits sorted ascending.
        Opcodes without a hit are omitted.
    """
    # Walk heads forward until the first boundary (function, Align, or no more heads).
    max_location = None
    ea = location
    while max_location is None:
        ea = idc.NextHead(ea)
        if idaapi.get_func(ea) or idc.isAlign(idc.GetFlags(ea)):
            max_location = ea
        elif ea == idc.BADADDR:
            max_location = idaapi.getseg(location).endEA
    max_location = min(max_location, idaapi.getseg(location).endEA)

    targets = ['C2', 'C3', 'E9', 'EA', 'EB']
    if end_mnem_bytes:
        targets.insert(0, end_mnem_bytes)

    ends = {}
    for target in targets:
        ea = find_binary_instruction_start(location, idc.SEARCH_DOWN, target,
                                           max_location=max_location)
        # A failed search (idc.BADADDR) compares above max_location and is skipped.
        if ea <= max_location:
            ends[target] = ea

    preferred = [ends.get(end_mnem_bytes), ends.get('C2'), ends.get('C3')]
    # Drop misses before sorting: ordering None against integers raises
    # TypeError on Python 3.
    jumps = sorted(ea for ea in (ends.get(target) for target in targets[-3:]) if ea)
    return [end + idc.ItemSize(end) for end in preferred + jumps if end]
def trim_func(ea, GetHead):
    """
    Description:
        Steps with GetHead for as long as the current EA is filler: a nop instruction,
        a data byte 0x90 (nop opcode), an Align, or a non-code byte 0xCC.

    Input:
        ea - The location to adjust for nops and Aligns. EA must be a head.
        GetHead - either PrevHead or NextHead

    Output:
        The corrected EA.
    """
    def _is_filler(addr):
        # Checks are made in the same order as a single or-chain would make them.
        if idc.GetMnem(addr) == 'nop':
            return True
        if idaapi.isData(idc.GetFlags(addr)) and idc.Byte(addr) == 0x90:
            return True
        if idc.isAlign(idc.GetFlags(addr)):
            return True
        return not idc.isCode(idc.GetFlags(addr)) and idc.Byte(addr) == 0xCC

    while _is_filler(ea):
        ea = GetHead(ea)
    return ea
def getDisasm(self):
    """
    Build the disassembly for the item at self.ea, starting from getOrigDisasm()
    and applying the conversion helper that matches the item's flags.

    :return: transformed disassembly so that it's functional with the gcc assembler
    """
    disasm = self.getOrigDisasm()
    flags = idc.GetFlags(self.ea)
    # Exactly one conversion is applied: align, then data/unknown, then code.
    if idc.isAlign(flags):
        disasm = self._convertAlignDisasm(disasm)
    elif idc.isData(flags) or idc.isUnknown(flags):
        disasm = self._convertData(disasm)
    elif idc.isCode(flags):
        disasm = self._convertCode(self.ea, disasm)
        # make code small case
        disasm = self._lowerCode(disasm)
    # Tab conversion is applied to every kind of item.
    disasm = self._convertTabs(disasm)
    return disasm
def find_function_starts_near(location, start_mnem_bytes=None):
    """
    Description:
        Identifies the nearest possible function start for each start opcode, searching
        between the most recent function, Align, or start of the segment and the location.

    Input:
        location - The EA to search before
        start_mnem_bytes - Optional instruction bytes to prefer as a function start,
                           given as space separated bytes (i.e. '55' for 'push ebp').
                           They are searched in addition to the defaults.

    Output:
        A list of function start EAs ordered: start_mnem_bytes, '55' (push ebp),
        then the hits for the remaining three opcodes sorted descending.
        Opcodes without a hit are omitted.
    """
    # Walk heads backward until the first boundary (function, Align, or no more heads).
    min_location = None
    ea = location
    while min_location is None:
        ea = idc.PrevHead(ea)
        if idaapi.get_func(ea) or idc.isAlign(idc.GetFlags(ea)):
            min_location = ea
        elif ea == idc.BADADDR:
            min_location = idaapi.getseg(location).startEA
    min_location = max(min_location, idaapi.getseg(location).startEA)

    targets = ['55', '54', '56', '57']
    if start_mnem_bytes:
        targets.insert(0, start_mnem_bytes)

    starts = {}
    for target in targets:
        ea = find_binary_instruction_start(location - 1, idc.SEARCH_UP, target, min_location)
        if ea != idc.BADADDR:
            starts[target] = ea

    preferred = [starts.get(start_mnem_bytes), starts.get('55')]
    # Drop misses before sorting: ordering None against integers raises
    # TypeError on Python 3.
    others = sorted((ea for ea in (starts.get(target) for target in targets[-3:]) if ea),
                    reverse=True)
    return [start for start in preferred + others if start]
def from_syntactic_to_semantic(self, _start, _end):
    """
    Build the "semantic" byte string for the heads in [_start, _end).

    Code heads are converted through get_semantic_bytes when
    get_first_numerical_operand_offset reports a non-zero offset; otherwise
    their raw bytes are copied. Align heads are copied raw. Other heads are
    skipped.

    :param _start: first EA of the range
    :param _end: EA at which the range stops
    :return: the accumulated string, or None if get_semantic_bytes returns None
    """
    semantic = ''
    # Parse all the instructions inside the function
    for head in idautils.Heads(_start, _end):
        head_flags = idc.GetFlags(head)

        if idc.isAlign(head_flags) and not idc.isCode(head_flags):
            # align: copy the byte without semantic conversion
            semantic += idc.GetManyBytes(head, idc.NextHead(head) - head, False)
            continue
        if not idc.isCode(head_flags):
            continue

        # Code: convert instruction
        insn = idautils.DecodeInstruction(head)
        operand_offset = self.get_first_numerical_operand_offset(insn)
        if operand_offset == 0:
            # No numerical operand reported: keep the instruction bytes as they are.
            raw = [chr(idc.Byte(insn.ea + index)) for index in range(insn.size)]
            semantic += ''.join(raw)
            continue

        converted = self.get_semantic_bytes(insn.ea, operand_offset)
        if converted is None:
            return None
        semantic += ''.join(converted)
    return semantic
def try_mark_as_code(address, end_address=0):
    """
    Make sure the item at ``address`` is code.

    A first MakeCode attempt is made directly. If it fails, the bytes from
    ``address`` up to ``end_address`` (at least ``address + 1``) are undefined
    and MakeCode is tried once more.

    :param address: EA to turn into code
    :param end_address: optional end of the range to undefine before retrying
    :return: True if the address is or became code, False for aligns and failures
    """
    def _make_code():
        # Wait for auto-analysis only after a successful conversion.
        if not idc.MakeCode(address):
            return False
        idaapi.autoWait()
        return True

    flags = idc.GetFlags(address)
    if idc.isAlign(flags):
        return False
    if idc.isCode(flags) or _make_code():
        return True

    span_end = max(end_address, address + 1)
    idc.MakeUnknown(address, span_end - address + 1, idc.DOUNK_SIMPLE)
    return _make_code()
def getOrigDisasm(self):
    # type: () -> str
    """
    Gets the original disassembly without any further applied transformations
    However, the formatting is different from the original and is more convenient for parsing

    Structs yield the literal "INVALID", aligns yield IDA's disassembly unchanged and
    data is delegated to _getDataDisasm. Everything else has its comments filtered
    out and runs of spaces reduced to a single space.

    :return: the disassembly
    """
    flags = idc.GetFlags(self.ea)
    if idc.isStruct(flags):
        disasm = "INVALID"
    elif idc.isAlign(flags):
        disasm = idc.GetDisasm(self.ea)
    elif idc.isData(flags):
        disasm = self._getDataDisasm(self.ea)
    else:
        disasm = idc.GetDisasm(self.ea)
        disasm = self._filterComments(disasm)
        # Collapse runs of spaces. The condition must test for a double space:
        # testing for a single space never becomes false and loops forever.
        while '  ' in disasm:
            disasm = disasm.replace('  ', ' ')
    return disasm
def expandUnkArrays(start_ea, end_ea, verbose=True):
    """
    Finds all named byte_xxx and dword_xxx items, and turns them to unknowns.
    If an item is unnamed, and its content is a list whose element size is
    1 (byte) or 4 (dword), it's also turned into unknowns. Aligns are never touched.

    :param start_ea: start of the range
    :param end_ea: end of the range
    :param verbose: if True, print all changes
    :return: None
    """
    d = Data.Data(start_ea)
    while d.ea < end_ea:
        expand = False
        size = d.getSize()
        if not idc.isAlign(d.getFlags()):
            name = d.getName()
            if name:
                # known dummy array
                expand = name.startswith(('byte_', 'dword_'))
            else:
                content = d.getContent()
                # byte/dword array; an empty list has no element size to compute
                if isinstance(content, list) and content:
                    elemSize = size / len(content)
                    expand = elemSize == 1 or elemSize == 4
        if expand:
            if verbose:
                print('%07X: delete unk arr' % d.ea)
            idc.del_items(d.ea, size)
        # Query the size again rather than reusing the cached value, in case
        # del_items changed the item at d.ea.
        d = Data.Data(d.ea + d.getSize())
def sanity_checks(location):
    """
    Description:
        Basic pre-checks that decide whether a function can be created containing
        the provided EA.

    Input:
        location - The EA to evaluate

    Output:
        True if a function can be created containing the provided EA
        False if the provided EA was a nop or Align
        None if there is already a function containing the provided EA
    """
    if idaapi.get_func(location):
        append_debug('There\'s already a function here! (0x%X)' % location)
        return None

    flags = idc.GetFlags(location)
    # Yes, the nop bit may be incorrect, but it's gonna be a very special case that needs a function with nops
    is_filler = (
        idc.isAlign(flags)
        or idc.GetMnem(location) == 'nop'
        or (idaapi.isData(flags) and idc.Byte(location) == 0x90)
    )
    if not is_filler:
        return True

    append_debug('Can\'t make a function including aligns and/or nops!')
    return False
def ida_make_functions(location, require_term=True):
    """
    Description:
        Attempts to create functions based on the assumption that there should be continuous
        contiguous functions defined since the previous function or align.
        Keeps creating functions (by recursing) until a function containing <location> exists
        or a creation attempt fails.
        Only identifies potential start EAs and lets IDA find the ends.

    Input:
        location - The EA at which IDA should attempt to make a function.
        require_term - When True, requires the last instruction in all defined functions to be retn or jmp

    Output:
        True if it made a function or a function was already present, False otherwise.
    """
    sanity = sanity_checks(location)
    if sanity is None:  # There was already a function
        return True
    elif sanity is False:  # There was something preventing function creation
        return False

    target_location = location
    function_start = location
    ea = location
    # Stop at BADADDR as well: once PrevHead runs out of heads the walk must not
    # continue, otherwise function_start is overwritten with BADADDR and the
    # loop never reaches a valid boundary.
    while ea != idc.BADADDR and not (idaapi.get_func(ea) or idc.isAlign(idc.GetFlags(ea))):
        function_start = ea
        ea = idc.PrevHead(ea)
    function_start = trim_func(function_start, idc.NextHead)

    if not try_make_function(function_start, require_term=require_term):
        return False
    if idaapi.get_func(target_location):
        return True
    # The new function ended before the target; repeat from the target again.
    return ida_make_functions(target_location, require_term)
def address_is_alignment(address):
    """
    Report whether idc.isAlign accepts the flags of the item at ``address``.

    :param address: EA to inspect
    :return: the result of idc.isAlign for the address' flags
    """
    return idc.isAlign(idc.GetFlags(address))
def is_align(self):
    """
    Report whether idc.isAlign accepts this object's ``flags``.

    :return: the result of idc.isAlign(self.flags)
    """
    own_flags = self.flags
    return idc.isAlign(own_flags)