def get_flow_code_from_address(address):
    """Collect the run of instructions that flow on from ``address``.

    Used to gather basic blocks that IDA marks as chunks but does not attach
    to the function being examined (IDA assigns a chunk to one function only,
    never to several); walking the flow by hand works around that limit.

    Returns ``None`` when ``address`` is not code. Otherwise returns a
    ``(start, end)`` tuple where ``end`` is the address of the last
    instruction plus one, so that ``Heads(start, end)`` (exclusive upper
    bound) still yields that last instruction.
    """
    if not idc.isCode(idc.GetFlags(address)):
        return None

    instructions = [address]
    cursor = address
    while True:
        # Step over the current item to reach the following element.
        cursor += idc.ItemSize(cursor)
        next_flags = idc.GetFlags(cursor)
        # Continue only while the next element is an instruction that
        # execution "flows" into from the previous one.
        if not (idc.isCode(next_flags) and idc.isFlow(next_flags)):
            break
        instructions.append(cursor)

    # The list always holds at least the starting instruction here.
    return (min(instructions), max(instructions) + 1)
def get_flow_code_from_address(address):
    """Return the bounds of the instruction sequence starting at ``address``.

    IDA can attach a chunk to a single function only; this helper gathers
    basic blocks that are marked as chunks but do not belong to the function
    being examined, by following the "flow" flag from item to item.

    The result is ``None`` if ``address`` does not hold an instruction,
    otherwise ``(lowest, highest + 1)`` over the collected instruction
    addresses. The ``+ 1`` makes the range usable with ``Heads(start, end)``,
    whose end bound is exclusive.
    """
    flags = idc.GetFlags(address)
    if not idc.isCode(flags):
        return None

    code = [address]
    follows = True
    while follows:
        # Address of the element that comes right after the current one.
        address = address + idc.ItemSize(address)
        flags = idc.GetFlags(address)
        # The element extends the run if it is code reached by fall-through.
        follows = idc.isCode(flags) and idc.isFlow(flags)
        if follows:
            code.append(address)

    # At least one instruction was collected, so min()/max() are safe.
    return (min(code), max(code) + 1)
def rngExterns(start_ea, end_ea, toStr=True):
    """
    creates .equs for all external symbols used in the range
    :param start_ea: start ea of the range, inclusive
    :param end_ea: end ea of the range, exclusive. If a function starts at end_ea, the range is
                   extended to cover that function and its pool.
    :param toStr: (bool) when True return the .equ listing as a string, otherwise return the
                  sorted list of external reference addresses
    :return: a string containing all the external symbol .equs, or just the refs if not toStr
    """
    # if there's a function at end_ea, include all of its refs
    if Function.isFunction(end_ea):
        f = Function.Function(end_ea)
        end_ea = f.func_ea + f.getSize(withPool=True)

    def stripThumbBit(xref):
        # all code refs shouldn't have a +1 in them. The thumb switch isn't involved with the
        # symbol itself
        if (idc.isCode(idc.GetFlags(xref)) or idc.isCode(idc.GetFlags(xref - 1))) \
                and xref & 1 == 1:
            return xref - 1
        return xref

    def isExternal(xref):
        # external means outside of [start_ea, end_ea)
        return xref < start_ea or xref >= end_ea

    # a set gives O(1) duplicate filtering; the result is sorted afterwards anyway
    externs = set()

    # obtain xrefs of every data item, filtering out internal ones and duplicates
    ea = start_ea
    while ea < end_ea:
        d = Data.Data(ea)
        refsFrom = d.getXRefsFrom()  # index 0 and 1 are handled separately below
        for xref in refsFrom[0]:
            xref = stripThumbBit(xref)
            if isExternal(xref):
                externs.add(xref)
        for xref in refsFrom[1]:
            xref = stripThumbBit(xref)
            if (isExternal(xref)
                    and xref not in externs
                    and d.isPointer(xref)):  # filter non-pointer symbols, like byte_50
                externs.add(xref)
        # advance to next item
        ea = ea + d.getSize()

    xrefs = sorted(externs)

    if not toStr:
        return xrefs

    # output file formats to include symbols into linking process
    lines = []
    for xref in xrefs:
        d = Data.Data(xref)
        name = d.getName()
        lines.append('.equ %s, 0x%07X\n' % (name, d.ea))
    return ''.join(lines)
def walk_datas():
    """Yield the address of every item, in every segment, that is not code.

    Items whose flags mark a function start are skipped, and so are items
    flagged as code (whether or not they belong to a function).
    """
    for seg_start in idautils.Segments():
        for ea in ya.get_all_items(seg_start, idc.SegEnd(seg_start)):
            flags = idc.GetFlags(ea)
            if idaapi.isFunc(flags):
                # function
                continue
            if idc.isCode(flags):
                # code, inside or outside of a function
                continue
            yield ea
def getSize(self, withPool=False):
    """
    Computes the size of the function, optionally including the data pool that follows it.

    Parsed Comment commands:
        <endpool> specifies the last element in the pool. That element's size is included in
                  the pool. To specify that a function has no pool at all, put the comment
                  command at its last instruction.
    :param withPool: (bool) somewhat of a heuristic. Computes the pool size as simply the amount
                     of bytes since the function's code portion finished (end_ea) until a new
                     code head is detected
    :return: the size of the function in bytes: end_ea - start_ea (plus the pool if requested)
    """
    codeSize = self.func.end_ea - self.func.start_ea
    if not withPool:
        return codeSize

    # check if the function is set to have no pool: look at the comment of the item located
    # one instruction width (2 bytes in thumb, 4 otherwise) before the end of the function
    if self.isThumb():
        instSize = 2
    else:
        instSize = 4
    lastInstCmt = idc.Comment(self.func.end_ea - instSize)
    if lastInstCmt and '<endpool>' in lastInstCmt:
        return codeSize

    # walk the items following the function until a code head shows up
    head = self.func.end_ea
    while not idc.isCode(idc.GetFlags(head)):
        itemCmt = idc.Comment(head)
        # the current element always counts towards the pool
        head += idc.get_item_size(head)
        # manual pool computation, trust and assume that this was the last element in the pool!
        if itemCmt and '<endpool>' in itemCmt:
            break

    return head - self.func.start_ea
def find_xrefs(addr):
    """Return the references to ``addr`` that do not originate from code.

    Data references are tried first; when there are none, the generic
    ``idautils.refs`` walker is used instead. References located at code
    addresses are dropped from the result.
    """
    candidates = list(idautils.DataRefsTo(addr))
    if not candidates:
        # NOTE(review): `first` is not defined in this function and `next`
        # resolves to the builtin unless the module rebinds it -- presumably
        # both are module-level xref iterator callbacks; confirm.
        candidates = list(idautils.refs(addr, first, next))
    return [ref for ref in candidates if not idc.isCode(idc.GetFlags(ref))]
def get_value_type(ea):
    """Classify ``ea`` as one of the ``T_*`` address kinds.

    Unloaded addresses are plain values (``T_VALUE``). Otherwise the segment
    name, the debugger's stack segment, the code flag of the address and the
    segment permissions are checked, in that order of priority:
    stack, heap, RWX, code, data, read-only data, and finally ``T_VALUE``.
    """
    if not idaapi.is_loaded(ea):
        return T_VALUE

    segm_name = idc.SegName(ea)
    segm = idaapi.getseg(ea)
    is_code = idc.isCode(idc.GetFlags(ea))

    lowered_name = segm_name.lower()
    if "stack" in lowered_name or \
            (dbg.stack_segm and dbg.stack_segm.start_ea == segm.start_ea):
        return T_STACK
    if "heap" in lowered_name:
        return T_HEAP

    readable = segm.perm & idaapi.SEGPERM_READ
    writable = segm.perm & idaapi.SEGPERM_WRITE
    executable = segm.perm & idaapi.SEGPERM_EXEC

    if not is_code and readable and writable and executable:
        return T_RWX
    if is_code or (readable and executable):
        return T_CODE
    if readable and writable:
        return T_DATA
    if readable:
        return T_RODATA
    return T_VALUE
def disassemble_from_trace(self):
    """Turn every address executed in the selected trace into code.

    The trace shown in the current tab is walked instruction by instruction.
    Addresses IDA has not decoded as code are converted with ``MakeCode``.
    When a "wave" meta entry is met, the previous instruction (if the trace
    holds one) is commented and processing stops.

    The disassemble button is drawn flat while the work is in progress and is
    always restored afterwards, including on early return or error.
    """
    # Only the trace lookup can legitimately mean "no trace": keep the
    # KeyError handler narrow so unrelated KeyErrors are not misreported.
    try:
        index = self.traces_tab.currentIndex()
        trace = self.core.traces[self.id_map[index]]
    except KeyError:
        print("No trace found to use")
        return

    self.disassemble_button.setFlat(True)
    try:
        found_match = False
        for k, inst in trace.instrs.items():
            if k in trace.metas:
                for name, arg1, arg2 in trace.metas[k]:
                    if name == "wave":
                        self.parent.log("LOG", "Wave n°%d encountered at (%s,%x) stop.." % (arg1, k, inst.address))
                        # A wave at the very first index has no previous
                        # instruction to annotate.
                        if (k - 1) in trace.instrs:
                            prev_inst = trace.instrs[k - 1]
                            idc.MakeComm(prev_inst.address, "Jump into Wave %d" % arg1)
                        return
            # TODO: Check that the address is in the address space of the program
            if not idc.isCode(idc.GetFlags(inst.address)):
                found_match = True
                # TODO: Add an xref with the previous instruction
                self.parent.log("LOG", "Addr:%x not decoded as an instruction" % inst.address)
                if idc.MakeCode(inst.address) == 0:
                    self.parent.log("ERROR", "Fail to decode at:%x" % inst.address)
                else:
                    idaapi.autoWait()
                    self.parent.log("SUCCESS", "Instruction decoded at:%x" % inst.address)

        if not found_match:
            self.parent.log("LOG", "All instruction are already decoded")
    finally:
        self.disassemble_button.setFlat(False)
def heatmap_trace(self):
    """Toggle the hit-count heatmap for the trace shown in the current tab.

    When the heatmap is already on, every code instruction of the trace is
    painted back to 0xffffff. Otherwise each instruction gets the colour that
    ``compute_step_map`` associates with its hit count. A ``KeyError`` raised
    anywhere in the process is reported as "No trace found".
    """
    try:
        tab_index = self.traces_tab.currentIndex()
        trace = self.core.traces[self.id_map[tab_index]]

        if self.heatmaped:
            self.heatmap_button.setText("Heatmap")

            def pick_color(_address):
                return 0xffffff
        else:
            self.heatmap_button.setText("Heatmap undo")
            self.heatmap_button.setFlat(True)
            hits = trace.address_hit_count
            palette = self.compute_step_map(set(hits.values()))
            print(palette)

            def pick_color(address):
                return palette[hits[address]]

        for inst in trace.instrs.values():
            if not idc.isCode(idc.GetFlags(inst.address)):
                continue
            idc.SetColor(inst.address, idc.CIC_ITEM, pick_color(inst.address))

        if self.heatmaped:
            self.heatmaped = False
        else:
            self.heatmap_button.setFlat(False)
            self.heatmaped = True
    except KeyError:
        print("No trace found")
def is_code(va):
    """Tell whether ``va`` belongs to an item flagged as code.

    When ``va`` is not itself an item head, the question is asked again for
    the address returned by ``get_head(va)``.
    """
    if not is_head(va):
        return is_code(get_head(va))
    return idc.isCode(idc.GetFlags(va))
def crefs_from(ea, only_one=False, check_fixup=True):
    """Generate the code-reference targets of the instruction at `ea`.

    Nothing is yielded when `ea` is not flagged as code.

    Targets come from three sources, in order: the fixup target of `ea`
    (only when `check_fixup` is set and that target is a valid code address),
    IDA's code cross-references from `ea`, and the module-level `_CREFS_FROM`
    mapping.

    When `only_one` is true, the cross-reference loop and the `_CREFS_FROM`
    loop each stop right after their first yield, and `_CREFS_FROM` is not
    consulted at all once something has been yielded.
    """
    flags = idc.GetFlags(ea)
    if not idc.isCode(flags):
        return
    fixup_ea = idc.BADADDR
    # `seen` becomes true only after a yield made while `only_one` is set.
    seen = False
    # `has_one` gates the `_stop_looking_for_xrefs` early exit below; it is
    # true from the start when the caller wants a single target.
    has_one = only_one
    if check_fixup:
        fixup_ea = idc.GetFixupTgtOff(ea)
        if not is_invalid_ea(fixup_ea) and is_code(fixup_ea):
            seen = only_one
            has_one = True
            yield fixup_ea
        # NOTE(review): presumably `_stop_looking_for_xrefs` says whether the
        # instruction kind makes further lookups pointless -- confirm.
        if has_one and _stop_looking_for_xrefs(ea):
            return
    for target_ea in _xref_generator(ea, idaapi.get_first_cref_from, idaapi.get_next_cref_from):
        # Skip the fixup target (already handled above) and invalid targets.
        if target_ea != fixup_ea and not is_invalid_ea(target_ea):
            seen = only_one
            yield target_ea
            if seen:
                return
    # Fall back to the references recorded in `_CREFS_FROM` for this address.
    if not seen and ea in _CREFS_FROM:
        for target_ea in _CREFS_FROM[ea]:
            seen = only_one
            yield target_ea
            if seen:
                return
def GetDataXrefString(ea):
    """Build a text listing of the strings referenced by the function at ``ea``.

    Every instruction of every chunk of the function is inspected; for each
    data reference the target is read as a C string and, if that gives
    nothing longer than 4 characters, as a unicode string. Strings longer
    than 4 characters are kept (as their ``repr``).

    Returns the kept strings joined by newlines and prefixed with a blank
    line, or the empty string when nothing was found.
    """
    func_ea = idc.LocByName(idc.GetFunctionName(ea))

    found = []
    for chunk_start, chunk_end in idautils.Chunks(func_ea):
        for head in idautils.Heads(chunk_start, chunk_end):
            # Only instructions can carry the data references we want.
            if not idc.isCode(idc.GetFlags(head)):
                continue
            for ref in idautils.DataRefsFrom(head):
                text = idc.GetString(ref, -1, idc.ASCSTR_C)
                if not text or len(text) <= 4:
                    text = idc.GetString(ref, -1, idc.ASCSTR_UNICODE)
                if text and len(text) > 4:
                    found.append(repr(text))

    if found:
        return "\n\n" + "\n".join(found)
    return ""
def isLikeLoadJmpTable(ea):
    """Tell whether the instruction at `ea` looks like a jump-table load.

    The pattern looked for is ``mov reg, off[reg*4]``: operand 1 has a
    displacement that points at data, and the first 32-bit little-endian
    entry stored there is the address of code.

    :param ea: address of the instruction to inspect
    :return: True when the pattern matches, False otherwise
    """
    insn_t = idautils.DecodeInstruction(ea)

    # 1) mov reg, off[reg*4]
    if not hasDispl(insn_t, 1):
        return False

    base, scale, index, displ = getAddressParts(insn_t, 1)
    # NOTE(review): 5 and 2 are presumably the "no base register" encoding and
    # the x4 scale exponent -- confirm against getAddressParts.
    if not (base == 5 and scale == 2 and idc.isData(idc.GetFlags(displ))):
        return False

    # check if there is a table of valid code pointers; one valid entry is
    # enough to decide, so only the first slot needs to be read
    bs = idaapi.get_many_bytes(displ, 4)
    if bs is None or len(bs) != 4:
        return False

    jmpaddress = struct.unpack('<I', bs)[0]
    return bool(idc.isCode(idc.GetFlags(jmpaddress)))
def yara_sig_code_selection():
    """Return some internal details for how we want to signature the selection

    Walks the current selection head by head. Code items contribute their
    disassembly text as a comment and a wildcarded instruction as a rule
    line; non-code items contribute a byte-count comment and their raw bytes
    in spaced hex form.

    :return: tuple (selection start address, list of comments, list of rule lines)
    """
    cur_ea = SelStart()
    end_ea = SelEnd()
    d = i386DisasmParts()
    comments = []
    rulelines = []

    # Iterate over selected code bytes
    while cur_ea < end_ea:
        next_ea = NextHead(cur_ea, end_ea)

        # Did we inadvertently select something that wasn't code?
        if not idc.isCode(idaapi.getFlags(cur_ea)):
            # NextHead() gives BADADDR when there is no further head below
            # end_ea; clamp to the selection end so the trailing item does
            # not make us read bytes all the way up to BADADDR.
            item_end = end_ea if next_ea == idc.BADADDR else next_ea
            noncodebytes = "".join([chr(Byte(x)) for x in xrange(cur_ea, item_end)])
            comments.append("Non-code at %08X: %d bytes" % (cur_ea, len(noncodebytes)))
            rulelines.append(binhex_spaced(noncodebytes))
        else:
            curlen = idaapi.decode_insn(cur_ea)
            # Match IDA's disassembly format
            comments.append(GetDisasm(cur_ea))
            # But we need our custom object to process
            curbytes = "".join([chr(Byte(b)) for b in xrange(cur_ea, cur_ea + curlen)])
            codefrag = d.disasm(curbytes, 0, cur_ea)
            rulelines.append(yara_wildcard_instruction(codefrag))

        # move along
        cur_ea = next_ea

    return (SelStart(), comments, rulelines)
def yara_sig_code_selection():
    """Return some internal details for how we want to signature the selection

    Each item head inside the selection produces one comment and one rule
    line: instructions are disassembled and wildcarded, anything else is
    emitted as raw bytes in spaced hex form.

    :return: tuple (selection start address, list of comments, list of rule lines)
    """
    cur_ea = SelStart()
    end_ea = SelEnd()
    d = i386DisasmParts()
    comments = []
    rulelines = []

    # Iterate over selected code bytes
    while cur_ea < end_ea:
        following_head = NextHead(cur_ea, end_ea)

        if idc.isCode(idaapi.getFlags(cur_ea)):
            curlen = idaapi.decode_insn(cur_ea)
            # Match IDA's disassembly format
            comments.append(GetDisasm(cur_ea))
            # But we need our custom object to process
            curbytes = "".join(
                [chr(Byte(b)) for b in xrange(cur_ea, cur_ea + curlen)])
            codefrag = d.disasm(curbytes, 0, cur_ea)
            rulelines.append(yara_wildcard_instruction(codefrag))
        else:
            # We inadvertently selected something that wasn't code.
            # The last item of the selection has no following head: NextHead()
            # then reports BADADDR, which must not be used as a byte-range
            # bound. Stop at the end of the selection instead.
            if following_head == idc.BADADDR:
                stop_ea = end_ea
            else:
                stop_ea = following_head
            noncodebytes = "".join([
                chr(Byte(x)) for x in xrange(cur_ea, stop_ea)
            ])
            comments.append("Non-code at %08X: %d bytes" %
                            (cur_ea, len(noncodebytes)))
            rulelines.append(binhex_spaced(noncodebytes))

        # move along
        cur_ea = following_head

    return (SelStart(), comments, rulelines)
def locate_vtables():
    """Yield the addresses of item heads that look like the start of a vtable.

    A head qualifies when it is not code, has at least one data reference to
    it, its first dword is the start address of a known function, and none of
    the referencing addresses is recognised as a switch by
    ``switch.is_switch``.
    """
    heads = set(idautils.Heads())
    fns = set(idautils.Functions())
    for h in heads:
        # isCode() tests flag bits, so it needs the item's flags rather than
        # the address itself.
        if idc.isCode(idc.GetFlags(h)):
            continue
        xrefs = set(idautils.DataRefsTo(h))
        if not xrefs:
            continue
        # The first slot must point at a function start.
        if idc.Dword(h) not in fns:
            continue
        # Tables referenced from a switch are jump tables, not vtables.
        if any(switch.is_switch(x) for x in xrefs):
            continue
        yield h
def get_first_function(ea):
    """Walk backwards from ``ea`` and return the first pushed code address.

    Instructions before ``ea`` are decoded one at a time. The first ``push``
    whose operand is a memory or immediate operand and whose value is an
    address flagged as code is returned.

    The walk gives up (logging an error and returning ``None``) when it moves
    before the start of the enclosing function, after 10 steps when ``ea`` is
    not inside a function, or when no previous instruction can be decoded.
    """
    maybe_start = idc.get_func_attr(ea, idc.FUNCATTR_START)
    # Outside of a function there is no natural lower bound: cap the walk.
    limit = 0
    if maybe_start == idc.BADADDR:
        limit = 10

    cur_ea = ea
    limit_count = 0
    while cur_ea != idc.BADADDR:
        # are we over limit or up to the func start?
        limit_count += 1
        limit_exceeded = (limit > 0 and limit_count > limit)
        too_far = (maybe_start != idc.BADADDR and cur_ea < maybe_start)
        if limit_exceeded or too_far:
            LOG.error(
                "Failed to find string walking backwards from {:08X}".format(
                    ea))
            return None

        prev_ins = idautils.DecodePreviousInstruction(cur_ea)
        if prev_ins is None:
            # Nothing decodable before cur_ea: report it like any other
            # failed search instead of crashing on `prev_ins.ea`.
            LOG.error(
                "Failed to find string walking backwards from {:08X}".format(
                    ea))
            return None
        prev_ea = prev_ins.ea

        # did we find it?
        if idc.GetMnem(prev_ea) == 'push':
            if idc.get_operand_type(prev_ea, 0) in [idc.o_mem, idc.o_imm]:
                # push offset found!
                pushed_addr = idc.GetOperandValue(prev_ea, 0)
                # it's not data, then probably good
                if idc.isCode(idc.GetFlags(pushed_addr)):
                    return pushed_addr

        cur_ea = prev_ea

    return None
def insertRelocatedSymbol(M, D, reloc_dest, offset, seg_offset, new_eas, itemsize=-1):
    """Record a relocated symbol in the data section description ``D``.

    A new entry is added to ``D.symbols`` with base address
    ``offset + seg_offset``. Code destinations are named ``sub_<hex>`` and
    queued in ``new_eas`` unless already in ``RECOVERED_EAS``; every other
    destination goes through ``handleDataRelocation`` and is named
    ``dta_<hex>``. When ``itemsize`` is -1 the size of the item at ``offset``
    is used as the symbol size.
    """
    dest_flags = idc.GetFlags(reloc_dest)

    symbol = D.symbols.add()
    symbol.base_address = offset+seg_offset

    itemsize = int(itemsize)
    if itemsize == -1:
        itemsize = int(idc.ItemSize(offset))

    DEBUG("Offset: {0:x}, seg_offset: {1:x}\n".format(offset, seg_offset))
    DEBUG("Reloc Base Address: {0:x}\n".format(symbol.base_address))
    DEBUG("Reloc offset: {0:x}\n".format(offset))
    DEBUG("Reloc size: {0:x}\n".format(itemsize))

    if idc.isCode(dest_flags):
        symbol.symbol_name = "sub_"+hex(reloc_dest)
        symbol.symbol_size = itemsize
        DEBUG("Code Ref: {0:x}!\n".format(reloc_dest))
        if reloc_dest not in RECOVERED_EAS:
            new_eas.add(reloc_dest)
        return

    # Data and unknown destinations are handled alike; only the log differs.
    known_data = idc.isData(dest_flags)
    reloc_dest = handleDataRelocation(M, reloc_dest, new_eas)
    symbol.symbol_name = "dta_"+hex(reloc_dest)
    symbol.symbol_size = itemsize
    if known_data:
        DEBUG("Data Ref!\n")
    else:
        DEBUG("UNKNOWN Ref, assuming data\n")
def handleLikeLoadJmpTable(ins, F):
    """Build a ``JmpTable`` from the table addressed by operand 1 of ``ins``.

    Entries are read as 32-bit little-endian values starting at the operand's
    displacement. Reading stops at the first entry that is not the address of
    code, when an entry cannot be read, or when the start of the next known
    jump table (per ``JMPTABLES.succ``) is reached.

    Returns ``None`` when not even the first entry can be read, otherwise the
    (possibly empty) table.
    """
    insn_t = DecodeInstruction(ins)
    _base, _index, _scale, displ = getAddressParts(insn_t, 1)

    raw = idaapi.get_many_bytes(displ, 4)
    if raw is None or len(raw) != 4:
        return None

    jmpt = JmpTable(addr=displ, function=F)
    succ = JMPTABLES.succ(jmpt)

    entry_ea = displ  # address of the table slot currently examined
    target = struct.unpack('<I', raw)[0]
    while idc.isCode(idc.GetFlags(target)) and (succ is None or entry_ea < succ.get_start()):
        DEBUG("jmpaddress = {0:x}\n".format(target))
        jmpt.add_entry(target)

        entry_ea += 4
        raw = idaapi.get_many_bytes(entry_ea, 4)
        if raw is None or len(raw) != 4:
            break
        target = struct.unpack('<I', raw)[0]

    DEBUG("handleLikeLoadJmp @ {0:x} #{1}".format(jmpt.get_start(), len(jmpt.entries())))
    return jmpt
def getOrigDisasm(self):
    # type: () -> str
    """
    Gets the original disassembly without any further applied transformations
    However, the formatting is different from the original and is more convenient for parsing

    The item's flags select how the text is produced: code and unclassified items use IDA's
    disassembly with comments filtered out, structs / aligns / plain data go through dedicated
    helpers, and ASCII items are rendered as .ascii/.asciz directives.
    A '<force> ' command in the item's comment overrides the result with the text following it.
    :return: the disassembly
    """
    flags = idc.GetFlags(self.ea)
    if idc.isCode(flags):
        disasm = idc.GetDisasm(self.ea)
        disasm = self._filterComments(disasm)
        # NOTE(review): as written this replace() is a no-op -- presumably it is meant to
        # collapse repeated spaces; confirm the literals.
        disasm = disasm.replace(' ', ' ')
    elif idc.isStruct(flags):
        disasm = self._getStructDisasm()
        # disasm = "INVALID"
    elif idc.isAlign(flags):
        disasm = idc.GetDisasm(self.ea)
        disasm = self._convertAlignDisasm(disasm)
    elif idc.isASCII(flags):
        content = self.getContent()
        # number of line feeds left to emit; decides between .ascii and .asciz below
        numNewLines = content.count(0x0A)
        if numNewLines > 1:
            disasm = '.ascii "'
        else:
            disasm = '.asciz "'
        for i in range(len(content)):
            if content[i] == 0x00:
                # a NUL byte closes the string literal
                disasm += '"'
            elif chr(content[i]) == '"':
                disasm += '\\\"'
            elif chr(content[i]) == '\\':
                disasm += '\\\\'
            elif content[i] == 0x0A:
                disasm += '\\n'
                numNewLines -= 1
                # each line feed ends the current directive and opens a new one; the directive
                # opened when exactly one line feed remains is .asciz
                if numNewLines > 1:
                    disasm += '"\n\t.ascii "'
                elif numNewLines == 1:
                    disasm += '"\n\t.asciz "'
            elif chr(content[i]) == ' ':
                disasm += ' '
            elif not chr(content[i]).isspace():
                disasm += chr(content[i])
            else:
                # remaining whitespace characters are emitted as hex escapes
                # TODO [INVALID] arm-none-eabi doesn't recognize \xXX? \x seems to become a byte.
                disasm += '\\x%02X' % content[i]
    elif idc.isData(flags):
        disasm = self._getDataDisasm()
    else:
        disasm = idc.GetDisasm(self.ea)
        disasm = self._filterComments(disasm)
        # NOTE(review): same no-op replace() as in the code branch above; confirm the literals.
        disasm = disasm.replace(' ', ' ')
    # parse force command
    if '<force>' in self.getComment():
        comment = self.getComment()
        # everything after '<force> ' replaces the computed disassembly
        disasm = comment[comment.index('<force> ') + len('<force> '):]
    return disasm
def valid_oreans_macro_entry(address):
    """Tell whether ``address`` holds a macro entry jump into the Oreans segment.

    True only when the address is code, its mnemonic equals ``ENTRY_MNEM``
    and its jump destination lies within
    ``[OREANS_SEGMENT.startEA, OREANS_SEGMENT.endEA]`` (both inclusive).
    """
    if not idc.isCode(idc.GetFlags(address)):
        return False
    if get_mnemonic(address) != ENTRY_MNEM:
        return False

    destination = get_jump_destination(address)
    if destination < OREANS_SEGMENT.startEA:
        return False
    if destination > OREANS_SEGMENT.endEA:
        return False
    return True
def decode_here_clicked(self):
    """Decode the instruction under the cursor.

    The bytes from the cursor up to the next head are converted with
    ``to_hex`` and passed to ``decode_ir``. A message is printed instead when
    the cursor is not on code.
    """
    cursor = idc.here()
    if idc.isCode(idc.GetFlags(cursor)):
        length = idc.NextHead(cursor) - cursor
        raw = idc.GetManyBytes(cursor, length)
        self.decode_ir(to_hex(raw))
    else:
        print("Not code instruction")
def make_code(start, end):
    """Convert ``[start, end)`` to code in 4-byte steps.

    Every 4-byte slot that is not already code is undefined first and then
    turned into an instruction.
    """
    slot_count = (end - start) / 4
    addr = start
    for _ in range(slot_count):
        if not idc.isCode(idc.GetFlags(addr)):
            # Drop whatever item currently covers the slot, then create code.
            idaapi.do_unknown_range(addr, 4, 0)
            idaapi.auto_make_code(addr)
            idc.MakeCode(addr)
        addr += 4
    return
def scan_data_for_code_refs(begin_ea, end_ea, read_func, read_size):
    """Collect the values in ``[begin_ea, end_ea)`` that point at code.

    The range is read with ``read_func`` every ``read_size`` bytes (4- or
    8-byte chunks); each value that is the address of code is added to
    ``POSSIBLE_CODE_REFS``.
    """
    for ea in xrange(begin_ea, end_ea, read_size):
        candidate = read_func(ea)
        if not idc.isCode(idc.GetFlags(candidate)):
            continue
        POSSIBLE_CODE_REFS.add(candidate)
def is_end_of_flow(self, instruction):
    """Return whether the flow ends with ``instruction``.

    The flow goes on only when the item right after the instruction is code
    that execution falls through into.
    """
    following = instruction.ip + idc.ItemSize(instruction.ip)
    following_flags = idc.GetFlags(following)
    return not (idc.isCode(following_flags) and idc.isFlow(following_flags))
def is_end_of_flow(self, instruction):
    """Return whether the last instruction processed ends the flow.

    That is the case unless the next item is an instruction carrying the
    "flow" flag.
    """
    flags_after = idc.GetFlags(instruction.ip + idc.ItemSize(instruction.ip))
    if not idc.isCode(flags_after):
        return True
    if not idc.isFlow(flags_after):
        return True
    return False
def setBPs(self):
    """
    Set breakpoints on all CALL instructions found in any segment.
    """
    for seg_ea in idautils.Segments():
        seg_end = idc.SegEnd(seg_ea)
        for head in idautils.Heads(seg_ea, seg_end):
            if not idc.isCode(idc.GetFlags(head)):
                continue
            # Add BP if instruction is a CALL
            if is_call(head):
                self.addBP(head)
def fix_code(start_address, end_address):
    """Create a function at every non-code address after ``start_address``.

    Addresses are visited one at a time with ``idc.NextAddr``.
    """
    # Todo: There might be some data in the range of codes.
    # NOTE(review): the cursor is advanced before it is examined, so
    # start_address itself is never checked and the address following
    # end_address is -- confirm this is intended.
    cursor = start_address
    while True:
        if cursor > end_address:
            break
        cursor = idc.NextAddr(cursor)
        if idc.isCode(idc.GetFlags(cursor)):
            continue
        # Todo: Check should use MakeCode or MakeFunction
        # idc.MakeCode(offset)
        idc.MakeFunction(cursor)
def _isFunctionPointer(self, firstLineSplitDisasm):
    """
    Identifies the construct 'DCD <funcName>' as a function pointer entry!
    The name is looked up in the database and the entry is accepted only if it resolves to a
    code location; this also covers code that is not part of an identified function.
    :param firstLineSplitDisasm: list of space and comma split operands in the instruction.
                                 ['DCD', 'sub_DEADBEEF+1']
    :return: False when the list is too short or is not a DCD, otherwise whether the named
             location is code
    """
    if len(firstLineSplitDisasm) < 2:
        return False
    if firstLineSplitDisasm[0] != 'DCD':
        return False
    target_ea = idc.get_name_ea(0, firstLineSplitDisasm[1])
    return idc.isCode(idc.GetFlags(target_ea))
def disassemble_new_targets(self, enabled):
    """Try to create code at every result value that is still unexplored.

    Values that are already code, or that are defined as something else than
    unknown bytes, are left alone. The outcome of each attempt is printed.
    """
    for target in self.results.values:
        flags = idc.GetFlags(target)
        if idc.isCode(flags) or not idc.isUnknown(flags):
            continue
        if idc.MakeCode(target) == 0:
            print("Try disassemble at:" + hex(target) + " KO")
            #TODO: Rollback ?
        else:
            print("Try disassemble at:" + hex(target) + " Success !")
def load_symbols_from_ida(self):
    """Fill ``self.symbols`` with the user-named locations of known sections.

    Each symbol maps to ``(segment name, offset within segment, type)`` where
    the type is 'function' for code and 'object' for everything else.
    """
    for ea, name in idautils.Names():
        flags = idc.GetFlags(ea)
        if idc.hasUserName(flags):
            seg_start = idc.SegStart(ea)
            seg_name = idc.SegName(ea)
            if seg_name in self.sections:
                if idc.isCode(flags):
                    kind = 'function'
                else:
                    kind = 'object'
                self.symbols[name] = (seg_name, ea - seg_start, kind)
def compute_nb_instr(self):
    """Count the item heads flagged as code in the ".text" segment mapping."""
    # TODO: Iterate all segs writable
    first_ea, last_ea = self.seg_mapping[".text"]

    total = 0
    ea = first_ea
    while ea <= last_ea:
        if idc.isCode(idc.GetFlags(ea)):
            total += 1
        ea = idc.NextHead(ea, last_ea)
    return total
def getBlocks(self, function_offset):
    """Return the basic blocks of a function as sorted lists of instruction addresses.

    Each flow-chart block becomes the list of its heads that are code; blocks
    without any instruction are left out.
    """
    flow_chart = idaapi.FlowChart(idaapi.get_func(function_offset))
    per_block = (
        [head for head in idautils.Heads(node.startEA, node.endEA)
         if idc.isCode(idc.GetFlags(head))]
        for node in flow_chart
    )
    return sorted(instructions for instructions in per_block if instructions)
def handleDebugStepOver(self):
    """Move the active breakpoint hook to the instruction after the current one.

    Nothing happens without a client socket, without a current break id, or
    when the address following the current instruction is not code. Otherwise
    the hook at the current break address (if any) is removed on the client
    and in the view, a hook is installed at the next address, and execution
    is continued.
    """
    if self.clientSocket is None:
        return
    if self.debugBreakId is None:
        return
    cur_ea = self.debugBreakId
    # Decoding fills idaapi.cmd, whose size gives the address of the next instruction.
    decode_insn(cur_ea)
    next_ea = cur_ea + idaapi.cmd.size
    if isCode(getFlags(next_ea)) == False:
        return
    entry = None
    # remove current
    if self.debugBreakId in self.idbHookMap:
        entry = self.idbHookMap[self.debugBreakId]
        outJSON = json.dumps({
            "req_id": kFridaLink_DelHookRequest,
            "data": entry.genDelRequest()
        })
        del self.idbHookMap[self.debugBreakId]
        self.clientSocket.sendto(outJSON, self.clientAddress)
        SetColor(self.debugBreakId, CIC_ITEM, kIDAViewColor_Reset)
        refresh_idaview_anyway()
    offset, moduleName = self.getAddressDetails(next_ea)
    # install next
    if entry == None:
        # No hook existed at the current address: build a fresh breakpoint hook.
        hook = InstHook()
        hook.id = next_ea
        # NOTE(review): `once` is not defined in this method -- presumably a
        # module-level name; confirm it exists.
        hook.once = once
        hook.breakpoint = True
        entry = HookEntry(hook)
    # Retarget the (reused or new) entry at the next instruction.
    entry.hook.id = next_ea
    entry.hook.mnemonic = GetDisasm(next_ea)
    entry.hook.address = offset
    entry.hook.module = moduleName
    outJSON = json.dumps({
        "req_id": kFridaLink_SetHookRequest,
        "data": entry.genSetRequest()
    })
    self.clientSocket.sendto(outJSON, self.clientAddress)
    self.idbHookMap[next_ea] = entry
    self.idbHooksView.setContent(self.idbHookMap)
    self.handleDebugContinue()
def scan_data_for_code_refs(begin_ea, end_ea, read_func, read_size):
    """Scan ``[begin_ea, end_ea)`` for values that look like code pointers.

    The range is read with ``read_func`` in steps of ``read_size`` bytes (4-
    or 8-byte chunks). Every value that is the address of code is recorded in
    ``POSSIBLE_CODE_REFS``.
    """
    log.info("Scanning for code refs in range [{:08x}, {:08x})".format(
        begin_ea, end_ea))

    ea = begin_ea
    while ea < end_ea:
        candidate = read_func(ea)
        if idc.isCode(idc.GetFlags(candidate)):
            POSSIBLE_CODE_REFS.add(candidate)
        ea += read_size
def renameDword(self):
    """Name every DWORD in the database that holds the selected import's address.

    The address (column 3) and name (column 2) of the current row of the
    import table are used. Each occurrence of the address, searched as a
    little-endian 32-bit value, is turned into a standalone dword and, when
    it is unnamed or carries an ``off_``/``dword_`` name, renamed to the
    import name. If the plain name is refused, suffixes 0..9 are tried. The
    search stops at the first occurrence that cannot be renamed.
    """
    proc_addr = self._import_table.item(self._import_table.currentRow(), 3).text()
    proc_name = str(self._import_table.item(self._import_table.currentRow(), 2).text())
    renamed = 0
    if proc_addr:
        try:
            proc_addr = int(proc_addr, 16)
            proc_bin_str = " ".join([x.encode("hex") for x in struct.pack("<I", proc_addr)])
            next_dword = idc.FindBinary(idc.MinEA(), idc.SEARCH_DOWN | idc.SEARCH_NEXT, proc_bin_str)
            while next_dword != idc.BADADDR:
                log.debug("Trying to fix-up 0x{:08x}".format(next_dword))
                # DWORDs can be "inaccessible" for many reasons and it requires "breaking up" the data blobs
                # and manually fixing them
                #
                # The isUnknown/isData/isCode predicates test flag bits, not addresses. The
                # flags are taken from the head of the enclosing item because a match in the
                # middle of an array sits on tail bytes, whose own flags carry no item class.
                hd = idc.ItemHead(next_dword)
                item_flags = idc.GetFlags(hd)

                # Reason 1: In a dword array in an unknown section
                if idc.isUnknown(item_flags):
                    idc.MakeUnkn(next_dword, idc.DOUNK_EXPAND)
                    idc.MakeDword(next_dword)
                # Reason 2: In a dword array in a data section
                elif idc.isData(item_flags):
                    idc.MakeDword(hd)
                    idc.MakeDword(next_dword)
                # Reason 3: In a dword array in a code section (validate via "dd <dword>,")
                elif idc.isCode(item_flags) and idc.GetDisasm(next_dword).startswith("dd "):
                    idc.MakeDword(hd)
                    idc.MakeDword(next_dword)

                # Only perform the rename on unnamed or auto-named locations
                current_name = idc.Name(next_dword)
                if current_name.startswith(("off_", "dword_")) or current_name == "":
                    success = idc.MakeNameEx(next_dword, proc_name, idc.SN_NOWARN | idc.SN_NON_AUTO)
                    i = 0
                    new_proc_name = proc_name
                    # The name may already be taken: retry with a numeric suffix.
                    while not success and i < 10:
                        new_proc_name = "{}{}".format(proc_name, i)
                        success = idc.MakeNameEx(next_dword, new_proc_name, idc.SN_NOWARN | idc.SN_NON_AUTO)
                        i += 1
                    if success:
                        renamed += 1
                        item = self._import_table.item(self._import_table.currentRow(), 5)
                        item.setText("{}, {}".format(str(item.text()), new_proc_name))
                        log.debug("DWORD @ 0x{:08x} now has name {}".format(next_dword, new_proc_name))
                    else:
                        log.error("Unable to auto-rename successfully, terminating search")
                        break
                else:
                    log.debug("Value at 0x{:08x} does not meet renaming requirements".format(next_dword))
                next_dword = idc.FindBinary(next_dword + 4,
                                            idc.SEARCH_DOWN | idc.SEARCH_NEXT, proc_bin_str)
        except Exception as e:
            log.error("Error encountered: {}".format(e))
        log.debug("Renamed {:d} instances of {}".format(renamed, proc_name))
def is_unconditional_branch(self, instruction):
    """Return whether the instruction is an unconditional branch

    An instruction qualifies when its type is one of
    ``self.INSTRUCTIONS_BRANCH`` and execution does not continue into the
    next item, i.e. that item is not code or does not carry the "flow" flag.
    """
    # `and` binds tighter than `or`: without explicit grouping, any
    # instruction followed by a non-flow item would be reported as a branch
    # whatever its type. Test the instruction type on its own first.
    if instruction.itype not in self.INSTRUCTIONS_BRANCH:
        return False

    next_addr = instruction.ip + idc.ItemSize(instruction.ip)
    next_addr_flags = idc.GetFlags(next_addr)
    falls_through = idc.isCode(next_addr_flags) and idc.isFlow(next_addr_flags)
    return not falls_through
def isFwdExport(iname, ea):
    """Return the bytes of a forwarded export, or None if ``ea`` is not one.

    An export is treated as forwarded when its address holds data rather than
    code; the item's bytes, minus the last one, are returned in that case.

    Raises ``Exception`` when ``ea`` is ``BADADDR``.
    """
    if ea == idc.BADADDR:
        raise Exception("Cannot find addr for: " + iname)

    flags = idc.GetFlags(ea)
    if idc.isCode(flags) or not idc.isData(flags):
        return None

    size = idc.ItemSize(ea)
    return idaapi.get_many_bytes(ea, size-1)
def get_fptrs():
    """Map instruction addresses to the names of targets stored in the '$ mips' netnode.

    For every code item of every function, the netnode's altval for that
    address minus one is taken as a target address; entries whose altval is 0
    are skipped.
    """
    names_by_ea = {}
    node = idaapi.netnode('$ mips')
    for func_ea in idautils.Functions():
        for ea in idautils.FuncItems(func_ea):
            if idc.isCode(idaapi.getFlags(ea)):
                target = node.altval(ea) - 1
                if target != -1:
                    names_by_ea[ea] = idc.Name(target)
    return names_by_ea
def preprocessBinary():
    """Record the jump-table slots used by jmp-based switches.

    Every instruction of every segment is checked; for unconditional jumps
    that carry switch information, the address of each jump-table element is
    added to ``ACCESSED_VIA_JMP``. The set is kept so that unwanted function
    entry points are not generated for those data references.
    """
    for seg_ea in idautils.Segments():
        for head in idautils.Heads(seg_ea, idc.SegEnd(seg_ea)):
            if not idc.isCode(idc.GetFlags(head)):
                continue

            si = idaapi.get_switch_info_ex(head)
            if si is None or not isUnconditionalJump(head):
                continue

            DEBUG("Found a jmp based switch at: {0:x}\n".format(head))
            esize = si.get_jtable_element_size()
            count = si.get_jtable_size()
            fulladdr = si.jumps
            for _ in xrange(count):
                DEBUG("Address accessed via JMP: {:x}\n".format(fulladdr))
                ACCESSED_VIA_JMP.add(fulladdr)
                fulladdr += esize
def from_syntactic_to_semantic(self, _start, _end):
    """Convert the items of ``[_start, _end)`` to their "semantic" byte string.

    Instructions with a numerical operand (non-zero offset reported by
    ``get_first_numerical_operand_offset``) are converted through
    ``get_semantic_bytes``; other instructions and alignment items are copied
    byte for byte. Other item kinds are ignored.

    Returns the resulting string, or ``None`` as soon as an instruction
    cannot be converted.
    """
    semantic = ''
    # Parse all the instructions inside the function
    for head in idautils.Heads(_start, _end):
        flags = idc.GetFlags(head)
        if idc.isCode(flags):
            # Code: convert instruction
            insn = idautils.DecodeInstruction(head)
            operand_offset = self.get_first_numerical_operand_offset(insn)
            if operand_offset == 0:
                semantic += ''.join([chr(idc.Byte(insn.ea + i)) for i in range(insn.size)])
                continue
            converted = self.get_semantic_bytes(insn.ea, operand_offset)
            if converted is None:
                return None
            semantic += ''.join(converted)
        elif idc.isAlign(flags):
            # align: copy the byte without semantic conversion
            semantic += idc.GetManyBytes(head, idc.NextHead(head) - head, False)
    return semantic
def try_mark_as_code(address, end_address=0):
    """Make sure ``address`` holds an instruction; return whether it does.

    Alignment items are refused and existing code is accepted as is.
    Otherwise code creation is attempted directly, and once more after
    undefining the bytes from ``address`` through
    ``max(end_address, address + 1)``.
    """
    flags = idc.GetFlags(address)
    if idc.isAlign(flags):
        return False
    if idc.isCode(flags):
        return True

    def create_code():
        # Create the instruction and let auto-analysis settle on success.
        if idc.MakeCode(address):
            idaapi.autoWait()
            return True
        return False

    if create_code():
        return True

    end_address = max(end_address, address + 1)
    idc.MakeUnknown(address, end_address - address + 1, idc.DOUNK_SIMPLE)
    return create_code()
def insertRelocatedSymbol(M, D, reloc_dest, offset, seg_offset, new_eas):
    """Add a relocated symbol to the data section description ``D``.

    The new ``D.symbols`` entry gets base address ``offset + seg_offset``.
    A code destination is named ``sub_<hex>`` and queued in ``new_eas``
    unless it is already in ``RECOVERED_EAS``. Any other destination is
    passed through ``handleDataRelocation`` and named ``dta_<hex>``.
    """
    dest_flags = idc.GetFlags(reloc_dest)

    symbol = D.symbols.add()
    symbol.base_address = offset+seg_offset

    if idc.isCode(dest_flags):
        symbol.symbol_name = "sub_"+hex(reloc_dest)
        DEBUG("Code Ref: {0:x}!\n".format(reloc_dest))
        if reloc_dest not in RECOVERED_EAS:
            new_eas.add(reloc_dest)
        return

    # Data and unknown destinations are treated alike; only the log differs.
    known_data = idc.isData(dest_flags)
    reloc_dest = handleDataRelocation(M, reloc_dest, new_eas)
    symbol.symbol_name = "dta_"+hex(reloc_dest)
    if known_data:
        DEBUG("Data Ref!\n")
    else:
        DEBUG("UNKNOWN Ref, assuming data\n")
def GetCodeRefsFrom(ea):
    """List the code references leaving the function that contains ``ea``.

    Every instruction of every chunk of the function is inspected. A
    reference is kept unless the location named after the function containing
    the target is the function itself or is already in the result list.
    """
    func_start = idc.LocByName(idc.GetFunctionName(ea))

    collected = []
    for chunk_start, chunk_end in idautils.Chunks(func_start):
        for head in idautils.Heads(chunk_start, chunk_end):
            # Only instructions carry code references.
            if not idc.isCode(idc.GetFlags(head)):
                continue
            for ref in idautils.CodeRefsFrom(head, 0):
                owner = idc.LocByName(idc.GetFunctionName(ref))
                # NOTE(review): the duplicate test looks up the owning
                # function's address while the raw reference is what gets
                # stored -- confirm which of the two is intended.
                if owner not in collected and owner != func_start:
                    collected.append(ref)
    return collected
def walk_function(self, ea):
    """
    Walk a function and place a breakpoint on every call instruction found within it.
    A function that was already walked is not walked again.
    @param ea: An effective address within the function.
    @return: True if the function was walked (now or previously), False if an error occurred
    """
    try:
        function_name = get_function_name(ea)
        self.logger.debug("Walking function %s at address %s for breakpoints", function_name, hex(ea))

        if function_name in self.walked_functions:
            self.logger.debug("No breakpoints will be set in function %s, since it was already walked before.",
                              function_name)
            return True

        # Remember the function so that it is only ever walked once
        self.walked_functions[function_name] = ea

        first_ea = get_function_start_address(ea)
        last_ea = get_function_end_address(ea)

        for head in idautils.Heads(first_ea, last_ea):
            if not idc.isCode(idc.GetFlags(head)):
                continue
            # Add BP if instruction is a CALL
            if is_call(head):
                self.addBP(head)

        self.logger.debug("Function %s was successfully walked for breakpoints", function_name)
        return True

    except Exception as ex:
        self.logger.exception("Failed walking function at address %s for breakpoints.", hex(ea))
        return False
def __init__(self, functionName):
    """Build the instruction-level graph of a function.

    ``functionName`` is either the name of the function (a ``str``) or an
    address inside it. Every instruction between the function start and the
    end reported by ``FindFuncEnd`` becomes a node. A node is connected to
    each of its code references lying within those bounds, and to the next
    head when execution flows into it or when the instruction is a call.
    """
    import idautils
    import idc
    import idaapi

    super(FunctionGraph, self).__init__()

    if type(functionName) == type('str'):
        start_addr = idc.LocByName(functionName)
    else:
        start_addr = idaapi.get_func(functionName).startEA
    print('using 0x%x as function start' % (start_addr))

    end_addr = idc.FindFuncEnd(start_addr)
    self.start_addr = start_addr
    self.end_addr = end_addr
    self.name = functionName

    for h in idautils.Heads(start_addr, end_addr):
        if h == idc.BADADDR or not idc.isCode(idc.GetFlags(h)):
            continue

        self.add_node(h)

        # Keep only the targets that stay within the function bounds.
        refs = {r for r in idautils.CodeRefsFrom(h, 1) if start_addr <= r <= end_addr}

        nh = idc.NextHead(h, end_addr)
        if nh != idc.BADADDR:
            if idaapi.isFlow(idaapi.get_flags_ex(nh, 0)) or idaapi.is_call_insn(h):
                refs.add(nh)

        for r in refs:
            self.connect(h, r)
def get_called_func_data(self, ea):
    """
    Try to get the called function name and address.
    @param ea: Address to the CALL instruction
    @return: On success a tuple (call destination, lower-cased function name).
             (None, None) when ea is not a call, when its first operand type is not one of
             2, 5, 6 or 7, or when an error occurred.
    """
    try:
        if not idc.isCode(idc.GetFlags(ea)):
            return None, None
        if not is_call(ea):
            return None, None
        if idc.GetOpType(ea, 0) not in (5, 6, 7, 2):
            return None, None

        call_dest = idc.GetOperandValue(ea, 0)  # Call destination
        func_name = get_function_name(call_dest).lower()
        return call_dest, func_name

    except Exception as ex:
        self.logger.exception("Failed to get called function data: %s", ex)
        return None, None
def processRelocationsInData(M, D, start, end, new_eas, seg_offset):
    """Create a symbol in ``D`` for every fixup located in ``[start, end)``.

    Each symbol gets base address ``fixup address + seg_offset``. Fixups that
    target code are named ``sub_<hex>`` and the target is queued in
    ``new_eas`` unless already in ``RECOVERED_EAS``; all other targets go
    through ``handleDataRelocation`` and are named ``dta_<hex>``.
    """
    # The search starts strictly after `start - 1`; keep that value non-negative.
    if start == 0:
        start = 1

    fixup_ea = idc.GetNextFixupEA(start-1)
    while fixup_ea < end and fixup_ea != idc.BADADDR:
        pointsto = idc.GetFixupTgtOff(fixup_ea)
        fn = getFunctionName(fixup_ea)  # result is not used below
        DEBUG("{0:x} Found reloc to: {1:x}\n".format(fixup_ea, pointsto))

        target_flags = idc.GetFlags(pointsto)
        symbol = D.symbols.add()
        symbol.base_address = fixup_ea+seg_offset

        if idc.isCode(target_flags):
            symbol.symbol_name = "sub_"+hex(pointsto)
            DEBUG("Code Ref!\n")
            if pointsto not in RECOVERED_EAS:
                new_eas.add(pointsto)
        else:
            # Data and unknown targets are treated alike; only the log differs.
            known_data = idc.isData(target_flags)
            pointsto = handleDataRelocation(M, pointsto, new_eas)
            symbol.symbol_name = "dta_"+hex(pointsto)
            if known_data:
                DEBUG("Data Ref!\n")
            else:
                DEBUG("UNKNOWN Ref, assuming data\n")

        fixup_ea = idc.GetNextFixupEA(fixup_ea)
def getsize(self):
    """Walk the vtable starting at self.addr and collect its entries.

    Slots are read 4 bytes at a time. Each accepted slot address is appended
    to self.entries. The walk stops at the first slot that is a labelled
    reference (other than the very first one), is not data with a value, is
    zero, or does not point at code with a non-zero first dword.

    Returns 0 (after printing a message) when the first slot is not a
    labelled reference; otherwise prints a summary and returns None.
    """
    cursor = self.addr
    while True:
        slot_flags = idc.GetFlags(cursor)
        # A referenced, named/labelled item marks the start of a vtable.
        labelled = idc.isRef(slot_flags) and (
            idc.hasName(slot_flags) or (slot_flags & FF_LABL))

        if not self.entries:
            # first entry case
            if not labelled:
                print("[-] Not an entry of vtable")
                return 0
        elif labelled:
            # next vtable ?
            break

        if not (idc.hasValue(slot_flags) and idc.isData(slot_flags)):
            break

        target = idc.Dword(cursor)
        if not target:
            break

        target_flags = idc.GetFlags(target)
        if (not idc.hasValue(target_flags)
                or not idc.isCode(target_flags)
                or idc.Dword(target) == 0):
            break

        self.entries.append(cursor)
        cursor += 4

    method_count = (cursor - self.addr) / 4
    print("[+] Vtable %08X - %08X, methods : %d" % (self.addr, cursor, method_count))
def checkWindowsLibs(name, ea, bCheckFileIO, bCheckNetworkIO):
    """
    This function monitors loaded DLLs for Windows.

    If one of these DLLs is loaded and the matching filter is enabled, a
    conditional breakpoint is set on each listed function:

    kernel32.dll - CreateFileW, ReadFile, CloseHandle  (bCheckFileIO)
    WS2_32.dll   - recv, bind, accept, closesocket     (bCheckNetworkIO)
    WSOCK32.dll  - recv                                (bCheckNetworkIO)

    Hooks for kernel32 CreateFileA and wsock32 bind are not installed.

    @param name: The name of the loaded DLL
    @param ea: The address of the loaded DLL
    @param bCheckFileIO: Checks to see if FileIO filtering was turned on
    @param bCheckNetworkIO: Checks to see if NetworkIO filtering was turned on

    @return: None
    """
    import idc
    import logging

    logger = logging.getLogger('IDATrace')

    def _hook(symbol, label, condition, require_code=False):
        """Set a conditional breakpoint on `symbol`; log when it is missing."""
        target = idc.LocByName(symbol)
        if target == idc.BADADDR:
            logger.info("Cannot find %s", label)
            return
        logger.info("We found %s at 0x%x.", label, target)

        # idc.isCode() tests a flags value, so the flags of the address must
        # be fetched first; testing the raw address gives arbitrary results.
        if require_code and not idc.isCode(idc.GetFlags(target)):
            logger.info("%s at 0x%x is data not code.", label, target)
            return

        idc.AddBpt(target)
        idc.SetBptAttr(target, idc.BPT_BRK, 0)
        idc.SetBptCnd(target, condition)

    idc.RefreshDebuggerMemory()
    library_name = name.upper()

    if "KERNEL32" in library_name:
        logger.info("Found kernel32 at 0x%x", ea)
        if bCheckFileIO:
            # kernel32_CreateFileA is deliberately not hooked.
            _hook("kernel32_CreateFileW", "CreateFileW",
                  "windowsFileIO.MyCreateFileW()")
            _hook("kernel32_ReadFile", "ReadFile",
                  "windowsFileIO.MyReadFile()")
            _hook("kernel32_CloseHandle", "CloseHandle",
                  "windowsFileIO.MyCloseHandle()")

    elif "WS2_32" in library_name:
        logger.info("Found Ws2_32 at 0x%x", ea)
        if bCheckNetworkIO:
            _hook("ws2_32_recv", "ws2_32_recv",
                  "windowsNetworkIO.checkRecv()")
            _hook("ws2_32_bind", "ws2_32_bind",
                  "windowsNetworkIO.checkBind()")
            _hook("ws2_32_accept", "ws2_32_accept",
                  "windowsNetworkIO.checkAccept()")
            _hook("ws2_32_closesocket", "ws2_32_closesocket",
                  "windowsNetworkIO.checkClosesocket()")

    elif "WSOCK32" in library_name:
        logger.info("Found wsock32 at 0x%x", ea)
        if bCheckNetworkIO:
            # wsock32_bind is deliberately not hooked.
            # The wsock32 symbol is only hooked when it is flagged as code.
            _hook("wsock32_recv", "wsock32_recv",
                  "windowsNetworkIO.WSOCK32Recv()", require_code=True)
def isNotCode(ea):
    """Return True when the flags of address `ea` do not mark it as code."""
    return not idc.isCode(idc.GetFlags(ea))
def is_code_by_flags(ea):
    """Return whether `ea` is code according to both is_code() and its flags.

    False is returned as soon as is_code(ea) is falsy; otherwise the result
    of idc.isCode() on the address's flags is returned.
    """
    if is_code(ea):
        return idc.isCode(idc.GetFlags(ea))
    return False
def isCode(self, ea):
    """Tell whether the item at address `ea` is code.

    Returns the result of idc.isCode() applied to the flags of `ea`.
    """
    item_flags = idc.GetFlags(ea)
    return idc.isCode(item_flags)
def _queue_chunk_at(address, chunks, chunks_todo):
    """Queue the flow-code chunk starting at `address` unless already known."""
    if address_in_chunks(address, chunks):
        return
    new_chunk = get_flow_code_from_address(address)
    # Add the chunk to the chunks to process and to the set containing all
    # visited chunks.
    if new_chunk is not None:
        chunks_todo.append(new_chunk)
        chunks.add(new_chunk)


def process_function(arch, func_ea):
    """Export one function's instructions, comments and references.

    Starting from the block containing the entry point, every chunk reached
    through branches (and finally every remaining chunk reported by
    get_chunks()) is walked head by head. Comments, direct/indirect calls,
    conditional and unconditional branches and data references are recorded
    in a DismantlerDataPacket. The packet and a FunctionAnalyzer built from
    it are then handed to `instrumentation`.

    @param arch: Architecture helper providing process_instruction(),
        is_call(), is_conditional_branch() and is_unconditional_branch().
    @param func_ea: Address of the function entry point.
    @return: None
    """
    packet = DismantlerDataPacket()

    # A set gives faster searches; the index maps chunk start -> chunk.
    ida_chunks = set(get_chunks(func_ea))

    # Add to the chunks only the main block, containing the function entry
    # point.
    chunks = set()
    chunk = get_flow_code_from_address(func_ea)
    if chunk:
        chunks.add(chunk)

    ida_chunks_idx = {c[0]: c for c in ida_chunks}

    # Function-level comments are attached to the first head processed.
    func = idaapi.get_func(func_ea)
    comments = [idaapi.get_func_cmt(func, 0), idaapi.get_func_cmt(func, 1)]

    # Copy the chunks into a queue to process.
    chunks_todo = list(chunks)

    while True:
        # When the queue runs dry, fall back to the chunks IDA reported that
        # have not been visited yet; stop when none are left.
        if not chunks_todo:
            if not ida_chunks:
                break
            chunks_todo.extend(ida_chunks)

        chunk_start, chunk_end = chunks_todo.pop()
        if chunk_start in ida_chunks_idx:
            ida_chunks.remove(ida_chunks_idx.pop(chunk_start))

        for head in idautils.Heads(chunk_start, chunk_end):

            comments.extend(
                (idaapi.get_cmt(head, 0), idaapi.get_cmt(head, 1)))
            comment = '\n'.join(
                [c for c in comments if c is not None]).strip()
            if comment:
                packet.add_comment(head, comment)
            comments = []

            if not idc.isCode(idc.GetFlags(head)):
                continue

            instruction = arch.process_instruction(packet, head)

            # If there are other references than flow, add them all.
            # Computed once per head instead of once per reference.
            non_flow_refs = list(idautils.CodeRefsFrom(head, 0))
            if non_flow_refs:

                # For each reference, including flow ones.
                for ref_idx, ref in enumerate(
                        idautils.CodeRefsFrom(head, 1)):

                    if arch.is_call(instruction):
                        # These two conditions must remain separated: the
                        # enclosing "if" has to be entered whenever the
                        # instruction is a call, otherwise it would be added
                        # as an unconditional jump in the last branch.
                        if ref in non_flow_refs:
                            packet.add_direct_call(head, ref)

                    elif (ref_idx > 0
                            and arch.is_conditional_branch(instruction)):
                        # ref_idx > 0 skips the normal flow reference, which
                        # would otherwise make the conditional branch be
                        # processed twice. The loop keeps iterating over
                        # CodeRefsFrom(head, 1) so the other conditions,
                        # which rely on it, are unaffected.
                        #
                        # FIXME: the reference is not checked to point to
                        # valid, defined code. This could lead to a failure
                        # when exporting; an isHead()/isCode() check would
                        # probably address it.
                        packet.add_conditional_branch_true(head, ref)
                        packet.add_conditional_branch_false(
                            head, idaapi.next_head(head, chunk_end))

                        # If the target is not in our chunk list, queue it.
                        _queue_chunk_at(ref, chunks, chunks_todo)

                    elif arch.is_unconditional_branch(instruction):
                        packet.add_unconditional_branch(head, ref)

                        # If the target is not in our chunk list, queue it.
                        _queue_chunk_at(ref, chunks, chunks_todo)

            for ref in idautils.DataRefsFrom(head):
                packet.add_data_reference(head, ref)

                # Get a data reference from the current reference's
                # location. For instance, if 'ref' points to a valid address
                # and such address contains a data reference to code.
                targets = list(idautils.DataRefsFrom(ref))
                target = targets[0] if targets else None

                if target is None and arch.is_call(instruction):
                    imp_name = idc.Name(ref)
                    imp_module = get_import_module_name(ref)
                    imported_functions.add((ref, imp_name, imp_module))
                    packet.add_indirect_virtual_call(head, ref)

                # idc.isHead() tests a flags value, so the target's flags
                # are fetched first rather than passing the raw address.
                elif (target is not None
                        and idc.isHead(idc.GetFlags(target))):
                    # for calls "routed" through this reference
                    if arch.is_call(instruction):
                        packet.add_indirect_call(head, target)

                    # for unconditional jumps "routed" through this
                    # reference
                    elif arch.is_unconditional_branch(instruction):
                        packet.add_unconditional_branch(head, target)

                    # for conditional "routed" through this reference
                    elif arch.is_conditional_branch(instruction):
                        packet.add_conditional_branch_true(head, target)
                        packet.add_conditional_branch_false(
                            head, idaapi.next_head(head, chunk_end))

    f = FunctionAnalyzer(arch, func_ea, packet)

    instrumentation.new_packet(packet)
    instrumentation.new_function(f)
def isCode(self, ea):
    """Return True if the item at address `ea` is code, False otherwise.

    idc.isCode() tests a flags value rather than an address, so the flags of
    `ea` are fetched with idc.GetFlags() before the test.
    """
    return idc.isCode(idc.GetFlags(ea))