def find_ret_block(addr):
    """Return the first return block of the function containing ``addr``.

    Args:
        addr: An address that is looked up with ``idaapi.get_func``.

    Returns:
        The first block of the function's flow chart whose type satisfies
        ``idaapi.is_ret_block``, or ``None`` when there is no such block or
        when ``addr`` does not belong to any function.
    """
    func = idaapi.get_func(addr)
    if func is None:
        # FlowChart() refuses to be built without a function (or explicit
        # bounds), so report "not found" the same way as a missing ret block.
        return None

    # Taken from ex_gdl_qflow_chart.py
    for block in idaapi.FlowChart(func):
        if idaapi.is_ret_block(block.type):
            return block
    return None
def main():
    """Collect per-function features from the currently loaded IDA database.

    Binary-wide information (input path, SHA-1 of the input file, image base,
    and the build options parsed from the file name) is gathered once. Then
    every function yielded by ``idautils.Functions()`` is described, together
    with the basic blocks of its flow chart.

    Returns:
        list[dict]: One dictionary per function. Each carries the binary-wide
        fields, the function's name/size/hash/type information, its entries in
        the caller, callee and CFG maps, the strings and constants found in
        its blocks, and a ``"bb_data"`` list with one dictionary per basic
        block. Functions for which IDA returns no function object or a
        ``BADADDR`` boundary are skipped.
    """
    # Get IDA default information
    bin_path = ida_nalt.get_input_file_path()
    with open(bin_path, "rb") as f:
        bin_hash = sha1(f.read()).hexdigest()
    img_base = idaapi.get_imagebase()
    info = idaapi.get_inf_structure()
    if info.is_64bit():
        bits = 64
    elif info.is_32bit():
        bits = 32
    else:
        bits = 16

    endian = "big" if info.is_be() else "little"
    arch = "_".join([info.procName, str(bits), endian])
    arch = get_arch(arch)

    # Parse option information
    # NOTE(review): the `arch` derived from IDA above is overwritten by the
    # value parsed from the file name -- confirm that this is intended.
    package, compiler, arch, opti, bin_name = parse_fname(bin_path)
    if "_noinline" in bin_path:
        other_option = "noinline"
    elif "_pie" in bin_path:
        other_option = "pie"
    elif "_lto" in bin_path:
        other_option = "lto"
    else:
        other_option = "normal"

    # Prepare default information for processing
    caller_map, callee_map = get_call_graph()
    edge_map, bb_callee_map = get_bb_graph(caller_map, callee_map)

    # Now extract function information
    func_data = []
    for idx, addr in enumerate(list(idautils.Functions())):
        function = idaapi.get_func(addr)
        if (not function
                or function.start_ea == idaapi.BADADDR
                or function.end_ea == idaapi.BADADDR):
            continue

        # IDA's default function information
        func_name = get_func_name(addr).strip()
        demangled_name, demangled_full_name = demangle(func_name)
        graph = idaapi.FlowChart(function, flags=idaapi.FC_PREDS)
        func_size = function.size()
        # Fall back to empty *bytes*: sha1() rejects str on Python 3.
        data = idc.get_bytes(addr, func_size) or b""
        data_hash = sha1(data).hexdigest()
        stack_size = get_frame_size(addr)
        seg_name = get_segm_name(addr)

        # Get imported callees. Note that the segment name is used because
        # idaapi.get_import_module_name() sometimes returns bad results ...
        imported_callees = []
        if func_name in callee_map:
            # Keep the callees whose second field lies in a different segment
            # than this function.
            imported_callees = [
                callee for callee in callee_map[func_name]
                if get_segm_name(callee[1]) != seg_name
            ]

        # Get type information from IDA
        func_type, ret_type, args = get_type(addr)

        # Prepare basic block information for feature extraction
        func_strings = []
        func_consts = []
        bb_data = []
        for bb in graph:
            if bb.start_ea == idaapi.BADADDR or bb.end_ea == idaapi.BADADDR:
                continue

            bb_size = bb.end_ea - bb.start_ea
            block_data = idc.get_bytes(bb.start_ea, bb_size) or b""
            block_data_hash = sha1(block_data).hexdigest()
            bb_strings = get_strings(bb.start_ea, bb.end_ea)
            bb_consts = get_consts(bb.start_ea, bb.end_ea)
            # Entries of the per-function callee list tagged with this
            # block's id in their first field.
            bb_callees = [
                entry for entry in bb_callee_map[func_name]
                if entry[0] == bb.id
            ]
            bb_data.append({
                "size": bb_size,
                "block_id": bb.id,
                "startEA": bb.start_ea,
                "endEA": bb.end_ea,
                "type": bb.type,
                "is_ret": idaapi.is_ret_block(bb.type),
                "hash": block_data_hash,
                "callees": bb_callees,
                "strings": bb_strings,
                "consts": bb_consts,
            })
            func_strings.extend(bb_strings)
            func_consts.extend(bb_consts)

        func_data.append({
            "ida_idx": idx,
            "seg_name": seg_name,
            "name": func_name,
            "demangled_name": demangled_name,
            "demangled_full_name": demangled_full_name,
            "hash": data_hash,
            "size": func_size,
            "startEA": function.start_ea,
            "endEA": function.end_ea,
            "cfg_size": graph.size,
            "img_base": img_base,
            "bin_path": bin_path,
            "bin_hash": bin_hash,
            "bin_offset": addr - img_base,
            "stack_size": stack_size,
            "package": package,
            "compiler": compiler,
            "arch": arch,
            "opti": opti,
            "others": other_option,
            "bin_name": bin_name,
            "func_type": func_type,
            "ret_type": ret_type,
            "args": args,
            "callers": caller_map[func_name],
            "callees": callee_map[func_name],
            "imported_callees": imported_callees,
            "cfg": edge_map[func_name],
            "strings": func_strings,
            "consts": func_consts,
            "bb_data": bb_data,
        })

    return func_data
def is_ret(self):
    """Report whether IDA classifies this block's type as a return block."""
    block_type = self.basic_block.type
    return idaapi.is_ret_block(block_type)