def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
    """Disassemble and pretty-print memory starting at ctx.entry.

    Walks sections from ctx.entry, rendering code, offsets, strings,
    arrays and raw words into an Output object.

    ctx   : disassembly context; ctx.entry is the start address.
    lines : maximum number of output lines when `until` is -1.
    until : stop address (exclusive); -1 means "no stop address"
            (only the visual mode passes a real address here).

    Returns the filled Output object, or None if no section contains
    (or follows) ctx.entry.
    """
    ARCH = self.load_arch_module()
    ARCH_OUTPUT = ARCH.output
    ARCH_UTILS = ARCH.utils
    ad = ctx.entry
    s = self.binary.get_section(ad)
    if s is None:
        # until is != -1 only from the visual mode
        # It allows to not go before the first section.
        if until != -1:
            return None
        # Get the next section, it's not mandatory that sections
        # are consecutives !
        s = self.binary.get_next_section(ad)
        if s is None:
            return None
        ad = s.start
    o = ARCH_OUTPUT.Output(ctx)
    o._new_line()
    o.curr_section = s
    o.mode_dump = True
    l = 0  # number of output lines emitted so far
    api = ctx.gctx.api
    # For mips: after a jump we add a newline, but for mips we should
    # add this newline after the prefetch instruction.
    prefetch_after_branch = False
    # Outer loop: one iteration per section; inner loop walks addresses.
    while 1:
        if ad == s.start:
            # Section header banner.
            if not o.last_2_lines_are_empty():
                o._new_line()
            o._dash()
            o._section(s.name)
            o._add(" 0x%x -> 0x%x" % (s.start, s.end))
            o._new_line()
            o._new_line()
        while ((l < lines and until == -1) or (ad < until and until != -1)) \
                and ad <= s.end:
            ty = self.mem.get_type(ad)
            # A PE import should not be displayed as a subroutine
            if not(self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                    and self.mem.is_code(ad):
                is_func = ad in self.functions
                if is_func:
                    if not o.last_2_lines_are_empty():
                        o._new_line()
                    o._dash()
                    o._user_comment("; SUBROUTINE")
                    o._new_line()
                    o._dash()
                i = self.lazy_disasm(ad, s.start)
                # Blank line before any cross-referenced non-function target.
                if not is_func and ad in self.xrefs and \
                        not o.last_2_lines_are_empty():
                    o._new_line()
                o._asm_inst(i)
                is_end = ad in self.end_functions
                # mips
                if prefetch_after_branch:
                    prefetch_after_branch = False
                    if not is_end:
                        o._new_line()
                if is_end:
                    for fad in self.end_functions[ad]:
                        sy = api.get_symbol(fad)
                        o._user_comment("; end function %s" % sy)
                        o._new_line()
                    o._new_line()
                elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                    if self.is_mips:
                        prefetch_after_branch = True
                    else:
                        o._new_line()
                elif ARCH_UTILS.is_call(i):
                    # Blank line after a call to a noreturn function.
                    op = i.operands[0]
                    if op.type == self.capstone.CS_OP_IMM:
                        imm = unsigned(op.value.imm)
                        if imm in self.functions and self.is_noreturn(imm):
                            if self.is_mips:
                                prefetch_after_branch = True
                            else:
                                o._new_line()
                ad += i.size
            elif MEM_WOFFSET <= ty <= MEM_QOFFSET:
                # Data word holding an address (word/dword/qword offset).
                prefetch_after_branch = False
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size(ad)
                off = s.read_int(ad, sz)
                if off is None:
                    # NOTE(review): `ad` is not advanced here; if read_int
                    # can return None for an in-section address this loops
                    # forever — TODO confirm read_int's contract.
                    continue
                if ctx.gctx.print_bytes:
                    o._bytes(s.read(ad, sz))
                o._data_prefix(sz)
                o._add(" ")
                o._imm(off, sz, True, print_data=False,
                       force_dont_print_data=True)
                o._new_line()
                ad += sz
            elif ty == MEM_ASCII:
                prefetch_after_branch = False
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size(ad)
                buf = self.binary.get_string(ad, sz)
                if buf is not None:
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    # Split the string into multi lines
                    splitted = buf.split("\n")
                    j = 0  # byte offset of the current chunk within the string
                    for i, st in enumerate(splitted):
                        if i > 0 and len(st) != 0:
                            o._new_line()
                            o.set_line(ad + j)
                            o._address(ad + j)
                        ibs = 0
                        bs = 65  # max printed characters per output line
                        while ibs < len(st):
                            if ibs > 0:
                                o._new_line()
                                o.set_line(ad + j)
                                o._address(ad + j)
                            blk = st[ibs:ibs + bs]
                            # Re-append the '\n' consumed by split() on the
                            # last chunk of every line but the final one.
                            if i < len(splitted) - 1 and ibs + bs >= len(st):
                                o._string('"' + blk + '\\n"')
                                j += len(blk) + 1
                            else:
                                o._string('"' + blk + '"')
                                j += len(blk)
                            ibs += bs
                o._add(", 0")
                o._new_line()
                ad += sz
            elif ty == MEM_ARRAY:
                prefetch_after_branch = False
                o._label_and_address(ad)
                array_info = self.mem.mm[ad]
                total_size = array_info[0]
                entry_type = array_info[2]
                entry_size = self.mem.get_size_from_type(entry_type)
                n = int(total_size / entry_size)  # NOTE(review): unused
                o.set_line(ad)
                o._data_prefix(entry_size)
                k = 0  # bytes of the array consumed so far
                while k < total_size:
                    # Wrap long array lines.
                    if o.curr_index > 70:
                        o._new_line()
                        o.set_line(ad)
                        o._address(ad)
                        o._data_prefix(entry_size)
                        l += 1
                    val = s.read_int(ad, entry_size)
                    if MEM_WOFFSET <= entry_type <= MEM_QOFFSET:
                        o._add(" ")
                        o._imm(val, entry_size, True, print_data=False,
                               force_dont_print_data=True)
                    else:
                        o._word(val, entry_size, is_from_array=True)
                    ad += entry_size
                    k += entry_size
                    if k < total_size:
                        o._add(",")
                o._new_line()
            else:
                # Unknown/untyped memory: dump one raw word.
                prefetch_after_branch = False
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size_from_type(ty)
                if ctx.gctx.print_bytes:
                    o._bytes(s.read(ad, sz))
                o._word(s.read_int(ad, sz), sz)
                o._new_line()
                ad += sz
            l += 1
        # The inner loop left the current section: find where `ad` lives now.
        s = self.binary.get_section(ad)
        if s is None:
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                break
            o._new_line()
            ad = s.start
            if until != -1 and ad >= until:
                break
        if (l >= lines and until == -1) or (ad >= until and until != -1):
            break
        o.curr_section = s
    if until == ad:
        if self.mem.is_code(ad) and ad in self.xrefs or \
                s is not None and ad == s.start:
            if not o.last_2_lines_are_empty():
                o._new_line()
    # remove the last empty line
    o.lines.pop(-1)
    o.token_lines.pop(-1)
    o.join_lines()
    return o
def get_graph(self, entry):
    """Build the control-flow graph of the function starting at `entry`.

    Each disassembled instruction becomes one graph node; basic blocks
    are formed later (in __simplify). Uses an explicit worklist.

    Returns (gph, nb_new_syms) — the Graph and the number of symbols
    recovered for stripped PE binaries — or (None, 0) if nothing could
    be disassembled.
    """
    ARCH_UTILS = self.load_arch_module().utils
    gph = Graph(self, entry)
    stack = [entry]            # worklist of addresses still to disassemble
    start = time()
    prefetch = None
    addresses = set()          # every address reached, for PE symbol recovery
    # WARNING: this assume that on every architectures the jump
    # address is the last operand (operands[-1])
    # Here each instruction is a node. Blocks will be created in the
    # function __simplify.
    while stack:
        ad = stack.pop()
        inst = self.lazy_disasm(ad)
        if inst is None:
            # Remove all previous instructions which have a link
            # to this instruction.
            if ad in gph.link_in:
                for i in gph.link_in[ad]:
                    gph.link_out[i].remove(ad)
                for i in gph.link_in[ad]:
                    if not gph.link_out[i]:
                        del gph.link_out[i]
                del gph.link_in[ad]
            continue
        if gph.exists(inst):
            continue
        addresses.add(ad)
        if ARCH_UTILS.is_ret(inst):
            prefetch = self.__add_prefetch(addresses, inst)
            gph.new_node(inst, prefetch, None)
        elif ARCH_UTILS.is_uncond_jump(inst):
            prefetch = self.__add_prefetch(addresses, inst)
            gph.uncond_jumps_set.add(ad)
            op = inst.operands[-1]
            if op.type == self.capstone.CS_OP_IMM:
                nxt = unsigned(op.value.imm)
                if nxt in self.functions:
                    # Tail jump into another function: stop the walk here.
                    gph.new_node(inst, prefetch, None)
                else:
                    stack.append(nxt)
                    gph.new_node(inst, prefetch, [nxt])
            else:
                if inst.address in self.jmptables:
                    table = self.jmptables[inst.address].table
                    stack += table
                    gph.new_node(inst, prefetch, table)
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)
        elif ARCH_UTILS.is_cond_jump(inst):
            prefetch = self.__add_prefetch(addresses, inst)
            gph.cond_jumps_set.add(ad)
            op = inst.operands[-1]
            if op.type == self.capstone.CS_OP_IMM:
                # Fall-through successor starts after the delay slot, if any.
                if prefetch is None:
                    direct_nxt = inst.address + inst.size
                else:
                    direct_nxt = prefetch.address + prefetch.size
                nxt_jmp = unsigned(op.value.imm)
                stack.append(direct_nxt)
                if nxt_jmp in self.functions:
                    gph.new_node(inst, prefetch, [direct_nxt])
                else:
                    stack.append(nxt_jmp)
                    gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
            else:
                # Can't interpret jmp ADDR|reg
                gph.new_node(inst, prefetch, None)
        else:
            if ad != entry and ARCH_UTILS.is_call(inst):
                # TODO: like in the analyzer, simulate registers
                # -> during the analysis, save in the database
                # the immediate value.
                op = inst.operands[0]
                if op.type == self.capstone.CS_OP_IMM:
                    imm = unsigned(op.value.imm)
                    if imm in self.functions and self.is_noreturn(imm):
                        # Call to a noreturn function ends this path.
                        prefetch = self.__add_prefetch(addresses, inst)
                        gph.new_node(inst, prefetch, None)
                        gph.exit_or_ret.add(ad)
                        continue
                # Indirect call through a noreturn import (e.g. exit@plt).
                if op.type == self.capstone.CS_OP_MEM and \
                        op.mem.disp in self.binary.imports and \
                        self.binary.imports[op.mem.disp] & FUNC_FLAG_NORETURN:
                    prefetch = self.__add_prefetch(addresses, inst)
                    gph.new_node(inst, prefetch, None)
                    gph.exit_or_ret.add(ad)
                    continue
            nxt = inst.address + inst.size
            stack.append(nxt)
            gph.new_node(inst, None, [nxt])
    if len(gph.nodes) == 0:
        return None, 0
    if self.binary.type == T_BIN_PE:
        nb_new_syms = self.binary.reverse_stripped_list(self, addresses)
    else:
        nb_new_syms = 0
    elapsed = time()
    elapsed = elapsed - start
    debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))
    return gph, nb_new_syms
def get_graph(self, entry):
    """Build the control-flow graph of the function starting at `entry`.

    Simpler variant of get_graph: the call handling only recognizes a
    direct (immediate) call to a known noreturn function; there is no
    exit_or_ret bookkeeping and no noreturn-import check.

    Returns (gph, nb_new_syms), or (None, 0) if nothing was disassembled.
    """
    ARCH_UTILS = self.load_arch_module().utils
    gph = Graph(self, entry)
    stack = [entry]            # worklist of addresses still to disassemble
    start = time()
    prefetch = None
    addresses = set()          # every address reached, for PE symbol recovery
    # WARNING: this assume that on every architectures the jump
    # address is the last operand (operands[-1])
    # Here each instruction is a node. Blocks will be created in the
    # function __simplify.
    while stack:
        ad = stack.pop()
        inst = self.lazy_disasm(ad)
        if inst is None:
            # Remove all previous instructions which have a link
            # to this instruction.
            if ad in gph.link_in:
                for i in gph.link_in[ad]:
                    gph.link_out[i].remove(ad)
                for i in gph.link_in[ad]:
                    if not gph.link_out[i]:
                        del gph.link_out[i]
                del gph.link_in[ad]
            continue
        if gph.exists(inst):
            continue
        addresses.add(ad)
        if ARCH_UTILS.is_ret(inst):
            prefetch = self.__add_prefetch(addresses, inst)
            gph.new_node(inst, prefetch, None)
        elif ARCH_UTILS.is_uncond_jump(inst):
            prefetch = self.__add_prefetch(addresses, inst)
            gph.uncond_jumps_set.add(ad)
            op = inst.operands[-1]
            if op.type == self.capstone.CS_OP_IMM:
                nxt = unsigned(op.value.imm)
                if nxt in self.functions:
                    # Tail jump into another function: stop the walk here.
                    gph.new_node(inst, prefetch, None)
                else:
                    stack.append(nxt)
                    gph.new_node(inst, prefetch, [nxt])
            else:
                if inst.address in self.jmptables:
                    table = self.jmptables[inst.address].table
                    stack += table
                    gph.new_node(inst, prefetch, table)
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)
        elif ARCH_UTILS.is_cond_jump(inst):
            prefetch = self.__add_prefetch(addresses, inst)
            gph.cond_jumps_set.add(ad)
            op = inst.operands[-1]
            if op.type == self.capstone.CS_OP_IMM:
                # Fall-through successor starts after the delay slot, if any.
                if prefetch is None:
                    direct_nxt = inst.address + inst.size
                else:
                    direct_nxt = prefetch.address + prefetch.size
                nxt_jmp = unsigned(op.value.imm)
                stack.append(direct_nxt)
                if nxt_jmp in self.functions:
                    gph.new_node(inst, prefetch, [direct_nxt])
                else:
                    stack.append(nxt_jmp)
                    gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
            else:
                # Can't interpret jmp ADDR|reg
                gph.new_node(inst, prefetch, None)
        else:
            if ad != entry and ARCH_UTILS.is_call(inst):
                op = inst.operands[0]
                if op.type == self.capstone.CS_OP_IMM:
                    imm = unsigned(op.value.imm)
                    if imm in self.functions and self.is_noreturn(imm):
                        # Call to a noreturn function ends this path.
                        prefetch = self.__add_prefetch(addresses, inst)
                        gph.new_node(inst, prefetch, None)
                        continue
            nxt = inst.address + inst.size
            stack.append(nxt)
            gph.new_node(inst, None, [nxt])
    if len(gph.nodes) == 0:
        return None, 0
    if self.binary.type == T_BIN_PE:
        nb_new_syms = self.binary.reverse_stripped_list(self, addresses)
    else:
        nb_new_syms = 0
    elapsed = time()
    elapsed = elapsed - start
    debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))
    return gph, nb_new_syms
def __sub_analyze_flow(self, func_obj, entry, inner_code, add_if_code):
    """Follow the control flow from `entry`, filling `inner_code`.

    Tracks a simulated register/stack-pointer context per path, adds
    xrefs as it goes, and computes function flags (stdcall detection,
    noreturn, stack-analysis errors), frame size and the number of
    argument bytes restored by `ret`.

    func_obj    : function database entry to update, or None.
    entry       : start address of the flow.
    inner_code  : dict address -> instruction, filled in place.
    add_if_code : when True, xrefs added here are rolled back if a bad
                  instruction is reached (entry probably isn't code).

    Returns True on success, False on fatal error or rollback.
    """
    # If entry is not "code", we have to rollback added xrefs
    has_bad_inst = False
    if add_if_code:
        added_xrefs = []
    regsctx = self.arch_analyzer.new_regs_context()
    if regsctx is None:
        # fatal error, but don't quit to let the user save the database
        return False
    flags = 0
    stack_err = False  # NOTE(review): assigned but never used
    args_restore = 0   # bytes popped by `ret imm` (x86 stdcall)
    if func_obj is not None:
        frame_size = func_obj[FUNC_FRAME_SIZE]
        if frame_size == -1:
            frame_size = self.ARCH_UTILS.guess_frame_size(self, entry)
            # used in arch/*/analyzer.c
            func_obj[FUNC_FRAME_SIZE] = frame_size
    else:
        frame_size = -1
    ret_found = False
    stack = [(regsctx, entry)]  # worklist of (register context, address)
    while stack:
        (regsctx, ad) = stack.pop()
        if self.db.mem.is_data(ad):
            continue
        inst = self.disasm(ad)
        if inst is None:
            has_bad_inst = True
            if add_if_code:
                break
            continue
        if ad in inner_code:
            continue
        if self.gctx.debugsp:
            ALL_SP[ad] = self.arch_analyzer.get_sp(regsctx)
        inner_code[ad] = inst
        ##### RETURN #####
        if self.is_ret(inst):
            self.__add_prefetch(regsctx, inst, func_obj, inner_code)
            ret_found = True
            # `ret imm` on x86 restores caller args -> stdcall.
            if self.dis.is_x86 and len(inst.operands) == 1:
                args_restore = inst.operands[0].value.imm
                flags |= FUNC_FLAG_STDCALL
            # SP should be balanced at return; otherwise flag the analysis.
            if self.arch_analyzer.get_sp(regsctx) != 0:
                flags |= FUNC_FLAG_ERR_STACK_ANALYSIS
        ##### UNCONDITIONAL JUMP #####
        elif self.is_uncond_jump(inst):
            self.__add_prefetch(regsctx, inst, func_obj, inner_code)
            op = inst.operands[-1]
            jmp_ad = None
            if op.type == self.ARCH_UTILS.OP_IMM:
                jmp_ad = unsigned(op.value.imm)
            else:
                is_jmptable = inst.address in self.jmptables
                # Create a jumptable if necessary
                if not is_jmptable:
                    if op.type == self.ARCH_UTILS.OP_REG:
                        jmp_ad = self.arch_analyzer.reg_value(
                            regsctx, op.value.reg)
                        if jmp_ad is None:
                            is_jmptable = self.auto_jump_table(
                                inst, inner_code)
                    elif op.type == self.ARCH_UTILS.OP_MEM:
                        self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)
                        is_jmptable = self.auto_jump_table(
                            inst, inner_code)
                if is_jmptable:
                    table = self.jmptables[inst.address].table
                    # Each table target gets its own cloned register context.
                    for n in table:
                        r = self.arch_analyzer.clone_regs_context(regsctx)
                        stack.append((r, n))
                    self.api.add_xrefs_table(ad, table)
                    if add_if_code:
                        added_xrefs.append((ad, table))
                    continue
                self.arch_analyzer.analyze_operands(
                    self, regsctx, inst, func_obj, False)
                # TODO: assume there is return
                if jmp_ad is None:
                    if entry in self.db.imports:
                        ret_found |= self.db.imports[
                            entry] & FUNC_FLAG_NORETURN
                    else:
                        ret_found = True
                    continue
            self.api.add_xref(ad, jmp_ad)
            if self.db.mem.is_func(jmp_ad):
                # Tail jump into a known function: inherit its attributes.
                ret_found |= not self.is_func_noreturn(jmp_ad, entry)
                fo = self.functions[jmp_ad]
                flags = fo[FUNC_FLAGS]
                frame_size = max(fo[FUNC_FRAME_SIZE], frame_size)
                args_restore = fo[FUNC_ARGS_RESTORE]
            else:
                stack.append((regsctx, jmp_ad))
            if add_if_code:
                added_xrefs.append((ad, jmp_ad))
        ##### CONDITIONAL JUMP #####
        elif self.is_cond_jump(inst):
            prefetch = self.__add_prefetch(regsctx, inst, func_obj, inner_code)
            op = inst.operands[-1]
            if op.type == self.ARCH_UTILS.OP_IMM:
                # Fall-through successor starts after the delay slot, if any.
                if prefetch is None:
                    direct_nxt = inst.address + inst.size
                else:
                    direct_nxt = prefetch.address + prefetch.size
                nxt_jmp = unsigned(unsigned(op.value.imm))
                self.api.add_xref(ad, nxt_jmp)
                if self.db.mem.is_func(direct_nxt):
                    ret_found |= not self.is_func_noreturn(
                        direct_nxt, entry)
                    fo = self.functions[direct_nxt]
                    flags = fo[FUNC_FLAGS]
                    frame_size = max(fo[FUNC_FRAME_SIZE], frame_size)
                    args_restore = fo[FUNC_ARGS_RESTORE]
                else:
                    stack.append((regsctx, direct_nxt))
                if add_if_code:
                    added_xrefs.append((ad, nxt_jmp))
                if self.db.mem.is_func(nxt_jmp):
                    ret_found |= not self.is_func_noreturn(nxt_jmp, entry)
                else:
                    # Branch target explored with a cloned context.
                    newctx = self.arch_analyzer.clone_regs_context(regsctx)
                    stack.append((newctx, nxt_jmp))
            else:
                self.arch_analyzer.analyze_operands(
                    self, regsctx, inst, func_obj, False)
                # TODO : jump tables for conditional jumps ?
        ##### CALL #####
        elif self.is_call(inst):
            op = inst.operands[-1]
            call_ad = None
            sp_before = self.arch_analyzer.get_sp(regsctx)
            if op.type == self.ARCH_UTILS.OP_IMM:
                call_ad = unsigned(op.value.imm)
            elif op.type == self.ARCH_UTILS.OP_REG:
                # FIXME : for MIPS, addresses are loaded in t9 (generally)
                # then jalr t9 is executed. The problem here is that we
                # will analyze twice the function. The first time is done
                # by the function analyze_imm.
                call_ad = self.arch_analyzer.reg_value(
                    regsctx, op.value.reg)
            else:
                self.arch_analyzer.analyze_operands(
                    self, regsctx, inst, func_obj, False)
                # Indirect call through memory to a noreturn function.
                if self.db.mem.is_func(op.mem.disp) and \
                        self.is_func_noreturn(op.mem.disp, entry):
                    self.__add_prefetch(regsctx, inst, func_obj, inner_code)
                    continue
            if call_ad is not None:
                self.api.add_xref(ad, call_ad)
                if add_if_code:
                    added_xrefs.append((ad, call_ad))
                self.analyze_flow(
                    call_ad,
                    entry_is_func=True,
                    force=False,
                    add_if_code=add_if_code)
                # TODO: if the address was alredy in the pending list
                # we don't have a computed args size
                # Reset the stack pointer to frame_size to handle stdcall.
                if frame_size != -1 and call_ad in self.functions:
                    fo = self.functions[call_ad]
                    if fo is not None:
                        n = fo[FUNC_ARGS_RESTORE]
                        if n:
                            self.arch_analyzer.set_sp(
                                regsctx, sp_before + n)
                if self.db.mem.is_func(call_ad) and \
                        self.is_func_noreturn(call_ad, entry):
                    self.__add_prefetch(regsctx, inst, func_obj, inner_code)
                    continue
            # It seems it doesn't matter for the prefetched instruction
            nxt = inst.address + inst.size
            stack.append((regsctx, nxt))
        ##### OTHERS #####
        else:
            self.arch_analyzer.analyze_operands(
                self, regsctx, inst, func_obj, False)
            nxt = inst.address + inst.size
            if nxt not in self.functions:
                stack.append((regsctx, nxt))
    # Remove all xrefs, this is not a correct flow
    if add_if_code and has_bad_inst:
        for from_ad, to_ad in added_xrefs:
            if isinstance(to_ad, list):
                self.api.rm_xrefs_table(from_ad, to_ad)
            else:
                self.api.rm_xref(from_ad, to_ad)
        return False
    if func_obj is not None:
        if entry in self.db.imports:
            if self.db.imports[entry] & FUNC_FLAG_NORETURN:
                flags |= FUNC_FLAG_NORETURN
        elif not ret_found:
            flags |= FUNC_FLAG_NORETURN
        func_obj[FUNC_FLAGS] = flags
        func_obj[FUNC_FRAME_SIZE] = frame_size
        func_obj[FUNC_ARGS_RESTORE] = args_restore
    return True
def _imm(self, imm, op_size, hexa, section=None, print_data=True,
         force_dont_print_data=False, is_from_jump=False):
    """Print an immediate operand into the output buffer.

    If `imm` resolves to a label/address, prints the label (and
    optionally the pointed string data); otherwise prints the raw
    value as a char, hex, or signed decimal, appending a printable
    ASCII rendering of the packed bytes when applicable.

    imm                   : the immediate value.
    op_size               : operand size in bytes (1/2/4/8).
    hexa                  : print as hex (forced when capstone_string is set).
    section               : section containing imm, looked up when None.
    print_data            : allow printing pointed string data.
    force_dont_print_data : overrides print_data.
    is_from_jump          : render hex value with error style.
    """
    if self.gctx.capstone_string != 0:
        hexa = True
    if hexa:
        imm = unsigned(imm)
    label_printed = self._label(imm, print_colon=False)
    if label_printed:
        ty = self._dis.mem.get_type(imm)
        # ty == -1 : from the terminal (with -x) there are no xrefs if
        # the file was loaded without a database.
        # A MEM_HEAD pointing into an ASCII blob is treated as ASCII.
        if ty == MEM_HEAD and self._dis.mem.get_type(
                self._dis.mem.get_head_addr(imm)) == MEM_ASCII:
            ty = MEM_ASCII
        if imm in self._dis.xrefs and ty != MEM_UNK and \
                ty != MEM_ASCII or ty == -1:
            return
        if ty == MEM_ASCII:
            print_data = True
            force_dont_print_data = False
    if section is None:
        section = self._binary.get_section(imm)
    if section is not None and section.start == 0:
        section = None
    # For a raw file, if the raw base is 0 the immediate is considered
    # as an address only if it's in the symbols list.
    raw_base_zero = self._binary.type == T_BIN_RAW and self.gctx.raw_base == 0
    if section is not None and not raw_base_zero:
        if not label_printed:
            self._address(imm, print_colon=False, notprefix=True)
        if not force_dont_print_data and print_data:
            s = self._binary.get_string(imm, self.gctx.max_data_size)
            if s is not None:
                s = s.replace("\n", "\\n")
                self._add(" ")
                self._string('"' + s + '"')
        return
    if label_printed:
        return
    # Not an address: print the raw value.
    if op_size == 1:
        if imm == 10:
            self._string("'\\n'")
        else:
            self._string("'%s'" % get_char(imm))
    elif hexa:
        if is_from_jump:
            self._error(hex(imm))
        else:
            self._add(hex(imm))
    else:
        if op_size == 4:
            self._add(str(c_int(imm).value))
        elif op_size == 2:
            self._add(str(c_short(imm).value))
        else:
            self._add(str(c_long(imm).value))
        # If the packed little-endian bytes are all printable, show them
        # as a string too (immediates are sometimes packed characters).
        if imm > 0:
            if op_size == 4:
                packed = struct.pack("<L", imm)
            elif op_size == 8:
                packed = struct.pack("<Q", imm)
            else:
                return
            if set(packed).issubset(BYTES_PRINTABLE_SET):
                self._string(" \"" + "".join(map(chr, packed)) + "\"")
    return
def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
    """Disassemble and pretty-print memory starting at ctx.entry.

    Variant of dump_asm using Output.is_last_2_line_empty; otherwise
    identical logic: walks sections from ctx.entry, rendering code,
    offsets, strings, arrays and raw words.

    ctx   : disassembly context; ctx.entry is the start address.
    lines : maximum number of output lines when `until` is -1.
    until : stop address (exclusive); -1 means "no stop address"
            (only the visual mode passes a real address here).

    Returns the filled Output object, or None if no section contains
    (or follows) ctx.entry.
    """
    ARCH = self.load_arch_module()
    ARCH_OUTPUT = ARCH.output
    ARCH_UTILS = ARCH.utils
    ad = ctx.entry
    s = self.binary.get_section(ad)
    if s is None:
        # until is != -1 only from the visual mode
        # It allows to not go before the first section.
        if until != -1:
            return None
        # Get the next section, it's not mandatory that sections
        # are consecutives !
        s = self.binary.get_next_section(ad)
        if s is None:
            return None
        ad = s.start
    o = ARCH_OUTPUT.Output(ctx)
    o._new_line()
    o.curr_section = s
    o.mode_dump = True
    l = 0  # number of output lines emitted so far
    api = ctx.gctx.api
    # For mips: after a jump we add a newline, but for mips we should
    # add this newline after the prefetch instruction.
    prefetch_after_branch = False
    # Outer loop: one iteration per section; inner loop walks addresses.
    while 1:
        if ad == s.start:
            # Section header banner.
            if not o.is_last_2_line_empty():
                o._new_line()
            o._dash()
            o._section(s.name)
            o._add(" 0x%x -> 0x%x" % (s.start, s.end))
            o._new_line()
            o._new_line()
        while ((l < lines and until == -1) or (ad < until and until != -1)) \
                and ad <= s.end:
            ty = self.mem.get_type(ad)
            # A PE import should not be displayed as a subroutine
            if not(self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                    and self.mem.is_code(ad):
                is_func = ad in self.functions
                if is_func:
                    if not o.is_last_2_line_empty():
                        o._new_line()
                    o._dash()
                    o._user_comment("; SUBROUTINE")
                    o._new_line()
                    o._dash()
                i = self.lazy_disasm(ad, s.start)
                # Blank line before any cross-referenced non-function target.
                if not is_func and ad in self.xrefs and \
                        not o.is_last_2_line_empty():
                    o._new_line()
                o._asm_inst(i)
                is_end = ad in self.end_functions
                # mips
                if prefetch_after_branch:
                    prefetch_after_branch = False
                    if not is_end:
                        o._new_line()
                if is_end:
                    for fad in self.end_functions[ad]:
                        sy = api.get_symbol(fad)
                        o._user_comment("; end function %s" % sy)
                        o._new_line()
                    o._new_line()
                elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                    if self.is_mips:
                        prefetch_after_branch = True
                    else:
                        o._new_line()
                elif ARCH_UTILS.is_call(i):
                    # Blank line after a call to a noreturn function.
                    op = i.operands[0]
                    if op.type == self.capstone.CS_OP_IMM:
                        imm = unsigned(op.value.imm)
                        if imm in self.functions and self.is_noreturn(imm):
                            if self.is_mips:
                                prefetch_after_branch = True
                            else:
                                o._new_line()
                ad += i.size
            elif MEM_WOFFSET <= ty <= MEM_QOFFSET:
                # Data word holding an address (word/dword/qword offset).
                prefetch_after_branch = False
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size(ad)
                off = s.read_int(ad, sz)
                if off is None:
                    # NOTE(review): `ad` is not advanced here; if read_int
                    # can return None for an in-section address this loops
                    # forever — TODO confirm read_int's contract.
                    continue
                if ctx.gctx.print_bytes:
                    o._bytes(s.read(ad, sz))
                o._data_prefix(sz)
                o._add(" ")
                o._imm(off, sz, True, print_data=False,
                       force_dont_print_data=True)
                o._new_line()
                ad += sz
            elif ty == MEM_ASCII:
                prefetch_after_branch = False
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size(ad)
                buf = self.binary.get_string(ad, sz)
                if buf is not None:
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    # Split the string into multi lines
                    splitted = buf.split("\n")
                    j = 0  # byte offset of the current chunk within the string
                    for i, st in enumerate(splitted):
                        if i > 0 and len(st) != 0:
                            o._new_line()
                            o.set_line(ad + j)
                            o._address(ad + j)
                        ibs = 0
                        bs = 65  # max printed characters per output line
                        while ibs < len(st):
                            if ibs > 0:
                                o._new_line()
                                o.set_line(ad + j)
                                o._address(ad + j)
                            blk = st[ibs:ibs + bs]
                            # Re-append the '\n' consumed by split() on the
                            # last chunk of every line but the final one.
                            if i < len(splitted) - 1 and ibs + bs >= len(st):
                                o._string('"' + blk + '\\n"')
                                j += len(blk) + 1
                            else:
                                o._string('"' + blk + '"')
                                j += len(blk)
                            ibs += bs
                o._add(", 0")
                o._new_line()
                ad += sz
            elif ty == MEM_ARRAY:
                prefetch_after_branch = False
                o._label_and_address(ad)
                array_info = self.mem.mm[ad]
                total_size = array_info[0]
                entry_type = array_info[2]
                entry_size = self.mem.get_size_from_type(entry_type)
                n = int(total_size / entry_size)  # NOTE(review): unused
                o.set_line(ad)
                o._data_prefix(entry_size)
                k = 0  # bytes of the array consumed so far
                while k < total_size:
                    # Wrap long array lines.
                    if o.curr_index > 70:
                        o._new_line()
                        o.set_line(ad)
                        o._address(ad)
                        o._data_prefix(entry_size)
                        l += 1
                    val = s.read_int(ad, entry_size)
                    if MEM_WOFFSET <= entry_type <= MEM_QOFFSET:
                        o._add(" ")
                        o._imm(val, entry_size, True, print_data=False,
                               force_dont_print_data=True)
                    else:
                        o._word(val, entry_size, is_from_array=True)
                    ad += entry_size
                    k += entry_size
                    if k < total_size:
                        o._add(",")
                o._new_line()
            else:
                # Unknown/untyped memory: dump one raw word.
                prefetch_after_branch = False
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size_from_type(ty)
                if ctx.gctx.print_bytes:
                    o._bytes(s.read(ad, sz))
                o._word(s.read_int(ad, sz), sz)
                o._new_line()
                ad += sz
            l += 1
        # The inner loop left the current section: find where `ad` lives now.
        s = self.binary.get_section(ad)
        if s is None:
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                break
            o._new_line()
            ad = s.start
            if until != -1 and ad >= until:
                break
        if (l >= lines and until == -1) or (ad >= until and until != -1):
            break
        o.curr_section = s
    if until == ad:
        if self.mem.is_code(ad) and ad in self.xrefs or \
                s is not None and ad == s.start:
            if not o.is_last_2_line_empty():
                o._new_line()
    # remove the last empty line
    o.lines.pop(-1)
    o.token_lines.pop(-1)
    o.join_lines()
    return o
def __sub_analyze_flow(self, func_obj, entry, inner_code, add_if_code):
    """Follow the control flow from `entry`, filling `inner_code`.

    Variant with a stack-pointer heuristic: after each call the SP is
    reset to -frame_size (stdcall handling), and in the generic case an
    SP move right after a call is interpreted as the caller cleaning up
    arguments (cdecl) via handle_cdecl.

    func_obj    : function database entry, or None.
    entry       : start address of the flow.
    inner_code  : dict address -> instruction, filled in place.
    add_if_code : when True, xrefs added here are rolled back if a bad
                  instruction is reached.

    Returns -1 on fatal error or rollback, otherwise the computed
    function flags (possibly FUNC_FLAG_NORETURN).
    """
    # If entry is not "code", we have to rollback added xrefs
    has_bad_inst = False
    if add_if_code:
        added_xrefs = []
    regsctx = self.arch_analyzer.new_regs_context()
    if regsctx is None:
        # fatal error, but don't quit to let the user save the database
        return -1
    if func_obj is not None:
        frame_size = self.ARCH_UTILS.guess_frame_size(self, entry)
        func_obj[FUNC_FRAME_SIZE] = frame_size
    else:
        frame_size = -1
    sp_after_push = 0   # SP snapshot right after the last call's arg pushes
    last_call = None    # address of the most recent call target
    has_ret = False
    stack = [(regsctx, entry)]  # worklist of (register context, address)
    while stack:
        (regsctx, ad) = stack.pop()
        inst = self.disasm(ad)
        if inst is None:
            has_bad_inst = True
            if add_if_code:
                break
            continue
        if ad in inner_code:
            continue
        if self.gctx.debugsp:
            ALL_SP[ad] = self.arch_analyzer.get_sp(regsctx)
        inner_code[ad] = inst
        ##### RETURN #####
        if self.is_ret(inst):
            self.__add_prefetch(inner_code, inst)
            has_ret = True
        ##### UNCONDITIONAL JUMP #####
        elif self.is_uncond_jump(inst):
            self.__add_prefetch(inner_code, inst)
            op = inst.operands[-1]
            jmp_ad = None
            if op.type == self.ARCH_UTILS.OP_IMM:
                jmp_ad = unsigned(op.value.imm)
            else:
                is_jmptable = inst.address in self.jmptables
                # Create a jumptable if necessary
                if not is_jmptable:
                    if op.type == self.ARCH_UTILS.OP_REG:
                        jmp_ad = self.arch_analyzer.reg_value(regsctx,
                                                              op.value.reg)
                        if jmp_ad is None:
                            is_jmptable = self.auto_jump_table(inst,
                                                               inner_code)
                    elif op.type == self.ARCH_UTILS.OP_MEM:
                        self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)
                        is_jmptable = self.auto_jump_table(inst, inner_code)
                if is_jmptable:
                    table = self.jmptables[inst.address].table
                    # Each table target gets its own cloned register context.
                    for n in table:
                        r = self.arch_analyzer.clone_regs_context(regsctx)
                        stack.append((r, n))
                    self.api.add_xrefs_table(ad, table)
                    if add_if_code:
                        added_xrefs.append((ad, table))
                    continue
                self.arch_analyzer.analyze_operands(
                    self, regsctx, inst, func_obj, False)
                # TODO: assume it has a return
                if jmp_ad is None:
                    has_ret = True
                    continue
            self.api.add_xref(ad, jmp_ad)
            if self.db.mem.is_func(jmp_ad):
                has_ret = not self.is_noreturn(jmp_ad, entry)
            else:
                stack.append((regsctx, jmp_ad))
            if add_if_code:
                added_xrefs.append((ad, jmp_ad))
        ##### CONDITIONAL JUMP #####
        elif self.is_cond_jump(inst):
            prefetch = self.__add_prefetch(inner_code, inst)
            op = inst.operands[-1]
            if op.type == self.ARCH_UTILS.OP_IMM:
                # Fall-through successor starts after the delay slot, if any.
                if prefetch is None:
                    direct_nxt = inst.address + inst.size
                else:
                    direct_nxt = prefetch.address + prefetch.size
                nxt_jmp = unsigned(unsigned(op.value.imm))
                self.api.add_xref(ad, nxt_jmp)
                if self.db.mem.is_func(direct_nxt):
                    has_ret = not self.is_noreturn(direct_nxt, entry)
                else:
                    stack.append((regsctx, direct_nxt))
                if add_if_code:
                    added_xrefs.append((ad, nxt_jmp))
                if self.db.mem.is_func(nxt_jmp):
                    has_ret = not self.is_noreturn(nxt_jmp, entry)
                else:
                    # Branch target explored with a cloned context.
                    newctx = self.arch_analyzer.clone_regs_context(regsctx)
                    stack.append((newctx, nxt_jmp))
            else:
                self.arch_analyzer.analyze_operands(
                    self, regsctx, inst, func_obj, False)
                # TODO : jump tables for conditional jumps ?
        ##### CALL #####
        elif self.is_call(inst):
            op = inst.operands[-1]
            call_ad = None
            if op.type == self.ARCH_UTILS.OP_IMM:
                call_ad = unsigned(op.value.imm)
            elif op.type == self.ARCH_UTILS.OP_REG:
                # FIXME : for MIPS, addresses are loaded in t9 (generally)
                # then jalr t9 is executed. The problem here is that we
                # will analyze twice the function. The first time is done
                # by the function analyze_imm.
                call_ad = self.arch_analyzer.reg_value(regsctx, op.value.reg)
            else:
                self.arch_analyzer.analyze_operands(
                    self, regsctx, inst, func_obj, False)
            if call_ad is not None:
                last_call = call_ad
                self.api.add_xref(ad, call_ad)
                if add_if_code:
                    added_xrefs.append((ad, call_ad))
                self.analyze_flow(
                    call_ad,
                    entry_is_func=True,
                    force=False,
                    add_if_code=add_if_code)
                # Reset the stack pointer to frame_size to handle stdcall.
                if frame_size != -1:
                    sp_after_push = self.arch_analyzer.get_sp(regsctx)
                    if frame_size != - sp_after_push:
                        self.arch_analyzer.set_sp(regsctx, -frame_size)
                if self.db.mem.is_func(call_ad):
                    if self.is_noreturn(call_ad, entry):
                        self.__add_prefetch(inner_code, inst)
                        continue
            nxt = inst.address + inst.size
            stack.append((regsctx, nxt))
        ##### OTHERS #####
        else:
            if frame_size != -1:
                sp_before = self.arch_analyzer.get_sp(regsctx)
            self.arch_analyzer.analyze_operands(
                self, regsctx, inst, func_obj, False)
            # Restore the stack pointer to sp_after_push to handle cdecl.
            if frame_size != -1:
                curr_sp = self.arch_analyzer.get_sp(regsctx)
                if curr_sp != sp_before and \
                        self.handle_cdecl(frame_size, sp_after_push, curr_sp):
                    new_sp = sp_after_push - sp_before - curr_sp
                    self.arch_analyzer.set_sp(regsctx, new_sp)
                    # The callee of the previous call is flagged cdecl.
                    if last_call is not None and \
                            self.db.mem.is_func(last_call):
                        self.functions[last_call][FUNC_FLAGS] |= \
                            FUNC_FLAG_CDECL
                    if self.gctx.debugsp:
                        ALL_SP[ad] = sp_after_push
                sp_after_push = 0
            nxt = inst.address + inst.size
            if nxt not in self.functions:
                stack.append((regsctx, nxt))
    # Remove all xrefs, this is not a correct flow
    if add_if_code and has_bad_inst:
        for from_ad, to_ad in added_xrefs:
            if isinstance(to_ad, list):
                self.api.rm_xrefs_table(from_ad, to_ad)
            else:
                self.api.rm_xref(from_ad, to_ad)
        return -1
    # Set function flags
    flags = self.import_flags(entry)
    if flags == 0 and not has_ret:
        flags = FUNC_FLAG_NORETURN
    return flags
def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
    """Disassemble and pretty-print memory starting at ctx.entry.

    Older variant: only one offset type (MEM_OFFSET), no MEM_ARRAY
    handling, strings printed on one line, and section prefixes enabled
    on the Output.

    ctx   : disassembly context; ctx.entry is the start address.
    lines : maximum number of output lines when `until` is -1.
    until : stop address (exclusive); -1 means "no stop address"
            (only the visual mode passes a real address here).

    Returns the filled Output object, or None if no section contains
    (or follows) ctx.entry.
    """
    ARCH = self.load_arch_module()
    ARCH_OUTPUT = ARCH.output
    ARCH_UTILS = ARCH.utils
    ad = ctx.entry
    s = self.binary.get_section(ad)
    if s is None:
        # until is != -1 only from the visual mode
        # It allows to not go before the first section.
        if until != -1:
            return None
        # Get the next section, it's not mandatory that sections
        # are consecutives !
        s = self.binary.get_next_section(ad)
        if s is None:
            return None
        ad = s.start
    o = ARCH_OUTPUT.Output(ctx)
    o._new_line()
    o.section_prefix = True
    o.curr_section = s
    o.mode_dump = True
    l = 0  # number of output lines emitted so far
    api = ctx.gctx.api
    # For mips: after a jump we add a newline, but for mips we should
    # add this newline after the prefetch instruction.
    prefetch_after_branch = False
    # Outer loop: one iteration per section; inner loop walks addresses.
    while 1:
        if ad == s.start:
            # Section header banner.
            if not o.is_last_2_line_empty():
                o._new_line()
            o._dash()
            o._section(s.name)
            o._add(" 0x%x -> 0x%x" % (s.start, s.end))
            o._new_line()
            o._new_line()
        while ((l < lines and until == -1) or (ad < until and until != -1)) \
                and ad <= s.end:
            ty = self.mem.get_type(ad)
            # A PE import should not be displayed as a subroutine
            if not(self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                    and self.mem.is_code(ad):
                is_func = ad in self.functions
                if is_func:
                    if not o.is_last_2_line_empty():
                        o._new_line()
                    o._dash()
                    o._user_comment("; SUBROUTINE")
                    o._new_line()
                    o._dash()
                i = self.lazy_disasm(ad, s.start)
                # Blank line before any cross-referenced non-function target.
                if not is_func and ad in self.xrefs and \
                        not o.is_last_2_line_empty():
                    o._new_line()
                o._asm_inst(i)
                is_end = ad in self.end_functions
                # mips
                if prefetch_after_branch:
                    prefetch_after_branch = False
                    if not is_end:
                        o._new_line()
                if is_end:
                    for fad in self.end_functions[ad]:
                        sy = api.get_symbol(fad)
                        o._user_comment("; end function %s" % sy)
                        o._new_line()
                    o._new_line()
                elif ARCH_UTILS.is_uncond_jump(i) or ARCH_UTILS.is_ret(i):
                    if self.is_mips:
                        prefetch_after_branch = True
                    else:
                        o._new_line()
                elif ARCH_UTILS.is_call(i):
                    # Blank line after a call to a noreturn function.
                    op = i.operands[0]
                    if op.type == self.capstone.CS_OP_IMM:
                        imm = unsigned(op.value.imm)
                        if imm in self.functions and self.is_noreturn(imm):
                            if self.is_mips:
                                prefetch_after_branch = True
                            else:
                                o._new_line()
                ad += i.size
            elif ty == MEM_OFFSET:
                # Data word holding an address.
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size(ad)
                off = s.read_int(ad, sz)
                if off is None:
                    # NOTE(review): `ad` is not advanced here; if read_int
                    # can return None for an in-section address this loops
                    # forever — TODO confirm read_int's contract.
                    continue
                if ctx.gctx.print_bytes:
                    o._bytes(s.read(ad, sz))
                o._data_prefix(sz)
                o._add(" ")
                o._imm(off, sz, True, print_data=False,
                       force_dont_print_data=True)
                o._new_line()
                ad += sz
            elif ty == MEM_ASCII:
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size(ad)
                buf = self.binary.get_string(ad, sz)
                if buf is not None:
                    if ctx.gctx.print_bytes:
                        o._bytes(s.read(ad, sz))
                    o._string('"' + buf + '"')
                o._add(", 0")
                o._new_line()
                ad += sz
            else:
                # Unknown/untyped memory: dump one raw word.
                o._label_and_address(ad)
                o.set_line(ad)
                sz = self.mem.get_size_from_type(ty)
                if ctx.gctx.print_bytes:
                    o._bytes(s.read(ad, sz))
                o._word(s.read_int(ad, sz), sz)
                o._new_line()
                ad += sz
            l += 1
        # The inner loop left the current section: find where `ad` lives now.
        s = self.binary.get_section(ad)
        if s is None:
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                break
            o._new_line()
            ad = s.start
            if until != -1 and ad >= until:
                break
        if (l >= lines and until == -1) or (ad >= until and until != -1):
            break
        o.curr_section = s
    if until == ad:
        # NOTE(review): unlike the other dump_asm variants there is no
        # `s is not None` guard before `ad == s.start`; if the loop ended
        # with s None this raises — TODO confirm reachability.
        if self.mem.is_code(ad) and ad in self.xrefs or ad == s.start:
            if not o.is_last_2_line_empty():
                o._new_line()
    # remove the last empty line
    o.lines.pop(-1)
    o.token_lines.pop(-1)
    o.join_lines()
    return o
def _getIMMString(self, imm, op_size, hexa, section=None, print_data=True,
                  force_dont_print_data=False):
    """Return an operand token (TextOp/StrOp) for an immediate value.

    Work-in-progress adaptation of Output._imm that returns token
    objects instead of writing into the output buffer. `label_printed`
    is hard-coded to the truthy placeholder "LL", so the lower half of
    this method (char/hex/decimal rendering) is currently unreachable:
    control always returns from one of the `if label_printed:` branches.

    imm                   : the immediate value.
    op_size               : operand size in bytes (1/2/4/8).
    hexa                  : ignored; forced to True below.
    section               : section containing imm, looked up when None.
    print_data            : allow returning pointed string data.
    force_dont_print_data : overrides print_data.
    """
    hexa = True
    imm = unsigned(imm)
    label_printed = "LL"  # self._label(imm, print_colon=False)
    res = ""
    if label_printed:
        ty = self.ctx.gctx.dis.mem.get_type(imm)
        # ty == -1 : from the terminal (with -x) there are no xrefs if
        # the file was loaded without a database.
        if imm in self.ctx.gctx.dis.xrefs and ty != MEM_UNK and \
                ty != MEM_ASCII or ty == -1:
            return TextOp(str(imm))
        if ty == MEM_ASCII:
            print_data = True
            force_dont_print_data = False
    if section is None:
        section = self.ctx.gctx.dis.binary.get_section(imm)
    if section is not None and section.start == 0:
        section = None
    # For a raw file, if the raw base is 0 the immediate is considered
    # as an address only if it's in the symbols list.
    # BUGFIX: was `self.gctx.raw_base` — this class reaches gctx through
    # self.ctx everywhere else; self.gctx would raise AttributeError
    # when the binary is a raw file.
    raw_base_zero = self.ctx.gctx.dis.binary.type == T_BIN_RAW and \
        self.ctx.gctx.raw_base == 0
    if section is not None and not raw_base_zero:
        if not label_printed:
            res += "A1"  # self._address(imm, print_colon=False, notprefix=True)
        if not force_dont_print_data and print_data:
            s = self.ctx.gctx.dis.binary.get_string(
                imm, self.ctx.gctx.max_data_size)
            if s is not None:
                res += " "
                res += '"' + s + '"'
                return StrOp(s)
        return TextOp(res)
    if label_printed:
        return TextOp(res)
    # NOTE(review): everything below is dead while label_printed is the
    # constant "LL"; kept for the pending conversion. The self._string /
    # self._add calls still write to the buffer rather than to `res`.
    if op_size == 1:
        self._string("'%s'" % get_char(imm))
    elif hexa:
        self._add(hex(imm))
    else:
        if op_size == 4:
            self._add(str(c_int(imm).value))
        elif op_size == 2:
            self._add(str(c_short(imm).value))
        else:
            self._add(str(c_long(imm).value))
        if imm > 0:
            if op_size == 4:
                packed = struct.pack("<L", imm)
            elif op_size == 8:
                packed = struct.pack("<Q", imm)
            else:
                return TextOp(res)
            if set(packed).issubset(BYTES_PRINTABLE_SET):
                self._string(" \"" + "".join(map(chr, packed)) + "\"")
    return TextOp(res)
def __sub_analyze_flow(self, func_obj, entry, inner_code, add_if_code):
    """Depth-first control-flow walk starting at *entry*.

    Disassembles reachable instructions into *inner_code* (dict:
    address -> instruction), adds xrefs for jumps/calls, and tracks a
    per-path register context (regsctx) via arch_analyzer.

    Returns:
        -1 if entry is outside any section or a bad instruction was hit
           while add_if_code is set (all added xrefs are rolled back),
         0 on a fatal regs-context allocation failure,
        otherwise the function flags (import flags, 0, or
        FUNC_FLAG_NORETURN when no reachable return was found).
    """
    if self.dis.binary.get_section(entry) is None:
        return -1
    has_ret = False
    # If entry is not "code", we have to rollback added xrefs
    has_bad_inst = False
    if add_if_code:
        added_xrefs = []
    regsctx = self.arch_analyzer.new_regs_context()
    if regsctx is None:
        # fatal error, but don't quit to let the user save the database
        return 0
    # FIXME : this is a hack for the cdecl calling convention
    # if the stack pointer move after a call, this is probably a cdecl
    # call, so we will ignore the add instruction.
    one_call_called = False
    # Explicit DFS stack of (register-context, address) pairs.
    stack = [(regsctx, entry)]
    while stack:
        (regsctx, ad) = stack.pop()
        inst = self.disasm(ad)
        if inst is None:
            # Undecodable byte sequence: abort immediately when we must
            # be able to roll back, otherwise just skip this path.
            has_bad_inst = True
            if add_if_code:
                break
            continue
        if ad in inner_code:
            # Already visited on another path.
            continue
        inner_code[ad] = inst
        # Let the arch-specific analyzer update the register context and
        # record operand xrefs (stack vars, loaded addresses, ...).
        self.arch_analyzer.analyze_operands(
            self, regsctx, inst, func_obj, one_call_called)
        if self.is_ret(inst):
            self.__add_prefetch(inner_code, inst)
            has_ret = True
        elif self.is_uncond_jump(inst):
            self.__add_prefetch(inner_code, inst)
            op = inst.operands[-1]
            if op.type == self.ARCH_UTILS.OP_IMM:
                # Direct jump: follow the target unless it is already a
                # known function (then inherit its noreturn status).
                nxt = unsigned(op.value.imm)
                self.api.add_xref(ad, nxt)
                if self.db.mem.is_func(nxt):
                    has_ret = not self.is_noreturn(nxt, entry)
                else:
                    stack.append((regsctx, nxt))
                if add_if_code:
                    added_xrefs.append((ad, nxt))
            else:
                if inst.address in self.jmptables:
                    # Indirect jump with a known jumptable: follow every
                    # table entry.
                    table = self.jmptables[inst.address].table
                    # TODO : duplicate regsctx ??
                    for n in table:
                        stack.append((regsctx, n))
                    self.api.add_xref(ad, table)
                    if add_if_code:
                        added_xrefs.append((ad, table))
                else:
                    # TODO
                    # This is a register or a memory access
                    # we can't say if the function really returns
                    has_ret = True
        elif self.is_cond_jump(inst):
            prefetch = self.__add_prefetch(inner_code, inst)
            op = inst.operands[-1]
            if op.type == self.ARCH_UTILS.OP_IMM:
                # Fall-through address starts after the prefetched (delay
                # slot) instruction when one exists (MIPS-style).
                if prefetch is None:
                    direct_nxt = inst.address + inst.size
                else:
                    direct_nxt = prefetch.address + prefetch.size
                nxt_jmp = unsigned(unsigned(op.value.imm))
                self.api.add_xref(ad, nxt_jmp)
                stack.append((regsctx, direct_nxt))
                if add_if_code:
                    added_xrefs.append((ad, nxt_jmp))
                if self.db.mem.is_func(nxt_jmp):
                    has_ret = not self.is_noreturn(nxt_jmp, entry)
                else:
                    # The taken branch gets its own cloned register
                    # context so the two paths don't interfere.
                    newctx = self.arch_analyzer.clone_regs_context(regsctx)
                    stack.append((newctx, nxt_jmp))
        elif self.is_call(inst):
            one_call_called = True
            op = inst.operands[-1]
            value = None
            if op.type == self.ARCH_UTILS.OP_IMM:
                value = unsigned(op.value.imm)
            elif op.type == self.ARCH_UTILS.OP_REG:
                # FIXME : for MIPS, addresses are loaded in t9 (generally)
                # then jalr t9 is executed. The problem here is that we
                # will analyze twice the function. The first time is done
                # by the function analyze_imm.
                value = self.arch_analyzer.reg_value(regsctx, op.value.reg)
            if value is not None:
                self.api.add_xref(ad, value)
                if add_if_code:
                    added_xrefs.append((ad, value))
                if not self.db.mem.is_func(value):
                    # Recursively analyze the callee first.
                    self.analyze_flow(value, True, False, add_if_code)
                if self.db.mem.is_func(value) and self.is_noreturn(value, entry):
                    # Callee never returns: don't follow the fall-through.
                    self.__add_prefetch(inner_code, inst)
                    continue
            nxt = inst.address + inst.size
            stack.append((regsctx, nxt))
        else:
            # Ordinary instruction: continue with the next one.
            nxt = inst.address + inst.size
            stack.append((regsctx, nxt))
    if add_if_code and has_bad_inst:
        # Not real code: undo every xref added during this walk.
        for from_ad, to_ad in added_xrefs:
            self.api.rm_xrefs(from_ad, to_ad)
        return -1
    # for ELF
    if entry in self.dis.binary.imports:
        flags = self.import_flags(entry)
    elif has_ret:
        flags = 0
    else:
        flags = FUNC_FLAG_NORETURN
    return flags
def __sub_analyze_flow(self, entry, inner_code, add_if_code):
    """Depth-first control-flow walk starting at *entry* (no register
    tracking -- simpler variant of the regs-context analyzer).

    Fills *inner_code* (address -> instruction) with every reachable
    instruction, adding xrefs for jump/call targets along the way.

    Returns:
        -1 if entry is outside any section or a bad instruction was hit
           while add_if_code is set (added xrefs are rolled back),
        otherwise the function flags (import flags, 0, or
        FUNC_FLAG_NORETURN when no reachable return was found).
    """
    if self.dis.binary.get_section(entry) is None:
        return -1
    # Explicit DFS stack of addresses still to visit.
    stack = [entry]
    has_ret = False
    # If entry is not "code", we have to rollback added xrefs
    has_bad_inst = False
    if add_if_code:
        added_xrefs = []
    while stack:
        ad = stack.pop()
        inst = self.disasm(ad)
        if inst is None:
            # Undecodable: abort (rollback mode) or skip this path.
            has_bad_inst = True
            if add_if_code:
                break
            continue
        if ad in inner_code:
            # Already visited on another path.
            continue
        inner_code[ad] = inst
        if self.is_ret(inst):
            self.__add_prefetch(inner_code, inst)
            has_ret = True
        elif self.is_uncond_jump(inst):
            self.__add_prefetch(inner_code, inst)
            op = inst.operands[-1]
            if op.type == self.CS_OP_IMM:
                # Direct jump: follow unless it's already a known
                # function (inherit its noreturn status instead).
                nxt = unsigned(op.value.imm)
                self.api.add_xref(ad, nxt)
                if self.db.mem.is_func(nxt):
                    has_ret = not self.is_noreturn(nxt, entry)
                else:
                    stack.append(nxt)
                if add_if_code:
                    added_xrefs.append((ad, nxt))
            else:
                if inst.address in self.jmptables:
                    # Indirect jump through a known jumptable: follow
                    # every table entry.
                    table = self.jmptables[inst.address].table
                    stack += table
                    self.api.add_xref(ad, table)
                    if add_if_code:
                        added_xrefs.append((ad, table))
                else:
                    # TODO
                    # This is a register or a memory access
                    # we can't say if the function really returns
                    has_ret = True
        elif self.is_cond_jump(inst):
            prefetch = self.__add_prefetch(inner_code, inst)
            op = inst.operands[-1]
            if op.type == self.CS_OP_IMM:
                # Fall-through begins after the prefetched (delay slot)
                # instruction when one exists.
                if prefetch is None:
                    direct_nxt = inst.address + inst.size
                else:
                    direct_nxt = prefetch.address + prefetch.size
                # NOTE(review): unsigned() is applied twice -- looks
                # redundant (presumably idempotent); confirm and simplify.
                nxt_jmp = unsigned(unsigned(op.value.imm))
                self.api.add_xref(ad, nxt_jmp)
                stack.append(direct_nxt)
                if add_if_code:
                    added_xrefs.append((ad, nxt_jmp))
                if self.db.mem.is_func(nxt_jmp):
                    has_ret = not self.is_noreturn(nxt_jmp, entry)
                else:
                    stack.append(nxt_jmp)
        elif self.is_call(inst):
            op = inst.operands[-1]
            if op.type == self.CS_OP_IMM:
                imm = unsigned(op.value.imm)
                self.api.add_xref(ad, imm)
                if add_if_code:
                    added_xrefs.append((ad, imm))
                if not self.db.mem.is_func(imm):
                    # Analyze the callee first.
                    self.analyze_flow(imm, True, False, add_if_code)
                if self.db.mem.is_func(imm) and self.is_noreturn(imm, entry):
                    # Callee never returns: skip the fall-through.
                    self.__add_prefetch(inner_code, inst)
                    continue
            nxt = inst.address + inst.size
            stack.append(nxt)
        else:
            # Ordinary instruction: continue sequentially.
            nxt = inst.address + inst.size
            stack.append(nxt)
    if add_if_code and has_bad_inst:
        # Not real code: undo every xref added during this walk.
        for from_ad, to_ad in added_xrefs:
            self.api.rm_xrefs(from_ad, to_ad)
        return -1
    # for ELF
    if entry in self.dis.binary.imports:
        flags = self.import_flags(entry)
    elif has_ret:
        flags = 0
    else:
        flags = FUNC_FLAG_NORETURN
    return flags
def analyze_operands(self, i, func_obj):
    """Inspect every operand of instruction *i* for memory references.

    For each operand that resolves to an address inside a section:
    add an xref, classify the pointed-to memory (offset / string /
    typed word / unknown) in the memory database, and queue unknown
    targets for a code analysis pass.  x86 EBP/RBP-relative operands
    are recorded as stack variables on *func_obj* instead.

    Parameters:
        i: a disassembled instruction (capstone-style: .operands,
           .address, .size).
        func_obj: the enclosing function's database entry, or None;
           only used to record stack variables (FUNC_VARS).
    """
    b = self.dis.binary
    for op in i.operands:
        if op.type == self.CS_OP_IMM:
            val = unsigned(op.value.imm)
        elif op.type == self.CS_OP_MEM and op.mem.disp != 0:
            if self.dis.is_x86:
                if op.mem.segment != 0:
                    # Segment-relative access: not a plain address.
                    continue
                if op.mem.index == 0:
                    # Compute the rip register
                    if op.mem.base == self.X86_REG_EIP or \
                            op.mem.base == self.X86_REG_RIP:
                        # RIP-relative: effective address is relative to
                        # the next instruction.
                        val = i.address + i.size + unsigned(op.mem.disp)
                    # Check if it's a stack variable
                    elif (op.mem.base == self.X86_REG_EBP or \
                            op.mem.base == self.X86_REG_RBP):
                        if func_obj is not None:
                            ty = self.db.mem.find_type(op.size)
                            # Record the frame-pointer-relative slot as a
                            # local variable of the function.
                            func_obj[FUNC_VARS][op.mem.disp] = [ty, None]
                        # Continue the loop !!
                        continue
                    else:
                        val = unsigned(op.mem.disp)
                else:
                    val = unsigned(op.mem.disp)
            # TODO: stack variables for arm/mips
            elif self.dis.is_arm:
                if op.mem.index == 0 and op.mem.base == self.ARM_REG_PC:
                    # PC-relative load: ARM PC reads as current + 8
                    # (i.size * 2) -- TODO confirm for thumb mode.
                    val = i.address + i.size * 2 + op.mem.disp
                else:
                    val = op.mem.disp
            elif self.dis.is_mips:
                if op.mem.base == self.MIPS_REG_GP:
                    # gp-relative: needs a known global pointer value.
                    if self.dis.mips_gp == -1:
                        continue
                    val = op.mem.disp + self.dis.mips_gp
                else:
                    val = op.mem.disp
        else:
            continue
        s = b.get_section(val)
        if s is None or s.start == 0:
            # Not a mapped address (or raw section at 0): ignore.
            continue
        self.api.add_xref(i.address, val)
        if not self.db.mem.exists(val):
            sz = op.size if self.dis.is_x86 else self.dis.wordsize
            deref = s.read_int(val, sz)
            # If (*val) is an address
            if deref is not None and b.is_address(deref):
                ty = MEM_OFFSET
                self.api.add_xref(val, deref)
                if not self.db.mem.exists(deref):
                    self.db.mem.add(deref, 1, MEM_UNK)
                    # Do an analysis on this value.
                    if deref not in self.pending and \
                            deref not in self.pending_not_curr and \
                            self.first_inst_are_code(deref):
                        # Queue for the background analyzer: (address,
                        # has_prolog, ...) -- see msg consumer.
                        self.pending_not_curr.add(deref)
                        self.msg.put(
                            (deref, self.has_prolog(deref), False, True, None))
            else:
                # Check if this is an address to a string
                sz = b.is_string(val)
                if sz != 0:
                    ty = MEM_ASCII
                else:
                    sz = op.size if self.dis.is_x86 else self.dis.wordsize
                    if op.type == self.CS_OP_MEM:
                        ty = self.db.mem.find_type(sz)
                    else:
                        ty = MEM_UNK
            self.db.mem.add(val, sz, ty)
            if ty == MEM_UNK:
                # Do an analysis on this value, if this is not code
                # nothing will be done.
                # jumps and calls are already analyzed in analyze_flow.
                if val not in self.pending and \
                        not (self.is_jump(i) or self.is_call(i)) and \
                        val not in self.pending_not_curr and \
                        self.first_inst_are_code(val):
                    self.pending_not_curr.add(val)
                    self.msg.put(
                        (val, self.has_prolog(val), False, True, None))
def __sub_analyze_flow(self, func_obj, entry, inner_code, add_if_code):
    """Full control-flow analysis of the function at *entry*.

    Walks all reachable instructions (DFS with per-path register
    contexts), fills *inner_code* (address -> instruction), adds xrefs,
    creates jumptables on the fly, tracks the stack pointer to guess
    the frame size and detect stdcall argument cleanup, and finally
    stores flags / frame size / args-restore count on *func_obj*.

    Returns:
        False on a fatal regs-context failure, or when a bad
        instruction was found while add_if_code is set (all xrefs
        added during the walk are rolled back);
        True otherwise.
    """
    # If entry is not "code", we have to rollback added xrefs
    has_bad_inst = False
    if add_if_code:
        added_xrefs = []
    regsctx = self.arch_analyzer.new_regs_context()
    if regsctx is None:
        # fatal error, but don't quit to let the user save the database
        return False
    flags = 0
    stack_err = False  # NOTE(review): assigned but never used in this body
    args_restore = 0
    if func_obj is not None:
        frame_size = func_obj[FUNC_FRAME_SIZE]
        if frame_size == -1:
            # Unknown frame size: let the arch helper guess it.
            frame_size = self.ARCH_UTILS.guess_frame_size(self, entry)
            # used in arch/*/analyzer.c
            func_obj[FUNC_FRAME_SIZE] = frame_size
    else:
        frame_size = -1
    ret_found = False
    # Explicit DFS stack of (register-context, address) pairs.
    stack = [(regsctx, entry)]
    while stack:
        (regsctx, ad) = stack.pop()
        if self.db.mem.is_data(ad):
            # Never disassemble known data.
            continue
        inst = self.disasm(ad)
        if inst is None:
            # Undecodable: abort (rollback mode) or skip this path.
            has_bad_inst = True
            if add_if_code:
                break
            continue
        if ad in inner_code:
            # Already visited on another path.
            continue
        if self.gctx.debugsp:
            # Debug aid: remember the stack-pointer value at each address.
            ALL_SP[ad] = self.arch_analyzer.get_sp(regsctx)
        inner_code[ad] = inst

        ##### RETURN #####
        if self.is_ret(inst):
            self.__add_prefetch(regsctx, inst, func_obj, inner_code)
            ret_found = True
            if self.dis.is_x86 and len(inst.operands) == 1:
                # `ret imm` pops imm bytes of arguments -> stdcall.
                args_restore = inst.operands[0].value.imm
                flags |= FUNC_FLAG_STDCALL
            if self.arch_analyzer.get_sp(regsctx) != 0:
                # Stack pointer not balanced at return: analysis error.
                flags |= FUNC_FLAG_ERR_STACK_ANALYSIS

        ##### UNCONDITIONAL JUMP #####
        elif self.is_uncond_jump(inst):
            self.__add_prefetch(regsctx, inst, func_obj, inner_code)
            op = inst.operands[-1]
            jmp_ad = None
            if op.type == self.ARCH_UTILS.OP_IMM:
                jmp_ad = unsigned(op.value.imm)
            else:
                is_jmptable = inst.address in self.jmptables
                # Create a jumptable if necessary
                if not is_jmptable:
                    if op.type == self.ARCH_UTILS.OP_REG:
                        # Try to resolve the register value; otherwise
                        # attempt jumptable detection.
                        jmp_ad = self.arch_analyzer.reg_value(regsctx, op.value.reg)
                        if jmp_ad is None:
                            is_jmptable = self.auto_jump_table(inst, inner_code)
                    elif op.type == self.ARCH_UTILS.OP_MEM:
                        self.arch_analyzer.analyze_operands(
                            self, regsctx, inst, func_obj, False)
                        is_jmptable = self.auto_jump_table(inst, inner_code)
                if is_jmptable:
                    # Follow every table entry, each with its own cloned
                    # register context.
                    table = self.jmptables[inst.address].table
                    for n in table:
                        r = self.arch_analyzer.clone_regs_context(regsctx)
                        stack.append((r, n))
                    self.api.add_xrefs_table(ad, table)
                    if add_if_code:
                        added_xrefs.append((ad, table))
                    continue
                self.arch_analyzer.analyze_operands(
                    self, regsctx, inst, func_obj, False)
                # TODO: assume there is return
                if jmp_ad is None:
                    # Unresolvable indirect jump: assume the function
                    # returns (unless the import says noreturn).
                    if entry in self.db.imports:
                        ret_found |= self.db.imports[entry] & FUNC_FLAG_NORETURN
                    else:
                        ret_found = True
                    continue
            self.api.add_xref(ad, jmp_ad)
            if self.db.mem.is_func(jmp_ad):
                # Tail-jump into a known function: inherit its
                # attributes instead of walking into it.
                ret_found |= not self.is_func_noreturn(jmp_ad, entry)
                fo = self.functions[jmp_ad]
                flags = fo[FUNC_FLAGS]
                frame_size = max(fo[FUNC_FRAME_SIZE], frame_size)
                args_restore = fo[FUNC_ARGS_RESTORE]
            else:
                stack.append((regsctx, jmp_ad))
            if add_if_code:
                added_xrefs.append((ad, jmp_ad))

        ##### CONDITIONAL JUMP #####
        elif self.is_cond_jump(inst):
            prefetch = self.__add_prefetch(regsctx, inst, func_obj, inner_code)
            op = inst.operands[-1]
            if op.type == self.ARCH_UTILS.OP_IMM:
                # Fall-through begins after the prefetched (delay slot)
                # instruction when one exists.
                if prefetch is None:
                    direct_nxt = inst.address + inst.size
                else:
                    direct_nxt = prefetch.address + prefetch.size
                # NOTE(review): unsigned() applied twice -- redundant?
                nxt_jmp = unsigned(unsigned(op.value.imm))
                self.api.add_xref(ad, nxt_jmp)
                if self.db.mem.is_func(direct_nxt):
                    # Fall-through lands in a known function: inherit.
                    ret_found |= not self.is_func_noreturn(direct_nxt, entry)
                    fo = self.functions[direct_nxt]
                    flags = fo[FUNC_FLAGS]
                    frame_size = max(fo[FUNC_FRAME_SIZE], frame_size)
                    args_restore = fo[FUNC_ARGS_RESTORE]
                else:
                    stack.append((regsctx, direct_nxt))
                if add_if_code:
                    added_xrefs.append((ad, nxt_jmp))
                if self.db.mem.is_func(nxt_jmp):
                    ret_found |= not self.is_func_noreturn(nxt_jmp, entry)
                else:
                    # Taken branch gets its own cloned register context.
                    newctx = self.arch_analyzer.clone_regs_context(regsctx)
                    stack.append((newctx, nxt_jmp))
            else:
                self.arch_analyzer.analyze_operands(
                    self, regsctx, inst, func_obj, False)
                # TODO : jump tables for conditional jumps ?

        ##### CALL #####
        elif self.is_call(inst):
            op = inst.operands[-1]
            call_ad = None
            # Remember SP before the call so stdcall cleanup can be
            # re-applied after the callee is analyzed.
            sp_before = self.arch_analyzer.get_sp(regsctx)
            if op.type == self.ARCH_UTILS.OP_IMM:
                call_ad = unsigned(op.value.imm)
            elif op.type == self.ARCH_UTILS.OP_REG:
                # FIXME : for MIPS, addresses are loaded in t9 (generally)
                # then jalr t9 is executed. The problem here is that we
                # will analyze twice the function. The first time is done
                # by the function analyze_imm.
                call_ad = self.arch_analyzer.reg_value(regsctx, op.value.reg)
            else:
                self.arch_analyzer.analyze_operands(
                    self, regsctx, inst, func_obj, False)
                if self.db.mem.is_func(op.mem.disp) and \
                        self.is_func_noreturn(op.mem.disp, entry):
                    # Call through memory to a noreturn function: stop.
                    self.__add_prefetch(regsctx, inst, func_obj, inner_code)
                    continue
            if call_ad is not None:
                self.api.add_xref(ad, call_ad)
                if add_if_code:
                    added_xrefs.append((ad, call_ad))
                self.analyze_flow(
                    call_ad,
                    entry_is_func=True,
                    force=False,
                    add_if_code=add_if_code)
                # TODO: if the address was already in the pending list
                # we don't have a computed args size
                # Reset the stack pointer to frame_size to handle stdcall.
                if frame_size != -1 and call_ad in self.functions:
                    fo = self.functions[call_ad]
                    if fo is not None:
                        n = fo[FUNC_ARGS_RESTORE]
                        if n:
                            # Callee pops its own args: re-balance SP.
                            self.arch_analyzer.set_sp(regsctx, sp_before + n)
                if self.db.mem.is_func(call_ad) and \
                        self.is_func_noreturn(call_ad, entry):
                    # Callee never returns: don't follow the fall-through.
                    self.__add_prefetch(regsctx, inst, func_obj, inner_code)
                    continue
            # It seems it doesn't matter for the prefetched instruction
            nxt = inst.address + inst.size
            stack.append((regsctx, nxt))

        ##### OTHERS #####
        else:
            self.arch_analyzer.analyze_operands(
                self, regsctx, inst, func_obj, False)
            nxt = inst.address + inst.size
            if nxt not in self.functions:
                # Stop at the boundary of the next known function.
                stack.append((regsctx, nxt))

    # Remove all xrefs, this is not a correct flow
    if add_if_code and has_bad_inst:
        for from_ad, to_ad in added_xrefs:
            if isinstance(to_ad, list):
                self.api.rm_xrefs_table(from_ad, to_ad)
            else:
                self.api.rm_xref(from_ad, to_ad)
        return False

    if func_obj is not None:
        if entry in self.db.imports:
            # Import noreturn status overrides the walk's conclusion.
            if self.db.imports[entry] & FUNC_FLAG_NORETURN:
                flags |= FUNC_FLAG_NORETURN
        elif not ret_found:
            flags |= FUNC_FLAG_NORETURN
        func_obj[FUNC_FLAGS] = flags
        func_obj[FUNC_FRAME_SIZE] = frame_size
        func_obj[FUNC_ARGS_RESTORE] = args_restore
    return True