def _process_cmov(self, cur_bloc, last_instruction):
    """
    Rewrite a block-terminating CMOVcc into explicit control flow.

    The CMOV is removed from *cur_bloc* and replaced by two new blocks:
    a conditional-branch block (Jcc with the CMOV's condition code) and a
    MOV block performing the assignment, both converging on the original
    fall-through successor.

    :param cur_bloc: AsmBlock whose last instruction is the CMOVcc
    :param last_instruction: the CMOVcc instruction itself
    """
    mov_block = AsmBlock(self.loc_db.add_location())
    branch_block = AsmBlock(self.loc_db.add_location())
    cmov_dst = last_instruction.args[0]
    cmov_src = last_instruction.args[1]

    # Original successor of cur_bloc becomes the join point of both paths.
    join_loc = next(iter(cur_bloc.bto)).loc_key

    # MOV block: perform the conditional assignment, then jump to the join.
    mov_block.lines.append(create_mov_instruction(self.mode, cmov_dst, cmov_src))
    mov_block.lines.append(create_jump_instruction(self.mode, ExprLoc(join_loc, self.mode)))

    # CMOVcc -> Jcc: reuse the condition-code suffix of the mnemonic.
    jcc_name = "J" + last_instruction.name[len("CMOV"):]

    # Drop the CMOV; keep the block non-empty so it still assembles.
    cur_bloc.lines.pop()
    if not cur_bloc.lines:
        cur_bloc.lines = [create_nop(self.mode)]

    branch_block.lines.append(
        create_cond_branch_instruction(self.mode, jcc_name,
                                       ExprLoc(mov_block.loc_key, self.mode)))

    # Wire the CFG: cur_bloc -> branch_block -> (mov_block | join),
    # and mov_block -> join.
    mov_block.bto = {AsmConstraintTo(join_loc)}
    branch_block.bto = {AsmConstraintNext(join_loc),
                        AsmConstraintTo(mov_block.loc_key)}
    cur_bloc.bto = {AsmConstraintNext(branch_block.loc_key)}

    self.add_block(mov_block)
    self.add_block(branch_block)
def _insert_flat_block(self, source_flat_block, symb_exec, flat_block_to_loc_key):
    """
    Copies source_flat_block and sets its successors according to flat_block_to_loc_key
    :param flat_block_to_loc_key: dictionary mapping flat_blocks to respective loc_keys
    :param symb_exec: instance of current symbolic execution engine
    :param source_flat_block: flat_block to be inserted
    :return: set of flat_blocks newly discovered as successors (not yet in
             flat_block_to_loc_key before this call) that still need processing
    """
    # we're not using redirect_successors after copying to avoid executing the same loops multiple times
    source_block = self.asmcfg.loc_key_to_block(
        source_flat_block.block_loc_key)
    # maps old loc_keys to (new loc_key, flat_block) for jump-target patching below
    tobe_processed = {}
    new_flat_blocks = set()
    new_block_loc_key = flat_block_to_loc_key[source_flat_block]
    if self.out_asmcfg.loc_key_to_block(new_block_loc_key) is not None:
        raise Exception("Target loc_key is already associated to a block")
    new_block = AsmBlock(new_block_loc_key)
    # copy instructions
    for ln in source_block.lines:
        # deep-copy args so patching the new block's jump targets cannot
        # mutate instructions of the source asmcfg
        tmp_ln = instruction_x86(ln.name, ln.mode, [i.copy() for i in ln.args],
                                 ln.additional_info)
        tmp_ln.b = ln.b
        tmp_ln.l = ln.l
        tmp_ln.offset = ln.offset
        new_block.addline(tmp_ln)
    constraints = source_block.bto
    # try to simplify the destination if it's a primary flattening block
    if not self.flat_loops[source_block.loc_key].is_default:
        logger.debug("current block is a part of primary loc_keys")
        # symbolic value of IRDst after executing the block: either a concrete
        # offset, a location, or something unresolvable (see else branch)
        simplified_target = symb_exec.eval_expr(self.ircfg.IRDst)
        if isinstance(simplified_target, ExprInt):
            simplified_target = self.asmcfg.loc_db.get_offset_location(
                int(simplified_target))
        elif isinstance(simplified_target, ExprLoc):
            simplified_target = simplified_target.loc_key
        else:
            # there's probably a(n) (series of) unknown instruction(s) causing an implicit conditional assignment
            # such as CMOV or SBB->AND->ADD, prepend comparison + cond jump if it happens to be common, or add it to
            # ExtendedAsmCFG.extended_discovery and split flow on the final instruction
            # it's also possible that it's not related to any cff loop at all
            addr = self.asmcfg.loc_db.get_location_offset(
                source_flat_block.block_loc_key)
            addr = hex(addr) if addr else addr
            logger.warning(
                "Couldn't simplify loc_key %s at %s, continuing" %
                (str(source_flat_block.block_loc_key), addr))
            logger.warning("the simplified target is %s of instance %s" %
                           (simplified_target, type(simplified_target)))
            simplified_target = None
        if simplified_target:
            # the block has a single, known successor now
            constraints = {AsmConstraintTo(simplified_target)}
            mode = self.asmcfg.mode
            # remove redundant comparison
            dp = DependencyGraph(self.ircfg, True)
            block_loc_key = source_block.loc_key
            res = next(
                dp.get(block_loc_key, {self.ircfg.IRDst}, None,
                       {block_loc_key}))
            for depnode in res.relevant_nodes:
                ind = depnode.line_nb
                # translate IR line number into an index of new_block.lines
                # (the IR block may have more assignblks than asm lines)
                ind -= (len(self.ircfg.blocks[block_loc_key]) -
                        len(new_block.lines))
                if new_block.lines[ind].name == "CMP":
                    new_block.lines.pop(ind)
            # replace the original terminator with a direct jump to the
            # simplified target
            new_block.lines[-1] = create_jump_instruction(
                mode, ExprLoc(simplified_target, mode))
    # copy constraints
    new_bto = set()
    for constraint in constraints:
        if not self.asmcfg.loc_key_to_block(constraint.loc_key):
            logger.debug("Skipping bad constraint %s" % constraint.loc_key)
            continue
        flat_block = self.flat_loops.get_block(constraint.loc_key, symb_exec,
                                               source_flat_block)
        if flat_block not in flat_block_to_loc_key:
            # first time we see this successor: allocate its output loc_key
            # and queue it for processing
            new_flat_blocks.add(flat_block)
            new_loc_key = self.out_asmcfg.loc_db.add_location()
            tobe_processed[constraint.loc_key] = (new_loc_key, flat_block)
            flat_block_to_loc_key[flat_block] = new_loc_key
        else:
            new_loc_key = flat_block_to_loc_key[flat_block]
        new_bto.add(AsmConstraint(new_loc_key, constraint.c_t))
    new_block.bto = new_bto
    new_block.alignment = source_block.alignment
    # change jmp targets
    if new_block.lines:
        for ind, arg in enumerate(list(new_block.lines[-1].args)):
            if isinstance(arg, ExprLoc):
                if not self.asmcfg.loc_key_to_block(arg.loc_key):
                    logger.debug("Skipping bad constraint %s" % arg.loc_key)
                    continue
                new_target, flat_block = tobe_processed.get(
                    arg.loc_key, (None, None))
                if not new_target:
                    flat_block = self.flat_loops.get_block(
                        arg.loc_key, symb_exec, source_flat_block)
                    new_target = flat_block_to_loc_key.get(flat_block)
                # None in case of irrelevant calls
                logger.debug("new target: %s" % new_target)
                if new_target:
                    new_block.lines[-1].args[ind] = ExprLoc(
                        new_target, arg.size)
    self.out_asmcfg.add_block(new_block)
    return new_flat_blocks
def _process_jmp_table(self, cur_bloc, mn, attrib, loc_db, pool_bin, offsets_to_dis):
    """
    Recover a jump table ending *cur_bloc* and rewrite it as an explicit
    CMP/Jcc chain.

    The routine symbolically derives the IRDst equation of the indirect jump,
    enumerates its satisfying destinations with z3 (bounded to 500 solutions),
    and appends one comparison block per destination/range. Discovered
    destinations are added to *offsets_to_dis* for further disassembly.

    :param cur_bloc: block ending with the indirect jump (jump-table dispatch)
    :param mn: mnemonic/machine class used to build the IR arch
    :param attrib: architecture attribute (bitness) for get_ira
    :param loc_db: location database of the current asmcfg
    :param pool_bin: binary stream used for memory dereferences during symbolic execution
    :param offsets_to_dis: set collecting destination offsets to disassemble
    """
    # TODO add support for jump tables with "AND cntrl_var, range" boundary check; such jmp tables were present only
    # in library functions in Stantinko samples
    # add current block to the asmcfg to make it accessible in the ircfg edges, add_block is called anyway right
    # after this callback, it will notice that the block has been already added
    self.add_block(cur_bloc)
    dst_address = loc_db.get_location_offset(cur_bloc.loc_key)
    logger.info("Possible jump table addr: 0x%x" % dst_address)
    ira = get_ira(mn, attrib)
    ir_arch = ira(loc_db)
    ircfg = ir_arch.new_ircfg_from_asmcfg(self)
    # the previous blocks should have exactly 1 predecessor dictating range
    predecessors = self.predecessors(cur_bloc.loc_key)
    if len(predecessors) != 1:
        logger.info("Expected exactly one predecessor")
        return
    predecessor = ircfg.blocks[predecessors.pop()]
    irdst_block = ircfg.blocks[cur_bloc.loc_key]
    # the asm block may have been split into several IR blocks; walk successors
    # until the IR block whose destination is the memory dereference (the
    # actual table access) is found
    if len(irdst_block.assignblks) != len(cur_bloc.lines):
        processed = set()
        todo = {irdst_block.loc_key}
        while not irdst_block.dst.is_mem():
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            irdst_block = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))
    # we shouldn't stumble upon crashing segm and call operators even thought implicit is required to process
    # initial IRDst(mentioned operators cause crashes of the engine behind implicit) since we operate only on the
    # 2 crucial basic blocks. The predecessor contains range of the jump table, we use it to determine constructs
    # of the jump table and track back base code segment address assignment to target the msvc compiler and x64
    # architecture, other compilers use directly RIP related addressing to get the address.
    # get real predecessor
    asm_block = self.loc_key_to_block(predecessor.loc_key)
    if len(predecessor.assignblks) != len(asm_block.lines):
        # predecessor asm block was split too; walk IR successors until the
        # IR block directly preceding cur_bloc is reached
        processed = set()
        todo = {predecessor.loc_key}
        while cur_bloc.loc_key not in ircfg.successors(predecessor.loc_key):
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            predecessor = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))
    # get jump_table_control_variable from predecessor
    dg = DependencyGraph(ircfg, implicit=True, apply_simp=True,
                         follow_mem=True, follow_call=False)
    jtcdg = JTCVariableDependencyGraph(predecessor.loc_key, ircfg,
                                       implicit=True, apply_simp=True,
                                       follow_mem=False, follow_call=False)
    dependency_result_iter = iter(jtcdg.get(irdst_block.loc_key,
                                            {ircfg.IRDst},
                                            len(predecessor.assignblks),
                                            {predecessor.loc_key}))
    solution_predecessor = next(dependency_result_iter)
    # jump table control variable
    jtc_var = jtcdg.jtc_var
    if not jtc_var:
        logger.info("couldn't determine single jump table control variable")
        return
    # get symbolic execution engine to be used in both predecessor and jmp table block
    symb_exec_both = MySymbolicExecutionEngine(pool_bin, jtc_var, ir_arch)
    try:
        # symbolically evaluate lines influencing IRDst of the predecessor leading to jtc_var
        for line_nb in sorted({node.line_nb
                               for node in solution_predecessor.relevant_nodes
                               if node.loc_key == predecessor.loc_key}):
            assign_blk = predecessor.assignblks[line_nb]
            symb_exec_both.eval_updt_assignblk(assign_blk)
    except (KeyError, TypeError):
        logger.error(
            "Couldn't symbolically eval predecessor of 0x%x" %
            loc_db.get_location_offset(cur_bloc.loc_key))
        # stantinko contains illegal unreachable dereferences prior jmp tables, such as
        # xor eax, eax; movsx eax, byte ptr [eax]
        return
    # get symbolic execution engine supporting binary memory dereference
    symb_exec_minimal = MySymbolicExecutionEngine(pool_bin, ir_arch,
                                                  symb_exec_both.symbols.copy())
    predecessor_irdst_equation = symb_exec_both.symbols[ircfg.IRDst]
    # get equation whose solutions solve the indirect jump
    irdst_block = ircfg.blocks[cur_bloc.loc_key]
    if len(irdst_block.assignblks) != len(cur_bloc.lines):
        # same split-block walk as above, this time accumulating symbolic
        # state of the traversed IR blocks along the way
        processed = set()
        todo = {irdst_block.loc_key}
        while not irdst_block.dst.is_mem():
            symb_exec_both.eval_updt_irblock(irdst_block)
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            irdst_block = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))
    irdst_equation = symb_exec_both.eval_updt_irblock(irdst_block)
    sizes = set()
    # prevent mem processing via raw arrays by using var ID instead
    # we also want to set a maximum boundary so slices don't cause the sat solver generate a huge number of results
    visitor = ExprVisitorCallbackTopToBottom(
        lambda x: self._eliminate_jtc_var_slice_cb(x, sizes, jtc_var))
    irdst_equation = visitor.visit(irdst_equation)
    predecessor_irdst_equation = visitor.visit(predecessor_irdst_equation)
    size_boundary = jtc_var.size
    # smallest slice size (> 1 bit) observed bounds the solver's search space
    sizes = sorted(filter(lambda x: x > 1, sizes))
    if sizes:
        size_boundary = sizes[0]
    # substitute the control variable with a fresh ID so z3 treats it as a
    # free variable rather than a concrete memory/register expression
    jtc_var_id = ExprId("jtc_var", jtc_var.size)
    irdst_equation = irdst_equation.replace_expr({jtc_var: jtc_var_id})
    predecessor_irdst_equation = predecessor_irdst_equation.replace_expr(
        {jtc_var: jtc_var_id})
    # track possible CS base address dependency, ignore control variable from predecessor
    eliminated_jtc_var_equation = irdst_equation.replace_expr(
        {jtc_var_id: ExprInt(0, jtc_var_id.size)})
    evaluated_ejtc_var_equation = symb_exec_both.eval_expr(
        eliminated_jtc_var_equation)
    if not evaluated_ejtc_var_equation.is_int():
        # we need to determine code base
        dependencies = dg._follow_apply_cb(evaluated_ejtc_var_equation)
        expr_deps = {fexpr.element for fexpr in dependencies if fexpr.follow}
        dg_base = DependencyGraph(ircfg, implicit=False, apply_simp=True,
                                  follow_mem=True, follow_call=False)
        dependency_result_iter = iter(dg_base.get(cur_bloc.loc_key, expr_deps,
                                                  len(cur_bloc.lines),
                                                  {self.heads()[0]}))
        solution = next(dependency_result_iter)
        code_base_dict = {expr: solution.emul(ir_arch)[expr]
                          for expr in expr_deps}
        irdst_equation = irdst_equation.replace_expr(code_base_dict)
        predecessor_irdst_equation = predecessor_irdst_equation.replace_expr(
            code_base_dict)
    # we need backward slice of the jump table destination dependencies to retain the other independent assignments
    # during cmp chain assembling
    dependency_result = dg.get(cur_bloc.loc_key, {ircfg.IRDst},
                               len(cur_bloc.lines), {cur_bloc.loc_key})
    dependent_line_nbs = {}
    for solution in dependency_result:
        dependent_line_nbs.setdefault(solution.loc_key, set()).update(
            {dn.line_nb for dn in solution.relevant_nodes})
    # keep instructions NOT in the backward slice; they are independent of the
    # dispatch and must be preserved in the rewritten block
    cur_bloc_new_lines = []
    for loc_key, lines in dependent_line_nbs.items():
        for line_nb, assignblk in enumerate(ircfg.blocks[loc_key].assignblks):
            if line_nb not in lines:
                symb_exec_minimal.eval_assignblk(assignblk)
                cur_bloc_new_lines.append(assignblk.instr)
    # find a register/memory symbol whose value is a resolvable function of
    # jtc_var — this is what the generated CMP instructions will test
    comparison_reg_id = None
    comparison_reg_value = None
    if jtc_var not in symb_exec_minimal.symbols.symbols_id:
        comparison_reg_id = jtc_var
        comparison_reg_value = jtc_var
    else:
        for symbol, comparison_reg_value in symb_exec_minimal.symbols.symbols_id.items():
            if jtc_var in comparison_reg_value and \
                    (symbol.is_mem() or
                     (symbol.is_id() and
                      symbol.name not in ["RIP", "EIP", "zf", "nf", "pf", "of",
                                          "cf", "af", "df", ircfg.IRDst.name])):
                replaced_jtcv = comparison_reg_value.replace_expr(
                    {jtc_var: ExprInt(0, jtc_var.size)})
                if isinstance(symb_exec_minimal.eval_expr(replaced_jtcv), ExprInt):
                    comparison_reg_id = symbol
                    break
    if not comparison_reg_id or not comparison_reg_value:
        logger.debug("Couldn't find any candidate for comparison register at 0x%x"
                     % loc_db.get_location_offset(cur_bloc.loc_key))
        return
    # local import: z3 is an optional, heavyweight dependency
    from miasm.ir.translators import Translator
    import z3
    translator = Translator.to_language("z3")
    solver = z3.Solver()
    logger.debug("predecessor_irdst_equation: %s" % str(predecessor_irdst_equation))
    logger.debug(("dst_address: 0x%x" % dst_address))
    logger.debug(("jump_table_control_variable: %s" % str(jtc_var)))
    # constrain: executing the predecessor with this control value reaches
    # cur_bloc, and the value stays within the observed slice boundary
    solver.add(translator.from_expr(predecessor_irdst_equation) == dst_address)
    translated_jtc_var = translator.from_expr(jtc_var_id)
    solver.add(translated_jtc_var >= 0)
    solver.add(translated_jtc_var < 2 ** (size_boundary - 1) - 1)
    if solver.check() != z3.sat:
        logger.debug("Couldn't find at least one jump table control variable")
        return
    dbg_destinations = set()
    next_loc_key = new_block_loc_key = loc_db.add_location()
    logger.debug("comparison_reg_id: %s" % str(comparison_reg_id))
    dst_ranges = {}
    counter = 0
    # enumerate solutions (bounded to 500), collapsing control-variable values
    # into per-destination intervals
    while counter < 500:
        val = solver.model()[translated_jtc_var].as_long()
        final_irdst_equation = irdst_equation.replace_expr(
            {jtc_var_id: ExprInt(val, jtc_var_id.size)})
        final_dst = int(symb_exec_both.eval_expr(final_irdst_equation))
        cmp_reg_val = comparison_reg_value.replace_expr(
            {jtc_var: ExprInt(val, jtc_var.size)})
        cmp_reg_val = int(symb_exec_minimal.eval_expr(cmp_reg_val))
        dst_ranges[final_dst] = dst_ranges.get(final_dst, interval()).union(
            [(cmp_reg_val, cmp_reg_val)])
        dbg_destinations.add(final_dst)
        offsets_to_dis.add(final_dst)
        # exclude this solution and ask for the next one
        solver.add(translated_jtc_var !=
                   translator.from_expr(ExprInt(val, jtc_var_id.size)))
        if solver.check() != z3.sat:
            break
        counter += 1
    if counter == 500:
        raise RuntimeError("Interrupted; there might be a broken slice")
    # synthesize the CMP/Jcc chain: one block per contiguous value range
    for dst, interv in dst_ranges.items():
        cond_target_loc_key = loc_db.get_or_create_offset_location(dst)
        for lower, upper in interv:
            lower = ExprInt(lower, self.mode)
            upper = ExprInt(upper, self.mode)
            new_asm_block = AsmBlock(new_block_loc_key)
            new_block_loc_key = loc_db.add_location()
            if lower == upper:
                # single value: equality test suffices
                new_asm_block.lines = create_cmp_j_instructions(
                    self.mode, comparison_reg_id, lower,
                    ExprLoc(cond_target_loc_key, self.mode), "JZ")
                new_asm_block.add_cst(cond_target_loc_key, "c_to")
                new_asm_block.add_cst(new_block_loc_key, "c_next")
            else:
                upper_check_loc_key = loc_db.add_location()
                # lower boundary check
                new_asm_block.lines = create_cmp_j_instructions(
                    self.mode, comparison_reg_id, lower,
                    ExprLoc(new_block_loc_key, self.mode), "JB")
                new_asm_block.add_cst(new_block_loc_key, "c_to")
                new_asm_block.add_cst(upper_check_loc_key, "c_next")
                # upper boundary check
                upper_check_block = AsmBlock(upper_check_loc_key)
                upper_check_block.lines = create_cmp_j_instructions(
                    self.mode, comparison_reg_id, upper,
                    ExprLoc(cond_target_loc_key, self.mode), "JBE")
                upper_check_block.add_cst(cond_target_loc_key, "c_to")
                upper_check_block.add_cst(new_block_loc_key, "c_next")
                self.add_block(upper_check_block)
            self.add_block(new_asm_block)
    # trigger last jump unconditionally
    new_asm_block.bto = {AsmConstraintTo(cond_target_loc_key)}
    new_asm_block.lines = [create_jump_instruction(
        self.mode, ExprLoc(cond_target_loc_key, self.mode))]
    # replace the dispatch block's body with the slice-independent lines and
    # fall through into the generated comparison chain
    cur_bloc.lines = cur_bloc_new_lines
    cur_bloc.add_cst(next_loc_key, "c_next")
    if not cur_bloc.lines:
        cur_bloc.lines = [create_nop(self.mode)]
    self.jmp_table_loc_keys.add(cur_bloc.loc_key)
    logger.debug("destinations: %s" % pformat([hex(i or 0) for i in dbg_destinations]))
    logger.debug("blocks: %d" % counter)
# Run the useless-block removal pass and verify the resulting CFG plus the
# AsmCFG helper methods (loc_key_to_block, bad-block queries).
dg.enable_passes([remove_useless_blocks])
asmcfg = dg(asmcfg)

### Only two asmcfg should remain
assert len(asmcfg) == 2
assert first_block.loc_key in asmcfg.nodes()
assert last_block_loc_key in asmcfg.nodes()

## Graph the final output
# use a context manager so the dot file is flushed and closed deterministically
# (the original `open(...).write(...)` leaked the file handle)
with open("graph2.dot", "w") as dot_fd:
    dot_fd.write(asmcfg.dot())

# Test helper methods
## loc_key_to_block should always be updated
assert asmcfg.loc_key_to_block(first_block.loc_key) == first_block
testlabel = mdis.loc_db.get_or_create_name_location("testlabel")
my_block = AsmBlock(testlabel)
asmcfg.add_block(my_block)
assert len(asmcfg) == 3
assert asmcfg.loc_key_to_block(first_block.loc_key) == first_block
assert asmcfg.loc_key_to_block(my_block.loc_key) == my_block

## Bad asmcfg
assert len(list(asmcfg.get_bad_blocks())) == 0
assert len(list(asmcfg.get_bad_blocks_predecessors())) == 0

### Add a bad block, not linked
testlabel_bad = mdis.loc_db.get_or_create_name_location("testlabel_bad")
my_bad_block = AsmBlockBad(testlabel_bad)
asmcfg.add_block(my_bad_block)
assert list(asmcfg.get_bad_blocks()) == [my_bad_block]
assert len(list(asmcfg.get_bad_blocks_predecessors())) == 0

### Link the bad block and update edges