def _process_sbb(self, cur_bloc, last_instruction):
    """
    Replace the last instruction of @cur_bloc by an explicit branch on the
    carry flag that loads either -1 or 0 into the instruction's first operand.

    Three new blocks are created and registered through self.add_block:
      * a conditional block holding a single "JB" towards the "-1" block,
      * a "-1" block (taken branch) that moves -1 into the operand and jumps
        to the original successor of @cur_bloc,
      * a "0" block (fall-through) that moves 0 into the operand and falls
        through to the same successor.
    @cur_bloc is left with its last line removed (a NOP is inserted when it
    becomes empty) and its only constraint now leads to the conditional block.

    @cur_bloc: block whose last line is rewritten; one of its `bto`
               constraints supplies the original successor
    @last_instruction: instruction whose args[0] is the register to set
                       (presumably an `SBB reg, reg` - verify against caller)
    """
    set_block = AsmBlock(self.loc_db.add_location())
    test_block = AsmBlock(self.loc_db.add_location())
    dst_reg = last_instruction.args[0]
    mode = self.mode

    # taken branch: reg = -1, then continue at the original successor
    all_ones = ExprInt(-1, dst_reg.size)
    set_block.lines.append(create_mov_instruction(mode, dst_reg, all_ones))
    successor = next(iter(cur_bloc.bto)).loc_key
    successor_ref = ExprLoc(successor, mode)
    set_block.lines.append(create_jump_instruction(mode, successor_ref))

    # drop the instruction that is being replaced
    cur_bloc.lines.pop()

    # fall-through branch: reg = 0
    clear_block = AsmBlock(self.loc_db.add_location())
    zero = ExprInt(0, dst_reg.size)
    clear_block.lines = [create_mov_instruction(mode, dst_reg, zero)]

    # JC is not implemented in miasm, using alias
    set_block_ref = ExprLoc(set_block.loc_key, mode)
    test_block.lines.append(
        create_cond_branch_instruction(mode, "JB", set_block_ref))

    # a block must not stay without any line
    if not cur_bloc.lines:
        cur_bloc.lines = [create_nop(mode)]

    # rewire the control flow:
    # cur_bloc -> test_block -> (set_block | clear_block) -> successor
    cur_bloc.bto = {AsmConstraintNext(test_block.loc_key)}
    test_block.bto = {
        AsmConstraintNext(clear_block.loc_key),
        AsmConstraintTo(set_block.loc_key),
    }
    set_block.bto = {AsmConstraintTo(successor)}
    clear_block.bto = {AsmConstraintNext(successor)}

    for created_block in (set_block, test_block, clear_block):
        self.add_block(created_block)
def add_instr_to_ircfg(self, instr, ircfg, loc_key=None, gen_pc_updt=False):
    """
    Lift the single native instruction @instr into @ircfg and return the
    loc_key under which it was added.

    @instr: instruction instance
    @ircfg: IRCFG instance receiving the lifted instruction
    @loc_key: loc_key of the instruction destination; when None it is
              obtained from the location database using the instruction's
              `offset` attribute (None if the attribute is missing)
    @gen_pc_updt: insert PC update effects between instructions
    """
    if loc_key is None:
        loc_key = self.loc_db.get_or_create_offset_location(
            getattr(instr, "offset", None)
        )

    # wrap the lone instruction into a block so the block-level lifter can be reused
    single_instr_block = AsmBlock(self.loc_db, loc_key)
    single_instr_block.lines = [instr]
    self.add_asmblock_to_ircfg(single_instr_block, ircfg, gen_pc_updt)
    return loc_key
def _process_jmp_table(self, cur_bloc, mn, attrib, loc_db, pool_bin, offsets_to_dis):
    """
    Try to resolve the indirect jump of @cur_bloc as a jump table and replace
    it with an explicit chain of compare-and-jump blocks.

    Outline of what the code below does:
      1. lift the current asmcfg to IR and locate the IR block whose IRDst is
         a memory expression, plus the single predecessor of @cur_bloc;
      2. recover the jump table control variable (jtc_var) from the
         predecessor and symbolically evaluate the predecessor lines and the
         jump block to obtain equations for both IRDst values;
      3. let z3 enumerate the values of jtc_var for which the predecessor
         reaches @cur_bloc, and compute the jump destination for each of them;
      4. add every destination to @offsets_to_dis and emit one cmp/jcc block
         per interval of comparison-register values, the last one being
         turned into an unconditional jump;
      5. keep in @cur_bloc only the instructions IRDst does not depend on
         (or a NOP) and record its loc_key in self.jmp_table_loc_keys.

    The method returns None on every path; it returns early (after logging)
    when the block does not have exactly one predecessor, when no single
    control variable or comparison register is found, when the predecessor
    cannot be symbolically evaluated, or when the solver finds no value.

    @cur_bloc: block suspected of ending with a jump table dispatch
    @mn, @attrib: forwarded to get_ira to pick the IR analysis class
    @loc_db: location database used for offsets and for new locations
    @pool_bin: forwarded to MySymbolicExecutionEngine
    @offsets_to_dis: collection (supports .add) receiving every resolved
                     destination offset
    Raises RuntimeError when 500 control variable values were enumerated and
    the solver still has more.
    """
    # TODO add support for jump tables with "AND cntrl_var, range" boundary check; such jmp tables were present only
    #   in library functions in Stantinko samples
    # add current block to the asmcfg to make it accessible in the ircfg edges, add_block is called anyway right
    # after this callback, it will notice that the block has been already added
    self.add_block(cur_bloc)
    dst_address = loc_db.get_location_offset(cur_bloc.loc_key)

    logger.info("Possible jump table addr: 0x%x" % dst_address)

    ira = get_ira(mn, attrib)

    ir_arch = ira(loc_db)
    ircfg = ir_arch.new_ircfg_from_asmcfg(self)

    # the previous blocks should have exactly 1 predecessor dictating range
    predecessors = self.predecessors(cur_bloc.loc_key)
    if len(predecessors) != 1:
        logger.info("Expected exactly one predecessor")
        return
    predecessor = ircfg.blocks[predecessors.pop()]

    irdst_block = ircfg.blocks[cur_bloc.loc_key]
    # a differing line count means the asm block was lifted into several IR blocks; walk the IR successors until
    # the block whose destination is a memory expression is found
    if len(irdst_block.assignblks) != len(cur_bloc.lines):
        processed = set()
        todo = {irdst_block.loc_key}
        while not irdst_block.dst.is_mem():
            # NOTE(review): set.pop() raises KeyError once `todo` is exhausted - confirm such a block always exists
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            irdst_block = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))

    # we shouldn't stumble upon crashing segm and call operators even though implicit is required to process
    # initial IRDst(mentioned operators cause crashes of the engine behind implicit) since we operate only on the
    # 2 crucial basic blocks. The predecessor contains range of the jump table, we use it to determine constructs
    # of the jump table and track back base code segment address assignment to target the msvc compiler and x64
    # architecture, other compilers use directly RIP related addressing to get the address.

    # get real predecessor
    asm_block = self.loc_key_to_block(predecessor.loc_key)
    # same multi-IR-block situation as above: walk forward until the IR block that directly precedes cur_bloc
    if len(predecessor.assignblks) != len(asm_block.lines):
        processed = set()
        todo = {predecessor.loc_key}
        while cur_bloc.loc_key not in ircfg.successors(predecessor.loc_key):
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            predecessor = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))

    # get jump_table_control_variable from predecessor
    dg = DependencyGraph(ircfg, implicit=True, apply_simp=True, follow_mem=True, follow_call=False)
    jtcdg = JTCVariableDependencyGraph(predecessor.loc_key,
                                       ircfg, implicit=True, apply_simp=True, follow_mem=False, follow_call=False)

    dependency_result_iter = iter(jtcdg.get(irdst_block.loc_key, {ircfg.IRDst}, len(predecessor.assignblks),
                                            {predecessor.loc_key}))
    # only the first solution is used
    # NOTE(review): next() raises StopIteration if the graph yields no solution - confirm this cannot happen
    solution_predecessor = next(dependency_result_iter)
    # jump table control variable
    jtc_var = jtcdg.jtc_var
    if not jtc_var:
        logger.info("couldn't determine single jump table control variable")
        return
    # get symbolic execution engine to be used in both predecessor and jmp table block
    symb_exec_both = MySymbolicExecutionEngine(pool_bin, jtc_var, ir_arch)
    try:
        # symbolically evaluate lines influencing IRDst of the predecessor leading to jtc_var
        for line_nb in sorted({node.line_nb for node in solution_predecessor.relevant_nodes
                               if node.loc_key == predecessor.loc_key}):
            assign_blk = predecessor.assignblks[line_nb]
            symb_exec_both.eval_updt_assignblk(assign_blk)
    except (KeyError, TypeError):
        logger.error(
            "Couldn't symbolically eval predecessor of 0x%x" % loc_db.get_location_offset(cur_bloc.loc_key))
        # stantinko contains illegal unreachable dereferences prior jmp tables, such as
        # xor eax, eax; movsx eax, byte ptr [eax]
        return
    # get symbolic execution engine supporting binary memory dereference
    # NOTE(review): the argument list differs from the symb_exec_both construction above (no jtc_var, extra
    # symbols copy) - confirm against MySymbolicExecutionEngine.__init__
    symb_exec_minimal = MySymbolicExecutionEngine(pool_bin, ir_arch, symb_exec_both.symbols.copy())
    predecessor_irdst_equation = symb_exec_both.symbols[ircfg.IRDst]

    # get equation whose solutions solve the indirect jump
    irdst_block = ircfg.blocks[cur_bloc.loc_key]
    # repeat the walk towards the memory-destination IR block, this time evaluating every visited block so that
    # symb_exec_both accumulates their effects
    if len(irdst_block.assignblks) != len(cur_bloc.lines):
        processed = set()
        todo = {irdst_block.loc_key}
        while not irdst_block.dst.is_mem():
            symb_exec_both.eval_updt_irblock(irdst_block)
            loc_key = todo.pop()
            if loc_key in processed:
                continue
            processed.add(loc_key)
            irdst_block = ircfg.blocks[loc_key]
            todo.update(ircfg.successors(loc_key))

    irdst_equation = symb_exec_both.eval_updt_irblock(irdst_block)
    # filled by the visitor callback below; presumably the sizes of the jtc_var slices it meets - verify against
    # _eliminate_jtc_var_slice_cb
    sizes = set()
    # prevent mem processing via raw arrays by using var ID instead
    # we also want to set a maximum boundary so slices don't cause the sat solver generate a huge number of results
    visitor = ExprVisitorCallbackTopToBottom(lambda x: self._eliminate_jtc_var_slice_cb(x, sizes, jtc_var))
    irdst_equation = visitor.visit(irdst_equation)
    predecessor_irdst_equation = visitor.visit(predecessor_irdst_equation)
    # default boundary is the full width of jtc_var; the smallest collected size above 1 takes precedence
    size_boundary = jtc_var.size
    sizes = sorted(filter(lambda x: x > 1, sizes))
    if sizes:
        size_boundary = sizes[0]
    # stand-in identifier substituted for jtc_var in both equations; it is what gets translated to z3
    jtc_var_id = ExprId("jtc_var", jtc_var.size)
    irdst_equation = irdst_equation.replace_expr({jtc_var: jtc_var_id})
    predecessor_irdst_equation = predecessor_irdst_equation.replace_expr({jtc_var: jtc_var_id})
    # track possible CS base address dependency, ignore control variable from predecessor
    eliminated_jtc_var_equation = irdst_equation.replace_expr({jtc_var_id: ExprInt(0, jtc_var_id.size)})
    evaluated_ejtc_var_equation = symb_exec_both.eval_expr(eliminated_jtc_var_equation)
    # a non-integer result with the control variable zeroed means the destination still depends on other
    # expressions, which are resolved below by a dependency search starting at the first head of the graph
    if not evaluated_ejtc_var_equation.is_int():
        # we need to determine code base
        dependencies = dg._follow_apply_cb(evaluated_ejtc_var_equation)
        expr_deps = {fexpr.element for fexpr in dependencies if fexpr.follow}
        dg_base = DependencyGraph(ircfg, implicit=False, apply_simp=True, follow_mem=True, follow_call=False)
        dependency_result_iter = iter(dg_base.get(cur_bloc.loc_key, expr_deps, len(cur_bloc.lines),
                                                  {self.heads()[0]}))
        solution = next(dependency_result_iter)
        # NOTE(review): solution.emul(ir_arch) is re-evaluated for every expression; it looks loop-invariant and
        # could be hoisted - confirm emul() has no side effects first
        code_base_dict = {expr: solution.emul(ir_arch)[expr] for expr in expr_deps}
        irdst_equation = irdst_equation.replace_expr(code_base_dict)
        predecessor_irdst_equation = predecessor_irdst_equation.replace_expr(code_base_dict)

    # we need backward slice of the jump table destination dependencies to retain the other independent assignments
    # during cmp chain assembling
    dependency_result = dg.get(cur_bloc.loc_key, {ircfg.IRDst}, len(cur_bloc.lines), {cur_bloc.loc_key})
    # loc_key -> line numbers IRDst depends on
    dependent_line_nbs = {}
    for solution in dependency_result:
        dependent_line_nbs.setdefault(solution.loc_key, set()).update(
            {dn.line_nb for dn in solution.relevant_nodes})
    # instructions that are NOT part of the slice: they are evaluated by the minimal engine and kept in cur_bloc
    cur_bloc_new_lines = []
    for loc_key, lines in dependent_line_nbs.items():
        for line_nb, assignblk in enumerate(ircfg.blocks[loc_key].assignblks):
            if line_nb not in lines:
                symb_exec_minimal.eval_assignblk(assignblk)
                cur_bloc_new_lines.append(assignblk.instr)
    # pick the expression the generated cmp instructions will test and its value expressed through jtc_var
    comparison_reg_id = None
    comparison_reg_value = None
    if jtc_var not in symb_exec_minimal.symbols.symbols_id:
        # the control variable was not overwritten by the retained instructions, compare against it directly
        comparison_reg_id = jtc_var
        comparison_reg_value = jtc_var
    else:
        # otherwise look for a memory cell or a register (program counter, flags and IRDst excluded) whose value
        # contains jtc_var and evaluates to a constant once jtc_var is zeroed
        for symbol, comparison_reg_value in symb_exec_minimal.symbols.symbols_id.items():
            if jtc_var in comparison_reg_value and (symbol.is_mem() or
                                                    (symbol.is_id() and symbol.name not in
                                                     ["RIP", "EIP", "zf", "nf", "pf", "of", "cf", "af", "df",
                                                      ircfg.IRDst.name])):
                replaced_jtcv = comparison_reg_value.replace_expr({jtc_var: ExprInt(0, jtc_var.size)})
                if isinstance(symb_exec_minimal.eval_expr(replaced_jtcv), ExprInt):
                    comparison_reg_id = symbol
                    break
    if not comparison_reg_id or not comparison_reg_value:
        logger.debug("Couldn't find any candidate for comparison register at 0x%x" %
                     loc_db.get_location_offset(cur_bloc.loc_key))
        return

    from miasm.ir.translators import Translator
    import z3
    translator = Translator.to_language("z3")
    solver = z3.Solver()

    logger.debug("predecessor_irdst_equation: %s" % str(predecessor_irdst_equation))
    logger.debug(("dst_address: 0x%x" % dst_address))
    logger.debug(("jump_table_control_variable: %s" % str(jtc_var)))
    # only control variable values for which the predecessor actually jumps to cur_bloc are of interest
    solver.add(translator.from_expr(predecessor_irdst_equation) == dst_address)
    translated_jtc_var = translator.from_expr(jtc_var_id)
    # restrict the control variable to a non-negative range limited by size_boundary
    solver.add(translated_jtc_var >= 0)
    solver.add(translated_jtc_var < 2 ** (size_boundary - 1) - 1)

    if solver.check() != z3.sat:
        logger.debug("Couldn't find at least one jump table control variable")
        return

    dbg_destinations = set()
    # next_loc_key remembers the head of the cmp chain, new_block_loc_key advances as blocks are created
    next_loc_key = new_block_loc_key = loc_db.add_location()

    logger.debug("comparison_reg_id: %s" % str(comparison_reg_id))
    # destination offset -> interval of comparison register values leading to it
    dst_ranges = {}
    counter = 0
    # enumerate solver models one by one, excluding each found value before asking for the next one
    while counter < 500:
        val = solver.model()[translated_jtc_var].as_long()
        final_irdst_equation = irdst_equation.replace_expr({jtc_var_id: ExprInt(val, jtc_var_id.size)})
        final_dst = int(symb_exec_both.eval_expr(final_irdst_equation))
        cmp_reg_val = comparison_reg_value.replace_expr({jtc_var: ExprInt(val, jtc_var.size)})
        cmp_reg_val = int(symb_exec_minimal.eval_expr(cmp_reg_val))

        dst_ranges[final_dst] = dst_ranges.get(final_dst, interval()).union([(cmp_reg_val, cmp_reg_val)])
        dbg_destinations.add(final_dst)
        offsets_to_dis.add(final_dst)

        solver.add(translated_jtc_var != translator.from_expr(ExprInt(val, jtc_var_id.size)))
        if solver.check() != z3.sat:
            break
        counter += 1

    # the loop ran out of its budget while the solver still had further values to offer
    if counter == 500:
        raise RuntimeError("Interrupted; there might be a broken slice")

    # build the cmp chain: one block for a single value, two blocks (lower and upper bound check) for a range
    for dst, interv in dst_ranges.items():
        cond_target_loc_key = loc_db.get_or_create_offset_location(dst)
        for lower, upper in interv:
            lower = ExprInt(lower, self.mode)
            upper = ExprInt(upper, self.mode)
            new_asm_block = AsmBlock(new_block_loc_key)
            # location of the block that will follow in the chain
            new_block_loc_key = loc_db.add_location()
            if lower == upper:
                new_asm_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, lower,
                                                                ExprLoc(cond_target_loc_key, self.mode), "JZ")
                new_asm_block.add_cst(cond_target_loc_key, "c_to")
                new_asm_block.add_cst(new_block_loc_key, "c_next")
            else:
                upper_check_loc_key = loc_db.add_location()
                # lower boundary check
                new_asm_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, lower,
                                                                ExprLoc(new_block_loc_key, self.mode), "JB")
                new_asm_block.add_cst(new_block_loc_key, "c_to")
                new_asm_block.add_cst(upper_check_loc_key, "c_next")
                # upper boundary check
                upper_check_block = AsmBlock(upper_check_loc_key)
                upper_check_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, upper,
                                                                    ExprLoc(cond_target_loc_key, self.mode),
                                                                    "JBE")
                upper_check_block.add_cst(cond_target_loc_key, "c_to")
                upper_check_block.add_cst(new_block_loc_key, "c_next")
                self.add_block(upper_check_block)
            self.add_block(new_asm_block)
    # trigger last jump unconditionally
    # (overwrites the lines and constraints of the last block created by the loops above)
    new_asm_block.bto = {AsmConstraintTo(cond_target_loc_key)}
    new_asm_block.lines = [create_jump_instruction(self.mode, ExprLoc(cond_target_loc_key, self.mode))]

    # the indirect jump and its slice are gone; cur_bloc now falls through to the head of the cmp chain
    cur_bloc.lines = cur_bloc_new_lines
    cur_bloc.add_cst(next_loc_key, "c_next")
    if not cur_bloc.lines:
        cur_bloc.lines = [create_nop(self.mode)]
    self.jmp_table_loc_keys.add(cur_bloc.loc_key)
    logger.debug("destinations: %s" % pformat([hex(i or 0) for i in dbg_destinations]))
    logger.debug("blocks: %d" % counter)