示例#1
0
 def _process_sbb(self, cur_bloc, last_instruction):
     """
     Replace the last instruction of @cur_bloc by an explicit branch construct
     made of three new blocks:
       - a test block holding a "JB" conditional branch,
       - a block moving 0 into the register (fall-through path),
       - a block moving -1 into the register (taken path).
     Both paths continue to the original successor of @cur_bloc.
     Presumably used for `SBB reg, reg`-like instructions (name-based guess,
     confirm against the caller).
     @cur_bloc: asm block whose last line is removed and whose constraints
     are replaced
     @last_instruction: instruction whose first argument is the register
     receiving 0 / -1
     """
     # location creation order is kept: "-1" block first, test block second
     set_block = AsmBlock(self.loc_db.add_location())
     test_block = AsmBlock(self.loc_db.add_location())
     target_reg = last_instruction.args[0]
     mode = self.mode

     # taken path: reg = -1, then jump to the original successor
     set_block.lines.append(
         create_mov_instruction(mode, target_reg,
                                ExprInt(-1, target_reg.size)))
     successor = next(iter(cur_bloc.bto)).loc_key
     set_block.lines.append(
         create_jump_instruction(mode, ExprLoc(successor, mode)))
     set_block.bto = {AsmConstraintTo(successor)}

     # drop the instruction that is being replaced
     cur_bloc.lines.pop()

     # fall-through path: reg = 0, then continue to the original successor
     clear_block = AsmBlock(self.loc_db.add_location())
     clear_block.lines = [
         create_mov_instruction(mode, target_reg,
                                ExprInt(0, target_reg.size))
     ]
     clear_block.bto = {AsmConstraintNext(successor)}

     # JC is not implemented in miasm, using alias
     test_block.lines.append(
         create_cond_branch_instruction(
             mode, "JB", ExprLoc(set_block.loc_key, mode)))
     test_block.bto = {
         AsmConstraintNext(clear_block.loc_key),
         AsmConstraintTo(set_block.loc_key)
     }

     # the original block must not be left without any line
     if not cur_bloc.lines:
         cur_bloc.lines = [create_nop(mode)]
     cur_bloc.bto = {AsmConstraintNext(test_block.loc_key)}

     for fresh_block in (set_block, test_block, clear_block):
         self.add_block(fresh_block)
示例#2
0
    def add_instr_to_ircfg(self, instr, ircfg, loc_key=None, gen_pc_updt=False):
        """
        Wrap the native instruction @instr into a one-line AsmBlock and add
        that block to the @ircfg
        @instr: instruction instance
        @ircfg: IRCFG instance
        @loc_key: loc_key used for the block holding @instr; when None, it is
        fetched or created in the location database from the "offset"
        attribute of @instr (None when the attribute is missing)
        @gen_pc_updt: insert PC update effects between instructions
        (forwarded to add_asmblock_to_ircfg)
        Return the loc_key of the block
        """

        if loc_key is None:
            instr_offset = getattr(instr, "offset", None)
            loc_key = self.loc_db.get_or_create_offset_location(instr_offset)

        single_instr_block = AsmBlock(self.loc_db, loc_key)
        single_instr_block.lines = [instr]
        self.add_asmblock_to_ircfg(single_instr_block, ircfg, gen_pc_updt)
        return loc_key
示例#3
0
    def _process_jmp_table(self, cur_bloc, mn, attrib, loc_db, pool_bin, offsets_to_dis):
        """
        Try to resolve the indirect jump of @cur_bloc as a jump table and, on success, replace it by an explicit
        chain of compare-and-branch blocks.

        Steps:
          1. build an IRCFG of the current asmcfg and locate the single predecessor of @cur_bloc,
          2. determine the jump table control variable through JTCVariableDependencyGraph,
          3. symbolically evaluate the predecessor and @cur_bloc to get the IRDst equations,
          4. enumerate with z3 the control variable values that lead to @cur_bloc and compute their destinations,
          5. emit one comparison block (or a lower/upper check pair) per destination interval and rewrite @cur_bloc
             so that it falls through to that chain.

        Every recoverable failure is logged and makes the method return without building the chain; note that
        @cur_bloc has already been added to the asmcfg at that point.

        @cur_bloc: asm block ending with the candidate indirect jump
        @mn: passed to get_ira to select the IR analysis class
        @attrib: passed to get_ira together with @mn
        @loc_db: location database used for offset lookups and for creating the new locations
        @pool_bin: passed to MySymbolicExecutionEngine (presumably the binary stream used for memory reads)
        @offsets_to_dis: set updated in place with every resolved destination offset
        Return None
        Raise RuntimeError when the solver still yields new control variable values after 500 iterations
        """
        # TODO add support for jump tables with "AND cntrl_var, range" boundary check; such jmp tables were present only
        #   in library functions in Stantinko samples
        # add current block to the asmcfg to make it accessible in the ircfg edges, add_block is called anyway right
        # after this callback, it will notice that the block has been already added
        self.add_block(cur_bloc)
        dst_address = loc_db.get_location_offset(cur_bloc.loc_key)

        logger.info("Possible jump table addr: 0x%x" % dst_address)

        ira = get_ira(mn, attrib)

        ir_arch = ira(loc_db)

        ircfg = ir_arch.new_ircfg_from_asmcfg(self)

        # the previous blocks should have exactly 1 predecessor dictating range
        predecessors = self.predecessors(cur_bloc.loc_key)
        if len(predecessors) != 1:
            logger.info("Expected exactly one predecessor")
            return
        predecessor = ircfg.blocks[predecessors.pop()]

        # when the asm block was split into several IR blocks, walk the IR successors until the block whose
        # destination is a memory dereference is reached
        irdst_block = ircfg.blocks[cur_bloc.loc_key]
        if len(irdst_block.assignblks) != len(cur_bloc.lines):
            processed = set()
            todo = {irdst_block.loc_key}
            while not irdst_block.dst.is_mem():
                loc_key = todo.pop()
                if loc_key in processed:
                    continue
                processed.add(loc_key)
                irdst_block = ircfg.blocks[loc_key]
                todo.update(ircfg.successors(loc_key))

        # we shouldn't stumble upon crashing segm and call operators even though implicit is required to process
        # initial IRDst (mentioned operators cause crashes of the engine behind implicit) since we operate only on the
        # 2 crucial basic blocks. The predecessor contains range of the jump table, we use it to determine constructs
        # of the jump table and track back base code segment address assignment to target the msvc compiler and x64
        # architecture, other compilers use directly RIP related addressing to get the address.

        # get real predecessor: same walk as above, this time looking for the IR block that directly precedes cur_bloc
        asm_block = self.loc_key_to_block(predecessor.loc_key)
        if len(predecessor.assignblks) != len(asm_block.lines):
            processed = set()
            todo = {predecessor.loc_key}
            while cur_bloc.loc_key not in ircfg.successors(predecessor.loc_key):
                loc_key = todo.pop()
                if loc_key in processed:
                    continue
                processed.add(loc_key)
                predecessor = ircfg.blocks[loc_key]
                todo.update(ircfg.successors(loc_key))

        # get jump_table_control_variable from predecessor
        dg = DependencyGraph(ircfg, implicit=True, apply_simp=True, follow_mem=True, follow_call=False)
        jtcdg = JTCVariableDependencyGraph(predecessor.loc_key,
                                           ircfg, implicit=True, apply_simp=True, follow_mem=False, follow_call=False)

        dependency_result_iter = iter(jtcdg.get(irdst_block.loc_key, {ircfg.IRDst}, len(predecessor.assignblks),
                                                {predecessor.loc_key}))
        # the dependency graph may yield no solution at all; give up like the other failure paths instead of letting
        # StopIteration escape from this callback
        solution_predecessor = next(dependency_result_iter, None)
        if solution_predecessor is None:
            logger.info("couldn't find any dependency solution in the predecessor")
            return
        # jump table control variable
        jtc_var = jtcdg.jtc_var
        if not jtc_var:
            logger.info("couldn't determine single jump table control variable")
            return
        # get symbolic execution engine to be used in both predecessor and jmp table block
        symb_exec_both = MySymbolicExecutionEngine(pool_bin, jtc_var, ir_arch)
        try:
            # symbolically evaluate lines influencing IRDst of the predecessor leading to jtc_var
            for line_nb in sorted({node.line_nb for node in solution_predecessor.relevant_nodes
                                   if node.loc_key == predecessor.loc_key}):
                assign_blk = predecessor.assignblks[line_nb]
                symb_exec_both.eval_updt_assignblk(assign_blk)
        except (KeyError, TypeError):
            logger.error(
                "Couldn't symbolically eval predecessor of 0x%x" % loc_db.get_location_offset(cur_bloc.loc_key))
            # stantinko contains illegal unreachable dereferences prior jmp tables, such as
            # xor     eax, eax; movsx   eax, byte ptr [eax]
            return
        # get symbolic execution engine supporting binary memory dereference
        # NOTE(review): unlike the call above, jtc_var is not passed here; confirm that this argument order matches
        # the signature of MySymbolicExecutionEngine
        symb_exec_minimal = MySymbolicExecutionEngine(pool_bin, ir_arch, symb_exec_both.symbols.copy())
        predecessor_irdst_equation = symb_exec_both.symbols[ircfg.IRDst]

        # get equation whose solutions solve the indirect jump
        irdst_block = ircfg.blocks[cur_bloc.loc_key]
        if len(irdst_block.assignblks) != len(cur_bloc.lines):
            processed = set()
            todo = {irdst_block.loc_key}
            # NOTE(review): the block is evaluated before the pop, so the first IR block appears to be evaluated
            # twice (once before and once after it is re-fetched); confirm this is intended
            while not irdst_block.dst.is_mem():
                symb_exec_both.eval_updt_irblock(irdst_block)
                loc_key = todo.pop()
                if loc_key in processed:
                    continue
                processed.add(loc_key)
                irdst_block = ircfg.blocks[loc_key]
                todo.update(ircfg.successors(loc_key))

        irdst_equation = symb_exec_both.eval_updt_irblock(irdst_block)
        sizes = set()
        # prevent mem processing via raw arrays by using var ID instead
        # we also want to set a maximum boundary so slices don't cause the sat solver generate a huge number of results
        visitor = ExprVisitorCallbackTopToBottom(lambda x: self._eliminate_jtc_var_slice_cb(x, sizes, jtc_var))
        irdst_equation = visitor.visit(irdst_equation)
        predecessor_irdst_equation = visitor.visit(predecessor_irdst_equation)
        # the smallest collected size greater than 1 bounds the control variable, its full size is the fallback
        size_boundary = jtc_var.size
        sizes = sorted(filter(lambda x: x > 1, sizes))
        if sizes:
            size_boundary = sizes[0]
        jtc_var_id = ExprId("jtc_var", jtc_var.size)
        irdst_equation = irdst_equation.replace_expr({jtc_var: jtc_var_id})
        predecessor_irdst_equation = predecessor_irdst_equation.replace_expr({jtc_var: jtc_var_id})
        # track possible CS base address dependency, ignore control variable from predecessor
        eliminated_jtc_var_equation = irdst_equation.replace_expr({jtc_var_id: ExprInt(0, jtc_var_id.size)})
        evaluated_ejtc_var_equation = symb_exec_both.eval_expr(eliminated_jtc_var_equation)
        if not evaluated_ejtc_var_equation.is_int():
            # we need to determine code base
            dependencies = dg._follow_apply_cb(evaluated_ejtc_var_equation)
            expr_deps = {fexpr.element for fexpr in dependencies if fexpr.follow}
            dg_base = DependencyGraph(ircfg, implicit=False, apply_simp=True, follow_mem=True, follow_call=False)
            dependency_result_iter = iter(dg_base.get(cur_bloc.loc_key, expr_deps, len(cur_bloc.lines),
                                                      {self.heads()[0]}))
            # same guard as for the predecessor: no solution means the code base cannot be resolved
            solution = next(dependency_result_iter, None)
            if solution is None:
                logger.info("couldn't resolve dependencies of the jump table code base")
                return
            code_base_dict = {expr: solution.emul(ir_arch)[expr] for expr in expr_deps}
            irdst_equation = irdst_equation.replace_expr(code_base_dict)
            predecessor_irdst_equation = predecessor_irdst_equation.replace_expr(code_base_dict)

        # we need backward slice of the jump table destination dependencies to retain the other independent assignments
        # during cmp chain assembling
        dependency_result = dg.get(cur_bloc.loc_key, {ircfg.IRDst}, len(cur_bloc.lines), {cur_bloc.loc_key})
        dependent_line_nbs = {}
        for solution in dependency_result:
            dependent_line_nbs.setdefault(solution.loc_key, set()).update(
                {dn.line_nb for dn in solution.relevant_nodes})
        # keep (and symbolically evaluate) only the instructions IRDst does not depend on
        cur_bloc_new_lines = []
        for loc_key, lines in dependent_line_nbs.items():
            for line_nb, assignblk in enumerate(ircfg.blocks[loc_key].assignblks):
                if line_nb not in lines:
                    symb_exec_minimal.eval_assignblk(assignblk)
                    cur_bloc_new_lines.append(assignblk.instr)
        # pick the register/memory the comparisons are made against: the control variable itself when it was not
        # overwritten, otherwise a symbol whose value depends on it and becomes constant once it is fixed
        comparison_reg_id = None
        comparison_reg_value = None
        if jtc_var not in symb_exec_minimal.symbols.symbols_id:
            comparison_reg_id = jtc_var
            comparison_reg_value = jtc_var
        else:
            for symbol, comparison_reg_value in symb_exec_minimal.symbols.symbols_id.items():
                if jtc_var in comparison_reg_value and (symbol.is_mem() or
                                                        (symbol.is_id() and symbol.name not in
                                                         ["RIP", "EIP", "zf", "nf", "pf", "of", "cf", "af", "df",
                                                          ircfg.IRDst.name])):
                    replaced_jtcv = comparison_reg_value.replace_expr({jtc_var: ExprInt(0, jtc_var.size)})
                    if isinstance(symb_exec_minimal.eval_expr(replaced_jtcv), ExprInt):
                        comparison_reg_id = symbol
                        break
        if not comparison_reg_id or not comparison_reg_value:
            logger.debug("Couldn't find any candidate for comparison register at 0x%x" %
                         loc_db.get_location_offset(cur_bloc.loc_key))
            return

        # imported lazily so that z3 is only required when a jump table is actually processed
        from miasm.ir.translators import Translator
        import z3
        translator = Translator.to_language("z3")
        solver = z3.Solver()

        logger.debug("predecessor_irdst_equation: %s" % str(predecessor_irdst_equation))
        logger.debug(("dst_address: 0x%x" % dst_address))
        logger.debug(("jump_table_control_variable: %s" % str(jtc_var)))
        # the control variable must make the predecessor reach cur_bloc and stay within the size boundary
        solver.add(translator.from_expr(predecessor_irdst_equation) == dst_address)
        translated_jtc_var = translator.from_expr(jtc_var_id)
        solver.add(translated_jtc_var >= 0)
        solver.add(translated_jtc_var < 2 ** (size_boundary - 1) - 1)

        if solver.check() != z3.sat:
            logger.debug("Couldn't find at least one jump table control variable")
            return

        dbg_destinations = set()
        next_loc_key = new_block_loc_key = loc_db.add_location()

        logger.debug("comparison_reg_id: %s" % str(comparison_reg_id))
        # destination offset -> interval of comparison register values leading to it
        dst_ranges = {}
        counter = 0
        while counter < 500:
            val = solver.model()[translated_jtc_var].as_long()
            final_irdst_equation = irdst_equation.replace_expr({jtc_var_id: ExprInt(val, jtc_var_id.size)})
            final_dst = int(symb_exec_both.eval_expr(final_irdst_equation))
            cmp_reg_val = comparison_reg_value.replace_expr({jtc_var: ExprInt(val, jtc_var.size)})
            cmp_reg_val = int(symb_exec_minimal.eval_expr(cmp_reg_val))

            dst_ranges[final_dst] = dst_ranges.get(final_dst, interval()).union([(cmp_reg_val, cmp_reg_val)])
            dbg_destinations.add(final_dst)
            offsets_to_dis.add(final_dst)

            # exclude the value just processed and ask the solver for another one
            solver.add(translated_jtc_var != translator.from_expr(ExprInt(val, jtc_var_id.size)))
            if solver.check() != z3.sat:
                break
            counter += 1

        if counter == 500:
            raise RuntimeError("Interrupted; there might be a broken slice")

        # assemble the chain: every block either jumps to its destination or falls through to the next check
        for dst, interv in dst_ranges.items():
            cond_target_loc_key = loc_db.get_or_create_offset_location(dst)
            for lower, upper in interv:
                lower = ExprInt(lower, self.mode)
                upper = ExprInt(upper, self.mode)
                new_asm_block = AsmBlock(new_block_loc_key)
                new_block_loc_key = loc_db.add_location()
                if lower == upper:
                    # single value: one equality check
                    new_asm_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, lower,
                                                                    ExprLoc(cond_target_loc_key, self.mode), "JZ")
                    new_asm_block.add_cst(cond_target_loc_key, "c_to")
                    new_asm_block.add_cst(new_block_loc_key, "c_next")
                else:
                    upper_check_loc_key = loc_db.add_location()
                    # lower boundary check
                    new_asm_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, lower,
                                                                    ExprLoc(new_block_loc_key, self.mode), "JB")
                    new_asm_block.add_cst(new_block_loc_key, "c_to")
                    new_asm_block.add_cst(upper_check_loc_key, "c_next")
                    # upper boundary check
                    upper_check_block = AsmBlock(upper_check_loc_key)
                    upper_check_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, upper,
                                                                        ExprLoc(cond_target_loc_key, self.mode), "JBE")
                    upper_check_block.add_cst(cond_target_loc_key, "c_to")
                    upper_check_block.add_cst(new_block_loc_key, "c_next")
                    self.add_block(upper_check_block)
                self.add_block(new_asm_block)
        # trigger last jump unconditionally
        new_asm_block.bto = {AsmConstraintTo(cond_target_loc_key)}
        new_asm_block.lines = [create_jump_instruction(self.mode, ExprLoc(cond_target_loc_key, self.mode))]

        # the original block keeps only the independent instructions and falls through to the first check
        cur_bloc.lines = cur_bloc_new_lines
        cur_bloc.add_cst(next_loc_key, "c_next")
        if not cur_bloc.lines:
            cur_bloc.lines = [create_nop(self.mode)]
        self.jmp_table_loc_keys.add(cur_bloc.loc_key)
        logger.debug("destinations: %s" % pformat([hex(i or 0) for i in dbg_destinations]))
        logger.debug("blocks: %d" % counter)