示例#1
0
    def _find_sources_of_taint(p, cfg):
        """
        Find sources of taint
        :return: addresses of taintable basic blocks and the registers that are used
        as inputs
        """
        # methods to discover that are sources of taint
        source_methods = ['read, recv']
        # source_methods = ['scanf']

        # LIMITATION: this part only works for linux binaries so far
        sources = {}
        bb_call = []

        # find all basic blocks containing the methods we are looking for
        plt_addrs = [(x, y) for x, y in p.loader.main_object.plt.items()
                     if any(s_m in x for s_m in source_methods)]
        # retrieve the basic block of the call
        for f_name, plt_addr in plt_addrs:
            no = cfg.model.get_any_node(plt_addr)
            if no:
                bb_call += [pred.addr for pred in no.predecessors]

        # for each basic block and predecessor discover the registers
        # providing input to the methods
        for b_block in bb_call:
            try:
                no = cfg.model.get_any_node(b_block)
                faddr = no.function_address
                if faddr not in sources:
                    sources[faddr] = []
                regs = arg_reg_names(p, get_arity(p, no.addr))

                sources[faddr].append((no.addr, tuple(regs)))

                # we go one level back
                n_f = cfg.model.get_any_node(faddr)
                preds = n_f.predecessors
                for pred in preds:
                    regs = arg_reg_names(p, get_arity(p, pred.addr))

                    if pred.function_address not in sources:
                        sources[pred.function_address] = []
                    sources[pred.function_address].append(
                        (pred.addr, tuple(regs)))
            except Exception as e:
                log.error(
                    f"BBF: Error encountered when discovering input registers: {e}"
                )

        for k in sources:
            sources[k] = list(set(sources[k]))

        return sources
示例#2
0
    def _save_info_preamble(self, current_path, core_taint):
        """
        Save address of function calls that have tainted parameters.
        This is useful to find wrappers to role functions.

        The address of the current block is appended to
        ``self._tainted_calls`` when the block is a function entry point and
        at least one of the argument registers holds, or points to, tainted
        data.

        :param current_path: angr current path
        :param core_taint:  core taint engine
        :return:  None
        :raises TimeOutException: propagated untouched from the taint engine
        """

        try:
            no = self._cfg.get_any_node(current_path.active[0].addr)
            # only function entry points are of interest
            if not no or no.function_address != no.addr:
                return

            preds = no.predecessors
            if not preds:
                return

            if len(preds) == 1:
                # check if the current address is just a normal basic block
                # that angr mistook for a function because it contains a
                # function preamble
                if self._p.factory.block(preds[0].addr).vex.jumpkind != 'Ijk_Call':
                    return

            # the arity is estimated at the first call site
            arity = get_arity(self._p, preds[0].addr)
            # `range` (not the Python 2-only `xrange`): on Python 3 the
            # resulting NameError was silently swallowed below and no call
            # was ever recorded
            for narg in range(arity):
                dst_reg = ordered_argument_regs[self._p.arch.name][narg]
                dst_addr = getattr(current_path.active[0].regs,
                                   self._p.arch.register_names[dst_reg])
                if core_taint.is_or_points_to_tainted_data(dst_addr, current_path):
                    self._tainted_calls.append(no.addr)
                    break
        except TimeOutException:
            raise
        except Exception:
            # best effort: any other failure just means nothing is recorded
            return
示例#3
0
def find_memcpy_like(p, cfg=None):
    """
    Finds all the memcpy-like functions in a given binary (Linux and binary blob)

    Functions whose name contains ``memcpy`` are always reported. When a CFG
    is given, a function is additionally reported when all of the following
    hold:

    * the call site preceding its entry node passes two or three arguments;
    * its graph contains a cycle with an equality comparison (a ``cmpeq`` or
      ``cmpne`` VEX operation) and an ``Iop_Add64`` with constant 1;
    * it has at most 8 basic blocks (BootStomp threshold).

    :param p: angr project
    :param cfg: angr cfg
    :return: list of addresses of the memcpy-like functions. Without a CFG
             the list follows the knowledge-base order; with a CFG it is
             de-duplicated and its order is unspecified.
    """

    memcpy_like = [
        f.addr for f in p.kb.functions.values() if 'memcpy' in f.name
    ]
    if cfg is None:
        return memcpy_like

    for fun in cfg.functions.values():
        # call sites: the predecessors of the function entry node
        try:
            no = cfg.model.get_any_node(fun.addr)
            callers = list(no.predecessors) if no is not None else []
        except Exception:
            # best effort: a function whose node cannot be inspected is skipped
            callers = []

        if not callers:
            continue

        nargs = get_arity(p, callers[0].addr)
        if nargs > 3 or nargs < 2:
            continue

        # cycles are consumed lazily: the first matching one is enough
        for loop in networkx.simple_cycles(fun.graph):
            # lift every block of the cycle once
            vex_blocks = [p.factory.block(node.addr).vex for node in loop]

            # CMPNE or CMPEQ
            has_compare = any(
                'cmpeq' in op.lower() or 'cmpne' in op.lower()
                for vex in vex_blocks for op in vex.operations)
            if not has_compare:
                continue

            # INCREMENT
            # NOTE(review): only 64-bit additions are recognised here; confirm
            # whether 32-bit targets are expected to be covered as well.
            increments = [
                const.value
                for vex in vex_blocks
                for st in vex.statements
                if st.tag == 'Ist_WrTmp' and hasattr(st, 'data')
                and hasattr(st.data, 'op') and st.data.op == 'Iop_Add64'
                for const in st.constants
            ]

            # using BootStomp thresholds
            if 1 in increments and len(list(fun.blocks)) <= 8:
                memcpy_like.append(fun.addr)
                break

    return list(set(memcpy_like))
    def backward_tainter(self, function_addr):
        """
        Implements the backward taint core functionality

        Starting from ``function_addr`` the taint analysis is run on the
        function; when the sinks turn out to depend on the function arguments
        (and no taint location is set) the callers of the function are queued
        and analyzed in turn, up to ``MAX_DEPTH_BACKWARD`` levels.

        :param function_addr: function address to start the analysis
        :return: True if a sink was found to be bound to received data,
                 False otherwise
        """

        min_lvl = MAX_DEPTH_BACKWARD

        to_analyze = [(function_addr, self._bb_sinks, 0)]
        p = self._p
        cfg = self._cfg
        self._backward_analysis_completed = False

        while to_analyze:
            self._sink_bound_to_recv = False
            self._sink_dep_args = False

            # Dequeue BEFORE the depth check: skipping an item without
            # removing it from the work list would make this loop spin
            # forever on the same item.
            faddr, self._bb_sinks, curr_lvl = to_analyze.pop(0)
            min_lvl = min(min_lvl, curr_lvl)
            if curr_lvl >= MAX_DEPTH_BACKWARD:
                continue

            white_calls = self._has_interesting_calls_backward(faddr)

            # run the taint analysis with the parameters
            self.run_coretaint(p, white_calls, faddr, self._backward_taint)

            if self._sink_bound_to_recv:
                return True

            if not self._taint_locs and self._sink_dep_args:
                # consider the callers
                no = cfg.model.get_any_node(faddr)
                if not no:
                    continue

                # group the call sites by the function containing them
                functions = {}
                for pred in no.predecessors:
                    curr_sink = (pred.addr,
                                 tuple(arg_reg_names(p, get_arity(p, pred.addr))))
                    functions.setdefault(pred.function_address, []).append(curr_sink)

                for caller_addr, finfo in functions.items():
                    to_analyze.append((caller_addr, finfo, curr_lvl + 1))

        # NOTE(review): the flag is already False at this point and is never
        # set to True in this method; kept as in the original, confirm the
        # intended meaning of `_backward_analysis_completed`.
        if min_lvl < MAX_DEPTH_BACKWARD:
            self._backward_analysis_completed = False

        return False
    def _forward_taint(self, current_path, *_, **__):
        """
        Implements the forward taint logic

        :param current_path: angr current path
        :return:
        """

        try:
            p = self._p
            addr = current_path.active[0].addr
            bl = p.factory.block(addr)
            cfg = self._cfg

            source = [x for x in self._bb_sources if x[0] == addr]

            if source and addr not in self._taint_applied_sources:
                self._taint_applied_sources.append(addr)
                self.apply_ret_taint = True
                source = source[0]
                regs = source[1]
                for reg in regs:
                    t_addr = getattr(current_path.active[0].regs, reg)
                    size = min(self._ct.estimate_mem_buf_size(current_path.active[0], t_addr),
                               self._ct.taint_buf_size)
                    self._ct.apply_taint(current_path, t_addr, 'initial_taint', bit_size=size)

            # check sink
            if bl.vex.jumpkind == 'Ijk_Call' and self._ct.taint_applied:
                try:
                    next_path = current_path.copy(deep=True).step()
                    no = cfg.model.get_any_node(addr)
                    succ = no.successors
                    succ = succ[0]

                    if (succ.name and any([x in succ.name for x in CMP_SUCCS])) or \
                            any([x in p.loader.find_symbol(succ.addr).name for x in CMP_SUCCS]):
                        nargs = get_arity(p, no.addr)
                        for i in range(nargs):
                            reg_name = arg_reg_name(p, i)
                            if self._ct.is_or_points_to_tainted_data(getattr(next_path.active[0].regs,
                                                                             reg_name), next_path):
                                self._sink_bound_to_recv = True
                                self._ct.stop_run()
                except:
                    pass
        except:
            pass
示例#6
0
    def _find_taint_callers(self, current_path, *_, **__):
        """
        Finds tainted callers

        If the current block ends with a call, every argument register that
        holds, or points to, tainted data after stepping the path is recorded
        in ``self._tainted_callsites`` as (call block address, register name).

        :param current_path: angr current path
        :return: None
        """

        state = current_path.active[0]
        project = self._current_p

        # only call sites are of interest
        if project.factory.block(state.addr).vex.jumpkind != 'Ijk_Call':
            return

        # the arguments are inspected on a stepped copy of the path
        stepped = current_path.copy(deep=True).step()
        for idx in range(get_arity(project, state.addr)):
            reg = arg_reg_name(project, idx)
            value = getattr(stepped.active[0].regs, reg)
            if self._core_taint.is_or_points_to_tainted_data(value, stepped):
                self._tainted_callsites.append((state.addr, reg))
示例#7
0
    def _check_key_usage(self, current_path, *_, **__):
        """
        Runs every cpfs on the current path to check whether the role of the binary can be inferred with
        the current info

        On the first invocation (when ``self._set_f_vals`` is set and no value
        was saved yet) the content of the argument registers of the function
        where the taint analysis starts is saved in ``self._f_arg_vals``.

        :param current_path: current path given by the taint analysis
        :return: None
        """

        # retrieve and save the values of arguments of the function where we
        # start the taint analysis
        if not self._f_arg_vals and self._set_f_vals:
            self._set_f_vals = False
            arity = max(get_arity(self._current_p, self._current_f_addr),
                        DEF_ROLE_ARITY)
            arch = self._current_p.arch
            # `range` (not the Python 2-only `xrange`) so that the method
            # also runs on Python 3
            for narg in range(arity):
                dst_reg = ordered_argument_regs[arch.name][narg]
                dst_cnt = getattr(current_path.active[0].regs,
                                  arch.register_names[dst_reg])
                self._f_arg_vals.append(dst_cnt)

        for pl in self._cpfs[self._current_bin]:
            log.debug("Entering cpf %s", pl.name)
            try:
                found, role = pl.run(self._current_data_key,
                                     self._current_key_addr,
                                     self._current_par_name, self._core_taint,
                                     current_path, self._f_arg_vals)

                if found:
                    log.debug("Using cpf %s", pl.name)

                    self._current_role = role
                    self._cpf_used = pl
                    # NOTE(review): `pl` is compared against the classes
                    # themselves; if the cpfs are instances this condition is
                    # always true. Confirm against how `_cpfs` is populated.
                    if pl not in (semantic.Semantic,
                                  setter_getter.SetterGetter):
                        self._core_taint.stop_run()
                        break
            except Exception as e:
                # best effort: a failing cpf must not prevent the others from
                # running, but the failure is no longer silent
                log.debug("cpf %s failed: %s", pl.name, e)
    def _backward_taint(self, current_path, *_, **__):
        """
        Implements the backward taint logic

        :param current_path: angr current path
        :return: None
        """

        try:
            p = self._p
            addr = current_path.active[0].addr
            bl = p.factory.block(addr)
            cfg = self._cfg

            if not self._ct.taint_applied:
                if self._taint_locs:
                    for mem_addr in self._taint_locs:
                        size = min(self._ct.estimate_mem_buf_size(current_path.active[0], mem_addr),
                                   self._ct.taint_buf_size)
                        self._ct.apply_taint(current_path, mem_addr, 'intial_taint', bit_size=size)
                else:
                    no = cfg.model.get_any_node(current_path.active[0].addr)
                    if not no:
                        return

                    preds = no.predecessors
                    if not preds:
                        return

                    pred = preds[0]
                    nargs = get_arity(p, pred.addr)
                    for i in range(nargs):
                        reg_name = arg_reg_name(p, i)
                        t_addr = getattr(current_path.active[0].regs, reg_name)
                        size = min(self._ct.estimate_mem_buf_size(current_path.active[0], t_addr),
                                   self._ct.taint_buf_size)
                        self._ct.apply_taint(current_path, t_addr, 'initial_taint', bit_size=size)

            # check sink
            if bl.vex.jumpkind == 'Ijk_Call':
                try:
                    no = self._cfg.model.get_any_node(addr)
                    succ = no.successors
                    succ = succ[0]

                    if (succ.name and ('recv' in succ.name or 'read' in succ.name)) or \
                            'recv' in p.loader.find_symbol(succ.addr).name:
                        self._found_recv = True
                except:
                    pass

            next_path = current_path.copy(deep=True).step()
            sink = [x for x in self._bb_sinks if x[0] == addr]

            if sink:
                for curr_sink in sink:
                    for reg_name in curr_sink[1]:
                        m_addr = getattr(next_path.active[0].regs, reg_name)
                        if self._ct.is_or_points_to_tainted_data(m_addr, next_path):
                            self._sink_dep_args = True
                            if self._found_recv:
                                self._sink_bound_to_recv = True
                                self._ct.stop_run()
                                break
        except:
            pass
示例#9
0
    def _glbl_data_key_setter(self, current_path, data_key, key_addr, core_taint, reg_name, par_vals):
        """
        Check whether a data key is copied to global structure. This strategy is usually used to binaries that
        talks to themselves.

        A setter is detected when the current block calls, through the PLT,
        one of the ``CPY_FUNS`` functions and, among the arguments of the
        call, one is tainted, one is a concrete address in ``.bss``/``.data``
        and one points to the same content as one of ``par_vals``. The role
        information is then stored in ``self._role_info[key_addr]``.

        :param current_path: angr current path
        :param data_key: data key value
        :param key_addr: data key address
        :param core_taint: core taint engine
        :param reg_name: parameter name
        :param par_vals: function argument values
        :return: (True, Role.SETTER) when a setter is detected,
                 (False, Role.UNKNOWN) otherwise
        :raises TimeOutException: propagated untouched from the taint engine
        """

        p = self._p
        cfg = self._cfg
        plt = p.loader.main_bin.reverse_plt

        globl = False
        tainted = False
        arg_copied = False
        try:
            next_path = current_path.copy(copy_states=True).step()
            current_addr = current_path.active[0].addr
            bl = p.factory.block(current_addr)

            if bl.vex.jumpkind != 'Ijk_Call':
                return False, Role.UNKNOWN

            no = cfg.get_any_node(current_addr)
            succ = no.successors[0]

            if succ.addr not in plt:
                return False, Role.UNKNOWN

            # the PLT node may be unnamed: look one hop further
            if not succ.name:
                succ = succ.successors[0]

            if succ.name in CPY_FUNS:
                caller_block_addr = current_addr
                arity = get_arity(p, caller_block_addr)
                # `range` (not the Python 2-only `xrange`): on Python 3 the
                # NameError ended up in the generic handler below and the
                # check always failed
                for narg in range(arity):

                    dst_reg = ordered_argument_regs[p.arch.name][narg]
                    dst_reg_cnt = getattr(next_path.active[0].regs, self._p.arch.register_names[dst_reg])
                    cnt_buff = current_path.active[0].memory.load(dst_reg_cnt)

                    # each argument is classified as exactly one of:
                    # tainted, global address, copy of a function argument
                    if core_taint.is_or_points_to_tainted_data(dst_reg_cnt, next_path):
                        tainted = True
                    elif dst_reg_cnt.concrete and any([sec.min_addr <= dst_reg_cnt.args[0] <= sec.max_addr
                                                       for sec in p.loader.main_bin.sections
                                                       if sec.name in ('.bss', '.data')]):
                        globl = True
                    elif any([str(cnt_buff) == str(current_path.active[0].memory.load(val)) for val in par_vals]):
                        arg_copied = True

                if arg_copied and tainted and globl:
                    current_function = no.function_address
                    addrs = list(current_path.active[0].history.bbl_addrs)

                    # position of the last entry into the current function in
                    # the executed-block history; the block right before it is
                    # the caller
                    try:
                        pos_call = len(addrs) - 1 - addrs[::-1].index(current_function)
                    except TimeOutException:
                        raise
                    except:
                        pos_call = 0

                    assert pos_call > 0, 'semantic.run: unable to find the calling block'

                    caller_block = addrs[pos_call - 1]
                    cno = self._cfg.get_any_node(caller_block)
                    assert cno, 'semantic.run: could not get a node :('
                    function_x_ref = cno.function_address

                    par_id = ordered_argument_regs[p.arch.name].index(p.arch.registers[reg_name][0])

                    info = {
                        RoleInfo.ROLE: Role.SETTER,
                        RoleInfo.DATAKEY: data_key,
                        RoleInfo.X_REF_FUN: function_x_ref,
                        RoleInfo.CALLER_BB: caller_block,
                        RoleInfo.ROLE_FUN: current_function,
                        RoleInfo.ROLE_INS: cno.addr,
                        RoleInfo.ROLE_INS_IDX: len(bl.vex.statements),
                        RoleInfo.COMM_BUFF: None,
                        RoleInfo.PAR_N: par_id,
                        RoleInfo.CPF: self._name

                    }
                    if key_addr not in self._role_info:
                        self._role_info[key_addr] = []

                    if info not in self._role_info[key_addr]:
                        self._role_info[key_addr].append(info)
                    return True,  Role.SETTER

        except TimeOutException:
            raise
        except Exception as e:
            # best effort: failures (failed assertions included) are logged
            # and reported as "no role found"
            self._log.debug("Semantic cpf. Error: " + str(e))

        return False, Role.UNKNOWN
示例#10
0
    def _indirect_access_search(self, current_path, data_key, key_addr, core_taint, reg_name):
        """
        Checks whether tainted data is used indirectly (through a loop and memory comparison) to retrieve, or set, data
        into a structure.

        The check succeeds when the current block, already present in the
        execution history (i.e., part of a loop), calls a memcmp-like
        function whose second argument is tainted or is the address of the
        data key. The role information is then stored in
        ``self._role_info[key_addr]``.

        :param current_path: angr current path
        :param data_key: data key value
        :param key_addr: data key address
        :param core_taint: core taint engine
        :param reg_name: register name
        :return: (True, candidate role) when the indirect access is found,
                 (False, Role.UNKNOWN) otherwise
        :raises TimeOutException: propagated untouched from the taint engine
        """
        current_addr = current_path.active[0].addr

        next_path = current_path.copy(copy_states=True)

        next_path.step()
        try:
            next_state = next_path.active[0]
        except TimeOutException:
            raise
        except Exception:
            return False, Role.UNKNOWN

        if self._p.factory.block(current_addr).vex.jumpkind != 'Ijk_Call':
            return False, Role.UNKNOWN

        # there must be a loop, a strcmp-like function and the data_key has to be used as key
        history_bbs = list(current_path.active[0].history.bbl_addrs)
        no = self._cfg.get_any_node(current_addr)

        # being in a loop (current block already executed) does not depend on
        # the successor: check it once
        if not no or not no.successors or current_addr not in history_bbs:
            return False, Role.UNKNOWN

        for succ in no.successors:
            if not self._is_memcmp_succ(succ):
                continue

            # we are calling a strcmp-like function within a loop.
            if not are_parameters_in_registers(self._p):
                raise Exception("implement me")

            dst_reg = ordered_argument_regs[self._p.arch.name][1]
            dst_addr = getattr(next_state.regs, self._p.arch.register_names[dst_reg])
            dst_cnt = current_path.active[0].memory.load(dst_addr)

            # The second argument must be tainted or be the data key itself.
            # The address is compared against `key_addr`: comparing it with
            # itself was true for every concrete address.
            if not (core_taint.is_tainted(dst_cnt) or
                    (dst_addr.concrete and dst_addr.args[0] == key_addr)):
                continue

            # yup! they are looking for some data indexed by the key. Understand if for setting
            # or getting
            current_function = no.function_address
            try:
                pos_call = len(history_bbs) - 1 - history_bbs[::-1].index(current_function)
            except TimeOutException:
                raise
            except:
                pos_call = 0

            assert pos_call > 0, 'semantic.run: unable to find the calling block'

            caller_block = history_bbs[pos_call - 1]

            # Heuristic: if the function's arity is greater than two, we assume
            # that the third parameter is the content to store in the shared buffer, making
            # the function itself a setter.
            # FIXME: (limitation) improve this heuristic. One should perform a def-use analysis to
            # see whether the base pointer used as first argument in the strcmp is used to return a
            # value, or to set a value
            nargs = get_arity(self._p, caller_block)
            candidate_role = Role.SETTER if nargs > 2 else Role.GETTER

            caller_node = self._cfg.get_any_node(caller_block)
            assert caller_node, 'semantic.run: could not get a node :('

            function_x_ref = caller_node.function_address
            par_id = ordered_argument_regs[self._p.arch.name].index(self._p.arch.registers[reg_name][0])
            block = self._p.factory.block(caller_node.addr)

            # if the role function is wrapped, report the wrapper and the
            # block calling it instead
            is_wrapped, wrapp_addr = self._is_wrapped(history_bbs, current_path)
            if is_wrapped:
                last_index = len(history_bbs) - 1 - history_bbs[::-1].index(wrapp_addr)
                if last_index > 0:
                    current_function = wrapp_addr
                    caller_block = history_bbs[last_index - 1]
                    cno = self._cfg.get_any_node(caller_block)
                    function_x_ref = cno.function_address if cno else None

            info = {
                RoleInfo.ROLE: candidate_role,
                RoleInfo.DATAKEY: data_key,
                RoleInfo.X_REF_FUN: function_x_ref,
                RoleInfo.CALLER_BB: caller_block,
                RoleInfo.ROLE_FUN: current_function,
                RoleInfo.ROLE_INS: caller_node.addr,
                RoleInfo.ROLE_INS_IDX: len(block.vex.statements),
                RoleInfo.COMM_BUFF: None,
                RoleInfo.PAR_N: par_id,
                RoleInfo.CPF: self._name

            }
            if key_addr not in self._role_info:
                self._role_info[key_addr] = []

            if info not in self._role_info[key_addr]:
                self._role_info[key_addr].append(info)
            return True, candidate_role

        return False, Role.UNKNOWN
示例#11
0
    def _check_sink(self, current_path, guards_info, *_, **__):
        """
        Checks whether the taint propagation analysis lead to a sink, and performs the necessary actions

        In order, for the current block: untaints variables bounded by a
        constant comparison, applies the initial taint when the caller block
        (or the configured source address) is reached, reports an alert when
        a tainted sink is reached, handles calls whose target is
        unconstrained, and reports loops guarded by tainted variables.

        :param current_path: angr current path
        :param guards_info:  guards (ITE) information
        :return: None
        :raises TimeOutException: re-raised untouched; any other exception is
                                  logged and swallowed
        """

        try:
            current_state = current_path.active[0]
            current_addr = current_state.addr
            cfg = self._current_cfg

            # count of the basic blocks this callback was invoked on
            self._visited_bb += 1

            # stepped copy of the path: the state after the current block
            next_path = current_path.copy(copy_states=True).step()
            info = self._current_role_info
            # check constant comparisons and untaint if necessary
            bounded, var = self._is_any_taint_var_bounded(guards_info)
            if bounded:
                self._ct.do_recursive_untaint(var, current_path)

            # If the taint is not applied yet, apply it
            if not self._ct.taint_applied and current_addr == info[
                    RoleInfo.CALLER_BB]:
                next_state = next_path.active[0]
                self._apply_taint(current_addr,
                                  current_path,
                                  next_state,
                                  taint_key=True)

            # Also apply the taint when the next block is the source address
            # given in the configuration (a hexadecimal string). Any error
            # other than a timeout, a missing configuration key included, is
            # ignored.
            try:
                if len(next_path.active) and self._config['eg_souce_addr']:
                    if next_path.active[0].addr == int(
                            self._config['eg_souce_addr'], 16):
                        next_state = next_path.active[0]
                        self._apply_taint(current_addr,
                                          current_path,
                                          next_state,
                                          taint_key=True)
            except TimeOutException as to:
                raise to
            except:
                pass

            # tainted data reached a sink: raise an alert
            if self._is_sink_and_tainted(current_path):
                delta_t = time.time() - self._analysis_starting_time
                self._raised_alert = True
                name_bin = self._ct.p.loader.main_object.binary
                self._report_alert_fun(
                    'sink',
                    name_bin,
                    current_path,
                    current_addr,
                    self._current_role_info[RoleInfo.DATAKEY],
                    pl_name=self._current_cpf_name,
                    report_time=delta_t)

            # tainted call address and tainted parameters
            bl = self._current_p.factory.block(current_addr)
            # a call with no active successor and at least one unconstrained
            # one
            if not len(next_path.active) and len(
                    next_path.unconstrained) and bl.vex.jumpkind == 'Ijk_Call':
                # the operand string of the last instruction of the block is
                # used as the name of the register holding the call target
                cap = bl.capstone.insns[-1]
                vb = bl.vex
                reg_jump = cap.insn.op_str
                val_jump_reg = getattr(next_path.unconstrained[0].regs,
                                       reg_jump)
                # the VEX jump target must be a temporary
                if not hasattr(vb.next, 'tmp'):
                    return
                val_jump_tmp = next_path.unconstrained[0].scratch.temps[
                    vb.next.tmp]

                # proceed only when neither value passes `is_tainted_by_us`
                if not self.is_tainted_by_us(
                        val_jump_reg) and not self.is_tainted_by_us(
                            val_jump_tmp):
                    if self._ct.is_or_points_to_tainted_data(
                            val_jump_reg, next_path, unconstrained=True):
                        nargs = get_arity(self._current_p,
                                          current_path.active[0].addr)
                        # raise an alert for every argument register (other
                        # than the jump register) that is tainted and passes
                        # `is_address`
                        for ord_reg in ordered_argument_regs[
                                self._current_p.arch.name][:nargs]:
                            reg_name = self._current_p.arch.register_names[
                                ord_reg]
                            if reg_name == reg_jump:
                                continue

                            reg_val = getattr(next_path.unconstrained[0].regs,
                                              reg_name)
                            if self._ct.is_or_points_to_tainted_data(
                                    reg_val, next_path, unconstrained=True
                            ) and self.is_address(reg_val):
                                delta_t = time.time(
                                ) - self._analysis_starting_time
                                self._raised_alert = True
                                name_bin = self._ct.p.loader.main_object.binary
                                self._report_alert_fun(
                                    'sink',
                                    name_bin,
                                    current_path,
                                    current_addr,
                                    self._current_role_info[RoleInfo.DATAKEY],
                                    pl_name=self._current_cpf_name,
                                    report_time=delta_t)

                        # record the hashes of both jump values in
                        # `_taint_names_applied`, then apply the taint on the
                        # unconstrained state
                        next_state = next_path.unconstrained[0]
                        hash_val = self.bv_to_hash(val_jump_tmp)
                        self._taint_names_applied.append(hash_val)
                        hash_val = self.bv_to_hash(val_jump_reg)
                        self._taint_names_applied.append(hash_val)
                        self._apply_taint(current_addr, current_path,
                                          next_state)

            # eventually if we are in a loop guarded by a tainted variable
            next_active = next_path.active
            # more than one successor: the block ends with a branch
            if len(next_active) > 1:
                history_addrs = [t for t in current_state.history.bbl_addrs]
                # successors that were already executed on this path
                seen_addr = [
                    a.addr for a in next_active if a.addr in history_addrs
                ]

                if len(seen_addr) == 0:
                    return

                # only jumps to a lower address are considered back-jumps
                back_jumps = [a for a in seen_addr if a < current_addr]
                if len(back_jumps) == 0:
                    return

                bj = back_jumps[0]
                node_s = cfg.get_any_node(bj)
                node_f = cfg.get_any_node(current_addr)

                if not node_s or not node_f:
                    return

                fun_s = node_s.function_address
                fun_f = node_f.function_address

                # the back-jump target and the current block must belong to
                # the same function
                if fun_s != fun_f:
                    return

                # every block executed from the first occurrence of the
                # back-jump target onwards, and known to the CFG, must belong
                # to that function too
                idx_s = history_addrs.index(bj)
                for a in history_addrs[idx_s:]:
                    n = cfg.get_any_node(a)
                    if not n:
                        continue

                    if n.function_address != fun_s:
                        return

                # if we have a back-jump satisfiying all the conditions
                # the last guard of the first successor is taken as the loop
                # condition
                cond_guard = [g for g in next_active[0].guards][-1]

                # alert only when both operands of a two-operand guard
                # mention the taint buffer name
                if hasattr(cond_guard, 'args') and len(cond_guard.args) == 2 and \
                   self._ct.taint_buf in str(cond_guard.args[0]) and \
                   self._ct.taint_buf in str(cond_guard.args[1]):
                    delta_t = time.time() - self._analysis_starting_time
                    self._raised_alert = True
                    name_bin = self._ct.p.loader.main_object.binary
                    self._report_alert_fun('loop',
                                           name_bin,
                                           current_path,
                                           current_addr,
                                           cond_guard,
                                           pl_name=self._current_cpf_name,
                                           report_time=delta_t)
        except TimeOutException as to:
            raise to
        except Exception as e:
            log.error("Something went terribly wrong: %s" % str(e))