示例#1
0
class VariableManagerInternal:
    """
    Manage variables for a function. It is meant to be used internally by VariableManager.
    """
    def __init__(self, manager, func_addr=None):
        """
        Create the empty bookkeeping structures for the variables of one function.

        :param manager:     Stored as `self.manager`.
        :param func_addr:   Stored as `self.func_addr`. Defaults to None.
        """
        self.manager = manager
        self.func_addr = func_addr

        # all variables that are added to any region
        self._variables = OrderedSet()

        # one region per supported variable sort
        self._global_region = KeyedRegion()
        self._stack_region = KeyedRegion()
        self._register_region = KeyedRegion()

        # a mapping between addresses of program points and live variable collections
        self._live_variables = {}

        # access records, indexed by variable, instruction address, block address,
        # (block address, statement index) and, within a statement, by atom
        self._variable_accesses = defaultdict(set)
        self._insn_to_variable = defaultdict(set)
        self._block_to_variable = defaultdict(set)
        self._stmt_to_variable = defaultdict(set)
        self._atom_to_variable = defaultdict(_defaultdict_set)

        # an independent counter per sort, consumed by next_variable_ident()
        self._variable_counters = {
            sort: count() for sort in ('register', 'stack', 'argument', 'phi', 'global')
        }

        # unified variables and the mapping from each variable to its unified variable
        self._unified_variables: Set[SimVariable] = set()
        self._variables_to_unified_variables: Dict[SimVariable, SimVariable] = {}

        # phi variable -> the variables it represents, and block address -> phi variables
        self._phi_variables = {}
        self._phi_variables_by_block = defaultdict(set)

        # variable -> type, see get_variable_type()
        self.types = {}

    #
    # Public methods
    #

    def next_variable_ident(self, sort):
        if sort not in self._variable_counters:
            raise ValueError('Unsupported variable sort %s' % sort)

        if sort == 'register':
            prefix = "r"
        elif sort == 'stack':
            prefix = "s"
        elif sort == 'argument':
            prefix = 'arg'
        elif sort == 'global':
            prefix = 'g'
        else:
            prefix = "m"

        ident = "i%s_%d" % (prefix, next(self._variable_counters[sort]))
        return ident

    def add_variable(self, sort, start, variable):
        if sort == 'stack':
            self._stack_region.add_variable(start, variable)
        elif sort == 'register':
            self._register_region.add_variable(start, variable)
        elif sort == 'global':
            self._global_region.add_variable(start, variable)
        else:
            raise ValueError('Unsupported sort %s in add_variable().' % sort)

    def set_variable(self, sort, start, variable: SimVariable):
        if sort == 'stack':
            region = self._stack_region
        elif sort == 'register':
            region = self._register_region
        elif sort == 'global':
            region = self._global_region
        else:
            raise ValueError('Unsupported sort %s in set_variable().' % sort)
        existing = [x for x in region.get_variables_by_offset(start) if x.ident == variable.ident]
        if len(existing) == 1:
            var = existing[0]
            if var.renamed:
                variable.name = var.name
                variable.renamed = True
        else:
            # implicitly overwrite or add I guess
            pass
        region.set_variable(start, variable)

    def write_to(self, variable, offset, location, overwrite=False, atom=None):
        self._record_variable_access('write', variable, offset, location, overwrite=overwrite, atom=atom)

    def read_from(self, variable, offset, location, overwrite=False, atom=None):
        self._record_variable_access('read', variable, offset, location, overwrite=overwrite, atom=atom)

    def reference_at(self, variable, offset, location, overwrite=False, atom=None):
        self._record_variable_access('reference', variable, offset, location, overwrite=overwrite, atom=atom)

    def _record_variable_access(self, sort, variable, offset, location, overwrite=False, atom=None):
        """
        Record an access to a variable in all lookup tables.

        :param sort:        Access sort; the public wrappers pass 'write', 'read' or 'reference'.
        :param variable:    The accessed variable.
        :param offset:      Stored next to the variable as a (variable, offset) tuple.
        :param location:    Object providing `ins_addr`, `block_addr` and `stmt_idx`.
        :param overwrite:   If True, replace the existing entries for this variable, instruction, block,
                            statement and atom instead of adding to them.
        :param atom:        Optional key for the per-statement atom table; nothing is recorded there when None.
        :return:            None
        """
        self._variables.add(variable)

        var_and_offset = variable, offset
        access = VariableAccess(variable, sort, location)
        stmt_key = (location.block_addr, location.stmt_idx)

        if overwrite:
            self._variable_accesses[variable] = {access}
            self._insn_to_variable[location.ins_addr] = {var_and_offset}
            self._block_to_variable[location.block_addr] = {var_and_offset}
            self._stmt_to_variable[stmt_key] = {var_and_offset}
            if atom is not None:
                # store a one-element set, not the bare tuple: the non-overwrite branch calls .add() on
                # this entry and find_variables_by_atom() hands it out as a set of (variable, offset) tuples
                self._atom_to_variable[stmt_key][atom] = {var_and_offset}
        else:
            self._variable_accesses[variable].add(access)
            self._insn_to_variable[location.ins_addr].add(var_and_offset)
            self._block_to_variable[location.block_addr].add(var_and_offset)
            self._stmt_to_variable[stmt_key].add(var_and_offset)
            if atom is not None:
                self._atom_to_variable[stmt_key][atom].add(var_and_offset)

    def make_phi_node(self, block_addr, *variables):
        """
        Create a phi variable for variables at block `block_addr`.

        :param int block_addr:  The address of the current block.
        :param variables:       Variables that the phi variable represents.
        :return:                The created phi variable.
        """

        existing_phis = set()
        non_phis = set()
        for var in variables:
            if self.is_phi_variable(var):
                existing_phis.add(var)
            else:
                non_phis.add(var)
        if len(existing_phis) == 1:
            existing_phi = next(iter(existing_phis))
            if non_phis.issubset(self.get_phi_subvariables(existing_phi)):
                return existing_phi
            else:
                # Update phi variables
                self._phi_variables[existing_phi] |= non_phis
                return existing_phi

        repre = next(iter(variables))
        repre_type = type(repre)
        if repre_type is SimRegisterVariable:
            ident_sort = 'register'
            a = SimRegisterVariable(repre.reg, repre.size, ident=self.next_variable_ident(ident_sort))
        elif repre_type is SimMemoryVariable:
            ident_sort = 'global'
            a = SimMemoryVariable(repre.addr, repre.size, ident=self.next_variable_ident(ident_sort))
        elif repre_type is SimStackVariable:
            ident_sort = 'stack'
            a = SimStackVariable(repre.offset, repre.size, ident=self.next_variable_ident(ident_sort))
        else:
            raise TypeError('make_phi_node(): Unsupported variable type "%s".' % type(repre))

        # Keep a record of all phi variables
        self._phi_variables[a] = set(variables)
        self._phi_variables_by_block[block_addr].add(a)

        return a

    def set_live_variables(self, addr, register_region, stack_region):
        """
        Remember the live variables at a program point.

        :param addr:            Key under which the live variables are stored.
        :param register_region: First argument of the LiveVariables constructor.
        :param stack_region:    Second argument of the LiveVariables constructor.
        :return:                None
        """
        self._live_variables[addr] = LiveVariables(register_region, stack_region)

    def find_variables_by_insn(self, ins_addr, sort):
        if ins_addr not in self._insn_to_variable:
            return None

        if sort in (VariableType.MEMORY, 'memory'):
            vars_and_offset = [(var, offset) for var, offset in self._insn_to_variable[ins_addr]
                        if isinstance(var, (SimStackVariable, SimMemoryVariable))]
        elif sort in (VariableType.REGISTER, 'register'):
            vars_and_offset = [(var, offset) for var, offset in self._insn_to_variable[ins_addr]
                        if isinstance(var, SimRegisterVariable)]
        else:
            l.error('find_variable_by_insn(): Unsupported variable sort "%s".', sort)
            return [ ]

        return vars_and_offset

    def find_variable_by_stmt(self, block_addr, stmt_idx, sort):
        return next(iter(self.find_variables_by_stmt(block_addr, stmt_idx, sort)), None)

    def find_variables_by_stmt(self, block_addr: int, stmt_idx: int, sort: str) -> List[Tuple[SimVariable,int]]:

        key = block_addr, stmt_idx

        if key not in self._stmt_to_variable:
            return [ ]

        variables = self._stmt_to_variable[key]
        if not variables:
            return [ ]

        if sort == 'memory':
            var_and_offsets = list((var, offset) for var, offset in self._stmt_to_variable[key]
                                   if isinstance(var, (SimStackVariable, SimMemoryVariable)))
        elif sort == 'register':
            var_and_offsets = list((var, offset) for var, offset in self._stmt_to_variable[key]
                                   if isinstance(var, SimRegisterVariable))
        else:
            l.error('find_variables_by_stmt(): Unsupported variable sort "%s".', sort)
            return [ ]

        return var_and_offsets

    def find_variable_by_atom(self, block_addr, stmt_idx, atom):
        return next(iter(self.find_variables_by_atom(block_addr, stmt_idx, atom)), None)

    def find_variables_by_atom(self, block_addr, stmt_idx, atom) -> Set[Tuple[SimVariable, int]]:

        key = block_addr, stmt_idx

        if key not in self._atom_to_variable:
            return set()

        if atom not in self._atom_to_variable[key]:
            return set()

        return self._atom_to_variable[key][atom]

    def get_variable_accesses(self, variable: SimVariable, same_name: bool=False) -> List[VariableAccess]:

        if not same_name:
            if variable in self._variable_accesses:
                return list(self._variable_accesses[variable])

            return [ ]

        # find all variables with the same variable name

        vars_list = [ ]

        for var in self._variable_accesses.keys():
            if variable.name == var.name:
                vars_list.append(var)

        accesses = [ ]
        for var in vars_list:
            accesses.extend(self.get_variable_accesses(var))

        return accesses

    def get_variables(self, sort=None, collapse_same_ident=False) -> List[Union[SimStackVariable,SimRegisterVariable]]:
        """
        Get a list of variables.

        :param str or None sort:    Sort of the variable to get.
        :param collapse_same_ident: Whether variables of the same identifier should be collapsed or not.
        :return:                    A list of variables.
        :rtype:                     list
        """

        variables = [ ]

        if collapse_same_ident:
            raise NotImplementedError()

        for var in self._variables:
            if sort == 'stack' and not isinstance(var, SimStackVariable):
                continue
            if sort == 'reg' and not isinstance(var, SimRegisterVariable):
                continue
            variables.append(var)

        return variables

    def get_global_variables(self, addr):
        """
        Get global variable by the address of the variable.

        :param int addr:    Address of the variable.
        :return:            A set of variables or an empty set if no variable exists.
        """
        return self._global_region.get_variables_by_offset(addr)

    def is_phi_variable(self, var):
        """
        Test if `var` is a phi variable.

        :param SimVariable var: The variable instance.
        :return:                True if `var` is a phi variable, False otherwise.
        :rtype:                 bool
        """

        return var in self._phi_variables

    def get_phi_subvariables(self, var):
        """
        Get sub-variables that phi variable `var` represents.

        :param SimVariable var: The variable instance.
        :return:                A set of sub-variables, or an empty set if `var` is not a phi variable.
        :rtype:                 set
        """

        if not self.is_phi_variable(var):
            return set()
        return self._phi_variables[var]

    def get_phi_variables(self, block_addr):
        """
        Get a dict of phi variables and their corresponding variables.

        :param int block_addr:  Address of the block.
        :return:                A dict of phi variables of an empty dict if there are no phi variables at the block.
        :rtype:                 dict
        """

        if block_addr not in self._phi_variables_by_block:
            return dict()
        variables = { }
        for phi in self._phi_variables_by_block[block_addr]:
            variables[phi] = self._phi_variables[phi]
        return variables

    def input_variables(self, exclude_specials=True):
        """
        Get all variables that have never been written to.

        :return: A list of variables that are never written to.
        """

        def has_write_access(accesses):
            return any(acc for acc in accesses if acc.access_type == 'write')

        def has_read_access(accesses):
            return any(acc for acc in accesses if acc.access_type == 'read')

        input_variables = [ ]

        for variable, accesses in self._variable_accesses.items():
            if variable in self._phi_variables:
                # a phi variable is definitely not an input variable
                continue
            if not has_write_access(accesses) and has_read_access(accesses):
                if not exclude_specials or not variable.category:
                    input_variables.append(variable)

        return input_variables

    def assign_variable_names(self, labels=None):
        """
        Assign default names to all SSA variables.

        :param labels:  Known labels in the binary.
        :return:        None
        """

        for var in self._variables:
            if isinstance(var, SimStackVariable):
                if var.name is not None:
                    continue
                if var.ident.startswith('iarg'):
                    var.name = 'arg_%x' % var.offset
                else:
                    var.name = 's_%x' % (-var.offset)
                    # var.name = var.ident
            elif isinstance(var, SimRegisterVariable):
                if var.name is not None:
                    continue
                var.name = var.ident
            elif isinstance(var, SimMemoryVariable):
                if var.name is not None:
                    continue
                if labels is not None and var.addr in labels:
                    var.name = labels[var.addr]
                    if "@@" in var.name:
                        var.name = var.name[:var.name.index("@@")]
                elif var.ident is not None:
                    var.name = var.ident
                else:
                    var.name = "g_%x" % var.addr

    def assign_unified_variable_names(self, labels=None, reset:bool=False):
        """
        Assign default names to all unified variables.

        :param labels:  Known labels in the binary.
        :param reset:   Reset all variable names or not.
        :return:        None
        """

        if not self._unified_variables:
            return

        sorted_stack_variables = [ ]
        sorted_reg_variables = [ ]
        arg_vars = [ ]

        for var in self._unified_variables:
            if isinstance(var, SimStackVariable):
                if not reset and var.name is not None:
                    continue
                if var.ident and var.ident.startswith('iarg_'):
                    arg_vars.append(var)
                else:
                    sorted_stack_variables.append(var)

            elif isinstance(var, SimRegisterVariable):
                if not reset and var.name is not None:
                    continue
                if var.ident and var.ident.startswith('arg_'):
                    arg_vars.append(var)
                else:
                    sorted_reg_variables.append(var)

            elif isinstance(var, SimMemoryVariable):
                if not reset and var.name is not None:
                    continue
                # assign names directly
                if labels is not None and var.addr in labels:
                    var.name = labels[var.addr]
                    if "@@" in var.name:
                        var.name = var.name[:var.name.index("@@")]
                elif var.ident:
                    var.name = var.ident
                else:
                    var.name = "g_%x" % var.addr

        # rename variables in a fixed order
        var_ctr = count(0)

        sorted_stack_variables = sorted(sorted_stack_variables, key=lambda v: v.offset)
        sorted_reg_variables = sorted(sorted_reg_variables, key=lambda v: v.reg)

        for var in chain(sorted_stack_variables, sorted_reg_variables):
            if isinstance(var, SimStackVariable):
                var.name = 'v%d' % next(var_ctr)
            elif isinstance(var, SimRegisterVariable):
                var.name = "v%d" % next(var_ctr)
            # clear the hash cache
            var._hash = None

        # rename arguments but keeping the original order
        arg_ctr = count(0)
        arg_vars = sorted(arg_vars, key=lambda v: int(v.ident[v.ident.index("_")+1:]) if v.ident else 0)
        for var in arg_vars:
            var.name = "a%d" % next(arg_ctr)
            var._hash = None

    def get_variable_type(self, var):
        return self.types.get(var, None)

    def remove_types(self):
        self.types.clear()

    def unify_variables(self) -> None:
        """
        Map SSA variables to a unified variable. Fill in self._unified_variables.

        Stack variables with the same offset share one unified variable, a copy of the first of them.
        Register variables that are connected through phi variables with at least one recorded access share
        one unified variable, a copy of one member of the group. Every other register variable becomes its
        own unified variable.
        """

        stack_vars: Dict[int,List[SimStackVariable]] = defaultdict(list)
        reg_vars: Set[SimRegisterVariable] = set()

        # unify stack variables based on their locations
        for v in self.get_variables():
            if isinstance(v, SimStackVariable):
                stack_vars[v.offset].append(v)
            elif isinstance(v, SimRegisterVariable):
                reg_vars.add(v)

        for same_offset in stack_vars.values():
            unified = same_offset[0].copy()
            for v in same_offset:
                self.set_unified_variable(v, unified)

        # unify register variables based on phi nodes
        graph = networkx.Graph()
        for phi, subvs in self._phi_variables.items():
            # phi nodes that have never been used are discarded
            if isinstance(phi, SimRegisterVariable) and self.get_variable_accesses(phi):
                for subv in subvs:
                    graph.add_edge(phi, subv)

        for component in networkx.connected_components(graph):
            if len(component) <= 1:
                continue
            members = list(component)
            unified = members[0].copy()
            for v in members:
                self.set_unified_variable(v, unified)
                # handled here, so it must not be unified with itself below
                reg_vars.discard(v)

        for v in reg_vars:
            self.set_unified_variable(v, v)

    def set_unified_variable(self, variable: SimVariable, unified: SimVariable) -> None:
        """
        Set the unified variable for a given SSA variable.

        :param variable:    The SSA variable.
        :param unified:     The unified variable.
        :return:            None
        """
        old_unified = self._variables_to_unified_variables.get(variable, None)
        if old_unified is not None and old_unified is not unified:
            self._unified_variables.discard(old_unified)
            if old_unified.renamed and not unified.renamed:
                unified.renamed = True
                unified.name = old_unified.name

        self._unified_variables.add(unified)
        self._variables_to_unified_variables[variable] = unified

    def unified_variable(self, variable: SimVariable) -> Optional[SimVariable]:
        """
        Return the unified variable for a given SSA variable,

        :param variable:    The SSA variable.
        :return:            The unified variable, or None if there is no such SSA variable.
        """

        return self._variables_to_unified_variables.get(variable, None)
示例#2
0
class VariableManagerInternal(object):
    """
    Manage variables for a function. It is meant to be used internally by VariableManager.
    """
    def __init__(self, manager, func_addr=None):
        """
        Create the empty bookkeeping structures for the variables of one function.

        :param manager:     Stored as `self.manager`.
        :param func_addr:   Stored as `self.func_addr`. Defaults to None.
        """
        self.manager = manager

        self.func_addr = func_addr

        self._variables = OrderedSet()  # all variables that are added to any region
        self._stack_region = KeyedRegion()
        self._register_region = KeyedRegion()
        self._live_variables = { }  # a mapping between addresses of program points and live variable collections

        # access records, indexed by variable, instruction address, block address and
        # (block address, statement index)
        self._variable_accesses = defaultdict(set)
        self._insn_to_variable = defaultdict(set)
        self._block_to_variable = defaultdict(set)
        self._stmt_to_variable = defaultdict(set)

        # an independent counter per sort, consumed by next_variable_ident()
        self._variable_counters = {
            'register': count(),
            'stack': count(),
            'argument': count(),
            'phi': count(),
            # make_phi_node() requests 'memory' identifiers for SimMemoryVariable; without this
            # counter next_variable_ident() rejects that sort with a ValueError
            'memory': count(),
        }

    #
    # Public methods
    #

    def next_variable_ident(self, sort):
        if sort not in self._variable_counters:
            raise ValueError('Unsupported variable sort %s' % sort)

        if sort == 'register':
            prefix = "r"
        elif sort == 'stack':
            prefix = "s"
        elif sort == 'argument':
            prefix = 'arg'
        else:
            prefix = "m"

        return "i%s_%d" % (prefix, next(self._variable_counters[sort]))

    def add_variable(self, sort, start, variable):
        if sort == 'stack':
            self._stack_region.add_variable(start, variable)
        elif sort == 'register':
            self._register_region.add_variable(start, variable)
        else:
            raise ValueError('Unsupported sort %s in add_variable().' % sort)

    def set_variable(self, sort, start, variable):
        if sort == 'stack':
            self._stack_region.set_variable(start, variable)
        elif sort == 'register':
            self._register_region.set_variable(start, variable)
        else:
            raise ValueError('Unsupported sort %s in add_variable().' % sort)

    def write_to(self, variable, offset, location, overwrite=False):
        self._record_variable_access('write', variable, offset, location, overwrite=overwrite)

    def read_from(self, variable, offset, location, overwrite=False):
        self._record_variable_access('read', variable, offset, location, overwrite=overwrite)

    def reference_at(self, variable, offset, location, overwrite=False):
        self._record_variable_access('reference', variable, offset, location, overwrite=overwrite)

    def _record_variable_access(self, sort, variable, offset, location, overwrite=False):
        """
        Record an access to a variable in all lookup tables.

        :param sort:        Access sort; the public wrappers pass 'write', 'read' or 'reference'.
        :param variable:    The accessed variable.
        :param offset:      Stored next to the variable as a (variable, offset) tuple.
        :param location:    Object providing `ins_addr`, `block_addr` and `stmt_idx`.
        :param overwrite:   If True, replace the existing entries for this variable, instruction, block and
                            statement instead of adding to them.
        :return:            None
        """
        self._variables.add(variable)

        access = VariableAccess(variable, sort, location)
        var_and_offset = (variable, offset)
        stmt_key = (location.block_addr, location.stmt_idx)

        if overwrite:
            self._variable_accesses[variable] = {access}
            self._insn_to_variable[location.ins_addr] = {var_and_offset}
            self._block_to_variable[location.block_addr] = {var_and_offset}
            self._stmt_to_variable[stmt_key] = {var_and_offset}
            return

        self._variable_accesses[variable].add(access)
        self._insn_to_variable[location.ins_addr].add(var_and_offset)
        self._block_to_variable[location.block_addr].add(var_and_offset)
        self._stmt_to_variable[stmt_key].add(var_and_offset)

    def make_phi_node(self, *variables):
        """
        Create a phi variable that represents all of the given variables.

        Phi variables among `variables` are replaced by the variables they contain. If one of those phi
        variables already contains every resulting variable, it is returned as-is. Otherwise a new phi
        variable is created; its class follows the exact type of one of the resulting variables.

        :param variables:       Variables, possibly phi variables, to merge.
        :return:                An existing or newly created phi variable.
        :raises TypeError:      If the chosen representative is not exactly a SimRegisterVariable,
                                SimMemoryVariable or SimStackVariable.
        :raises ValueError:     Raised by next_variable_ident() if there is no counter for the ident sort.
        """

        # unpack phi nodes
        phi_classes = (SimRegisterVariablePhi, SimStackVariablePhi, SimMemoryVariablePhi)
        seen_phis = [ ]
        members = set()
        for var in variables:
            if isinstance(var, phi_classes):
                seen_phis.append(var)
                members |= var.variables
            else:
                members.add(var)

        # optimization: if a phi node already contains all of the unpacked variables, just return that phi node
        for phi_node in seen_phis:
            if phi_node.variables.issuperset(members):
                return phi_node

        repre = next(iter(members))
        repre_type = type(repre)
        if repre_type is SimRegisterVariable:
            cls, ident_sort = SimRegisterVariablePhi, 'register'
        elif repre_type is SimMemoryVariable:
            cls, ident_sort = SimMemoryVariablePhi, 'memory'
        elif repre_type is SimStackVariable:
            cls, ident_sort = SimStackVariablePhi, 'stack'
        else:
            raise TypeError('make_phi_node(): Unsupported variable type "%s".' % type(repre))

        return cls(
            ident=self.next_variable_ident(ident_sort),
            region=self.func_addr,
            variables=members,
        )

    def set_live_variables(self, addr, register_region, stack_region):
        """
        Remember the live variables at a program point.

        :param addr:            Key under which the live variables are stored.
        :param register_region: First argument of the LiveVariables constructor.
        :param stack_region:    Second argument of the LiveVariables constructor.
        :return:                None
        """
        self._live_variables[addr] = LiveVariables(register_region, stack_region)

    def find_variables_by_insn(self, ins_addr, sort):
        if ins_addr not in self._insn_to_variable:
            return None

        if sort == VariableType.MEMORY or sort == 'memory':
            vars_and_offset = [(var, offset) for var, offset in self._insn_to_variable[ins_addr]
                        if isinstance(var, (SimStackVariable, SimMemoryVariable))]
        elif sort == VariableType.REGISTER or sort == 'register':
            vars_and_offset = [(var, offset) for var, offset in self._insn_to_variable[ins_addr]
                        if isinstance(var, SimRegisterVariable)]
        else:
            l.error('find_variable_by_insn(): Unsupported variable sort "%s".', sort)
            return [ ]

        return vars_and_offset

    def find_variable_by_stmt(self, block_addr, stmt_idx, sort):
        return next(iter(self.find_variables_by_stmt(block_addr, stmt_idx, sort)), None)

    def find_variables_by_stmt(self, block_addr, stmt_idx, sort):

        key = block_addr, stmt_idx

        if key not in self._stmt_to_variable:
            return [ ]

        variables = self._stmt_to_variable[key]
        if not variables:
            return [ ]

        if sort == 'memory':
            var_and_offsets = list((var, offset) for var, offset in self._stmt_to_variable[key]
                                   if isinstance(var, (SimStackVariable, SimMemoryVariable)))
        elif sort == 'register':
            var_and_offsets = list((var, offset) for var, offset in self._stmt_to_variable[key]
                                   if isinstance(var, SimRegisterVariable))
        else:
            l.error('find_variables_by_stmt(): Unsupported variable sort "%s".', sort)
            return [ ]

        return var_and_offsets

    def get_variable_accesses(self, variable, same_name=False):

        if not same_name:
            if variable in self._variable_accesses:
                return self._variable_accesses[variable]

            return [ ]

        # find all variables with the same variable name

        vars_list = [ ]

        for var in self._variable_accesses.keys():
            if variable.name == var.name:
                vars_list.append(var)

        accesses = [ ]
        for var in vars_list:
            accesses.extend(self.get_variable_accesses(var))

        return accesses

    def get_variables(self, sort=None, collapse_same_ident=False):
        """
        Get a list of variables.

        :param str or None sort:    Sort of the variable to get.
        :param collapse_same_ident: Whether variables of the same identifier should be collapsed or not.
        :return:                    A list of variables.
        :rtype:                     list
        """

        variables = [ ]

        if collapse_same_ident:
            raise NotImplementedError()

        for var in self._variables:
            if sort == 'stack' and not isinstance(var, SimStackVariable):
                continue
            if sort == 'reg' and not isinstance(var, SimRegisterVariable):
                continue
            variables.append(var)

        return variables

    def input_variables(self, exclude_specials=True):
        """
        Get all variables that have never been written to.

        :return: A list of variables that are never written to.
        """

        def has_write_access(accesses):
            return any(acc for acc in accesses if acc.access_type == 'write')

        def has_read_access(accesses):
            return any(acc for acc in accesses if acc.access_type == 'read')

        input_variables = [ ]

        for variable, accesses in self._variable_accesses.items():
            if not has_write_access(accesses) and has_read_access(accesses):
                if not exclude_specials or not variable.category:
                    input_variables.append(variable)

        return input_variables

    def assign_variable_names(self):
        """
        Assign default names to all variables.

        :return: None
        """

        for var in self._variables:
            if isinstance(var, SimStackVariable):
                if var.name is not None:
                    continue
                if var.ident.startswith('iarg'):
                    var.name = 'arg_%x' % var.offset
                else:
                    var.name = 's_%x' % (-var.offset)
                    # var.name = var.ident
            elif isinstance(var, SimRegisterVariable):
                if var.name is not None:
                    continue
                var.name = var.ident
示例#3
0
class VariableManagerInternal:
    """
    Manage variables for a function. It is meant to be used internally by VariableManager.

    Variables are placed into per-sort regions (stack, register, global). Every recorded access is indexed by
    variable, instruction address, block address, statement and (optionally) atom so it can be looked up later.
    """
    def __init__(self, manager, func_addr=None):
        """
        :param manager:     The object that owns this instance; it is only stored here.
        :param func_addr:   Address of the function these variables belong to, or None.
        """
        self.manager = manager

        self.func_addr = func_addr

        self._variables = OrderedSet()  # all variables that are added to any region
        self._global_region = KeyedRegion()
        self._stack_region = KeyedRegion()
        self._register_region = KeyedRegion()
        self._live_variables = {}  # a mapping between addresses of program points and live variable collections

        # Access indices. Except for _variable_accesses (which stores VariableAccess objects), the sets hold
        # (variable, offset) tuples.
        self._variable_accesses = defaultdict(set)
        self._insn_to_variable = defaultdict(set)
        self._block_to_variable = defaultdict(set)
        self._stmt_to_variable = defaultdict(set)
        self._atom_to_variable = defaultdict(_defaultdict_set)
        # one counter per variable sort, consumed by next_variable_ident()
        self._variable_counters = {
            'register': count(),
            'stack': count(),
            'argument': count(),
            'phi': count(),
            'global': count(),
        }

        self._phi_variables = {}  # phi variable -> set of variables it represents
        self._phi_variables_by_block = defaultdict(set)  # block address -> phi variables created for that block

        self.types = {}

    #
    # Public methods
    #

    def next_variable_ident(self, sort):
        """
        Generate the next unused identifier for a variable of the given sort.

        :param str sort:    One of the keys of `_variable_counters`.
        :return:            An identifier of the form "i<prefix>_<counter>".
        :rtype:             str
        :raises ValueError: If there is no counter for `sort`.
        """
        if sort not in self._variable_counters:
            raise ValueError('Unsupported variable sort %s' % sort)

        if sort == 'register':
            prefix = "r"
        elif sort == 'stack':
            prefix = "s"
        elif sort == 'argument':
            prefix = 'arg'
        elif sort == 'global':
            prefix = 'g'
        else:
            # every remaining sort that has a counter (currently only 'phi') shares this prefix
            prefix = "m"

        return "i%s_%d" % (prefix, next(self._variable_counters[sort]))

    def add_variable(self, sort, start, variable):
        """
        Add `variable` at `start` to the region that corresponds to `sort`.

        :param str sort:    'stack', 'register' or 'global'.
        :param start:       Position inside the region; passed through to the region's add_variable().
        :param variable:    The variable to add.
        :raises ValueError: If `sort` is not supported.
        """
        if sort == 'stack':
            self._stack_region.add_variable(start, variable)
        elif sort == 'register':
            self._register_region.add_variable(start, variable)
        elif sort == 'global':
            self._global_region.add_variable(start, variable)
        else:
            raise ValueError('Unsupported sort %s in add_variable().' % sort)

    def set_variable(self, sort, start, variable):
        """
        Set `variable` at `start` in the region that corresponds to `sort`.

        :param str sort:    'stack', 'register' or 'global'.
        :param start:       Position inside the region; passed through to the region's set_variable().
        :param variable:    The variable to set.
        :raises ValueError: If `sort` is not supported.
        """
        if sort == 'stack':
            self._stack_region.set_variable(start, variable)
        elif sort == 'register':
            self._register_region.set_variable(start, variable)
        elif sort == 'global':
            self._global_region.set_variable(start, variable)
        else:
            raise ValueError('Unsupported sort %s in set_variable().' % sort)

    def write_to(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a 'write' access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('write', variable, offset, location, overwrite=overwrite, atom=atom)

    def read_from(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a 'read' access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('read', variable, offset, location, overwrite=overwrite, atom=atom)

    def reference_at(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a 'reference' access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('reference', variable, offset, location, overwrite=overwrite, atom=atom)

    def _record_variable_access(self, sort, variable, offset, location, overwrite=False, atom=None):
        """
        Register `variable` and index one access to it.

        :param str sort:        The access type stored in the VariableAccess ('write', 'read' or 'reference').
        :param variable:        The variable being accessed.
        :param offset:          Offset that is stored together with the variable in the lookup indices.
        :param location:        Where the access happens; its `ins_addr`, `block_addr` and `stmt_idx` are used as keys.
        :param bool overwrite:  If True, the index entries for this variable/location are replaced instead of
                                extended.
        :param atom:            Optional atom used as an extra key underneath (block_addr, stmt_idx).
        :return:                None
        """
        self._variables.add(variable)
        var_and_offset = variable, offset
        access = VariableAccess(variable, sort, location)
        stmt_key = location.block_addr, location.stmt_idx

        if overwrite:
            self._variable_accesses[variable] = {access}
            self._insn_to_variable[location.ins_addr] = {var_and_offset}
            self._block_to_variable[location.block_addr] = {var_and_offset}
            self._stmt_to_variable[stmt_key] = {var_and_offset}
            if atom is not None:
                # Must be a set like every other entry; a bare tuple would break later .add() calls and the
                # find_variable(s)_by_atom() lookups.
                self._atom_to_variable[stmt_key][atom] = {var_and_offset}
        else:
            self._variable_accesses[variable].add(access)
            self._insn_to_variable[location.ins_addr].add(var_and_offset)
            self._block_to_variable[location.block_addr].add(var_and_offset)
            self._stmt_to_variable[stmt_key].add(var_and_offset)
            if atom is not None:
                self._atom_to_variable[stmt_key][atom].add(var_and_offset)

    def make_phi_node(self, block_addr, *variables):
        """
        Create a phi variable for variables at block `block_addr`.

        If exactly one of `variables` is already a phi variable, that phi variable is reused: the remaining variables
        are merged into its sub-variables and it is returned. Otherwise a new variable of the same type as the first
        given variable is created and recorded as a phi variable of the block.

        :param int block_addr:  The address of the current block.
        :param variables:       Variables that the phi variable represents.
        :return:                The created (or reused) phi variable.
        :raises ValueError:     If no variable is given.
        :raises TypeError:      If the first variable is of an unsupported type.
        """

        if not variables:
            raise ValueError('make_phi_node(): At least one variable is required.')

        existing_phis = set()
        non_phis = set()
        for var in variables:
            if self.is_phi_variable(var):
                existing_phis.add(var)
            else:
                non_phis.add(var)
        if len(existing_phis) == 1:
            existing_phi = next(iter(existing_phis))
            # in-place union; a no-op when all non-phi variables are already covered
            self._phi_variables[existing_phi] |= non_phis
            return existing_phi

        repre = variables[0]
        repre_type = type(repre)
        # exact type checks on purpose: the original code dispatches on type(), not isinstance()
        if repre_type is SimRegisterVariable:
            a = SimRegisterVariable(repre.reg, repre.size, ident=self.next_variable_ident('register'))
        elif repre_type is SimMemoryVariable:
            # There is no 'memory' counter, so asking for that sort always raised ValueError. Identifiers for
            # memory variables are drawn from the 'global' counter instead.
            a = SimMemoryVariable(repre.addr, repre.size, ident=self.next_variable_ident('global'))
        elif repre_type is SimStackVariable:
            a = SimStackVariable(repre.offset, repre.size, ident=self.next_variable_ident('stack'))
        else:
            raise TypeError('make_phi_node(): Unsupported variable type "%s".' % type(repre))

        # Keep a record of all phi variables
        self._phi_variables[a] = set(variables)
        self._phi_variables_by_block[block_addr].add(a)

        return a

    def set_live_variables(self, addr, register_region, stack_region):
        """
        Store the live register/stack regions for the program point at `addr`.

        :param addr:            Address of the program point.
        :param register_region: The live register region.
        :param stack_region:    The live stack region.
        :return:                None
        """
        self._live_variables[addr] = LiveVariables(register_region, stack_region)

    def find_variables_by_insn(self, ins_addr, sort):
        """
        Find the variables accessed by the instruction at `ins_addr`.

        :param ins_addr:    Address of the instruction.
        :param sort:        VariableType.MEMORY / 'memory' for stack and memory variables, or
                            VariableType.REGISTER / 'register' for register variables.
        :return:            A list of (variable, offset) tuples, an empty list if `sort` is unsupported, or None if
                            no access has been recorded for the instruction.
        """
        if ins_addr not in self._insn_to_variable:
            # NOTE: None (not an empty list) is kept here so existing callers see the same value
            return None

        if sort in (VariableType.MEMORY, 'memory'):
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort in (VariableType.REGISTER, 'register'):
            wanted = SimRegisterVariable
        else:
            l.error('find_variables_by_insn(): Unsupported variable sort "%s".', sort)
            return []

        return [(var, offset) for var, offset in self._insn_to_variable[ins_addr] if isinstance(var, wanted)]

    def find_variable_by_stmt(self, block_addr, stmt_idx, sort):
        """
        Like find_variables_by_stmt(), but return only the first (variable, offset) tuple, or None if there is none.
        """
        return next(iter(self.find_variables_by_stmt(block_addr, stmt_idx, sort)), None)

    def find_variables_by_stmt(self, block_addr: int, stmt_idx: int,
                               sort: str) -> List[Tuple[SimVariable, int]]:
        """
        Find the variables accessed by statement `stmt_idx` of the block at `block_addr`.

        :param block_addr:  Address of the block.
        :param stmt_idx:    Index of the statement inside the block.
        :param sort:        'memory' for stack and memory variables, 'register' for register variables.
        :return:            A list of (variable, offset) tuples; empty if nothing is recorded or `sort` is
                            unsupported.
        """

        # .get() does not insert a default entry into the defaultdict
        variables = self._stmt_to_variable.get((block_addr, stmt_idx))
        if not variables:
            return []

        if sort == 'memory':
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort == 'register':
            wanted = SimRegisterVariable
        else:
            l.error('find_variables_by_stmt(): Unsupported variable sort "%s".', sort)
            return []

        return [(var, offset) for var, offset in variables if isinstance(var, wanted)]

    def find_variable_by_atom(self, block_addr, stmt_idx, atom):
        """
        Like find_variables_by_atom(), but return only one (variable, offset) tuple, or None if there is none.
        """
        return next(iter(self.find_variables_by_atom(block_addr, stmt_idx, atom)), None)

    def find_variables_by_atom(self, block_addr, stmt_idx,
                               atom) -> Set[Tuple[SimVariable, int]]:
        """
        Find the variables recorded for `atom` in statement `stmt_idx` of the block at `block_addr`.

        :param block_addr:  Address of the block.
        :param stmt_idx:    Index of the statement inside the block.
        :param atom:        The atom the access was recorded with.
        :return:            The recorded set of (variable, offset) tuples, or a new empty set if there is none.
        """

        atoms = self._atom_to_variable.get((block_addr, stmt_idx))
        if atoms is None or atom not in atoms:
            return set()

        return atoms[atom]

    def get_variable_accesses(self,
                              variable: SimVariable,
                              same_name: bool = False) -> List[VariableAccess]:
        """
        Get the recorded accesses of a variable.

        :param variable:    The variable to look up.
        :param same_name:   If True, collect the accesses of every variable whose name equals `variable.name`.
        :return:            A new list of accesses; empty if nothing is recorded.
        """

        if not same_name:
            if variable in self._variable_accesses:
                return list(self._variable_accesses[variable])

            return []

        # gather the accesses of all variables with the same variable name
        accesses = []
        for var, var_accesses in self._variable_accesses.items():
            if variable.name == var.name:
                accesses.extend(var_accesses)

        return accesses

    def get_variables(
        self,
        sort=None,
        collapse_same_ident=False
    ) -> List[Union[SimStackVariable, SimRegisterVariable]]:
        """
        Get a list of variables.

        :param str or None sort:    Sort of the variable to get: 'stack' for stack variables, 'reg' or 'register' for
                                    register variables. Any other value returns all variables.
        :param collapse_same_ident: Whether variables of the same identifier should be collapsed or not. Not
                                    implemented.
        :return:                    A list of variables.
        :rtype:                     list
        :raises NotImplementedError: If `collapse_same_ident` is set.
        """

        if collapse_same_ident:
            raise NotImplementedError()

        if sort == 'stack':
            wanted = SimStackVariable
        elif sort in ('reg', 'register'):
            # 'register' is the spelling used by the other methods of this class; 'reg' is kept for compatibility
            wanted = SimRegisterVariable
        else:
            return list(self._variables)

        return [var for var in self._variables if isinstance(var, wanted)]

    def get_global_variables(self, addr):
        """
        Get global variable by the address of the variable.

        :param int addr:    Address of the variable.
        :return:            A set of variables or an empty set if no variable exists.
        """
        return self._global_region.get_variables_by_offset(addr)

    def is_phi_variable(self, var):
        """
        Test if `var` is a phi variable.

        :param SimVariable var: The variable instance.
        :return:                True if `var` is a phi variable, False otherwise.
        :rtype:                 bool
        """

        return var in self._phi_variables

    def get_phi_subvariables(self, var):
        """
        Get sub-variables that phi variable `var` represents.

        :param SimVariable var: The variable instance.
        :return:                A set of sub-variables, or an empty set if `var` is not a phi variable.
        :rtype:                 set
        """

        if not self.is_phi_variable(var):
            return set()
        return self._phi_variables[var]

    def get_phi_variables(self, block_addr):
        """
        Get a dict of phi variables and their corresponding variables.

        :param int block_addr:  Address of the block.
        :return:                A dict of phi variables, or an empty dict if there are no phi variables at the block.
        :rtype:                 dict
        """

        if block_addr not in self._phi_variables_by_block:
            return {}
        return {phi: self._phi_variables[phi] for phi in self._phi_variables_by_block[block_addr]}

    def input_variables(self, exclude_specials=True):
        """
        Get all non-phi variables that are read but never written to.

        :param bool exclude_specials:   If True, variables with a truthy `category` attribute are left out.
        :return:                        A list of variables that are read but never written to.
        """
        def has_access(accesses, access_type):
            # Test the predicate itself rather than the truthiness of the access objects.
            return any(acc.access_type == access_type for acc in accesses)

        input_variables = []

        for variable, accesses in self._variable_accesses.items():
            if variable in self._phi_variables:
                # a phi variable is definitely not an input variable
                continue
            if has_access(accesses, 'write') or not has_access(accesses, 'read'):
                continue
            if exclude_specials and variable.category:
                continue
            input_variables.append(variable)

        return input_variables

    def assign_variable_names(self, labels=None):
        """
        Assign default names to all variables that do not have a name yet.

        :param labels:  Optional mapping from address to label. When a memory variable's address is in it, the label
                        (cut off at the first "@@", if present) is used as its name.
        :return:        None
        """

        for var in self._variables:
            # The stack check intentionally comes before the memory check, as in the original ordering.
            if isinstance(var, SimStackVariable):
                if var.name is not None:
                    continue
                if var.ident.startswith('iarg'):
                    var.name = 'arg_%x' % var.offset
                else:
                    var.name = 's_%x' % (-var.offset)
            elif isinstance(var, SimRegisterVariable):
                if var.name is not None:
                    continue
                var.name = var.ident
            elif isinstance(var, SimMemoryVariable):
                if var.name is not None:
                    continue
                if labels is not None and var.addr in labels:
                    name = labels[var.addr]
                    if "@@" in name:
                        name = name[:name.index("@@")]
                    var.name = name
                else:
                    var.name = var.ident

    def get_variable_type(self, var):
        """
        Return the type recorded for `var` in `self.types`, or None if there is none.
        """
        return self.types.get(var, None)

    def remove_types(self):
        """
        Forget all recorded variable types.
        """
        self.types.clear()
示例#4
0
class GraphVisitor:
    """
    Base class of all graph visitors. A visitor is handed a node of a graph (typically a CFGNode of a control flow
    graph) and reports that node's successors. It also keeps a queue of nodes that still have to be visited.
    """
    def __init__(self):
        self._sorted_nodes = OrderedSet()  # nodes waiting to be visited, kept in traversal order
        self._node_to_index = {}  # node -> position in the order returned by sort_nodes()
        self._reached_fixedpoint = set()  # nodes marked through reached_fixedpoint()

    #
    # Interfaces
    #

    def startpoints(self):
        """
        Return the nodes the traversal should start from. Must be implemented by subclasses.

        :return: A list of startpoints that the traversal should begin with.
        """

        raise NotImplementedError()

    def successors(self, node):
        """
        Return the successors of `node`, which should be part of the graph. Must be implemented by subclasses.

        :param node: The node to work with.
        :return:     A list of successors.
        :rtype:      list
        """

        raise NotImplementedError()

    def predecessors(self, node):
        """
        Return the predecessors of `node`, which should be part of the graph. Must be implemented by subclasses.

        :param node: The node to work with.
        :return:     A list of predecessors.
        :rtype:      list
        """

        raise NotImplementedError()

    def sort_nodes(self, nodes=None):
        """
        Return nodes sorted in an optimal traversal order. Must be implemented by subclasses.

        :param iterable nodes: A collection of nodes to sort. If none, all nodes in the graph will be used to sort.
        :return:               A list of sorted nodes.
        :rtype:                list
        """

        raise NotImplementedError()

    #
    # Public methods
    #

    def nodes(self):
        """
        Iterate over all nodes in the order produced by sort_nodes().

        :return: An iterator of nodes.
        """

        ordered = self.sort_nodes()
        return iter(ordered)

    @deprecated(replacement='nodes')
    def nodes_iter(self):
        """
        (Deprecated) Same as nodes(). Will be removed in the future.
        """
        return self.nodes()

    # Traversal

    def reset(self):
        """
        Drop all traversal state and rebuild the queue and the node index from sort_nodes(). Must be called prior to
        visiting future nodes.

        :return: None
        """

        for container in (self._sorted_nodes, self._node_to_index, self._reached_fixedpoint):
            container.clear()

        for position, node in enumerate(self.sort_nodes()):
            self._node_to_index[node] = position
            self._sorted_nodes.add(node)

    def next_node(self):
        """
        Remove and return the first node of the queue.

        :return: A node in the graph, or None if the queue is empty.
        """

        return self._sorted_nodes.pop(last=False) if self._sorted_nodes else None

    def all_successors(self, node, skip_reached_fixedpoint=False):
        """
        Collect `node` together with every node that can be reached from it through successors().

        :param node:                            A node in the graph.
        :param bool skip_reached_fixedpoint:    If True, do not follow successors that were marked through
                                                reached_fixedpoint().
        :return:                                A set containing `node` and all nodes reached from it.
        :rtype:                                 set
        """

        reached = set()
        pending = [node]

        while pending:
            current = pending.pop()
            reached.add(current)
            for candidate in self.successors(current):
                if candidate in reached:
                    continue
                if skip_reached_fixedpoint and candidate in self._reached_fixedpoint:
                    continue
                pending.append(candidate)

        return reached

    def revisit_successors(self, node, include_self=True):
        """
        Queue the direct successors of `node` for another visit.

        :param node:                The node whose successors should be revisited.
        :param bool include_self:   If True, `node` itself is queued as well.
        :return:                    None
        """

        direct_successors = self.successors(node)

        if include_self:
            self._sorted_nodes.add(node)

        for successor in direct_successors:
            self._sorted_nodes.add(successor)

        self._restore_queue_order()

    def revisit_node(self, node):
        """
        Queue `node` for another visit, without queueing its successors.

        :param node:    The node to revisit in the future.
        :return:        None
        """

        self._sorted_nodes.add(node)
        self._restore_queue_order()

    def reached_fixedpoint(self, node):
        """
        Record `node` as having reached a fixed-point. The record is consulted by all_successors() when it is called
        with `skip_reached_fixedpoint` set.

        :param node: The node to mark as reached fixed-point.
        :return:     None
        """

        self._reached_fixedpoint.add(node)

    def _restore_queue_order(self):
        """
        Rebuild the queue so that its nodes follow the positions stored in `_node_to_index`.

        :return: None
        """

        def position_of(queued):
            return self._node_to_index[queued]

        self._sorted_nodes = OrderedSet(sorted(self._sorted_nodes, key=position_of))
示例#5
0
class VariableManagerInternal:
    """
    Manage variables for a function. It is meant to be used internally by VariableManager.

    Variables are placed into per-sort regions (stack, register). Every recorded access is indexed by variable,
    instruction address, block address, statement and (optionally) atom so it can be looked up later.
    """
    def __init__(self, manager, func_addr=None):
        """
        :param manager:     The object that owns this instance; it is only stored here.
        :param func_addr:   Address of the function these variables belong to, or None.
        """
        self.manager = manager

        self.func_addr = func_addr

        self._variables = OrderedSet()  # all variables that are added to any region
        self._stack_region = KeyedRegion()
        self._register_region = KeyedRegion()
        self._live_variables = { }  # a mapping between addresses of program points and live variable collections

        # Access indices. Except for _variable_accesses (which stores VariableAccess objects), the sets hold
        # (variable, offset) tuples.
        self._variable_accesses = defaultdict(set)
        self._insn_to_variable = defaultdict(set)
        self._block_to_variable = defaultdict(set)
        self._stmt_to_variable = defaultdict(set)
        self._atom_to_variable = defaultdict(_defaultdict_set)
        # one counter per variable sort, consumed by next_variable_ident()
        self._variable_counters = {
            'register': count(),
            'stack': count(),
            'argument': count(),
            'phi': count(),
            # make_phi_node() asks for 'memory' identifiers; without this counter it always raised ValueError
            'memory': count(),
        }

        self._phi_variables = { }  # phi variable -> set of variables it represents
        self._phi_variables_by_block = defaultdict(set)  # block address -> phi variables created for that block

    #
    # Public methods
    #

    def next_variable_ident(self, sort):
        """
        Generate the next unused identifier for a variable of the given sort.

        :param str sort:    One of the keys of `_variable_counters`.
        :return:            An identifier of the form "i<prefix>_<counter>".
        :rtype:             str
        :raises ValueError: If there is no counter for `sort`.
        """
        if sort not in self._variable_counters:
            raise ValueError('Unsupported variable sort %s' % sort)

        if sort == 'register':
            prefix = "r"
        elif sort == 'stack':
            prefix = "s"
        elif sort == 'argument':
            prefix = 'arg'
        else:
            # every remaining sort that has a counter shares this prefix
            prefix = "m"

        return "i%s_%d" % (prefix, next(self._variable_counters[sort]))

    def add_variable(self, sort, start, variable):
        """
        Add `variable` at `start` to the region that corresponds to `sort`.

        :param str sort:    'stack' or 'register'.
        :param start:       Position inside the region; passed through to the region's add_variable().
        :param variable:    The variable to add.
        :raises ValueError: If `sort` is not supported.
        """
        if sort == 'stack':
            self._stack_region.add_variable(start, variable)
        elif sort == 'register':
            self._register_region.add_variable(start, variable)
        else:
            raise ValueError('Unsupported sort %s in add_variable().' % sort)

    def set_variable(self, sort, start, variable):
        """
        Set `variable` at `start` in the region that corresponds to `sort`.

        :param str sort:    'stack' or 'register'.
        :param start:       Position inside the region; passed through to the region's set_variable().
        :param variable:    The variable to set.
        :raises ValueError: If `sort` is not supported.
        """
        if sort == 'stack':
            self._stack_region.set_variable(start, variable)
        elif sort == 'register':
            self._register_region.set_variable(start, variable)
        else:
            raise ValueError('Unsupported sort %s in set_variable().' % sort)

    def write_to(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a 'write' access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('write', variable, offset, location, overwrite=overwrite, atom=atom)

    def read_from(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a 'read' access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('read', variable, offset, location, overwrite=overwrite, atom=atom)

    def reference_at(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a 'reference' access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('reference', variable, offset, location, overwrite=overwrite, atom=atom)

    def _record_variable_access(self, sort, variable, offset, location, overwrite=False, atom=None):
        """
        Register `variable` and index one access to it.

        :param str sort:        The access type stored in the VariableAccess ('write', 'read' or 'reference').
        :param variable:        The variable being accessed.
        :param offset:          Offset that is stored together with the variable in the lookup indices.
        :param location:        Where the access happens; its `ins_addr`, `block_addr` and `stmt_idx` are used as keys.
        :param bool overwrite:  If True, the index entries for this variable/location are replaced instead of
                                extended.
        :param atom:            Optional atom used as an extra key underneath (block_addr, stmt_idx).
        :return:                None
        """
        self._variables.add(variable)
        var_and_offset = variable, offset
        access = VariableAccess(variable, sort, location)
        stmt_key = location.block_addr, location.stmt_idx

        if overwrite:
            self._variable_accesses[variable] = {access}
            self._insn_to_variable[location.ins_addr] = {var_and_offset}
            self._block_to_variable[location.block_addr] = {var_and_offset}
            self._stmt_to_variable[stmt_key] = {var_and_offset}
            if atom is not None:
                # Must be a set like every other entry; a bare tuple would break later .add() calls and the
                # find_variable(s)_by_atom() lookups.
                self._atom_to_variable[stmt_key][atom] = {var_and_offset}
        else:
            self._variable_accesses[variable].add(access)
            self._insn_to_variable[location.ins_addr].add(var_and_offset)
            self._block_to_variable[location.block_addr].add(var_and_offset)
            self._stmt_to_variable[stmt_key].add(var_and_offset)
            if atom is not None:
                self._atom_to_variable[stmt_key][atom].add(var_and_offset)

    def make_phi_node(self, block_addr, *variables):
        """
        Create a phi variable for variables at block `block_addr`.

        If exactly one of `variables` is already a phi variable, that phi variable is reused: the remaining variables
        are merged into its sub-variables and it is returned. Otherwise a new variable of the same type as the first
        given variable is created and recorded as a phi variable of the block.

        :param int block_addr:  The address of the current block.
        :param variables:       Variables that the phi variable represents.
        :return:                The created (or reused) phi variable.
        :raises ValueError:     If no variable is given.
        :raises TypeError:      If the first variable is of an unsupported type.
        """

        if not variables:
            raise ValueError('make_phi_node(): At least one variable is required.')

        existing_phis = set()
        non_phis = set()
        for var in variables:
            if self.is_phi_variable(var):
                existing_phis.add(var)
            else:
                non_phis.add(var)
        if len(existing_phis) == 1:
            existing_phi = next(iter(existing_phis))
            # in-place union; a no-op when all non-phi variables are already covered
            self._phi_variables[existing_phi] |= non_phis
            return existing_phi

        repre = variables[0]
        repre_type = type(repre)
        # exact type checks on purpose: the original code dispatches on type(), not isinstance()
        if repre_type is SimRegisterVariable:
            a = SimRegisterVariable(repre.reg, repre.size, ident=self.next_variable_ident('register'))
        elif repre_type is SimMemoryVariable:
            a = SimMemoryVariable(repre.addr, repre.size, ident=self.next_variable_ident('memory'))
        elif repre_type is SimStackVariable:
            a = SimStackVariable(repre.offset, repre.size, ident=self.next_variable_ident('stack'))
        else:
            raise TypeError('make_phi_node(): Unsupported variable type "%s".' % type(repre))

        # Keep a record of all phi variables
        self._phi_variables[a] = set(variables)
        self._phi_variables_by_block[block_addr].add(a)

        return a

    def set_live_variables(self, addr, register_region, stack_region):
        """
        Store the live register/stack regions for the program point at `addr`.

        :param addr:            Address of the program point.
        :param register_region: The live register region.
        :param stack_region:    The live stack region.
        :return:                None
        """
        self._live_variables[addr] = LiveVariables(register_region, stack_region)

    def find_variables_by_insn(self, ins_addr, sort):
        """
        Find the variables accessed by the instruction at `ins_addr`.

        :param ins_addr:    Address of the instruction.
        :param sort:        VariableType.MEMORY / 'memory' for stack and memory variables, or
                            VariableType.REGISTER / 'register' for register variables.
        :return:            A list of (variable, offset) tuples, an empty list if `sort` is unsupported, or None if
                            no access has been recorded for the instruction.
        """
        if ins_addr not in self._insn_to_variable:
            # NOTE: None (not an empty list) is kept here so existing callers see the same value
            return None

        if sort in (VariableType.MEMORY, 'memory'):
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort in (VariableType.REGISTER, 'register'):
            wanted = SimRegisterVariable
        else:
            l.error('find_variables_by_insn(): Unsupported variable sort "%s".', sort)
            return [ ]

        return [(var, offset) for var, offset in self._insn_to_variable[ins_addr] if isinstance(var, wanted)]

    def find_variable_by_stmt(self, block_addr, stmt_idx, sort):
        """
        Like find_variables_by_stmt(), but return only the first (variable, offset) tuple, or None if there is none.
        """
        return next(iter(self.find_variables_by_stmt(block_addr, stmt_idx, sort)), None)

    def find_variables_by_stmt(self, block_addr, stmt_idx, sort):
        """
        Find the variables accessed by statement `stmt_idx` of the block at `block_addr`.

        :param block_addr:  Address of the block.
        :param stmt_idx:    Index of the statement inside the block.
        :param str sort:    'memory' for stack and memory variables, 'register' for register variables.
        :return:            A list of (variable, offset) tuples; empty if nothing is recorded or `sort` is
                            unsupported.
        :rtype:             list
        """

        # .get() does not insert a default entry into the defaultdict
        variables = self._stmt_to_variable.get((block_addr, stmt_idx))
        if not variables:
            return [ ]

        if sort == 'memory':
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort == 'register':
            wanted = SimRegisterVariable
        else:
            l.error('find_variables_by_stmt(): Unsupported variable sort "%s".', sort)
            return [ ]

        return [(var, offset) for var, offset in variables if isinstance(var, wanted)]

    def find_variable_by_atom(self, block_addr, stmt_idx, atom):
        """
        Like find_variables_by_atom(), but return only one (variable, offset) tuple, or None if there is none.
        """
        return next(iter(self.find_variables_by_atom(block_addr, stmt_idx, atom)), None)

    def find_variables_by_atom(self, block_addr, stmt_idx, atom):
        """
        Find the variables recorded for `atom` in statement `stmt_idx` of the block at `block_addr`.

        :param block_addr:  Address of the block.
        :param stmt_idx:    Index of the statement inside the block.
        :param atom:        The atom the access was recorded with.
        :return:            The recorded set of (variable, offset) tuples, or an empty list if there is none.
        """

        atoms = self._atom_to_variable.get((block_addr, stmt_idx))
        if atoms is None or atom not in atoms:
            return [ ]

        return atoms[atom]

    def get_variable_accesses(self, variable, same_name=False):
        """
        Get the recorded accesses of a variable.

        :param variable:        The variable to look up.
        :param bool same_name:  If True, collect the accesses of every variable whose name equals `variable.name`.
        :return:                A new list of accesses; empty if nothing is recorded.
        :rtype:                 list
        """

        if not same_name:
            if variable in self._variable_accesses:
                # Copy into a list: every other path returns a list, and callers should not be handed the
                # internal set.
                return list(self._variable_accesses[variable])

            return [ ]

        # gather the accesses of all variables with the same variable name
        accesses = [ ]
        for var, var_accesses in self._variable_accesses.items():
            if variable.name == var.name:
                accesses.extend(var_accesses)

        return accesses

    def get_variables(self, sort=None, collapse_same_ident=False):
        """
        Get a list of variables.

        :param str or None sort:    Sort of the variable to get: 'stack' for stack variables, 'reg' or 'register' for
                                    register variables. Any other value returns all variables.
        :param collapse_same_ident: Whether variables of the same identifier should be collapsed or not. Not
                                    implemented.
        :return:                    A list of variables.
        :rtype:                     list
        :raises NotImplementedError: If `collapse_same_ident` is set.
        """

        if collapse_same_ident:
            raise NotImplementedError()

        if sort == 'stack':
            wanted = SimStackVariable
        elif sort in ('reg', 'register'):
            # 'register' is the spelling used by the other methods of this class; 'reg' is kept for compatibility
            wanted = SimRegisterVariable
        else:
            return list(self._variables)

        return [var for var in self._variables if isinstance(var, wanted)]

    def is_phi_variable(self, var):
        """
        Test if `var` is a phi variable.

        :param SimVariable var: The variable instance.
        :return:                True if `var` is a phi variable, False otherwise.
        :rtype:                 bool
        """

        return var in self._phi_variables

    def get_phi_subvariables(self, var):
        """
        Get sub-variables that phi variable `var` represents.

        :param SimVariable var: The variable instance.
        :return:                A set of sub-variables, or an empty set if `var` is not a phi variable.
        :rtype:                 set
        """

        if not self.is_phi_variable(var):
            return set()
        return self._phi_variables[var]

    def get_phi_variables(self, block_addr):
        """
        Get a dict of phi variables and their corresponding variables.

        :param int block_addr:  Address of the block.
        :return:                A dict of phi variables, or an empty dict if there are no phi variables at the block.
        :rtype:                 dict
        """

        if block_addr not in self._phi_variables_by_block:
            return { }
        return {phi: self._phi_variables[phi] for phi in self._phi_variables_by_block[block_addr]}

    def input_variables(self, exclude_specials=True):
        """
        Get all variables that are read but never written to.

        :param bool exclude_specials:   If True, variables with a truthy `category` attribute are left out.
        :return:                        A list of variables that are read but never written to.
        """

        def has_access(accesses, access_type):
            # Test the predicate itself rather than the truthiness of the access objects.
            return any(acc.access_type == access_type for acc in accesses)

        input_variables = [ ]

        for variable, accesses in self._variable_accesses.items():
            if has_access(accesses, 'write') or not has_access(accesses, 'read'):
                continue
            if exclude_specials and variable.category:
                continue
            input_variables.append(variable)

        return input_variables

    def assign_variable_names(self):
        """
        Assign default names to all stack and register variables that do not have a name yet.

        Stack variables whose identifier starts with 'iarg' are named 'arg_<hex offset>', other stack variables
        's_<hex of negated offset>'. Register variables are named after their identifier.

        :return: None
        """

        for var in self._variables:
            if isinstance(var, SimStackVariable):
                if var.name is not None:
                    continue
                if var.ident.startswith('iarg'):
                    var.name = 'arg_%x' % var.offset
                else:
                    var.name = 's_%x' % (-var.offset)
            elif isinstance(var, SimRegisterVariable):
                if var.name is not None:
                    continue
                var.name = var.ident
示例#6
0
class GraphVisitor(object):
    """
    A graph visitor takes a node in the graph and returns its successors. Typically it visits a control flow graph, and
    returns successors of a CFGNode each time. This is the base class of all graph visitors.
    """
    def __init__(self):
        """
        Create a visitor with empty traversal state. `reset()` fills the state in from `sort_nodes()`.
        """

        # nodes that have been marked through reached_fixedpoint()
        self._reached_fixedpoint = set()
        # node -> position of the node in the order returned by sort_nodes()
        self._node_to_index = dict()
        # nodes that are still waiting to be handed out by next_node()
        self._sorted_nodes = OrderedSet()

    #
    # Interfaces
    #

    def startpoints(self):
        """
        Interface method: provide the nodes from which the traversal starts.

        The base class has no implementation and always raises; subclasses must override it.

        :return: A list of startpoints that the traversal should begin with.
        :raises NotImplementedError: Always, in this base class.
        """

        raise NotImplementedError

    def successors(self, node):
        """
        Interface method: provide the successors of a node. The node should be in the graph.

        The base class has no implementation and always raises; subclasses must override it.

        :param node: The node whose successors are requested.
        :return:     A list of successors.
        :rtype:      list
        :raises NotImplementedError: Always, in this base class.
        """

        raise NotImplementedError

    def predecessors(self, node):
        """
        Interface method: provide the predecessors of a node. The node should be in the graph.

        The base class has no implementation and always raises; subclasses must override it.

        :param node: The node whose predecessors are requested.
        :return:     A list of predecessors.
        :rtype:      list
        :raises NotImplementedError: Always, in this base class.
        """

        raise NotImplementedError

    def sort_nodes(self, nodes=None):
        """
        Interface method: provide the nodes sorted in an optimal traversal order.

        The base class has no implementation and always raises; subclasses must override it.

        :param iterable nodes: A collection of nodes to sort. If None, all nodes in the graph are sorted.
        :return:               A list of sorted nodes.
        :rtype:                list
        :raises NotImplementedError: Always, in this base class.
        """

        raise NotImplementedError

    #
    # Public methods
    #

    def nodes(self):
        """
        Iterate over the nodes in the order produced by `sort_nodes()`.

        :return: An iterator over the result of `self.sort_nodes()` called without arguments.
        """

        return iter(self.sort_nodes())

    @deprecated(replacement='nodes')
    def nodes_iter(self):
        """
        (Deprecated) Alias of `nodes()`, kept for backward compatibility. Will be removed in the future.

        :return: Whatever `self.nodes()` returns.
        """
        return self.nodes()

    # Traversal

    def reset(self):
        """
        Discard the current traversal state and rebuild it from `sort_nodes()`. Must be called prior to visiting
        future nodes.

        Afterwards every node returned by `sort_nodes()` is pending again, has its position recorded in
        `self._node_to_index`, and no node is marked as having reached a fixed point.

        :return: None
        """

        # throw away whatever the previous traversal left behind
        for state in (self._sorted_nodes, self._node_to_index, self._reached_fixedpoint):
            state.clear()

        # re-seed the pending set and remember each node's position for later re-ordering
        for position, graph_node in enumerate(self.sort_nodes()):
            self._node_to_index[graph_node] = position
            self._sorted_nodes.add(graph_node)

    def next_node(self):
        """
        Remove and return the first pending node.

        :return: The node at the front of `self._sorted_nodes`, or None if no node is pending.
        """

        pending = self._sorted_nodes
        # last=False takes the node from the front of the ordered set
        return pending.pop(last=False) if pending else None

    def all_successors(self, node, skip_reached_fixedpoint=False):
        """
        Collect the given node together with every node reachable from it through `self.successors()`.

        :param node:                            A node in the graph. It is always part of the result.
        :param bool skip_reached_fixedpoint:    If True, successors that are in `self._reached_fixedpoint` are
                                                neither included nor followed.
        :return:     A set containing the node and all of its (transitive) successors.
        :rtype:      set
        """

        reachable = set()

        worklist = [ node ]
        while worklist:
            current = worklist.pop()
            if current in reachable:
                # The node was pushed more than once before it was first expanded; expanding it again would only
                # repeat the self.successors() call without adding anything new.
                continue
            reachable.add(current)
            worklist.extend(
                succ for succ in self.successors(current)
                if succ not in reachable
                and (not skip_reached_fixedpoint or succ not in self._reached_fixedpoint)
            )

        return reachable

    def revisit(self, node, include_self=True):
        """
        Schedule the direct successors of a node, and optionally the node itself, to be visited again.

        The pending set is re-sorted afterwards by the positions recorded in `self._node_to_index`, so every
        pending node must have an entry there (`reset()` creates the entries).

        :param node:                The node to revisit in the future.
        :param bool include_self:   If True, the node itself is re-queued in addition to its successors.
        :return:                    None
        """

        direct_successors = self.successors(node)

        pending = self._sorted_nodes
        if include_self:
            pending.add(node)
        for successor in direct_successors:
            pending.add(successor)

        # rebuild the pending set so that it follows the traversal order again
        self._sorted_nodes = OrderedSet(sorted(pending, key=self._node_to_index.__getitem__))

    def reached_fixedpoint(self, node):
        """
        Record that a node has reached its fixed point.

        The node is added to `self._reached_fixedpoint`, the set that `all_successors()` consults when it is
        called with `skip_reached_fixedpoint=True`.

        :param node: The node to mark as reached fixed-point.
        :return:     None
        """

        self._reached_fixedpoint.add(node)