class VariableManagerInternal:
    """
    Manage variables for a function. It is meant to be used internally by VariableManager.
    """

    # Prefix used in generated identifiers for each variable sort. Sorts that have a counter but no entry here
    # (currently only 'phi') fall back to "m".
    _IDENT_PREFIXES = {
        'register': "r",
        'stack': "s",
        'argument': "arg",
        'global': "g",
    }

    def __init__(self, manager, func_addr=None):
        """
        :param manager:     The object that owns this internal manager; stored as-is.
        :param func_addr:   Address of the function that this instance manages variables for.
        """
        self.manager = manager

        self.func_addr = func_addr

        self._variables = OrderedSet()  # every variable that has at least one recorded access
        self._global_region = KeyedRegion()
        self._stack_region = KeyedRegion()
        self._register_region = KeyedRegion()
        self._live_variables = { }  # a mapping between addresses of program points and live variable collections

        # variable -> set of VariableAccess
        self._variable_accesses = defaultdict(set)
        # ins_addr / block_addr / (block_addr, stmt_idx) -> set of (variable, offset)
        self._insn_to_variable = defaultdict(set)
        self._block_to_variable = defaultdict(set)
        self._stmt_to_variable = defaultdict(set)
        # (block_addr, stmt_idx) -> atom -> set of (variable, offset)
        # NOTE(review): _defaultdict_set is defined elsewhere; presumably it returns defaultdict(set) - confirm.
        self._atom_to_variable = defaultdict(_defaultdict_set)
        self._variable_counters = {
            'register': count(),
            'stack': count(),
            'argument': count(),
            'phi': count(),
            'global': count(),
        }

        self._unified_variables: Set[SimVariable] = set()
        self._variables_to_unified_variables: Dict[SimVariable, SimVariable] = { }

        self._phi_variables = { }  # phi variable -> set of the variables it represents
        self._phi_variables_by_block = defaultdict(set)  # block address -> set of phi variables

        self.types = { }  # variable -> type, see get_variable_type()

    #
    # Public methods
    #

    def next_variable_ident(self, sort):
        """
        Generate the next unique identifier for a variable of the given sort.

        :param str sort:    One of the keys of `self._variable_counters`.
        :return:            An identifier in the form of "i<prefix>_<counter>".
        :rtype:             str
        :raises ValueError: If `sort` has no counter.
        """
        if sort not in self._variable_counters:
            raise ValueError('Unsupported variable sort %s' % sort)

        prefix = self._IDENT_PREFIXES.get(sort, "m")
        return "i%s_%d" % (prefix, next(self._variable_counters[sort]))

    def _region_by_sort(self, sort):
        """
        Return the region that holds variables of the given sort, or None if the sort has no region.
        """
        if sort == 'stack':
            return self._stack_region
        if sort == 'register':
            return self._register_region
        if sort == 'global':
            return self._global_region
        return None

    def add_variable(self, sort, start, variable):
        """
        Add a variable to the region of the given sort.

        :param str sort:    'stack', 'register', or 'global'.
        :param start:       The offset/address inside the region where the variable starts.
        :param variable:    The variable to add.
        :raises ValueError: If `sort` is not supported.
        """
        region = self._region_by_sort(sort)
        if region is None:
            raise ValueError('Unsupported sort %s in add_variable().' % sort)
        region.add_variable(start, variable)

    def set_variable(self, sort, start, variable: SimVariable):
        """
        Set a variable in the region of the given sort. If exactly one variable with the same identifier already
        exists at `start` and it has been renamed, its name is carried over to `variable` first.

        :param str sort:    'stack', 'register', or 'global'.
        :param start:       The offset/address inside the region where the variable starts.
        :param variable:    The variable to set.
        :raises ValueError: If `sort` is not supported.
        """
        region = self._region_by_sort(sort)
        if region is None:
            raise ValueError('Unsupported sort %s in set_variable().' % sort)

        existing = [x for x in region.get_variables_by_offset(start) if x.ident == variable.ident]
        if len(existing) == 1:
            var = existing[0]
            if var.renamed:
                # keep the user-provided name alive across the replacement
                variable.name = var.name
                variable.renamed = True
        # otherwise: implicitly overwrite or add
        region.set_variable(start, variable)

    def write_to(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a write access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('write', variable, offset, location, overwrite=overwrite, atom=atom)

    def read_from(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a read access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('read', variable, offset, location, overwrite=overwrite, atom=atom)

    def reference_at(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a reference to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('reference', variable, offset, location, overwrite=overwrite, atom=atom)

    def _record_variable_access(self, sort, variable, offset, location, overwrite=False, atom=None):
        """
        Record an access to a variable and index it by instruction, block, statement, and (optionally) atom.

        :param str sort:        The access type: 'write', 'read', or 'reference'.
        :param variable:        The variable being accessed.
        :param offset:          Offset of the access; stored together with the variable.
        :param location:        An object with `ins_addr`, `block_addr`, and `stmt_idx` attributes.
        :param bool overwrite:  If True, replace whatever was recorded under each index key instead of adding to it.
        :param atom:            Optional key under which the access is additionally indexed per statement.
        :return:                None
        """
        self._variables.add(variable)
        var_and_offset = variable, offset
        stmt_key = location.block_addr, location.stmt_idx
        access = VariableAccess(variable, sort, location)

        if overwrite:
            self._variable_accesses[variable] = {access}
            self._insn_to_variable[location.ins_addr] = {var_and_offset}
            self._block_to_variable[location.block_addr] = {var_and_offset}
            self._stmt_to_variable[stmt_key] = {var_and_offset}
            if atom is not None:
                # Must be a set, like every other index: the non-overwrite branch calls .add() on this slot and
                # find_variables_by_atom() hands it out as a set of (variable, offset) pairs.
                self._atom_to_variable[stmt_key][atom] = {var_and_offset}
        else:
            self._variable_accesses[variable].add(access)
            self._insn_to_variable[location.ins_addr].add(var_and_offset)
            self._block_to_variable[location.block_addr].add(var_and_offset)
            self._stmt_to_variable[stmt_key].add(var_and_offset)
            if atom is not None:
                self._atom_to_variable[stmt_key][atom].add(var_and_offset)

    def make_phi_node(self, block_addr, *variables):
        """
        Create a phi variable for variables at block `block_addr`.

        If exactly one of `variables` is already a phi variable, that phi variable is reused (and extended with the
        remaining variables if necessary) instead of creating a new one.

        :param int block_addr:  The address of the current block.
        :param variables:       Variables that the phi variable represents.
        :return:                The created (or reused) phi variable.
        :raises TypeError:      If the first variable is of an unsupported type.
        """

        existing_phis = set()
        non_phis = set()
        for var in variables:
            if self.is_phi_variable(var):
                existing_phis.add(var)
            else:
                non_phis.add(var)

        if len(existing_phis) == 1:
            existing_phi = next(iter(existing_phis))
            if not non_phis.issubset(self.get_phi_subvariables(existing_phi)):
                # Update phi variables
                self._phi_variables[existing_phi] |= non_phis
            return existing_phi

        # The new phi variable mirrors the first variable.
        # NOTE(review): exact type comparison (not isinstance) is kept on purpose; presumably some variable classes
        # derive from others - confirm before relaxing it.
        repre = next(iter(variables))
        repre_type = type(repre)
        if repre_type is SimRegisterVariable:
            ident_sort = 'register'
            a = SimRegisterVariable(repre.reg, repre.size, ident=self.next_variable_ident(ident_sort))
        elif repre_type is SimMemoryVariable:
            ident_sort = 'global'
            a = SimMemoryVariable(repre.addr, repre.size, ident=self.next_variable_ident(ident_sort))
        elif repre_type is SimStackVariable:
            ident_sort = 'stack'
            a = SimStackVariable(repre.offset, repre.size, ident=self.next_variable_ident(ident_sort))
        else:
            raise TypeError('make_phi_node(): Unsupported variable type "%s".' % type(repre))

        # Keep a record of all phi variables
        self._phi_variables[a] = set(variables)
        self._phi_variables_by_block[block_addr].add(a)

        return a

    def set_live_variables(self, addr, register_region, stack_region):
        """
        Store the live register and stack regions for the program point at `addr`.
        """
        lv = LiveVariables(register_region, stack_region)
        self._live_variables[addr] = lv

    def find_variables_by_insn(self, ins_addr, sort):
        """
        Find variables accessed by the instruction at `ins_addr`.

        :param ins_addr:    Address of the instruction.
        :param sort:        VariableType.MEMORY / 'memory' or VariableType.REGISTER / 'register'.
        :return:            A list of (variable, offset) tuples, None if nothing is recorded for the instruction, or
                            an empty list if `sort` is unsupported (an error is logged in that case).
        """
        if ins_addr not in self._insn_to_variable:
            return None

        if sort in (VariableType.MEMORY, 'memory'):
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort in (VariableType.REGISTER, 'register'):
            wanted = SimRegisterVariable
        else:
            l.error('find_variable_by_insn(): Unsupported variable sort "%s".', sort)
            return [ ]

        return [(var, offset) for var, offset in self._insn_to_variable[ins_addr] if isinstance(var, wanted)]

    def find_variable_by_stmt(self, block_addr, stmt_idx, sort):
        """
        Return one (variable, offset) tuple accessed by the given statement, or None if there is none.
        """
        return next(iter(self.find_variables_by_stmt(block_addr, stmt_idx, sort)), None)

    def find_variables_by_stmt(self, block_addr: int, stmt_idx: int, sort: str) -> List[Tuple[SimVariable,int]]:
        """
        Find variables accessed by the statement `stmt_idx` of block `block_addr`.

        :param block_addr:  Address of the block.
        :param stmt_idx:    Index of the statement inside the block.
        :param sort:        'memory' or 'register'.
        :return:            A list of (variable, offset) tuples. Empty if nothing is recorded or `sort` is
                            unsupported (an error is logged in the latter case).
        """
        key = block_addr, stmt_idx

        if key not in self._stmt_to_variable:
            return [ ]

        variables = self._stmt_to_variable[key]
        if not variables:
            return [ ]

        if sort == 'memory':
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort == 'register':
            wanted = SimRegisterVariable
        else:
            l.error('find_variables_by_stmt(): Unsupported variable sort "%s".', sort)
            return [ ]

        return [(var, offset) for var, offset in variables if isinstance(var, wanted)]

    def find_variable_by_atom(self, block_addr, stmt_idx, atom):
        """
        Return one (variable, offset) tuple recorded for `atom` at the given statement, or None if there is none.
        """
        return next(iter(self.find_variables_by_atom(block_addr, stmt_idx, atom)), None)

    def find_variables_by_atom(self, block_addr, stmt_idx, atom) -> Set[Tuple[SimVariable, int]]:
        """
        Return the set of (variable, offset) tuples recorded for `atom` at the given statement. The set is empty if
        nothing has been recorded.
        """
        key = block_addr, stmt_idx

        if key not in self._atom_to_variable:
            return set()

        if atom not in self._atom_to_variable[key]:
            return set()

        return self._atom_to_variable[key][atom]

    def get_variable_accesses(self, variable: SimVariable, same_name: bool=False) -> List[VariableAccess]:
        """
        Get all recorded accesses to a variable.

        :param variable:    The variable.
        :param same_name:   If True, accesses to every variable sharing `variable.name` are returned.
        :return:            A list of accesses; empty if there are none.
        """
        if not same_name:
            if variable in self._variable_accesses:
                return list(self._variable_accesses[variable])
            return [ ]

        # find all variables with the same variable name
        vars_list = [var for var in self._variable_accesses if variable.name == var.name]

        accesses = [ ]
        for var in vars_list:
            accesses.extend(self.get_variable_accesses(var))

        return accesses

    def get_variables(self, sort=None, collapse_same_ident=False) -> List[Union[SimStackVariable,SimRegisterVariable]]:
        """
        Get a list of variables.

        :param str or None sort:    Sort of the variable to get: 'stack', 'reg', or None for all variables.
        :param collapse_same_ident: Whether variables of the same identifier should be collapsed or not. Not
                                    implemented.
        :return:                    A list of variables.
        :rtype:                     list
        :raises NotImplementedError: If `collapse_same_ident` is True.
        """

        variables = [ ]

        if collapse_same_ident:
            raise NotImplementedError()

        for var in self._variables:
            if sort == 'stack' and not isinstance(var, SimStackVariable):
                continue
            if sort == 'reg' and not isinstance(var, SimRegisterVariable):
                continue
            variables.append(var)

        return variables

    def get_global_variables(self, addr):
        """
        Get global variable by the address of the variable.

        :param int addr:    Address of the variable.
        :return:            A set of variables or an empty set if no variable exists.
        """
        return self._global_region.get_variables_by_offset(addr)

    def is_phi_variable(self, var):
        """
        Test if `var` is a phi variable.

        :param SimVariable var: The variable instance.
        :return:                True if `var` is a phi variable, False otherwise.
        :rtype:                 bool
        """

        return var in self._phi_variables

    def get_phi_subvariables(self, var):
        """
        Get sub-variables that phi variable `var` represents.

        :param SimVariable var: The variable instance.
        :return:                A set of sub-variables, or an empty set if `var` is not a phi variable.
        :rtype:                 set
        """

        if not self.is_phi_variable(var):
            return set()
        return self._phi_variables[var]

    def get_phi_variables(self, block_addr):
        """
        Get a dict of phi variables and their corresponding variables.

        :param int block_addr:  Address of the block.
        :return:                A dict of phi variables, or an empty dict if there are no phi variables at the block.
        :rtype:                 dict
        """

        if block_addr not in self._phi_variables_by_block:
            return dict()
        return {phi: self._phi_variables[phi] for phi in self._phi_variables_by_block[block_addr]}

    def input_variables(self, exclude_specials=True):
        """
        Get all variables that are read from but never written to. Phi variables are never reported.

        :param bool exclude_specials:   If True, variables with a truthy `category` attribute are skipped.
        :return:                        A list of variables that are never written to.
        """

        def has_write_access(accesses):
            return any(acc for acc in accesses if acc.access_type == 'write')

        def has_read_access(accesses):
            return any(acc for acc in accesses if acc.access_type == 'read')

        input_variables = [ ]

        for variable, accesses in self._variable_accesses.items():
            if variable in self._phi_variables:
                # a phi variable is definitely not an input variable
                continue
            if not has_write_access(accesses) and has_read_access(accesses):
                if not exclude_specials or not variable.category:
                    input_variables.append(variable)

        return input_variables

    def assign_variable_names(self, labels=None):
        """
        Assign default names to all SSA variables that do not have a name yet.

        :param labels:  Known labels in the binary, as a mapping from addresses to names.
        :return:        None
        """

        for var in self._variables:
            if var.name is not None:
                # never clobber an existing name
                continue
            if isinstance(var, SimStackVariable):
                if var.ident.startswith('iarg'):
                    var.name = 'arg_%x' % var.offset
                else:
                    var.name = 's_%x' % (-var.offset)
            elif isinstance(var, SimRegisterVariable):
                var.name = var.ident
            elif isinstance(var, SimMemoryVariable):
                if labels is not None and var.addr in labels:
                    var.name = labels[var.addr]
                    if "@@" in var.name:
                        # drop everything from "@@" on
                        var.name = var.name[:var.name.index("@@")]
                elif var.ident is not None:
                    var.name = var.ident
                else:
                    var.name = "g_%x" % var.addr

    def assign_unified_variable_names(self, labels=None, reset:bool=False):
        """
        Assign default names to all unified variables.

        Memory variables are named right away (label, identifier, or "g_<addr>"). Stack and register variables are
        named "v<N>" in a fixed order (stack variables by offset, then register variables by register), and
        arguments are named "a<N>" ordered by the number embedded in their identifiers.

        :param labels:  Known labels in the binary, as a mapping from addresses to names.
        :param reset:   Reset all variable names or not.
        :return:        None
        """

        if not self._unified_variables:
            return

        stack_vars = [ ]
        reg_vars = [ ]
        arg_vars = [ ]

        for var in self._unified_variables:
            if not reset and var.name is not None:
                continue
            if isinstance(var, SimStackVariable):
                if var.ident and var.ident.startswith('iarg_'):
                    arg_vars.append(var)
                else:
                    stack_vars.append(var)
            elif isinstance(var, SimRegisterVariable):
                if var.ident and var.ident.startswith('arg_'):
                    arg_vars.append(var)
                else:
                    reg_vars.append(var)
            elif isinstance(var, SimMemoryVariable):
                # assign names directly
                if labels is not None and var.addr in labels:
                    var.name = labels[var.addr]
                    if "@@" in var.name:
                        var.name = var.name[:var.name.index("@@")]
                elif var.ident:
                    var.name = var.ident
                else:
                    var.name = "g_%x" % var.addr

        # rename variables in a fixed order
        var_ctr = count(0)

        stack_vars.sort(key=lambda v: v.offset)
        reg_vars.sort(key=lambda v: v.reg)

        for var in chain(stack_vars, reg_vars):
            var.name = 'v%d' % next(var_ctr)
            # clear the hash cache
            var._hash = None

        # rename arguments but keeping the original order
        arg_ctr = count(0)
        arg_vars.sort(key=lambda v: int(v.ident[v.ident.index("_")+1:]) if v.ident else 0)
        for var in arg_vars:
            var.name = "a%d" % next(arg_ctr)
            var._hash = None

    def get_variable_type(self, var):
        """
        Return the type recorded for `var` in `self.types`, or None if there is none.
        """
        return self.types.get(var, None)

    def remove_types(self):
        """
        Forget all recorded variable types.
        """
        self.types.clear()

    def unify_variables(self) -> None:
        """
        Map SSA variables to a unified variable. Fill in self._unified_variables.

        Stack variables are unified by offset. Register variables are unified when they are connected through
        phi variables that have at least one recorded access; every remaining register variable is its own unified
        variable.
        """

        stack_vars: Dict[int,List[SimStackVariable]] = defaultdict(list)
        reg_vars: Set[SimRegisterVariable] = set()

        # unify stack variables based on their locations
        for v in self.get_variables():
            if isinstance(v, SimStackVariable):
                stack_vars[v.offset].append(v)
            elif isinstance(v, SimRegisterVariable):
                reg_vars.add(v)

        for vs in stack_vars.values():
            unified = vs[0].copy()
            for v in vs:
                self.set_unified_variable(v, unified)

        # unify register variables based on phi nodes
        graph = networkx.Graph()
        for v, subvs in self._phi_variables.items():
            if not isinstance(v, SimRegisterVariable):
                continue
            if not self.get_variable_accesses(v):
                # this phi node has never been used - discard it
                continue
            for subv in subvs:
                graph.add_edge(v, subv)

        for nodes in networkx.connected_components(graph):
            if len(nodes) <= 1:
                continue
            nodes = list(nodes)
            unified = nodes[0].copy()
            for v in nodes:
                self.set_unified_variable(v, unified)
                reg_vars.discard(v)

        # whatever is left is not connected to any used phi variable
        for v in reg_vars:
            self.set_unified_variable(v, v)

    def set_unified_variable(self, variable: SimVariable, unified: SimVariable) -> None:
        """
        Set the unified variable for a given SSA variable. If the SSA variable was mapped to a different, renamed
        unified variable before, the old name is carried over unless `unified` has been renamed as well.

        :param variable:    The SSA variable.
        :param unified:     The unified variable.
        :return:            None
        """
        old_unified = self._variables_to_unified_variables.get(variable, None)
        if old_unified is not None and old_unified is not unified:
            self._unified_variables.discard(old_unified)
            if old_unified.renamed and not unified.renamed:
                unified.renamed = True
                unified.name = old_unified.name

        self._unified_variables.add(unified)
        self._variables_to_unified_variables[variable] = unified

    def unified_variable(self, variable: SimVariable) -> Optional[SimVariable]:
        """
        Return the unified variable for a given SSA variable.

        :param variable:    The SSA variable.
        :return:            The unified variable, or None if there is no such SSA variable.
        """

        return self._variables_to_unified_variables.get(variable, None)
class VariableManagerInternal(object):
    """
    Manage variables for a function. It is meant to be used internally by VariableManager.
    """

    def __init__(self, manager, func_addr=None):
        """
        :param manager:     The object that owns this internal manager; stored as-is.
        :param func_addr:   Address of the function that this instance manages variables for.
        """
        self.manager = manager

        self.func_addr = func_addr

        self._variables = OrderedSet()  # every variable that has at least one recorded access
        self._stack_region = KeyedRegion()
        self._register_region = KeyedRegion()
        self._live_variables = { }  # a mapping between addresses of program points and live variable collections

        # variable -> set of VariableAccess
        self._variable_accesses = defaultdict(set)
        # ins_addr / block_addr / (block_addr, stmt_idx) -> set of (variable, offset)
        self._insn_to_variable = defaultdict(set)
        self._block_to_variable = defaultdict(set)
        self._stmt_to_variable = defaultdict(set)
        self._variable_counters = {
            'register': count(),
            'stack': count(),
            'argument': count(),
            'phi': count(),
        }

    #
    # Public methods
    #

    def next_variable_ident(self, sort):
        """
        Generate the next unique identifier for a variable of the given sort.

        :param str sort:    One of the keys of `self._variable_counters`.
        :return:            An identifier in the form of "i<prefix>_<counter>".
        :rtype:             str
        :raises ValueError: If `sort` has no counter.
        """
        if sort not in self._variable_counters:
            raise ValueError('Unsupported variable sort %s' % sort)

        if sort == 'register':
            prefix = "r"
        elif sort == 'stack':
            prefix = "s"
        elif sort == 'argument':
            prefix = 'arg'
        else:
            # only 'phi' ends up here with the counters defined in __init__
            prefix = "m"

        return "i%s_%d" % (prefix, next(self._variable_counters[sort]))

    def add_variable(self, sort, start, variable):
        """
        Add a variable to the region of the given sort.

        :param str sort:    'stack' or 'register'.
        :param start:       The offset inside the region where the variable starts.
        :param variable:    The variable to add.
        :raises ValueError: If `sort` is not supported.
        """
        if sort == 'stack':
            self._stack_region.add_variable(start, variable)
        elif sort == 'register':
            self._register_region.add_variable(start, variable)
        else:
            raise ValueError('Unsupported sort %s in add_variable().' % sort)

    def set_variable(self, sort, start, variable):
        """
        Set a variable in the region of the given sort.

        :param str sort:    'stack' or 'register'.
        :param start:       The offset inside the region where the variable starts.
        :param variable:    The variable to set.
        :raises ValueError: If `sort` is not supported.
        """
        if sort == 'stack':
            self._stack_region.set_variable(start, variable)
        elif sort == 'register':
            self._register_region.set_variable(start, variable)
        else:
            # the message used to name add_variable() by mistake
            raise ValueError('Unsupported sort %s in set_variable().' % sort)

    def write_to(self, variable, offset, location, overwrite=False):
        """
        Record a write access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('write', variable, offset, location, overwrite=overwrite)

    def read_from(self, variable, offset, location, overwrite=False):
        """
        Record a read access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('read', variable, offset, location, overwrite=overwrite)

    def reference_at(self, variable, offset, location, overwrite=False):
        """
        Record a reference to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('reference', variable, offset, location, overwrite=overwrite)

    def _record_variable_access(self, sort, variable, offset, location, overwrite=False):
        """
        Record an access to a variable and index it by instruction, block, and statement.

        :param str sort:        The access type: 'write', 'read', or 'reference'.
        :param variable:        The variable being accessed.
        :param offset:          Offset of the access; stored together with the variable.
        :param location:        An object with `ins_addr`, `block_addr`, and `stmt_idx` attributes.
        :param bool overwrite:  If True, replace whatever was recorded under each index key instead of adding to it.
        :return:                None
        """
        self._variables.add(variable)
        var_and_offset = variable, offset
        stmt_key = location.block_addr, location.stmt_idx
        access = VariableAccess(variable, sort, location)

        if overwrite:
            self._variable_accesses[variable] = {access}
            self._insn_to_variable[location.ins_addr] = {var_and_offset}
            self._block_to_variable[location.block_addr] = {var_and_offset}
            self._stmt_to_variable[stmt_key] = {var_and_offset}
        else:
            self._variable_accesses[variable].add(access)
            self._insn_to_variable[location.ins_addr].add(var_and_offset)
            self._block_to_variable[location.block_addr].add(var_and_offset)
            self._stmt_to_variable[stmt_key].add(var_and_offset)

    def make_phi_node(self, *variables):
        """
        Create a phi node for the given variables. Phi nodes among `variables` are unpacked first; if one of them
        already covers every unpacked variable, it is returned instead of creating a new node.

        :param variables:   Variables (or phi nodes) that the phi node should represent.
        :return:            A phi node.
        :raises TypeError:  If the representative variable is of an unsupported type.
        """

        # unpack phi nodes
        existing_phi = [ ]
        unpacked = set()
        for var in variables:
            if isinstance(var, (SimRegisterVariablePhi, SimStackVariablePhi, SimMemoryVariablePhi)):
                unpacked |= var.variables
                existing_phi.append(var)
            else:
                unpacked.add(var)

        # optimization: if a phi node already contains all of the unpacked variables, just return that phi node
        for phi_node in existing_phi:
            if phi_node.variables.issuperset(unpacked):
                return phi_node

        variables = unpacked

        # The phi node class is picked after an arbitrary member of the set.
        # NOTE(review): exact type comparison (not isinstance) is kept on purpose; presumably some variable classes
        # derive from others - confirm before relaxing it.
        repre = next(iter(variables))
        repre_type = type(repre)
        if repre_type is SimRegisterVariable:
            cls = SimRegisterVariablePhi
            ident_sort = 'register'
        elif repre_type is SimMemoryVariable:
            cls = SimMemoryVariablePhi
            # There is no 'memory' counter, so asking for one always raised ValueError. The 'phi' counter exists,
            # is not shared with register/stack identifiers, and yields the "im_<N>" identifiers.
            ident_sort = 'phi'
        elif repre_type is SimStackVariable:
            cls = SimStackVariablePhi
            ident_sort = 'stack'
        else:
            raise TypeError('make_phi_node(): Unsupported variable type "%s".' % type(repre))

        a = cls(ident=self.next_variable_ident(ident_sort),
                region=self.func_addr,
                variables=variables,
                )
        return a

    def set_live_variables(self, addr, register_region, stack_region):
        """
        Store the live register and stack regions for the program point at `addr`.
        """
        lv = LiveVariables(register_region, stack_region)
        self._live_variables[addr] = lv

    def find_variables_by_insn(self, ins_addr, sort):
        """
        Find variables accessed by the instruction at `ins_addr`.

        :param ins_addr:    Address of the instruction.
        :param sort:        VariableType.MEMORY / 'memory' or VariableType.REGISTER / 'register'.
        :return:            A list of (variable, offset) tuples, None if nothing is recorded for the instruction, or
                            an empty list if `sort` is unsupported (an error is logged in that case).
        """
        if ins_addr not in self._insn_to_variable:
            return None

        if sort in (VariableType.MEMORY, 'memory'):
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort in (VariableType.REGISTER, 'register'):
            wanted = SimRegisterVariable
        else:
            l.error('find_variable_by_insn(): Unsupported variable sort "%s".', sort)
            return [ ]

        return [(var, offset) for var, offset in self._insn_to_variable[ins_addr] if isinstance(var, wanted)]

    def find_variable_by_stmt(self, block_addr, stmt_idx, sort):
        """
        Return one (variable, offset) tuple accessed by the given statement, or None if there is none.
        """
        return next(iter(self.find_variables_by_stmt(block_addr, stmt_idx, sort)), None)

    def find_variables_by_stmt(self, block_addr, stmt_idx, sort):
        """
        Find variables accessed by the statement `stmt_idx` of block `block_addr`.

        :param block_addr:  Address of the block.
        :param stmt_idx:    Index of the statement inside the block.
        :param sort:        'memory' or 'register'.
        :return:            A list of (variable, offset) tuples. Empty if nothing is recorded or `sort` is
                            unsupported (an error is logged in the latter case).
        """
        key = block_addr, stmt_idx

        if key not in self._stmt_to_variable:
            return [ ]

        variables = self._stmt_to_variable[key]
        if not variables:
            return [ ]

        if sort == 'memory':
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort == 'register':
            wanted = SimRegisterVariable
        else:
            l.error('find_variables_by_stmt(): Unsupported variable sort "%s".', sort)
            return [ ]

        return [(var, offset) for var, offset in variables if isinstance(var, wanted)]

    def get_variable_accesses(self, variable, same_name=False):
        """
        Get all recorded accesses to a variable.

        :param variable:        The variable.
        :param bool same_name:  If True, accesses to every variable sharing `variable.name` are returned.
        :return:                A list of accesses; empty if there are none. The list is always a fresh copy, so
                                callers cannot modify the internal bookkeeping through it.
        :rtype:                 list
        """

        if not same_name:
            if variable in self._variable_accesses:
                return list(self._variable_accesses[variable])
            return [ ]

        # find all variables with the same variable name
        vars_list = [var for var in self._variable_accesses if variable.name == var.name]

        accesses = [ ]
        for var in vars_list:
            accesses.extend(self.get_variable_accesses(var))

        return accesses

    def get_variables(self, sort=None, collapse_same_ident=False):
        """
        Get a list of variables.

        :param str or None sort:    Sort of the variable to get: 'stack', 'reg', or None for all variables.
        :param collapse_same_ident: Whether variables of the same identifier should be collapsed or not. Not
                                    implemented.
        :return:                    A list of variables.
        :rtype:                     list
        :raises NotImplementedError: If `collapse_same_ident` is True.
        """

        variables = [ ]

        if collapse_same_ident:
            raise NotImplementedError()

        for var in self._variables:
            if sort == 'stack' and not isinstance(var, SimStackVariable):
                continue
            if sort == 'reg' and not isinstance(var, SimRegisterVariable):
                continue
            variables.append(var)

        return variables

    def input_variables(self, exclude_specials=True):
        """
        Get all variables that are read from but never written to.

        :param bool exclude_specials:   If True, variables with a truthy `category` attribute are skipped.
        :return:                        A list of variables that are never written to.
        """

        def has_write_access(accesses):
            return any(acc for acc in accesses if acc.access_type == 'write')

        def has_read_access(accesses):
            return any(acc for acc in accesses if acc.access_type == 'read')

        input_variables = [ ]

        for variable, accesses in self._variable_accesses.items():
            if not has_write_access(accesses) and has_read_access(accesses):
                if not exclude_specials or not variable.category:
                    input_variables.append(variable)

        return input_variables

    def assign_variable_names(self):
        """
        Assign default names to all stack and register variables that do not have a name yet.

        :return: None
        """

        for var in self._variables:
            if var.name is not None:
                # never clobber an existing name
                continue
            if isinstance(var, SimStackVariable):
                if var.ident.startswith('iarg'):
                    var.name = 'arg_%x' % var.offset
                else:
                    var.name = 's_%x' % (-var.offset)
            elif isinstance(var, SimRegisterVariable):
                var.name = var.ident
class VariableManagerInternal:
    """
    Manage variables for a function. It is meant to be used internally by VariableManager.
    """

    def __init__(self, manager, func_addr=None):
        """
        :param manager:     The object that owns this internal manager; stored as-is.
        :param func_addr:   Address of the function that this instance manages variables for.
        """
        self.manager = manager

        self.func_addr = func_addr

        self._variables = OrderedSet()  # every variable that has at least one recorded access
        self._global_region = KeyedRegion()
        self._stack_region = KeyedRegion()
        self._register_region = KeyedRegion()
        self._live_variables = {}  # a mapping between addresses of program points and live variable collections

        # variable -> set of VariableAccess
        self._variable_accesses = defaultdict(set)
        # ins_addr / block_addr / (block_addr, stmt_idx) -> set of (variable, offset)
        self._insn_to_variable = defaultdict(set)
        self._block_to_variable = defaultdict(set)
        self._stmt_to_variable = defaultdict(set)
        # (block_addr, stmt_idx) -> atom -> set of (variable, offset)
        # NOTE(review): _defaultdict_set is defined elsewhere; presumably it returns defaultdict(set) - confirm.
        self._atom_to_variable = defaultdict(_defaultdict_set)
        self._variable_counters = {
            'register': count(),
            'stack': count(),
            'argument': count(),
            'phi': count(),
            'global': count(),
        }

        self._phi_variables = {}  # phi variable -> set of the variables it represents
        self._phi_variables_by_block = defaultdict(set)  # block address -> set of phi variables

        self.types = {}  # variable -> type, see get_variable_type()

    #
    # Public methods
    #

    def next_variable_ident(self, sort):
        """
        Generate the next unique identifier for a variable of the given sort.

        :param str sort:    One of the keys of `self._variable_counters`.
        :return:            An identifier in the form of "i<prefix>_<counter>".
        :rtype:             str
        :raises ValueError: If `sort` has no counter.
        """
        if sort not in self._variable_counters:
            raise ValueError('Unsupported variable sort %s' % sort)

        if sort == 'register':
            prefix = "r"
        elif sort == 'stack':
            prefix = "s"
        elif sort == 'argument':
            prefix = 'arg'
        elif sort == 'global':
            prefix = 'g'
        else:
            # only 'phi' ends up here with the counters defined in __init__
            prefix = "m"

        return "i%s_%d" % (prefix, next(self._variable_counters[sort]))

    def add_variable(self, sort, start, variable):
        """
        Add a variable to the region of the given sort.

        :param str sort:    'stack', 'register', or 'global'.
        :param start:       The offset/address inside the region where the variable starts.
        :param variable:    The variable to add.
        :raises ValueError: If `sort` is not supported.
        """
        if sort == 'stack':
            self._stack_region.add_variable(start, variable)
        elif sort == 'register':
            self._register_region.add_variable(start, variable)
        elif sort == 'global':
            self._global_region.add_variable(start, variable)
        else:
            raise ValueError('Unsupported sort %s in add_variable().' % sort)

    def set_variable(self, sort, start, variable):
        """
        Set a variable in the region of the given sort.

        :param str sort:    'stack', 'register', or 'global'.
        :param start:       The offset/address inside the region where the variable starts.
        :param variable:    The variable to set.
        :raises ValueError: If `sort` is not supported.
        """
        if sort == 'stack':
            self._stack_region.set_variable(start, variable)
        elif sort == 'register':
            self._register_region.set_variable(start, variable)
        elif sort == 'global':
            self._global_region.set_variable(start, variable)
        else:
            # the message used to name add_variable() by mistake
            raise ValueError('Unsupported sort %s in set_variable().' % sort)

    def write_to(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a write access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('write', variable, offset, location, overwrite=overwrite, atom=atom)

    def read_from(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a read access to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('read', variable, offset, location, overwrite=overwrite, atom=atom)

    def reference_at(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a reference to `variable`. See _record_variable_access() for the parameters.
        """
        self._record_variable_access('reference', variable, offset, location, overwrite=overwrite, atom=atom)

    def _record_variable_access(self, sort, variable, offset, location, overwrite=False, atom=None):
        """
        Record an access to a variable and index it by instruction, block, statement, and (optionally) atom.

        :param str sort:        The access type: 'write', 'read', or 'reference'.
        :param variable:        The variable being accessed.
        :param offset:          Offset of the access; stored together with the variable.
        :param location:        An object with `ins_addr`, `block_addr`, and `stmt_idx` attributes.
        :param bool overwrite:  If True, replace whatever was recorded under each index key instead of adding to it.
        :param atom:            Optional key under which the access is additionally indexed per statement.
        :return:                None
        """
        self._variables.add(variable)
        var_and_offset = variable, offset
        stmt_key = location.block_addr, location.stmt_idx
        access = VariableAccess(variable, sort, location)

        if overwrite:
            self._variable_accesses[variable] = {access}
            self._insn_to_variable[location.ins_addr] = {var_and_offset}
            self._block_to_variable[location.block_addr] = {var_and_offset}
            self._stmt_to_variable[stmt_key] = {var_and_offset}
            if atom is not None:
                # Must be a set, like every other index: the non-overwrite branch calls .add() on this slot and
                # find_variables_by_atom() hands it out as a set of (variable, offset) pairs.
                self._atom_to_variable[stmt_key][atom] = {var_and_offset}
        else:
            self._variable_accesses[variable].add(access)
            self._insn_to_variable[location.ins_addr].add(var_and_offset)
            self._block_to_variable[location.block_addr].add(var_and_offset)
            self._stmt_to_variable[stmt_key].add(var_and_offset)
            if atom is not None:
                self._atom_to_variable[stmt_key][atom].add(var_and_offset)

    def make_phi_node(self, block_addr, *variables):
        """
        Create a phi variable for variables at block `block_addr`.

        If exactly one of `variables` is already a phi variable, that phi variable is reused (and extended with the
        remaining variables if necessary) instead of creating a new one.

        :param int block_addr:  The address of the current block.
        :param variables:       Variables that the phi variable represents.
        :return:                The created (or reused) phi variable.
        :raises TypeError:      If the first variable is of an unsupported type.
        """

        existing_phis = set()
        non_phis = set()
        for var in variables:
            if self.is_phi_variable(var):
                existing_phis.add(var)
            else:
                non_phis.add(var)

        if len(existing_phis) == 1:
            existing_phi = next(iter(existing_phis))
            if not non_phis.issubset(self.get_phi_subvariables(existing_phi)):
                # Update phi variables
                self._phi_variables[existing_phi] |= non_phis
            return existing_phi

        # The new phi variable mirrors the first variable.
        # NOTE(review): exact type comparison (not isinstance) is kept on purpose; presumably some variable classes
        # derive from others - confirm before relaxing it.
        repre = next(iter(variables))
        repre_type = type(repre)
        if repre_type is SimRegisterVariable:
            ident_sort = 'register'
            a = SimRegisterVariable(repre.reg, repre.size, ident=self.next_variable_ident(ident_sort))
        elif repre_type is SimMemoryVariable:
            # memory variables draw identifiers from the 'global' counter; a 'memory' counter does not exist and
            # asking for one raised ValueError
            ident_sort = 'global'
            a = SimMemoryVariable(repre.addr, repre.size, ident=self.next_variable_ident(ident_sort))
        elif repre_type is SimStackVariable:
            ident_sort = 'stack'
            a = SimStackVariable(repre.offset, repre.size, ident=self.next_variable_ident(ident_sort))
        else:
            raise TypeError('make_phi_node(): Unsupported variable type "%s".' % type(repre))

        # Keep a record of all phi variables
        self._phi_variables[a] = set(variables)
        self._phi_variables_by_block[block_addr].add(a)

        return a

    def set_live_variables(self, addr, register_region, stack_region):
        """
        Store the live register and stack regions for the program point at `addr`.
        """
        lv = LiveVariables(register_region, stack_region)
        self._live_variables[addr] = lv

    def find_variables_by_insn(self, ins_addr, sort):
        """
        Find variables accessed by the instruction at `ins_addr`.

        :param ins_addr:    Address of the instruction.
        :param sort:        VariableType.MEMORY / 'memory' or VariableType.REGISTER / 'register'.
        :return:            A list of (variable, offset) tuples, None if nothing is recorded for the instruction, or
                            an empty list if `sort` is unsupported (an error is logged in that case).
        """
        if ins_addr not in self._insn_to_variable:
            return None

        if sort in (VariableType.MEMORY, 'memory'):
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort in (VariableType.REGISTER, 'register'):
            wanted = SimRegisterVariable
        else:
            l.error('find_variable_by_insn(): Unsupported variable sort "%s".', sort)
            return []

        return [(var, offset) for var, offset in self._insn_to_variable[ins_addr] if isinstance(var, wanted)]

    def find_variable_by_stmt(self, block_addr, stmt_idx, sort):
        """
        Return one (variable, offset) tuple accessed by the given statement, or None if there is none.
        """
        return next(iter(self.find_variables_by_stmt(block_addr, stmt_idx, sort)), None)

    def find_variables_by_stmt(self, block_addr: int, stmt_idx: int, sort: str) -> List[Tuple[SimVariable, int]]:
        """
        Find variables accessed by the statement `stmt_idx` of block `block_addr`.

        :param block_addr:  Address of the block.
        :param stmt_idx:    Index of the statement inside the block.
        :param sort:        'memory' or 'register'.
        :return:            A list of (variable, offset) tuples. Empty if nothing is recorded or `sort` is
                            unsupported (an error is logged in the latter case).
        """
        key = block_addr, stmt_idx

        if key not in self._stmt_to_variable:
            return []

        variables = self._stmt_to_variable[key]
        if not variables:
            return []

        if sort == 'memory':
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort == 'register':
            wanted = SimRegisterVariable
        else:
            l.error('find_variables_by_stmt(): Unsupported variable sort "%s".', sort)
            return []

        return [(var, offset) for var, offset in variables if isinstance(var, wanted)]

    def find_variable_by_atom(self, block_addr, stmt_idx, atom):
        """
        Return one (variable, offset) tuple recorded for `atom` at the given statement, or None if there is none.
        """
        return next(iter(self.find_variables_by_atom(block_addr, stmt_idx, atom)), None)

    def find_variables_by_atom(self, block_addr, stmt_idx, atom) -> Set[Tuple[SimVariable, int]]:
        """
        Return the set of (variable, offset) tuples recorded for `atom` at the given statement. The set is empty if
        nothing has been recorded.
        """
        key = block_addr, stmt_idx

        if key not in self._atom_to_variable:
            return set()

        if atom not in self._atom_to_variable[key]:
            return set()

        return self._atom_to_variable[key][atom]

    def get_variable_accesses(self, variable: SimVariable, same_name: bool = False) -> List[VariableAccess]:
        """
        Get all recorded accesses to a variable.

        :param variable:    The variable.
        :param same_name:   If True, accesses to every variable sharing `variable.name` are returned.
        :return:            A list of accesses; empty if there are none.
        """
        if not same_name:
            if variable in self._variable_accesses:
                return list(self._variable_accesses[variable])
            return []

        # find all variables with the same variable name
        vars_list = [var for var in self._variable_accesses if variable.name == var.name]

        accesses = []
        for var in vars_list:
            accesses.extend(self.get_variable_accesses(var))

        return accesses

    def get_variables(self, sort=None, collapse_same_ident=False) -> List[Union[SimStackVariable, SimRegisterVariable]]:
        """
        Get a list of variables.

        :param str or None sort:    Sort of the variable to get: 'stack', 'reg', or None for all variables.
        :param collapse_same_ident: Whether variables of the same identifier should be collapsed or not. Not
                                    implemented.
        :return:                    A list of variables.
        :rtype:                     list
        :raises NotImplementedError: If `collapse_same_ident` is True.
        """

        variables = []

        if collapse_same_ident:
            raise NotImplementedError()

        for var in self._variables:
            if sort == 'stack' and not isinstance(var, SimStackVariable):
                continue
            if sort == 'reg' and not isinstance(var, SimRegisterVariable):
                continue
            variables.append(var)

        return variables

    def get_global_variables(self, addr):
        """
        Get global variable by the address of the variable.

        :param int addr:    Address of the variable.
        :return:            A set of variables or an empty set if no variable exists.
        """
        return self._global_region.get_variables_by_offset(addr)

    def is_phi_variable(self, var):
        """
        Test if `var` is a phi variable.

        :param SimVariable var: The variable instance.
        :return:                True if `var` is a phi variable, False otherwise.
        :rtype:                 bool
        """

        return var in self._phi_variables

    def get_phi_subvariables(self, var):
        """
        Get sub-variables that phi variable `var` represents.

        :param SimVariable var: The variable instance.
        :return:                A set of sub-variables, or an empty set if `var` is not a phi variable.
        :rtype:                 set
        """

        if not self.is_phi_variable(var):
            return set()
        return self._phi_variables[var]

    def get_phi_variables(self, block_addr):
        """
        Get a dict of phi variables and their corresponding variables.

        :param int block_addr:  Address of the block.
        :return:                A dict of phi variables, or an empty dict if there are no phi variables at the block.
        :rtype:                 dict
        """

        if block_addr not in self._phi_variables_by_block:
            return dict()
        return {phi: self._phi_variables[phi] for phi in self._phi_variables_by_block[block_addr]}

    def input_variables(self, exclude_specials=True):
        """
        Get all variables that are read from but never written to. Phi variables are never reported.

        :param bool exclude_specials:   If True, variables with a truthy `category` attribute are skipped.
        :return:                        A list of variables that are never written to.
        """

        def has_write_access(accesses):
            return any(acc for acc in accesses if acc.access_type == 'write')

        def has_read_access(accesses):
            return any(acc for acc in accesses if acc.access_type == 'read')

        input_variables = []

        for variable, accesses in self._variable_accesses.items():
            if variable in self._phi_variables:
                # a phi variable is definitely not an input variable
                continue
            if not has_write_access(accesses) and has_read_access(accesses):
                if not exclude_specials or not variable.category:
                    input_variables.append(variable)

        return input_variables

    def assign_variable_names(self, labels=None):
        """
        Assign default names to all variables that do not have a name yet.

        :param labels:  Known labels in the binary, as a mapping from addresses to names.
        :return:        None
        """

        for var in self._variables:
            if var.name is not None:
                # never clobber an existing name
                continue
            if isinstance(var, SimStackVariable):
                if var.ident.startswith('iarg'):
                    var.name = 'arg_%x' % var.offset
                else:
                    var.name = 's_%x' % (-var.offset)
            elif isinstance(var, SimRegisterVariable):
                var.name = var.ident
            elif isinstance(var, SimMemoryVariable):
                if labels is not None and var.addr in labels:
                    var.name = labels[var.addr]
                    if "@@" in var.name:
                        # drop everything from "@@" on
                        var.name = var.name[:var.name.index("@@")]
                else:
                    var.name = var.ident

    def get_variable_type(self, var):
        """
        Return the type recorded for `var` in `self.types`, or None if there is none.
        """
        return self.types.get(var, None)

    def remove_types(self):
        """
        Forget all recorded variable types.
        """
        self.types.clear()
class GraphVisitor:
    """
    Base class of all graph visitors.

    A graph visitor is handed a node of a graph and answers with that node's successors. The typical use is walking
    a control flow graph, where the successors of a CFGNode are returned on each step. Subclasses describe the graph
    by implementing the methods in the "Interfaces" section; this class keeps the queue of nodes that still have to
    be visited.
    """

    def __init__(self):
        self._reached_fixedpoint = set()  # nodes flagged through reached_fixedpoint()
        self._node_to_index = {}  # node -> position in the order produced by sort_nodes()
        self._sorted_nodes = OrderedSet()  # nodes waiting to be handed out by next_node()

    #
    # Interfaces
    #

    def startpoints(self):
        """
        Get the nodes at which a traversal should begin. Must be implemented by subclasses.

        :return: A list of startpoints that the traversal should begin with.
        """

        raise NotImplementedError()

    def successors(self, node):
        """
        Get the successors of `node`, which should be part of the graph. Must be implemented by subclasses.

        :param node: The node to work with.
        :return:     A list of successors.
        :rtype:      list
        """

        raise NotImplementedError()

    def predecessors(self, node):
        """
        Get the predecessors of `node`, which should be part of the graph. Must be implemented by subclasses.

        :param node: The node to work with.
        :return:     A list of predecessors.
        :rtype:      list
        """

        raise NotImplementedError()

    def sort_nodes(self, nodes=None):
        """
        Get a list of nodes arranged in an optimal traversal order. Must be implemented by subclasses.

        :param iterable nodes: A collection of nodes to sort. If none, all nodes in the graph will be used to sort.
        :return:               A list of sorted nodes.
        :rtype:                list
        """

        raise NotImplementedError()

    #
    # Public methods
    #

    def nodes(self):
        """
        Iterate over the nodes in the order produced by `sort_nodes()`.

        :return: An iterator of nodes.
        """

        ordered = self.sort_nodes()
        return iter(ordered)

    @deprecated(replacement='nodes')
    def nodes_iter(self):
        """
        (Deprecated) Return an iterator of nodes following an optimal traversal order. Will be removed in the future.
        """

        return self.nodes()

    # Traversal

    def reset(self):
        """
        Discard the traversal state and queue every node returned by `sort_nodes()` again. Must be called prior to
        visiting future nodes.

        :return: None
        """

        self._sorted_nodes.clear()
        self._node_to_index.clear()
        self._reached_fixedpoint.clear()

        position = 0
        for node in self.sort_nodes():
            self._node_to_index[node] = position
            self._sorted_nodes.add(node)
            position += 1

    def next_node(self):
        """
        Take the first queued node off the queue.

        :return: A node in the graph, or None when no node is queued.
        """

        if self._sorted_nodes:
            return self._sorted_nodes.pop(last=False)
        return None

    def all_successors(self, node, skip_reached_fixedpoint=False):
        """
        Collect everything reachable from `node` by repeatedly following `successors()`. The returned set contains
        `node` itself.

        :param node:                         A node in the graph.
        :param bool skip_reached_fixedpoint: If True, successors that were flagged via `reached_fixedpoint()` are
                                             neither collected nor expanded.
        :return:                             A set of nodes that are all successors to the given node.
        :rtype:                              set
        """

        reached = set()
        worklist = [node]

        while worklist:
            current = worklist.pop()
            reached.add(current)
            for succ in self.successors(current):
                if succ in reached:
                    continue
                if skip_reached_fixedpoint and succ in self._reached_fixedpoint:
                    continue
                worklist.append(succ)

        return reached

    def revisit_successors(self, node, include_self=True):
        """
        Queue the direct successors of `node` for another visit, and `node` itself when `include_self` is True.

        :param node:              The node whose successors should be revisited in the future.
        :param bool include_self: Whether `node` itself is queued as well.
        :return:                  None
        """

        pending = self._sorted_nodes
        succs = self.successors(node)

        if include_self:
            pending.add(node)
        for succ in succs:
            pending.add(succ)

        # restore the order that reset() recorded
        index_of = self._node_to_index.__getitem__
        self._sorted_nodes = OrderedSet(sorted(pending, key=index_of))

    def revisit_node(self, node):
        """
        Queue `node` for another visit. Its successors are not queued.

        :param node: The node to revisit in the future.
        :return:     None
        """

        pending = self._sorted_nodes
        pending.add(node)

        # restore the order that reset() recorded
        index_of = self._node_to_index.__getitem__
        self._sorted_nodes = OrderedSet(sorted(pending, key=index_of))

    def reached_fixedpoint(self, node):
        """
        Flag `node` as having reached its fixed-point. Flagged nodes are left out by `all_successors()` when it is
        called with `skip_reached_fixedpoint=True`.

        :param node: The node to mark as reached fixed-point.
        :return:     None
        """

        self._reached_fixedpoint.add(node)
class VariableManagerInternal:
    """
    Manage variables for a function. It is meant to be used internally by VariableManager.

    The class keeps the stack and register regions of the function, records every access (read, write, reference)
    to a variable together with the program location of that access, and tracks phi variables.
    """

    def __init__(self, manager, func_addr=None):
        """
        :param manager:   The object that owns this instance; it is only stored here.
        :param func_addr: Address of the function whose variables are managed, or None.
        """

        self.manager = manager

        self.func_addr = func_addr

        self._variables = OrderedSet()  # every variable that has at least one recorded access

        self._stack_region = KeyedRegion()
        self._register_region = KeyedRegion()
        self._live_variables = { }  # a mapping between addresses of program points and live variable collections

        self._variable_accesses = defaultdict(set)  # variable -> set of VariableAccess
        self._insn_to_variable = defaultdict(set)  # instruction address -> set of (variable, offset)
        self._block_to_variable = defaultdict(set)  # block address -> set of (variable, offset)
        self._stmt_to_variable = defaultdict(set)  # (block address, statement index) -> set of (variable, offset)
        # (block address, statement index) -> atom -> set of (variable, offset)
        # NOTE(review): assumes _defaultdict_set() returns a defaultdict(set) - confirm at its definition
        self._atom_to_variable = defaultdict(_defaultdict_set)

        # 'memory' and 'phi' identifiers both get the "m" prefix in next_variable_ident(), so the two sorts draw from
        # one shared counter to keep the generated identifiers unique.
        memory_counter = count()
        self._variable_counters = {
            'register': count(),
            'stack': count(),
            'argument': count(),
            'phi': memory_counter,
            'memory': memory_counter,  # requested by make_phi_node() for memory variables
        }

        self._phi_variables = { }  # phi variable -> set of variables it represents
        self._phi_variables_by_block = defaultdict(set)  # block address -> set of phi variables

    #
    # Public methods
    #

    def next_variable_ident(self, sort):
        """
        Generate the next unused identifier for a variable of the given sort.

        :param str sort: One of the keys of `_variable_counters`.
        :return:         An identifier of the form "i<prefix>_<number>".
        :rtype:          str
        :raises ValueError: If there is no counter for `sort`.
        """

        if sort not in self._variable_counters:
            raise ValueError('Unsupported variable sort %s' % sort)

        prefixes = {
            'register': "r",
            'stack': "s",
            'argument': 'arg',
        }
        # every other supported sort shares the "m" prefix
        prefix = prefixes.get(sort, "m")

        return "i%s_%d" % (prefix, next(self._variable_counters[sort]))

    def add_variable(self, sort, start, variable):
        """
        Add a variable to the stack region or the register region.

        :param str sort: 'stack' or 'register'.
        :param start:    Passed to the region as the position of the variable.
        :param variable: The variable to add.
        :return:         None
        :raises ValueError: If `sort` is neither 'stack' nor 'register'.
        """

        if sort == 'stack':
            self._stack_region.add_variable(start, variable)
        elif sort == 'register':
            self._register_region.add_variable(start, variable)
        else:
            raise ValueError('Unsupported sort %s in add_variable().' % sort)

    def set_variable(self, sort, start, variable):
        """
        Set a variable in the stack region or the register region.

        :param str sort: 'stack' or 'register'.
        :param start:    Passed to the region as the position of the variable.
        :param variable: The variable to set.
        :return:         None
        :raises ValueError: If `sort` is neither 'stack' nor 'register'.
        """

        if sort == 'stack':
            self._stack_region.set_variable(start, variable)
        elif sort == 'register':
            self._register_region.set_variable(start, variable)
        else:
            # the message used to name add_variable(), which pointed at the wrong method
            raise ValueError('Unsupported sort %s in set_variable().' % sort)

    def write_to(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a write access to `variable`. See `_record_variable_access()` for the parameters.
        """

        self._record_variable_access('write', variable, offset, location, overwrite=overwrite, atom=atom)

    def read_from(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a read access to `variable`. See `_record_variable_access()` for the parameters.
        """

        self._record_variable_access('read', variable, offset, location, overwrite=overwrite, atom=atom)

    def reference_at(self, variable, offset, location, overwrite=False, atom=None):
        """
        Record a reference to `variable`. See `_record_variable_access()` for the parameters.
        """

        self._record_variable_access('reference', variable, offset, location, overwrite=overwrite, atom=atom)

    def _record_variable_access(self, sort, variable, offset, location, overwrite=False, atom=None):
        """
        Record an access to a variable and index it by instruction, block, statement, and (optionally) atom.

        :param str sort:       The access type: 'write', 'read', or 'reference'.
        :param variable:       The variable that is accessed.
        :param offset:         Stored together with the variable as a (variable, offset) pair.
        :param location:       Where the access happens. Its `ins_addr`, `block_addr`, and `stmt_idx` attributes are
                               used as index keys.
        :param bool overwrite: If True, the records stored under the affected keys are replaced by this access
                               instead of being extended by it.
        :param atom:           If not None, the access is also indexed under this atom of the statement.
        :return:               None
        """

        self._variables.add(variable)
        var_and_offset = variable, offset
        stmt_key = location.block_addr, location.stmt_idx
        access = VariableAccess(variable, sort, location)

        if overwrite:
            self._variable_accesses[variable] = {access}
            self._insn_to_variable[location.ins_addr] = {var_and_offset}
            self._block_to_variable[location.block_addr] = {var_and_offset}
            self._stmt_to_variable[stmt_key] = {var_and_offset}
            if atom is not None:
                # store a set, like every other index does. A bare tuple here made find_variable_by_atom() return
                # the variable instead of the (variable, offset) pair and broke later non-overwriting records.
                self._atom_to_variable[stmt_key][atom] = {var_and_offset}
        else:
            self._variable_accesses[variable].add(access)
            self._insn_to_variable[location.ins_addr].add(var_and_offset)
            self._block_to_variable[location.block_addr].add(var_and_offset)
            self._stmt_to_variable[stmt_key].add(var_and_offset)
            if atom is not None:
                self._atom_to_variable[stmt_key][atom].add(var_and_offset)

    def make_phi_node(self, block_addr, *variables):
        """
        Create a phi variable for variables at block `block_addr`.

        If exactly one of `variables` already is a phi variable, no new variable is created: the remaining variables
        are merged into that phi variable and it is returned.

        :param int block_addr:  The address of the current block.
        :param variables:       Variables that the phi variable represents.
        :return:                The created (or reused) phi variable.
        :raises TypeError:      If the first variable is not a register, memory, or stack variable.
        """

        existing_phis = set()
        non_phis = set()
        for var in variables:
            if self.is_phi_variable(var):
                existing_phis.add(var)
            else:
                non_phis.add(var)

        if len(existing_phis) == 1:
            existing_phi = next(iter(existing_phis))
            if not non_phis.issubset(self.get_phi_subvariables(existing_phi)):
                # Update phi variables
                self._phi_variables[existing_phi] |= non_phis
            return existing_phi

        # the new phi variable is modeled after the first variable
        repre = next(iter(variables))
        repre_type = type(repre)
        if repre_type is SimRegisterVariable:
            ident_sort = 'register'
            a = SimRegisterVariable(repre.reg, repre.size, ident=self.next_variable_ident(ident_sort))
        elif repre_type is SimMemoryVariable:
            ident_sort = 'memory'
            a = SimMemoryVariable(repre.addr, repre.size, ident=self.next_variable_ident(ident_sort))
        elif repre_type is SimStackVariable:
            ident_sort = 'stack'
            a = SimStackVariable(repre.offset, repre.size, ident=self.next_variable_ident(ident_sort))
        else:
            raise TypeError('make_phi_node(): Unsupported variable type "%s".' % type(repre))

        # Keep a record of all phi variables
        self._phi_variables[a] = set(variables)
        self._phi_variables_by_block[block_addr].add(a)

        return a

    def set_live_variables(self, addr, register_region, stack_region):
        """
        Store the live variables at a program point.

        :param addr:            Address of the program point.
        :param register_region: The register region at that point.
        :param stack_region:    The stack region at that point.
        :return:                None
        """

        self._live_variables[addr] = LiveVariables(register_region, stack_region)

    def find_variables_by_insn(self, ins_addr, sort):
        """
        Find the variables accessed by an instruction.

        :param ins_addr: Address of the instruction.
        :param sort:     'memory' or VariableType.MEMORY for stack and memory variables, 'register' or
                         VariableType.REGISTER for register variables.
        :return:         A list of (variable, offset) pairs, an empty list if `sort` is not supported, or None if no
                         access has been recorded for the instruction.
        """

        if ins_addr not in self._insn_to_variable:
            return None

        if sort in (VariableType.MEMORY, 'memory'):
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort in (VariableType.REGISTER, 'register'):
            wanted = SimRegisterVariable
        else:
            l.error('find_variables_by_insn(): Unsupported variable sort "%s".', sort)
            return [ ]

        return [(var, offset) for var, offset in self._insn_to_variable[ins_addr] if isinstance(var, wanted)]

    def find_variable_by_stmt(self, block_addr, stmt_idx, sort):
        """
        Get one (variable, offset) pair out of the result of `find_variables_by_stmt()`, or None if that result is
        empty.
        """

        return next(iter(self.find_variables_by_stmt(block_addr, stmt_idx, sort)), None)

    def find_variables_by_stmt(self, block_addr, stmt_idx, sort):
        """
        Find the variables accessed by a statement.

        :param block_addr: Address of the block the statement belongs to.
        :param stmt_idx:   Index of the statement.
        :param str sort:   'memory' for stack and memory variables, 'register' for register variables.
        :return:           A list of (variable, offset) pairs. The list is empty if nothing has been recorded for the
                           statement or if `sort` is not supported.
        :rtype:            list
        """

        key = block_addr, stmt_idx

        # look the key up explicitly so that the defaultdict does not grow on a miss
        if key not in self._stmt_to_variable:
            return [ ]

        variables = self._stmt_to_variable[key]
        if not variables:
            return [ ]

        if sort == 'memory':
            wanted = (SimStackVariable, SimMemoryVariable)
        elif sort == 'register':
            wanted = SimRegisterVariable
        else:
            l.error('find_variables_by_stmt(): Unsupported variable sort "%s".', sort)
            return [ ]

        return [(var, offset) for var, offset in variables if isinstance(var, wanted)]

    def find_variable_by_atom(self, block_addr, stmt_idx, atom):
        """
        Get one (variable, offset) pair out of the result of `find_variables_by_atom()`, or None if that result is
        empty.
        """

        return next(iter(self.find_variables_by_atom(block_addr, stmt_idx, atom)), None)

    def find_variables_by_atom(self, block_addr, stmt_idx, atom):
        """
        Find the variables recorded for an atom of a statement.

        :param block_addr: Address of the block the statement belongs to.
        :param stmt_idx:   Index of the statement.
        :param atom:       The atom to look up.
        :return:           The collection of (variable, offset) pairs recorded for the atom, or an empty list if
                           there is none.
        """

        key = block_addr, stmt_idx

        # explicit membership tests keep the nested defaultdicts from growing on a miss
        if key not in self._atom_to_variable:
            return [ ]

        if atom not in self._atom_to_variable[key]:
            return [ ]

        return self._atom_to_variable[key][atom]

    def get_variable_accesses(self, variable, same_name=False):
        """
        Get the recorded accesses to a variable.

        :param variable:       The variable to look up.
        :param bool same_name: If True, the accesses of all variables that share the name of `variable` are
                               returned.
        :return:               The set of accesses recorded for `variable` (an empty list if there is none), or, with
                               `same_name`, a list of accesses.
        """

        if not same_name:
            if variable in self._variable_accesses:
                return self._variable_accesses[variable]

            return [ ]

        # collect the accesses of all variables with the same variable name
        accesses = [ ]
        for var, var_accesses in self._variable_accesses.items():
            if variable.name == var.name:
                accesses.extend(var_accesses)
        return accesses

    def get_variables(self, sort=None, collapse_same_ident=False):
        """
        Get a list of variables.

        :param str or None sort:    Sort of the variable to get: 'stack' for stack variables only, 'reg' for register
                                    variables only. Any other value applies no filter.
        :param collapse_same_ident: Whether variables of the same identifier should be collapsed or not. Not
                                    implemented yet.
        :return:                    A list of variables.
        :rtype:                     list
        :raises NotImplementedError: If `collapse_same_ident` is True.
        """

        if collapse_same_ident:
            raise NotImplementedError()

        variables = [ ]
        for var in self._variables:
            if sort == 'stack' and not isinstance(var, SimStackVariable):
                continue
            if sort == 'reg' and not isinstance(var, SimRegisterVariable):
                continue
            variables.append(var)

        return variables

    def is_phi_variable(self, var):
        """
        Test if `var` is a phi variable.

        :param SimVariable var: The variable instance.
        :return:                True if `var` is a phi variable, False otherwise.
        :rtype:                 bool
        """

        return var in self._phi_variables

    def get_phi_subvariables(self, var):
        """
        Get sub-variables that phi variable `var` represents.

        :param SimVariable var: The variable instance.
        :return:                A set of sub-variables, or an empty set if `var` is not a phi variable.
        :rtype:                 set
        """

        if not self.is_phi_variable(var):
            return set()

        return self._phi_variables[var]

    def get_phi_variables(self, block_addr):
        """
        Get a dict of phi variables and their corresponding variables.

        :param int block_addr:  Address of the block.
        :return:                A dict of phi variables, or an empty dict if there are no phi variables at the block.
        :rtype:                 dict
        """

        # explicit membership test: indexing the defaultdict would insert an empty entry for the block
        if block_addr not in self._phi_variables_by_block:
            return { }

        return {phi: self._phi_variables[phi] for phi in self._phi_variables_by_block[block_addr]}

    def input_variables(self, exclude_specials=True):
        """
        Get all variables that are read but have never been written to.

        :param bool exclude_specials: If True, variables with a truthy `category` attribute are left out.
        :return:                      A list of variables that are read and never written to.
        :rtype:                       list
        """

        def has_write_access(accesses):
            return any(acc.access_type == 'write' for acc in accesses)

        def has_read_access(accesses):
            return any(acc.access_type == 'read' for acc in accesses)

        input_variables = [ ]

        for variable, accesses in self._variable_accesses.items():
            if has_write_access(accesses) or not has_read_access(accesses):
                continue
            if not exclude_specials or not variable.category:
                input_variables.append(variable)

        return input_variables

    def assign_variable_names(self):
        """
        Assign default names to all stack and register variables that do not have a name yet.

        :return: None
        """

        for var in self._variables:
            if isinstance(var, SimStackVariable):
                if var.name is not None:
                    continue
                if var.ident.startswith('iarg'):
                    var.name = 'arg_%x' % var.offset
                else:
                    var.name = 's_%x' % (-var.offset)
            elif isinstance(var, SimRegisterVariable):
                if var.name is not None:
                    continue
                var.name = var.ident
class GraphVisitor(object):
    """
    The base class of all graph visitors.

    Given a node in the graph, a graph visitor returns the successors of that node. Typically the graph is a control
    flow graph and the nodes are CFGNodes. Subclasses implement the methods in the "Interfaces" section, while this
    class maintains the queue of nodes that are still to be visited.
    """

    def __init__(self):
        self._reached_fixedpoint = set()  # nodes flagged through reached_fixedpoint()
        self._node_to_index = { }  # node -> position in the order produced by sort_nodes()
        self._sorted_nodes = OrderedSet()  # nodes waiting to be handed out by next_node()

    #
    # Interfaces
    #

    def startpoints(self):
        """
        Return the nodes where the traversal starts. Subclasses must implement this method.

        :return: A list of startpoints that the traversal should begin with.
        """

        raise NotImplementedError()

    def successors(self, node):
        """
        Return the successors of a node of the graph. Subclasses must implement this method.

        :param node: The node to work with.
        :return:     A list of successors.
        :rtype:      list
        """

        raise NotImplementedError()

    def predecessors(self, node):
        """
        Return the predecessors of a node of the graph. Subclasses must implement this method.

        :param node: The node to work with.
        :return:     A list of predecessors.
        :rtype:      list
        """

        raise NotImplementedError()

    def sort_nodes(self, nodes=None):
        """
        Return nodes sorted in an optimal traversal order. Subclasses must implement this method.

        :param iterable nodes: A collection of nodes to sort. If none, all nodes in the graph will be used to sort.
        :return:               A list of sorted nodes.
        :rtype:                list
        """

        raise NotImplementedError()

    #
    # Public methods
    #

    def nodes(self):
        """
        Return an iterator over the nodes in the order given by `sort_nodes()`.

        :return: An iterator of nodes.
        """

        return iter(self.sort_nodes())

    @deprecated(replacement='nodes')
    def nodes_iter(self):
        """
        (Deprecated) Return an iterator of nodes following an optimal traversal order. Will be removed in the future.
        """

        return self.nodes()

    # Traversal

    def reset(self):
        """
        Drop the traversal state and queue all nodes returned by `sort_nodes()` once more. Must be called prior to
        visiting future nodes.

        :return: None
        """

        self._sorted_nodes.clear()
        self._node_to_index.clear()
        self._reached_fixedpoint.clear()

        position = 0
        for node in self.sort_nodes():
            self._node_to_index[node] = position
            self._sorted_nodes.add(node)
            position += 1

    def next_node(self):
        """
        Remove the first node from the queue and return it.

        :return: A node in the graph, or None if the queue is empty.
        """

        if self._sorted_nodes:
            return self._sorted_nodes.pop(last=False)
        return None

    def all_successors(self, node, skip_reached_fixedpoint=False):
        """
        Gather all nodes that can be reached from `node` by following `successors()` repeatedly. `node` itself is
        part of the returned set.

        :param node:                         A node in the graph.
        :param bool skip_reached_fixedpoint: If True, successors flagged through `reached_fixedpoint()` are neither
                                             gathered nor expanded.
        :return:                             A set of nodes that are all successors to the given node.
        :rtype:                              set
        """

        seen = set()
        todo = [ node ]

        while todo:
            current = todo.pop()
            seen.add(current)
            for succ in self.successors(current):
                if succ in seen:
                    continue
                if skip_reached_fixedpoint and succ in self._reached_fixedpoint:
                    continue
                todo.append(succ)

        return seen

    def revisit(self, node, include_self=True):
        """
        Queue the direct successors of `node` for another visit, and `node` itself if `include_self` is True.

        :param node:              The node to revisit in the future.
        :param bool include_self: Whether `node` itself is queued as well.
        :return:                  None
        """

        queue = self._sorted_nodes
        succs = self.successors(node)

        if include_self:
            queue.add(node)
        for succ in succs:
            queue.add(succ)

        # bring the queue back into the order that reset() recorded
        index_of = self._node_to_index.__getitem__
        self._sorted_nodes = OrderedSet(sorted(queue, key=index_of))

    def reached_fixedpoint(self, node):
        """
        Flag `node` as having reached its fixed-point. `all_successors()` leaves flagged nodes out when it is called
        with `skip_reached_fixedpoint=True`.

        :param node: The node to mark as reached fixed-point.
        :return:     None
        """

        self._reached_fixedpoint.add(node)