def optimize(self, expr: BinaryOp):
    """
    Rewrite a shift-left followed by a matching shift-right into a pair of conversions:

        (expr << N) >> N   ==>  Convert((M-N)->M, Convert(M->(M-N), expr))

    where M is the width of the shifted expression. A logical shift-right (Shr) zero-extends
    the surviving low bits; an arithmetic shift-right (Sar) sign-extends them.

    :param expr:    The binary operation to simplify.
    :return:        The rewritten expression, or None if the pattern does not match.
    """
    if expr.op in ("Shr", "Sar") and isinstance(expr.operands[1], Const):
        expr_a = expr.operands[0]
        n0 = expr.operands[1].value
        if isinstance(expr_a, BinaryOp) and expr_a.op == "Shl" and isinstance(expr_a.operands[1], Const):
            n1 = expr_a.operands[1].value
            if n0 == n1:
                inner_expr = expr_a.operands[0]
                # truncate to M-N bits; signedness is irrelevant when narrowing
                conv_inner_expr = Convert(
                    None,
                    expr_a.bits,
                    expr_a.bits - n0,
                    False,
                    inner_expr,
                    **expr.tags,
                )
                # widen back to M bits. BUGFIX: the signedness flag belongs on this widening
                # conversion - Sar sign-extends, Shr zero-extends. The original placed it on
                # the truncation and always zero-extended, making Sar and Shr indistinguishable.
                conv_expr = Convert(
                    None,
                    expr_a.bits - n0,
                    expr.bits,
                    expr.op == "Sar",  # is_signed
                    conv_inner_expr,
                    **expr.tags,
                )
                return conv_expr
    return None
def optimize(self, expr: BinaryOp):
    """
    Push 32-bit operations down onto their narrower operands.

    Two patterns are handled:
      * ``Conv(M->32, x) OP const``  ==>  ``Conv(M->32, x OP const)`` (for And) or the
        narrowed comparison itself (for Cmp*), when M is 8 or 16 and the constant fits in M bits;
      * ``Conv(M->N, a) +/- Conv(M->N, b)``  ==>  ``Conv(M->N, a +/- b)``.

    :param expr:    The binary operation to simplify.
    :return:        The rewritten expression, or None if no pattern matches.
    """
    lhs, rhs = expr.operands
    if not isinstance(lhs, Convert):
        return None

    if lhs.to_bits == 32 and isinstance(rhs, Const):
        # converting to an int, then operating against a constant
        from_bits = lhs.from_bits
        if from_bits in (16, 8) and rhs.value <= (1 << from_bits) - 1:
            if expr.op == "And":
                con = Const(None, None, rhs.value, from_bits, **rhs.tags)
                narrowed = BinaryOp(expr.idx, "And", (lhs.operand, con), expr.signed,
                                    bits=from_bits, **expr.tags)
                return Convert(lhs.idx, from_bits, 32, lhs.is_signed, narrowed, **lhs.tags)
            if expr.op in {"CmpEQ", "CmpNE", "CmpGT", "CmpGE", "CmpGTs", "CmpGEs",
                           "CmpLT", "CmpLE", "CmpLTs", "CmpLEs"}:
                # a comparison result is width-independent; no re-widening is needed
                con = Const(None, None, rhs.value, from_bits, **rhs.tags)
                return BinaryOp(expr.idx, expr.op, (lhs.operand, con), expr.signed,
                                bits=from_bits, **expr.tags)
    elif isinstance(rhs, Convert) and rhs.to_bits == lhs.to_bits and rhs.from_bits == lhs.from_bits:
        if expr.op in {"Add", "Sub"}:
            narrowed = BinaryOp(expr.idx, expr.op, (lhs.operand, rhs.operand), expr.signed,
                                bits=lhs.from_bits,
                                **expr.tags,
                                )
            return Convert(expr.idx, lhs.from_bits, lhs.to_bits, lhs.is_signed, narrowed,
                           **lhs.tags,
                           )
    return None
def optimize(self, expr: Convert):
    """
    Collapse nested conversions: ``Conv(M->N, Conv(L->M, x))`` becomes ``Conv(L->N, x)``,
    keeping the outer conversion's signedness.

    :param expr:    The Convert expression to simplify.
    :return:        The collapsed Convert, or None if the operand is not itself a Convert.
    """
    inner = expr.operand
    if not isinstance(inner, Convert):
        return None
    return Convert(expr.idx, inner.from_bits, expr.to_bits, expr.is_signed, inner.operand, **expr.tags)
def _handle_Convert(self, expr_idx: int, expr: Convert, stmt_idx: int, stmt: Statement, block: Optional[Block]):
    """
    Visit a Convert expression: rebuild it if its operand was rewritten by the walker.

    :param expr_idx:    Index of the expression within the statement.
    :param expr:        The Convert expression being visited.
    :param stmt_idx:    Index of the statement within the block.
    :param stmt:        The statement containing the expression.
    :param block:       The block containing the statement, if any.
    :return:            A new Convert wrapping the rewritten operand, or None if nothing changed.
    """
    rewritten = self._handle_expr(expr_idx, expr.operand, stmt_idx, stmt, block)
    if rewritten is None or rewritten is expr.operand:
        return None
    return Convert(expr.idx, expr.from_bits, expr.to_bits, expr.is_signed, rewritten, **expr.tags)
def optimize(self, expr: Convert):
    """
    Rewrite sign-bit extractions into signed comparisons against zero:

        Conv(M->1, ((expr) >> N) & 1)      ==>  expr < 0
        Conv(M->1, ((expr - 0) >> N) & 1)  ==>  expr < 0

    where N is the sign-bit position of an 8/16/32/64-bit integer.

    :param expr:    The Convert expression to simplify.
    :return:        The comparison expression, or None if the pattern does not match.
    """
    if expr.to_bits != 1:
        return None

    and_expr = expr.operand
    # must be taking a single bit: (... ) & 1
    if not (isinstance(and_expr, BinaryOp) and and_expr.op == "And"
            and isinstance(and_expr.operands[1], Const)
            and and_expr.operands[1].value == 1):
        return None

    shift = and_expr.operands[0]
    # must be a right-shift by a constant
    if not (isinstance(shift, BinaryOp) and shift.op == "Shr"
            and isinstance(shift.operands[1], Const)):
        return None

    # the shift amount must isolate the sign bit of an int8/int16/int32/int64
    to_bits = {7: 8, 15: 16, 31: 32, 63: 64}.get(shift.operands[1].value)
    if to_bits is None:
        # unsupported shift amount
        return None

    real_expr = shift.operands[0]
    # strip a no-op subtraction of zero
    if isinstance(real_expr, BinaryOp) and real_expr.op == "Sub" \
            and isinstance(real_expr.operands[1], Const) \
            and real_expr.operands[1].value == 0:
        real_expr = real_expr.operands[0]

    cvt = Convert(expr.idx, real_expr.bits, to_bits, False, real_expr, **expr.tags)
    return BinaryOp(
        None,
        "CmpLT",
        (
            cvt,
            Const(None, None, 0, to_bits),
        ),
        True,  # signed comparison: tests the sign bit
        **expr.tags,
    )
def optimize(self, expr: BinaryOp):
    """
    Rewrite boolean negation expressed as subtraction:

        Sub(1, Conv(1->N, some bool expression))  ==>  Conv(1->N, Not(some bool expression))

    :param expr:    The binary operation to simplify.
    :return:        The rewritten Convert, or None if the pattern does not match.
    """
    if expr.op != "Sub":
        return None
    minuend, subtrahend = expr.operands
    if not (isinstance(minuend, Const) and minuend.value == 1):
        return None
    if not (isinstance(subtrahend, Convert) and subtrahend.from_bits == 1):
        return None
    inner = subtrahend.operand
    if not self.is_bool_expr(inner):
        return None
    negated = UnaryOp(None, 'Not', inner, **inner.tags)
    return Convert(None,
                   1,
                   subtrahend.to_bits,
                   subtrahend.is_signed,
                   negated,
                   **subtrahend.tags,
                   )
def optimize(self, expr: BinaryOp):
    """
    Drop a masking ``And`` whose mask keeps at least as many bits as the value can have.

    Handles ``Conv(M->?, x) & mask`` directly, and the same pattern underneath a single
    Shl/Shr/Sar, in which case the shift is re-applied to the unconverted value.

    :param expr:    The binary operation to simplify.
    :return:        The simplified Convert, or None if the mask is meaningful.
    """
    if expr.op != "And" or not isinstance(expr.operands[1], Const):
        return None

    # the mask must be a recognized all-ones bit pattern
    to_bits = _MASK_TO_BITS.get(expr.operands[1].value, None)
    if to_bits is None:
        return None

    lhs = expr.operands[0]
    if isinstance(lhs, Convert):
        conv: Convert = lhs
        if conv.from_bits <= to_bits:
            # the mask keeps every bit the converted value can have - it is a no-op
            return Convert(None, conv.from_bits, expr.bits, conv.is_signed, conv.operand, **conv.tags)
    elif isinstance(lhs, BinaryOp) and lhs.op in {'Shl', 'Shr', 'Sar'} \
            and isinstance(lhs.operands[0], Convert):
        conv: Convert = lhs.operands[0]
        if conv.from_bits <= to_bits:
            # the mask is a no-op; re-apply the shift to the unconverted value
            shifted = BinaryOp(None, lhs.op, (conv.operand, lhs.operands[1]), lhs.signed,
                               variable=lhs.variable,
                               variable_offset=lhs.variable_offset,
                               **lhs.tags)
            return Convert(None, conv.from_bits, expr.bits, conv.is_signed, shifted, **conv.tags)
    return None
def optimize(self, expr: BinaryOp):
    """
    Rewrite boolean negation expressed as an xor with 1:

        Conv(1->N, some_bool_expr) ^ 1  ==>  Conv(1->N, Not(some_bool_expr))

    :param expr:    The binary operation to simplify.
    :return:        The rewritten Convert, or None if the pattern does not match.
    """
    if expr.op == "Xor" and isinstance(
            expr.operands[1], Const) and expr.operands[1].value == 1:
        arg0 = expr.operands[0]
        if isinstance(arg0, Convert) and arg0.from_bits == 1 \
                and self.is_bool_expr(arg0.operand):
            # BUGFIX: Convert is a unary expression and exposes its child as `.operand`
            # (as the sibling Sub(1, Conv(...)) pass uses); the original accessed
            # `arg0.operands[0]`, the BinaryOp interface, which raises AttributeError.
            new_expr = Convert(
                None, 1, arg0.to_bits, arg0.is_signed,
                UnaryOp(None, 'Not', arg0.operand, **expr.tags),
                **arg0.tags)
            return new_expr
    return None
def optimize(self, expr: BinaryOp):
    """
    Rewrite a shift-left/shift-right pair over an unsigned widening conversion:

        (Conv(M->N, expr) << P) >> Q  ==>  (Conv(M->N, expr) & bitmask) >> (Q-P)

    where Q >= P, M < N, and bitmask = 0b('1' * (N - P)).

    :param expr:    The binary operation to simplify.
    :return:        The rewritten expression, or None if the pattern does not match.
    """
    if expr.op != "Shr" or not isinstance(expr.operands[1], Const):
        return None
    q = expr.operands[1].value

    shl = expr.operands[0]
    if not (isinstance(shl, BinaryOp) and shl.op == "Shl" and isinstance(shl.operands[1], Const)):
        return None
    p = shl.operands[1].value

    conv = shl.operands[0]
    if q < p or not isinstance(conv, Convert) or conv.is_signed:
        return None
    m = conv.from_bits
    n = conv.to_bits
    if not (m < n and n >= p):
        return None

    # keep only the bits that survive the left shift
    bitmask = (1 << (n - p)) - 1
    masked = BinaryOp(
        None, 'And',
        (
            Convert(conv.idx, m, n, False, conv.operand, **conv.tags),
            Const(None, None, bitmask, n),
        ),
        False,
        variable=None,
        variable_offset=None,
        **expr.tags,
    )
    # the two shifts collapse into a single right shift by Q-P
    return BinaryOp(
        None, 'Shr',
        (
            masked,
            Const(None, None, q - p, masked.bits),
        ),
        False,
        **expr.tags,
    )
def _fold_call_exprs(self) -> bool:
    """
    Fold a call expression (statement) into other statements if the return value of the call expression (statement)
    is only used once, and the use site and the call site belongs to the same supernode.

    Example::

        s1 = func();
        s0 = s1;
        if (s0) ...

    after folding, it will be transformed to::

        s0 = func();
        if (s0) ...

    to avoid cases where func() is called more than once after simplification, another simplification pass will run
    on the structured graph to further transform it to::

        if (func()) ...

    :return:    True if at least one call expression was folded into its use site, False otherwise.
    """
    simplified = False

    prop = self._compute_propagation()
    if not prop.equivalence:
        return simplified

    # map (block address, block index) to the block itself for fast lookups
    addr_and_idx_to_block: Dict[Tuple[int, int], Block] = {}
    for block in self.func_graph.nodes():
        addr_and_idx_to_block[(block.addr, block.idx)] = block

    def_locations_to_remove: Set[CodeLocation] = set()
    updated_use_locations: Set[CodeLocation] = set()

    for eq in prop.equivalence:
        eq: Equivalence

        # register variable == Call
        if isinstance(eq.atom0, Register):
            if isinstance(eq.atom1, Call):
                # register variable = Call
                call = eq.atom1
            elif isinstance(eq.atom1, Convert) and isinstance(eq.atom1.operand, Call):
                # register variable = Convert(Call)
                call = eq.atom1
            else:
                continue

            if self._is_call_using_temporaries(call):
                continue

            if eq.codeloc in updated_use_locations:
                # this def is now created by an updated use. the corresponding statement will be updated in the end.
                # we must rerun Propagator to get an updated definition (and Equivalence)
                continue

            # find the definition of this register
            rd = self._compute_reaching_definitions()
            defs = [
                d for d in rd.all_definitions
                if d.codeloc == eq.codeloc
                and isinstance(d.atom, atoms.Register)
                and d.atom.reg_offset == eq.atom0.reg_offset
            ]
            if not defs or len(defs) > 1:
                continue
            the_def: Definition = defs[0]

            # find all uses of this definition; fold only if there is exactly one use
            all_uses: Set[Tuple[CodeLocation, Any]] = set(rd.all_uses.get_uses_with_expr(the_def))
            if len(all_uses) != 1:
                continue
            u, used_expr = next(iter(all_uses))

            if u in def_locations_to_remove:
                # this use site has been altered by previous folding attempts. the corresponding statement will be
                # removed in the end. in this case, this Equivalence is probably useless, and we must rerun
                # Propagator to get an updated Equivalence.
                continue

            # check the statement and make sure it's not a conditional jump
            the_block = addr_and_idx_to_block[(u.block_addr, u.block_idx)]
            if isinstance(the_block.statements[u.stmt_idx], ConditionalJump):
                continue

            # check if the use and the definition is within the same supernode
            super_node_blocks = self._get_super_node_blocks(
                addr_and_idx_to_block[(the_def.codeloc.block_addr, the_def.codeloc.block_idx)])
            if u.block_addr not in set(b.addr for b in super_node_blocks):
                continue

            # replace all uses
            old_block = addr_and_idx_to_block.get((u.block_addr, u.block_idx), None)
            if old_block is None:
                continue

            # if there is an updated block, use that
            the_block = self.blocks.get(old_block, old_block)
            stmt: Statement = the_block.statements[u.stmt_idx]

            if isinstance(eq.atom0, Register):
                src = used_expr
                dst = call

                if src.bits != dst.bits:
                    # the return value is used at a different width - wrap the call in a Convert
                    dst = Convert(None, dst.bits, src.bits, False, dst)
            else:
                continue

            replaced, new_block = self._replace_expr_and_update_block(
                the_block, u.stmt_idx, stmt, the_def, u, src, dst)
            if replaced:
                self.blocks[old_block] = new_block

                # this call has been folded to the use site. we can remove this call.
                self._calls_to_remove.add(eq.codeloc)
                simplified = True
                def_locations_to_remove.add(eq.codeloc)
                updated_use_locations.add(u)

    # no need to clear the cache at the end of this method
    return simplified
def _unify_local_variables(self) -> bool:
    """
    Find variables that are definitely equivalent and then eliminate unnecessary copies.

    :return:    True if at least one copy was eliminated or a use was rewritten, False otherwise.
    """
    simplified = False

    prop = self._compute_propagation()
    if not prop.equivalence:
        return simplified

    # map (block address, block index) to the block itself for fast lookups
    addr_and_idx_to_block: Dict[Tuple[int, int], Block] = {}
    for block in self.func_graph.nodes():
        addr_and_idx_to_block[(block.addr, block.idx)] = block

    # group equivalences by their use-side atom (atom1)
    equivalences: Dict[Any, Set[Equivalence]] = defaultdict(set)
    atom_by_loc = set()
    for eq in prop.equivalence:
        equivalences[eq.atom1].add(eq)
        atom_by_loc.add((eq.codeloc, eq.atom1))

    # sort keys to ensure a reproducible result
    sorted_loc_and_atoms = sorted(atom_by_loc, key=lambda x: x[0])

    for _, atom in sorted_loc_and_atoms:
        eqs = equivalences[atom]
        if len(eqs) > 1:
            # the same atom participates in multiple equivalences - ambiguous, skip
            continue

        eq = next(iter(eqs))

        # Acceptable equivalence classes:
        #
        # stack variable == register
        # register variable == register
        # stack variable == Conv(register, M->N)
        # global variable == register
        #
        # Equivalence is generally created at assignment sites. Therefore, eq.atom0 is the definition and
        # eq.atom1 is the use.
        the_def = None
        if isinstance(eq.atom0, SimMemoryVariable):  # covers both Stack and Global variables
            if isinstance(eq.atom1, Register):
                # stack_var == register or global_var == register
                to_replace = eq.atom1
                to_replace_is_def = False
            elif isinstance(eq.atom1, Convert) and isinstance(eq.atom1.operand, Register):
                # stack_var == Conv(register, M->N)
                to_replace = eq.atom1.operand
                to_replace_is_def = False
            else:
                continue
        elif isinstance(eq.atom0, Register):
            if isinstance(eq.atom1, Register):
                # register == register
                if self.project.arch.is_artificial_register(eq.atom0.reg_offset, eq.atom0.size):
                    to_replace = eq.atom0
                    to_replace_is_def = True
                else:
                    to_replace = eq.atom1
                    to_replace_is_def = False
            else:
                continue
        else:
            continue

        # find the definition of this register
        rd = self._compute_reaching_definitions()
        if to_replace_is_def:
            # find defs
            defs = []
            for def_ in rd.all_definitions:
                if def_.codeloc == eq.codeloc:
                    if isinstance(to_replace, SimStackVariable):
                        if isinstance(def_.atom, atoms.MemoryLocation) \
                                and isinstance(def_.atom.addr, atoms.SpOffset):
                            if to_replace.offset == def_.atom.addr.offset:
                                defs.append(def_)
                    elif isinstance(to_replace, Register):
                        if isinstance(def_.atom, atoms.Register) \
                                and to_replace.reg_offset == def_.atom.reg_offset:
                            defs.append(def_)
            if len(defs) != 1:
                continue
            the_def = defs[0]
        else:
            # find uses
            defs = rd.all_uses.get_uses_by_location(eq.codeloc)
            if len(defs) != 1:
                # there are multiple defs for this register - we do not support replacing all of them
                continue
            for def_ in defs:
                def_: Definition
                if isinstance(def_.atom, atoms.Register) and def_.atom.reg_offset == to_replace.reg_offset:
                    # found it!
                    the_def = def_
                    break
        if the_def is None:
            continue

        if isinstance(the_def.codeloc, ExternalCodeLocation):
            # this is a function argument. we enter a slightly different logic and try to eliminate copies of this
            # argument if
            # (a) the on-stack copy of it has never been modified in this function
            # (b) the function argument register has never been updated.
            #     TODO: we may loosen requirement (b) once we have real register versioning in AIL.
            defs = [def_ for def_ in rd.all_definitions if def_.codeloc == eq.codeloc]
            all_uses_with_def = None
            replace_with = None
            remove_initial_assignment = None

            if defs and len(defs) == 1:
                stackvar_def = defs[0]
                if isinstance(stackvar_def.atom, atoms.MemoryLocation) \
                        and isinstance(stackvar_def.atom.addr, SpOffset):
                    # found the stack variable
                    # Make sure there is no other write to this location
                    if any((def_ != stackvar_def and def_.atom == stackvar_def.atom)
                           for def_ in rd.all_definitions if isinstance(def_.atom, atoms.MemoryLocation)):
                        continue
                    # Make sure the register is never updated across this function
                    if any((def_ != the_def and def_.atom == the_def.atom)
                           for def_ in rd.all_definitions if isinstance(def_.atom, atoms.Register)):
                        continue
                    # find all its uses
                    all_stackvar_uses: Set[Tuple[CodeLocation, Any]] = set(
                        rd.all_uses.get_uses_with_expr(stackvar_def))
                    all_uses_with_def = set()
                    should_abort = False
                    for use in all_stackvar_uses:
                        used_expr = use[1]
                        if used_expr is not None and used_expr.size != stackvar_def.size:
                            # a use with a mismatching size - bail on this equivalence
                            should_abort = True
                            break
                        all_uses_with_def.add((stackvar_def, use))
                    if should_abort:
                        continue
                    #to_replace = Load(None, StackBaseOffset(None, self.project.arch.bits, eq.atom0.offset),
                    #                  eq.atom0.size, endness=self.project.arch.memory_endness)
                    replace_with = eq.atom1
                    remove_initial_assignment = True

            if all_uses_with_def is None:
                continue
        else:
            if isinstance(eq.atom0, SimStackVariable):
                # create the memory loading expression
                new_idx = None if self._ail_manager is None else next(self._ail_manager.atom_ctr)
                replace_with = Load(new_idx,
                                    StackBaseOffset(None, self.project.arch.bits, eq.atom0.offset),
                                    eq.atom0.size,
                                    endness=self.project.arch.memory_endness)
            elif isinstance(eq.atom0, SimMemoryVariable) and isinstance(eq.atom0.addr, int):
                # create the memory loading expression
                new_idx = None if self._ail_manager is None else next(self._ail_manager.atom_ctr)
                replace_with = Load(new_idx,
                                    Const(None, None, eq.atom0.addr, self.project.arch.bits),
                                    eq.atom0.size,
                                    endness=self.project.arch.memory_endness)
            elif isinstance(eq.atom0, Register):
                if isinstance(eq.atom1, Register):
                    if self.project.arch.is_artificial_register(eq.atom0.reg_offset, eq.atom0.size):
                        replace_with = eq.atom1
                    else:
                        replace_with = eq.atom0
                else:
                    raise RuntimeError("Unsupported atom1 type %s." % type(eq.atom1))
            else:
                raise RuntimeError("Unsupported atom0 type %s." % type(eq.atom0))

            to_replace_def = the_def

            # find all uses of this definition
            # we make a copy of the set since we may touch the set (uses) when replacing expressions
            all_uses: Set[Tuple[CodeLocation, Any]] = set(rd.all_uses.get_uses_with_expr(to_replace_def))

            # make sure none of these uses are phi nodes (depends on more than one def)
            all_uses_with_unique_def = set()
            for use_and_expr in all_uses:
                use_loc, used_expr = use_and_expr
                defs_and_exprs = rd.all_uses.get_uses_by_location(use_loc, exprs=True)
                filtered_defs = {def_ for def_, expr_ in defs_and_exprs if expr_ == used_expr}
                if len(filtered_defs) == 1:
                    all_uses_with_unique_def.add(use_and_expr)
                else:
                    # optimization: break early
                    break
            if len(all_uses) != len(all_uses_with_unique_def):
                # only when all uses are determined by the same definition will we continue with the simplification
                continue

            all_uses_with_def = set((to_replace_def, use_and_expr) for use_and_expr in all_uses)

            remove_initial_assignment = False  # expression folding will take care of it

        if not all_uses_with_def:
            # definitions without uses may simply be our data-flow analysis being incorrect. do not remove them.
            continue

        # TODO: We can only replace all these uses with the stack variable if the stack variable isn't
        # TODO: re-assigned of a new value. Perform this check.

        # replace all uses
        all_uses_replaced = True
        for def_, use_and_expr in all_uses_with_def:
            u, used_expr = use_and_expr
            if u == eq.codeloc:
                # skip the very initial assignment location
                continue
            old_block = addr_and_idx_to_block.get((u.block_addr, u.block_idx), None)
            if old_block is None:
                continue

            # if there is an updated block, use it
            the_block = self.blocks.get(old_block, old_block)
            stmt: Statement = the_block.statements[u.stmt_idx]

            replace_with_copy = replace_with.copy()
            if to_replace.size != replace_with_copy.size:
                # width mismatch between the use and the replacement - wrap in a Convert
                new_idx = None if self._ail_manager is None else next(self._ail_manager.atom_ctr)
                replace_with_copy = Convert(
                    new_idx,
                    replace_with_copy.bits,
                    to_replace.bits,
                    False,
                    replace_with_copy,
                )

            r, new_block = self._replace_expr_and_update_block(
                the_block, u.stmt_idx, stmt, def_, u, used_expr, replace_with_copy)
            if r:
                self.blocks[old_block] = new_block
            else:
                # failed to replace a use - we need to keep the initial assignment!
                all_uses_replaced = False
            simplified |= r

        if all_uses_replaced and remove_initial_assignment:
            # the initial statement can be removed
            self._assignments_to_remove.add(eq.codeloc)

    if simplified:
        self._clear_cache()
    return simplified
def _narrow_exprs(self) -> bool:
    """
    A register may be used with full width even when only the lower bytes are really needed. This results in the
    incorrect determination of wider variables while the actual variable is narrower (e.g., int64 vs char). This
    optimization narrows a register definition if all its uses are narrower than the definition itself.

    :return:    True if at least one register definition was narrowed, False otherwise.
    """
    narrowed = False

    # map (block address, block index) to the block itself for fast lookups
    addr_and_idx_to_block: Dict[Tuple[int, int], Block] = {}
    for block in self.func_graph.nodes():
        addr_and_idx_to_block[(block.addr, block.idx)] = block

    rd = self._compute_reaching_definitions()
    for def_ in rd.all_definitions:
        if isinstance(def_.atom, atoms.Register):
            needs_narrowing, to_size, use_exprs = self._narrowing_needed(def_, rd, addr_and_idx_to_block)
            if needs_narrowing:
                # replace the definition
                if not isinstance(def_.codeloc, ExternalCodeLocation):
                    old_block = addr_and_idx_to_block.get((def_.codeloc.block_addr, def_.codeloc.block_idx))
                    the_block = self.blocks.get(old_block, old_block)
                    stmt = the_block.statements[def_.codeloc.stmt_idx]
                    r, new_block = False, None
                    if isinstance(stmt, Assignment) and isinstance(stmt.dst, Register):
                        # narrow the assignment destination; the source is wrapped in a Convert
                        # down to the narrowed width
                        new_assignment_dst = Register(stmt.dst.idx, None, def_.atom.reg_offset,
                                                      to_size * self.project.arch.byte_width,
                                                      **stmt.dst.tags)
                        new_assignment_src = Convert(
                            stmt.src.idx,  # FIXME: This is a hack
                            stmt.src.bits,
                            to_size * self.project.arch.byte_width,
                            False,
                            stmt.src,
                            **stmt.src.tags)
                        r, new_block = BlockSimplifier._replace_and_build(
                            the_block,
                            {def_.codeloc: {
                                stmt.dst: new_assignment_dst,
                                stmt.src: new_assignment_src,
                            }},
                            replace_assignment_dsts=True)
                    elif isinstance(stmt, Call):
                        # narrow the return-value register of the call
                        new_retexpr = Register(stmt.ret_expr.idx, None, def_.atom.reg_offset,
                                               to_size * self.project.arch.byte_width,
                                               **stmt.ret_expr.tags)
                        r, new_block = BlockSimplifier._replace_and_build(
                            the_block, {def_.codeloc: {stmt.ret_expr: new_retexpr}})
                    if not r:
                        # couldn't replace the definition...
                        continue
                    self.blocks[old_block] = new_block

                # replace all uses
                for use_loc, use_expr in use_exprs:
                    old_block = addr_and_idx_to_block.get((use_loc.block_addr, use_loc.block_idx))
                    the_block = self.blocks.get(old_block, old_block)
                    new_use_expr = Register(use_expr.idx, None, def_.atom.reg_offset,
                                            to_size * self.project.arch.byte_width,
                                            **use_expr.tags)
                    r, new_block = BlockSimplifier._replace_and_build(
                        the_block, {use_loc: {use_expr: new_use_expr}})
                    if not r:
                        _l.warning("Failed to replace use-expr at %s.", use_loc)
                    else:
                        self.blocks[old_block] = new_block

                narrowed = True

    return narrowed