Exemplo n.º 1
0
def get_unique_variables(expr: Expr) -> List[Expr]:
    """
    Collect every distinct variable that occurs in an expression.

    The expression is traversed with `expr.visit`; each visited node that
    reports `is_id()` is recorded once. The result is ordered by the string
    form of each variable, so repeated calls yield the same sequence.

    Args:
        expr: Expression to inspect.

    Returns:
        List of unique variables, sorted by their string representation.
    """
    found = set()

    def collect(node: Expr) -> Expr:
        """
        Visitor callback that records identifier nodes.

        Args:
            node: Currently visited expression.

        Returns:
            The visited expression, unchanged.
        """
        if node.is_id():
            found.add(node)
        return node

    expr.visit(collect)

    return sorted(found, key=str)
Exemplo n.º 2
0
def get_subexpressions(expr: Expr) -> Iterator[Expr]:
    """
    Yield all subexpressions of an expression, last-visited first.

    Every node handed to the callback by `expr.visit` is recorded in visit
    order; the recorded sequence is then iterated in reverse. The original
    author describes the resulting order as a top-to-bottom walk over the
    abstract syntax tree.

    Args:
        expr: Expression whose subexpressions are enumerated.

    Returns:
        Iterator over the recorded expressions in reverse visit order.
    """
    visited = []

    def record(node: Expr) -> Expr:
        """
        Visitor callback that appends the visited expression to the list.

        Args:
            node: Currently visited expression.

        Returns:
            The visited expression, unchanged.
        """
        visited.append(node)
        return node

    expr.visit(record)

    return reversed(visited)
Exemplo n.º 3
0
def simple_unwrap_expr(expr: Expr, loc_db: LocationDB):
    """
    Reduce an expression to a plain integer, or -1 if that is not possible.

    Args:
        expr: Expression to unwrap.
        loc_db: Location database used to resolve location expressions.

    Returns:
        `int(expr)` for integer expressions; the offset reported by
        `loc_db.get_location_offset` for location expressions; -1 when the
        offset is None or the expression is of any other kind.
    """
    if expr.is_int():
        return int(expr)

    if expr.is_loc():
        offset = loc_db.get_location_offset(expr.loc_key)
        # an unresolved location falls back to the -1 sentinel
        return -1 if offset is None else offset

    return -1
Exemplo n.º 4
0
    def synthesize_from_expression(self, expr: Expr,
                                   num_samples: int) -> Tuple[Expr, float]:
        """
        Synthesizes an expression with the same I/O behavior as the given one.

        The given expression represents a function f(x0, ..., xi). It is first
        unified (memory accesses and the like are replaced with variables).
        From the unified expression the method builds a synthesis oracle, a
        grammar and a mutator, and then runs the iterated local search. The
        unification is reversed on the simplified search result.

        Example:

        The given expression @64[rax] + rbx - rbx is unified to p0 + p1 - p1.
        A synthesized expression with the same I/O behavior is p0. After
        re-applying the initial variables, @64[rax] is returned.

        Args:
            expr (Expr): Expression representing a function f(x0, ..., xi) in Miasm IR.
            num_samples (int): Number of I/O samples for the synthesis oracle.

        Returns:
            Tuple[Expr, float]: Synthesized expression and its corresponding score.
        """
        # replace memory etc. with placeholder variables
        unification_dict = gen_unification_dict(expr)
        unified = expr.replace_expr(unification_dict)

        # everything below is derived from the unified expression
        variables = get_unique_variables(unified)
        oracle = SynthesisOracle.gen_from_expression(unified, variables,
                                                     num_samples)
        grammar = Grammar(unified.size, variables)
        mutator = Mutator.gen_from_expression(unified, grammar)

        # stochastic search
        state, score = self.iterated_local_search(mutator, oracle)

        # map placeholder variables back to the original terminals
        synthesized = reverse_unification(state.get_expr_simplified(),
                                          unification_dict)

        # widen the result when it is narrower than the grammar's size
        if grammar.size > synthesized.size:
            synthesized = synthesized.zeroExtend(grammar.size)

        return synthesized, score
Exemplo n.º 5
0
    def _skip_subtree(expr: Expr) -> bool:
        """
        Tells whether an expression is a terminal whose subtree is skipped.

        An expression counts as terminal here when it is an identifier
        (`is_id()`, register/variable), an integer (`is_int()`) or a
        location label (`is_loc()`).

        NOTE(review): memory expressions are not tested by this check —
        confirm whether that is intended.

        Args:
            expr: Expression to test.

        Returns:
            True if expr is a terminal expression.
        """
        is_terminal = expr.is_id() or expr.is_int() or expr.is_loc()
        return is_terminal  # type: ignore
Exemplo n.º 6
0
    def evaluate_expression(expr: Expr, inputs_array: List[int]) -> int:
        """
        Evaluates an expression for an array of input values.

        Each placeholder variable p0, p1, ..., pn found in the expression is
        bound to the entry of `inputs_array` at the same index: p0 is
        replaced with inputs_array[0], p1 with inputs_array[1] etc. The
        substituted expression is then simplified with `expr_simp` and
        converted to a Python int.

        Variables whose name is not exactly "p" followed by decimal digits
        are not replaced.

        Args:
            expr: Expression to evaluate.
            inputs_array: List of input values, indexed by placeholder number.

        Returns:
            Int that is the return value of the evaluated expression.

        Raises:
            IndexError: If a placeholder index is out of range for
                `inputs_array`.
        """
        # dictionary of replacements
        replacements = {}
        # walk over unique variables in the expression
        for v in get_unique_variables(expr):
            # The whole name must be "p<digits>". The previous unanchored
            # pattern "^p[0-9]*" also accepted names such as "pc" or "p",
            # for which the index conversion below raised ValueError.
            match = re.fullmatch(r"p([0-9]+)", v.name)
            if match is None:
                continue
            # index of the placeholder within the inputs
            index = int(match.group(1))
            # insert into replacements dictionary
            replacements[v] = ExprInt(inputs_array[index], v.size)

        return int(expr_simp(expr.replace_expr(replacements)))
Exemplo n.º 7
0
    def _reverse_global_unification(
            self, expr: Expr, unification_dict: Dict[Expr, Expr]) -> Expr:
        """
        Iteratively reverses the global unifications of an expression.

        For the given unification dictionary, unification variables can
        be part of other unification rules. To reverse all unifications
        in a given expression, the replacement is applied repeatedly for
        as long as the expression contains a variable whose name starts
        with `self._global_variable_prefix`.

        Example: Given: {r0: x + r1, r1: y} and expression r0 + r1.
                 We first transform it into (x + r1) + y and then to
                 (x + y) + y.

        If a replacement pass leaves the expression unchanged, the loop
        stops and the expression is returned as is, even though it still
        contains unification variables (they have no matching rule).

        Args:
            expr: Expression to reverse unification for.
            unification_dict: Dictionary of expressions containing unifications.

        Returns:
            Expression with reversed unification.
        """
        # while there is any unification variable remaining in the expression
        while any(
                v.name.startswith(self._global_variable_prefix)
                for v in get_unique_variables(expr)):
            # replace in expression
            replaced = expr.replace_expr(unification_dict)
            # No progress means the remaining variables are not covered by
            # the dictionary; without this check the loop would never end.
            if replaced == expr:
                break
            expr = replaced

        return expr
Exemplo n.º 8
0
def get_unification_candidates(expr: Expr) -> List[Expr]:
    """
    Collect all unification candidates of an expression.

    While traversing the expression with `expr.visit`, every node that is
    a memory access (`is_mem()`), a variable (`is_id()`) or a location
    label (`is_loc()`) is recorded once. Integers are not collected.

    The result is ordered by the string form of each candidate, so
    repeated calls yield the same sequence.

    Args:
        expr: Expression to inspect.

    Returns:
        Sorted list of unification candidates.
    """
    candidates = set()

    def collect(node: Expr) -> Expr:
        """
        Visitor callback that records memory, variable and label nodes.

        Args:
            node: Currently visited expression.

        Returns:
            The visited expression, unchanged.
        """
        # memory, registers and location IDs all qualify
        if node.is_mem() or node.is_id() or node.is_loc():
            candidates.add(node)
        return node

    expr.visit(collect)

    return sorted(candidates, key=str)
Exemplo n.º 9
0
    def __init__(self, expr: Expr,
                 replacements: Dict[Expr, Expr] = None):  # type: ignore
        """
        Initializes a SynthesisState instance.

        Args:
            expr (Expr): Expression in Miasm IR with unique variables/leaves.
            replacements (Dict[Expr, Expr], optional): Dictionary of variable
                replacements. When omitted, a new empty dictionary is
                created for this instance.
        """
        # A `{}` default would be created once and shared by every instance
        # constructed without an explicit dictionary, so a mutation through
        # one instance's `replacements` would leak into all the others.
        if replacements is None:
            replacements = {}
        # expression as passed in, before any replacement
        self.expr_ast: Expr = expr
        # expression with the replacements applied
        self._expr: Expr = expr.replace_expr(replacements)
        # a dictionary passed by the caller is stored as is (not copied)
        self.replacements: Dict[Expr, Expr] = replacements
Exemplo n.º 10
0
    def add_to_set(e: Expr) -> Expr:
        """
        Visitor callback that records memory, variable and label nodes.

        The expression is added to the enclosing `results` set when it is
        a memory access, a register/variable or a location ID.

        Args:
            e: Currently visited expression.

        Returns:
            The visited expression, unchanged.
        """
        # memory, registers and location IDs all qualify
        if e.is_mem() or e.is_id() or e.is_loc():
            results.add(e)
        return e
Exemplo n.º 11
0
    def add_to_set(e: Expr) -> Expr:
        """
        Visitor callback that records variable nodes.

        The expression is added to the enclosing set `l` when it is an
        identifier; other expressions are passed through untouched.

        Args:
            e: Currently visited expression.

        Returns:
            The visited expression, unchanged.
        """
        if not e.is_id():
            return e
        l.add(e)
        return e
Exemplo n.º 12
0
    def gen_from_expression(expr: Expr, variables: List[Expr], num_samples: int) -> SynthesisOracle:
        """
        Builds a SynthesisOracle instance from a given expression.

        `num_samples` I/O pairs are evaluated, each as follows:

        1. One random value is drawn per variable with `get_rand_input` and
           wrapped in an ExprInt of the variable's size.
        2. Every variable in the expression is replaced by its value and the
           result is simplified with `expr_simp`.
        3. The tuple of input ExprInts is mapped to the simplified result.

        Samples that produce the same input tuple overwrite each other, so
        the map may hold fewer than `num_samples` entries.

        Args:
            expr (Expr): Expression representing a function f(x0, ..., xi).
            variables (List[Expr]): List of variables contained in `expr`.
            num_samples (int): Number of I/O samples to evaluate.

        Returns:
            SynthesisOracle: Generated SynthesisOracle instance.
        """
        io_map = {}

        for _ in range(num_samples):
            # one random constant per variable, in variable order
            inputs = [ExprInt(get_rand_input(), v.size) for v in variables]
            # substitution that binds each variable to its constant
            replacements = dict(zip(variables, inputs))

            # evaluate expression to obtain output
            output = expr_simp(expr.replace_expr(replacements))
            # output should be an ExprInt
            assert output.is_int()

            io_map[tuple(inputs)] = output

        return SynthesisOracle(io_map)
Exemplo n.º 13
0
def reverse_unification(expr: Expr, unification_dict: Dict[Expr, Expr]) -> Expr:
    """
    Reverses the unification of an expression.

    The unification dictionary is inverted with `invert_dict` and the
    inverted mapping is applied to the expression, so that each unified
    variable is replaced with its corresponding terminal expression of
    the original expression.

    Example: Given: {x: p0, y: p1} and expression p0 + p1. The inverted
             dictionary is {p0: x, p1: y}. The expression becomes
             x + y.

    Args:
        expr: Expression to reverse unification for.
        unification_dict: Dictionary of expressions containing unifications.

    Returns:
        Expression with reversed unification.
    """
    inverse_mapping = invert_dict(unification_dict)
    return expr.replace_expr(inverse_mapping)
Exemplo n.º 14
0
def get_vars(cond: Expr, vars=None):
    """
    Append an expression to a list if it is an identifier.

    The function returns its input unchanged, so it can be used as a
    pass-through callback.

    Args:
        cond: Expression to test with `is_id()`.
        vars: List that collects identifier expressions. When omitted,
            nothing is collected. (The name shadows the builtin `vars`; it
            is kept so that keyword callers continue to work.)

    Returns:
        The expression `cond`, unchanged.
    """
    # A `[]` default would be created once and shared by all calls, growing
    # on every call that omits `vars`, so the default is None instead.
    if cond.is_id() and vars is not None:
        vars.append(cond)

    return cond
Exemplo n.º 15
0
    def _is_suitable_simplification_candidate(self, expr: Expr,
                                              simplified: Expr) -> bool:
        """
        Checks if a simplification candidate is suitable.

        This check ensures the semantical correctness of the simplification.

        The simplification candidate is rejected

        1. If the simplification candidate contains any placeholder variable
           (a variable named "p" followed by digits). In this case, not every
           variable of the simplification candidate can be matched to a
           terminal expression in the original one.

        2. If the expression graph of the original expression has no more
           nodes than the graph of the simplified one. In this case,
           simplification could make expressions even more complex.

        3. If Miasm's expression simplification results in the same expression for
           the original and the simplified one. In this case, the lookup in the
           simplification oracle is not required.

        4. If the SMT-based equivalence check finds a counterexample (`z3.sat`).
           Any other outcome is accepted by default, i.e. also when the solver
           could not prove the equivalence.

           The user has the possibility to enforce the SMT-based equivalence check
           to be successful by setting the `enforce_equivalence` flag; then
           every outcome other than `z3.unsat` rejects the candidate.

        Args:
            expr: Original expression.
            simplified: Simplified expression candidate.

        Returns:
            True if the candidate is suitable, False if it should be skipped.
        """
        # contains placeholder variables; the full name has to match, since
        # an unanchored "^p[0-9]*" flags every variable starting with "p"
        if any(
                re.fullmatch(r"p[0-9]+", v.name)
                for v in get_unique_variables(simplified)):
            logger.debug(
                f"{expr} <==> {simplified} (incorrect variable replacement)")
            return False
        # checks if original is smaller to simplified
        if len(expr.graph().nodes()) <= len(simplified.graph().nodes()):
            return False
        # same normalized expression
        if expr_simp(expr) == expr_simp(simplified):
            return False
        # query the solver once and reuse the verdict for both checks below
        verdict = self.check_semantical_equivalence(expr, simplified)
        # equivalence is enforced but was not proven
        if self.enforce_equivalence and verdict != z3.unsat:
            logger.debug(
                f"{expr} <==> {simplified} (not semantically equivalent)")
            return False
        # SMT solver finds a counter example
        if verdict == z3.sat:
            logger.debug(
                f"{expr} <==> {simplified} (not semantically equivalent, counterexample found)"
            )
            return False
        return True