示例#1
0
文件: asmbloc.py 项目: avelik/miasm
class basicblocs:
    """Collection of basic blocks indexed by label, with a graph of labels.

    blocs maps each block label to its block; g is a DiGraph whose nodes are
    block labels and whose edges follow the blocks' bto entries.
    """

    def __init__(self, ab=None):
        """Create the collection and register every block of @ab
        @ab: optional iterable of blocks (no block when omitted)
        """
        # A None default avoids sharing one mutable list between all calls
        self.blocs = {}
        self.g = DiGraph()
        self.add_blocs([] if ab is None else ab)

    def add(self, b):
        """Register block @b and add its outgoing edges to the graph
        @b: block exposing label and bto
        Only the destinations whose label is an asm_label produce an edge
        """
        self.blocs[b.label] = b
        self.g.add_node(b.label)
        for dst in b.bto:
            if isinstance(dst.label, asm_label):
                self.g.add_edge(b.label, dst.label)

    def add_blocs(self, ab):
        """Register every block of the iterable @ab"""
        for b in ab:
            self.add(b)

    def get_bad_dst(self):
        """Return the set of registered blocks having at least one bto entry
        whose c_t is asm_constraint.c_bad
        """
        o = set()
        for b in self.blocs.values():
            if any(c.c_t == asm_constraint.c_bad for c in b.bto):
                o.add(b)
        return o
示例#2
0
class basicblocs:
    """Set of basic blocks stored by label, along with their label graph.

    blocs is a dict label -> block; g is a DiGraph of labels built from the
    bto entries of the registered blocks.
    """

    def __init__(self, ab=None):
        """Build the container, then add the blocks of @ab
        @ab: optional iterable of blocks (nothing is added when omitted)
        """
        # Use None instead of a mutable default list shared across calls
        if ab is None:
            ab = []
        self.blocs = {}
        self.g = DiGraph()
        self.add_blocs(ab)

    def add(self, b):
        """Store block @b under its label and link it to its destinations
        @b: block exposing label and bto
        An edge is only added for destinations whose label is an asm_label
        """
        self.blocs[b.label] = b
        self.g.add_node(b.label)
        for dst in b.bto:
            if isinstance(dst.label, asm_label):
                self.g.add_edge(b.label, dst.label)

    def add_blocs(self, ab):
        """Add each block of the iterable @ab"""
        for b in ab:
            self.add(b)

    def get_bad_dst(self):
        """Return the set of stored blocks with at least one bto entry whose
        c_t equals asm_constraint.c_bad
        """
        o = set()
        for b in self.blocs.values():
            if any(c.c_t == asm_constraint.c_bad for c in b.bto):
                o.add(b)
        return o
示例#3
0
def unflatGraph(flat_graph):
    """Rebuild a DiGraph from its flat representation
    @flat_graph: pair (nodes, edges), edges being an iterable of
    (source, destination) pairs
    Return the DiGraph holding these nodes and edges
    """
    rebuilt = DiGraph()
    node_list, edge_list = flat_graph
    for vertex in node_list:
        rebuilt.add_node(vertex)
    for src, dst in edge_list:
        rebuilt.add_edge(src, dst)
    return rebuilt
示例#4
0
 def as_graph(self):
     """Build the DiGraph of dependencies out of self.links and
     self.pending
     """
     dep_graph = DiGraph()
     for src, dst in self.links:
         if dst:
             dep_graph.add_edge(src, dst)
         else:
             # A link without second element only declares a node
             dep_graph.add_node(src)
     for father, children in self.pending.iteritems():
         for child in children:
             dep_graph.add_edge(father, child)
     return dep_graph
示例#5
0
 def as_graph(self):
     """Return a DiGraph of dependencies made from the links and the
     pending relations of this object
     """
     result = DiGraph()
     for first, second in self.links:
         if second:
             result.add_edge(first, second)
         else:
             # No second element: only the node itself is recorded
             result.add_node(first)
     for head, tails in self.pending.iteritems():
         for tail in tails:
             result.add_edge(head, tail)
     return result
示例#6
0
文件: asmbloc.py 项目: avelik/miasm
def blist2graph(ab):
    """
    Build the graph of labels of a list of blocks
    @ab: list of asmbloc
    Return a DiGraph with one node per block label and one edge per entry of
    each block's bto; the graph also carries a lbl2bloc dict (label -> block)
    """
    graph = DiGraph()
    graph.lbl2bloc = {}
    for bloc in ab:
        label = bloc.label
        graph.lbl2bloc[label] = bloc
        graph.add_node(label)
        for target in bloc.bto:
            graph.add_edge(label, target.label)
    return graph
示例#7
0
def blist2graph(ab):
    """
    Turn a list of blocks into a graph of their labels
    @ab: list of asmbloc
    Return a DiGraph whose nodes are the block labels, linked according to
    each block's bto; a lbl2bloc dict (label -> block) is attached to it
    """
    label_graph = DiGraph()
    label_graph.lbl2bloc = {}
    for cur_bloc in ab:
        src_label = cur_bloc.label
        label_graph.lbl2bloc[src_label] = cur_bloc
        label_graph.add_node(src_label)
        for constraint in cur_bloc.bto:
            label_graph.add_edge(src_label, constraint.label)
    return label_graph
示例#8
0
文件: depgraph.py 项目: 0xf1sh/miasm
    def as_graph(self, starting_nodes):
        """Merge the dependency graphs of @starting_nodes into one DiGraph
        @starting_nodes: set of DependencyNode instance, used as leafs
        Return the resulting DiGraph
        """

        # One dependency subgraph per starting node
        per_node = [self._build_depGraph(start) for start in starting_nodes]

        # Fold every subgraph into the final DiGraph
        merged = DiGraph()
        for part in per_node:
            for vertex in part.nodes():
                merged.add_node(vertex)
            for link in part.edges():
                merged.add_uniq_edge(*link)
        return merged
示例#9
0
    def as_graph(self, starting_nodes):
        """Build a single DiGraph out of the dependencies computed for each
        node of @starting_nodes
        @starting_nodes: set of DependencyNode instance, used as leafs
        Return the merged DiGraph
        """

        # Compute the dependency graph of each starting node first
        partial_graphs = [self._build_depgraph(start)
                          for start in starting_nodes]

        # Then copy their nodes and edges into the result
        result = DiGraph()
        for partial in partial_graphs:
            for vertex in partial.nodes():
                result.add_node(vertex)
            for link in partial.edges():
                result.add_uniq_edge(*link)
        return result
示例#10
0
    def _build_depgraph(self, depnode):
        """Recursively build the DiGraph of the dependencies of @depnode
        @depnode: starting node
        Return a DiGraph containing @depnode, one edge from each of its
        direct dependencies (as listed in self._cache) to it, and the nodes
        and edges of the graphs built recursively for these dependencies
        """

        if depnode not in self._cache or \
                not self._cache[depnode]:
            # There is no dependency
            graph = DiGraph()
            graph.add_node(depnode)
            return graph

        # Recursion
        dependencies = list(self._cache[depnode])
        graphs = [self._build_depgraph(sub_depnode)
                  for sub_depnode in dependencies]

        # head(graphs[i]) == dependencies[i]
        graph = DiGraph()
        graph.add_node(depnode)
        for head in dependencies:
            graph.add_uniq_edge(head, depnode)

        # Merge each sub-graph into the result. The former
        # itertools.product(graphs) call only wrapped each graph in a
        # 1-tuple, which is equivalent to this plain loop
        for sourcegraph in graphs:
            for node in sourcegraph.nodes():
                graph.add_node(node)
            for edge in sourcegraph.edges():
                graph.add_uniq_edge(*edge)

        return graph
示例#11
0
文件: depgraph.py 项目: 0xf1sh/miasm
    def _build_depGraph(self, depnode):
        """Recursively build the DiGraph of the dependencies of @depnode
        @depnode: starting node
        Return a DiGraph containing @depnode, one edge from each of its
        direct dependencies (as listed in self._cache) to it, and the nodes
        and edges of the graphs built recursively for these dependencies
        """

        if depnode not in self._cache or \
                not self._cache[depnode]:
            # There is no dependency
            graph = DiGraph()
            graph.add_node(depnode)
            return graph

        # Recursion
        dependencies = list(self._cache[depnode])
        graphs = [self._build_depGraph(sub_depnode)
                  for sub_depnode in dependencies]

        # head(graphs[i]) == dependencies[i]
        graph = DiGraph()
        graph.add_node(depnode)
        for head in dependencies:
            graph.add_uniq_edge(head, depnode)

        # Merge each sub-graph into the result. The former
        # itertools.product(graphs) call only wrapped each graph in a
        # 1-tuple, which is equivalent to this plain loop
        for sourcegraph in graphs:
            for node in sourcegraph.nodes():
                graph.add_node(node)
            for edge in sourcegraph.edges():
                graph.add_uniq_edge(*edge)

        return graph
示例#12
0
class ira:
    def ira_regs_ids(self):
        """Returns ids of all registers used in the IR"""
        return self.arch.regs.all_regs_ids + [self.IRDst]

    def sort_dst(self, todo, done):
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        dst = b.dst
        todo = set([dst])
        done = set()

        for irs in reversed(b.irs):
            if len(todo) == 0:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                for o in found:
                    out.remove(o)

            for o in out:
                if o not in found:
                    follow.add(o)
            todo = follow

        return done

    def gen_graph(self, link_all=True):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            # print 'add', lbl
            self.g.add_node(lbl)
            # dst = self.get_bloc_dst(b)
            dst = self.dst_trackback(b)
            # print "\tdst", dst
            for d in dst:
                if isinstance(d, ExprInt):
                    d = ExprId(self.symbol_pool.getby_offset_create(int(
                        d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        """Output the graphviz script"""
        out = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
        """
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            # print l, v
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            # print 'edge', a, b, hash(a), hash(b)
            out += '%s -> %s;\n' % (hash(a), hash(b))
        out += '}'
        return out

    def remove_dead_instr(self, irb, useful):
        """Remove dead affectations using previous reaches analysis
        @irb: irbloc instance
        @useful: useful statements from previous reach analysis
        Return True iff the block state has changed
        PRE: compute_reach(self)
        """
        modified = False
        for k, ir in enumerate(irb.irs):
            j = 0
            while j < len(ir):
                cur_instr = ir[j]
                if (isinstance(cur_instr.dst, ExprId)
                        and (irb.label, k, cur_instr) not in useful):
                    del ir[j]
                    modified = True
                else:
                    j += 1
        return modified

    def init_useful_instr(self):
        """Computes a set of triples (block, instruction number, instruction)
        containing initially useful instructions :
          - Instructions affecting final value of return registers
          - Instructions affecting IRDst register
          - Instructions writing in memory
          - Function call instructions
        Return set of intial useful instructions
        """

        useful = set()

        for node in self.g.nodes():
            if node not in self.blocs:
                continue

            block = self.blocs[node]
            successors = self.g.successors(node)
            has_son = bool(successors)
            for p_son in successors:
                if p_son not in self.blocs:
                    # Leaf has lost its son: don't remove anything
                    # reaching this block
                    for r in self.ira_regs_ids():
                        useful.update(block.cur_reach[-1][r].union(
                            block.defout[-1][r]))

            # Function call, memory write or IRDst affectation
            for k, ir in enumerate(block.irs):
                for i_cur in ir:
                    if i_cur.is_function_call():
                        # /!\ never remove ir calls
                        useful.add((block.label, k, i_cur))
                    if isinstance(i_cur.dst, ExprMem):
                        useful.add((block.label, k, i_cur))
                    useful.update(block.defout[k][self.IRDst])

            # Affecting return registers
            if not has_son:
                for r in self.get_out_regs(block):
                    useful.update(block.defout[-1][r] if block.
                                  defout[-1][r] else block.cur_reach[-1][r])

        return useful

    def _mark_useful_code(self):
        """Mark useful statements using previous reach analysis

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division,  Algorithm MK

        Return a set of triplets (block, instruction number, instruction) of
        useful instructions
        PRE: compute_reach(self)

        """

        useful = self.init_useful_instr()
        worklist = useful.copy()
        while worklist:
            elem = worklist.pop()
            useful.add(elem)
            irb, irs_ind, ins = elem

            block = self.blocs[irb]
            instr_defout = block.defout[irs_ind]
            cur_kill = block.cur_kill[irs_ind]
            cur_reach = block.cur_reach[irs_ind]

            # Handle dependencies of used variables in ins
            for reg in ins.get_r(True).intersection(self.ira_regs_ids()):
                worklist.update(cur_reach[reg].difference(useful).difference(
                    cur_kill[reg] if not instr_defout[reg] else set()))
                for _, _, i in instr_defout[reg]:
                    # Loop case (i in defout of current block)
                    if i == ins:
                        worklist.update(cur_reach[reg].difference(useful))
        return useful

    def remove_dead_code(self):
        """Remove dead instructions in each block of the graph using the reach
        analysis .
        Returns True if a block has been modified
        PRE : compute_reach(self)
        """
        useful = self._mark_useful_code()
        modified = False
        for block in self.blocs.values():
            modified |= self.remove_dead_instr(block, useful)
        return modified

    def set_dead_regs(self, b):
        pass

    def add_unused_regs(self):
        pass

    @staticmethod
    def print_set(v_set):
        """Print each triplet contained in a set
        @v_set: set containing triplets elements
        """
        for p in v_set:
            print '    (%s, %s, %s)' % p

    def dump_bloc_state(self, irb):
        print '*' * 80
        for k, irs in enumerate(irb.irs):
            for i in xrange(len(irs)):
                print 5 * "-"
                print 'instr', k, irs[i]
                print 5 * "-"
                for v in self.ira_regs_ids():
                    if irb.cur_reach[k][v]:
                        print 'REACH[%d][%s]' % (k, v)
                        self.print_set(irb.cur_reach[k][v])
                    if irb.cur_kill[k][v]:
                        print 'KILL[%d][%s]' % (k, v)
                        self.print_set(irb.cur_kill[k][v])
                    if irb.defout[k][v]:
                        print 'DEFOUT[%d][%s]' % (k, v)
                        self.print_set(irb.defout[k][v])

    def compute_reach_block(self, irb):
        """Variable influence computation for a single block
        @irb: irbloc instance
        PRE: init_reach()
        """

        reach_block = {
            key: value.copy()
            for key, value in irb.cur_reach[0].iteritems()
        }

        # Compute reach from predecessors
        for n_pred in self.g.predecessors(irb.label):
            p_block = self.blocs[n_pred]

            # Handle each register definition
            for c_reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = p_block.defout[-1][c_reg].union(
                    p_block.cur_reach[-1][c_reg].difference(
                        p_block.cur_kill[-1][c_reg]))
                reach_block[c_reg].update(pred_through)

        # If a predecessor has changed
        if reach_block != irb.cur_reach[0]:
            irb.cur_reach[0] = reach_block
            for c_reg in self.ira_regs_ids():
                if irb.defout[0][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[0][c_reg].update(
                        reach_block[c_reg].difference(irb.defout[0][c_reg]))

        # Compute reach and kill for block's instructions
        for i in xrange(1, len(irb.irs)):
            for c_reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = irb.defout[i - 1][c_reg].union(
                    irb.cur_reach[i - 1][c_reg].difference(
                        irb.cur_kill[i - 1][c_reg]))
                irb.cur_reach[i][c_reg].update(pred_through)
                if irb.defout[i][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[i][c_reg].update(
                        irb.cur_reach[i][c_reg].difference(
                            irb.defout[i][c_reg]))

    def _test_kill_reach_fix(self):
        """Return True iff a fixed point has been reached during reach
        analysis"""

        fixed = True
        for node in self.g.nodes():
            if node in self.blocs:
                irb = self.blocs[node]
                if (irb.cur_reach != irb.prev_reach
                        or irb.cur_kill != irb.prev_kill):
                    fixed = False
                    irb.prev_reach = irb.cur_reach[:]
                    irb.prev_kill = irb.cur_kill[:]
        return fixed

    def compute_reach(self):
        """
        Compute reach, defout and kill sets until a fixed point is reached.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        fixed_point = False
        log.debug('iteration...')
        while not fixed_point:
            for node in self.g.nodes():
                if node in self.blocs:
                    self.compute_reach_block(self.blocs[node])
            fixed_point = self._test_kill_reach_fix()

    def dead_simp(self):
        """
        This function is used to analyse relation of a * complete function *
        This means the blocks under study represent a solid full function graph.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        # Update r/w variables for all irblocs
        self.get_rw(self.ira_regs_ids())
        # Liveness step
        self.compute_reach()
        self.remove_dead_code()
        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print '*' * 40
            print irb
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
示例#13
0
文件: analysis.py 项目: vardyh/miasm
class ira:
    def sort_dst(self, todo, done):
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        dst = b.dst
        todo = set([dst])
        out = set()
        done = set()

        for irs in reversed(b.irs):
            if len(todo) == 0:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                for o in found:
                    out.remove(o)

            for o in out:
                if not o in found:
                    follow.add(o)
            todo = follow
        out = self.sort_dst(todo, done)

        return done

    def gen_graph(self, link_all=False):
        """
        Gen irbloc digraph
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, b in self.blocs.items():
            # print 'add', lbl
            self.g.add_node(lbl)
            # dst = self.get_bloc_dst(b)
            dst = self.dst_trackback(b)
            # print "\tdst", dst
            for d in dst:
                if isinstance(d, ExprInt):
                    d = ExprId(self.symbol_pool.getby_offset_create(int(
                        d.arg)))
                if self.ExprIsLabel(d):
                    if d.name in self.blocs or link_all is True:
                        self.g.add_edge(lbl, d.name)

    def graph(self):
        out = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
    """
        all_lbls = {}
        for lbl in self.g.nodes():
            if not lbl in self.blocs:
                continue
            b = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in b.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            all_lbls[id(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            out += '%s -> %s;\n' % (id(a), id(b))
        out += '}'
        return out

    def remove_dead(self, b):
        for ir, _, c_out in zip(b.irs, b.c_in, b.c_out):
            j = 0
            while j < len(ir):
                i_cur = ir[j]
                if not isinstance(i_cur.dst, ExprId):
                    pass
                elif (isinstance(i_cur.src, ExprOp)
                      and i_cur.src.op.startswith('call')):
                    # /!\ never remove ir calls
                    pass
                elif not i_cur.dst in c_out:
                    del (ir[j])
                    continue
                j += 1

    def remove_blocs_dead(self):
        for b in self.blocs.values():
            self.remove_dead(b)

    # for test XXX TODO
    def set_dead_regs(self, b):
        pass

    def add_unused_regs(self):
        pass

    def compute_in_out(self, b):
        # get out/in from bloc sons
        modified = False
        # set b in
        if b.c_in[-1] != set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))):
            modified = True
        b.c_in[-1] = set(b.r[-1].union(b.c_out[-1].difference(b.w[-1])))

        # set b out
        c_out = set()
        has_son = False
        for n_son in self.g.successors(b.label):
            # print n_me, n_son
            has_son = True
            if not n_son in self.blocs:
                print "leaf has lost her sons!"
                continue
            b_son = self.blocs[n_son]
            c_out.update(b_son.c_in[0])
        if not has_son:
            # special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(b)
        if b.c_out[-1] != set(c_out):
            modified = True
        b.c_out[-1] = set(c_out)

        # get out/in for bloc
        for i in reversed(xrange(len(b.irs))):
            if b.c_in[i] != set(b.r[i].union(b.c_out[i].difference(b.w[i]))):
                modified = True
            b.c_in[i] = set(b.r[i].union(b.c_out[i].difference(b.w[i])))
            if b.c_out[i] != set(b.c_in[i + 1]):
                modified = True
            b.c_out[i] = set(b.c_in[i + 1])
        return modified

    def test_in_out_fix(self):
        fixed = True
        for n in self.g.nodes():
            if not n in self.blocs:
                # leaf has lost her son
                continue
            b = self.blocs[n]
            if b.c_in != b.l_in or b.c_out != b.l_out:
                fixed = False
            b.l_in = [set(x) for x in b.c_in]
            b.l_out = [set(x) for x in b.c_out]
        return fixed

    def compute_dead(self):
        self.get_rw()

        it = 0
        fixed_point = False
        print 'iteration...',
        while not fixed_point:
            print it,
            it += 1
            for n in self.g.nodes():
                if not n in self.blocs:
                    # leaf has lost her son
                    continue
                b = self.blocs[n]
                self.compute_in_out(b)

            fixed_point = self.test_in_out_fix()
        print

    def dead_simp(self):
        self.compute_dead()
        self.remove_blocs_dead()
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print '*' * 40
            print irb
            for eq in eqs:
                eq
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")
示例#14
0
文件: analysis.py 项目: CaineQT/miasm
class ira:
    def ira_regs_ids(self):
        """Return the ids of all registers used in the IR.

        The result is `self.arch.regs.all_regs_ids` followed by `self.IRDst`.
        """
        arch_ids = self.arch.regs.all_regs_ids
        return arch_ids + [self.IRDst]

    def sort_dst(self, todo, done):
        """Dispatch the pending destination expressions of @todo.

        @todo is emptied: labels, ExprMem, ExprInt and expressions of any
        unhandled kind are moved to @done, both sources of an ExprCond are
        pushed back on @todo, and ExprId are collected for further tracking.
        @todo: set of expressions to examine (consumed)
        @done: set receiving the expressions needing no more tracking
        Return the set of ExprId found
        """
        ids = set()
        while todo:
            expr = todo.pop()
            if self.ExprIsLabel(expr) or isinstance(expr, (ExprMem, ExprInt)):
                done.add(expr)
            elif isinstance(expr, ExprCond):
                # Both branches of the condition are possible destinations
                todo.update((expr.src1, expr.src2))
            elif isinstance(expr, ExprId):
                ids.add(expr)
            else:
                done.add(expr)
        return ids

    def dst_trackback(self, b):
        """Track the destination of bloc @b back through its affectations.

        Starting from `b.dst`, the affectation lists of `b.irs` are walked
        from last to first. In each list, an ExprId still pending is replaced
        by the source of the affectation writing it; ids not written in the
        list are searched in the previous one. Expressions classified by
        sort_dst() as needing no more tracking are accumulated in the result.
        Expressions still pending once every list has been visited are not
        added to the result.
        @b: irbloc instance
        Return the set of destination expressions found
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            # ExprId returned here still have to be resolved
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                # `found` accumulates over the whole list: ids matched by an
                # earlier affectation are already gone from `out`, so a plain
                # remove() would raise KeyError on the next affectation.
                out.difference_update(found)

            # Ids not written in this list are searched in the previous one
            follow.update(out)
            todo = follow

        return done

    def gen_graph(self, link_all=True):
        """
        Build the irbloc digraph in self.g

        Every label of self.blocs becomes a node. An edge is added to each
        label destination returned by dst_trackback(); integer destinations
        are first turned into the label created for their offset.
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, irb in self.blocs.items():
            self.g.add_node(lbl)
            for target in self.dst_trackback(irb):
                if isinstance(target, ExprInt):
                    offset = int(target.arg)
                    target = ExprId(self.symbol_pool.getby_offset_create(offset))
                if not self.ExprIsLabel(target):
                    continue
                if target.name in self.blocs or link_all is True:
                    self.g.add_edge(lbl, target.name)

    def graph(self):
        """Return the graphviz script of the irbloc graph.

        One node is emitted for each graph node owning an irbloc; its text is
        the label followed by the bloc affectations. Nodes and edges are named
        after the hash of their label.
        """
        header = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
        """
        # Graphviz left-justified line break, followed by a line continuation
        line_sep = "\\l\\\n"
        node_texts = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            rows = [str(lbl)]
            for irs in self.blocs[lbl].irs:
                rows.extend(str(instr) for instr in irs)
                rows.append("")
            rows.append("")
            node_texts[hash(lbl)] = line_sep.join(rows)

        chunks = [header]
        for node_id, text in node_texts.items():
            chunks.append('%s [label="%s"];\n' % (node_id, text))
        for src, dst in self.g.edges():
            chunks.append("%s -> %s;\n" % (hash(src), hash(dst)))
        chunks.append("}")
        return "".join(chunks)

    def remove_dead_instr(self, irb, useful):
        """Delete from @irb the affectations which are not useful.

        An affectation is deleted when its destination is an ExprId and the
        triple (bloc label, list index, affectation) is absent from @useful.
        @irb: irbloc instance
        @useful: useful statements from previous reach analysis
        Return True iff the block state has changed
        PRE: compute_reach(self)
        """
        modified = False
        for k, ir in enumerate(irb.irs):
            # Walk backwards so that deletions do not shift pending indexes
            for j in reversed(range(len(ir))):
                instr = ir[j]
                if not isinstance(instr.dst, ExprId):
                    continue
                if (irb.label, k, instr) in useful:
                    continue
                del ir[j]
                modified = True
        return modified

    def init_useful_instr(self):
        """Computes a set of triples (block, instruction number, instruction)
        containing initially useful instructions :
          - Instructions affecting final value of return registers
          - Instructions affecting IRDst register
          - Instructions writing in memory
          - Function call instructions
        Only graph nodes owning an irbloc are considered.
        Return set of initial useful instructions
        """

        useful = set()

        for node in self.g.nodes():
            if node not in self.blocs:
                continue

            block = self.blocs[node]
            successors = self.g.successors(node)
            has_son = bool(successors)
            for p_son in successors:
                if p_son not in self.blocs:
                    # Leaf has lost its son: don't remove anything
                    # reaching this block
                    for r in self.ira_regs_ids():
                        useful.update(block.cur_reach[-1][r].union(block.defout[-1][r]))

            # Function call, memory write or IRDst affectation
            for k, ir in enumerate(block.irs):
                for i_cur in ir:
                    if i_cur.src.is_function_call():
                        # /!\ never remove ir calls
                        useful.add((block.label, k, i_cur))
                    if isinstance(i_cur.dst, ExprMem):
                        useful.add((block.label, k, i_cur))
                    # Definitions of IRDst leaving the list number k
                    useful.update(block.defout[k][self.IRDst])

            # Affecting return registers
            if not has_son:
                # Block without successor: for each register given by
                # get_out_regs(), keep the definitions of the last list or,
                # when it has none, the definitions reaching that list
                for r in self.get_out_regs(block):
                    useful.update(block.defout[-1][r] if block.defout[-1][r] else block.cur_reach[-1][r])

        return useful

    def _mark_useful_code(self):
        """Mark useful statements using previous reach analysis

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division,  Algorithm MK

        The set given by init_useful_instr() is extended, with a worklist,
        by the definitions on which the useful statements depend.

        Return a set of triplets (block, instruction number, instruction) of
        useful instructions
        PRE: compute_reach(self)

        """

        useful = self.init_useful_instr()
        worklist = useful.copy()
        while worklist:
            elem = worklist.pop()
            useful.add(elem)
            # Triplet: block label, index of the list in block.irs, statement
            irb, irs_ind, ins = elem

            block = self.blocs[irb]
            instr_defout = block.defout[irs_ind]
            cur_kill = block.cur_kill[irs_ind]
            cur_reach = block.cur_reach[irs_ind]

            # Handle dependencies of used variables in ins
            for reg in ins.get_r(True).intersection(self.ira_regs_ids()):
                # Queue the reaching definitions of reg not marked yet; the
                # killed ones are excluded only when this list does not
                # define reg itself
                worklist.update(
                    cur_reach[reg].difference(useful).difference(cur_kill[reg] if not instr_defout[reg] else set())
                )
                for _, _, i in instr_defout[reg]:
                    # Loop case (i in defout of current block)
                    if i == ins:
                        worklist.update(cur_reach[reg].difference(useful))
        return useful

    def remove_dead_code(self):
        """Remove the dead instructions of every block, using the statements
        marked by _mark_useful_code().
        Returns True if a block has been modified
        PRE : compute_reach(self)
        """
        useful = self._mark_useful_code()
        changed = False
        for irb in self.blocs.values():
            # Every block is visited, even once a change has been seen
            if self.remove_dead_instr(irb, useful):
                changed = True
        return changed

    def set_dead_regs(self, b):
        """No-op placeholder: @b is ignored and nothing is returned."""
        pass

    def add_unused_regs(self):
        """No-op placeholder: does nothing and returns nothing."""
        pass

    @staticmethod
    def print_set(v_set):
        """Print each triplet contained in a set
        @v_set: set containing triplets elements
        """
        for p in v_set:
            print "    (%s, %s, %s)" % p

    def dump_bloc_state(self, irb):
        print "*" * 80
        for k, irs in enumerate(irb.irs):
            for i in xrange(len(irs)):
                print 5 * "-"
                print "instr", k, irs[i]
                print 5 * "-"
                for v in self.ira_regs_ids():
                    if irb.cur_reach[k][v]:
                        print "REACH[%d][%s]" % (k, v)
                        self.print_set(irb.cur_reach[k][v])
                    if irb.cur_kill[k][v]:
                        print "KILL[%d][%s]" % (k, v)
                        self.print_set(irb.cur_kill[k][v])
                    if irb.defout[k][v]:
                        print "DEFOUT[%d][%s]" % (k, v)
                        self.print_set(irb.defout[k][v])

    def compute_reach_block(self, irb):
        """Variable influence computation for a single block

        The reach of the first list of @irb is merged with what flows out of
        the last list of each predecessor, then reach and kill are propagated
        through the following lists of the block.
        @irb: irbloc instance
        PRE: init_reach()
        """

        # Work on a copy so that a change can be detected below
        reach_block = {key: value.copy() for key, value in irb.cur_reach[0].iteritems()}

        # Compute reach from predecessors
        for n_pred in self.g.predecessors(irb.label):
            p_block = self.blocs[n_pred]

            # Handle each register definition
            for c_reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = p_block.defout[-1][c_reg].union(
                    p_block.cur_reach[-1][c_reg].difference(p_block.cur_kill[-1][c_reg])
                )
                reach_block[c_reg].update(pred_through)

        # If a predecessor has changed
        if reach_block != irb.cur_reach[0]:
            irb.cur_reach[0] = reach_block
            for c_reg in self.ira_regs_ids():
                if irb.defout[0][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[0][c_reg].update(reach_block[c_reg].difference(irb.defout[0][c_reg]))

        # Compute reach and kill for block's instructions
        # (sets of the lists 1..n are updated in place)
        for i in xrange(1, len(irb.irs)):
            for c_reg in self.ira_regs_ids():
                # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
                pred_through = irb.defout[i - 1][c_reg].union(
                    irb.cur_reach[i - 1][c_reg].difference(irb.cur_kill[i - 1][c_reg])
                )
                irb.cur_reach[i][c_reg].update(pred_through)
                if irb.defout[i][c_reg]:
                    # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
                    irb.cur_kill[i][c_reg].update(irb.cur_reach[i][c_reg].difference(irb.defout[i][c_reg]))

    def _test_kill_reach_fix(self):
        """Return True iff a fixed point has been reached during reach
        analysis

        The current reach/kill state of each changed block is saved as its
        previous state for the next call.
        """

        fixed = True
        for node in self.g.nodes():
            if node not in self.blocs:
                continue
            irb = self.blocs[node]
            if irb.cur_reach == irb.prev_reach and irb.cur_kill == irb.prev_kill:
                continue
            fixed = False
            # NOTE(review): shallow copies, the inner containers stay shared
            # with the current state
            irb.prev_reach = irb.cur_reach[:]
            irb.prev_kill = irb.cur_kill[:]
        return fixed

    def compute_reach(self):
        """
        Compute reach, defout and kill sets until a fixed point is reached.

        compute_reach_block() is applied to every graph node owning an
        irbloc, pass after pass, until _test_kill_reach_fix() returns True.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        log.debug("iteration...")
        while True:
            for node in self.g.nodes():
                if node in self.blocs:
                    self.compute_reach_block(self.blocs[node])
            if self._test_kill_reach_fix():
                break

    def dead_simp(self):
        """
        Remove the dead code of a * complete function *, then simplify its
        expressions.

        This function is used to analyse relation of a * complete function *
        This means the blocks under study represent a solid full function graph.

        Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
        IBM Thomas J. Watson Research Division, page 43

        PRE: gen_graph()
        """
        # Update r/w variables for all irblocs
        self.get_rw(self.ira_regs_ids())
        # Liveness step
        self.compute_reach()
        # The flag returned by remove_dead_code() is not used here
        self.remove_dead_code()
        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print "*" * 40
            print irb
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        """Return the size of a char in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        """Return the size of a short in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        """Return the size of an int in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        """Return the size of a long in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        """Return the size of a void* in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")
示例#15
0
class ira:

    def sort_dst(self, todo, done):
        """Dispatch the pending destination expressions of @todo.

        @todo is emptied: labels, ExprMem, ExprInt and expressions of any
        unhandled kind are moved to @done, both sources of an ExprCond are
        pushed back on @todo, and ExprId are collected for further tracking.
        @todo: set of expressions to examine (consumed)
        @done: set receiving the expressions needing no more tracking
        Return the set of ExprId found
        """
        ids = set()
        while todo:
            expr = todo.pop()
            if self.ExprIsLabel(expr) or isinstance(expr, (ExprMem, ExprInt)):
                done.add(expr)
            elif isinstance(expr, ExprCond):
                # Both branches of the condition are possible destinations
                todo.update((expr.src1, expr.src2))
            elif isinstance(expr, ExprId):
                ids.add(expr)
            else:
                done.add(expr)
        return ids

    def dst_trackback(self, b):
        """Track the destination of bloc @b back through its affectations.

        Starting from `b.dst`, the affectation lists of `b.irs` are walked
        from last to first. In each list, an ExprId still pending is replaced
        by the source of the affectation writing it; ids not written in the
        list are searched in the previous one. Expressions classified by
        sort_dst() as needing no more tracking are accumulated in the result.
        @b: irbloc instance
        Return the set of destination expressions found
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            # ExprId returned here still have to be resolved
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                # `found` accumulates over the whole list: ids matched by an
                # earlier affectation are already gone from `out`, so a plain
                # remove() would raise KeyError on the next affectation.
                out.difference_update(found)

            # Ids not written in this list are searched in the previous one
            follow.update(out)
            todo = follow

        # Classify what is left once every list has been visited; the ExprId
        # returned by this last call are not added to the result.
        self.sort_dst(todo, done)

        return done

    def gen_graph(self, link_all=False):
        """
        Build the irbloc digraph in self.g

        Every label of self.blocs becomes a node. An edge is added to each
        label destination returned by dst_trackback(); integer destinations
        are first turned into the label created for their offset.
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, irb in self.blocs.items():
            self.g.add_node(lbl)
            for target in self.dst_trackback(irb):
                if isinstance(target, ExprInt):
                    offset = int(target.arg)
                    target = ExprId(self.symbol_pool.getby_offset_create(offset))
                if not self.ExprIsLabel(target):
                    continue
                if target.name in self.blocs or link_all is True:
                    self.g.add_edge(lbl, target.name)

    def graph(self):
        """Return the graphviz script of the irbloc graph.

        One node is emitted for each graph node owning an irbloc; its text is
        the label followed by the bloc affectations. Nodes and edges are named
        after the id() of their label.
        """
        header = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
    """
        # Graphviz left-justified line break, followed by a line continuation
        line_sep = "\\l\\\n"
        node_texts = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            rows = [str(lbl)]
            for irs in self.blocs[lbl].irs:
                rows.extend(str(instr) for instr in irs)
                rows.append("")
            rows.append("")
            node_texts[id(lbl)] = line_sep.join(rows)

        chunks = [header]
        for node_id, text in node_texts.items():
            chunks.append('%s [label="%s"];\n' % (node_id, text))
        for src, dst in self.g.edges():
            chunks.append('%s -> %s;\n' % (id(src), id(dst)))
        chunks.append('}')
        return "".join(chunks)

    def remove_dead(self, b):
        """Delete from bloc @b the affectations whose destination is dead.

        An affectation is deleted when its destination is an ExprId absent
        from the OUT set of its list, unless its source is an ExprOp whose
        operator starts with 'call'.
        @b: irbloc instance
        PRE: compute_in_out(@b)
        """
        for ir, _, live_out in zip(b.irs, b.c_in, b.c_out):
            # Walk backwards so that deletions do not shift pending indexes
            for j in reversed(range(len(ir))):
                instr = ir[j]
                if not isinstance(instr.dst, ExprId):
                    continue
                if isinstance(instr.src, ExprOp) and instr.src.op.startswith('call'):
                    # /!\ never remove ir calls
                    continue
                if instr.dst not in live_out:
                    del ir[j]

    def remove_blocs_dead(self):
        """Call remove_dead() on each irbloc of self.blocs."""
        for b in self.blocs.values():
            self.remove_dead(b)

    # for test XXX TODO
    def set_dead_regs(self, b):
        """No-op placeholder: @b is ignored and nothing is returned."""
        pass

    def add_unused_regs(self):
        """No-op placeholder: does nothing and returns nothing."""
        pass

    def compute_in_out(self, b):
        # get out/in from bloc sons
        modified = False
        # set b in
        if b.c_in[-1] != set(b.r[-1].union(b.c_out[-1].difference(b.w[-1]))):
            modified = True
        b.c_in[-1] = set(b.r[-1].union(b.c_out[-1].difference(b.w[-1])))

        # set b out
        c_out = set()
        has_son = False
        for n_son in self.g.successors(b.label):
            # print n_me, n_son
            has_son = True
            if not n_son in self.blocs:
                print "leaf has lost her sons!"
                continue
            b_son = self.blocs[n_son]
            c_out.update(b_son.c_in[0])
        if not has_son:
            # special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(b)
        if b.c_out[-1] != set(c_out):
            modified = True
        b.c_out[-1] = set(c_out)

        # get out/in for bloc
        for i in reversed(xrange(len(b.irs))):
            if b.c_in[i] != set(b.r[i].union(b.c_out[i].difference(b.w[i]))):
                modified = True
            b.c_in[i] = set(b.r[i].union(b.c_out[i].difference(b.w[i])))
            if b.c_out[i] != set(b.c_in[i + 1]):
                modified = True
            b.c_out[i] = set(b.c_in[i + 1])
        return modified

    def test_in_out_fix(self):
        """Return True iff no IN/OUT set has changed since the last call.

        The current IN/OUT sets of each bloc are saved (copied) as the
        reference for the next call.
        """
        stable = True
        for label in self.g.nodes():
            # Nodes without an irbloc (leaf has lost her son) are skipped
            if label in self.blocs:
                bloc = self.blocs[label]
                if bloc.c_in != bloc.l_in or bloc.c_out != bloc.l_out:
                    stable = False
                bloc.l_in = [set(live) for live in bloc.c_in]
                bloc.l_out = [set(live) for live in bloc.c_out]
        return stable

    def compute_dead(self):
        self.get_rw()

        it = 0
        fixed_point = False
        print 'iteration...',
        while not fixed_point:
            print it,
            it += 1
            for n in self.g.nodes():
                if not n in self.blocs:
                    # leaf has lost her son
                    continue
                b = self.blocs[n]
                self.compute_in_out(b)

            fixed_point = self.test_in_out_fix()
        print

    def dead_simp(self):
        """Run the dead code simplification steps on the known blocs.

        Chains, in this order: compute_dead(), remove_blocs_dead() and
        simplify_blocs().
        """
        # Liveness analysis, iterated until a fixed point is reached
        self.compute_dead()
        # Delete the affectations whose destination is not live
        self.remove_blocs_dead()
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print '*' * 40
            print irb
            for eq in eqs:
                eq
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        """Return the size of a char in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        """Return the size of a short in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        """Return the size of an int in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        """Return the size of a long in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        """Return the size of a void* in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")
示例#16
0
class ira:
    def sort_dst(self, todo, done):
        """Dispatch the pending destination expressions of @todo.

        @todo is emptied: labels, ExprMem, ExprInt and expressions of any
        unhandled kind are moved to @done, both sources of an ExprCond are
        pushed back on @todo, and ExprId are collected for further tracking.
        @todo: set of expressions to examine (consumed)
        @done: set receiving the expressions needing no more tracking
        Return the set of ExprId found
        """
        ids = set()
        while todo:
            expr = todo.pop()
            if self.ExprIsLabel(expr) or isinstance(expr, (ExprMem, ExprInt)):
                done.add(expr)
            elif isinstance(expr, ExprCond):
                # Both branches of the condition are possible destinations
                todo.update((expr.src1, expr.src2))
            elif isinstance(expr, ExprId):
                ids.add(expr)
            else:
                done.add(expr)
        return ids

    def dst_trackback(self, b):
        """Track the destination of bloc @b back through its affectations.

        Starting from `b.dst`, the affectation lists of `b.irs` are walked
        from last to first. In each list, an ExprId still pending is replaced
        by the source of the affectation writing it; ids not written in the
        list are searched in the previous one. Expressions classified by
        sort_dst() as needing no more tracking are accumulated in the result.
        @b: irbloc instance
        Return the set of destination expressions found
        """
        todo = set([b.dst])
        done = set()

        for irs in reversed(b.irs):
            if not todo:
                break
            # ExprId returned here still have to be resolved
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                # `found` accumulates over the whole list: ids matched by an
                # earlier affectation are already gone from `out`, so a plain
                # remove() would raise KeyError on the next affectation.
                out.difference_update(found)

            # Ids not written in this list are searched in the previous one
            follow.update(out)
            todo = follow

        # Classify what is left once every list has been visited; the ExprId
        # returned by this last call are not added to the result.
        self.sort_dst(todo, done)

        return done

    def gen_graph(self, link_all=True):
        """
        Build the irbloc digraph in self.g

        Every label of self.blocs becomes a node. An edge is added to each
        label destination returned by dst_trackback(); integer destinations
        are first turned into the label created for their offset.
        @link_all: also gen edges to non present irblocs
        """
        self.g = DiGraph()
        for lbl, irb in self.blocs.items():
            self.g.add_node(lbl)
            for target in self.dst_trackback(irb):
                if isinstance(target, ExprInt):
                    offset = int(target.arg)
                    target = ExprId(self.symbol_pool.getby_offset_create(offset))
                if not self.ExprIsLabel(target):
                    continue
                if target.name in self.blocs or link_all is True:
                    self.g.add_edge(lbl, target.name)

    def graph(self):
        """Return the graphviz script of the irbloc graph.

        One node is emitted for each graph node owning an irbloc; its text is
        the label followed by the bloc affectations. Nodes and edges are named
        after the hash of their label.
        """
        header = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
        """
        # Graphviz left-justified line break, followed by a line continuation
        line_sep = "\\l\\\n"
        node_texts = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            rows = [str(lbl)]
            for irs in self.blocs[lbl].irs:
                rows.extend(str(instr) for instr in irs)
                rows.append("")
            rows.append("")
            node_texts[hash(lbl)] = line_sep.join(rows)

        chunks = [header]
        for node_id, text in node_texts.items():
            chunks.append('%s [label="%s"];\n' % (node_id, text))
        for src, dst in self.g.edges():
            chunks.append('%s -> %s;\n' % (hash(src), hash(dst)))
        chunks.append('}')
        return "".join(chunks)

    def remove_dead(self, irb):
        """Remove dead affectations using previous liveness analysis

        An affectation is deleted when its destination is an ExprId absent
        from the OUT set of its list. Affectations to IRDst, and those whose
        source is an ExprOp whose operator starts with 'call', are kept.
        @irb: irbloc instance
        Return True iff the bloc state has changed
        PRE: compute_in_out(@irb)
        """
        modified = False
        for ir, _, live_out in zip(irb.irs, irb.c_in, irb.c_out):
            # Walk backwards so that deletions do not shift pending indexes
            for j in reversed(range(len(ir))):
                instr = ir[j]
                if not isinstance(instr.dst, ExprId):
                    continue
                if instr.dst == self.IRDst:
                    # never delete irdst
                    continue
                if isinstance(instr.src, ExprOp) and instr.src.op.startswith('call'):
                    # /!\ never remove ir calls
                    continue
                if instr.dst not in live_out:
                    del ir[j]
                    modified = True

        return modified

    def remove_blocs_dead(self):
        """Call remove_dead on each irbloc
        Return True iff one of the bloc state has changed
        """
        changed = False
        for irb in self.blocs.values():
            # Every bloc is visited, even once a change has been seen
            if self.remove_dead(irb):
                changed = True
        return changed

    # for test XXX TODO
    def set_dead_regs(self, b):
        """No-op placeholder: @b is ignored and nothing is returned."""
        pass

    def add_unused_regs(self):
        """No-op placeholder: does nothing and returns nothing."""
        pass

    def dump_bloc_state(self, irb):
        print '*' * 80
        for i, (ir, c_in, c_out) in enumerate(zip(irb.irs, irb.c_in,
                                                  irb.c_out)):
            print 'ir'
            for x in ir:
                print '\t', x
            print 'R', [str(x) for x in irb.r[i]]  #c_in]
            print 'W', [str(x) for x in irb.w[i]]  #c_out]
            print 'IN', [str(x) for x in c_in]
            print 'OUT', [str(x) for x in c_out]

    def compute_in_out(self, irb):
        """Liveness computation for a single bloc

        The OUT set of the last entry is the union of the first IN of the
        sons (`c_out_missing` standing for a son without irbloc), or
        get_out_regs() when the bloc has no son. Then, from the last entry to
        the first, IN = R U (OUT \\ W) and OUT = IN of the next entry.
        @irb: irbloc instance
        Return True iff bloc state has changed
        """
        changed = False

        # Compute OUT for last irb entry
        live_out = set()
        son_seen = False
        for son in self.g.successors(irb.label):
            son_seen = True
            if son in self.blocs:
                live_out.update(self.blocs[son].c_in[0])
            else:
                # If the son is not defined, we will propagate our current
                # out nodes to the in nodes's son
                live_out.update(irb.c_out_missing)
        if not son_seen:
            # Special case: leaf nodes architecture dependant
            live_out = self.get_out_regs(irb)

        if irb.c_out[-1] != live_out:
            irb.c_out[-1] = live_out
            changed = True

        # Compute out/in intra bloc
        last = len(irb.irs) - 1
        for i in reversed(xrange(len(irb.irs))):
            new_in = set(irb.r[i].union(irb.c_out[i].difference(irb.w[i])))
            if irb.c_in[i] != new_in:
                irb.c_in[i] = new_in
                changed = True

            # Last out has been previously updated
            if i < last:
                new_out = set(irb.c_in[i + 1])
                if irb.c_out[i] != new_out:
                    irb.c_out[i] = new_out
                    changed = True

        return changed

    def test_in_out_fix(self):
        """Return True iff a fixed point has been reached during liveness
        analysis

        The current IN/OUT sets of each bloc are saved (copied) as the
        reference for the next call.
        """

        stable = True
        for label in self.g.nodes():
            # Nodes without an irbloc (leaf has lost her son) are skipped
            if label in self.blocs:
                bloc = self.blocs[label]
                if bloc.c_in != bloc.l_in or bloc.c_out != bloc.l_out:
                    stable = False
                bloc.l_in = [set(live) for live in bloc.c_in]
                bloc.l_out = [set(live) for live in bloc.c_out]
        return stable

    def fill_missing_son_c_in(self):
        """Find nodes with missing sons in graph, and add virtual link to all
        written variables of all parents.

        `c_out_missing` is reset to an empty set on every bloc; for a bloc
        having a son without irbloc, it receives the written variables of
        each node given by self.g.reachable_parents().
        PRE: gen_graph() and get_rw()"""

        for node in self.g.nodes():
            if node not in self.blocs:
                continue
            missing = set()
            self.blocs[node].c_out_missing = missing
            if all(son in self.blocs for son in self.g.successors(node)):
                continue
            for parent in self.g.reachable_parents(node):
                for written in self.blocs[parent].w:
                    missing.update(written)

    def compute_dead(self):
        """Iterate liveness analysis until a fixed point is reached.

        compute_in_out() is applied to every graph node owning an irbloc,
        pass after pass, until test_in_out_fix() returns True. The pass
        counter is sent to log.debug.
        PRE: gen_graph()
        """

        rounds = 0
        log.debug('iteration...')
        while True:
            log.debug(rounds)
            rounds += 1
            for label in self.g.nodes():
                # Nodes without an irbloc (leaf has lost her son) are skipped
                if label in self.blocs:
                    self.compute_in_out(self.blocs[label])

            if self.test_in_out_fix():
                break

    def dead_simp(self):
        """This function is used to analyse relation of a * complete function *
        This mean the blocs under study represent a solid full function graph.

        The liveness analysis and the dead affectation removal are repeated
        as long as a removal occurs; expressions are simplified afterwards.

        Ref: CS 5470 Compiler Techniques and Principles (Liveness
        analysis/Dataflow equations)

        PRE: call to gen_graph
        """

        while True:
            log.debug('dead_simp step')

            # Update r/w variables for all irblocs
            self.get_rw()
            # Fill c_in for missing sons
            self.fill_missing_son_c_in()

            # Liveness step
            self.compute_dead()
            if not self.remove_blocs_dead():
                break

        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print '*' * 40
            print irb
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        """Return the size of a char in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        """Return the size of a short in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        """Return the size of an int in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        """Return the size of a long in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        """Return the size of a void* in bits.

        Abstract method: this implementation always raises
        NotImplementedError.
        """
        raise NotImplementedError("Abstract method")
示例#17
0
文件: analysis.py 项目: avelik/miasm
class ira:

    def sort_dst(self, todo, done):
        out = set()
        while todo:
            dst = todo.pop()
            if self.ExprIsLabel(dst):
                done.add(dst)
            elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
                done.add(dst)
            elif isinstance(dst, ExprCond):
                todo.add(dst.src1)
                todo.add(dst.src2)
            elif isinstance(dst, ExprId):
                out.add(dst)
            else:
                done.add(dst)
        return out

    def dst_trackback(self, b):
        dst = b.dst
        todo = set([dst])
        out = set()
        done = set()

        for irs in reversed(b.irs):
            if len(todo) == 0:
                break
            out = self.sort_dst(todo, done)
            found = set()
            follow = set()
            for i in irs:
                if not out:
                    break
                for o in out:
                    if i.dst == o:
                        follow.add(i.src)
                        found.add(o)
                for o in found:
                    out.remove(o)

            for o in out:
                if o not in found:
                    follow.add(o)
            todo = follow
        out = self.sort_dst(todo, done)

        return done

    def gen_graph(self, link_all = True):
        """
        Build the irbloc digraph and store it in self.g
        @link_all: also gen edges to non present irblocs

        Every label of self.blocs becomes a node. The destinations of a bloc
        come from self.dst_trackback; an ExprInt destination is first turned
        into the ExprId of the label created for its offset in
        self.symbol_pool. Only destinations accepted by self.ExprIsLabel
        produce an edge.
        """
        self.g = DiGraph()
        for label, irbloc in self.blocs.items():
            self.g.add_node(label)
            for target in self.dst_trackback(irbloc):
                if isinstance(target, ExprInt):
                    offset_label = self.symbol_pool.getby_offset_create(
                        int(target.arg))
                    target = ExprId(offset_label)
                if not self.ExprIsLabel(target):
                    continue
                if target.name in self.blocs or link_all is True:
                    self.g.add_edge(label, target.name)

    def graph(self):
        """Output the graphviz script"""
        out = """
    digraph asm_graph {
    size="80,50";
    node [
    fontsize = "16",
    shape = "box"
    ];
        """
        all_lbls = {}
        for lbl in self.g.nodes():
            if lbl not in self.blocs:
                continue
            irb = self.blocs[lbl]
            ir_txt = [str(lbl)]
            for irs in irb.irs:
                for l in irs:
                    ir_txt.append(str(l))
                ir_txt.append("")
            ir_txt.append("")
            all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
        for l, v in all_lbls.items():
            # print l, v
            out += '%s [label="%s"];\n' % (l, v)

        for a, b in self.g.edges():
            # print 'edge', a, b, hash(a), hash(b)
            out += '%s -> %s;\n' % (hash(a), hash(b))
        out += '}'
        return out

    def remove_dead(self, irb):
        """Remove dead affectations using previous liveness analysis
        @irb: irbloc instance
        Return True iff the bloc state has changed
        PRE: compute_in_out(@irb)
        """

        # print 'state1'
        # self.dump_bloc_state(irb)

        modified = False
        for ir, _, c_out in zip(irb.irs, irb.c_in, irb.c_out):
            j = 0
            while j < len(ir):
                i_cur = ir[j]
                if not isinstance(i_cur.dst, ExprId):
                    pass
                elif i_cur.dst == self.IRDst:
                    # never delete irdst
                    pass
                elif (isinstance(i_cur.src, ExprOp) and
                    i_cur.src.op.startswith('call')):
                    # /!\ never remove ir calls
                    pass
                elif i_cur.dst not in c_out:
                    del(ir[j])
                    modified = True
                    continue
                j += 1

        # print 'state2'
        # self.dump_bloc_state(irb)

        return modified

    def remove_blocs_dead(self):
        """Call remove_dead on each irbloc
        Return True iff one of the bloc state has changed
        """
        modified = False
        for b in self.blocs.values():
            modified |= self.remove_dead(b)
        return modified

    # for test XXX TODO
    def set_dead_regs(self, b):
        pass

    def add_unused_regs(self):
        pass

    def dump_bloc_state(self, irb):
        print '*'*80
        for i, (ir, c_in, c_out) in enumerate(zip(irb.irs, irb.c_in, irb.c_out)):
            print 'ir'
            for x in ir:
                print '\t', x
            print 'R', [str(x) for x in irb.r[i]]#c_in]
            print 'W', [str(x) for x in irb.w[i]]#c_out]
            print 'IN', [str(x) for x in c_in]
            print 'OUT', [str(x) for x in c_out]


    def compute_in_out(self, irb):
        """Liveness computation for a single bloc
        @irb: irbloc instance
        Return True iff bloc state has changed
        """
        modified = False

        # Compute OUT for last irb entry
        c_out = set()
        has_son = False
        for n_son in self.g.successors(irb.label):
            has_son = True
            if n_son not in self.blocs:
                # If the son is not defined, we will propagate our current out
                # nodes to the in nodes's son
                son_c_in = irb.c_out_missing
            else:
                son_c_in = self.blocs[n_son].c_in[0]
            c_out.update(son_c_in)
        if not has_son:
            # Special case: leaf nodes architecture dependant
            c_out = self.get_out_regs(irb)

        if irb.c_out[-1] != c_out:
            irb.c_out[-1] = c_out
            modified = True

        # Compute out/in intra bloc
        for i in reversed(xrange(len(irb.irs))):
            new_in = set(irb.r[i].union(irb.c_out[i].difference(irb.w[i])))
            if irb.c_in[i] != new_in:
                irb.c_in[i] = new_in
                modified = True

            if i >= len(irb.irs) - 1:
                # Last out has been previously updated
                continue
            new_out = set(irb.c_in[i + 1])
            if irb.c_out[i] != new_out:
                irb.c_out[i] = new_out
                modified = True

        return modified

    def test_in_out_fix(self):
        """Return True iff a fixed point has been reached during liveness
        analysis"""

        fixed = True
        for node in self.g.nodes():
            if node not in self.blocs:
                # leaf has lost her son
                continue
            irb = self.blocs[node]
            if irb.c_in != irb.l_in or irb.c_out != irb.l_out:
                fixed = False
            irb.l_in = [set(x) for x in irb.c_in]
            irb.l_out = [set(x) for x in irb.c_out]
        return fixed

    def fill_missing_son_c_in(self):
        """Find nodes with missing sons in graph, and add virtual link to all
        written variables of all parents.
        PRE: gen_graph() and get_rw()"""

        for node in self.g.nodes():
            if node not in self.blocs:
                continue
            self.blocs[node].c_out_missing = set()
            has_all_son = True
            for node_son in self.g.successors(node):
                if node_son not in self.blocs:
                    has_all_son = False
                    break
            if has_all_son:
                continue
            parents = self.g.reachable_parents(node)
            for parent in parents:
                irb = self.blocs[parent]
                for var_w in irb.w:
                    self.blocs[node].c_out_missing.update(var_w)

    def compute_dead(self):
        """Run the liveness analysis until it reaches a fixed point

        Each iteration calls compute_in_out on every bloc of the graph, then
        asks test_in_out_fix whether the sets are stable.
        PRE: gen_graph()
        """

        log.debug('iteration...')
        step = 0
        while True:
            log.debug(step)
            step += 1
            for label in self.g.nodes():
                # A node without irbloc is a leaf which has lost her son
                if label in self.blocs:
                    self.compute_in_out(self.blocs[label])

            if self.test_in_out_fix():
                break

    def dead_simp(self):
        """Remove dead affectations until no bloc changes any more, then
        simplify the expressions of the blocs.

        This analysis is meant for a * complete function *: the blocs under
        study are expected to represent the full graph of the function.

        Ref: CS 5470 Compiler Techniques and Principles (Liveness
        analysis/Dataflow equations)

        PRE: call to gen_graph
        """

        while True:
            log.debug('dead_simp step')

            # Refresh the read/written variables of every irbloc
            self.get_rw()
            # Fill c_in for missing sons
            self.fill_missing_son_c_in()

            # Liveness step, then removal; stop once nothing is removed
            self.compute_dead()
            if not self.remove_blocs_dead():
                break

        # Simplify expressions
        self.simplify_blocs()

    def gen_equations(self):
        for irb in self.blocs.values():
            symbols_init = {}
            for r in self.arch.regs.all_regs_ids:
                x = ExprId(r.name, r.size)
                x.is_term = True
                symbols_init[r] = x
            sb = symbexec(self, dict(symbols_init))
            sb.emulbloc(irb)
            eqs = []
            for n_w in sb.symbols:
                v = sb.symbols[n_w]
                if n_w in symbols_init and symbols_init[n_w] == v:
                    continue
                eqs.append(ExprAff(n_w, v))
            print '*' * 40
            print irb
            irb.irs = [eqs]
            irb.lines = [None]

    def sizeof_char(self):
        "Return the size of a char in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_short(self):
        "Return the size of a short in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_int(self):
        "Return the size of an int in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_long(self):
        "Return the size of a long in bits"
        raise NotImplementedError("Abstract method")

    def sizeof_pointer(self):
        "Return the size of a void* in bits"
        raise NotImplementedError("Abstract method")