Пример #1
0
    def decompile(self):
        self.is_dump = False
        self.gph, pe_nb_new_syms = self.gctx.dis.get_graph(self.entry)

        if self.gph is None:
            error("capstone can't disassemble here")
            return None
        self.gph.simplify()

        if self.gctx.db.loaded and pe_nb_new_syms:
            self.gctx.db.modified = True

        try:
            self.gph.loop_detection(self.entry)
            ast, correctly_ended = generate_ast(self)
            if not correctly_ended:
                debug__("Second try...")
                self.gph.loop_detection(self.entry, True)
                ast, _ = generate_ast(self)

            self.ast = ast
        except ExcIfelse as e:
            error("can't have a ifelse here     %x" % e.addr)
            if self.gctx.interactive_mode:
                return None
            die()

        o = self.gctx.libarch.output.Output(self)
        o._ast(self.entry, ast)
        self.output = o
        return o
Пример #2
0
    def __init__(self, mem, filename, raw_type=None, raw_base=None, raw_big_endian=None):
        self.__binary = None
        self.reverse_symbols = {} # ad -> name
        self.symbols = {} # name -> ad
        self.section_names = {}
        self.imports = {} # ad -> True (the bool is just for msgpack to save the database)
        self.type = None

        self._abs_sections = {} # start section -> SectionAbs
        self._sorted_sections = [] # bisect list, contains section start address

        if raw_type != None:
            import reverse.lib.fileformat.raw as LIB_RAW
            self.__binary = LIB_RAW.Raw(self, filename, raw_type,
                                        raw_base, raw_big_endian)
            self.type = T_BIN_RAW
            return

        start = time()
        self.load_magic(filename)

        if self.type == T_BIN_ELF:
            import reverse.lib.fileformat.elf as LIB_ELF
            self.__binary = LIB_ELF.ELF(mem, self, filename)
        elif self.type == T_BIN_PE:
            import reverse.lib.fileformat.pe as LIB_PE
            self.__binary = LIB_PE.PE(mem, self, filename)
        else:
            raise ExcFileFormat()

        elapsed = time()
        elapsed = elapsed - start
        debug__("Binary loaded in %fs" % elapsed)
Пример #3
0
    def decompile(self):
        self.is_dump = False
        self.gph, pe_nb_new_syms = self.gctx.dis.get_graph(self.entry)

        if self.gph is None:
            error("capstone can't disassemble here")
            return None
        self.gph.simplify()

        if self.gctx.db.loaded and pe_nb_new_syms:
            self.gctx.db.modified = True
        
        try:
            self.gph.loop_detection(self.entry)
            ast, correctly_ended = generate_ast(self)
            if not correctly_ended:
                debug__("Second try...")
                self.gph.loop_detection(self.entry, True)
                ast, _ = generate_ast(self)

            self.ast = ast
        except ExcIfelse as e:
            error("can't have a ifelse here     %x" % e.addr)
            if self.gctx.interactive_mode:
                return None
            die()

        o = self.gctx.libarch.output.Output(self)
        o._ast(self.entry, ast)
        self.output = o
        return o
Пример #4
0
 def load_symbols(self):
     start = time()
     self.__binary.load_static_sym()
     self.__binary.load_dyn_sym()
     elapsed = time()
     elapsed = elapsed - start
     debug__("Found %d symbols in %fs" % (len(self.symbols), elapsed))
Пример #5
0
 def load_symbols(self):
     start = time()
     self.__binary.load_static_sym()
     self.__binary.load_dyn_sym()
     elapsed = time()
     elapsed = elapsed - start
     debug__("Found %d symbols in %fs" % (len(self.symbols), elapsed))
Пример #6
0
    def __init__(self, mem, filename, raw_type=None, raw_base=None, raw_big_endian=None):
        self.__binary = None
        self.reverse_symbols = {} # ad -> name
        self.symbols = {} # name -> ad
        self.section_names = {}
        self.imports = {} # ad -> True (the bool is just for msgpack to save the database)
        self.type = None

        self._abs_sections = {} # start section -> SectionAbs
        self._sorted_sections = [] # bisect list, contains section start address

        if raw_type != None:
            import reverse.lib.fileformat.raw as LIB_RAW
            self.__binary = LIB_RAW.Raw(self, filename, raw_type,
                                        raw_base, raw_big_endian)
            self.type = T_BIN_RAW
            return

        start = time()
        self.load_magic(filename)

        if self.type == T_BIN_ELF:
            import reverse.lib.fileformat.elf as LIB_ELF
            self.__binary = LIB_ELF.ELF(mem, self, filename)
        elif self.type == T_BIN_PE:
            import reverse.lib.fileformat.pe as LIB_PE
            self.__binary = LIB_PE.PE(mem, self, filename)
        else:
            raise ExcFileFormat()

        elapsed = time()
        elapsed = elapsed - start
        debug__("Binary loaded in %fs" % elapsed)
Пример #7
0
    def simplify(self):
        nodes = list(self.nodes.keys())
        start = time()

        for ad in nodes:
            if ad in self.uncond_jumps_set or ad in self.cond_jumps_set:
                continue

            if ad not in self.link_in or len(self.link_in[ad]) != 1 or \
                    ad == self.entry_point_addr:
                continue

            pred = self.link_in[ad][0]

            # don't fuse with jumps
            if pred in self.uncond_jumps_set or pred in self.cond_jumps_set:
                continue

            if pred not in self.link_out or len(self.link_out[pred]) != 1:
                continue

            if ad in self.link_out:
                self.link_out[pred] = self.link_out[ad]
            else:
                del self.link_out[pred]

            self.nodes[pred] += self.nodes[ad]

            if ad in self.link_out:
                del self.link_out[ad]

            del self.link_in[ad]
            del self.nodes[ad]

            # replace all addr wich refers to ad
            for k, lst_i in self.link_in.items():
                if ad in lst_i:
                    lst_i[lst_i.index(ad)] = pred

        elapsed = time()
        elapsed = elapsed - start
        debug__("Graph simplified in %fs (%d nodes)" %
                (elapsed, len(self.nodes)))
Пример #8
0
    def simplify(self):
        nodes = list(self.nodes.keys())
        start = time()

        for ad in nodes:
            if ad in self.uncond_jumps_set or ad in self.cond_jumps_set:
                continue

            if ad not in self.link_in or len(self.link_in[ad]) != 1 or \
                    ad == self.entry_point_addr:
                continue

            pred = self.link_in[ad][0]

            # don't fuse with jumps
            if pred in self.uncond_jumps_set or pred in self.cond_jumps_set:
                continue

            if pred not in self.link_out or len(self.link_out[pred]) != 1:
                continue

            if ad in self.link_out:
                self.link_out[pred] = self.link_out[ad]
            else:
                del self.link_out[pred]

            self.nodes[pred] += self.nodes[ad]

            if ad in self.link_out:
                del self.link_out[ad]

            del self.link_in[ad]
            del self.nodes[ad]

            # replace all addr wich refers to ad
            for k, lst_i in self.link_in.items():
                if ad in lst_i:
                    lst_i[lst_i.index(ad)] = pred

        elapsed = time()
        elapsed = elapsed - start
        debug__("Graph simplified in %fs (%d nodes)" % (elapsed, len(self.nodes)))
Пример #9
0
def generate_ast(ctx__):
    global ctx
    ctx = ctx__

    start = time()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry, -1)]
    visited = set()
    waiting = {}

    ast_head = ast

    fake_br = Ast_Branch()
    fake_br.level = sys.maxsize

    while stack or waiting:

        if not stack and waiting:
            if not ctx.gph.skipped_loops_analysis:
                break
            for ad in set(waiting):
                waiting[ad].unseen.clear()
                stack.append((fake_br, [], -1, ad, -1))

        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            if (l_start, curr) in ctx.gph.loops_all:
                if curr not in ctx.gctx.db.reverse_symbols:
                    name = "loop_0x%x" % curr
                    ctx.gctx.db.symbols[name] = curr
                    ctx.gctx.db.reverse_symbols[curr] = name
                    ctx.gctx.db.modified = True

                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.idx_in_parent = len(ast.nodes)
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True
            # Here curr may has changed

        if curr in visited:
            if curr == l_start:
                continue
            if len(ast.nodes) > 0:
                if isinstance(ast.nodes[-1], list):
                    prev = ast.nodes[-1][0].address
                    if prev not in ctx.gph.uncond_jumps_set:
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            if curr != ctx.entry and curr not in ctx.gctx.db.reverse_symbols:
                name = "ret_0x%x" % curr
                ctx.gctx.db.symbols[name] = curr
                ctx.gctx.db.reverse_symbols[curr] = name
                ctx.gctx.db.modified = True
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.gctx.dis.jmptables:
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    continue

            # and-if
            if ctx.gctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT]))

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP]))

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    # If we have already seen this address (for example the
                    # endpoint is the beginning of the current loop) we don't
                    # re-add in the waiting list.
                    if endpoint not in visited:
                        manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                        l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append((ast_if, list(loops_stack), curr,
                          nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint, so it's not useful to have an else-branch
                # -> the stack will continue on `ast`
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(endpoint))

        else:
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    fix_non_consecutives(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time()

    for func in ctx.gctx.libarch.registered:
        func(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.gctx.color:
        ctx.gctx.libarch.process_ast.assign_colors(ctx, ast)

    if waiting:
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0,
            Ast_Comment("WARNING: there is a bug, the output is incomplete !"))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        return ast, False

    return ast, True
Пример #10
0
    def loop_detection(self, entry, bypass_false_search=False):
        start = time()

        # Equivalent loops at a same deep in the loops dependencies tree
        self.deep_equiv = set()
        # For one loop : contains all address of the loop only
        self.loops_set = {}
        # For one loop : contains all address of the loop and sub-loops
        self.loops_all = {}
        # Loop dependencies
        self.deps = {}
        self.rev_deps = {}
        # Loops marked as "False"
        self.false_loops = set()

        if len(self.nodes) > MAX_NODES:
            self.skipped_loops_analysis = True
            return

        self.__explore(entry, set(), set(), {}, None, set())

        self.roots = self.loops_set.keys() - self.rev_deps.keys()

        self.__prune_loops()

        if not bypass_false_search:
            self.__search_false_loops()
            self.__search_same_deep_equiv_loops()

        self.__update_loops()

        # Compute all address which are not in a loop
        in_loop = set()
        for l in self.loops_set.items():
            in_loop.update(l[1])

        # Rest of all address which are not in a loop
        self.not_in_loop = self.nodes.keys() - in_loop

        # Search inifinite loops
        self.infinite_loop = set()
        for l_curr_loop, l_set in self.loops_all.items():
            if self.__is_inf_loop(l_set):
                self.infinite_loop.add(l_curr_loop)

        # Save first address of loops
        self.loops_start = set()
        for _, l_start in self.loops_all:
            self.loops_start.add(l_start)

        # For each loop we search the last node that if we enter in it,
        # we are sure to return to the loop.
        self.last_loop_node = {}
        for (l_prev_loop, l_start), l_set in self.loops_all.items():
            self.last_loop_node[(l_prev_loop, l_start)] = set()
            self.__search_last_loop_node(set(), l_prev_loop, l_start, l_set)

        elapsed = time()
        elapsed = elapsed - start
        debug__("Exploration: found %d loop(s) in %fs" %
                (len(self.loops_all), elapsed))
Пример #11
0
    def get_graph(self, entry):
        from capstone import CS_OP_IMM, CS_ARCH_MIPS

        self.CS_ARCH_MIPS = CS_ARCH_MIPS
        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry)
        stack = [entry]
        start = time()
        prefetch = None
        addresses = set()

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # Here each instruction is a node. Blocks will be created in the
        # function __simplify.

        while stack:
            ad = stack.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Remove all previous instructions which have a link
                # to this instruction.
                if ad in gph.link_in:
                    for i in gph.link_in[ad]:
                        gph.link_out[i].remove(ad)
                    for i in gph.link_in[ad]:
                        if not gph.link_out[i]:
                            del gph.link_out[i]
                    del gph.link_in[ad]
                continue

            if gph.exists(inst):
                continue

            addresses.add(ad)

            if ARCH_UTILS.is_ret(inst):
                prefetch = self.__add_prefetch(addresses, inst)
                gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_uncond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)

                gph.uncond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == CS_OP_IMM:
                    nxt = op.value.imm

                    if nxt in self.functions:
                        gph.new_node(inst, prefetch, None)
                    else:
                        stack.append(nxt)
                        gph.new_node(inst, prefetch, [nxt])

                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        gph.new_node(inst, prefetch, table)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_cond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)

                gph.cond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == CS_OP_IMM:
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    nxt_jmp = op.value.imm
                    stack.append(direct_nxt)

                    if nxt_jmp in self.functions:
                        gph.new_node(inst, prefetch, [direct_nxt])
                    else:
                        stack.append(nxt_jmp)
                        gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)

            else:
                if ad != entry and ARCH_UTILS.is_call(inst):
                    op = inst.operands[0]
                    if op.type == CS_OP_IMM:
                        imm = op.value.imm
                        if imm in self.functions and self.is_noreturn(imm):
                            prefetch = self.__add_prefetch(addresses, inst)
                            gph.new_node(inst, prefetch, None)
                            continue

                nxt = inst.address + inst.size
                stack.append(nxt)
                gph.new_node(inst, None, [nxt])

        if len(gph.nodes) == 0:
            return None, 0

        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.pe_reverse_stripped_list(self, addresses)
        else:
            nb_new_syms = 0

        elapsed = time()
        elapsed = elapsed - start
        debug__("Graph built in %fs (%d instructions)" %
                (elapsed, len(gph.nodes)))

        return gph, nb_new_syms
Пример #12
0
    def loop_detection(self, entry, bypass_false_search=False):
        start = time()

        # Equivalent loops at a same deep in the loops dependencies tree
        self.deep_equiv = set()
        # For one loop : contains all address of the loop only
        self.loops_set = {}
        # For one loop : contains all address of the loop and sub-loops
        self.loops_all = {}
        # Loop dependencies
        self.deps = {}
        self.rev_deps = {}
        # Loops marked as "False"
        self.false_loops = set()

        if len(self.nodes) > MAX_NODES:
            self.skipped_loops_analysis = True
            return

        self.__explore(entry, set(), set(), {}, None, set())

        self.roots = self.loops_set.keys() - self.rev_deps.keys()

        self.__prune_loops()

        if not bypass_false_search:
            self.__search_false_loops()
            self.__search_same_deep_equiv_loops()

        self.__update_loops()

        # Compute all address which are not in a loop
        in_loop = set()
        for l in self.loops_set.items():
            in_loop.update(l[1])

        # Rest of all address which are not in a loop
        self.not_in_loop = self.nodes.keys() - in_loop

        # Search inifinite loops
        self.infinite_loop = set()
        for l_curr_loop, l_set in self.loops_all.items():
            if self.__is_inf_loop(l_set):
                self.infinite_loop.add(l_curr_loop)

        # Save first address of loops
        self.loops_start = set()
        for _, l_start in self.loops_all:
            self.loops_start.add(l_start)

        # For each loop we search the last node that if we enter in it,
        # we are sure to return to the loop.
        self.last_loop_node = {}
        for (l_prev_loop, l_start), l_set in self.loops_all.items():
            self.last_loop_node[(l_prev_loop, l_start)] = set()
            self.__search_last_loop_node(set(), l_prev_loop, l_start, l_set)

        elapsed = time()
        elapsed = elapsed - start
        debug__("Exploration: found %d loop(s) in %fs" %
                (len(self.loops_all), elapsed))
Пример #13
0
def generate_ast(ctx__):
    global ctx
    ctx = ctx__

    start = time()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry, -1)]
    visited = set()
    waiting = {}

    ast_head = ast

    fake_br = Ast_Branch()
    fake_br.level = sys.maxsize

    while stack or waiting:

        if not stack and waiting:
            if not ctx.gph.skipped_loops_analysis:
                break
            for ad in set(waiting):
                waiting[ad].unseen.clear()
                stack.append((fake_br, [], -1, ad, -1))

        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            if (l_start, curr) in ctx.gph.loops_all:
                if curr not in ctx.gctx.db.reverse_symbols:
                    name = "loop_0x%x" % curr
                    ctx.gctx.db.symbols[name] = curr
                    ctx.gctx.db.reverse_symbols[curr] = name
                    ctx.gctx.db.modified = True

                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.idx_in_parent = len(ast.nodes)
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True
            # Here curr may has changed

        if curr in visited:
            if curr == l_start:
                continue
            if len(ast.nodes) > 0:
                if isinstance(ast.nodes[-1], list):
                    prev = ast.nodes[-1][0].address
                    if prev not in ctx.gph.uncond_jumps_set:
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            if curr != ctx.entry and curr not in ctx.gctx.db.reverse_symbols:
                name = "ret_0x%x" % curr
                ctx.gctx.db.symbols[name] = curr
                ctx.gctx.db.reverse_symbols[curr] = name
                ctx.gctx.db.modified = True
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.gctx.dis.jmptables:
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    continue

            # and-if
            if ctx.gctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT]))

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP],
                                  prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP]))

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr, l_set,
                                       l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    # If we have already seen this address (for example the
                    # endpoint is the beginning of the current loop) we don't
                    # re-add in the waiting list.
                    if endpoint not in visited:
                        manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                        l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append(
                (ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint, so it's not useful to have an else-branch
                # -> the stack will continue on `ast`
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(endpoint))

        else:
            ast.add(blk)
            stack.append((ast, loops_stack, curr, nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    fix_non_consecutives(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time()

    for func in ctx.gctx.libarch.registered:
        func(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.gctx.color:
        ctx.gctx.libarch.process_ast.assign_colors(ctx, ast)

    if waiting:
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(
            0,
            Ast_Comment("WARNING: there is a bug, the output is incomplete !"))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        return ast, False

    return ast, True
Пример #14
0
    def get_graph(self, entry):
        from capstone import CS_OP_IMM, CS_ARCH_MIPS

        self.CS_ARCH_MIPS = CS_ARCH_MIPS
        ARCH_UTILS = self.load_arch_module().utils

        gph = Graph(self, entry)
        stack = [entry]
        start = time()
        prefetch = None
        addresses = set()

        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])

        # Here each instruction is a node. Blocks will be created in the
        # function __simplify.

        while stack:
            ad = stack.pop()
            inst = self.lazy_disasm(ad)

            if inst is None:
                # Remove all previous instructions which have a link
                # to this instruction.
                if ad in gph.link_in:
                    for i in gph.link_in[ad]:
                        gph.link_out[i].remove(ad)
                    for i in gph.link_in[ad]:
                        if not gph.link_out[i]:
                            del gph.link_out[i]
                    del gph.link_in[ad]
                continue

            if gph.exists(inst):
                continue

            addresses.add(ad)

            if ARCH_UTILS.is_ret(inst):
                prefetch = self.__add_prefetch(addresses, inst)
                gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_uncond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)

                gph.uncond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == CS_OP_IMM:
                    nxt = op.value.imm

                    if nxt in self.functions:
                        gph.new_node(inst, prefetch, None)
                    else:
                        stack.append(nxt)
                        gph.new_node(inst, prefetch, [nxt])

                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        gph.new_node(inst, prefetch, table)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.new_node(inst, prefetch, None)

            elif ARCH_UTILS.is_cond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)

                gph.cond_jumps_set.add(ad)
                op = inst.operands[-1]

                if op.type == CS_OP_IMM:
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size

                    nxt_jmp = op.value.imm
                    stack.append(direct_nxt)

                    if nxt_jmp in self.functions:
                        gph.new_node(inst, prefetch, [direct_nxt])
                    else:
                        stack.append(nxt_jmp)
                        gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)

            else:
                if ad != entry and ARCH_UTILS.is_call(inst):
                    op = inst.operands[0]
                    if op.type == CS_OP_IMM:
                        imm = op.value.imm
                        if imm in self.functions and self.is_noreturn(imm):
                            prefetch = self.__add_prefetch(addresses, inst)
                            gph.new_node(inst, prefetch, None)
                            continue

                nxt = inst.address + inst.size
                stack.append(nxt)
                gph.new_node(inst, None, [nxt])

        if len(gph.nodes) == 0:
            return None, 0

        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.pe_reverse_stripped_list(self, addresses)
        else:
            nb_new_syms = 0

        elapsed = time()
        elapsed = elapsed - start
        debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))

        return gph, nb_new_syms