Python SegmentList примеры, angr.analyses.cfg.cfg_fast.SegmentList Python примеры использования

Пример #1

0

Показать файл

def test_segment_list_0():
    seg_list = SegmentList()
    seg_list.occupy(0, 1, "code")
    seg_list.occupy(2, 3, "code")

    nose.tools.assert_equal(len(seg_list), 2)
    nose.tools.assert_equal(seg_list._list[0].end, 1)
    nose.tools.assert_equal(seg_list._list[1].end, 5)
    nose.tools.assert_equal(seg_list.is_occupied(4), True)
    nose.tools.assert_equal(seg_list.is_occupied(5), False)

Пример #2

0

Показать файл

Файл: test_cfgfast.py Проект: SlivTaMere/angr

def test_segment_list_4():
    seg_list = SegmentList()

    seg_list.occupy(5, 5, "code")
    seg_list.occupy(4, 1, "code")
    seg_list.occupy(2, 2, "code")

    nose.tools.assert_equal(len(seg_list), 1)
    nose.tools.assert_equal(seg_list._list[0].start, 2)
    nose.tools.assert_equal(seg_list._list[0].end, 10)

Пример #3

0

Показать файл

Файл: test_cfgfast.py Проект: SlivTaMere/angr

def test_segment_list_1():
    seg_list = SegmentList()

    # They should be merged
    seg_list.occupy(0, 1, "code")
    seg_list.occupy(1, 2, "code")

    nose.tools.assert_equal(len(seg_list), 1)
    nose.tools.assert_equal(seg_list._list[0].start, 0)
    nose.tools.assert_equal(seg_list._list[0].end, 3)

Пример #4

0

Показать файл

Файл: test_cfgfast.py Проект: ufo2011/angr

    def test_segment_list_1(self):
        seg_list = SegmentList()

        # They should be merged
        seg_list.occupy(0, 1, "code")
        seg_list.occupy(1, 2, "code")

        assert len(seg_list) == 1
        assert seg_list._list[0].start == 0
        assert seg_list._list[0].end == 3

Пример #5

0

Показать файл

def test_segment_list_1():
    seg_list = SegmentList()

    # They should be merged
    seg_list.occupy(0, 1, "code")
    seg_list.occupy(1, 2, "code")

    nose.tools.assert_equal(len(seg_list), 1)
    nose.tools.assert_equal(seg_list._list[0].start, 0)
    nose.tools.assert_equal(seg_list._list[0].end, 3)

Пример #6

0

Показать файл

Файл: test_cfgfast.py Проект: ufo2011/angr

    def test_segment_list_0(self):
        seg_list = SegmentList()
        seg_list.occupy(0, 1, "code")
        seg_list.occupy(2, 3, "code")

        assert len(seg_list) == 2
        assert seg_list._list[0].end == 1
        assert seg_list._list[1].end == 5
        assert seg_list.is_occupied(4)
        assert seg_list.is_occupied(5) is False

Пример #7

0

Показать файл

Файл: test_cfgfast.py Проект: SlivTaMere/angr

def test_segment_list_0():
    seg_list = SegmentList()
    seg_list.occupy(0, 1, "code")
    seg_list.occupy(2, 3, "code")

    nose.tools.assert_equal(len(seg_list), 2)
    nose.tools.assert_equal(seg_list._list[0].end, 1)
    nose.tools.assert_equal(seg_list._list[1].end, 5)
    nose.tools.assert_equal(seg_list.is_occupied(4), True)
    nose.tools.assert_equal(seg_list.is_occupied(5), False)

Пример #8

0

Показать файл

Файл: test_cfgfast.py Проект: SlivTaMere/angr

def test_segment_list_6():
    seg_list = SegmentList()

    seg_list.occupy(10, 20, "code")
    seg_list.occupy(9, 2, "data")

    nose.tools.assert_equal(len(seg_list), 2)
    nose.tools.assert_equal(seg_list._list[0].start, 9)
    nose.tools.assert_equal(seg_list._list[0].end, 11)
    nose.tools.assert_equal(seg_list._list[0].sort, 'data')

    nose.tools.assert_equal(seg_list._list[1].start, 11)
    nose.tools.assert_equal(seg_list._list[1].end, 30)
    nose.tools.assert_equal(seg_list._list[1].sort, 'code')

Пример #9

0

Показать файл

    def __init__(self,
                 binary=None,
                 start=None,
                 end=None,
                 pickle_intermediate_results=False,
                 perform_full_code_scan=False):
        self._binary = binary if binary is not None else self.project.loader.main_object
        self._start = start if start is not None else self._binary.min_addr
        self._end = end if end is not None else self._binary.max_addr
        self._pickle_intermediate_results = pickle_intermediate_results
        self._perform_full_code_scan = perform_full_code_scan

        l.debug("Starts at 0x%08x and ends at 0x%08x.", self._start, self._end)

        # Valid memory regions
        self._valid_memory_regions = sorted(
            (start, start + len(backer))
            for start, backer in self.project.loader.memory.backers())
        self._valid_memory_region_size = sum([
            (end - start) for start, end in self._valid_memory_regions
        ])

        # Size of each basic block
        self._block_size = {}

        self._next_addr = self._start - 1
        # Starting point of functions
        self.functions = None
        # Calls between functions
        self.call_map = networkx.DiGraph()
        # A CFG - this is not what you get from project.analyses.CFG() !
        self.cfg = networkx.DiGraph()
        # Create the segment list
        self._seg_list = SegmentList()

        self._read_addr_to_run = defaultdict(list)
        self._write_addr_to_run = defaultdict(list)

        # All IRSBs with an indirect exit target
        self._indirect_jumps = set()

        self._unassured_functions = set()

        self.base_address = None

        # Start working!
        self._reconnoiter()

Пример #10

0

Показать файл

Файл: test_cfgfast.py Проект: SlivTaMere/angr

def test_segment_list_3():
    seg_list = SegmentList()

    # They should be merged, and create three different segments
    seg_list.occupy(0, 5, "code")
    seg_list.occupy(5, 5, "code")
    seg_list.occupy(1, 2, "data")

    nose.tools.assert_equal(len(seg_list), 3)

    nose.tools.assert_equal(seg_list._list[0].start, 0)
    nose.tools.assert_equal(seg_list._list[0].end, 1)
    nose.tools.assert_equal(seg_list._list[0].sort, "code")

    nose.tools.assert_equal(seg_list._list[1].start, 1)
    nose.tools.assert_equal(seg_list._list[1].end, 3)
    nose.tools.assert_equal(seg_list._list[1].sort, "data")

    nose.tools.assert_equal(seg_list._list[2].start, 3)
    nose.tools.assert_equal(seg_list._list[2].end, 10)
    nose.tools.assert_equal(seg_list._list[2].sort, "code")

Пример #11

0

Показать файл

Файл: test_cfgfast.py Проект: ufo2011/angr

    def test_segment_list_5(self):
        seg_list = SegmentList()

        seg_list.occupy(5, 5, "data")
        seg_list.occupy(4, 1, "code")
        seg_list.occupy(2, 2, "data")

        assert len(seg_list) == 3
        assert seg_list._list[0].start == 2
        assert seg_list._list[2].end == 10

        seg_list.occupy(3, 2, "data")

        assert len(seg_list) == 1
        assert seg_list._list[0].start == 2
        assert seg_list._list[0].end == 10

Пример #12

0

Показать файл

def test_segment_list_5():
    seg_list = SegmentList()

    seg_list.occupy(5, 5, "data")
    seg_list.occupy(4, 1, "code")
    seg_list.occupy(2, 2, "data")

    nose.tools.assert_equal(len(seg_list), 3)
    nose.tools.assert_equal(seg_list._list[0].start, 2)
    nose.tools.assert_equal(seg_list._list[2].end, 10)

    seg_list.occupy(3, 2, "data")

    nose.tools.assert_equal(len(seg_list), 1)
    nose.tools.assert_equal(seg_list._list[0].start, 2)
    nose.tools.assert_equal(seg_list._list[0].end, 10)

Пример #13

0

Показать файл

Файл: test_cfgfast.py Проект: ufo2011/angr

    def test_segment_list_6(self):
        seg_list = SegmentList()

        seg_list.occupy(10, 20, "code")
        seg_list.occupy(9, 2, "data")

        assert len(seg_list) == 2
        assert seg_list._list[0].start == 9
        assert seg_list._list[0].end == 11
        assert seg_list._list[0].sort == "data"

        assert seg_list._list[1].start == 11
        assert seg_list._list[1].end == 30
        assert seg_list._list[1].sort == "code"

Пример #14

0

Показать файл

def test_segment_list_6():
    seg_list = SegmentList()

    seg_list.occupy(10, 20, "code")
    seg_list.occupy(9, 2, "data")

    nose.tools.assert_equal(len(seg_list), 2)
    nose.tools.assert_equal(seg_list._list[0].start, 9)
    nose.tools.assert_equal(seg_list._list[0].end, 11)
    nose.tools.assert_equal(seg_list._list[0].sort, 'data')

    nose.tools.assert_equal(seg_list._list[1].start, 11)
    nose.tools.assert_equal(seg_list._list[1].end, 30)
    nose.tools.assert_equal(seg_list._list[1].sort, 'code')

Пример #15

0

Показать файл

Файл: girlscout.py Проект: AmesianX/angr

    def __init__(self, binary=None, start=None, end=None, pickle_intermediate_results=False, perform_full_code_scan=False):
        self._binary = binary if binary is not None else self.project.loader.main_object
        self._start = start if start is not None else self._binary.min_addr
        self._end = end if end is not None else self._binary.max_addr
        self._pickle_intermediate_results = pickle_intermediate_results
        self._perform_full_code_scan = perform_full_code_scan

        l.debug("Starts at 0x%08x and ends at 0x%08x.", self._start, self._end)

        # Valid memory regions
        self._valid_memory_regions = sorted(
            (start, start+len(backer)) for start, backer in self.project.loader.memory.backers())
        self._valid_memory_region_size = sum([ (end - start) for start, end in self._valid_memory_regions ])

        # Size of each basic block
        self._block_size = { }

        self._next_addr = self._start - 1
        # Starting point of functions
        self.functions = None
        # Calls between functions
        self.call_map = networkx.DiGraph()
        # A CFG - this is not what you get from project.analyses.CFG() !
        self.cfg = networkx.DiGraph()
        # Create the segment list
        self._seg_list = SegmentList()

        self._read_addr_to_run = defaultdict(list)
        self._write_addr_to_run = defaultdict(list)

        # All IRSBs with an indirect exit target
        self._indirect_jumps = set()

        self._unassured_functions = set()

        self.base_address = None

        # Start working!
        self._reconnoiter()

Пример #16

0

Показать файл

def test_segment_list_3():
    seg_list = SegmentList()

    # They should be merged, and create three different segments
    seg_list.occupy(0, 5, "code")
    seg_list.occupy(5, 5, "code")
    seg_list.occupy(1, 2, "data")

    nose.tools.assert_equal(len(seg_list), 3)

    nose.tools.assert_equal(seg_list._list[0].start, 0)
    nose.tools.assert_equal(seg_list._list[0].end, 1)
    nose.tools.assert_equal(seg_list._list[0].sort, "code")

    nose.tools.assert_equal(seg_list._list[1].start, 1)
    nose.tools.assert_equal(seg_list._list[1].end, 3)
    nose.tools.assert_equal(seg_list._list[1].sort, "data")

    nose.tools.assert_equal(seg_list._list[2].start, 3)
    nose.tools.assert_equal(seg_list._list[2].end, 10)
    nose.tools.assert_equal(seg_list._list[2].sort, "code")

Пример #17

0

Показать файл

Файл: test_cfgfast.py Проект: ufo2011/angr

    def test_segment_list_3(self):
        seg_list = SegmentList()

        # They should be merged, and create three different segments
        seg_list.occupy(0, 5, "code")
        seg_list.occupy(5, 5, "code")
        seg_list.occupy(1, 2, "data")

        assert len(seg_list) == 3

        assert seg_list._list[0].start == 0
        assert seg_list._list[0].end == 1
        assert seg_list._list[0].sort == "code"

        assert seg_list._list[1].start == 1
        assert seg_list._list[1].end == 3
        assert seg_list._list[1].sort == "data"

        assert seg_list._list[2].start == 3
        assert seg_list._list[2].end == 10
        assert seg_list._list[2].sort == "code"

Пример #18

0

Показать файл

Файл: girlscout.py Проект: AmesianX/angr

class GirlScout(Analysis):
    """
    We find functions inside the given binary, try to decide the base address if needed, and build a control-flow
    graph on top of that to see if there is an entry or not. Obviously if the binary is not loaded as a blob (not
    using Blob as its backend), GirlScout will not try to determine the base address.

    It's also optional to perform a full code scan of the binary to show where all codes are. By default we don't scan
    the entire binary since it's time consuming.

    You probably need a BoyScout to determine the possible architecture and endianess of your binary blob.
    """

    def __init__(self, binary=None, start=None, end=None, pickle_intermediate_results=False, perform_full_code_scan=False):
        self._binary = binary if binary is not None else self.project.loader.main_object
        self._start = start if start is not None else self._binary.min_addr
        self._end = end if end is not None else self._binary.max_addr
        self._pickle_intermediate_results = pickle_intermediate_results
        self._perform_full_code_scan = perform_full_code_scan

        l.debug("Starts at 0x%08x and ends at 0x%08x.", self._start, self._end)

        # Valid memory regions
        self._valid_memory_regions = sorted(
            (start, start+len(backer)) for start, backer in self.project.loader.memory.backers())
        self._valid_memory_region_size = sum([ (end - start) for start, end in self._valid_memory_regions ])

        # Size of each basic block
        self._block_size = { }

        self._next_addr = self._start - 1
        # Starting point of functions
        self.functions = None
        # Calls between functions
        self.call_map = networkx.DiGraph()
        # A CFG - this is not what you get from project.analyses.CFG() !
        self.cfg = networkx.DiGraph()
        # Create the segment list
        self._seg_list = SegmentList()

        self._read_addr_to_run = defaultdict(list)
        self._write_addr_to_run = defaultdict(list)

        # All IRSBs with an indirect exit target
        self._indirect_jumps = set()

        self._unassured_functions = set()

        self.base_address = None

        # Start working!
        self._reconnoiter()

    @property
    def call_map(self):
        return self.call_map

    def _get_next_addr_to_search(self, alignment=None):
        # TODO: Take care of those functions that are already generated
        curr_addr = self._next_addr
        if self._seg_list.has_blocks:
            curr_addr = self._seg_list.next_free_pos(curr_addr)

        if alignment is not None:
            if curr_addr % alignment > 0:
                curr_addr = curr_addr - curr_addr % alignment + alignment

        # Make sure curr_addr exists in binary
        accepted = False
        for start, end in self._valid_memory_regions:
            if curr_addr >= start and curr_addr < end:
                # accept
                accepted = True
                break
            if curr_addr < start:
                # accept, but we are skipping the gap
                accepted = True
                curr_addr = start

        if not accepted:
            # No memory available!
            return None

        self._next_addr = curr_addr
        if self._end is None or curr_addr < self._end:
            l.debug("Returning new recon address: 0x%08x", curr_addr)
            return curr_addr
        else:
            l.debug("0x%08x is beyond the ending point.", curr_addr)
            return None

    def _get_next_code_addr(self, initial_state):
        """
        Besides calling _get_next_addr, we will check if data locates at that address seems to be code or not. If not, 
        we'll move on to request for next valid address.
        """
        next_addr = self._get_next_addr_to_search()
        if next_addr is None:
            return None

        start_addr = next_addr
        sz = ""
        is_sz = True
        while is_sz:
            # Get data until we meet a 0
            while next_addr in initial_state.memory:
                try:
                    l.debug("Searching address %x", next_addr)
                    val = initial_state.mem_concrete(next_addr, 1)
                    if val == 0:
                        if len(sz) < 4:
                            is_sz = False
                        else:
                            reach_end = True
                        break
                    if chr(val) not in string.printable:
                        is_sz = False
                        break
                    sz += chr(val)
                    next_addr += 1
                except SimValueError:
                    # Not concretizable
                    l.debug("Address 0x%08x is not concretizable!", next_addr)
                    break

            if len(sz) > 0 and is_sz:
                l.debug("Got a string of %d chars: [%s]", len(sz), sz)
                # l.debug("Occpuy %x - %x", start_addr, start_addr + len(sz) + 1)
                self._seg_list.occupy(start_addr, len(sz) + 1)
                sz = ""
                next_addr = self._get_next_addr_to_search()
                if next_addr is None:
                    return None
                # l.debug("next addr = %x", next_addr)
                start_addr = next_addr

            if is_sz:
                next_addr += 1

        instr_alignment = initial_state.arch.instruction_alignment
        if start_addr % instr_alignment > 0:
            start_addr = start_addr - start_addr % instr_alignment + \
                instr_alignment

        l.debug('_get_next_code_addr() returns 0x%x', start_addr)
        return start_addr

    def _symbolic_reconnoiter(self, addr, target_addr, max_depth=10):
        """
        When an IRSB has more than two exits (for example, a jumptable), we
        cannot concretize their exits in concrete mode. Hence we statically
        execute the function from beginning in this method, and then switch to
        symbolic mode for the final IRSB to get all possible exits of that
        IRSB.
        """
        state = self.project.factory.blank_state(addr=addr,
                                                  mode="symbolic",
                                                  add_options={o.CALLLESS}
                                                 )
        initial_exit = self.project.factory.path(state)
        explorer = Explorer(self.project,
                            start=initial_exit,
                            max_depth=max_depth,
                            find=(target_addr), num_find=1).run()
        if len(explorer.found) > 0:
            path = explorer.found[0]
            last_run = path.last_run
            return last_run.flat_exits()
        else:
            return []

    def _static_memory_slice(self, run):
        if isinstance(run, SimIRSB):
            for stmt in run.statements:
                refs = stmt.actions
                if len(refs) > 0:
                    real_ref = refs[-1]
                    if type(real_ref) == SimActionData:
                        if real_ref.action == 'write':
                            addr = real_ref.addr
                            if not run.initial_state.solver.symbolic(addr):
                                concrete_addr = run.initial_state.solver.eval(addr)
                                self._write_addr_to_run[addr].append(run.addr)
                        elif real_ref.action == 'read':
                            addr = real_ref.addr
                            if not run.initial_state.solver.symbolic(addr):
                                concrete_addr = run.initial_state.solver.eval(addr)
                            self._read_addr_to_run[addr].append(run.addr)

    def _scan_code(self, traced_addresses, function_exits, initial_state, starting_address):
        # Saving tuples like (current_function_addr, next_exit_addr)
        # Current_function_addr == -1 for exits not inside any function
        remaining_exits = set()
        next_addr = starting_address

        # Initialize the remaining_exits set
        remaining_exits.add((next_addr,
                             next_addr,
                             next_addr,
                             initial_state.copy()))

        while len(remaining_exits):
            current_function_addr, previous_addr, parent_addr, state = \
                remaining_exits.pop()
            if previous_addr in traced_addresses:
                continue

            # Add this node to the CFG first, in case this is a dangling node
            self.cfg.add_node(previous_addr)

            if current_function_addr != -1:
                l.debug("Tracing new exit 0x%08x in function 0x%08x",
                        previous_addr, current_function_addr)
            else:
                l.debug("Tracing new exit 0x%08x", previous_addr)
            traced_addresses.add(previous_addr)

            self._scan_block(previous_addr, state, current_function_addr, function_exits, remaining_exits, traced_addresses)


    def _scan_block(self, addr, state, current_function_addr, function_exits, remaining_exits, traced_addresses):
        # Let's try to create the pyvex IRSB directly, since it's much faster

        try:
            irsb = self.project.factory.block(addr).vex

            # Log the size of this basic block
            self._block_size[addr] = irsb.size

            # Occupy the block
            self._seg_list.occupy(addr, irsb.size)
        except (SimEngineError, SimMemoryError):
            return

        # Get all possible successors
        next, jumpkind = irsb.next, irsb.jumpkind
        successors = [ (i.dst, i.jumpkind) for i in irsb.statements if type(i) is pyvex.IRStmt.Exit]
        successors.append((next, jumpkind))

        # Process each successor
        for suc in successors:
            target, jumpkind = suc

            if type(target) is pyvex.IRExpr.Const:
                next_addr = target.con.value
            else:
                next_addr = None

            if jumpkind == 'Ijk_Boring' and next_addr is not None:
                remaining_exits.add((current_function_addr, next_addr,
                                     addr, None))

            elif jumpkind == 'Ijk_Call' and next_addr is not None:
                # Log it before we cut the tracing :)
                if jumpkind == "Ijk_Call":
                    if current_function_addr != -1:
                        self.functions.add(current_function_addr)
                        self.functions.add(next_addr)
                        self.call_map.add_edge(current_function_addr, next_addr)
                    else:
                        self.functions.add(next_addr)
                        self.call_map.add_node(next_addr)
                elif jumpkind == "Ijk_Boring" or \
                                jumpkind == "Ijk_Ret":
                    if current_function_addr != -1:
                        function_exits[current_function_addr].add(next_addr)

                # If we have traced it before, don't trace it anymore
                if next_addr in traced_addresses:
                    return

                remaining_exits.add((next_addr, next_addr, addr, None))
                l.debug("Function calls: %d", len(self.call_map.nodes()))

    def _scan_block_(self, addr, state, current_function_addr, function_exits, remaining_exits, traced_addresses):

        # Get a basic block
        state.ip = addr

        s_path = self.project.factory.path(state)
        try:
            s_run = s_path.next_run
        except SimIRSBError as ex:
            l.debug(ex)
            return
        except AngrError as ex:
            # "No memory at xxx"
            l.debug(ex)
            return
        except (SimValueError, SimSolverModeError) as ex:
            # Cannot concretize something when executing the SimRun
            l.debug(ex)
            return
        except SimError as ex:
            # Catch all simuvex errors
            l.debug(ex)
            return

        if type(s_run) is SimIRSB:
            # Calculate its entropy to avoid jumping into uninitialized/all-zero space
            bytes = s_run.irsb._state[1]['bytes']
            size = s_run.irsb.size
            ent = self._calc_entropy(bytes, size=size)
            if ent < 1.0 and size > 40:
                # Skipping basic blocks that have a very low entropy
                return

        # self._static_memory_slice(s_run)

        # Mark that part as occupied
        if isinstance(s_run, SimIRSB):
            self._seg_list.occupy(addr, s_run.irsb.size)
        successors = s_run.flat_successors + s_run.unsat_successors
        has_call_exit = False
        tmp_exit_set = set()
        for suc in successors:
            if suc.history.jumpkind == "Ijk_Call":
                has_call_exit = True

        for suc in successors:
            jumpkind = suc.history.jumpkind

            if has_call_exit and jumpkind == "Ijk_Ret":
                jumpkind = "Ijk_FakeRet"

            if jumpkind == "Ijk_Ret":
                continue

            try:
                # Try to concretize the target. If we can't, just move on
                # to the next target
                next_addr = suc.solver.eval_one(suc.ip)
            except (SimValueError, SimSolverModeError) as ex:
                # Undecidable jumps (might be a function return, or a conditional branch, etc.)

                # We log it
                self._indirect_jumps.add((suc.history.jumpkind, addr))
                l.info("IRSB 0x%x has an indirect exit %s.", addr, suc.history.jumpkind)

                continue

            self.cfg.add_edge(addr, next_addr, jumpkind=jumpkind)
            # Log it before we cut the tracing :)
            if jumpkind == "Ijk_Call":
                if current_function_addr != -1:
                    self.call_map.add_edge(current_function_addr, next_addr)
                else:
                    self.call_map.add_node(next_addr)
            elif jumpkind == "Ijk_Boring" or \
                            jumpkind == "Ijk_Ret":
                if current_function_addr != -1:
                    function_exits[current_function_addr].add(next_addr)

            # If we have traced it before, don't trace it anymore
            if next_addr in traced_addresses:
                continue
            # If we have traced it in current loop, don't tract it either
            if next_addr in tmp_exit_set:
                continue

            tmp_exit_set.add(next_addr)

            if jumpkind == "Ijk_Call":
                # This is a call. Let's record it
                new_state = suc.copy()
                # Unconstrain those parameters
                # TODO: Support other archs as well
                # if 12 + 16 in new_state.registers.mem:
                #    del new_state.registers.mem[12 + 16]
                #if 16 + 16 in new_state.registers.mem:
                #    del new_state.registers.mem[16 + 16]
                #if 20 + 16 in new_state.registers.mem:
                #    del new_state.registers.mem[20 + 16]
                # 0x8000000: call 0x8000045
                remaining_exits.add((next_addr, next_addr, addr, new_state))
                l.debug("Function calls: %d", len(self.call_map.nodes()))
            elif jumpkind == "Ijk_Boring" or \
                            jumpkind == "Ijk_Ret" or \
                            jumpkind == "Ijk_FakeRet":
                new_state = suc.copy()
                l.debug("New exit with jumpkind %s", jumpkind)
                # FIXME: should not use current_function_addr if jumpkind is "Ijk_Ret"
                remaining_exits.add((current_function_addr, next_addr,
                                     addr, new_state))
            elif jumpkind == "Ijk_NoDecode":
                # That's something VEX cannot decode!
                # We assume we ran into a deadend
                pass
            elif jumpkind.startswith("Ijk_Sig"):
                # Should not go into that exit
                pass
            elif jumpkind == "Ijk_TInval":
                # ppc32: isync
                # FIXME: It is the same as Ijk_Boring! Process it later
                pass
            elif jumpkind == 'Ijk_Sys_syscall':
                # Let's not jump into syscalls
                pass
            elif jumpkind == 'Ijk_InvalICache':
                pass
            elif jumpkind == 'Ijk_MapFail':
                pass
            elif jumpkind == 'Ijk_EmWarn':
                pass
            else:
                raise Exception("NotImplemented")

    def _scan_function_prologues(self, traced_address, function_exits, initial_state):
        """
        Scan the entire program space for prologues, and start code scanning at those positions
        :param traced_address:
        :param function_exits:
        :param initial_state:
        :param next_addr:
        :returns:
        """

        # Precompile all regexes
        regexes = set()
        for ins_regex in self.project.arch.function_prologs:
            r = re.compile(ins_regex)
            regexes.add(r)

        # TODO: Make sure self._start is aligned

        # Construct the binary blob first

        for start_, bytes_ in self.project.loader.main_object.memory.backers():
            for regex in regexes:
                # Match them!
                for mo in regex.finditer(bytes):
                    position = mo.start() + start_
                    if position % self.project.arch.instruction_alignment == 0:
                        if position not in traced_address:
                            percentage = self._seg_list.occupied_size * 100.0 / (self._valid_memory_region_size)
                            l.info("Scanning %xh, progress %0.04f%%", position, percentage)

                            self._unassured_functions.add(position)

                            self._scan_code(traced_address, function_exits, initial_state, position)
                        else:
                            l.info("Skipping %xh", position)

    def _process_indirect_jumps(self):
        """
        Execute each basic block with an indeterminiable exit target
        :returns:
        """

        function_starts = set()
        l.info("We have %d indirect jumps", len(self._indirect_jumps))

        for jumpkind, irsb_addr in self._indirect_jumps:
            # First execute the current IRSB in concrete mode

            if len(function_starts) > 20:
                break

            if jumpkind == "Ijk_Call":
                state = self.project.factory.blank_state(addr=irsb_addr, mode="concrete",
                                                    add_options={o.SYMBOLIC_INITIAL_VALUES}
                                                   )
                path = self.project.factory.path(state)
                l.debug(hex(irsb_addr))

                try:
                    r = (path.next_run.successors + path.next_run.unsat_successors)[0]
                    ip = r.solver.eval_one(r.ip)

                    function_starts.add(ip)
                    continue
                except SimSolverModeError as ex:
                    pass

                # Not resolved
                # Do a backward slicing from the call
                irsb = self.project.factory.block(irsb_addr).vex
                stmts = irsb.statements
                # Start slicing from the "next"

                b = Blade(self.cfg, irsb.addr, -1, project=self.project)

                # Debugging output
                for addr, stmt_idx in sorted(list(b.slice.nodes())):
                    irsb = self.project.factory.block(addr).vex
                    stmts = irsb.statements
                    l.debug("%x: %d | %s %d", (addr, stmt_idx), stmts[stmt_idx], b.slice.in_degree((addr, stmt_idx)))

                # Get all sources
                sources = [n for n in b.slice.nodes() if b.slice.in_degree(n) == 0]

                # Create the annotated CFG
                annotatedcfg = AnnotatedCFG(self.project, None, target_irsb_addr=irsb_addr, detect_loops=False)
                annotatedcfg.from_digraph(b.slice)

                for src_irsb, src_stmt_idx in sources:
                    # Use slicecutor to execute each one, and get the address
                    # We simply give up if any exception occurs on the way

                    start_state = self.project.factory.blank_state(addr=src_irsb,
                                                              add_options=
                                                              {o.DO_RET_EMULATION,
                                                               o.TRUE_RET_EMULATION_GUARD}
                                                             )

                    start_path = self.project.factory.path(start_state)

                    # Create the slicecutor
                    slicecutor = Slicecutor(self.project, annotatedcfg, start=start_path, targets=(irsb_addr,))

                    # Run it!
                    try:
                        slicecutor.run()
                    except KeyError as ex:
                        # This is because the program slice is incomplete.
                        # Blade will support more IRExprs and IRStmts
                        l.debug("KeyError occurred due to incomplete program slice.", exc_info=ex)
                        continue

                    # Get the jumping targets
                    for r in slicecutor.reached_targets:
                        if r.next_run.successors:
                            target_ip = r.next_run.successors[0].ip
                            se = r.next_run.successors[0].se

                            if not se.symbolic(target_ip):
                                concrete_ip = se.eval_one(target_ip)
                                function_starts.add(concrete_ip)
                                l.info("Found a function address %x", concrete_ip)

        return function_starts

    def _solve_forbase_address(self, function_starts, functions):
        """
        Voting for the most possible base address.

        :param function_starts:
        :param functions:
        :returns:
        """

        pseudo_base_addr = self.project.loader.main_object.min_addr

        base_addr_ctr = { }

        for s in function_starts:
            for f in functions:
                base_addr = s - f + pseudo_base_addr
                ctr = 1

                for k in function_starts:
                    if k - base_addr + pseudo_base_addr in functions:
                        ctr += 1

                if ctr > 5:
                    base_addr_ctr[base_addr] = ctr

        if len(base_addr_ctr):
            base_addr, hits = sorted([(k, v) for k, v in base_addr_ctr.items()], key=lambda x: x[1], reverse=True)[0]

            return base_addr
        else:
            return None


    def _reconnoiter(self):

        if type(self._binary) is cle.blob.Blob:
            self._determinebase_address()

        if self._perform_full_code_scan:
            self._full_code_scan()

    def _determinebase_address(self):
        """
        The basic idea is simple: start from a specific point, try to construct
        functions as much as we can, and maintain a function distribution graph
        and a call graph simultaneously. Repeat searching until we come to the
        end that there is no new function to be found.
        A function should start with:
            # some addresses that a call exit leads to, or
            # certain instructions. They are recoreded in SimArch.

        For a better performance, instead of blindly scanning the entire process
        space, we first try to search for instruction patterns that a function
        may start with, and start scanning at those positions. Then we try to
        decode anything that is left.
        """

        traced_address = set()
        self.functions = set()
        self.call_map = networkx.DiGraph()
        self.cfg = networkx.DiGraph()
        initial_state = self.project.factory.blank_state(mode="fastpath")
        initial_options = initial_state.options - { o.TRACK_CONSTRAINTS } - o.refs
        initial_options |= { o.SUPER_FASTPATH }
        # initial_options.remove(o.COW_STATES)
        initial_state.options = initial_options
        # Sadly, not all calls to functions are explicitly made by call
        # instruction - they could be a jmp or b, or something else. So we
        # should record all exits from a single function, and then add
        # necessary calling edges in our call map during the post-processing
        # phase.
        function_exits = defaultdict(set)

        dump_file_prefix = self.project.filename

        if self._pickle_intermediate_results and \
                os.path.exists(dump_file_prefix + "_indirect_jumps.angr"):
            l.debug("Loading existing intermediate results.")
            self._indirect_jumps = pickle.load(open(dump_file_prefix + "_indirect_jumps.angr", "rb"))
            self.cfg = pickle.load(open(dump_file_prefix + "_coercecfg.angr", "rb"))
            self._unassured_functions = pickle.load(open(dump_file_prefix + "_unassured_functions.angr", "rb"))
        else:
            # Performance boost :-)
            # Scan for existing function prologues
            self._scan_function_prologues(traced_address, function_exits, initial_state)

            if self._pickle_intermediate_results:
                l.debug("Dumping intermediate results.")
                pickle.dump(self._indirect_jumps, open(dump_file_prefix + "_indirect_jumps.angr", "wb"), -1)
                pickle.dump(self.cfg, open(dump_file_prefix + "_coercecfg.angr", "wb"), -1)
                pickle.dump(self._unassured_functions, open(dump_file_prefix + "_unassured_functions.angr", "wb"), -1)

        if len(self._indirect_jumps):
            # We got some indirect jumps!
            # Gotta execute each basic block and see where it wants to jump to
            function_starts = self._process_indirect_jumps()

            self.base_address = self._solve_forbase_address(function_starts, self._unassured_functions)

            l.info("Base address should be 0x%x", self.base_address)

        else:
            l.debug("No indirect jumps are found. We switch to the slowpath mode.")
            # TODO: Slowpath mode...
            while True:
                next_addr = self._get_next_code_addr(initial_state)
                percentage = self._seg_list.occupied_size * 100.0 / (self._valid_memory_region_size)
                l.info("Analyzing %xh, progress %0.04f%%", next_addr, percentage)
                if next_addr is None:
                    break

                self.call_map.add_node(next_addr)

                self._scan_code(traced_address, function_exits, initial_state, next_addr)

        # Post-processing: Map those calls that are not made by call/blr
        # instructions to their targets in our map
        for src, s in function_exits.items():
            if src in self.call_map:
                for target in s:
                    if target in self.call_map:
                        self.call_map.add_edge(src, target)

        nodes = sorted(self.call_map.nodes())
        for i in range(len(nodes) - 1):
            if nodes[i] >= nodes[i + 1] - 4:
                for dst in self.call_map.successors(nodes[i + 1]):
                    self.call_map.add_edge(nodes[i], dst)
                for src in self.call_map.predecessors(nodes[i + 1]):
                    self.call_map.add_edge(src, nodes[i])
                self.call_map.remove_node(nodes[i + 1])

        l.debug("Construction finished.")

    def _full_code_scan(self):
        """
        Perform a full code scan on the target binary.
        """

        # We gotta time this function
        start_time = datetime.now()

        traced_address = set()
        self.functions = set()
        self.call_map = networkx.DiGraph()
        self.cfg = networkx.DiGraph()
        initial_state = self.project.factory.blank_state(mode="fastpath")
        initial_options = initial_state.options - {o.TRACK_CONSTRAINTS} - o.refs
        initial_options |= {o.SUPER_FASTPATH}
        # initial_options.remove(o.COW_STATES)
        initial_state.options = initial_options
        # Sadly, not all calls to functions are explicitly made by call
        # instruction - they could be a jmp or b, or something else. So we
        # should record all exits from a single function, and then add
        # necessary calling edges in our call map during the post-processing
        # phase.
        function_exits = defaultdict(set)

        widgets = [progressbar.Percentage(),
                   ' ',
                   progressbar.Bar(marker=progressbar.RotatingMarker()),
                   ' ',
                   progressbar.Timer(),
                   ' ',
                   progressbar.ETA()
        ]

        pb = progressbar.ProgressBar(widgets=widgets, maxval=10000 * 100).start()

        while True:
            next_addr = self._get_next_code_addr(initial_state)
            percentage = self._seg_list.occupied_size * 100.0 / (self._valid_memory_region_size)
            if percentage > 100.0: percentage = 100.0
            pb.update(percentage * 10000)

            if next_addr is not None:
                l.info("Analyzing %xh, progress %0.04f%%", next_addr, percentage)
            else:
                l.info('No more addr to analyze. Progress %0.04f%%', percentage)
                break

            self.call_map.add_node(next_addr)

            self._scan_code(traced_address, function_exits, initial_state, next_addr)

        pb.finish()
        end_time = datetime.now()
        l.info("A full code scan takes %d seconds.", (end_time - start_time).seconds)

    def _calc_entropy(self, data, size=None):
        if not data:
            return 0
        entropy = 0
        if size is None: size = len(data)
        data = str(pyvex.ffi.buffer(data, size))
        for x in range(0, 256):
            p_x = float(data.count(chr(x)))/size
            if p_x > 0:
                entropy += - p_x * math.log(p_x, 2)
        return entropy

    def _dbg_output(self):
        ret = ""
        ret += "Functions:\n"
        function_list = list(self.functions)
        # Sort it
        function_list = sorted(function_list)
        for f in function_list:
            ret += "0x%08x" % f

        return ret

    def genenare_callmap_sif(self, filepath):
        """
        Generate a sif file from the call map
        """
        graph = self.call_map

        if graph is None:
            raise AngrGirlScoutError('Please generate the call graph first.')

        f = open(filepath, "wb")

        for src, dst in graph.edges():
            f.write("0x%x\tDirectEdge\t0x%x\n" % (src, dst))

        f.close()

    def generate_code_cover(self):
        """
        Generate a list of all recovered basic blocks.
        """

        lst = [ ]
        for irsb_addr in self.cfg.nodes():
            if irsb_addr not in self._block_size:
                continue
            irsb_size = self._block_size[irsb_addr]
            lst.append((irsb_addr, irsb_size))

        lst = sorted(lst, key=lambda x: x[0])

        return lst

Пример #19

0

Показать файл

class GirlScout(Analysis):
    """
    We find functions inside the given binary, try to decide the base address if needed, and build a control-flow
    graph on top of that to see if there is an entry or not. Obviously if the binary is not loaded as a blob (not
    using Blob as its backend), GirlScout will not try to determine the base address.

    It's also optional to perform a full code scan of the binary to show where all codes are. By default we don't scan
    the entire binary since it's time consuming.

    You probably need a BoyScout to determine the possible architecture and endianess of your binary blob.
    """
    def __init__(self,
                 binary=None,
                 start=None,
                 end=None,
                 pickle_intermediate_results=False,
                 perform_full_code_scan=False):
        self._binary = binary if binary is not None else self.project.loader.main_object
        self._start = start if start is not None else self._binary.min_addr
        self._end = end if end is not None else self._binary.max_addr
        self._pickle_intermediate_results = pickle_intermediate_results
        self._perform_full_code_scan = perform_full_code_scan

        l.debug("Starts at 0x%08x and ends at 0x%08x.", self._start, self._end)

        # Valid memory regions
        self._valid_memory_regions = sorted(
            (start, start + len(backer))
            for start, backer in self.project.loader.memory.backers())
        self._valid_memory_region_size = sum([
            (end - start) for start, end in self._valid_memory_regions
        ])

        # Size of each basic block
        self._block_size = {}

        self._next_addr = self._start - 1
        # Starting point of functions
        self.functions = None
        # Calls between functions
        self.call_map = networkx.DiGraph()
        # A CFG - this is not what you get from project.analyses.CFG() !
        self.cfg = networkx.DiGraph()
        # Create the segment list
        self._seg_list = SegmentList()

        self._read_addr_to_run = defaultdict(list)
        self._write_addr_to_run = defaultdict(list)

        # All IRSBs with an indirect exit target
        self._indirect_jumps = set()

        self._unassured_functions = set()

        self.base_address = None

        # Start working!
        self._reconnoiter()

    @property
    def call_map(self):
        return self.call_map

    def _get_next_addr_to_search(self, alignment=None):
        # TODO: Take care of those functions that are already generated
        curr_addr = self._next_addr
        if self._seg_list.has_blocks:
            curr_addr = self._seg_list.next_free_pos(curr_addr)

        if alignment is not None:
            if curr_addr % alignment > 0:
                curr_addr = curr_addr - curr_addr % alignment + alignment

        # Make sure curr_addr exists in binary
        accepted = False
        for start, end in self._valid_memory_regions:
            if curr_addr >= start and curr_addr < end:
                # accept
                accepted = True
                break
            if curr_addr < start:
                # accept, but we are skipping the gap
                accepted = True
                curr_addr = start

        if not accepted:
            # No memory available!
            return None

        self._next_addr = curr_addr
        if self._end is None or curr_addr < self._end:
            l.debug("Returning new recon address: 0x%08x", curr_addr)
            return curr_addr
        else:
            l.debug("0x%08x is beyond the ending point.", curr_addr)
            return None

    def _get_next_code_addr(self, initial_state):
        """
        Besides calling _get_next_addr, we will check if data locates at that address seems to be code or not. If not, 
        we'll move on to request for next valid address.
        """
        next_addr = self._get_next_addr_to_search()
        if next_addr is None:
            return None

        start_addr = next_addr
        sz = ""
        is_sz = True
        while is_sz:
            # Get data until we meet a 0
            while next_addr in initial_state.memory:
                try:
                    l.debug("Searching address %x", next_addr)
                    val = initial_state.mem_concrete(next_addr, 1)
                    if val == 0:
                        if len(sz) < 4:
                            is_sz = False
                        else:
                            reach_end = True
                        break
                    if chr(val) not in string.printable:
                        is_sz = False
                        break
                    sz += chr(val)
                    next_addr += 1
                except SimValueError:
                    # Not concretizable
                    l.debug("Address 0x%08x is not concretizable!", next_addr)
                    break

            if len(sz) > 0 and is_sz:
                l.debug("Got a string of %d chars: [%s]", len(sz), sz)
                # l.debug("Occpuy %x - %x", start_addr, start_addr + len(sz) + 1)
                self._seg_list.occupy(start_addr, len(sz) + 1)
                sz = ""
                next_addr = self._get_next_addr_to_search()
                if next_addr is None:
                    return None
                # l.debug("next addr = %x", next_addr)
                start_addr = next_addr

            if is_sz:
                next_addr += 1

        instr_alignment = initial_state.arch.instruction_alignment
        if start_addr % instr_alignment > 0:
            start_addr = start_addr - start_addr % instr_alignment + \
                instr_alignment

        l.debug('_get_next_code_addr() returns 0x%x', start_addr)
        return start_addr

    def _symbolic_reconnoiter(self, addr, target_addr, max_depth=10):
        """
        When an IRSB has more than two exits (for example, a jumptable), we
        cannot concretize their exits in concrete mode. Hence we statically
        execute the function from beginning in this method, and then switch to
        symbolic mode for the final IRSB to get all possible exits of that
        IRSB.
        """
        state = self.project.factory.blank_state(addr=addr,
                                                 mode="symbolic",
                                                 add_options={o.CALLLESS})
        initial_exit = self.project.factory.path(state)
        explorer = Explorer(self.project,
                            start=initial_exit,
                            max_depth=max_depth,
                            find=(target_addr),
                            num_find=1).run()
        if len(explorer.found) > 0:
            path = explorer.found[0]
            last_run = path.last_run
            return last_run.flat_exits()
        else:
            return []

    def _static_memory_slice(self, run):
        if isinstance(run, SimIRSB):
            for stmt in run.statements:
                refs = stmt.actions
                if len(refs) > 0:
                    real_ref = refs[-1]
                    if type(real_ref) == SimActionData:
                        if real_ref.action == 'write':
                            addr = real_ref.addr
                            if not run.initial_state.solver.symbolic(addr):
                                concrete_addr = run.initial_state.solver.eval(
                                    addr)
                                self._write_addr_to_run[addr].append(run.addr)
                        elif real_ref.action == 'read':
                            addr = real_ref.addr
                            if not run.initial_state.solver.symbolic(addr):
                                concrete_addr = run.initial_state.solver.eval(
                                    addr)
                            self._read_addr_to_run[addr].append(run.addr)

    def _scan_code(self, traced_addresses, function_exits, initial_state,
                   starting_address):
        # Saving tuples like (current_function_addr, next_exit_addr)
        # Current_function_addr == -1 for exits not inside any function
        remaining_exits = set()
        next_addr = starting_address

        # Initialize the remaining_exits set
        remaining_exits.add(
            (next_addr, next_addr, next_addr, initial_state.copy()))

        while len(remaining_exits):
            current_function_addr, previous_addr, parent_addr, state = \
                remaining_exits.pop()
            if previous_addr in traced_addresses:
                continue

            # Add this node to the CFG first, in case this is a dangling node
            self.cfg.add_node(previous_addr)

            if current_function_addr != -1:
                l.debug("Tracing new exit 0x%08x in function 0x%08x",
                        previous_addr, current_function_addr)
            else:
                l.debug("Tracing new exit 0x%08x", previous_addr)
            traced_addresses.add(previous_addr)

            self._scan_block(previous_addr, state, current_function_addr,
                             function_exits, remaining_exits, traced_addresses)

    def _scan_block(self, addr, state, current_function_addr, function_exits,
                    remaining_exits, traced_addresses):
        # Let's try to create the pyvex IRSB directly, since it's much faster

        try:
            irsb = self.project.factory.block(addr).vex

            # Log the size of this basic block
            self._block_size[addr] = irsb.size

            # Occupy the block
            self._seg_list.occupy(addr, irsb.size)
        except (SimEngineError, SimMemoryError):
            return

        # Get all possible successors
        next, jumpkind = irsb.next, irsb.jumpkind
        successors = [(i.dst, i.jumpkind) for i in irsb.statements
                      if type(i) is pyvex.IRStmt.Exit]
        successors.append((next, jumpkind))

        # Process each successor
        for suc in successors:
            target, jumpkind = suc

            if type(target) is pyvex.IRExpr.Const:
                next_addr = target.con.value
            else:
                next_addr = None

            if jumpkind == 'Ijk_Boring' and next_addr is not None:
                remaining_exits.add(
                    (current_function_addr, next_addr, addr, None))

            elif jumpkind == 'Ijk_Call' and next_addr is not None:
                # Log it before we cut the tracing :)
                if jumpkind == "Ijk_Call":
                    if current_function_addr != -1:
                        self.functions.add(current_function_addr)
                        self.functions.add(next_addr)
                        self.call_map.add_edge(current_function_addr,
                                               next_addr)
                    else:
                        self.functions.add(next_addr)
                        self.call_map.add_node(next_addr)
                elif jumpkind == "Ijk_Boring" or \
                                jumpkind == "Ijk_Ret":
                    if current_function_addr != -1:
                        function_exits[current_function_addr].add(next_addr)

                # If we have traced it before, don't trace it anymore
                if next_addr in traced_addresses:
                    return

                remaining_exits.add((next_addr, next_addr, addr, None))
                l.debug("Function calls: %d", len(self.call_map.nodes()))

    def _scan_block_(self, addr, state, current_function_addr, function_exits,
                     remaining_exits, traced_addresses):

        # Get a basic block
        state.ip = addr

        s_path = self.project.factory.path(state)
        try:
            s_run = s_path.next_run
        except SimIRSBError as ex:
            l.debug(ex)
            return
        except AngrError as ex:
            # "No memory at xxx"
            l.debug(ex)
            return
        except (SimValueError, SimSolverModeError) as ex:
            # Cannot concretize something when executing the SimRun
            l.debug(ex)
            return
        except SimError as ex:
            # Catch all simuvex errors
            l.debug(ex)
            return

        if type(s_run) is SimIRSB:
            # Calculate its entropy to avoid jumping into uninitialized/all-zero space
            bytes = s_run.irsb._state[1]['bytes']
            size = s_run.irsb.size
            ent = self._calc_entropy(bytes, size=size)
            if ent < 1.0 and size > 40:
                # Skipping basic blocks that have a very low entropy
                return

        # self._static_memory_slice(s_run)

        # Mark that part as occupied
        if isinstance(s_run, SimIRSB):
            self._seg_list.occupy(addr, s_run.irsb.size)
        successors = s_run.flat_successors + s_run.unsat_successors
        has_call_exit = False
        tmp_exit_set = set()
        for suc in successors:
            if suc.history.jumpkind == "Ijk_Call":
                has_call_exit = True

        for suc in successors:
            jumpkind = suc.history.jumpkind

            if has_call_exit and jumpkind == "Ijk_Ret":
                jumpkind = "Ijk_FakeRet"

            if jumpkind == "Ijk_Ret":
                continue

            try:
                # Try to concretize the target. If we can't, just move on
                # to the next target
                next_addr = suc.solver.eval_one(suc.ip)
            except (SimValueError, SimSolverModeError) as ex:
                # Undecidable jumps (might be a function return, or a conditional branch, etc.)

                # We log it
                self._indirect_jumps.add((suc.history.jumpkind, addr))
                l.info("IRSB 0x%x has an indirect exit %s.", addr,
                       suc.history.jumpkind)

                continue

            self.cfg.add_edge(addr, next_addr, jumpkind=jumpkind)
            # Log it before we cut the tracing :)
            if jumpkind == "Ijk_Call":
                if current_function_addr != -1:
                    self.call_map.add_edge(current_function_addr, next_addr)
                else:
                    self.call_map.add_node(next_addr)
            elif jumpkind == "Ijk_Boring" or \
                            jumpkind == "Ijk_Ret":
                if current_function_addr != -1:
                    function_exits[current_function_addr].add(next_addr)

            # If we have traced it before, don't trace it anymore
            if next_addr in traced_addresses:
                continue
            # If we have traced it in current loop, don't tract it either
            if next_addr in tmp_exit_set:
                continue

            tmp_exit_set.add(next_addr)

            if jumpkind == "Ijk_Call":
                # This is a call. Let's record it
                new_state = suc.copy()
                # Unconstrain those parameters
                # TODO: Support other archs as well
                # if 12 + 16 in new_state.registers.mem:
                #    del new_state.registers.mem[12 + 16]
                #if 16 + 16 in new_state.registers.mem:
                #    del new_state.registers.mem[16 + 16]
                #if 20 + 16 in new_state.registers.mem:
                #    del new_state.registers.mem[20 + 16]
                # 0x8000000: call 0x8000045
                remaining_exits.add((next_addr, next_addr, addr, new_state))
                l.debug("Function calls: %d", len(self.call_map.nodes()))
            elif jumpkind == "Ijk_Boring" or \
                            jumpkind == "Ijk_Ret" or \
                            jumpkind == "Ijk_FakeRet":
                new_state = suc.copy()
                l.debug("New exit with jumpkind %s", jumpkind)
                # FIXME: should not use current_function_addr if jumpkind is "Ijk_Ret"
                remaining_exits.add(
                    (current_function_addr, next_addr, addr, new_state))
            elif jumpkind == "Ijk_NoDecode":
                # That's something VEX cannot decode!
                # We assume we ran into a deadend
                pass
            elif jumpkind.startswith("Ijk_Sig"):
                # Should not go into that exit
                pass
            elif jumpkind == "Ijk_TInval":
                # ppc32: isync
                # FIXME: It is the same as Ijk_Boring! Process it later
                pass
            elif jumpkind == 'Ijk_Sys_syscall':
                # Let's not jump into syscalls
                pass
            elif jumpkind == 'Ijk_InvalICache':
                pass
            elif jumpkind == 'Ijk_MapFail':
                pass
            elif jumpkind == 'Ijk_EmWarn':
                pass
            else:
                raise Exception("NotImplemented")

    def _scan_function_prologues(self, traced_address, function_exits,
                                 initial_state):
        """
        Scan the entire program space for prologues, and start code scanning at those positions
        :param traced_address:
        :param function_exits:
        :param initial_state:
        :param next_addr:
        :returns:
        """

        # Precompile all regexes
        regexes = set()
        for ins_regex in self.project.arch.function_prologs:
            r = re.compile(ins_regex)
            regexes.add(r)

        # TODO: Make sure self._start is aligned

        # Construct the binary blob first

        for start_, bytes_ in self.project.loader.main_object.memory.backers():
            for regex in regexes:
                # Match them!
                for mo in regex.finditer(bytes):
                    position = mo.start() + start_
                    if position % self.project.arch.instruction_alignment == 0:
                        if position not in traced_address:
                            percentage = self._seg_list.occupied_size * 100.0 / (
                                self._valid_memory_region_size)
                            l.info("Scanning %xh, progress %0.04f%%", position,
                                   percentage)

                            self._unassured_functions.add(position)

                            self._scan_code(traced_address, function_exits,
                                            initial_state, position)
                        else:
                            l.info("Skipping %xh", position)

    def _process_indirect_jumps(self):
        """
        Execute each basic block with an indeterminiable exit target
        :returns:
        """

        function_starts = set()
        l.info("We have %d indirect jumps", len(self._indirect_jumps))

        for jumpkind, irsb_addr in self._indirect_jumps:
            # First execute the current IRSB in concrete mode

            if len(function_starts) > 20:
                break

            if jumpkind == "Ijk_Call":
                state = self.project.factory.blank_state(
                    addr=irsb_addr,
                    mode="concrete",
                    add_options={o.SYMBOLIC_INITIAL_VALUES})
                path = self.project.factory.path(state)
                l.debug(hex(irsb_addr))

                try:
                    r = (path.next_run.successors +
                         path.next_run.unsat_successors)[0]
                    ip = r.solver.eval_one(r.ip)

                    function_starts.add(ip)
                    continue
                except SimSolverModeError as ex:
                    pass

                # Not resolved
                # Do a backward slicing from the call
                irsb = self.project.factory.block(irsb_addr).vex
                stmts = irsb.statements
                # Start slicing from the "next"

                b = Blade(self.cfg, irsb.addr, -1, project=self.project)

                # Debugging output
                for addr, stmt_idx in sorted(list(b.slice.nodes())):
                    irsb = self.project.factory.block(addr).vex
                    stmts = irsb.statements
                    l.debug("%x: %d | %s %d",
                            (addr, stmt_idx), stmts[stmt_idx],
                            b.slice.in_degree((addr, stmt_idx)))

                # Get all sources
                sources = [
                    n for n in b.slice.nodes() if b.slice.in_degree(n) == 0
                ]

                # Create the annotated CFG
                annotatedcfg = AnnotatedCFG(self.project,
                                            None,
                                            target_irsb_addr=irsb_addr,
                                            detect_loops=False)
                annotatedcfg.from_digraph(b.slice)

                for src_irsb, src_stmt_idx in sources:
                    # Use slicecutor to execute each one, and get the address
                    # We simply give up if any exception occurs on the way

                    start_state = self.project.factory.blank_state(
                        addr=src_irsb,
                        add_options={
                            o.DO_RET_EMULATION, o.TRUE_RET_EMULATION_GUARD
                        })

                    start_path = self.project.factory.path(start_state)

                    # Create the slicecutor
                    slicecutor = Slicecutor(self.project,
                                            annotatedcfg,
                                            start=start_path,
                                            targets=(irsb_addr, ))

                    # Run it!
                    try:
                        slicecutor.run()
                    except KeyError as ex:
                        # This is because the program slice is incomplete.
                        # Blade will support more IRExprs and IRStmts
                        l.debug(
                            "KeyError occurred due to incomplete program slice.",
                            exc_info=ex)
                        continue

                    # Get the jumping targets
                    for r in slicecutor.reached_targets:
                        if r.next_run.successors:
                            target_ip = r.next_run.successors[0].ip
                            se = r.next_run.successors[0].se

                            if not se.symbolic(target_ip):
                                concrete_ip = se.eval_one(target_ip)
                                function_starts.add(concrete_ip)
                                l.info("Found a function address %x",
                                       concrete_ip)

        return function_starts

    def _solve_forbase_address(self, function_starts, functions):
        """
        Voting for the most possible base address.

        :param function_starts:
        :param functions:
        :returns:
        """

        pseudo_base_addr = self.project.loader.main_object.min_addr

        base_addr_ctr = {}

        for s in function_starts:
            for f in functions:
                base_addr = s - f + pseudo_base_addr
                ctr = 1

                for k in function_starts:
                    if k - base_addr + pseudo_base_addr in functions:
                        ctr += 1

                if ctr > 5:
                    base_addr_ctr[base_addr] = ctr

        if len(base_addr_ctr):
            base_addr, hits = sorted([(k, v)
                                      for k, v in base_addr_ctr.items()],
                                     key=lambda x: x[1],
                                     reverse=True)[0]

            return base_addr
        else:
            return None

    def _reconnoiter(self):

        if type(self._binary) is cle.blob.Blob:
            self._determinebase_address()

        if self._perform_full_code_scan:
            self._full_code_scan()

    def _determinebase_address(self):
        """
        The basic idea is simple: start from a specific point, try to construct
        functions as much as we can, and maintain a function distribution graph
        and a call graph simultaneously. Repeat searching until we come to the
        end that there is no new function to be found.
        A function should start with:
            # some addresses that a call exit leads to, or
            # certain instructions. They are recoreded in SimArch.

        For a better performance, instead of blindly scanning the entire process
        space, we first try to search for instruction patterns that a function
        may start with, and start scanning at those positions. Then we try to
        decode anything that is left.
        """

        traced_address = set()
        self.functions = set()
        self.call_map = networkx.DiGraph()
        self.cfg = networkx.DiGraph()
        initial_state = self.project.factory.blank_state(mode="fastpath")
        initial_options = initial_state.options - {o.TRACK_CONSTRAINTS
                                                   } - o.refs
        initial_options |= {o.SUPER_FASTPATH}
        # initial_options.remove(o.COW_STATES)
        initial_state.options = initial_options
        # Sadly, not all calls to functions are explicitly made by call
        # instruction - they could be a jmp or b, or something else. So we
        # should record all exits from a single function, and then add
        # necessary calling edges in our call map during the post-processing
        # phase.
        function_exits = defaultdict(set)

        dump_file_prefix = self.project.filename

        if self._pickle_intermediate_results and \
                os.path.exists(dump_file_prefix + "_indirect_jumps.angr"):
            l.debug("Loading existing intermediate results.")
            self._indirect_jumps = pickle.load(
                open(dump_file_prefix + "_indirect_jumps.angr", "rb"))
            self.cfg = pickle.load(
                open(dump_file_prefix + "_coercecfg.angr", "rb"))
            self._unassured_functions = pickle.load(
                open(dump_file_prefix + "_unassured_functions.angr", "rb"))
        else:
            # Performance boost :-)
            # Scan for existing function prologues
            self._scan_function_prologues(traced_address, function_exits,
                                          initial_state)

            if self._pickle_intermediate_results:
                l.debug("Dumping intermediate results.")
                pickle.dump(
                    self._indirect_jumps,
                    open(dump_file_prefix + "_indirect_jumps.angr", "wb"), -1)
                pickle.dump(self.cfg,
                            open(dump_file_prefix + "_coercecfg.angr", "wb"),
                            -1)
                pickle.dump(
                    self._unassured_functions,
                    open(dump_file_prefix + "_unassured_functions.angr", "wb"),
                    -1)

        if len(self._indirect_jumps):
            # We got some indirect jumps!
            # Gotta execute each basic block and see where it wants to jump to
            function_starts = self._process_indirect_jumps()

            self.base_address = self._solve_forbase_address(
                function_starts, self._unassured_functions)

            l.info("Base address should be 0x%x", self.base_address)

        else:
            l.debug(
                "No indirect jumps are found. We switch to the slowpath mode.")
            # TODO: Slowpath mode...
            while True:
                next_addr = self._get_next_code_addr(initial_state)
                percentage = self._seg_list.occupied_size * 100.0 / (
                    self._valid_memory_region_size)
                l.info("Analyzing %xh, progress %0.04f%%", next_addr,
                       percentage)
                if next_addr is None:
                    break

                self.call_map.add_node(next_addr)

                self._scan_code(traced_address, function_exits, initial_state,
                                next_addr)

        # Post-processing: Map those calls that are not made by call/blr
        # instructions to their targets in our map
        for src, s in function_exits.items():
            if src in self.call_map:
                for target in s:
                    if target in self.call_map:
                        self.call_map.add_edge(src, target)

        nodes = sorted(self.call_map.nodes())
        for i in range(len(nodes) - 1):
            if nodes[i] >= nodes[i + 1] - 4:
                for dst in self.call_map.successors(nodes[i + 1]):
                    self.call_map.add_edge(nodes[i], dst)
                for src in self.call_map.predecessors(nodes[i + 1]):
                    self.call_map.add_edge(src, nodes[i])
                self.call_map.remove_node(nodes[i + 1])

        l.debug("Construction finished.")

    def _full_code_scan(self):
        """
        Perform a full code scan on the target binary.
        """

        # We gotta time this function
        start_time = datetime.now()

        traced_address = set()
        self.functions = set()
        self.call_map = networkx.DiGraph()
        self.cfg = networkx.DiGraph()
        initial_state = self.project.factory.blank_state(mode="fastpath")
        initial_options = initial_state.options - {o.TRACK_CONSTRAINTS
                                                   } - o.refs
        initial_options |= {o.SUPER_FASTPATH}
        # initial_options.remove(o.COW_STATES)
        initial_state.options = initial_options
        # Sadly, not all calls to functions are explicitly made by call
        # instruction - they could be a jmp or b, or something else. So we
        # should record all exits from a single function, and then add
        # necessary calling edges in our call map during the post-processing
        # phase.
        function_exits = defaultdict(set)

        widgets = [
            progressbar.Percentage(), ' ',
            progressbar.Bar(marker=progressbar.RotatingMarker()), ' ',
            progressbar.Timer(), ' ',
            progressbar.ETA()
        ]

        pb = progressbar.ProgressBar(widgets=widgets,
                                     maxval=10000 * 100).start()

        while True:
            next_addr = self._get_next_code_addr(initial_state)
            percentage = self._seg_list.occupied_size * 100.0 / (
                self._valid_memory_region_size)
            if percentage > 100.0: percentage = 100.0
            pb.update(percentage * 10000)

            if next_addr is not None:
                l.info("Analyzing %xh, progress %0.04f%%", next_addr,
                       percentage)
            else:
                l.info('No more addr to analyze. Progress %0.04f%%',
                       percentage)
                break

            self.call_map.add_node(next_addr)

            self._scan_code(traced_address, function_exits, initial_state,
                            next_addr)

        pb.finish()
        end_time = datetime.now()
        l.info("A full code scan takes %d seconds.",
               (end_time - start_time).seconds)

    def _calc_entropy(self, data, size=None):
        if not data:
            return 0
        entropy = 0
        if size is None: size = len(data)
        data = str(pyvex.ffi.buffer(data, size))
        for x in range(0, 256):
            p_x = float(data.count(chr(x))) / size
            if p_x > 0:
                entropy += -p_x * math.log(p_x, 2)
        return entropy

    def _dbg_output(self):
        ret = ""
        ret += "Functions:\n"
        function_list = list(self.functions)
        # Sort it
        function_list = sorted(function_list)
        for f in function_list:
            ret += "0x%08x" % f

        return ret

    def genenare_callmap_sif(self, filepath):
        """
        Generate a sif file from the call map
        """
        graph = self.call_map

        if graph is None:
            raise AngrGirlScoutError('Please generate the call graph first.')

        f = open(filepath, "wb")

        for src, dst in graph.edges():
            f.write("0x%x\tDirectEdge\t0x%x\n" % (src, dst))

        f.close()

    def generate_code_cover(self):
        """
        Generate a list of all recovered basic blocks.
        """

        lst = []
        for irsb_addr in self.cfg.nodes():
            if irsb_addr not in self._block_size:
                continue
            irsb_size = self._block_size[irsb_addr]
            lst.append((irsb_addr, irsb_size))

        lst = sorted(lst, key=lambda x: x[0])

        return lst

Python SegmentList примеры использования