示例#1
0
文件: depgraph.py 项目: 0xf1sh/miasm
    def _build_depGraph(self, depnode):
        """Recursively build the final list of DiGraph, and clean up unmodifier
        nodes
        @depnode: starting node
        """

        if depnode not in self._cache or \
                not self._cache[depnode]:
            ## There is no dependency
            graph = DiGraph()
            graph.add_node(depnode)
            return graph

        # Recursion
        dependencies = list(self._cache[depnode])

        graphs = []
        for sub_depnode in dependencies:
            graphs.append(self._build_depGraph(sub_depnode))

        # head(graphs[i]) == dependencies[i]
        graph = DiGraph()
        graph.add_node(depnode)
        for head in dependencies:
            graph.add_uniq_edge(head, depnode)

        for subgraphs in itertools.product(graphs):
            for sourcegraph in subgraphs:
                for node in sourcegraph.nodes():
                    graph.add_node(node)
                for edge in sourcegraph.edges():
                    graph.add_uniq_edge(*edge)

        # Update the running queue
        return graph
示例#2
0
    def _build_depgraph(self, depnode):
        """Recursively build the final list of DiGraph, and clean up unmodifier
        nodes
        @depnode: starting node
        """

        if depnode not in self._cache or \
                not self._cache[depnode]:
            # There is no dependency
            graph = DiGraph()
            graph.add_node(depnode)
            return graph

        # Recursion
        dependencies = list(self._cache[depnode])

        graphs = []
        for sub_depnode in dependencies:
            graphs.append(self._build_depgraph(sub_depnode))

        # head(graphs[i]) == dependencies[i]
        graph = DiGraph()
        graph.add_node(depnode)
        for head in dependencies:
            graph.add_uniq_edge(head, depnode)

        for subgraphs in itertools.product(graphs):
            for sourcegraph in subgraphs:
                for node in sourcegraph.nodes():
                    graph.add_node(node)
                for edge in sourcegraph.edges():
                    graph.add_uniq_edge(*edge)

        # Update the running queue
        return graph
示例#3
0
文件: depgraph.py 项目: 0xf1sh/miasm
    def as_graph(self, starting_nodes):
        """Return a DiGraph corresponding to computed dependencies, with
        @starting_nodes as leafs
        @starting_nodes: set of DependencyNode instance
        """

        # Build subgraph for each starting_node
        subgraphs = []
        for starting_node in starting_nodes:
            subgraphs.append(self._build_depGraph(starting_node))

        # Merge subgraphs into a final DiGraph
        graph = DiGraph()
        for sourcegraph in subgraphs:
            for node in sourcegraph.nodes():
                graph.add_node(node)
            for edge in sourcegraph.edges():
                graph.add_uniq_edge(*edge)
        return graph
示例#4
0
    def as_graph(self, starting_nodes):
        """Return a DiGraph corresponding to computed dependencies, with
        @starting_nodes as leafs
        @starting_nodes: set of DependencyNode instance
        """

        # Build subgraph for each starting_node
        subgraphs = []
        for starting_node in starting_nodes:
            subgraphs.append(self._build_depgraph(starting_node))

        # Merge subgraphs into a final DiGraph
        graph = DiGraph()
        for sourcegraph in subgraphs:
            for node in sourcegraph.nodes():
                graph.add_node(node)
            for edge in sourcegraph.edges():
                graph.add_uniq_edge(*edge)
        return graph
示例#5
0
class Snapshot(object):
    @classmethod
    def get_byte(cls, value, byte):
        '''Return the byte @byte of the value'''
        return struct.pack('@B', (value & (0xFF << (8 * byte))) >> (8 * byte))

    @classmethod
    def unpack_ptr(cls, value):
        return struct.unpack('@P', value)[0]

    def __init__(self, abicls, machine):
        self.abicls = abicls

        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self._instr_count = 0
        self._pending_call = []
        # Function addr -> list of information on calls
        self.function_calls = {}
        self.paths = DiGraph()

        self.in_memory = {}
        self.out_memory = {}

        self._ira = Machine(machine).ira()
        self._ptr_size = self._ira.sizeof_pointer() / 8
        self.sp = self._ira.sp.name

    def add_input_register(self, reg_name, reg_value):
        self.input_reg[reg_name] = reg_value

    def add_output_register(self, reg_name, reg_value):
        self.output_reg[reg_name] = reg_value

    def add_memory_read(self, address, size, value):
        for i in xrange(size):
            self.out_memory[address + i] = MemoryAccess(
                1,
                Snapshot.get_byte(value, i),
                0,  # Output access never used
            )

            if address + i not in self.in_memory:
                self.in_memory[address + i] = MemoryAccess(
                    1,
                    Snapshot.get_byte(value, i),
                    PAGE_READ,
                )

            else:
                self.in_memory[address + i].access |= PAGE_READ

    def add_memory_write(self, address, size, value):
        for i in xrange(size):
            self.out_memory[address + i] = MemoryAccess(
                1,
                Snapshot.get_byte(value, i),
                0,  # Output access never used
            )

            if address + i not in self.in_memory:
                self.in_memory[address + i] = MemoryAccess(
                    1,
                    "\x00",
                    # The value is
                    # not used by the
                    # test
                    PAGE_WRITE,
                )

            else:
                self.in_memory[address + i].access |= PAGE_WRITE

    def add_executed_instruction(self, address):
        '''
        Function called to signal that the address has been executed
        This function has to be called in the order of their executed instruction
        Else paths can not be updated correctly
        '''
        self._previous_addr = self._current_addr
        self._current_addr = address
        self.paths.add_uniq_edge(self._previous_addr, self._current_addr)
        self._instr_count += 1

        # Resolve call destination
        if (self._pending_call and self._previous_addr
                == self._pending_call[-1]["caller_addr"]):
            info = self._pending_call[-1]
            info["dest"] = address
            info["beg"] = self._instr_count

    def add_call(self, caller_addr, stack_ptr):
        '''
        Function call, target is not determined yet
        called *before* instruction execution
        '''
        info = {
            "stack_ptr": stack_ptr,
            "caller_addr": caller_addr,
        }
        self._pending_call.append(info)

    def add_ret(self, ret_addr, stack_ptr, value):
        '''
        Function ret
        called *after* instruction execution
        '''
        # Find corresponding call
        assert self._pending_call
        assert self._pending_call[-1]["stack_ptr"] >= stack_ptr

        info = self._pending_call.pop()
        info["end"] = self._instr_count
        info["ret"] = value
        current_interval = self.function_calls.setdefault(
            info["dest"], list()).append(info)

    def clean(self):
        """Clean the snapshot for further uses"""

        self.agglomerate_memory(self.in_memory)
        self.agglomerate_memory(self.out_memory)

    def agglomerate_memory(self, mem):
        '''
        Assuming @mem is only composed of non-overlapping block
        this function agglomerate contiguous blocks having the same access right
        '''
        for addr in sorted(mem.keys()):

            # if the addr is not already deleted
            if addr in mem:

                end_addr = addr + mem[addr].size
                while end_addr in mem:
                    cur_mem = mem[addr]
                    next_mem = mem[end_addr]

                    # If access change, do not agglomerate
                    if cur_mem.access != next_mem.access:
                        break

                    cur_mem.size += next_mem.size
                    cur_mem.data += next_mem.data
                    del mem[end_addr]
                    end_addr += next_mem.size
示例#6
0
class Snapshot(object):

    clobbered_regs = [
        "RCX", "RDX", "RSI", "RDI", "RBP", "R8", "R9", "R10", "R11", "RBP"
    ]

    @classmethod
    def get_byte(cls, value, byte):
        '''Return the byte @byte of the value'''
        return struct.pack('@B', (value & (0xFF << (8 * byte))) >> (8 * byte))

    @classmethod
    def unpack_ptr(cls, value):
        return struct.unpack('@P', value)[0]

    def __init__(self, segments, abicls, machine):
        self.segments = segments
        self.abicls = abicls

        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self.paths = DiGraph()

        self.in_memory = {}
        self.out_memory = {}

        self.refs = {}

        self._ira = Machine(machine).ira()
        self._ptr_size = self._ira.sizeof_pointer() / 8
        self.sp = self._ira.sp.name

    def _get_segment_index_by_addr(self, addr):
        for i, seg in enumerate(self.segments):
            if seg[0] <= addr < seg[1]:
                return i
        raise ValueError("Segment not found for addr %x" % addr)

    def add_input_register(self, reg_name, reg_value):
        self.input_reg[reg_name] = reg_value

    def add_output_register(self, reg_name, reg_value):
        self.output_reg[reg_name] = reg_value

    def add_memory_read(self, address, size, value):
        for i in xrange(size):
            self.out_memory[address + i] = MemoryAccess(
                1,
                Snapshot.get_byte(value, i),
                0,  # Output access never used
                self._get_segment_index_by_addr(address + i))

            if address + i not in self.in_memory:
                self.in_memory[address + i] = MemoryAccess(
                    1, Snapshot.get_byte(value, i), PAGE_READ,
                    self._get_segment_index_by_addr(address + i))

            else:
                self.in_memory[address + i].access |= PAGE_READ

    def add_memory_write(self, address, size, value):
        for i in xrange(size):
            self.out_memory[address + i] = MemoryAccess(
                1,
                Snapshot.get_byte(value, i),
                0,  # Output access never used
                self._get_segment_index_by_addr(address + i))

            if address + i not in self.in_memory:
                self.in_memory[address + i] = MemoryAccess(
                    1,
                    "\x00",
                    # The value is
                    # not used by the
                    # test
                    PAGE_WRITE,
                    self._get_segment_index_by_addr(address + i))

            else:
                self.in_memory[address + i].access |= PAGE_WRITE

    def add_executed_instruction(self, address):
        '''
        Function called to signal that the address has been executed
        This function has to be called in the order of their executed instruction
        Else paths can not be updated correctly
        '''
        self._previous_addr = self._current_addr
        self._current_addr = address
        self.paths.add_uniq_edge(self._previous_addr, self._current_addr)

    def clean(self):
        '''Try to remove all implementation dependant elements from the trace'''

        # do not record stack frame on output because the use of this
        # memory aera is implementation dependant
        self.remove_stack_frame()

        self.agglomerate_memory(self.in_memory)
        self.agglomerate_memory(self.out_memory)

        self.remove_clobbered_registers()

        self.find_references_to_input_memory()

        self.addresses_to_segment_offset()

        self.remap_segment()

    def remove_stack_frame(self):
        '''
        Remove stack frame from the memory.
        Memory is considered fragmented ie. composed of one bit sized blocks
        '''

        SP = self.input_reg[self.sp]

        stack_seg_idx = self._get_segment_index_by_addr(SP)
        top_stack = self.segments[stack_seg_idx][0]

        for mem in (self.out_memory, self.in_memory):
            for addr in mem.keys():
                if top_stack < addr < SP + self._ptr_size:
                    # addr in stack frame
                    del mem[addr]

    def agglomerate_memory(self, mem):
        '''
        Assuming @mem is only composed of 1 byte sized bloc,
        this function agglomerate contiguous blocs that are in the same segment and have the same access right
        '''
        for addr in sorted(mem.keys()):

            # if the addr is not already deleted
            if addr in mem:

                end_addr = addr + mem[addr].size
                while end_addr in mem:
                    cur_mem = mem[addr]
                    next_mem = mem[end_addr]

                    # If access change, do not agglomerate
                    if cur_mem.access != next_mem.access:
                        break

                    # If segment change, do not agglomerate
                    if cur_mem.segment != next_mem.segment:
                        break

                    cur_mem.size += next_mem.size
                    cur_mem.data += next_mem.data
                    del mem[end_addr]
                    end_addr += next_mem.size

    def remove_clobbered_registers(self):
        '''Remove clobbered registers from the output memory'''
        self.output_reg = {
            reg: v
            for reg, v in self.output_reg.iteritems()
            if reg not in self.clobbered_regs
        }

    def find_references_to_input_memory(self):
        '''
        Populate the refs attribut
        This attribut should contain all the references (pointers) present in memory and registers
        '''

        # The heuristic used to detect is: if a memory bloc or a
        # register hold a value that is an address used in the input
        # memory, then this bloc or register is a reference

        # To determine the size of the referenced memory, the strategy
        # is a greedy one. All the memory that follow the referenced
        # address is considered as part of the reference.

        for reg, value in self.input_reg.iteritems():
            if reg != self.sp:
                self.update_references(reg, value, "in_reg")
        for reg, value in self.output_reg.iteritems():
            if reg != self.sp:
                self.update_references(reg, value, "out_reg")

        ptr_size = self._ptr_size

        for addr, mem in self.in_memory.iteritems():
            data = mem.data
            for i in xrange(len(data) - ptr_size + 1):
                self.update_references(addr + i,
                                       self.unpack_ptr(data[i:ptr_size + i]),
                                       "in_mem")

        for addr, mem in self.out_memory.iteritems():
            data = mem.data
            for i in xrange(len(data) - ptr_size + 1):
                self.update_references(addr + i,
                                       self.unpack_ptr(data[i:ptr_size + i]),
                                       "out_mem")

        self.add_rsp_ref_to_stack()

        # If two references are contiguous, then the first reference
        # size will cover the second one. The following code remove
        # these kind of overlaps
        for ref in sorted(self.refs):
            for ref2 in self.refs:
                if ref < ref2 < ref + self.refs[ref].size:
                    self.refs[ref].size -= self.refs[ref2].size

    def add_rsp_ref_to_stack(self):
        in_RSP = self.input_reg[self.sp]
        out_RSP = self.output_reg[self.sp]

        stack_seg_idx = self._get_segment_index_by_addr(in_RSP)

        if in_RSP not in self.refs:
            self.refs[in_RSP] = Reference(stack_seg_idx, self._ptr_size)
        self.refs[in_RSP].add_ref("RSP", "in_reg")
        if out_RSP not in self.refs:
            self.refs[out_RSP] = Reference(stack_seg_idx, self._ptr_size)
        self.refs[out_RSP].add_ref("RSP", "out_reg")

    def update_references(self, name, value, value_type):
        in_mem = self.in_memory
        for addr, mem in in_mem.iteritems():
            if addr <= value < addr + mem.size:

                if value not in self.refs:
                    size = mem.size - (value - addr)
                    while addr + size in in_mem:
                        size += in_mem[addr + size].size

                    self.refs[value] = Reference(mem.segment, size)

                self.refs[value].add_ref(name, value_type)

    def addresses_to_segment_offset(self):
        '''Convert the absolute addresses to segment base/offset addresses'''

        self._addresses_to_segment_offset(self.in_memory)
        self._addresses_to_segment_offset(self.out_memory)
        self._addresses_to_segment_offset(self.refs)

        for ref in self.refs.itervalues():
            ref.addresses_to_segment_offset(self.segments)

    def _addresses_to_segment_offset(self, mem):
        for addr in mem.keys():
            seg_idx = mem[addr].segment
            segment_base = self.segments[seg_idx][0]
            mem[(addr - segment_base, seg_idx)] = mem.pop(addr)

    def isRegInInputRef(self, reg):
        for addr, ref in self.refs.iteritems():
            if reg in ref.in_reg:
                return addr
        return None

    def isRegInOutputRef(self, reg):
        for addr, ref in self.refs.iteritems():
            if reg in ref.out_reg:
                return addr
        return None

    def isMemInRef(self, mem):
        for addr, ref in self.refs.iteritems():
            if mem in ref.in_mem:
                return addr
        return None

    def _updateSegmentInDict(self, dic, seg_mapping):
        '''
        Change the segment indexes used by dic according to the new segment mapping seg_mapping
        '''
        for (offset, seg) in dic.keys():
            new_seg_nb = seg_mapping[seg]
            dic[(offset, seg)].segment = new_seg_nb
            dic[(offset, new_seg_nb)] = dic.pop((offset, seg))

    def __update_mapping_struct(self, memory, seg_mapping, seg_borne):
        '''
        Add the segments used in mem to the segment mapping (seg_mapping) and update the bornes (seg_borne)
        Instance variable "__nb_seg" should be initialized to 0 before the fisrt call to this function
        '''

        for (offset, seg), mem in memory.iteritems():
            if seg in seg_mapping:
                (minAddr, maxAddr) = seg_borne[seg]
                seg_borne[seg] = (min(minAddr,
                                      offset), max(maxAddr, offset + mem.size))
            else:
                seg_borne[seg] = (offset, offset + mem.size)
                seg_mapping[seg] = self.__nb_seg
                self.__nb_seg += 1
        return self.__nb_seg

    def remap_segment(self):
        '''
        Reduce the self.segment structure to be minimalist (only segments used by in and out memory)
        '''
        seg_mapping = {}
        seg_borne = {}

        # Get the new segment mapping and corresponding sizes
        self.__nb_seg = 0
        self.__update_mapping_struct(self.in_memory, seg_mapping, seg_borne)
        self.__update_mapping_struct(self.out_memory, seg_mapping, seg_borne)
        self.__update_mapping_struct(self.refs, seg_mapping, seg_borne)

        seg_size = [0] * len(seg_mapping)
        for no_seg, borne in seg_borne.iteritems():
            seg_size[seg_mapping[no_seg]] = borne[1] - borne[0]

        # Use the new mapping in snapshot's dictionaries
        self._updateSegmentInDict(self.in_memory, seg_mapping)
        self._updateSegmentInDict(self.out_memory, seg_mapping)
        self._updateSegmentInDict(self.refs, seg_mapping)

        # Use the new mapping inside reference structure
        for ref in self.refs.itervalues():
            ref.in_mem = [(offset, seg_mapping[seg])
                          for (offset, seg) in ref.in_mem]
            ref.out_mem = [(offset, seg_mapping[seg])
                           for (offset, seg) in ref.out_mem]

        # Update self.segment according to the new mapping
        new_segments = [None] * len(seg_mapping)

        for seg, mapping in seg_mapping.iteritems():
            new_segments[mapping] = self.segments[seg]
        self.segments = new_segments

    def removeRegFromRef(self, reg):
        for ref in self.refs.itervalues():
            if reg in ref.in_reg:
                ref.in_reg.remove(reg)

    def removeMemFromRef(self, mem):
        for ref in self.refs.itervalues():
            if mem in ref.in_mem:
                ref.in_mem.remove(mem)

    def getStackSegment(self):
        for addr, ref in self.refs.iteritems():
            if self.sp in ref.in_reg:
                return addr

    # True: arg changed
    # False: arg not present
    def changeArg(self, number, newValue):

        # If argument is pass in a register
        if number < 7:
            reg_list = self.abicls.regs_mapping

            self.removeRegFromRef(reg_list[number - 1])

            try:
                self.input_reg[reg_list[number - 1]] = newValue
                del self.output_reg[reg_list[number - 1]]
            except KeyError:
                pass

            return True

        # If argument is pass on the stack
        else:
            (stackOff, stackSeg) = self.getStackSegment()

            argAddr = stackOff + 8 * (number - 7) + 8

            self.removeMemFromRef((argAddr, stackSeg))

            argFoundInSnapshot = False

            memO = self.out_memory
            memI = self.in_memory

            # Search for the arg adresse in the input memory$
            # If it is found, change its value
            for (offset, seg), mem in memI.iteritems():
                if seg == stackSeg:
                    if offset <= argAddr < offset + mem.size:
                        argFoundInSnapshot = True
                        data = mem.data
                        mem.data = data[0:argAddr - offset] + struct.pack(
                            '@P', newValue) + data[self._ptr_size + argAddr -
                                                   offset:]
                        mem.size = len(mem.data)

            # If the argument is found, we remove it from the input memory
            # Because it will be allocated as an argument by the test itself
            if argFoundInSnapshot:
                for (offset, seg) in memO.keys():
                    addr = (offset, seg)
                    if seg == stackSeg:
                        if offset <= argAddr < offset + memO[addr].size:

                            if argAddr + self._ptr_size < offset + memO[
                                    addr].size:
                                memaccess = MemoryAccess(
                                    memO[addr].size -
                                    (argAddr + self._ptr_size - offset),
                                    memO[addr].data[argAddr - offset:argAddr +
                                                    self._ptr_size - offset],
                                    memO[addr].access, stackSeg)
                                memO[(argAddr + self._ptr_size,
                                      stackSeg)] = memaccess

                            if argAddr > offset:
                                memO[addr].size = argAddr - offset

                            else:
                                del memO[addr]

            return argFoundInSnapshot
示例#7
0
文件: trace.py 项目: cea-sec/Sibyl
class Snapshot(object):

    @classmethod
    def get_byte(cls, value, byte):
        '''Return the byte @byte of the value'''
        return struct.pack('@B', (value & (0xFF << (8 * byte))) >> (8 * byte))

    @classmethod
    def unpack_ptr(cls, value):
        return struct.unpack('@P', value)[0]

    def __init__(self, abicls, machine):
        self.abicls = abicls

        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self._instr_count = 0
        self._pending_call = []
        # Function addr -> list of information on calls
        self.function_calls = {}
        self.paths = DiGraph()

        self.in_memory = {}
        self.out_memory = {}

        self._ira = Machine(machine).ira()
        self._ptr_size = self._ira.sizeof_pointer()/8
        self.sp = self._ira.sp.name

    def add_input_register(self, reg_name, reg_value):
        self.input_reg[reg_name] = reg_value

    def add_output_register(self, reg_name, reg_value):
        self.output_reg[reg_name] = reg_value

    def add_memory_read(self, address, size, value):
        for i in xrange(size):
            self.out_memory[address + i] = MemoryAccess(1,
                                                        Snapshot.get_byte(value, i),
                                                        0,  # Output access never used
            )

            if address + i not in self.in_memory:
                self.in_memory[address + i] = MemoryAccess(1,
                                                           Snapshot.get_byte(value, i),
                                                           PAGE_READ,
                )

            else:
                self.in_memory[address + i].access |= PAGE_READ

    def add_memory_write(self, address, size, value):
        for i in xrange(size):
            self.out_memory[address + i] = MemoryAccess(1,
                                                        Snapshot.get_byte(value, i),
                                                        0,  # Output access never used
            )

            if address + i not in self.in_memory:
                self.in_memory[address + i] = MemoryAccess(1,
                                                           "\x00",
                                                           # The value is
                                                           # not used by the
                                                           # test
                                                           PAGE_WRITE,
                )

            else:
                self.in_memory[address + i].access |= PAGE_WRITE

    def add_executed_instruction(self, address):
        '''
        Function called to signal that the address has been executed
        This function has to be called in the order of their executed instruction
        Else paths can not be updated correctly
        '''
        self._previous_addr = self._current_addr
        self._current_addr = address
        self.paths.add_uniq_edge(self._previous_addr, self._current_addr)
        self._instr_count += 1

        # Resolve call destination
        if (self._pending_call and
            self._previous_addr == self._pending_call[-1]["caller_addr"]):
            info = self._pending_call[-1]
            info["dest"] = address
            info["beg"] = self._instr_count


    def add_call(self, caller_addr, stack_ptr):
        '''
        Function call, target is not determined yet
        called *before* instruction execution
        '''
        info = {"stack_ptr": stack_ptr,
                "caller_addr": caller_addr,
        }
        self._pending_call.append(info)

    def add_ret(self, ret_addr, stack_ptr, value):
        '''
        Function ret
        called *after* instruction execution
        '''
        # Find corresponding call
        assert self._pending_call
        assert self._pending_call[-1]["stack_ptr"] >= stack_ptr

        info = self._pending_call.pop()
        info["end"] = self._instr_count
        info["ret"] = value
        current_interval = self.function_calls.setdefault(info["dest"],
                                                          list()).append(info)

    def clean(self):
        """Clean the snapshot for further uses"""

        self.agglomerate_memory(self.in_memory)
        self.agglomerate_memory(self.out_memory)

    def agglomerate_memory(self, mem):
        '''
        Assuming @mem is only composed of non-overlapping block
        this function agglomerate contiguous blocks having the same access right
        '''
        for addr in sorted(mem.keys()):

            # if the addr is not already deleted
            if addr in mem:

                end_addr = addr + mem[addr].size
                while end_addr in mem:
                    cur_mem = mem[addr]
                    next_mem = mem[end_addr]

                    # If access change, do not agglomerate
                    if cur_mem.access != next_mem.access:
                        break

                    cur_mem.size += next_mem.size
                    cur_mem.data += next_mem.data
                    del mem[end_addr]
                    end_addr += next_mem.size