Пример #1
0
def find_main(rad: RAD, instructions: List[CsInsn]) -> Union[None, int]:
    '''Try to find main by interpreting the given basic block as starting the
    C runtime.

    The block must contain at least two instructions and its last effective
    instruction (ignoring one trailing ``hlt``) must be a ``call``.  If that
    holds, the block is scanned for ``mov rdi, <imm>`` and RIP-relative
    ``lea rdi, [...]`` instructions; the value of the last one found is
    reported as the possible address of main.

    Arguments:
        rad: Disassembler; only its ``in_range`` check is used here.
        instructions: The instructions of the candidate entry block, in order.

    Returns:
        The candidate address of main, or None if the block does not look
        like a C runtime stub or no suitable write to RDI was found.
    '''

    # Bail if there aren't at least two instructions.
    if len(instructions) < 2:
        note("Entry point too short to be C stub.")
        return None

    # Check that the last thing is a call.  A trailing hlt is skipped; the
    # length check above guarantees instructions[-2] exists.
    last = instructions[-1]
    if last.mnemonic == 'hlt':
        last = instructions[-2]
    if last.mnemonic != 'call':
        # Last thing in the block must be a call.
        note("Last effective instruction in block is not a call.")
        return None

    # Found a call.  It should be RIP relative and out of scope.
    # NOTE(review): elsewhere in this file the result of is_rip_relative is
    # treated as a displacement that is added to the next-instruction address
    # before use; here it is handed to rad.in_range unadjusted -- confirm that
    # this is intended.
    address = OperandTests.is_rip_relative(last.operands[0])
    if address is not None and rad.in_range(address):
        # The C runtime would be out of range.
        note("Last effective instruction in block is call in range.")
        return None

    # Run through this and find a setting for RDI.  Later matches overwrite
    # earlier ones, so the last recognized write wins.
    main_addr = None
    for insn in instructions:
        # See if this is a mov or lea.
        if insn.mnemonic != 'mov' and insn.mnemonic != 'lea':
            continue

        # See if the first operand is the RDI register.
        dest = insn.operands[0]
        if not OperandTests.is_reg(dest) or insn.reg_name(dest.reg) != 'rdi':
            continue
        sour = insn.operands[1]

        # Find out what value is being set for RDI.
        if insn.mnemonic == 'mov' and OperandTests.is_imm(sour):
            main_addr = sour.value.imm
        elif insn.mnemonic == 'lea':
            # is_rip_relative yields None for operands that are not RIP
            # relative; only compute an address when a displacement exists
            # (adding to None would raise TypeError).
            disp = OperandTests.is_rip_relative(sour)
            if disp is not None:
                main_addr = disp + insn.address + insn.size

    # Either we found main or we didn't.
    if main_addr is not None:
        note("Possible main function at " + hex(main_addr))
    return main_addr
Пример #2
0
def build_primes(nodes: Dict[int, Node]) -> Dict[int, SESENode]:
    '''Build one prime per node, then fold selected primes into the others.

    Every PredicateNode becomes an IfThenElse whose two arms are label
    assignments; every FunctionNode becomes a Sequence of the node followed
    by a label assignment.  Nodes of any other type are skipped.

    Afterwards, each address reported by LabelNode.get_singles() (per the
    original comments: primes referenced exactly once) that has a prime is
    removed from the result and substituted into the remaining primes.

    Returns the remaining primes, keyed by address.
    '''

    def rewrite_many(target: SESENode,
                     replacements: Dict[int, SESENode]) -> SESENode:
        '''Apply the replacements to a node repeatedly until LabelNode no
        longer reports itself dirty after a pass.'''
        # This is probably overkill.
        LabelNode.reset()
        target = target.replace(replacements)
        while LabelNode.is_dirty():
            LabelNode.reset()
            target = target.replace(replacements)
        return target

    # Stage one: a prime for every predicate or function node.
    primes: Dict[int, SESENode] = {}
    for start, current in nodes.items():
        if isinstance(current, PredicateNode):
            # If-then-else; both arms are just label assignments for now.
            on_true = LabelNode(current.get_true())
            on_false = LabelNode(current.get_false())
            primes[start] = IfThenElse(current, on_true, on_false)
        elif isinstance(current, FunctionNode):
            # Sequence of the function node and the label setting.
            successor = LabelNode(current.get_next())
            primes[start] = Sequence([current, successor])
        # Any other node type is silently ignored.

    note("Finished prime build")
    note(f"Built {len(primes)} primes")

    # Stage two: pull out the primes that are going to be instantiated
    # inside other primes.
    inlined = {}
    for start in LabelNode.get_singles():
        if start not in primes:
            continue
        debug(f"Instantiating {hex(start)}")
        inlined[start] = primes.pop(start)

    # Stage three: fully rewrite each replacement.  The mapping is updated
    # in place, so later rewrites see the already-rewritten entries.
    for start in inlined:
        inlined[start] = rewrite_many(inlined[start], inlined)

    # Stage four: a single replacement pass over what is left is enough,
    # because the replacements themselves were cleaned up above.
    reduced = {
        start: prime.replace(inlined)
        for start, prime in primes.items()
    }

    note("Finished reduction")
    note(f"Left {len(reduced)} primes")

    return reduced
Пример #3
0
def do_pass_two(bbs: Set[int], rad: RAD) -> Dict[int, Node]:
    '''Run pass two of basic block discovery.

    For every leader in bbs that the disassembler reports as in range, a
    basic block is grown instruction by instruction until something ends
    it.  The finished block is wrapped in a FunctionNode (one successor) or
    a PredicateNode (conditional branch: target and fall-through) and stored
    under the block's address.

    Returns the dictionary of nodes keyed by block address.
    '''

    note("Starting pass two")

    nodes: Dict[int, Node] = {}
    count = 0

    for leader in bbs:
        debug(f"Possible basic block at {hex(leader)}")
        if not rad.in_range(leader):
            continue

        # Grow a fresh basic block from this leader.
        bb = BasicBlock()
        count += 1
        cursor = leader
        while True:
            try:
                insn = rad.at(cursor)
            except (AddressException, NotExecutableException):
                # Cannot disassemble here; stop growing the block.
                # NOTE(review): no node is stored on this path, so any
                # instructions already collected are dropped -- confirm
                # that this is intended.
                break

            nextaddr = insn.address + insn.size
            bb.add(insn)

            if InstructionTests.is_call(insn):
                if DebugOpts.CALL_ENDS_BB:
                    # The call closes the block; flow resumes after it.
                    nodes[bb.get_address()] = FunctionNode(bb, nextaddr)
                    break
                # Otherwise assume the call returns and keep going.
                cursor = nextaddr

            elif (InstructionTests.is_branch(insn)
                  or InstructionTests.is_jump(insn)):
                # A branch or jump always closes the block.
                if insn.mnemonic.endswith('jmp'):
                    # Unconditional: a successor of 0 is used whenever the
                    # target cannot be computed from the operand.
                    target = 0
                    operand = insn.operands[0]
                    if OperandTests.is_imm(operand):
                        target = int(insn.op_str, 0)
                    elif OperandTests.is_mem(operand):
                        disp = OperandTests.is_rip_relative(operand)
                        if disp is not None:
                            target = nextaddr + disp
                    nodes[bb.get_address()] = FunctionNode(bb, target)
                else:
                    # Conditional: branch target and fall-through address.
                    nodes[bb.get_address()] = PredicateNode(
                        bb, int(insn.op_str, 0), nextaddr)
                break

            elif InstructionTests.is_interrupt(insn):
                if DebugOpts.SYSCALL_ENDS_BB:
                    # The system call closes the block.
                    nodes[bb.get_address()] = FunctionNode(bb, nextaddr)
                    break
                # Otherwise assume the system call returns and keep going.
                cursor = nextaddr

            elif (insn.mnemonic == 'hlt' or InstructionTests.is_ret(insn)
                  or InstructionTests.is_interrupt_return(insn)):
                # Halt or return: the block ends with a successor of 0.
                nodes[bb.get_address()] = FunctionNode(bb, 0)
                break

            else:
                # Ordinary instruction; the block continues.
                cursor = nextaddr

            # Running into another leader closes this block, with that
            # leader as its successor.
            if cursor in bbs:
                nodes[bb.get_address()] = FunctionNode(bb, cursor)
                break

    note("Finished pass two")
    note(f"Wrote {count} basic blocks")
    note(f"Generated {len(nodes)} nodes")

    return nodes
Пример #4
0
def do_pass_one(explore: List[int], rad: RAD) -> Set[int]:
    '''Find basic block leaders in a program.

    Arguments:
        explore: The initial leaders (addresses).  This list is used
            directly as the work stack, so it is modified in place and is
            empty when the function returns.
        rad: An initialized random access disassembler.

    Returns:
        The set of addresses of all potential basic block leaders that were
        discovered, including the initial ones.
    '''

    note("Starting pass one")

    # We maintain a stack of addresses to explore (explore), a set of basic
    # block leaders we have discovered (bbs), and a set of addresses that
    # have already been taken off the stack (visited) so that no address is
    # disassembled more than once.
    bbs = set(explore)
    visited: Set[int] = set()

    def add_explore(addr: int) -> None:
        '''Add an address to be explored, unless it was already explored.
        Duplicates still on the stack are filtered when they are popped.'''
        if addr not in visited:
            explore.append(addr)

    def add_leader(addr: int) -> None:
        '''Add a leader to the set of leaders, and also to the set of addresses
        to be explored.'''
        debug(f"adding leader: {hex(addr)}")
        if addr not in bbs:
            bbs.add(addr)
            add_explore(addr)

    # Disassemble the file, follow the links, and build a list of basic blocks
    # leaders.  Within this loop the explore list is treated as an (initialized)
    # stack to perform instruction tracing, and does not always contain only basic
    # block leaders.  Ultimately we have to discover the rest of the leaders we
    # can find, and those go in the bbs set.  Once the explore stack is empty,
    # we have finished, and bbs will contain all the potential basic block
    # leaders we have discovered.
    while explore:
        # Get the next address from the stack, skipping anything that has
        # already been processed; its successors were scheduled back then.
        address = explore.pop()
        if address in visited:
            continue
        visited.add(address)

        # Disassemble at the address.
        try:
            i = rad.at(address)
        except (AddressException, NotExecutableException):
            # This address is out of range or not executable; ignore and
            # continue.
            continue

        # Figure out the address that is one byte past the end of the
        # current instruction.  This is likely the address of the next
        # instruction in sequence.
        nextaddr = i.address + i.size

        # Based on the instruction type, determine the next address(es).
        # There are three things we can do here.
        #   (1) Add an address to the set of leaders (and the explore stack)
        #   (2) Add an address to the explore stack (it is not a leader)
        #   (3) Do nothing
        if InstructionTests.is_call(i):
            debug(f"found call at {hex(i.address)}; target is a leader")
            # This is a call.  Push the call target and the next
            # address on the stack to explore.  The call target is
            # a basic block leader.  If calls end the basic block, then
            # the next address after the call is also a leader.  We
            # assume all calls return.
            if OperandTests.is_imm(i.operands[0]):
                add_leader(i.operands[0].value.imm)
            elif OperandTests.is_mem(i.operands[0]):
                # We can only handle RIP-based addressing.
                disp = OperandTests.is_rip_relative(i.operands[0])
                if disp is not None:
                    # Now we can compute the address of the call.
                    add_leader(nextaddr + disp)
            if DebugOpts.CALL_ENDS_BB:
                add_leader(nextaddr)
            else:
                add_explore(nextaddr)

        elif InstructionTests.is_branch(i) or InstructionTests.is_jump(i):
            # Use the same test for an unconditional jump as pass two
            # (mnemonic ends with 'jmp'), so that prefixed or far jump
            # mnemonics are not mistaken for conditional branches.
            if i.mnemonic.endswith('jmp'):
                debug(f"found jump at {hex(i.address)}; target is leader")
                # Unconditional jump: only the target (when it can be
                # computed) is a leader; flow does not fall through.
                if OperandTests.is_imm(i.operands[0]):
                    # The target of the jump is the leader of a basic block.
                    add_leader(i.operands[0].value.imm)
                elif OperandTests.is_mem(i.operands[0]):
                    # We can only handle RIP-based addressing.
                    disp = OperandTests.is_rip_relative(i.operands[0])
                    if disp is not None:
                        # Now we compute the address of the jump.
                        add_leader(nextaddr + disp)
            else:
                debug(
                    f"found branch at {hex(i.address)}; true and false branches are leaders"
                )
                # This is a conditional branch.  Both the target of the branch
                # and the instruction following the branch are leaders.
                add_leader(i.operands[0].value.imm)
                add_leader(nextaddr)

        elif InstructionTests.is_interrupt(i):
            debug(f"found interrupt at {hex(i.address)}; possible leader")
            # This is an interrupt.  Assume we return and continue.
            if DebugOpts.SYSCALL_ENDS_BB:
                add_leader(nextaddr)
            else:
                add_explore(nextaddr)

        elif (i.mnemonic == 'hlt' or InstructionTests.is_ret(i)
              or InstructionTests.is_interrupt_return(i)):
            debug(f"found halt or return at {hex(i.address)}")
            # These end the basic block and flow does not continue to
            # the next instruction, so do not add anything to explore.

        else:
            # Assume this instruction flows to the next instruction
            # in sequence, but that instruction is not necessarily
            # a leader.
            add_explore(nextaddr)

    note("Pass one complete")
    note(f"Discovered {len(bbs)} potential basic blocks")

    return bbs