示例#1
0
    def __init__(self, func, r2obj: dict):
        """Build a basic block from a radare2 basic-block JSON object.

        :param func: Owning function object; stored as ``self.parent``.
        :param r2obj: Dict describing the block. ``offset`` and ``ops`` are
            required; ``jump``, ``fail`` and ``switchop`` are optional. Each
            entry of ``ops`` must provide ``offset`` and ``bytes``.
        :raises CapstoneDecodeError: If the bytes of an op do not decode to
            exactly one instruction.
        :raises UnhandledOutputError: If a required key is missing.
        """
        self.parent = func
        try:
            self.address = r2obj['offset']
            self.jump = r2obj.get('jump', 0)
            self.fail = r2obj.get('fail', 0)
            cases = r2obj.get('switchop', dict()).get('cases', dict())
            self.cases = {c['jump'] for c in cases}

            self.insns = []
            # One decoder serves the whole block; it does not depend on the op.
            md = Cs(CS_ARCH_X86, CS_MODE_64)
            md.detail = True
            for op in r2obj['ops']:
                _addr = op['offset']
                _insns = list(
                    md.disasm(BasicBlock.to_bytes(op['bytes']), _addr))
                if len(_insns) != 1:
                    raise CapstoneDecodeError(f'Decoder error at {_addr:#x}')
                _insn: CsInsn = _insns[0]
                _reads, _ = _insn.regs_access()
                # A 'jmp' that reads at least one register is flagged as
                # indirect.
                indirect = _insn.mnemonic == 'jmp' and len(_reads) > 0
                self.insns.append(Instruction(_addr, indirect))
        except KeyError as exc:
            # 'offset' itself may be the missing key, in which case
            # self.address was never assigned; never read it blindly here.
            _offset = r2obj.get('offset')
            _where = f'{_offset:#x}' if isinstance(_offset, int) else '<unknown>'
            err_msg = f'Unexpected radare2 output at Basic Block {_where}'
            logging.error(err_msg)
            raise UnhandledOutputError(err_msg) from exc
示例#2
0
def filter_asm_and_return_instruction_list(address,
                                           asm,
                                           symbols,
                                           arch,
                                           API,
                                           symbolic_call=True):
    """Disassemble hex-encoded code and return its hash plus filtered form.

    :param address: Address assigned to the first decoded instruction.
    :param asm: Machine code as a hex string (decoded with ``unhexlify``).
    :param symbols: Passed through to ``filter_memory_references``.
    :param arch: Capstone architecture constant. ``CS_ARCH_ARM`` selects ARM
        mode; every other value is decoded as 64-bit x86.
    :param API: Passed through to ``filter_memory_references``.
    :param symbolic_call: Passed through to ``filter_memory_references``.
    :return: Tuple ``(constantIndependt_hash(decoded instructions),
        list of filter_memory_references results)``.
    """
    raw_code = binascii.unhexlify(asm)

    if arch == capstone.CS_ARCH_ARM:
        decoder = Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM)
    else:
        decoder = Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
    decoder.detail = True

    filtered = []
    decoded = []
    for insn in decoder.disasm(raw_code, address):
        filtered_insn = filter_memory_references(insn,
                                                 symbols,
                                                 API,
                                                 symbolic_call=symbolic_call)
        filtered.append(filtered_insn)
        decoded.append(insn)
    del decoder

    digest = constantIndependt_hash(decoded)
    return (digest, filtered)
示例#3
0
    def generate_rule(self):
        """Generate the YARA rule and return the populated ``self.yr_rule``.

        Fills in the rule name and metadata, then adds one hex string per
        entry of ``self._chunks``: code chunks are disassembled and each
        instruction is converted through ``self._process_instruction``; data
        chunks are emitted as the hex dump of their raw bytes. The condition
        is always ``any of them``.
        """
        # Local import keeps this method self-contained.
        import binascii

        self.yr_rule.rule_name = self.rule_name
        self.yr_rule.metas["generated_by"] = "\"mkYARA - By Jelle Vergeer\""
        self.yr_rule.metas["date"] = "\"{}\"".format(datetime.now().strftime("%Y-%m-%d %H:%M"))
        self.yr_rule.metas["version"] = "\"1.0\""

        md = Cs(self.instruction_set, self.instruction_mode)
        md.detail = True
        md.syntax = CS_OPT_SYNTAX_INTEL

        for chunk_nr, chunk in enumerate(self._chunks, 1):
            chunk_id = "$chunk_{}".format(chunk_nr)
            if chunk.is_data is False:
                # Collect the per-instruction parts and join once instead of
                # growing two strings inside the loop.
                signature_parts = []
                comment_parts = []
                for ins in md.disasm(chunk.data, chunk.offset):
                    rule_part, comment = self._process_instruction(ins)
                    signature_parts.append(self.format_hex(rule_part) + "\n")
                    comment_parts.append(comment + "\n")
                self.yr_rule.add_string(chunk_id, "".join(signature_parts), StringType.HEX)
                if self.do_comment_sig:
                    self.yr_rule.comments.append("".join(comment_parts))
            else:
                # The "hex" codec of str.encode() only exists on Python 2;
                # binascii.hexlify works on both major versions.
                hex_data = binascii.hexlify(chunk.data)
                if not isinstance(hex_data, str):
                    hex_data = hex_data.decode("ascii")
                rule_part = self.format_hex(hex_data)
                self.yr_rule.add_string(chunk_id, rule_part, StringType.HEX)

        self.yr_rule.condition = "any of them"
        return self.yr_rule
示例#4
0
def show_asm(buff, mode, base):
    """
    Render a byte sequence as an x86 assembly listing.

    Each line holds the address, the instruction bytes as space-separated hex
    pairs (cut to 18 characters and marked with ``+`` when longer), the
    mnemonic and the operand string. The listing ends with a ``*/`` line.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.
    :param int base: Base address from which to start.

    :return: Assembly code representation.
    :rtype: str
    """

    decoder = Cs(CS_ARCH_X86, mode)
    decoder.detail = True

    lines = []
    for insn in decoder.disasm(buff, base):
        digits = binascii.hexlify(insn.bytes).decode('utf-8')
        # hexlify always yields an even number of digits, so slicing in
        # steps of two produces exactly the byte pairs.
        pairs = [digits[k:k + 2] for k in range(0, len(digits), 2)]
        hex_column = ' '.join(pairs)
        if len(hex_column) > 18:
            hex_column = hex_column[:18] + '+'
        address_column = '%08x:' % insn.address
        lines.append("{0:10} {1:20} {2:10} {3:10}\n".format(
            address_column, hex_column, insn.mnemonic, insn.op_str))
    lines.append('*/\n')

    return ''.join(lines)
示例#5
0
def get_raw_disassembler(arch, detailed=True):
    """Return an x86 Capstone disassembler for the given binary type.

    :param arch: Either ``BinaryType.SCS_32BIT_BINARY.value`` or
        ``BinaryType.SCS_64BIT_BINARY.value``.
    :param detailed: Value assigned to the disassembler's ``detail`` flag.
    :return: The configured ``Cs`` instance.
    :raises Exception: If ``arch`` is neither of the two supported values.
    """
    if arch == BinaryType.SCS_32BIT_BINARY.value:
        cs_mode = CS_MODE_32
    elif arch == BinaryType.SCS_64BIT_BINARY.value:
        cs_mode = CS_MODE_64
    else:
        raise Exception("No disassembler for this architecture")

    engine = Cs(CS_ARCH_X86, cs_mode)
    engine.detail = detailed
    return engine
示例#6
0
 def __init__(self, encoding, position):
     """Decode ``encoding`` with Capstone and keep the result in ``_cap``.

     The parent initializer runs first; ``self._encoding`` is then packed
     into four little-endian bytes and disassembled in ARM mode at address
     ``position``. ``self._cap`` ends up holding the last instruction
     produced, or ``None`` when nothing could be decoded.
     """
     super(CAPSInstruction, self).__init__(encoding, position)

     raw_word = self._encoding.to_bytes(4, byteorder='little')
     decoder = Cs(CS_ARCH_ARM, CS_MODE_ARM)
     decoder.detail = True

     self._cap = None
     for decoded in decoder.disasm(raw_word, position):
         self._cap = decoded
示例#7
0
def disasm_plt(bytes, offset=0):
    """Resolve the RIP-relative memory operand of the first instruction.

    :param bytes: Raw x86-64 machine code (the name shadows the builtin but
        is kept so keyword callers keep working).
    :param offset: Address assigned to the first byte of ``bytes``.
    :return: ``(target_address, operand_size)`` for the first RIP-relative
        memory operand of the first instruction, or ``(None, None)`` when
        nothing decodes, no such operand exists, or Capstone reports an
        error.
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        # Only the first instruction is inspected, so decode just that one.
        instruc = next(md.disasm(bytes, offset), None)
        if instruc is None:
            return None, None

        # RIP-relative displacements are relative to the address of the
        # following instruction, i.e. this instruction's address plus its
        # size. Computing it this way also works when no second instruction
        # was decoded.
        next_ip = instruc.address + instruc.size
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_ip + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Keep the return shape consistent so callers can always unpack.
        return None, None
示例#8
0
def find_single(args):
    """Collect gadgets that end at one reference point of a code section.

    The function takes a single tuple so it can be used with map-style
    dispatch. Tuple parameters in the signature were removed in Python 3
    (PEP 3113), so the tuple is unpacked in the body instead; callers still
    pass exactly one positional tuple.

    :param args: Tuple ``(raw_data, pvaddr, elftype, elf_base_addr, arch,
        mode, gad, need_filter, ref)``. ``gad`` is indexed at ``1`` for the
        size of the gadget-ending instruction and at ``2`` for the alignment
        step; ``ref`` is the offset of that instruction inside ``raw_data``.
    :return: List of gadgets that survived cleaning and filtering.
    """
    (raw_data, pvaddr, elftype, elf_base_addr, arch, mode, gad,
     need_filter, ref) = args

    C_SIZE = 1
    C_ALIGN = 2

    allgadgets = []

    md = Cs(arch, mode)
    md.detail = True

    align = gad[C_ALIGN]
    section_end = ref + gad[C_SIZE]

    # Walk backwards from the reference point, one alignment step at a time.
    for i in range(10):
        section_start = ref - i * align
        if section_start < 0:
            # A negative start would make the slice wrap around to the end
            # of raw_data; there is no code before the section start.
            break

        start_address = pvaddr + section_start
        if elftype == 'DYN':
            start_address = elf_base_addr + start_address

        code = raw_data[section_start:section_end]
        decodes = list(md.disasm(code, start_address))
        if not decodes:
            continue
        if (start_address % align) != 0:
            continue

        insns = [(decode.mnemonic + " " + decode.op_str).strip()
                 for decode in decodes]

        address = start_address
        if mode == CS_MODE_THUMB:
            # Low bit set marks the address as Thumb code.
            address = address | 1

        onegad = Gadget(address, insns, {}, 0, code)
        if not passClean(decodes):
            continue

        if arch == CS_ARCH_X86:
            onegad = filter_for_x86_big_binary(onegad)
        elif arch == CS_ARCH_ARM:
            onegad = filter_for_arm_big_binary(onegad)

        if (not need_filter) and onegad:
            classifier = GadgetClassifier(arch, mode)
            onegad = classifier.classify(onegad)

        if onegad:
            allgadgets.append(onegad)

    return allgadgets
示例#9
0
def disasm_plt(bytes, offset=0):
    """Resolve the RIP-relative memory operand of the first instruction.

    :param bytes: Raw x86-64 machine code (the name shadows the builtin but
        is kept so keyword callers keep working).
    :param offset: Address assigned to the first byte of ``bytes``.
    :return: ``(target_address, operand_size)`` for the first RIP-relative
        memory operand of the first instruction, or ``(None, None)`` when
        nothing decodes, no such operand exists, or Capstone reports an
        error.
    """
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        # Only the first instruction is inspected, so decode just that one.
        instruc = next(md.disasm(bytes, offset), None)
        if instruc is None:
            return None, None

        # RIP-relative displacements are relative to the address of the
        # following instruction, i.e. this instruction's address plus its
        # size. Computing it this way also works when no second instruction
        # was decoded.
        next_ip = instruc.address + instruc.size
        for op in instruc.operands:
            if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                return next_ip + op.mem.disp, op.size
        return None, None
    except CsError as e:
        print("ERROR: %s" % e)
        # Keep the return shape consistent so callers can always unpack.
        return None, None
示例#10
0
def generate_pic(buff, mode):
    """
    Return a position independent result of the byte sequence.

    Every instruction with exactly one immediate operand or exactly one
    memory operand opens a span that starts after its opcode, ModRM, REX,
    SIB and prefix bytes and runs up to the next instruction (or the end of
    the buffer). The collected spans are handed to ``_to_yara_hex_string``.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.

    :return: YARA compliant hex string sequence.
    :rtype: str
    """

    decoder = Cs(CS_ARCH_X86, mode)
    decoder.detail = True

    spans = []
    span_open = False
    span_start = 0

    for insn in decoder.disasm(buff, 0x0):

        # Close the span opened by the previous instruction at this
        # instruction's address.
        if span_open:
            spans.append((span_start, insn.address - span_start))
            span_open = False

        if not (insn.op_count(X86_OP_IMM) == 1
                or insn.op_count(X86_OP_MEM) == 1):
            continue

        span_start = insn.address + _get_opcode_length(insn.opcode)
        span_open = True

        # One extra byte for each encoding field that is present.
        for encoding_field in (insn.modrm, insn.rex, insn.sib):
            if encoding_field > 0:
                span_start += 1

        # Prefix slots holding zero are unused; count only the used ones.
        span_start += MAX_PREFIX_SIZE - insn.prefix.count(0x0)

    if span_open:
        spans.append((span_start, len(buff) - span_start))

    return '{ ' + _to_yara_hex_string(buff, spans) + ' }'
示例#11
0
 def __parse_plt(self):
     """Fill ``plt_got_dic`` and ``got_plt_dic`` from the ``.plt`` section.

     The section is disassembled as x86-64. Every third instruction
     (indices 3, 6, 9, ...) is expected to have a single memory operand;
     its displacement is added to the address of the following instruction
     and the result is recorded in both directions.

     :raises ValueError: If the ELF file has no ``.plt`` section.
     """
     plt_section = self.elf.get_section_by_name(".plt")
     if plt_section is None:
         raise ValueError

     decoder = Cs(CS_ARCH_X86, CS_MODE_64)
     decoder.detail = True
     stream = decoder.disasm(plt_section.data(), plt_section["sh_addr"])

     for index, insn in enumerate(stream):
         # Index 0 is skipped on purpose, as are all non-multiples of 3.
         if index == 0 or index % 3 != 0:
             continue
         next_ip = insn.address + insn.size
         assert len(insn.operands) == 1
         target = next_ip + insn.operands[0].value.mem.disp
         self.plt_got_dic[insn.address] = target
         self.got_plt_dic[target] = insn.address
示例#12
0
    def _cs_disassemble_one(self, data, address):
        """Return the first Capstone instruction decoded from ``data``.

        The instance's own disassembler is tried first. When it yields
        nothing, a fresh ARM-mode disassembler is tried as a fallback.

        :raises InvalidDisassemblerData: If neither attempt decodes anything.
        """
        decoded = list(self._disassembler.disasm(data, address))
        if decoded:
            return decoded[0]

        # TODO: Improve this check.
        fallback = Cs(CS_ARCH_ARM, CS_MODE_ARM)
        fallback.detail = True
        decoded = list(fallback.disasm(data, address))
        if decoded:
            return decoded[0]

        raise InvalidDisassemblerData("CAPSTONE: Unknown instruction (Addr: {:s}).".format(hex(address)))
示例#13
0
    def disasm(self, addr):
        """Disassemble the section containing ``addr`` into ``self.code``.

        The section is looked up through ``self.binary.get_section``; if it
        is not flagged executable, ``die`` is called with an error message.
        Decoded instructions are stored in ``self.code`` keyed by address,
        and their addresses are appended to ``self.code_idx`` in order.
        """
        section_info = self.binary.get_section(addr)
        (data, virtual_addr, flags) = section_info

        if not flags["exec"]:
            die("the address 0x%x is not in an executable section" % addr)

        if self.bits == 64:
            cs_mode = CS_MODE_64
        else:
            cs_mode = CS_MODE_32
        decoder = Cs(CS_ARCH_X86, cs_mode)
        decoder.detail = True

        for insn in decoder.disasm(data, virtual_addr):
            insn_addr = insn.address
            self.code[insn_addr] = insn
            self.code_idx.append(insn_addr)

        # Imported symbols for PE are loaded only now because their
        # resolution needs the decoded code.
        if self.binary.get_type() == T_BIN_PE:
            self.binary.load_import_symbols(self.code)
示例#14
0
 def compute_eflags_setter(self):
     """Record, per trace, the first instruction able to touch monitored flags.

     For every ``(fl, traces)`` pair in ``self.traces`` the gadgets of each
     trace are disassembled in order as 32-bit x86. The first instruction
     whose ``eflags`` overlap the monitored bits is reported through
     ``fl.set_eflag_bitmask`` and the rest of that trace is skipped.

     The search state is local to each trace. A match found in the last
     gadget of one trace therefore cannot cause the next trace to be
     skipped, which a sentinel flag shared across traces would do.
     """
     dis32 = Cs(CS_ARCH_X86, CS_MODE_32)
     dis32.detail = True
     for fl, traces in self.traces.items():
         for trace in traces:
             for g_addr in trace:
                 gadget_bytes = self._emu.gadget_map[g_addr].rop_bytes
                 for insn in dis32.disasm(gadget_bytes, g_addr):
                     # Check every instruction of the gadget to see if it can perform a modification of the
                     # monitored bits (doesn't mean that the bits have been actually modified)
                     if insn.eflags and insn.eflags & self.capstone_to_eflags_aux(fl.monitored_bits):
                         fl.set_eflag_bitmask(self.capstone_to_eflags_aux(insn.eflags))
                         break
                 else:
                     # No flag-setting instruction in this gadget: move on
                     # to the next gadget of the same trace.
                     continue
                 # A setter was found: stop processing this trace.
                 break
示例#15
0
    def _cs_disassemble_one(self, data, address):
        """Return the first Capstone instruction decoded from ``data``.

        The instance's own disassembler is tried first. When it yields
        nothing, a fresh ARM-mode disassembler is tried as a fallback.

        :raises InvalidDisassemblerData: If neither attempt decodes anything.
        """
        decoded = list(self._disassembler.disasm(data, address))
        if decoded:
            return decoded[0]

        # TODO: Improve this check.
        fallback = Cs(CS_ARCH_ARM, CS_MODE_ARM)
        fallback.detail = True
        decoded = list(fallback.disasm(data, address))
        if decoded:
            return decoded[0]

        message = "CAPSTONE: Unknown instruction (Addr: {:s}).".format(
            hex(address))
        raise InvalidDisassemblerData(message)
示例#16
0
def get_compiler_info(rom_bytes, entry_point, print_result=True):
    """Guess the compiler of a ROM by comparing ``b`` and ``j`` counts.

    Everything from byte offset 0x1000 of ``rom_bytes`` is disassembled as
    big-endian MIPS64 starting at address ``entry_point``. More ``b`` than
    ``j`` mnemonics yields ``"IDO"``; otherwise ``"GCC"``.

    :param rom_bytes: Raw ROM contents.
    :param entry_point: Address assigned to the first decoded instruction.
    :param print_result: When true, print a summary line.
    :return: ``"IDO"`` or ``"GCC"``.
    """
    decoder = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN)
    decoder.detail = True

    tally = {"j": 0, "b": 0}
    for insn in decoder.disasm(rom_bytes[0x1000:], entry_point):
        mnemonic = insn.mnemonic
        if mnemonic in tally:
            tally[mnemonic] += 1

    jumps = tally["j"]
    branches = tally["b"]

    if branches > jumps:
        compiler = "IDO"
    else:
        compiler = "GCC"

    if print_result:
        print(
            f"{branches} branches and {jumps} jumps detected in the first code segment. Compiler is most likely {compiler}"
        )
    return compiler
示例#17
0
def disasm_bytes(bytes, addr):
    """Disassemble ARM64 code four bytes at a time.

    Each 4-byte slice of ``bytes`` is decoded on its own. A slice that does
    not decode is replaced by a wrapped ``nop`` whose mnemonic is
    overwritten with a ``.quad`` data directive carrying the slice's
    little-endian value, so the result has an entry for every slice.

    :param bytes: Raw machine code.
    :param addr: Address of the first byte.
    :return: List of decoded instructions and placeholder wrappers.
    """
    md = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
    md.syntax = CS_OPT_SYNTAX_ATT
    md.detail = True

    nop_encoding = b"\x1f\x20\x03\xd5"
    result = []
    for start in range(0, len(bytes), 4):
        word = bytes[start:start + 4]
        word_addr = addr + start

        decoded = list(md.disasm(word, word_addr))
        if decoded:
            result.extend(decoded)
            continue

        # Invalid instruction: craft a fake "nop" so the rest of the code
        # keeps working, then relabel it as data with a comment.
        placeholder = InstructionWrapper(
            list(md.disasm(nop_encoding, word_addr))[0])
        # The original author was unsure whether 'little' is always right.
        raw_value = int.from_bytes(word, byteorder="little")
        placeholder.mnemonic = ".quad 0x%x // invalid instruction" % raw_value
        result.append(placeholder)
    return result
示例#18
0
def dumpASM(flo, mode, maxAddr=1e99):
    """Print an x86 disassembly of ``flo`` with implicit register usage.

    :param flo: Raw machine code, disassembled from address 0.
    :param mode: ``32`` or ``64``; any other value raises ``KeyError``.
    :param maxAddr: Printing stops after the first instruction whose
        address is greater than this value (that instruction is still
        printed).
    """
    decoder = Cs(CS_ARCH_X86, {32: CS_MODE_32, 64: CS_MODE_64}[mode])
    decoder.detail = True

    sections = (
        ("\tImplicit registers read: ", "regs_read"),
        ("\tImplicit registers written: ", "regs_write"),
    )

    for insn in decoder.disasm(flo, 0):
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))

        for heading, attribute in sections:
            print(heading, end="")
            for reg in getattr(insn, attribute):
                print("%s " % insn.reg_name(reg))
            print()

        if insn.address > maxAddr:
            break
示例#19
0
    def disas_function(self, name):
        """Print the disassembly of the function called ``name``.

        The function's bytes are sliced out of the ``.text`` section using
        the ``st_value`` and ``st_size`` fields of its entry in
        ``self.functions_name_dic``. Each instruction is printed through
        ``self.__disas_function_format``, followed by the registers it reads
        and writes, when there are any.

        Returns without printing when the entry for ``name`` is ``None``.
        """
        if len(self.functions_name_dic) == 0:
            self.__parse_functions()
        all_txt = self.elf.get_section_by_name(".text")
        base_addr = all_txt["sh_addr"]
        sct = self.functions_name_dic[name]
        if sct is None:
            return
        offset = sct["st_value"] - base_addr
        func_txt = all_txt.data()[offset:offset + sct["st_size"]]
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        for mnemonic in md.disasm(func_txt, sct["st_value"]):
            print(
                self.__disas_function_format.format(hex(mnemonic.address),
                                                    mnemonic.mnemonic,
                                                    mnemonic.op_str))
            read_regs, write_regs = mnemonic.regs_access()

            # Register ids are translated one by one and then joined.
            # Folding with reduce() would feed the already-joined string
            # back into reg_name() once there are three or more registers.
            if len(read_regs) > 0:
                print("\tRead registers: {}".format(
                    ", ".join(mnemonic.reg_name(r) for r in read_regs)))
            if len(write_regs) > 0:
                print("\tWrite registers: {}".format(
                    ", ".join(mnemonic.reg_name(r) for r in write_regs)))
            """
示例#20
0
def test_details():
    """Print each ``.text`` instruction of ``a.out`` with and without its immediate.

    For every instruction the address, mnemonic and operands are printed,
    followed by its bytes. Instructions that have at least one immediate
    operand additionally get a second byte list with the immediate's bytes
    removed, located through ``imm_offset`` and ``imm_size``.
    """
    elffile = get_ELFFile('a.out')
    code = elffile.get_section_by_name('.text')
    ops = code.data()
    addr = code['sh_addr']
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    md.detail = True

    for insn in md.disasm(ops, addr):
        print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))
        # Build real lists: printing a map() object shows nothing useful on
        # Python 3.
        print([to_x(int(x)) for x in insn.bytes])
        if insn.op_count(X86_OP_IMM) == 0:
            continue
        # Half-open range [imm_start, imm_end) covering exactly imm_size
        # bytes; extending it further would also drop bytes that follow the
        # immediate.
        imm_start = insn.imm_offset
        imm_end = imm_start + insn.imm_size
        bytes_no_imm = [
            byte for index, byte in enumerate(insn.bytes)
            if not imm_start <= index < imm_end
        ]
        print([to_x(int(x)) for x in bytes_no_imm])
示例#21
0
    def dis_assemble(self):
        """Disassemble ``self.op`` and extract registers from the result.

        ``self.op`` is a hex string; it is decoded at address ``self.ip``
        with the configured architecture and mode. Every decoded instruction
        is passed to ``extract_registers``; when ``self.saveDetail`` is set,
        the instruction and the disassembler are also kept on the instance.

        :return: ``cnst.success()`` if at least one instruction was
            processed, otherwise ``cnst.fail()``. Capstone errors are logged
            rather than raised.
        """
        status = cnst.fail()

        try:
            # The whole buffer could hold several instructions; each one
            # decoded is processed in turn.
            machine = Cs(self.machineArch, self.machineMode)
            machine.detail = True
            raw_code = bytes(bytearray.fromhex(self.op))
            for inst in machine.disasm(raw_code, self.ip):
                if self.saveDetail:
                    self.disAssembledInstruction = inst
                    self.machine = machine

                self.extract_registers(inst)
                status = cnst.success()
        except CsError as e:
            self.log_handler.error("%s" % e)

        return status
示例#22
0
def generate_mnemonic(buff, mode):
    """
    Return a mnemonic only result of the byte sequence.

    For every instruction a span is recorded that starts right after its
    opcode bytes and covers the rest of the instruction. The spans are
    handed to ``_to_yara_hex_string``.

    :param bytes buff: Complete data stream.
    :param int mode: Capstone hardware mode.

    :return: YARA compliant hex string sequence.
    :rtype: str
    """

    decoder = Cs(CS_ARCH_X86, mode)
    decoder.detail = True

    spans = []
    for insn in decoder.disasm(buff, 0x0):
        opcode_len = _get_opcode_length(insn.opcode)
        span_start = insn.address + opcode_len
        span_size = len(insn.bytes) - opcode_len
        spans.append((span_start, span_size))

    return '{ ' + _to_yara_hex_string(buff, spans) + ' }'
示例#23
0
from __future__ import print_function

# test1.py
from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_MODE_32

# Sample x86 machine code that is decoded and printed below.
CODE = b"\x8d\x44\x38\x02"

md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True

for i in md.disasm(CODE, 0):
    print("0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))
    if len(i.regs_read) > 0:
        # With print_function in effect a trailing comma does not suppress
        # the newline and a bare `print` is a no-op, so end="" and print()
        # are used instead.
        print("\tImplicit registers read: ", end="")
        for r in i.regs_read:
            print("%s " % i.reg_name(r), end="")
        print()
    if len(i.groups) > 0:
        print("\tThis instruction belongs to groups:", end="")
        for g in i.groups:
            print("%u" % g)
        print()


def dumpASM(flo, mode, maxAddr=1e99):
    """Set up an x86 Capstone disassembler for a 32- or 64-bit dump.

    ``mode`` must be ``32`` or ``64``; any other value raises ``KeyError``
    from the lookup table below.

    NOTE(review): ``flo`` and ``maxAddr`` are not used by the lines visible
    here; the body appears to stop right after the disassembler is
    configured. Confirm against the complete source.
    """
    modeRef = {32: CS_MODE_32, 64: CS_MODE_64}

    md = Cs(CS_ARCH_X86, modeRef[mode])
    md.detail = True
示例#24
0
from capstone import Cs, CS_ARCH_ARM, CS_MODE_ARM
from capstone.arm_const import *

#CODE = b"\xe1\x0b\x40\xe1\x20\x04\x81\xda\x20\x08\x02\x8b"
CODE = b"\x04\xe0\x2d\xe5\x20\x04\x81\xda\x20\x08\x02\x8b"
#e52de004
md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True
ARM_INS_STR
for insn in md.disasm(CODE, 0x38):
    print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))

    print("\tCode condition: %u" % insn.cc)
    if len(insn.operands) > 0:
        print("\tNumber of operands: %u" % len(insn.operands))
        c = -1
        for i in insn.operands:
            c += 1
            if i.type == ARM_OP_REG:
                print("\t\toperands[%u].type: REG = %s" %
                      (c, insn.reg_name(i.value.reg)))
            if i.type == ARM_OP_IMM:
                print("\t\toperands[%u].type: IMM = 0x%x" % (c, i.value.imm))
            if i.type == ARM_OP_CIMM:
                print("\t\toperands[%u].type: C-IMM = %u" % (c, i.value.imm))
            if i.type == ARM_OP_FP:
                print("\t\toperands[%u].type: FP = %f" % (c, i.value.fp))
            if i.type == ARM_OP_MEM:
                print("\t\toperands[%u].type: MEM" % c)
                if i.value.mem.base != 0:
                    print("\t\t\toperands[%u].mem.base: REG = %s" \
示例#25
0
def disasm(bytes, offset=0):
    """Disassemble x86-64 code and annotate each instruction for display.

    Extra attributes are attached to the Capstone instruction objects:
    ``nop`` for no-ops; jump/call metadata (``internal_jump`` or
    ``external_jump`` plus the ``jump_*`` fields) for immediate
    destinations; ``rip*`` fields for RIP-relative memory operands; the
    names of implicitly read and written registers; and documentation
    lookups. Instructions without documentation are appended to
    ``missing_docs.log``.

    Print calls use the single-argument function form, which behaves the
    same under Python 2 and Python 3.

    NOTE(review): the byte rendering below applies ``ord()`` to each element
    of the data returned by ``get_bytes`` and joins the elements as text,
    so it expects iteration to yield one-character strings.

    :param bytes: Raw machine code (the name shadows the builtin but is
        kept for keyword callers).
    :param offset: Address assigned to the first byte.
    :return: List of annotated instructions, or ``None`` after a Capstone
        error has been printed.
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str))
            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True
            # Handle jump/call instructions
            if instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                # We can only decode the destination if it's an immediate value
                if instr.operands[0].type == x86.X86_OP_IMM:
                    # Ignore if it's a jump/call to an address within this function
                    func_start_addr = disassembled[0].address
                    func_end_addr = disassembled[-1].address
                    dest_addr = instr.operands[0].imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol = executable.ex.get_symbol_by_addr(dest_addr)
                        if symbol:
                            text_sect = executable.ex.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']

                            instr.external_jump = True
                            instr.jump_address = dest_addr
                            instr.jump_function_name = demangle(symbol.name)
                            instr.jump_function_address = dest_addr
                            instr.jump_function_offset = dest_addr - sect_addr + sect_offset
                            instr.jump_function_size = symbol['st_size']
                            instr.comment = demangle(symbol.name)
            # Handle individual operands
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # The displacement is relative to the next instruction,
                    # i.e. this instruction's address plus its size. Indexing
                    # the following list entry instead would raise IndexError
                    # when the last instruction is RIP-relative.
                    instr.rip_resolved = instr.address + instr.size + instr.rip_offset
                    symbol = executable.ex.get_symbol_by_addr(instr.rip_resolved)
                    if symbol:
                        instr.comment = demangle(symbol.name)
                    raw = executable.ex.get_bytes(instr.rip_resolved, op.size)
                    instr.rip_value_hex = " ".join(hex(ord(char)) for char in raw)
                    # HTML collapses consecutive spaces. For presentation purposes, replace spaces
                    # with &nbsp (non-breaking space)
                    if op.size == 16:
                        instr.rip_value_ascii = ''.join(
                            '&nbsp' if char == ' ' else char for char in raw)
                    # TODO: there's a bug involving ASCII that cannot be jsonified. To get around
                    # it, we're temporarily pretending they don't exist. Those edge cases need to be
                    # handled.
                    # see typeName(
                    else:
                        instr.rip_value_ascii = "under construction..."
            # what registers does this instruction read/write?
            instr.regs_write_names = [instr.reg_name(reg) for reg in instr.regs_write]
            instr.regs_read_names = [instr.reg_name(reg) for reg in instr.regs_read]
            # Add in documentation meta-data
            instr.docfile = doc_file(instr)
            instr.short_desc = get_short_desc(instr)
            if instr.docfile is None:
                with open('missing_docs.log', 'a+') as f:
                    f.write('[{}] : {}\n'.format(str(datetime.datetime.now()), instr.mnemonic))
        return disassembled

    except CsError as e:
        print("ERROR: %s" % e)
示例#26
0
文件: code.py 项目: gipi/abstruct
def disasm(code, arch, mode, start=0, detail: bool = True):
    """Lazily yield the Capstone instructions decoded from ``code``.

    :param code: Raw machine code.
    :param arch: Capstone architecture constant.
    :param mode: Capstone mode constant.
    :param start: Address assigned to the first byte.
    :param detail: Value assigned to the disassembler's ``detail`` flag.
    """
    engine = Cs(arch, mode)
    engine.detail = detail

    yield from engine.disasm(code, start)
示例#27
0
def disasm(exe, bytes, offset=0):
    """Disassemble x86-64 code and annotate each instruction for display.

    Extra attributes are attached to the Capstone instruction objects:
    ``nop``; jump-table and jump/call metadata; ``return_type`` for
    returns; for RIP-relative memory operands the resolved address, the
    value found there unpacked at several widths, and a symbol comment;
    explicit and implicit register usage; and documentation lookups.
    Instructions without documentation are appended to
    ``missing_docs.log`` under ``CUR_PATH``.

    Print calls use the single-argument function form, which behaves the
    same under Python 2 and Python 3.

    NOTE(review): the byte rendering below applies ``ord()`` to each element
    of the data returned by ``exe.get_bytes`` and joins the elements as
    text, so it expects iteration to yield one-character strings.

    :param exe: Executable wrapper providing ``get_symbol_by_addr``,
        ``get_section_from_offset``, ``get_bytes`` and ``elff``.
    :param bytes: Raw machine code (the name shadows the builtin but is
        kept for keyword callers).
    :param offset: Address assigned to the first byte.
    :return: List of annotated instructions, or ``None`` after a Capstone
        error has been printed.
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic, instr.op_str))
            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True

            # Handle jump/call instructions
            elif instr.group(x86.X86_GRP_JUMP) or instr.group(x86.X86_GRP_CALL):
                # jump table
                if instr.group(x86.X86_GRP_JUMP) and instr.operands[0].type == x86.X86_OP_REG:
                    instr.jump_table = instr.reg_name(instr.operands[0].reg)

                # We can only decode the destination if it's an immediate value
                elif instr.operands[0].type == x86.X86_OP_IMM:
                    # Ignore if it's a jump/call to an address within this function
                    func_start_addr = disassembled[0].address
                    func_end_addr = disassembled[-1].address
                    dest_addr = instr.operands[0].imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol, field_name = exe.get_symbol_by_addr(
                            dest_addr,
                            instr.address)
                        if symbol:
                            text_sect = exe.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']

                            instr.comment = demangle(symbol.name)
                            # only follow call address if it is a known location
                            if symbol['st_size'] > 0:
                                instr.external_jump = True
                                instr.jump_address = symbol["st_value"]
                                instr.jump_function_name = demangle(symbol.name)
                                instr.jump_function_address = symbol["st_value"]
                                instr.jump_function_offset = symbol["st_value"] - sect_addr + sect_offset
                                instr.jump_function_size = symbol['st_size']

            if instr.group(x86.X86_GRP_RET):
                instr.return_type = True
            # Handle individual operands
            instr.regs_explicit = []
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # The displacement is relative to the next instruction,
                    # i.e. this instruction's address plus its size. Indexing
                    # the following list entry instead would raise IndexError
                    # when the last instruction is RIP-relative.
                    instr.rip_resolved = instr.address + instr.size + instr.rip_offset

                    # file offset depends on section
                    section = exe.get_section_from_offset(instr.rip_resolved)
                    file_offset = instr.rip_resolved - section["sh_addr"] + section["sh_offset"]

                    # Read in and unpack the first byte at the offset
                    val_8 = exe.get_bytes(file_offset, 1)
                    instr.signed_8 = unpack('b', val_8)[0]
                    instr.unsigned_8 = unpack('B', val_8)[0]
                    instr.hex_8 = hex(instr.unsigned_8)

                    # Read in and unpack the first two bytes at the offset
                    val_16 = exe.get_bytes(file_offset, 2)
                    instr.signed_16 = unpack('h', val_16)[0]
                    instr.unsigned_16 = unpack('H', val_16)[0]
                    instr.hex_16 = hex(instr.unsigned_16)

                    # Read in and unpack the first four bytes at the offset
                    val_32 = exe.get_bytes(file_offset, 4)
                    instr.signed_32 = unpack('i', val_32)[0]
                    instr.unsigned_32 = unpack('I', val_32)[0]
                    instr.hex_32 = hex(instr.unsigned_32)
                    instr.float = unpack('f', val_32)[0]

                    # Read in and unpack the first eight bytes at the offset
                    val_64 = exe.get_bytes(file_offset, 8)
                    instr.signed_64 = unpack('q', val_64)[0]
                    instr.unsigned_64 = unpack('Q', val_64)[0]
                    instr.hex_64 = hex(instr.unsigned_64)
                    instr.double = unpack('d', val_64)[0]

                    symbol, field_name = exe.get_symbol_by_addr(
                        instr.rip_resolved,
                        instr.address,
                        instr_size=op.size,
                        get_sub_symbol=True)
                    if symbol:
                        instr.comment = demangle(symbol.name)
                        if field_name:
                            instr.comment += '.' + field_name
                    raw = exe.get_bytes(file_offset, op.size)
                    instr.rip_value_hex = " ".join(hex(ord(char)) for char in raw)
                    # HTML collapses consecutive spaces. For presentation purposes, replace spaces
                    # with &nbsp (non-breaking space)
                    if op.size == 16:
                        instr.rip_value_ascii = ''.join(
                            '&nbsp' if char == ' ' else char for char in raw)
                    # TODO: there's a bug involving ASCII that cannot be jsonified. To get around
                    # it, we're temporarily pretending they don't exist. Those edge cases need to be
                    # handled.
                    # see typeName(
                    else:
                        instr.rip_value_ascii = "under construction..."
                # Handle explicitly read/written registers
                if op.type == x86.X86_OP_MEM:
                    ptr = ["", "", ""] # using an array instead of object to guarantee ordering
                    instr.regs_ptr_explicit = []
                    if op.value.mem.base != 0:
                        regname = instr.reg_name(op.value.mem.base)
                        ptr[0] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.index != 0:
                        regname = instr.reg_name(op.value.mem.index)
                        ptr[1] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.disp != 0:
                        ptr[2] = hex(op.value.mem.disp)

                    instr.ptr = ptr
                    instr.ptr_size = op.size
                    instr.regs_explicit.append(instr.ptr)
                elif op.type == x86.X86_OP_REG:
                    instr.regs_explicit.append(instr.reg_name(op.value.reg))
                else:
                    instr.regs_explicit.append("")

            # what registers does this instruction read/write?
            instr.regs_write_implicit = [instr.reg_name(reg) for reg in instr.regs_write]
            # Calls are additionally recorded as writing RAX.
            if instr.group(x86.X86_GRP_CALL) and instr.reg_name(x86.X86_REG_RAX) not in instr.regs_write_implicit:
                instr.regs_write_implicit.append(instr.reg_name(x86.X86_REG_RAX))
            instr.regs_read_implicit = [instr.reg_name(reg) for reg in instr.regs_read]
            # Add in documentation meta-data
            instr.short_desc, instr.docfile = get_documentation(instr)
            if instr.docfile is None or instr.short_desc is None:
                with open(CUR_PATH + 'missing_docs.log', 'a+') as f:
                    f.write('[{}] : {} : {} : {}\n'.format(str(datetime.datetime.now()), instr.mnemonic, instr.docfile, instr.short_desc))
        return disassembled

    except CsError as e:
        print("ERROR: %s" % e)
示例#28
0
def disasm(exe, bytes, offset=0):
    """Disassemble ``bytes`` as x86-64 code and annotate each instruction.

    Every decoded Capstone instruction is decorated in place with extra
    attributes describing no-ops, jump/call targets, returns, RIP-relative
    data references, explicit/implicit register usage and documentation
    metadata. The offset and one line per instruction are printed as a trace.

    Args:
        exe: Executable wrapper. This function calls its
            ``get_symbol_by_addr``, ``get_section_from_offset`` and
            ``get_bytes`` methods and reads ``exe.elff``.
        bytes: Machine code to disassemble. (The name shadows the builtin but
            is kept so keyword callers keep working.)
        offset: Address assigned to the first decoded instruction.

    Returns:
        The list of annotated instructions, or ``None`` when Capstone raises
        ``CsError`` (the error is printed instead of propagated).
    """
    print("offset %i" % offset)
    try:
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        disassembled = list(md.disasm(bytes, offset))
        for instr in disassembled:
            print("0x%x:\t%s\t%s" % (instr.address, instr.mnemonic,
                                     instr.op_str))
            # Handle no-op instructions
            if instr.id == x86.X86_INS_NOP:
                instr.nop = True

            # Handle jump/call instructions
            elif instr.group(x86.X86_GRP_JUMP) or instr.group(
                    x86.X86_GRP_CALL):
                # A jump through a register is recorded as a jump table.
                if instr.group(x86.X86_GRP_JUMP
                               ) and instr.operands[0].type == x86.X86_OP_REG:
                    instr.jump_table = instr.reg_name(instr.operands[0].reg)

                # We can only decode the destination if it's an immediate value
                elif instr.operands[0].type == x86.X86_OP_IMM:
                    # A destination between the first and last decoded
                    # instruction (inclusive) stays within this function.
                    func_start_addr = disassembled[0].address
                    func_end_addr = disassembled[-1].address
                    dest_addr = instr.operands[0].imm
                    if func_start_addr <= dest_addr <= func_end_addr:
                        instr.internal_jump = True
                        instr.jump_address = dest_addr
                    else:
                        symbol, field_name = exe.get_symbol_by_addr(
                            dest_addr, instr.address)
                        if symbol:
                            text_sect = exe.elff.get_section_by_name('.text')
                            sect_addr = text_sect['sh_addr']
                            sect_offset = text_sect['sh_offset']

                            instr.comment = demangle(symbol.name)
                            # only follow call address if it is a known location
                            if symbol['st_size'] > 0:
                                instr.external_jump = True
                                instr.jump_address = symbol["st_value"]
                                instr.jump_function_name = demangle(
                                    symbol.name)
                                instr.jump_function_address = symbol[
                                    "st_value"]
                                # Translate the symbol's address into a file
                                # offset using the .text section header.
                                instr.jump_function_offset = symbol[
                                    "st_value"] - sect_addr + sect_offset
                                instr.jump_function_size = symbol['st_size']

            if instr.group(x86.X86_GRP_RET):
                instr.return_type = True

            # Handle individual operands
            instr.regs_explicit = []
            for op in instr.operands:
                # Handle rip-relative operands
                if op.type == x86.X86_OP_MEM and op.mem.base == x86.X86_REG_RIP:
                    instr.rip = True
                    instr.rip_offset = op.mem.disp
                    # RIP-relative displacements are measured from the end of
                    # the current instruction. Computing that from the
                    # instruction's own size (rather than peeking at the next
                    # list entry) also works for the last decoded instruction.
                    instr.rip_resolved = (instr.address + instr.size +
                                          instr.rip_offset)

                    # file offset depends on section
                    section = exe.get_section_from_offset(instr.rip_resolved)
                    file_offset = instr.rip_resolved - section[
                        "sh_addr"] + section["sh_offset"]

                    # Read in and unpack the first byte at the offset
                    val_8 = exe.get_bytes(file_offset, 1)
                    instr.signed_8 = unpack('b', val_8)[0]
                    instr.unsigned_8 = unpack('B', val_8)[0]
                    instr.hex_8 = hex(instr.unsigned_8)

                    # Read in and unpack the first two bytes at the offset
                    val_16 = exe.get_bytes(file_offset, 2)
                    instr.signed_16 = unpack('h', val_16)[0]
                    instr.unsigned_16 = unpack('H', val_16)[0]
                    instr.hex_16 = hex(instr.unsigned_16)

                    # Read in and unpack the first four bytes at the offset
                    val_32 = exe.get_bytes(file_offset, 4)
                    instr.signed_32 = unpack('i', val_32)[0]
                    instr.unsigned_32 = unpack('I', val_32)[0]
                    instr.hex_32 = hex(instr.unsigned_32)
                    instr.float = unpack('f', val_32)[0]

                    # Read in and unpack the first eight bytes at the offset
                    val_64 = exe.get_bytes(file_offset, 8)
                    instr.signed_64 = unpack('q', val_64)[0]
                    instr.unsigned_64 = unpack('Q', val_64)[0]
                    instr.hex_64 = hex(instr.unsigned_64)
                    instr.double = unpack('d', val_64)[0]

                    symbol, field_name = exe.get_symbol_by_addr(
                        instr.rip_resolved,
                        instr.address,
                        instr_size=op.size,
                        get_sub_symbol=True)
                    if symbol:
                        instr.comment = demangle(symbol.name)
                        if field_name:
                            instr.comment += '.' + field_name

                    # Raw operand-sized value, rendered as space-separated hex.
                    raw = exe.get_bytes(file_offset, op.size)
                    instr.rip_value_hex = " ".join(
                        hex(ord(char)) for char in raw)

                    # HTML collapses consecutive spaces. For presentation purposes, replace spaces
                    # with &nbsp (non-breaking space)
                    # TODO: there's a bug involving ASCII that cannot be jsonified. To get around
                    # it, only 16-byte operands get an ASCII rendering for now; the other
                    # sizes are edge cases that still need to be handled.
                    if op.size == 16:
                        instr.rip_value_ascii = ''.join(
                            '&nbsp' if char == ' ' else char for char in raw)
                    else:
                        instr.rip_value_ascii = "under construction..."

                # Handle explicitly read/written registers
                if op.type == x86.X86_OP_MEM:
                    # [base, index, displacement]; using an array instead of
                    # object to guarantee ordering
                    ptr = ["", "", ""]
                    instr.regs_ptr_explicit = []
                    if op.value.mem.base != 0:
                        regname = instr.reg_name(op.value.mem.base)
                        ptr[0] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.index != 0:
                        regname = instr.reg_name(op.value.mem.index)
                        ptr[1] = regname
                        if regname != "rip":
                            instr.regs_ptr_explicit.append(regname)
                    if op.value.mem.disp != 0:
                        ptr[2] = hex(op.value.mem.disp)

                    instr.ptr = ptr
                    instr.ptr_size = op.size
                    instr.regs_explicit.append(instr.ptr)
                elif op.type == x86.X86_OP_REG:
                    instr.regs_explicit.append(instr.reg_name(op.value.reg))
                else:
                    # Keep one entry per operand so positions line up.
                    instr.regs_explicit.append("")

            # what registers does this instruction read/write?
            instr.regs_write_implicit = [
                instr.reg_name(reg) for reg in instr.regs_write
            ]
            # Calls are always recorded as writing rax.
            if instr.group(x86.X86_GRP_CALL) and instr.reg_name(
                    x86.X86_REG_RAX) not in instr.regs_write_implicit:
                instr.regs_write_implicit.append(
                    instr.reg_name(x86.X86_REG_RAX))
            instr.regs_read_implicit = [
                instr.reg_name(reg) for reg in instr.regs_read
            ]
            # Add in documentation meta-data
            instr.short_desc, instr.docfile = get_documentation(instr)
            if instr.docfile is None or instr.short_desc is None:
                # Log instructions without documentation for later follow-up.
                with open(CUR_PATH + 'missing_docs.log', 'a+') as f:
                    f.write('[{}] : {} : {} : {}\n'.format(
                        str(datetime.datetime.now()), instr.mnemonic,
                        instr.docfile, instr.short_desc))
        return disassembled

    except CsError as e:
        print("ERROR: %s" % e)
示例#29
0
    ARM_INS_POP: "pop",
    ARM_INS_PUSH: "push",
    ARM_INS_NOP: "nop",
    ARM_INS_YIELD: "yield",
    ARM_INS_WFE: "wfe",
    ARM_INS_WFI: "wfi",
    ARM_INS_SEV: "sev",
    ARM_INS_SEVL: "sevl",
    ARM_INS_VPUSH: "vpush",
    ARM_INS_VPOP: "vpop",
    ARM_INS_ENDING: "ending",
}

# Module-level Capstone disassemblers, one per ARM instruction-set mode
# (Thumb and ARM). Both are little-endian and have `detail` switched on.
THUMB_DISASSEMBLER = Cs(CS_ARCH_ARM, CS_MODE_THUMB + CS_MODE_LITTLE_ENDIAN)
THUMB_DISASSEMBLER.detail = True
ARM_DISASSEMBLER = Cs(CS_ARCH_ARM, CS_MODE_ARM + CS_MODE_LITTLE_ENDIAN)
ARM_DISASSEMBLER.detail = True

# Groups of Capstone ARM instruction ids. Presumably matched against an
# instruction's id to classify control flow -- the usage is outside this
# excerpt, confirm against the callers.
BRANCH_IDS = (ARM_INS_BX, ARM_INS_B)  # bx / b
COND_BRANCH_IDS = (ARM_INS_CBNZ, ARM_INS_CBZ)  # cbnz / cbz
CALL_IDS = (ARM_INS_BL, ARM_INS_BLX)  # bl / blx


class Function:
    # NOTE(review): only the beginning of __init__ is visible in this excerpt;
    # the rest of the class appears to be cut off.
    def __init__(self, block_tree: IntervalTree, name: str = "sub") -> None:
        """Initialise the function from the intervals of ``block_tree``.

        Args:
            block_tree: Interval tree whose intervals expose their payload
                through ``.data``.
            name: Name stored on the instance; defaults to ``"sub"``.

        Raises:
            ValueError: If ``block_tree`` is empty.
        """
        if len(block_tree) == 0:
            raise ValueError("Cannot create function from empty block tree")
        self.name = name
        # Gather the payload of every interval in the tree.
        blocks = [iv.data for iv in block_tree]
示例#30
0
from capstone import Cs, CS_ARCH_ARM64, CS_MODE_ARM, CS_OP_REG

# Module-level Capstone disassembler for the ARM64 architecture, with
# `detail` switched on.
cs = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
cs.detail = True

# Each list holds the x-prefixed names first, followed by the w-prefixed
# names for the same register numbers.

# Clobbered registers (reserved by caller, cannot overwrite): 19 through 28.
clobbered_registers = [
    bank + str(num) for bank in ("x", "w") for num in range(19, 29)
]
# Non-clobbered registers (can be overwritten by a function): 0 through 18.
non_clobbered_registers = [
    bank + str(num) for bank in ("x", "w") for num in range(0, 19)
]
# Argument registers (used to pass function arguments): 0 through 7.
argument_registers = [
    bank + str(num) for bank in ("x", "w") for num in range(0, 8)
]


def get_reg_size_arm(regname):
    """Return the width in bytes of the register named ``regname``.

    The width is looked up from the first letter of the name,
    case-insensitively: b=1, h=2, w=4, s=4, x=8, d=8, q=16.

    The stack pointer ``sp`` is special-cased: it also starts with "s", so
    the letter lookup alone would report it as a 4-byte register, but it is
    the 64-bit stack pointer. Its 32-bit view ``wsp`` already resolves
    correctly through the "w" prefix.

    Raises:
        IndexError: If ``regname`` is empty.
        KeyError: If the first letter is not one of the known prefixes.
    """
    name = regname.upper()
    if name == "SP":
        return 8
    sizes = {"B": 1, "H": 2, "W": 4, "S": 4, "X": 8, "D": 8, "Q": 16}
    return sizes[name[0]]


def get_access_size_arm(instruction):
    bool_load = True if instruction.mnemonic.upper().startswith("L") else False
    # here we get the size from the last letter of the instruction
    # horrible hack I know, but capstone is a bad boy and is not reliable
    sizes = {"B": 1, "H": 2, "W": 4, "R": 8, "P": 16}
    acsz = sizes[instruction.mnemonic.upper()[-1]]
    if instruction.operands[0].type == CS_OP_REG:
        reg = instruction.reg_name(instruction.operands[0].reg)
        regsz = get_reg_size_arm(reg)
        if regsz < acsz or regsz == 16: