Example #1
    def disasm(self, bytez, offset, va):
        # FIXME: for newer instructions, the VEX.W bit needs to be able to change the opcode. ugh.

        # Stuff for opcode parsing
        tabdesc = all_tables[opcode86.TBL_Main]  # A tuple (optable, shiftbits, mask byte, sub, max)
        startoff = offset  # Use startoff as a size knob if needed

        # Stuff we'll be putting in the opcode object
        optype = None  # This gets set if we successfully decode below
        mnem = None
        operands = []

        prefixes = 0
        pho_prefixes = 0  # faux prefixes...don't immediately apply them, they may not be the prefixes we're looking for

        while True:

            obyte = ord(bytez[offset])

            # This line changes in 64 bit mode
            p = self._dis_prefixes[obyte]
            if p is None:
                break

            # print("OBYTE",hex(obyte))
            if obyte in mandatory_prefixes:
                pho_prefixes |= p
                # ratchet through the tables

                tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
                # print("TABIDX: %d" % tabidx)
                opdesc = tabdesc[0][tabidx]
                # print('OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0])))
                tabdesc = all_tables[opdesc[0]]
            else:
                prefixes |= p

            if p & PREFIX_VEX:
                if p == PREFIX_VEX2:
                    offset += 1
                    imm1 = ord(bytez[offset])
                    if imm1 & 0xc0 != 0xc0:  # shouldn't in 64-bit mode, but in 32-bit, this keeps LES from colliding
                        break
                    inv1 = imm1 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm1 & 4) >> 2]
                    vvvv = ((inv1 >> 3) & 0xf)
                    pp = imm1 & 3

                    prefixes |= (inv1 << 11) & PREFIX_REX_R  # R is inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)
                    combined_mand_prefixes = vex_pp_table[pp]

                elif p == PREFIX_VEX3:
                    imm1 = ord(bytez[offset + 1])
                    if imm1 & 0xc0 != 0xc0:  # shouldn't in 64-bit mode, but in 32-bit, this keeps LDS from colliding
                        break
                    offset += 2
                    imm2 = ord(bytez[offset])
                    inv1 = imm1 ^ 0xff
                    inv2 = imm2 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm2 & 4) >> 2]
                    vvvv = ((inv2 >> 3) & 0xf)
                    pp = imm2 & 3
                    m_mmmm = imm1 & 0x1f
                    # print("imms: %x %x \tl: %d\tvvvv: 0x%x\tpp: %d\tm_mmmm: 0x%x" % (imm1, imm2, vex_l, vvvv, pp, m_mmmm))
                    prefixes |= (
                        (inv1 << 11) & PREFIX_REX_RXB)  # RXB are inverted
                    prefixes |= (
                        (imm2 << 12) & PREFIX_REX_W)  # W is not inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)  # vvvv

                    combined_mand_prefixes = vex_pp_table[pp] + vex3_mmmm_table[m_mmmm]

                # VEX prefixes default to 0F table, possibly F20F, F30F or 660F
                # VEX3 prefixes may also specify depths into 38 and 3A tables
                for tabidx in combined_mand_prefixes:
                    if tabidx is None:
                        continue
                    # print("TABIDX: %d" % tabidx)
                    opdesc = tabdesc[0][tabidx]
                    # print('OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0])))
                    tabdesc = all_tables[opdesc[0]]

            offset += 1
            continue

        if obyte != 0x0f:
            prefixes |= pho_prefixes

        while True:

            obyte = ord(bytez[offset])

            # print("OP-OBYTE", hex(obyte))
            if (obyte > tabdesc[5]):
                # print("Jumping To Overflow Table: %s" % hex(tabdesc[5]))
                tabdesc = all_tables[tabdesc[6]]

            tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
            # print("TABIDX: %s" % tabidx)
            opdesc = tabdesc[0][tabidx]
            # print('OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0])))

            # Hunt down multi-byte opcodes
            nexttable = opdesc[0]
            # print("NEXT", nexttable, hex(obyte), opcode86.tables_lookup.get(nexttable))
            if nexttable != 0:  # If we have a sub-table specified, use it.
                # print("Multi-Byte Next Hop For (%s, %s)" % (hex(obyte), opdesc[0]))
                tabdesc = all_tables[nexttable]

                # Account for the table jump we made
                offset += 1

                continue

            # We are now on the final table...
            # print(repr(opdesc))
            tbl_opercnt = tabdesc[1]
            mnem = opdesc[3 + tbl_opercnt]
            optype = opdesc[1]
            if tabdesc[3] == 0xff:
                offset += 1  # For our final opcode byte
            break

        if optype == 0:
            # print(tabidx)
            # print(opdesc)
            # print("OPTTYPE 0")
            raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16],
                                          va=va)

        operoffset = 0
        # Begin parsing operands based off address method
        for i in range(operands_index, operands_index + tbl_opercnt):

            oper = None  # Set this if we end up with an operand
            osize = 0

            # Pull out the operand description from the table
            operflags = opdesc[i]
            opertype = operflags & opcode86.OPTYPE_MASK
            addrmeth = operflags & opcode86.ADDRMETH_MASK

            # If there are no more operands, break out of the loop!
            if operflags == 0:
                break

            # print("ADDRTYPE: %.8x OPERTYPE: %.8x" % (addrmeth, opertype))

            # handles tsize calculations including new REX prefixes
            tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

            # print(hex(opertype), hex(addrmeth), hex(tsize))

            # If addrmeth is zero, we have operands embedded in the opcode
            if addrmeth == 0:
                osize = 0
                oper = self.ameth_0(operflags, opdesc[2 + tbl_opercnt + i],
                                    tsize, prefixes)

            else:
                # print("ADDRTYPE", hex(addrmeth))
                ameth = self._dis_amethods[addrmeth >> 16]
                # print("AMETH", ameth)
                if ameth is None:
                    raise Exception("Implement Addressing Method 0x%.8x" %
                                    addrmeth)

                # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
                try:
                    if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                        osize, oper = ameth(bytez, offset + operoffset, tsize,
                                            prefixes, operflags)

                        # If we are a sign extended immediate and not the same as the other operand,
                        # do the sign extension during disassembly so nothing else has to worry about it..
                        if len(operands) and tsize != operands[-1].tsize:
                            # Check if we are an explicitly signed operand *or* REX.W
                            if operflags & opcode86.OP_SIGNED or prefixes & PREFIX_REX_W:
                                otsize = operands[-1].tsize
                                oper.imm = e_bits.sign_extend(
                                    oper.imm, oper.tsize, otsize)
                                oper.tsize = otsize

                    else:
                        osize, oper = ameth(bytez, offset, tsize, prefixes,
                                            operflags)

                except struct.error:
                    # Catch struct unpack errors due to insufficient data length
                    raise envi.InvalidInstruction(
                        bytez=bytez[startoff:startoff + 16])

            if oper is not None:
                # This is a filthy hack for now...
                oper._dis_regctx = self._dis_regctx
                operands.append(oper)

            operoffset += osize

        # Pull in the envi generic instruction flags
        iflags = iflag_lookup.get(optype, 0) | self._dis_oparch

        if prefixes & ed_i386.PREFIX_REP_MASK:
            iflags |= envi.IF_REPEAT

        if priv_lookup.get(mnem, False):
            iflags |= envi.IF_PRIV

        # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
        if optype == opcode86.INS_LEA:
            operands[1]._is_deref = False

        ret = Amd64Opcode(va, optype, mnem, prefixes,
                          (offset - startoff) + operoffset, operands, iflags)
        return ret
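
For reference, a minimal usage sketch of how a decoder like this is normally driven, assuming vivisect's envi package is importable; the instruction bytes and virtual address are illustrative only:

import envi

# Ask envi for the amd64 architecture module, which wraps this disassembler.
arch = envi.getArchModule('amd64')

# 48 89 d8 is "mov rax,rbx"; 0x401000 is just an illustrative virtual address.
op = arch.archParseOpcode(b'\x48\x89\xd8', 0, 0x401000)
print(op.mnem, op.size)
print(op)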
Example #2
    def disasm(self, bytez, offset, va):
        # FIXME: for newer instructions, the VEX.W bit needs to be able to change the opcode. ugh.

        # Stuff for opcode parsing
        tabdesc = all_tables[opcode86.TBL_Main]  # A tuple (optable, shiftbits, mask byte, sub, max)
        startoff = offset  # Use startoff as a size knob if needed

        # Stuff we'll be putting in the opcode object
        optype = None  # This gets set if we successfully decode below
        mnem = None
        operands = []

        prefixes = 0
        pho_prefixes = 0  # faux prefixes... don't immediately apply them, they may not be the prefixes we're looking for

        while True:

            obyte = bytez[offset]

            # This line changes in 64 bit mode
            p = self._dis_prefixes[obyte]
            if p is None:
                break

            # print "OBYTE",hex(obyte)
            if obyte in mandatory_prefixes:
                pho_prefixes |= p
                # ratchet through the tables

                tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
                # print "TABIDX: %d" % tabidx
                opdesc = tabdesc[0][tabidx]
                # print 'OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0]))
                tabdesc = all_tables[opdesc[0]]
            else:
                prefixes |= p

            if p & PREFIX_VEX:
                if p == PREFIX_VEX2:
                    offset += 1
                    imm1 = bytez[offset]
                    if imm1 & 0xc0 != 0xc0:  # shouldn't in 64-bit mode, but in 32-bit, this keeps LES from colliding
                        break
                    inv1 = imm1 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm1 & 4) >> 2]
                    vvvv = ((inv1 >> 3) & 0xf)
                    pp = imm1 & 3

                    prefixes |= (inv1 << 11) & PREFIX_REX_R  # R is inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)
                    combined_mand_prefixes = vex_pp_table[pp]

                elif p == PREFIX_VEX3:
                    imm1 = bytez[offset + 1]
                    if imm1 & 0xc0 != 0xc0:  # shouldn't in 64-bit mode, but in 32-bit, this keeps LDS from colliding
                        break
                    offset += 2
                    imm2 = bytez[offset]
                    inv1 = imm1 ^ 0xff
                    inv2 = imm2 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm2 & 4) >> 2]
                    vvvv = ((inv2 >> 3) & 0xf)
                    pp = imm2 & 3
                    m_mmmm = imm1 & 0x1f
                    # print "imms: %x %x \tl: %d\tvvvv: 0x%x\tpp: %d\tm_mmmm: 0x%x" % (imm1, imm2, vex_l, vvvv, pp, m_mmmm)
                    prefixes |= ((inv1 << 11) & PREFIX_REX_RXB)  # RXB are inverted
                    prefixes |= ((imm2 << 12) & PREFIX_REX_W)  # W is not inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)  # vvvv

                    combined_mand_prefixes = vex_pp_table[pp] + vex3_mmmm_table[m_mmmm]

                # VEX prefixes default to 0F table, possibly F20F, F30F or 660F
                #   VEX3 prefixes may also specify depths into 38 and 3A tables
                for tabidx in combined_mand_prefixes:
                    if tabidx is None:
                        continue
                    # print "TABIDX: %d" % tabidx
                    opdesc = tabdesc[0][tabidx]
                    # print 'OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0]))
                    tabdesc = all_tables[opdesc[0]]

            offset += 1
            continue

        if obyte != 0x0f:
            prefixes |= pho_prefixes

        while True:

            obyte = bytez[offset]

            # print "OBYTE",hex(obyte)
            if obyte > tabdesc[5]:
                # print "Jumping To Overflow Table:", tabdesc[5]
                tabdesc = all_tables[tabdesc[6]]

            tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
            # print "TABIDX: %s" % tabidx
            opdesc = tabdesc[0][tabidx]
            # print 'OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0]))

            # Hunt down multi-byte opcodes
            nexttable = opdesc[0]
            # print "NEXT",nexttable,hex(obyte), opcode86.tables_lookup.get(nexttable)
            if nexttable != 0:  # If we have a sub-table specified, use it.
                # print "Multi-Byte Next Hop For",hex(obyte),opdesc[0]
                tabdesc = all_tables[nexttable]

                # Account for the table jump we made
                offset += 1

                continue

            # We are now on the final table...
            # print repr(opdesc)
            tbl_opercnt = tabdesc[1]
            mnem = opdesc[3 + tbl_opercnt]
            optype = opdesc[1]
            if tabdesc[3] == 0xff:
                offset += 1  # For our final opcode byte
            break

        if optype == 0:
            # print tabidx
            # print opdesc
            # print "OPTTYPE 0"
            raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

        operoffset = 0
        # Begin parsing operands based off address method
        for i in range(operands_index, operands_index + tbl_opercnt):

            oper = None  # Set this if we end up with an operand
            osize = 0

            # Pull out the operand description from the table
            operflags = opdesc[i]
            opertype = operflags & opcode86.OPTYPE_MASK
            addrmeth = operflags & opcode86.ADDRMETH_MASK

            # If there are no more operands, break out of the loop!
            if operflags == 0:
                break

            # print "ADDRTYPE: %.8x OPERTYPE: %.8x" % (addrmeth, opertype)

            # handles tsize calculations including new REX prefixes
            tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

            # print hex(opertype),hex(addrmeth),hex(tsize)

            # If addrmeth is zero, we have operands embedded in the opcode
            if addrmeth == 0:
                osize = 0
                oper = self.ameth_0(operflags, opdesc[2 + tbl_opercnt + i], tsize, prefixes)

            else:
                # print "ADDRTYPE",hex(addrmeth)
                ameth = self._dis_amethods[addrmeth >> 16]
                # print "AMETH",ameth
                if ameth is None:
                    raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

                # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
                try:
                    if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                        osize, oper = ameth(bytez, offset + operoffset, tsize, prefixes, operflags)

                        # If we are a sign extended immediate and not the same as the other operand,
                        # do the sign extension during disassembly so nothing else has to worry about it..
                        if len(operands) and tsize != operands[-1].tsize:
                            # Check if we are an explicitly signed operand *or* REX.W
                            if operflags & opcode86.OP_SIGNED or prefixes & PREFIX_REX_W:
                                otsize = operands[-1].tsize
                                oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                                oper.tsize = otsize

                    else:
                        osize, oper = ameth(bytez, offset, tsize, prefixes, operflags)

                except struct.error as e:
                    # Catch struct unpack errors due to insufficient data length
                    raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16])

            if oper is not None:
                # This is a filthy hack for now...
                oper._dis_regctx = self._dis_regctx
                operands.append(oper)

            operoffset += osize

        # Pull in the envi generic instruction flags
        iflags = iflag_lookup.get(optype, 0) | self._dis_oparch

        if prefixes & ed_i386.PREFIX_REP_MASK:
            iflags |= envi.IF_REPEAT

        if priv_lookup.get(mnem, False):
            iflags |= envi.IF_PRIV

        # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
        if optype == opcode86.INS_LEA:
            operands[1]._is_deref = False

        ret = Amd64Opcode(va, optype, mnem, prefixes, (offset - startoff) + operoffset, operands, iflags)

        return ret
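
The only substantive change from the previous example is that bytez is indexed directly instead of being passed through ord(): in Python 3, indexing a bytes object already yields an int, whereas in Python 2 it yields a length-1 str. A self-contained sketch of that distinction:

# Python 3: indexing bytes returns an int, so ord() is unnecessary (and would raise TypeError).
bytez = b'\x0f\x1f\x00'
obyte = bytez[0]
assert isinstance(obyte, int) and obyte == 0x0f

# Slicing still returns bytes, in case a one-byte bytes object is needed.
assert bytez[0:1] == b'\x0f'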
Example #3
    def disasm(self, bytez, offset, va):
        '''
        The main amd64 decoder function. The initial steps it takes are determining what
        potential prefixes are attached to the instruction. By "potential", we mean that at
        this stage we don't know if things like 66, F2, F3 are being used as normal prefixes
        (representing things like a rep prefix) or if they're being used as mandatory prefixes
        that completely change which instruction we're decoding. All potential prefixes are stored
        in the pho_prefixes variable.

        To that end, there's some tap dancing we need to do to deal with what the intel manual
        refers to as "mandatory prefixes". If we hit a main opcode byte of 0F and we know we have a
        potentially mandatory prefix (and we're not in VEX land), we treat the byte right before the 0F
        as the only potential mandatory prefix (as laid out in the intel manual). Then we basically brute
        force the decoding since we really only have two paths to try. One where the mandatory prefix is
        merely a normal prefix (and doesn't affect which set of tables we traverse down) and one where
        the mandatory prefix does affect what tables we ratchet through (and thus directly changes which
        instruction we're looking at). For each case, we append all the relevant output to a list (should
        the decoding produce a meaningful output).

        If we end up producing no instruction definitions from our brute force loop, we've hit an invalid
        sequence of instruction bytes and we throw an exception.

        If only one path produces output, then that's our result, and we proceed to use the instruction
        definition to determine what addressing methods and size types to use when determining operands.

        If both paths produce a valid instruction definition, then the path that uses the mandatory prefix
        to directly change the instruction takes precedence over the path where it's just a normal prefix.

        In both the one- and two-result cases, outside of our instruction decoding loop, we've kept a list of
        the possible decodings we could have hit, and we merely pop off the end of the list (so order
        matters when building the ppref variable). A toy sketch of this two-path resolution appears
        after this function.
        '''
        # FIXME: for newer instructions, the VEX.W bit needs to be able to change the opcode. ugh.
        # FIXME: And also REX.W

        # Stuff for opcode parsing
        tabdesc = all_tables[opcode86.TBL_Main]  # A tuple (optable, shiftbits, mask byte, sub, max)
        startoff = offset  # Use startoff as a size knob if needed
        isvex = False
        vexw = None
        last_pref = 0
        ppref = [(None, None)]

        # Stuff we'll be putting in the opcode object
        optype = None  # This gets set if we successfully decode below
        mnem = None
        operands = []

        prefixes = 0
        pho_prefixes = 0  # faux prefixes...don't immediately apply them, they may not be the prefixes we're looking for

        while True:

            obyte = ord(bytez[offset])

            # This line changes in 64 bit mode
            p = self._dis_prefixes[obyte]
            if p is None:
                break

            if MANDATORY_PREFIXES[obyte]:
                pho_prefixes |= p
                last_pref = obyte
            else:
                prefixes |= p

            if p & PREFIX_VEX:
                isvex = True
                if p == PREFIX_VEX2:
                    offset += 1
                    imm1 = ord(bytez[offset])
                    # shouldn't in 64-bit mode, but in 32-bit, this keeps LES from colliding
                    # TODO: So we're always in 64 bit here. This will need to be here once we unify 32/64 decoding
                    #if imm1 & 0xc0 != 0xc0:
                    #break
                    inv1 = imm1 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm1 & 4) >> 2]
                    vvvv = ((inv1 >> 3) & 0xf)
                    pp = imm1 & 3

                    prefixes |= (inv1 << 11) & PREFIX_REX_R  # R is inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)
                    combined_mand_prefixes = vex_pp_table[pp]

                elif p == PREFIX_VEX3:
                    imm1 = ord(bytez[offset + 1])
                    offset += 2
                    # TODO: So we're always in 64 bit here. This will need to be here once we unify 32/64 decoding
                    #if imm1 & 0xc0 != 0xc0:
                    #break
                    imm2 = ord(bytez[offset])
                    inv1 = imm1 ^ 0xff
                    inv2 = imm2 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm2 & 4) >> 2]
                    vvvv = ((inv2 >> 3) & 0xf)
                    pp = imm2 & 3
                    m_mmmm = imm1 & 0x1f
                    prefixes |= (
                        (inv1 << 11) & PREFIX_REX_RXB)  # RXB are inverted
                    vexw = ((imm2 << 12) & PREFIX_REX_W)  # W is not inverted
                    prefixes |= vexw
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)  # vvvv

                    combined_mand_prefixes = vex_pp_table[pp] + vex3_mmmm_table[m_mmmm]

                # VEX prefixes default to 0F table, possibly F20F, F30F or 660F
                # VEX3 prefixes may also specify depths into 38 and 3A tables
                for tabidx in combined_mand_prefixes:
                    if tabidx is None:
                        continue
                    opdesc = tabdesc[0][tabidx]
                    tabdesc = all_tables[opdesc[0]]
                # So VEX and mandatory prefixes don't really intermingle
                offset += 1
                break

            offset += 1
            continue

        if obyte != 0x0f:
            prefixes |= pho_prefixes

        # intel manual says VEX and legacy prefixes don't intermingle
        if obyte == 0x0f and MANDATORY_PREFIXES[last_pref] and not isvex:
            obyte = last_pref
            ppref.append((last_pref, amd64_prefixes[last_pref]))

        decodings = []
        mainbyte = offset
        all_prefixes = prefixes

        ogtabdesc = tabdesc
        # onehot in this case refers to the prefixes that are defined in i386/disasm.py where only
        # one bit of the entire integer is set. We use that to quickly pop things in and out of the prefixes
        # list
        for pref, onehot in ppref:
            tabdesc = ogtabdesc
            offset = mainbyte
            if pref is not None:
                # our mandatory prefix is not none, which means that we have to jump through the tables
                # using the mandatory prefix byte as our "main byte"
                # As a bit of a hack, the 66/F2/F3 entries in the main table
                # directly point to the 660F/F20F/F30F tables since we're carefully tap dancing around
                # what our opcode byte really is
                obyte = pref
                # since we're treating this prefix as mandatory and not as REPNZ/REPZ/etc, we need to rip
                # it out of the pho_prefixes before we combine pho_prefixes with the main prefixes container
                all_prefixes = prefixes | (pho_prefixes & (~onehot))
            else:
                # treat nothing as a mandatory prefix (or we defaulted into here if we got no mandatory
                # prefixes). For most instructions this will be the normal case.
                obyte = ord(bytez[offset])
                all_prefixes = prefixes | pho_prefixes

            while True:
                if (obyte > tabdesc[5]):
                    tabdesc = all_tables[tabdesc[6]]

                tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
                opdesc = tabdesc[0][tabidx]

                # Hunt down multi-byte opcodes
                nexttable = opdesc[0]
                if nexttable != 0:  # If we have a sub-table specified, use it.
                    tabdesc = all_tables[nexttable]

                    # Account for the table jump we made
                    offset += 1
                    obyte = ord(bytez[offset])
                    continue

                # We are now on the final table...
                tbl_opercnt = tabdesc[1]
                mnem = opdesc[3 + tbl_opercnt]
                optype = opdesc[1]
                if tabdesc[3] == 0xff:
                    offset += 1  # For our final opcode byte
                break

            if optype & INS_VEXREQ and not isvex:
                continue

            if optype != 0:
                decodings.append((tabdesc, opdesc, offset, all_prefixes))

        if not len(decodings):
            raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16],
                                          va=va)

        tabdesc, opdesc, offset, prefixes = decodings.pop()
        optype = opdesc[1]
        tbl_opercnt = tabdesc[1]
        mnem = opdesc[3 + tbl_opercnt]

        if optype == 0:
            raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16],
                                          va=va)

        operoffset = 0
        # Begin parsing operands based off address method
        for i in range(operands_index, operands_index + tbl_opercnt):

            oper = None  # Set this if we end up with an operand
            osize = 0

            # Pull out the operand description from the table
            operflags = opdesc[i]
            opertype = operflags & opcode86.OPTYPE_MASK
            addrmeth = operflags & opcode86.ADDRMETH_MASK

            # If there are no more operands, break out of the loop!
            if operflags == 0:
                break

            # handles tsize calculations including new REX prefixes
            tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

            # If addrmeth is zero, we have operands embedded in the opcode
            if addrmeth == 0:
                osize = 0
                oper = self.ameth_0(operflags, opdesc[2 + tbl_opercnt + i],
                                    tsize, prefixes)

            else:
                # So the 0x7f is here to help us deal with an issue between VEX and non-VEX.
                # A super common pattern in VEX is to add an operand somewhere in the middle of the
                # existing operands. So if we have something like psrlw xmm2, 17 in non-VEX, the VEX version
                # will look like vpsrlw xmm3, xmm4, 17.
                # The fun bit of this is that the VEX-only portions aren't exclusive to the VEX-only
                # addressing methods, so we can have ADDRMETH_V be skipped outside of VEX mode too, and not
                # just things like ADDRMETH_H. Hence, we need a new flag that I stash in the upper bits of
                # the instruction operand definition so we can know when to skip operands.
                ameth = self._dis_amethods[(addrmeth >> 16) & 0x7F]
                vex_skip = addrmeth & opcode86.ADDRMETH_VEXSKIP
                if not isvex and vex_skip:
                    continue

                if ameth is None:
                    raise Exception("Implement Addressing Method 0x%.8x" %
                                    addrmeth)

                # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
                try:
                    if addrmeth in IMM_REQOFFS:
                        osize, oper = ameth(bytez, offset + operoffset, tsize,
                                            prefixes, operflags)

                        # If we are a sign extended immediate and not the same as the other operand,
                        # do the sign extension during disassembly so nothing else has to worry about it..
                        if operflags & opcode86.OP_SIGNED:
                            if len(operands) and tsize != operands[-1].tsize:
                                otsize = operands[-1].tsize
                                oper.imm = e_bits.sign_extend(
                                    oper.imm, oper.tsize, otsize)
                                oper.tsize = otsize
                            elif not len(operands):
                                oper.imm = e_bits.sign_extend(
                                    oper.imm, oper.tsize,
                                    self._dis_default_size)
                                oper.tsize = self._dis_default_size

                    else:
                        # see same code section in i386 for this rationale
                        osize, oper = ameth(bytez, offset, tsize, prefixes,
                                            operflags)
                        if getattr(oper, "_is_deref", False):
                            memsz = OP_EXTRA_MEMSIZES[(operflags & OP_MEMMASK) >> 4]
                            if memsz is not None:
                                oper.tsize = memsz

                except struct.error:
                    # Catch struct unpack errors due to insufficient data length
                    raise envi.InvalidInstruction(
                        bytez=bytez[startoff:startoff + 16])

            if oper is not None:
                # This is a filthy hack for now...
                oper._dis_regctx = self._dis_regctx
                operands.append(oper)

            operoffset += osize

        typemask = optype & 0xFFFF
        # Pull in the envi generic instruction flags
        iflags = iflag_lookup.get(typemask, 0) | self._dis_oparch

        if prefixes & PREFIX_REP_MASK:
            iflags |= envi.IF_REPEAT

        if priv_lookup.get(mnem, False):
            iflags |= envi.IF_PRIV

        # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
        if typemask == opcode86.INS_LEA:
            operands[1]._is_deref = False

        ret = Amd64Opcode(va, optype, mnem, prefixes,
                          (offset - startoff) + operoffset, operands, iflags)
        return ret
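
As referenced in the docstring, the mandatory-prefix handling amounts to decoding the same bytes twice, once treating a candidate 66/F2/F3 byte as a plain legacy prefix and once as a table selector, and letting the mandatory-prefix decoding win when both succeed. A toy, self-contained sketch of that idea; the two table dicts below are invented stand-ins for the real opcode86 tables:

# Toy illustration of the two-path mandatory-prefix resolution described above.
# Assumes the encoding uses the 0F escape; real decoding walks opcode86's tables.
TOY_0F = {0x10: 'movups'}      # 0F 10 without a mandatory prefix
TOY_660F = {0x10: 'movupd'}    # 66 0F 10 with 66 as a mandatory prefix

def toy_decode(bytez):
    decodings = []
    has_66 = bytez[0] == 0x66
    opbyte = bytez[2] if has_66 else bytez[1]   # the byte after the 0F escape

    # Path 1: treat 66 (if any) as an ordinary operand-size prefix.
    mnem = TOY_0F.get(opbyte)
    if mnem:
        decodings.append(mnem)

    # Path 2: treat 66 as a mandatory prefix selecting the 660F table.
    if has_66 and opbyte in TOY_660F:
        decodings.append(TOY_660F[opbyte])

    if not decodings:
        raise ValueError('invalid encoding')
    # The mandatory-prefix decoding was appended last, so it takes precedence.
    return decodings.pop()

assert toy_decode(bytes([0x0f, 0x10])) == 'movups'
assert toy_decode(bytes([0x66, 0x0f, 0x10])) == 'movupd'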
class Amd64Disasm(e_i386.i386Disasm):

    def __init__(self):
        e_i386.i386Disasm.__init__(self)
        self._dis_oparch = envi.ARCH_AMD64
        self._dis_prefixes = amd64_prefixes
        self._dis_regctx = Amd64RegisterContext()
        self.ptrsize = 8

        # Over-ride these which are in use by the i386 version of the ASM
        self.ROFFSETMMX   = e_i386.getRegOffset(amd64regs, "mm0")
        self.ROFFSETSIMD  = e_i386.getRegOffset(amd64regs, "xmm0")
        self.ROFFSETDEBUG = e_i386.getRegOffset(amd64regs, "debug0")
        self.ROFFSETCTRL  = e_i386.getRegOffset(amd64regs, "ctrl0")
        self.ROFFSETTEST  = e_i386.getRegOffset(amd64regs, "test0")
        self.ROFFSETSEG   = e_i386.getRegOffset(amd64regs, "es")
        self.ROFFSETFPU   = e_i386.getRegOffset(amd64regs, "st0")

    # NOTE: Technically, the REX must be the *last* prefix specified

    def _dis_calc_tsize(self, opertype, prefixes, operflags):
        """
        Use the oper type and prefixes to decide on the tsize for
        the operand.
        """

        mode = MODE_32

        sizelist = opcode86.OPERSIZE.get(opertype, None)
        if sizelist is None:
            raise Exception("OPERSIZE FAIL")

        if operflags & opcode86.OP_64AUTO:
            mode = MODE_64

        # NOTE: REX takes precedence over 66
        # (see section 2.2.1.2 in Intel 2a)
        if prefixes & PREFIX_REX_W:

            mode = MODE_64

        elif prefixes & e_i386.PREFIX_OP_SIZE:

            mode = MODE_16

        return sizelist[mode]
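
The precedence implemented here, REX.W overriding the 66 operand-size prefix as noted in the comment, can be shown with a standalone sketch; the mode constants, prefix bits, and the (2, 4, 8) size list are illustrative placeholders rather than the real opcode86.OPERSIZE entries:

# Standalone sketch of the operand-size decision; constants are placeholders.
MODE_16, MODE_32, MODE_64 = 0, 1, 2
PREFIX_REX_W, PREFIX_OP_SIZE = 1 << 0, 1 << 1

def calc_tsize(prefixes, sizelist=(2, 4, 8)):
    mode = MODE_32                      # default operand size in 64-bit mode is 32 bits
    if prefixes & PREFIX_REX_W:         # REX.W wins even if 66 is also present
        mode = MODE_64
    elif prefixes & PREFIX_OP_SIZE:     # 66 alone drops the operand size to 16 bits
        mode = MODE_16
    return sizelist[mode]

assert calc_tsize(0) == 4
assert calc_tsize(PREFIX_OP_SIZE) == 2
assert calc_tsize(PREFIX_REX_W | PREFIX_OP_SIZE) == 8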

    def disasm(self, bytez, offset, va):

        # Stuff for opcode parsing
        tabdesc = all_tables[0] # A tuple (optable, shiftbits, mask byte, sub, max)
        startoff = offset # Use startoff as a size knob if needed

        # Stuff we'll be putting in the opcode object
        optype = None # This gets set if we successfully decode below
        mnem = None 
        operands = []

        prefixes = 0

        while True:

            obyte = ord(bytez[offset])

            # This line changes in 64 bit mode
            p = self._dis_prefixes[obyte]
            if p is None:
                break
            if obyte == 0x66 and ord(bytez[offset+1]) == 0x0f:
                break
            prefixes |= p
            offset += 1
            continue

        #pdone = False
        while True:

            obyte = ord(bytez[offset])

            #print "OBYTE",hex(obyte)
            if (obyte > tabdesc[4]):
                #print "Jumping To Overflow Table:", tabdesc[5]
                tabdesc = all_tables[tabdesc[5]]

            tabidx = ((obyte - tabdesc[3]) >> tabdesc[1]) & tabdesc[2]
            #print "TABIDX: %d" % tabidx
            opdesc = tabdesc[0][tabidx]
            #print 'OPDESC: %s' % repr(opdesc)

            # Hunt down multi-byte opcodes
            nexttable = opdesc[0]
            #print "NEXT",nexttable,hex(obyte)
            if nexttable != 0: # If we have a sub-table specified, use it.
                #print "Multi-Byte Next Hop For",hex(obyte),opdesc[0]
                tabdesc = all_tables[nexttable]

                # In the case of 66 0f, the next table is *already* assuming we ate
                # the 66 *and* the 0f...  oblige them.
                if obyte == 0x66 and ord(bytez[offset+1]) == 0x0f:
                    offset += 1

                # Account for the table jump we made
                offset += 1

                continue

            # We are now on the final table...
            #print repr(opdesc)
            mnem = opdesc[6]
            optype = opdesc[1]
            if tabdesc[2] == 0xff:
                offset += 1 # For our final opcode byte
            break

        if optype == 0:
            #print tabidx
            #print opdesc
            #print "OPTTYPE 0"
            raise envi.InvalidInstruction(bytez=bytez[startoff:startoff+16], va=va)

        operoffset = 0
        # Begin parsing operands based off address method
        for i in operand_range:

            oper = None # Set this if we end up with an operand
            osize = 0

            # Pull out the operand description from the table
            operflags = opdesc[i]
            opertype = operflags & opcode86.OPTYPE_MASK
            addrmeth = operflags & opcode86.ADDRMETH_MASK

            # If there are no more operands, break out of the loop!
            if operflags == 0:
                break

            #print "ADDRTYPE: %.8x OPERTYPE: %.8x" % (addrmeth, opertype)

            # handles tsize calculations including new REX prefixes
            tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

            #print hex(opertype),hex(addrmeth),hex(tsize)


            # If addrmeth is zero, we have operands embedded in the opcode
            if addrmeth == 0:
                osize = 0
                oper = self.ameth_0(operflags, opdesc[5+i], tsize, prefixes)

            else:
                #print "ADDRTYPE",hex(addrmeth)
                ameth = self._dis_amethods[addrmeth >> 16]
                #print "AMETH",ameth
                if ameth is None:
                    raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

                # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
                try:
                    if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                        osize, oper = ameth(bytez, offset+operoffset, tsize, prefixes, operflags)

                        # If we are a sign extended immediate and not the same as the other operand,
                        # do the sign extension during disassembly so nothing else has to worry about it..
                        if len(operands) and tsize != operands[-1].tsize:
                            # Check if we are an explicitly signed operand *or* REX.W
                            if operflags & opcode86.OP_SIGNED or prefixes & PREFIX_REX_W:
                                otsize = operands[-1].tsize
                                oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                                oper.tsize = otsize

                    else:
                        osize, oper = ameth(bytez, offset, tsize, prefixes, operflags)

                except struct.error:
                    # Catch struct unpack errors due to insufficient data length
                    raise envi.InvalidInstruction(bytez=bytez[startoff:startoff+16])

            if oper is not None:
                # This is a filthy hack for now...
                oper._dis_regctx = self._dis_regctx
                operands.append(oper)

            operoffset += osize

        # Pull in the envi generic instruction flags
        iflags = iflag_lookup.get(optype, 0) | self._dis_oparch

        if priv_lookup.get(mnem, False):
            iflags |= envi.IF_PRIV

        # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
        if optype == opcode86.INS_LEA:
            operands[1]._is_deref = False

        ret = i386Opcode(va, optype, mnem, prefixes, (offset-startoff)+operoffset, operands, iflags)

        return ret
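
The immediate handling above relies on e_bits.sign_extend to widen a small signed immediate so it matches the other operand's size; a minimal sketch of what such a helper is expected to do, written independently rather than copied from envi.bits:

# Widen `value` from `cursize` bytes to `newsize` bytes, preserving its sign.
def sign_extend(value, cursize, newsize):
    sign_bit = 1 << (cursize * 8 - 1)
    if value & sign_bit:
        value |= ~((1 << (cursize * 8)) - 1) & ((1 << (newsize * 8)) - 1)
    return value

# 0xf0 read as a signed byte is -16; widened to 4 bytes it becomes 0xfffffff0.
assert sign_extend(0xf0, 1, 4) == 0xfffffff0
assert sign_extend(0x10, 1, 4) == 0x10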