Exemplo n.º 1
0
    def single_operand_parser(self, address, op, idx):
        """Parse a PPC operand into its AST (nested list) form.

        Args:
            address: Address of the instruction that owns the operand.
            op: IDA operand object. Depending on ``op.type`` the fields
                ``dtyp``, ``reg``, ``addr``, ``value``, ``specflag1`` and
                ``specflag2`` are read.
            idx: Position of the operand within the instruction; it is only
                used for the stack-variable lookup of displacement operands.

        Returns:
            None for OPERAND_TYPE_NO_OPERAND. For OPERAND_TYPE_IDPSPEC2 a
            list holding one to three ``[width, node]`` operands. For every
            other type a single list whose first entry is the operand width
            followed by the nodes appended for that type (nothing is
            appended for an operand type that is not handled here).
        """

        def constraint_value(value):
            # Anything above 2**16 is taken to be a negative number stored
            # as a 32-bit two's complement value and converted accordingly.
            if value > 2**16:
                return -(2**32 - value)
            return value

        def register_node(raw_register, position):
            # Register node for a raw index into self.REGISTERS.
            return [
                self.NODE_TYPE_REGISTER,
                self.REGISTERS[self.as_byte_value(raw_register)], position
            ]

        def masked_byte_node(raw_byte):
            # Value node for a byte-sized operand field, kept within the
            # operand's size.
            return [
                self.NODE_TYPE_VALUE,
                self.as_byte_value(raw_byte) & mask, 0
            ]

        if op.type == OPERAND_TYPE_NO_OPERAND:
            return None

        # The root entry of the AST is always the width of the operand.
        dtyp = self.as_byte_value(op.dtyp)
        width = self.OPERAND_WIDTH[dtyp]
        operand = [width]

        # Mask used to keep immediate values within the operand's size.
        if dtyp == 0:
            mask = 0xff
        elif dtyp == 1:
            mask = 0xffff
        else:
            mask = 0xffffffff

        # Compose the rest of the AST
        #

        if op.type == OPERAND_TYPE_DISPLACEMENT:

            # A displacement operand might refer to a stack variable; ask
            # for its name only when IDA flags this operand as one.
            var_name = None
            flags = idc.GetFlags(address)
            if (idx == 0 and idc.isStkvar0(flags)) or (
                    idx == 1 and idc.isStkvar1(flags)):
                var_name = self.get_operand_stack_variable_name(
                    address, op, idx)

            phrase = [self.NODE_TYPE_OPERATOR_PLUS, register_node(op.reg, 0)]

            if var_name:
                value = arch.ExpressionNamedValue(long(op.addr), var_name)
            else:
                value = constraint_value(op.addr)

            operand.append([
                self.NODE_TYPE_DEREFERENCE,
                phrase + [[self.NODE_TYPE_VALUE, value, 1]]
            ])

        elif op.type == OPERAND_TYPE_REGISTER:
            operand.append(register_node(op.reg, 1))

        elif op.type == OPERAND_TYPE_MEMORY:

            addr_name = self.get_address_name(op.addr)
            if addr_name:
                value = arch.ExpressionNamedValue(long(op.addr), addr_name)
            else:
                value = op.addr

            operand.append([
                self.NODE_TYPE_DEREFERENCE, [self.NODE_TYPE_VALUE, value, 0]
            ])

        elif op.type == OPERAND_TYPE_IMMEDIATE:
            operand.append([self.NODE_TYPE_VALUE, op.value & mask, 0])

        elif op.type in (OPERAND_TYPE_NEAR, OPERAND_TYPE_FAR):

            addr_name = self.get_address_name(op.addr)
            if addr_name:
                value = arch.ExpressionNamedValue(long(op.addr), addr_name)
            else:
                value = op.addr

            operand.append([self.NODE_TYPE_VALUE, value, 0])

        elif op.type == OPERAND_TYPE_PHRASE:
            # Not supported: emit a placeholder symbol describing the operand.
            # The parenthesised form prints the same text on Python 2 and
            # also parses on Python 3.
            print('***Dunno how to parse PHRASE')
            operand.append([
                self.NODE_TYPE_SYMBOL,
                'UNK_PHRASE(val:%d, reg:%d, type:%d)' %
                (op.value, self.as_byte_value(op.reg), op.type), 0
            ])

        elif op.type == OPERAND_TYPE_IDPSPEC0:
            # Special Purpose Registers, looked up by operand value.
            register = self.SPR_REGISTERS.get(
                op.value, 'UNKNOWN_REGISTER(val:%x)' % op.value)
            operand.append([self.NODE_TYPE_REGISTER, register, 0])

        elif op.type == OPERAND_TYPE_IDPSPEC1:
            # Two registers: one from op.reg, one from op.specflag1.
            operand.append(register_node(op.reg, 1))
            operand.append(register_node(op.specflag1, 2))

        elif op.type == OPERAND_TYPE_IDPSPEC2:
            # IDPSPEC2 is the operand type for all rlwinm and rlwnm
            # instructions, which are in general
            # "op reg, reg, byte, byte, byte" or equivalent. Simplified
            # mnemonics sometimes take fewer than five arguments.
            #
            # The object holding the decoded instruction differs between
            # IDA pre-5.7 and 5.7 onwards; this check handles both (see the
            # similar construct in arch.py).
            if hasattr(idaapi, 'cmd'):
                idaapi.decode_insn(address)
                ida_instruction = idaapi.cmd
            else:
                idaapi.ua_code(address)
                ida_instruction = idaapi.cvar.cmd
            auxpref = ida_instruction.auxpref

            # Flag 0x0020 (labelled "SH" by the old debug prints): op.reg
            # holds an immediate rather than a register index.
            if auxpref & 0x0020:
                first = [width, masked_byte_node(op.reg)]
            else:
                first = [width, register_node(op.reg, 0)]

            # This operand type expands into a list of operands, so the
            # single-operand list built above is replaced.
            operand = [first]

            # Flag 0x0040 ("MB"): op.specflag1 carries a further immediate.
            if auxpref & 0x0040:
                operand.append([width, masked_byte_node(op.specflag1)])

            # Flag 0x0080 ("ME"): op.specflag2 carries a further immediate.
            if auxpref & 0x0080:
                operand.append([width, masked_byte_node(op.specflag2)])

        elif op.type == OPERAND_TYPE_IDPSPEC3:
            # CR registers
            operand.append([
                self.NODE_TYPE_REGISTER,
                self.CR_REGISTERS[self.as_byte_value(op.reg)], 0
            ])

        elif op.type == OPERAND_TYPE_IDPSPEC4:
            # The bit in the CR to check for; stored as a raw number.
            operand.append(
                [self.NODE_TYPE_REGISTER, self.as_byte_value(op.reg), 0])

        elif op.type == OPERAND_TYPE_IDPSPEC5:
            # Device Control Register, implementation specific
            operand.append(
                [self.NODE_TYPE_REGISTER, 'DCR(%x)' % op.value, 0])

        return operand
Exemplo n.º 2
0
    def single_operand_parser(self, address, op, idx):
        """Parse a metapc operand into its AST (nested list) form.

        The first element of the returned list is the operand width node;
        what follows depends on ``op.type``. Memory-style operands are
        wrapped as ``[segment_prefix, [dereference, expression]]`` where the
        prefix is None when the upper half of ``op.specval`` is zero.

        Returns None for OPERAND_TYPE_NO_OPERAND. A list holding only the
        width comes back when no node was appended (an operand type not
        handled here, or a near/far operand without a segment prefix).

        Raises:
            Exception: when a SIB phrase has to be parsed in a segment whose
                bitness is not 1.
        """

        REGISTER = self.NODE_TYPE_REGISTER
        VALUE = self.NODE_TYPE_VALUE
        PLUS = self.NODE_TYPE_OPERATOR_PLUS
        DEREFERENCE = self.NODE_TYPE_DEREFERENCE

        def uses_sib(operand_):
            # specflag1 == 1 marks an operand addressed through the SIB byte.
            return self.as_byte_value(operand_.specflag1) == 1

        def segment_prefix_of(operand_):
            """Return the segment prefix encoded in ``specval``.

            None when the upper 16 bits are zero, a ``'<reg>:'`` string
            when they index into ``self.REGISTERS[0]``, otherwise the lower
            16 bits of ``specval`` as a number.
            """
            selector = operand_.specval >> 16
            if selector == 0:
                return None

            segment_registers = self.REGISTERS[0]
            if selector < len(segment_registers):
                return segment_registers[selector] + ':'
            return operand_.specval & 0xffff

        def build_phrase(operand_, with_displacement=False):
            """Build the AST of an indexed (SIB) memory expression.

            The SIB byte is read from ``specflag2``:
                SIB = (scale_2 << 6) | (index_3 << 3) | base_3
            IDA does not expose MOD/RM, so a missing base register is
            replaced by 'ebp' only when the caller states the operand has a
            displacement.

            Returns a fresh nested list, or an empty list when there is
            neither a base, an index, a scale nor a displacement.
            """
            if idaapi.getseg(address).bitness != 1:
                raise Exception(
                    'Not yet handling addressing modes other than 32bit!')

            sib = self.as_byte_value(operand_.specflag2)
            base = self.SIB_BASE_REGISTERS[sib & 0x7]
            index = self.SIB_INDEX_REGISTERS[(sib >> 3) & 0x7]
            scale = (None, 2, 4, 8)[sib >> 6]

            has_base = base != ''
            has_index = index != ''

            # Leading register term, if any. The last item of every node
            # is its preferred display position.
            if has_base:
                leading = [[REGISTER, base, 0]]
            elif with_displacement:
                leading = [[REGISTER, 'ebp', 0]]
            else:
                leading = []

            if scale:
                # base alone: the scale is ignored when there is no index
                if has_base and not has_index:
                    return [PLUS] + leading

                # [base|ebp +] index * scale
                scaled = [
                    self.NODE_TYPE_OPERATOR_TIMES,
                    [REGISTER, index, 0],
                    [VALUE, scale, 1],
                    len(leading)
                ]
                return [PLUS] + leading + [scaled]

            # No scale: plain sum of whichever registers are present.
            if has_index:
                return [PLUS] + leading + [[REGISTER, index, len(leading)]]
            if leading:
                return [PLUS] + leading
            return []

        # Operand parsing
        #

        kind = op.type
        if kind == OPERAND_TYPE_NO_OPERAND:
            return None

        addressing_mode = idaapi.getseg(address).bitness

        # The root entry of the AST is always the width of the operand.
        operand = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]

        if kind == OPERAND_TYPE_DISPLACEMENT:

            # The displacement may refer to a stack variable; fetch its
            # name only when IDA flags this operand as one.
            stack_name = None
            flags = idc.GetFlags(address)
            first_is_stkvar = idx == 0 and idc.isStkvar0(flags)
            if first_is_stkvar or (idx == 1 and idc.isStkvar1(flags)):
                stack_name = self.get_operand_stack_variable_name(
                    address, op, idx)

            if uses_sib(op):
                phrase = build_phrase(op, with_displacement=True)
            else:
                phrase = [
                    PLUS,
                    [REGISTER, self.REGISTERS[addressing_mode + 1][op.reg], 0]
                ]

            if stack_name:
                value = arch.ExpressionNamedValue(long(op.addr), stack_name)
            else:
                value = op.addr

            # The value is displayed after every component of the phrase.
            value_position = len(phrase) - 1
            operand.append([
                segment_prefix_of(op),
                [DEREFERENCE, phrase + [[VALUE, value, value_position]]]
            ])

        elif kind == OPERAND_TYPE_REGISTER:
            register_bank = self.REGISTERS[self.as_byte_value(op.dtyp)]
            operand.append([REGISTER, register_bank[op.reg], 0])

        elif kind == OPERAND_TYPE_MEMORY:

            addr_name = self.get_address_name(op.addr)
            if addr_name:
                value = arch.ExpressionNamedValue(long(op.addr), addr_name)
            else:
                value = op.addr

            if uses_sib(op):
                phrase = build_phrase(op)
                target = phrase + [[VALUE, value, len(phrase) - 1]]
            else:
                target = [VALUE, value, 0]

            operand.append([segment_prefix_of(op), [DEREFERENCE, target]])

        elif kind == OPERAND_TYPE_IMMEDIATE:

            # Truncate the immediate to the operand's width; any other
            # width keeps the value untouched.
            width = self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]
            width_masks = (
                (arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_1, 0xff),
                (arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_2, 0xffff),
                (arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_4, 0xffffffff),
            )
            for width_node, mask in width_masks:
                if width == width_node:
                    value = op.value & mask
                    break
            else:
                value = op.value

            operand.append([VALUE, value, 0])

        elif kind in (OPERAND_TYPE_NEAR, OPERAND_TYPE_FAR):

            addr_name = self.get_address_name(op.addr)
            if addr_name:
                value = arch.ExpressionNamedValue(long(op.addr), addr_name)
            else:
                value = op.addr

            # A string prefix names a segment register; a numeric one is
            # emitted through the segment operator. With no prefix at all
            # nothing is appended.
            seg_prefix = segment_prefix_of(op)
            if isinstance(seg_prefix, str):
                operand.append([seg_prefix, [VALUE, value, 0]])
            elif isinstance(seg_prefix, (int, long)):
                operand.append([
                    self.NODE_TYPE_OPERATOR_SEGMENT_GEN,
                    [VALUE, seg_prefix, 0],
                    [VALUE, value, 1]
                ])

        elif kind == OPERAND_TYPE_PHRASE:
            if uses_sib(op):
                phrase = build_phrase(op)

                # Seen in GCC compiled code (sshd): [esp] parses into a sum
                # with a single register, i.e. "[" -> "+" -> "esp". When a
                # name (such as a stack variable) is attached, a named zero
                # is added so the expression can be substituted later;
                # otherwise the redundant addition is dropped.
                #
                # NOTE(review): an empty phrase would raise IndexError on
                # phrase[0] -- confirm it cannot occur for PHRASE operands.
                if phrase[0] == PLUS and len(phrase) == 2:
                    var_name = self.get_operand_stack_variable_name(
                        address, op, idx)
                    if var_name:
                        named_zero = arch.ExpressionNamedValue(0, var_name)
                        phrase.append([VALUE, named_zero, 1])
                    else:
                        phrase = phrase[1]

                operand.append(
                    [segment_prefix_of(op), [DEREFERENCE, phrase]])

            else:
                operand.append([
                    segment_prefix_of(op),
                    [
                        DEREFERENCE,
                        [
                            REGISTER,
                            self.REGISTERS[addressing_mode + 1][op.phrase], 0
                        ]
                    ]
                ])

        else:
            # Processor specific operand types map straight onto a register
            # family, numbered by op.reg.
            special_registers = (
                (OPERAND_TYPE_IDPSPEC0, 'tr%d'),     # TR* registers
                (OPERAND_TYPE_IDPSPEC1, 'dr%d'),     # DR* registers
                (OPERAND_TYPE_IDPSPEC2, 'cr%d'),     # CR* registers
                (OPERAND_TYPE_IDPSPEC3, 'st(%d)'),   # FPU register stack
                (OPERAND_TYPE_IDPSPEC4, 'mm%d'),     # mm* registers
                (OPERAND_TYPE_IDPSPEC5, 'xmm%d'),    # xmm* registers
            )
            for special_type, name_format in special_registers:
                if kind == special_type:
                    operand.append([REGISTER, name_format % op.reg, 0])
                    break

        # A list holding nothing but the width, i.e. ['b2'], may come out
        # of here. IDA has been seen to pretend there's a first operand
        # like this:
        #
        # fld ['b2'], ['b4', ['ds', ['[', ['+', ['$', 'edx'], [...]]]]]
        #
        # Returning None for those cases was tried and is left disabled.

        return operand
Exemplo n.º 3
0
    def single_operand_parser(self, address, op, idx):
        """Parse a PPC operand into its AST (nested list) form.

        Args:
            address: Address of the instruction that owns the operand.
            op: IDA operand object. Depending on ``op.type`` the fields
                ``dtyp``, ``reg``, ``addr``, ``value``, ``specflag1`` and
                ``specflag2`` are read.
            idx: Position of the operand within the instruction; it is only
                used for the stack-variable lookup of displacement operands.

        Returns:
            None for OPERAND_TYPE_NO_OPERAND. For OPERAND_TYPE_IDPSPEC2 a
            list holding one to three ``[width, node]`` operands. For every
            other type a single list whose first entry is the operand width
            followed by the nodes appended for that type (nothing is
            appended for an operand type that is not handled here).
        """

        def constraint_value(value):
            # Anything above 2**16 is taken to be a negative number stored
            # as a 32-bit two's complement value and converted accordingly.
            if value > 2**16:
                return -(2**32 - value)
            return value

        def register_node(raw_register, position):
            # Register node for a raw index into self.REGISTERS.
            return [
                self.NODE_TYPE_REGISTER,
                self.REGISTERS[self.as_byte_value(raw_register)], position
            ]

        def masked_byte_node(raw_byte):
            # Value node for a byte-sized operand field, kept within the
            # operand's size.
            return [
                self.NODE_TYPE_VALUE,
                self.as_byte_value(raw_byte) & mask, 0
            ]

        if op.type == OPERAND_TYPE_NO_OPERAND:
            return None

        # The root entry of the AST is always the width of the operand.
        dtyp = self.as_byte_value(op.dtyp)
        width = self.OPERAND_WIDTH[dtyp]
        operand = [width]

        # Mask used to keep immediate values within the operand's size.
        if dtyp == 0:
            mask = 0xff
        elif dtyp == 1:
            mask = 0xffff
        else:
            mask = 0xffffffff

        # Compose the rest of the AST
        #

        if op.type == OPERAND_TYPE_DISPLACEMENT:

            # A displacement operand might refer to a stack variable; ask
            # for its name only when IDA flags this operand as one.
            var_name = None
            flags = idc.GetFlags(address)
            if (idx == 0 and idc.isStkvar0(flags)) or (
                    idx == 1 and idc.isStkvar1(flags)):
                var_name = self.get_operand_stack_variable_name(
                    address, op, idx)

            phrase = [self.NODE_TYPE_OPERATOR_PLUS, register_node(op.reg, 0)]

            if var_name:
                value = arch.ExpressionNamedValue(long(op.addr), var_name)
            else:
                value = constraint_value(op.addr)

            operand.append([
                self.NODE_TYPE_DEREFERENCE,
                phrase + [[self.NODE_TYPE_VALUE, value, 1]]
            ])

        elif op.type == OPERAND_TYPE_REGISTER:
            operand.append(register_node(op.reg, 1))

        elif op.type == OPERAND_TYPE_MEMORY:

            addr_name = self.get_address_name(op.addr)
            if addr_name:
                value = arch.ExpressionNamedValue(long(op.addr), addr_name)
            else:
                value = op.addr

            operand.append([
                self.NODE_TYPE_DEREFERENCE, [self.NODE_TYPE_VALUE, value, 0]
            ])

        elif op.type == OPERAND_TYPE_IMMEDIATE:
            operand.append([self.NODE_TYPE_VALUE, op.value & mask, 0])

        elif op.type in (OPERAND_TYPE_NEAR, OPERAND_TYPE_FAR):

            addr_name = self.get_address_name(op.addr)
            if addr_name:
                value = arch.ExpressionNamedValue(long(op.addr), addr_name)
            else:
                value = op.addr

            operand.append([self.NODE_TYPE_VALUE, value, 0])

        elif op.type == OPERAND_TYPE_PHRASE:
            # Not supported: emit a placeholder symbol describing the operand.
            # The parenthesised form prints the same text on Python 2 and
            # also parses on Python 3.
            print('***Dunno how to parse PHRASE')
            operand.append([
                self.NODE_TYPE_SYMBOL,
                'UNK_PHRASE(val:%d, reg:%d, type:%d)' %
                (op.value, self.as_byte_value(op.reg), op.type), 0
            ])

        elif op.type == OPERAND_TYPE_IDPSPEC0:
            # Special Purpose Registers, looked up by operand value.
            register = self.SPR_REGISTERS.get(
                op.value, 'UNKNOWN_REGISTER(val:%x)' % op.value)
            operand.append([self.NODE_TYPE_REGISTER, register, 0])

        elif op.type == OPERAND_TYPE_IDPSPEC1:
            # Two registers: one from op.reg, one from op.specflag1.
            operand.append(register_node(op.reg, 1))
            operand.append(register_node(op.specflag1, 2))

        elif op.type == OPERAND_TYPE_IDPSPEC2:
            # IDPSPEC2 is the operand type for all rlwinm and rlwnm
            # instructions, which are in general
            # "op reg, reg, byte, byte, byte" or equivalent. Simplified
            # mnemonics sometimes take fewer than five arguments.
            #
            # The object holding the decoded instruction differs between
            # IDA pre-5.7 and 5.7 onwards; this check handles both (see the
            # similar construct in arch.py).
            if hasattr(idaapi, 'cmd'):
                idaapi.decode_insn(address)
                ida_instruction = idaapi.cmd
            else:
                idaapi.ua_code(address)
                ida_instruction = idaapi.cvar.cmd
            auxpref = ida_instruction.auxpref

            # Flag 0x0020 (labelled "SH" by the old debug prints): op.reg
            # holds an immediate rather than a register index.
            if auxpref & 0x0020:
                first = [width, masked_byte_node(op.reg)]
            else:
                first = [width, register_node(op.reg, 0)]

            # This operand type expands into a list of operands, so the
            # single-operand list built above is replaced.
            operand = [first]

            # Flag 0x0040 ("MB"): op.specflag1 carries a further immediate.
            if auxpref & 0x0040:
                operand.append([width, masked_byte_node(op.specflag1)])

            # Flag 0x0080 ("ME"): op.specflag2 carries a further immediate.
            if auxpref & 0x0080:
                operand.append([width, masked_byte_node(op.specflag2)])

        elif op.type == OPERAND_TYPE_IDPSPEC3:
            # CR registers
            operand.append([
                self.NODE_TYPE_REGISTER,
                self.CR_REGISTERS[self.as_byte_value(op.reg)], 0
            ])

        elif op.type == OPERAND_TYPE_IDPSPEC4:
            # The bit in the CR to check for; stored as a raw number.
            operand.append(
                [self.NODE_TYPE_REGISTER, self.as_byte_value(op.reg), 0])

        elif op.type == OPERAND_TYPE_IDPSPEC5:
            # Device Control Register, implementation specific
            operand.append(
                [self.NODE_TYPE_REGISTER, 'DCR(%x)' % op.value, 0])

        return operand
Exemplo n.º 4
0
    def single_operand_parser(self, address, op, idx):
        """Translate one metapc operand into its expression tree.

        The tree is a nested list.  Its root entry is the operand width
        looked up in ``self.OPERAND_WIDTH``; at most one child describing
        the operand itself is appended after it.

        ``address`` is the address of the instruction, ``op`` the operand
        being parsed and ``idx`` the position of the operand within the
        instruction (used to pick the matching stack-variable flag).

        Returns ``None`` when ``op`` has type ``OPERAND_TYPE_NO_OPERAND``,
        otherwise the list described above.  When the operand type matches
        none of the handled types the list holds the width entry only.

        Raises ``Exception`` when a SIB phrase must be decoded inside a
        segment whose bitness is not 1 (only 32bit addressing is handled).
        """

        # Small node builders
        #
        def register_node(name, position):
            return [self.NODE_TYPE_REGISTER, name, position]

        def value_node(value, position):
            return [self.NODE_TYPE_VALUE, value, position]

        def uses_sib(candidate):
            # Does the operand use the SIB byte?
            return self.as_byte_value(candidate.specflag1) == 1

        def segment_prefix(candidate):
            """Return the segment part of the operand.

            The result is ``None`` when the upper 16 bits of ``specval``
            are zero, the register name followed by ':' (a string) when
            those bits index ``self.REGISTERS[0]``, and otherwise the
            lower 16 bits of ``specval`` as a number.
            """
            register_index = candidate.specval >> 16
            if register_index == 0:
                return None
            if register_index < len(self.REGISTERS[0]):
                return self.REGISTERS[0][register_index] + ':'
            return candidate.specval & 0xffff

        def symbolic_address(target):
            # Wrap the address together with its name when it has one,
            # otherwise hand back the bare address.
            label = self.get_address_name(target)
            if label:
                return arch.ExpressionNamedValue(long(target), label)
            return target

        def sib_phrase(candidate, has_displacement=False):
            """Build the sum expressed by the SIB byte of the operand.

            The SIB byte is read from ``specflag2`` and laid out as
            ``(scale_2 << 6) | (index_3 << 3) | base_3``.  The MOD/RM byte
            is not consulted here; the caller states through
            ``has_displacement`` whether a displacement accompanies the
            phrase.

            Returns a list starting with the plus operator followed by the
            summands, or an empty list when there is nothing to add up.
            The last entry of every node is its preferred display position.
            """
            if idaapi.getseg(address).bitness != 1:
                raise Exception(
                    'Not yet handling addressing modes other than 32bit!')

            base_reg = self.SIB_BASE_REGISTERS[
                self.as_byte_value(candidate.specflag2) & 0x7]
            index_reg = self.SIB_INDEX_REGISTERS[
                (self.as_byte_value(candidate.specflag2) >> 3) & 0x7]
            scale = (None, 2, 4, 8)[
                self.as_byte_value(candidate.specflag2) >> 6]

            summands = []

            # Base term: the decoded base register or, when the SIB byte
            # names none but the operand carries a displacement, ebp.
            if base_reg != '':
                summands.append(register_node(base_reg, 0))
            elif has_displacement:
                summands.append(register_node('ebp', 0))

            # Index term: it is displayed right after the base term, if any.
            position = len(summands)
            if scale:
                # A scaled index is only left out when there is a base
                # register and no index register.
                if not (base_reg != '' and index_reg == ''):
                    summands.append([
                        self.NODE_TYPE_OPERATOR_TIMES,
                        register_node(index_reg, 0),
                        value_node(scale, 1),
                        position])
            elif index_reg != '':
                summands.append(register_node(index_reg, position))

            if not summands:
                return []
            return [self.NODE_TYPE_OPERATOR_PLUS] + summands

        # Operand parsing
        #
        if op.type == OPERAND_TYPE_NO_OPERAND:
            return None

        addressing_mode = idaapi.getseg(address).bitness

        # The root entry of the AST is always the width of the operand
        operand = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]

        op_type = op.type

        if op_type == OPERAND_TYPE_DISPLACEMENT:

            # A displacement operand might refer to a stack variable; pick
            # up its name if the flags of the instruction say so.
            flags = idc.GetFlags(address)
            refers_to_stack = (
                (idx == 0 and idc.isStkvar0(flags)) or
                (idx == 1 and idc.isStkvar1(flags)))

            var_name = None
            if refers_to_stack:
                var_name = self.get_operand_stack_variable_name(
                    address, op, idx)

            if uses_sib(op) is True:
                # when SIB byte set, process the SIB indexing
                phrase = sib_phrase(op, has_displacement=True)
            else:
                phrase = [
                    self.NODE_TYPE_OPERATOR_PLUS,
                    register_node(
                        self.REGISTERS[addressing_mode + 1][op.reg], 0)]

            if var_name:
                value = arch.ExpressionNamedValue(long(op.addr), var_name)
            else:
                value = op.addr

            # The position of the value depends on how many components
            # the phrase already has.
            prefix = segment_prefix(op)
            total = phrase + [value_node(value, len(phrase) - 1)]
            operand.append([prefix, [self.NODE_TYPE_DEREFERENCE, total]])

        elif op_type == OPERAND_TYPE_REGISTER:

            register_bank = self.REGISTERS[self.as_byte_value(op.dtyp)]
            operand.append(register_node(register_bank[op.reg], 0))

        elif op_type == OPERAND_TYPE_MEMORY:

            value = symbolic_address(op.addr)

            if uses_sib(op) is True:
                # when SIB byte set, process the SIB indexing
                phrase = sib_phrase(op)
                prefix = segment_prefix(op)
                inner = phrase + [value_node(value, len(phrase) - 1)]
            else:
                prefix = segment_prefix(op)
                inner = value_node(value, 0)

            operand.append([prefix, [self.NODE_TYPE_DEREFERENCE, inner]])

        elif op_type == OPERAND_TYPE_IMMEDIATE:

            # Clip the immediate to the width of the operand when that
            # width is one, two or four bytes.
            width = self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]

            if width == arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_1:
                mask = 0xff
            elif width == arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_2:
                mask = 0xffff
            elif width == arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_4:
                mask = 0xffffffff
            else:
                mask = None

            if mask is None:
                immediate = op.value
            else:
                immediate = op.value & mask

            operand.append(value_node(immediate, 0))

        elif op_type in (OPERAND_TYPE_NEAR, OPERAND_TYPE_FAR):

            value = symbolic_address(op.addr)

            # A string prefix is a segment register selector, a numeric
            # one is used as the segment part of a segment:offset pair.
            # Any other prefix leaves the operand with its width only.
            prefix = segment_prefix(op)
            if isinstance(prefix, str):
                operand.append([prefix, value_node(value, 0)])
            elif isinstance(prefix, (int, long)):
                operand.append([
                    self.NODE_TYPE_OPERATOR_SEGMENT_GEN,
                    value_node(prefix, 0),
                    value_node(value, 1)])

        elif op_type == OPERAND_TYPE_PHRASE:

            if uses_sib(op) is True:
                phrase = sib_phrase(op)

                # GCC has been observed (in sshd) to emit encodings that
                # parse into a sum with a single register and no other
                # summand, e.g. [esp] becoming "[" -> "+" -> "esp".
                # If a name such as a stack variable is attached to the
                # implicit zero, the zero is added to the sum so that the
                # name has an expression to alias.  Without a name the
                # redundant addition is dropped and only the register
                # node is kept.
                lone_summand = (
                    phrase[0] == self.NODE_TYPE_OPERATOR_PLUS and
                    len(phrase) == 2)
                if lone_summand:
                    var_name = self.get_operand_stack_variable_name(
                        address, op, idx)
                    if var_name:
                        phrase.append(value_node(
                            arch.ExpressionNamedValue(0, var_name), 1))
                    else:
                        phrase = phrase[1]

                prefix = segment_prefix(op)
                inner = phrase
            else:
                prefix = segment_prefix(op)
                inner = register_node(
                    self.REGISTERS[addressing_mode + 1][op.phrase], 0)

            operand.append([prefix, [self.NODE_TYPE_DEREFERENCE, inner]])

        else:
            # Processor specific operand types, each naming a numbered
            # register: TR*, DR*, CR*, the FPU register stack, MMX and XMM.
            special_registers = (
                (OPERAND_TYPE_IDPSPEC0, 'tr%d'),
                (OPERAND_TYPE_IDPSPEC1, 'dr%d'),
                (OPERAND_TYPE_IDPSPEC2, 'cr%d'),
                (OPERAND_TYPE_IDPSPEC3, 'st(%d)'),
                (OPERAND_TYPE_IDPSPEC4, 'mm%d'),
                (OPERAND_TYPE_IDPSPEC5, 'xmm%d'),
            )
            for special_type, template in special_registers:
                if op_type == special_type:
                    operand.append(register_node(template % op.reg, 0))
                    break

        # NOTE: when nothing but the width was collected, e.g. ['b2'], the
        # list is still returned as is.  Returning None in that case was
        # considered, because IDA has been seen to pretend there is a
        # first operand like this:
        #
        # fld ['b2'], ['b4', ['ds', ['[', ['+', ['$', 'edx'], [...]]]]]
        #
        # but that hack is not enabled.
        return operand