Пример #1
0
def filter_for_data(name_list):
    """
    Keep only the names whose address is marked as data in the database.

    :param name_list: iterable of symbol names to look up.
    :return: list of the names, in their original order, that resolve to an
        address whose flags describe a data item.
    """
    filtered = []
    for name in name_list:
        ea = idc.get_name_ea_simple(name)
        if ea == idc.BADADDR:
            # the name does not exist in the database
            continue
        # is_data() tests an item's flags word, not its address, so the flags
        # have to be fetched before the check.
        if idc.is_data(idc.get_full_flags(ea)):
            filtered.append(name)
    return filtered
Пример #2
0
def dump_type(ea):
    """
    Classify the item at ``ea`` from its flags.

    :param ea: address of the item to classify.
    :return: ``"block"`` for code for which ``idaapi.get_func`` finds a
        function, ``"code"`` for other code, ``"data"`` for data items and
        ``"unexplored"`` for everything else.
    """
    item_flags = idaapi.get_flags(ea)
    if idc.is_code(item_flags):
        # code is split further depending on whether a function covers it
        if idaapi.get_func(ea):
            return "block"
        return "code"
    return "data" if idc.is_data(item_flags) else "unexplored"
Пример #3
0
    def is_data(self):
        """
            Tell whether this element is classified as data.

            Delegates to ``idc.is_data`` using the ``flags`` of this element.

            :return: True if current element is data, False otherwise.
            :rtype: bool
        """
        elt_flags = self.flags
        return idc.is_data(elt_flags)
Пример #4
0
def iter_dynamic_functions() -> Iterable[Tuple[int, str]]:
    """
    Iterates the dynamically resolved function signatures.

    Walks every head of the ``.data`` segment and keeps the data items that
    are not string literals and that ``_is_func_type`` accepts. Nothing is
    yielded when the database has no ``.data`` segment.

    :yield: (ea, name)
    """
    data_segment = ida_segment.get_segm_by_name(".data")
    if data_segment is None:
        # no .data segment in this database, hence nothing to iterate
        return

    # Look for data elements in the .data segment in which IDA has placed
    # a function signature element on.
    for ea in idautils.Heads(start=data_segment.start_ea,
                             end=data_segment.end_ea):
        flags = ida_bytes.get_flags(ea)
        if not idc.is_data(flags) or idc.is_strlit(flags):
            continue
        if _is_func_type(ea):
            yield ea, ida_name.get_name(ea)
Пример #5
0
    def _getDataDisasm(self):
        """
        You cannot get array data using GetDisasm. The disassembly has to be extracted differently.
        This identifies the data in question, and gets its disassembly
        :return: the disassembly of the data item
        """
        # get_full_flags is the IDA 7.x name of the legacy GetFlags; it matches
        # the other 7.x calls used in this method (is_enum0, get_item_size, ...)
        flags = idc.get_full_flags(self.ea)
        # First, do the easy cases that just work with GetDisasm
        # TODO: change this so it accounts for arrays also
        if idc.is_enum0(flags):
            return self._filterComments(idc.GetDisasm(self.ea))

        # a primitive is a data item whose declared type and item size agree
        isPrimitive = idc.is_data(flags) and (
            idc.is_byte(flags) and idc.get_item_size(self.ea) == 1
            or idc.is_word(flags) and idc.get_item_size(self.ea) == 2
            or idc.is_dword(flags) and idc.get_item_size(self.ea) == 4)

        if not isPrimitive:
            # The weird case... an array. I don't know why it's weird. IDA doesn't like it!
            # It is assumed this is an array, but the type is unknown. Imply type based on disasm of first line!
            # analysis on the array is based on the very first line
            disasm = idc.GetDisasm(self.ea)
            if ';' in disasm:
                # drop the trailing comment so its words are not counted as elements
                disasm = disasm[:disasm.index(';')]
            firstLineSplitDisasm = list(filter(None, re.split('[ ,]', disasm)))
            dataType = firstLineSplitDisasm[0]
            # -1 so the type token (DCB, DCD, ...) is not counted as an element
            return self._getArrDisasm(len(firstLineSplitDisasm) - 1, dataType)

        # normal case where an int is not misread as a reference
        content = self.getContent()
        if self.getXRefsFrom()[1] and self.isPointer(content):
            disasm = idc.GetDisasm(self.ea)
            contentData = Data(content)
            # If it's a struct member, replace it with its hex, but keep the information.
            # Only a '.' located before any comment counts as a member access.
            if '.' in disasm and (';' not in disasm
                                  or '.' in disasm[:disasm.index(';')]):
                disasm = 'DCD %s+0x%X // %s' % (contentData.getName(),
                                                content - contentData.ea,
                                                disasm[len('DCD '):])
        elif ida_bytes.is_manual(flags, 0):
            # Manual forms put in IDA, just grab it. (This is for cases where computations are applied to data)
            disasm = idc.GetDisasm(self.ea)
        else:
            # build the disassembly: this is for non-pointer symbols found in IDA (ex: word_0)
            if idc.is_byte(flags):
                op = 'DCB'
            elif idc.is_word(flags):
                op = 'DCW'
            else:
                op = 'DCD'
            disasm = op + ' ' + '0x%X' % content

        return self._filterComments(disasm)
Пример #6
0
    def search_str_addr(s, start_ea=None, end_ea=None, down=True, nxt=True):
        """
            Static method for searching a string. In practice this performs
            a byte search on the binary: the string passed in argument is
            encoded as a sequence of hexadecimal byte values and only matches
            located on data elements are returned.

            .. warning::

                This is different from the idapython ``FindText`` method as
                this will only search for bytes in the binary (and more
                precisely the data)! It should also be way faster.

            .. todo:: this should allow to handle encoding.

            :param str s: The C string for which to search. If the string
                is NULL terminated the NULL byte must be included.
            :param start_ea: The address at which to start the search, if
                ``None`` the current address will be used.
            :param end_ea: The address at which to stop the search, if
                ``None`` the maximum or minimum (depending on searching up or
                down) will be used.
            :param down: If True (the default) search below the given
                address, if False search above.
            :param nxt: If True (the default) the current element will not
                be included in the search.
            :return: The address at which the string was found. It will
                always be data. If no matching element was found None will be
                returned.
        """
        # one hexadecimal token per character, separated by spaces
        pattern = " ".join("{:X}".format(ord(ch)) for ch in s)

        # hop from data element to data element so that the byte search is
        #   never started from something which is not data
        addr = BipElt.next_data_addr(start_ea, down=down)
        while addr is not None:
            addr = BipElt.search_bytes_addr(pattern,
                                            start_ea=addr,
                                            end_ea=end_ea,
                                            down=down,
                                            nxt=nxt)
            if addr is None:
                break  # no more match
            if idc.is_data(ida_bytes.get_full_flags(addr)):
                return addr  # match located on a data element
            # the match is not on data: restart from the next data element
            addr = BipElt.next_data_addr(addr, down=down)
        return None
Пример #7
0
 def _is_this_elt(cls, ea):
     """
         Check if the address ``ea`` is mapped and flagged as data or unknown.

         :param ea: The address to test.
         :return: The falsy result of ``BipElt.is_mapped`` when the address
             is not mapped, otherwise the result of the data/unknown check
             on the flags of ``ea``.
     """
     mapped = BipElt.is_mapped(ea)
     if not mapped:
         return mapped
     elt_flags = ida_bytes.get_full_flags(ea)
     return idc.is_data(elt_flags) or idc.is_unknown(elt_flags)
Пример #8
0
    def _getDataDisasm(self, ea, elemsPerLine=-1):
        """
        You cannot get array data using GetDisasm. The disassembly has to be extracted differently.
        This identifies the data in question, and gets its disassembly
        :param ea: the effective address of the item to get the disassembly of
        :param elemsPerLine: if 0, all elements are put on one line. if <0 (default), it is parsed from the
                             first disassembly line of the item. otherwise, it's n.
        :return: the disassembly of the data item
        """
        # First, do the easy cases that just work with GetDisasm.
        # get_full_flags is the IDA 7.x name of the legacy GetFlags.
        flags = idc.get_full_flags(ea)
        if idc.is_data(flags) and (
                idc.is_byte(flags) and idc.get_item_size(ea) == 1
                or idc.is_word(flags) and idc.get_item_size(ea) == 2
                or idc.is_dword(flags) and idc.get_item_size(ea) == 4):
            # normal case where an int is not misread as a reference
            data = Data(ea)
            content = data.getContent()
            if self.isPointer(content):
                disasm = idc.GetDisasm(ea)  # very simple, this works.
            else:
                # build the disassembly: this is for non-pointer symbols found in IDA (ex: word_0)
                if idc.is_byte(flags):
                    op = 'DCB'
                elif idc.is_word(flags):
                    op = 'DCW'
                else:
                    op = 'DCD'
                disasm = op + ' ' + '0x%X' % content
            return self._filterComments(disasm)

        # The weird case... an array. I don't know why it's weird. IDA doesn't like it!
        # It is assumed this is an array, but the type is unknown. Imply type based on disasm of first line!
        # analysis on the array is based on the very first line
        firstLineSplitDisasm = list(
            filter(None, re.split('[ ,]', idc.GetDisasm(ea))))
        dataType = firstLineSplitDisasm[0]

        # Grab all of the elements in the array
        # NOTE(review): content, comment and xrefs are read from self while the disassembly
        # is read from ea - presumably both designate the same item; confirm against callers
        arr = self.getContent()

        # determine the number of elements per line; a negative value (the default) means it is parsed instead
        if elemsPerLine < 0:
            commentWords = len(
                list(filter(None, re.split('[ ,]', self.getComment()))))
            # -1 to not include type, ex: DCB, DCD... But comments can exist on the first line too!
            elemsPerLine = len(firstLineSplitDisasm) - 1 - commentWords
        # 0 means everything on one line. This also catches a parsed count of 0, which would
        # otherwise raise ZeroDivisionError in the modulo below.
        if elemsPerLine == 0:
            elemsPerLine = len(arr)

        # whether to display a name, or data, is determined by the xrefs from this item!
        xrefs = self.getXRefsFrom()

        # only bother to check for names if it's an array of words
        wordArray = dataType == 'DCD'

        # generate disassembly for array
        disasm = dataType + ' '
        elemIndex = 0
        for elem in arr:
            # every new line starts with a tab and repeats the type
            if disasm[-1] == '\n':
                disasm += '\t%s' % (dataType + ' ')
            # if it's a pointer and defined as an xref, display its label not just the number
            # TODO: isPointer is a bottleneck call, so prefer to call it last
            if wordArray and (elem in xrefs[1] or elem
                              in xrefs[0]) and self.isPointer(elem):
                # TODO: maybe you should get the name of Data.Data(elem) also, for +index
                elemEA = Data(elem).ea
                # 7.x equivalent of the legacy idc.Name(ea)
                name = idc.get_name(elemEA, idc.GN_VISIBLE)
                if name:
                    offset = elem - elemEA
                    suffix = '+%d' % offset if offset != 0 else ''
                    disasm += "%s%s, " % (name, suffix)
                else:
                    disasm += '0x%X, ' % elem
            else:
                disasm += '0x%X, ' % elem

            elemIndex += 1

            # if we reach the number of elements a line, we add a new line
            if elemIndex % elemsPerLine == 0:
                # drop the dangling ", " before breaking the line
                if disasm.endswith(', '):
                    disasm = disasm[:-2]
                disasm += "\n"

        # remove ", " at the end if present
        if disasm.endswith(', '):
            disasm = disasm[:-2]
        # remove new line at the end if present
        if disasm.endswith('\n'):
            disasm = disasm[:-1]

        return disasm
Пример #9
0
    def getContent(self, bin=False):
        """
        reads bytes at the EA of the data item and constructs its content representation based on its type
        :param bin: if True, array of bytes is always passed back
        :return: for code and primitive data, the first element produced by _combineBytes; for arrays, the
                 whole _combineBytes result; for unknown bytes, the byte value; a list of byte values whenever
                 bin is True. -1 is returned for structures and for flags matching none of the cases.
        """
        # IDA 7.x names are used throughout (get_full_flags, is_code, is_struct, is_data, is_unknown)
        flags = idc.get_full_flags(self.ea)
        output = -1

        if idc.is_code(flags):
            # an instruction is also data, its bytes are gathered and combined into one integer
            size = self.getSize()
            # bytearray yields integer byte values on both Python 2 and Python 3
            rawBytes = list(bytearray(idc.get_bytes(self.ea, size)))
            # either return one discrete instruction int, or an array of bytes representing it
            if bin:
                output = rawBytes
            else:
                output = self._combineBytes(rawBytes, size)[0]

        elif idc.is_struct(flags):
            # structures are not decoded, the -1 default is returned
            pass
        elif idc.is_data(flags):
            size = self.getSize()
            # normal case, build up a u8, u16, or u32
            if (idc.is_byte(flags) and size == 1
                    or idc.is_word(flags) and size == 2
                    or idc.is_dword(flags) and size == 4):
                rawBytes = list(bytearray(idc.get_bytes(self.ea, size)))
                # either return one discrete primitive, or the array of bytes representing it
                if bin:
                    output = rawBytes
                else:
                    output = self._combineBytes(rawBytes, size)[0]
            # The weird case... an array. I don't know why it's weird. IDA doesn't like it!
            else:
                # It is assumed this is an array, but the type is unknown. Imply type based on disasm of first line!
                firstLineSplitDisasm = list(
                    filter(None, re.split('[ ,]', idc.GetDisasm(self.ea))))
                dataType = firstLineSplitDisasm[0]

                # Grab all of the bytes in the array
                rawBytes = list(bytearray(
                    idc.get_bytes(self.ea, idc.get_item_size(self.ea))))

                # figure out datatype to convert the array to be of;
                # if type unknown, just show it as an array of bytes
                bytesPerElem = {'DCB': 1, 'DCW': 2, 'DCD': 4}.get(dataType, 1)

                # create new array with correct type, or just return the bytes
                if bin:
                    output = rawBytes
                else:
                    output = self._combineBytes(rawBytes, bytesPerElem)
        elif idc.is_unknown(flags):
            # unknown data elements are always 1 byte in size!
            output = ord(idc.get_bytes(self.ea, 1))
            if bin:
                output = [output]
        return output