def _getDataDisasm(self):
    """
    Produce the disassembly text for this data item.

    idc.GetDisasm cannot render a whole array, so arrays are delegated to
    self._getArrDisasm. Single byte/word/dword items are rendered here, either
    from IDA's own text or by building a DCB/DCW/DCD line from the content.

    :return: the disassembly of the data item
    """
    ea = self.ea
    flags = idc.GetFlags(ea)

    # TODO: change this so it accounts for arrays also
    if idc.is_enum0(flags):
        # enum operands are rendered by IDA directly
        return self._filterComments(idc.GetDisasm(ea))

    # a lone u8/u16/u32: the item size matches the width of its data type
    isPrimitive = idc.is_data(flags) and (
        idc.is_byte(flags) and idc.get_item_size(ea) == 1
        or idc.is_word(flags) and idc.get_item_size(ea) == 2
        or idc.is_dword(flags) and idc.get_item_size(ea) == 4)

    if not isPrimitive:
        # Anything else is assumed to be an array of unknown type. The element
        # type and the number of elements per line are both implied from the
        # first disassembly line, with any trailing ';' comment cut off.
        firstLine = idc.GetDisasm(ea)
        if ';' in firstLine:
            firstLine = firstLine[:firstLine.index(';')]
        tokens = [tok for tok in re.split('[ ,]', firstLine) if tok]
        # tokens[0] is the directive (ex: DCB, DCD), the rest are the elements
        return self._getArrDisasm(len(tokens) - 1, tokens[0])

    content = self.getContent()
    if self.getXRefsFrom()[1] and self.isPointer(content):
        # the value is a pointer with an xref: IDA's own text is used
        text = idc.GetDisasm(ea)
        target = Data(content)
        # A '.' before any ';' comment marks a struct member reference: replace
        # it with name+offset, but keep IDA's original operand as a comment.
        if '.' in text.split(';', 1)[0]:
            text = 'DCD %s+0x%X // %s' % (target.getName(),
                                          content - target.ea,
                                          text[len('DCD '):])
    elif ida_bytes.is_manual(flags, 0):
        # Manual forms put in IDA, just grab it.
        # (This is for cases where computations are applied to data)
        text = idc.GetDisasm(ea)
    else:
        # non-pointer symbols found in IDA (ex: word_0): emit the raw value
        if idc.is_byte(flags):
            directive = 'DCB'
        elif idc.is_word(flags):
            directive = 'DCW'
        else:
            directive = 'DCD'
        text = directive + ' ' + '0x%X' % content
    return self._filterComments(text)
def _get_type_str(flag):
    """
    Translate IDA item flags into the name of an unsigned C integer type.

    :param flag: the flags value to test with the idc.is_* predicates
    :return: "unsigned char", "unsigned short", "unsigned int" or
             "unsigned long long" for byte/word/dword/qword flags respectively,
             and "unknown" when none of those predicates match
    """
    # checked in order; the first matching predicate decides the name
    width_names = (
        (idc.is_byte, "unsigned char"),
        (idc.is_word, "unsigned short"),
        (idc.is_dword, "unsigned int"),
        (idc.is_qword, "unsigned long long"),
    )
    for matches, c_name in width_names:
        if matches(flag):
            return c_name
    return "unknown"
def getTypeName(self):
    # type: () -> str
    """
    Describe the type of this item as a short string.

    :return: "code" for instructions; "u8", "u16", "u32" or "void*" for single
             byte/word/dword items; "u8[n]", "u16[n]", "u32[n]" or "void*[n]"
             for arrays; the type string reported by idc.get_type for struct
             items; "INVALID" when the type cannot be determined.
    """
    flags = idc.GetFlags(self.ea)

    if idc.isCode(flags):
        return "code"

    # Struct items are tested before the generic data check. In the previous
    # ordering the struct branch came after isData and was never reached, and
    # it assigned the idc.GetStrucName function object instead of a string.
    # NOTE(review): this assumes idc.get_type yields the struct's name for a
    # struct item and None when no type is recorded -- confirm in IDA.
    if idc.isStruct(flags):
        return idc.get_type(self.ea) or "INVALID"

    if idc.isData(flags):
        size = self.getSize()
        if idc.is_byte(flags) and size == 1:
            return "u8"
        if idc.is_word(flags) and size == 2:
            return "u16"
        if idc.is_dword(flags) and size == 4:
            return "void*" if self.isPointer(self.getContent()) else "u32"

        # Anything else is assumed to be an array of unknown type: the element
        # type is implied from the directive on the first disassembly line.
        tokens = [tok for tok in re.split('[ ,]', idc.GetDisasm(self.ea)) if tok]
        directive = tokens[0] if tokens else None  # no text -> "INVALID" below
        if directive == "DCB":
            return "u8[%d]" % size
        if directive == "DCW":
            return "u16[%d]" % (size // 2)
        if directive == "DCD":
            elemType = "void*" if self.hasPointer() else "u32"
            return "%s[%d]" % (elemType, size // 4)
        return "INVALID"

    if idc.isUnknown(flags):
        return "u8"

    return "INVALID"
def _read_struct_member_once(ea, flags, size, member_sid, member_size, asobject):
    """Read part of a struct member for _read_struct_member.

    Dispatches on the member's flags and returns a ``(value, size_read)`` tuple:
    integers go through read_word with their fixed width, string literals are
    read as ``size`` raw bytes, floats and doubles use the idc readers, and
    nested structs are read with read_struct. When no predicate matches the
    result is ``(None, size)``.
    """
    # fixed-width integer kinds, tested in the same order as their widths grow
    integer_widths = (
        (idc.is_byte, 1),
        (idc.is_word, 2),
        (idc.is_dword, 4),
        (idc.is_qword, 8),
        (idc.is_oword, 16),
    )
    for is_kind, width in integer_widths:
        if is_kind(flags):
            return read_word(ea, width), width

    if idc.is_strlit(flags):
        return idc.GetManyBytes(ea, size), size
    if idc.is_float(flags):
        return idc.Float(ea), 4
    if idc.is_double(flags):
        return idc.Double(ea), 8
    if idc.is_struct(flags):
        nested = read_struct(ea, sid=member_sid, asobject=asobject)
        return nested, member_size
    return None, size
def _getDataDisasm(self, ea, elemsPerLine=-1):
    """
    Build the disassembly text of a data item.

    You cannot get array data using GetDisasm, so arrays are rebuilt element by
    element. Single byte/word/dword items are rendered directly.

    :param ea: the effective address of the item to get the disassembly of
    :param elemsPerLine: if 0, all elements go on one line. if <0, the count is
           parsed from the first disassembly line in the database. otherwise,
           it's n. A parsed count that is not positive also falls back to one
           line.
    :return: the disassembly of the data item
    """
    flags = idc.GetFlags(ea)

    # First, do the easy cases: a lone u8/u16/u32
    if idc.is_data(flags) and (
            idc.is_byte(flags) and idc.get_item_size(ea) == 1
            or idc.is_word(flags) and idc.get_item_size(ea) == 2
            or idc.is_dword(flags) and idc.get_item_size(ea) == 4):
        content = Data(ea).getContent()
        if self.isPointer(content):
            # IDA's own text already shows the referenced name
            disasm = idc.GetDisasm(ea)
        else:
            # non-pointer symbols found in IDA (ex: word_0): emit the raw value
            if idc.is_byte(flags):
                op = 'DCB'
            elif idc.is_word(flags):
                op = 'DCW'
            else:
                op = 'DCD'
            disasm = op + ' ' + '0x%X' % content
        return self._filterComments(disasm)

    # Anything else is assumed to be an array of unknown type. The element type
    # is implied from the directive on the very first disassembly line.
    firstLineSplitDisasm = [tok for tok in re.split('[ ,]', idc.GetDisasm(ea)) if tok]
    dataType = firstLineSplitDisasm[0]

    # NOTE(review): the array path reads content, comment and xrefs from self,
    # while the scalar path above reads from ea -- confirm callers always pass
    # self.ea.
    arr = self.getContent()

    if elemsPerLine < 0:
        commentWords = len([word for word in re.split('[ ,]', self.getComment()) if word])
        # -1 to not include the directive (ex: DCB, DCD); comment words on the
        # first line are not elements either
        elemsPerLine = len(firstLineSplitDisasm) - 1 - commentWords
    if elemsPerLine <= 0:
        # Explicit 0 means "everything on one line". A parsed count of zero or
        # less would otherwise divide by zero or wrap lines arbitrarily.
        elemsPerLine = len(arr)

    # whether to display a name or a number is determined by the xrefs from this item
    xrefs = self.getXRefsFrom()
    # only bother to check for names if it's an array of words
    wordArray = dataType == 'DCD'

    elements = []
    for elem in arr:
        label = None
        # TODO: isPointer is a bottleneck call, so it is evaluated last
        if wordArray and (elem in xrefs[1] or elem in xrefs[0]) and self.isPointer(elem):
            # TODO: maybe you should get the name of Data.Data(elem) also, for +index
            elemEA = Data(elem).ea
            name = idc.Name(elemEA)
            if name:
                offset = elem - elemEA
                label = '%s%s' % (name, '+%d' % offset if offset != 0 else '')
        elements.append(label if label is not None else '0x%X' % elem)

    prefix = dataType + ' '
    if not elements:
        return prefix

    # every line starts with the directive; continuation lines are tab-indented
    lines = [prefix + ', '.join(elements[start:start + elemsPerLine])
             for start in range(0, len(elements), elemsPerLine)]
    return '\n\t'.join(lines)
def getContent(self, bin=False):
    """
    Read the bytes at the EA of the data item and build its content
    representation based on its type.

    :param bin: if True, the list of byte values is always passed back instead
           of the combined value(s)
    :return: for code and single byte/word/dword items, the first value produced
             by self._combineBytes over the item's bytes; for arrays, the full
             result of self._combineBytes with the element width implied by the
             disassembly; for unknown items, the single byte value; -1 for
             struct items and anything else. With bin=True, a list of byte
             values wherever bytes were read.
    """
    def readBytes(size):
        # bytearray yields ints for both a Python 2 str and a Python 3 bytes
        # object; ord() on each element only works on Python 2
        return list(bytearray(idc.get_bytes(self.ea, size)))

    flags = idc.GetFlags(self.ea)
    output = -1

    if idc.isCode(flags):
        # an instruction is also data: its bytes are gathered and either
        # returned as-is or combined into one integer
        size = self.getSize()
        raw = readBytes(size)
        output = raw if bin else self._combineBytes(raw, size)[0]
    elif idc.isStruct(flags):
        # struct contents are not decoded; the -1 default is returned
        pass
    elif idc.isData(flags):
        size = self.getSize()
        if idc.is_data(flags) and (
                idc.is_byte(flags) and size == 1
                or idc.is_word(flags) and size == 2
                or idc.is_dword(flags) and size == 4):
            # normal case, build up a u8, u16, or u32
            raw = readBytes(size)
            output = raw if bin else self._combineBytes(raw, size)[0]
        else:
            # Anything else is assumed to be an array of unknown type: the
            # element type is implied from the first disassembly line.
            tokens = [tok for tok in re.split('[ ,]', idc.GetDisasm(self.ea)) if tok]
            dataType = tokens[0] if tokens else ''
            # Grab all of the bytes in the array
            raw = readBytes(idc.get_item_size(self.ea))
            # if the type is unknown, just show it as an array of bytes
            bytesPerElem = {'DCB': 1, 'DCW': 2, 'DCD': 4}.get(dataType, 1)
            output = raw if bin else self._combineBytes(raw, bytesPerElem)
    elif idc.isUnknown(flags):
        # unknown data elements are always 1 byte in size!
        output = readBytes(1)[0]
        if bin:
            output = [output]
    return output