def __init__(self, binary):
    """Parse an RTD operand from *binary*.

    The first four bytes are read as ``grbit`` and select how the value that
    follows is stored in ``self.rtdVt``; ``self.lengthInBytes`` is set per
    branch.  For a ``grbit`` that matches no branch, neither ``rtdVt`` nor
    ``lengthInBytes`` is set.

    Fix: the string branches used to store the parsed string in
    ``self.rdtVT`` and then read ``self.rtdVt.doubleByte``, which raised
    AttributeError for every string operand.  The value is now stored in
    ``self.rtdVt`` like in all other branches.
    """
    self.grbit = littleEndian.readInt(binary, 0)

    if self.grbit in (0x0010, 0x0800):
        # value is a 4 byte integer
        self.rtdVt = littleEndian.readInt(binary, 4)
        self.lengthInBytes = 8
    elif self.grbit == 0x0004:
        # value is a 4 byte boolean, kept as raw bytes
        self.rtdVt = binary[4:8]
        self.lengthInBytes = 8
    elif self.grbit == 0x0001:
        # value is a 64 bit floating point number, kept as raw bytes
        self.rtdVt = binary[4:12]
        self.lengthInBytes = 12
    elif self.grbit in (0x0002, 0x1000):
        # value is a RDTOpenStr: 0x0002 MUST be less than 256 characters
        # long, 0x1000 MUST be at least 256 characters long.  Both are
        # parsed the same way.
        strLength = littleEndian.readInt(binary, 4)
        self.rtdVt = excel_structures.XLUnicodeStringNoCch(binary[8:], strLength)
        # NOTE(review): unlike the fixed-size branches this length does not
        # include the 8 bytes of grbit + strLength -- confirm with callers.
        self.lengthInBytes = (strLength << self.rtdVt.doubleByte) + 1
def __init__(self, binary):
    """Parse an RTD operand from *binary*.

    ``grbit`` (first four bytes) selects the representation of the value
    stored in ``self.rtdVt``; ``self.lengthInBytes`` is set alongside it.
    An unrecognised ``grbit`` leaves both attributes unset.

    Fix: the string branches assigned ``self.rdtVT`` but then dereferenced
    ``self.rtdVt.doubleByte``, raising AttributeError for every string
    operand.  The parsed string is now stored in ``self.rtdVt``.
    """
    self.grbit = littleEndian.readInt(binary, 0)

    if self.grbit in (0x0010, 0x0800):
        # value is a 4 byte integer
        self.rtdVt = littleEndian.readInt(binary, 4)
        self.lengthInBytes = 8
    elif self.grbit == 0x0004:
        # value is a 4 byte boolean, kept as raw bytes
        self.rtdVt = binary[4:8]
        self.lengthInBytes = 8
    elif self.grbit == 0x0001:
        # value is a 64 bit floating point number, kept as raw bytes
        self.rtdVt = binary[4:12]
        self.lengthInBytes = 12
    elif self.grbit in (0x0002, 0x1000):
        # value is a RDTOpenStr: 0x0002 MUST be less than 256 characters
        # long, 0x1000 MUST be at least 256 characters long.
        strLength = littleEndian.readInt(binary, 4)
        self.rtdVt = excel_structures.XLUnicodeStringNoCch(
            binary[8:], strLength)
        # NOTE(review): this length excludes the 8 leading bytes that the
        # fixed-size branches count -- confirm with callers.
        self.lengthInBytes = (strLength << self.rtdVt.doubleByte) + 1
def __init__(self, binary):
    """Read ft/cb, then the embedded ObjFmla and the two Ctls-stream fields.

    When ``ft`` is not 0x0009 the constructor stops right after reading
    ``ft`` and ``cb``; ``fmla``, ``IposInCtlStm`` and ``cbBufInCtlStm`` are
    then left unset.
    """
    self.ft = littleEndian.readShort(binary, 0)
    self.cb = littleEndian.readShort(binary, 2)

    if self.ft != 0x0009:
        # not the expected type marker (original had a disabled error print here)
        return None

    self.fmla = ObjFmla(binary[4:])
    # both trailing integers are located relative to the formula size
    formulaSize = self.fmla.cbFmla
    self.IposInCtlStm = littleEndian.readInt(binary, 6 + formulaSize)
    self.cbBufInCtlStm = littleEndian.readInt(binary, 10 + formulaSize)
def getFat(binaryContent, sectorsize):
    """Collect the FAT sector positions listed in the compound-file header.

    Reads 4-byte little-endian entries from ``binaryContent[76:sectorsize]``
    until a 0xFFFFFFFE / 0xFFFFFFFF marker is hit and stores ``entry + 1``
    for each one in ``fatSectors``.  The visible part of this function ends
    after building that list and returns nothing.

    Fix: the original loop ran while ``current < 512`` although the header
    slice starts at byte 76, so a header whose 109 DIFAT slots are all in
    use was read past its end.  The scan is now limited to 109 entries and
    to the bytes actually present.  The Python-2-only ``L`` suffix on the
    marker constants was dropped (same values, valid in Python 2 and 3).
    """
    header = binaryContent[76:sectorsize]
    # the header holds at most 109 DIFAT entries of 4 bytes each
    difatBytes = min(len(header), 109 * 4)

    fatSectors = []
    for current in range(0, difatBytes - 3, 4):
        fatSect = littleEndian.readInt(header, current)
        if fatSect in (0xFFFFFFFE, 0xFFFFFFFF):
            break
        # +1 because sector 0 starts right after the header sector
        fatSectors.append(fatSect + 1)
def getFat(binaryContent, sectorsize):
    """Collect the FAT sector positions listed in the compound-file header.

    Entries are read as 4-byte little-endian integers from
    ``binaryContent[76:sectorsize]`` until a 0xFFFFFFFE / 0xFFFFFFFF marker
    appears; ``entry + 1`` is stored for each.  The visible part of this
    function ends after building ``fatSectors`` and returns nothing.

    Fix: the original bound ``current < 512`` ignored that the slice starts
    at byte 76, so a completely filled header DIFAT (109 entries) caused a
    read past the end of ``header``.  The scan is now capped at 109 entries
    and at the available bytes.  The ``L`` long suffix was dropped from the
    marker constants (same values, valid in Python 2 and 3).
    """
    header = binaryContent[76:sectorsize]
    # the header holds at most 109 DIFAT entries of 4 bytes each
    difatBytes = min(len(header), 109 * 4)

    fatSectors = []
    for current in range(0, difatBytes - 3, 4):
        fatSect = littleEndian.readInt(header, current)
        if fatSect in (0xFFFFFFFE, 0xFFFFFFFF):
            break
        # +1 because sector 0 starts right after the header sector
        fatSectors.append(fatSect + 1)
def findExternalOleObjectStorageLocation(current_user_stream, ppt_document_stream):
    """Return persist offsets whose record type is 0x1011.

    Steps:
      1. parse the current-user atom and read ``offsetToCurrentEdit``;
      2. parse the user-edit atom at that offset and read the offset of the
         persist directory;
      3. read the first persist directory entry (persistId, cPersist and
         ``cPersist`` 4-byte offsets);
      4. keep every offset at which ``ppt_atom(...).head.recType == 0x1011``.

    Returns the list of matching offsets, or None when the current-user atom
    cannot be parsed or one of the offsets points at/after the end of
    ``ppt_document_stream``.

    Fixes:
      * ``cPersist`` is the upper 12 bits of the entry's first dword, i.e.
        ``(byte2 >> 4) | (byte3 << 4)``.  The original shifted byte 3 by 8,
        which yields a wrong count as soon as byte 3 is non-zero.
      * the bare ``except:`` was narrowed to ``except Exception`` so that
        KeyboardInterrupt/SystemExit are no longer swallowed.
    Only the first entry of the persist directory is read, as before.
    """
    try:
        currentUserAtom = ppt_atom(current_user_stream)
        currentUserAtom.size = littleEndian.readInt(currentUserAtom.binaryData, 0)
        currentUserAtom.headerToken = currentUserAtom.binaryData[4:8]
        currentUserAtom.offsetToCurrentEdit = littleEndian.readInt(
            currentUserAtom.binaryData, 8)
    except Exception:
        # failed to parse currentUserAtom, file might be corrupted
        return None

    userEditData = ppt_document_stream[currentUserAtom.offsetToCurrentEdit:]
    if not userEditData:
        # offset points at the end of the stream
        return None

    userEditAtom = ppt_atom(userEditData)
    userEditAtom.lastSlieIdRef = userEditAtom.binaryData[0:4]
    userEditAtom.version = littleEndian.readShort(userEditAtom.binaryData, 4)
    userEditAtom.minorVersion = userEditAtom.binaryData[6]
    userEditAtom.majorVersion = userEditAtom.binaryData[7]
    userEditAtom.offsetLastEdit = littleEndian.readInt(userEditAtom.binaryData, 8)
    userEditAtom.offestPersistDirectory = littleEndian.readInt(
        userEditAtom.binaryData, 12)

    directoryData = ppt_document_stream[userEditAtom.offestPersistDirectory:]
    if not directoryData:
        # offset points at the end of the stream
        return None

    persistDirectoryAtom = ppt_atom(directoryData)
    # first dword of the entry: low 20 bits persistId, high 12 bits cPersist
    entryHeader = littleEndian.readInt(persistDirectoryAtom.binaryData, 0)
    persistDirectoryAtom.persistId = entryHeader & 0xFFFFF
    persistDirectoryAtom.cPersist = (entryHeader >> 20) & 0xFFF

    rgPersistOffset = [
        littleEndian.readInt(persistDirectoryAtom.binaryData, 4 + index * 4)
        for index in range(persistDirectoryAtom.cPersist)
    ]
    persistDirectoryAtom.rgPersistOffset = rgPersistOffset

    externalOleObjectStorages = []
    for entry in rgPersistOffset:
        potentialExtOleObjectStg = ppt_atom(ppt_document_stream[entry:])
        if potentialExtOleObjectStg.head.recType == 0x1011:
            externalOleObjectStorages.append(entry)
    return externalOleObjectStorages
def __init__(self, binary):
    """Read the fixed 22-byte layout into ft, cb, ot, id, bitmap, unused8-10.

    A buffer whose length is not exactly 22 is reported and nothing is
    parsed.  After parsing, a message is printed when ``ft`` is not 0x15 or
    ``cb`` is not 0x12; the attributes stay set in that case.
    """
    if len(binary) != 22:
        print('length doesn\'t match the length of a regular FtCmo structure')
        return

    # five consecutive 16-bit fields starting at offset 0
    for fieldName, offset in (('ft', 0), ('cb', 2), ('ot', 4), ('id', 6), ('bitmap', 8)):
        setattr(self, fieldName, littleEndian.readShort(binary, offset))

    # three 32-bit fields filling the rest of the structure
    for fieldName, offset in (('unused8', 10), ('unused9', 14), ('unused10', 18)):
        setattr(self, fieldName, littleEndian.readInt(binary, offset))

    if not (self.ft == 0x15 and self.cb == 0x12):
        print('Error parsing a FtCmo-structure')
        return None
def __init__(self, binary, args, json_result):
    """Parse a record made of frtHeader, ichSamePrefix, a segmented string and an rtdOper.

    Layout as read here: ``frtHeader`` from offset 0 (12 bytes),
    ``ichSamePrefix`` at offset 12 (4 bytes), an
    ``XLUnicodeStringSegmentedRTD`` starting at offset 16, then the
    ``rtdOper`` directly after that string.

    ``args`` and ``json_result`` are stored unchanged on the instance.

    Fix: the string starts at offset 16 and ``lengthInBytes`` is its own
    size, so the operand starts at ``16 + lengthInBytes``.  The original
    sliced at ``lengthInBytes`` alone, which made the operand overlap the
    header/string bytes.
    """
    self.args = args
    self.json_result = json_result

    self.frtHeader = frtHeader(binary)  # 12 bytes
    self.ichSamePrefix = littleEndian.readInt(binary, 12)  # 4 bytes

    stringOffset = 16
    self.XLUnicodeStringSegmentedRTD = XLUnicodeStringSegmentedRTD(binary[stringOffset:])

    operOffset = stringOffset + self.XLUnicodeStringSegmentedRTD.lengthInBytes
    self.rtdOper = rtdOper(binary[operOffset:])
# findFlashObjects(self, oleObject) -> bool
#
# Walks self.records and, for every record whose BiffRecordHeader type is 93,
# builds an Obj from the bytes after the 4-byte header.  An object is treated
# as a Flash candidate when cmo.ot == 0x08, the pictFmla formula token
# (fmla.fmla.rgce.ptg) is 0x02 and the embedded class name contains
# 'Shockwave'.
#
# For a candidate, the slice [IposInCtlStm : IposInCtlStm + cbBufInCtlStm] of
# the 'Ctls' stream of oleObject is searched for '.swf', first as plain bytes
# and then with a 0x00 byte after each character.  The path is rebuilt
# backwards two bytes at a time until three zero bytes precede the current
# offset; the 4 bytes in front of it are compared with pathLength*2 and the
# path is printed on a match.
#
# AttributeError raised while inspecting a record is ignored (records without
# the expected sub-structures).  Returns foundFlashObject.
#
# NOTE(review): the backward scan has no lower bound on currentOffset; a
# buffer without three consecutive zero bytes before '.swf' lets the offset
# go negative -- confirm whether a guard is needed.
# NOTE(review): the exact nesting of "foundFlashObject = True" cannot be read
# off this collapsed line; verify against the original layout.
def findFlashObjects(self, oleObject): foundFlashObject = False for binaryRecord in self.records: record = BiffRecordHeader(binaryRecord) if record.type == 93: try: objectRecord = Obj(binaryRecord[4:]) #check if a found obj-record has a pictFmla, which is mandatory for flash-objects if objectRecord.cmo.ot == 0x08: if objectRecord.pictFmla.fmla.fmla.rgce.ptg == 0x02 and \ 'Shockwave' in objectRecord.pictFmla.fmla.embededInfo.stClass.characters: #get offset of the associated acitveX control in the "ctls"-stream ctlsStream = oleObject.openstream('Ctls').read()[objectRecord.pictFmla.IposInCtlStm:\ objectRecord.pictFmla.IposInCtlStm+objectRecord.pictFmla.cbBufInCtlStm] #look for ".swf" suffix and extract path (relative or absolute) to the embedded flash-file currentOffset = ctlsStream.find('.swf') if currentOffset == -1: currentOffset = ctlsStream.find('.\x00s\x00w\x00f') if currentOffset != -1: pathLength = 5 path = '.swf' #reading the path from back to front, since we don't know the length of the path yet while ctlsStream[currentOffset-3:currentOffset] != '\x00\x00\x00': path = ctlsStream[currentOffset-2] + path currentOffset = currentOffset -2 pathLength = pathLength+1 bytesForPath = littleEndian.readInt(ctlsStream, currentOffset-4) #check if the length of our extracted path matches the 4 bytes in front #of it, (interpreting the 4 bytes as a unsigned integer in littleendian) if bytesForPath == pathLength*2: print 'path to .swf: ' + path foundFlashObject = True except AttributeError: pass return foundFlashObject
# findScriptlets(self, oleObject) -> bool
#
# Walks self.records and, for every record whose BiffRecordHeader type is 93,
# builds an Obj from the bytes after the 4-byte header.  An object is treated
# as a scriptlet candidate when cmo.ot == 0x08, the pictFmla formula token
# (fmla.fmla.rgce.ptg) is 0x02 and the embedded class name contains
# 'ScriptBridge'.
#
# For a candidate, the slice [IposInCtlStm : IposInCtlStm + cbBufInCtlStm] of
# the 'Ctls' stream of oleObject is read.  When its first 16 bytes equal
# binaryClassID, a 4-byte length is read at offset 18 and every second byte
# from offset 22 on (pathLength characters) is concatenated into
# pathToSourceFile, which is printed.
#
# AttributeError raised while inspecting a record is ignored.  Returns
# foundScriptlet.
#
# NOTE(review): the exact nesting of "foundScriptlet = True" and of the print
# cannot be read off this collapsed line; verify against the original layout.
def findScriptlets(self, oleObject): foundScriptlet = False binaryClassID = '\xAE\xFD\x24\xAE\xC6\x03\xD1\x11\x8B\x76\x00\x80\xC7\x44\xF3\x89' for binaryRecord in self.records: record = BiffRecordHeader(binaryRecord) if record.type == 93: try: objectRecord = Obj(binaryRecord[4:]) #check if a found obj-record has a pictFmla, which is mandatory for scriptlet-controls if objectRecord.cmo.ot == 0x08: if objectRecord.pictFmla.fmla.fmla.rgce.ptg == 0x02 and \ 'ScriptBridge' in objectRecord.pictFmla.fmla.embededInfo.stClass.characters: #get offset of the associated acitveX control in the "ctls"-stream ctlsStream = oleObject.openstream('Ctls').read()[objectRecord.pictFmla.IposInCtlStm:\ objectRecord.pictFmla.IposInCtlStm+objectRecord.pictFmla.cbBufInCtlStm] pathToSourceFile = '' if ctlsStream[0:16] == binaryClassID: #a scriptlet-control will start with a 128-bit signature (ClsID) pathLength = littleEndian.readInt(ctlsStream, 18) for character in range(22, 22+(pathLength*2), 2): pathToSourceFile = pathToSourceFile + ctlsStream[character] foundScriptlet = True print 'path to source file: ', pathToSourceFile except AttributeError: pass return foundScriptlet
def findExternalOleObjectStorageLocation(current_user_stream, ppt_document_stream):
    """Return persist offsets whose record type is 0x1011.

    Steps:
      1. parse the current-user atom and read ``offsetToCurrentEdit``;
      2. parse the user-edit atom at that offset and read the offset of the
         persist directory;
      3. read the first persist directory entry (persistId, cPersist and
         ``cPersist`` 4-byte offsets);
      4. keep every offset at which ``ppt_atom(...).head.recType == 0x1011``.

    Returns the list of matching offsets, or None when the current-user atom
    cannot be parsed or one of the offsets points at/after the end of
    ``ppt_document_stream``.

    Fixes:
      * ``cPersist`` is the upper 12 bits of the entry's first dword, i.e.
        ``(byte2 >> 4) | (byte3 << 4)``.  The original shifted byte 3 by 8,
        which yields a wrong count as soon as byte 3 is non-zero.
      * the bare ``except:`` was narrowed to ``except Exception`` so that
        KeyboardInterrupt/SystemExit are no longer swallowed.
    Only the first entry of the persist directory is read, as before.
    """
    try:
        currentUserAtom = ppt_atom(current_user_stream)
        currentUserAtom.size = littleEndian.readInt(currentUserAtom.binaryData, 0)
        currentUserAtom.headerToken = currentUserAtom.binaryData[4:8]
        currentUserAtom.offsetToCurrentEdit = littleEndian.readInt(currentUserAtom.binaryData, 8)
    except Exception:
        # failed to parse currentUserAtom, file might be corrupted
        return None

    userEditData = ppt_document_stream[currentUserAtom.offsetToCurrentEdit:]
    if not userEditData:
        # offset points at the end of the stream
        return None

    userEditAtom = ppt_atom(userEditData)
    userEditAtom.lastSlieIdRef = userEditAtom.binaryData[0:4]
    userEditAtom.version = littleEndian.readShort(userEditAtom.binaryData, 4)
    userEditAtom.minorVersion = userEditAtom.binaryData[6]
    userEditAtom.majorVersion = userEditAtom.binaryData[7]
    userEditAtom.offsetLastEdit = littleEndian.readInt(userEditAtom.binaryData, 8)
    userEditAtom.offestPersistDirectory = littleEndian.readInt(userEditAtom.binaryData, 12)

    directoryData = ppt_document_stream[userEditAtom.offestPersistDirectory:]
    if not directoryData:
        # offset points at the end of the stream
        return None

    persistDirectoryAtom = ppt_atom(directoryData)
    # first dword of the entry: low 20 bits persistId, high 12 bits cPersist
    entryHeader = littleEndian.readInt(persistDirectoryAtom.binaryData, 0)
    persistDirectoryAtom.persistId = entryHeader & 0xFFFFF
    persistDirectoryAtom.cPersist = (entryHeader >> 20) & 0xFFF

    rgPersistOffset = [
        littleEndian.readInt(persistDirectoryAtom.binaryData, 4 + index * 4)
        for index in range(persistDirectoryAtom.cPersist)
    ]
    persistDirectoryAtom.rgPersistOffset = rgPersistOffset

    externalOleObjectStorages = []
    for entry in rgPersistOffset:
        potentialExtOleObjectStg = ppt_atom(ppt_document_stream[entry:])
        if potentialExtOleObjectStg.head.recType == 0x1011:
            externalOleObjectStorages.append(entry)
    return externalOleObjectStorages
def __init__(self, binary):
    """Parse version/build fields from *binary*, ignoring its first four bytes.

    All offsets below are relative to the data after those four bytes.
    ``bitmap`` and ``bitmap2`` keep only the bits selected by the binary
    masks; ``verLowestBiff`` is stored as the raw byte at offset 12.
    """
    payload = binary[4:]
    readShort = littleEndian.readShort

    self.vers = readShort(payload, 0)
    self.dt = readShort(payload, 2)
    self.rupBuild = readShort(payload, 4)
    self.rupYear = readShort(payload, 6)

    # masks are spelled out bit by bit, most significant bit first
    bitmapMask = int('11111111111111111110000000000000', 2)
    bitmap2Mask = int('11110000', 2)

    self.bitmap = littleEndian.readInt(payload, 8) & bitmapMask
    self.verLowestBiff = payload[12]
    self.bitmap2 = ord(payload[13]) & bitmap2Mask
def __init__(self, binary, args, json_result):
    """Parse a record made of frtHeader, ichSamePrefix, a segmented string and an rtdOper.

    Layout as read here: ``frtHeader`` from offset 0 (12 bytes),
    ``ichSamePrefix`` at offset 12 (4 bytes), an
    ``XLUnicodeStringSegmentedRTD`` starting at offset 16, then the
    ``rtdOper`` directly after that string.

    ``args`` and ``json_result`` are stored unchanged on the instance.

    Fix: the string starts at offset 16 and ``lengthInBytes`` is its own
    size, so the operand starts at ``16 + lengthInBytes``.  The original
    sliced at ``lengthInBytes`` alone, which made the operand overlap the
    header/string bytes.
    """
    self.args = args
    self.json_result = json_result

    self.frtHeader = frtHeader(binary)  # 12 bytes
    self.ichSamePrefix = littleEndian.readInt(binary, 12)  # 4 bytes

    stringOffset = 16
    self.XLUnicodeStringSegmentedRTD = XLUnicodeStringSegmentedRTD(
        binary[stringOffset:])

    operOffset = stringOffset + self.XLUnicodeStringSegmentedRTD.lengthInBytes
    self.rtdOper = rtdOper(binary[operOffset:])
def __init__(self, binary):
    """Read a length-prefixed string: 4-byte cch, 1-byte fHighByte, then data.

    ``rgb`` receives ``cch`` bytes when ``fHighByte`` is 0 and ``cch * 2``
    bytes when it is 1.  For any other ``fHighByte`` the record is malformed
    and ``rgb`` is left unset (validation is expected to fail later).
    ``lengthInBytes`` is always ``5 + (cch << fHighByte)``.
    """
    self.cch = littleEndian.readInt(binary, 0)
    self.fHighByte = ord(binary[4])

    if self.fHighByte in (0x00, 0x01):
        # shifting by fHighByte doubles the byte count for 2-byte characters
        dataEnd = 5 + (self.cch << self.fHighByte)
        self.rgb = binary[5:dataEnd]

    self.lengthInBytes = 5 + (self.cch << self.fHighByte)
def __init__(self, binary):
    """Read a length-prefixed string: 4-byte cch, 1-byte fHighByte, then data.

    ``rgb`` holds ``cch`` bytes for ``fHighByte == 0`` and ``cch * 2`` bytes
    for ``fHighByte == 1``.  Any other ``fHighByte`` marks a malformed
    record: ``rgb`` stays unset and validation is expected to fail later.
    ``lengthInBytes`` is always ``5 + (cch << fHighByte)``.
    """
    self.cch = littleEndian.readInt(binary, 0)
    self.fHighByte = ord(binary[4])

    bytesPerChar = {0x00: 1, 0x01: 2}.get(self.fHighByte)
    if bytesPerChar is not None:
        self.rgb = binary[5:5 + self.cch * bytesPerChar]

    self.lengthInBytes = 5 + (self.cch << self.fHighByte)
def __init__(self, binary):
    """Read the 24-byte field layout into attributes.

    Offsets 0-3 hold ft/cb (unsigned 16 bit), 4-7 ``unused1`` (32 bit),
    8-17 five signed 16-bit values, 18 ``fHoriz`` (unsigned), 20
    ``dxScroll`` (signed).  Byte 22 is split into ``flags`` (upper nibble)
    and the upper four bits of ``unused2``; byte 23 supplies its low byte.
    """
    unsigned16 = littleEndian.readShort
    signed16 = littleEndian.readSignedShort

    self.ft = unsigned16(binary, 0)
    self.cb = unsigned16(binary, 2)
    self.unused1 = littleEndian.readInt(binary, 4)

    self.iVal = signed16(binary, 8)
    self.iMin = signed16(binary, 10)
    self.iMax = signed16(binary, 12)
    self.dInc = signed16(binary, 14)
    self.dPage = signed16(binary, 16)
    self.fHoriz = unsigned16(binary, 18)
    self.dxScroll = signed16(binary, 20)

    packed = ord(binary[22])
    self.flags = (packed & 0xf0) >> 4
    self.unused2 = ((packed & 0x0f) << 8) + ord(binary[23])
def getRecords(self, workbookStream, currentIndex):
    """Append every raw record up to the next EOF record to self.records.

    Starting at ``currentIndex``, records (4-byte header + ``length`` bytes
    of payload) are copied unparsed into ``self.records`` until the four
    bytes at the current position read as 0x0000000A (the EOF record).
    Returns the index just behind that EOF record.
    """
    while littleEndian.readInt(workbookStream, currentIndex) != 0x0000000A:
        header = BiffRecordHeader(workbookStream[currentIndex:currentIndex + 4])
        recordEnd = currentIndex + header.length + 4
        # records are kept as unformatted binary data, header included
        self.records.append(workbookStream[currentIndex:recordEnd])
        currentIndex = recordEnd

    # step over the 4-byte EOF record itself
    currentIndex += 4
    if currentIndex >= len(workbookStream) - 1:
        return currentIndex

    # safety measure: look at what follows this substream; both outcomes
    # only had (disabled) debug output in the original, so nothing happens
    following = BiffRecordHeader(workbookStream[currentIndex:currentIndex + 4])
    if following.type == 2057 and following.length == 16:
        pass
    return currentIndex
def __init__(self, binary):
    """Read the 12-byte header: rt, grbitFrt, reserved1, reserved2.

    Expected values according to the original comments: ``rt`` MUST be
    0x0813, ``grbitFrt`` MUST be 0x0000 and both reserved fields MUST be
    0x00000000.  Nothing is validated here.
    """
    readShort = littleEndian.readShort
    readInt = littleEndian.readInt

    self.rt = readShort(binary, 0)
    self.grbitFrt = readShort(binary, 2)
    self.reserved1 = readInt(binary, 4)
    self.reserved2 = readInt(binary, 8)
def __init__(self, binaryString):
    """Split the 8-byte record header into recVer, recInstance, recType, recLen.

    The first 16-bit word carries ``recVer`` in its low 4 bits and
    ``recInstance`` in the remaining bits; bytes 2-3 form ``recType``
    (byte 3 is the high byte); bytes 4-7 are ``recLen``.
    """
    versionAndInstance = littleEndian.readShort(binaryString, 0)
    self.recVer = versionAndInstance & 0x000f
    self.recInstance = versionAndInstance >> 4

    highByte = ord(binaryString[3])
    lowByte = ord(binaryString[2])
    self.recType = (highByte << 8) + lowByte

    self.recLen = littleEndian.readInt(binaryString, 4)
# locateJavascriptSource(self)
#
# Looks for MS-Scriptlet ActiveX controls in the document described by
# self.mode, self.pathToActiveX, self.fileName and self.docType.
#
# self.mode == 0 (XML-based document):
#   every file below self.pathToActiveX matching '*activeX*.xml' is read; a
#   file containing self.MSscriptletClassID counts as a scriptlet, its
#   matching '.bin' name is collected, and the value following
#   '<ax:ocxPr ax:name="URL" ax:value="' (up to the closing quote) is printed.
#
# otherwise (OLE document; asserted with OleFileIO_PL.isOleFile):
#   '/word': the 'WordDocument' stream is searched for
#            'CONTROL ScriptBridge.ScriptBridge'; every storage path ending
#            in '\x03OCXDATA' is read and, when it starts with the 16-byte
#            binaryClassID, a 4-byte length at offset 18 and every second
#            byte from offset 22 on give the path to the source file.
#   '/xl':   delegates to excel_structures.workbook(ole).findScriptlets().
#   '/ppt':  delegates to ppt_structures.findScriptlets(); on a hit the
#            external OLE object storages are decompressed into a folder
#            named after self.fileName and their '\x03OCXDATA' streams are
#            inspected the same way as for '/word'.
#
# Output: messages are printed depending on self.args.quiet / self.args.json;
# with self.args.json, found paths are appended to self.locations and a
# 'javascript/scriptlett' entry is appended to self.json_result['detections'].
#
# NOTE(review): the statement nesting was lost on these collapsed lines;
# confirm block boundaries against the original layout before editing.
def locateJavascriptSource(self): foundScripttlet = False if self.mode == 0: controlTag = '<ax:ocxPr ax:name="URL" ax:value="' activeXContainers = [] fileNames = [] for dirname, dirnames, filenames in os.walk(self.pathToActiveX): for filename in filenames: fileNames.append(os.path.join(dirname, filename)) filtered = fnmatch.filter(fileNames, '*activeX*.xml') for activeXcontrol in filtered: currentControl = open(activeXcontrol, 'r') controlText = currentControl.read() #the Class-ID: AE24FDAE-03C6-11D1-8B76-0080C744F389 identifies an activeX-control as flash-object if self.MSscriptletClassID in controlText: foundScripttlet = True if not self.args.quiet: print activeXcontrol + ' is a MS-Scriptlet!' activeXBinFileName = activeXcontrol[:-3] activeXBinFileName += 'bin' activeXContainers.append(activeXBinFileName) if controlTag in controlText: codeOrigin = '' tagStart = controlText.index(controlTag) iterator = tagStart + len(controlTag) while controlText[iterator] != '"': codeOrigin = codeOrigin + controlText[iterator] iterator = iterator + 1 print codeOrigin currentControl.close() else: binaryClassID = '\xAE\xFD\x24\xAE\xC6\x03\xD1\x11\x8B\x76\x00\x80\xC7\x44\xF3\x89' assert OleFileIO_PL.isOleFile(self.fileName) ole = OleFileIO_PL.OleFileIO(self.fileName) if self.docType == '/word': wordDocStream = ole.openstream('WordDocument') wordDocBuffer = wordDocStream.read() if 'CONTROL ScriptBridge.ScriptBridge' in wordDocBuffer: if not self.args.quiet and not self.args.json: print 'use of MS Scriptlet detected' listOCXContents = [] listOLEPaths = ole.listdir() #find all OCXNAME streams in the word file for path in listOLEPaths: if path[len(path)-1] == '\x03OCXDATA': listOCXContents.append('/'.join(x for x in path)) for content in listOCXContents: OCXStream = ole.openstream(content) contentBuffer = OCXStream.read() if contentBuffer[0:16] == binaryClassID: foundScripttlet = True pathToSourceFile = '' pathLength = littleEndian.readInt(contentBuffer, 18) for character in range(22, 
22+(pathLength*2), 2): pathToSourceFile = pathToSourceFile + contentBuffer[character] if not self.args.quiet and not self.args.json: print 'path to source file: ', pathToSourceFile if self.args.json: self.locations.append(pathToSourceFile) OCXStream.close() elif self.docType == '/xl': excel_structures = imp.load_source('excel_structures', 'modules/OLE_parsing/excel_structures.py') ws = excel_structures.workbook(ole) foundScripttlet = ws.findScriptlets() elif self.docType == '/ppt': ppt_structures = imp.load_source('ppt_structures', 'modules/OLE_parsing/ppt_structures.py') ppt_document_stream = ole.openstream('PowerPoint Document').read() current_user_stream = ole.openstream('Current User').read() ppt_scriptlet = ppt_structures.ppt_container(ppt_document_stream) foundScripttlet = ppt_structures.findScriptlets(ppt_scriptlet) if foundScripttlet: folderName = os.path.abspath(self.fileName.split('.')[0]) if not os.path.exists(folderName): os.makedirs(folderName) externalOleObjectStorages = ppt_structures.findExternalOleObjectStorageLocation(current_user_stream, ppt_document_stream) if not externalOleObjectStorages: return decompressedStorageFiles = ppt_structures.decompressExternalOleObjectStorage(folderName, ppt_document_stream, externalOleObjectStorages) for oleStorageFile in decompressedStorageFiles: currentStorage = OleFileIO_PL.OleFileIO(oleStorageFile) if not currentStorage.exists('\x03OCXDATA'): continue OCXStream = currentStorage.openstream('\x03OCXDATA') contentBuffer = OCXStream.read() if contentBuffer[0:16] == binaryClassID: foundScripttlet = True pathToSourceFile = '' pathLength = littleEndian.readInt(contentBuffer, 18) for character in range(22, 22+(pathLength*2), 2): pathToSourceFile = pathToSourceFile + contentBuffer[character] if not self.args.quiet and not self.args.json: print 'path to source file: ', pathToSourceFile if self.args.json: self.locations.append(pathToSourceFile) OCXStream.close() currentStorage.close() if not foundScripttlet and not 
self.args.quiet and not self.args.json: print 'no Javascript/Scriptlett detected' if foundScripttlet: if self.args.json: self.json_result['detections'].append({'type': 'javascript/scriptlett', 'location': self.locations})
# locateFlashObjects(self)
#
# Looks for embedded Shockwave Flash ActiveX controls in the document
# described by self.mode, self.pathToActiveX, self.fileName and self.docType.
#
# self.mode == 0 (XML-based document):
#   every file below pathToActiveX matching '*activeX*.xml' is read; a file
#   containing self.ShockwaveFlashClassID counts as a Flash object and its
#   matching '.bin' name is collected.
#   - docType '/xl': each collected .bin file is searched for '.swf' (plain
#     or with 0x00 after every character); the path is rebuilt backwards two
#     bytes at a time until three zero bytes precede it and is printed when
#     the 4 bytes in front of it equal pathLength*2.  The function returns
#     after reporting.
#   - other docTypes: each .bin file must be an OLE file (asserted); its
#     'Contents' stream is read and, when the 16-bit value at offset 24 is 8,
#     a 4-byte length at offset 26 and every second byte from offset 30 on
#     give the path to the .swf file.
#
# otherwise (OLE document; asserted with OleFileIO_PL.isOleFile):
#   '/word': requires 'CONTROL ShockwaveFlash.ShockwaveFlash' in the
#            'WordDocument' stream, then scans every 'Contents' stream for a
#            '.swf' path as described above.
#   '/xl':   delegates to excel_structures.workbook(ole).findFlashObjects().
#   '/ppt':  delegates to ppt_structures.findShockwaveFlash(), decompresses
#            the external OLE object storages into a folder named after
#            self.fileName and scans their 'Contents' streams for '.swf'.
#
# Output: messages are printed depending on self.args.quiet / self.args.json;
# with self.args.json a {'type': 'flash', 'location': None} entry is appended
# to self.json_result['detections'] when a Flash object was found.
#
# NOTE(review): the backward '.swf' scans have no lower bound on the offset.
# NOTE(review): the statement nesting was lost on these collapsed lines;
# confirm block boundaries against the original layout before editing.
def locateFlashObjects(self): pathToActiveX = self.pathToActiveX fileName = self.fileName docType = self.docType foundFlashObject = False if self.mode == 0: #this is an XML-based document activeXContainers = [] fileNames = [] for dirname, dirnames, filenames in os.walk(pathToActiveX): for filename in filenames: fileNames.append(os.path.join(dirname, filename)) filtered = fnmatch.filter(fileNames, '*activeX*.xml') for activeXcontrol in filtered: currentControl = open(activeXcontrol, 'r') controlText = currentControl.read() #the Class-ID: D27CDB6E-AE6D-11CF-96B8-444553540000 identifies an activeX-control as flash-object if self.ShockwaveFlashClassID in controlText: if not self.args.quiet and not self.args.json: print activeXcontrol + " is a FlashObject!" foundFlashObject = True activeXBinFileName = activeXcontrol[:-3] activeXBinFileName += 'bin' activeXContainers.append(activeXBinFileName) currentControl.close() #starting to determine the origin of the .swf file if self.docType == '/xl': for activeXBinFileName in activeXContainers: acitveXStream = open(activeXBinFileName, 'rb').read() currentOffset = acitveXStream.find('.swf') if currentOffset == -1: currentOffset = acitveXStream.find('.\x00s\x00w\x00f') if currentOffset != -1: pathLength = 5 path = '.swf' #reading the path from back to front, since we don't know the length of the path yet while acitveXStream[currentOffset - 3:currentOffset] != '\x00\x00\x00': path = acitveXStream[currentOffset - 2] + path currentOffset = currentOffset - 2 pathLength = pathLength + 1 bytesForPath = littleEndian.readInt( acitveXStream, currentOffset - 4) #check if the length of our extracted path matches the 4 bytes in front #of it, (interpreting the 4 bytes as a unsigned integer in littleendian) if bytesForPath == pathLength * 2 and not self.args.quiet: print 'path to .swf: ' + path if not foundFlashObject and not self.args.quiet and not self.args.json: print 'found no Flash-Objects' if foundFlashObject: if self.args.json: 
self.json_result['detections'].append({ 'type': 'flash', 'location': None }) else: print "found flash object" return for activeXBinFileName in activeXContainers: #make sure that our .bin files are actually OLE-files assert OleFileIO_PL.isOleFile(activeXBinFileName) ole = OleFileIO_PL.OleFileIO(activeXBinFileName) if ole.exists('Contents'): #Flash-files are embedded via activeX-controls, which are located in the "Contens" folder of the OLE-file Contents = ole.openstream('Contents') content = Contents.read() Contents.close() else: if not self.args.quiet and not self.args.json: print('Contents doesn\'t exsit') ole.close() if littleEndian.readShort(content, 24) == 8: #this means the next 4 bytes (little-endian) will tell the length of an unicode string, #which will follow right after the length field pathLength = littleEndian.readInt(content, 26) pathToSWFfile = '' for iterator in range(30, 30 + pathLength): if iterator % 2 == 0: #every second byte will be 0x00. Office doesn't allow characters, which would have to use this second byte pathToSWFfile += content[iterator] #print as a hex string, if you need to search manually in the .bin file #print (':'.join(x.encode('hex') for x in pathToSWFfile)) if not self.args.quiet and not self.args.json: print 'path to swf-file: ' + pathToSWFfile else: if not self.args.quiet and not self.args.json: print 'this doesn\'t seem to be an unicode string' ole.close() if not foundFlashObject and not self.args.quiet and not self.args.json: print 'found no Flash-Objects' if foundFlashObject: if self.args.json: self.json_result['detections'].append({ 'type': 'flash', 'location': None }) else: print "found flash object" else: #this is a OLE-formated document assert OleFileIO_PL.isOleFile(fileName) ole = OleFileIO_PL.OleFileIO(fileName) if docType == '/word': wordDocStream = ole.openstream('WordDocument') wordDocBuffer = wordDocStream.read() if 'CONTROL ShockwaveFlash.ShockwaveFlash' in wordDocBuffer: if not self.args.quiet and not 
self.args.json: print 'use of Shockwafe Flash detected' foundFlashObject = True else: if not foundFlashObject and not self.args.quiet and not self.args.json: print 'found no Flash-Objects' return listOCXContents = [] listOLEPaths = ole.listdir() #print ole.listdir() #find all OCXNAME streams in the word file for path in listOLEPaths: if path[len(path) - 1] == 'Contents': #print ('/'.join(x for x in path)) listOCXContents.append('/'.join(x for x in path)) for content in listOCXContents: OCXStream = ole.openstream(content) contentBuffer = OCXStream.read() #print contentBuffer currentOffset = contentBuffer.find('.swf') if currentOffset == -1: currentOffset = contentBuffer.find('.\x00s\x00w\x00f') if currentOffset != -1: pathLength = 5 path = '.swf' #reading the path from back to front, since we don't know the length of the path yet while contentBuffer[currentOffset - 3:currentOffset] != '\x00\x00\x00': path = contentBuffer[currentOffset - 2] + path currentOffset = currentOffset - 2 pathLength = pathLength + 1 bytesForPath = littleEndian.readInt( contentBuffer, currentOffset - 4) #check if the length of our extracted path matches the 4 bytes in front #of it, (interpreting the 4 bytes as a unsigned integer in littleendian) if bytesForPath == pathLength * 2: #print 'length does match!' 
pass if not self.args.quiet and not self.args.json: print 'path to .swf: ' + path else: if not self.args.quiet and not self.args.json: print 'no .swf found in contents' OCXStream.close() elif docType == '/xl': excel_structures = imp.load_source( 'excel_structures', 'modules/OLE_parsing/excel_structures.py') #import excel_structures ws = excel_structures.workbook(ole) foundFlashObject = ws.findFlashObjects() pass elif docType == '/ppt': ppt_structures = imp.load_source( 'ppt_structures', 'modules/OLE_parsing/ppt_structures.py') #import ppt_structures ppt_document_stream = ole.openstream( 'PowerPoint Document').read() current_user_stream = ole.openstream('Current User').read() ppt_flash = ppt_structures.ppt_container(ppt_document_stream) #find externalOleObjectStorage-Ids, which point to a source of Flash foundFlashObject = ppt_structures.findShockwaveFlash(ppt_flash) folderName = os.path.abspath(self.fileName.split('.')[0]) if not os.path.exists(folderName): os.makedirs(folderName) externalOleObjectStorages = ppt_structures.findExternalOleObjectStorageLocation( current_user_stream, ppt_document_stream) if not externalOleObjectStorages: return decompressedStorageFiles = ppt_structures.decompressExternalOleObjectStorage( folderName, ppt_document_stream, externalOleObjectStorages) #extract paths to Flash-Objects currentPersistId = 0 for oleStorageFile in decompressedStorageFiles: currentStorage = OleFileIO_PL.OleFileIO(oleStorageFile) if not currentStorage.exists('Contents'): currentPersistId += 1 continue contentBuffer = currentStorage.openstream( 'Contents').read() currentOffset = contentBuffer.find('.swf') if currentOffset == -1: currentOffset = contentBuffer.find('.\x00s\x00w\x00f') if currentOffset != -1: pathLength = 5 path = '.swf' #reading the path from back to front, since we don't know the length of the path yet while contentBuffer[currentOffset - 3:currentOffset] != '\x00\x00\x00': path = contentBuffer[currentOffset - 2] + path currentOffset = currentOffset 
- 2 pathLength = pathLength + 1 if not self.args.quiet and not self.args.json: print 'path to .swf: ' + path else: if not self.args.quiet and not self.args.json: print 'no .swf found in contents' currentStorage.close() currentPersistId += 1 else: if not self.args.quiet and not self.args.json: print 'No document type was given' if not foundFlashObject and not self.args.quiet and not self.args.json: print 'found no Flash-Objects' if foundFlashObject: if self.args.json: self.json_result['detections'].append({ 'type': 'flash', 'location': None }) else: print "found flash object" ole.close()
def __init__(self, binary):
    """Read the 10-byte layout: ft, cb (16 bit), unused1 (32 bit), unused2 (16 bit)."""
    layout = (
        ('ft', littleEndian.readShort, 0),
        ('cb', littleEndian.readShort, 2),
        ('unused1', littleEndian.readInt, 4),
        ('unused2', littleEndian.readShort, 8),
    )
    # fields are assigned in declaration order, one reader call per field
    for fieldName, reader, offset in layout:
        setattr(self, fieldName, reader(binary, offset))
def getFat(binaryContent, sectorsize): header = binaryContent[76:sectorsize] fatSectors = [] current = 0 fatSect = littleEndian.readInt(header, current) while not fatSect in (0xFFFFFFFEL, 0xFFFFFFFFL) and current < 512: fatSectors += [fatSect + 1] current += 4 fatSect = littleEndian.readInt(header, current) myFat = [] for fatSect in fatSectors: current = 0 sect = binaryContent[fatSect * sectorsize:(fatSect + 1) * sectorsize] while current < sectorsize: if littleEndian.readInt(sect, current) != 0xffffffff: myFat += [littleEndian.readInt(sect, current)] current += 4 return myFat if __name__ == '__main__': workdir = sys.path[0] os.chdir(workdir) helpText = ''' --fileName {name of file to scan for malware} -f {name of file to scan for malware} --recursive {folder to scan for office malware} -r {folder to scan for office malware}
def locateFlashObjects(self): pathToActiveX = self.pathToActiveX fileName = self.fileName docType = self.docType foundFlashObject = False if self.mode == 0: #this is an XML-based document activeXContainers = [] fileNames = [] for dirname, dirnames, filenames in os.walk(pathToActiveX): for filename in filenames: fileNames.append(os.path.join(dirname, filename)) filtered = fnmatch.filter(fileNames, '*activeX*.xml') for activeXcontrol in filtered: currentControl = open(activeXcontrol, 'r') controlText = currentControl.read() #the Class-ID: D27CDB6E-AE6D-11CF-96B8-444553540000 identifies an activeX-control as flash-object if self.ShockwaveFlashClassID in controlText: if not self.args.quiet and not self.args.json: print activeXcontrol + " is a FlashObject!" foundFlashObject = True activeXBinFileName = activeXcontrol[:-3] activeXBinFileName += 'bin' activeXContainers.append(activeXBinFileName) currentControl.close() #starting to determine the origin of the .swf file if self.docType == '/xl': for activeXBinFileName in activeXContainers: acitveXStream = open(activeXBinFileName, 'rb').read() currentOffset = acitveXStream.find('.swf') if currentOffset == -1: currentOffset = acitveXStream.find('.\x00s\x00w\x00f') if currentOffset != -1: pathLength = 5 path = '.swf' #reading the path from back to front, since we don't know the length of the path yet while acitveXStream[currentOffset-3:currentOffset] != '\x00\x00\x00': path = acitveXStream[currentOffset-2] + path currentOffset = currentOffset -2 pathLength = pathLength+1 bytesForPath = littleEndian.readInt(acitveXStream, currentOffset-4) #check if the length of our extracted path matches the 4 bytes in front #of it, (interpreting the 4 bytes as a unsigned integer in littleendian) if bytesForPath == pathLength*2 and not self.args.quiet: print 'path to .swf: ' + path if not foundFlashObject and not self.args.quiet and not self.args.json: print 'found no Flash-Objects' if foundFlashObject: if self.args.json: 
self.json_result['detections'].append({'type': 'flash', 'location': None}) else: print "found flash object" return for activeXBinFileName in activeXContainers: #make sure that our .bin files are actually OLE-files assert OleFileIO_PL.isOleFile(activeXBinFileName) ole = OleFileIO_PL.OleFileIO(activeXBinFileName) if ole.exists('Contents'): #Flash-files are embedded via activeX-controls, which are located in the "Contens" folder of the OLE-file Contents = ole.openstream('Contents') content = Contents.read() Contents.close() else: if not self.args.quiet and not self.args.json: print('Contents doesn\'t exsit') ole.close() if littleEndian.readShort(content, 24) == 8: #this means the next 4 bytes (little-endian) will tell the length of an unicode string, #which will follow right after the length field pathLength = littleEndian.readInt(content, 26) pathToSWFfile = '' for iterator in range(30, 30+pathLength): if iterator % 2 == 0: #every second byte will be 0x00. Office doesn't allow characters, which would have to use this second byte pathToSWFfile += content[iterator] #print as a hex string, if you need to search manually in the .bin file #print (':'.join(x.encode('hex') for x in pathToSWFfile)) if not self.args.quiet and not self.args.json: print 'path to swf-file: ' + pathToSWFfile else: if not self.args.quiet and not self.args.json: print 'this doesn\'t seem to be an unicode string' ole.close() if not foundFlashObject and not self.args.quiet and not self.args.json: print 'found no Flash-Objects' if foundFlashObject: if self.args.json: self.json_result['detections'].append({'type': 'flash', 'location': None}) else: print "found flash object" else: #this is a OLE-formated document assert OleFileIO_PL.isOleFile(fileName) ole = OleFileIO_PL.OleFileIO(fileName) if docType == '/word': wordDocStream = ole.openstream('WordDocument') wordDocBuffer = wordDocStream.read() if 'CONTROL ShockwaveFlash.ShockwaveFlash' in wordDocBuffer: if not self.args.quiet and not self.args.json: 
print 'use of Shockwafe Flash detected' foundFlashObject = True else: if not foundFlashObject and not self.args.quiet and not self.args.json: print 'found no Flash-Objects' return listOCXContents = [] listOLEPaths = ole.listdir() #print ole.listdir() #find all OCXNAME streams in the word file for path in listOLEPaths: if path[len(path)-1] == 'Contents': #print ('/'.join(x for x in path)) listOCXContents.append('/'.join(x for x in path)) for content in listOCXContents: OCXStream = ole.openstream(content) contentBuffer = OCXStream.read() #print contentBuffer currentOffset = contentBuffer.find('.swf') if currentOffset == -1: currentOffset = contentBuffer.find('.\x00s\x00w\x00f') if currentOffset != -1: pathLength = 5 path = '.swf' #reading the path from back to front, since we don't know the length of the path yet while contentBuffer[currentOffset-3:currentOffset] != '\x00\x00\x00': path = contentBuffer[currentOffset-2] + path currentOffset = currentOffset -2 pathLength = pathLength+1 bytesForPath = littleEndian.readInt(contentBuffer, currentOffset-4) #check if the length of our extracted path matches the 4 bytes in front #of it, (interpreting the 4 bytes as a unsigned integer in littleendian) if bytesForPath == pathLength*2: #print 'length does match!' 
pass if not self.args.quiet and not self.args.json: print 'path to .swf: ' + path else: if not self.args.quiet and not self.args.json: print 'no .swf found in contents' OCXStream.close() elif docType == '/xl': excel_structures = imp.load_source('excel_structures', 'modules/OLE_parsing/excel_structures.py') #import excel_structures ws = excel_structures.workbook(ole) foundFlashObject = ws.findFlashObjects() pass elif docType == '/ppt': ppt_structures = imp.load_source('ppt_structures', 'modules/OLE_parsing/ppt_structures.py') #import ppt_structures ppt_document_stream = ole.openstream('PowerPoint Document').read() current_user_stream = ole.openstream('Current User').read() ppt_flash = ppt_structures.ppt_container(ppt_document_stream) #find externalOleObjectStorage-Ids, which point to a source of Flash foundFlashObject = ppt_structures.findShockwaveFlash(ppt_flash) folderName = os.path.abspath(self.fileName.split('.')[0]) if not os.path.exists(folderName): os.makedirs(folderName) externalOleObjectStorages = ppt_structures.findExternalOleObjectStorageLocation(current_user_stream, ppt_document_stream) if not externalOleObjectStorages: return decompressedStorageFiles = ppt_structures.decompressExternalOleObjectStorage(folderName, ppt_document_stream, externalOleObjectStorages) #extract paths to Flash-Objects currentPersistId = 0 for oleStorageFile in decompressedStorageFiles: currentStorage = OleFileIO_PL.OleFileIO(oleStorageFile) if not currentStorage.exists('Contents'): currentPersistId += 1 continue contentBuffer = currentStorage.openstream('Contents').read() currentOffset = contentBuffer.find('.swf') if currentOffset == -1: currentOffset = contentBuffer.find('.\x00s\x00w\x00f') if currentOffset != -1: pathLength = 5 path = '.swf' #reading the path from back to front, since we don't know the length of the path yet while contentBuffer[currentOffset-3:currentOffset] != '\x00\x00\x00': path = contentBuffer[currentOffset-2] + path currentOffset = currentOffset -2 
pathLength = pathLength+1 if not self.args.quiet and not self.args.json: print 'path to .swf: ' + path else: if not self.args.quiet and not self.args.json: print 'no .swf found in contents' currentStorage.close() currentPersistId += 1 else: if not self.args.quiet and not self.args.json: print 'No document type was given' if not foundFlashObject and not self.args.quiet and not self.args.json: print 'found no Flash-Objects' if foundFlashObject: if self.args.json: self.json_result['detections'].append({'type': 'flash', 'location': None}) else: print "found flash object" ole.close()
header = binaryContent[76:sectorsize] fatSectors = [] current = 0 fatSect = littleEndian.readInt(header, current) while not fatSect in (0xFFFFFFFEL, 0xFFFFFFFFL) and current < 512: fatSectors += [fatSect+1] current += 4 fatSect = littleEndian.readInt(header, current) myFat = [] for fatSect in fatSectors: current = 0 sect = binaryContent[fatSect*sectorsize:(fatSect+1)*sectorsize] while current < sectorsize: if littleEndian.readInt(sect, current) != 0xffffffff: myFat += [littleEndian.readInt(sect, current)] current += 4 return myFat if __name__ == '__main__': workdir = sys.path[0] os.chdir(workdir) helpText = ''' --fileName {name of file to scan for malware} -f {name of file to scan for malware} --recursive {folder to scan for office malware}
def locateJavascriptSource(self): foundScripttlet = False if self.mode == 0: controlTag = '<ax:ocxPr ax:name="URL" ax:value="' activeXContainers = [] fileNames = [] for dirname, dirnames, filenames in os.walk(self.pathToActiveX): for filename in filenames: fileNames.append(os.path.join(dirname, filename)) filtered = fnmatch.filter(fileNames, '*activeX*.xml') #print filtered for activeXcontrol in filtered: currentControl = open(activeXcontrol, 'r') controlText = currentControl.read() #the Class-ID: AE24FDAE-03C6-11D1-8B76-0080C744F389 identifies an activeX-control as flash-object if self.MSscriptletClassID in controlText: foundScripttlet = True print activeXcontrol + ' is a MS-Scriptlet!' activeXBinFileName = activeXcontrol[:-3] activeXBinFileName += 'bin' activeXContainers.append(activeXBinFileName) if controlTag in controlText: codeOrigin = '' tagStart = controlText.index(controlTag) iterator = tagStart + len(controlTag) while controlText[iterator] != '"': codeOrigin = codeOrigin + controlText[iterator] iterator = iterator + 1 print codeOrigin currentControl.close() else: binaryClassID = '\xAE\xFD\x24\xAE\xC6\x03\xD1\x11\x8B\x76\x00\x80\xC7\x44\xF3\x89' assert OleFileIO_PL.isOleFile(self.fileName) ole = OleFileIO_PL.OleFileIO(self.fileName) if self.docType == '/word': wordDocStream = ole.openstream('WordDocument') wordDocBuffer = wordDocStream.read() if 'CONTROL ScriptBridge.ScriptBridge' in wordDocBuffer: print 'use of MS Scriptlet detected' listOCXContents = [] listOLEPaths = ole.listdir() #find all OCXNAME streams in the word file for path in listOLEPaths: if path[len(path)-1] == '\x03OCXDATA': listOCXContents.append('/'.join(x for x in path)) for content in listOCXContents: OCXStream = ole.openstream(content) contentBuffer = OCXStream.read() if contentBuffer[0:16] == binaryClassID: foundScripttlet = True pathToSourceFile = '' pathLength = littleEndian.readInt(contentBuffer, 18) for character in range(22, 22+(pathLength*2), 2): pathToSourceFile = 
pathToSourceFile + contentBuffer[character] print 'path to source file: ', pathToSourceFile OCXStream.close() elif self.docType == '/xl': excel_structures = imp.load_source('excel_structures', 'modules/OLE_parsing/excel_structures.py') ws = excel_structures.workbook(ole) foundScripttlet = ws.findScriptlets() elif self.docType == '/ppt': ppt_structures = imp.load_source('ppt_structures', 'modules/OLE_parsing/ppt_structures.py') ppt_document_stream = ole.openstream('PowerPoint Document').read() current_user_stream = ole.openstream('Current User').read() ppt_scriptlet = ppt_structures.ppt_container(ppt_document_stream) foundScripttlet = ppt_structures.findScriptlets(ppt_scriptlet) if foundScripttlet: folderName = os.path.abspath(self.fileName.split('.')[0]) if not os.path.exists(folderName): os.makedirs(folderName) externalOleObjectStorages = ppt_structures.findExternalOleObjectStorageLocation(current_user_stream, ppt_document_stream) if not externalOleObjectStorages: return decompressedStorageFiles = ppt_structures.decompressExternalOleObjectStorage(folderName, ppt_document_stream, externalOleObjectStorages) for oleStorageFile in decompressedStorageFiles: currentStorage = OleFileIO_PL.OleFileIO(oleStorageFile) if not currentStorage.exists('\x03OCXDATA'): continue OCXStream = currentStorage.openstream('\x03OCXDATA') contentBuffer = OCXStream.read() if contentBuffer[0:16] == binaryClassID: foundScripttlet = True pathToSourceFile = '' pathLength = littleEndian.readInt(contentBuffer, 18) for character in range(22, 22+(pathLength*2), 2): pathToSourceFile = pathToSourceFile + contentBuffer[character] print 'path to source file: ', pathToSourceFile OCXStream.close() currentStorage.close() if not foundScripttlet: print 'no Javascript/Scriptlett detected'
def __init__(self, binary):
    """Populate the record's fields from the raw buffer ``binary``.

    Decoded with the ``littleEndian`` helpers at fixed offsets: a short
    at 0 (``ft``), a short at 2 (``cb``), the 16 raw bytes 4..19
    (``guid``), a short at 20 (``fSharedNote``) and an int at 22
    (``unused``).
    """
    readShort = littleEndian.readShort
    # leading pair of shorts
    self.ft = readShort(binary, 0)
    self.cb = readShort(binary, 2)
    # kept as an undecoded 16-byte slice
    guidStart, guidEnd = 4, 20
    self.guid = binary[guidStart:guidEnd]
    self.fSharedNote = readShort(binary, guidEnd)
    self.unused = littleEndian.readInt(binary, 22)