def setUp(self):
    """Prepare fixtures before each test by filling ``self.oleids``.

    Every (filename, contents) pair yielded by ``loop_over_files()`` is run
    through ``oleid.OleID``; the outcome is stored as a tuple of the filename
    and a dict mapping each indicator's ``id`` to its ``value``.
    """
    self.oleids = []
    for sample_name, sample_data in loop_over_files():
        checker = oleid.OleID(filename=sample_name, data=sample_data)
        values_by_id = {
            indicator.id: indicator.value for indicator in checker.check()
        }
        self.oleids.append((sample_name, values_by_id))
def test_encrypted_document_detection(self):
    """Run oleid on 'basic/encrypted.docx' and expect the 'encrypted' flag.

    The value of the first indicator whose ``id`` is ``'encrypted'`` must
    compare equal to ``True``.
    """
    sample_path = join(DATA_BASE_DIR, 'basic/encrypted.docx')
    encrypted_values = (
        indicator.value
        for indicator in oleid.OleID(sample_path).check()
        if indicator.id == 'encrypted'
    )
    # next() raises StopIteration if no 'encrypted' indicator is reported
    self.assertEqual(next(encrypted_values), True)
def process_file(filepath, field_filter_mode=None):
    """
    Decide which of the process_* functions to call and return its result.

    The checks run in this order: OLE container (xls, encrypted, ppt, doc),
    rtf (by the first 4 bytes), then the type reported by
    ``ooxml.get_type`` (xlsx, excel xml, word xml, csv fallback, docx).

    :param filepath: path of the file to analyse
    :param field_filter_mode: forwarded to ``process_rtf`` and to the docx
                              fallback call of ``process_docx``
    :returns: whatever the selected process_* function returns; an empty
              unicode string for OLE files flagged as powerpoint
    :raises FileIsEncryptedError: if the OLE file is flagged as encrypted
    """
    if olefile.isOleFile(filepath):
        logger.debug('Is OLE. Checking streams to see whether this is xls')
        if xls_parser.is_xls(filepath):
            logger.debug('Process file as excel 2003 (xls)')
            return process_xls(filepath)

        # encrypted files also look like ole, even if office 2007+ (xml-based)
        # so check for encryption, first
        ole = olefile.OleFileIO(filepath, path_encoding=None)
        oid = oleid.OleID(ole)
        if oid.check_encrypted().value:
            logger.debug('is encrypted - raise error')
            # the handle is not passed on in this branch, so release it here
            ole.close()
            raise FileIsEncryptedError(filepath)
        elif oid.check_powerpoint().value:
            logger.debug('is ppt - cannot have DDE')
            # the handle is not passed on in this branch, so release it here
            ole.close()
            return u''
        else:
            logger.debug('Process file as word 2003 (doc)')
            # the open handle is handed over to process_doc
            return process_doc(ole)

    with open(filepath, 'rb') as file_handle:
        if file_handle.read(4) == RTF_START:
            logger.debug('Process file as rtf')
            return process_rtf(file_handle, field_filter_mode)

    try:
        doctype = ooxml.get_type(filepath)
        logger.debug('Detected file type: {0}'.format(doctype))
    except Exception as exc:
        # best effort: any failure to determine the type leads to the csv
        # fallback below
        logger.debug('Exception trying to xml-parse file: {0}'.format(exc))
        doctype = None

    if doctype == ooxml.DOCTYPE_EXCEL:
        logger.debug('Process file as excel 2007+ (xlsx)')
        return process_xlsx(filepath)
    elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
        logger.debug('Process file as xml from excel 2003/2007+')
        return process_excel_xml(filepath)
    elif doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
        logger.debug('Process file as xml from word 2003/2007+')
        # NOTE(review): field_filter_mode is not forwarded here, unlike the
        # docx fallback below -- confirm this is intended
        return process_docx(filepath)
    elif doctype is None:
        logger.debug('Process file as csv')
        return process_csv(filepath)
    else:
        # could be docx; if not: this is the old default code path
        logger.debug('Process file as word 2007+ (docx)')
        return process_docx(filepath, field_filter_mode)
def oleid(self, args, file, opts):
    """Run oleid on ``file.file_path`` and return its indicators as dicts.

    Each indicator becomes a dict with the string-valued keys ``'name'``,
    ``'value'`` and ``'description'``; ``bytes`` values are decoded as UTF-8
    before conversion.

    Raises ``error.CommandWarning`` if constructing ``oleid.OleID`` fails.

    NOTE(review): ``oleid`` inside the body is looked up as a global name,
    so it refers to the module-level ``oleid`` rather than to this method
    (assuming this def sits in a class body).
    """
    try:
        analyzer = oleid.OleID(file.file_path)
    except Exception:
        raise error.CommandWarning(
            'file ' + str(file.file_path) + ' is not a valid ole file')

    def _as_text(raw):
        # bytes are decoded first so the result is readable text
        if isinstance(raw, bytes):
            return str(raw.decode('utf-8'))
        return str(raw)

    return [
        {
            'name': str(indicator.name),
            'value': _as_text(indicator.value),
            'description': str(indicator.description),
        }
        for indicator in analyzer.check()
    ]
def open(self, filename, *args, **kwargs):
    """Call OleFileIO.open, then record whether the file is encrypted.

    All arguments are passed through to ``OleFileIO.open``. Afterwards
    ``self.is_encrypted`` is set to the value of the indicator returned by
    ``oleid.OleID(self).check_encrypted()``. No error is raised here based
    on that flag; callers have to inspect ``is_encrypted`` themselves.
    """
    # the base implementation is invoked explicitly on OleFileIO, not via
    # super()
    OleFileIO.open(self, filename, *args, **kwargs)
    encryption_indicator = oleid.OleID(self).check_encrypted()
    self.is_encrypted = encryption_indicator.value
def test_all(self):
    """Run all files in test-data through oleid and compare to known output.

    Files whose suffix is in ``NON_OLE_SUFFIXES`` must yield exactly
    ``NON_OLE_VALUES``. All other files are compared against their entry in
    ``OLE_VALUES``; a file without an entry only triggers a printed reminder
    to add one and does not fail the test.
    """
    # this relies on order of indicators being constant, could relax that
    # Also requires that files have the correct suffixes (no rtf in doc)
    NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '', '.odt', '.ods', '.odp')
    NON_OLE_VALUES = (False, )
    WORD = b'Microsoft Office Word'
    PPT = b'Microsoft Office PowerPoint'
    EXCEL = b'Microsoft Excel'
    CRYPT = (True, False, 'unknown', True, False, False, False, False,
             False, False, 0)
    # expected indicator values, keyed by path relative to DATA_BASE_DIR
    # (always written with forward slashes)
    OLE_VALUES = {
        'oleobj/sample_with_lnk_file.doc':
            (True, True, WORD, False, True, False, False, False, False,
             True, 0),
        'oleobj/embedded-simple-2007.xlsb': (False, ),
        'oleobj/embedded-simple-2007.docm': (False, ),
        'oleobj/embedded-simple-2007.xltx': (False, ),
        'oleobj/embedded-simple-2007.xlam': (False, ),
        'oleobj/embedded-simple-2007.dotm': (False, ),
        'oleobj/sample_with_lnk_file.ppt':
            (True, True, PPT, False, False, False, False, True, False,
             False, 0),
        'oleobj/embedded-simple-2007.xlsx': (False, ),
        'oleobj/embedded-simple-2007.xlsm': (False, ),
        'oleobj/embedded-simple-2007.ppsx': (False, ),
        'oleobj/embedded-simple-2007.pps':
            (True, True, PPT, False, False, False, False, True, False,
             False, 0),
        'oleobj/embedded-simple-2007.xla':
            (True, True, EXCEL, False, False, False, True, False, False,
             False, 0),
        'oleobj/sample_with_calc_embedded.doc':
            (True, True, WORD, False, True, False, False, False, False,
             True, 0),
        'oleobj/embedded-unicode-2007.docx': (False, ),
        'oleobj/embedded-unicode.doc':
            (True, True, WORD, False, True, False, False, False, False,
             True, 0),
        'oleobj/embedded-simple-2007.doc':
            (True, True, WORD, False, True, False, False, False, False,
             True, 0),
        'oleobj/embedded-simple-2007.xls':
            (True, True, EXCEL, False, False, False, True, False, False,
             False, 0),
        'oleobj/embedded-simple-2007.dot':
            (True, True, WORD, False, True, False, False, False, False,
             True, 0),
        'oleobj/sample_with_lnk_to_calc.doc':
            (True, True, WORD, False, True, False, False, False, False,
             True, 0),
        'oleobj/embedded-simple-2007.ppt':
            (True, True, PPT, False, False, False, False, True, False,
             False, 0),
        'oleobj/sample_with_lnk_file.pps':
            (True, True, PPT, False, False, False, False, True, False,
             False, 0),
        'oleobj/embedded-simple-2007.pptx': (False, ),
        'oleobj/embedded-simple-2007.ppsm': (False, ),
        'oleobj/embedded-simple-2007.dotx': (False, ),
        'oleobj/embedded-simple-2007.pptm': (False, ),
        'oleobj/embedded-simple-2007.xlt':
            (True, True, EXCEL, False, False, False, True, False, False,
             False, 0),
        'oleobj/embedded-simple-2007.docx': (False, ),
        'oleobj/embedded-simple-2007.potx': (False, ),
        'oleobj/embedded-simple-2007.pot':
            (True, True, PPT, False, False, False, False, True, False,
             False, 0),
        'oleobj/embedded-simple-2007.xltm': (False, ),
        'oleobj/embedded-simple-2007.potm': (False, ),
        'encrypted/encrypted.xlsx': CRYPT,
        'encrypted/encrypted.docm': CRYPT,
        'encrypted/encrypted.docx': CRYPT,
        'encrypted/encrypted.pptm': CRYPT,
        'encrypted/encrypted.xlsb': CRYPT,
        'encrypted/encrypted.xls':
            (True, True, EXCEL, True, False, False, True, False, False,
             False, 0),
        'encrypted/encrypted.ppt':
            (True, False, 'unknown', True, False, False, False, True, False,
             False, 0),
        'encrypted/encrypted.pptx': CRYPT,
        'encrypted/encrypted.xlsm': CRYPT,
        'encrypted/encrypted.doc':
            (True, True, WORD, True, True, False, False, False, False,
             False, 0),
        'msodde/harmless-clean.docm': (False, ),
        'msodde/dde-in-csv.csv': (False, ),
        'msodde/dde-test-from-office2013-utf_16le-korean.doc':
            (True, True, WORD, False, True, False, False, False, False,
             False, 0),
        'msodde/harmless-clean.doc':
            (True, True, WORD, False, True, False, False, False, False,
             False, 0),
        'msodde/dde-test.docm': (False, ),
        'msodde/dde-test.xlsb': (False, ),
        'msodde/dde-test.xlsm': (False, ),
        'msodde/dde-test.docx': (False, ),
        'msodde/dde-test.xlsx': (False, ),
        'msodde/dde-test-from-office2003.doc':
            (True, True, WORD, False, True, False, False, False, False,
             False, 0),
        'msodde/dde-test-from-office2016.doc':
            (True, True, WORD, False, True, False, False, False, False,
             False, 0),
        'msodde/harmless-clean.docx': (False, ),
        'oleform/oleform-PR314.docm': (False, ),
        'basic/encrypted.docx': CRYPT,
        'oleobj/external_link/sample_with_external_link_to_doc.docx':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.xlsb':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.dotm':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.xlsm':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.pptx':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.dotx':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.docm':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.potm':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.xlsx':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.potx':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.ppsm':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.pptm':
            (False, ),
        'oleobj/external_link/sample_with_external_link_to_doc.ppsx':
            (False, ),
        'encrypted/autostart-encrypt-standardpassword.xlsm':
            (True, False, 'unknown', True, False, False, False, False, False,
             False, 0),
        'encrypted/autostart-encrypt-standardpassword.xls':
            (True, True, EXCEL, True, False, True, True, False, False,
             False, 0),
        'encrypted/dde-test-encrypt-standardpassword.xlsx':
            (True, False, 'unknown', True, False, False, False, False, False,
             False, 0),
        'encrypted/dde-test-encrypt-standardpassword.xlsm':
            (True, False, 'unknown', True, False, False, False, False, False,
             False, 0),
        'encrypted/autostart-encrypt-standardpassword.xlsb':
            (True, False, 'unknown', True, False, False, False, False, False,
             False, 0),
        'encrypted/dde-test-encrypt-standardpassword.xls':
            (True, True, EXCEL, True, False, False, True, False, False,
             False, 0),
        'encrypted/dde-test-encrypt-standardpassword.xlsb':
            (True, False, 'unknown', True, False, False, False, False, False,
             False, 0),
    }

    indicator_names = []
    for base_dir, _, files in os.walk(DATA_BASE_DIR):
        for filename in files:
            full_path = join(base_dir, filename)
            # keys of OLE_VALUES use '/', so normalise the OS-specific
            # separator that relpath produces
            name = relpath(full_path, DATA_BASE_DIR).replace(os.sep, '/')
            # run oleid once per file and reuse the result for both the
            # values and (if still needed) the indicator names
            indicators = list(oleid.OleID(full_path).check())
            values = tuple(indicator.value for indicator in indicators)
            if len(indicator_names) < 2:
                # not initialized with ole yet
                indicator_names = tuple(
                    indicator.name for indicator in indicators)

            suffix = splitext(filename)[1]
            if suffix in NON_OLE_SUFFIXES:
                self.assertEqual(values, NON_OLE_VALUES,
                                 msg='For non-ole file {} expected {}, '
                                     'not {}'.format(name, NON_OLE_VALUES,
                                                     values))
                continue

            try:
                expected = OLE_VALUES[name]
            except KeyError:
                # unknown sample: remind the maintainer, but do not fail
                print('Should add oleid output for {} to {} ({})'.format(
                    name, __name__, values))
                continue
            self.assertEqual(
                values, expected,
                msg='Wrong detail values for {}:\n'
                    ' Names {}\n Found {}\n Expect {}'.format(
                        name, indicator_names, values, expected))