Exemplo n.º 1
 def setUp(self):
     """Called before tests; fills self.oleids.

     Every (filename, contents) pair yielded by loop_over_files() is run
     through oleid; the outcome is stored as a tuple of the file name and
     a dict mapping indicator id to indicator value.
     """
     self.oleids = []
     for sample_name, sample_data in loop_over_files():
         indicators = oleid.OleID(filename=sample_name,
                                  data=sample_data).check()
         values_by_id = {indicator.id: indicator.value
                         for indicator in indicators}
         self.oleids.append((sample_name, values_by_id))
Exemplo n.º 2
    def test_encrypted_document_detection(self):
        """Check that oleid flags the encrypted sample as encrypted."""
        sample_path = join(DATA_BASE_DIR, 'basic/encrypted.docx')
        all_indicators = oleid.OleID(sample_path).check()

        # lazily select the value(s) of the indicator with id 'encrypted';
        # next() takes the first one
        encrypted_values = (indicator.value for indicator in all_indicators
                            if indicator.id == 'encrypted')

        self.assertEqual(next(encrypted_values), True)
Exemplo n.º 3
def process_file(filepath, field_filter_mode=None):
    """Decide which of the process_* functions to call and call it.

    Detection order: OLE container (xls, encrypted, ppt, otherwise doc),
    then rtf (by the first 4 bytes), then the type reported by
    ooxml.get_type(). If that type detection raises, the file is processed
    as csv.

    :param filepath: path of the file to process
    :param field_filter_mode: forwarded to process_rtf and to the default
                              process_docx code path
    :returns: result of the selected process_* function; u'' for ppt files
    :raises FileIsEncryptedError: if the OLE file is flagged as encrypted
    """
    if olefile.isOleFile(filepath):
        logger.debug('Is OLE. Checking streams to see whether this is xls')
        if xls_parser.is_xls(filepath):
            logger.debug('Process file as excel 2003 (xls)')
            return process_xls(filepath)

        # encrypted files also look like ole, even if office 2007+ (xml-based)
        # so check for encryption, first
        ole = olefile.OleFileIO(filepath, path_encoding=None)
        oid = oleid.OleID(ole)
        if oid.check_encrypted().value:
            logger.debug('is encrypted - raise error')
            ole.close()   # not handed on to anyone, so release it here
            raise FileIsEncryptedError(filepath)
        if oid.check_powerpoint().value:
            logger.debug('is ppt - cannot have DDE')
            ole.close()   # not handed on to anyone, so release it here
            return u''

        # neither xls, encrypted nor ppt: treat as word; process_doc
        # receives the open ole object
        logger.debug('Process file as word 2003 (doc)')
        return process_doc(ole)

    with open(filepath, 'rb') as file_handle:
        if file_handle.read(4) == RTF_START:
            logger.debug('Process file as rtf')
            return process_rtf(file_handle, field_filter_mode)

    try:
        doctype = ooxml.get_type(filepath)
        logger.debug('Detected file type: {0}'.format(doctype))
    except Exception as exc:
        # deliberate best effort: if type detection fails, fall back to
        # csv processing further down
        logger.debug('Exception trying to xml-parse file: {0}'.format(exc))
        doctype = None

    if doctype == ooxml.DOCTYPE_EXCEL:
        logger.debug('Process file as excel 2007+ (xlsx)')
        return process_xlsx(filepath)
    elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
        logger.debug('Process file as xml from excel 2003/2007+')
        return process_excel_xml(filepath)
    elif doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
        logger.debug('Process file as xml from word 2003/2007+')
        return process_docx(filepath)
    elif doctype is None:
        logger.debug('Process file as csv')
        return process_csv(filepath)
    else:  # could be docx; if not: this is the old default code path
        logger.debug('Process file as word 2007+ (docx)')
        return process_docx(filepath, field_filter_mode)
Exemplo n.º 4
    def oleid(self, args, file, opts):
        """Run oleid on a file and return its indicators as plain dicts.

        :param args: not used by this method
        :param file: object whose ``file_path`` attribute is the path to check
        :param opts: not used by this method
        :returns: list with one dict per indicator, with the string-valued
                  keys 'name', 'value' and 'description'
        :raises error.CommandWarning: if creating the OleID object fails
        """
        # NOTE: inside the method body ``oleid`` is looked up as a global
        # name, so it does not refer to this method
        try:
            oid = oleid.OleID(file.file_path)
        except Exception:
            raise error.CommandWarning('file ' + str(file.file_path) + ' is not a valid ole file')

        output = []
        for indicator in oid.check():
            value = indicator.value
            if isinstance(value, bytes):
                # bytes values are decoded first so that str() does not
                # produce a b'...' representation
                value = value.decode('utf-8')
            output.append({
                'name': str(indicator.name),
                'value': str(value),
                'description': str(indicator.description),
            })
        return output
Exemplo n.º 5
 def open(self, filename, *args, **kwargs):
     """Call OleFileIO.open, then remember whether the file is encrypted.

     The value of oleid's encryption check is stored in self.is_encrypted;
     nothing is returned.
     """
     # explicit base-class call (a super()-based variant was left disabled)
     OleFileIO.open(self, filename, *args, **kwargs)
     encryption_indicator = oleid.OleID(self).check_encrypted()
     self.is_encrypted = encryption_indicator.value
Exemplo n.º 6
    def test_all(self):
        """Run all files in test-data through oleid and compare to known output

        Walks DATA_BASE_DIR, collects the indicator values oleid reports for
        each file and compares them with the expectations defined below.

        NOTE(review): this listing appears to have lost lines (dict keys,
        ends of tuples, the closing brace of OLE_VALUES, a ``try:`` and the
        assert calls), so it cannot run as shown; restore it from the
        original test module before changing any code.
        """
        # this relies on order of indicators being constant, could relax that
        # Also requires that files have the correct suffixes (no rtf in doc)

        # file suffixes for which the non-ole expectation below is used
        NON_OLE_SUFFIXES = ('.xml', '.csv', '.rtf', '', '.odt', '.ods', '.odp')
        # expected indicator values for files with one of those suffixes
        NON_OLE_VALUES = (False, )
        # byte strings used as values inside the expected tuples below
        WORD = b'Microsoft Office Word'
        PPT = b'Microsoft Office PowerPoint'
        EXCEL = b'Microsoft Excel'
        # NOTE(review): CRYPT is not referenced in the visible part of this
        # test; presumably used by entries that were lost -- confirm
        CRYPT = (True, False, 'unknown', True, False, False, False, False,
                 False, False, 0)
        # expected indicator values, keyed by file path relative to
        # DATA_BASE_DIR (see the lookup OLE_VALUES[name] further down)
        OLE_VALUES = {
            # NOTE(review): several entries below have no key and/or no
            # closing parenthesis -- these lines are incomplete as shown
            (True, True, WORD, False, True, False, False, False, False, True,
            'oleobj/embedded-simple-2007.xlsb': (False, ),
            'oleobj/embedded-simple-2007.docm': (False, ),
            'oleobj/embedded-simple-2007.xltx': (False, ),
            'oleobj/embedded-simple-2007.xlam': (False, ),
            'oleobj/embedded-simple-2007.dotm': (False, ),
            (True, True, PPT, False, False, False, False, True, False, False,
            'oleobj/embedded-simple-2007.xlsx': (False, ),
            'oleobj/embedded-simple-2007.xlsm': (False, ),
            'oleobj/embedded-simple-2007.ppsx': (False, ),
            (True, True, PPT, False, False, False, False, True, False, False,
            (True, True, EXCEL, False, False, False, True, False, False, False,
            (True, True, WORD, False, True, False, False, False, False, True,
            'oleobj/embedded-unicode-2007.docx': (False, ),
            'oleobj/embedded-unicode.doc': (True, True, WORD, False, True,
                                            False, False, False, False, True,
            'oleobj/embedded-simple-2007.doc': (True, True, WORD, False, True,
                                                False, False, False, False,
                                                True, 0),
            'oleobj/embedded-simple-2007.xls': (True, True, EXCEL, False,
                                                False, False, True, False,
                                                False, False, 0),
            'oleobj/embedded-simple-2007.dot': (True, True, WORD, False, True,
                                                False, False, False, False,
                                                True, 0),
            'oleobj/sample_with_lnk_to_calc.doc': (True, True, WORD, False,
                                                   True, False, False, False,
                                                   False, True, 0),
            'oleobj/embedded-simple-2007.ppt': (True, True, PPT, False, False,
                                                False, False, True, False,
                                                False, 0),
            'oleobj/sample_with_lnk_file.pps': (True, True, PPT, False, False,
                                                False, False, True, False,
                                                False, 0),
            'oleobj/embedded-simple-2007.pptx': (False, ),
            'oleobj/embedded-simple-2007.ppsm': (False, ),
            'oleobj/embedded-simple-2007.dotx': (False, ),
            'oleobj/embedded-simple-2007.pptm': (False, ),
            'oleobj/embedded-simple-2007.xlt': (True, True, EXCEL, False,
                                                False, False, True, False,
                                                False, False, 0),
            'oleobj/embedded-simple-2007.docx': (False, ),
            'oleobj/embedded-simple-2007.potx': (False, ),
            'oleobj/embedded-simple-2007.pot': (True, True, PPT, False, False,
                                                False, False, True, False,
                                                False, 0),
            'oleobj/embedded-simple-2007.xltm': (False, ),
            'oleobj/embedded-simple-2007.potm': (False, ),
            'encrypted/encrypted.xls': (True, True, EXCEL, True, False, False,
                                        True, False, False, False, 0),
            'encrypted/encrypted.ppt': (True, False, 'unknown', True, False,
                                        False, False, True, False, False, 0),
            'encrypted/encrypted.doc': (True, True, WORD, True, True, False,
                                        False, False, False, False, 0),
            'msodde/harmless-clean.docm': (False, ),
            'msodde/dde-in-csv.csv': (False, ),
            (True, True, WORD, False, True, False, False, False, False, False,
            'msodde/harmless-clean.doc': (True, True, WORD, False, True, False,
                                          False, False, False, False, 0),
            'msodde/dde-test.docm': (False, ),
            'msodde/dde-test.xlsb': (False, ),
            'msodde/dde-test.xlsm': (False, ),
            'msodde/dde-test.docx': (False, ),
            'msodde/dde-test.xlsx': (False, ),
            'msodde/dde-test-from-office2003.doc': (True, True, WORD, False,
                                                    True, False, False, False,
                                                    False, False, 0),
            'msodde/dde-test-from-office2016.doc': (True, True, WORD, False,
                                                    True, False, False, False,
                                                    False, False, 0),
            'msodde/harmless-clean.docx': (False, ),
            'oleform/oleform-PR314.docm': (False, ),
            (False, ),
            (False, ),
            (False, ),
            (False, ),
            (False, ),
            (False, ),
            (False, ),
            (False, ),
            (False, ),
            (False, ),
            (False, ),
            (False, ),
            (False, ),
            (True, False, 'unknown', True, False, False, False, False, False,
             False, 0),
            (True, True, EXCEL, True, False, True, True, False, False, False,
            (True, False, 'unknown', True, False, False, False, False, False,
             False, 0),
            (True, False, 'unknown', True, False, False, False, False, False,
             False, 0),
            (True, False, 'unknown', True, False, False, False, False, False,
             False, 0),
            (True, True, EXCEL, True, False, False, True, False, False, False,
            (True, False, 'unknown', True, False, False, False, False, False,
             False, 0),

        # indicator names are only used in the failure message below; they
        # are re-collected until a result with at least 2 entries was seen
        indicator_names = []
        # visit every file below DATA_BASE_DIR
        for base_dir, _, files in os.walk(DATA_BASE_DIR):
            for filename in files:
                full_path = join(base_dir, filename)
                # path relative to DATA_BASE_DIR; used as key into OLE_VALUES
                name = relpath(full_path, DATA_BASE_DIR)
                values = tuple(indicator.value
                               for indicator in oleid.OleID(full_path).check())
                if len(indicator_names) < 2:  # not initialized with ole yet
                    indicator_names = tuple(
                        for indicator in oleid.OleID(full_path).check())
                # NOTE(review): the statements below are incomplete in this
                # listing (assert calls and a ``try:`` are missing)
                suffix = splitext(filename)[1]
                if suffix in NON_OLE_SUFFIXES:
                                     msg='For non-ole file {} expected {}, '
                                     'not {}'.format(name, NON_OLE_VALUES,
                        msg='Wrong detail values for {}:\n'
                        '  Names  {}\n  Found  {}\n  Expect {}'.format(
                            name, indicator_names, values, OLE_VALUES[name]))
                except KeyError:
                    print('Should add oleid output for {} to {} ({})'.format(
                        name, __name__, values))