Пример #1
0
 def check_activeX_ole_contents_swf(self, unzip_dir, office_type=""):
     """
     Detect an ``FWS`` (SWF) marker inside an ActiveX OLE ``Contents`` stream.

     Condition:
         activeX & SWF

     Walks ``unzip_dir`` for files whose name matches
     ``activeX<1-2 digits>.bin``. File contents are cached in
     ``self.activeX_bin`` keyed by the bare file name, so a name already in
     the cache is not read from disk again. Files starting with the OLE
     compound-file magic are parsed, and every stream named ``Contents`` is
     checked for ``b'FWS'`` at byte offsets 8..10.

     The scan stops at the first hit, so files that would have been visited
     afterwards are neither read nor cached by this call.

     :param unzip_dir: root directory of the extracted document tree
     :param office_type: document family; ``'ppt'`` is skipped entirely
     :return: True if a matching stream was found, otherwise False
     """
     # Precondition
     if office_type == 'ppt':
         return False
     bin_docfile = b"\xD0\xCF\x11\xE0"
     name_pattern = re.compile(r'activeX\d{1,2}.bin')
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             if not name_pattern.match(filename):
                 continue
             if filename not in self.activeX_bin:
                 # Read-only access is sufficient; "r+b" would fail on
                 # files or directories that are not writable.
                 with open(os.path.join(root, filename), "rb") as f:
                     self.activeX_bin[filename] = f.read()
             data = self.activeX_bin[filename]
             if data[:4] != bin_docfile:
                 continue
             ole_ = olefile.OleFileIO(data)
             try:
                 for stream in ole_.listdir():
                     if stream[-1] != "Contents":
                         continue
                     content = ole_.openstream(stream).read()
                     if content[8:11] == b'FWS':
                         return True
             finally:
                 # Always release the parser, including on early return.
                 ole_.close()
     return False
 def check_equation_editor_harmful_face2(self, unzip_dir, office_type=""):
     """
     Flag embedded OLE objects whose native equation stream has an
     unexpected 4-byte prefix.

     Walks ``unzip_dir`` for files whose name matches
     ``oleObject<1-2 digits>.bin``. File contents are cached in
     ``self.oleObject_bin`` keyed by the bare file name. Only files that
     start with the OLE compound-file magic and contain the byte sequence
     ``bin_eqn_clsid`` anywhere in their data are parsed. In those, each
     stream named (case-insensitively) ``\\x01Ole10Native`` or
     ``Equation Native`` is checked: the object is flagged when the first
     four bytes differ from ``1C 00 00 00``.

     The scan stops at the first hit, so files that would have been visited
     afterwards are neither read nor cached by this call.

     :param unzip_dir: root directory of the extracted document tree
     :param office_type: accepted for signature parity; not used here
     :return: True if a flagged stream was found, otherwise False
     """
     bin_docfile = b"\xD0\xCF\x11\xE0"
     # NOTE(review): presumably the Equation Editor CLSID in its on-disk
     # byte layout -- confirm against the object-class registry.
     bin_eqn_clsid = b"\x02\xCE\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"
     native_streams = ("\x01ole10native", "equation native")
     name_pattern = re.compile(r'oleObject\d{1,2}.bin')
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             if not name_pattern.match(filename):
                 continue
             if filename not in self.oleObject_bin:
                 # Read-only access is sufficient; "r+b" would fail on
                 # files or directories that are not writable.
                 with open(os.path.join(root, filename), "rb") as f:
                     self.oleObject_bin[filename] = f.read()
             data = self.oleObject_bin[filename]
             # Plain substring test: the CLSID is literal bytes, not a
             # regular expression.
             if data[:4] != bin_docfile or bin_eqn_clsid not in data:
                 continue
             ole_ = olefile.OleFileIO(data)
             try:
                 for stream in ole_.listdir():
                     if stream[-1].lower() not in native_streams:
                         continue
                     try:
                         content = ole_.openstream(stream).read(4)
                         if content != b'\x1C\x00\x00\x00':
                             return True
                     except IndexError as indErr:
                         logging.warning(
                             "check_equation_editor_harmful_face2: %s",
                             indErr)
                         logging.warning("[filename]: %s", unzip_dir)
             finally:
                 # Always release the parser, including on early return.
                 ole_.close()
     return False
 def check_equation_editor_harmful_face(self, unzip_dir, office_type=""):
     """
     Flag embedded OLE objects whose ``Equation Native`` stream has the
     value 8 at byte offset 0x23.

     Walks ``unzip_dir`` for files whose name matches
     ``oleObject<1-2 digits>.bin``. File contents are cached in
     ``self.oleObject_bin`` keyed by the bare file name. Files starting with
     the OLE compound-file magic are parsed, and each stream named
     (case-insensitively) ``Equation Native`` is inspected. Streams shorter
     than 0x24 bytes are logged and skipped.

     The scan stops at the first hit, so files that would have been visited
     afterwards are neither read nor cached by this call.

     :param unzip_dir: root directory of the extracted document tree
     :param office_type: document family; ``'ppt'`` is skipped entirely
     :return: True if a flagged stream was found, otherwise False
     """
     # Precondition
     if office_type == 'ppt':
         return False
     bin_docfile = b"\xD0\xCF\x11\xE0"
     name_pattern = re.compile(r'oleObject\d{1,2}.bin')
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             if not name_pattern.match(filename):
                 continue
             if filename not in self.oleObject_bin:
                 # Read-only access is sufficient; "r+b" would fail on
                 # files or directories that are not writable.
                 with open(os.path.join(root, filename), "rb") as f:
                     self.oleObject_bin[filename] = f.read()
             data = self.oleObject_bin[filename]
             if data[:4] != bin_docfile:
                 continue
             ole_ = olefile.OleFileIO(data)
             try:
                 for stream in ole_.listdir():
                     if stream[-1].lower() != 'equation native':
                         continue
                     try:
                         # NOTE(review): offset 0x23 / value 8 presumably
                         # identifies a specific record tag in the equation
                         # data -- confirm against the format description.
                         if ole_.openstream(stream).read()[0x23] == 8:
                             return True
                     except IndexError as indErr:
                         logging.warning(
                             "check_equation_editor_harmful_face: %s",
                             indErr)
                         logging.warning("[filename]: %s", unzip_dir)
             finally:
                 # Always release the parser, including on early return.
                 ole_.close()
     return False
Пример #4
0
def extract_office_docs(filename, password_list, output_folder):
    """
    Decrypt a password-protected Office document into a temporary file.

    If the OLE container has both an ``EncryptionInfo`` and an
    ``EncryptedPackage`` stream, candidate passwords are screened with
    ``check_password`` before the key is loaded. Otherwise every candidate
    is tried directly through ``msoffcrypto``'s ``load_key``.

    Exceptions:
     - ValueError: Document is not an OLE file or the container is corrupted.
     - PasswordError: None of the supplied passwords is correct.
     - ExtractionError: listed by the original documentation; not raised
       directly here (presumably raised by the helpers -- TODO confirm).
     - Any other exception raised by ``load_key`` is propagated unchanged.

    :param filename: Name of the potentially encrypted Office file
    :param password_list: a list of password strings, ascii or unicode
    :param output_folder: a path to a directory where we can write to
    :return: Tuple ``(output_path, password)``: the path of the decrypted
        file that was written and the password that unlocked it.
    """
    if not olefile.isOleFile(filename):
        raise ValueError("Not OLE")

    try:
        of = olefile.OleFileIO(filename)
    except IOError:
        raise ValueError("Corrupted OLE Document")

    password = None
    try:
        # Presence of both "EncryptionInfo" and "EncryptedPackage" streams
        # selects the password pre-screening path.
        new_office = bool(
            of.exists("EncryptionInfo") and of.exists("EncryptedPackage"))
        if new_office:
            metadata = parse_enc_info(of.openstream("EncryptionInfo"))
            # From the provided passwords, keep the first one that verifies.
            for pass_try in password_list:
                if check_password(pass_try, metadata) is True:
                    password = pass_try
                    break
    finally:
        of.close()

    # Fail with the documented error *before* handing a missing password to
    # load_key(), which would otherwise raise an unrelated exception.
    if new_office and password is None:
        raise PasswordError("Could not find correct password")

    with open(filename, "rb") as encrypted:
        file = msoffcrypto.OfficeFile(encrypted)
        if new_office:
            # use the password verified above
            file.load_key(password=password)
        else:
            # re: older versions, such as xls, doc, ppt
            for pass_try in password_list:
                try:
                    file.load_key(password=pass_try)
                except Exception as e:
                    # A wrong password is expected; anything else is not.
                    if "Failed to verify password" in repr(e):
                        continue
                    raise
                password = pass_try
                break
            if password is None:
                raise PasswordError("Could not find correct password")

        # Reserve a unique output name, then write through a separately
        # opened handle that is guaranteed to be closed.
        tf = tempfile.NamedTemporaryFile(dir=output_folder, delete=False)
        name = tf.name
        tf.close()
        with open(name, "wb") as decrypted:
            file.decrypt(decrypted)
    return name, password
Пример #5
0
def read_ole(downloader, datasetinfo, **kwargs):
    """Read tabular data from the 'Workbook' stream of a downloaded OLE file.

    The file at ``datasetinfo['url']`` (after ``get_url`` processing) is
    downloaded into a temporary folder, its ``Workbook`` stream is written
    out as ``excel_file.xls``, and ``datasetinfo`` is mutated in place so
    that ``'url'`` points to that file and ``'format'`` is ``'xls'`` before
    delegating to ``read_tabular``.

    Args:
        downloader: object providing ``download_file(url, folder, name)``
        datasetinfo: dict with at least a ``'url'`` key; modified in place
        **kwargs: forwarded to ``get_url`` and ``read_tabular``

    Returns:
        Whatever ``read_tabular`` returns.
    """
    url = get_url(datasetinfo['url'], **kwargs)
    with temp_dir('ole') as folder:
        path = downloader.download_file(url, folder, 'olefile')
        ole = olefile.OleFileIO(path)
        try:
            data = ole.openstream('Workbook').getvalue()
        finally:
            # Release the file handle before the temp folder is cleaned up.
            ole.close()
        outputfile = join(folder, 'excel_file.xls')
        with open(outputfile, 'wb') as f:
            f.write(data)
        datasetinfo['url'] = outputfile
        datasetinfo['format'] = 'xls'
        return read_tabular(downloader, datasetinfo, **kwargs)
Пример #6
0
def getOLEHeaderInfo(filename):
    """Return selected summary metadata of an OLE file, or None on failure.

    :param filename: path (or other input accepted by ``olefile.OleFileIO``)
    :return: dict with keys ``'TimeStamp'`` (last-saved time formatted as
        ``%Y-%m-%d %H:%M:%S``), ``'Author'`` and ``'Title'``; ``None`` if the
        file cannot be opened or parsed, or if the last-saved time is
        missing.
    """
    try:
        ole = olefile.OleFileIO(filename)
        try:
            meta = ole.get_metadata()
        finally:
            # Close the container even if metadata parsing fails.
            ole.close()
        return {
            # A missing timestamp surfaces as AttributeError on strftime
            # and is treated as "no information" like any other failure.
            'TimeStamp': meta.last_saved_time.strftime('%Y-%m-%d %H:%M:%S'),
            'Author': meta.author,
            'Title': meta.title,
        }
    except Exception:
        # Best-effort lookup: any failure yields None. The previous handler
        # dereferenced an unrelated exception class attribute and could
        # itself raise instead of returning None.
        return None
Пример #7
0
def extract_docx(filename, password_list, output_folder):
    """
    Decrypt a password-protected OOXML document into a temporary ``.docx``.

    Exceptions:
     - ValueError: Document is an unsupported format (not OLE, corrupted,
       a legacy Word document, or not encrypted).
     - PasswordError: Document is a supported format, but the password is unknown.
     - ExtractionError: listed by the original documentation; not raised
       directly here (presumably raised by the helpers -- TODO confirm).

    :param filename: Name of the potential docx file
    :param password_list: a list of password strings, ascii or unicode
    :param output_folder: a path to a directory where we can write to
    :return: Tuple ``(output_path, password)``: the path of the decrypted
        file that was written and the password that unlocked it.
    """
    if not olefile.isOleFile(filename):
        raise ValueError("Not OLE")

    try:
        of = olefile.OleFileIO(filename)
    except IOError:
        raise ValueError("Corrupted OLE Document")

    try:
        if of.exists("WordDocument"):
            # Cannot parse these files yet
            raise ValueError("Legacy Word Document")

        if not (of.exists("EncryptionInfo") and of.exists("EncryptedPackage")):
            raise ValueError("Not encrypted")

        metadata = parse_enc_info(of.openstream("EncryptionInfo"))

        password = None
        for pass_try in password_list:
            if check_password(pass_try, metadata) is True:
                password = pass_try
                break

        if password is None:
            raise PasswordError("Could not find correct password")

        # delete=False: the decrypted file must outlive this function.
        with tempfile.NamedTemporaryFile(
                dir=output_folder, suffix=".docx", delete=False) as tf:
            decode_stream(password, metadata,
                          of.openstream("EncryptedPackage"), tf)
            name = tf.name
        return name, password
    finally:
        # Release the OLE container on every exit path.
        of.close()
 def check_ole_stream_malicious_executable_data(self,
                                                unzip_dir,
                                                office_type=""):
     """
     Flag embedded OLE packages whose source path has a suspicious extension.

     Walks ``unzip_dir`` for files whose name matches
     ``oleObject<1-2 digits>.bin``. File contents are cached in
     ``self.oleObject_bin`` keyed by the bare file name. Files starting with
     the OLE compound-file magic are parsed, and each ``\\x01Ole10Native``
     stream is decoded with ``oleobj.OleNativeStream``. The object is flagged
     when the lower-cased extension of its ``src_path`` is contained in
     ``self.susp_ext``. ``IndexError`` and ``struct.error`` raised while
     decoding a stream are logged and the stream is skipped.

     The scan stops at the first hit, so files that would have been visited
     afterwards are neither read nor cached by this call.

     :param unzip_dir: root directory of the extracted document tree
     :param office_type: accepted for signature parity; not used here
     :return: True if a flagged stream was found, otherwise False
     """
     bin_docfile = b"\xD0\xCF\x11\xE0"
     name_pattern = re.compile(r'oleObject\d{1,2}.bin')
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             if not name_pattern.match(filename):
                 continue
             if filename not in self.oleObject_bin:
                 # Read-only access is sufficient; "r+b" would fail on
                 # files or directories that are not writable.
                 with open(os.path.join(root, filename), "rb") as f:
                     self.oleObject_bin[filename] = f.read()
             data = self.oleObject_bin[filename]
             if data[:4] != bin_docfile:
                 continue
             ole_ = olefile.OleFileIO(data)
             try:
                 for stream in ole_.listdir():
                     if stream[-1] != "\x01Ole10Native":
                         continue
                     try:
                         content = ole_.openstream(stream).read()
                         # Separate name: do not shadow the loop variable.
                         native = oleobj.OleNativeStream(content)
                         extension = os.path.splitext(
                             native.src_path)[1].lower()
                         if extension in self.susp_ext:
                             return True
                     except IndexError as indErr:
                         logging.warning(
                             "get_ole_stream_malicious_executable_data: %s",
                             indErr)
                         logging.warning("[filename]: %s", unzip_dir)
                     except struct.error as structErr:
                         logging.warning(
                             "get_ole_stream_malicious_executable_data: %s",
                             structErr)
                         logging.warning("[filename]: %s", unzip_dir)
             finally:
                 # Always release the parser, including on early return.
                 ole_.close()
     return False
 def check_ole_swf_exploitable_data(self, unzip_dir, office_type=""):
     """
     Flag embedded OLE packages that carry an ``FWS`` payload named ``*.swf``.

     Walks ``unzip_dir`` for files whose name matches
     ``oleObject<1-2 digits>.bin``. File contents are cached in
     ``self.oleObject_bin`` keyed by the bare file name. Files starting with
     the OLE compound-file magic are parsed, and each ``\\x01Ole10Native``
     stream is decoded with ``oleobj.OleNativeStream``. The object is flagged
     when its data starts with ``b'FWS'`` and the extension of its
     ``filename`` is exactly ``.swf`` (case-sensitive). ``IndexError`` and
     ``struct.error`` raised while decoding a stream are logged and the
     stream is skipped.

     The scan stops at the first hit, so files that would have been visited
     afterwards are neither read nor cached by this call.

     :param unzip_dir: root directory of the extracted document tree
     :param office_type: document family; ``'ppt'`` is skipped entirely
     :return: True if a flagged stream was found, otherwise False
     """
     # Precondition
     if office_type == 'ppt':
         return False
     bin_docfile = b"\xD0\xCF\x11\xE0"
     name_pattern = re.compile(r'oleObject\d{1,2}.bin')
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             if not name_pattern.match(filename):
                 continue
             if filename not in self.oleObject_bin:
                 # Read-only access is sufficient; "r+b" would fail on
                 # files or directories that are not writable.
                 with open(os.path.join(root, filename), "rb") as f:
                     self.oleObject_bin[filename] = f.read()
             data = self.oleObject_bin[filename]
             if data[:4] != bin_docfile:
                 continue
             ole_ = olefile.OleFileIO(data)
             try:
                 for stream in ole_.listdir():
                     if stream[-1] != "\x01Ole10Native":
                         continue
                     try:
                         content = ole_.openstream(stream).read()
                         # Separate name: do not shadow the loop variable.
                         native = oleobj.OleNativeStream(content)
                         if (native.data is not None
                                 and native.data[0:3] == b'FWS'
                                 and os.path.splitext(
                                     native.filename)[1] == ".swf"):
                             return True
                     except IndexError as indErr:
                         logging.warning(
                             "get_ole_swf_exploitable_data: %s", indErr)
                         logging.warning("[filename]: %s", unzip_dir)
                     except struct.error as structErr:
                         logging.warning(
                             "get_ole_swf_exploitable_data: %s", structErr)
                         logging.warning("[filename]: %s", unzip_dir)
             finally:
                 # Always release the parser, including on early return.
                 ole_.close()
     return False
 def check_ole_settingcontent_ms(self, unzip_dir, office_type=""):
     """
     Flag embedded OLE packages that contain the settingcontent-ms marker.

     Only runs for ``office_type == 'word'``. Walks ``unzip_dir`` for files
     whose name matches ``oleObject<1-2 digits>.bin``. File contents are
     cached in ``self.oleObject_bin`` keyed by the bare file name. When the
     cached bytes start with the OLE compound-file magic, the file is opened
     from its path on disk and each ``\\x01Ole10Native`` stream is decoded
     with ``oleobj.OleNativeStream``. The object is flagged when its data
     contains ``{12B1697E-D3A0-4DBC-B568-CCF64A3F934D}``. ``IndexError`` and
     ``struct.error`` raised while decoding a stream are logged and the
     stream is skipped.

     The scan stops at the first hit, so files that would have been visited
     afterwards are neither read nor cached by this call.

     :param unzip_dir: root directory of the extracted document tree
     :param office_type: document family; anything but ``'word'`` is skipped
     :return: True if a flagged stream was found, otherwise False
     """
     # Precondition
     if office_type != 'word':
         return False
     bin_docfile = b"\xD0\xCF\x11\xE0"
     marker = b'{12B1697E-D3A0-4DBC-B568-CCF64A3F934D}'  # settingcontent-ms
     name_pattern = re.compile(r'oleObject\d{1,2}.bin')
     for (root, _, files) in os.walk(unzip_dir):
         for filename in files:
             if not name_pattern.match(filename):
                 continue
             filepath = os.path.join(root, filename)
             if filename not in self.oleObject_bin:
                 # Read-only access is sufficient; "r+b" would fail on
                 # files or directories that are not writable.
                 with open(filepath, "rb") as f:
                     self.oleObject_bin[filename] = f.read()
             if self.oleObject_bin[filename][:4] != bin_docfile:
                 continue
             ole_ = olefile.OleFileIO(filepath)
             try:
                 for stream in ole_.listdir():
                     if stream[-1] != "\x01Ole10Native":
                         continue
                     try:
                         content = ole_.openstream(stream).read()
                         # Separate name: do not shadow the loop variable.
                         native = oleobj.OleNativeStream(content)
                         if native.data is not None and marker in native.data:
                             return True
                     except IndexError as indErr:
                         logging.warning(
                             "check_ole_settingcontent_ms: %s", indErr)
                         logging.warning("[filename]: %s", filepath)
                     except struct.error as structErr:
                         logging.warning(
                             "check_ole_settingcontent_ms: %s", structErr)
                         logging.warning("[filename]: %s", filepath)
             finally:
                 # Always release the file handle, including on early return.
                 ole_.close()
     return False
Пример #11
0
def read_ole(downloader, datasetinfo, **kwargs):
    # type: (Download, Dict, Any) -> Tuple[List[str],Iterator[Union[List,Dict]]]
    """Read data from OLE Excel source

    The source is downloaded into a temporary folder, its ``Workbook``
    stream is written out as ``excel_file.xls``, and ``datasetinfo`` is
    mutated in place (``'url'`` set to that file, ``'format'`` set to
    ``'xls'``) before delegating to ``read_tabular``.

    Args:
        downloader (Download): Download object for downloading files
        datasetinfo (Dict): Dictionary of information about dataset
        **kwargs: Variables to use when evaluating template arguments

    Returns:
        Tuple[List[str],Iterator[Union[List,Dict]]]: Tuple (headers, iterator where each row is a list or dictionary)
    """
    url = get_url(datasetinfo['url'], **kwargs)
    with temp_dir('ole') as folder:
        path = downloader.download_file(url, folder, 'olefile')
        ole = olefile.OleFileIO(path)
        try:
            data = ole.openstream('Workbook').getvalue()
        finally:
            # Release the file handle before the temp folder is cleaned up.
            ole.close()
        outputfile = join(folder, 'excel_file.xls')
        with open(outputfile, 'wb') as f:
            f.write(data)
        datasetinfo['url'] = outputfile
        datasetinfo['format'] = 'xls'
        return read_tabular(downloader, datasetinfo, **kwargs)
Пример #12
0
def getMXDVersion(filename):
    """Return the version string stored in an OLE file's 'Version' stream.

    The stream is decoded as UTF-16 and split on NUL characters; the second
    field (index 1) is returned.

    :param filename: path (or other input accepted by ``olefile.OleFileIO``)
    :return: the second NUL-separated field of the decoded stream
    :raises IndexError: if the decoded stream has fewer than two fields
    """
    ofile = olefile.OleFileIO(filename)
    try:
        data = ofile.openstream('Version').read().decode('utf-16')
    finally:
        # Release the file handle even if the stream is missing or invalid.
        ofile.close()
    return data.split('\x00')[1]