Пример #1
0
def checkVersion(file):
    """Open an Inventor file and return its OLE handle if the version is supported.

    The 'RSeDb' stream is looked up among the file's streams and the version
    is decoded from its content with ``getVersionInfo(data, 20)``.

    Args:
        file: Path of the file to inspect.

    Returns:
        The opened ``OleFileIO`` object when the decoded major version is 14
        or higher (``setDumpFolder(file)`` is called first). Otherwise ``None``,
        after the problem was shown in a message box and logged; the OLE file
        is closed again in that case.
    """
    # Must be pre-set: without an 'RSeDb' stream the loop never assigns it.
    version = None
    ole = OleFileIO(os.path.abspath(file))
    for entry in ole.listdir(streams=True, storages=False):
        if entry[-1] == 'RSeDb':
            data = ole.openstream(entry).read()
            version, _ = getVersionInfo(data, 20)
            if version.major >= 14:
                setDumpFolder(file)
                return ole
            break

    # Nothing is handed back to the caller on this path, so release the file.
    ole.close()

    if version:
        vrsName = version.major
        # NOTE(review): majors >= 11 are shown as major + 1996, presumably the
        # release year - confirm.
        if version.major >= 11:
            vrsName += 1996
        QMessageBox.critical(
            FreeCAD.ActiveDocument, 'FreeCAD: Inventor workbench...',
            'Can\'t load file created with Inventor v%d' % (vrsName))
        logError('Can\'t load file created with Inventor v%d' % (vrsName))
    else:
        QMessageBox.critical(
            FreeCAD.ActiveDocument, 'FreeCAD: Inventor workbench...',
            'Can\'t determine Inventor version file was created with')
        logError('Can\'t determine Inventor version file was created with!')
    return None
Пример #2
0
    def __init__(self, filename):
        """Open the OLE container *filename* and parse its component list.

        The components are parsed from the "Components6/Data" stream. Their
        number is compared with the count stored in "Components6/Header"; a
        mismatch only prints a warning.
        """
        self.OleFile = OleFileIO(filename)

        # Components
        self.Components = self.parseComponents(
            self.readStream("Components6/Data"))
        manifest = getU32(self.readStream("Components6/Header"))
        counted = len(self.Components)
        if manifest != counted:
            # A single parenthesised argument is valid both as a Python 2
            # print statement and as a Python 3 print() call.
            print("Warning: Header disagrees about component count, says there are "
                  + str(manifest) + ", but we counted " + str(counted) + ".")
Пример #3
0
 def get_streams(self,dump) -> (list,list):
     '''
     Read every entry of an OLE container and strip non-printable characters.

     dump is passed straight to OleFileIO.

     Returns a tuple of two lists with one item per directory entry:
     dicts {"Name": cleaned entry path joined with " : ", "Parsed": cleaned
     stream content decoded as UTF-8}, and the cleaned stream content as bytes.
     '''
     _List = []
     _Listobjects = []
     ole = OleFileIO(dump)
     try:
         for direntry in ole.listdir():
             dirs = re.sub(r'[^\x20-\x7f]',r'', " : ".join(direntry))
             tempdecoded = sub(br'[^\x20-\x7F]+',b'', ole.openstream(direntry).getvalue())
             _Listobjects.append(tempdecoded)
             _List.append({"Name":dirs,"Parsed":tempdecoded.decode("utf-8",errors="ignore")})
     finally:
         # Release the container even when a stream cannot be read.
         ole.close()
     return _List,_Listobjects
Пример #4
0
def ReadFile(doc, readProperties):
    """Read the streams of the current Inventor file into *doc*.

    The file name comes from ``getInventorFile()``. Every selected stream is
    handed to ``ReadElement`` together with a running counter that starts
    at 1. Streams whose last path component starts with 'B' are skipped
    unless they sit at the top level of the container.

    Args:
        doc: Document object; a "read from" line is appended to ``doc.Comment``.
        readProperties: Passed through unchanged to ``ReadElement``.

    Returns:
        True when the file is an OLE file and was processed, False otherwise
        (an error is logged in that case).
    """
    inventor_file = getInventorFile()
    if not isOleFile(inventor_file):
        logError("Error - '%s' is not a valid Autodesk Inventor file." %
                 (inventor_file))
        return False

    # LOG.LOG_FILTER = LOG.LOG_FILTER | LOG.LOG_DEBUG

    folder = inventor_file[0:-4]
    ole = OleFileIO(inventor_file)
    try:
        setFileVersion(ole)
        elements = ole.listdir(streams=True, storages=False)

        if not os.path.exists(folder):
            os.makedirs(folder)

        # RSe* streams are inserted at index `first`. Once 'RSeDb' has been
        # placed, `first` advances, so 'RSeDb' stays the very first stream
        # and later RSe* streams land directly behind it.
        first = 0
        ordered = []
        for fname in elements:
            name = fname[-1]
            if len(fname) == 1:
                ordered.append(fname)
            elif name.startswith('RSe'):
                ordered.insert(first, fname)
                if name == 'RSeDb':
                    first += 1
            elif not name.startswith('B'):
                ordered.append(fname)

        for counter, fname in enumerate(ordered, 1):
            ReadElement(ole, fname, doc, counter, readProperties)
    finally:
        # Close the container even if parsing one of the streams fails.
        ole.close()

    now = datetime.datetime.now()
    if len(doc.Comment) > 0:
        doc.Comment += '\n'
    # Append: plain assignment would discard the existing comment that the
    # newline above was just added to.
    doc.Comment += '# %s: read from %s' % (
        now.strftime('%Y-%m-%d %H:%M:%S'), inventor_file)

    logMessage("Dumped data to folder: '%s'" % (folder), LOG.LOG_INFO)

    return True
Пример #5
0
 def extract_ole_metadata(self, file_path):
     """Extract metadata from file_path if it is an OLE container.

     Nothing happens (None is returned) when isOleFile rejects the file.
     Otherwise the file is wrapped in OleFileIO and handed to
     self.extract_olefileio_metadata.
     """
     # OLE containers are binary data; text mode would try to decode them.
     with open(file_path, 'rb') as fh:
         if not isOleFile(fh):
             return
         # Rewind before handing the same handle to OleFileIO.
         fh.seek(0)
         ole = OleFileIO(fh)
         self.extract_olefileio_metadata(ole)
def setInventorFile(file):
    """Register *file* as the current Inventor file and open it.

    The absolute path is stored in the module-level ``_inventor_file`` and
    passed to ``setDumpFolder``.

    Returns:
        A new ``OleFileIO`` opened on *file* as given by the caller.
    """
    global _inventor_file, _dump_folder

    absolute_path = os.path.abspath(file)
    _inventor_file = absolute_path
    setDumpFolder(absolute_path)
    return OleFileIO(file)
Пример #7
0
    def __init__(self, msg_file_path):
        """Parse the MSG file at *msg_file_path*.

        Raises:
            Exception: if ``self.is_valid_msg_file()`` rejects the file.
        """
        self.msg_file_path = msg_file_path
        self.include_attachment_data = False

        file_is_valid = self.is_valid_msg_file()
        if not file_is_valid:
            raise Exception(
                "Invalid file provided, please provide valid Microsoft’s Outlook MSG file."
            )

        # Everything below runs while the OLE container is still open.
        with OleFileIO(msg_file_path) as container:
            # The message is built from the root entry's children.
            self._message = Message(container.root.kids_dict)
            self._message_dict = self._message.as_dict()

            # Fill properties, then recipients, then attachments.
            self._set_properties()
            self._set_recipients()
            self._set_attachments()
Пример #8
0
def _get_reference(ole, txrm_name, custom_reference, ignore_reference):
    if custom_reference is not None:
        logging.info("%s is being processed with file %s as a reference.",
                     txrm_name, custom_reference.name)
        reference_path = str(custom_reference)
        try:
            if isOleFile(reference_path):
                with OleFileIO(reference_path) as ref_ole:
                    references = txrm_wrapper.extract_all_images(
                        ref_ole)  # should be float for averaging & dividing
            elif ".tif" in reference_path:
                with tf.TiffFile(reference_path) as tif:
                    references = np.asarray(tif.pages[:])
            else:
                msg = f"Unable to open file '{reference_path}'. Only tif/tiff or xrm/txrm files are supported for custom references."
                logging.error(msg)
                raise IOError(msg)
        except:
            logging.error("Error occurred reading custom reference",
                          exc_info=True)
            raise
        if len(references) > 1:
            # if reference file is an image stack take median of the images
            return _dynamic_despeckle_and_average_series(references)
        return references[0]

    elif ole.exists("ReferenceData/Image") and not ignore_reference:
        logging.info("Internal reference will be applied to %s", txrm_name)
        return txrm_wrapper.extract_reference_image(ole)

    logging.debug("%s is being processed without a reference.", txrm_name)
    return None
Пример #9
0
    def __init__(self, data: bytes):
        """Run the oletools OleID checks on *data* when it is an OLE file.

        ``self.oid`` stays None when ``isOleFile`` rejects the data.
        """
        self.oid: Optional[oletools.oleid.OleID] = None

        if not isOleFile(data):
            return

        indicator = oletools.oleid.OleID(OleFileIO(data))
        self.oid = indicator
        indicator.check()
Пример #10
0
 def convert(self,
             txrm_file,
             custom_reference=None,
             ignore_reference=False,
             annotate=False):
     """Load a txrm file and prepare its image data and metadata.

     Results are stored on the instance: self.image_output,
     self.ome_metadata and, only when annotate is true, self.annotator
     (an Annotator, or False if no annotations were drawn).
     """
     with OleFileIO(str(txrm_file)) as ole:
         raw_images = txrm_wrapper.extract_all_images(ole)
         reference = _get_reference(ole, txrm_file.name, custom_reference,
                                    ignore_reference)
         if reference is None:
             self.image_output = np.around(raw_images)
         else:
             self.image_output = _apply_reference(raw_images, reference)

         # The stream names are spelled "Mosiac" in the container.
         has_mosaic_info = (len(self.image_output) > 1
                            and ole.exists("ImageInfo/MosiacRows")
                            and ole.exists("ImageInfo/MosiacColumns"))
         if has_mosaic_info:
             rows = txrm_wrapper.read_imageinfo_as_int(ole, "MosiacRows")
             cols = txrm_wrapper.read_imageinfo_as_int(ole, "MosiacColumns")
             if rows != 0 and cols != 0:
                 # Version 13 style mosaic:
                 self.image_output = _stitch_images(
                     self.image_output, (cols, rows), 1)

         if annotate:
             candidate = Annotator(self.image_output[0].shape[::-1])
             # extract_annotations is true if any annotations were drawn
             drawn = candidate.extract_annotations(ole)
             self.annotator = candidate if drawn else False

         self.ome_metadata = create_ome_metadata(ole, self.image_output)
Пример #11
0
def is_encrypted(some_file):
    """
    Determine whether document contains encrypted content.

    This should return False for documents that are just write-protected or
    signed or finalized. It should return True if ANY content of the file is
    encrypted and can therefore not be analyzed by other oletools modules
    without given a password.

    Exception: there are ways to write-protect an office document by embedding
    it as encrypted stream with hard-coded standard password into an otherwise
    empty OLE file. From an office user point of view, this is no encryption,
    but regarding file structure this is encryption, so we return `True` for
    these.

    This should not raise exceptions needlessly.

    This implementation is rather simple: it returns True if the file contains
    streams with typical encryption names (c.f. [MS-OFFCRYPTO]). It does not
    test whether these streams actually contain data or whether the ole file
    structure contains the necessary references to these. It also checks the
    "well-known property" PIDSI_DOC_SECURITY if the SummaryInformation stream
    is accessible (c.f. [MS-OLEPS] 2.25.1)

    :param some_file: File name or an opened OleFileIO
    :type some_file: :py:class:`olefile.OleFileIO` or `str`
    :returns: True if (and only if) the file contains encrypted content
    """
    log.debug('is_encrypted')
    if isinstance(some_file, OleFileIO):
        # The caller owns this object, so it is left open.
        return is_encrypted_ole(some_file)
    if zipfile.is_zipfile(some_file):
        return is_encrypted_zip(some_file)
    # otherwise assume it is the name of an ole file; it is opened here, so
    # it is closed here as well
    ole = OleFileIO(some_file)
    try:
        return is_encrypted_ole(ole)
    finally:
        ole.close()
Пример #12
0
    def __init__(self, filename):
        """Open the OLE container *filename* and parse the library it holds.

        After construction the instance offers:
          * ``Properties``       - parsed key/value string from "Library/Data"
          * ``Footprints``       - one ``Footprint`` per name listed there
          * ``FootprintsByName`` - the same footprints keyed by their ``name``

        Progress is reported with print; each print call takes one
        parenthesised argument so it works on Python 2 and Python 3.
        """
        self.OleFile = OleFileIO(filename)

        # TOC = Table Of Contents
        # "Library/ComponentParamsTOC/Data" would also list the footprints,
        # but it is not always present, so it is not read here.

        #
        # Parse library parameters
        # Library/Data contains a list of parameters (string: "|"-separated key-value pairs)
        # followed by the count and names of footprints in the library
        #
        buffer = self.readStream("Library/Data")

        # Properties: a 4-byte length followed by that many bytes of data
        print("Library properties:")
        length = getU32(buffer[:4])
        self.Properties = parseKeyValueString(buffer[4:4+length])
        print(self.Properties)

        # Footprint list
        cursor = 4+length
        count = getU32(buffer[cursor:])
        cursor += 4
        print("Footprints in library: "+str(count))
        footprints = []
        for _ in range(count):
            subrecord = SubRecord(buffer[cursor:])
            name = SubRecord_String(subrecord)
            print(" * "+name)
            footprints.append(name)
            cursor += subrecord.length

        # Parse all the footprints
        self.Footprints = []
        for footprint in footprints:
            print("Parsing "+footprint+" ...")
            self.Footprints.append(
                Footprint(self.readStream(footprint+"/Data")))

        # Create a dictionary of footprints to access them by name
        self.FootprintsByName = {
            footprint.name: footprint for footprint in self.Footprints
        }
Пример #13
0
    def __init__(self, filename):
        """Open the OLE container *filename* and parse its component list.

        The components are parsed from the "Components6/Data" stream. Their
        number is compared with the count stored in "Components6/Header"; a
        mismatch only prints a warning.
        """
        self.OleFile = OleFileIO(filename)

        # Components
        self.Components = self.parseComponents(self.readStream("Components6/Data"))
        manifest = getU32(self.readStream("Components6/Header"))
        counted = len(self.Components)
        if manifest != counted:
            # A single parenthesised argument is valid both as a Python 2
            # print statement and as a Python 3 print() call.
            print("Warning: Header disagrees about component count, says there are "
                  + str(manifest) + ", but we counted " + str(counted) + ".")
    def __init__(self, olefile, path='', parent=None):
        """Wrap *olefile* as an OLE storage item.

        *olefile* may be an object that already has ``openstream`` or
        anything accepted by OleFileIO; the latter is validated and opened.

        Raises:
            InvalidOleStorageError: if the input has no ``openstream`` and is
                not an OLE2 compound file.
        """
        already_opened = hasattr(olefile, 'openstream')
        if not already_opened:
            # Both names are resolved through the import helpers.
            check_is_ole = import_isOleFile()
            open_ole = import_OleFileIO()

            if not check_is_ole(olefile):
                raise InvalidOleStorageError(
                    'Not an OLE2 Compound Binary File.')
            olefile = open_ole(olefile)
        OleStorageItem.__init__(self, olefile, path, parent)
Пример #15
0
 def get_general(self, data, f):
     '''
     Copy every populated OLE metadata attribute of f into data.

     f is passed to OleFileIO; data is updated in place. Attributes that
     are None or empty bytes are skipped, other bytes values are decoded
     as UTF-8 (undecodable bytes ignored), everything else is stored as is.
     '''
     ole = OleFileIO(f)
     try:
         metadata = ole.get_metadata()
     finally:
         # The metadata object no longer needs the container.
         ole.close()
     for k, v in metadata.__dict__.items():
         if v is None:
             continue
         if isinstance(v, bytes):
             if len(v) > 0:
                 data.update({k: v.decode("utf-8", errors="ignore")})
         else:
             data.update({k: v})
def _ole_text(value):
    """Return a metadata text property as str; absent (None) becomes 'N/A'."""
    if value is None:
        return "N/A"
    if isinstance(value, bytes):
        return value.decode("latin-1")
    return str(value)


def _ole_time(value):
    """Return a metadata timestamp via ctime(); absent (None) becomes 'N/A'."""
    return "N/A" if value is None else value.ctime()


def oleMetaData(file_path, save=True):
    """Print the summary metadata of an OLE file and optionally save it.

    Args:
        file_path: Path of the OLE file to inspect.
        save: When true, the report is also passed to ``saveResult`` with the
            target name ``getFileName(file_path) + ".txt"``.

    Errors while opening the file are reported on stdout, not raised.
    Properties missing from the file are reported as 'N/A'.
    """
    now = dt.now()
    file_name = getFileName(file_path)
    metadata = "Time: %d/%d/%d %d : %d : %d. Found the following metadata for file %s:\n\n" % (
        now.year, now.month, now.day, now.hour, now.minute, now.second,
        file_name[:-4])
    try:
        ole = OleFileIO(file_path)
        try:
            meta = ole.get_metadata()
        finally:
            ole.close()

        author = _ole_text(meta.author)
        creation_time = _ole_time(meta.create_time)
        last_author = _ole_text(meta.last_saved_by)
        last_edit_time = _ole_time(meta.last_saved_time)
        last_printed = _ole_time(meta.last_printed)
        revisions = _ole_text(meta.revision_number)
        company = _ole_text(meta.company)
        creating_app = _ole_text(meta.creating_application)

        metadata += "Original Author: %s\nCreation Time: %s\nLast Author: %s\n" % (author, creation_time, last_author) \
                    + "Last Modification Time: %s\nLast Printed at: %s\nTotal Revisions: %s\n" % (last_edit_time, last_printed, revisions) \
                    + "Created with: %s\nCompany: %s" % (creating_app, company)

        try:
            print(metadata)
        except UnicodeEncodeError:
            print(
                "Console encoding can't decode the result. Enter chcp 65001 in the console and rerun the script."
            )

        if save:
            tgt = file_name + ".txt"
            saveResult(tgt, metadata)

    # FileNotFoundError is a subclass of OSError, so it has to come first.
    except FileNotFoundError:
        print("Specified file could not be found")
    except OSError as e1:
        print("File not supported: %s" % e1)
Пример #17
0
 def get_general(self, data, temp_f):
     '''
     Copy every populated OLE metadata attribute of temp_f into data.

     temp_f is passed to OleFileIO; data is updated in place. Attributes
     that are None or empty bytes are skipped, other bytes values are decoded
     as UTF-8 (undecodable bytes ignored), everything else is stored as is.
     '''
     ole = OleFileIO(temp_f)
     try:
         metadata = ole.get_metadata()
     finally:
         # The metadata object no longer needs the container.
         ole.close()
     for temp_k, temp_v in metadata.__dict__.items():
         if temp_v is None:
             continue
         if isinstance(temp_v, bytes):
             if len(temp_v) > 0:
                 data.update(
                     {temp_k: temp_v.decode("utf-8", errors="ignore")})
         else:
             data.update({temp_k: temp_v})
Пример #18
0
 def read(self):
     """Open self.filename as an OLE container and dispatch its entries.

     'Metadata' entries containing "Main MetaData Stream" go to
     handler_metadata when self.parse_metadata is set. 'Document' entries
     containing "Dialog Stream" go to handler_dialog when self.parse_dialog
     is set. All other entries are ignored. Handler errors are logged and
     do not stop the loop.

     Returns self.read_result.
     """
     mylog.info(u'Начинаю чтение %s' % self.filename)
     self.ole = OleFileIO(self.filename)
     oledirs = self.ole.listdir()
     mylog.debug('OLE_DIRS: %s' % oledirs)
     for entry in oledirs:
         entry_name = entry[0]
         mylog.debug(u'entry_name: %s' % entry_name)
         try:
             if entry_name == 'Metadata':
                 if "Main MetaData Stream" in entry and self.parse_metadata:
                     self.handler_metadata(entry)
             elif entry_name == 'Document':
                 # "Container.Profile" and "Container.Contents" entries
                 # need no handling.
                 if "Dialog Stream" in entry and self.parse_dialog:
                     self.handler_dialog(entry)
         except Exception as e:
             # Exceptions have no .message attribute on Python 3; format
             # the exception itself.
             mylog.exception(u'Ошибка при чтении конфигурации %s' % e)
     return self.read_result
Пример #19
0
 def extract_ole_metadata(self, file_path, entity):
     """Extract OLE metadata from file_path for entity.

     Files rejected by isOleFile are ignored. Failures while reading the
     container are logged and never raised.
     """
     with open(file_path, 'rb') as stream:
         if isOleFile(stream):
             # Rewind before the same handle is parsed as a container.
             stream.seek(0)
             try:
                 self.extract_olefileio_metadata(OleFileIO(stream), entity)
             except (RuntimeError, IOError):
                 # OLE reading can go fully recursive, at which point it's
                 # OK to just eat this runtime error quietly.
                 log.warning("Failed to read OLE data: %r", entity)
             except Exception:
                 log.exception("Failed to read OLE data: %r", entity)
Пример #20
0
class PcbDoc:
    """Reader for the component list stored in an OLE container."""

    #
    # Open and parse
    #
    def __init__(self, filename):
        """Open *filename* and parse the "Components6/Data" stream.

        A warning is printed when the count stored in "Components6/Header"
        differs from the number of parsed components.
        """
        self.OleFile = OleFileIO(filename)

        # Components
        self.Components = self.parseComponents(
            self.readStream("Components6/Data"))
        manifest = getU32(self.readStream("Components6/Header"))
        counted = len(self.Components)
        if manifest != counted:
            # A single parenthesised argument is valid both as a Python 2
            # print statement and as a Python 3 print() call.
            print("Warning: Header disagrees about component count, says there are "
                  + str(manifest) + ", but we counted " + str(counted) + ".")

    #
    # Read a file from OLE container and return its contents
    #
    def readStream(self, path):
        """Return the complete content of the stream at *path*."""
        f = self.OleFile.openstream(path)
        try:
            # One read() replaces the byte-by-byte loop, which also failed
            # on Python 3 by concatenating bytes onto a str.
            return f.read()
        finally:
            f.close()

    #
    # Parse all components from list
    #
    def parseComponents(self, buffer):
        """Split *buffer* into length-prefixed records and parse each one.

        Every record starts with a 4-byte length read by ``getU32``; the
        cursor then advances by ``length + 4``.

        Returns:
            List with one ``parseKeyValueString`` result per record.
        """
        result = []
        cursor = 0

        while cursor < len(buffer):
            length = getU32(buffer[cursor:cursor + 4])
            # NOTE(review): the slice ends at cursor + length although the
            # record is skipped with length + 4, so the last 4 bytes of each
            # record are not parsed - confirm this is intended.
            component = parseKeyValueString(buffer[cursor + 4:cursor + length])
            result.append(component)
            cursor += length + 4

        return result
Пример #21
0
class PcbDoc:
    """Reader for the component list stored in an OLE container."""

    #
    # Open and parse
    #
    def __init__(self, filename):
        """Open *filename* and parse the "Components6/Data" stream.

        A warning is printed when the count stored in "Components6/Header"
        differs from the number of parsed components.
        """
        self.OleFile = OleFileIO(filename)

        # Components
        self.Components = self.parseComponents(self.readStream("Components6/Data"))
        manifest = getU32(self.readStream("Components6/Header"))
        counted = len(self.Components)
        if manifest != counted:
            # A single parenthesised argument is valid both as a Python 2
            # print statement and as a Python 3 print() call.
            print("Warning: Header disagrees about component count, says there are "
                  + str(manifest) + ", but we counted " + str(counted) + ".")

    #
    # Read a file from OLE container and return its contents
    #
    def readStream(self, path):
        """Return the complete content of the stream at *path*."""
        f = self.OleFile.openstream(path)
        try:
            # One read() replaces the byte-by-byte loop, which also failed
            # on Python 3 by concatenating bytes onto a str.
            return f.read()
        finally:
            f.close()

    #
    # Parse all components from list
    #
    def parseComponents(self, buffer):
        """Split *buffer* into length-prefixed records and parse each one.

        Every record starts with a 4-byte length read by ``getU32``; the
        cursor then advances by ``length + 4``.

        Returns:
            List with one ``parseKeyValueString`` result per record.
        """
        result = []
        cursor = 0

        while cursor < len(buffer):
            length = getU32(buffer[cursor:cursor+4])
            # NOTE(review): the slice ends at cursor + length although the
            # record is skipped with length + 4, so the last 4 bytes of each
            # record are not parsed - confirm this is intended.
            component = parseKeyValueString(buffer[cursor+4:cursor+length])
            result.append(component)
            cursor += length+4

        return result
Пример #22
0
def ole_file_works(path):
    """Check that *path* is a readable .txrm/.xrm OLE file.

    Args:
        path: Path-like object providing ``suffix`` and ``name``.

    Returns:
        True when the suffix matches and the file is an OLE file. Missing
        frames or missing reference data only produce warnings. False
        otherwise, with the reason logged as a warning.
    """
    if path.suffix not in (".txrm", ".xrm"):
        logging.warning("%s not .txrm or .xrm", path)
        return False

    if not isOleFile(str(path)):
        logging.warning("Could not read ole file %s", path)
        return False

    with OleFileIO(str(path)) as ole_file:
        taken = read_imageinfo_as_int(ole_file, "ImagesTaken")
        expected = read_imageinfo_as_int(ole_file, "NoOfImages")
        # Still counts as working when frames are missing; only warn.
        if taken != expected:
            logging.warning("%s is an incomplete %s file: only %i out of %i frames have been written",
                            path.name, path.suffix, taken, expected)
        # Check for reference frame:
        if not ole_file.exists("ReferenceData/Image"):
            logging.warning("No reference data found in file %s", path)
        return True
Пример #23
0
def filename_to_lines(filepath):
    """Return the text lines of a file, chosen by its extension.

    The extension is the part after the last '.' of the last '/'-separated
    path component and is matched case-sensitively:
      * no extension or 'txt' - UTF-8 text, lines keep their newline
      * 'hwp'                 - the 'PrvText' OLE stream decoded as UTF-16
      * 'doc', 'docx'         - paragraph texts via docx.Document
      * 'pdf'                 - parser.from_file content split on newlines
      * image formats         - EASYOCR results followed by the lines of
                                image_to_string

    Raises:
        ValueError: for any other extension.
    """
    filename = filepath.split('/')[-1]
    extension = filename.split('.')[-1]
    if '.' not in filename or extension in ['txt']:
        with open(filepath, 'r', encoding='utf-8') as text_file:
            return text_file.readlines()
    if extension in ['hwp']:
        ole = OleFileIO(filepath)
        try:
            return ole.openstream('PrvText').read().decode(
                'utf-16').split('\n')
        finally:
            ole.close()
    if extension in ['doc', 'docx']:
        return [p.text for p in docx.Document(filepath).paragraphs]
    if extension in ['pdf']:
        return parser.from_file(filepath)['content'].split('\n')
    if extension in ['jpg', 'png', 'jpeg', 'bmp', 'gif', 'tiff', 'jfif']:
        easyocr_terms = EASYOCR.readtext(filepath, detail=0)
        # Close the image handle once the OCR call is done.
        with Image.open(filepath) as image:
            tesseract_terms = image_to_string(image,
                                              lang='kor+eng').split('\n')
        return easyocr_terms + tesseract_terms
    raise ValueError('알려지지 않은 확장자')
Пример #24
0
 def read(self):
     """Open self.filename as an OLE container and dispatch its entries.

     'Metadata' entries containing "Main MetaData Stream" go to
     handler_metadata when self.parse_metadata is set. 'Document' entries
     containing "Dialog Stream" go to handler_dialog when self.parse_dialog
     is set. All other entries are ignored. Handler errors are logged and
     do not stop the loop.

     Returns self.read_result.
     """
     mylog.info(u'Начинаю чтение %s' % self.filename)
     self.ole = OleFileIO(self.filename)
     oledirs = self.ole.listdir()
     mylog.debug('OLE_DIRS: %s' % oledirs)
     for entry in oledirs:
         entry_name = entry[0]
         mylog.debug(u'entry_name: %s' % entry_name)
         try:
             if entry_name == 'Metadata':
                 if "Main MetaData Stream" in entry and self.parse_metadata:
                     self.handler_metadata(entry)
             elif entry_name == 'Document':
                 # "Container.Profile" and "Container.Contents" entries
                 # need no handling.
                 if "Dialog Stream" in entry and self.parse_dialog:
                     self.handler_dialog(entry)
         except Exception as e:
             # Exceptions have no .message attribute on Python 3; format
             # the exception itself.
             mylog.exception(u'Ошибка при чтении конфигурации %s' % e)
     return self.read_result
Пример #25
0
 def __init__(self, path):
     """Open the OLE file at path.

     An IOError from OleFileIO is re-raised as CreateFailedError carrying
     the original exception in details.
     """
     try:
         opened = OleFileIO(path, path_encoding=None)
     except IOError as error:
         raise CreateFailedError(str(error), details=error)
     self._olefile = opened
Пример #26
0
def is_encrypted(some_file):
    """
    Determine whether document contains encrypted content.

    This should return False for documents that are just write-protected or
    signed or finalized. It should return True if ANY content of the file is
    encrypted and can therefore not be analyzed by other oletools modules
    without given a password.

    Exception: there are ways to write-protect an office document by embedding
    it as encrypted stream with hard-coded standard password into an otherwise
    empty OLE file. From an office user point of view, this is no encryption,
    but regarding file structure this is encryption, so we return `True` for
    these.

    This should not raise exceptions needlessly.

    msoffcrypto is asked first when `check_msoffcrypto()` reports it as
    available. If that fails, an older and less accurate check is used: it
    looks for streams with typical encryption names (c.f. [MS-OFFCRYPTO])
    without testing whether these streams actually contain data or whether the
    ole file structure contains the necessary references to these. It also
    checks the "well-known property" PIDSI_DOC_SECURITY if the
    SummaryInformation stream is accessible (c.f. [MS-OLEPS] 2.25.1)

    :param some_file: File name or an opened OleFileIO
    :type some_file: :py:class:`olefile.OleFileIO` or `str`
    :returns: True if (and only if) the file contains encrypted content
    """
    log.debug('is_encrypted')

    # ask msoffcrypto if possible
    if check_msoffcrypto():
        log.debug('Checking for encryption using msoffcrypto')
        file_handle = None
        file_pos = None
        opened_here = False    # only handles opened below may be closed
        try:
            if isinstance(some_file, OleFileIO):
                # TODO: hacky, replace once msoffcrypto-tools accepts OleFileIO
                file_handle = some_file.fp
                file_pos = file_handle.tell()
                file_handle.seek(0)
            else:
                file_handle = open(some_file, 'rb')
                opened_here = True

            return msoffcrypto.OfficeFile(file_handle).is_encrypted()

        except Exception as exc:
            # file_handle is still None if open() failed, and in-memory
            # streams have no .name, so fall back to the input itself
            log.warning('msoffcrypto failed to interpret file {} or determine '
                        'whether it is encrypted: {}'
                        .format(getattr(file_handle, 'name', some_file), exc))

        finally:
            try:
                if opened_here:              # input was file name
                    file_handle.close()
                elif file_pos is not None:   # input was OleFileIO
                    file_handle.seek(file_pos)
            except Exception as exc:
                log.warning('Ignoring error during clean up: {}'.format(exc))

    # if that failed, try ourselves with older and less accurate code
    try:
        if isinstance(some_file, OleFileIO):
            return _is_encrypted_ole(some_file)
        if zipfile.is_zipfile(some_file):
            return _is_encrypted_zip(some_file)
        # otherwise assume it is the name of an ole file
        with OleFileIO(some_file) as ole:
            return _is_encrypted_ole(ole)
    except Exception as exc:
        log.warning('Failed to check {} for encryption ({}); assume it is not '
                    'encrypted.'.format(some_file, exc))

    return False
Пример #27
0
class OleFileFS(FS):
    """Read-only filesystem view of an OLE compound file, backed by OleFileIO.

    Paths are converted to OLE path segments with
    ``path_to_segments_normalized`` before they reach the container.
    """

    _meta = dict(read_only=True, thread_safe=False, network=False, unicode_paths=True, case_insensitive_paths=True)

    def __init__(self, path):
        """Open the OLE file at *path*.

        Raises:
            CreateFailedError: if OleFileIO raises IOError.
        """
        try:
            self._olefile = OleFileIO(path, path_encoding=None)
        except IOError as e:
            raise CreateFailedError(str(e), details=e)

    #
    # Essential methods
    #

    def open(
        self, path, mode="r", buffering=-1, encoding=None, errors=None, newline=None, line_buffering=False, **kwargs
    ):
        """Return the stream at *path*; modes with 'w', 'a' or '+' are rejected.

        Raises:
            OperationFailedError: if *mode* asks for write access.
        """
        if any(flag in mode for flag in ("w", "a", "+")):
            raise OperationFailedError("open", path=path)
        segments = path_to_segments_normalized(path)
        return self._olefile.openstream(segments)

    def isdir(self, path):
        """Return True if *path* is a storage or the root entry."""
        segments = path_to_segments_normalized(path)
        sty = self._olefile.get_type(segments)
        return sty in (STGTY_STORAGE, STGTY_ROOT)

    def isfile(self, path):
        """Return True if *path* is a stream."""
        segments = path_to_segments_normalized(path)
        sty = self._olefile.get_type(segments)
        # Compare by value; identity is not guaranteed for these constants.
        return sty == STGTY_STREAM

    def listdir(self, path="./", wildcard=None, full=False, absolute=False, dirs_only=False, files_only=False):
        """Return the result of ``ilistdir`` as a list."""
        items = self.ilistdir(
            path=path, wildcard=wildcard, full=full, absolute=absolute, dirs_only=dirs_only, files_only=files_only
        )
        return list(items)

    def getinfo(self, path):
        """Return size, creation time and modification time of *path*."""
        segments = path_to_segments_normalized(path)
        size = self._olefile.get_size(segments)
        ctime = self._olefile.getctime(segments)
        mtime = self._olefile.getmtime(segments)
        return {"size": size, "created_time": ctime, "modified_time": mtime}

    #
    # Non-essential methods
    #

    def close(self):
        """Close the OLE file, then the filesystem object itself."""
        self._olefile.close()
        FS.close(self)

    def ilistdir(self, path="./", wildcard=None, full=False, absolute=False, dirs_only=False, files_only=False):
        """Yield the children of *path*.

        *dirs_only* / *files_only* restrict the result to storages or
        streams. *absolute* and *full* select the path form that is yielded;
        by default only the last path segment is produced. *wildcard* is
        accepted but not used here.
        """
        if dirs_only:
            olefile_listdir_args = dict(streams=False, storages=True)
        elif files_only:
            olefile_listdir_args = dict(streams=True, storages=False)
        else:
            olefile_listdir_args = dict()
        given_node = path_to_segments_normalized(path)
        leafs = self._olefile.listdir(**olefile_listdir_args)
        leafs = map(tuple, leafs)
        nodes = find_children(given_node, leafs)
        for segments in nodes:
            stgty = self._olefile.get_type(segments)
            if dirs_only and stgty not in (STGTY_STORAGE, STGTY_ROOT):
                continue
            elif files_only and stgty != STGTY_STREAM:
                continue
            if absolute:
                yield absolute_path_from_segments(segments)
            elif full:
                yield full_path_from_segments(segments)
            else:
                yield segments[-1]
Пример #28
0
class MdReader:
    """
    Reads an MD file, extracting primitive structures and parsing them into
    lists.
    """
    class ReadedConfig:
        """
        Result of reading the configuration; acts as a factory for the parser.
        """
        def __init__(self):
            self.dds = []
            self.dialog = []
            self.entry = {}
            self.md = None

        @property
        def MdObject(self):
            """Return the MDObject parsed from ``dds``, creating it on demand."""
            if not self.md:
                self.md = MDObject()
                self.md.parse(self.dds)
            return self.md

    def __init__(self, filename, metadata=True, dialog=False):
        """Remember the file name and which streams ``read`` should parse."""
        self.filename = filename

        self.parse_metadata = metadata
        self.parse_dialog = dialog

        # Set by read(); the handlers read their streams from it.
        self.ole = None
        self.read_result = MdReader.ReadedConfig()

    def read(self):
        """Open the OLE container and dispatch its entries to the handlers.

        Handler errors are logged and do not stop the loop.

        Returns ``self.read_result``.
        """
        mylog.info(u'Начинаю чтение %s' % self.filename)
        self.ole = OleFileIO(self.filename)
        oledirs = self.ole.listdir()
        mylog.debug('OLE_DIRS: %s' % oledirs)
        for entry in oledirs:
            entry_name = entry[0]
            mylog.debug(u'entry_name: %s' % entry_name)
            try:
                if entry_name == 'Metadata':
                    if "Main MetaData Stream" in entry and self.parse_metadata:
                        self.handler_metadata(entry)
                elif entry_name == 'Document':
                    # "Container.Profile" and "Container.Contents" entries
                    # need no handling.
                    if "Dialog Stream" in entry and self.parse_dialog:
                        self.handler_dialog(entry)
            except Exception as e:
                # Exceptions have no .message attribute on Python 3; format
                # the exception itself.
                mylog.exception(u'Ошибка при чтении конфигурации %s' % e)
        return self.read_result

    def handler_metadata(self, entry):
        """Parse the main metadata stream (cp1251 text) into ``read_result.dds``."""
        if "Main MetaData Stream" in entry:
            with self.ole.openstream(entry) as f:
                tx = f.read()
            self.read_result.dds = ParseTree(
                tx.decode('cp1251', errors='ignore'))

    def handler_dialog(self, entry):
        """Store the raw dialog stream content in ``read_result.dialog``."""
        # Must test for the dialog stream: read() only calls this for entries
        # containing "Dialog Stream", so a check for the metadata stream name
        # would never match.
        if "Dialog Stream" in entry:
            with self.ole.openstream(entry) as f:
                tx = f.read()
            self.read_result.dialog = tx  # kept raw, not parsed with ParseTree
def is_scan_v2_ole(scan: olefile.OleFileIO) -> bool:
    """Return True if *scan* contains both the v2 version file and the v2 scan-data subfolder."""
    if not scan.exists(LSDATAV2_VERSION_FILE):
        return False
    return scan.exists(LSDATAV2_SCANDATA_SUBFOLDER)
def is_scan_v1_ole(scan: olefile.OleFileIO) -> bool:
    """Return True if *scan* contains both the main parameter stream and the scans subfolder."""
    if not scan.exists(MAIN_PARAM_STREAM):
        return False
    return scan.exists(SCANS_SUBFOLDER)
Пример #31
0
 def __init__(self, filename):
     """Initialise the instance by delegating to OleFileIO.__init__ with filename."""
     OleFileIO.__init__(self, filename)
Пример #32
0
class MdReader:
    """
    Read an MD file, extracting primitive structures and parsing them into lists.

    Usage: construct with a file name, call ``read()``, then inspect the
    returned ``ReadedConfig``.
    """

    class ReadedConfig:
        """
        Result of reading a configuration; acts as a factory for the parser.
        """
        def __init__(self):
            # Result of ParseTree() for the main metadata stream.
            self.dds = []
            # Raw bytes of the dialog stream.
            self.dialog = []
            # Not written anywhere in this class.
            self.entry = {}
            # Lazily created MDObject, see the MdObject property.
            self.md = None

        @property
        def MdObject(self):
            """
            Return the ``MDObject`` built from ``self.dds``.

            The object is created and parsed on first access and cached in
            ``self.md`` for subsequent accesses.
            """
            # Compare against None so that a falsy-but-valid MDObject is not
            # rebuilt on every access.
            if self.md is None:
                self.md = MDObject()
                self.md.parse(self.dds)
            return self.md

    def __init__(self, filename, metadata=True, dialog=False):
        """
        :param filename: path of the MD (OLE) file to read.
        :param metadata: when true, ``read()`` parses the main metadata stream.
        :param dialog: when true, ``read()`` stores the dialog stream.
        """
        self.filename = filename

        self.parse_metadata = metadata
        self.parse_dialog = dialog

        self.ole = None
        self.read_result = MdReader.ReadedConfig()

    def read(self):
        """
        Open the OLE container at ``self.filename`` and dispatch its entries.

        Every entry returned by ``listdir()`` is inspected by its first path
        component:

        * ``'Metadata'`` entries containing ``"Main MetaData Stream"`` are
          passed to ``handler_metadata`` when ``self.parse_metadata`` is set.
        * ``'Document'`` entries containing ``"Dialog Stream"`` are passed to
          ``handler_dialog`` when ``self.parse_dialog`` is set.

        Errors raised while handling a single entry are logged and do not stop
        the loop.  The container is closed before returning; ``self.ole`` still
        references the (closed) object afterwards.

        :return: ``self.read_result`` with whatever the handlers stored in it.
        """
        mylog.info(u'Начинаю чтение %s' % self.filename)
        self.ole = OleFileIO(self.filename)
        try:
            oledirs = self.ole.listdir()
            mylog.debug('OLE_DIRS: %s' % oledirs)
            for entry in oledirs:
                entry_name = entry[0]
                mylog.debug(u'entry_name: %s' % entry_name)
                try:
                    if entry_name == 'Metadata':
                        if "Main MetaData Stream" in entry and self.parse_metadata:
                            self.handler_metadata(entry)
                    elif entry_name == 'Document':
                        if "Dialog Stream" in entry and self.parse_dialog:
                            self.handler_dialog(entry)
                        # "Container.Profile" and "Container.Contents" entries
                        # are recognised but deliberately not processed.
                except Exception as e:
                    # Format the exception itself: ``e.message`` does not exist
                    # on Python 3 and would raise from inside this handler.
                    mylog.exception(u'Ошибка при чтении конфигурации %s' % e)
        finally:
            # Handlers read streams fully into memory, so the container can be
            # released as soon as the loop is done.
            self.ole.close()
        return self.read_result

    def handler_metadata(self, entry):
        """
        Store the parsed main metadata stream in ``self.read_result.dds``.

        Entries that do not contain ``"Main MetaData Stream"`` are ignored.
        The stream bytes are decoded as cp1251 (undecodable bytes dropped)
        and handed to ``ParseTree``.

        :param entry: OLE entry path as returned by ``listdir()``.
        """
        if "Main MetaData Stream" in entry:
            with self.ole.openstream(entry) as f:
                tx = f.read()
            self.read_result.dds = ParseTree(tx.decode('cp1251', errors='ignore'))

    def handler_dialog(self, entry):
        """
        Store the raw bytes of a dialog stream in ``self.read_result.dialog``.

        Entries that do not contain ``"Dialog Stream"`` are ignored.  The
        content is kept undecoded (bytes exactly as read from the stream).

        :param entry: OLE entry path as returned by ``listdir()``.
        """
        # The marker must be "Dialog Stream": that is the condition under which
        # read() dispatches here, so testing for the metadata marker meant the
        # dialog was never stored.
        if "Dialog Stream" in entry:
            with self.ole.openstream(entry) as f:
                tx = f.read()
            self.read_result.dialog = tx
Пример #33
0
def get_xdata(ole: olefile.OleFileIO, header: Header) -> Optional[bytes]:
    """Return the full content of the ``'X-Data'`` stream, or ``None``.

    ``None`` is returned when *ole* reports that the stream does not exist;
    in that case the stream is never opened.  *header* is accepted but not
    used by this function.
    """
    if not ole.exists('X-Data'):
        return None
    x_stream = ole.openstream('X-Data')
    return x_stream.read()
Пример #34
0
def get_ydata(ole: olefile.OleFileIO, header: Header) -> Optional[bytes]:
    """Return the full content of the ``'Y-Data'`` stream.

    The stream is opened unconditionally, so whatever ``ole.openstream``
    raises for a missing stream propagates to the caller.  *header* is
    accepted but not used by this function.
    """
    y_stream = ole.openstream('Y-Data')
    payload = y_stream.read()
    return payload
Пример #35
0
def get_header(ole: olefile.OleFileIO) -> Header:
    """Build a ``Header`` from the ``'DataInfo'`` stream of *ole*.

    The stream content is passed to ``unpack_data_info`` and the values it
    returns are supplied to ``Header`` as positional arguments.
    """
    info_stream = ole.openstream('DataInfo')
    fields = unpack_data_info(info_stream.read())
    return Header(*fields)
Пример #36
0
def parse_md(filename):
    """
    Read an MD (OLE) file and return its parsed main metadata.

    For every stream listed by the container:

    * ``Document`` streams named "Dialog Stream", "Container.Profile",
      "Container.Contents" or "MD Programm text" are skipped entirely
      (they are not dumped either).
    * The ``Metadata`` / "Main MetaData Stream" stream is decoded as cp1251
      (undecodable bytes dropped) and parsed with ``ParseTree``; a failure
      is logged and leaves the previous ``'dds'`` value in place.
    * When the module-level ``DUMP_META`` flag is set, the entry is also
      written out through ``dump_stream``.

    The container is always closed before returning.

    :param filename: path of the MD file.
    :return: dict with ``'dds'`` (``ParseTree`` result, or ``[]`` if the
        metadata stream was absent or failed to parse) and ``'entry'``
        (an empty dict; nothing in this function fills it).
    """
    mylog.info(u'Начинаю чтение %s' % filename)
    m = {'dds': []}
    ole = OleFileIO(filename)
    m['entry'] = {}
    try:
        for entry in ole.listdir():
            storage = entry[0]
            mylog.debug(storage)

            if storage == 'Document' and any(
                    name in entry for name in _SKIPPED_DOCUMENT_STREAMS):
                continue

            if storage == 'Metadata' and "Main MetaData Stream" in entry:
                try:
                    f = ole.openstream(entry)
                    try:
                        tx = f.read()
                    finally:
                        f.close()
                    m['dds'] = ParseTree(tx.decode('cp1251', errors='ignore'))
                except Exception:
                    mylog.exception('parse metadata error')

            if DUMP_META:
                _dump_md_entry(ole, entry)
    finally:
        ole.close()
    return m


# Document streams that parse_md does not process at all.
_SKIPPED_DOCUMENT_STREAMS = (
    "Dialog Stream",
    "Container.Profile",
    "Container.Contents",
    "MD Programm text",
)


def _dump_md_entry(ole, entry):
    """Write one OLE stream to disk through ``dump_stream`` (DUMP_META mode)."""
    target = "entry-%s" % entry
    f = ole.openstream(entry)
    try:
        if "MD Programm text" not in entry:
            dump_stream(target, f.read())
            return
        tx = f.read(ole.get_size(entry))
    finally:
        f.close()

    try:
        # Module text is stored without a zlib header; prepend it to inflate.
        tx = zlib.decompress(zlib_head + tx)
    except Exception:
        # Best effort: fall back to dumping the raw bytes.
        mylog.debug('zlib decompress failed for %r' % (entry,))
    # NOTE(review): zlib_head is prepended even when decompression succeeded,
    # exactly as before -- confirm whether the plain text should be dumped
    # without the header.
    dump_stream(target, zlib_head + tx)
Пример #37
0
if __name__ == '__main__':
    if (len(sys.argv) > 1):
        files = sys.argv[1:]
        filename = files[0].decode(
            sys.getfilesystemencoding())  # make it UNICODE!
        setInventorFile(filename)
        if (isOleFile(filename)):
            if (len(files) == 1):
                open(filename)
            else:
                # this is only for debugging purposes...
                docname = os.path.splitext(os.path.basename(filename))[0]
                docname = decode(docname, utf=True)
                doc = FreeCAD.newDocument(docname)

                ole = OleFileIO(filename)
                setFileVersion(ole)
                elements = ole.listdir(streams=True, storages=False)
                counter = 1
                if (files[1] == 'l'):
                    for filename in elements:
                        ListElement(ole, filename, counter)
                        counter += 1
                else:
                    list = {}
                    counters = {}

                    for a in (elements):
                        path = PrintableName(a)
                        list['%s' % (counter)] = a
                        counters['%s' % (counter)] = counter