def checkVersion(file):
    version = None  # initialised so the check below cannot raise NameError if no RSeDb stream is found
    filename = os.path.abspath(file)
    ole = OleFileIO(filename)
    elements = ole.listdir(streams=True, storages=False)
    for e in elements:
        if (e[-1] == 'RSeDb'):
            data = ole.openstream(e).read()
            version, i = getVersionInfo(data, 20)
            if (version.major >= 14):
                setDumpFolder(file)
                return ole
            break
    if (version):
        vrsName = version.major
        if (version.major >= 11):
            vrsName += 1996
        QMessageBox.critical(FreeCAD.ActiveDocument, 'FreeCAD: Inventor workbench...',
                             'Can\'t load file created with Inventor v%d' % (vrsName))
        logError('Can\'t load file created with Inventor v%d' % (vrsName))
    else:
        QMessageBox.critical(FreeCAD.ActiveDocument, 'FreeCAD: Inventor workbench...',
                             'Can\'t determine Inventor version file was created with')
        logError('Can\'t determine Inventor version file was created with!')
    return None
def __init__(self, filename):
    self.OleFile = OleFileIO(filename)

    # Components
    self.Components = self.parseComponents(self.readStream("Components6/Data"))
    manifest = getU32(self.readStream("Components6/Header"))
    counted = len(self.Components)
    if manifest != counted:
        print "Warning: Header disagrees about component count, says there are " + str(manifest) + ", but we counted " + str(counted) + "."
def get_streams(self, dump) -> (list, list):
    ''' get streams '''
    _Listobjects = []
    _List = []
    ole = OleFileIO(dump)
    listdir = ole.listdir()
    for direntry in listdir:
        dirs = re.sub(r'[^\x20-\x7f]', r'', " : ".join(direntry))
        tempdecoded = sub(br'[^\x20-\x7F]+', b'', ole.openstream(direntry).getvalue())
        _Listobjects.append(tempdecoded)
        _List.append({"Name": dirs, "Parsed": tempdecoded.decode("utf-8", errors="ignore")})
    return _List, _Listobjects
def ReadFile(doc, readProperties):
    first = 0
    list = {}
    counters = {}
    # LOG.LOG_FILTER = LOG.LOG_FILTER | LOG.LOG_DEBUG
    if (isOleFile(getInventorFile())):
        ole = OleFileIO(getInventorFile())
        setFileVersion(ole)
        elements = ole.listdir(streams=True, storages=False)
        folder = getInventorFile()[0:-4]
        if not os.path.exists(folder):
            os.makedirs(folder)
        counter = 1
        list = []
        for fname in elements:
            if (len(fname) == 1):
                list.append(fname)
            else:
                # Ensure that RSe* files will be parsed first
                if (fname[-1].startswith('RSe')):
                    # ensure RSeDb is the very first "file" to be parsed
                    list.insert(first, fname)
                    if (fname[-1] == 'RSeDb'):
                        first += 1
                elif (not fname[-1].startswith('B')):
                    list.append(fname)
        for fname in list:
            ReadElement(ole, fname, doc, counter, readProperties)
            counter += 1
        ole.close()
        now = datetime.datetime.now()
        if (len(doc.Comment) > 0):
            doc.Comment += '\n'
        doc.Comment += '# %s: read from %s' % (now.strftime('%Y-%m-%d %H:%M:%S'), getInventorFile())
        logMessage("Dumped data to folder: '%s'" % (getInventorFile()[0:-4]), LOG.LOG_INFO)
        return True
    logError("Error - '%s' is not a valid Autodesk Inventor file." % (getInventorFile()))
    return False
def extract_ole_metadata(self, file_path):
    with open(file_path, 'rb') as fh:
        if not isOleFile(fh):
            return
        fh.seek(0)
        ole = OleFileIO(fh)
        self.extract_olefileio_metadata(ole)
def setInventorFile(file):
    global _inventor_file
    global _dump_folder
    _inventor_file = os.path.abspath(file)
    setDumpFolder(_inventor_file)
    return OleFileIO(file)
def __init__(self, msg_file_path):
    self.msg_file_path = msg_file_path
    self.include_attachment_data = False
    if not self.is_valid_msg_file():
        raise Exception(
            "Invalid file provided, please provide a valid Microsoft Outlook MSG file."
        )
    with OleFileIO(msg_file_path) as ole_file:
        # process directory entries
        ole_root = ole_file.root
        kids_dict = ole_root.kids_dict
        self._message = Message(kids_dict)
        self._message_dict = self._message.as_dict()
        # process msg properties
        self._set_properties()
        # process msg recipients
        self._set_recipients()
        # process attachments
        self._set_attachments()
def _get_reference(ole, txrm_name, custom_reference, ignore_reference):
    if custom_reference is not None:
        logging.info("%s is being processed with file %s as a reference.", txrm_name, custom_reference.name)
        reference_path = str(custom_reference)
        try:
            if isOleFile(reference_path):
                with OleFileIO(reference_path) as ref_ole:
                    references = txrm_wrapper.extract_all_images(ref_ole)  # should be float for averaging & dividing
            elif ".tif" in reference_path:
                with tf.TiffFile(reference_path) as tif:
                    references = np.asarray(tif.pages[:])
            else:
                msg = f"Unable to open file '{reference_path}'. Only tif/tiff or xrm/txrm files are supported for custom references."
                logging.error(msg)
                raise IOError(msg)
        except:
            logging.error("Error occurred reading custom reference", exc_info=True)
            raise
        if len(references) > 1:
            # if reference file is an image stack take median of the images
            return _dynamic_despeckle_and_average_series(references)
        return references[0]
    elif ole.exists("ReferenceData/Image") and not ignore_reference:
        logging.info("Internal reference will be applied to %s", txrm_name)
        return txrm_wrapper.extract_reference_image(ole)
    logging.debug("%s is being processed without a reference.", txrm_name)
    return None
def __init__(self, data: bytes):
    self.oid: Optional[oletools.oleid.OleID] = None
    if isOleFile(data):
        ole_file = OleFileIO(data)
        self.oid = oletools.oleid.OleID(ole_file)
        self.oid.check()
def convert(self, txrm_file, custom_reference=None, ignore_reference=False, annotate=False):
    with OleFileIO(str(txrm_file)) as ole:
        images = txrm_wrapper.extract_all_images(ole)
        reference = _get_reference(ole, txrm_file.name, custom_reference, ignore_reference)
        if reference is not None:
            self.image_output = _apply_reference(images, reference)
        else:
            self.image_output = np.around(images)
        if (len(self.image_output) > 1
                and ole.exists("ImageInfo/MosiacRows")
                and ole.exists("ImageInfo/MosiacColumns")):
            mosaic_rows = txrm_wrapper.read_imageinfo_as_int(ole, "MosiacRows")
            mosaic_cols = txrm_wrapper.read_imageinfo_as_int(ole, "MosiacColumns")
            if mosaic_rows != 0 and mosaic_cols != 0:
                # Version 13 style mosaic:
                self.image_output = _stitch_images(self.image_output, (mosaic_cols, mosaic_rows), 1)
        if annotate:
            # Extract annotations
            annotator = Annotator(self.image_output[0].shape[::-1])
            if annotator.extract_annotations(ole):  # True if any annotations were drawn
                self.annotator = annotator
            else:
                self.annotator = False
        # Create metadata
        self.ome_metadata = create_ome_metadata(ole, self.image_output)
def is_encrypted(some_file):
    """
    Determine whether document contains encrypted content.

    This should return False for documents that are just write-protected or
    signed or finalized. It should return True if ANY content of the file is
    encrypted and can therefore not be analyzed by other oletools modules
    without being given a password.

    Exception: there are ways to write-protect an office document by embedding
    it as an encrypted stream with a hard-coded standard password into an
    otherwise empty OLE file. From an office user point of view, this is no
    encryption, but regarding file structure this is encryption, so we return
    `True` for these.

    This should not raise exceptions needlessly.

    This implementation is rather simple: it returns True if the file contains
    streams with typical encryption names (c.f. [MS-OFFCRYPTO]). It does not
    test whether these streams actually contain data or whether the ole file
    structure contains the necessary references to these. It also checks the
    "well-known property" PIDSI_DOC_SECURITY if the SummaryInformation stream
    is accessible (c.f. [MS-OLEPS] 2.25.1).

    :param some_file: File name or an opened OleFileIO
    :type some_file: :py:class:`olefile.OleFileIO` or `str`
    :returns: True if (and only if) the file contains encrypted content
    """
    log.debug('is_encrypted')

    if isinstance(some_file, OleFileIO):
        return is_encrypted_ole(some_file)      # assume it is OleFileIO
    if zipfile.is_zipfile(some_file):
        return is_encrypted_zip(some_file)
    # otherwise assume it is the name of an ole file
    return is_encrypted_ole(OleFileIO(some_file))
def __init__(self, filename):
    self.OleFile = OleFileIO(filename)

    # TOC = Table Of Contents
    # A list of the footprints contained in this PcbLib can be found here:
    #self.TOC = TOC( self.readStream("Library/ComponentParamsTOC/Data") )   # not always present

    #
    # Parse library parameters
    # Library/Data contains a list of parameters (string: "|"-separated key-value pairs)
    # followed by the count and names of footprints in the library
    #
    buffer = self.readStream("Library/Data")

    # Properties
    print "Library properties:"
    length = getU32(buffer[:4])
    self.Properties = parseKeyValueString(buffer[4:4+length])
    print self.Properties

    # Footprint list
    cursor = 4+length
    count = getU32(buffer[cursor:])
    cursor += 4
    print "Footprints in library: "+str(count)
    footprints = []
    for i in range(count):
        subrecord = SubRecord(buffer[cursor:])
        name = SubRecord_String(subrecord)
        print " * "+name
        footprints.append(name)
        cursor += subrecord.length

    # Parse all the footprints
    self.Footprints = []
    for footprint in footprints:
        print "Parsing "+footprint+" ..."
        self.Footprints.append( Footprint(self.readStream(footprint+"/Data")) )

    # Create a dictionary of footprints to access them by name
    self.FootprintsByName = {}
    for footprint in self.Footprints:
        self.FootprintsByName[footprint.name] = footprint
def __init__(self, filename):
    self.OleFile = OleFileIO(filename)

    # Components
    self.Components = self.parseComponents(self.readStream("Components6/Data"))
    manifest = getU32(self.readStream("Components6/Header"))
    counted = len(self.Components)
    if manifest != counted:
        print "Warning: Header disagrees about component count, says there are "+str(manifest)+", but we counted "+str(counted)+"."
def __init__(self, olefile, path='', parent=None):
    if not hasattr(olefile, 'openstream'):
        isOleFile = import_isOleFile()
        OleFileIO = import_OleFileIO()
        if not isOleFile(olefile):
            errormsg = 'Not an OLE2 Compound Binary File.'
            raise InvalidOleStorageError(errormsg)
        olefile = OleFileIO(olefile)
    OleStorageItem.__init__(self, olefile, path, parent)
def get_general(self, data, f):
    ''' Extract general info '''
    for k, v in OleFileIO(f).get_metadata().__dict__.items():
        if v != None:
            if type(v) == bytes:
                if len(v) > 0:
                    data.update({k: v.decode("utf-8", errors="ignore")})
            else:
                data.update({k: v})
def oleMetaData(file_path, save=True):
    now = dt.now()
    file_name = getFileName(file_path)
    metadata = "Time: %d/%d/%d %d : %d : %d. Found the following metadata for file %s:\n\n" % (
        now.year, now.month, now.day, now.hour, now.minute, now.second, file_name[:-4])
    try:
        ole = OleFileIO(file_path)
        meta = ole.get_metadata()
        ole.close()
        author = meta.author.decode("latin-1")
        creation_time = meta.create_time.ctime()
        last_author = meta.last_saved_by.decode("latin-1")
        last_edit_time = meta.last_saved_time.ctime()
        last_printed = meta.last_printed.ctime()
        revisions = meta.revision_number.decode("latin-1")
        company = meta.company.decode("latin-1")
        creating_app = meta.creating_application.decode("latin-1")
        metadata += "Original Author: %s\nCreation Time: %s\nLast Author: %s\n" % (author, creation_time, last_author) \
            + "Last Modification Time: %s\nLast Printed at: %s\nTotal Revisions: %s\n" % (last_edit_time, last_printed, revisions) \
            + "Created with: %s\nCompany: %s" % (creating_app, company)
        try:
            print(metadata)
        except UnicodeEncodeError:
            print("Console encoding can't decode the result. Enter chcp 65001 in the console and rerun the script.")
        if save:
            file_name = getFileName(file_path)
            tgt = file_name + ".txt"
            saveResult(tgt, metadata)
    except FileNotFoundError:
        print("Specified file could not be found")
    except OSError as e1:
        # FileNotFoundError is a subclass of OSError, so it must be caught first above
        print("File not supported: %s" % e1)
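# Hedged, self-contained sketch of the same olefile metadata API used by
# oleMetaData above; the file name 'sample.doc' and the handful of printed
# properties are assumptions for illustration, not part of the original.
from olefile import OleFileIO, isOleFile

def print_basic_metadata(path='sample.doc'):
    if not isOleFile(path):
        print("Not an OLE file: %s" % path)
        return
    ole = OleFileIO(path)
    try:
        meta = ole.get_metadata()
        # SummaryInformation properties come back as bytes or None
        print("author:", meta.author)
        print("created:", meta.create_time)
        print("last saved by:", meta.last_saved_by)
    finally:
        ole.close()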
def get_general(self, data, temp_f):
    ''' Extract general info '''
    for temp_k, temp_v in OleFileIO(temp_f).get_metadata().__dict__.items():
        if temp_v is not None:
            if isinstance(temp_v, bytes):
                if len(temp_v) > 0:
                    data.update({temp_k: temp_v.decode("utf-8", errors="ignore")})
            else:
                data.update({temp_k: temp_v})
def read(self):
    mylog.info(u'Starting to read %s' % self.filename)
    self.ole = OleFileIO(self.filename)
    oledirs = self.ole.listdir()
    mylog.debug('OLE_DIRS: %s' % oledirs)
    for entry in oledirs:
        entry_name = entry[0]
        mylog.debug(u'entry_name: %s' % entry_name)
        try:
            if entry_name == 'Metadata':
                if "Main MetaData Stream" in entry and self.parse_metadata:
                    self.handler_metadata(entry)
            if entry_name == 'Document':
                if "Dialog Stream" in entry and self.parse_dialog:
                    self.handler_dialog(entry)
            if "Container.Profile" in entry:
                continue
            if "Container.Contents" in entry:
                continue
        except Exception as e:
            mylog.exception(u'Error while reading the configuration: %s' % e.message)
    return self.read_result
def extract_ole_metadata(self, file_path, entity):
    with open(file_path, 'rb') as fh:
        if not isOleFile(fh):
            return
        fh.seek(0)
        try:
            ole = OleFileIO(fh)
            self.extract_olefileio_metadata(ole, entity)
        except (RuntimeError, IOError):
            # OLE reading can go fully recursive, at which point it's OK
            # to just eat this runtime error quietly.
            log.warning("Failed to read OLE data: %r", entity)
        except Exception:
            log.exception("Failed to read OLE data: %r", entity)
class PcbDoc:

    #
    # Open and parse
    #
    def __init__(self, filename):
        self.OleFile = OleFileIO(filename)

        # Components
        self.Components = self.parseComponents(self.readStream("Components6/Data"))
        manifest = getU32(self.readStream("Components6/Header"))
        counted = len(self.Components)
        if manifest != counted:
            print "Warning: Header disagrees about component count, says there are " + str(manifest) + ", but we counted " + str(counted) + "."

    #
    # Read a file from the OLE container and return its contents
    #
    def readStream(self, path):
        f = self.OleFile.openstream(path)
        c = True
        buffer = ""
        while c:
            c = f.read(1)
            if c:
                buffer += c
        f.close()
        return buffer

    #
    # Parse all components from the list
    #
    def parseComponents(self, buffer):
        result = []
        cursor = 0
        while cursor < len(buffer):
            length = getU32(buffer[cursor:cursor + 4])
            component = parseKeyValueString(buffer[cursor + 4:cursor + length])
            #print dumps(component, sort_keys=True, indent=4)
            result.append(component)
            cursor += length + 4
        return result
class PcbDoc:

    #
    # Open and parse
    #
    def __init__(self, filename):
        self.OleFile = OleFileIO(filename)

        # Components
        self.Components = self.parseComponents(self.readStream("Components6/Data"))
        manifest = getU32(self.readStream("Components6/Header"))
        counted = len(self.Components)
        if manifest != counted:
            print "Warning: Header disagrees about component count, says there are "+str(manifest)+", but we counted "+str(counted)+"."

    #
    # Read a file from the OLE container and return its contents
    #
    def readStream(self, path):
        f = self.OleFile.openstream(path)
        c = True
        buffer = ""
        while c:
            c = f.read(1)
            if c:
                buffer += c
        f.close()
        return buffer

    #
    # Parse all components from the list
    #
    def parseComponents(self, buffer):
        result = []
        cursor = 0
        while cursor < len(buffer):
            length = getU32(buffer[cursor:cursor+4])
            component = parseKeyValueString(buffer[cursor+4:cursor+length])
            #print dumps(component, sort_keys=True, indent=4)
            result.append(component)
            cursor += length+4
        return result
def ole_file_works(path):
    if (path.suffix == ".txrm") or (path.suffix == ".xrm"):
        if isOleFile(str(path)):
            with OleFileIO(str(path)) as ole_file:
                number_frames_taken = read_imageinfo_as_int(ole_file, "ImagesTaken")
                expected_number_frames = read_imageinfo_as_int(ole_file, "NoOfImages")
                # Returns true even if all frames aren't written, throwing a warning.
                if number_frames_taken != expected_number_frames:
                    logging.warning("%s is an incomplete %s file: only %i out of %i frames have been written",
                                    path.name, path.suffix, number_frames_taken, expected_number_frames)
                # Check for reference frame:
                if not ole_file.exists("ReferenceData/Image"):
                    logging.warning("No reference data found in file %s", path)
                return True
        else:
            logging.warning("Could not read ole file %s", path)
    else:
        logging.warning("%s not .txrm or .xrm", path)
    return False
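# Hedged usage sketch for ole_file_works above: the file name is an assumption
# and read_imageinfo_as_int is expected to come from the surrounding module.
from pathlib import Path

if __name__ == '__main__':
    ok = ole_file_works(Path('scan_0001.txrm'))
    print('usable' if ok else 'not usable')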
def filename_to_lines(filepath):
    filename = filepath.split('/')[-1]
    extension = filename.split('.')[-1]
    if '.' not in filename or extension in ['txt']:
        return open(filepath, 'r', encoding='utf-8').readlines()
    if extension in ['hwp']:
        return OleFileIO(filepath).openstream('PrvText').read().decode('utf-16').split('\n')
    if extension in ['doc', 'docx']:
        return [p.text for p in docx.Document(filepath).paragraphs]
    if extension in ['pdf']:
        return parser.from_file(filepath)['content'].split('\n')
    if extension in ['jpg', 'png', 'jpeg', 'bmp', 'gif', 'tiff', 'jfif']:
        easyocr_terms = EASYOCR.readtext(filepath, detail=0)
        tesseract_terms = image_to_string(Image.open(filepath), lang='kor+eng').split('\n')
        return easyocr_terms + tesseract_terms
        # return EASYOCR.readtext(filepath, detail=0)
    else:
        raise ValueError('Unknown file extension')
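# Hedged, self-contained sketch of the HWP branch used above: the 'PrvText'
# stream holds a UTF-16 plain-text preview of the document; 'sample.hwp' is
# an assumed file name.
from olefile import OleFileIO

def hwp_preview_lines(path='sample.hwp'):
    with OleFileIO(path) as ole:
        return ole.openstream('PrvText').read().decode('utf-16').split('\n')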
def __init__(self, path):
    try:
        self._olefile = OleFileIO(path, path_encoding=None)
    except IOError as e:
        raise CreateFailedError(str(e), details=e)
def is_encrypted(some_file):
    """
    Determine whether document contains encrypted content.

    This should return False for documents that are just write-protected or
    signed or finalized. It should return True if ANY content of the file is
    encrypted and can therefore not be analyzed by other oletools modules
    without being given a password.

    Exception: there are ways to write-protect an office document by embedding
    it as an encrypted stream with a hard-coded standard password into an
    otherwise empty OLE file. From an office user point of view, this is no
    encryption, but regarding file structure this is encryption, so we return
    `True` for these.

    This should not raise exceptions needlessly.

    This implementation is rather simple: it returns True if the file contains
    streams with typical encryption names (c.f. [MS-OFFCRYPTO]). It does not
    test whether these streams actually contain data or whether the ole file
    structure contains the necessary references to these. It also checks the
    "well-known property" PIDSI_DOC_SECURITY if the SummaryInformation stream
    is accessible (c.f. [MS-OLEPS] 2.25.1).

    :param some_file: File name or an opened OleFileIO
    :type some_file: :py:class:`olefile.OleFileIO` or `str`
    :returns: True if (and only if) the file contains encrypted content
    """
    log.debug('is_encrypted')

    # ask msoffcrypto if possible
    if check_msoffcrypto():
        log.debug('Checking for encryption using msoffcrypto')
        file_handle = None
        file_pos = None
        try:
            if isinstance(some_file, OleFileIO):
                # TODO: hacky, replace once msoffcrypto-tools accepts OleFileIO
                file_handle = some_file.fp
                file_pos = file_handle.tell()
                file_handle.seek(0)
            else:
                file_handle = open(some_file, 'rb')
            return msoffcrypto.OfficeFile(file_handle).is_encrypted()
        except Exception as exc:
            log.warning('msoffcrypto failed to interpret file {} or determine '
                        'whether it is encrypted: {}'
                        .format(file_handle.name, exc))
        finally:
            try:
                if file_pos is not None:    # input was OleFileIO
                    file_handle.seek(file_pos)
                else:                       # input was file name
                    file_handle.close()
            except Exception as exc:
                log.warning('Ignoring error during clean up: {}'.format(exc))

    # if that failed, try ourselves with older and less accurate code
    try:
        if isinstance(some_file, OleFileIO):
            return _is_encrypted_ole(some_file)
        if zipfile.is_zipfile(some_file):
            return _is_encrypted_zip(some_file)
        # otherwise assume it is the name of an ole file
        with OleFileIO(some_file) as ole:
            return _is_encrypted_ole(ole)
    except Exception as exc:
        log.warning('Failed to check {} for encryption ({}); assume it is not '
                    'encrypted.'.format(some_file, exc))

    return False
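# Hedged usage sketch for is_encrypted above: per its docstring it accepts
# either a file name or an already-opened OleFileIO; 'protected.doc' is an
# assumed file name.
from olefile import OleFileIO

if __name__ == '__main__':
    print(is_encrypted('protected.doc'))      # by file name
    with OleFileIO('protected.doc') as ole:
        print(is_encrypted(ole))              # by open OleFileIO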
class OleFileFS(FS):

    _meta = dict(
        read_only=True,
        thread_safe=False,
        network=False,
        unicode_paths=True,
        case_insensitive_paths=True
    )

    def __init__(self, path):
        try:
            self._olefile = OleFileIO(path, path_encoding=None)
        except IOError as e:
            raise CreateFailedError(str(e), details=e)

    #
    # Essential methods
    #

    def open(self, path, mode="r", buffering=-1, encoding=None, errors=None,
             newline=None, line_buffering=False, **kwargs):
        for unsupported in "w", "a", "+":
            if unsupported in mode:
                raise OperationFailedError("open", path=path)
        segments = path_to_segments_normalized(path)
        return self._olefile.openstream(segments)

    def isdir(self, path):
        segments = path_to_segments_normalized(path)
        sty = self._olefile.get_type(segments)
        return sty in (STGTY_STORAGE, STGTY_ROOT)

    def isfile(self, path):
        segments = path_to_segments_normalized(path)
        sty = self._olefile.get_type(segments)
        return sty is STGTY_STREAM

    def listdir(self, path="./", wildcard=None, full=False, absolute=False,
                dirs_only=False, files_only=False):
        items = self.ilistdir(
            path=path,
            wildcard=wildcard,
            full=full,
            absolute=absolute,
            dirs_only=dirs_only,
            files_only=files_only
        )
        return list(items)

    def getinfo(self, path):
        segments = path_to_segments_normalized(path)
        size = self._olefile.get_size(segments)
        ctime = self._olefile.getctime(segments)
        mtime = self._olefile.getmtime(segments)
        return {"size": size, "created_time": ctime, "modified_time": mtime}

    #
    # Non-essential methods
    #

    def close(self):
        self._olefile.close()
        FS.close(self)

    def ilistdir(self, path="./", wildcard=None, full=False, absolute=False,
                 dirs_only=False, files_only=False):
        if dirs_only:
            olefile_listdir_args = dict(streams=False, storages=True)
        elif files_only:
            olefile_listdir_args = dict(streams=True, storages=False)
        else:
            olefile_listdir_args = dict()

        given_node = path_to_segments_normalized(path)
        leafs = self._olefile.listdir(**olefile_listdir_args)
        leafs = map(tuple, leafs)
        nodes = find_children(given_node, leafs)

        for segments in nodes:
            stgty = self._olefile.get_type(segments)
            if dirs_only and stgty not in (STGTY_STORAGE, STGTY_ROOT):
                continue
            elif files_only and stgty is not STGTY_STREAM:
                continue

            if absolute:
                yield absolute_path_from_segments(segments)
            elif full:
                yield full_path_from_segments(segments)
            else:
                yield segments[-1]
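# Hedged usage sketch for OleFileFS above; 'document.hwp' is an assumed file
# name, and the path helpers from the surrounding module are assumed to
# resolve the default './' root.
if __name__ == '__main__':
    fs = OleFileFS('document.hwp')
    try:
        for name in fs.listdir():
            print(name)
    finally:
        fs.close()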
class MdReader:
    """ Reads an MD file, extracting primitive structures and parsing them into lists """

    class ReadedConfig:
        """ Result of reading the configuration; acts as a factory for the parser """

        def __init__(self):
            self.dds = []
            self.dialog = []
            self.entry = {}
            self.md = None

        @property
        def MdObject(self):
            if not self.md:
                self.md = MDObject()
                self.md.parse(self.dds)
            return self.md

    def __init__(self, filename, metadata=True, dialog=False):
        self.filename = filename
        self.parse_metadata = metadata
        self.parse_dialog = dialog
        self.ole = None
        self.read_result = MdReader.ReadedConfig()

    def read(self):
        mylog.info(u'Starting to read %s' % self.filename)
        self.ole = OleFileIO(self.filename)
        oledirs = self.ole.listdir()
        mylog.debug('OLE_DIRS: %s' % oledirs)
        for entry in oledirs:
            entry_name = entry[0]
            mylog.debug(u'entry_name: %s' % entry_name)
            try:
                if entry_name == 'Metadata':
                    if "Main MetaData Stream" in entry and self.parse_metadata:
                        self.handler_metadata(entry)
                if entry_name == 'Document':
                    if "Dialog Stream" in entry and self.parse_dialog:
                        self.handler_dialog(entry)
                if "Container.Profile" in entry:
                    continue
                if "Container.Contents" in entry:
                    continue
            except Exception as e:
                mylog.exception(u'Error while reading the configuration: %s' % e.message)
        return self.read_result

    def handler_metadata(self, entry):
        if "Main MetaData Stream" in entry:
            with self.ole.openstream(entry) as f:
                tx = f.read()
                self.read_result.dds = ParseTree(tx.decode('cp1251', errors='ignore'))

    def handler_dialog(self, entry):
        if "Main MetaData Stream" in entry:
            with self.ole.openstream(entry) as f:
                tx = f.read()
                self.read_result.dialog = tx  # ParseTree(tx.decode('cp1251', errors='ignore'))
def is_scan_v2_ole(scan: olefile.OleFileIO) -> bool:
    return scan.exists(LSDATAV2_VERSION_FILE) and scan.exists(LSDATAV2_SCANDATA_SUBFOLDER)
def is_scan_v1_ole(scan: olefile.OleFileIO) -> bool:
    return scan.exists(MAIN_PARAM_STREAM) and scan.exists(SCANS_SUBFOLDER)
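# Hedged sketch (not from the original source) that dispatches on the two
# layout checks above; 'scan.lsd' is an assumed file name and the stream-name
# constants are expected to come from the surrounding module.
import olefile

def detect_scan_version(path='scan.lsd') -> int:
    with olefile.OleFileIO(path) as scan:
        if is_scan_v2_ole(scan):
            return 2
        if is_scan_v1_ole(scan):
            return 1
        return 0  # neither layout recognised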
def __init__(self, filename):
    OleFileIO.__init__(self, filename)
class MdReader:
    """ Reads an MD file, extracting primitive structures and parsing them into lists """

    class ReadedConfig:
        """ Result of reading the configuration; acts as a factory for the parser """

        def __init__(self):
            self.dds = []
            self.dialog = []
            self.entry = {}
            self.md = None

        @property
        def MdObject(self):
            if not self.md:
                self.md = MDObject()
                self.md.parse(self.dds)
            return self.md

    def __init__(self, filename, metadata=True, dialog=False):
        self.filename = filename
        self.parse_metadata = metadata
        self.parse_dialog = dialog
        self.ole = None
        self.read_result = MdReader.ReadedConfig()

    def read(self):
        mylog.info(u'Starting to read %s' % self.filename)
        self.ole = OleFileIO(self.filename)
        oledirs = self.ole.listdir()
        mylog.debug('OLE_DIRS: %s' % oledirs)
        for entry in oledirs:
            entry_name = entry[0]
            mylog.debug(u'entry_name: %s' % entry_name)
            try:
                if entry_name == 'Metadata':
                    if "Main MetaData Stream" in entry and self.parse_metadata:
                        self.handler_metadata(entry)
                if entry_name == 'Document':
                    if "Dialog Stream" in entry and self.parse_dialog:
                        self.handler_dialog(entry)
                if "Container.Profile" in entry:
                    continue
                if "Container.Contents" in entry:
                    continue
            except Exception as e:
                mylog.exception(u'Error while reading the configuration: %s' % e.message)
        return self.read_result

    def handler_metadata(self, entry):
        if "Main MetaData Stream" in entry:
            with self.ole.openstream(entry) as f:
                tx = f.read()
                self.read_result.dds = ParseTree(tx.decode('cp1251', errors='ignore'))

    def handler_dialog(self, entry):
        if "Main MetaData Stream" in entry:
            with self.ole.openstream(entry) as f:
                tx = f.read()
                self.read_result.dialog = tx  # ParseTree(tx.decode('cp1251', errors='ignore'))
def get_xdata(ole: olefile.OleFileIO, header: Header) -> Optional[bytes]:
    return ole.openstream('X-Data').read() if ole.exists('X-Data') else None
def get_ydata(ole: olefile.OleFileIO, header: Header) -> Optional[bytes]:
    return ole.openstream('Y-Data').read()
def get_header(ole: olefile.OleFileIO) -> Header:
    data_info = ole.openstream('DataInfo').read()
    return Header(*unpack_data_info(data_info))
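# Hedged sketch tying together get_header, get_xdata and get_ydata above;
# Header and unpack_data_info are assumed to come from the surrounding module,
# and the caller supplies the container path.
from olefile import OleFileIO

def read_xy(path):
    with OleFileIO(path) as ole:
        header = get_header(ole)
        return header, get_xdata(ole, header), get_ydata(ole, header)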
def parse_md(filename):
    mylog.info(u'Starting to read %s' % filename)
    m = {'dds': []}
    ole = OleFileIO(filename)
    # mylog.debug('OLE_DIRS: %s' % ole.listdir())
    m['entry'] = {}
    for entry in ole.listdir():
        mylog.debug(entry[0])
        #with open("stream_%s" % entry[0],'w+') as f:
        #    f.write(repr(entry))
        if entry[0] == 'Document':
            #print entry
            if "Dialog Stream" in entry:
                continue
            try:
                sz = ole.get_size(entry)
                f = ole.openstream(entry)
                #print f.read(sz)
                f.close()
            except Exception as e:
                mylog.exception(repr(e.args))
            if "Container.Profile" in entry:
                continue
            try:
                sz = ole.get_size(entry)
                f = ole.openstream(entry)
                #print f.read(sz)
                f.close()
            except:
                mylog.exception()
            if "Container.Contents" in entry:
                continue
            sz = ole.get_size(entry)
            f = ole.openstream(entry)
            #print f.read(sz)
            f.close()
            if "MD Programm text" in entry:
                continue
                '''
                Does not work yet; it worked in earlier Python versions
                try:
                    sz = ole.get_size(entry)
                    f = ole.openstream(entry)
                    tx = f.read(sz)
                    f.close()
                    #print zlib.compress('test').encode('hex')
                    zi = zlib.decompress((zlib_head + tx))
                    print zi
                except Exception, e:
                    mylog.exception('read MD Programm text')
                    mylog.info(tx[:10].encode('hex'))
                    #print e
                '''
        if entry[0] == 'Metadata':
            if "Main MetaData Stream" in entry:
                try:
                    #sz = ole.get_size(entry)
                    f = ole.openstream(entry)
                    tx = f.read()
                    f.close()
                    #print zlib.compress('test').encode('hex')
                    #d = zlib.decompressobj()
                    #zi = zlib.decompress(zlib_head + tx)
                    #tx_fixed = utils.fixunicode(tx, 'cp1251')
                    #mylog.debug(tx.decode('cp1251'))
                    m['dds'] = ParseTree(tx.decode('cp1251', errors='ignore'))
                except Exception as e:
                    mylog.exception('parse metadata error')
        #if entry[0] == 'Journal':

        # write dumps
        if DUMP_META:
            if "MD Programm text" in entry:
                sz = ole.get_size(entry)
                f = ole.openstream(entry)
                tx = f.read(sz)
                f.close()
                hx = tx.encode('hex')
                if ztest.find(hx) > 0:
                    #print entry
                    #print hx
                    pass
                try:
                    #zlib.compress("//test").encode('hex')
                    #'789c d3d72f492d2e0100 0664021f'
                    # d3d72f492d2e0100
                    tx = zlib.decompress(zlib_head + tx)
                    #print "MODULE:", tx
                    pass
                except Exception as e:
                    #print "size MD text:", sz, e.message
                    pass
                dump_stream("entry-%s" % entry, zlib_head + tx)
            else:
                dump_stream("entry-%s" % entry, ole.openstream(entry).read())
    return m
if __name__ == '__main__':
    if (len(sys.argv) > 1):
        files = sys.argv[1:]
        filename = files[0].decode(sys.getfilesystemencoding())  # make it UNICODE!
        setInventorFile(filename)
        if (isOleFile(filename)):
            if (len(files) == 1):
                open(filename)
            else:
                # this is only for debugging purposes...
                docname = os.path.splitext(os.path.basename(filename))[0]
                docname = decode(docname, utf=True)
                doc = FreeCAD.newDocument(docname)
                ole = OleFileIO(filename)
                setFileVersion(ole)
                elements = ole.listdir(streams=True, storages=False)
                counter = 1
                if (files[1] == 'l'):
                    for filename in elements:
                        ListElement(ole, filename, counter)
                        counter += 1
                else:
                    list = {}
                    counters = {}
                    for a in (elements):
                        path = PrintableName(a)
                        list['%s' % (counter)] = a
                        counters['%s' % (counter)] = counter