def __detectmimetype(zipfd, odffile):
    """
    detects the mime-type of an ODF file
    @param zipfd an open zipfile.ZipFile instance
    @param odffile only sanity-checked, never read: a unicode file name or
    a binary readable stream
    @return a mime-type as a unicode string
    """
    assert isinstance(zipfd, zipfile.ZipFile)
    assert (isinstance(odffile, type(u"")) or 'rb' in repr(odffile)
            or 'BufferedReader' in repr(odffile)
            or 'BytesIO' in repr(odffile))
    # First mechanism: the dedicated 'mimetype' member of the package.
    try:
        return zipfd.read('mimetype').decode("utf-8")
    except Exception:
        # Deliberately best-effort: a missing or unreadable member falls
        # through to the next mechanism.  Unlike a bare "except:", this
        # lets KeyboardInterrupt and SystemExit propagate.
        pass
    # Second mechanism: the media-type of the manifest's root ("/") entry.
    manifestpart = zipfd.read('META-INF/manifest.xml')
    manifest = manifestlist(manifestpart)
    if "/" in manifest:
        media_type = manifest["/"]['media-type']
        assert isinstance(media_type, type(u""))
        return media_type
    # Last mechanism: assume a text document.
    return u'application/vnd.oasis.opendocument.text'
def __detectmimetype(zipfd, odffile):
    """
    detects the mime-type of an ODF file
    @param zipfd an open zipfile.ZipFile instance
    @param odffile only sanity-checked, never read: a unicode file name or
    a binary readable stream
    @return a mime-type as a unicode string
    """
    assert isinstance(zipfd, zipfile.ZipFile)
    assert (isinstance(odffile, type(u"")) or 'rb' in repr(odffile)
            or 'BufferedReader' in repr(odffile)
            or 'BytesIO' in repr(odffile))
    # Preferred source: the package's own 'mimetype' member.
    try:
        return zipfd.read('mimetype').decode("utf-8")
    except Exception:
        # Best-effort on purpose: any ordinary failure reading or decoding
        # the member falls through.  KeyboardInterrupt and SystemExit are
        # no longer trapped, as they were by the former bare "except:".
        pass
    # Fallback: the media-type recorded for the root ("/") manifest entry.
    manifestpart = zipfd.read('META-INF/manifest.xml')
    manifest = manifestlist(manifestpart)
    if "/" in manifest:
        media_type = manifest["/"]['media-type']
        assert isinstance(media_type, type(u""))
        return media_type
    # Nothing found: default to a text document.
    return u'application/vnd.oasis.opendocument.text'
def load(odffile):
    """
    Load the standard XML parts of an ODF file into memory
    @param odffile passed straight to zipfile.ZipFile (file name or stream)
    @return the OpenDocument instance that was populated
    """
    from load import LoadParser
    from xml.sax import make_parser, handler
    z = zipfile.ZipFile(odffile)
    # try/finally so the archive is closed even when reading or parsing fails
    try:
        mimetype = z.read('mimetype')
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files there are
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        for xmlfile in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
            if xmlfile not in manifest:
                continue
            try:
                xmlpart = z.read(xmlfile)
                # LoadParser is handed the document; _parsing names the part
                # being parsed and is removed once the parse succeeds
                doc._parsing = xmlfile
                parser = make_parser()
                parser.setFeature(handler.feature_namespaces, 1)
                parser.setContentHandler(LoadParser(doc))
                parser.setErrorHandler(handler.ErrorHandler())
                inpsrc = InputSource()
                inpsrc.setByteStream(StringIO(xmlpart))
                parser.parse(inpsrc)
                del doc._parsing
            except KeyError:
                # Listed in the manifest but absent from the archive: skip
                pass
    finally:
        z.close()
    return doc
def load(odffile):
    """
    Load an ODF file into memory
    Returns a reference to the structure
    """
    z = zipfile.ZipFile(odffile)
    # try/finally so the archive is closed on every exit path, including
    # failures while reading or parsing members
    try:
        try:
            mimetype = z.read('mimetype')
        except KeyError:  # Added by Kovid to handle malformed odt files
            mimetype = 'application/vnd.oasis.opendocument.text'
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files there are
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        __loadxmlparts(z, manifest, doc, '')
        for mentry, mvalue in manifest.items():
            if mentry.startswith("Pictures/") and len(mentry) > 9:
                doc.addPicture(mvalue['full-path'], mvalue['media-type'],
                               z.read(mentry))
            elif mentry == "Thumbnails/thumbnail.png":
                doc.addThumbnail(z.read(mentry))
            elif mentry in ('settings.xml', 'meta.xml', 'content.xml',
                            'styles.xml'):
                pass  # Already handled by __loadxmlparts above
            # Load subobjects into structure
            elif (mentry.startswith("Object ") and len(mentry) < 11
                  and mentry.endswith("/")):
                subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
                doc.addObject(subdoc, "/" + mentry[:-1])
                __loadxmlparts(z, manifest, subdoc, mentry)
            elif mentry.startswith("Object "):
                pass  # Don't load subobjects as opaque objects
            else:
                # endswith() also copes with an empty full-path, where
                # indexing [-1] would raise IndexError
                if mvalue['full-path'].endswith('/'):
                    content = None  # directory entry: nothing to read
                else:
                    content = z.read(mentry)
                doc._extra.append(OpaqueObject(
                    mvalue['full-path'], mvalue['media-type'], content))
                # Add the SUN junk here to the struct somewhere
                # It is cached data, so it can be out-of-date
    finally:
        z.close()

    b = doc.getElementsByType(Body)
    # Expose the body's first child under the attribute matching the document
    # kind.  Slice comparison (not startswith) keeps a bytes mimetype
    # harmlessly non-matching instead of raising TypeError.
    for prefix, attr in (
            ('application/vnd.oasis.opendocument.text', 'text'),
            ('application/vnd.oasis.opendocument.graphics', 'graphics'),
            ('application/vnd.oasis.opendocument.presentation', 'presentation'),
            ('application/vnd.oasis.opendocument.spreadsheet', 'spreadsheet'),
            ('application/vnd.oasis.opendocument.chart', 'chart'),
            ('application/vnd.oasis.opendocument.image', 'image'),
            ('application/vnd.oasis.opendocument.formula', 'formula')):
        if mimetype[:len(prefix)] == prefix:
            setattr(doc, attr, b[0].firstChild)
            break
    return doc
def load(odffile):
    """
    Load an ODF file into memory
    Returns a reference to the structure
    @param odffile passed straight to zipfile.ZipFile (file name or stream)
    """
    from load import LoadParser
    from xml.sax import make_parser, handler

    z = zipfile.ZipFile(odffile)
    # try/finally so the archive is closed even when reading or parsing fails
    try:
        mimetype = z.read("mimetype")
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files there are
        manifestpart = z.read("META-INF/manifest.xml")
        manifest = manifestlist(manifestpart)
        for xmlfile in ("settings.xml", "meta.xml", "content.xml", "styles.xml"):
            if xmlfile not in manifest:
                continue
            try:
                xmlpart = z.read(xmlfile)
                # _parsing names the part being parsed; it is removed again
                # once the parse succeeds
                doc._parsing = xmlfile
                parser = make_parser()
                parser.setFeature(handler.feature_namespaces, 1)
                parser.setContentHandler(LoadParser(doc))
                parser.setErrorHandler(handler.ErrorHandler())
                inpsrc = InputSource()
                inpsrc.setByteStream(StringIO(xmlpart))
                parser.parse(inpsrc)
                del doc._parsing
            except KeyError:
                # Listed in the manifest but absent from the archive: skip
                pass
    finally:
        z.close()
    # The docstring promises the structure; previously nothing was returned
    return doc
def load(odffile):
    """
    Load an ODF file into memory
    @param odffile unicode string: name of a file, or as an alternative,
    an open readable stream
    @return a reference to the structure (an OpenDocument instance)
    """
    assert (isinstance(odffile, type(u"")) or 'rb' in repr(odffile)
            or 'BufferedReader' in repr(odffile)
            or 'BytesIO' in repr(odffile))

    z = zipfile.ZipFile(odffile)
    # try/finally so the archive is closed on every exit path, including
    # failures while reading or parsing members
    try:
        mimetype = __detectmimetype(z, odffile)
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files there are
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        __loadxmlparts(z, manifest, doc, u'')
        for mentry, mvalue in manifest.items():
            if mentry.startswith(u"Pictures/") and len(mentry) > 9:
                doc.addPicture(mvalue['full-path'], mvalue['media-type'],
                               z.read(mentry))
            elif mentry == u"Thumbnails/thumbnail.png":
                doc.addThumbnail(z.read(mentry))
            elif mentry in (u'settings.xml', u'meta.xml', u'content.xml',
                            u'styles.xml'):
                pass  # Already handled by __loadxmlparts above
            # Load subobjects into structure
            elif (mentry.startswith(u"Object ") and len(mentry) < 11
                  and mentry.endswith(u"/")):
                subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
                doc.addObject(subdoc, u"/" + mentry[:-1])
                __loadxmlparts(z, manifest, subdoc, mentry)
            elif mentry.startswith(u"Object "):
                pass  # Don't load subobjects as opaque objects
            else:
                # endswith() also copes with an empty full-path, where
                # indexing [-1] would raise IndexError
                if mvalue['full-path'].endswith(u'/'):
                    content = None  # directory entry: nothing to read
                else:
                    content = z.read(mentry)
                doc._extra.append(OpaqueObject(
                    mvalue['full-path'], mvalue['media-type'], content))
                # Add the SUN junk here to the struct somewhere
                # It is cached data, so it can be out-of-date
    finally:
        z.close()

    b = doc.getElementsByType(Body)
    # Expose the body's first child under the attribute matching the document
    # kind; the prefix length is derived instead of hard-coded.
    for prefix, attr in (
            (u'application/vnd.oasis.opendocument.text', 'text'),
            (u'application/vnd.oasis.opendocument.graphics', 'graphics'),
            (u'application/vnd.oasis.opendocument.presentation', 'presentation'),
            (u'application/vnd.oasis.opendocument.spreadsheet', 'spreadsheet'),
            (u'application/vnd.oasis.opendocument.chart', 'chart'),
            (u'application/vnd.oasis.opendocument.image', 'image'),
            (u'application/vnd.oasis.opendocument.formula', 'formula')):
        if mimetype[:len(prefix)] == prefix:
            setattr(doc, attr, b[0].firstChild)
            break
    return doc
def load(odffile):
    """
    Load an ODF file into memory
    @param odffile passed straight to zipfile.ZipFile (file name or stream)
    @return the populated OpenDocument instance
    """
    from load import LoadParser
    from xml.sax import make_parser, handler
    z = zipfile.ZipFile(odffile)
    # try/finally so the archive is closed even when reading or parsing fails
    try:
        mimetype = z.read('mimetype')
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files there are
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        for xmlfile in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
            if xmlfile not in manifest:
                continue
            try:
                xmlpart = z.read(xmlfile)
                # _parsing names the part being parsed; it is removed again
                # once the parse succeeds
                doc._parsing = xmlfile
                parser = make_parser()
                parser.setFeature(handler.feature_namespaces, 1)
                parser.setContentHandler(LoadParser(doc))
                parser.setErrorHandler(handler.ErrorHandler())
                inpsrc = InputSource()
                inpsrc.setByteStream(StringIO(xmlpart))
                parser.parse(inpsrc)
                del doc._parsing
            except KeyError:
                # Listed in the manifest but absent from the archive: skip
                pass
        # FIXME: Add subobjects correctly here
        for mentry, mvalue in manifest.items():
            if mentry.startswith("Pictures/") and len(mentry) > 9:
                doc.addPicture(mvalue['full-path'], mvalue['media-type'],
                               z.read(mentry))
            elif mentry == "Thumbnails/thumbnail.png":
                doc.addThumbnail(z.read(mentry))
            elif mentry in ('settings.xml', 'meta.xml', 'content.xml',
                            'styles.xml'):
                pass  # Parsed in the loop above
            else:
                # endswith() also copes with an empty full-path, where
                # indexing [-1] would raise IndexError
                if mvalue['full-path'].endswith('/'):
                    content = None  # directory entry: nothing to read
                else:
                    content = z.read(mentry)
                doc._extra.append(OpaqueObject(
                    mvalue['full-path'], mvalue['media-type'], content))
                # Add the SUN junk here to the struct somewhere
                # It is cached data, so it can be out-of-date
    finally:
        z.close()

    b = doc.getElementsByType(Body)
    # Expose the body's first child under the attribute matching the document
    # kind.  Slice comparison (not startswith) keeps a bytes mimetype
    # harmlessly non-matching instead of raising TypeError.
    for prefix, attr in (
            ('application/vnd.oasis.opendocument.text', 'text'),
            ('application/vnd.oasis.opendocument.graphics', 'graphics'),
            ('application/vnd.oasis.opendocument.presentation', 'presentation'),
            ('application/vnd.oasis.opendocument.spreadsheet', 'spreadsheet'),
            ('application/vnd.oasis.opendocument.chart', 'chart'),
            ('application/vnd.oasis.opendocument.image', 'image'),
            ('application/vnd.oasis.opendocument.formula', 'formula')):
        if mimetype[:len(prefix)] == prefix:
            setattr(doc, attr, b[0].firstChild)
            break
    return doc
def load(odffile):
    """
    Load an ODF file into memory
    @param odffile unicode string: name of a file, or as an alternative,
    an open readable stream
    @return a reference to the structure (an OpenDocument instance)
    """
    z = zipfile.ZipFile(odffile)
    # try/finally so the archive is closed on every exit path, including
    # failures while reading or parsing members
    try:
        mimetype = __detectmimetype(z, odffile)
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files there are
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        __loadxmlparts(z, manifest, doc, u'')
        for mentry, mvalue in manifest.items():
            if mentry.startswith(u"Pictures/") and len(mentry) > 9:
                doc.addPicture(mvalue['full-path'], mvalue['media-type'],
                               z.read(mentry))
            elif mentry == u"Thumbnails/thumbnail.png":
                doc.addThumbnail(z.read(mentry))
            elif mentry in (u'settings.xml', u'meta.xml', u'content.xml',
                            u'styles.xml'):
                pass  # Already handled by __loadxmlparts above
            # Load subobjects into structure
            elif (mentry.startswith(u"Object ") and len(mentry) < 11
                  and mentry.endswith(u"/")):
                subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
                doc.addObject(subdoc, u"/" + mentry[:-1])
                __loadxmlparts(z, manifest, subdoc, mentry)
            elif mentry.startswith(u"Object "):
                pass  # Don't load subobjects as opaque objects
            else:
                # endswith() also copes with an empty full-path, where
                # indexing [-1] would raise IndexError
                if mvalue['full-path'].endswith(u'/'):
                    content = None  # directory entry: nothing to read
                else:
                    content = z.read(mentry)
                doc._extra.append(OpaqueObject(
                    mvalue['full-path'], mvalue['media-type'], content))
                # Add the SUN junk here to the struct somewhere
                # It is cached data, so it can be out-of-date
    finally:
        z.close()

    b = doc.getElementsByType(Body)
    # Expose the body's first child under the attribute matching the document
    # kind; the prefix length is derived instead of hard-coded.
    for prefix, attr in (
            (u'application/vnd.oasis.opendocument.text', 'text'),
            (u'application/vnd.oasis.opendocument.graphics', 'graphics'),
            (u'application/vnd.oasis.opendocument.presentation', 'presentation'),
            (u'application/vnd.oasis.opendocument.spreadsheet', 'spreadsheet'),
            (u'application/vnd.oasis.opendocument.chart', 'chart'),
            (u'application/vnd.oasis.opendocument.image', 'image'),
            (u'application/vnd.oasis.opendocument.formula', 'formula')):
        if mimetype[:len(prefix)] == prefix:
            setattr(doc, attr, b[0].firstChild)
            break
    return doc
def load(odffile):
    """
    Load an ODF file into memory
    Returns a reference to the structure
    """
    z = zipfile.ZipFile(odffile)
    # try/finally so the archive is closed on every exit path, including
    # failures while reading or parsing members
    try:
        try:
            mimetype = z.read('mimetype')
        except KeyError:  # Added by Kovid to handle malformed odt files
            mimetype = 'application/vnd.oasis.opendocument.text'
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files there are
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        __loadxmlparts(z, manifest, doc, '')
        for mentry, mvalue in manifest.items():
            if mentry.startswith("Pictures/") and len(mentry) > 9:
                doc.addPicture(mvalue['full-path'], mvalue['media-type'],
                               z.read(mentry))
            elif mentry == "Thumbnails/thumbnail.png":
                doc.addThumbnail(z.read(mentry))
            elif mentry in ('settings.xml', 'meta.xml', 'content.xml',
                            'styles.xml'):
                pass  # Already handled by __loadxmlparts above
            # Load subobjects into structure
            elif (mentry.startswith("Object ") and len(mentry) < 11
                  and mentry.endswith("/")):
                subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
                doc.addObject(subdoc, "/" + mentry[:-1])
                __loadxmlparts(z, manifest, subdoc, mentry)
            elif mentry.startswith("Object "):
                pass  # Don't load subobjects as opaque objects
            else:
                # endswith() also copes with an empty full-path, where
                # indexing [-1] would raise IndexError
                if mvalue['full-path'].endswith('/'):
                    content = None  # directory entry: nothing to read
                else:
                    content = z.read(mentry)
                doc._extra.append(OpaqueObject(
                    mvalue['full-path'], mvalue['media-type'], content))
                # Add the SUN junk here to the struct somewhere
                # It is cached data, so it can be out-of-date
    finally:
        z.close()

    b = doc.getElementsByType(Body)
    # Expose the body's first child under the attribute matching the document
    # kind.  Slice comparison (not startswith) keeps a bytes mimetype
    # harmlessly non-matching instead of raising TypeError.
    for prefix, attr in (
            ('application/vnd.oasis.opendocument.text', 'text'),
            ('application/vnd.oasis.opendocument.graphics', 'graphics'),
            ('application/vnd.oasis.opendocument.presentation', 'presentation'),
            ('application/vnd.oasis.opendocument.spreadsheet', 'spreadsheet'),
            ('application/vnd.oasis.opendocument.chart', 'chart'),
            ('application/vnd.oasis.opendocument.image', 'image'),
            ('application/vnd.oasis.opendocument.formula', 'formula')):
        if mimetype[:len(prefix)] == prefix:
            setattr(doc, attr, b[0].firstChild)
            break
    return doc
def __detectmimetype(zipfd, odffile):
    """
    detects the mime-type of an ODF file
    @param zipfd object whose read(name) returns a package member
    (presumably an open zipfile.ZipFile -- confirm against callers)
    @param odffile this parameter is not used
    @return the content of the 'mimetype' member as read, else the
    media-type of the manifest's root entry, else the text-document default
    """
    # First mechanism: the dedicated "mimetype" member of the package.
    try:
        return zipfd.read("mimetype")
    except Exception:
        # Deliberately best-effort: a missing or unreadable member falls
        # through.  Unlike a bare "except:", this lets KeyboardInterrupt
        # and SystemExit propagate.
        pass
    # Second mechanism: the media-type of the manifest's root ("/") entry.
    manifestpart = zipfd.read("META-INF/manifest.xml")
    manifest = manifestlist(manifestpart)
    if "/" in manifest:
        return manifest["/"]["media-type"]
    # Last mechanism: assume a text document.
    return "application/vnd.oasis.opendocument.text"
def __detectmimetype(zipfd, odffile):
    """
    detects the mime-type of an ODF file
    @param zipfd object whose read(name) returns a package member
    (presumably an open zipfile.ZipFile -- confirm against callers)
    @param odffile this parameter is not used
    @return the content of the 'mimetype' member as read, else the
    media-type of the manifest's root entry, else the text-document default
    """
    # Preferred source: the package's own 'mimetype' member.
    try:
        return zipfd.read('mimetype')
    except Exception:
        # Best-effort on purpose: ordinary read failures fall through, but
        # KeyboardInterrupt and SystemExit are no longer trapped as they
        # were by the former bare "except:".
        pass
    # Fallback: the media-type recorded for the root ("/") manifest entry.
    manifestpart = zipfd.read('META-INF/manifest.xml')
    manifest = manifestlist(manifestpart)
    if "/" in manifest:
        return manifest["/"]['media-type']
    # Nothing found: default to a text document.
    return 'application/vnd.oasis.opendocument.text'