def __enter__(self, *args):
    """
    Add this plugin to the python path so that its contents become
    directly importable. Useful when bundling large python libraries
    into the plugin. Use it like this::

        with plugin:
            import something

    If ``self.plugin_path`` is None nothing is done. Otherwise the plugin
    ZIP is inspected: when it contains no files with a native library
    extension (``pyd``, ``so``, ``dll``, ``dylib``) the ZIP itself is put
    at the front of ``sys.path``; when it does, the archive is unpacked
    into a temporary directory and that directory is put on ``sys.path``
    instead. The path that was inserted is recorded in
    ``self.sys_insertion_path``.
    """
    if self.plugin_path is None:
        return

    from ebook_converter.utils.zipfile import ZipFile
    zf = ZipFile(self.plugin_path)
    try:
        extensions = {x.rpartition('.')[-1].lower() for x in zf.namelist()}
        # Python cannot import compiled extension modules straight from a
        # ZIP archive, so such plugins have to be unpacked first.
        zip_safe = extensions.isdisjoint(('pyd', 'so', 'dll', 'dylib'))
        if zip_safe:
            sys.path.insert(0, self.plugin_path)
            self.sys_insertion_path = self.plugin_path
        else:
            from ebook_converter.ptempfile import TemporaryDirectory
            self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
            self.sys_insertion_path = (
                self._sys_insertion_tdir.__enter__(*args))
            zf.extractall(self.sys_insertion_path)
            sys.path.insert(0, self.sys_insertion_path)
    finally:
        # Release the archive handle even when listing/extraction fails.
        zf.close()
def safe_replace(self, name, datastream, extra_replacements=None,
                 add_missing=False):
    """
    Rewrite the underlying ZIP stream with one or more members replaced.

    A new archive is built in a spooled temporary file: every member
    listed in ``self.file_info`` is copied across, except that members
    whose filename is being replaced get the new data (keeping the
    member's original compression method). The finished archive then
    overwrites ``self.stream`` in place.

    :param name: Filename of the member to replace.
    :param datastream: Replacement data for ``name``; either ``bytes`` or
        an object with a ``read()`` method.
    :param extra_replacements: Optional mapping of additional
        ``filename -> bytes-or-readable`` replacements. An entry for
        ``name`` here overrides ``datastream``.
    :param add_missing: If True, replacement names that do not exist in
        the archive are appended as new members; otherwise they are
        ignored.
    """
    from ebook_converter.utils.zipfile import ZipFile, ZipInfo

    replacements = {name: datastream}
    if extra_replacements:
        replacements.update(extra_replacements)
    names = frozenset(replacements)
    found = set()

    def rbytes(member_name):
        # Replacement values may be raw bytes or file-like objects.
        data = replacements[member_name]
        if not isinstance(data, bytes):
            data = data.read()
        return data

    # Stay in memory up to 100MB, spill to disk beyond that.
    with SpooledTemporaryFile(max_size=100 * 1024 * 1024) as temp:
        ztemp = ZipFile(temp, 'w')
        for _offset, header in self.file_info.values():
            if header.filename in names:
                zi = ZipInfo(header.filename)
                zi.compress_type = header.compression_method
                ztemp.writestr(zi, rbytes(header.filename))
                found.add(header.filename)
            else:
                ztemp.writestr(header.filename,
                               self.read(header.filename, spool_size=0))
        if add_missing:
            for missing in names - found:
                ztemp.writestr(missing, rbytes(missing))
        ztemp.close()

        # Replace the contents of the original stream with the new archive.
        zipstream = self.stream
        temp.seek(0)
        zipstream.seek(0)
        zipstream.truncate()
        shutil.copyfileobj(temp, zipstream)
        zipstream.flush()
def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
    """
    Obtain a recipe, run it and return the path of the OPF it produced.

    The recipe is located in one of the following ways:

    * ``file_ext == 'downloaded_recipe'``: ``recipe_or_file`` is a ZIP
      that is unpacked into the current directory; the contained
      ``download.recipe`` is compiled and instantiated but not downloaded
      again.
    * The ``CALIBRE_RECIPE_URN`` environment variable is set: the recipe
      is looked up by that urn (custom or builtin).
    * ``recipe_or_file`` is a readable file: it is compiled as a custom
      recipe.
    * Otherwise the argument is treated as the title of a builtin recipe.
      A downloaded copy is preferred; the bundled copy is used when the
      downloaded one fails to compile or requires a newer version.

    Except in the first case the recipe's ``download()`` is then run.
    Finally the recipe's ``conversion_options`` are copied onto ``opts``.

    :param recipe_or_file: Path to a recipe/ZIP file, or a recipe title.
    :param opts: Conversion options object; modified in place.
    :param file_ext: Extension of the input; only ``'downloaded_recipe'``
        is treated specially.
    :param log: Logger; called directly and via ``warn``/``exception``.
    :param accelerators: Unused by this method.
    :return: Absolute path of the first ``.opf`` file found in the current
        directory (top level first, then via ``walk('.')``), or None
        when no such file exists.
    :raises ValueError: When the urn is malformed or no recipe can be
        found/compiled for the given input.
    :raises RecipeDisabled: When the recipe defines ``recipe_disabled``.
    """
    from ebook_converter.web.feeds.recipes import compile_recipe
    opts.output_profile.flow_size = 0
    if file_ext == 'downloaded_recipe':
        from ebook_converter.utils.zipfile import ZipFile
        zf = ZipFile(recipe_or_file, 'r')
        try:
            zf.extractall()
        finally:
            # Do not leak the archive handle when extraction fails.
            zf.close()
        with open('download.recipe', 'rb') as f:
            self.recipe_source = f.read()
        recipe = compile_recipe(self.recipe_source)
        recipe.needs_subscription = False
        self.recipe_object = recipe(opts, log, self.report_progress)
    else:
        if os.environ.get('CALIBRE_RECIPE_URN'):
            from ebook_converter.web.feeds.recipes.collection import (
                get_custom_recipe, get_builtin_recipe_by_id)
            urn = os.environ['CALIBRE_RECIPE_URN']
            log('Downloading recipe urn: ' + urn)
            # urn has the form "<type>:<id>"
            rtype, recipe_id = urn.partition(':')[::2]
            if not recipe_id:
                raise ValueError('Invalid recipe urn: ' + urn)
            if rtype == 'custom':
                self.recipe_source = get_custom_recipe(recipe_id)
            else:
                self.recipe_source = get_builtin_recipe_by_id(
                    urn, log=log, download_recipe=True)
            if not self.recipe_source:
                raise ValueError('Could not find recipe with urn: ' + urn)
            if not isinstance(self.recipe_source, bytes):
                self.recipe_source = self.recipe_source.encode('utf-8')
            recipe = compile_recipe(self.recipe_source)
        elif os.access(recipe_or_file, os.R_OK):
            with open(recipe_or_file, 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            log('Using custom recipe')
        else:
            from ebook_converter.web.feeds.recipes.collection import (
                get_builtin_recipe_by_title, get_builtin_recipe_titles)
            title = getattr(opts, 'original_recipe_input_arg',
                            recipe_or_file)
            title = os.path.basename(title).rpartition('.')[0]
            titles = frozenset(get_builtin_recipe_titles())
            if title not in titles:
                # Retry without stripping the directory part, for titles
                # that themselves contain a path separator.
                title = getattr(opts, 'original_recipe_input_arg',
                                recipe_or_file)
                title = title.rpartition('.')[0]

            raw = get_builtin_recipe_by_title(
                title, log=log,
                download_recipe=not opts.dont_download_recipe)
            builtin = False
            try:
                recipe = compile_recipe(raw)
                self.recipe_source = raw
                if recipe.requires_version > numeric_version:
                    # requires_version is compared with numeric_version,
                    # so its components may be numbers; stringify them so
                    # the join cannot raise and be mistaken for a compile
                    # failure below.
                    log.warn(
                        'Downloaded recipe needs calibre version at least: %s'
                        % ('.'.join(map(str, recipe.requires_version))))
                    builtin = True
            except Exception:
                log.exception('Failed to compile downloaded recipe. Falling '
                              'back to builtin one')
                builtin = True
            if builtin:
                log('Using bundled builtin recipe')
                raw = get_builtin_recipe_by_title(title, log=log,
                                                  download_recipe=False)
                if raw is None:
                    raise ValueError('Failed to find builtin recipe: '+title)
                recipe = compile_recipe(raw)
                self.recipe_source = raw
            else:
                log('Using downloaded builtin recipe')

        if recipe is None:
            raise ValueError('%r is not a valid recipe file or builtin recipe'
                             % recipe_or_file)

        disabled = getattr(recipe, 'recipe_disabled', None)
        if disabled is not None:
            raise RecipeDisabled(disabled)
        ro = recipe(opts, log, self.report_progress)
        ro.download()
        self.recipe_object = ro

    for key, val in self.recipe_object.conversion_options.items():
        setattr(opts, key, val)

    # Prefer an OPF at the top level of the working directory ...
    for f in os.listdir('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)

    # ... and fall back to searching the whole tree.
    for f in walk('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
class DOCX(object):
    """
    Accessor for the parts of a DOCX package.

    The package is either unpacked into a temporary directory
    (``extract=True``, the default) or read lazily from the ZIP archive
    (``extract=False``). Call :meth:`close` when done to release the
    archive / remove the temporary directory.
    """

    def __init__(self, path_or_stream, log=None, extract=True):
        """
        :param path_or_stream: Filesystem path, or an object with a
            ``read`` attribute, of the DOCX file.
        :param log: Logger to use; ``default_log`` when not given.
        :param extract: Unpack the archive to a temporary directory
            instead of keeping the ZIP open.
        :raises InvalidDOCX: If ``[Content_Types].xml`` or ``_rels/.rels``
            is missing.
        """
        self.docx_is_transitional = True
        # File object opened by this constructor (if any). Streams passed
        # in by the caller are never closed here.
        self._owned_stream = None
        stream = path_or_stream
        if not hasattr(path_or_stream, 'read'):
            stream = self._owned_stream = open(path_or_stream, 'rb')
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        try:
            if extract:
                self.extract(stream)
                # Everything is on disk now, the source file is no longer
                # needed.
                self._close_owned_stream()
            else:
                self.init_zipfile(stream)
            self.read_content_types()
            self.read_package_relationships()
        except Exception:
            self._close_owned_stream()
            raise
        self.namespace = DOCXNamespace(self.docx_is_transitional)

    def _close_owned_stream(self):
        """Close the file opened by ``__init__`` from a path, if any."""
        stream = getattr(self, '_owned_stream', None)
        if stream is not None:
            self._owned_stream = None
            stream.close()

    def init_zipfile(self, stream):
        """Open ``stream`` as a ZIP and record the member names."""
        self.zipf = ZipFile(stream)
        self.names = frozenset(self.zipf.namelist())

    def extract(self, stream):
        """
        Unpack ``stream`` into a new temporary directory and build
        ``self.names``, a mapping of package part name (``/`` separated)
        to the extracted file's path.
        """
        self.tdir = PersistentTemporaryDirectory('docx_container')
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
        except Exception:
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                               ' more forgiving ZIP parser')
            from ebook_converter.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)

        self.names = {}
        for f in walk(self.tdir):
            name = os.path.relpath(f, self.tdir).replace(os.sep, '/')
            self.names[name] = f

    def exists(self, name):
        """Return True if the package contains a part called ``name``."""
        return name in self.names

    def read(self, name):
        """
        Return the raw bytes of the part ``name``.

        :raises KeyError: In extracted mode, when ``name`` is unknown.
        """
        if hasattr(self, 'zipf'):
            return self.zipf.open(name).read()
        path = self.names[name]
        with open(path, 'rb') as f:
            return f.read()

    def read_content_types(self):
        """
        Parse ``[Content_Types].xml`` into ``self.content_types`` (part
        name -> type) and ``self.default_content_types`` (lower-cased
        extension -> type).

        :raises InvalidDOCX: If the part is missing.
        """
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no '
                              '[Content_Types].xml' % self.name)
        root = etree.fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()='
                               '"Default" and @Extension and @ContentType]'):
            self.default_content_types[item.get('Extension').lower()] = \
                item.get('ContentType')
        for item in root.xpath('//*[local-name()="Types"]/*[local-name()='
                               '"Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

    def content_type(self, name):
        """
        Return the content type of ``name``: an explicit override if
        present, else the default for its extension, else a guess from
        ``mimetypes`` (which may be None).
        """
        if name in self.content_types:
            return self.content_types[name]
        ext = name.rpartition('.')[-1].lower()
        if ext in self.default_content_types:
            return self.default_content_types[ext]
        return mimetypes.guess_type(name)[0]

    def read_package_relationships(self):
        """
        Parse ``_rels/.rels`` into ``self.relationships`` (type -> target)
        and ``self.relationships_rmap`` (target -> type), and update
        ``self.docx_is_transitional`` from the relationship type that
        points at ``word/document.xml``.

        :raises InvalidDOCX: If the part is missing.
        """
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
            raise InvalidDOCX('The file %s docx file has no _rels/.rels' %
                              self.name)
        root = etree.fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name'
                               '()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            if target == 'word/document.xml':
                self.docx_is_transitional = (typ != 'http://purl.oclc.org/'
                                             'ooxml/officeDocument/'
                                             'relationships/officeDocument')
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

    @property
    def document_name(self):
        """
        Name of the main document part, taken from the package
        relationships or, failing that, the first part named
        ``document.xml``.

        :raises InvalidDOCX: If no candidate exists.
        """
        name = self.relationships.get(self.namespace.names['DOCUMENT'], None)
        if name is None:
            names = tuple(
                n for n in self.names
                if n == 'document.xml' or n.endswith('/document.xml'))
            if not names:
                raise InvalidDOCX('The file %s docx file has no main '
                                  'document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
        """The parsed XML root of the main document part."""
        return etree.fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
        """Relationships of the main document, see get_relationships()."""
        return self.get_relationships(self.document_name)

    def get_relationships(self, name):
        """
        Read the relationships part that belongs to the part ``name``.

        :return: ``(by_id, by_type)``: two dicts mapping relationship Id
            and relationship Type to the target. Internal targets are
            made relative to the package root; external targets and
            ``#`` fragments are returned unchanged. Both dicts are empty
            when the part has no relationships file.
        """
        base = '/'.join(name.split('/')[:-1])
        by_id, by_type = {}, {}
        parts = name.split('/')
        name = '/'.join(parts[:-1] + ['_rels', parts[-1] + '.rels'])
        try:
            raw = self.read(name)
        except KeyError:
            pass
        else:
            root = etree.fromstring(raw)
            for item in root.xpath('//*[local-name()="Relationships"]/*'
                                   '[local-name()="Relationship" and @Type '
                                   'and @Target]'):
                target = item.get('Target')
                if (item.get('TargetMode', None) != 'External' and
                        not target.startswith('#')):
                    target = target.lstrip('/')
                    # For a part at the package root, base is empty;
                    # joining would yield a leading "/" that never matches
                    # an entry of self.names.
                    if base:
                        target = '/'.join((base, target))
                typ = item.get('Type')
                Id = item.get('Id')
                by_id[Id] = by_type[typ] = target

        return by_id, by_type

    def get_document_properties_names(self):
        """
        Yield exactly two values: the name of the core properties part
        and the name of the app properties part. Each comes from the
        package relationships or, failing that, from a case-insensitive
        match on ``docprops/core.xml`` / ``docprops/app.xml``; it is None
        when neither is available.
        """
        name = self.relationships.get(self.namespace.names['DOCPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names
                          if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        yield name
        name = self.relationships.get(self.namespace.names['APPPROPS'], None)
        if name is None:
            names = tuple(n for n in self.names
                          if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        yield name

    @property
    def metadata(self):
        """
        Build a ``Metadata`` object from the core properties, the default
        style language (only when no language was found) and the app
        properties. Missing parts are silently skipped.
        """
        mi = Metadata('Unknown')
        dp_name, ap_name = self.get_document_properties_names()
        if dp_name:
            try:
                raw = self.read(dp_name)
            except KeyError:
                pass
            else:
                read_doc_props(raw, mi, self.namespace.XPath)
        if mi.is_null('language'):
            try:
                raw = self.read('word/styles.xml')
            except KeyError:
                pass
            else:
                read_default_style_language(raw, mi, self.namespace.XPath)

        # ap_name already includes the docProps/app.xml fallback from
        # get_document_properties_names(); do not look it up again from
        # the relationships alone.
        if ap_name:
            try:
                raw = self.read(ap_name)
            except KeyError:
                pass
            else:
                read_app_props(raw, mi)

        return mi

    def close(self):
        """
        Release resources: close the ZIP archive, or remove the temporary
        extraction directory (errors while removing are ignored), and
        close the source file if it was opened by the constructor.
        """
        if hasattr(self, 'zipf'):
            self.zipf.close()
        else:
            try:
                shutil.rmtree(self.tdir)
            except EnvironmentError:
                pass
        self._close_owned_stream()