def add_content_file_reference(self, name):
    '''Add a reference to the named file (from self.name_path_map) to all
    content files (self.get_html_names()). Currently only CSS files with a
    MIME type of text/css and JavaScript files with a MIME type of
    application/x-javascript are supported.
    '''
    if name not in self.name_path_map or name not in self.mime_map:
        raise ValueError(_("A valid file name must be given (got: {filename})").format(filename=name))
    for infile in self.get_html_names():
        self.log.info("Adding reference to {0} to file {1}".format(name, infile))
        root = self.parsed(infile)
        if root is None:
            self.log.error("Could not retrieve content file {0}".format(infile))
            continue
        # xpath() returns a (possibly empty) list, so test for emptiness rather than None
        head = root.xpath('./xhtml:head', namespaces={'xhtml': XHTML_NAMESPACE})
        if not head:
            self.log.error("Could not find a <head> element in content file {0}".format(infile))
            continue
        head = head[0]
        if head is None:
            self.log.error("A <head> section was found but was undefined in content file {0}".format(infile))
            continue
        if self.mime_map[name] == guess_type('a.css')[0]:
            elem = head.makeelement("{%s}link" % XHTML_NAMESPACE, rel='stylesheet',
                                    href=os.path.relpath(name, os.path.dirname(infile)).replace(os.sep, '/'))
        elif self.mime_map[name] == guess_type('a.js')[0]:
            elem = head.makeelement("{%s}script" % XHTML_NAMESPACE, type='text/javascript',
                                    src=os.path.relpath(name, os.path.dirname(infile)).replace(os.sep, '/'))
        else:
            elem = None
        if elem is not None:
            head.append(elem)
            if self.mime_map[name] == guess_type('a.css')[0]:
                self.fix_tail(elem)
            self.dirty(infile)
def contenttypes(self):
    E = ElementMaker(namespace=namespaces['ct'], nsmap={None: namespaces['ct']})
    types = E.Types()
    for partname, mt in {
        "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
        "/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
        "/word/numbering.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
        "/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
        "/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
        "/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
        "/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml",
        "/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
        "/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
        "/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml",
        "/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml",
    }.iteritems():
        types.append(E.Override(PartName=partname, ContentType=mt))
    added = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'}
    for ext in added:
        types.append(E.Default(Extension=ext, ContentType=guess_type('a.' + ext)[0]))
    for ext, mt in {
        "rels": "application/vnd.openxmlformats-package.relationships+xml",
        "odttf": "application/vnd.openxmlformats-officedocument.obfuscatedFont",
    }.iteritems():
        added.add(ext)
        types.append(E.Default(Extension=ext, ContentType=mt))
    for fname in self.images:
        ext = fname.rpartition(os.extsep)[-1]
        if ext not in added:
            added.add(ext)
            mt = guess_type('a.' + ext)[0]
            if mt:
                types.append(E.Default(Extension=ext, ContentType=mt))
    return xml2str(types)
def __init__(self, rootpath, opfpath, log):
    self.root = os.path.abspath(rootpath)
    self.log = log
    self.html_preprocessor = HTMLPreProcessor()
    self.css_preprocessor = CSSPreProcessor()
    self.parsed_cache = {}
    self.mime_map = {}
    self.name_path_map = {}
    # Map of relative paths with '/' separators from root of unzipped ePub
    # to absolute paths on filesystem with os-specific separators
    opfpath = os.path.abspath(opfpath)
    for dirpath, _dirnames, filenames in os.walk(self.root):
        for f in filenames:
            path = join(dirpath, f)
            name = relpath(path, self.root).replace(os.sep, "/")
            self.name_path_map[name] = path
            self.mime_map[name] = guess_type(path)[0]
            # Special case if we have stumbled onto the opf
            if path == opfpath:
                self.opf_name = name
                self.opf_dir = os.path.dirname(path)
                self.mime_map[name] = guess_type("a.opf")[0]
    # Update mime map with data from the OPF
    for item in self.opf.xpath("//opf:manifest/opf:item[@href and @media-type]",
                               namespaces={"opf": OPF2_NS}):
        href = item.get("href")
        self.mime_map[self.href_to_name(href)] = item.get("media-type")
def insert_metadata(self, mi):
    self.log('Inserting metadata into book...')
    try:
        tags = map(unicode, self.oeb.metadata.subject)
    except:
        tags = []
    try:
        comments = unicode(self.oeb.metadata.description[0])
    except:
        comments = ''
    try:
        title = unicode(self.oeb.metadata.title[0])
    except:
        title = _('Unknown')
    root = render_jacket(mi, self.opts.output_profile, alt_title=title,
                         alt_tags=tags, alt_comments=comments, rescale_fonts=True)
    id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml')
    jacket = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root)
    self.oeb.spine.insert(0, jacket, True)
    self.oeb.inserted_metadata_jacket = jacket
    for img, path in referenced_images(root):
        self.oeb.log('Embedding referenced image %s into jacket' % path)
        ext = path.rpartition('.')[-1].lower()
        item_id, href = self.oeb.manifest.generate('jacket_image', 'jacket_img.' + ext)
        with open(path, 'rb') as f:
            item = self.oeb.manifest.add(item_id, href, guess_type(href)[0], data=f.read())
        item.unload_data_from_memory()
        img.set('src', jacket.relhref(item.href))
def convert(self, stream, options, file_ext, log, accelerators):
    log.debug('Enter convert() ...')
    dest_dir = os.getcwdu()  # note: temp dir from calibre process
    log.debug('dest_dir: ' + dest_dir)
    mi = None
    # call latex2mobi with markup output only
    from subprocess import check_output, STDOUT, CalledProcessError
    args = [self.java_exec, '-jar', os.path.join(self.plugin_dir, JAR_FILENAME),
            '-i', stream.name, '-n', '-o', dest_dir]
    from calibre_plugins.latexformulas_input.config import prefs
    if prefs['pandoc_exec']:
        args.append('-p')
        args.append(prefs['pandoc_exec'])
    try:
        log.debug(check_output(args, stderr=STDOUT))
    except CalledProcessError as e:
        log.debug(e.returncode)
        log.debug(e.cmd)
        log.debug(e.output)
    opf = OPFCreator(dest_dir, mi)
    markup_dir = dest_dir + os.path.sep + os.path.basename(stream.name) + '-markup'
    log.debug('Markup-dir: ' + markup_dir)
    log.debug('CreateManifestFromFilesIn()')
    opf.create_manifest_from_files_in([markup_dir])
    for item in opf.manifest:
        if item.media_type == 'text/html':
            log.debug('Item ' + str(item) + ' is of type text/html')
            item.media_type = guess_type('a.html')[0]
            log.debug('Guess type result: ' + item.media_type)
        if item.media_type == 'text/css':
            log.debug('Item ' + str(item) + ' is of type text/css')
            item.media_type = guess_type('a.css')[0]
            log.debug('Guess type result: ' + item.media_type)
    log.debug('Create_spine()')
    opf.create_spine([os.path.basename(markup_dir) + os.path.sep + 'latex2mobi.html'])
    output_path = os.path.join(dest_dir, 'metadata.opf')
    with open(output_path, 'wb') as of:
        opf.render(of)
    log('Exit convert() ...')
    return output_path
def content_type(self, name):
    if name in self.content_types:
        return self.content_types[name]
    ext = name.rpartition('.')[-1].lower()
    if ext in self.default_content_types:
        return self.default_content_types[ext]
    return guess_type(name)[0]
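A minimal usage sketch of the lookup order above (explicit per-name entry, then per-extension default, then guess_type()); the container variable, dictionary contents and file names are illustrative, not from the source:

# Illustrative only: 'container' stands for an instance of the class defining content_type().
container.content_types['META-INF/container.xml'] = 'application/xml'  # explicit per-name entry wins
container.default_content_types['png'] = 'image/png'                   # then the per-extension default
print(container.content_type('META-INF/container.xml'))  # -> 'application/xml'
print(container.content_type('images/cover.png'))        # -> 'image/png'
print(container.content_type('notes.txt'))               # -> whatever guess_type() returns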
def contenttypes(self):
    E = ElementMaker(namespace=namespaces['ct'], nsmap={None: namespaces['ct']})
    types = E.Types()
    for partname, mt in {
        "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
        "/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
        "/word/numbering.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
        "/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
        "/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
        "/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
        "/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml",
        "/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
        "/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
        "/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml",
        "/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml",
    }.iteritems():
        types.append(E.Override(PartName=partname, ContentType=mt))
    added = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'}
    for ext in added:
        types.append(E.Default(Extension=ext, ContentType=guess_type('a.' + ext)[0]))
    for ext, mt in {
        "rels": "application/vnd.openxmlformats-package.relationships+xml",
        "odttf": "application/vnd.openxmlformats-officedocument.obfuscatedFont",
    }.iteritems():
        added.add(ext)
        types.append(E.Default(Extension=ext, ContentType=mt))
    # TODO: Iterate over all resources and add mimetypes for any that are
    # not already added
    return xml2str(types, pretty_print=True)
def load_html(path, view, codec='utf-8', mime_type=None,
              pre_load_callback=lambda x: None, path_is_html=False,
              force_as_html=False):
    from PyQt5.Qt import QUrl, QByteArray
    if mime_type is None:
        mime_type = guess_type(path)[0]
        if not mime_type:
            mime_type = 'text/html'
    if path_is_html:
        html = path
    else:
        with open(path, 'rb') as f:
            html = f.read().decode(codec, 'replace')
    html = EntityDeclarationProcessor(html).processed_html
    self_closing_pat = re.compile(r'<\s*([:A-Za-z0-9-]+)([^>]*)/\s*>')
    html = self_closing_pat.sub(self_closing_sub, html)
    loading_url = QUrl.fromLocalFile(path)
    pre_load_callback(loading_url)
    if force_as_html or (re.search(r'<[a-zA-Z0-9-]+:svg', html) is None and '<![CDATA[' not in html):
        view.setHtml(html, loading_url)
    else:
        view.setContent(QByteArray(html.encode(codec)), mime_type, loading_url)
        mf = view.page().mainFrame()
        elem = mf.findFirstElement('parsererror')
        if not elem.isNull():
            return False
    return True
def default_cover(self):
    '''
    Create a generic cover for books that don't have a cover
    '''
    from calibre.ebooks.metadata import authors_to_string, fmt_sidx
    if self.no_default_cover:
        return None
    self.log('Generating default cover')
    m = self.oeb.metadata
    title = unicode(m.title[0])
    authors = [unicode(x) for x in m.creator if x.role == 'aut']
    series_string = None
    if m.series and m.series_index:
        series_string = _('Book %(sidx)s of %(series)s') % dict(
            sidx=fmt_sidx(m.series_index[0], use_roman=True),
            series=unicode(m.series[0]))
    try:
        from calibre.ebooks import calibre_cover
        img_data = calibre_cover(title, authors_to_string(authors),
                                 series_string=series_string)
        id, href = self.oeb.manifest.generate('cover', u'cover_image.jpg')
        item = self.oeb.manifest.add(id, href, guess_type('t.jpg')[0], data=img_data)
        m.clear('cover')
        m.add('cover', item.id)
        return item.href
    except:
        self.log.exception('Failed to generate default cover')
    return None
def _manifest_from_opf(self, opf):
    manifest = self.oeb.manifest
    for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
        id = elem.get('id')
        href = elem.get('href')
        media_type = elem.get('media-type', None)
        if media_type is None:
            media_type = elem.get('mediatype', None)
        if not media_type or media_type == 'text/xml':
            guessed = guess_type(href)[0]
            media_type = guessed or media_type or BINARY_MIME
        if hasattr(media_type, 'lower'):
            media_type = media_type.lower()
        fallback = elem.get('fallback')
        if href in manifest.hrefs:
            self.logger.warn(u'Duplicate manifest entry for %r' % href)
            continue
        if not self.oeb.container.exists(href):
            self.logger.warn(u'Manifest item %r not found' % href)
            continue
        if id in manifest.ids:
            self.logger.warn(u'Duplicate manifest id %r' % id)
            id, href = manifest.generate(id, href)
        manifest.add(id, href, media_type, fallback)
    invalid = self._manifest_prune_invalid()
    self._manifest_add_missing(invalid)
def parsed(self, name):
    ans = self.parsed_cache.get(name, None)
    if ans is None:
        mime = self.mime_map.get(name, guess_type(name)[0])
        ans = self.parse(self.name_path_map[name], mime)
        self.parsed_cache[name] = ans
    return ans
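A hedged sketch of how the cache above behaves; the container variable and the document name are illustrative:

# Illustrative only: the second call is served from parsed_cache, so the file is parsed once.
tree_a = container.parsed('OEBPS/chapter1.xhtml')
tree_b = container.parsed('OEBPS/chapter1.xhtml')
assert tree_a is tree_b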
def write(self, path):
    for name in self.dirtied:
        data = self.cache[name]
        if hasattr(data, 'xpath'):
            data = etree.tostring(data, encoding='UTF-8', xml_declaration=True,
                                  pretty_print=True)
            data = string.replace(data, u"\uFFFD", "")
        f = open(self.name_map[name], "wb")
        f.write(data)
        f.close()
    self.dirtied.clear()
    if os.path.exists(path):
        os.unlink(path)
    epub = zipfile.ZipFile(path, 'w', compression=zipfile.ZIP_DEFLATED)
    epub.writestr('mimetype', bytes(guess_type('a.epub')[0]),
                  compress_type=zipfile.ZIP_STORED)
    cwd = os.getcwdu()
    os.chdir(self.root)
    zip_prefix = self.root
    if not zip_prefix.endswith(os.sep):
        zip_prefix += os.sep
    for t in os.walk(self.root, topdown=True):
        for f in t[2]:
            if f not in EXCLUDE_FROM_ZIP:
                filepath = os.path.join(t[0], f).replace(zip_prefix, '')
                st = os.stat(filepath)
                mtime = time.localtime(st.st_mtime)
                if mtime[0] < 1980:
                    os.utime(filepath, None)
                epub.write(filepath)
    epub.close()
    os.chdir(cwd)
def set_cover(self, mi, prefer_metadata_cover):
    cdata, ext = '', 'jpg'
    if mi.cover and os.access(mi.cover, os.R_OK):
        cdata = open(mi.cover, 'rb').read()
        ext = mi.cover.rpartition('.')[-1].lower().strip()
    elif mi.cover_data and mi.cover_data[-1]:
        cdata = mi.cover_data[1]
        ext = mi.cover_data[0]
    if ext not in ('png', 'jpg', 'jpeg'):
        ext = 'jpg'
    id = old_cover = None
    if 'cover' in self.oeb.guide:
        old_cover = self.oeb.guide['cover']
    if prefer_metadata_cover and old_cover is not None:
        cdata = ''
    if cdata:
        self.oeb.guide.remove('cover')
        self.oeb.guide.remove('titlepage')
    if old_cover is not None:
        if old_cover.href in self.oeb.manifest.hrefs:
            item = self.oeb.manifest.hrefs[old_cover.href]
            if not cdata:
                return item.id
            self.remove_old_cover(item)
        elif not cdata:
            # manifest.generate() returns an (id, href) pair, so take the id
            id = self.oeb.manifest.generate(id='cover')[0]
            self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
            return id
    if cdata:
        id, href = self.oeb.manifest.generate('cover', 'cover.' + ext)
        self.oeb.manifest.add(id, href, guess_type('cover.' + ext)[0], data=cdata)
        self.oeb.guide.add('cover', 'Cover', href)
    return id
def insert_metadata(self, mi):
    self.log('Inserting metadata into book...')
    try:
        tags = map(unicode, self.oeb.metadata.subject)
    except:
        tags = []
    try:
        comments = unicode(self.oeb.metadata.description[0])
    except:
        comments = ''
    try:
        title = unicode(self.oeb.metadata.title[0])
    except:
        title = _('Unknown')
    root = render_jacket(mi, self.opts.output_profile, alt_title=title,
                         alt_tags=tags, alt_comments=comments)
    id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml')
    item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root)
    self.oeb.spine.insert(0, item, True)
    self.oeb.inserted_metadata_jacket = item
def write(self, doc):
    toc = create_toc(doc, self.body, self.resolved_link_map, self.styles,
                     self.object_map, self.log)
    raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
    with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
        f.write(raw)
    css = self.styles.generate_css(self.dest_dir, self.docx)
    if css:
        with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
            f.write(css.encode('utf-8'))
    opf = OPFCreator(self.dest_dir, self.mi)
    opf.toc = toc
    opf.create_manifest_from_files_in([self.dest_dir])
    for item in opf.manifest:
        if item.media_type == 'text/html':
            item.media_type = guess_type('a.xhtml')[0]
    opf.create_spine(['index.html'])
    if self.cover_image is not None:
        opf.guide.set_cover(self.cover_image)
    toc_file = os.path.join(self.dest_dir, 'toc.ncx')
    with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, \
            open(toc_file, 'wb') as ncx:
        opf.render(of, ncx, 'toc.ncx')
    if os.path.getsize(toc_file) == 0:
        os.remove(toc_file)
    return os.path.join(self.dest_dir, 'metadata.opf')
def default_cover(self):
    '''
    Create a generic cover for books that don't have a cover
    '''
    if self.no_default_cover:
        return None
    self.log('Generating default cover')
    m = self.oeb.metadata
    title = unicode(m.title[0])
    authors = [unicode(x) for x in m.creator if x.role == 'aut']
    try:
        from calibre.ebooks.covers import create_cover
        series = series_index = None
        if m.series:
            series, series_index = unicode(m.series[0]), m.series_index[0]
        img_data = create_cover(title, authors, series, series_index)
        id, href = self.oeb.manifest.generate('cover', u'cover_image.jpg')
        item = self.oeb.manifest.add(id, href, guess_type('t.jpg')[0], data=img_data)
        m.clear('cover')
        m.add('cover', item.id)
        return item.href
    except:
        self.log.exception('Failed to generate default cover')
    return None
def load_html(path, view, codec='utf-8', mime_type=None,
              pre_load_callback=lambda x: None, path_is_html=False,
              force_as_html=False, loading_url=None):
    from PyQt5.Qt import QUrl, QByteArray
    if mime_type is None:
        mime_type = guess_type(path)[0]
        if not mime_type:
            mime_type = 'text/html'
    if path_is_html:
        html = path
    else:
        with open(path, 'rb') as f:
            html = f.read().decode(codec, 'replace')
    html = cleanup_html(html)
    loading_url = loading_url or QUrl.fromLocalFile(path)
    pre_load_callback(loading_url)
    if force_as_html or load_as_html(html):
        view.setHtml(html, loading_url)
    else:
        view.setContent(QByteArray(html.encode(codec)), mime_type, loading_url)
        mf = view.page().mainFrame()
        elem = mf.findFirstElement('parsererror')
        if not elem.isNull():
            return False
    return True
def register():
    base = os.path.dirname(sys.executable)
    for program, data in default_programs().iteritems():
        data = data.copy()
        exe = os.path.join(base, program)
        capabilities_path = cap_path(data)
        ext_map = {ext.lower(): guess_type('file.' + ext.lower())[0]
                   for ext in extensions(program)}
        ext_map = {ext: mt for ext, mt in ext_map.iteritems() if mt}
        prog_id_map = {ext: progid_name(data['assoc_name'], ext) for ext in ext_map}
        with Key(capabilities_path) as key:
            for k, v in {'ApplicationDescription': 'description',
                         'ApplicationName': 'name'}.iteritems():
                key.set(k, data[v])
            key.set('ApplicationIcon', '%s,0' % exe)
            key.set_default_value(r'shell\open\command', '"%s" "%%1"' % exe)
            with Key('FileAssociations', root=key) as fak, Key('MimeAssociations', root=key) as mak:
                # previous_associations = set(fak.itervalues())
                for ext, prog_id in prog_id_map.iteritems():
                    mt = ext_map[ext]
                    fak.set('.' + ext, prog_id)
                    mak.set(mt, prog_id)
        for ext, prog_id in prog_id_map.iteritems():
            create_prog_id(ext, prog_id, ext_map, exe)
        with Key(r'Software\RegisteredApplications') as key:
            key.set(data['name'], capabilities_path)
    from win32com.shell import shell, shellcon
    shell.SHChangeNotify(shellcon.SHCNE_ASSOCCHANGED,
                         shellcon.SHCNF_DWORD | shellcon.SHCNF_FLUSH, 0, 0)
def __init__(self, href_or_path, basedir=os.getcwdu(), is_path=True):
    from urllib import unquote
    self._href = None
    self._basedir = basedir
    self.path = None
    self.fragment = ''
    try:
        self.mime_type = guess_type(href_or_path)[0]
    except:
        self.mime_type = None
    if self.mime_type is None:
        self.mime_type = 'application/octet-stream'
    if is_path:
        path = href_or_path
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        if isinstance(path, str):
            path = path.decode(sys.getfilesystemencoding())
        self.path = path
    else:
        url = urlparse(href_or_path)
        if url[0] not in ('', 'file'):
            self._href = href_or_path
        else:
            pc = url[2]
            if isinstance(pc, unicode):
                pc = pc.encode('utf-8')
            pc = unquote(pc).decode('utf-8')
            self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
            self.fragment = unquote(url[-1])
def write(self, doc):
    toc = create_toc(doc, self.body, self.resolved_link_map, self.styles,
                     self.object_map, self.log, self.namespace)
    raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
    with lopen(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
        f.write(raw)
    css = self.styles.generate_css(self.dest_dir, self.docx, self.notes_nopb, self.nosupsub)
    if css:
        with lopen(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
            f.write(css.encode('utf-8'))
    opf = OPFCreator(self.dest_dir, self.mi)
    opf.toc = toc
    opf.create_manifest_from_files_in([self.dest_dir])
    for item in opf.manifest:
        if item.media_type == 'text/html':
            item.media_type = guess_type('a.xhtml')[0]
    opf.create_spine(['index.html'])
    if self.cover_image is not None:
        opf.guide.set_cover(self.cover_image)

    def process_guide(E, guide):
        if self.toc_anchor is not None:
            guide.append(E.reference(
                href='index.html#' + self.toc_anchor,
                title=_('Table of Contents'), type='toc'))

    toc_file = os.path.join(self.dest_dir, 'toc.ncx')
    with lopen(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, \
            open(toc_file, 'wb') as ncx:
        opf.render(of, ncx, 'toc.ncx', process_guide=process_guide)
    if os.path.getsize(toc_file) == 0:
        os.remove(toc_file)
    return os.path.join(self.dest_dir, 'metadata.opf')
def load_html(path, view, codec='utf-8', mime_type=None,
              pre_load_callback=lambda x: None, path_is_html=False):
    from PyQt4.Qt import QUrl, QByteArray
    if mime_type is None:
        mime_type = guess_type(path)[0]
    if path_is_html:
        html = path
    else:
        with open(path, 'rb') as f:
            html = f.read().decode(codec, 'replace')
    html = EntityDeclarationProcessor(html).processed_html
    has_svg = re.search(r'<[:a-zA-Z]*svg', html) is not None
    if 'xhtml' in mime_type:
        self_closing_pat = re.compile(r'<([a-z1-6]+)\s+([^>]+)/>', re.IGNORECASE)
        html = self_closing_pat.sub(self_closing_sub, html)
        html = re.sub(ur'<\s*title\s*/\s*>', u'', html, flags=re.IGNORECASE)
    loading_url = QUrl.fromLocalFile(path)
    pre_load_callback(loading_url)
    if has_svg:
        view.setContent(QByteArray(html.encode(codec)), mime_type, loading_url)
    else:
        view.setHtml(html, loading_url)
def __init__(self, path):
    tmpdir = PersistentTemporaryDirectory("_kobo-driver-extended")
    zf = zipfile.ZipFile(path)
    zf.extractall(tmpdir)
    self.root = os.path.abspath(tmpdir)
    self.log = Log()
    self.dirtied = set([])
    self.cache = {}
    self.mime_map = {}
    print("Container:__init__:Got container path {0}".format(self.root))
    if os.path.exists(os.path.join(self.root, 'mimetype')):
        os.remove(os.path.join(self.root, 'mimetype'))
    container_path = os.path.join(self.root, 'META-INF', 'container.xml')
    if not os.path.exists(container_path):
        raise InvalidEpub('No META-INF/container.xml in epub')
    self.container = etree.fromstring(open(container_path, 'rb').read())
    opf_files = self.container.xpath(
        r'child::ocf:rootfiles/ocf:rootfile[@media-type="{0}" and @full-path]'.format(
            guess_type('a.opf')[0]),
        namespaces=self.namespaces)
    if not opf_files:
        raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
    opf_path = os.path.join(self.root, *opf_files[0].get('full-path').split('/'))
    if not os.path.exists(opf_path):
        raise InvalidEpub('OPF file does not exist at location pointed to by META-INF/container.xml')
    # Map of relative paths with / separators to absolute
    # paths on filesystem with os separators
    self.name_map = {}
    for dirpath, dirnames, filenames in os.walk(self.root):
        for f in filenames:
            path = os.path.join(dirpath, f)
            name = os.path.relpath(path, self.root).replace(os.sep, '/')
            self.name_map[name] = path
            self.mime_map[name] = guess_type(f)[0]
            if path == opf_path:
                self.opf_name = name
                self.mime_map[name] = guess_type('a.opf')[0]
    opf = self.opf
    for item in opf.xpath('//opf:manifest/opf:item[@href and @media-type]',
                          namespaces=self.namespaces):
        href = unquote(item.get('href'))
        item.set("href", href)
        self.mime_map[self.href_to_name(
            href, os.path.dirname(self.opf_name).replace(os.sep, '/'))] = item.get('media-type')
    self.set(self.opf_name, opf)
def ACQUISITION_ENTRY(book_id, updated, request_context):
    field_metadata = request_context.db.field_metadata
    mi = request_context.db.get_metadata(book_id)
    extra = []
    if mi.rating > 0:
        rating = rating_to_stars(mi.rating)
        extra.append(_('RATING: %s<br />') % rating)
    if mi.tags:
        extra.append(_('TAGS: %s<br />') % xml(format_tag_string(mi.tags, None)))
    if mi.series:
        extra.append(_('SERIES: %(series)s [%(sidx)s]<br />') % dict(
            series=xml(mi.series), sidx=fmt_sidx(float(mi.series_index))))
    for key in filter(request_context.ctx.is_field_displayable,
                      field_metadata.ignorable_field_keys()):
        name, val = mi.format_field(key)
        if val:
            fm = field_metadata[key]
            datatype = fm['datatype']
            if datatype == 'text' and fm['is_multiple']:
                extra.append('%s: %s<br />' % (
                    xml(name),
                    xml(format_tag_string(val, fm['is_multiple']['ui_to_list'],
                                          joinval=fm['is_multiple']['list_to_ui']))))
            elif datatype == 'comments' or (
                    fm['datatype'] == 'composite' and fm['display'].get('contains_html', False)):
                extra.append('%s: %s<br />' % (xml(name), comments_to_html(unicode(val))))
            else:
                extra.append('%s: %s<br />' % (xml(name), xml(unicode(val))))
    if mi.comments:
        comments = comments_to_html(mi.comments)
        extra.append(comments)
    if extra:
        extra = html_to_lxml('\n'.join(extra))
    ans = E.entry(TITLE(mi.title),
                  E.author(E.name(authors_to_string(mi.authors))),
                  ID('urn:uuid:' + mi.uuid),
                  UPDATED(mi.last_modified),
                  E.published(mi.timestamp.isoformat()))
    if mi.pubdate and not is_date_undefined(mi.pubdate):
        ans.append(ans.makeelement('{%s}date' % DC_NS))
        ans[-1].text = mi.pubdate.isoformat()
    if len(extra):
        ans.append(E.content(extra, type='xhtml'))
    get = partial(request_context.ctx.url_for, '/get', book_id=book_id,
                  library_id=request_context.library_id)
    if mi.formats:
        fm = mi.format_metadata
        for fmt in mi.formats:
            fmt = fmt.lower()
            mt = guess_type('a.' + fmt)[0]
            if mt:
                link = E.link(type=mt, href=get(what=fmt),
                              rel="http://opds-spec.org/acquisition")
                ffm = fm.get(fmt.upper())
                if ffm:
                    link.set('length', str(ffm['size']))
                    link.set('mtime', ffm['mtime'].isoformat())
                ans.append(link)
    ans.append(E.link(type='image/jpeg', href=get(what='cover'),
                      rel="http://opds-spec.org/cover"))
    ans.append(E.link(type='image/jpeg', href=get(what='thumb'),
                      rel="http://opds-spec.org/thumbnail"))
    return ans
def ACQUISITION_ENTRY(book_id, updated, request_context):
    field_metadata = request_context.db.field_metadata
    mi = request_context.db.get_metadata(book_id)
    extra = []
    if mi.rating > 0:
        rating = u"".join(repeat(u"\u2605", int(mi.rating / 2.0)))
        extra.append(_("RATING: %s<br />") % rating)
    if mi.tags:
        extra.append(_("TAGS: %s<br />") % xml(format_tag_string(mi.tags, None)))
    if mi.series:
        extra.append(_("SERIES: %(series)s [%(sidx)s]<br />") % dict(
            series=xml(mi.series), sidx=fmt_sidx(float(mi.series_index))))
    for key in filter(request_context.ctx.is_field_displayable,
                      field_metadata.ignorable_field_keys()):
        name, val = mi.format_field(key)
        if val:
            fm = field_metadata[key]
            datatype = fm["datatype"]
            if datatype == "text" and fm["is_multiple"]:
                extra.append("%s: %s<br />" % (
                    xml(name),
                    xml(format_tag_string(val, fm["is_multiple"]["ui_to_list"],
                                          joinval=fm["is_multiple"]["list_to_ui"]))))
            elif datatype == "comments" or (
                    fm["datatype"] == "composite" and fm["display"].get("contains_html", False)):
                extra.append("%s: %s<br />" % (xml(name), comments_to_html(unicode(val))))
            else:
                extra.append("%s: %s<br />" % (xml(name), xml(unicode(val))))
    if mi.comments:
        comments = comments_to_html(mi.comments)
        extra.append(comments)
    if extra:
        extra = html_to_lxml("\n".join(extra))
    ans = E.entry(TITLE(mi.title),
                  E.author(E.name(authors_to_string(mi.authors))),
                  ID("urn:uuid:" + mi.uuid),
                  UPDATED(updated))
    if len(extra):
        ans.append(E.content(extra, type="xhtml"))
    get = partial(request_context.ctx.url_for, "/get", book_id=book_id,
                  library_id=request_context.library_id)
    if mi.formats:
        for fmt in mi.formats:
            fmt = fmt.lower()
            mt = guess_type("a." + fmt)[0]
            if mt:
                ans.append(E.link(type=mt, href=get(what=fmt),
                                  rel="http://opds-spec.org/acquisition"))
    ans.append(E.link(type="image/jpeg", href=get(what="cover"),
                      rel="http://opds-spec.org/cover"))
    ans.append(E.link(type="image/jpeg", href=get(what="thumb"),
                      rel="http://opds-spec.org/thumbnail"))
    return ans
def __init__(self, path, log):
    self.root = os.path.abspath(path)
    self.log = log
    self.dirtied = set([])
    self.cache = {}
    self.mime_map = {}
    if exists(join(self.root, 'mimetype')):
        os.remove(join(self.root, 'mimetype'))
    container_path = join(self.root, 'META-INF', 'container.xml')
    if not exists(container_path):
        raise InvalidEpub('No META-INF/container.xml in epub')
    self.container = etree.fromstring(open(container_path, 'rb').read())
    opf_files = self.container.xpath((
        r'child::ocf:rootfiles/ocf:rootfile'
        '[@media-type="%s" and @full-path]' % guess_type('a.opf')[0]),
        namespaces={'ocf': OCF_NS})
    if not opf_files:
        raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
    opf_path = os.path.join(self.root, *opf_files[0].get('full-path').split('/'))
    if not exists(opf_path):
        raise InvalidEpub('OPF file does not exist at location pointed to'
                          ' by META-INF/container.xml')
    # Map of relative paths with / separators to absolute
    # paths on filesystem with os separators
    self.name_map = {}
    for dirpath, dirnames, filenames in os.walk(self.root):
        for f in filenames:
            path = join(dirpath, f)
            name = os.path.relpath(path, self.root).replace(os.sep, '/')
            self.name_map[name] = path
            if path == opf_path:
                self.opf_name = name
                self.mime_map[name] = guess_type('a.opf')[0]
    for item in self.opf.xpath(
            '//opf:manifest/opf:item[@href and @media-type]',
            namespaces={'opf': OPF_NS}):
        href = item.get('href')
        self.mime_map[self.href_to_name(
            href, posixpath.dirname(self.opf_name))] = item.get('media-type')
def __call__(self, oeb):
    if not self.body_font_family:
        return None
    if not self.href:
        iid, href = oeb.manifest.generate(u'page_styles', u'page_styles.css')
        rules = [x.cssText for x in self.rules]
        rules = u'\n\n'.join(rules)
        sheet = cssutils.parseString(rules, validate=False)
        self.href = oeb.manifest.add(iid, href, guess_type(href)[0], data=sheet).href
    return self.href
def get_format(self, id, format):
    format = format.upper()
    fm = self.db.format_metadata(id, format, allow_cache=False)
    if not fm:
        raise cherrypy.HTTPError(404, 'book: %d does not have format: %s' % (id, format))
    update_metadata = format in {'MOBI', 'EPUB', 'AZW3'}
    mi = newmi = self.db.get_metadata(
        id, index_is_id=True, cover_as_data=True, get_cover=update_metadata)
    cherrypy.response.headers['Last-Modified'] = \
        self.last_modified(max(fm['mtime'], mi.last_modified))
    fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb')
    if fmt is None:
        raise cherrypy.HTTPError(404, 'book: %d does not have format: %s' % (id, format))
    mt = guess_type('dummy.' + format.lower())[0]
    if mt is None:
        mt = 'application/octet-stream'
    cherrypy.response.headers['Content-Type'] = mt
    if format.lower() in plugboard_content_server_formats:
        # Get any plugboards for the content server
        plugboards = self.db.prefs.get('plugboards', {})
        cpb = find_plugboard(plugboard_content_server_value, format.lower(), plugboards)
        if cpb:
            # Transform the metadata via the plugboard
            newmi = mi.deepcopy_metadata()
            newmi.template_to_attribute(mi, cpb)
    if update_metadata:
        # Write the updated file
        from calibre.ebooks.metadata.meta import set_metadata
        set_metadata(fmt, newmi, format.lower())
        fmt.seek(0)
    fmt.seek(0, 2)
    cherrypy.response.headers['Content-Length'] = fmt.tell()
    fmt.seek(0)
    ua = cherrypy.request.headers.get('User-Agent', '').strip()
    have_kobo_browser = self.is_kobo_browser(ua)
    file_extension = "kepub.epub" if have_kobo_browser and format.lower() == "kepub" else format
    au = authors_to_string(newmi.authors if newmi.authors else [_('Unknown')])
    title = newmi.title if newmi.title else _('Unknown')
    fname = u'%s - %s_%s.%s' % (title[:30], au[:30], id, file_extension.lower())
    fname = ascii_filename(fname).replace('"', '_')
    cherrypy.response.headers['Content-Disposition'] = \
        b'attachment; filename="%s"' % fname
    cherrypy.response.body = fmt
    cherrypy.response.timeout = 3600
    return fmt
def OutputImageFiles(self, path):
    fileNames = []
    for f in self.files:
        fname = os.path.basename(f.fileName)
        root, ext = os.path.splitext(fname)
        if ext in ['.jpeg', '.jpg', '.gif', '.svg', '.png']:
            with open(os.path.join(path, fname), 'wb') as img:
                img.write(f.fileBody)
            fileNames.append((fname, guess_type('a' + ext)[0]))
    return fileNames
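A hedged usage sketch for the method above; the converter variable and the output directory are illustrative names, not from the source:

# Illustrative only: writes each recognised image in self.files to the given
# directory and returns (filename, mimetype) pairs, e.g. for building a manifest.
for fname, mt in converter.OutputImageFiles('/tmp/out'):
    print('%s -> %s' % (fname, mt))  # e.g. 'cover.jpg -> image/jpeg'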
def add_name_to_manifest(self, name, mt=None):
    item = self.manifest_item_for_name(name)
    if item is not None:
        return
    manifest = self.opf.xpath('//opf:manifest', namespaces={'opf': self.OPF_NS})[0]
    item = manifest.makeelement('{%s}item' % self.OPF_NS, nsmap={'opf': self.OPF_NS},
                                href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
                                id=self.generate_manifest_id())
    if not mt:
        mt = guess_type(posixpath.basename(name))[0]
        if not mt:
            mt = 'application/octet-stream'
    item.set('media-type', mt)
    manifest.append(item)
    self.fix_tail(item)
def get_embed_font_info(self, family, failure_critical=True):
    efi = []
    body_font_family = None
    if not family:
        return body_font_family, efi
    from calibre.utils.fonts.scanner import font_scanner, NoFonts
    from calibre.utils.fonts.utils import panose_to_css_generic_family
    try:
        faces = font_scanner.fonts_for_family(family)
    except NoFonts:
        msg = (u'No embeddable fonts found for family: %r' % family)
        if failure_critical:
            raise ValueError(msg)
        self.oeb.log.warn(msg)
        return body_font_family, efi
    if not faces:
        msg = (u'No embeddable fonts found for family: %r' % family)
        if failure_critical:
            raise ValueError(msg)
        self.oeb.log.warn(msg)
        return body_font_family, efi
    for i, font in enumerate(faces):
        ext = 'otf' if font['is_otf'] else 'ttf'
        fid, href = self.oeb.manifest.generate(
            id=u'font',
            href=u'fonts/%s.%s' % (ascii_filename(font['full_name']).replace(u' ', u'-'), ext))
        item = self.oeb.manifest.add(fid, href, guess_type('dummy.' + ext)[0],
                                     data=font_scanner.get_font_data(font))
        item.unload_data_from_memory()
        cfont = {
            u'font-family': u'"%s"' % font['font-family'],
            u'panose-1': u' '.join(map(unicode_type, font['panose'])),
            u'src': u'url(%s)' % item.href,
        }
        if i == 0:
            generic_family = panose_to_css_generic_family(font['panose'])
            body_font_family = u"'%s',%s" % (font['font-family'], generic_family)
            self.oeb.log(u'Embedding font: %s' % font['font-family'])
        for k in (u'font-weight', u'font-style', u'font-stretch'):
            if font[k] != u'normal':
                cfont[k] = font[k]
        rule = '@font-face { %s }' % ('; '.join(u'%s:%s' % (k, v) for k, v in iteritems(cfont)))
        rule = css_parser.parseString(rule)
        efi.append(rule)
    return body_font_family, efi
def do_embed(f):
    data = font_scanner.get_font_data(f)
    name = f['full_name']
    ext = 'otf' if f['is_otf'] else 'ttf'
    name = ascii_filename(name).replace(' ', '-').replace('(', '').replace(')', '')
    fid, href = self.oeb.manifest.generate(id=u'font', href=u'fonts/%s.%s' % (name, ext))
    item = self.oeb.manifest.add(fid, href, guess_type('dummy.' + ext)[0], data=data)
    item.unload_data_from_memory()
    page_sheet = self.get_page_sheet()
    href = page_sheet.relhref(item.href)
    css = '''@font-face { font-family: "%s"; font-weight: %s; font-style: %s; font-stretch: %s; src: url(%s) }''' % (
        f['font-family'], f['font-weight'], f['font-style'], f['font-stretch'], href)
    sheet = self.parser.parseString(css, validate=False)
    page_sheet.data.insertRule(sheet.cssRules[0], len(page_sheet.data.cssRules))
    return find_font_face_rules(sheet, self.oeb)[0]
def get_embed_font_info(self, family, failure_critical=True):
    efi = []
    body_font_family = None
    if not family:
        return body_font_family, efi
    from calibre.utils.fonts.scanner import font_scanner
    from calibre.utils.fonts.utils import panose_to_css_generic_family
    faces = font_scanner.fonts_for_family(family)
    if not faces:
        msg = (u'No embeddable fonts found for family: %r' % self.opts.embed_font_family)
        if failure_critical:
            raise ValueError(msg)
        self.oeb.log.warn(msg)
        return body_font_family, efi
    for i, font in enumerate(faces):
        ext = 'otf' if font['is_otf'] else 'ttf'
        fid, href = self.oeb.manifest.generate(
            id=u'font',
            href=u'fonts/%s.%s' % (ascii_filename(font['full_name']).replace(u' ', u'-'), ext))
        item = self.oeb.manifest.add(fid, href, guess_type('dummy.' + ext)[0],
                                     data=font_scanner.get_font_data(font))
        item.unload_data_from_memory()
        cfont = {
            u'font-family': u'"%s"' % font['font-family'],
            u'panose-1': u' '.join(map(unicode, font['panose'])),
            u'src': u'url(%s)' % item.href,
        }
        if i == 0:
            generic_family = panose_to_css_generic_family(font['panose'])
            body_font_family = u"'%s',%s" % (font['font-family'], generic_family)
            self.oeb.log(u'Embedding font: %s' % font['font-family'])
        for k in (u'font-weight', u'font-style', u'font-stretch'):
            if font[k] != u'normal':
                cfont[k] = font[k]
        rule = '@font-face { %s }' % ('; '.join(u'%s:%s' % (k, v) for k, v in cfont.iteritems()))
        rule = cssutils.parseString(rule)
        efi.append(rule)
    return body_font_family, efi
def unregister():
    for program, data in default_programs().iteritems():
        capabilities_path = cap_path(data)
        ext_map = {ext.lower(): guess_type('file.' + ext.lower())[0]
                   for ext in extensions(program)}
        ext_map = {ext: mt for ext, mt in ext_map.iteritems() if mt}
        prog_id_map = {ext: progid_name(data['assoc_name'], ext) for ext in ext_map}
        with Key(r'Software\RegisteredApplications') as key:
            key.delete_value(data['name'])
        parent, sk = capabilities_path.rpartition('\\')[0::2]
        with Key(parent) as key:
            key.delete_tree(sk)
        for ext, prog_id in prog_id_map.iteritems():
            with Key(r'Software\Classes\.%s\OpenWithProgIDs' % ext) as key:
                key.delete_value(prog_id)
            with Key(r'Software\Classes') as key:
                key.delete_tree(prog_id)
def set_cover(self, mi, prefer_metadata_cover):
    cdata, ext = b'', 'jpg'
    if mi.cover and os.access(mi.cover, os.R_OK):
        with open(mi.cover, 'rb') as f:
            cdata = f.read()
        ext = mi.cover.rpartition('.')[-1].lower().strip()
    elif mi.cover_data and mi.cover_data[-1]:
        cdata = mi.cover_data[1]
        ext = mi.cover_data[0]
    if ext not in ('png', 'jpg', 'jpeg'):
        ext = 'jpg'
    id = old_cover = None
    if 'cover' in self.oeb.guide:
        old_cover = self.oeb.guide['cover']
    if prefer_metadata_cover and old_cover is not None:
        cdata = b''
    if cdata:
        self.oeb.guide.remove('cover')
        self.oeb.guide.remove('titlepage')
    elif self.oeb.plumber_output_format in {'mobi', 'azw3'} and old_cover is not None:
        # The Amazon formats don't support html cover pages, so remove them
        # even if no cover was specified.
        self.oeb.guide.remove('titlepage')
    do_remove_old_cover = False
    if old_cover is not None:
        if old_cover.href in self.oeb.manifest.hrefs:
            item = self.oeb.manifest.hrefs[old_cover.href]
            if not cdata:
                return item.id
            do_remove_old_cover = True
        elif not cdata:
            id = self.oeb.manifest.generate(id='cover')[0]
            self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
            return id
    new_cover_item = None
    if cdata:
        id, href = self.oeb.manifest.generate('cover', 'cover.' + ext)
        new_cover_item = self.oeb.manifest.add(id, href, guess_type('cover.' + ext)[0], data=cdata)
        self.oeb.guide.add('cover', 'Cover', href)
    if do_remove_old_cover:
        self.remove_old_cover(item, new_cover_item.href)
    return id
def insert_cover(self):
    from calibre.ebooks.oeb.base import urldefrag
    g, m = self.oeb.guide, self.oeb.manifest
    item = None
    if 'titlepage' not in g:
        if 'cover' in g:
            href = g['cover'].href
        else:
            href = self.default_cover()
        if href is None:
            return
        width, height = self.inspect_cover(href)
        if width is None or height is None:
            self.log.warning('Failed to read cover dimensions')
            width, height = 600, 800
        # if self.preserve_aspect_ratio:
        #     width, height = 600, 800
        self.svg_template = self.svg_template.replace('__viewbox__',
                                                      '0 0 %d %d' % (width, height))
        self.svg_template = self.svg_template.replace('__width__', str(width))
        self.svg_template = self.svg_template.replace('__height__', str(height))
        if href is not None:
            templ = self.non_svg_template if self.no_svg_cover else self.svg_template
            tp = templ % unquote(href)
            id, href = m.generate('titlepage', u'titlepage.xhtml')
            item = m.add(id, href, guess_type('t.xhtml')[0], data=etree.fromstring(tp))
    else:
        item = self.oeb.manifest.hrefs[urldefrag(self.oeb.guide['titlepage'].href)[0]]
    if item is not None:
        self.oeb.spine.insert(0, item, False)
        if 'cover' not in self.oeb.guide.refs:
            self.oeb.guide.add('cover', 'Title Page', 'a')
        self.oeb.guide.refs['cover'].href = item.href
        if 'titlepage' in self.oeb.guide.refs:
            self.oeb.guide.refs['titlepage'].href = item.href
        titem = getattr(self.oeb.toc, 'item_that_refers_to_cover', None)
        if titem is not None:
            titem.href = item.href
def write(self, path):
    for name in self.dirtied:
        data = self.cache[name]
        raw = data
        if hasattr(data, 'xpath'):
            raw = etree.tostring(data, encoding='utf-8', xml_declaration=True)
        with open(self.name_map[name], 'wb') as f:
            f.write(raw)
    self.dirtied.clear()
    zf = ZipFile(path, 'w')
    zf.writestr('mimetype', bytes(guess_type('a.epub')[0]), compression=ZIP_STORED)
    zf.add_dir(self.root)
    zf.close()
def contenttypes(self):
    E = ElementMaker(namespace=namespaces['ct'], nsmap={None: namespaces['ct']})
    types = E.Types()
    for partname, mt in {
        "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
        "/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
        "/word/numbering.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
        "/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
        "/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
        "/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
        "/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml",
        "/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
        "/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
        "/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml",
        "/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml",
    }.iteritems():
        types.append(E.Override(PartName=partname, ContentType=mt))
    added = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'}
    for ext in added:
        types.append(E.Default(Extension=ext, ContentType=guess_type('a.' + ext)[0]))
    for ext, mt in {
        "rels": "application/vnd.openxmlformats-package.relationships+xml",
        "odttf": "application/vnd.openxmlformats-officedocument.obfuscatedFont",
    }.iteritems():
        added.add(ext)
        types.append(E.Default(Extension=ext, ContentType=mt))
    # TODO: Iterate over all resources and add mimetypes for any that are
    # not already added
    return xml2str(types)
def find_programs(extensions):
    extensions = {ext.lower() for ext in extensions}
    data_dirs = [os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')]
    data_dirs += (os.environ.get('XDG_DATA_DIRS') or '/usr/local/share/:/usr/share/').split(os.pathsep)
    data_dirs = [force_unicode(x, filesystem_encoding).rstrip(os.sep) for x in data_dirs]
    data_dirs = [x for x in data_dirs if x and os.path.isdir(x)]
    desktop_files = {}
    mime_types = {guess_type('file.' + ext)[0] for ext in extensions}
    ans = []
    for base in data_dirs:
        for f in walk(os.path.join(base, 'applications')):
            if f.endswith('.desktop'):
                bn = os.path.basename(f)
                # Keyed by basename, so only the first (highest priority) data
                # dir that provides a given .desktop file is used
                if bn not in desktop_files:
                    desktop_files[bn] = f
    for bn, path in iteritems(desktop_files):
        try:
            data = parse_desktop_file(path)
        except Exception:
            import traceback
            traceback.print_exc()
            continue
        if data is not None and mime_types.intersection(data['MimeType']):
            icon = data.get('Icon', {}).get(None)
            if icon and not os.path.isabs(icon):
                icon = find_icons().get(icon)
                if icon:
                    data['Icon'] = icon
                else:
                    data.pop('Icon')
            if not isinstance(data.get('Icon'), string_or_bytes):
                data.pop('Icon', None)
            for k in ('Name', 'GenericName', 'Comment'):
                val = data.get(k)
                if val:
                    data[k] = localize_string(val)
            ans.append(data)
    ans.sort(key=lambda d: sort_key(d.get('Name')))
    return ans
def do_send_mail(self, book, mail_to, fmt, fpath):
    body = open(fpath).read()
    # read meta info
    author = authors_to_string(book['authors'] if book['authors'] else [_('Unknown')])
    title = book['title'] if book['title'] else _("No Title")
    fname = u'%s - %s.%s' % (title, author, fmt)
    fname = ascii_filename(fname).replace('"', '_')
    # content type
    mt = guess_type('dummy.' + fmt)[0]
    if mt is None:
        mt = 'application/octet-stream'
    # send mail
    mail_from = '*****@*****.**'
    mail_subject = _('Book from Calibre: %(title)s') % vars()
    mail_body = _('We send this book to your Kindle.')
    status = msg = ""
    try:
        msg = create_mail(mail_from, mail_to, mail_subject,
                          text=mail_body, attachment_data=body,
                          attachment_type=mt, attachment_name=fname)
        sendmail(msg, from_=mail_from, to=[mail_to], timeout=30,
                 username=tweaks['smtp_username'],
                 password=tweaks['smtp_password'])
        status = "success"
        msg = _('Send to Kindle succeeded! email: %(mail_to)s') % vars()
    except:
        import traceback
        cherrypy.log.error('Failed to send mail:')
        cherrypy.log.error(traceback.format_exc())
        status = "danger"
        msg = traceback.format_exc()
    messages.append({'status': status, 'msg': msg})
    return
def __new__(cls, path, mime_type=None, read_anchor_map=True,
            run_char_count=True, from_epub=False, read_links=True):
    ppath = path.partition('#')[0]
    if not os.path.exists(path) and os.path.exists(ppath):
        path = ppath
    obj = super(SpineItem, cls).__new__(cls, path)
    with open(path, 'rb') as f:
        raw = f.read()
    if from_epub:
        # According to the spec, HTML in EPUB must be encoded in utf-8 or
        # utf-16. Furthermore, there exist epub files produced by the usual
        # incompetents that have utf-8 encoded HTML files that contain
        # incorrect encoding declarations. See
        # http://www.idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#Section1.4.1.2
        # http://www.idpf.org/epub/30/spec/epub30-publications.html#confreq-xml-enc
        # https://bugs.launchpad.net/bugs/1188843
        # So we first decode with utf-8 and only if that fails we try xml_to_unicode. This
        # is the same algorithm as that used by the conversion pipeline (modulo
        # some BOM based detection). Sigh.
        try:
            raw, obj.encoding = raw.decode('utf-8'), 'utf-8'
        except UnicodeDecodeError:
            raw, obj.encoding = xml_to_unicode(raw)
    else:
        raw, obj.encoding = xml_to_unicode(raw)
    obj.character_count = character_count(raw) if run_char_count else 10000
    obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
    obj.all_links = all_links(raw) if read_links else set()
    obj.verified_links = set()
    obj.start_page = -1
    obj.pages = -1
    obj.max_page = -1
    obj.index_entries = []
    if mime_type is None:
        mime_type = guess_type(obj)[0]
    obj.mime_type = mime_type
    obj.is_single_page = None
    return obj
def get_format(self, id, format):
    format = format.upper()
    fm = self.db.format_metadata(id, format, allow_cache=False)
    if not fm:
        raise web.HTTPError(404, 'book: %d does not have format: %s' % (id, format))
    mi = newmi = self.db.get_metadata(id, index_is_id=True)
    self.set_header('Last-Modified',
                    self.last_modified(max(fm['mtime'], mi.last_modified)))
    fmt = self.db.format(id, format, index_is_id=True, as_file=True, mode='rb')
    if fmt is None:
        raise web.HTTPError(404, 'book: %d does not have format: %s' % (id, format))
    mt = guess_type('dummy.' + format.lower())[0]
    if mt is None:
        mt = 'application/octet-stream'
    self.set_header('Content-Type', mt)
    if format == 'EPUB':
        # Get the original metadata
        # Get any EPUB plugboards for the content server
        plugboards = self.db.prefs.get('plugboards', {})
        cpb = find_plugboard(plugboard_content_server_value, 'epub', plugboards)
        if cpb:
            # Transform the metadata via the plugboard
            newmi = mi.deepcopy_metadata()
            newmi.template_to_attribute(mi, cpb)
    if format in ('MOBI', 'EPUB'):
        # Write the updated file
        set_metadata(fmt, newmi, format.lower())
        fmt.seek(0)
    fmt.seek(0, 2)
    self.set_header('Content-Length', fmt.tell())
    fmt.seek(0)
    au = authors_to_string(newmi.authors if newmi.authors else [_('Unknown')])
    title = newmi.title if newmi.title else _('Unknown')
    fname = u'%s - %s_%s.%s' % (title[:30], au[:30], id, format.lower())
    fname = ascii_filename(fname).replace('"', '_')
    self.set_header('Content-Disposition', b'attachment; filename="%s"' % fname)
    return fmt
def _parse_cover_data(root, imgid, mi, ctx):
    from calibre.ebooks.fb2 import base64_decode
    elm_binary = ctx.XPath('//fb:binary[@id="%s"]' % imgid)(root)
    if elm_binary:
        mimetype = elm_binary[0].get('content-type', 'image/jpeg')
        mime_extensions = guess_all_extensions(mimetype)
        if not mime_extensions and mimetype.startswith('image/'):
            mimetype_fromid = guess_type(imgid)[0]
            if mimetype_fromid and mimetype_fromid.startswith('image/'):
                mime_extensions = guess_all_extensions(mimetype_fromid)
        if mime_extensions:
            pic_data = elm_binary[0].text
            if pic_data:
                mi.cover_data = (mime_extensions[0][1:],
                                 base64_decode(pic_data.strip()))
        else:
            prints("WARNING: Unsupported coverpage mime-type '%s' (id=#%s)" % (mimetype, imgid))
def add_name_to_manifest(self, name, mt=None):
    item = self.manifest_item_for_name(name)
    if item is not None:
        return
    manifest = self.opf.xpath('//opf:manifest', namespaces={'opf': OPF_NS})[0]
    item = manifest.makeelement('{%s}item' % OPF_NS, nsmap={'opf': OPF_NS},
                                href=self.name_to_href(name, posixpath.dirname(self.opf_name)),
                                id=self.generate_manifest_id())
    if not mt:
        mt = guess_type(posixpath.basename(name))[0]
        if not mt:
            mt = 'application/octet-stream'
    item.set('media-type', mt)
    manifest.append(item)
    self.fix_tail(item)
def compose_mail(from_, to, text, subject=None, attachment=None,
                 attachment_name=None):
    attachment_type = attachment_data = None
    if attachment is not None:
        try:
            from calibre import guess_type
            guess_type
        except ImportError:
            from mimetypes import guess_type
        attachment_data = attachment.read() if hasattr(attachment, 'read') \
            else open(attachment, 'rb').read()
        attachment_type = guess_type(getattr(attachment, 'name', attachment))[0]
        if attachment_name is None:
            attachment_name = os.path.basename(getattr(attachment, 'name', attachment))
    subject = subject if subject else 'no subject'
    return create_mail(from_, to, subject, text=text,
                       attachment_data=attachment_data,
                       attachment_type=attachment_type,
                       attachment_name=attachment_name)
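A hedged usage sketch of compose_mail(); the addresses and attachment path are placeholders, not from the source:

# Illustrative only: the attachment may be a path or a file-like object; its MIME
# type and name are derived from it when not given explicitly.
msg = compose_mail('from@example.com', 'to@example.com', 'See the attached book.',
                   subject='A book', attachment='/tmp/book.epub')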
def register():
    base = os.path.dirname(sys.executable)
    for program, data in default_programs().iteritems():
        data = data.copy()
        exe = os.path.join(base, program)
        capabilities_path = cap_path(data)
        ext_map = {ext.lower(): guess_type('file.' + ext.lower())[0]
                   for ext in extensions(program)}
        ext_map = {ext: mt for ext, mt in ext_map.iteritems() if mt}
        prog_id_map = {ext: progid_name(data['assoc_name'], ext) for ext in ext_map}
        with Key(capabilities_path) as key:
            for k, v in {'ApplicationDescription': 'description',
                         'ApplicationName': 'name'}.iteritems():
                key.set(k, data[v])
            key.set('ApplicationIcon', '%s,0' % exe)
            key.set_default_value(r'shell\open\command', '"%s" "%%1"' % exe)
            with Key('FileAssociations', root=key) as fak, Key('MimeAssociations', root=key) as mak:
                # previous_associations = set(fak.itervalues())
                for ext, prog_id in prog_id_map.iteritems():
                    mt = ext_map[ext]
                    fak.set('.' + ext, prog_id)
                    mak.set(mt, prog_id)
        for ext, prog_id in prog_id_map.iteritems():
            create_prog_id(ext, prog_id, ext_map, exe)
        with Key(r'Software\RegisteredApplications') as key:
            key.set(data['name'], capabilities_path)
    from win32com.shell import shell, shellcon
    shell.SHChangeNotify(shellcon.SHCNE_ASSOCCHANGED,
                         shellcon.SHCNF_DWORD | shellcon.SHCNF_FLUSH, 0, 0)
def register():
    base = os.path.dirname(sys.executable)
    for program, data in iteritems(default_programs()):
        data = data.copy()
        exe = os.path.join(base, program)
        capabilities_path = cap_path(data)
        ext_map = {ext.lower(): guess_type('file.' + ext.lower())[0]
                   for ext in extensions(program)}
        ext_map = {ext: mt for ext, mt in iteritems(ext_map) if mt}
        prog_id_map = {ext: progid_name(data['assoc_name'], ext) for ext in ext_map}
        with Key(capabilities_path) as key:
            for k, v in iteritems({'ApplicationDescription': 'description',
                                   'ApplicationName': 'name'}):
                key.set(k, data[v])
            key.set('ApplicationIcon', '%s,0' % exe)
            key.set_default_value(r'shell\open\command', '"%s" "%%1"' % exe)
            with Key('FileAssociations', root=key) as fak, Key('MimeAssociations', root=key) as mak:
                # previous_associations = set(fak.values())
                for ext, prog_id in iteritems(prog_id_map):
                    mt = ext_map[ext]
                    fak.set('.' + ext, prog_id)
                    mak.set(mt, prog_id)
        for ext, prog_id in iteritems(prog_id_map):
            create_prog_id(ext, prog_id, ext_map, exe)
        with Key(r'Software\RegisteredApplications') as key:
            key.set(data['name'], capabilities_path)
    winutil = plugins['winutil'][0]
    winutil.notify_associations_changed()
def add_name_to_manifest(self, name, mt=None):
    item = self.manifest_item_for_name(name)
    if item is not None:
        return
    self.log.debug("Adding '{0}' to the manifest".format(name))
    manifest = self.opf.xpath('//opf:manifest', namespaces=self.namespaces)[0]
    item = manifest.makeelement('{%s}item' % self.namespaces['opf'],
                                href=self.name_to_href(name, os.path.dirname(self.opf_name)),
                                id=self.generate_manifest_id())
    if not mt:
        mt = guess_type(os.path.basename(name))[0]
        if not mt:
            mt = 'application/octet-stream'
    item.set('media-type', mt)
    manifest.append(item)
    self.fix_tail(item)
    self.set(self.opf_name, self.opf)
    self.name_map[name] = os.path.join(self.root, name)
    self.mime_map[name] = mt
def add_formats_from_clipboard(self):
    ids = self._check_add_formats_ok()
    if not ids:
        return
    md = QApplication.instance().clipboard().mimeData()
    files_to_add = []
    images = []
    if md.hasUrls():
        for url in md.urls():
            if url.isLocalFile():
                path = url.toLocalFile()
                if os.access(path, os.R_OK):
                    mt = guess_type(path)[0]
                    if mt and mt.startswith('image/'):
                        images.append(path)
                    else:
                        files_to_add.append(path)
    if not files_to_add and not images:
        return error_dialog(self.gui, _('No files in clipboard'),
                            _('No files have been copied to the clipboard'), show=True)
    if files_to_add:
        self._add_formats(files_to_add, ids)
    if images:
        if len(ids) > 1 and not question_dialog(
                self.gui, _('Are you sure?'),
                _('Are you sure you want to set the same'
                  ' cover for all %d books?') % len(ids)):
            return
        with lopen(images[0], 'rb') as f:
            cdata = f.read()
        self.gui.current_db.new_api.set_cover({book_id: cdata for book_id in ids})
        self.gui.refresh_cover_browser()
        m = self.gui.library_view.model()
        current = self.gui.library_view.currentIndex()
        m.current_changed(current, current)
def __new__(cls, path, mime_type=None, read_anchor_map=True, run_char_count=True):
    ppath = path.partition('#')[0]
    if not os.path.exists(path) and os.path.exists(ppath):
        path = ppath
    obj = super(SpineItem, cls).__new__(cls, path)
    with open(path, 'rb') as f:
        raw = f.read()
    raw, obj.encoding = xml_to_unicode(raw)
    obj.character_count = character_count(raw) if run_char_count else 10000
    obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
    obj.start_page = -1
    obj.pages = -1
    obj.max_page = -1
    obj.index_entries = []
    if mime_type is None:
        mime_type = guess_type(obj)[0]
    obj.mime_type = mime_type
    return obj
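A hedged usage sketch; SpineItem subclasses an immutable string type (it passes the path to the parent __new__), so the instance behaves as the path itself while carrying the computed attributes. The path is a placeholder:

# Illustrative only.
item = SpineItem('/tmp/book/chapter1.xhtml')
print(item.mime_type, item.character_count, len(item.anchor_map))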
def embed_font(self, style):
    ff = [unicode(f) for f in style.get('font-family', [])
          if unicode(f).lower() not in {
              'serif', 'sansserif', 'sans-serif', 'fantasy', 'cursive', 'monospace'}]
    if not ff:
        return
    ff = ff[0]
    if ff in self.warned or ff == 'inherit':
        return
    try:
        fonts = font_scanner.fonts_for_family(ff)
    except NoFonts:
        self.log.warn('Failed to find fonts for family:', ff, 'not embedding')
        self.warned.add(ff)
        return
    try:
        weight = int(style.get('font-weight', '400'))
    except (ValueError, TypeError, AttributeError):
        w = style['font-weight']
        if w not in self.warned2:
            self.log.warn('Invalid weight in font style: %r' % w)
            self.warned2.add(w)
        return
    for f in fonts:
        if f['weight'] == weight and f['font-style'] == style.get('font-style', 'normal') \
                and f['font-stretch'] == style.get('font-stretch', 'normal'):
            self.log('Embedding font %s from %s' % (f['full_name'], f['path']))
            data = font_scanner.get_font_data(f)
            name = f['full_name']
            ext = 'otf' if f['is_otf'] else 'ttf'
            name = ascii_filename(name).replace(' ', '-').replace('(', '').replace(')', '')
            fid, href = self.oeb.manifest.generate(id=u'font', href=u'fonts/%s.%s' % (name, ext))
            item = self.oeb.manifest.add(fid, href, guess_type('dummy.' + ext)[0], data=data)
            item.unload_data_from_memory()
            page_sheet = self.get_page_sheet()
            href = page_sheet.relhref(item.href)
            css = '''@font-face { font-family: "%s"; font-weight: %s; font-style: %s; font-stretch: %s; src: url(%s) }''' % (
                f['font-family'], f['font-weight'], f['font-style'], f['font-stretch'], href)
            sheet = self.parser.parseString(css, validate=False)
            page_sheet.data.insertRule(sheet.cssRules[0], len(page_sheet.data.cssRules))
            return find_font_face_rules(sheet, self.oeb)[0]
def write(self, doc):
    toc = create_toc(doc, self.body, self.resolved_link_map, self.styles,
                     self.object_map, self.log, self.namespace)
    raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
    with lopen(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
        f.write(raw)
    css = self.styles.generate_css(self.dest_dir, self.docx, self.notes_nopb, self.nosupsub)
    if css:
        with lopen(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
            f.write(css.encode('utf-8'))
    opf = OPFCreator(self.dest_dir, self.mi)
    opf.toc = toc
    opf.create_manifest_from_files_in([self.dest_dir])
    for item in opf.manifest:
        if item.media_type == 'text/html':
            item.media_type = guess_type('a.xhtml')[0]
    opf.create_spine(['index.html'])
    if self.cover_image is not None:
        opf.guide.set_cover(self.cover_image)

    def process_guide(E, guide):
        if self.toc_anchor is not None:
            guide.append(
                E.reference(href='index.html#' + self.toc_anchor,
                            title=_('Table of Contents'), type='toc'))

    toc_file = os.path.join(self.dest_dir, 'toc.ncx')
    with lopen(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, \
            open(toc_file, 'wb') as ncx:
        opf.render(of, ncx, 'toc.ncx', process_guide=process_guide)
    if os.path.getsize(toc_file) == 0:
        os.remove(toc_file)
    return os.path.join(self.dest_dir, 'metadata.opf')
def set_cover(self, mi, prefer_metadata_cover): cdata, ext = '', 'jpg' if mi.cover and os.access(mi.cover, os.R_OK): cdata = open(mi.cover, 'rb').read() ext = mi.cover.rpartition('.')[-1].lower().strip() elif mi.cover_data and mi.cover_data[-1]: cdata = mi.cover_data[1] ext = mi.cover_data[0] if ext not in ('png', 'jpg', 'jpeg'): ext = 'jpg' id = old_cover = None if 'cover' in self.oeb.guide: old_cover = self.oeb.guide['cover'] if prefer_metadata_cover and old_cover is not None: cdata = '' if cdata: self.oeb.guide.remove('cover') self.oeb.guide.remove('titlepage') if old_cover is not None: if old_cover.href in self.oeb.manifest.hrefs: item = self.oeb.manifest.hrefs[old_cover.href] if not cdata: return item.id self.remove_old_cover(item) elif not cdata: id = self.oeb.manifest.generate(id='cover')[0] self.oeb.manifest.add(id, old_cover.href, 'image/jpeg') return id if cdata: id, href = self.oeb.manifest.generate('cover', 'cover.' + ext) self.oeb.manifest.add(id, href, guess_type('cover.' + ext)[0], data=cdata) self.oeb.guide.add('cover', 'Cover', href) return id
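A minimal sketch of the cover-extension normalisation performed above; cover_name_and_type is a hypothetical helper and mimetypes.guess_type stands in for guess_type.

from mimetypes import guess_type

def cover_name_and_type(ext):
    ext = (ext or '').lower().strip()
    if ext not in ('png', 'jpg', 'jpeg'):
        ext = 'jpg'  # anything unrecognised is stored as JPEG, as above
    name = 'cover.' + ext
    return name, guess_type(name)[0]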
def finalize_output(self, output, request, is_http1): none_match = parse_if_none_match( request.inheaders.get('If-None-Match', '')) if isinstance(output, ETaggedDynamicOutput): matched = '*' in none_match or (output.etag and output.etag in none_match) if matched: if self.method in ('GET', 'HEAD'): self.send_not_modified(output.etag) else: self.simple_response(httplib.PRECONDITION_FAILED) return opts = self.opts outheaders = request.outheaders stat_result = file_metadata(output) if stat_result is not None: output = filesystem_file_output(output, outheaders, stat_result) if 'Content-Type' not in outheaders: mt = guess_type(output.name)[0] if mt: if mt in { 'text/plain', 'text/html', 'application/javascript', 'text/css' }: mt += '; charset=UTF-8' outheaders['Content-Type'] = mt elif isinstance(output, (bytes, type(''))): output = dynamic_output(output, outheaders) elif hasattr(output, 'read'): output = ReadableOutput(output) elif isinstance(output, StaticOutput): output = ReadableOutput(ReadOnlyFileBuffer(output.data), etag=output.etag, content_length=output.content_length) elif isinstance(output, ETaggedDynamicOutput): output = dynamic_output(output(), outheaders, etag=output.etag) else: output = GeneratedOutput(output) ct = outheaders.get('Content-Type', '').partition(';')[0] compressible = (not ct or ct.startswith('text/') or ct.startswith('image/svg') or ct.partition(';')[0] in COMPRESSIBLE_TYPES) compressible = (compressible and request.status_code == httplib.OK and (opts.compress_min_size > -1 and output.content_length >= opts.compress_min_size) and acceptable_encoding( request.inheaders.get('Accept-Encoding', '')) and not is_http1) accept_ranges = (not compressible and output.accept_ranges is not None and request.status_code == httplib.OK and not is_http1) ranges = get_ranges( request.inheaders.get('Range'), output.content_length ) if output.accept_ranges and self.method in ('GET', 'HEAD') else None if_range = (request.inheaders.get('If-Range') or '').strip() if if_range and if_range != output.etag: ranges = None if ranges is not None and not ranges: return self.send_range_not_satisfiable(output.content_length) for header in ('Accept-Ranges', 'Content-Encoding', 'Transfer-Encoding', 'ETag', 'Content-Length'): outheaders.pop(header, all=True) matched = '*' in none_match or (output.etag and output.etag in none_match) if matched: if self.method in ('GET', 'HEAD'): self.send_not_modified(output.etag) else: self.simple_response(httplib.PRECONDITION_FAILED) return output.ranges = None if output.etag and self.method in ('GET', 'HEAD'): outheaders.set('ETag', output.etag, replace_all=True) if accept_ranges: outheaders.set('Accept-Ranges', 'bytes', replace_all=True) if compressible and not ranges: outheaders.set('Content-Encoding', 'gzip', replace_all=True) if getattr(output, 'content_length', None): outheaders.set('Calibre-Uncompressed-Length', '%d' % output.content_length) output = GeneratedOutput(compress_readable_output(output.src_file), etag=output.etag) if output.content_length is not None and not compressible and not ranges: outheaders.set('Content-Length', '%d' % output.content_length, replace_all=True) if compressible or output.content_length is None: outheaders.set('Transfer-Encoding', 'chunked', replace_all=True) if ranges: if len(ranges) == 1: r = ranges[0] outheaders.set('Content-Length', '%d' % r.size, replace_all=True) outheaders.set('Content-Range', 'bytes %d-%d/%d' % (r.start, r.stop, output.content_length), replace_all=True) output.ranges = r else: range_parts = 
get_range_parts(ranges, outheaders.get('Content-Type'), output.content_length) size = sum(map(len, range_parts)) + sum(r.size + 4 for r in ranges) outheaders.set('Content-Length', '%d' % size, replace_all=True) outheaders.set('Content-Type', 'multipart/byteranges; boundary=' + MULTIPART_SEPARATOR, replace_all=True) output.ranges = zip_longest(ranges, range_parts) request.status_code = httplib.PARTIAL_CONTENT return output
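The Content-Type and compressibility decisions above can be sketched on their own; TEXTUAL mirrors the hard-coded set in the handler, while COMPRESSIBLE is only a stand-in for the server's COMPRESSIBLE_TYPES constant.

from mimetypes import guess_type

TEXTUAL = {'text/plain', 'text/html', 'application/javascript', 'text/css'}
COMPRESSIBLE = {'application/json', 'application/xml'}  # illustrative stand-in

def content_type_for(path):
    # Text types are served with an explicit UTF-8 charset.
    mt = guess_type(path)[0]
    if mt in TEXTUAL:
        mt += '; charset=UTF-8'
    return mt

def is_compressible(content_type):
    # gzip is considered only for empty, textual, SVG or whitelisted types.
    ct = (content_type or '').partition(';')[0].strip()
    return (not ct or ct.startswith('text/') or ct.startswith('image/svg')
            or ct in COMPRESSIBLE)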
def _manifest_add_missing(self, invalid): import cssutils manifest = self.oeb.manifest known = set(manifest.hrefs) unchecked = set(manifest.values()) cdoc = OEB_DOCS|OEB_STYLES invalid = set() while unchecked: new = set() for item in unchecked: data = None if (item.media_type in cdoc or item.media_type[-4:] in ('/xml', '+xml')): try: data = item.data except: self.oeb.log.exception(u'Failed to read from manifest ' u'entry with id: %s, ignoring'%item.id) invalid.add(item) continue if data is None: continue if (item.media_type in OEB_DOCS or item.media_type[-4:] in ('/xml', '+xml')): hrefs = [r[2] for r in iterlinks(data)] for href in hrefs: if isinstance(href, bytes): href = href.decode('utf-8') href, _ = urldefrag(href) if not href: continue try: href = item.abshref(urlnormalize(href)) scheme = urlparse(href).scheme except: self.oeb.log.exception( 'Skipping invalid href: %r'%href) continue if not scheme and href not in known: new.add(href) elif item.media_type in OEB_STYLES: try: urls = list(cssutils.getUrls(data)) except: urls = [] for url in urls: href, _ = urldefrag(url) href = item.abshref(urlnormalize(href)) scheme = urlparse(href).scheme if not scheme and href not in known: new.add(href) unchecked.clear() warned = set([]) for href in new: known.add(href) is_invalid = False for item in invalid: if href == item.abshref(urlnormalize(href)): is_invalid = True break if is_invalid: continue if not self.oeb.container.exists(href): if href not in warned: self.logger.warn('Referenced file %r not found' % href) warned.add(href) continue if href not in warned: self.logger.warn('Referenced file %r not in manifest' % href) warned.add(href) id, _ = manifest.generate(id='added') guessed = guess_type(href)[0] media_type = guessed or BINARY_MIME added = manifest.add(id, href, media_type) unchecked.add(added) for item in invalid: self.oeb.manifest.remove(item)
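A small sketch of the media-type fallback used when adding newly discovered files to the manifest; BINARY_MIME is spelled out literally here rather than imported, as an assumed value of the constant above.

from mimetypes import guess_type

BINARY_MIME = 'application/octet-stream'  # assumed value of the constant above

def media_type_for(href):
    # Unknown extensions fall back to a generic binary type.
    return guess_type(href)[0] or BINARY_MIME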
def update_text_record(self, record, book, path, bl_index, gtz_count, ltz_count, use_tz_var): ''' Update the Sony database from the book. This is done if the timestamp in the db differs from the timestamp on the file. ''' # It seems that a Sony device can sometimes know what timezone it is in, # and apparently converts the dates to GMT when it writes them to its # DB. We can detect that a device is timezone-aware because there is a # 'tz' variable in the Sony DB, which we can set to "0" to tell the # device to ignore its own timezone when comparing mtime to the date in # the DB. # Unfortunately, if there is no tz variable in the DB, then we can't # tell when the device applies a timezone conversion. We use a horrible # heuristic to work around this problem. First, set dates only for new # books, trying to avoid upsetting the sony. Second, voting: if a book # is not new, compare its Sony DB date against localtime and gmtime. # Count the matches. When we must set a date, use the one with the most # matches. Use localtime in the case of a tie, and hope it is right. try: timestamp = os.path.getmtime(path) except: debug_print('Failed to get timestamp for:', path) timestamp = time.time() rec_date = record.get('date', None) def clean(x): if isbytestring(x): x = x.decode(preferred_encoding, 'replace') x = x.replace(u'\0', '') return x def record_set(k, v): try: record.set(k, clean(v)) except: # v is not suitable for XML, ignore pass if not getattr(book, '_new_book', False): # book is not new if record.get('tz', None) is not None: use_tz_var = True if strftime(timestamp, zone=time.gmtime) == rec_date: gtz_count += 1 elif strftime(timestamp, zone=time.localtime) == rec_date: ltz_count += 1 else: # book is new. Set the time using the current votes if use_tz_var: tz = time.localtime record.set('tz', '0') debug_print("Use localtime TZ and tz='0' for new book", book.lpath) elif ltz_count >= gtz_count: tz = time.localtime debug_print("Use localtime TZ for new book", book.lpath) else: tz = time.gmtime debug_print("Use GMT TZ for new book", book.lpath) date = strftime(timestamp, zone=tz) record.set('date', clean(date)) try: record.set('size', clean(str(os.stat(path).st_size))) except: record.set('size', '0') title = book.title if book.title else _('Unknown') record_set('title', title) ts = book.title_sort if not ts: ts = title_sort(title) record_set('titleSorter', ts) if self.use_author_sort: if book.author_sort: aus = book.author_sort else: debug_print('Author_sort is None for book', book.lpath) aus = authors_to_sort_string(book.authors) record_set('author', aus) else: record_set('author', authors_to_string(book.authors)) ext = os.path.splitext(path)[1] if ext: ext = ext[1:].lower() mime = MIME_MAP.get(ext, None) if mime is None: mime = guess_type('a.'+ext)[0] if mime is not None: record.set('mime', clean(mime)) if 'sourceid' not in record.attrib: record.set('sourceid', '1') if 'id' not in record.attrib: num = self.max_id(record.getroottree().getroot()) record.set('id', str(num+1)) return (gtz_count, ltz_count, use_tz_var)
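The extension-to-MIME lookup near the end of the method can be sketched as follows; the MIME_MAP below is a toy subset, not the driver's real table.

import os
from mimetypes import guess_type

MIME_MAP = {'lrf': 'application/x-sony-bbeb'}  # illustrative subset only

def mime_for_path(path):
    ext = os.path.splitext(path)[1][1:].lower()
    if not ext:
        return None
    # Prefer the device table, then fall back to a guess from the extension.
    return MIME_MAP.get(ext) or guess_type('a.' + ext)[0]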
def test_http_response(self): # {{{ 'Test HTTP protocol responses' from calibre.srv.http_response import parse_multipart_byterange def handler(conn): return conn.generate_static_output('test', lambda: ''.join(conn.path)) with NamedTemporaryFile(suffix='test.epub') as f, open(P('localization/locales.zip'), 'rb') as lf, \ TestServer(handler, timeout=1, compress_min_size=0) as server: fdata = (string.ascii_letters * 100).encode('ascii') f.write(fdata), f.seek(0) # Test ETag conn = server.connect() conn.request('GET', '/an_etagged_path') r = conn.getresponse() self.ae(r.status, http_client.OK), self.ae(r.read(), b'an_etagged_path') etag = r.getheader('ETag') self.ae(etag, '"%s"' % hashlib.sha1(b'an_etagged_path').hexdigest()) conn.request('GET', '/an_etagged_path', headers={'If-None-Match': etag}) r = conn.getresponse() self.ae(r.status, http_client.NOT_MODIFIED) self.ae(r.read(), b'') # Test gzip raw = b'a' * 20000 server.change_handler(lambda conn: raw) conn = server.connect() conn.request('GET', '/an_etagged_path', headers={'Accept-Encoding': 'gzip'}) r = conn.getresponse() self.ae(unicode_type(len(raw)), r.getheader('Calibre-Uncompressed-Length')) self.ae(r.status, http_client.OK), self.ae( zlib.decompress(r.read(), 16 + zlib.MAX_WBITS), raw) # Test dynamic etagged content num_calls = [0] def edfunc(): num_calls[0] += 1 return b'data' server.change_handler( lambda conn: conn.etagged_dynamic_response("xxx", edfunc)) conn = server.connect() conn.request('GET', '/an_etagged_path') r = conn.getresponse() self.ae(r.status, http_client.OK), self.ae(r.read(), b'data') etag = r.getheader('ETag') self.ae(etag, '"xxx"') self.ae(r.getheader('Content-Length'), '4') conn.request('GET', '/an_etagged_path', headers={'If-None-Match': etag}) r = conn.getresponse() self.ae(r.status, http_client.NOT_MODIFIED) self.ae(r.read(), b'') self.ae(num_calls[0], 1) # Test getting a filesystem file for use_sendfile in (True, False): server.change_handler(lambda conn: f) server.loop.opts.use_sendfile = use_sendfile conn = server.connect() conn.request('GET', '/test') r = conn.getresponse() etag = unicode_type(r.getheader('ETag')) self.assertTrue(etag) self.ae(r.getheader('Content-Type'), guess_type(f.name)[0]) self.ae(unicode_type(r.getheader('Accept-Ranges')), 'bytes') self.ae(int(r.getheader('Content-Length')), len(fdata)) self.ae(r.status, http_client.OK), self.ae(r.read(), fdata) conn.request('GET', '/test', headers={'Range': 'bytes=2-25'}) r = conn.getresponse() self.ae(r.status, http_client.PARTIAL_CONTENT) self.ae(unicode_type(r.getheader('Accept-Ranges')), 'bytes') self.ae(unicode_type(r.getheader('Content-Range')), 'bytes 2-25/%d' % len(fdata)) self.ae(int(r.getheader('Content-Length')), 24) self.ae(r.read(), fdata[2:26]) conn.request('GET', '/test', headers={'Range': 'bytes=100000-'}) r = conn.getresponse() self.ae(r.status, http_client.REQUESTED_RANGE_NOT_SATISFIABLE) self.ae(unicode_type(r.getheader('Content-Range')), 'bytes */%d' % len(fdata)) conn.request('GET', '/test', headers={ 'Range': 'bytes=25-50', 'If-Range': etag }) r = conn.getresponse() self.ae(r.status, http_client.PARTIAL_CONTENT), self.ae( r.read(), fdata[25:51]) self.ae(int(r.getheader('Content-Length')), 26) conn.request('GET', '/test', headers={'Range': 'bytes=0-1000000'}) r = conn.getresponse() self.ae(r.status, http_client.PARTIAL_CONTENT), self.ae(r.read(), fdata) conn.request('GET', '/test', headers={ 'Range': 'bytes=25-50', 'If-Range': '"nomatch"' }) r = conn.getresponse() self.ae(r.status, http_client.OK), self.ae(r.read(), fdata) 
self.assertFalse(r.getheader('Content-Range')) self.ae(int(r.getheader('Content-Length')), len(fdata)) conn.request('GET', '/test', headers={'Range': 'bytes=0-25,26-50'}) r = conn.getresponse() self.ae(r.status, http_client.PARTIAL_CONTENT) clen = int(r.getheader('Content-Length')) data = r.read() self.ae(clen, len(data)) buf = BytesIO(data) self.ae( parse_multipart_byterange(buf, r.getheader('Content-Type')), [(0, fdata[:26]), (26, fdata[26:51])]) # Test sending of larger file start_time = monotonic() lf.seek(0) data = lf.read() server.change_handler(lambda conn: lf) conn = server.connect(timeout=1) conn.request('GET', '/test') r = conn.getresponse() self.ae(r.status, http_client.OK) rdata = r.read() self.ae(len(data), len(rdata)) self.ae( hashlib.sha1(data).hexdigest(), hashlib.sha1(rdata).hexdigest()) self.ae(data, rdata) time_taken = monotonic() - start_time self.assertLess(time_taken, 1, 'Large file transfer took too long')
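A minimal client-side sketch of the Range handshake exercised by the test above, written against the standard http.client module; the host, port and path are placeholders.

import http.client

conn = http.client.HTTPConnection('localhost', 8080)  # placeholder address
conn.request('GET', '/test', headers={'Range': 'bytes=2-25'})
r = conn.getresponse()
# A range-aware server answers 206 with a Content-Range header and exactly
# 24 bytes (positions 2..25 inclusive).
assert r.status == http.client.PARTIAL_CONTENT
assert r.getheader('Content-Range', '').startswith('bytes 2-25/')
body = r.read()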
def __enter__(self, processed=False, only_input_plugin=False, run_char_count=True, read_anchor_map=True, extract_embedded_fonts_for_qt=False): ''' Convert an ebook file into an exploded OEB book suitable for display in viewers/preprocessing etc. ''' from calibre.ebooks.conversion.plumber import Plumber, create_oebbook self.delete_on_exit = [] self._tdir = TemporaryDirectory('_ebook_iter') self.base = self._tdir.__enter__() plumber = Plumber(self.pathtoebook, self.base, self.log) plumber.setup_options() if self.pathtoebook.lower().endswith('.opf'): plumber.opts.dont_package = True if hasattr(plumber.opts, 'no_process'): plumber.opts.no_process = True plumber.input_plugin.for_viewer = True with plumber.input_plugin, open(plumber.input, 'rb') as inf: self.pathtoopf = plumber.input_plugin(inf, plumber.opts, plumber.input_fmt, self.log, {}, self.base) if not only_input_plugin: # Run the HTML preprocess/parsing from the conversion pipeline as # well if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and not hasattr(self.pathtoopf, 'manifest')): if hasattr(self.pathtoopf, 'manifest'): self.pathtoopf = write_oebbook(self.pathtoopf, self.base) self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts) if hasattr(self.pathtoopf, 'manifest'): self.pathtoopf = write_oebbook(self.pathtoopf, self.base) self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper() if getattr(plumber.input_plugin, 'is_kf8', False): self.book_format = 'KF8' self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None) if self.opf is None: self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf)) self.language = self.opf.language if self.language: self.language = self.language.lower() ordered = [i for i in self.opf.spine if i.is_linear] + \ [i for i in self.opf.spine if not i.is_linear] self.spine = [] Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, run_char_count=run_char_count) is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'} for i in ordered: spath = i.path mt = None if i.idref is not None: mt = self.opf.manifest.type_for_id(i.idref) if mt is None: mt = guess_type(spath)[0] try: self.spine.append(Spiny(spath, mime_type=mt)) if is_comic: self.spine[-1].is_single_page = True except: self.log.warn('Missing spine item:', repr(spath)) cover = self.opf.cover if cover and self.ebook_ext in { 'lit', 'mobi', 'prc', 'opf', 'fb2', 'azw', 'azw3' }: cfile = os.path.join(self.base, 'calibre_iterator_cover.html') rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/') chtml = (TITLEPAGE % prepare_string_for_xml(rcpath, True)).encode('utf-8') with open(cfile, 'wb') as f: f.write(chtml) self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')] self.delete_on_exit.append(cfile) if self.opf.path_to_html_toc is not None and \ self.opf.path_to_html_toc not in self.spine: try: self.spine.append(Spiny(self.opf.path_to_html_toc)) except: import traceback traceback.print_exc() sizes = [i.character_count for i in self.spine] self.pages = [ math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes ] for p, s in zip(self.pages, self.spine): s.pages = p start = 1 for s in self.spine: s.start_page = start start += s.pages s.max_page = s.start_page + s.pages - 1 self.toc = self.opf.toc if read_anchor_map: create_indexing_data(self.spine, self.toc) self.read_bookmarks() if extract_embedded_fonts_for_qt: from calibre.ebooks.oeb.iterator.extract_fonts import extract_fonts try: extract_fonts(self.opf, self.log) except: ol = self.log.filter_level 
self.log.filter_level = self.log.DEBUG self.log.exception('Failed to extract fonts') self.log.filter_level = ol return self
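The page bookkeeping at the end of __enter__ can be sketched independently; CHARACTERS_PER_PAGE is assumed to be 1000 here, while the real value lives as a class attribute.

import math

CHARACTERS_PER_PAGE = 1000  # assumed; the real constant is a class attribute

def paginate(char_counts, start=1):
    # Returns per-item page counts and (start_page, max_page) bounds.
    pages = [int(math.ceil(c / float(CHARACTERS_PER_PAGE))) for c in char_counts]
    bounds = []
    for p in pages:
        bounds.append((start, start + p - 1))
        start += p
    return pages, bounds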
def ACQUISITION_ENTRY(book_id, updated, request_context): field_metadata = request_context.db.field_metadata mi = request_context.db.get_metadata(book_id) extra = [] if (mi.rating or 0) > 0: rating = rating_to_stars(mi.rating) extra.append(_('RATING: %s<br />') % rating) if mi.tags: extra.append( _('TAGS: %s<br />') % xml(format_tag_string(mi.tags, None))) if mi.series: extra.append( _('SERIES: %(series)s [%(sidx)s]<br />') % dict(series=xml(mi.series), sidx=fmt_sidx(float(mi.series_index)))) for key in filter(request_context.ctx.is_field_displayable, field_metadata.ignorable_field_keys()): name, val = mi.format_field(key) if val: fm = field_metadata[key] datatype = fm['datatype'] if datatype == 'text' and fm['is_multiple']: extra.append( '%s: %s<br />' % (xml(name), xml( format_tag_string( val, fm['is_multiple']['ui_to_list'], joinval=fm['is_multiple']['list_to_ui'])))) elif datatype == 'comments' or (fm['datatype'] == 'composite' and fm['display'].get( 'contains_html', False)): extra.append('%s: %s<br />' % (xml(name), comments_to_html(unicode_type(val)))) else: extra.append('%s: %s<br />' % (xml(name), xml(unicode_type(val)))) if mi.comments: comments = comments_to_html(mi.comments) extra.append(comments) if extra: extra = html_to_lxml('\n'.join(extra)) ans = E.entry(TITLE(mi.title), E.author(E.name(authors_to_string(mi.authors))), ID('urn:uuid:' + mi.uuid), UPDATED(mi.last_modified), E.published(mi.timestamp.isoformat())) if mi.pubdate and not is_date_undefined(mi.pubdate): ans.append(ans.makeelement('{%s}date' % DC_NS)) ans[-1].text = mi.pubdate.isoformat() if len(extra): ans.append(E.content(extra, type='xhtml')) get = partial(request_context.ctx.url_for, '/get', book_id=book_id, library_id=request_context.library_id) if mi.formats: fm = mi.format_metadata for fmt in mi.formats: fmt = fmt.lower() mt = guess_type('a.' + fmt)[0] if mt: link = E.link(type=mt, href=get(what=fmt), rel="http://opds-spec.org/acquisition") ffm = fm.get(fmt.upper()) if ffm: link.set('length', unicode_type(ffm['size'])) link.set('mtime', ffm['mtime'].isoformat()) ans.append(link) ans.append( E.link(type='image/jpeg', href=get(what='cover'), rel="http://opds-spec.org/cover")) ans.append( E.link(type='image/jpeg', href=get(what='thumb'), rel="http://opds-spec.org/thumbnail")) ans.append( E.link(type='image/jpeg', href=get(what='cover'), rel="http://opds-spec.org/image")) ans.append( E.link(type='image/jpeg', href=get(what='thumb'), rel="http://opds-spec.org/image/thumbnail")) return ans
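Each acquisition link above pairs a format's guessed MIME type with a download href; a hedged sketch using lxml's generic ElementMaker (rather than the E factory imported by this module) follows.

from lxml.builder import ElementMaker
from mimetypes import guess_type

E = ElementMaker()

def acquisition_link(fmt, href):
    # Formats whose MIME type cannot be guessed get no link, as above.
    mt = guess_type('a.' + fmt.lower())[0]
    if mt is None:
        return None
    return E.link(type=mt, href=href, rel='http://opds-spec.org/acquisition')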
def convert(self, stream, options, file_ext, log, accelerators): from lxml import etree from calibre.ebooks.metadata.fb2 import ensure_namespace from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER from calibre.ebooks.chardet import xml_to_unicode self.log = log log.debug('Parsing XML...') raw = stream.read().replace('\0', '') raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True, resolve_entities=True)[0] try: doc = etree.fromstring(raw) except etree.XMLSyntaxError: try: doc = etree.fromstring(raw, parser=RECOVER_PARSER) if doc is None: raise Exception('parse failed') except: doc = etree.fromstring(raw.replace('& ', '&'), parser=RECOVER_PARSER) if doc is None: raise ValueError('The FB2 file is not valid XML') doc = ensure_namespace(doc) try: fb_ns = doc.nsmap[doc.prefix] except Exception: fb_ns = FB2NS NAMESPACES = {'f': fb_ns, 'l': XLINK_NS} stylesheets = doc.xpath( '//*[local-name() = "stylesheet" and @type="text/css"]') css = '' for s in stylesheets: css += etree.tostring( s, encoding=str, method='text', with_tail=False) + '\n\n' if css: import cssutils, logging parser = cssutils.CSSParser(fetcher=None, log=logging.getLogger('calibre.css')) XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS text = XHTML_CSS_NAMESPACE + css log.debug('Parsing stylesheet...') stylesheet = parser.parseString(text) stylesheet.namespaces['h'] = XHTML_NS css = str(stylesheet.cssText).replace('h|style', 'h|span') css = re.sub(r'name\s*=\s*', 'class=', css) self.extract_embedded_content(doc) log.debug('Converting XML to HTML...') ss = open(P('templates/fb2.xsl'), 'rb').read() ss = ss.replace("__FB_NS__", fb_ns) if options.no_inline_fb2_toc: log('Disabling generation of inline FB2 TOC') ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->', re.DOTALL).sub('', ss) styledoc = etree.fromstring(ss) transform = etree.XSLT(styledoc) result = transform(doc) # Handle links of type note and cite notes = { a.get('href')[1:]: a for a in result.xpath('//a[@link_note and @href]') if a.get('href').startswith('#') } cites = { a.get('link_cite'): a for a in result.xpath('//a[@link_cite]') if not a.get('href', '') } all_ids = {x for x in result.xpath('//*/@id')} for cite, a in cites.items(): note = notes.get(cite, None) if note: c = 1 while 'cite%d' % c in all_ids: c += 1 if not note.get('id', None): note.set('id', 'cite%d' % c) all_ids.add(note.get('id')) a.set('href', '#%s' % note.get('id')) for x in result.xpath('//*[@link_note or @link_cite]'): x.attrib.pop('link_note', None) x.attrib.pop('link_cite', None) for img in result.xpath('//img[@src]'): src = img.get('src') img.set('src', self.binary_map.get(src, src)) index = transform.tostring(result) open('index.xhtml', 'wb').write(index) open('inline-styles.css', 'wb').write(css) stream.seek(0) mi = get_metadata(stream, 'fb2') if not mi.title: mi.title = _('Unknown') if not mi.authors: mi.authors = [_('Unknown')] cpath = None if mi.cover_data and mi.cover_data[1]: with open('fb2_cover_calibre_mi.jpg', 'wb') as f: f.write(mi.cover_data[1]) cpath = os.path.abspath('fb2_cover_calibre_mi.jpg') else: for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES): href = img.get('{%s}href' % XLINK_NS, img.get('href', None)) if href is not None: if href.startswith('#'): href = href[1:] cpath = os.path.abspath(href) break opf = OPFCreator(os.getcwd(), mi) entries = [(f2, guess_type(f2)[0]) for f2 in os.listdir('.')] 
opf.create_manifest(entries) opf.create_spine(['index.xhtml']) if cpath: opf.guide.set_cover(cpath) with open('metadata.opf', 'wb') as f: opf.render(f) return os.path.join(os.getcwd(), 'metadata.opf')
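The manifest assembled at the end of convert() simply pairs every file in the working directory with a guessed MIME type; a standalone sketch, with mimetypes.guess_type standing in for guess_type:

import os
from mimetypes import guess_type

def manifest_entries(workdir='.'):
    # One (name, media-type) pair per file; unknown types come back as None.
    return [(name, guess_type(name)[0]) for name in os.listdir(workdir)]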
def setup_desktop_integration(self): # {{{ try: self.info('Setting up desktop integration...') env = os.environ.copy() cc = check_call if getattr(sys, 'frozen_path', False) and 'LD_LIBRARY_PATH' in env: paths = env.get('LD_LIBRARY_PATH', '').split(os.pathsep) paths = [x for x in paths if x] npaths = [x for x in paths if x != sys.frozen_path + '/lib'] env['LD_LIBRARY_PATH'] = os.pathsep.join(npaths) cc = partial(check_call, env=env) with TemporaryDirectory() as tdir, CurrentDir( tdir), PreserveMIMEDefaults(): def install_single_icon(iconsrc, basename, size, context, is_last_icon=False): filename = '%s-%s.png' % (basename, size) render_img(iconsrc, filename, width=int(size), height=int(size)) cmd = [ 'xdg-icon-resource', 'install', '--noupdate', '--context', context, '--size', str(size), filename, basename ] if is_last_icon: del cmd[2] cc(cmd) self.icon_resources.append((context, basename, str(size))) def install_icons(iconsrc, basename, context, is_last_icon=False): sizes = (16, 32, 48, 64, 128, 256) for size in sizes: install_single_icon(iconsrc, basename, size, context, is_last_icon and size is sizes[-1]) icons = list( filter(None, [ x.strip() for x in '''\ mimetypes/lrf.png application-lrf mimetypes mimetypes/lrf.png text-lrs mimetypes mimetypes/mobi.png application-x-mobipocket-ebook mimetypes mimetypes/tpz.png application-x-topaz-ebook mimetypes mimetypes/azw2.png application-x-kindle-application mimetypes mimetypes/azw3.png application-x-mobi8-ebook mimetypes lt.png calibre-gui apps viewer.png calibre-viewer apps tweak.png calibre-ebook-edit apps '''.splitlines() ])) for line in icons: iconsrc, basename, context = line.split() install_icons(iconsrc, basename, context, is_last_icon=line is icons[-1]) mimetypes = set() for x in all_input_formats(): mt = guess_type('dummy.' + x)[0] if mt and 'chemical' not in mt and 'ctc-posml' not in mt: mimetypes.add(mt) mimetypes.discard('application/octet-stream') def write_mimetypes(f): polyglot_write(f)('MimeType=%s;\n' % ';'.join(mimetypes)) from calibre.ebooks.oeb.polish.main import SUPPORTED from calibre.ebooks.oeb.polish.import_book import IMPORTABLE with open('calibre-lrfviewer.desktop', 'wb') as f: polyglot_write(f)(VIEWER) with open('calibre-ebook-viewer.desktop', 'wb') as f: polyglot_write(f)(EVIEWER) write_mimetypes(f) with open('calibre-ebook-edit.desktop', 'wb') as f: polyglot_write(f)(ETWEAK) mt = { guess_type('a.' 
+ x.lower())[0] for x in (SUPPORTED | IMPORTABLE) } - {None, 'application/octet-stream'} polyglot_write(f)('MimeType=%s;\n' % ';'.join(mt)) with open('calibre-gui.desktop', 'wb') as f: polyglot_write(f)(GUI) write_mimetypes(f) des = ('calibre-gui.desktop', 'calibre-lrfviewer.desktop', 'calibre-ebook-viewer.desktop', 'calibre-ebook-edit.desktop') appdata = os.path.join( os.path.dirname(self.opts.staging_sharedir), 'metainfo') if not os.path.exists(appdata): try: os.mkdir(appdata) except: self.warning( 'Failed to create %s not installing appdata files' % appdata) if os.path.exists(appdata) and not os.access(appdata, os.W_OK): self.warning( 'Do not have write permissions for %s not installing appdata files' % appdata) else: from calibre.utils.localization import get_all_translators translators = dict(get_all_translators()) APPDATA = get_appdata() for x in des: cmd = [ 'xdg-desktop-menu', 'install', '--noupdate', './' + x ] cc(' '.join(cmd), shell=True) self.menu_resources.append(x) ak = x.partition('.')[0] if ak in APPDATA and os.access(appdata, os.W_OK): self.appdata_resources.append( write_appdata(ak, APPDATA[ak], appdata, translators)) cc(['xdg-desktop-menu', 'forceupdate']) MIME = P('calibre-mimetypes.xml') self.mime_resources.append(MIME) cc(['xdg-mime', 'install', MIME]) except Exception: if self.opts.fatal_errors: raise self.task_failed('Setting up desktop integration failed')
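The MimeType= lines written into the .desktop files above can be sketched as follows; the extension list passed in is illustrative, not calibre's real format registry.

from mimetypes import guess_type

def mimetype_line(extensions):
    # Guess a MIME type per extension, dropping unknown and generic results.
    mts = {guess_type('dummy.' + ext)[0] for ext in extensions}
    mts -= {None, 'application/octet-stream'}
    return 'MimeType=%s;\n' % ';'.join(sorted(mts))

print(mimetype_line(['epub', 'mobi', 'pdf', 'txt']), end='')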