def __call__(self, container):
    '''
    Give the OPF a freshly generated UUID as its unique identifier.

    The ``unique-identifier`` attribute of the package is set to a new UUID
    and a ``dc:identifier`` element whose id and text are that UUID (with
    ``opf:scheme="uuid"``) is inserted into ``<metadata>``. A ``<metadata>``
    element is created as the first child of the package when none exists.
    The OPF is marked dirty and True is returned.
    '''
    import uuid

    package = container.opf
    new_uid = str(uuid.uuid4())
    package.set('unique-identifier', new_uid)

    found = container.opf_xpath('/opf:package/opf:metadata')
    if not found:
        # No <metadata> yet: create one at the top of the package
        found = [container.opf.makeelement(OPF('metadata'), nsmap={'dc': DC11_NS})]
        container.insert_into_xml(container.opf, found[0], 0)
    metadata = found[0]

    identifier = metadata.makeelement(
        DC('identifier'), id=new_uid, nsmap={'opf': OPF2_NS})
    identifier.set(OPF('scheme'), 'uuid')
    identifier.text = new_uid
    container.insert_into_xml(metadata, identifier)

    container.dirty(container.opf_name)
    return True
def opf_get_or_create(self, name):
    '''
    Return the first ``opf:<name>`` element found anywhere in the OPF.

    When there is no such element, the OPF is marked dirty and a new, empty
    element with that tag is appended to the ``<package>`` element (followed
    by a newline) and returned.
    '''
    matches = self.opf_xpath('//opf:' + name)
    if not matches:
        self.dirty(self.opf_name)
        package = self.opf_xpath('//opf:package')[0]
        created = package.makeelement(OPF(name))
        created.tail = '\n'
        package.append(created)
        return created
    return matches[0]
def create_cover_page(self, input_fmt):
    '''
    Create a cover image (if needed) and a titlepage that displays it.

    For ``input_fmt == 'epub'`` the work is delegated to ``set_epub_cover``,
    which is handed the titlepage template and a callback that writes a
    generic cover image. For any other format an existing cover image is
    looked up with ``find_cover_image``; when there is none a ``cover.jpeg``
    item is generated and filled with the generic cover. A
    ``titlepage.html`` item is then generated from the template, written as
    UTF-8 and inserted as the first entry of the spine.

    Returns the tuple ``(raster_cover_name, titlepage_name)``.
    '''
    # XHTML template for the titlepage; %s receives the (escaped) image href
    # and %% are literal percent signs.
    templ = (
        ' <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">'
        ' <head><style>'
        ' html, body, img { height: 100vh; display: block; margin: 0; padding: 0; border-width: 0; }'
        ' img { width: 100%%; height: 100%%; object-fit: contain;'
        ' margin-left: auto; margin-right: auto;'
        ' max-width: 100vw; max-height: 100vh;'
        ' top: 50vh; transform: translateY(-50%%); position: relative; }'
        ' body.cover-fill img { object-fit: fill; }'
        ' </style></head><body><img src="%s"/></body></html> '
    )

    def generic_cover():
        # Image data used when the book has no cover of its own
        if self.book_metadata is None:
            return BLANK_JPEG
        from calibre.ebooks.covers import create_cover
        mi = self.book_metadata
        return create_cover(mi.title, mi.authors, mi.series, mi.series_index)

    if input_fmt == 'epub':
        def cover_path(action, data):
            if action == 'write_image':
                data.write(generic_cover())

        raster_cover_name, titlepage_name = set_epub_cover(
            self, cover_path, (lambda *a: None), options={'template': templ})
        return raster_cover_name, titlepage_name

    raster_cover_name = find_cover_image(self, strict=True)
    if raster_cover_name is None:
        cover_item = self.generate_item(name='cover.jpeg', id_prefix='cover')
        raster_cover_name = self.href_to_name(cover_item.get('href'), self.opf_name)
        with self.open(raster_cover_name, 'wb') as dest:
            dest.write(generic_cover())

    page_item = self.generate_item(name='titlepage.html', id_prefix='titlepage')
    titlepage_name = self.href_to_name(page_item.get('href'), self.opf_name)
    image_href = self.name_to_href(raster_cover_name, titlepage_name)
    markup = templ % prepare_string_for_xml(image_href, True)
    with self.open(titlepage_name, 'wb') as f:
        f.write(markup.encode('utf-8'))

    # The titlepage becomes the first item in the spine
    spine = self.opf_xpath('//opf:spine')[0]
    itemref = spine.makeelement(OPF('itemref'), idref=page_item.get('id'))
    self.insert_into_xml(spine, itemref, index=0)
    self.dirty(self.opf_name)
    return raster_cover_name, titlepage_name
def mark_as_cover_azw3(container, name):
    '''
    Make the cover entry of the OPF guide point at the file ``name``.

    Every guide reference that has an href and whose type contains "cover"
    gets its href rewritten. If no such reference exists, a new
    ``<reference type="cover">`` is inserted into each ``<guide>`` element
    that is present. The OPF is marked dirty in every case.
    '''
    cover_href = container.name_to_href(name, container.opf_name)
    updated_any = False
    for reference in container.opf_xpath(
            '//opf:guide/opf:reference[@href and contains(@type, "cover")]'):
        reference.set('href', cover_href)
        updated_any = True

    if not updated_any:
        for guide in container.opf_xpath('//opf:guide'):
            new_reference = guide.makeelement(
                OPF('reference'), href=cover_href, type='cover')
            container.insert_into_xml(guide, new_reference)

    container.dirty(container.opf_name)
def set_series(root, prefixes, refines, series, series_index):
    '''
    Replace the series information stored in the OPF metadata.

    All existing series records are removed first: the ``calibre:series``
    and ``calibre:series_index`` ``<meta name=...>`` tags and every
    ``belongs-to-collection`` ``<meta property=...>`` tag. When ``series``
    is non-empty, a new ``belongs-to-collection`` meta is appended to
    ``<metadata>`` and refined with ``collection-type`` "series" and a
    ``group-position`` derived from ``series_index``.

    :param root: The OPF ``<package>`` element.
    :param prefixes: Not used by this function.
    :param refines: Existing refinements, passed to ``remove_element`` and
        ``set_refines``.
    :param series: The series name; a false value only removes old data.
    :param series_index: Numeric position in the series.
    '''
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:series" or @name="calibre:series_index"]')(root):
        remove_element(meta, refines)
    for meta in XPath('./opf:metadata/opf:meta[@property="belongs-to-collection"]')(root):
        remove_element(meta, refines)
    m = XPath('./opf:metadata')(root)[0]
    if series:
        d = m.makeelement(OPF('meta'), attrib={'property': 'belongs-to-collection'})
        d.text = series
        m.append(d)
        # '%.2g' keeps only two significant digits, so 100 was written as
        # '1e+02' and 10.5 as '10'. Use fixed-point with two decimals and
        # trim the trailing zeros instead: 1 -> '1', 1.5 -> '1.5',
        # 100 -> '100'.
        position = ('%.2f' % series_index).rstrip('0').rstrip('.')
        set_refines(
            d, refines,
            refdef('collection-type', 'series'),
            refdef('group-position', position))
def author(item, props, val):
    '''
    Build an ``Author`` for the creator element ``item`` with name ``val``.

    The sort name is taken from the ``file-as`` entry of ``props`` when that
    is present, otherwise from the ``opf:file-as`` attribute of ``item``
    (None when empty or missing). The sequence number comes from the
    ``display-seq`` entry of ``props`` and stays 0 whenever it is missing or
    cannot be converted to an integer.
    '''
    file_as = props.get('file-as')
    if file_as:
        sort_name = file_as[0][-1]
    else:
        sort_name = item.get(OPF('file-as')) or None

    position = 0
    display_seq = props.get('display-seq')
    # Any failure here (no display-seq, non-numeric value) leaves position at 0
    with suppress(Exception):
        position = int(display_seq[0][-1])

    return Author(normalize_whitespace(val), normalize_whitespace(sort_name), position)
def create_timestamp(root, prefixes, m, val):
    '''
    Append a ``calibre:timestamp`` ``<meta>`` for ``val`` to the element ``m``.

    Nothing happens when ``is_date_undefined(val)`` is true. Otherwise the
    ``calibre`` and ``dcterms`` prefixes are ensured on ``root`` and the
    date, converted with ``w3cdtf``, becomes the text of the new element.
    '''
    if is_date_undefined(val):
        return
    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
    ensure_prefix(root, prefixes, 'dcterms')
    text = w3cdtf(val)
    stamp = m.makeelement(
        OPF('meta'),
        attrib={'property': 'calibre:timestamp', 'scheme': 'dcterms:W3CDTF'})
    stamp.text = text
    m.append(stamp)
def convert_metadata(self, oeb):
    '''
    Store the metadata of ``oeb`` on ``self.mi`` as a book metadata object.

    The OEB metadata is written into a temporary OPF 2.0 ``<package>``
    element, serialized as UTF-8 and read back through the OPF 2 reader
    (without populating the spine or guessing a cover).
    '''
    from io import BytesIO

    from lxml import etree

    from calibre.ebooks.metadata.opf2 import OPF as ReadOPF
    from calibre.ebooks.oeb.base import OPF, OPF2_NS

    package = etree.Element(
        OPF('package'), attrib={'version': '2.0'}, nsmap={None: OPF2_NS})
    oeb.metadata.to_opf2(package)
    serialized = etree.tostring(package, encoding='utf-8')
    reader = ReadOPF(
        BytesIO(serialized), populate_spine=False, try_to_guess_cover=False)
    self.mi = reader.to_book_metadata()
def set_refines(elem, existing_refines, *new_refines):
    '''
    Replace the refinements of ``elem`` with ``new_refines``.

    ``elem`` is given an id if necessary and its current refinements are
    removed. Each entry of ``new_refines`` is a ``(property, value, scheme)``
    triple; for each one a ``<meta refines="#id">`` element is inserted
    directly after ``elem`` in its parent. The scheme attribute is only set
    when the scheme is truthy.
    '''
    elem_id = ensure_id(elem)
    remove_refines(elem, existing_refines)
    # Each new element is inserted immediately after elem, so walking the
    # refinements backwards leaves them in their original order.
    for prop, val, scheme in reversed(new_refines):
        refinement = elem.makeelement(OPF('meta'))
        refinement.set('refines', '#' + elem_id)
        refinement.set('property', prop)
        refinement.text = val.strip()
        if scheme:
            refinement.set('scheme', scheme)
        parent = elem.getparent()
        parent.insert(parent.index(elem) + 1, refinement)
def __call__(self, container):
    '''
    Append ``self.name`` to the end of the spine.

    The manifest id for the file is looked up in
    ``container.manifest_id_map``; when the file is not in the manifest yet
    it is added with ``container.add_name_to_manifest``. A new ``<itemref>``
    pointing at that id is then inserted into the first ``<spine>`` element
    and the OPF is marked dirty. Always returns True.
    '''
    from calibre.ebooks.oeb.base import OPF

    # .items() works on both Python 2 and 3; dict.iteritems() was removed in
    # Python 3 and raised AttributeError there.
    rmap = {v: k for k, v in container.manifest_id_map.items()}
    if self.name in rmap:
        manifest_id = rmap[self.name]
    else:
        manifest_id = container.add_name_to_manifest(self.name)
    spine = container.opf_xpath('//opf:spine')[0]
    si = spine.makeelement(OPF('itemref'), idref=manifest_id)
    container.insert_into_xml(spine, si)
    container.dirty(container.opf_name)
    return True
def set_authors(root, prefixes, refines, authors):
    '''
    Replace the author entries (``dc:creator``) in the OPF metadata.

    Existing ``dc:creator`` elements that are authors are removed: those
    whose role is ``aut`` and those with no role at all. Creators that carry
    a different role, either in the ``opf:role`` attribute or through a
    ``role`` refinement, are left in place. For every entry of ``authors`` a
    new ``dc:creator`` is then appended together with a ``role`` refinement
    of ``aut`` and, when the author has a sort value, a ``file-as``
    refinement.

    :param root: The OPF ``<package>`` element.
    :param prefixes: Prefix map, passed to ``ensure_prefix`` and the
        property lookup.
    :param refines: Existing refinements.
    :param authors: Iterable of objects with ``name`` and ``sort`` attributes.
    '''
    ensure_prefix(root, prefixes, 'marc')
    for item in XPath('./opf:metadata/dc:creator')(root):
        props = properties_for_id_with_scheme(item.get('id'), prefixes, refines)
        opf_role = item.get(OPF('role'))
        # Keep creators that are explicitly something other than an author.
        # The previous test was inverted: it kept creators with the 'aut'
        # role (so authors were duplicated) and removed creators with other
        # refines roles. This mirrors the test in set_book_producers.
        if (opf_role and opf_role.lower() != 'aut') or (
                props.get('role') and not is_relators_role(props, 'aut')):
            continue
        remove_element(item, refines)
    metadata = XPath('./opf:metadata')(root)[0]
    for author in authors:
        a = metadata.makeelement(DC('creator'))
        aid = ensure_id(a)
        a.text = author.name
        metadata.append(a)
        m = metadata.makeelement(OPF('meta'), attrib={
            'refines': '#' + aid, 'property': 'role', 'scheme': 'marc:relators'})
        m.text = 'aut'
        metadata.append(m)
        if author.sort:
            m = metadata.makeelement(OPF('meta'), attrib={
                'refines': '#' + aid, 'property': 'file-as'})
            m.text = author.sort
            metadata.append(m)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    '''
    Convert ``oeb_book`` and write the result to ``output_path``.

    The book is split on page breaks, Qt and the builtin fonts are loaded,
    the book metadata is read into ``self.metadata`` and the input is then
    converted either as an image collection or as a text based book,
    depending on ``input_plugin.is_image_collection``. The
    CALIBRE_WEBKIT_NO_HINTING environment variable is set for the duration
    of the conversion and removed afterwards, even on failure.
    '''
    from calibre.ebooks.oeb.transforms.split import Split
    from calibre.gui2 import load_builtin_fonts, must_use_qt

    # Turn off hinting in WebKit (requires a patched build of QtWebKit)
    os.environ['CALIBRE_WEBKIT_NO_HINTING'] = '1'
    self.filtered_font_warnings = set()
    self.stored_page_margins = getattr(opts, '_stored_page_margins', {})
    try:
        # split on page breaks, as the JS code to convert page breaks to
        # column breaks will not work because of
        # QWebSettings.LocalContentCanAccessFileUrls
        Split()(oeb_book, opts)
        must_use_qt()
        load_builtin_fonts()

        self.oeb = oeb_book
        self.input_plugin = input_plugin
        self.opts = opts
        self.log = log
        self.output_path = output_path

        from io import BytesIO

        from lxml import etree

        from calibre.ebooks.metadata.opf2 import OPF as MetadataReader
        from calibre.ebooks.oeb.base import OPF, OPF2_NS

        # Round-trip the OEB metadata through a throwaway OPF 2 package
        package = etree.Element(
            OPF('package'),
            attrib={'version': '2.0', 'unique-identifier': 'dummy'},
            nsmap={None: OPF2_NS})
        self.oeb.metadata.to_opf2(package)
        serialized = etree.tostring(package)
        self.metadata = MetadataReader(BytesIO(serialized)).to_book_metadata()
        self.cover_data = None

        if not input_plugin.is_image_collection:
            log.debug('Converting input as a text based book...')
            self.convert_text(oeb_book)
        else:
            log.debug('Converting input as an image collection...')
            self.convert_images(input_plugin.get_images())
    finally:
        os.environ.pop('CALIBRE_WEBKIT_NO_HINTING', None)
def mark_as_cover_epub(container, name):
    '''
    Mark the manifest file ``name`` as the cover image of the book.

    Existing OPF entries that identify a raster image as the cover are
    removed first. For OPF versions below 3 a ``<meta name="cover">`` is
    added to every ``<metadata>`` element and, if the guide has no cover
    reference, one pointing at ``name`` is added. For version 3 and above
    the ``cover-image`` property is applied to the item instead. The OPF is
    marked dirty.

    :raises ValueError: if ``name`` is not in the manifest.
    '''
    name_to_id = {v: k for k, v in iteritems(container.manifest_id_map)}
    if name not in name_to_id:
        raise ValueError('Cannot mark %s as cover as it is not in manifest' % name)
    manifest_id = name_to_id[name]
    version = container.opf_version_parsed

    # Remove all entries from the opf that identify a raster image as cover
    for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'):
        container.remove_from_xml(meta)
    for ref in container.opf_xpath('//opf:guide/opf:reference[@href and @type]'):
        if ref.get('type').lower() in COVER_TYPES:
            target = container.href_to_name(ref.get('href'), container.opf_name)
            if is_raster_image(container.mime_map.get(target, None)):
                container.remove_from_xml(ref)

    if ver_is_legacy(version) if False else version.major < 3:
        # Add reference to image in <metadata>
        for metadata in container.opf_xpath('//opf:metadata'):
            cover_meta = metadata.makeelement(
                OPF('meta'), name='cover', content=manifest_id)
            container.insert_into_xml(metadata, cover_meta)

        # If no entry for cover exists in guide, insert one that points to
        # this image
        if not container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
            for guide in get_guides(container):
                cover_ref = guide.makeelement(
                    OPF('reference'), type='cover',
                    href=container.name_to_href(name, container.opf_name))
                container.insert_into_xml(guide, cover_ref)
    else:
        container.apply_unique_properties(name, 'cover-image')

    container.dirty(container.opf_name)
def add_file(self, name, data, media_type=None):
    '''
    Write ``data`` into the container as a new file called ``name``.

    The file is registered in the container's name and mime maps. Unless
    the name is one of ``names_that_need_not_be_manifested``, an ``<item>``
    with a unique id is added to the OPF manifest, and when the media type
    is an OEB document type an ``<itemref>`` is added to the spine as well.

    :param media_type: MIME type to record; guessed from ``name`` when empty.
    :raises ValueError: if the name is already taken, contains ``..`` or
        its href already appears in the manifest.
    '''
    if self.has_name(name):
        raise ValueError('A file with the name %s already exists' % name)
    if '..' in name:
        raise ValueError('Names are not allowed to have .. in them')

    href = self.name_to_href(name, self.opf_name)
    manifest_hrefs = {
        entry.get('href')
        for entry in self.opf_xpath('//opf:manifest/opf:item[@href]')
    }
    if href in manifest_hrefs:
        raise ValueError('An item with the href %s already exists in the manifest' % href)

    # Write the data to disk, creating the parent directory if necessary
    path = self.name_to_abspath(name)
    parent_dir = os.path.dirname(path)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    with open(path, 'wb') as f:
        f.write(data)

    mt = media_type or self.guess_type(name)
    self.name_path_map[name] = path
    self.mime_map[name] = mt
    if name in self.names_that_need_not_be_manifested:
        return

    # Pick the first of 'id', 'id1', 'id2', ... that is not yet used
    used_ids = {node.get('id') for node in self.opf_xpath('//*[@id]')}
    counter = 0
    item_id = 'id'
    while item_id in used_ids:
        counter += 1
        item_id = 'id' + '%d' % counter

    manifest = self.opf_xpath('//opf:manifest')[0]
    item = manifest.makeelement(OPF('item'), id=item_id, href=href)
    item.set('media-type', mt)
    self.insert_into_xml(manifest, item)
    self.dirty(self.opf_name)

    if mt in OEB_DOCS:
        spine = self.opf_xpath('//opf:spine')[0]
        itemref = manifest.makeelement(OPF('itemref'), idref=item_id)
        self.insert_into_xml(spine, itemref)
def writer(root, prefixes, refines, val):
    '''
    Replace the ``calibre:<name>`` property in the OPF metadata with ``val``.

    ``name``, ``pq``, ``remove2`` and ``serialize`` come from the enclosing
    scope. Old ``<meta name="calibre:<name>">`` tags are removed when
    ``remove2`` is set, and every ``<meta property=...>`` whose expanded,
    lower-cased property equals ``pq`` is always removed. When ``val`` is
    truthy a new ``<meta property="calibre:<name>">`` holding
    ``serialize(val)`` is appended to ``<metadata>``.
    '''
    if remove2:
        legacy_query = './opf:metadata/opf:meta[@name="calibre:%s"]' % name
        for legacy in XPath(legacy_query)(root):
            remove_element(legacy, refines)

    for candidate in XPath('./opf:metadata/opf:meta[@property]')(root):
        expanded = expand_prefix(candidate.get('property'), prefixes)
        if expanded.lower() == pq:
            remove_element(candidate, refines)

    if not val:
        return
    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
    metadata = XPath('./opf:metadata')(root)[0]
    entry = metadata.makeelement(
        OPF('meta'), attrib={'property': 'calibre:%s' % name})
    entry.text = serialize(val)
    metadata.append(entry)
def read_book_producers(root, prefixes, refines):
    '''
    Return the names of all book producers listed in the OPF metadata.

    A non-empty ``dc:contributor`` counts as a book producer when its
    ``role`` refinement is the relators role ``bkp`` or, if it has no role
    refinement, when its ``opf:role`` attribute equals ``bkp`` (ignoring
    case). Names are whitespace-normalized.
    '''
    producers = []
    for contributor in XPath('./opf:metadata/dc:contributor')(root):
        text = (contributor.text or '').strip()
        if not text:
            continue
        props = properties_for_id_with_scheme(contributor.get('id'), prefixes, refines)
        refined_role = props.get('role')
        attr_role = contributor.get(OPF('role'))
        if refined_role:
            # A role refinement takes precedence over the opf:role attribute
            matches = is_relators_role(props, 'bkp')
        else:
            matches = attr_role and attr_role.lower() == 'bkp'
        if matches:
            producers.append(normalize_whitespace(text))
    return producers
def set_rating(root, prefixes, refines, val):
    '''
    Replace the rating stored in the OPF metadata with ``val``.

    ``<meta name="calibre:rating">`` tags and every ``<meta property=...>``
    whose expanded, lower-cased property is the calibre rating property are
    removed. When ``val`` is truthy a new
    ``<meta property="calibre:rating">`` is appended to ``<metadata>`` with
    the value formatted as ``'%.2g'``.
    '''
    rating_property = '%s:rating' % CALIBRE_PREFIX

    for legacy in XPath('./opf:metadata/opf:meta[@name="calibre:rating"]')(root):
        remove_element(legacy, refines)
    for candidate in XPath('./opf:metadata/opf:meta[@property]')(root):
        expanded = expand_prefix(candidate.get('property'), prefixes)
        if expanded.lower() == rating_property:
            remove_element(candidate, refines)

    if not val:
        return
    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
    metadata = XPath('./opf:metadata')(root)[0]
    entry = metadata.makeelement(OPF('meta'), attrib={'property': 'calibre:rating'})
    entry.text = '%.2g' % val
    metadata.append(entry)
def set_timestamp(root, prefixes, refines, val):
    '''
    Replace the calibre timestamp stored in the OPF metadata with ``val``.

    The ``calibre`` and ``dcterms`` prefixes are ensured, then every
    ``<meta>`` that is a calibre timestamp (by expanded property or by
    ``name="calibre:timestamp"``) is removed. Unless
    ``is_date_undefined(val)`` is true, a new
    ``<meta property="calibre:timestamp">`` holding ``isoformat(val)`` is
    appended to ``<metadata>``.
    '''
    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
    ensure_prefix(root, prefixes, 'dcterms')
    timestamp_property = '%s:timestamp' % CALIBRE_PREFIX

    for candidate in XPath('./opf:metadata/opf:meta')(root):
        expanded = expand_prefix(candidate.get('property'), prefixes)
        is_timestamp = (
            expanded.lower() == timestamp_property
            or candidate.get('name') == 'calibre:timestamp')
        if is_timestamp:
            remove_element(candidate, refines)

    if is_date_undefined(val):
        return
    text = isoformat(val)
    metadata = XPath('./opf:metadata')(root)[0]
    entry = metadata.makeelement(
        OPF('meta'),
        attrib={'property': 'calibre:timestamp', 'scheme': 'dcterms:W3CDTF'})
    entry.text = text
    metadata.append(entry)
def set_book_producers(root, prefixes, refines, producers):
    '''
    Replace the book producer entries in the OPF metadata.

    Existing ``dc:contributor`` elements are removed unless they carry a
    role other than ``bkp`` (in the ``opf:role`` attribute or as a ``role``
    refinement). For every entry of ``producers`` a new ``dc:contributor``
    is appended to ``<metadata>`` followed by a ``role`` refinement with
    the value ``bkp``.
    '''
    for contributor in XPath('./opf:metadata/dc:contributor')(root):
        props = properties_for_id_with_scheme(contributor.get('id'), prefixes, refines)
        attr_role = contributor.get(OPF('role'))
        other_attr_role = attr_role and attr_role.lower() != 'bkp'
        # Contributors with some other role are not ours to remove
        if other_attr_role or (
                props.get('role') and not is_relators_role(props, 'bkp')):
            continue
        remove_element(contributor, refines)

    metadata = XPath('./opf:metadata')(root)[0]
    for producer in producers:
        contributor = metadata.makeelement(DC('contributor'))
        contributor_id = ensure_id(contributor)
        contributor.text = producer
        metadata.append(contributor)

        role = metadata.makeelement(OPF('meta'), attrib={
            'refines': '#' + contributor_id,
            'property': 'role',
            'scheme': 'marc:relators',
        })
        role.text = 'bkp'
        metadata.append(role)
def add_name_to_manifest(self, name):
    '''
    Add a manifest ``<item>`` for the file ``name`` and return its id.

    The id is the first of ``id``, ``id1``, ``id2``, ... that no element in
    the OPF uses yet. The media type is read from ``self.mime_map`` and the
    OPF is marked dirty.
    '''
    used_ids = {node.get('id') for node in self.opf_xpath('//*[@id]')}
    counter = 0
    new_id = 'id'
    while new_id in used_ids:
        counter += 1
        new_id = 'id' + '%d' % counter

    manifest = self.opf_xpath('//opf:manifest')[0]
    item_href = self.name_to_href(name, self.opf_name)
    entry = manifest.makeelement(OPF('item'), id=new_id, href=item_href)
    entry.set('media-type', self.mime_map[name])
    self.insert_into_xml(manifest, entry)
    self.dirty(self.opf_name)
    return new_id
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    '''
    Convert ``oeb_book`` and write the result to ``output_path``.

    The conversion parameters are stored on ``self``, the book metadata is
    read into ``self.metadata`` and the input is then converted either as
    an image collection or as a text based book, depending on
    ``input_plugin.is_image_collection``.
    '''
    self.stored_page_margins = getattr(opts, '_stored_page_margins', {})
    self.oeb = oeb_book
    self.input_plugin = input_plugin
    self.opts = opts
    self.log = log
    self.output_path = output_path

    from io import BytesIO

    from lxml import etree

    from calibre.ebooks.metadata.opf2 import OPF as MetadataReader
    from calibre.ebooks.oeb.base import OPF, OPF2_NS

    # Round-trip the OEB metadata through a throwaway OPF 2 package
    package = etree.Element(
        OPF('package'),
        attrib={'version': '2.0', 'unique-identifier': 'dummy'},
        nsmap={None: OPF2_NS})
    self.oeb.metadata.to_opf2(package)
    serialized = etree.tostring(package)
    self.metadata = MetadataReader(BytesIO(serialized)).to_book_metadata()
    self.cover_data = None

    if not input_plugin.is_image_collection:
        log.debug('Converting input as a text based book...')
        self.convert_text(oeb_book)
    else:
        log.debug('Converting input as an image collection...')
        self.convert_images(input_plugin.get_images())
def set_last_modified(root, prefixes, refines, val=None):
    '''
    Write the ``dcterms:modified`` date into the OPF metadata.

    ``val`` defaults to the current UTC time and is converted with
    ``w3cdtf``. The first ``<meta property=...>`` that expands to the
    dcterms modified property and either has no id or has no refinements
    registered for its id is reused; when there is none, the ``dcterms``
    prefix is ensured and a new meta is appended to ``<metadata>``.
    '''
    modified_property = '%s:modified' % reserved_prefixes['dcterms']
    text = w3cdtf(val or utcnow())

    target = None
    for candidate in XPath('./opf:metadata/opf:meta[@property]')(root):
        expanded = expand_prefix(candidate.get('property'), prefixes)
        if expanded.lower() != modified_property:
            continue
        candidate_id = candidate.get('id')
        if not candidate_id or not refines[candidate_id]:
            target = candidate
            break

    if target is None:
        ensure_prefix(root, prefixes, 'dcterms')
        metadata = XPath('./opf:metadata')(root)[0]
        target = metadata.makeelement(
            OPF('meta'),
            attrib={'property': 'dcterms:modified', 'scheme': 'dcterms:W3CDTF'})
        metadata.append(target)

    target.text = text
def fix_opf(self, container):
    '''
    Clean up the OPF of ``container``.

    In order: text files missing from the spine are appended to it,
    duplicate spine entries and the ``<guide>`` are removed, the
    ``cover-image`` and title page properties are set, the calibre prefix
    is declared on the package, ``<meta name="cover">`` tags and
    unreferenced files are removed, and the OPF is pretty printed and
    marked dirty.
    '''
    spine_names = {entry[0] for entry in container.spine_names}
    spine = container.opf_xpath('//opf:spine')[0]
    rmap = {name: item_id for item_id, name in iteritems(container.manifest_id_map)}

    # Add unreferenced text files to the spine
    for name, mt in iteritems(container.mime_map):
        if mt not in OEB_DOCS or name in spine_names:
            continue
        spine_names.add(name)
        itemref = spine.makeelement(OPF('itemref'), idref=rmap[name])
        container.insert_into_xml(spine, itemref)

    # Remove duplicate entries from spine
    seen = set()
    for item, name, linear in container.spine_iter:
        if name in seen:
            container.remove_from_xml(item)
        else:
            seen.add(name)

    # Remove the <guide> which is not needed in EPUB 3
    for guide in container.opf_xpath('//*[local-name()="guide"]'):
        guide.getparent().remove(guide)

    # Ensure that the cover-image property is set
    cover_id = rmap['_static/' + self.config.epub_cover[0]]
    cover_query = '//opf:item[@id="{}"]'.format(cover_id)
    for item in container.opf_xpath(cover_query):
        item.set('properties', 'cover-image')
    for item in container.opf_xpath('//opf:item[@href="epub-cover.xhtml"]'):
        item.set('properties', 'svg calibre:title-page')

    # Declare the calibre prefix, keeping any prefixes already present
    calibre_prefix = 'calibre: https://calibre-ebook.com'
    for package in container.opf_xpath('//opf:package'):
        existing = package.get('prefix') or ''
        if existing:
            package.set('prefix', existing + ' ' + calibre_prefix)
        else:
            package.set('prefix', calibre_prefix)

    # Remove any <meta cover> tag as it is not needed in epub 3
    for meta in container.opf_xpath('//opf:meta[@name="cover"]'):
        meta.getparent().remove(meta)

    # Remove unreferenced files
    for error in check_links(container):
        if error.__class__ is UnreferencedResource:
            container.remove_item(error.name)

    # Pretty print the OPF
    pretty_opf(container.parsed(container.opf_name))
    container.dirty(container.opf_name)
def mark_as_titlepage(container, name, move_to_start=True):
    '''
    Mark the file ``name`` as the titlepage of the book.

    Any existing ``type="cover"`` references are removed from the guide and
    a new one pointing at ``name`` is added to every guide returned by
    ``get_guides``. The OPF is marked dirty.

    :param move_to_start: When true and ``name`` is in the spine, its spine
        item is made linear and moved to the start of the spine. If ``name``
        is not in the spine, the spine is left untouched.
    '''
    if move_to_start:
        for item, q, linear in container.spine_iter:
            if name != q:
                continue
            # Only touch the spine when the item was actually found.
            # Previously the loop variable was used after the loop, so a
            # missing name moved the *last* spine item to the front and an
            # empty spine raised NameError.
            if not linear:
                item.set('linear', 'yes')
            if item.getparent().index(item) > 0:
                container.insert_into_xml(item.getparent(), item, 0)
            break
    for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
        ref.getparent().remove(ref)

    for guide in get_guides(container):
        container.insert_into_xml(guide, guide.makeelement(
            OPF('reference'), type='cover',
            href=container.name_to_href(name, container.opf_name)))
    container.dirty(container.opf_name)
def read_authors(root, prefixes, refines):
    '''
    Return the authors listed in the OPF metadata, sorted by sequence number.

    Non-empty ``dc:creator`` elements are sorted into three groups:
    creators whose role is ``aut`` (by refinement or ``opf:role``
    attribute), creators with no role at all, and creators with the
    refinement role ``edt``. The first non-empty group, in that order, is
    de-duplicated with ``uniq`` and returned.
    '''
    with_author_role = []
    without_role = []
    editors_map = {}

    def author(item, props, val):
        # Prefer the file-as refinement over the opf:file-as attribute
        file_as = props.get('file-as')
        if file_as:
            sort_name = file_as[0][-1]
        else:
            sort_name = item.get(OPF('file-as')) or None
        position = 0
        display_seq = props.get('display-seq')
        with suppress(Exception):
            position = int(display_seq[0][-1])
        return Author(normalize_whitespace(val), normalize_whitespace(sort_name), position)

    for creator in XPath('./opf:metadata/dc:creator')(root):
        text = (creator.text or '').strip()
        if not text:
            continue
        props = properties_for_id_with_scheme(creator.get('id'), prefixes, refines)
        refined_role = props.get('role')
        attr_role = creator.get(OPF('role'))
        if refined_role:
            if is_relators_role(props, 'aut'):
                with_author_role.append(author(creator, props, text))
            if is_relators_role(props, 'edt'):
                # See https://bugs.launchpad.net/calibre/+bug/1950579
                editor = author(creator, props, text)
                editors_map[editor.name] = editor
        elif attr_role:
            if attr_role.lower() == 'aut':
                with_author_role.append(author(creator, props, text))
        else:
            without_role.append(author(creator, props, text))

    # Editors are only used when there are no authors of any kind
    ans = uniq(with_author_role or without_role or editors_map.values())
    ans.sort(key=attrgetter('seq'))
    return ans
def set_azw3_cover(container, cover_path, report):
    '''
    Set the cover of the book to the image file at ``cover_path``.

    Existing guide references whose type contains "cover" are removed; the
    file named by the last of them is reused when it exists in the
    container, otherwise a new ``cover.jpeg`` item is generated. A cover
    reference to that file is inserted into the first ``<guide>``, the image
    data is copied into the container and the OPF is marked dirty.
    ``report`` is called with 'Cover updated' or 'Cover inserted'.
    '''
    cover_name = None
    for reference in container.opf_xpath(
            '//opf:guide/opf:reference[@href and contains(@type, "cover")]'):
        cover_name = container.href_to_name(reference.get('href'), container.opf_name)
        container.remove_from_xml(reference)

    reused_existing = True
    if cover_name is None or not container.has_name(cover_name):
        new_item = container.generate_item(name='cover.jpeg', id_prefix='cover')
        cover_name = container.href_to_name(new_item.get('href'), container.opf_name)
        reused_existing = False

    cover_href = container.name_to_href(cover_name, container.opf_name)
    guide = container.opf_xpath('//opf:guide')[0]
    new_reference = guide.makeelement(
        OPF('reference'), href=cover_href, type='cover')
    container.insert_into_xml(guide, new_reference)

    with open(cover_path, 'rb') as src, container.open(cover_name, 'wb') as dest:
        shutil.copyfileobj(src, dest)
    container.dirty(container.opf_name)

    if reused_existing:
        report('Cover updated')
    else:
        report('Cover inserted')
def convert_text(self, oeb_book):
    '''
    Convert a text based book.

    After the cover data and fonts are processed, the book is serialized
    to a temporary directory with the ``oeb`` output plugin. The first
    ``*.opf`` file found there is parsed and its spine paths, together with
    its ``toc`` attribute (None if missing), are passed to ``self.write``
    along with ``PDFWriter``.
    '''
    from calibre.ebooks.metadata.opf2 import OPF
    from calibre.ebooks.pdf.render.from_html import PDFWriter

    self.log.debug('Serializing oeb input to disk for processing...')
    self.get_cover_data()
    self.process_fonts()

    with TemporaryDirectory('_pdf_out') as oeb_dir:
        from calibre.customize.ui import plugin_for_output_format
        serializer = plugin_for_output_format('oeb')
        serializer.convert(
            oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)

        opf_files = glob.glob(os.path.join(oeb_dir, '*.opf'))
        opfpath = opf_files[0]
        opf = OPF(opfpath, os.path.dirname(opfpath))
        spine_paths = [entry.path for entry in opf.spine]
        self.write(PDFWriter, spine_paths, getattr(opf, 'toc', None))
def writer(root, prefixes, refines, ival=None):
    '''
    Replace the identifier of type ``name`` in the OPF metadata.

    ``name`` comes from the enclosing scope. Every ``dc:identifier`` whose
    text starts with ``name:`` or whose ``opf:scheme`` attribute equals
    ``name`` is removed, except the one referenced by the package's
    ``unique-identifier`` attribute. When ``ival`` is truthy a new
    ``dc:identifier`` with the text ``name:ival`` is added: directly before
    the package identifier when there is one, otherwise at the end of
    ``<metadata>``.
    '''
    package_uid = root.get('unique-identifier')
    package_identifier = None
    for identifier in XPath('./opf:metadata/dc:identifier')(root):
        if package_uid is not None and package_uid == identifier.get('id'):
            # The package identifier is never removed
            package_identifier = identifier
            continue
        text = (identifier.text or '').strip()
        if text.startswith(name + ':') or identifier.get(OPF('scheme')) == name:
            remove_element(identifier, refines)

    metadata = XPath('./opf:metadata')(root)[0]
    if not ival:
        return
    new_identifier = metadata.makeelement(DC('identifier'))
    new_identifier.text = '%s:%s' % (name, ival)
    if package_identifier is not None:
        parent = package_identifier.getparent()
        parent.insert(parent.index(package_identifier), new_identifier)
    else:
        metadata.append(new_identifier)
def generate_item(self, name, id_prefix=None, media_type=None):
    '''
    Create a manifest ``<item>`` for a new file and return the item.

    The href is derived from ``name`` and the id from ``id_prefix``
    (default ``id``); a numeric suffix is added to either one until it does
    not clash with existing ids, manifest hrefs or files in the container.
    The media type is guessed from ``name`` when not given. The file is
    registered with the container and created empty on disk.
    '''
    id_prefix = id_prefix or 'id'
    media_type = media_type or guess_type(name)
    href = self.name_to_href(name, self.opf_name)
    stem, _, extension = href.rpartition('.')

    # Find an unused id: prefix, prefix1, prefix2, ...
    used_ids = {node.get('id') for node in self.opf_xpath('//*[@id]')}
    suffix = 0
    item_id = id_prefix
    while item_id in used_ids:
        suffix += 1
        item_id = id_prefix + '%d' % suffix

    manifest_hrefs = {
        entry.get('href')
        for entry in self.opf_xpath('//opf:manifest/opf:item[@href]')
    }

    def exists(h):
        return self.exists(self.href_to_name(h, self.opf_name))

    # Find an unused href: stem.ext, stem_1.ext, stem_2.ext, ...
    suffix = 0
    while href in manifest_hrefs or exists(href):
        suffix += 1
        href = '%s_%d.%s' % (stem, suffix, extension)

    manifest = self.opf_xpath('//opf:manifest')[0]
    item = manifest.makeelement(OPF('item'), id=item_id, href=href)
    item.set('media-type', media_type)
    self.insert_into_xml(manifest, item)
    self.dirty(self.opf_name)

    name = self.href_to_name(href, self.opf_name)
    path = self.name_to_abspath(name)
    self.name_path_map[name] = path
    self.mime_map[name] = media_type

    # Ensure that the file corresponding to the newly created item exists
    # otherwise cloned containers will fail when they try to get the number
    # of links to the file
    parent_dir = os.path.dirname(path)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    with open(path, 'wb'):
        pass
    return item
def convert_text(self, oeb_book):
    '''
    Convert a text based book.

    Stored page margins are attached to the matching documents when the
    ``pdf_use_document_margins`` option is set, and script elements and
    attributes whose name starts with ``on`` are blanked in every spine
    document. The book is then serialized to a temporary directory with the
    ``oeb`` output plugin; the first ``*.opf`` file found there is parsed
    and its spine paths and ``toc`` attribute are passed to ``self.write``
    along with ``PDFWriter``.
    '''
    from calibre.ebooks.metadata.opf2 import OPF
    from calibre.ebooks.pdf.render.from_html import PDFWriter

    self.log.debug('Serializing oeb input to disk for processing...')
    self.get_cover_data()
    self.process_fonts()

    if self.opts.pdf_use_document_margins and self.stored_page_margins:
        import json
        for href, margins in iteritems(self.stored_page_margins):
            manifest_item = oeb_book.manifest.hrefs.get(href)
            if manifest_item is None:
                continue
            doc_root = manifest_item.data
            if hasattr(doc_root, 'xpath') and margins:
                doc_root.set(
                    'data-calibre-pdf-output-page-margins', json.dumps(margins))

    # Remove javascript
    for spine_item in self.oeb.spine:
        doc_root = spine_item.data
        if not hasattr(doc_root, 'xpath'):
            continue
        for script in doc_root.xpath('//*[local-name()="script"]'):
            script.text = None
            script.attrib.clear()
        for elem in doc_root.iter('*'):
            handler_attrs = [a for a in elem.attrib if a.startswith('on')]
            for attr in handler_attrs:
                elem.set(attr, '')

    with TemporaryDirectory('_pdf_out') as oeb_dir:
        from calibre.customize.ui import plugin_for_output_format
        serializer = plugin_for_output_format('oeb')
        serializer.convert(
            oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)

        opf_files = glob.glob(os.path.join(oeb_dir, '*.opf'))
        opfpath = opf_files[0]
        opf = OPF(opfpath, os.path.dirname(opfpath))
        spine_paths = [entry.path for entry in opf.spine]
        self.write(PDFWriter, spine_paths, getattr(opf, 'toc', None))