def make_bookizip(self, filename=None, use_cache=False):
    """Bundle every chapter, image and the metadata into a booki-zip.

    The resulting zip is the input for conversion to epub.

    Arguments:
        filename: where to write the zip; when None it is derived from
            self.filepath(self.bookname).
        use_cache: passed through to each TWikiChapter; when true,
            images that have been fetched on previous runs are reused.

    Returns the filename reported by the BookiZip object.
    """
    self._fetch_metadata()
    target = self.filepath(self.bookname) if filename is None else filename
    package = BookiZip(target, self.metadata)

    seen_images = set()
    for chapter_id in self.metadata['spine']:
        html = self.get_chapter_html(chapter_id, wrapped=True)
        chapter = TWikiChapter(
            self.server,
            self.book,
            chapter_id,
            html,
            use_cache=use_cache,
        )
        linked = chapter.localise_links()
        chapter.fix_bad_structure()
        seen_images.update(linked)
        package.add_to_package(
            chapter_id,
            chapter_id + '.html',
            chapter.as_html(),
            **self.credits.get(chapter_id, {})
        )

    # Images go in only after every chapter has been processed, so that
    # duplicates collapse in the set.  The image cache is reached through
    # the last chapter object built by the loop above.
    for image_url in seen_images:
        payload = chapter.image_cache.read_local_url(image_url)
        # XXX img ownership: where is it?
        package.add_to_package(image_url, image_url, payload)

    package.finish()
    return package.filename
def make_bookizip(self, zfn):
    """Split up the document and construct a booki-toc for it.

    The concatenated document is split into chapters; each chapter kept
    by drop_empty_chapters() is serialised and added to a new BookiZip
    as '<ID>.html'.  Manifest items whose mimetype is not a markup type
    are copied across unchanged.  Finally a table of contents and a
    nested metadata mapping (namespace -> key -> scheme -> [values]) are
    stored in ``bz.info`` and the zip is finished.

    Arguments:
        zfn: filename handed to BookiZip.  It is also used, together
            with a timestamp, as a fallback identifier when the source
            has no dc:identifier.

    Returns nothing; the outcome is the finished BookiZip.
    """
    doc = self.concat_document()
    bz = BookiZip(zfn)

    chapters = split_tree(doc)  # destroys doc.
    real_chapters = drop_empty_chapters(chapters)

    dc_meta = self.metadata[DC]
    rightsholders = [c for c, extra in dc_meta.get('creator', ())]
    contributors = rightsholders + [
        c for c, extra in dc_meta.get('contributor', ())
    ]

    primary_id = dc_meta.get('identifier', [[None]])[0][0]
    if primary_id is None:
        # No identifier in the source: invent one from filename + time.
        primary_id = "%s-%s" % (zfn, time.strftime('%Y.%m.%d-%H.%M.%S'))
        src_id = None
    else:
        src_id = self.source_id
    log(primary_id)

    spine = []
    for c in real_chapters:
        # c.tree may be a whole tree or a bare element; fall back to the
        # object itself when it has no usable getroot().
        try:
            root = c.tree.getroot()
        except Exception:
            root = c.tree
        for attr in ('xmlns', 'version', 'xml:lang'):
            if attr in root.attrib:
                del root.attrib[attr]
        if c.title:
            head = root.makeelement('head')
            _title = etree.SubElement(head, 'title')
            _title.text = c.title
            root.insert(0, head)
        blob = lxml.html.tostring(c.tree)
        bz.add_to_package(c.ID, '%s.html' % c.ID, blob,
                          mediatype='text/html',
                          contributors=contributors,
                          rightsholders=rightsholders)
        spine.append(c.ID)

    # add the images and other non-html data unchanged.
    # (iteritems/unicode: this file targets Python 2.)
    for item_id, (fn, mimetype) in self.manifest.iteritems():
        if isinstance(fn, unicode):
            log("Hateful unicode: %r" % fn)
        if mimetype not in MARKUP_TYPES:
            blob = self.zip.read(fn)
            bz.add_to_package(item_id, self.media_map[fn], blob, mimetype,
                              contributors=contributors,
                              rightsholders=rightsholders)

    # now to construct a table of contents
    lang = self.find_language()

    # TOC points seen so far whose id is not in the spine; each one
    # receives the url of the next point that does have one.
    deferred_urls = []

    def write_toc(point, section):
        """Append a TOC entry for `point` (and its children) to `section`."""
        tocpoint = {}
        title = find_good_label(point['labels'], lang)
        if title:
            tocpoint['title'] = title
        ID = point['id']
        if ID in spine:
            # NOTE(review): manifest values are unpacked as
            # (filename, mimetype) pairs above, so a hit here would give
            # that pair rather than a url string -- confirm chapter IDs
            # never collide with manifest ids.
            tocpoint['url'] = self.manifest.get(ID, ID + '.html')
            while deferred_urls:
                tp = deferred_urls.pop()
                tp['url'] = tocpoint['url']
        else:
            deferred_urls.append(tocpoint)
        if point['points']:
            tocpoint['children'] = []
            for child in point['points']:
                write_toc(child, tocpoint['children'])
        section.append(tocpoint)

    toc = []
    for p in self.ncxdata['navmap']['points']:
        write_toc(p, toc)

    metadata = {
        FM: {'book': {}, 'server': {}},
        DC: {},
    }

    for namespace, keys in self.metadata.items():
        # Only create namespaces that are not pre-seeded above.  Testing
        # the literal string 'namespace' here would always succeed and
        # wipe the FM 'book'/'server' defaults that are read below.
        if namespace not in metadata:
            metadata[namespace] = {}
        log(keys)
        for key, values in keys.items():
            dest = metadata[namespace].setdefault(key, {})
            for value, extra in values:
                # Values are grouped by their 'scheme' attribute, else by
                # 'role', else under the empty string.
                scheme = ''
                if extra:
                    for x in ('scheme', 'role'):
                        if x in extra:
                            scheme = extra[x]
                            break
                dest.setdefault(scheme, []).append(value)

    if not metadata[FM]['book']:
        metadata[FM]['book'][''] = [
            ''.join(x for x in primary_id if x.isalnum())
        ]
    if not metadata[FM]['server']:
        metadata[FM]['server'][''] = [config.DEFAULT_BOOKI_SERVER]

    if src_id is not None:
        # duplicate the main ID as a branded ID for the epub provided
        # (as dictated by espri.cgi)
        ids = metadata[DC]['identifier']
        if ids.get(src_id) is None:
            ids[src_id] = [primary_id]

    log(metadata)

    bz.info = {
        'spine': spine,
        'TOC': toc,
        'metadata': metadata,
        'version': '1',
    }
    bz.finish()
def make_bookizip(self, zfn):
    """Split up the document and construct a booki-toc for it.

    The concatenated document is split into chapters; each chapter kept
    by drop_empty_chapters() is serialised and added to a new BookiZip
    as "<ID>.html".  Manifest items whose mimetype is not a markup type
    are copied across unchanged.  Finally a table of contents and a
    nested metadata mapping (namespace -> key -> scheme -> [values]) are
    stored in ``bz.info`` and the zip is finished.

    Arguments:
        zfn: filename handed to BookiZip.  It is also used, together
            with a timestamp, as a fallback identifier when the source
            has no dc:identifier.

    Returns nothing; the outcome is the finished BookiZip.
    """
    doc = self.concat_document()
    bz = BookiZip(zfn)

    chapters = split_tree(doc)  # destroys doc.
    real_chapters = drop_empty_chapters(chapters)

    dc_meta = self.metadata[DC]
    rightsholders = [c for c, extra in dc_meta.get("creator", ())]
    contributors = rightsholders + [
        c for c, extra in dc_meta.get("contributor", ())
    ]

    primary_id = dc_meta.get("identifier", [[None]])[0][0]
    if primary_id is None:
        # No identifier in the source: invent one from filename + time.
        primary_id = "%s-%s" % (zfn, time.strftime("%Y.%m.%d-%H.%M.%S"))
        src_id = None
    else:
        src_id = self.source_id
    log(primary_id)

    spine = []
    for c in real_chapters:
        # c.tree may be a whole tree or a bare element; fall back to the
        # object itself when it has no usable getroot().
        try:
            root = c.tree.getroot()
        except Exception:
            root = c.tree
        for attr in ("xmlns", "version", "xml:lang"):
            if attr in root.attrib:
                del root.attrib[attr]
        if c.title:
            head = root.makeelement("head")
            _title = etree.SubElement(head, "title")
            _title.text = c.title
            root.insert(0, head)
        blob = lxml.html.tostring(c.tree)
        bz.add_to_package(
            c.ID,
            "%s.html" % c.ID,
            blob,
            mediatype="text/html",
            contributors=contributors,
            rightsholders=rightsholders,
        )
        spine.append(c.ID)

    # add the images and other non-html data unchanged.
    # (iteritems/unicode: this file targets Python 2.)
    for item_id, (fn, mimetype) in self.manifest.iteritems():
        if isinstance(fn, unicode):
            log("Hateful unicode: %r" % fn)
        if mimetype not in MARKUP_TYPES:
            blob = self.zip.read(fn)
            bz.add_to_package(
                item_id,
                self.media_map[fn],
                blob,
                mimetype,
                contributors=contributors,
                rightsholders=rightsholders,
            )

    # now to construct a table of contents
    lang = self.find_language()

    # TOC points seen so far whose id is not in the spine; each one
    # receives the url of the next point that does have one.
    deferred_urls = []

    def write_toc(point, section):
        """Append a TOC entry for `point` (and its children) to `section`."""
        tocpoint = {}
        title = find_good_label(point["labels"], lang)
        if title:
            tocpoint["title"] = title
        ID = point["id"]
        if ID in spine:
            # NOTE(review): manifest values are unpacked as
            # (filename, mimetype) pairs above, so a hit here would give
            # that pair rather than a url string -- confirm chapter IDs
            # never collide with manifest ids.
            tocpoint["url"] = self.manifest.get(ID, ID + ".html")
            while deferred_urls:
                tp = deferred_urls.pop()
                tp["url"] = tocpoint["url"]
        else:
            deferred_urls.append(tocpoint)
        if point["points"]:
            tocpoint["children"] = []
            for child in point["points"]:
                write_toc(child, tocpoint["children"])
        section.append(tocpoint)

    toc = []
    for p in self.ncxdata["navmap"]["points"]:
        write_toc(p, toc)

    metadata = {FM: {"book": {}, "server": {}}, DC: {}}

    for namespace, keys in self.metadata.items():
        # Only create namespaces that are not pre-seeded above.  Testing
        # the literal string "namespace" here would always succeed and
        # wipe the FM "book"/"server" defaults that are read below.
        if namespace not in metadata:
            metadata[namespace] = {}
        log(keys)
        for key, values in keys.items():
            dest = metadata[namespace].setdefault(key, {})
            for value, extra in values:
                # Values are grouped by their "scheme" attribute, else by
                # "role", else under the empty string.
                scheme = ""
                if extra:
                    for x in ("scheme", "role"):
                        if x in extra:
                            scheme = extra[x]
                            break
                dest.setdefault(scheme, []).append(value)

    if not metadata[FM]["book"]:
        metadata[FM]["book"][""] = [
            "".join(x for x in primary_id if x.isalnum())
        ]
    if not metadata[FM]["server"]:
        metadata[FM]["server"][""] = [config.DEFAULT_BOOKI_SERVER]

    if src_id is not None:
        # duplicate the main ID as a branded ID for the epub provided
        # (as dictated by espri.cgi)
        ids = metadata[DC]["identifier"]
        if ids.get(src_id) is None:
            ids[src_id] = [primary_id]

    log(metadata)

    bz.info = {
        "spine": spine,
        "TOC": toc,
        "metadata": metadata,
        "version": "1",
    }
    bz.finish()
def make_bookizip(self, zfn):
    """Split up the document and construct a booki-toc for it.

    The concatenated document is split into chapters; each chapter kept
    by drop_empty_chapters() is serialised and added to a new BookiZip
    as '<ID>.html'.  Manifest items whose mimetype is not a markup type
    are copied across unchanged.  Finally a table of contents and a
    nested metadata mapping (namespace -> key -> scheme -> [values]) are
    stored in ``bz.info`` and the zip is finished.

    Arguments:
        zfn: filename handed to BookiZip.  It is also used, together
            with a timestamp, as a fallback identifier when the source
            has no dc:identifier.

    Returns nothing; the outcome is the finished BookiZip.
    """
    doc = self.concat_document()
    bz = BookiZip(zfn)

    chapters = split_tree(doc)  # destroys doc.
    real_chapters = drop_empty_chapters(chapters)

    dc_meta = self.metadata[DC]
    rightsholders = [c for c, extra in dc_meta.get('creator', ())]
    contributors = rightsholders + [
        c for c, extra in dc_meta.get('contributor', ())
    ]

    primary_id = dc_meta.get('identifier', [[None]])[0][0]
    if primary_id is None:
        # No identifier in the source: invent one from filename + time.
        primary_id = "%s-%s" % (zfn, time.strftime('%Y.%m.%d-%H.%M.%S'))
        src_id = None
    else:
        src_id = self.source_id
    log(primary_id)

    spine = []
    for c in real_chapters:
        # c.tree may be a whole tree or a bare element; fall back to the
        # object itself when it has no usable getroot().
        try:
            root = c.tree.getroot()
        except Exception:
            root = c.tree
        for attr in ('xmlns', 'version', 'xml:lang'):
            if attr in root.attrib:
                del root.attrib[attr]
        if c.title:
            head = root.makeelement('head')
            _title = etree.SubElement(head, 'title')
            _title.text = c.title
            root.insert(0, head)
        blob = lxml.html.tostring(c.tree)
        bz.add_to_package(c.ID, '%s.html' % c.ID, blob,
                          mediatype='text/html',
                          contributors=contributors,
                          rightsholders=rightsholders)
        spine.append(c.ID)

    # add the images and other non-html data unchanged.
    # (iteritems/unicode: this file targets Python 2.)
    for item_id, (fn, mimetype) in self.manifest.iteritems():
        if isinstance(fn, unicode):
            log("Hateful unicode: %r" % fn)
        if mimetype not in MARKUP_TYPES:
            blob = self.zip.read(fn)
            bz.add_to_package(item_id, self.media_map[fn], blob, mimetype,
                              contributors=contributors,
                              rightsholders=rightsholders)

    # now to construct a table of contents
    lang = self.find_language()

    # TOC points seen so far whose id is not in the spine; each one
    # receives the url of the next point that does have one.
    deferred_urls = []

    def write_toc(point, section):
        """Append a TOC entry for `point` (and its children) to `section`."""
        tocpoint = {}
        title = find_good_label(point['labels'], lang)
        if title:
            tocpoint['title'] = title
        ID = point['id']
        if ID in spine:
            # NOTE(review): manifest values are unpacked as
            # (filename, mimetype) pairs above, so a hit here would give
            # that pair rather than a url string -- confirm chapter IDs
            # never collide with manifest ids.
            tocpoint['url'] = self.manifest.get(ID, ID + '.html')
            while deferred_urls:
                tp = deferred_urls.pop()
                tp['url'] = tocpoint['url']
                # A deferred point only has a 'title' key when a label
                # was found for it, so read it with .get() to keep this
                # log line from raising KeyError.
                log('%r has deferred url: %r' % (tp.get('title'), tp['url']))
        else:
            deferred_urls.append(tocpoint)
        if point['points']:
            tocpoint['children'] = []
            for child in point['points']:
                write_toc(child, tocpoint['children'])
        section.append(tocpoint)

    toc = []
    for p in self.ncxdata['navmap']['points']:
        write_toc(p, toc)

    metadata = {
        FM: {'book': {}, 'server': {}},
        DC: {},
    }

    for namespace, keys in self.metadata.items():
        # Only create namespaces that are not pre-seeded above.  Testing
        # the literal string 'namespace' here would always succeed and
        # wipe the FM 'book'/'server' defaults that are read below.
        if namespace not in metadata:
            metadata[namespace] = {}
        log(keys)
        for key, values in keys.items():
            dest = metadata[namespace].setdefault(key, {})
            for value, extra in values:
                # Values are grouped by their 'scheme' attribute, else by
                # 'role', else under the empty string.
                scheme = ''
                if extra:
                    for x in ('scheme', 'role'):
                        if x in extra:
                            scheme = extra[x]
                            break
                dest.setdefault(scheme, []).append(value)

    if not metadata[FM]['book']:
        metadata[FM]['book'][''] = [
            ''.join(x for x in primary_id if x.isalnum())
        ]
    if not metadata[FM]['server']:
        metadata[FM]['server'][''] = [config.DEFAULT_BOOKI_SERVER]

    if src_id is not None:
        # duplicate the main ID as a branded ID for the epub provided
        # (as dictated by espri.cgi)
        ids = metadata[DC]['identifier']
        if ids.get(src_id) is None:
            ids[src_id] = [primary_id]

    log(metadata)

    bz.info = {
        'spine': spine,
        'TOC': toc,
        'metadata': metadata,
        'version': '1',
    }
    bz.finish()