def on_book(self):
    """Convert the Nordic EPUB at ``self.book["source"]`` and store the result.

    Steps, in order: validate the source (is an EPUB, has a dc:identifier
    that matches the file name), copy it twice (once untouched, once with
    every image replaced by an empty file), clean up nav.xhtml and the OPF
    with XSLT, run the "nordic-epub3-to-html" Daisy Pipeline job, clean up
    the resulting HTML with XSLT, build a new EPUB from the HTML with
    ``Epub.from_html``, put the real images back and hand the result to
    ``self.utils.filesystem.storeBook``.

    Returns:
        True when every step succeeded and the book was passed to
        ``storeBook``; False as soon as any validation or conversion
        step fails.
    """
    report = self.utils.report
    source = self.book["source"]
    book_name = self.book["name"]

    report.attachment(None, source, "DEBUG")
    epub = Epub(report, source)

    # Best effort: the title only decorates the final report title, so any
    # failure to read or concatenate it just leaves the suffix empty.
    epubTitle = ""
    try:
        epubTitle = " (" + epub.meta("dc:title") + ") "
    except Exception:
        pass

    # check that this is an EPUB
    if not epub.isepub():
        return False

    identifier = epub.identifier()
    if not identifier:
        report.error(
            book_name + ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
        return False

    if identifier != book_name.split(".")[0]:
        report.error(
            book_name + ": Filnavn stemmer ikke overens med dc:identifier: {}".format(
                identifier))
        return False

    # The tempfile objects are kept in locals for the whole call: tempfile
    # removes the underlying file/directory once the object is closed or
    # garbage collected, so only the ``.name`` would not be enough.
    temp_xml_file_obj = tempfile.NamedTemporaryFile()
    temp_xml_file = temp_xml_file_obj.name

    report.info("Lager en kopi av EPUBen")
    temp_epubdir_withimages_obj = tempfile.TemporaryDirectory()
    temp_epubdir_withimages = temp_epubdir_withimages_obj.name
    Filesystem.copy(report, source, temp_epubdir_withimages)

    report.info("Lager en kopi av EPUBen med tomme bildefiler")
    temp_epubdir_obj = tempfile.TemporaryDirectory()
    temp_epubdir = temp_epubdir_obj.name
    Filesystem.copy(report, temp_epubdir_withimages, temp_epubdir)
    # Swap each image for an empty file with the same name; the real
    # images are restored from ``temp_epubdir_withimages`` further down.
    for root, _dirs, filenames in os.walk(
            os.path.join(temp_epubdir, "EPUB", "images")):
        for filename in filenames:
            fullpath = os.path.join(root, filename)
            os.remove(fullpath)
            Path(fullpath).touch()
    temp_epub = Epub(report, temp_epubdir)

    stylesheet_dir = os.path.join(Xslt.xslt_dir, NordicToNlbpub.uid)

    report.info("Rydder opp i nordisk EPUB nav.xhtml")
    # Guard against a missing nav/OPF reference before joining paths;
    # os.path.join raises TypeError when handed None.
    nav_path = temp_epub.nav_path()
    if not nav_path:
        report.error(
            book_name + ": Klarte ikke å finne navigasjonsfila i OPFen.")
        return False
    nav_path = os.path.join(temp_epubdir, nav_path)

    opf_path = temp_epub.opf_path()
    if not opf_path:
        report.error(book_name + ": Klarte ikke å finne OPF-fila i EPUBen.")
        return False
    opf_path = os.path.join(temp_epubdir, opf_path)

    xslt = Xslt(self,
                stylesheet=os.path.join(stylesheet_dir, "nordic-cleanup-nav.xsl"),
                source=nav_path,
                target=temp_xml_file,
                parameters={
                    "cover": " ".join([item["href"] for item in temp_epub.spine()]),
                    "base": os.path.dirname(opf_path) + "/",
                })
    if not xslt.success:
        return False
    shutil.copy(temp_xml_file, nav_path)

    report.info("Rydder opp i nordisk EPUB package.opf")
    xslt = Xslt(self,
                stylesheet=os.path.join(stylesheet_dir, "nordic-cleanup-opf.xsl"),
                source=opf_path,
                target=temp_xml_file)
    if not xslt.success:
        return False
    shutil.copy(temp_xml_file, opf_path)

    html_dir_obj = tempfile.TemporaryDirectory()
    html_dir = html_dir_obj.name
    html_file = os.path.join(html_dir, identifier + ".xhtml")

    # The library is only looked up so that it can be logged here.
    report.info("Finner ut hvilket bibliotek boka tilhører…")
    edition_metadata = Metadata.get_edition_from_api(identifier, report=report)
    if edition_metadata is not None and edition_metadata["library"] is not None:
        library = edition_metadata["library"]
    else:
        library = Metadata.get_library_from_identifier(identifier, report)
    report.info(f"Boka tilhører '{library}'")

    report.info("Zipper oppdatert versjon av EPUBen...")
    temp_epub.asFile(rebuild=True)

    report.info("Konverterer fra Nordisk EPUB 3 til Nordisk HTML 5...")
    epub_file = temp_epub.asFile()
    epub_basename = os.path.basename(epub_file)
    # NOTE(review): the version list presumably names the accepted
    # (pipeline, script) version pairs - confirm against DaisyPipelineJob.
    with DaisyPipelineJob(self,
                          "nordic-epub3-to-html",
                          {"epub": epub_basename, "fail-on-error": "false"},
                          pipeline_and_script_version=[
                              ("1.13.6", "1.4.6"),
                              ("1.13.4", "1.4.5"),
                              ("1.12.1", "1.4.2"),
                              ("1.11.1-SNAPSHOT", "1.3.0"),
                          ],
                          context={epub_basename: epub_file}) as dp2_job_convert:
        if dp2_job_convert.status != "SUCCESS":
            report.error("Klarte ikke å konvertere boken")
            return False

        dp2_html_dir = os.path.join(dp2_job_convert.dir_output, "output-dir", identifier)
        dp2_html_file = os.path.join(dp2_html_dir, identifier + ".xhtml")

        if not os.path.isdir(dp2_html_dir):
            report.error(
                "Finner ikke den konverterte boken: {}".format(dp2_html_dir))
            return False

        if not os.path.isfile(dp2_html_file):
            report.error(
                "Finner ikke den konverterte boken: {}".format(dp2_html_file))
            report.info("Kanskje filnavnet er forskjellig fra IDen?")
            return False

        # Everything needed from the job is copied out here; nothing after
        # the ``with`` block refers to the job again.
        Filesystem.copy(report, dp2_html_dir, html_dir)

    # Each pass rewrites the HTML file in place via the shared temp file.
    html_cleanup_steps = (
        ("Rydder opp i nordisk HTML", "nordic-cleanup.xsl"),
        ("Rydder opp i ns0 i page-normal", "ns0-cleanup.xsl"),
        ("Rydder opp i innholdsfortegnelsen", "fix-toc-span.xsl"),
    )
    for message, stylesheet in html_cleanup_steps:
        report.info(message)
        xslt = Xslt(self,
                    stylesheet=os.path.join(stylesheet_dir, stylesheet),
                    source=html_file,
                    target=temp_xml_file)
        if not xslt.success:
            return False
        shutil.copy(temp_xml_file, html_file)

    report.info("Legger til EPUB-filer (OPF, NAV, container.xml, mediatype)...")
    nlbpub_tempdir_obj = tempfile.TemporaryDirectory()
    nlbpub_tempdir = nlbpub_tempdir_obj.name

    nlbpub = Epub.from_html(self, html_dir, nlbpub_tempdir)
    if nlbpub is None:
        return False

    report.info("Erstatter tomme bildefiler med faktiske bildefiler")
    for root, _dirs, filenames in os.walk(
            os.path.join(nlbpub_tempdir, "EPUB", "images")):
        for filename in filenames:
            fullpath = os.path.join(root, filename)
            relpath = os.path.relpath(fullpath, nlbpub_tempdir)
            os.remove(fullpath)
            Filesystem.copy(report,
                            os.path.join(temp_epubdir_withimages, relpath),
                            fullpath)
    temp_epub = Epub(report, temp_epubdir)

    nlbpub.update_prefixes()

    report.info("Boken ble konvertert. Kopierer til NLBPUB-arkiv.")
    archived_path, _stored = self.utils.filesystem.storeBook(
        nlbpub.asDir(), temp_epub.identifier(), overwrite=self.overwrite)
    report.attachment(None, archived_path, "DEBUG")
    report.title = self.title + ": " + identifier + " ble konvertert 👍😄" + epubTitle
    return True
def on_book(self):
    """Turn the NLBPUB at ``self.book["source"]`` into a multi-file EPUB.

    Works on a temporary copy of the book: flattens the single HTML file,
    splits it into several XHTML files, rewrites the OPF spine, generates a
    new navigation document, updates OPF properties, validates with
    Epubcheck when it is available, and finally hands the directory to
    ``self.utils.filesystem.storeBook`` and notifies
    ``Bibliofil.book_available``.

    Every failure path sets ``self.utils.report.title`` to a failure title
    before returning.

    Returns:
        True on success. Failure paths use a bare ``return`` and therefore
        yield None.
    """
    report = self.utils.report
    book_name = self.book["name"]

    def fail(label, suffix=""):
        # Single place that builds the failure title used by all exits.
        report.title = self.title + ": " + label + " feilet 😭👎" + suffix

    report.attachment(None, self.book["source"], "DEBUG")
    epub = Epub(report, self.book["source"])

    # Best effort: the title only decorates the report title, so any
    # failure to read or concatenate it just leaves the suffix empty.
    epubTitle = ""
    try:
        epubTitle = " (" + epub.meta("dc:title") + ") "
    except Exception:
        pass

    # check that this is an EPUB
    if not epub.isepub():
        fail(book_name)
        return

    if not epub.identifier():
        report.error(
            book_name + ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
        fail(book_name)
        return

    # ---------- make a copy of the EPUB ----------

    # The TemporaryDirectory object is kept in a local so the directory
    # lives until this method returns.
    temp_epubdir_obj = tempfile.TemporaryDirectory()
    temp_epubdir = temp_epubdir_obj.name
    Filesystem.copy(report, self.book["source"], temp_epubdir)
    temp_epub = Epub(report, temp_epubdir)

    # ---------- adapt the HTML file using XSLT ----------

    opf_path = temp_epub.opf_path()
    if not opf_path:
        report.error(book_name + ": Klarte ikke å finne OPF-fila i EPUBen.")
        fail(book_name, epubTitle)
        return
    opf_path = os.path.join(temp_epubdir, opf_path)
    opf_xml = ElementTree.parse(opf_path).getroot()

    # href of the manifest item referenced by the first spine entry
    first_spine_href = opf_xml.xpath(
        "/*/*[local-name()='manifest']/*[@id = /*/*[local-name()='spine']/*[1]/@idref]/@href"
    )
    html_file = first_spine_href[0] if first_spine_href else None
    if not html_file:
        report.error(book_name + ": Klarte ikke å finne HTML-fila i OPFen.")
        fail(book_name, epubTitle)
        return
    html_file = os.path.join(os.path.dirname(opf_path), html_file)
    if not os.path.isfile(html_file):
        report.error(book_name + ": Klarte ikke å finne HTML-fila.")
        fail(book_name, epubTitle)
        return

    temp_xml_obj = tempfile.NamedTemporaryFile()
    temp_xml = temp_xml_obj.name
    stylesheet_dir = os.path.join(Xslt.xslt_dir, NlbpubToEpub.uid)
    content_dir = os.path.dirname(html_file)

    report.info("Flater ut NLBPUB")
    xslt = Xslt(self,
                stylesheet=os.path.join(stylesheet_dir, "nlbpub-flatten.xsl"),
                source=html_file,
                target=temp_xml)
    if not xslt.success:
        fail(epub.identifier(), epubTitle)
        return
    shutil.copy(temp_xml, html_file)

    report.info("Deler opp NLBPUB i flere HTML-filer")
    xslt = Xslt(self,
                stylesheet=os.path.join(stylesheet_dir, "nlbpub-split.xsl"),
                source=html_file,
                target=temp_xml,
                parameters={"output-dir": content_dir})
    if not xslt.success:
        fail(epub.identifier(), epubTitle)
        return
    # The original single HTML file is removed once the split has run.
    os.remove(html_file)

    # Every .xhtml file left next to the old HTML file, in sorted order,
    # except the navigation document and the old file's own name.
    excluded = ["nav.xhtml", os.path.basename(html_file)]
    spine_hrefs = [
        href for href in sorted(os.listdir(content_dir))
        if href.endswith(".xhtml") and href not in excluded
    ]

    report.info("Oppdaterer OPF-fil")
    xslt = Xslt(self,
                stylesheet=os.path.join(stylesheet_dir, "update-opf.xsl"),
                source=opf_path,
                target=temp_xml,
                parameters={"spine-hrefs": ",".join(spine_hrefs)})
    if not xslt.success:
        fail(epub.identifier(), epubTitle)
        return
    shutil.copy(temp_xml, opf_path)

    # Guard against a missing nav reference before joining paths;
    # os.path.join raises TypeError when handed None.
    nav_path = temp_epub.nav_path()
    if not nav_path:
        report.error(
            book_name + ": Klarte ikke å finne navigasjonsfila i OPFen.")
        fail(book_name, epubTitle)
        return
    nav_path = os.path.join(temp_epubdir, nav_path)

    report.info("Lager nytt navigasjonsdokument")
    # This step writes straight to the nav file instead of the temp file.
    xslt = Xslt(self,
                stylesheet=os.path.join(stylesheet_dir, "generate-nav.xsl"),
                source=opf_path,
                target=nav_path)
    if not xslt.success:
        fail(epub.identifier(), epubTitle)
        return

    report.info("Legger til properties i OPF etter behov")
    temp_epub.update_opf_properties()

    if Epubcheck.isavailable():
        epubcheck = Epubcheck(self, opf_path)
        if not epubcheck.success:
            fail(epub.identifier(), epubTitle)
            return
    else:
        report.warn("Epubcheck not available, EPUB will not be validated!")

    report.info("Boken ble konvertert. Kopierer til e-bok-arkiv.")
    archived_path, _stored = self.utils.filesystem.storeBook(
        temp_epubdir, temp_epub.identifier())
    report.attachment(None, archived_path, "DEBUG")
    Bibliofil.book_available(NlbpubToEpub.publication_format,
                             temp_epub.identifier())
    report.title = self.title + ": " + epub.identifier() + " ble konvertert 👍😄" + epubTitle
    return True
def on_book(self):
    """Prepare the NLBPUB at ``self.book["source"]`` as a narration EPUB.

    Works on a temporary copy of the book: runs a fixed sequence of XSLT
    transformations on the HTML file, regenerates the OPF metadata from the
    HTML (``Epub.html_to_opf``), follows the HTML file to the name the new
    OPF refers to, regenerates the navigation document
    (``Epub.html_to_nav``), optionally copies a logo next to the HTML file,
    and hands the zipped result to ``self.utils.filesystem.storeBook``.

    Every failure path sets ``self.utils.report.title`` to a failure title
    before returning.

    Returns:
        True on success, False on any failure.
    """
    report = self.utils.report
    book_name = self.book["name"]

    def fail(label, suffix=""):
        # Single place that builds the failure title; returns False so the
        # callers can write ``return fail(...)``.
        report.title = self.title + ": " + label + " feilet 😭👎" + suffix
        return False

    report.attachment(None, self.book["source"], "DEBUG")
    epub = Epub(report, self.book["source"])

    # Best effort: the title only decorates the report title, so any
    # failure to read or concatenate it just leaves the suffix empty.
    epubTitle = ""
    try:
        epubTitle = " (" + epub.meta("dc:title") + ") "
    except Exception:
        pass

    # check that this is an EPUB
    if not epub.isepub():
        return fail(book_name)

    if not epub.identifier():
        report.error(
            book_name + ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
        return fail(book_name)

    # ---------- make a copy of the EPUB ----------

    # The TemporaryDirectory object is kept in a local so the directory
    # lives until this method returns.
    narration_epubdir_obj = tempfile.TemporaryDirectory()
    narration_epubdir = narration_epubdir_obj.name
    Filesystem.copy(report, self.book["source"], narration_epubdir)
    nlbpub = Epub(report, narration_epubdir)

    # ---------- adapt the HTML file using XSLT ----------

    spine_first_href_xpath = (
        "/*/*[local-name()='manifest']/*[@id = /*/*[local-name()='spine']/*[1]/@idref]/@href"
    )

    opf_path = nlbpub.opf_path()
    if not opf_path:
        report.error(book_name + ": Klarte ikke å finne OPF-fila i EPUBen.")
        return fail(book_name, epubTitle)
    opf_path = os.path.join(narration_epubdir, opf_path)
    opf_dir = os.path.dirname(opf_path)

    xml = ElementTree.parse(opf_path).getroot()
    hrefs = xml.xpath(spine_first_href_xpath)
    html_file = hrefs[0] if hrefs else None
    if not html_file:
        report.error(book_name + ": Klarte ikke å finne HTML-fila i OPFen.")
        return fail(book_name, epubTitle)
    html_file = os.path.join(opf_dir, html_file)
    if not os.path.isfile(html_file):
        report.error(book_name + ": Klarte ikke å finne HTML-fila.")
        return fail(book_name, epubTitle)

    temp_html_obj = tempfile.NamedTemporaryFile()
    temp_html = temp_html_obj.name

    # (log message, directory holding the stylesheet, stylesheet file name).
    # Order matters: each step reads the HTML produced by the previous one.
    narration_xslt_dir = os.path.join(NlbpubToNarrationEpub.xslt_dir,
                                      NlbpubToNarrationEpub.uid)
    shared_narration_xslt_dir = os.path.join(Xslt.xslt_dir,
                                             NlbpubToNarrationEpub.uid)
    xslt_steps = (
        ("Fjerner elementer som ikke skal være med i lydboka...",
         narration_xslt_dir, "ta-vekk-innhold.xsl"),
        ("Fikser Webarch-oppmerking",
         shared_narration_xslt_dir, "webarch-fixup.xsl"),
        ("Fikser dikt-oppmerking",
         shared_narration_xslt_dir, "unwrap-poem-chapters.xsl"),
        ("Lager usynlige overskrifter der det trengs...",
         os.path.join(Xslt.xslt_dir, PrepareForEbook.uid), "create-hidden-headlines.xsl"),
        ("Tilpasser innhold for innlesing...",
         narration_xslt_dir, "prepare-for-narration.xsl"),
        ("Lager synkroniseringspunkter...",
         narration_xslt_dir, "lag-synkroniseringspunkter.xsl"),
        ("Gjør HTMLen litt penere...",
         os.path.join(Xslt.xslt_dir, Epub.uid), "pretty-print.xsl"),
    )
    for message, stylesheet_dir, stylesheet in xslt_steps:
        report.info(message)
        report.debug(stylesheet)
        report.debug(" source = " + html_file)
        report.debug(" target = " + temp_html)
        xslt = Xslt(self,
                    stylesheet=os.path.join(stylesheet_dir, stylesheet),
                    source=html_file,
                    target=temp_html)
        if not xslt.success:
            return fail(epub.identifier(), epubTitle)
        # Write the transformed result back over the HTML file.
        shutil.copy(temp_html, html_file)

    # ---------- replace metadata in the OPF with metadata from the HTML ----------

    temp_opf_obj = tempfile.NamedTemporaryFile()
    temp_opf = temp_opf_obj.name
    xslt = Epub.html_to_opf(self, opf_path, temp_opf)
    if not xslt.success:
        return fail(epub.identifier(), epubTitle)
    shutil.copy(temp_opf, opf_path)

    # ---------- get the new file name from the OPF (it changes based on the book number) ----------

    new_html_file = None
    try:
        xml = ElementTree.parse(opf_path).getroot()
        new_hrefs = xml.xpath(spine_first_href_xpath)
        if new_hrefs:
            new_html_file = os.path.join(opf_dir, new_hrefs[0])
    except Exception:
        report.info(traceback.format_exc(), preformatted=True)
    # An unparsable OPF and an OPF without a spine/manifest match are both
    # treated as "HTML file not found"; copying to a None target would
    # otherwise raise further down.
    if not new_html_file:
        report.error(book_name + ": Klarte ikke å finne HTML-fila i OPFen.")
        return fail(book_name, epubTitle)

    if html_file != new_html_file:
        shutil.copy(html_file, new_html_file)
        os.remove(html_file)
        html_file = new_html_file

    # ---------- regenerate nav.xhtml ----------

    nav_path = nlbpub.nav_path()
    if not nav_path:
        report.error(
            book_name + ": Klarte ikke å finne navigasjonsfila i OPFen.")
        return fail(book_name, epubTitle)
    nav_path = os.path.join(narration_epubdir, nav_path)

    xslt = Epub.html_to_nav(self, html_file, nav_path)
    if not xslt.success:
        return fail(epub.identifier(), epubTitle)

    # ---------- add logo ----------

    library = nlbpub.meta("schema:library")
    library = library.upper() if library else library
    logo = os.path.join(Xslt.xslt_dir, PrepareForEbook.uid,
                        "{}_logo.png".format(library))
    # Only the STATPED logo is copied, and only when the file exists.
    if os.path.isfile(logo) and library == "STATPED":
        shutil.copy(
            logo,
            os.path.join(os.path.dirname(html_file), os.path.basename(logo)))

    # ---------- save EPUB ----------

    report.info("Boken ble konvertert. Kopierer til innlesingsklart EPUB-arkiv.")
    archived_path, _stored = self.utils.filesystem.storeBook(
        nlbpub.asFile(), nlbpub.identifier(), file_extension="epub", move=True)
    report.attachment(None, archived_path, "DEBUG")
    report.title = self.title + ": " + epub.identifier() + " ble konvertert 👍😄" + epubTitle
    return True