Пример #1
0
    def on_book(self):
        self.utils.report.attachment(None, self.book["source"], "DEBUG")
        epub = Epub(self.utils.report, self.book["source"])

        epubTitle = ""
        try:
            epubTitle = " (" + epub.meta("dc:title") + ") "
        except Exception:
            pass

        # sjekk at dette er en EPUB
        if not epub.isepub():
            return False

        if not epub.identifier():
            self.utils.report.error(
                self.book["name"] +
                ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
            return False

        if epub.identifier() != self.book["name"].split(".")[0]:
            self.utils.report.error(
                self.book["name"] +
                ": Filnavn stemmer ikke overens med dc:identifier: {}".format(
                    epub.identifier()))
            return False

        temp_xml_file_obj = tempfile.NamedTemporaryFile()
        temp_xml_file = temp_xml_file_obj.name

        self.utils.report.info("Lager en kopi av EPUBen")
        temp_epubdir_withimages_obj = tempfile.TemporaryDirectory()
        temp_epubdir_withimages = temp_epubdir_withimages_obj.name
        Filesystem.copy(self.utils.report, self.book["source"],
                        temp_epubdir_withimages)

        self.utils.report.info("Lager en kopi av EPUBen med tomme bildefiler")
        temp_epubdir_obj = tempfile.TemporaryDirectory()
        temp_epubdir = temp_epubdir_obj.name
        Filesystem.copy(self.utils.report, temp_epubdir_withimages,
                        temp_epubdir)
        for root, dirs, files in os.walk(
                os.path.join(temp_epubdir, "EPUB", "images")):
            for file in files:
                fullpath = os.path.join(root, file)
                os.remove(fullpath)
                Path(fullpath).touch()
        temp_epub = Epub(self.utils.report, temp_epubdir)

        self.utils.report.info("Rydder opp i nordisk EPUB nav.xhtml")
        nav_path = os.path.join(temp_epubdir, temp_epub.nav_path())
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, NordicToNlbpub.uid,
                                            "nordic-cleanup-nav.xsl"),
                    source=nav_path,
                    target=temp_xml_file,
                    parameters={
                        "cover":
                        " ".join([item["href"] for item in temp_epub.spine()]),
                        "base":
                        os.path.dirname(
                            os.path.join(temp_epubdir, temp_epub.opf_path())) +
                        "/"
                    })
        if not xslt.success:
            return False
        shutil.copy(temp_xml_file, nav_path)

        self.utils.report.info("Rydder opp i nordisk EPUB package.opf")
        opf_path = os.path.join(temp_epubdir, temp_epub.opf_path())
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, NordicToNlbpub.uid,
                                            "nordic-cleanup-opf.xsl"),
                    source=opf_path,
                    target=temp_xml_file)
        if not xslt.success:
            return False
        shutil.copy(temp_xml_file, opf_path)

        html_dir_obj = tempfile.TemporaryDirectory()
        html_dir = html_dir_obj.name
        html_file = os.path.join(html_dir, epub.identifier() + ".xhtml")

        self.utils.report.info("Finner ut hvilket bibliotek boka tilhører…")
        edition_metadata = Metadata.get_edition_from_api(
            epub.identifier(), report=self.utils.report)
        library = None
        if edition_metadata is not None and edition_metadata[
                "library"] is not None:
            library = edition_metadata["library"]
        else:
            library = Metadata.get_library_from_identifier(
                epub.identifier(), self.utils.report)
        self.utils.report.info(f"Boka tilhører '{library}'")

        self.utils.report.info("Zipper oppdatert versjon av EPUBen...")
        temp_epub.asFile(rebuild=True)

        self.utils.report.info(
            "Konverterer fra Nordisk EPUB 3 til Nordisk HTML 5...")
        epub_file = temp_epub.asFile()
        with DaisyPipelineJob(self,
                              "nordic-epub3-to-html", {
                                  "epub": os.path.basename(epub_file),
                                  "fail-on-error": "false"
                              },
                              pipeline_and_script_version=[
                                  ("1.13.6", "1.4.6"),
                                  ("1.13.4", "1.4.5"),
                                  ("1.12.1", "1.4.2"),
                                  ("1.11.1-SNAPSHOT", "1.3.0"),
                              ],
                              context={os.path.basename(epub_file):
                                       epub_file}) as dp2_job_convert:
            convert_status = "SUCCESS" if dp2_job_convert.status == "SUCCESS" else "ERROR"

            if convert_status != "SUCCESS":
                self.utils.report.error("Klarte ikke å konvertere boken")
                return False

            dp2_html_dir = os.path.join(dp2_job_convert.dir_output,
                                        "output-dir", epub.identifier())
            dp2_html_file = os.path.join(dp2_job_convert.dir_output,
                                         "output-dir", epub.identifier(),
                                         epub.identifier() + ".xhtml")

            if not os.path.isdir(dp2_html_dir):
                self.utils.report.error(
                    "Finner ikke den konverterte boken: {}".format(
                        dp2_html_dir))
                return False

            if not os.path.isfile(dp2_html_file):
                self.utils.report.error(
                    "Finner ikke den konverterte boken: {}".format(
                        dp2_html_file))
                self.utils.report.info(
                    "Kanskje filnavnet er forskjellig fra IDen?")
                return False

            Filesystem.copy(self.utils.report, dp2_html_dir, html_dir)

        self.utils.report.info("Rydder opp i nordisk HTML")
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, NordicToNlbpub.uid,
                                            "nordic-cleanup.xsl"),
                    source=html_file,
                    target=temp_xml_file)
        if not xslt.success:
            return False
        shutil.copy(temp_xml_file, html_file)

        self.utils.report.info("Rydder opp i ns0 i page-normal")
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, NordicToNlbpub.uid,
                                            "ns0-cleanup.xsl"),
                    source=html_file,
                    target=temp_xml_file)
        if not xslt.success:
            return False
        shutil.copy(temp_xml_file, html_file)

        self.utils.report.info("Rydder opp i innholdsfortegnelsen")
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, NordicToNlbpub.uid,
                                            "fix-toc-span.xsl"),
                    source=html_file,
                    target=temp_xml_file)
        if not xslt.success:
            return False
        shutil.copy(temp_xml_file, html_file)

        self.utils.report.info(
            "Legger til EPUB-filer (OPF, NAV, container.xml, mediatype)...")
        nlbpub_tempdir_obj = tempfile.TemporaryDirectory()
        nlbpub_tempdir = nlbpub_tempdir_obj.name

        nlbpub = Epub.from_html(self, html_dir, nlbpub_tempdir)
        if nlbpub is None:
            return False

        self.utils.report.info(
            "Erstatter tomme bildefiler med faktiske bildefiler")
        for root, dirs, files in os.walk(
                os.path.join(nlbpub_tempdir, "EPUB", "images")):
            for file in files:
                fullpath = os.path.join(root, file)
                relpath = os.path.relpath(fullpath, nlbpub_tempdir)
                os.remove(fullpath)
                Filesystem.copy(self.utils.report,
                                os.path.join(temp_epubdir_withimages, relpath),
                                fullpath)
        temp_epub = Epub(self.utils.report, temp_epubdir)

        nlbpub.update_prefixes()

        self.utils.report.info(
            "Boken ble konvertert. Kopierer til NLBPUB-arkiv.")
        archived_path, stored = self.utils.filesystem.storeBook(
            nlbpub.asDir(), temp_epub.identifier(), overwrite=self.overwrite)
        self.utils.report.attachment(None, archived_path, "DEBUG")
        self.utils.report.title = self.title + ": " + epub.identifier(
        ) + " ble konvertert 👍😄" + epubTitle
        return True
Пример #2
0
    def on_book(self):
        self.utils.report.attachment(None, self.book["source"], "DEBUG")
        epub = Epub(self.utils.report, self.book["source"])

        epubTitle = ""
        try:
            epubTitle = " (" + epub.meta("dc:title") + ") "
        except Exception:
            pass

        # sjekk at dette er en EPUB
        if not epub.isepub():
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎"
            return

        if not epub.identifier():
            self.utils.report.error(
                self.book["name"] +
                ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎"
            return

        # ---------- lag en kopi av EPUBen ----------

        temp_epubdir_obj = tempfile.TemporaryDirectory()
        temp_epubdir = temp_epubdir_obj.name
        Filesystem.copy(self.utils.report, self.book["source"], temp_epubdir)
        temp_epub = Epub(self.utils.report, temp_epubdir)

        # ---------- gjør tilpasninger i HTML-fila med XSLT ----------

        opf_path = temp_epub.opf_path()
        if not opf_path:
            self.utils.report.error(self.book["name"] +
                                    ": Klarte ikke å finne OPF-fila i EPUBen.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎" + epubTitle
            return
        opf_path = os.path.join(temp_epubdir, opf_path)
        opf_xml = ElementTree.parse(opf_path).getroot()

        html_file = opf_xml.xpath(
            "/*/*[local-name()='manifest']/*[@id = /*/*[local-name()='spine']/*[1]/@idref]/@href"
        )
        html_file = html_file[0] if html_file else None
        if not html_file:
            self.utils.report.error(self.book["name"] +
                                    ": Klarte ikke å finne HTML-fila i OPFen.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎" + epubTitle
            return
        html_file = os.path.join(os.path.dirname(opf_path), html_file)
        if not os.path.isfile(html_file):
            self.utils.report.error(self.book["name"] +
                                    ": Klarte ikke å finne HTML-fila.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎" + epubTitle
            return

        temp_xml_obj = tempfile.NamedTemporaryFile()
        temp_xml = temp_xml_obj.name

        self.utils.report.info("Flater ut NLBPUB")
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, NlbpubToEpub.uid,
                                            "nlbpub-flatten.xsl"),
                    source=html_file,
                    target=temp_xml)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return
        shutil.copy(temp_xml, html_file)

        self.utils.report.info("Deler opp NLBPUB i flere HTML-filer")
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, NlbpubToEpub.uid,
                                            "nlbpub-split.xsl"),
                    source=html_file,
                    target=temp_xml,
                    parameters={"output-dir": os.path.dirname(html_file)})
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return
        os.remove(html_file)

        spine_hrefs = []
        for href in sorted(os.listdir(os.path.dirname(html_file))):
            if href.endswith(".xhtml") and href not in [
                    "nav.xhtml", os.path.basename(html_file)
            ]:
                spine_hrefs.append(href)

        self.utils.report.info("Oppdaterer OPF-fil")
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, NlbpubToEpub.uid,
                                            "update-opf.xsl"),
                    source=opf_path,
                    target=temp_xml,
                    parameters={"spine-hrefs": ",".join(spine_hrefs)})
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return
        shutil.copy(temp_xml, opf_path)

        nav_path = os.path.join(temp_epubdir, temp_epub.nav_path())

        self.utils.report.info("Lager nytt navigasjonsdokument")
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, NlbpubToEpub.uid,
                                            "generate-nav.xsl"),
                    source=opf_path,
                    target=nav_path)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return

        self.utils.report.info("Legger til properties i OPF etter behov")
        temp_epub.update_opf_properties()

        if Epubcheck.isavailable():
            epubcheck = Epubcheck(self, opf_path)
            if not epubcheck.success:
                self.utils.report.title = self.title + ": " + epub.identifier(
                ) + " feilet 😭👎" + epubTitle
                return
        else:
            self.utils.report.warn(
                "Epubcheck not available, EPUB will not be validated!")

        self.utils.report.info(
            "Boken ble konvertert. Kopierer til e-bok-arkiv.")

        archived_path, stored = self.utils.filesystem.storeBook(
            temp_epubdir, temp_epub.identifier())
        self.utils.report.attachment(None, archived_path, "DEBUG")
        Bibliofil.book_available(NlbpubToEpub.publication_format,
                                 temp_epub.identifier())
        self.utils.report.title = self.title + ": " + epub.identifier(
        ) + " ble konvertert 👍😄" + epubTitle
        return True
Пример #3
0
    def on_book(self):
        self.utils.report.attachment(None, self.book["source"], "DEBUG")
        epub = Epub(self.utils.report, self.book["source"])

        epubTitle = ""
        try:
            epubTitle = " (" + epub.meta("dc:title") + ") "
        except Exception:
            pass

        # sjekk at dette er en EPUB
        if not epub.isepub():
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎"
            return False

        if not epub.identifier():
            self.utils.report.error(
                self.book["name"] +
                ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎"
            return False

        # ---------- lag en kopi av EPUBen ----------

        narration_epubdir_obj = tempfile.TemporaryDirectory()
        narration_epubdir = narration_epubdir_obj.name
        Filesystem.copy(self.utils.report, self.book["source"],
                        narration_epubdir)
        nlbpub = Epub(self.utils.report, narration_epubdir)

        # ---------- gjør tilpasninger i HTML-fila med XSLT ----------

        opf_path = nlbpub.opf_path()
        if not opf_path:
            self.utils.report.error(self.book["name"] +
                                    ": Klarte ikke å finne OPF-fila i EPUBen.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎" + epubTitle
            return False
        opf_path = os.path.join(narration_epubdir, opf_path)

        xml = ElementTree.parse(opf_path).getroot()
        html_file = xml.xpath(
            "/*/*[local-name()='manifest']/*[@id = /*/*[local-name()='spine']/*[1]/@idref]/@href"
        )
        html_file = html_file[0] if html_file else None
        if not html_file:
            self.utils.report.error(self.book["name"] +
                                    ": Klarte ikke å finne HTML-fila i OPFen.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎" + epubTitle
            return False

        html_file = os.path.join(os.path.dirname(opf_path), html_file)
        if not os.path.isfile(html_file):
            self.utils.report.error(self.book["name"] +
                                    ": Klarte ikke å finne HTML-fila.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎" + epubTitle
            return False

        temp_html_obj = tempfile.NamedTemporaryFile()
        temp_html = temp_html_obj.name

        self.utils.report.info(
            "Fjerner elementer som ikke skal være med i lydboka...")
        self.utils.report.debug("ta-vekk-innhold.xsl")
        self.utils.report.debug("    source = " + html_file)
        self.utils.report.debug("    target = " + temp_html)
        xslt = Xslt(self,
                    stylesheet=os.path.join(NlbpubToNarrationEpub.xslt_dir,
                                            NlbpubToNarrationEpub.uid,
                                            "ta-vekk-innhold.xsl"),
                    source=html_file,
                    target=temp_html)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return False
        shutil.copy(temp_html, html_file)

        self.utils.report.info("Fikser Webarch-oppmerking")
        self.utils.report.debug("webarch-fixup.xsl")
        self.utils.report.debug("    source = " + html_file)
        self.utils.report.debug("    target = " + temp_html)
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir,
                                            NlbpubToNarrationEpub.uid,
                                            "webarch-fixup.xsl"),
                    source=html_file,
                    target=temp_html)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return False
        shutil.copy(temp_html, html_file)

        self.utils.report.info("Fikser dikt-oppmerking")
        self.utils.report.debug("unwrap-poem-chapters.xsl")
        self.utils.report.debug("    source = " + html_file)
        self.utils.report.debug("    target = " + temp_html)
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir,
                                            NlbpubToNarrationEpub.uid,
                                            "unwrap-poem-chapters.xsl"),
                    source=html_file,
                    target=temp_html)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return False
        shutil.copy(temp_html, html_file)

        self.utils.report.info("Lager usynlige overskrifter der det trengs...")
        self.utils.report.debug("create-hidden-headlines.xsl")
        self.utils.report.debug("    source = " + html_file)
        self.utils.report.debug("    target = " + temp_html)
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, PrepareForEbook.uid,
                                            "create-hidden-headlines.xsl"),
                    source=html_file,
                    target=temp_html)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return False
        shutil.copy(temp_html, html_file)

        self.utils.report.info("Tilpasser innhold for innlesing...")
        self.utils.report.debug("prepare-for-narration.xsl")
        self.utils.report.debug("    source = " + html_file)
        self.utils.report.debug("    target = " + temp_html)
        xslt = Xslt(self,
                    stylesheet=os.path.join(NlbpubToNarrationEpub.xslt_dir,
                                            NlbpubToNarrationEpub.uid,
                                            "prepare-for-narration.xsl"),
                    source=html_file,
                    target=temp_html)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return False
        shutil.copy(temp_html, html_file)

        self.utils.report.info("Lager synkroniseringspunkter...")
        self.utils.report.debug("lag-synkroniseringspunkter.xsl")
        self.utils.report.debug("    source = " + html_file)
        self.utils.report.debug("    target = " + temp_html)
        xslt = Xslt(self,
                    stylesheet=os.path.join(NlbpubToNarrationEpub.xslt_dir,
                                            NlbpubToNarrationEpub.uid,
                                            "lag-synkroniseringspunkter.xsl"),
                    source=html_file,
                    target=temp_html)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return False
        shutil.copy(temp_html, html_file)

        self.utils.report.info("Gjør HTMLen litt penere...")
        self.utils.report.debug("pretty-print.xsl")
        self.utils.report.debug("    source = " + html_file)
        self.utils.report.debug("    target = " + temp_html)
        xslt = Xslt(self,
                    stylesheet=os.path.join(Xslt.xslt_dir, Epub.uid,
                                            "pretty-print.xsl"),
                    source=html_file,
                    target=temp_html)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return False
        shutil.copy(temp_html, html_file)

        # ---------- erstatt metadata i OPF med metadata fra HTML ----------

        temp_opf_obj = tempfile.NamedTemporaryFile()
        temp_opf = temp_opf_obj.name

        xslt = Epub.html_to_opf(self, opf_path, temp_opf)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return False

        shutil.copy(temp_opf, opf_path)

        # ---------- hent nytt filnavn fra OPF (det endrer seg basert på boknummer) ----------
        try:
            xml = ElementTree.parse(opf_path).getroot()
            new_html_file = xml.xpath(
                "/*/*[local-name()='manifest']/*[@id = /*/*[local-name()='spine']/*[1]/@idref]/@href"
            )
            new_html_file = os.path.join(
                os.path.dirname(opf_path),
                new_html_file[0]) if new_html_file else None
        except Exception:
            self.utils.report.info(traceback.format_exc(), preformatted=True)
            self.utils.report.error(self.book["name"] +
                                    ": Klarte ikke å finne HTML-fila i OPFen.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎" + epubTitle
            return False

        if html_file != new_html_file:
            shutil.copy(html_file, new_html_file)
            os.remove(html_file)
            html_file = new_html_file

        # ---------- lag nav.xhtml på nytt ----------

        nav_path = nlbpub.nav_path()
        if not nav_path:
            self.utils.report.error(
                self.book["name"] +
                ": Klarte ikke å finne navigasjonsfila i OPFen.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎" + epubTitle
            return False
        nav_path = os.path.join(narration_epubdir, nav_path)

        xslt = Epub.html_to_nav(self, html_file, nav_path)
        if not xslt.success:
            self.utils.report.title = self.title + ": " + epub.identifier(
            ) + " feilet 😭👎" + epubTitle
            return False

        # ---------- legg til logo ----------
        library = nlbpub.meta("schema:library")
        library = library.upper() if library else library
        logo = os.path.join(Xslt.xslt_dir, PrepareForEbook.uid,
                            "{}_logo.png".format(library))

        if os.path.isfile(logo) and library == "STATPED":
            shutil.copy(
                logo,
                os.path.join(os.path.dirname(html_file),
                             os.path.basename(logo)))

        # ---------- save EPUB ----------

        self.utils.report.info(
            "Boken ble konvertert. Kopierer til innlesingsklart EPUB-arkiv.")

        archived_path, stored = self.utils.filesystem.storeBook(
            nlbpub.asFile(),
            nlbpub.identifier(),
            file_extension="epub",
            move=True)
        self.utils.report.attachment(None, archived_path, "DEBUG")
        self.utils.report.title = self.title + ": " + epub.identifier(
        ) + " ble konvertert 👍😄" + epubTitle
        return True