Пример #1
0
def get_metadata(stream):
    """Read book metadata, and a cover image when one is found, from a LIT file.

    The OPF text returned by the LIT container is parsed into a metadata
    object. Guide entries whose ``type`` contains ``cover`` are matched
    against the container manifest, and the contents of the matching files
    are collected as cover candidates.

    :param stream: passed straight to ``LitContainer``; presumably a binary
        file-like object open on the LIT file (TODO confirm against caller).
    :return: the object produced by ``OPF.to_book_metadata()``. Its
        ``cover_data`` is set to ``('jpg', data)`` only when at least one
        cover candidate could be read; otherwise it is left untouched.
    """
    from calibre.ebooks.lit.reader import LitContainer
    from calibre.utils.logging import Log
    container = LitContainer(stream, Log())
    src = container.get_metadata().encode('utf-8')
    litfile = container._litfile
    opf = OPF(cStringIO.StringIO(src), os.getcwdu())
    mi = opf.to_book_metadata()
    covers = []
    for ref in opf.iterguide():
        if 'cover' not in ref.get('type', '').lower():
            continue
        ctype = ref.get('type')
        href = ref.get('href', '')
        # Also try the href with '&' written as '%26'.
        candidates = (href, href.replace('&', '%26'))
        for entry in litfile.manifest.values():
            if entry.path in candidates:
                try:
                    covers.append(
                        (litfile.get_file('/data/' + entry.internal), ctype))
                except Exception:
                    # Best effort: an unreadable cover must not prevent the
                    # rest of the metadata from being returned.
                    pass
                # Only the first matching manifest entry is considered.
                break
    # Largest candidate first.
    covers.sort(key=lambda cover: len(cover[0]), reverse=True)
    idx = 0
    # Prefer the runner-up when its guide type is the largest one's type
    # with a '-standard' suffix.
    if len(covers) > 1 and covers[1][1] == covers[0][1] + '-standard':
        idx = 1
    # A book without any readable cover used to raise IndexError here.
    if covers:
        mi.cover_data = ('jpg', covers[idx][0])
    return mi
Пример #2
0
def get_metadata(stream):
    """Read book metadata, and a cover image when one is found, from a LIT file.

    The OPF text returned by the LIT container is parsed into a metadata
    object. Guide entries whose ``type`` contains ``cover`` are matched
    against the container manifest, and the contents of the matching files
    are collected as cover candidates.

    :param stream: passed straight to ``LitContainer``; presumably a binary
        file-like object open on the LIT file (TODO confirm against caller).
    :return: the object produced by ``OPF.to_book_metadata()``. Its
        ``cover_data`` is set to ``('jpg', data)`` only when at least one
        cover candidate could be read; otherwise it is left untouched.
    """
    from calibre.ebooks.lit.reader import LitContainer
    from calibre.utils.logging import Log
    container = LitContainer(stream, Log())
    src = container.get_metadata().encode('utf-8')
    litfile = container._litfile
    opf = OPF(io.BytesIO(src), getcwd())
    mi = opf.to_book_metadata()
    covers = []
    for ref in opf.iterguide():
        if 'cover' not in ref.get('type', '').lower():
            continue
        ctype = ref.get('type')
        href = ref.get('href', '')
        # Also try the href with '&' written as '%26'.
        candidates = (href, href.replace('&', '%26'))
        for entry in litfile.manifest.values():
            if entry.path in candidates:
                try:
                    covers.append(
                        (litfile.get_file('/data/' + entry.internal), ctype))
                except Exception:
                    # Best effort: an unreadable cover must not prevent the
                    # rest of the metadata from being returned.
                    pass
                # Only the first matching manifest entry is considered.
                break
    # Largest candidate first.
    covers.sort(key=lambda cover: len(cover[0]), reverse=True)
    idx = 0
    # Prefer the runner-up when its guide type is the largest one's type
    # with a '-standard' suffix.
    if len(covers) > 1 and covers[1][1] == covers[0][1] + '-standard':
        idx = 1
    # A book without any readable cover used to raise IndexError here.
    if covers:
        mi.cover_data = ('jpg', covers[idx][0])
    return mi
Пример #3
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Unpack a KePub archive and write a cleaned-up ``content.opf``.

        The archive is extracted into the current working directory, its OPF
        is located and parsed, manifest/guide hrefs are re-based onto the
        working directory, the cover is rationalized, and the spine is
        pruned of empty, duplicate and non-content references.

        :param stream: the KePub file; read as a ZIP archive.
        :param options: not used by this method.
        :param file_ext: not used by this method.
        :param log: callable logger that also provides ``exception()``.
        :param accelerators: not used by this method.
        :return: absolute path of the ``content.opf`` written to the
            working directory.
        :raises ValueError: no OPF was found, the manifest contains DTBook
            markup, or the spine has no valid entries left.
        :raises DRMError: a ``rights.xml`` file exists in the working
            directory after extraction.
        """
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        try:
            archive = ZipFile(stream)
            if sys.version_info.major == 2:
                extract_dir = os.getcwdu()
            else:
                extract_dir = os.getcwd()
            archive.extractall(extract_dir)
        except Exception:
            log.exception("KEPUB appears to be invalid ZIP file, trying a "
                          "more forgiving ZIP parser")
            from calibre.utils.localunzip import extractall

            stream.seek(0)
            extractall(stream)

        opf_path = self.find_opf()
        if opf_path is None:
            # Fall back to the first visible .opf file outside __MACOSX.
            for candidate in walk("."):
                if not candidate.lower().endswith(".opf"):
                    continue
                if "__MACOSX" in candidate:
                    continue
                if os.path.basename(candidate).startswith("."):
                    continue
                opf_path = os.path.abspath(candidate)
                break
        source_name = getattr(stream, "name", "stream")

        if opf_path is None:
            message = _(  # noqa: F821
                "{0} is not a valid KEPUB file (could not find opf)")
            raise ValueError(message.format(source_name))

        if os.path.exists(os.path.abspath("rights.xml")):
            raise DRMError(os.path.basename(source_name))

        if sys.version_info.major == 2:
            base_dir = os.getcwdu()
        else:
            base_dir = os.getcwd()
        rel_opf = os.path.relpath(opf_path, base_dir)
        head_and_tail = os.path.split(rel_opf)
        opf = OPF(rel_opf, os.path.dirname(os.path.abspath(rel_opf)))

        self.encrypted_fonts = []

        if len(head_and_tail) > 1 and head_and_tail[0]:
            # The OPF sits in a sub-directory: prefix every href with it so
            # the links resolve from the working directory.
            prefix = "/".join(head_and_tail[:-1]) + "/"
            for iter_elements in (opf.itermanifest, opf.iterguide):
                for elem in iter_elements():
                    elem.set("href", prefix + elem.get("href"))

        if opf.package_version >= 3.0:
            rationalize = self.rationalize_cover3
        else:
            rationalize = self.rationalize_cover2
        self.removed_cover = rationalize(opf, log)

        self.optimize_opf_parsing = opf
        has_dtbook = any(
            entry.get("media-type", "") == "application/x-dtbook+xml"
            for entry in opf.itermanifest())
        if has_dtbook:
            raise ValueError(
                _("EPUB files with DTBook markup are not supported"
                  )  # noqa: F821
            )

        # Manifest media types that must never be referenced from the spine.
        excluded_types = {
            "application/vnd.adobe-page-template+xml",
            "application/vnd.adobe.page-template+xml",
            "application/adobe-page-template+xml",
            "application/adobe.page-template+xml",
            "application/text",
        }
        not_for_spine = set()
        for entry in opf.itermanifest():
            entry_id = entry.get("id", None)
            if entry_id and entry.get("media-type", None) in excluded_types:
                not_for_spine.add(entry_id)

        seen = set()
        # Iterate over a copy because entries are removed while looping.
        for itemref in list(opf.iterspine()):
            ref = itemref.get("idref", None)
            if ref and ref not in not_for_spine and ref not in seen:
                seen.add(ref)
            else:
                itemref.getparent().remove(itemref)

        if not list(opf.iterspine()):
            raise ValueError(
                _("No valid entries in the spine of this EPUB")  # noqa: F821
            )

        with open("content.opf", "wb") as out:
            out.write(opf.render())

        return os.path.abspath("content.opf")
Пример #4
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Unpack a KePub archive and write a cleaned-up ``content.opf``.

        The archive is extracted into the current working directory, its OPF
        is located and parsed, manifest/guide hrefs are re-based onto the
        working directory, the cover is rationalized, and the spine is
        pruned of empty, duplicate and non-content references.

        :param stream: the KePub file; read as a ZIP archive.
        :param options: not used by this method.
        :param file_ext: not used by this method.
        :param log: callable logger that also provides ``exception()``.
        :param accelerators: not used by this method.
        :return: absolute path of the ``content.opf`` written to the
            working directory.
        :raises ValueError: no OPF was found, the manifest contains DTBook
            markup, or the spine has no valid entries left.
        :raises DRMError: a ``rights.xml`` file exists in the working
            directory after extraction.
        """
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        # os.getcwdu() exists only on Python 2. Resolve the function once,
        # outside the try block, so a missing attribute is never mistaken
        # for a corrupt archive.
        getcwd = getattr(os, 'getcwdu', os.getcwd)

        try:
            zf = ZipFile(stream)
            zf.extractall(getcwd())
        except Exception:
            log.exception('KEPUB appears to be invalid ZIP file, trying a '
                          'more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)
        opf = self.find_opf()
        if opf is None:
            # Fall back to the first visible .opf file outside __MACOSX.
            for f in walk(u'.'):
                if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                        not os.path.basename(f).startswith('.'):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError(
                _('%s is not a valid KEPUB file (could not find opf)') % path)

        encfile = os.path.abspath('rights.xml')
        if os.path.exists(encfile):
            raise DRMError(os.path.basename(path))

        opf = os.path.relpath(opf, getcwd())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self.encrypted_fonts = []

        if len(parts) > 1 and parts[0]:
            # The OPF sits in a sub-directory: prefix every href with it so
            # the links resolve from the working directory.
            delta = '/'.join(parts[:-1]) + '/'
            for elem in opf.itermanifest():
                elem.set('href', delta + elem.get('href'))
            for elem in opf.iterguide():
                elem.set('href', delta + elem.get('href'))

        f = self.rationalize_cover3 if opf.package_version >= 3.0 else \
            self.rationalize_cover2
        self.removed_cover = f(opf, log)

        self.optimize_opf_parsing = opf
        for x in opf.itermanifest():
            if x.get('media-type', '') == 'application/x-dtbook+xml':
                raise ValueError(
                    _('EPUB files with DTBook markup are not supported'))

        # Collect ids of manifest items that must never appear in the spine.
        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get('id', None)
            if id_ and y.get('media-type', None) in {
                    'application/vnd.adobe-page-template+xml',
                    'application/vnd.adobe.page-template+xml',
                    'application/adobe-page-template+xml',
                    'application/adobe.page-template+xml', 'application/text'
            }:
                not_for_spine.add(id_)

        seen = set()
        # Iterate over a copy because entries are removed while looping.
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if not list(opf.iterspine()):
            raise ValueError(_('No valid entries in the spine of this EPUB'))

        with open('content.opf', 'wb') as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u'content.opf')
Пример #5
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Unpack a KePub archive and write a cleaned-up ``content.opf``.

        The archive is extracted into the current working directory, its OPF
        is located and parsed, manifest/guide hrefs are re-based onto the
        working directory, the cover is rationalized, and the spine is
        pruned of empty, duplicate and non-content references.

        :param stream: the KePub file; read as a ZIP archive.
        :param options: not used by this method.
        :param file_ext: not used by this method.
        :param log: callable logger that also provides ``exception()``.
        :param accelerators: not used by this method.
        :return: absolute path of the ``content.opf`` written to the
            working directory.
        :raises ValueError: no OPF was found, the manifest contains DTBook
            markup, or the spine has no valid entries left.
        :raises DRMError: a ``rights.xml`` file exists in the working
            directory after extraction.
        """
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        # os.getcwdu() exists only on Python 2. Resolve the function once,
        # outside the try block, so a missing attribute is never mistaken
        # for a corrupt archive.
        getcwd = getattr(os, 'getcwdu', os.getcwd)

        try:
            zf = ZipFile(stream)
            zf.extractall(getcwd())
        except Exception:
            log.exception('KEPUB appears to be invalid ZIP file, trying a '
                          'more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)
        opf = self.find_opf()
        if opf is None:
            # Fall back to the first visible .opf file outside __MACOSX.
            for f in walk(u'.'):
                if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                        not os.path.basename(f).startswith('.'):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError(
                _('%s is not a valid KEPUB file (could not find opf)') % path)

        encfile = os.path.abspath('rights.xml')
        if os.path.exists(encfile):
            raise DRMError(os.path.basename(path))

        opf = os.path.relpath(opf, getcwd())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self.encrypted_fonts = []

        if len(parts) > 1 and parts[0]:
            # The OPF sits in a sub-directory: prefix every href with it so
            # the links resolve from the working directory.
            delta = '/'.join(parts[:-1]) + '/'
            for elem in opf.itermanifest():
                elem.set('href', delta + elem.get('href'))
            for elem in opf.iterguide():
                elem.set('href', delta + elem.get('href'))

        f = self.rationalize_cover3 if opf.package_version >= 3.0 else \
            self.rationalize_cover2
        self.removed_cover = f(opf, log)

        self.optimize_opf_parsing = opf
        for x in opf.itermanifest():
            if x.get('media-type', '') == 'application/x-dtbook+xml':
                raise ValueError(_(
                    'EPUB files with DTBook markup are not supported'))

        # Collect ids of manifest items that must never appear in the spine.
        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get('id', None)
            if id_ and y.get('media-type', None) in {
                    'application/vnd.adobe-page-template+xml',
                    'application/vnd.adobe.page-template+xml',
                    'application/adobe-page-template+xml',
                    'application/adobe.page-template+xml', 'application/text'
            }:
                not_for_spine.add(id_)

        seen = set()
        # Iterate over a copy because entries are removed while looping.
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if not list(opf.iterspine()):
            raise ValueError(_('No valid entries in the spine of this EPUB'))

        with open('content.opf', 'wb') as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u'content.opf')
Пример #6
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Convert a KePub file into a structure calibre can process.

        The archive is extracted into the current working directory, its OPF
        is located and parsed, manifest/guide hrefs are re-based onto the
        working directory, the cover is rationalized, and the spine is
        pruned of empty, duplicate and non-content references.

        :param stream: the KePub file; read as a ZIP archive.
        :param options: not used by this method.
        :param file_ext: not used by this method.
        :param log: callable logger that also provides ``exception()``.
        :param accelerators: not used by this method.
        :return: absolute path of the ``content.opf`` written to the
            working directory.
        :raises ValueError: no OPF was found, the manifest contains DTBook
            markup, or the spine has no valid entries left.
        :raises DRMError: a ``rights.xml`` file exists in the working
            directory after extraction.
        """
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        # os.getcwdu() exists only on Python 2. Resolve the function once,
        # outside the try block, so a missing attribute is never mistaken
        # for a corrupt archive.
        getcwd = getattr(os, "getcwdu", os.getcwd)

        try:
            zf = ZipFile(stream)
            zf.extractall(getcwd())
        except Exception:
            log.exception(
                "KEPUB appears to be invalid ZIP file, trying a "
                "more forgiving ZIP parser"
            )
            from calibre.utils.localunzip import extractall

            stream.seek(0)
            extractall(stream)
        opf = self.find_opf()
        if opf is None:
            # Fall back to the first visible .opf file outside __MACOSX.
            for f in walk(u"."):
                if (
                    f.lower().endswith(".opf")
                    and "__MACOSX" not in f
                    and not os.path.basename(f).startswith(".")
                ):
                    opf = os.path.abspath(f)
                    break
        path = getattr(stream, "name", "stream")

        if opf is None:
            raise ValueError(
                _(  # noqa: F821
                    "{0} is not a valid KEPUB file (could not find opf)"
                ).format(path)
            )

        encfile = os.path.abspath("rights.xml")
        if os.path.exists(encfile):
            raise DRMError(os.path.basename(path))

        opf = os.path.relpath(opf, getcwd())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self.encrypted_fonts = []

        if len(parts) > 1 and parts[0]:
            # The OPF sits in a sub-directory: prefix every href with it so
            # the links resolve from the working directory.
            delta = "/".join(parts[:-1]) + "/"
            for elem in opf.itermanifest():
                elem.set("href", delta + elem.get("href"))
            for elem in opf.iterguide():
                elem.set("href", delta + elem.get("href"))

        f = (
            self.rationalize_cover3
            if opf.package_version >= 3.0
            else self.rationalize_cover2
        )
        self.removed_cover = f(opf, log)

        self.optimize_opf_parsing = opf
        for x in opf.itermanifest():
            if x.get("media-type", "") == "application/x-dtbook+xml":
                raise ValueError(
                    _("EPUB files with DTBook markup are not supported")  # noqa: F821
                )

        # Collect ids of manifest items that must never appear in the spine.
        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get("id", None)
            if id_ and y.get("media-type", None) in {
                "application/vnd.adobe-page-template+xml",
                "application/vnd.adobe.page-template+xml",
                "application/adobe-page-template+xml",
                "application/adobe.page-template+xml",
                "application/text",
            }:
                not_for_spine.add(id_)

        seen = set()
        # Iterate over a copy because entries are removed while looping.
        for x in list(opf.iterspine()):
            ref = x.get("idref", None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if not list(opf.iterspine()):
            raise ValueError(
                _("No valid entries in the spine of this EPUB")  # noqa: F821
            )

        with open("content.opf", "wb") as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u"content.opf")