Пример #1
0
 def read_embedded_metadata(self, root, elem, guide):
     """Parse the metadata embedded in ``elem`` and look up a cover via ``guide``.

     ``elem`` is serialized, wrapped in a ``<package>`` element and handed to
     ``OPF``; the resulting metadata is stored on ``self.embedded_mi``.

     If ``guide`` is not None, the first ``reference`` whose ``type`` contains
     "cover" is resolved to an element of ``root`` by id.  The first ``img``
     element at or after that anchor (in document order) provides the cover
     path, and that ``img`` is removed from the tree.
     """
     raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \
             html.tostring(elem, encoding='utf-8') + '</package>'
     stream = cStringIO.StringIO(raw)
     opf = OPF(stream)
     self.embedded_mi = opf.to_book_metadata()
     if guide is None:
         return
     for ref in guide.xpath('descendant::reference'):
         if 'cover' not in ref.get('type', '').lower():
             continue
         href = ref.get('href', '')
         if href.startswith('#'):
             href = href[1:]
         # Pass the id as an XPath variable instead of formatting it into the
         # expression, so an href containing quotes cannot break the query.
         anchors = root.xpath('//*[@id=$anchor_id]', anchor_id=href)
         if anchors:
             cpos = anchors[0]
             reached = False
             # A distinct loop name avoids shadowing the ``elem`` parameter.
             for node in root.iter():
                 if node is cpos:
                     reached = True
                 if reached and node.tag == 'img':
                     self.embedded_mi.cover = node.get('src', None)
                     node.getparent().remove(node)
                     break
         # Only the first cover reference is considered.
         break
Пример #2
0
	def handle_zip_of_opf_files(self, stream):
		''' Given a zip of a bunch of opf files, either merge them or add them to the library.

		Every ``.opf`` member is parsed; if ``self.extract_id`` yields an id that
		``self.get_casanova_metadata`` knows, the existing book is updated,
		otherwise a new book is imported.  ``.jpg``/``.png``/``.gif`` members
		whose base name is a key of ``self.book_map`` are set as covers.

		Returns a dict with the number of books ``'updated'`` and ``'added'``.
		A failure while processing one OPF member is reported on stderr and
		does not stop the remaining members from being processed.
		'''
		import traceback
		result = {'updated':0, 'added':0}
		with ZipFile(stream, 'r') as zf:
			self.start_applying_updates()
			try:
				for zi in zf.infolist():
					ext = zi.filename.rpartition('.')[-1].lower()
					if ext == 'opf':
						try:
							raw = zf.open(zi)
							opf = OPF(raw)
							mi = opf.to_book_metadata()
							casanova_id = self.extract_id(mi)
							if casanova_id:
								book_mi = self.get_casanova_metadata(casanova_id['id'])
								if book_mi:
									# Update an existing book's metadata!
									self.apply_metadata_update(casanova_id['id'], book_mi, mi)
									# Count only after the update actually succeeded.
									result['updated'] += 1
								else:
									# Create a new book entry
									self.model.db.import_book(mi, [])
									result['added'] += 1
						except Exception:
							# Best effort: one broken OPF must not abort the batch,
							# but the reason should not vanish silently.
							traceback.print_exc()
					elif ext in {'jpg', 'png', 'gif'}:
						# try and handle the cover
						casanova_id = zi.filename.partition('.')[0].lower()
						if casanova_id in self.book_map:
							book_id = self.book_map[casanova_id]
							raw = zf.open(zi)
							self.db.set_cover(book_id, raw)
			finally:
				# Always pair start_applying_updates() with its finish call,
				# even when cover handling raises.
				self.finish_applying_updates()
			return result
Пример #3
0
def get_metadata(stream):
    """Read metadata from an OPF given as raw bytes or as a stream.

    Bytes input is wrapped in ``DummyFile`` before parsing.  Returns a tuple
    ``(metadata, version, raster_cover, first_spine_item)`` where ``version``
    is parsed from the root element's ``version`` attribute.
    """
    source = DummyFile(stream) if isinstance(stream, bytes) else stream
    package = parse_opf(source)
    version = parse_opf_version(package.get('version'))
    # The tree is already parsed, so no stream is passed to OPF.
    reader = OPF(None, preparsed_opf=package, read_toc=False)
    metadata = reader.to_book_metadata()
    return metadata, version, reader.raster_cover, reader.first_spine_item()
Пример #4
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object

    The first OPF found in the zip archive ``stream`` is parsed.  When
    ``extract_cover`` is true the cover is taken from the OPF's raster cover
    or, failing that, from a ``<meta name="cover">`` element whose content
    ends in a jpeg/jpg/png extension.  Any failure is swallowed and whatever
    metadata was gathered so far (at worst the "Unknown" placeholder) is
    returned.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if not cover_href:
                    for meta in opf.metadata.xpath('//*[local-name()="meta" and @name="cover"]'):
                        # A meta element without a content attribute must not
                        # abort the search through the remaining elements.
                        val = meta.get('content') or ''
                        if val.rpartition('.')[2].lower() in {'jpeg', 'jpg', 'png'}:
                            cover_href = val
                            break
                if cover_href:
                    try:
                        mi.cover_data = (os.path.splitext(cover_href)[1], zf.read(cover_href))
                    except Exception:
                        # A missing/unreadable cover is not fatal for metadata.
                        pass
    except Exception:
        # Best effort: fall through and return what we have.
        pass
    return mi
Пример #5
0
def get_metadata(stream):
    """Read metadata, and a cover if one can be found, from a LIT file stream.

    The OPF embedded in the LIT container is parsed for the metadata.  Every
    guide entry whose type contains "cover" is matched against the manifest
    and the matching file's data is collected as a cover candidate.  The
    largest candidate is used, unless the second largest has the type of the
    largest plus ``-standard``, in which case the second one is used.
    ``cover_data`` is left untouched when no candidate could be read.
    """
    from calibre.ebooks.lit.reader import LitContainer
    from calibre.utils.logging import Log
    litfile = LitContainer(stream, Log())
    src = litfile.get_metadata().encode('utf-8')
    litfile = litfile._litfile
    opf = OPF(cStringIO.StringIO(src), os.getcwdu())
    mi = opf.to_book_metadata()
    covers = []
    for guide_item in opf.iterguide():
        if 'cover' not in guide_item.get('type', '').lower():
            continue
        ctype = guide_item.get('type')
        href = guide_item.get('href', '')
        candidates = [href, href.replace('&', '%26')]
        for entry in litfile.manifest.values():
            if entry.path in candidates:
                try:
                    covers.append((litfile.get_file('/data/'+entry.internal),
                                   ctype))
                except Exception:
                    # An unreadable cover file just means one candidate less.
                    pass
                break
    if covers:
        # Largest data first; key= gives the same order as the old cmp= form.
        covers.sort(key=lambda c: len(c[0]), reverse=True)
        idx = 0
        if len(covers) > 1 and covers[1][1] == covers[0][1]+'-standard':
            idx = 1
        mi.cover_data = ('jpg', covers[idx][0])
    return mi
Пример #6
0
def save_serialized_to_disk(ids, data, plugboards, root, opts, callback):
    """Save books to disk from serialized metadata.

    ``data[x]`` must be a tuple ``(opf, cover, format_map, last_modified)``
    for every ``x`` in ``ids``; ``opf`` is the OPF document as a string and
    ``cover`` is a path to the cover file (an unreadable cover is treated as
    no cover).

    If ``callback`` is callable it is invoked after every book as
    ``callback(id, title, failed, tb)``; a false return value stops the loop.

    Returns a list of ``(id, title, tb)`` tuples for the books that failed.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    root, opts, length = _sanitize_args(root, opts)
    failures = []
    for x in ids:
        opf, cover, format_map, last_modified = data[x]
        if isinstance(opf, unicode):
            opf = opf.encode('utf-8')
        mi = OPF(cStringIO.StringIO(opf)).to_book_metadata()
        try:
            mi.last_modified = parse_date(last_modified)
        except Exception:
            # An unparseable date is not a reason to skip the book.
            pass
        tb = ''
        try:
            with open(cover, 'rb') as f:
                cover = f.read()
        except Exception:
            # Covers are optional (``cover`` may be None or unreadable).
            cover = None
        try:
            failed, book_id, title = do_save_book_to_disk(x, mi, cover,
                plugboards, format_map, root, opts, length)
            tb = _('Requested formats not available')
        except Exception:
            failed, book_id, title = True, x, mi.title
            tb = traceback.format_exc()
        if failed:
            failures.append((book_id, title, tb))
        if callable(callback):
            if not callback(int(book_id), title, failed, tb):
                break

    return failures
Пример #7
0
    def process_dir(self, dirpath, filenames, book_id):
        """Record the book stored in ``dirpath`` from its ``metadata.opf``.

        The e-book files among ``filenames`` are listed as
        ``(FORMAT, size, base name)`` tuples.  If the OPF's application id
        equals ``book_id`` the book is appended to ``self.books``, otherwise
        ``dirpath`` is appended to ``self.mismatched_dirs``.  In both cases
        the author links found in the metadata are merged into
        ``self.authors_links``.
        """
        book_id = int(book_id)
        ebook_files = [name for name in filenames if self.is_ebook_file(name)]
        fmts = [os.path.splitext(name)[1][1:].upper() for name in ebook_files]
        sizes = [os.path.getsize(os.path.join(dirpath, name)) for name in ebook_files]
        stems = [os.path.splitext(name)[0] for name in ebook_files]

        opf_path = os.path.join(dirpath, 'metadata.opf')
        mi = OPF(opf_path, basedir=dirpath).to_book_metadata()
        opf_mtime = os.path.getmtime(opf_path)
        rel_path = os.path.relpath(dirpath, self.src_library_path)
        rel_path = rel_path.replace(os.sep, '/')

        if int(mi.application_id) != book_id:
            self.mismatched_dirs.append(dirpath)
        else:
            self.books.append({
                'mi': mi,
                'timestamp': opf_mtime,
                'formats': list(zip(fmts, sizes, stems)),
                'id': book_id,
                'dirpath': dirpath,
                'path': rel_path,
            })

        link_map = mi.get('author_link_map', {})
        for author, link in link_map.iteritems():
            known_link, known_stamp = self.authors_links.get(author, (None, None))
            # Store the link when none is known yet, or when a different link
            # comes from metadata with a later timestamp.
            if known_link is None or (known_link != link and known_stamp < mi.timestamp):
                self.authors_links[author] = (link, mi.timestamp)
Пример #8
0
    def process_result(self, group_id, result):
        """Handle the metadata-reading result for one group of files.

        Builds the metadata object (or a failure placeholder obtained from
        ``self.report_metadata_failure``), applies the title fallback and the
        tag/author mapping rules, and then either queues the book
        (``self.db is None``), merges it into identical existing books,
        records it as a duplicate, or adds it.
        """
        if not result.err:
            paths, opf, has_cover, duplicate_info = result.value
            try:
                reader = OPF(BytesIO(opf), basedir=self.tdir, populate_spine=False, try_to_guess_cover=False)
                mi = reader.to_book_metadata()
                mi.read_metadata_failed = False
            except Exception:
                mi = self.report_metadata_failure(group_id, traceback.format_exc())
        else:
            mi = self.report_metadata_failure(group_id, result.traceback)
            paths = self.file_groups[group_id]
            has_cover = False
            duplicate_info = set() if self.add_formats_to_existing else False

        if mi.is_null('title'):
            # Fall back to the file name (without extension) of the first path.
            for first_path in paths:
                base_name = os.path.basename(first_path)
                mi.title = os.path.splitext(base_name)[0]
                break
        if mi.application_id == '__calibre_dummy__':
            mi.application_id = None

        tag_rules = gprefs.get('tag_map_on_add_rules')
        if tag_rules:
            from calibre.ebooks.metadata.tag_mapper import map_tags
            mi.tags = map_tags(mi.tags, tag_rules)

        if self.author_map_rules:
            from calibre.ebooks.metadata.author_mapper import map_authors
            mapped = map_authors(mi.authors, self.author_map_rules)
            if mapped != mi.authors:
                mi.authors = mapped
                # The sort string must follow the changed author list.
                if self.db is not None:
                    mi.author_sort = self.db.author_sort_from_authors(mi.authors)
                else:
                    mi.author_sort = authors_to_sort_string(mi.authors)

        self.pd.msg = mi.title

        cover_path = None
        if has_cover:
            cover_path = os.path.join(self.tdir, '%s.cdata' % group_id)

        if self.db is None:
            # No database: only collect the item for the caller.
            if paths:
                self.items.append((mi, cover_path, paths))
            return

        if not self.add_formats_to_existing:
            if duplicate_info or icu_lower(mi.title or _('Unknown')) in self.added_duplicate_info:
                self.duplicates.append((mi, cover_path, paths))
            else:
                self.add_book(mi, cover_path, paths)
            return

        identical_book_ids = find_identical_books(mi, self.find_identical_books_data)
        if not identical_book_ids:
            self.add_book(mi, cover_path, paths)
            return

        try:
            self.merge_books(mi, cover_path, paths, identical_book_ids)
        except Exception:
            report = self.report.append
            report('')
            report('-' * 70)
            report(_('Failed to merge the book: ') + mi.title)
            for f in paths:
                report('\t' + f)
            report(_('With error:'))
            report(traceback.format_exc())
Пример #9
0
def update_metadata(ebook, new_opf):
    """Rewrite the OPF of ``ebook`` with the metadata read from ``new_opf``.

    ``new_opf`` is the path of an OPF file.  Its metadata, with the cover
    fields cleared, is applied to the book's own OPF, which is then rewritten
    in place.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    from calibre.ebooks.metadata.epub import update_metadata as apply_metadata

    opf_dir = os.path.dirname(ebook.name_to_abspath(ebook.opf_name))
    with ebook.open(ebook.opf_name, "r+b") as stream:
        with open(new_opf, "rb") as ns:
            book_opf = OPF(stream, basedir=opf_dir, populate_spine=False, unquote_urls=False)
            incoming = OPF(ns, unquote_urls=False, populate_spine=False)
            mi = incoming.to_book_metadata()
            mi.cover, mi.cover_data = None, (None, None)

            apply_metadata(book_opf, mi, apply_null=True, update_timestamp=True)
            # Replace the old contents of the OPF file completely.
            stream.seek(0)
            stream.truncate()
            stream.write(book_opf.render())
Пример #10
0
def read_serialized_metadata(book_id, data):
    """Load metadata and cover bytes described by ``data``.

    ``data['opf']`` is the path of the OPF file, ``data['last_modified']`` a
    date string and the optional ``data['cover']`` the path of the cover
    file.  ``book_id`` is not used by this function.

    Returns ``(mi, cdata)`` where ``cdata`` is the raw cover data or None.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    from calibre.utils.date import parse_date
    opf_path = data['opf']
    mi = OPF(opf_path, try_to_guess_cover=False, populate_spine=False, basedir=os.path.dirname(opf_path)).to_book_metadata()
    try:
        mi.last_modified = parse_date(data['last_modified'])
    except Exception:
        # A missing or unparseable date is tolerated; do not trap
        # KeyboardInterrupt/SystemExit the way a bare except would.
        pass
    mi.cover, mi.cover_data = None, (None, None)
    cdata = None
    if 'cover' in data:
        with lopen(data['cover'], 'rb') as f:
            cdata = f.read()
    return mi, cdata
Пример #11
0
def main(do_identify, covers, metadata, ensure_fields, tdir):
    """Download metadata and/or covers for the books in ``metadata``.

    ``metadata`` maps book ids to OPF documents.  For every book the results
    are written into ``tdir``: ``<id>.mi`` (merged metadata as OPF, only when
    identification succeeded), ``<id>.cover`` (cover data, only when a cover
    was downloaded) and ``<id>.log`` (the log for that book).

    Returns ``(failed_ids, failed_covers, all_failed)``; ``all_failed`` stays
    True unless at least one metadata or cover download succeeded.
    """
    import traceback
    failed_ids = set()
    failed_covers = set()
    all_failed = True
    log = GUILog()
    patch_plugins()

    for book_id, mi in metadata.iteritems():
        mi = OPF(BytesIO(mi), basedir=tdir,
                populate_spine=False).to_book_metadata()
        title, authors, identifiers = mi.title, mi.authors, mi.identifiers
        cdata = None
        log.clear()

        if do_identify:
            results = []
            try:
                results = identify(log, Event(), title=title, authors=authors,
                    identifiers=identifiers)
            except Exception:
                # Keep going with the next book, but record why this one
                # failed in its log instead of discarding the error.
                log.error(traceback.format_exc())
            if results:
                all_failed = False
                mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
                identifiers = mi.identifiers
                if not mi.is_null('rating'):
                    # set_metadata expects a rating out of 10
                    mi.rating *= 2
                with open(os.path.join(tdir, '%d.mi'%book_id), 'wb') as f:
                    f.write(metadata_to_opf(mi, default_lang='und'))
            else:
                log.error('Failed to download metadata for', title)
                failed_ids.add(book_id)

        if covers:
            cdata = download_cover(log, title=title, authors=authors,
                    identifiers=identifiers)
            if cdata is None:
                failed_covers.add(book_id)
            else:
                with open(os.path.join(tdir, '%d.cover'%book_id), 'wb') as f:
                    f.write(cdata[-1])
                all_failed = False

        with open(os.path.join(tdir, '%d.log'%book_id), 'wb') as f:
            f.write(log.plain_text.encode('utf-8'))

    return failed_ids, failed_covers, all_failed
Пример #12
0
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object

    The first OPF found in the zip archive ``stream`` is parsed.  When
    ``extract_cover`` is true and the OPF names a raster cover, its data is
    read from the archive into ``cover_data``.  Any failure is swallowed and
    whatever metadata was gathered so far (at worst the "Unknown"
    placeholder) is returned.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    try:
        with ZipFile(stream) as zf:
            opf_name = get_first_opf_name(zf)
            opf_stream = StringIO(zf.read(opf_name))
            opf = OPF(opf_stream)
            mi = opf.to_book_metadata()
            if extract_cover:
                cover_href = opf.raster_cover
                if cover_href:
                    mi.cover_data = (os.path.splitext(cover_href)[1], zf.read(cover_href))
    except Exception:
        # Best effort, but do not trap KeyboardInterrupt/SystemExit the way
        # a bare except would.
        pass
    return mi
Пример #13
0
def opf_metadata(opfpath):
    """Read book metadata from an OPF file given as a path or an open stream.

    Returns the metadata when the OPF carries an application id, including
    ``cover_data`` when the OPF's cover file is readable next to the OPF.
    Returns None when there is no application id or when reading fails (the
    traceback is printed in that case).

    A stream passed in by the caller is left open; a file opened here is
    always closed.
    """
    if hasattr(opfpath, 'read'):
        f = opfpath
        opfpath = getattr(f, 'name', os.getcwdu())
        opened_here = False
    else:
        f = open(opfpath, 'rb')
        opened_here = True
    try:
        opf = OPF(f, os.path.dirname(opfpath))
        if opf.application_id is not None:
            mi = opf.to_book_metadata()
            if hasattr(opf, 'cover') and opf.cover:
                cpath = os.path.join(os.path.dirname(opfpath), opf.cover)
                if os.access(cpath, os.R_OK):
                    fmt = cpath.rpartition('.')[-1]
                    with open(cpath, 'rb') as cover_file:
                        data = cover_file.read()
                    mi.cover_data = (fmt, data)
            return mi
    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        if opened_here:
            f.close()
Пример #14
0
def set_metadata(stream, mi):
    """Write the metadata ``mi`` (and its cover, if any) into the zip ``stream``.

    The first OPF in the archive is updated from ``mi`` and written back.
    The cover is taken from ``mi.cover_data[1]`` or, failing that, read from
    the file ``mi.cover``; it replaces the OPF's raster cover, or is added as
    ``cover.jpg`` when the OPF names none.
    """
    replacements = {}

    # Get the OPF in the archive.
    with ZipFile(stream) as zf:
        opf_path = get_first_opf_name(zf)
        opf_stream = StringIO(zf.read(opf_path))
    opf = OPF(opf_stream)

    # Cover: prefer in-memory data, fall back to the cover file.
    new_cdata = None
    try:
        new_cdata = mi.cover_data[1]
    except Exception:
        new_cdata = None
    if not new_cdata:
        try:
            with open(mi.cover, 'rb') as cover_file:
                new_cdata = cover_file.read()
        except Exception:
            # No usable cover (mi.cover may be None or unreadable): the
            # metadata is still written, just without a cover.
            pass

    # Bound up front so the cleanup below never hits an undefined name.
    cpath = None
    try:
        if new_cdata:
            cpath = opf.raster_cover
            if not cpath:
                cpath = 'cover.jpg'
            new_cover = _write_new_cover(new_cdata, cpath)
            replacements[cpath] = open(new_cover.name, 'rb')
            mi.cover = cpath

        # Update the metadata.
        opf.smart_update(mi, replace_metadata=True)
        newopf = StringIO(opf.render())
        safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)
    finally:
        # Cleanup temporary files, also when the update itself failed.
        if cpath is not None and cpath in replacements:
            try:
                replacements[cpath].close()
                os.remove(replacements[cpath].name)
            except (IOError, OSError):
                pass
Пример #15
0
def zip_opf_metadata(opfpath, zf):
    """Read metadata from an OPF and fetch its cover from the zip file ``zf``.

    ``opfpath`` is a path or an open stream.  A stream passed in by the
    caller is left open; a file opened here is always closed.  The cover is
    looked up in ``zf`` by its base name only.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    if hasattr(opfpath, 'read'):
        f = opfpath
        opfpath = getattr(f, 'name', getcwd())
        opened_here = False
    else:
        f = open(opfpath, 'rb')
        opened_here = True
    try:
        opf = OPF(f, os.path.dirname(opfpath))
        mi = opf.to_book_metadata()
    finally:
        if opened_here:
            f.close()
    # This is broken, in that it only works for
    # when both the OPF file and the cover file are in the root of the
    # zip file and the cover is an actual raster image, but I don't care
    # enough to make it more robust
    if getattr(mi, 'cover', None):
        covername = os.path.basename(mi.cover)
        mi.cover = None
        names = zf.namelist()
        if covername in names:
            fmt = covername.rpartition('.')[-1]
            data = zf.read(covername)
            mi.cover_data = (fmt, data)
    return mi
Пример #16
0
 def paste_metadata(self):
     """Paste the metadata held on the clipboard onto the selected books.

     Shows an error dialog when no book is selected or when the clipboard
     holds no copied metadata, and asks for confirmation before pasting onto
     more than one book.  Fields listed in the ``exclude_fields_on_paste``
     tweak are nulled before pasting; an excluded title or author list is
     replaced by each book's own current value.
     """
     selected = self.gui.library_view.selectionModel().selectedRows()
     if not selected:
         return error_dialog(self.gui, _('Cannot paste metadata'),
                             _('No books selected'), show=True)
     clipboard = QApplication.clipboard()
     md = clipboard.mimeData()
     if not md.hasFormat('application/calibre-book-metadata'):
         return error_dialog(self.gui, _('Cannot paste metadata'),
                             _('No copied metadata available'), show=True)
     if len(selected) > 1:
         question = _(
                 'You are pasting metadata onto <b>multiple books</b> ({num_of_books}). Are you'
                 ' sure you want to do that?').format(num_of_books=len(selected))
         if not confirm(question, 'paste-onto-multiple', parent=self.gui):
             return
     payload = bytes(md.data('application/calibre-book-metadata'))
     reader = OPF(BytesIO(payload), populate_spine=False, read_toc=False, try_to_guess_cover=False)
     mi = reader.to_book_metadata()
     # The pasted metadata must not carry over the source book's identity.
     mi.application_id = mi.uuid_id = None
     exclude = set(tweaks['exclude_fields_on_paste'])
     cover = None if 'cover' in exclude else md.imageData()
     # 'cover' is handled separately and is not nulled like the other fields.
     exclude.discard('cover')
     for field in exclude:
         mi.set_null(field)
     db = self.gui.current_db
     book_ids = {db.id(index.row()) for index in selected}
     keep_own_title = 'title' in exclude
     keep_own_authors = 'authors' in exclude
     for book_id in book_ids:
         if keep_own_title:
             mi.title = db.new_api.field_for('title', book_id)
         if keep_own_authors:
             mi.authors = db.new_api.field_for('authors', book_id)
         db.new_api.set_metadata(book_id, mi, ignore_errors=True)
     if cover:
         db.new_api.set_cover({book_id: cover for book_id in book_ids})
     self.refresh_books_after_metadata_edit(book_ids)
Пример #17
0
 def read_user_metadata(self):
     '''
     Read all metadata specified by the user. Command line options override
     metadata from a specified OPF file.

     The result is stored in ``self.user_metadata``.  A cover given as an
     http(s) URL is downloaded first; the cover file is then read into
     ``cover_data`` and ``cover`` is reset to None.
     '''
     from calibre.ebooks.metadata import MetaInformation
     from calibre.ebooks.metadata.opf2 import OPF
     mi = MetaInformation(None, [])
     if self.opts.read_metadata_from_opf is not None:
         self.opts.read_metadata_from_opf = os.path.abspath(
                                         self.opts.read_metadata_from_opf)
         opf_path = self.opts.read_metadata_from_opf
         # Close the OPF file once it has been parsed instead of leaking it.
         with open(opf_path, 'rb') as opf_file:
             opf = OPF(opf_file, os.path.dirname(opf_path))
             mi = opf.to_book_metadata()
     self.opts_to_mi(mi)
     if mi.cover:
         if mi.cover.startswith(('http:', 'https:')):
             mi.cover = self.download_cover(mi.cover)
         ext = mi.cover.rpartition('.')[-1].lower().strip()
         if ext not in ('png', 'jpg', 'jpeg', 'gif'):
             ext = 'jpg'
         with open(mi.cover, 'rb') as cover_file:
             mi.cover_data = (ext, cover_file.read())
         mi.cover = None
     self.user_metadata = mi
Пример #18
0
    def process_result(self, group_id, result):
        """Handle the metadata-reading result for one group of files.

        Builds the metadata object (or a failure placeholder obtained from
        ``self.report_metadata_failure``), applies the title fallback and the
        tag mapping rules, and then either queues the book
        (``self.db is None``), merges it into identical existing books,
        records it as a duplicate, or adds it.
        """
        if not result.err:
            paths, opf, has_cover, duplicate_info = result.value
            try:
                reader = OPF(BytesIO(opf), basedir=self.tdir, populate_spine=False, try_to_guess_cover=False)
                mi = reader.to_book_metadata()
                mi.read_metadata_failed = False
            except Exception:
                mi = self.report_metadata_failure(group_id, traceback.format_exc())
        else:
            mi = self.report_metadata_failure(group_id, result.traceback)
            paths = self.file_groups[group_id]
            has_cover = False
            duplicate_info = set() if self.add_formats_to_existing else False

        if mi.is_null("title"):
            # Fall back to the file name (without extension) of the first path.
            for first_path in paths:
                base_name = os.path.basename(first_path)
                mi.title = os.path.splitext(base_name)[0]
                break
        if mi.application_id == "__calibre_dummy__":
            mi.application_id = None

        tag_rules = gprefs.get("tag_map_on_add_rules")
        if tag_rules:
            from calibre.ebooks.metadata.tag_mapper import map_tags

            mi.tags = map_tags(mi.tags, tag_rules)

        self.pd.msg = mi.title

        cover_path = None
        if has_cover:
            cover_path = os.path.join(self.tdir, "%s.cdata" % group_id)

        if self.db is None:
            # No database: only collect the item for the caller.
            if paths:
                self.items.append((mi, cover_path, paths))
            return

        if not self.add_formats_to_existing:
            if duplicate_info or icu_lower(mi.title or _("Unknown")) in self.added_duplicate_info:
                self.duplicates.append((mi, cover_path, paths))
            else:
                self.add_book(mi, cover_path, paths)
            return

        identical_book_ids = find_identical_books(mi, self.find_identical_books_data)
        if not identical_book_ids:
            self.add_book(mi, cover_path, paths)
            return

        try:
            self.merge_books(mi, cover_path, paths, identical_book_ids)
        except Exception:
            report = self.report.append
            report("")
            report("-" * 70)
            report(_("Failed to merge the book: ") + mi.title)
            for f in paths:
                report("\t" + f)
            report(_("With error:"))
            report(traceback.format_exc())
Пример #19
0
 def get_metadata(book_id):
     """Return ``(oldmi, newmi)`` for ``book_id``.

     ``oldmi`` is the metadata currently in the database, including cover
     data.  ``newmi`` is read from the OPF file listed for the book in
     ``id_map``; when no OPF is listed it carries only the old title and
     authors.  A listed cover file is loaded into ``newmi.cover_data``.
     """
     oldmi = db.get_metadata(book_id, index_is_id=True, get_cover=True,
                             cover_as_data=True)
     opf, cov = id_map[book_id]
     if opf is not None:
         with open(opf, 'rb') as f:
             reader = OPF(f, basedir=os.path.dirname(opf),
                          populate_spine=False)
             newmi = reader.to_book_metadata()
             newmi.cover, newmi.cover_data = None, (None, None)
             for field in ('title', 'authors'):
                 if not newmi.is_null(field):
                     continue
                 # Title and author are set to null if they are
                 # the same as the originals as an optimization,
                 # we undo that, as it is confusing.
                 newmi.set(field, copy.copy(oldmi.get(field)))
     else:
         newmi = Metadata(oldmi.title, authors=tuple(oldmi.authors))
     if cov:
         with open(cov, 'rb') as f:
             newmi.cover_data = ('jpg', f.read())
     return oldmi, newmi
Пример #20
0
 def test_backup(self):  # {{{
     'Test the automatic backup of changed metadata'
     # Work on a clone of the library.
     cl = self.cloned_library
     cache = self.init_cache(cl)
     ae, af, sf = self.assertEqual, self.assertFalse, cache.set_field
     # First empty dirtied
     cache.dump_metadata()
     af(cache.dirtied_cache)
     from calibre.db.backup import MetadataBackup
     interval = 0.01
     mb = MetadataBackup(cache, interval=interval, scheduling_interval=0)
     mb.start()
     try:
         # Change title and authors of books 1-3; set_field is expected to
         # report exactly those ids.
         ae(sf('title', {1: 'title1', 2: 'title2', 3: 'title3'}), {1, 2, 3})
         ae(
             sf(
                 'authors', {
                     1: 'author1 & author2',
                     2: 'author1 & author2',
                     3: 'author1 & author2'
                 }), {1, 2, 3})
         # Wait for the dirty queue to drain, giving up after 6 joins with
         # a 2 second timeout each.
         count = 6
         while cache.dirty_queue_length() and count > 0:
             mb.join(2)
             count -= 1
         af(cache.dirty_queue_length())
     finally:
         mb.stop()
     # After stop() the backup thread must terminate within the join timeout.
     mb.join(2)
     af(mb.is_alive())
     from calibre.ebooks.metadata.opf2 import OPF
     # The backed-up OPF of every changed book must hold the new values.
     for book_id in (1, 2, 3):
         raw = cache.read_backup(book_id)
         opf = OPF(BytesIO(raw))
         ae(opf.title, 'title%d' % book_id)
         ae(opf.authors, ['author1', 'author2'])
Пример #21
0
    def __init__(self):
        try:
            mimetype = self.open('mimetype').read().rstrip()
            if mimetype != OCF.MIMETYPE:
                print 'WARNING: Invalid mimetype declaration', mimetype
        except:
            print 'WARNING: Epub doesn\'t contain a mimetype declaration'

        try:
            with closing(self.open(OCF.CONTAINER_PATH)) as f:
                self.container = Container(f)
        except KeyError:
            raise EPubException("missing OCF container.xml file")
        self.opf_path = self.container[OPF.MIMETYPE]
        try:
            with closing(self.open(self.opf_path)) as f:
                self.opf = OPF(f, self.root, populate_spine=False)
        except KeyError:
            raise EPubException("missing OPF package file")
        try:
            with closing(self.open(self.ENCRYPTION_PATH)) as f:
                self.encryption_meta = Encryption(f.read())
        except:
            self.encryption_meta = Encryption(None)
Пример #22
0
 def paste_metadata(self):
     """Paste the metadata and cover held on the clipboard onto the selected books.

     Shows an error dialog when no book is selected or when the clipboard
     holds no copied metadata, and asks for confirmation before pasting onto
     more than one book.
     """
     selected = self.gui.library_view.selectionModel().selectedRows()
     if not selected:
         return error_dialog(self.gui, _('Cannot paste metadata'),
                             _('No books selected'), show=True)
     md = QApplication.clipboard().mimeData()
     if not md.hasFormat('application/calibre-book-metadata'):
         return error_dialog(self.gui, _('Cannot paste metadata'),
                             _('No copied metadata available'), show=True)
     if len(selected) > 1:
         question = _(
                 'You are pasting metadata onto <b>multiple books</b> ({num_of_books}). Are you'
                 ' sure you want to do that?').format(num_of_books=len(selected))
         confirmed = confirm(question, 'paste-onto-multiple', parent=self.gui)
         if not confirmed:
             return
     payload = bytes(md.data('application/calibre-book-metadata'))
     reader = OPF(BytesIO(payload), populate_spine=False, read_toc=False,
                  try_to_guess_cover=False)
     mi = reader.to_book_metadata()
     # The pasted metadata must not carry over the source book's identity.
     mi.application_id = mi.uuid_id = None
     cover = md.imageData()
     db = self.gui.current_db
     book_ids = {db.id(index.row()) for index in selected}
     for target_id in book_ids:
         db.new_api.set_metadata(target_id, mi, ignore_errors=True)
     if cover:
         db.new_api.set_cover({book_id: cover for book_id in book_ids})
     self.refresh_books_after_metadata_edit(book_ids)
Пример #23
0
    def convert_text(self, oeb_book):
        """Serialize ``oeb_book`` to a temporary directory and write the PDF from it.

        The writer class is chosen by the ``old_pdf_engine`` option.  The
        book is converted with the 'oeb' output plugin, and the spine paths
        and table of contents of the resulting OPF are passed to
        ``self.write``.
        """
        from calibre.ebooks.metadata.opf2 import OPF
        if not self.opts.old_pdf_engine:
            from calibre.ebooks.pdf.render.from_html import PDFWriter
        else:
            from calibre.ebooks.pdf.writer import PDFWriter
            # Bare reference kept from the original; it has no runtime effect.
            PDFWriter

        self.log.debug('Serializing oeb input to disk for processing...')
        self.get_cover_data()

        self.handle_embedded_fonts()

        with TemporaryDirectory('_pdf_out') as oeb_dir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb_book, oeb_dir, self.input_plugin,
                               self.opts, self.log)

            # Use the first OPF file found in the output directory.
            opf_files = glob.glob(os.path.join(oeb_dir, '*.opf'))
            opfpath = opf_files[0]
            opf = OPF(opfpath, os.path.dirname(opfpath))

            spine_paths = [s.path for s in opf.spine]
            toc = getattr(opf, 'toc', None)
            self.write(PDFWriter, spine_paths, toc)
Пример #24
0
def set_metadata_opf2(root, cover_prefix, mi, opf_version,
                      cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False, add_missing_cover=True):
    """Apply the metadata in ``mi`` to a pre-parsed OPF tree and render it.

    :param root: Pre-parsed OPF root element, passed to ``OPF`` as
        ``preparsed_opf``.
    :param cover_prefix: Prefix prepended to ``'cover.jpg'`` when a new cover
        manifest item has to be created; a trailing ``/`` is added if missing.
    :param mi: Metadata to apply. It is copied via ``MetaInformation(mi)`` and
        the copy's ``guide``, ``toc``, ``manifest`` and ``spine`` are cleared.
    :param opf_version: Object with a ``major`` attribute; selects between the
        ``<meta name="cover">`` style (major < 3) and the ``cover-image``
        property style of marking the cover.
    :param cover_data: When not ``None`` (and no raster cover exists yet) a
        cover entry may be added, subject to ``add_missing_cover``.
    :param apply_null: Forwarded to ``opf.smart_update``; also causes the
        identifiers to be replaced rather than merged.
    :param update_timestamp: Copy ``mi.timestamp`` into the OPF when it is set.
    :param force_identifiers: Replace the identifiers rather than merging.
    :param add_missing_cover: Allow creating/marking a cover item when the OPF
        has no raster cover.
    :return: Tuple ``(rendered_opf, raster_cover)``.
    """
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)
    opf = OPF(None, preparsed_opf=root, read_toc=False)
    if mi.languages:
        mi.languages = normalize_languages(list(opf.raw_languages) or [], mi.languages)

    opf.smart_update(mi, apply_null=apply_null)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null or force_identifiers:
        opf.set_identifiers(mi.get_identifiers())
    else:
        # Merge: identifiers from mi override existing ones; entries with an
        # empty key or value are dropped.
        orig = opf.get_identifiers()
        orig.update(mi.get_identifiers())
        # .items() works on both Python 2 and Python 3 mappings, unlike
        # .iteritems() which does not exist on Python 3 dicts.
        opf.set_identifiers({k: v for k, v in orig.items() if k and v})
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
    raster_cover = opf.raster_cover
    if raster_cover is None and cover_data is not None and add_missing_cover:
        guide_raster_cover = opf.guide_raster_cover
        i = None
        if guide_raster_cover is not None:
            # Reuse the cover referenced from the guide.
            i = guide_raster_cover
            raster_cover = i.get('href')
        else:
            if cover_prefix and not cover_prefix.endswith('/'):
                cover_prefix += '/'
            name = cover_prefix + 'cover.jpg'
            i = create_manifest_item(opf.root, name, 'cover')
            if i is not None:
                raster_cover = name
        if i is not None:
            if opf_version.major < 3:
                # Replace any existing <meta name="cover"> with one pointing
                # at the chosen item.
                for stale in opf.root.xpath('//*[local-name()="meta" and @name="cover"]'):
                    stale.getparent().remove(stale)
                m = opf.create_metadata_element('meta', is_dc=False)
                m.set('name', 'cover')
                m.set('content', i.get('id'))
            else:
                # Strip the cover-image property from every item that has it,
                # then mark the chosen item.
                for x in opf.root.xpath('//*[local-name()="item" and contains(@properties, "cover-image")]'):
                    x.set('properties', x.get('properties').replace('cover-image', '').strip())
                i.set('properties', 'cover-image')

    with pretty_print:
        return opf.render(), raster_cover
Пример #25
0
    def test_against_opf2(self):  # {{{
        # opf2 {{{
        raw = '''<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
        <dc:identifier opf:scheme="calibre" id="calibre_id">1698</dc:identifier>
        <dc:identifier opf:scheme="uuid" id="uuid_id">27106d11-0721-44bc-bcdd-2840f31aaec0</dc:identifier>
        <dc:title>DOCX Demo</dc:title>
        <dc:creator opf:file-as="Goyal, Kovid" opf:role="aut">Kovid Goyal</dc:creator>
        <dc:contributor opf:file-as="calibre" opf:role="bkp">calibre (2.57.1) [http://calibre-ebook.com]</dc:contributor>
        <dc:date>2016-02-17T10:53:08+00:00</dc:date>
        <dc:description>Demonstration of DOCX support in calibre</dc:description>
        <dc:publisher>Kovid Goyal</dc:publisher>
        <dc:identifier opf:scheme="K">xxx</dc:identifier>
        <dc:language>eng</dc:language>
        <dc:subject>calibre</dc:subject>
        <dc:subject>conversion</dc:subject>
        <dc:subject>docs</dc:subject>
        <dc:subject>ebook</dc:subject>
        <meta content="{&quot;Kovid Goyal&quot;: &quot;&quot;}" name="calibre:author_link_map"/>
        <meta content="Demos" name="calibre:series"/>
        <meta content="1" name="calibre:series_index"/>
        <meta content="10" name="calibre:rating"/>
        <meta content="2015-12-11T16:28:36+00:00" name="calibre:timestamp"/>
        <meta content="DOCX Demo" name="calibre:title_sort"/>
        <meta content="{&quot;crew.crow&quot;: [], &quot;crew.moose&quot;: [], &quot;crew&quot;: []}" name="calibre:user_categories"/>
        <meta name="calibre:user_metadata:#number" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Number&quot;,
        &quot;rec_index&quot;: 29, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 12, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;number_format&quot;: null}, &quot;search_terms&quot;:
        [&quot;#number&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;int&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: 31, &quot;is_custom&quot;:
        true, &quot;label&quot;: &quot;number&quot;, &quot;table&quot;:
        &quot;custom_column_12&quot;, &quot;is_multiple&quot;: null,
        &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#genre" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Genre&quot;,
        &quot;rec_index&quot;: 26, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 9, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;use_decorations&quot;: 0}, &quot;search_terms&quot;:
        [&quot;#genre&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;text&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: &quot;Demos&quot;,
        &quot;is_custom&quot;: true, &quot;label&quot;: &quot;genre&quot;,
        &quot;table&quot;: &quot;custom_column_9&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#commetns"
        content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;:
        &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;:
        &quot;My Comments&quot;, &quot;rec_index&quot;: 23,
        &quot;#extra#&quot;: null, &quot;colnum&quot;: 13,
        &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;:
        &quot;value&quot;, &quot;display&quot;: {}, &quot;search_terms&quot;:
        [&quot;#commetns&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;comments&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;:
        &quot;&lt;div&gt;&lt;b&gt;&lt;i&gt;Testing&lt;/i&gt;&lt;/b&gt; extra
        &lt;font
        color=\&quot;#aa0000\&quot;&gt;comments&lt;/font&gt;&lt;/div&gt;&quot;,
        &quot;is_custom&quot;: true, &quot;label&quot;: &quot;commetns&quot;,
        &quot;table&quot;: &quot;custom_column_13&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#formats" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Formats&quot;,
        &quot;rec_index&quot;: 25, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 4, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;composite_template&quot;: &quot;{formats}&quot;,
        &quot;contains_html&quot;: false, &quot;use_decorations&quot;: 0,
        &quot;composite_sort&quot;: &quot;text&quot;,
        &quot;make_category&quot;: false}, &quot;search_terms&quot;:
        [&quot;#formats&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;composite&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: &quot;AZW3, DOCX, EPUB&quot;,
        &quot;is_custom&quot;: true, &quot;label&quot;: &quot;formats&quot;,
        &quot;table&quot;: &quot;custom_column_4&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#rating" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Rating&quot;,
        &quot;rec_index&quot;: 30, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 1, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {},
        &quot;search_terms&quot;: [&quot;#rating&quot;],
        &quot;is_editable&quot;: true, &quot;datatype&quot;:
        &quot;rating&quot;, &quot;link_column&quot;: &quot;value&quot;,
        &quot;#value#&quot;: 10, &quot;is_custom&quot;: true,
        &quot;label&quot;: &quot;rating&quot;, &quot;table&quot;:
        &quot;custom_column_1&quot;, &quot;is_multiple&quot;: null,
        &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#series" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Series2&quot;,
        &quot;rec_index&quot;: 31, &quot;#extra#&quot;: 1.0,
        &quot;colnum&quot;: 5, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {},
        &quot;search_terms&quot;: [&quot;#series&quot;],
        &quot;is_editable&quot;: true, &quot;datatype&quot;:
        &quot;series&quot;, &quot;link_column&quot;: &quot;value&quot;,
        &quot;#value#&quot;: &quot;s&quot;, &quot;is_custom&quot;: true,
        &quot;label&quot;: &quot;series&quot;, &quot;table&quot;:
        &quot;custom_column_5&quot;, &quot;is_multiple&quot;: null,
        &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#tags" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Tags&quot;,
        &quot;rec_index&quot;: 33, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 11, &quot;is_multiple2&quot;:
        {&quot;ui_to_list&quot;: &quot;,&quot;, &quot;cache_to_list&quot;:
        &quot;|&quot;, &quot;list_to_ui&quot;: &quot;, &quot;},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;is_names&quot;: false, &quot;description&quot;: &quot;A tag like
        column for me&quot;}, &quot;search_terms&quot;: [&quot;#tags&quot;],
        &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;text&quot;,
        &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;:
        [&quot;t1&quot;, &quot;t2&quot;], &quot;is_custom&quot;: true,
        &quot;label&quot;: &quot;tags&quot;, &quot;table&quot;:
        &quot;custom_column_11&quot;, &quot;is_multiple&quot;: &quot;|&quot;,
        &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#yesno" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Yes/No&quot;,
        &quot;rec_index&quot;: 34, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 7, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {},
        &quot;search_terms&quot;: [&quot;#yesno&quot;],
        &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;bool&quot;,
        &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: false,
        &quot;is_custom&quot;: true, &quot;label&quot;: &quot;yesno&quot;,
        &quot;table&quot;: &quot;custom_column_7&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#myenum" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Enum&quot;,
        &quot;rec_index&quot;: 28, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 6, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;enum_colors&quot;: [], &quot;enum_values&quot;:
        [&quot;One&quot;, &quot;Two&quot;, &quot;Three&quot;],
        &quot;use_decorations&quot;: 0}, &quot;search_terms&quot;:
        [&quot;#myenum&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;enumeration&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: &quot;Two&quot;,
        &quot;is_custom&quot;: true, &quot;label&quot;: &quot;myenum&quot;,
        &quot;table&quot;: &quot;custom_column_6&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#isbn" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;ISBN&quot;,
        &quot;rec_index&quot;: 27, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 3, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;composite_template&quot;:
        &quot;{identifiers:select(isbn)}&quot;, &quot;contains_html&quot;:
        false, &quot;use_decorations&quot;: 0, &quot;composite_sort&quot;:
        &quot;text&quot;, &quot;make_category&quot;: false},
        &quot;search_terms&quot;: [&quot;#isbn&quot;], &quot;is_editable&quot;:
        true, &quot;datatype&quot;: &quot;composite&quot;,
        &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;:
        &quot;&quot;, &quot;is_custom&quot;: true, &quot;label&quot;:
        &quot;isbn&quot;, &quot;table&quot;: &quot;custom_column_3&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#authors" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Authors&quot;,
        &quot;rec_index&quot;: 22, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 10, &quot;is_multiple2&quot;:
        {&quot;ui_to_list&quot;: &quot;&amp;&quot;, &quot;cache_to_list&quot;:
        &quot;|&quot;, &quot;list_to_ui&quot;: &quot; &amp; &quot;},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;is_names&quot;: true}, &quot;search_terms&quot;:
        [&quot;#authors&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;text&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: [&quot;calibre, Kovid
        Goyal&quot;], &quot;is_custom&quot;: true, &quot;label&quot;:
        &quot;authors&quot;, &quot;table&quot;: &quot;custom_column_10&quot;,
        &quot;is_multiple&quot;: &quot;|&quot;, &quot;is_category&quot;:
        true}"/>
        <meta name="calibre:user_metadata:#date" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Date&quot;,
        &quot;rec_index&quot;: 24, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 2, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;date_format&quot;: &quot;dd-MM-yyyy&quot;,
        &quot;description&quot;: &quot;&quot;}, &quot;search_terms&quot;:
        [&quot;#date&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;datetime&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: {&quot;__value__&quot;:
        &quot;2016-02-17T10:54:15+00:00&quot;, &quot;__class__&quot;:
        &quot;datetime.datetime&quot;}, &quot;is_custom&quot;: true,
        &quot;label&quot;: &quot;date&quot;, &quot;table&quot;:
        &quot;custom_column_2&quot;, &quot;is_multiple&quot;: null,
        &quot;is_category&quot;: false}"/>
    </metadata><manifest><item href="start.html" media-type="text/html" id="m1"/></manifest><spine><itemref idref="m1"/></spine>
</package>'''  # }}}

        def compare_metadata(mi2, mi3):
            self.ae(mi2.get_all_user_metadata(False), mi3.get_all_user_metadata(False))
            for field in ALL_METADATA_FIELDS:
                if field not in 'manifest spine':
                    v2, v3 = getattr(mi2, field, None), getattr(mi3, field, None)
                    self.ae(v2, v3, '%s: %r != %r' % (field, v2, v3))

        mi2 = OPF(BytesIO(raw.encode('utf-8'))).to_book_metadata()
        root = etree.fromstring(raw)
        root.set('version', '3.0')
        mi3, _, raster_cover, first_spine_item  = read_metadata(root, return_extra_data=True)
        self.assertIsNone(raster_cover)
        self.ae('start.html', first_spine_item)
        compare_metadata(mi2, mi3)
        apply_metadata(root, mi3, force_identifiers=True)
        nmi = read_metadata(root)
        compare_metadata(mi3, nmi)
        mi3.tags = []
        mi3.set('#tags', [])
        mi3.set('#number', 0)
        mi3.set('#commetns', '')
        apply_metadata(root, mi3, update_timestamp=True)
        self.assertFalse(root.xpath('//*/@name'))
        nmi = read_metadata(root)
        self.assertEqual(mi2.tags, nmi.tags)
        self.assertEqual(mi2.get('#tags'), nmi.get('#tags'))
        self.assertEqual(mi2.get('#commetns'), nmi.get('#commetns'))
        self.assertEqual(0, nmi.get('#number'))
        apply_metadata(root, mi3, apply_null=True)
        nmi = read_metadata(root)
        self.assertFalse(nmi.tags)
        self.assertFalse(nmi.get('#tags'))
        self.assertFalse(nmi.get('#commetns'))
        self.assertIsNone(apply_metadata(root, mi3, cover_data=b'x', cover_prefix='xxx', add_missing_cover=False))
        self.ae('xxx/cover.jpg', apply_metadata(root, mi3, cover_data=b'x', cover_prefix='xxx'))
Пример #26
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Write ``oeb_book`` to ``output_path`` as an HTMLZ (zipped HTML) file.

        The book is rendered to a single HTML file in a temporary directory,
        together with an optional external stylesheet, the referenced images,
        an optional ``cover.jpg`` and a ``metadata.opf``; the directory is
        then added to a zip archive at ``output_path``.

        :param oeb_book: The OEB book to convert.
        :param output_path: Path of the archive to create.
        :param input_plugin: Not used by this method.
        :param opts: Conversion options; ``htmlz_css_type``,
            ``htmlz_class_style`` and ``htmlz_title_filename`` are read here.
        :param log: Logger handed to the HTML serializer.
        """
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory(u'_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = u'index'
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to
                fname = shorten_components_to(100, (ascii_filename(
                    unicode_type(oeb_book.metadata.title[0])), ))[0]
            with open(os.path.join(tdir, fname + u'.html'), 'wb') as tf:
                if isinstance(html, unicode_type):
                    html = html.encode('utf-8')
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                css = htmlizer.get_css(oeb_book)
                # The file is opened in binary mode, so text must be encoded.
                if isinstance(css, unicode_type):
                    css = css.encode('utf-8')
                with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                    tf.write(css)

            # Images
            images = htmlizer.images
            if images:
                if not os.path.exists(os.path.join(tdir, u'images')):
                    os.makedirs(os.path.join(tdir, u'images'))
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            # SVG items are serialized from their parsed tree
                            # to text.
                            data = unicode_type(
                                etree.tostring(item.data,
                                               encoding=unicode_type))
                        else:
                            data = item.data
                        # Binary-mode file: encode text (the SVG case) before
                        # writing.
                        if isinstance(data, unicode_type):
                            data = data.encode('utf-8')
                        image_path = os.path.join(tdir, u'images',
                                                  images[item.href])
                        with open(image_path, 'wb') as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, u'cover.jpg')
                    # Create the (empty) file before saving the cover into it.
                    with lopen(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # Cover extraction is best effort: report and carry on, but do
                # not swallow KeyboardInterrupt/SystemExit.
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
                opf = OPF(
                    io.BytesIO(
                        etree.tostring(oeb_book.metadata.to_opf1(),
                                       encoding='UTF-8')))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = u'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            # Close the archive deterministically so it is complete on disk
            # before the temporary directory is removed.
            with ZipFile(output_path, 'w') as htmlz:
                htmlz.add_dir(tdir)
Пример #27
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Convert a KePub file into a structure calibre can process.

        The archive in ``stream`` is extracted into the current working
        directory, its OPF is located and normalized (hrefs made relative to
        the working directory, cover rationalized, unusable spine entries
        dropped) and written out as ``content.opf``.

        Returns the absolute path of the written ``content.opf``. Raises
        ``ValueError`` when no OPF is found, when DTBook markup is present or
        when the spine ends up empty, and ``DRMError`` when ``rights.xml``
        exists.
        """
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        try:
            zf = ZipFile(stream)
            if sys.version_info.major == 2:
                cwd = os.getcwdu()
            else:
                cwd = os.getcwd()
            zf.extractall(cwd)
        except Exception:
            log.exception("KEPUB appears to be invalid ZIP file, trying a "
                          "more forgiving ZIP parser")
            from calibre.utils.localunzip import extractall

            stream.seek(0)
            extractall(stream)

        # Locate the OPF, falling back to scanning the extracted tree.
        opf = self.find_opf()
        if opf is None:
            for candidate in walk("."):
                if not candidate.lower().endswith(".opf"):
                    continue
                if "__MACOSX" in candidate:
                    continue
                if os.path.basename(candidate).startswith("."):
                    continue
                opf = os.path.abspath(candidate)
                break
        path = getattr(stream, "name", "stream")

        if opf is None:
            message = _(  # noqa: F821
                "{0} is not a valid KEPUB file (could not find opf)")
            raise ValueError(message.format(path))

        encfile = os.path.abspath("rights.xml")
        if os.path.exists(encfile):
            raise DRMError(os.path.basename(path))

        if sys.version_info.major == 2:
            cwd = os.getcwdu()
        else:
            cwd = os.getcwd()
        opf_relpath = os.path.relpath(opf, cwd)
        parts = os.path.split(opf_relpath)
        opf = OPF(opf_relpath, os.path.dirname(os.path.abspath(opf_relpath)))

        self.encrypted_fonts = []

        if len(parts) > 1 and parts[0]:
            # The OPF lives in a sub-directory: prefix manifest and guide
            # hrefs with that directory.
            delta = "/".join(parts[:-1]) + "/"
            for manifest_item in opf.itermanifest():
                manifest_item.set("href", delta + manifest_item.get("href"))
            for guide_ref in opf.iterguide():
                guide_ref.set("href", delta + guide_ref.get("href"))

        if opf.package_version >= 3.0:
            rationalize = self.rationalize_cover3
        else:
            rationalize = self.rationalize_cover2
        self.removed_cover = rationalize(opf, log)

        self.optimize_opf_parsing = opf
        for manifest_item in opf.itermanifest():
            if manifest_item.get("media-type", "") == "application/x-dtbook+xml":
                raise ValueError(
                    _("EPUB files with DTBook markup are not supported"
                      )  # noqa: F821
                )

        # Manifest ids whose media type disqualifies them from the spine.
        excluded_types = {
            "application/vnd.adobe-page-template+xml",
            "application/vnd.adobe.page-template+xml",
            "application/adobe-page-template+xml",
            "application/adobe.page-template+xml",
            "application/text",
        }
        not_for_spine = set()
        for manifest_item in opf.itermanifest():
            item_id = manifest_item.get("id", None)
            if item_id and manifest_item.get("media-type", None) in excluded_types:
                not_for_spine.add(item_id)

        # Drop spine entries that are empty, excluded or duplicated.
        seen = set()
        for itemref in list(opf.iterspine()):
            ref = itemref.get("idref", None)
            if ref and ref not in not_for_spine and ref not in seen:
                seen.add(ref)
            else:
                itemref.getparent().remove(itemref)

        if not list(opf.iterspine()):
            raise ValueError(
                _("No valid entries in the spine of this EPUB")  # noqa: F821
            )

        with open("content.opf", "wb") as nopf:
            nopf.write(opf.render())

        return os.path.abspath("content.opf")
Пример #28
0
    def __enter__(self, processed=False, only_input_plugin=False,
                  run_char_count=True, read_anchor_map=True, view_kepub=False, read_links=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        The book at ``self.pathtoebook`` is extracted into a temporary
        directory, its OPF is parsed, and ``self.spine``, ``self.pages``,
        ``self.toc`` and related attributes are populated.

        :param processed: Forwarded to ``run_extract_book``.
        :param only_input_plugin: Forwarded to ``run_extract_book``.
        :param run_char_count: Forwarded to each ``SpineItem``.
        :param read_anchor_map: Forwarded to each ``SpineItem``; when true,
            indexing data is also created from the spine and the ToC.
        :param view_kepub: Forwarded to ``run_extract_book``.
        :param read_links: Forwarded to each ``SpineItem``.
        :return: ``self``.
        '''

        self.delete_on_exit = []
        if self.use_tdir_in_cache:
            self._tdir = tdir_in_cache('ev')
        else:
            self._tdir = PersistentTemporaryDirectory('_ebook_iter')
        self.base  = os.path.realpath(self._tdir)
        self.book_format, self.pathtoopf, input_fmt = run_extract_book(
            self.pathtoebook, self.base, only_input_plugin=only_input_plugin, view_kepub=view_kepub, processed=processed)
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.mi = self.opf.to_book_metadata()
        self.language = None
        if self.mi.languages:
            self.language = self.mi.languages[0].lower()
        # Linear spine items first, non-linear ones afterwards.
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links,
                run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
        is_comic = input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except Exception:
                # Best effort: skip items that cannot be loaded, but let
                # KeyboardInterrupt/SystemExit propagate.
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                                        'azw', 'azw3', 'docx', 'htmlz'}:
            # Generate a title page that references the cover and put it at
            # the front of the spine; it is removed again on exit.
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile,
                mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                # Best effort: a broken HTML ToC must not abort loading.
                import traceback
                traceback.print_exc()

        # Derive page counts from character counts and assign page ranges.
        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self
Пример #29
0
    def test_annotations(self):  # {{{
        'Test handling of annotations'
        from calibre.utils.date import utcnow, EPOCH
        cl = self.cloned_library
        cache = self.init_cache(cl)
        # First empty dirtied
        cache.dump_metadata()
        self.assertFalse(cache.dirtied_cache)

        def a(**kw):
            # Build an (annotation, seconds-since-EPOCH) pair. Both values are
            # derived from the same instant so they describe one timestamp.
            ts = utcnow()
            kw['timestamp'] = ts.isoformat()
            return kw, (ts - EPOCH).total_seconds()

        annot_list = [
            a(type='bookmark', title='bookmark1 changed', seq=1),
            a(type='highlight', highlighted_text='text1', uuid='1', seq=2),
            a(type='highlight', highlighted_text='text2', uuid='2', seq=3, notes='notes2 some word changed again'),
        ]

        def map_as_list(amap):
            # Flatten the annotations map into one list ordered by 'seq'.
            ans = []
            for items in amap.values():
                ans.extend(items)
            ans.sort(key=lambda x:x['seq'])
            return ans

        cache.set_annotations_for_book(1, 'moo', annot_list)
        amap = cache.annotations_map_for_book(1, 'moo')
        self.assertEqual(3, len(cache.all_annotations_for_book(1)))
        self.assertEqual([x[0] for x in annot_list], map_as_list(amap))
        # The book only becomes dirtied once check_dirtied_annotations() runs.
        self.assertFalse(cache.dirtied_cache)
        cache.check_dirtied_annotations()
        self.assertEqual(set(cache.dirtied_cache), {1})
        cache.dump_metadata()
        cache.check_dirtied_annotations()
        self.assertFalse(cache.dirtied_cache)

        # Test searching
        results = cache.search_annotations('"changed"')
        self.assertEqual([1, 3], [x['id'] for x in results])
        results = cache.search_annotations('"changed"', annotation_type='bookmark')
        self.assertEqual([1], [x['id'] for x in results])
        results = cache.search_annotations('"Changed"')  # changed and change stem differently in english and other euro languages
        self.assertEqual([1, 3], [x['id'] for x in results])
        results = cache.search_annotations('"SOMe"')
        self.assertEqual([3], [x['id'] for x in results])
        results = cache.search_annotations('"change"', use_stemming=False)
        self.assertFalse(results)
        results = cache.search_annotations('"bookmark1"', highlight_start='[', highlight_end=']')
        self.assertEqual(results[0]['text'], '[bookmark1] changed')
        results = cache.search_annotations('"word"', highlight_start='[', highlight_end=']', snippet_size=3)
        self.assertEqual(results[0]['text'], '…some [word] changed…')
        self.assertRaises(FTSQueryError, cache.search_annotations, 'AND OR')
        fts_l = [a(type='bookmark', title='路坎坷走来', seq=1),]
        cache.set_annotations_for_book(1, 'moo', fts_l)
        results = cache.search_annotations('路', highlight_start='[', highlight_end=']')
        self.assertEqual(results[0]['text'], '[路]坎坷走来')

        # Updating an existing annotation
        annot_list[0][0]['title'] = 'changed title'
        cache.set_annotations_for_book(1, 'moo', annot_list)
        amap = cache.annotations_map_for_book(1, 'moo')
        self.assertEqual([x[0] for x in annot_list], map_as_list(amap))

        # Removing an annotation
        del annot_list[1]
        cache.set_annotations_for_book(1, 'moo', annot_list)
        amap = cache.annotations_map_for_book(1, 'moo')
        self.assertEqual([x[0] for x in annot_list], map_as_list(amap))
        cache.check_dirtied_annotations()
        cache.dump_metadata()
        # Restoring annotations from the OPF backup must give the same result.
        from calibre.ebooks.metadata.opf2 import OPF
        raw = cache.read_backup(1)
        opf = OPF(BytesIO(raw))
        cache.restore_annotations(1, list(opf.read_annotations()))
        amap = cache.annotations_map_for_book(1, 'moo')
        self.assertEqual([x[0] for x in annot_list], map_as_list(amap))
Пример #30
0
    def test_against_opf2(self):  # {{{
        # opf2 {{{
        raw = '''<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
        <dc:identifier opf:scheme="calibre" id="calibre_id">1698</dc:identifier>
        <dc:identifier opf:scheme="uuid" id="uuid_id">27106d11-0721-44bc-bcdd-2840f31aaec0</dc:identifier>
        <dc:title>DOCX Demo</dc:title>
        <dc:creator opf:file-as="Goyal, Kovid" opf:role="aut">Kovid Goyal</dc:creator>
        <dc:contributor opf:file-as="calibre" opf:role="bkp">calibre (2.57.1) [http://calibre-ebook.com]</dc:contributor>
        <dc:date>2016-02-17T10:53:08+00:00</dc:date>
        <dc:description>Demonstration of DOCX support in calibre</dc:description>
        <dc:publisher>Kovid Goyal</dc:publisher>
        <dc:identifier opf:scheme="K">xxx</dc:identifier>
        <dc:language>eng</dc:language>
        <dc:subject>calibre</dc:subject>
        <dc:subject>conversion</dc:subject>
        <dc:subject>docs</dc:subject>
        <dc:subject>ebook</dc:subject>
        <meta content="{&quot;Kovid Goyal&quot;: &quot;&quot;}" name="calibre:author_link_map"/>
        <meta content="Demos" name="calibre:series"/>
        <meta content="1" name="calibre:series_index"/>
        <meta content="10" name="calibre:rating"/>
        <meta content="2015-12-11T16:28:36+00:00" name="calibre:timestamp"/>
        <meta content="DOCX Demo" name="calibre:title_sort"/>
        <meta content="{&quot;crew.crow&quot;: [], &quot;crew.moose&quot;: [], &quot;crew&quot;: []}" name="calibre:user_categories"/>
        <meta name="calibre:user_metadata:#number" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Number&quot;,
        &quot;rec_index&quot;: 29, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 12, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;number_format&quot;: null}, &quot;search_terms&quot;:
        [&quot;#number&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;int&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: 31, &quot;is_custom&quot;:
        true, &quot;label&quot;: &quot;number&quot;, &quot;table&quot;:
        &quot;custom_column_12&quot;, &quot;is_multiple&quot;: null,
        &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#genre" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Genre&quot;,
        &quot;rec_index&quot;: 26, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 9, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;use_decorations&quot;: 0}, &quot;search_terms&quot;:
        [&quot;#genre&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;text&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: &quot;Demos&quot;,
        &quot;is_custom&quot;: true, &quot;label&quot;: &quot;genre&quot;,
        &quot;table&quot;: &quot;custom_column_9&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#commetns"
        content="{&quot;kind&quot;: &quot;field&quot;, &quot;column&quot;:
        &quot;value&quot;, &quot;is_csp&quot;: false, &quot;name&quot;:
        &quot;My Comments&quot;, &quot;rec_index&quot;: 23,
        &quot;#extra#&quot;: null, &quot;colnum&quot;: 13,
        &quot;is_multiple2&quot;: {}, &quot;category_sort&quot;:
        &quot;value&quot;, &quot;display&quot;: {}, &quot;search_terms&quot;:
        [&quot;#commetns&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;comments&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;:
        &quot;&lt;div&gt;&lt;b&gt;&lt;i&gt;Testing&lt;/i&gt;&lt;/b&gt; extra
        &lt;font
        color=\&quot;#aa0000\&quot;&gt;comments&lt;/font&gt;&lt;/div&gt;&quot;,
        &quot;is_custom&quot;: true, &quot;label&quot;: &quot;commetns&quot;,
        &quot;table&quot;: &quot;custom_column_13&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#formats" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Formats&quot;,
        &quot;rec_index&quot;: 25, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 4, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;composite_template&quot;: &quot;{formats}&quot;,
        &quot;contains_html&quot;: false, &quot;use_decorations&quot;: 0,
        &quot;composite_sort&quot;: &quot;text&quot;,
        &quot;make_category&quot;: false}, &quot;search_terms&quot;:
        [&quot;#formats&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;composite&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: &quot;AZW3, DOCX, EPUB&quot;,
        &quot;is_custom&quot;: true, &quot;label&quot;: &quot;formats&quot;,
        &quot;table&quot;: &quot;custom_column_4&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#rating" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Rating&quot;,
        &quot;rec_index&quot;: 30, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 1, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {},
        &quot;search_terms&quot;: [&quot;#rating&quot;],
        &quot;is_editable&quot;: true, &quot;datatype&quot;:
        &quot;rating&quot;, &quot;link_column&quot;: &quot;value&quot;,
        &quot;#value#&quot;: 10, &quot;is_custom&quot;: true,
        &quot;label&quot;: &quot;rating&quot;, &quot;table&quot;:
        &quot;custom_column_1&quot;, &quot;is_multiple&quot;: null,
        &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#series" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Series2&quot;,
        &quot;rec_index&quot;: 31, &quot;#extra#&quot;: 1.0,
        &quot;colnum&quot;: 5, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {},
        &quot;search_terms&quot;: [&quot;#series&quot;],
        &quot;is_editable&quot;: true, &quot;datatype&quot;:
        &quot;series&quot;, &quot;link_column&quot;: &quot;value&quot;,
        &quot;#value#&quot;: &quot;s&quot;, &quot;is_custom&quot;: true,
        &quot;label&quot;: &quot;series&quot;, &quot;table&quot;:
        &quot;custom_column_5&quot;, &quot;is_multiple&quot;: null,
        &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#tags" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Tags&quot;,
        &quot;rec_index&quot;: 33, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 11, &quot;is_multiple2&quot;:
        {&quot;ui_to_list&quot;: &quot;,&quot;, &quot;cache_to_list&quot;:
        &quot;|&quot;, &quot;list_to_ui&quot;: &quot;, &quot;},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;is_names&quot;: false, &quot;description&quot;: &quot;A tag like
        column for me&quot;}, &quot;search_terms&quot;: [&quot;#tags&quot;],
        &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;text&quot;,
        &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;:
        [&quot;t1&quot;, &quot;t2&quot;], &quot;is_custom&quot;: true,
        &quot;label&quot;: &quot;tags&quot;, &quot;table&quot;:
        &quot;custom_column_11&quot;, &quot;is_multiple&quot;: &quot;|&quot;,
        &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#yesno" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;Yes/No&quot;,
        &quot;rec_index&quot;: 34, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 7, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;: {},
        &quot;search_terms&quot;: [&quot;#yesno&quot;],
        &quot;is_editable&quot;: true, &quot;datatype&quot;: &quot;bool&quot;,
        &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;: false,
        &quot;is_custom&quot;: true, &quot;label&quot;: &quot;yesno&quot;,
        &quot;table&quot;: &quot;custom_column_7&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#myenum" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Enum&quot;,
        &quot;rec_index&quot;: 28, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 6, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;enum_colors&quot;: [], &quot;enum_values&quot;:
        [&quot;One&quot;, &quot;Two&quot;, &quot;Three&quot;],
        &quot;use_decorations&quot;: 0}, &quot;search_terms&quot;:
        [&quot;#myenum&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;enumeration&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: &quot;Two&quot;,
        &quot;is_custom&quot;: true, &quot;label&quot;: &quot;myenum&quot;,
        &quot;table&quot;: &quot;custom_column_6&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: true}"/>
        <meta name="calibre:user_metadata:#isbn" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;ISBN&quot;,
        &quot;rec_index&quot;: 27, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 3, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;composite_template&quot;:
        &quot;{identifiers:select(isbn)}&quot;, &quot;contains_html&quot;:
        false, &quot;use_decorations&quot;: 0, &quot;composite_sort&quot;:
        &quot;text&quot;, &quot;make_category&quot;: false},
        &quot;search_terms&quot;: [&quot;#isbn&quot;], &quot;is_editable&quot;:
        true, &quot;datatype&quot;: &quot;composite&quot;,
        &quot;link_column&quot;: &quot;value&quot;, &quot;#value#&quot;:
        &quot;&quot;, &quot;is_custom&quot;: true, &quot;label&quot;:
        &quot;isbn&quot;, &quot;table&quot;: &quot;custom_column_3&quot;,
        &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>
        <meta name="calibre:user_metadata:#authors" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Authors&quot;,
        &quot;rec_index&quot;: 22, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 10, &quot;is_multiple2&quot;:
        {&quot;ui_to_list&quot;: &quot;&amp;&quot;, &quot;cache_to_list&quot;:
        &quot;|&quot;, &quot;list_to_ui&quot;: &quot; &amp; &quot;},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;is_names&quot;: true}, &quot;search_terms&quot;:
        [&quot;#authors&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;text&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: [&quot;calibre, Kovid
        Goyal&quot;], &quot;is_custom&quot;: true, &quot;label&quot;:
        &quot;authors&quot;, &quot;table&quot;: &quot;custom_column_10&quot;,
        &quot;is_multiple&quot;: &quot;|&quot;, &quot;is_category&quot;:
        true}"/>
        <meta name="calibre:user_metadata:#date" content="{&quot;kind&quot;:
        &quot;field&quot;, &quot;column&quot;: &quot;value&quot;,
        &quot;is_csp&quot;: false, &quot;name&quot;: &quot;My Date&quot;,
        &quot;rec_index&quot;: 24, &quot;#extra#&quot;: null,
        &quot;colnum&quot;: 2, &quot;is_multiple2&quot;: {},
        &quot;category_sort&quot;: &quot;value&quot;, &quot;display&quot;:
        {&quot;date_format&quot;: &quot;dd-MM-yyyy&quot;,
        &quot;description&quot;: &quot;&quot;}, &quot;search_terms&quot;:
        [&quot;#date&quot;], &quot;is_editable&quot;: true,
        &quot;datatype&quot;: &quot;datetime&quot;, &quot;link_column&quot;:
        &quot;value&quot;, &quot;#value#&quot;: {&quot;__value__&quot;:
        &quot;2016-02-17T10:54:15+00:00&quot;, &quot;__class__&quot;:
        &quot;datetime.datetime&quot;}, &quot;is_custom&quot;: true,
        &quot;label&quot;: &quot;date&quot;, &quot;table&quot;:
        &quot;custom_column_2&quot;, &quot;is_multiple&quot;: null,
        &quot;is_category&quot;: false}"/>
    </metadata><manifest><item href="start.html" media-type="text/html" id="m1"/></manifest><spine><itemref idref="m1"/></spine>
</package>'''  # }}}

        def compare_metadata(mi2, mi3):
            self.ae(mi2.get_all_user_metadata(False),
                    mi3.get_all_user_metadata(False))
            for field in ALL_METADATA_FIELDS:
                if field not in 'manifest spine':
                    v2, v3 = getattr(mi2, field,
                                     None), getattr(mi3, field, None)
                    self.ae(v2, v3, '%s: %r != %r' % (field, v2, v3))

        mi2 = OPF(BytesIO(raw.encode('utf-8'))).to_book_metadata()
        root = etree.fromstring(raw)
        root.set('version', '3.0')
        mi3, _, raster_cover, first_spine_item = read_metadata(
            root, return_extra_data=True)
        self.assertIsNone(raster_cover)
        self.ae('start.html', first_spine_item)
        compare_metadata(mi2, mi3)
        apply_metadata(root, mi3, force_identifiers=True)
        nmi = read_metadata(root)
        compare_metadata(mi3, nmi)
        mi3.tags = []
        mi3.set('#tags', [])
        mi3.set('#number', 0)
        mi3.set('#commetns', '')
        apply_metadata(root, mi3, update_timestamp=True)
        self.assertFalse(root.xpath('//*/@name'))
        nmi = read_metadata(root)
        self.assertEqual(mi2.tags, nmi.tags)
        self.assertEqual(mi2.get('#tags'), nmi.get('#tags'))
        self.assertEqual(mi2.get('#commetns'), nmi.get('#commetns'))
        self.assertEqual(0, nmi.get('#number'))
        apply_metadata(root, mi3, apply_null=True)
        nmi = read_metadata(root)
        self.assertFalse(nmi.tags)
        self.assertFalse(nmi.get('#tags'))
        self.assertFalse(nmi.get('#commetns'))
        self.assertIsNone(
            apply_metadata(root,
                           mi3,
                           cover_data=b'x',
                           cover_prefix='xxx',
                           add_missing_cover=False))
        self.ae('xxx/cover.jpg',
                apply_metadata(root, mi3, cover_data=b'x', cover_prefix='xxx'))
Пример #31
0
    def test_get(self):  # {{{
        'Test /get'
        # Exercises the /get/<what>/<book_id>[/<library_id>] endpoint of a
        # test server: invalid requests, book formats, plugboards, covers and
        # thumbnails, cache file sharing, and OPF/JSON metadata. The
        # 'Used-Cache' response header is checked throughout, so the order of
        # the requests below matters.
        with self.create_server() as server:
            db = server.handler.router.ctx.library_broker.get(None)
            conn = server.connect()

            def get(what, book_id, library_id=None, q=''):
                # Issue GET /get/<what>/<book_id>[/<library_id>][?<q>] and
                # return the response object together with its body.
                q = ('?' + q) if q else q
                conn.request(
                    'GET', '/get/%s/%s' % (what, book_id) +
                    (('/' + library_id) if library_id else '') + q)
                r = conn.getresponse()
                return r, r.read()

            # Test various invalid parameters
            def bad(*args):
                # The request described by args must be answered with 404.
                r, data = get(*args)
                self.ae(r.status, http_client.NOT_FOUND)

            bad('xxx', 1)
            bad('fmt1', 10)
            bad('fmt1', 1, 'zzzz')
            bad('fmt1', 'xx')

            # Test simple fetching of format without metadata update
            r, data = get('fmt1', 1, db.server_library_id)
            self.ae(data, db.format(1, 'fmt1'))
            self.assertIsNotNone(r.getheader('Content-Disposition'))
            self.ae(r.getheader('Used-Cache'), 'no')
            # The second request for the same format must come from the cache.
            r, data = get('fmt1', 1)
            self.ae(data, db.format(1, 'fmt1'))
            self.ae(r.getheader('Used-Cache'), 'yes')

            # Test fetching of format with metadata update
            raw = P('quick_start/eng.epub', data=True)
            r, data = get('epub', 1)
            self.ae(r.status, http_client.OK)
            etag = r.getheader('ETag')
            self.assertIsNotNone(etag)
            self.ae(r.getheader('Used-Cache'), 'no')
            self.assertTrue(data.startswith(b'PK'))
            self.assertGreaterEqual(len(data), len(raw))
            # Changing the title must produce a new ETag and a cache miss.
            db.set_field('title', {1: 'changed'})
            r, data = get('epub', 1)
            self.assertNotEqual(r.getheader('ETag'), etag)
            etag = r.getheader('ETag')
            self.ae(r.getheader('Used-Cache'), 'no')
            mi = get_metadata(BytesIO(data), extract_cover=False)
            self.ae(mi.title, 'changed')
            r, data = get('epub', 1)
            self.ae(r.getheader('Used-Cache'), 'yes')

            # Test plugboards
            import calibre.library.save_to_disk as c
            # Temporarily force the module's DEBUG flag off; it is restored
            # in the finally clause below.
            orig, c.DEBUG = c.DEBUG, False
            try:
                db.set_pref(
                    'plugboards', {
                        u'epub': {
                            u'content_server':
                            [[u'changed, {title}', u'title']]
                        }
                    })
                # this is needed as the cache is not invalidated for plugboard changes
                db.set_field('title', {1: 'again'})
                r, data = get('epub', 1)
                self.assertNotEqual(r.getheader('ETag'), etag)
                etag = r.getheader('ETag')
                self.ae(r.getheader('Used-Cache'), 'no')
                mi = get_metadata(BytesIO(data), extract_cover=False)
                self.ae(mi.title, 'changed, again')
            finally:
                c.DEBUG = orig

            # Test the serving of covers
            def change_cover(count, book_id=2):
                # Replace a cover and push the mtime of book_id's cover file
                # into the future so that the change is noticed.
                # NOTE(review): set_cover() below always targets book 2, even
                # when book_id is something else (see change_cover(1, 1));
                # only the mtime bump uses book_id. Confirm this is intended.
                cpath = db.format_abspath(book_id, '__COVER_INTERNAL__')
                db.set_cover({2: I('lt.png', data=True)})
                t = time.time() + 1 + count
                # Ensure mtime changes, needed on OS X where HFS+ has a 1s
                # mtime resolution
                os.utime(cpath, (t, t))

            r, data = get('cover', 1)
            self.ae(r.status, http_client.OK)
            self.ae(data, db.cover(1))
            self.ae(r.getheader('Used-Cache'), 'no')
            self.ae(r.getheader('Content-Type'), 'image/jpeg')
            r, data = get('cover', 1)
            self.ae(r.status, http_client.OK)
            self.ae(data, db.cover(1))
            self.ae(r.getheader('Used-Cache'), 'yes')
            r, data = get('cover', 3)
            self.ae(r.status, http_client.OK)  # Auto generated cover
            # Thumbnails: 60x60 when no size is given, sz=<n> gives n x n.
            r, data = get('thumb', 1)
            self.ae(r.status, http_client.OK)
            self.ae(identify(data), ('jpeg', 60, 60))
            self.ae(r.getheader('Used-Cache'), 'no')
            r, data = get('thumb', 1)
            self.ae(r.status, http_client.OK)
            self.ae(r.getheader('Used-Cache'), 'yes')
            r, data = get('thumb', 1, q='sz=100')
            self.ae(r.status, http_client.OK)
            self.ae(identify(data), ('jpeg', 100, 100))
            self.ae(r.getheader('Used-Cache'), 'no')
            # sz=100x100 must hit the cache entry created by sz=100.
            r, data = get('thumb', 1, q='sz=100x100')
            self.ae(r.status, http_client.OK)
            self.ae(r.getheader('Used-Cache'), 'yes')
            # After the cover file's mtime changes the thumbnail is rebuilt.
            change_cover(1, 1)
            r, data = get('thumb', 1, q='sz=100')
            self.ae(r.status, http_client.OK)
            self.ae(identify(data), ('jpeg', 100, 100))
            self.ae(r.getheader('Used-Cache'), 'no')

            # Test file sharing in cache
            r, data = get('cover', 2)
            self.ae(r.status, http_client.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            # Keep the file named by the 'Tempfile' header open while the
            # cover is updated underneath it.
            path = from_hex_unicode(r.getheader('Tempfile'))
            f, fdata = share_open(path, 'rb'), data
            # Now force an update
            change_cover(1)
            r, data = get('cover', 2)
            self.ae(r.status, http_client.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            path = from_hex_unicode(r.getheader('Tempfile'))
            f2, f2data = share_open(path, 'rb'), data
            # Do it again
            change_cover(2)
            r, data = get('cover', 2)
            self.ae(r.status, http_client.OK)
            self.ae(data, db.cover(2))
            self.ae(r.getheader('Used-Cache'), 'no')
            # The handles opened earlier must still yield the data that was
            # served at the time they were opened.
            self.ae(f.read(), fdata)
            self.ae(f2.read(), f2data)

            # Test serving of metadata as opf
            r, data = get('opf', 1)
            self.ae(r.status, http_client.OK)
            self.ae(r.getheader('Content-Type'),
                    'application/oebps-package+xml; charset=UTF-8')
            self.assertIsNotNone(r.getheader('Last-Modified'))
            opf = OPF(BytesIO(data),
                      populate_spine=False,
                      try_to_guess_cover=False)
            self.ae(db.field_for('title', 1), opf.title)
            self.ae(db.field_for('authors', 1), tuple(opf.authors))
            # Request the same resource gzip-compressed.
            conn.request('GET',
                         '/get/opf/1',
                         headers={'Accept-Encoding': 'gzip'})
            r = conn.getresponse()
            # Both assertions below run; they are merely written as a tuple
            # expression.
            self.ae(r.status,
                    http_client.OK), self.ae(r.getheader('Content-Encoding'),
                                             'gzip')
            raw = r.read()
            # wbits of 16 + MAX_WBITS makes zlib expect a gzip container.
            self.ae(zlib.decompress(raw, 16 + zlib.MAX_WBITS), data)

            # Test serving metadata as json
            r, data = get('json', 1)
            self.ae(r.status, http_client.OK)
            self.ae(db.field_for('title', 1), json.loads(data)['title'])
            conn.request('GET',
                         '/get/json/1',
                         headers={'Accept-Encoding': 'gzip'})
            r = conn.getresponse()
            self.ae(r.status,
                    http_client.OK), self.ae(r.getheader('Content-Encoding'),
                                             'gzip')
            raw = r.read()
            self.ae(zlib.decompress(raw, 16 + zlib.MAX_WBITS), data)
Пример #32
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book`` to ``output_path`` as a zip archive containing the
        HTML, an optional stylesheet, images, a cover and ``metadata.opf``.

        :param oeb_book: The book to serialize; its manifest, metadata and
                         guide are read.
        :param output_path: Path of the archive to create.
        :param input_plugin: Unused by this method.
        :param opts: Conversion options; ``htmlz_css_type``,
                     ``htmlz_class_style`` and ``htmlz_title_filename`` are
                     consulted here.
        :param log: Logger handed to the HTML serializer.
        '''
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == "inline":
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer

            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == "tag":
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer

            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory("_htmlz_output") as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = "index"
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to

                fname = shorten_components_to(100, (ascii_filename(unicode(oeb_book.metadata.title[0])),))[0]
            with open(os.path.join(tdir, fname + ".html"), "wb") as tf:
                if isinstance(html, unicode):
                    html = html.encode("utf-8")
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == "class" and opts.htmlz_class_style == "external":
                css = htmlizer.get_css(oeb_book)
                # The file is opened in binary mode, so encode text
                # explicitly, exactly as is done for the HTML above.
                if isinstance(css, unicode):
                    css = css.encode("utf-8")
                with open(os.path.join(tdir, "style.css"), "wb") as tf:
                    tf.write(css)

            # Images
            images = htmlizer.images
            if images:
                if not os.path.exists(os.path.join(tdir, "images")):
                    os.makedirs(os.path.join(tdir, "images"))
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            # SVG items are serialized from their tree; the
                            # resulting text must be encoded before it is
                            # written to a binary file, otherwise non-ASCII
                            # content fails on the implicit conversion.
                            data = unicode(etree.tostring(item.data, encoding=unicode)).encode("utf-8")
                        else:
                            data = item.data
                        fname = os.path.join(tdir, "images", images[item.href])
                        with open(fname, "wb") as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.magick.draw import save_cover_data_to

                    cover_path = os.path.join(tdir, "cover.jpg")
                    # Create the (empty) file before the cover is saved to it.
                    with open(cover_path, "w") as cf:
                        cf.write("")
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # A missing or broken cover must not abort the conversion, but
                # interrupts (KeyboardInterrupt/SystemExit) should propagate.
                import traceback

                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, "metadata.opf"), "wb") as mdataf:
                opf = OPF(StringIO(etree.tostring(oeb_book.metadata.to_opf1())))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = "cover.jpg"
                mdataf.write(metadata_to_opf(mi))

            # Close the archive explicitly so it is finalized before the
            # temporary directory is removed, instead of relying on garbage
            # collection.
            htmlz = ZipFile(output_path, "w")
            try:
                htmlz.add_dir(tdir)
            finally:
                htmlz.close()
Пример #33
0
def do_set_metadata(db, id, stream):
    """Parse OPF data from ``stream`` and store the metadata on book ``id``."""
    parsed = OPF(stream)
    book_metadata = parsed.to_book_metadata()
    db.set_metadata(id, book_metadata)
Пример #34
0
    def __enter__(self,
                  processed=False,
                  only_input_plugin=False,
                  run_char_count=True,
                  read_anchor_map=True,
                  view_kepub=False,
                  read_links=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        When used in a ``with`` statement all parameters take their defaults;
        call the method directly to override them.

        :param processed: Forwarded to ``run_extract_book``.
        :param only_input_plugin: Forwarded to ``run_extract_book``.
        :param run_char_count: Forwarded to every ``SpineItem``.
        :param read_anchor_map: Forwarded to every ``SpineItem``; also decides
                                whether indexing data is created for the ToC.
        :param view_kepub: Forwarded to ``run_extract_book``.
        :param read_links: Forwarded to every ``SpineItem``.
        :return: ``self``, with ``opf``, ``mi``, ``language``, ``spine``,
                 ``pages`` and ``toc`` populated.
        '''

        self.delete_on_exit = []
        if self.use_tdir_in_cache:
            self._tdir = tdir_in_cache('ev')
        else:
            self._tdir = PersistentTemporaryDirectory('_ebook_iter')
        self.base = os.path.realpath(self._tdir)
        self.book_format, self.pathtoopf, input_fmt = run_extract_book(
            self.pathtoebook,
            self.base,
            only_input_plugin=only_input_plugin,
            view_kepub=view_kepub,
            processed=processed)
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.mi = self.opf.to_book_metadata()
        self.language = None
        if self.mi.languages:
            self.language = self.mi.languages[0].lower()

        self.spine = []
        # Factory for spine items with the options common to all of them.
        Spiny = partial(SpineItem,
                        read_anchor_map=read_anchor_map,
                        read_links=read_links,
                        run_char_count=run_char_count,
                        from_epub=self.book_format == 'EPUB')
        if input_fmt.lower() == 'htmlz':
            # HTMLZ input: the spine is the single index.html next to the OPF.
            self.spine.append(
                Spiny(os.path.join(os.path.dirname(self.pathtoopf),
                                   'index.html'),
                      mime_type='text/html'))
        else:
            # Linear spine items first, non-linear ones after them.
            ordered = [i for i in self.opf.spine if i.is_linear] + \
                    [i for i in self.opf.spine if not i.is_linear]
            is_comic = input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
            for i in ordered:
                spath = i.path
                mt = None
                if i.idref is not None:
                    mt = self.opf.manifest.type_for_id(i.idref)
                if mt is None:
                    mt = guess_type(spath)[0]
                try:
                    self.spine.append(Spiny(spath, mime_type=mt))
                    if is_comic:
                        self.spine[-1].is_single_page = True
                except Exception:
                    # Best effort: skip spine items that cannot be loaded, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {
                'lit', 'mobi', 'prc', 'opf', 'fb2', 'azw', 'azw3', 'docx',
                'htmlz'
        }:
            # For these formats, generate a title page that references the
            # cover and put it at the front of the spine.
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE %
                     prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                # A broken HTML ToC is reported but does not abort loading.
                import traceback
                traceback.print_exc()

        # Page count per spine item: character count divided by
        # CHARACTERS_PER_PAGE, rounded up.
        sizes = [i.character_count for i in self.spine]
        self.pages = [
            math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes
        ]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        # Assign consecutive, 1-based page ranges to the spine items.
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self
Пример #35
0
class EbookIterator(BookmarksMixin):
    """Explode an ebook into a temporary directory and expose its spine.

    Meant to be used as a context manager: ``__enter__`` extracts the book,
    builds ``self.spine`` and computes page numbers, while ``__exit__``
    removes the temporary files again.
    """

    # Number of characters that count as one page when paginating the spine.
    CHARACTERS_PER_PAGE = 1000

    def __init__(self,
                 pathtoebook,
                 log=None,
                 copy_bookmarks_to_file=True,
                 use_tdir_in_cache=False):
        """Store the book path and derive its normalised extension.

        :param pathtoebook: path to the ebook; surrounding whitespace is
            stripped and the path is made absolute.
        :param log: logger to use; falls back to ``default_log``.
        :param copy_bookmarks_to_file: forwarded to ``BookmarksMixin``.
        :param use_tdir_in_cache: when true, ``__enter__`` obtains its working
            directory from ``tdir_in_cache`` instead of creating a
            ``PersistentTemporaryDirectory``.
        """
        BookmarksMixin.__init__(self,
                                copy_bookmarks_to_file=copy_bookmarks_to_file)
        self.use_tdir_in_cache = use_tdir_in_cache
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        # Fold htm / html / xhtm / xhtml into the single extension 'html'.
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        """Find the nearest spine item beyond ``index`` that contains ``text``.

        Spine items after ``index`` are scanned in order (or, when
        ``backwards`` is true, items before ``index`` in reverse order). The
        comparison is case-insensitive: both the query and the body text are
        lowercased. Text inside ``script``, ``style`` and ``del`` elements is
        ignored.

        :return: index of the first matching spine item, or ``None`` when no
            item matches.
        """
        from calibre.ebooks.oeb.polish.parsing import parse
        pmap = list(enumerate(self.spine))
        if backwards:
            pmap.reverse()
        q = text.lower()
        for i, path in pmap:
            if (backwards and i < index) or (not backwards and i > index):
                with open(path, 'rb') as f:
                    raw = f.read().decode(path.encoding)
                root = parse(raw)
                fragments = []

                def serialize(elem):
                    if elem.text:
                        fragments.append(elem.text.lower())
                    if elem.tail:
                        fragments.append(elem.tail.lower())
                    for child in elem.iterchildren():
                        if hasattr(
                                getattr(child, 'tag', None),
                                'rpartition') and child.tag.rpartition(
                                    '}')[-1] not in {'script', 'style', 'del'}:
                            serialize(child)
                        elif getattr(child, 'tail', None):
                            # Skipped element (or a node whose tag is not a
                            # string): its own content is ignored but the
                            # text that follows it still counts.
                            fragments.append(child.tail.lower())

                for body in root.xpath('//*[local-name() = "body"]'):
                    # Text after </body> is not part of the body content.
                    body.tail = None
                    serialize(body)

                if q in ''.join(fragments):
                    return i
        return None

    def __enter__(self,
                  processed=False,
                  only_input_plugin=False,
                  run_char_count=True,
                  read_anchor_map=True,
                  view_kepub=False,
                  read_links=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        ``processed``, ``only_input_plugin`` and ``view_kepub`` are forwarded
        to ``run_extract_book``; ``run_char_count``, ``read_anchor_map`` and
        ``read_links`` are forwarded to every ``SpineItem``. Returns ``self``.
        '''

        self.delete_on_exit = []
        if self.use_tdir_in_cache:
            self._tdir = tdir_in_cache('ev')
        else:
            self._tdir = PersistentTemporaryDirectory('_ebook_iter')
        self.base = os.path.realpath(self._tdir)
        self.book_format, self.pathtoopf, input_fmt = run_extract_book(
            self.pathtoebook,
            self.base,
            only_input_plugin=only_input_plugin,
            view_kepub=view_kepub,
            processed=processed)
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.mi = self.opf.to_book_metadata()
        self.language = None
        if self.mi.languages:
            self.language = self.mi.languages[0].lower()

        self.spine = []
        Spiny = partial(SpineItem,
                        read_anchor_map=read_anchor_map,
                        read_links=read_links,
                        run_char_count=run_char_count,
                        from_epub=self.book_format == 'EPUB')
        if input_fmt.lower() == 'htmlz':
            # HTMLZ input: the spine is the single index.html next to the OPF.
            self.spine.append(
                Spiny(os.path.join(os.path.dirname(self.pathtoopf),
                                   'index.html'),
                      mime_type='text/html'))
        else:
            # Linear spine items first, non-linear ones afterwards.
            ordered = [i for i in self.opf.spine if i.is_linear] + \
                    [i for i in self.opf.spine if not i.is_linear]
            is_comic = input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
            for i in ordered:
                spath = i.path
                mt = None
                if i.idref is not None:
                    mt = self.opf.manifest.type_for_id(i.idref)
                if mt is None:
                    mt = guess_type(spath)[0]
                try:
                    self.spine.append(Spiny(spath, mime_type=mt))
                    if is_comic:
                        self.spine[-1].is_single_page = True
                except Exception:
                    # Best effort: an item that cannot be loaded is reported
                    # and left out of the spine. KeyboardInterrupt and
                    # SystemExit are no longer swallowed here.
                    self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {
                'lit', 'mobi', 'prc', 'opf', 'fb2', 'azw', 'azw3', 'docx',
                'htmlz'
        }:
            # Generate an HTML cover page that references the cover image and
            # put it at the front of the spine.
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE %
                     prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        # Paginate: every spine item gets ceil(chars / CHARACTERS_PER_PAGE)
        # pages, numbered consecutively starting from page 1.
        sizes = [i.character_count for i in self.spine]
        self.pages = [
            math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes
        ]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self

    def verify_links(self):
        """Record, per spine item, which of its links resolve inside the book.

        A link is verified when it has neither scheme nor netloc, its path
        resolves to a spine item (an empty path means the item itself) and
        its fragment, if any, is present in the target's ``anchor_map``.
        Verified links are added to ``item.verified_links`` as
        ``(target_item, fragment)`` tuples.
        """
        spine_paths = {s: s for s in self.spine}
        for item in self.spine:
            base = os.path.dirname(item)
            for link in item.all_links:
                try:
                    p = urlparse(urlunquote(link))
                except Exception:
                    continue
                if not p.scheme and not p.netloc:
                    path = os.path.abspath(os.path.join(
                        base, p.path)) if p.path else item
                    try:
                        # Swap the plain path for the spine item object.
                        path = spine_paths[path]
                    except Exception:
                        continue
                    if not p.fragment or p.fragment in path.anchor_map:
                        item.verified_links.add((path, p.fragment))

    def __exit__(self, *args):
        """Remove the working directory and any files queued for deletion."""
        remove_dir(self._tdir)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except Exception:
                # Best-effort cleanup: failures to delete are ignored.
                pass
Пример #36
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Unpack a KePub archive and write a cleaned-up ``content.opf``.

        The archive is extracted into the current working directory, its OPF
        is located, manifest/guide hrefs are rebased when the OPF lives in a
        sub-directory, the cover is rationalised and unusable spine entries
        are dropped.

        Returns the absolute path of the rewritten ``content.opf``.

        Raises ``ValueError`` when no OPF is found, when the manifest contains
        DTBook markup or when no spine entry survives, and ``DRMError`` when a
        ``rights.xml`` file exists in the working directory.
        """
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF

        try:
            archive = ZipFile(stream)
            archive.extractall(os.getcwdu())
        except Exception:
            log.exception(
                "KEPUB appears to be invalid ZIP file, trying a "
                "more forgiving ZIP parser"
            )
            from calibre.utils.localunzip import extractall

            stream.seek(0)
            extractall(stream)

        # Locate the OPF, falling back to the first non-hidden *.opf on disk.
        opf_path = self.find_opf()
        if opf_path is None:
            for candidate in walk(u"."):
                if not candidate.lower().endswith(".opf"):
                    continue
                if "__MACOSX" in candidate:
                    continue
                if os.path.basename(candidate).startswith("."):
                    continue
                opf_path = os.path.abspath(candidate)
                break
        source_name = getattr(stream, "name", "stream")

        if opf_path is None:
            message = _(  # noqa: F821
                "{0} is not a valid KEPUB file (could not find opf)"
            )
            raise ValueError(message.format(source_name))

        if os.path.exists(os.path.abspath("rights.xml")):
            raise DRMError(os.path.basename(source_name))

        relative_opf = os.path.relpath(opf_path, os.getcwdu())
        head_and_tail = os.path.split(relative_opf)
        opf = OPF(relative_opf, os.path.dirname(os.path.abspath(relative_opf)))

        self.encrypted_fonts = []

        # Rebase hrefs so they stay valid relative to the working directory.
        if len(head_and_tail) > 1 and head_and_tail[0]:
            prefix = "/".join(head_and_tail[:-1]) + "/"
            for iter_entries in (opf.itermanifest, opf.iterguide):
                for entry in iter_entries():
                    entry.set("href", prefix + entry.get("href"))

        if opf.package_version >= 3.0:
            rationalize_cover = self.rationalize_cover3
        else:
            rationalize_cover = self.rationalize_cover2
        self.removed_cover = rationalize_cover(opf, log)

        self.optimize_opf_parsing = opf
        for item in opf.itermanifest():
            if item.get("media-type", "") == "application/x-dtbook+xml":
                raise ValueError(
                    _("EPUB files with DTBook markup are not supported")  # noqa: F821
                )

        # Manifest ids whose media type must never appear in the spine.
        excluded_media_types = {
            "application/vnd.adobe-page-template+xml",
            "application/vnd.adobe.page-template+xml",
            "application/adobe-page-template+xml",
            "application/adobe.page-template+xml",
            "application/text",
        }
        not_for_spine = set()
        for item in opf.itermanifest():
            item_id = item.get("id", None)
            if not item_id:
                continue
            if item.get("media-type", None) in excluded_media_types:
                not_for_spine.add(item_id)

        # Drop spine entries that are empty, excluded or duplicated.
        seen = set()
        for itemref in list(opf.iterspine()):
            idref = itemref.get("idref", None)
            if idref and idref not in not_for_spine and idref not in seen:
                seen.add(idref)
            else:
                itemref.getparent().remove(itemref)

        if not list(opf.iterspine()):
            raise ValueError(
                _("No valid entries in the spine of this EPUB")  # noqa: F821
            )

        with open("content.opf", "wb") as out:
            out.write(opf.render())

        return os.path.abspath(u"content.opf")
Пример #37
0
    def add_to_db(self, data):
        """Add automatically detected book files to the library.

        ``data`` maps a file name (relative to ``self.worker.path``) to a
        temporary directory that holds ``size.txt`` and ``metadata.opf`` for
        that file. Files whose current size differs from the recorded size
        (or whose size cannot be checked) are skipped and a rescan is
        scheduled. Duplicates are collected and offered to the user through
        ``DuplicatesQuestion``. All temporary directories are removed at the
        end. Does nothing when the parent GUI is gone.
        """
        from calibre.ebooks.metadata.opf2 import OPF

        gui = self.parent()
        if gui is None:
            return
        m = gui.library_view.model()
        count = 0

        needs_rescan = False
        duplicates = []
        added_ids = set()

        for fname, tdir in data.iteritems():
            paths = [os.path.join(self.worker.path, fname)]
            size_path = os.path.join(tdir, 'size.txt')
            try:
                with open(size_path, 'rb') as f:
                    expected_size = int(f.read())
                if expected_size != os.stat(paths[0]).st_size:
                    raise Exception('Looks like the file was written to after'
                            ' we tried to read metadata')
            except Exception:
                # Any failure here means the file cannot be trusted yet:
                # un-stage it and try again on the next scan.
                needs_rescan = True
                try:
                    self.worker.staging.remove(fname)
                except KeyError:
                    pass

                continue

            opf_path = os.path.join(tdir, 'metadata.opf')
            if not os.access(opf_path, os.R_OK):
                continue
            # Close the OPF file deterministically; a leaked handle can keep
            # the temporary directory from being removed further down.
            with open(opf_path, 'rb') as opf_file:
                mi = [OPF(opf_file, tdir,
                        populate_spine=False).to_book_metadata()]
            dups, ids = m.add_books(paths,
                    [os.path.splitext(fname)[1][1:].upper()], mi,
                    add_duplicates=not gprefs['auto_add_check_for_duplicates'],
                    return_ids=True)
            added_ids |= set(ids)
            num = len(ids)
            if dups:
                # Keep a copy of the duplicate inside tdir, because the
                # original file is deleted below.
                path = dups[0][0]
                with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()),
                        'wb') as dest, open(path, 'rb') as src:
                    shutil.copyfileobj(src, dest)
                    dups[0][0] = dest.name
                duplicates.append(dups)

            try:
                os.remove(paths[0])
                self.worker.staging.remove(fname)
            except Exception:
                import traceback
                traceback.print_exc()
            count += num

        if duplicates:
            paths, formats, metadata = [], [], []
            for p, f, mis in duplicates:
                paths.extend(p)
                formats.extend(f)
                metadata.extend(mis)
            dups = [(mic, mic.cover, [p]) for mic, p in zip(metadata, paths)]
            d = DuplicatesQuestion(m.db, dups, parent=gui)
            dups = tuple(d.duplicates)
            if dups:
                paths, formats, metadata = [], [], []
                for mi, cover, book_paths in dups:
                    paths.extend(book_paths)
                    formats.extend([p.rpartition('.')[-1] for p in book_paths])
                    metadata.extend([mi for i in book_paths])
                ids = m.add_books(paths, formats, metadata,
                        add_duplicates=True, return_ids=True)[1]
                added_ids |= set(ids)
                num = len(ids)
                count += num

        for tdir in data.itervalues():
            try:
                shutil.rmtree(tdir)
            except Exception:
                # Best-effort cleanup of the temporary directories.
                pass

        if added_ids and gprefs['auto_add_auto_convert']:
            self.auto_convert.emit(added_ids)

        if count > 0:
            m.books_added(count)
            gui.status_bar.show_message(_(
                'Added %(num)d book(s) automatically from %(src)s') %
                dict(num=count, src=self.worker.path), 2000)
            if hasattr(gui, 'db_images'):
                gui.db_images.beginResetModel()
                gui.db_images.endResetModel()

        if needs_rescan:
            QTimer.singleShot(2000, self.dir_changed)
Пример #38
0
    def convert(self, oeb_book, output, input_plugin, opts, log):
        """Convert ``oeb_book`` to a temporary EPUB and pass it to the previewer.

        Collects the book title, an ASIN taken from the identifier metadata
        (unless the ``kfx_output_ignore_asin_metadata`` tweak is set) and an
        optional page count, runs the EPUB output plugin with a fixed set of
        options and finally calls ``convert_using_previewer`` with the result.
        """
        self.report_version(log)

        try:
            book_name = str(oeb_book.metadata.title[0])
        except Exception:
            book_name = ""

        # Look for an ASIN, trying the identifier schemes in priority order.
        asin = None
        if not tweaks.get("kfx_output_ignore_asin_metadata", False):
            for scheme_pattern in ("^mobi-asin$", "^amazon.*$", "^asin$"):
                for ident in oeb_book.metadata["identifier"]:
                    scheme = ident.get(OPFNS("scheme"), "").lower()
                    if not re.match(scheme_pattern, scheme):
                        continue
                    if not re.match(ASIN_RE, ident.value):
                        continue
                    asin = ident.value
                    log.info("Found ASIN metadata %s: %s" % (scheme, asin))
                    break

                if asin:
                    break

        # -1 when page approximation is off, otherwise 0 unless a usable
        # value is found in the configured metadata field.
        if not opts.approximate_pages:
            page_count = -1
        else:
            page_count = 0
            if (opts.number_of_pages_field
                    and opts.number_of_pages_field != AUTO_PAGES
                    and opts.read_metadata_from_opf):
                # This OPF contains custom column metadata not present in the oeb_book OPF
                opf = OPF(opts.read_metadata_from_opf, populate_spine=False, try_to_guess_cover=False, read_toc=False)
                page_count_str = opf.to_book_metadata().get(opts.number_of_pages_field, None)

                if page_count_str is None:
                    log.warning("Book has no page count field %s" % opts.number_of_pages_field)
                else:
                    try:
                        page_count = int(page_count_str)
                    except Exception:
                        pass

                    log.info("Page count value from field %s: %d ('%s')" % (opts.number_of_pages_field, page_count, page_count_str))

        # Start from the defaults the EPUB Output plugin expects ...
        for rec in EPUBOutput.options:
            setattr(opts, rec.option.name, rec.recommended_value)

        # ... then override the currently known EPUB Output plugin options.
        for option_name, option_value in (
                ("extract_to", None),
                ("dont_split_on_page_breaks", False),
                ("flow_size", 0),
                ("no_default_epub_cover", False),
                ("no_svg_cover", False),
                ("preserve_cover_aspect_ratio", True),
                ("epub_flatten", False),
                ("epub_inline_toc", False),
                ("epub_toc_at_end", False),
                ("toc_title", None),
        ):
            setattr(opts, option_name, option_value)

        # Convert the input format to EPUB.
        epub_filename = self.temporary_file(".epub").name
        self.epub_output_plugin.convert(oeb_book, epub_filename, input_plugin, opts, log)
        log.info("Successfully converted input format to EPUB")

        if PREPARED_FILE_SAVE_DIR:
            # Keep a copy of the intermediate EPUB.
            if not os.path.exists(PREPARED_FILE_SAVE_DIR):
                os.makedirs(PREPARED_FILE_SAVE_DIR)

            saved_copy = os.path.join(PREPARED_FILE_SAVE_DIR, os.path.basename(epub_filename))
            shutil.copyfile(epub_filename, saved_copy)
            log.warning("Saved conversion input file: %s" % saved_copy)

        self.convert_using_previewer(
            JobLog(log),
            book_name,
            epub_filename,
            asin,
            opts.cde_type_pdoc,
            page_count,
            opts.show_kpr_logs,
            False,
            TIMEOUT if opts.enable_timeout else None,
            output)
Пример #39
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Unpack a KePub archive and write a cleaned-up ``content.opf``.

        The archive is extracted into the current working directory, its OPF
        is located, manifest/guide hrefs are rebased when the OPF lives in a
        sub-directory, the cover is rationalised and unusable spine entries
        are dropped.

        Returns the absolute path of the rewritten ``content.opf``.

        Raises ``ValueError`` when no OPF is found, when the manifest contains
        DTBook markup or when no spine entry survives, and ``DRMError`` when a
        ``rights.xml`` file exists in the working directory.
        """
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF
        try:
            zf = ZipFile(stream)
            zf.extractall(os.getcwdu())
        except Exception:
            # Only real errors trigger the fallback parser; KeyboardInterrupt
            # and SystemExit now propagate instead of being swallowed.
            log.exception('KEPUB appears to be invalid ZIP file, trying a '
                          'more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)
        opf = self.find_opf()
        if opf is None:
            # Fall back to the first non-hidden *.opf found on disk.
            for candidate in walk(u'.'):
                if candidate.lower().endswith('.opf') and \
                        '__MACOSX' not in candidate and \
                        not os.path.basename(candidate).startswith('.'):
                    opf = os.path.abspath(candidate)
                    break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError(
                _('%s is not a valid KEPUB file (could not find opf)') % path)

        encfile = os.path.abspath('rights.xml')
        if os.path.exists(encfile):
            raise DRMError(os.path.basename(path))

        opf = os.path.relpath(opf, os.getcwdu())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self.encrypted_fonts = []

        # Rebase hrefs so they stay valid relative to the working directory.
        if len(parts) > 1 and parts[0]:
            delta = '/'.join(parts[:-1]) + '/'
            for elem in opf.itermanifest():
                elem.set('href', delta + elem.get('href'))
            for elem in opf.iterguide():
                elem.set('href', delta + elem.get('href'))

        if opf.package_version >= 3.0:
            rationalize_cover = self.rationalize_cover3
        else:
            rationalize_cover = self.rationalize_cover2
        self.removed_cover = rationalize_cover(opf, log)

        self.optimize_opf_parsing = opf
        for x in opf.itermanifest():
            if x.get('media-type', '') == 'application/x-dtbook+xml':
                raise ValueError(
                    _('EPUB files with DTBook markup are not supported'))

        # Manifest ids whose media type must never appear in the spine.
        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get('id', None)
            if id_ and y.get('media-type', None) in {
                    'application/vnd.adobe-page-template+xml',
                    'application/vnd.adobe.page-template+xml',
                    'application/adobe-page-template+xml',
                    'application/adobe.page-template+xml', 'application/text'
            }:
                not_for_spine.add(id_)

        # Drop spine entries that are empty, excluded or duplicated.
        seen = set()
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if not list(opf.iterspine()):
            raise ValueError(_('No valid entries in the spine of this EPUB'))

        with open('content.opf', 'wb') as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u'content.opf')
Пример #40
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Extract a KePub archive and produce a normalised ``content.opf``.

        Steps: extract the archive into the current working directory, find
        the OPF, rebase manifest/guide hrefs when the OPF sits in a
        sub-directory, rationalise the cover, and remove spine entries that
        are empty, duplicated or refer to excluded media types.

        Returns the absolute path of the written ``content.opf``.

        Raises ``ValueError`` if no OPF can be found, if DTBook markup is
        present in the manifest or if the spine ends up empty; raises
        ``DRMError`` if ``rights.xml`` exists in the working directory.
        """
        log("KEPUBInput::convert - start")
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        from calibre.ebooks.metadata.opf2 import OPF
        try:
            zf = ZipFile(stream)
            zf.extractall(os.getcwdu())
        except Exception:
            # Catch Exception rather than everything, so that interrupts and
            # interpreter exit requests are not mistaken for a broken ZIP.
            log.exception('KEPUB appears to be invalid ZIP file, trying a '
                          'more forgiving ZIP parser')
            from calibre.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)
        opf = self.find_opf()
        if opf is None:
            # No OPF reported: use the first visible *.opf in the tree.
            for found in walk(u'.'):
                if found.lower().endswith('.opf') and \
                        '__MACOSX' not in found and \
                        not os.path.basename(found).startswith('.'):
                    opf = os.path.abspath(found)
                    break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError(
                _('%s is not a valid KEPUB file (could not find opf)') % path)

        encfile = os.path.abspath('rights.xml')
        if os.path.exists(encfile):
            raise DRMError(os.path.basename(path))

        opf = os.path.relpath(opf, os.getcwdu())
        parts = os.path.split(opf)
        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self.encrypted_fonts = []

        # Prefix hrefs with the OPF's directory so they resolve from the cwd.
        if len(parts) > 1 and parts[0]:
            delta = '/'.join(parts[:-1]) + '/'
            for elem in opf.itermanifest():
                elem.set('href', delta + elem.get('href'))
            for elem in opf.iterguide():
                elem.set('href', delta + elem.get('href'))

        if opf.package_version >= 3.0:
            cover_handler = self.rationalize_cover3
        else:
            cover_handler = self.rationalize_cover2
        self.removed_cover = cover_handler(opf, log)

        self.optimize_opf_parsing = opf
        for x in opf.itermanifest():
            if x.get('media-type', '') == 'application/x-dtbook+xml':
                raise ValueError(_(
                    'EPUB files with DTBook markup are not supported'))

        # Collect manifest ids that are not allowed in the spine.
        not_for_spine = set()
        for y in opf.itermanifest():
            id_ = y.get('id', None)
            if id_ and y.get('media-type', None) in {
                    'application/vnd.adobe-page-template+xml',
                    'application/vnd.adobe.page-template+xml',
                    'application/adobe-page-template+xml',
                    'application/adobe.page-template+xml', 'application/text'
            }:
                not_for_spine.add(id_)

        # Remove itemrefs with no idref, an excluded idref or a repeated one.
        seen = set()
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
            if not ref or ref in not_for_spine or ref in seen:
                x.getparent().remove(x)
                continue
            seen.add(ref)

        if not list(opf.iterspine()):
            raise ValueError(_('No valid entries in the spine of this EPUB'))

        with open('content.opf', 'wb') as nopf:
            nopf.write(opf.render())

        return os.path.abspath(u'content.opf')
Пример #41
0
    def add_to_db(self, data):
        """Add automatically detected book files to the library.

        ``data`` maps a file name (relative to ``self.worker.path``) to a
        temporary directory that holds ``size.txt`` and ``metadata.opf`` for
        that file. Files whose current size differs from the recorded size
        (or whose size cannot be checked) are skipped and a rescan is
        scheduled. Detected duplicates are added only after the user confirms
        via ``question_dialog``. All temporary directories are removed at the
        end. Does nothing when the parent GUI is gone.
        """
        from calibre.ebooks.metadata.opf2 import OPF

        gui = self.parent()
        if gui is None:
            return
        m = gui.library_view.model()
        count = 0

        needs_rescan = False
        duplicates = []
        added_ids = set()

        for fname, tdir in data.iteritems():
            paths = [os.path.join(self.worker.path, fname)]
            size_path = os.path.join(tdir, 'size.txt')
            try:
                with open(size_path, 'rb') as f:
                    expected_size = int(f.read())
                if expected_size != os.stat(paths[0]).st_size:
                    raise Exception('Looks like the file was written to after'
                                    ' we tried to read metadata')
            except Exception:
                # Any failure here means the file cannot be trusted yet:
                # un-stage it and try again on the next scan.
                needs_rescan = True
                try:
                    self.worker.staging.remove(fname)
                except KeyError:
                    pass

                continue

            opf_path = os.path.join(tdir, 'metadata.opf')
            if not os.access(opf_path, os.R_OK):
                continue
            # Close the OPF file deterministically; a leaked handle can keep
            # the temporary directory from being removed further down.
            with open(opf_path, 'rb') as opf_file:
                mi = [
                    OPF(opf_file, tdir,
                        populate_spine=False).to_book_metadata()
                ]
            dups, ids = m.add_books(
                paths, [os.path.splitext(fname)[1][1:].upper()],
                mi,
                add_duplicates=not gprefs['auto_add_check_for_duplicates'],
                return_ids=True)
            added_ids |= set(ids)
            num = len(ids)
            if dups:
                # Keep a copy of the duplicate inside tdir, because the
                # original file is deleted below.
                path = dups[0][0]
                with open(
                        os.path.join(tdir, 'dup_cache.' + dups[1][0].lower()),
                        'wb') as dest, open(path, 'rb') as src:
                    shutil.copyfileobj(src, dest)
                    dups[0][0] = dest.name
                duplicates.append(dups)

            try:
                os.remove(paths[0])
                self.worker.staging.remove(fname)
            except Exception:
                # Still best effort, but report the failure instead of
                # hiding it completely.
                import traceback
                traceback.print_exc()
            count += num

        if duplicates:
            paths, formats, metadata = [], [], []
            for p, f, mis in duplicates:
                paths.extend(p)
                formats.extend(f)
                metadata.extend(mis)
            files = [
                _('%(title)s by %(author)s') %
                dict(title=mi.title, author=mi.format_field('authors')[1])
                for mi in metadata
            ]
            if question_dialog(
                    self.parent(), _('Duplicates found!'),
                    _('Books with the same title as the following already '
                      'exist in the database. Add them anyway?'),
                    '\n'.join(files)):
                dups, ids = m.add_books(paths,
                                        formats,
                                        metadata,
                                        add_duplicates=True,
                                        return_ids=True)
                added_ids |= set(ids)
                num = len(ids)
                count += num

        for tdir in data.itervalues():
            try:
                shutil.rmtree(tdir)
            except Exception:
                # Best-effort cleanup of the temporary directories.
                pass

        if added_ids and gprefs['auto_add_auto_convert']:
            self.auto_convert.emit(added_ids)

        if count > 0:
            m.books_added(count)
            gui.status_bar.show_message(
                _('Added %(num)d book(s) automatically from %(src)s') %
                dict(num=count, src=self.worker.path), 2000)
            if hasattr(gui, 'db_images'):
                gui.db_images.reset()

        if needs_rescan:
            QTimer.singleShot(2000, self.dir_changed)
Пример #42
0
class EbookIterator(BookmarksMixin):
    '''
    Context manager that extracts an ebook into a temporary directory and
    exposes its spine items, OPF metadata, table of contents and page counts.

    ``__enter__`` performs the extraction and returns the iterator itself,
    ``__exit__`` removes the temporary directory again.
    '''

    # Number of characters counted as one page when estimating page numbers
    CHARACTERS_PER_PAGE = 1000

    def __init__(self, pathtoebook, log=None, copy_bookmarks_to_file=True, use_tdir_in_cache=False):
        '''
        :param pathtoebook: Path to the ebook file, surrounding whitespace is stripped.
        :param log: Logger to use, ``default_log`` is used when not given.
        :param copy_bookmarks_to_file: Passed through to ``BookmarksMixin``.
        :param use_tdir_in_cache: If True, ``__enter__`` extracts into a directory
            obtained from ``tdir_in_cache`` instead of a
            ``PersistentTemporaryDirectory``.
        '''
        BookmarksMixin.__init__(self, copy_bookmarks_to_file=copy_bookmarks_to_file)
        self.use_tdir_in_cache = use_tdir_in_cache
        self.log = log or default_log
        pathtoebook = pathtoebook.strip()
        self.pathtoebook = os.path.abspath(pathtoebook)
        self.config = DynamicConfig(name='iterator')
        ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
        # Collapse htm/xhtm/xhtml style extensions to plain "html"
        ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
        self.ebook_ext = ext.replace('original_', '')

    def search(self, text, index, backwards=False):
        '''
        Find the nearest spine item after ``index`` (or before it, when
        ``backwards`` is True) whose body text contains ``text``.

        Matching is case-insensitive and skips the contents of ``script``,
        ``style`` and ``del`` elements.

        :return: The index of the matching spine item, None if there is no match.
        '''
        from calibre.ebooks.oeb.polish.parsing import parse
        ignored_tags = {'script', 'style', 'del'}

        def serialize(elem, fragments):
            # Collect text in document order: the element's own text, then its
            # children, and only afterwards the tail that follows the element.
            # Emitting the tail before the children scrambles the text and
            # breaks queries that span inline markup.
            if elem.text:
                fragments.append(elem.text.lower())
            for child in elem.iterchildren():
                tag = getattr(child, 'tag', None)
                # Children whose tag is not a string (presumably comments and
                # processing instructions) contribute only their tail
                if hasattr(tag, 'rpartition') and tag.rpartition('}')[-1] not in ignored_tags:
                    serialize(child, fragments)
                elif getattr(child, 'tail', None):
                    fragments.append(child.tail.lower())
            if elem.tail:
                fragments.append(elem.tail.lower())

        pmap = list(enumerate(self.spine))
        if backwards:
            pmap.reverse()
        q = text.lower()
        for i, path in pmap:
            if (backwards and i < index) or (not backwards and i > index):
                with open(path, 'rb') as f:
                    raw = f.read().decode(path.encoding)
                root = parse(raw)
                fragments = []
                for body in root.xpath('//*[local-name() = "body"]'):
                    # Whatever follows </body> is not part of the book text
                    body.tail = None
                    serialize(body, fragments)

                if q in ''.join(fragments):
                    return i

    def __enter__(self, processed=False, only_input_plugin=False,
                  run_char_count=True, read_anchor_map=True, view_kepub=False, read_links=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        Populates ``self.opf``, ``self.mi``, ``self.language``, ``self.spine``,
        ``self.pages`` and ``self.toc`` and returns ``self``. '''

        self.delete_on_exit = []
        if self.use_tdir_in_cache:
            self._tdir = tdir_in_cache('ev')
        else:
            self._tdir = PersistentTemporaryDirectory('_ebook_iter')
        self.base  = os.path.realpath(self._tdir)
        self.book_format, self.pathtoopf, input_fmt = run_extract_book(
            self.pathtoebook, self.base, only_input_plugin=only_input_plugin, view_kepub=view_kepub, processed=processed)
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.mi = self.opf.to_book_metadata()
        self.language = None
        if self.mi.languages:
            self.language = self.mi.languages[0].lower()
        # Linear spine items first, non-linear ones afterwards
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links,
                run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
        is_comic = input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except Exception:
                # Best effort: a spine item that cannot be loaded is skipped
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                                        'azw', 'azw3', 'docx', 'htmlz'}:
            # Generate a title page for the cover and put it first in the spine
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile,
                mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        # Page numbers are 1-based and run continuously across spine items
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self

    def verify_links(self):
        '''
        For every spine item, record in ``item.verified_links`` the links that
        have neither scheme nor host, resolve to a spine item and, when they
        carry a fragment, point to an anchor present in that item's
        ``anchor_map``.
        '''
        spine_paths = {s:s for s in self.spine}
        for item in self.spine:
            base = os.path.dirname(item)
            for link in item.all_links:
                try:
                    p = urlparse(urlunquote(link))
                except Exception:
                    continue
                if not p.scheme and not p.netloc:
                    # A link with an empty path refers to the item itself
                    path = os.path.abspath(os.path.join(base, p.path)) if p.path else item
                    try:
                        path = spine_paths[path]
                    except Exception:
                        continue
                    if not p.fragment or p.fragment in path.anchor_map:
                        item.verified_links.add((path, p.fragment))

    def __exit__(self, *args):
        ''' Remove the extraction directory and any files registered in
        ``self.delete_on_exit``. Failures to delete individual files are ignored. '''
        remove_dir(self._tdir)
        for x in self.delete_on_exit:
            try:
                os.remove(x)
            except Exception:
                # Best effort cleanup, the file may already be gone
                pass
Пример #43
0
    def add_to_db(self, data):
        '''
        Add the automatically detected books described by ``data`` to the
        library shown in the parent GUI.

        :param data: Mapping of file name (relative to ``self.worker.path``) to
            the temporary directory holding that file's ``size.txt`` and
            ``metadata.opf``.

        Files whose size no longer matches ``size.txt`` are skipped and a
        rescan is scheduled. Duplicates are collected and offered to the user
        via ``DuplicatesQuestion``. All temporary directories are removed at
        the end.
        '''
        from calibre.ebooks.metadata.opf2 import OPF

        gui = self.parent()
        if gui is None:
            return
        m = gui.library_view.model()
        count = 0

        needs_rescan = False
        duplicates = []
        added_ids = set()

        for fname, tdir in data.iteritems():
            paths = [os.path.join(self.worker.path, fname)]
            sz = os.path.join(tdir, 'size.txt')
            try:
                with open(sz, 'rb') as f:
                    sz = int(f.read())
                if sz != os.stat(paths[0]).st_size:
                    raise Exception('Looks like the file was written to after'
                            ' we tried to read metadata')
            except Exception:
                # The file is missing, unreadable or still changing: try again later
                needs_rescan = True
                try:
                    self.worker.staging.remove(fname)
                except KeyError:
                    pass

                continue

            opf_path = os.path.join(tdir, 'metadata.opf')
            if not os.access(opf_path, os.R_OK):
                continue
            # Close the OPF file deterministically instead of leaking the handle
            with open(opf_path, 'rb') as opf_stream:
                mi = OPF(opf_stream, tdir, populate_spine=False).to_book_metadata()
            if gprefs.get('tag_map_on_add_rules'):
                from calibre.ebooks.metadata.tag_mapper import map_tags
                mi.tags = map_tags(mi.tags, gprefs['tag_map_on_add_rules'])
            mi = [mi]
            dups, ids = m.add_books(paths,
                    [os.path.splitext(fname)[1][1:].upper()], mi,
                    add_duplicates=not gprefs['auto_add_check_for_duplicates'],
                    return_ids=True)
            added_ids |= set(ids)
            num = len(ids)
            if dups:
                # Keep a copy of the duplicate inside tdir, because the original
                # file is deleted below
                path = dups[0][0]
                with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()),
                        'wb') as dest, open(path, 'rb') as src:
                    shutil.copyfileobj(src, dest)
                    dups[0][0] = dest.name
                duplicates.append(dups)

            try:
                os.remove(paths[0])
                self.worker.staging.remove(fname)
            except Exception:
                import traceback
                traceback.print_exc()
            count += num

        if duplicates:
            paths, formats, metadata = [], [], []
            for p, f, mis in duplicates:
                paths.extend(p)
                formats.extend(f)
                metadata.extend(mis)
            dups = [(mic, mic.cover, [p]) for mic, p in zip(metadata, paths)]
            d = DuplicatesQuestion(m.db, dups, parent=gui)
            dups = tuple(d.duplicates)
            if dups:
                paths, formats, metadata = [], [], []
                for mi, cover, book_paths in dups:
                    paths.extend(book_paths)
                    formats.extend([p.rpartition('.')[-1] for p in book_paths])
                    metadata.extend([mi for i in book_paths])
                ids = m.add_books(paths, formats, metadata,
                        add_duplicates=True, return_ids=True)[1]
                added_ids |= set(ids)
                num = len(ids)
                count += num

        for tdir in data.itervalues():
            try:
                shutil.rmtree(tdir)
            except Exception:
                # Best effort cleanup of the temporary directories
                pass

        if added_ids and gprefs['auto_add_auto_convert']:
            self.auto_convert.emit(added_ids)

        if count > 0:
            m.books_added(count)
            gui.status_bar.show_message(_(
                'Added %(num)d book(s) automatically from %(src)s') %
                dict(num=count, src=self.worker.path), 2000)
            gui.refresh_cover_browser()

        if needs_rescan:
            QTimer.singleShot(2000, self.dir_changed)
Пример #44
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book`` to ``output_path`` as an HTMLZ archive.

        The archive is assembled in a temporary directory and contains the
        rendered HTML file, an optional external ``style.css``, the referenced
        images, an optional ``cover.jpg`` and a ``metadata.opf``.

        :param oeb_book: The book to serialize.
        :param output_path: Path of the archive to create.
        :param input_plugin: Not used by this method.
        :param opts: Conversion options; the ``htmlz_css_type``,
            ``htmlz_class_style`` and ``htmlz_title_filename`` settings are read.
        :param log: Logger handed to the HTML serializer.
        '''
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory(u'_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = u'index'
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to
                fname = shorten_components_to(100, (ascii_filename(unicode_type(oeb_book.metadata.title[0])),))[0]
            with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
                if isinstance(html, unicode_type):
                    html = html.encode('utf-8')
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                css = htmlizer.get_css(oeb_book)
                # The file is opened in binary mode, so text must be encoded
                if isinstance(css, unicode_type):
                    css = css.encode('utf-8')
                with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                    tf.write(css)

            # Images
            images = htmlizer.images
            if images:
                if not os.path.exists(os.path.join(tdir, u'images')):
                    os.makedirs(os.path.join(tdir, u'images'))
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            # SVG data is a parsed tree; serialize it to text
                            # and encode it for the binary file below
                            data = etree.tostring(item.data, encoding='unicode').encode('utf-8')
                        else:
                            data = item.data
                        fname = os.path.join(tdir, u'images', images[item.href])
                        with open(fname, 'wb') as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, u'cover.jpg')
                    # Create the file first, then let save_cover_data_to fill it
                    with lopen(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # A missing or broken cover must not abort the conversion
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
                opf = OPF(io.BytesIO(etree.tostring(oeb_book.metadata.to_opf1(), encoding='UTF-8')))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = u'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            # Close the archive explicitly so it is completely written before
            # the temporary directory is removed
            htmlz = ZipFile(output_path, 'w')
            try:
                htmlz.add_dir(tdir)
            finally:
                htmlz.close()
Пример #45
0
    def __enter__(self,
                  processed=False,
                  only_input_plugin=False,
                  run_char_count=True,
                  read_anchor_map=True,
                  extract_embedded_fonts_for_qt=False):
        ''' Explode the ebook into a temporary directory as an OEB book that
        viewers and preprocessing code can work with.

        Runs the input plugin (and, when requested or needed, the HTML
        processing stage of the conversion pipeline), then fills in
        ``self.opf``, ``self.language``, ``self.spine``, ``self.pages`` and
        ``self.toc``. Returns ``self``. '''

        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook

        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        plumber = Plumber(self.pathtoebook, self.base, self.log)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True

        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(
                inf, plumber.opts, plumber.input_fmt, self.log, {}, self.base)

            if not only_input_plugin:
                # Also run the HTML preprocess/parsing stage of the conversion
                # pipeline, either on request or for formats that need it
                needs_processing = processed or (
                    plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
                    and not hasattr(self.pathtoopf, 'manifest'))
                if needs_processing:
                    if hasattr(self.pathtoopf, 'manifest'):
                        self.pathtoopf = write_oebbook(self.pathtoopf,
                                                       self.base)
                    self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
                                                    plumber.opts)

            # An object with a manifest is an in-memory book, write it out
            if hasattr(self.pathtoopf, 'manifest'):
                self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        extension = os.path.splitext(self.pathtoebook)[1][1:]
        if getattr(plumber.input_plugin, 'is_kf8', False):
            self.book_format = 'KF8'
        else:
            self.book_format = extension.upper()

        # Reuse the OPF already parsed by the input plugin when it offers one
        parsed_opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if parsed_opf is None:
            parsed_opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.opf = parsed_opf
        lang = self.opf.language
        self.language = lang.lower() if lang else lang

        # Linear spine items come first, non-linear ones afterwards
        linear_items, other_items = [], []
        for entry in self.opf.spine:
            (linear_items if entry.is_linear else other_items).append(entry)

        self.spine = []
        Spiny = partial(SpineItem,
                        read_anchor_map=read_anchor_map,
                        run_char_count=run_char_count)
        is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for entry in linear_items + other_items:
            item_path = entry.path
            mime = None
            if entry.idref is not None:
                mime = self.opf.manifest.type_for_id(entry.idref)
            if mime is None:
                mime = guess_type(item_path)[0]
            try:
                self.spine.append(Spiny(item_path, mime_type=mime))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except:
                self.log.warn('Missing spine item:', repr(item_path))

        cover = self.opf.cover
        cover_formats = {'lit', 'mobi', 'prc', 'opf', 'fb2', 'azw', 'azw3'}
        if cover and self.ebook_ext in cover_formats:
            # Generate a title page for the cover and make it the first spine item
            cover_page = os.path.join(self.base, 'calibre_iterator_cover.html')
            relative_cover = os.path.relpath(cover, self.base).replace(os.sep, '/')
            markup = TITLEPAGE % prepare_string_for_xml(relative_cover, True)
            with open(cover_page, 'wb') as f:
                f.write(markup.encode('utf-8'))
            self.spine[0:0] = [Spiny(cover_page, mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cover_page)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except:
                import traceback
                traceback.print_exc()

        per_page = float(self.CHARACTERS_PER_PAGE)
        self.pages = [
            math.ceil(item.character_count / per_page) for item in self.spine
        ]
        # Page numbers are 1-based and run continuously across the spine
        next_start = 1
        for page_count, item in zip(self.pages, self.spine):
            item.pages = page_count
            item.start_page = next_start
            next_start += item.pages
            item.max_page = item.start_page + item.pages - 1

        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.read_bookmarks()

        if extract_embedded_fonts_for_qt:
            from calibre.ebooks.oeb.iterator.extract_fonts import extract_fonts
            try:
                extract_fonts(self.opf, self.log)
            except:
                # Temporarily lower the filter level so the failure is logged
                previous_level = self.log.filter_level
                self.log.filter_level = self.log.DEBUG
                self.log.exception('Failed to extract fonts')
                self.log.filter_level = previous_level

        return self
Пример #46
0
def get_metadata2(root, ver):
    '''
    Read metadata from an already parsed OPF tree.

    :param root: The parsed OPF document, passed to ``OPF`` as ``preparsed_opf``.
    :param ver: Returned unchanged as the second element of the result.
    :return: ``(book metadata, ver, raster cover, first spine item)``
    '''
    package = OPF(None, preparsed_opf=root, read_toc=False)
    metadata = package.to_book_metadata()
    raster_cover = package.raster_cover
    first_item = package.first_spine_item()
    return metadata, ver, raster_cover, first_item
Пример #47
0
    def do_add(self, data):
        '''
        Add the automatically detected books described by ``data`` to the
        library shown in the parent GUI.

        :param data: Iterable of ``(fname, tdir)`` pairs: the file name relative
            to ``self.worker.path`` and the temporary directory holding that
            file's ``size.txt``, ``metadata.opf`` and, optionally,
            ``file_changed_by_plugins``.

        Files whose size no longer matches ``size.txt`` are skipped and a
        rescan is scheduled. Tag and author mapping rules from ``gprefs`` are
        applied to the metadata. Duplicates are collected and offered to the
        user via ``DuplicatesQuestion``. All temporary directories are removed
        at the end.
        '''
        from calibre.ebooks.metadata.opf2 import OPF

        gui = self.parent()
        if gui is None:
            return
        m = gui.library_view.model()
        count = 0

        needs_rescan = False
        duplicates = []
        added_ids = set()

        for fname, tdir in data:
            path_to_remove = os.path.join(self.worker.path, fname)
            paths = [path_to_remove]
            # When this marker file exists, its contents replace the path of
            # the file that gets added
            fpath = os.path.join(tdir, 'file_changed_by_plugins')
            if os.path.exists(fpath):
                with open(fpath) as f:
                    paths[0] = f.read()
            sz = os.path.join(tdir, 'size.txt')
            try:
                with open(sz, 'rb') as f:
                    sz = int(f.read())
                if sz != os.stat(paths[0]).st_size:
                    raise Exception('Looks like the file was written to after'
                            ' we tried to read metadata')
            except Exception:
                # The file is missing, unreadable or still changing: try again later
                needs_rescan = True
                try:
                    self.worker.staging.remove(fname)
                except KeyError:
                    pass

                continue

            opf_path = os.path.join(tdir, 'metadata.opf')
            if not os.access(opf_path, os.R_OK):
                continue
            # Close the OPF file deterministically instead of leaking the handle
            with open(opf_path, 'rb') as opf_stream:
                mi = OPF(opf_stream, tdir, populate_spine=False).to_book_metadata()
            if gprefs.get('tag_map_on_add_rules'):
                from calibre.ebooks.metadata.tag_mapper import map_tags
                mi.tags = map_tags(mi.tags, gprefs['tag_map_on_add_rules'])
            if gprefs.get('author_map_on_add_rules'):
                from calibre.ebooks.metadata.author_mapper import (
                    compile_rules, map_authors
                )
                new_authors = map_authors(mi.authors, compile_rules(gprefs['author_map_on_add_rules']))
                if new_authors != mi.authors:
                    mi.authors = new_authors
                    mi.author_sort = gui.current_db.new_api.author_sort_from_authors(mi.authors)
            mi = [mi]
            dups, ids = m.add_books(paths,
                    [os.path.splitext(fname)[1][1:].upper()], mi,
                    add_duplicates=not gprefs['auto_add_check_for_duplicates'],
                    return_ids=True)
            added_ids |= set(ids)
            num = len(ids)
            if dups:
                # Keep a copy of the duplicate inside tdir, because the original
                # file is deleted below
                path = dups[0][0]
                with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()),
                        'wb') as dest, open(path, 'rb') as src:
                    shutil.copyfileobj(src, dest)
                    dups[0][0] = dest.name
                duplicates.append(dups)

            try:
                os.remove(path_to_remove)
                self.worker.staging.remove(fname)
            except Exception:
                import traceback
                traceback.print_exc()
            count += num

        if duplicates:
            paths, formats, metadata = [], [], []
            for p, f, mis in duplicates:
                paths.extend(p)
                formats.extend(f)
                metadata.extend(mis)
            dups = [(mic, mic.cover, [p]) for mic, p in zip(metadata, paths)]
            d = DuplicatesQuestion(m.db, dups, parent=gui)
            dups = tuple(d.duplicates)
            if dups:
                paths, formats, metadata = [], [], []
                for mi, cover, book_paths in dups:
                    paths.extend(book_paths)
                    formats.extend([p.rpartition('.')[-1] for p in book_paths])
                    metadata.extend([mi for i in book_paths])
                ids = m.add_books(paths, formats, metadata,
                        add_duplicates=True, return_ids=True)[1]
                added_ids |= set(ids)
                num = len(ids)
                count += num

        for fname, tdir in data:
            try:
                shutil.rmtree(tdir)
            except Exception:
                # Best effort cleanup of the temporary directories
                pass

        if added_ids and gprefs['auto_add_auto_convert']:
            self.auto_convert.emit(added_ids)

        if count > 0:
            m.books_added(count)
            gui.status_bar.show_message(
                (_('Added a book automatically from {src}') if count == 1 else _('Added {num} books automatically from {src}')).format(
                    num=count, src=self.worker.path), 2000)
            gui.refresh_cover_browser()

        if needs_rescan:
            QTimer.singleShot(2000, self.dir_changed)
Пример #48
0
def set_metadata_opf2(root,
                      cover_prefix,
                      mi,
                      opf_version,
                      cover_data=None,
                      apply_null=False,
                      update_timestamp=False,
                      force_identifiers=False,
                      add_missing_cover=True):
    '''
    Merge the metadata in ``mi`` into the already parsed OPF tree ``root``.

    :param root: Parsed OPF document, passed to ``OPF`` as ``preparsed_opf``.
    :param cover_prefix: Prefix for the name of a newly created ``cover.jpg``
        manifest item; a trailing ``/`` is added when it is non-empty and
        lacks one.
    :param mi: Metadata to apply. A copy is made, so the caller's object is
        not modified.
    :param opf_version: Object whose ``major`` attribute selects how the cover
        is marked: a ``<meta name="cover">`` element below 3, the
        ``cover-image`` property otherwise.
    :param cover_data: A cover entry is only added when this is not None.
    :param apply_null: Passed on to ``smart_update``; when true the identifiers
        are replaced rather than merged.
    :param update_timestamp: When true, a non-None ``mi.timestamp`` is copied.
    :param force_identifiers: When true the identifiers are replaced rather
        than merged.
    :param add_missing_cover: Set to False to never add a cover entry.
    :return: ``(rendered OPF, raster cover)``
    '''
    mi = MetaInformation(mi)
    # The structural parts of the package are cleared on the copy
    for attr in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, attr, None)
    opf = OPF(None, preparsed_opf=root, read_toc=False)
    if mi.languages:
        current_languages = list(opf.raw_languages) or []
        mi.languages = normalize_languages(current_languages, mi.languages)

    opf.smart_update(mi, apply_null=apply_null)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid

    replace_identifiers = apply_null or force_identifiers
    if not replace_identifiers:
        # Merge with the existing identifiers, dropping empty keys and values
        merged = opf.get_identifiers()
        merged.update(mi.get_identifiers())
        cleaned = {}
        for key, value in merged.iteritems():
            if key and value:
                cleaned[key] = value
        opf.set_identifiers(cleaned)
    else:
        opf.set_identifiers(mi.get_identifiers())

    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp

    raster_cover = opf.raster_cover
    if raster_cover is None and cover_data is not None and add_missing_cover:
        cover_item = None
        guide_cover = opf.guide_raster_cover
        if guide_cover is None:
            # Nothing in the guide: register a new manifest item for the cover
            if cover_prefix and not cover_prefix.endswith('/'):
                cover_prefix += '/'
            name = cover_prefix + 'cover.jpg'
            cover_item = create_manifest_item(opf.root, name, 'cover')
            if cover_item is not None:
                raster_cover = name
        else:
            cover_item = guide_cover
            raster_cover = cover_item.get('href')

        if cover_item is not None:
            if opf_version.major >= 3:
                # Take the cover-image property away from every other item
                marked = opf.root.xpath(
                    '//*[local-name()="item" and contains(@properties, "cover-image")]')
                for item in marked:
                    remaining = item.get('properties').replace('cover-image', '')
                    item.set('properties', remaining.strip())
                cover_item.set('properties', 'cover-image')
            else:
                # Replace any existing <meta name="cover"> with a fresh one
                stale = opf.root.xpath(
                    '//*[local-name()="meta" and @name="cover"]')
                for meta in stale:
                    meta.getparent().remove(meta)
                cover_meta = opf.create_metadata_element('meta', is_dc=False)
                cover_meta.set('name', 'cover')
                cover_meta.set('content', cover_item.get('id'))

    with pretty_print:
        return opf.render(), raster_cover
Пример #49
0
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Convert an HTMLZ archive into an OEB book.

        The archive is extracted into the current working directory. Its top
        level HTML file is re-encoded as UTF-8 and run through the HTML input
        plugin. Metadata read from ``stream`` and the cover named in a top
        level OPF file, if any, are then applied to the result.

        :param stream: The HTMLZ file, opened for reading.
        :param options: Conversion options. ``input_encoding`` and the options
            recommended by the HTML input plugin are overwritten.
        :param file_ext: File type passed to ``get_file_type_metadata``.
        :param log: Logger.
        :param accelerators: Not used by this method.
        :return: The OEB book produced by the HTML input plugin.
        :raises Exception: If there is no top level HTML file or it is empty.
        '''
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.ebooks.metadata.opf2 import OPF
        from calibre.utils.zipfile import ZipFile

        self.log = log
        html = u''
        top_levels = []

        # Extract content from zip archive.
        zf = ZipFile(stream)
        zf.extractall()

        # Find the HTML file in the archive. It needs to be
        # top level.
        index = u''
        multiple_html = False
        # Get a list of all top level files in the archive.
        for x in os.listdir(u'.'):
            if os.path.isfile(x):
                top_levels.append(x)
        # Try to find an index file.
        for x in top_levels:
            if x.lower() in (u'index.html', u'index.xhtml', u'index.htm'):
                index = x
                break
        # Look for multiple HTML files in the archive. We look at the
        # top level files only as only they matter in HTMLZ.
        for x in top_levels:
            if os.path.splitext(x)[1].lower() in (u'.html', u'.xhtml',
                                                  u'.htm'):
                # Set index to the first HTML file found if it's not
                # called index.
                if not index:
                    index = x
                elif x != index:
                    # Only a second, different HTML file counts; the index
                    # file itself must not trigger the warning
                    multiple_html = True
        # Warn the user if there are multiple HTML files in the archive. HTMLZ
        # supports a single HTML file. A conversion with a multiple HTML file
        # HTMLZ archive probably won't turn out as the user expects. With
        # multiple HTML files ZIP input should be used in place of HTMLZ.
        if multiple_html:
            log.warn(
                _('Multiple HTML files found in the archive. Only %s will be used.'
                  ) % index)

        if index:
            with open(index, 'rb') as tf:
                html = tf.read()
        else:
            raise Exception(_('No top level HTML file found.'))

        if not html:
            raise Exception(_('Top level HTML file %s is empty') % index)

        # Encoding
        if options.input_encoding:
            ienc = options.input_encoding
        else:
            ienc = xml_to_unicode(html[:4096])[-1]
        html = html.decode(ienc, 'replace')

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        options.input_encoding = 'utf-8'
        base = getcwd()
        # Pick a file name that does not clash with an extracted file
        fname = os.path.join(base, u'index.html')
        c = 0
        while os.path.exists(fname):
            c += 1
            fname = u'index%d.html' % c
        with open(fname, 'wb') as htmlfile:
            htmlfile.write(html.encode('utf-8'))
        odi = options.debug_pipeline
        options.debug_pipeline = None
        # Generate oeb from html conversion. The stream is closed before the
        # file is removed and debug_pipeline is restored even on failure.
        try:
            with open(fname, 'rb') as html_stream:
                oeb = html_input.convert(html_stream, options, 'html', log, {})
        finally:
            options.debug_pipeline = odi
        os.remove(fname)

        # Set metadata from file.
        from calibre.customize.ui import get_file_type_metadata
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        mi = get_file_type_metadata(stream, file_ext)
        meta_info_to_oeb_metadata(mi, oeb.metadata, log)

        # Get the cover path from the OPF.
        cover_path = None
        opf = None
        for x in top_levels:
            if os.path.splitext(x)[1].lower() == u'.opf':
                opf = x
                break
        if opf:
            opf = OPF(opf, basedir=getcwd())
            cover_path = opf.raster_cover or opf.cover
        # Set the cover.
        if cover_path:
            cdata = None
            with open(os.path.join(getcwd(), cover_path), 'rb') as cf:
                cdata = cf.read()
            cover_name = os.path.basename(cover_path)
            cover_id, href = oeb.manifest.generate('cover', cover_name)
            oeb.manifest.add(cover_id, href, guess_type(cover_name)[0], data=cdata)
            oeb.guide.add('cover', 'Cover', href)

        return oeb
Пример #50
0
def get_metadata2(root, ver):
    """Extract book metadata and cover/spine details from a pre-parsed OPF.

    An ``OPF`` object is built from ``root`` (passed as ``preparsed_opf``)
    with ``read_toc=False``.

    :param root: Already-parsed OPF tree handed to ``OPF``.
    :param ver: Value passed through unchanged as the second tuple item.
    :return: ``(metadata, ver, raster_cover, first_spine_item)``.
    """
    package = OPF(None, preparsed_opf=root, read_toc=False)
    metadata = package.to_book_metadata()
    raster_cover = package.raster_cover
    first_item = package.first_spine_item()
    return metadata, ver, raster_cover, first_item
Пример #51
0
    def process_result(self, group_id, result):
        """Consume one metadata-reading result and add, merge or queue the book.

        :param group_id: Key identifying the file group; used to look up
            ``self.file_groups`` on failure and to name the cover data file.
        :param result: Object exposing ``err``, ``traceback`` and ``value``.
            On success ``value`` unpacks to
            ``(paths, opf, has_cover, duplicate_info)``.

        When ``self.db`` is None the book is only appended to ``self.items``
        (if it has any paths). Otherwise it is merged into identical books,
        added as a new book, or appended to ``self.duplicates``.
        """
        if not result.err:
            paths, opf, has_cover, duplicate_info = result.value
            try:
                opf_reader = OPF(BytesIO(opf),
                                 basedir=self.tdir,
                                 populate_spine=False,
                                 try_to_guess_cover=False)
                mi = opf_reader.to_book_metadata()
                mi.read_metadata_failed = False
            except Exception:
                mi = self.report_metadata_failure(group_id,
                                                  traceback.format_exc())
        else:
            mi = self.report_metadata_failure(group_id, result.traceback)
            paths = self.file_groups[group_id]
            has_cover = False
            if self.add_formats_to_existing:
                duplicate_info = set()
            else:
                duplicate_info = False

        if mi.is_null('title'):
            # Fall back to the extension-less base name of the first path,
            # leaving the title untouched when there are no paths at all.
            for first_path in paths:
                base_name = os.path.basename(first_path)
                mi.title = os.path.splitext(base_name)[0]
                break
        if mi.application_id == '__calibre_dummy__':
            mi.application_id = None

        # Apply the user's tag mapping rules, when any are configured.
        if gprefs.get('tag_map_on_add_rules'):
            from calibre.ebooks.metadata.tag_mapper import map_tags
            mi.tags = map_tags(mi.tags, gprefs['tag_map_on_add_rules'])

        # Apply the author mapping rules; the sort string is refreshed only
        # when the mapping actually changed the authors.
        if self.author_map_rules:
            from calibre.ebooks.metadata.author_mapper import map_authors
            mapped_authors = map_authors(mi.authors, self.author_map_rules)
            if mapped_authors != mi.authors:
                mi.authors = mapped_authors
                mi.author_sort = (
                    authors_to_sort_string(mi.authors)
                    if self.db is None else
                    self.db.author_sort_from_authors(mi.authors))

        self.pd.msg = mi.title

        if has_cover:
            cover_path = os.path.join(self.tdir, '%s.cdata' % group_id)
        else:
            cover_path = None

        # Without a database the result is just collected for the caller.
        if self.db is None:
            if paths:
                self.items.append((mi, cover_path, paths))
            return

        if not self.add_formats_to_existing:
            is_duplicate = duplicate_info or icu_lower(
                mi.title or _('Unknown')) in self.added_duplicate_info
            if is_duplicate:
                self.duplicates.append((mi, cover_path, paths))
            else:
                self.add_book(mi, cover_path, paths)
            return

        identical_book_ids = find_identical_books(
            mi, self.find_identical_books_data)
        if not identical_book_ids:
            self.add_book(mi, cover_path, paths)
            return

        try:
            self.merge_books(mi, cover_path, paths, identical_book_ids)
        except Exception:
            # A failed merge is recorded in the report instead of propagating.
            report_line = self.report.append
            report_line('')
            report_line('-' * 70)
            report_line(_('Failed to merge the book: ') + mi.title)
            for failed_path in paths:
                report_line('\t' + failed_path)
            report_line(_('With error:'))
            report_line(traceback.format_exc())
Пример #52
0
    def add(self, id, opf, cover, name):
        """Record one processed book, in the database or in memory.

        :param id: Key into ``self.ids``; its entry (the book's formats) is
            popped and then passed through ``self.process_formats``.
        :param opf: Path to the OPF metadata file. A path ending in
            ``.error`` is instead read as UTF-8 text and stored in
            ``self.critical[name]``.
        :param cover: Path to the cover file, or a false value for no cover.
            It is only read when ``self.db`` is set.
        :param name: Key used in ``self.critical``; its extension-less form
            is the fallback title.
        :return: The title of the book.

        With a database, the book is merged into identical existing books
        (when ``prefs['add_formats_to_existing']`` is on), created as a new
        entry, or appended to ``self.duplicates``. Without a database the
        name, first format path and metadata are appended to ``self.names``,
        ``self.paths`` and ``self.infos``.
        """
        formats = self.ids.pop(id)
        if opf.endswith('.error'):
            mi = MetaInformation('', [_('Unknown')])
            # Use a context manager so the file handle is closed promptly
            # rather than being left to the garbage collector.
            with open(opf, 'rb') as error_file:
                self.critical[name] = error_file.read().decode('utf-8',
                                                               'replace')
        else:
            try:
                mi = OPF(opf).to_book_metadata()
            except Exception:
                # Only ordinary errors are treated as a metadata failure;
                # KeyboardInterrupt/SystemExit are allowed to propagate.
                import traceback
                mi = MetaInformation('', [_('Unknown')])
                self.critical[name] = traceback.format_exc()
        formats = self.process_formats(opf, formats)
        if not mi.title:
            mi.title = os.path.splitext(name)[0]
        mi.title = mi.title if isinstance(mi.title, unicode) else \
                   mi.title.decode(preferred_encoding, 'replace')
        if mi.application_id == '__calibre_dummy__':
            mi.application_id = None
        if self.db is not None:
            if cover:
                with open(cover, 'rb') as f:
                    cover = f.read()
            # Keep the unfiltered list for duplicate reporting; OPF files
            # themselves are never added as book formats.
            orig_formats = formats
            formats = [f for f in formats if not f.lower().endswith('.opf')]
            if prefs['add_formats_to_existing']:  # automerge is on
                identical_book_list = self.db.find_identical_books(mi)
                if identical_book_list:  # books with same author and nearly same title exist in db
                    self.merged_books.add(mi.title)
                    seen_fmts = set()
                    # The automerge policy does not change while merging,
                    # so look it up once instead of once per book.
                    automerge = gprefs['automerge']
                    replace = automerge == 'overwrite'

                    for identical_book in identical_book_list:
                        ib_fmts = self.db.formats(identical_book,
                                                  index_is_id=True)
                        if ib_fmts:
                            seen_fmts |= set(ib_fmts.split(','))
                        self.add_formats(identical_book,
                                         formats,
                                         replace=replace)
                    if automerge == 'new record':
                        incoming_fmts = {
                            os.path.splitext(path)[-1].replace('.',
                                                               '').upper()
                            for path in formats
                        }
                        if incoming_fmts.intersection(seen_fmts):
                            # There was at least one duplicate format
                            # so create a new record and put the
                            # incoming formats into it
                            # We should arguably put only the duplicate
                            # formats, but no real harm is done by having
                            # all formats
                            id_ = self.db.create_book_entry(
                                mi, cover=cover, add_duplicates=True)
                            self.number_of_books_added += 1
                            self.add_formats(id_, formats)

                else:
                    # books with same author and nearly same title do not exist in db
                    id_ = self.db.create_book_entry(mi,
                                                    cover=cover,
                                                    add_duplicates=True)
                    self.number_of_books_added += 1
                    self.add_formats(id_, formats)

            else:  # automerge is off
                id_ = self.db.create_book_entry(mi,
                                                cover=cover,
                                                add_duplicates=False)
                # No id came back, so the book is queued as a duplicate
                # (with its unfiltered formats) instead of being added.
                if id_ is None:
                    self.duplicates.append((mi, cover, orig_formats))
                else:
                    self.add_formats(id_, formats)
                    self.number_of_books_added += 1
        else:
            self.names.append(name)
            self.paths.append(formats[0])
            self.infos.append(mi)
        return mi.title