def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Convert Markdown text to HTML and collect metadata from its meta block.

    :param txt: Markdown source text.
    :param title: Title used to construct the Metadata object when non-empty
        (otherwise the translated ``'Unknown'``); a ``title`` entry in the
        document's meta block overrides it.
    :param extensions: Iterable of Markdown extension names. ``'meta'`` is
        added when missing. The iterable passed in is never modified.
    :return: ``(mi, html)`` -- the populated Metadata object and
        ``HTML_TEMPLATE`` filled in with ``mi.title`` and the converted body.
    """
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values
    # Work on a copy: appending to the argument would permanently alter the
    # shared default (DEFAULT_MD_EXTENSIONS) or the caller's own list.
    extensions = list(extensions)
    if 'meta' not in extensions:
        extensions.append('meta')
    md = create_markdown_object(extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    m = md.Meta
    # Accept 'date' and 'summary' as aliases, unless the canonical key is
    # already present.
    for k, v in iteritems({'date': 'pubdate', 'summary': 'comments'}):
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if not val:
            continue
        mf = mi.metadata_for_field(k)
        if not mf.get('is_multiple'):
            # Single-valued field: only the first entry is used
            val = val[0]
        if k == 'series':
            val, si = get_series_values(val)
            mi.series_index = 1 if si is None else si
        if k == 'rating':
            try:
                # Clamp to the 0..10 range
                val = max(0, min(int(float(val)), 10))
            except Exception:
                continue  # unparseable rating: skip the field
        if mf.get('datatype') == 'datetime':
            try:
                val = parse_only_date(val, assume_utc=False)
            except Exception:
                continue  # unparseable date: skip the field
        setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
def metadata(self):
    """Assemble a Metadata object from the document's property parts.

    Reads, in order: the document properties part, the default style
    language from ``word/styles.xml`` (only while the language is still
    null), and the application properties part. A part that ``self.read``
    cannot find (``KeyError``) is skipped.

    :return: the populated Metadata object
    """
    result = Metadata(_('Unknown'))

    # The second name returned here is discarded: the application properties
    # name is looked up again from the relationships further down.
    doc_props_name, app_props_name = self.get_document_properties_names()
    if doc_props_name:
        try:
            doc_props_raw = self.read(doc_props_name)
        except KeyError:
            pass
        else:
            read_doc_props(doc_props_raw, result, self.namespace.XPath)

    if result.is_null('language'):
        try:
            styles_raw = self.read('word/styles.xml')
        except KeyError:
            pass
        else:
            read_default_style_language(styles_raw, result, self.namespace.XPath)

    app_props_name = self.relationships.get(self.namespace.names['APPPROPS'], None)
    if app_props_name:
        try:
            app_props_raw = self.read(app_props_name)
        except KeyError:
            pass
        else:
            read_app_props(app_props_raw, result)

    return result
def __init__(self, storage_id, lpath, other=None):
    """Initialise a book record for a file identified by storage and path.

    :param storage_id: Stored unchanged as ``self.storage_id``.
    :param lpath: Path of the file; ``os.sep`` is normalised to ``/`` and the
        result is stored as both ``self.lpath`` and ``self.path``.
    :param other: Forwarded to ``Metadata.__init__``.
    """
    Metadata.__init__(self, _('Unknown'), other=other)
    self.storage_id = storage_id
    self.lpath = self.path = lpath.replace(os.sep, '/')
    # Lower-cased path components, as a tuple
    self.mtp_relpath = tuple([icu_lower(x) for x in self.lpath.split('/')])
    self.datetime = utcnow().timetuple()
    self.thumbnail = None
    # The attribute used to be misspelled; the old name is still set so any
    # code reading ``thumbail`` keeps working.
    self.thumbail = None
def opdsToMetadata(self, opdsBookStructure):
    """Convert one OPDS feed entry into a Metadata object.

    :param opdsBookStructure: Feed entry exposing ``author``, ``title``,
        ``id`` and ``updated`` attributes plus a dict-style ``get`` for
        ``summary`` and ``links``.
    :return: Metadata with title, authors, uuid, timestamp, tags and a
        ``links`` list of download URLs (EPUB links first).
    :raises ValueError: if ``updated`` does not match
        ``%Y-%m-%dT%H:%M:%S`` after the suffix is stripped.
    """
    authors = opdsBookStructure.author.replace(u'& ', u'&')
    metadata = Metadata(opdsBookStructure.title, authors.split(u'&'))
    metadata.uuid = opdsBookStructure.id.replace('urn:uuid:', '', 1)

    rawTimestamp = opdsBookStructure.updated
    # Strip a trailing 'Z' or '+00:00' (with optional fractional seconds).
    # Raw string: '\.' and '\+' are regex escapes, not string escapes.
    parsableTimestamp = re.sub(r'((\.[0-9]+)?\+00:00|Z)$', '', rawTimestamp)
    metadata.timestamp = datetime.datetime.strptime(parsableTimestamp, '%Y-%m-%dT%H:%M:%S')

    # Tags come from a 'TAGS: a, b, c' line in the summary; if several such
    # lines exist, the last one wins.
    tags = []
    summary = opdsBookStructure.get(u'summary', u'')
    for summaryline in summary.splitlines():
        if summaryline.startswith(u'TAGS: '):
            tagsline = summaryline.replace(u'TAGS: ', u'')
            tagsline = tagsline.replace(u'<br />', u'')
            tagsline = tagsline.replace(u', ', u',')
            tags = tagsline.split(u',')
    metadata.tags = tags

    bookDownloadUrls = []
    for link in opdsBookStructure.get('links', []):
        url = link.get('href', '')
        bookType = link.get('type', '')
        # Skip covers and thumbnails
        if bookType.startswith('image/'):
            continue
        if bookType == 'application/epub+zip':
            # EPUB books are preferred and always put at the head of the list if found
            bookDownloadUrls.insert(0, url)
        else:
            # Formats other than EPUB (eg. AZW), are appended as they are found
            bookDownloadUrls.append(url)
    metadata.links = bookDownloadUrls
    return metadata
def test_legacy_adding_books(self):  # {{{
    'Test various adding books methods'
    from calibre.ebooks.metadata.book.base import Metadata
    # Two wrappers built from self.cloned_library; every ET(...) below is
    # given both of them via old=/legacy=.
    legacy, old = self.init_legacy(self.cloned_library), self.init_old(self.cloned_library)
    mi = Metadata('Added Book0', authors=('Added Author',))
    with NamedTemporaryFile(suffix='.aff') as f:
        f.write(b'xxx')
        f.flush()
        # add_books: default call, with return_ids, and with add_duplicates off
        T = partial(ET, 'add_books', ([f.name], ['AFF'], [mi]), old=old, legacy=legacy)
        T()(self)
        book_id = T(kwargs={'return_ids':True})(self)[1][0]
        self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
        T(kwargs={'add_duplicates':False})(self)
        mi.title = 'Added Book1'
        mi.uuid = 'uuu'
        # import_book: the uuids differ by default and match with preserve_uuid
        T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
        book_id = T()(self)
        self.assertNotEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
        book_id = T(kwargs={'preserve_uuid':True})(self)
        self.assertEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
        self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
    with NamedTemporaryFile(suffix='.opf') as f:
        f.write(b'zzzz')
        f.flush()
        # Importing a lone .opf file must leave the book without formats
        T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
        book_id = T()(self)
        self.assertFalse(legacy.new_api.formats(book_id))

    mi.title = 'Added Book2'
    T = partial(ET, 'create_book_entry', (mi,), old=old, legacy=legacy)
    # NOTE(review): unlike the calls above, these three ET objects are only
    # constructed and never invoked with (self) -- confirm whether that is
    # intentional.
    T()
    T({'add_duplicates':False})
    T({'force_id':1000})
def to_metadata_for_single_entry(self, log, ozon_id, title, authors):  # {{{
    """Wrap an already known title/authors pair in a Metadata object.

    The result carries ``ozon_id`` as its only identifier, under the
    ``'ozon'`` key. ``log`` is accepted but not used.
    """
    # parsing javascript data from the redirect page
    result = Metadata(title, authors)
    result.identifiers = dict(ozon=ozon_id)
    return result
def add_catalog(cache, path, title, dbapi=None):
    """Add the catalog file at ``path`` to the library, reusing an existing entry.

    A book whose title equals ``title`` and that carries the translated
    'Catalog' tag is updated in place; otherwise a new entry is created.

    :param cache: Database cache providing ``write_lock``, ``_search``,
        ``_create_book_entry``, ``_set_metadata`` and ``add_format``.
    :param path: Path of the catalog file; its extension selects the format.
    :param title: Title to give the catalog book.
    :param dbapi: Forwarded to ``cache.add_format``.
    :return: ``(db_id, new_book_added)``
    """
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    new_book_added = False
    with lopen(path, 'rb') as stream:
        with cache.write_lock:
            matches = cache._search('title:="%s" and tags:="%s"' % (title.replace('"', '\\"'), _('Catalog')), None)
            db_id = None
            if matches:
                db_id = list(matches)[0]
            try:
                mi = get_metadata(stream, fmt)
                mi.authors = ['calibre']
            except Exception:
                # Reading metadata is best-effort; fall back to a bare record.
                # KeyboardInterrupt/SystemExit are no longer swallowed here.
                mi = Metadata(title, ['calibre'])
            mi.title, mi.authors = title, ['calibre']
            mi.author_sort = 'calibre'  # The MOBI/AZW3 format sets author sort to date
            mi.tags = [_('Catalog')]
            mi.pubdate = mi.timestamp = utcnow()
            if fmt == 'mobi':
                mi.cover, mi.cover_data = None, (None, None)
            if db_id is None:
                db_id = cache._create_book_entry(mi, apply_import_tags=False)
                new_book_added = True
            else:
                cache._set_metadata(db_id, mi)
        # Cant keep write lock since post-import hooks might run
        cache.add_format(db_id, fmt, stream, dbapi=dbapi)

    return db_id, new_book_added
def get_metadata_from_detail(self, log, entry, title, authors, identifiers):  # {{{
    """Build a Metadata object from a book detail element.

    The incoming ``title`` is replaced by the one scraped from ``entry``;
    ``authors`` and ``identifiers`` are not read.

    :param log: Logger; used only to report a missing rating.
    :param entry: Element supporting ``xpath`` for the detail page.
    :return: Metadata with title, normalised authors, an optional ``ozon``
        identifier, ``ozon_cover_url`` and ``rating``.
    """
    title = unicode(entry.xpath(u'normalize-space(.//h1[@itemprop="name"][1]/text())'))
    first_person = unicode(entry.xpath(u'normalize-space(.//a[contains(@href, "person")][1]/text())'))
    # The link text may hold several comma separated names
    author_names = [
        _normalizeAuthorNameWithInitials(unicode.strip(name))
        for name in unicode(first_person).split(u',')
    ]
    mi = Metadata(title, author_names)

    ozon_id = entry.xpath(u'substring-before(substring-after(normalize-space(.//a[starts-with(@href, "/context/detail/id/")][1]/@href), "id/"), "/")')
    if ozon_id:
        mi.identifiers = {'ozon':ozon_id}

    cover_src = entry.xpath(u'normalize-space(.//img[1]/@src)')
    mi.ozon_cover_url = _translateToBigCoverUrl(cover_src) if cover_src else None

    mi.rating = self.get_rating(entry)
    if not mi.rating:
        log.debug('No rating (from_detail) found. ozon_id:%s'%ozon_id)
    return mi
def test(scale=0.5):
    """Show a window with a grid of generated covers and run the Qt loop.

    One row per colour theme and one column per style, both in sorted order.

    :param scale: Factor applied to the cover size and font size preferences.
    """
    from PyQt5.Qt import QLabel, QApplication, QPixmap, QMainWindow, QWidget, QScrollArea, QGridLayout
    app = QApplication([])
    mi = Metadata('xxx', ['Kovid Goyal', 'John Q. Doe', 'Author'])
    mi.series = 'A series of styles'

    window = QMainWindow()
    scroll_area = QScrollArea(window)
    canvas = QWidget(window)
    scroll_area.setWidget(canvas)
    grid = QGridLayout(canvas)
    canvas.setLayout(grid)
    grid.setSpacing(30)

    scaled_keys = ('cover_width', 'cover_height', 'title_font_size', 'subtitle_font_size', 'footer_font_size')
    labels = []  # every QLabel created below is also collected here
    for row, color in enumerate(sorted(default_color_themes)):
        for col, style in enumerate(sorted(all_styles())):
            mi.series_index = col + 1
            mi.title = 'An algorithmic cover [%s]' % color
            prefs = override_prefs(cprefs, override_color_theme=color, override_style=style)
            for key in scaled_keys:
                prefs[key] = int(scale * prefs[key])
            image = generate_cover(mi, prefs=prefs, as_qimage=True)
            label = QLabel()
            label.setPixmap(QPixmap.fromImage(image))
            grid.addWidget(label, row, col)
            labels.append(label)

    window.setCentralWidget(scroll_area)
    canvas.resize(canvas.sizeHint())
    window.show()
    app.exec_()
def test(scale=0.25):
    """Show a window with a grid of generated covers and run the Qt loop.

    One row per colour theme and one column per style, both in sorted order.
    ``scale`` is multiplied by the widget's device pixel ratio before being
    handed to ``scale_cover``.

    :param scale: Base scaling factor for the generated covers.
    """
    from PyQt5.Qt import QLabel, QPixmap, QMainWindow, QWidget, QScrollArea, QGridLayout
    from calibre.gui2 import Application
    app = Application([])
    mi = Metadata('Unknown', ['Kovid Goyal', 'John & Doe', 'Author'])
    mi.series = 'A series & styles'

    window = QMainWindow()
    scroll_area = QScrollArea(window)
    canvas = QWidget(window)
    scroll_area.setWidget(canvas)
    grid = QGridLayout(canvas)
    canvas.setLayout(grid)
    grid.setSpacing(30)
    scale *= canvas.devicePixelRatioF()

    labels = []  # every QLabel created below is also collected here
    for row, color in enumerate(sorted(default_color_themes)):
        for col, style in enumerate(sorted(all_styles())):
            mi.series_index = col + 1
            mi.title = 'An algorithmic cover [%s]' % color
            prefs = override_prefs(cprefs, override_color_theme=color, override_style=style)
            scale_cover(prefs, scale)
            image = generate_cover(mi, prefs=prefs, as_qimage=True)
            image.setDevicePixelRatio(canvas.devicePixelRatioF())
            label = QLabel()
            label.setPixmap(QPixmap.fromImage(image))
            grid.addWidget(label, row, col)
            labels.append(label)

    window.setCentralWidget(scroll_area)
    canvas.resize(canvas.sizeHint())
    window.show()
    app.exec_()
def add_catalog(cache, path, title):
    """Add the catalog file at ``path`` to the library, reusing an existing entry.

    A book whose title equals ``title`` and that carries the translated
    'Catalog' tag is updated in place; otherwise a new entry is created.
    Everything, including adding the format, happens under
    ``cache.write_lock``.

    :param cache: Database cache providing ``write_lock``, ``_search``,
        ``_create_book_entry``, ``_set_metadata`` and ``_add_format``.
    :param path: Path of the catalog file; its extension selects the format.
    :param title: Title to give the catalog book.
    :return: id of the catalog book
    """
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    with lopen(path, 'rb') as stream, cache.write_lock:
        matches = cache._search('title:="%s" and tags:="%s"' % (title.replace('"', '\\"'), _('Catalog')), None)
        db_id = None
        if matches:
            db_id = list(matches)[0]
        try:
            mi = get_metadata(stream, fmt)
            mi.authors = ['calibre']
        except Exception:
            # Reading metadata is best-effort; fall back to a bare record.
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            mi = Metadata(title, ['calibre'])
        mi.title, mi.authors = title, ['calibre']
        mi.tags = [_('Catalog')]
        mi.pubdate = mi.timestamp = utcnow()
        if fmt == 'mobi':
            mi.cover, mi.cover_data = None, (None, None)
        if db_id is None:
            db_id = cache._create_book_entry(mi, apply_import_tags=False)
        else:
            cache._set_metadata(db_id, mi)
        cache._add_format(db_id, fmt, stream)

    return db_id
def get_series(title, authors, timeout=60):
    """Look up series name and index for a book on the remote KDL server.

    :param title: Book title. A leading character found in ``_ignore_starts``
        and a leading article (A/The/An) are dropped before searching.
    :param authors: Sequence of author names; only the first one is used.
    :param timeout: Network timeout in seconds.
    :return: Metadata built from ``title``/``authors``, with ``series`` and
        ``series_index`` set when the server response contained them. When
        nothing can be searched or found, the Metadata is returned unchanged.
    :raises Exception: with a "server busy" message when the request times
        out; other ``URLError`` instances propagate unchanged.
    """
    mi = Metadata(title, authors)
    # Nothing to search for without both a title and a first author
    if not title or not authors:
        return mi
    if title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode):
        title = title.encode('utf-8')

    title = urllib.quote_plus(title)

    author = authors[0].strip()
    if not author:
        return mi
    # Search by surname only: "Last, First" or "First Last"
    if ',' in author:
        author = author.split(',')[0]
    else:
        author = author.split()[-1]

    url = URL.format(author, title)
    br = browser()
    try:
        raw = br.open_novisit(url, timeout=timeout).read()
    except URLError as e:
        if isinstance(e.reason, socket.timeout):
            raise Exception('KDL Server busy, try again later')
        raise
    if 'see the full results' not in raw:
        return mi
    raw = xml_to_unicode(raw)[0]
    soup = BeautifulSoup(raw)
    searcharea = soup.find('div', attrs={'class':'searcharea'})
    if searcharea is None:
        return mi
    ss = searcharea.find('div', attrs={'class':'seriessearch'})
    if ss is None:
        return mi
    a = ss.find('a', href=True)
    if a is None:
        return mi
    href = a['href'].partition('?')[-1]
    data = urlparse.parse_qs(href)
    series = data.get('SeriesName', [])
    if not series:
        return mi
    series = series[0]
    series = re.sub(r' series$', '', series).strip()
    if series:
        mi.series = series
    # The series index is the leading number of the element that follows the
    # search block. That sibling may be missing or a bare text node, neither
    # of which has ``contents``.
    contents = getattr(ss.nextSibling, 'contents', None)
    if contents:
        raw = unicode(contents[0])
        raw = raw.partition('.')[0].strip()
        try:
            mi.series_index = int(raw)
        except ValueError:
            pass  # not a number: leave the index unset
    return mi
def test_legacy_adding_books(self):  # {{{
    'Test various adding books methods'
    from calibre.ebooks.metadata.book.base import Metadata
    # Two wrappers built from self.cloned_library; every ET(...) below is
    # given both of them via old=/legacy=.
    legacy, old = self.init_legacy(self.cloned_library), self.init_old(self.cloned_library)
    mi = Metadata('Added Book0', authors=('Added Author',))
    with NamedTemporaryFile(suffix='.aff') as f:
        f.write(b'xxx')
        f.flush()
        # add_books: default call, with return_ids, and with add_duplicates off
        T = partial(ET, 'add_books', ([f.name], ['AFF'], [mi]), old=old, legacy=legacy)
        T()(self)
        book_id = T(kwargs={'return_ids':True})(self)[1][0]
        self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))
        T(kwargs={'add_duplicates':False})(self)
        mi.title = 'Added Book1'
        mi.uuid = 'uuu'
        # import_book: the uuids differ by default and match with preserve_uuid
        T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
        book_id = T()(self)
        self.assertNotEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
        book_id = T(kwargs={'preserve_uuid':True})(self)
        self.assertEqual(legacy.uuid(book_id, index_is_id=True), old.uuid(book_id, index_is_id=True))
        self.assertEqual(legacy.new_api.formats(book_id), ('AFF',))

        # add_format: same format added twice, then once more with replace=True
        T = partial(ET, 'add_format', old=old, legacy=legacy)
        T((0, 'AFF', BytesIO(b'fffff')))(self)
        T((0, 'AFF', BytesIO(b'fffff')))(self)
        T((0, 'AFF', BytesIO(b'fffff')), {'replace':True})(self)
    with NamedTemporaryFile(suffix='.opf') as f:
        f.write(b'zzzz')
        f.flush()
        # Importing a lone .opf file must leave the book without formats
        T = partial(ET, 'import_book', (mi,[f.name]), old=old, legacy=legacy)
        book_id = T()(self)
        self.assertFalse(legacy.new_api.formats(book_id))

    mi.title = 'Added Book2'
    T = partial(ET, 'create_book_entry', (mi,), old=old, legacy=legacy)
    # NOTE(review): unlike the calls above, these three ET objects are only
    # constructed and never invoked with (self) -- confirm whether that is
    # intentional.
    T()
    T({'add_duplicates':False})
    T({'force_id':1000})

    with NamedTemporaryFile(suffix='.txt') as f:
        f.write(b'tttttt')
        f.seek(0)
        # add_catalog: both wrappers must return the same book id
        bid = legacy.add_catalog(f.name, 'My Catalog')
        self.assertEqual(old.add_catalog(f.name, 'My Catalog'), bid)
        cache = legacy.new_api
        self.assertEqual(cache.formats(bid), ('TXT',))
        self.assertEqual(cache.field_for('title', bid), 'My Catalog')
        self.assertEqual(cache.field_for('authors', bid), ('calibre',))
        self.assertEqual(cache.field_for('tags', bid), (_('Catalog'),))
        # A different title gets a larger id; the same title reuses the book
        self.assertTrue(bid < legacy.add_catalog(f.name, 'Something else'))
        self.assertEqual(legacy.add_catalog(f.name, 'My Catalog'), bid)
        self.assertEqual(old.add_catalog(f.name, 'My Catalog'), bid)

        # add_news: tags are the 'News' tag, the title tag, then custom tags
        bid = legacy.add_news(f.name, {'title':'Events', 'add_title_tag':True, 'custom_tags':('one', 'two')})
        self.assertEqual(cache.formats(bid), ('TXT',))
        self.assertEqual(cache.field_for('authors', bid), ('calibre',))
        self.assertEqual(cache.field_for('tags', bid), (_('News'), 'Events', 'one', 'two'))

    old.close()
def metadata_from_dict(src):
    """Create a Metadata object from a mapping of field names to values.

    The ``'user_metadata'`` entry is applied through
    ``set_all_user_metadata``; every other entry is set as an attribute.
    """
    mi = Metadata('Unknown')
    for field, field_value in src.iteritems():
        if field != 'user_metadata':
            setattr(mi, field, field_value)
            continue
        mi.set_all_user_metadata(field_value)
    return mi
def read_metadata_kfx(stream, read_cover=True):
    """Read the metadata.kfx file that is found in the sdr book folder for KFX files.

    :param stream: File-like object; its whole content is read.
    :param read_cover: When true, also decode the base64 cover and store it
        in ``cover_data`` if it can be identified.
    :return: Metadata object with the fields found in the container.
    """
    container = Container(stream.read())
    fields = extract_metadata(container.decode())

    def present(key):
        # Truthy only when the key has a non-empty first value
        return fields[key] and fields[key][0]

    def value(key, single=True):
        values = fields[key]
        if not single:
            return [clean_xml_chars(item) for item in values]
        return clean_xml_chars(values[0]) if values else ''

    author_pattern = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(name):
        # Turn "Last, First" into "First Last", unless the author sort tweak
        # is set to 'copy'
        if tweaks['author_sort_copy_method'] != 'copy':
            match = author_pattern.match(name.strip())
            if match is not None:
                return match.group(2) + ' ' + match.group(1)
        return name

    title = value('title') or _('Unknown')
    authors = value('authors', False) or [_('Unknown')]
    mi = Metadata(title, [fix_author(name) for name in authors])

    if present('author'):
        mi.author_sort = value('author')

    # ASIN takes precedence over content_id
    if present('ASIN'):
        mi.set_identifier('mobi-asin', value('ASIN'))
    elif present('content_id'):
        mi.set_identifier('mobi-asin', value('content_id'))

    if present('languages'):
        langs = [lang for lang in map(canonicalize_lang, value('languages', False)) if lang]
        if langs:
            mi.languages = langs

    if present('issue_date'):
        try:
            mi.pubdate = parse_only_date(value('issue_date'))
        except Exception:
            pass

    if present('publisher') and value('publisher') != 'Unknown':
        mi.publisher = value('publisher')

    if read_cover and fields[COVER_KEY]:
        try:
            data = base64.standard_b64decode(fields[COVER_KEY])
            w, h, fmt = identify_data(data)
        except Exception:
            w, h, fmt = 0, 0, None
        # fmt is None on failure, so ``data`` is only read when decoding worked
        if fmt and w and h:
            mi.cover_data = (fmt, data)

    return mi
def read_metadata_kfx(stream, read_cover=True):
    """Read the metadata.kfx file that is found in the sdr book folder for KFX files.

    :param stream: File-like object; its whole content is read.
    :param read_cover: When true, also decode the base64 cover and store it
        in ``cover_data`` if it can be identified.
    :return: Metadata object with the fields found in the container.
    """
    container = Container(stream.read())
    fields = extract_metadata(container.decode())

    def present(key):
        # Truthy only when the key has a non-empty first value
        return fields[key] and fields[key][0]

    def value(key, single=True):
        values = fields[key]
        if not single:
            return [clean_xml_chars(item) for item in values]
        return clean_xml_chars(values[0]) if values else ""

    author_pattern = re.compile(r"([^,]+?)\s*,\s+([^,]+)$")

    def fix_author(name):
        # Turn "Last, First" into "First Last", unless the author sort tweak
        # is set to "copy"
        if tweaks["author_sort_copy_method"] != "copy":
            match = author_pattern.match(name.strip())
            if match is not None:
                return match.group(2) + " " + match.group(1)
        return name

    title = value("title") or _("Unknown")
    authors = value("authors", False) or [_("Unknown")]
    mi = Metadata(title, [fix_author(name) for name in authors])

    if present("author"):
        mi.author_sort = value("author")

    # ASIN takes precedence over content_id
    if present("ASIN"):
        mi.set_identifier("mobi-asin", value("ASIN"))
    elif present("content_id"):
        mi.set_identifier("mobi-asin", value("content_id"))

    if present("languages"):
        langs = [lang for lang in map(canonicalize_lang, value("languages", False)) if lang]
        if langs:
            mi.languages = langs

    if present("issue_date"):
        try:
            mi.pubdate = parse_only_date(value("issue_date"))
        except Exception:
            pass

    if present("publisher") and value("publisher") != "Unknown":
        mi.publisher = value("publisher")

    if read_cover and fields[COVER_KEY]:
        try:
            data = base64.standard_b64decode(fields[COVER_KEY])
            fmt, w, h = identify(bytes(data))
        except Exception:
            w, h, fmt = 0, 0, None
        # fmt is None on failure, so ``data`` is only read when decoding worked
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi
def to_book_metadata(self):
    """Collect the state of the metadata widgets into a Metadata object.

    Without a database (``self.db is None``) an empty Metadata object is
    returned. Otherwise the custom field definitions are installed first,
    then the basic widgets and any custom widgets apply their values.
    """
    book = Metadata(_('Unknown'))
    if self.db is not None:
        book.set_all_user_metadata(self.db.field_metadata.custom_field_metadata())
        for basic_widget in self.basic_metadata_widgets:
            basic_widget.apply_to_metadata(book)
        # custom_metadata_widgets may not exist on this object
        for custom_widget in getattr(self, 'custom_metadata_widgets', []):
            custom_widget.apply_to_metadata(book)
    return book
def report_metadata_failure(self, group_id, details):
    """Record a metadata read failure in the report and return a stub record.

    :param group_id: Key into ``self.file_groups`` naming the affected files.
    :param details: Error text appended to the report.
    :return: Metadata titled 'Unknown' with ``read_metadata_failed`` set to
        ``False``.
    """
    add_line = self.report.append
    failed_paths = self.file_groups[group_id]
    add_line("")
    add_line("-" * 70)
    add_line(_("Failed to read metadata from the file(s):"))
    for failed_path in failed_paths:
        add_line("\t" + failed_path)
    add_line(_("With error:"))
    add_line(details)
    stub = Metadata(_("Unknown"))
    # NOTE(review): the flag is set to False even though this reports a
    # failure -- confirm against the code that reads it.
    stub.read_metadata_failed = False
    return stub
def report_metadata_failure(self, group_id, details):
    """Record a metadata read failure in the report and return a stub record.

    :param group_id: Key into ``self.file_groups`` naming the affected files.
    :param details: Error text appended to the report.
    :return: Metadata titled 'Unknown' with ``read_metadata_failed`` set to
        ``False``.
    """
    add_line = self.report.append
    failed_paths = self.file_groups[group_id]
    add_line('')
    add_line('-' * 70)
    add_line(_('Failed to read metadata from the file(s):'))
    for failed_path in failed_paths:
        add_line('\t' + failed_path)
    add_line(_('With error:'))
    add_line(details)
    stub = Metadata(_('Unknown'))
    # NOTE(review): the flag is set to False even though this reports a
    # failure -- confirm against the code that reads it.
    stub.read_metadata_failed = False
    return stub
def create_cover(title, authors, series=None, series_index=1, prefs=None, as_qimage=False):
    """Create a cover from the specified title, author and series.

    Any user set templates are ignored, to ensure that the specified
    metadata is used: the title, subtitle and footer templates are reset to
    the defaults from ``cprefs``.

    :param prefs: Preferences to start from; ``cprefs`` when falsy.
    :param as_qimage: Forwarded to ``generate_cover``.
    :return: whatever ``generate_cover`` returns
    """
    book = Metadata(title, authors)
    if series:
        book.series, book.series_index = series, series_index
    defaults = cprefs.defaults
    template_keys = ('title_template', 'subtitle_template', 'footer_template')
    default_templates = {key: defaults[key] for key in template_keys}
    effective_prefs = override_prefs(prefs or cprefs, **default_templates)
    return generate_cover(book, prefs=effective_prefs, as_qimage=as_qimage)
def get_baike_metadata(self, title):
    """Look up ``title`` on Baidu Baike and convert the result to Metadata.

    :param title: Title to look up.
    :return: Metadata built from the page's info table, tags and summary, or
        ``None`` when the page could not be loaded.
    """
    from baidubaike import Page
    try:
        baike = Page(title)
    except Exception:
        # The lookup is best-effort: any failure means "no result".
        # KeyboardInterrupt/SystemExit are no longer swallowed here.
        return None

    info = baike.get_info()
    # Single parenthesised argument: valid as both a Python 2 print statement
    # and a Python 3 function call.
    print("\n".join("%s:\t%s" % v for v in info.items()))
    mi = Metadata(info['title'])
    plat = info.get(u'首发网站', None)
    if not plat:
        # Unicode default, so the .replace() below never mixes byte and
        # unicode strings.
        plat = info.get(u'首发状态', u"网络小说平台")
    plat = plat.replace(u'首发', '')
    mi.publisher = info.get(u'连载平台', plat)
    mi.authors = [info[u'作者']]
    mi.isbn = '0000000000001'
    mi.tags = baike.get_tags()
    mi.pubdate = datetime.datetime.now()
    mi.timestamp = datetime.datetime.now()
    mi.comments = baike.get_summary()
    # When the serialisation status contains the marker below, use the first
    # date found in that status text as the publication date.
    if u'完结' in info.get(u'连载状态', ""):
        day = re.findall(r'\d*-\d*-\d*', info[u'连载状态'])
        try:
            mi.pubdate = datetime.datetime.strptime(day[0], '%Y-%m-%d')
        except (IndexError, ValueError):
            pass  # no date, or not a real one: keep "now"
    return mi
def build_meta(log, issue_id):
    """Build metadata record based on comicvine issue_id.

    :param log: Logger; receives a warning when the issue cannot be loaded.
    :param issue_id: Issue id handed to ``pycomicvine.Issue``.
    :return: Metadata for the issue, or ``None`` when the issue or its
        volume is missing.
    """
    wanted_fields = [
        "id",
        "name",
        "volume",
        "issue_number",
        "person_credits",
        "description",
        "store_date",
        "cover_date",
    ]
    issue = pycomicvine.Issue(issue_id, field_list=wanted_fields)
    if not (issue and issue.volume):
        log.warn("Unable to load Issue(%d)" % issue_id)
        return None

    # "<volume> #<number>", followed by ": <issue name>" when there is one
    title = "%s #%s" % (issue.volume.name, issue.issue_number)
    if issue.name:
        title += ": %s" % (issue.name)
    credited = [person.name for person in issue.person_credits]

    record = Metadata(title, credited)
    record.series = issue.volume.name
    record.series_index = str(issue.issue_number)
    record.set_identifier("comicvine", str(issue.id))
    record.comments = issue.description
    record.has_cover = False
    if issue.volume.publisher:
        record.publisher = issue.volume.publisher.name
    # The store date is preferred over the cover date
    record.pubdate = issue.store_date or issue.cover_date
    return record
def __init__(self):
    """Create an empty book record with an empty title and default fields."""
    Metadata.__init__(self, '')
    self.quietthyme_id = -1
    self.path = ''  # the quietthyme book storage_id
    self.size = 0
    self.thumbnail = None
    self.datetime = time.gmtime()
    self._new_book = False
def test_rtf_metadata(self):
    """Write metadata into a minimal RTF stream and check it reads back equal."""
    stream = BytesIO(br'{\rtf1\ansi\ansicpg1252}')
    written = Metadata('Test ø̄title', ['Author One', 'Author БTwo'])
    written.tags = 'tag1 見tag2'.split()
    written.comments = '<p>some ⊹comments</p>'
    written.publisher = 'publiSher'
    set_metadata(stream, written)

    stream.seek(0)
    read_back = get_metadata(stream)
    for field in 'title authors publisher comments tags'.split():
        self.assertEqual(getattr(written, field), getattr(read_back, field))
def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30):
    """Resolve metadata through the registered getters and queue the result.

    Each getter in ``CGetter.getter_list`` is tried in turn; the first one
    that understands the matching entry in ``identifiers`` and returns a
    record with authors ends the search. The Metadata built from that record
    is put on ``result_queue``.

    :param result_queue: Queue that receives the Metadata object.
    :param identifiers: Mapping of identifier type to value. Only read here.
    :return: always ``None``; the result travels through ``result_queue``.

    ``log``, ``abort``, ``title``, ``authors`` and ``timeout`` are not used
    in this body.
    """
    dlog("IDENTIFIERS")
    dlog(identifiers)
    dc = {}
    for Getter in CGetter.getter_list:
        dlog("USING GETTER: %s" % Getter)
        dlog("my prefs:", prefs)
        # Check every preference this getter requires.
        # NOTE(review): ``isvalid`` is assigned but never read afterwards, so
        # a getter with invalid preferences is still queried below -- confirm
        # whether a ``continue`` is missing.
        isvalid = True
        for pref_key, is_valid_check in Getter.ls_required_pref_key:
            dlog("validating: %s --> = %s" % (pref_key, prefs[pref_key]))
            if not is_valid_check(prefs[pref_key]):
                dlog("INVALID: %s" % prefs[pref_key])
                isvalid = False
                break
        entry_id = identifiers.get(Getter.entry_identifier, None)
        dlog("---------------", entry_id)
        if entry_id and Getter.does_understand(entry_id):
            dc = Getter.resolve(entry_id)
            dlog("RESULT")
            dlog(dc)
            # Stop at the first getter whose result has authors
            if dc['authors']:
                break
    dlog(dc)
    if not dc:
        dlog("not dc: returning")
        return None
    dlog("OK")
    mi = Metadata(dc["title"], dc["authors"])
    # Copy optional fields, but only those the Metadata object already has
    for attr in ("pubdate", "publisher", "issue", "abstract", "keywords", "volume", "pages", ):
        if hasattr(mi, attr) and attr in dc:
            setattr(mi, attr, dc[attr])
    mi.print_all_attributes()
    self.clean_downloaded_metadata(mi)
    result_queue.put(mi)
    dlog("=" * 20)
    dlog(dir(self))
    dlog("=" * 20)
    return None
def convert(self, oeb_book, output, input_plugin, opts, log):
    """Convert from calibre's internal format to KePub.

    The book is first written by the wrapped EPUB output plugin. The
    resulting container then gets a ``plugininfo.kte`` details file and is
    handed to ``modify_epub``. Nothing further happens to containers that
    report DRM.
    """
    self.epub_output_plugin.convert(oeb_book, output, input_plugin, opts, log)
    container = KEPubContainer(output, default_log)

    if container.is_drm_encumbered:
        return

    # Write the details file
    plugin_info = {
        "kepub_output_version": ".".join(str(part) for part in self.version),
        "kepub_output_currenttime": datetime.utcnow().ctime(),
    }
    info_file = self.temporary_file("_KePubOutputPluginInfo")
    info_file.write(json.dumps(plugin_info))
    info_file.close()
    container.copy_file_to_container(
        info_file.name, name="plugininfo.kte", mt="application/json"
    )

    # Title and authors come from the OPF, with NULL_VALUES as fallback
    found_titles = container.opf_xpath("./opf:metadata/dc:title/text()")
    title = found_titles[0] if len(found_titles) > 0 else NULL_VALUES["title"]

    authors = container.opf_xpath(
        './opf:metadata/dc:creator[@opf:role="aut"]/text()'
    )
    if len(authors) < 1:
        authors = NULL_VALUES["authors"]

    mi = Metadata(title, authors)

    found_languages = container.opf_xpath("./opf:metadata/dc:language/text()")
    if len(found_languages) > 0:
        mi.languages = found_languages
        language = found_languages[0]
    else:
        mi.languages = NULL_VALUES["languages"]
        language = NULL_VALUES["language"]
    # NOTE(review): this is a bare attribute access, not an assignment, and
    # ``language`` is never used -- possibly ``mi.language = language`` was
    # intended. Kept as found.
    mi.language

    epub_options = {
        "clean_markup": opts.kepub_clean_markup,
        "hyphenate": opts.kepub_hyphenate,
        "no-hyphens": opts.kepub_disable_hyphenation,
        "smarten_punctuation": False,
        "extended_kepub_features": True,
    }
    modify_epub(container, output, metadata=mi, opts=epub_options)
def read_metadata(root):
    """Read the identifiers found under ``root`` into a new Metadata object.

    The ``calibre`` identifier becomes ``application_id``, ``uuid`` is
    ignored, and every other scheme is stored through ``set_identifiers``.
    Only the first value of each scheme is used.
    """
    mi = Metadata(_('Unknown'), [_('Unknown')])
    prefixes, refines = read_prefixes(root), read_refines(root)
    found = read_identifiers(root, prefixes, refines)
    plain_ids = {}
    for scheme, values in found.iteritems():
        if scheme == 'uuid':
            continue
        first_value = values[0]
        if scheme == 'calibre':
            mi.application_id = first_value
        else:
            plain_ids[scheme] = first_value
    mi.set_identifiers(plain_ids)
    return mi
def generate_test_db( library_path, # {{{ num_of_records=20000, num_of_authors=6000, num_of_tags=10000, tag_length=7, author_length=7, title_length=10, max_authors=10, max_tags=10, ): import random, string, os, sys, time from calibre.constants import preferred_encoding if not os.path.exists(library_path): os.makedirs(library_path) letters = string.letters.decode(preferred_encoding) def randstr(length): return "".join(random.choice(letters) for i in xrange(length)) all_tags = [randstr(tag_length) for j in xrange(num_of_tags)] print "Generated", num_of_tags, "tags" all_authors = [randstr(author_length) for j in xrange(num_of_authors)] print "Generated", num_of_authors, "authors" all_titles = [randstr(title_length) for j in xrange(num_of_records)] print "Generated", num_of_records, "titles" testdb = db(library_path) print "Creating", num_of_records, "records..." start = time.time() for i, title in enumerate(all_titles): print i + 1, sys.stdout.flush() authors = random.randint(1, max_authors) authors = [random.choice(all_authors) for i in xrange(authors)] tags = random.randint(0, max_tags) tags = [random.choice(all_tags) for i in xrange(tags)] from calibre.ebooks.metadata.book.base import Metadata mi = Metadata(title, authors) mi.tags = tags testdb.import_book(mi, []) t = time.time() - start print "\nGenerated", num_of_records, "records in:", t, "seconds" print "Time per record:", t / float(num_of_records)
def parse(self, xml_detail):
    """Parse a pipe separated detail record into a Metadata object.

    The record is the second line of ``xml_detail``, split on ``|``. Fields
    used: 0 author, 1 title, 3 ISBN, 6 publisher, 13 comments and 34
    publication date as ``Y-M-D``.

    :param xml_detail: Raw response text.
    :return: Metadata for the record. The cover URL is built from the ISBN
        and is ``None`` when the ISBN field is empty.
    :raises IndexError: if the response has fewer lines or fields than
        expected.
    :raises ValueError: if the publication date parts are not integers.
    """
    data = xml_detail.split('\n')[1].split("|")
    self.log(data)

    title = data[1]
    authors = [data[0]]
    comments = data[13]
    isbn = data[3]
    publisher = data[6]
    pub_date_tmp = data[34].split('-')
    pub_date = datetime.datetime(int(pub_date_tmp[0]), int(pub_date_tmp[1]), int(pub_date_tmp[2]), tzinfo=utc_tz)

    # A split field is never None, so test for emptiness instead: an empty
    # ISBN must not yield a ".../covers/.jpg" URL.
    if isbn:
        cover = "%s/images/covers/%s.jpg" % (self.plugin.BASE_URL, isbn.replace("-", ""))
    else:
        cover = None

    # title and authors always exist here (they come straight from the split
    # record), so the former "else: return None" branch was unreachable.
    mi = Metadata(title, authors)
    mi.languages = {'ces'}
    mi.comments = as_unicode(comments)
    mi.identifiers = {self.plugin.name:self.ident}
    mi.publisher = publisher
    mi.pubdate = pub_date
    mi.isbn = isbn
    mi.cover_url = cover

    if cover:
        self.plugin.cache_identifier_to_cover_url(self.ident, cover)

    return mi
def convert(self, oeb_book, output, input_plugin, opts, log):
    """Convert from calibre's internal format to KePub.

    The book is first written by the wrapped EPUB output plugin. The
    resulting container then gets a ``plugininfo.kte`` details file and is
    handed to ``modify_epub``. Nothing further happens to containers that
    report DRM.
    """
    self.epub_output_plugin.convert(oeb_book, output, input_plugin, opts, log)
    container = KEPubContainer(output, default_log)

    if container.is_drm_encumbered:
        return

    # Write the details file
    plugin_info = {
        'kepub_output_version': ".".join(str(part) for part in self.version),
        'kepub_output_currenttime': datetime.utcnow().ctime()
    }
    info_file = self.temporary_file('_KePubOutputPluginInfo')
    info_file.write(json.dumps(plugin_info))
    info_file.close()
    container.copy_file_to_container(info_file.name, name='plugininfo.kte', mt='application/json')

    # Title and authors come from the OPF, with NULL_VALUES as fallback
    found_titles = container.opf_xpath("./opf:metadata/dc:title/text()")
    title = found_titles[0] if len(found_titles) > 0 else NULL_VALUES['title']

    authors = container.opf_xpath(
        './opf:metadata/dc:creator[@opf:role="aut"]/text()')
    if len(authors) < 1:
        authors = NULL_VALUES['authors']

    mi = Metadata(title, authors)

    found_languages = container.opf_xpath("./opf:metadata/dc:language/text()")
    if len(found_languages) > 0:
        mi.languages = found_languages
        language = found_languages[0]
    else:
        mi.languages = NULL_VALUES['languages']
        language = NULL_VALUES['language']
    # NOTE(review): this is a bare attribute access, not an assignment, and
    # ``language`` is never used -- possibly ``mi.language = language`` was
    # intended. Kept as found.
    mi.language

    epub_options = {
        'clean_markup': opts.kepub_clean_markup,
        'hyphenate': opts.kepub_hyphenate,
        'no-hyphens': opts.kepub_disable_hyphenation,
        'replace_lang': opts.kepub_replace_lang,
        'smarten_punctuation': False,
        'extended_kepub_features': True
    }
    modify_epub(container, output, metadata=mi, opts=epub_options)
def to_metadata(browser, log, entry_, timeout):  # {{{
    """Convert one search result entry into a Metadata object.

    The title, authors and Google id are taken from ``entry_`` itself. The
    remaining fields come from a second request for the entry's detail
    feed, made through ``get_details``.

    :param browser: Browser object passed to ``get_details``.
    :param log: Logger used for error reporting.
    :param entry_: Atom entry element of the search result.
    :param timeout: Timeout passed to ``get_details``.
    :return: Metadata, or ``None`` when the entry has no id URL or no title.
        When the detail request fails, the partially filled Metadata (title,
        authors, google id) is returned.
    """
    from lxml import etree
    XPath = partial(etree.XPath, namespaces=NAMESPACES)

    entry = XPath('//atom:entry')
    entry_id = XPath('descendant::atom:id')
    creator = XPath('descendant::dc:creator')
    identifier = XPath('descendant::dc:identifier')
    title = XPath('descendant::dc:title')
    date = XPath('descendant::dc:date')
    publisher = XPath('descendant::dc:publisher')
    subject = XPath('descendant::dc:subject')
    description = XPath('descendant::dc:description')
    language = XPath('descendant::dc:language')
    rating = XPath('descendant::gd:rating[@average]')

    def get_text(extra, x):
        """Return the stripped text of the first match of ``x``, or None."""
        try:
            ans = x(extra)
            if ans:
                ans = ans[0].text
                if ans and ans.strip():
                    return ans.strip()
        except Exception:
            log.exception('Programming error:')
        return None

    id_url = entry_id(entry_)[0].text
    title_ = ': '.join([x.text for x in title(entry_)]).strip()
    # Test the extracted title text: ``title`` is the XPath object and is
    # always truthy. The check also has to precede the use of ``id_url``.
    if not id_url or not title_:
        # Silently discard this entry
        return None
    google_id = id_url.split('/')[-1]
    authors = [x.text.strip() for x in creator(entry_) if x.text]
    if not authors:
        authors = [_('Unknown')]

    mi = Metadata(title_, authors)
    mi.identifiers = {'google': google_id}
    try:
        raw = get_details(browser, id_url, timeout)
        feed = etree.fromstring(
            xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0])
        extra = entry(feed)[0]
    except Exception:
        log.exception('Failed to get additional details for', mi.title)
        return mi

    mi.comments = get_text(extra, description)
    lang = canonicalize_lang(get_text(extra, language))
    if lang:
        mi.language = lang
    mi.publisher = get_text(extra, publisher)

    # ISBN
    isbns = []
    for x in identifier(extra):
        t = str(x.text).strip()
        if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
            if t[:5].upper() == 'ISBN:':
                t = check_isbn(t[5:])
                if t:
                    isbns.append(t)
    if isbns:
        # The longest ISBN found becomes the primary one
        mi.isbn = sorted(isbns, key=len)[-1]
    mi.all_isbns = isbns

    # Tags
    try:
        btags = [x.text for x in subject(extra) if x.text]
        tags = []
        for t in btags:
            # A subject may hold several '/' separated tags
            atags = [y.strip() for y in t.split('/')]
            for tag in atags:
                if tag not in tags:
                    tags.append(tag)
    except Exception:
        log.exception('Failed to parse tags:')
        tags = []
    if tags:
        mi.tags = [x.replace(',', ';') for x in tags]

    # pubdate
    pubdate = get_text(extra, date)
    if pubdate:
        from calibre.utils.date import parse_date, utcnow
        try:
            default = utcnow().replace(day=15)
            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
        except Exception:
            log.error('Failed to parse pubdate %r' % pubdate)

    # Ratings
    for x in rating(extra):
        try:
            mi.rating = float(x.get('average'))
            # Averages above 5 are halved
            if mi.rating > 5:
                mi.rating /= 2
        except Exception:
            log.exception('Failed to parse rating')

    # Cover
    mi.has_google_cover = None
    for x in extra.xpath(
            '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'
    ):
        mi.has_google_cover = x.get('href')
        break

    return mi
def metadata_from_xmp_packet(raw_bytes):
    """Parse an XMP packet and return the metadata it carries.

    :param raw_bytes: the raw XMP packet, handed to ``parse_xmp_packet``.
    :return: a ``Metadata`` object filled from the Dublin Core, XMP, PDF,
        PRISM, pdfx and calibre properties found in the packet.
    :raises ValueError: if the title carries the ``\\376\\377`` marker of a
        corrupted packet produced by Nitro PDF.
    """
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))
    title = first_alt('//dc:title', root)
    if title:
        if title.startswith(r'\376\377'):
            # corrupted XMP packet generated by Nitro PDF. See
            # https://bugs.launchpad.net/calibre/+bug/1541981
            raise ValueError(
                'Corrupted XMP metadata packet detected, probably generated by Nitro PDF'
            )
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = [au for aus in authors for au in string_to_authors(aus)]
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences(
        '//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(
            first_sequence('//dc:date', root) or first_simple('//xmp:CreateDate', root),
            assume_utc=False)
    except Exception:
        # Best effort: a missing or unparseable date leaves pubdate unset.
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp
    md = safe_parse_date(first_simple('//xmp:MetadataDate', root))
    mod = safe_parse_date(first_simple('//xmp:ModifyDate', root))
    fd = more_recent(md, mod)
    if fd is not None:
        mi.metadata_date = fd
    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break
    for x in ('author_link_map', 'user_categories'):
        val = first_simple('//calibre:' + x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except Exception:
                # Malformed JSON in the packet is ignored.
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        languages = list(filter(None, map(canonicalize_lang, languages)))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple(f'//{namespace}:{scheme}', root)
                scheme = scheme.lower()
                if scheme == 'isbn':
                    val = check_isbn(val)
                elif scheme == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in iteritems({
            'doi': check_doi,
            'isbn': check_isbn
    }):
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                # Store under the scheme whose checker accepted the value;
                # previously an ISBN found here was filed under 'doi'.
                identifiers[scheme] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)

    return mi
class Book(Book_):
    """Book record for a Kobo device, extending ``Book_`` with Kobo-specific state."""

    def __init__(self, prefix, lpath, title=None, authors=None, mime=None, date=None,
                 ContentType=None, thumbnail_name=None, size=None, other=None):
        """Initialise the record.

        :param prefix: passed through to ``Book_.__init__``.
        :param lpath: passed through to ``Book_.__init__``.
        :param title: book title; also copied onto *other* when that is given.
        :param authors: author string, parsed with ``authors_from_string``.
        :param mime: stored as ``self.mime``.
        :param date: date string; parsed into ``self.datetime`` only when
            ``ContentType == '6'``.
        :param ContentType: Kobo content type code, compared against ``'6'``.
        :param thumbnail_name: if given, wrapped in ``ImageWrapper`` and
            stored as ``self.thumbnail``.
        :param size: stored as ``self.size`` and passed to ``Book_.__init__``.
        :param other: optional object passed to ``Book_.__init__``; its
            ``title`` and ``published_date`` are overwritten first.
        """
        from calibre.utils.date import parse_date
        # debug_print('Book::__init__ - title=', title)
        show_debug = title is not None and title.lower().find("xxxxx") >= 0
        if other is not None:
            other.title = title
            other.published_date = date
        if show_debug:
            debug_print("Book::__init__ - title=", title, 'authors=', authors)
            debug_print("Book::__init__ - other=", other)
        super(Book, self).__init__(prefix, lpath, size, other)

        if title is not None and len(title) > 0:
            self.title = title

        if authors is not None and len(authors) > 0:
            self.authors_from_string(authors)
            if self.author_sort is None or self.author_sort == "Unknown":
                self.author_sort = author_to_author_sort(authors)

        self.mime = mime

        self.size = size  # will be set later if None

        if ContentType == '6' and date is not None:
            # Try progressively more lenient ways of obtaining a timestamp; the
            # first one that works wins.  The order is significant.
            date_sources = (
                lambda: time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f"),
                lambda: time.strptime(date.split('+')[0], "%Y-%m-%dT%H:%M:%S"),
                lambda: time.strptime(date.split('+')[0], "%Y-%m-%d"),
                lambda: parse_date(date, assume_utc=True).timetuple(),
                lambda: time.gmtime(os.path.getctime(self.path)),
            )
            for get_datetime in date_sources:
                try:
                    self.datetime = get_datetime()
                    break
                except Exception:
                    continue
            else:
                # Nothing worked: fall back to the current time.
                self.datetime = time.gmtime()

        self.kobo_metadata = Metadata(title, self.authors)
        self.contentID = None
        self.current_shelves = []
        self.kobo_collections = []
        self.can_put_on_shelves = True
        self.kobo_series = None
        # Kobo stores the series number as string. And it can have a leading "#".
        self.kobo_series_number = None
        self.kobo_subtitle = None

        if thumbnail_name is not None:
            self.thumbnail = ImageWrapper(thumbnail_name)

        if show_debug:
            debug_print("Book::__init__ end - self=", self)
            debug_print("Book::__init__ end - title=", title, 'authors=', authors)

    @property
    def is_sideloaded(self):
        """Truthy when the content ID is set and starts with ``"file"``."""
        # If we don't have a content Id, we don't know what type it is.
        return self.contentID and self.contentID.startswith("file")

    @property
    def is_purchased_kepub(self):
        """Truthy when the content ID is set and does not start with ``"file"``."""
        return self.contentID and not self.contentID.startswith("file")

    def __unicode__(self):
        '''
        A string representation of this object, suitable for printing to
        console
        '''
        ans = [u"Kobo metadata:"]

        def fmt(x, y):
            ans.append(u'%-20s: %s'%(unicode_type(x), unicode_type(y)))

        if self.contentID:
            fmt('Content ID', self.contentID)
        if self.kobo_series:
            fmt('Kobo Series', self.kobo_series + ' #%s'%self.kobo_series_number)
        if self.kobo_subtitle:
            fmt('Subtitle', self.kobo_subtitle)
        if self.mime:
            fmt('MimeType', self.mime)

        ans = u'\n'.join(ans) + u"\n" + self.kobo_metadata.__unicode__()

        return super(Book,self).__unicode__() + u"\n" + ans
def parse_details(self, raw, root):
    """Extract metadata from a product details page and queue the result.

    Title, authors and ASIN are mandatory: if any is missing the failure is
    logged and nothing is queued.  Every other field is best effort; a failure
    while parsing one field is logged and the remaining fields are still tried.

    :param raw: the raw page data (written to a temp file when testing and
        passed to ``parse_cover``).
    :param root: the parsed page tree.
    :return: None.  The resulting ``Metadata`` is put on ``self.result_queue``.
    """
    asin = parse_asin(root, self.log, self.url)
    if self.testing:
        import tempfile, uuid
        with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4()))+ '_',
                suffix='.html', delete=False) as f:
            f.write(raw)
        print ('Downloaded html for', asin, 'saved in', f.name)

    try:
        title = self.parse_title(root)
    except Exception:
        self.log.exception('Error parsing title for url: %r'%self.url)
        title = None

    try:
        authors = self.parse_authors(root)
    except Exception:
        self.log.exception('Error parsing authors for url: %r'%self.url)
        authors = []

    if not title or not authors or not asin:
        self.log.error('Could not find title/authors/asin for %r'%self.url)
        self.log.error('ASIN: %r Title: %r Authors: %r'%(asin, title, authors))
        return

    mi = Metadata(title, authors)
    # The .com store uses the plain 'amazon' identifier; other domains get a suffix.
    idtype = 'amazon' if self.domain == 'com' else 'amazon_'+self.domain
    mi.set_identifier(idtype, asin)
    self.amazon_id = asin

    try:
        mi.rating = self.parse_rating(root)
    except Exception:
        self.log.exception('Error parsing ratings for url: %r'%self.url)

    try:
        mi.comments = self.parse_comments(root)
    except Exception:
        self.log.exception('Error parsing comments for url: %r'%self.url)

    try:
        series, series_index = self.parse_series(root)
        if series:
            mi.series, mi.series_index = series, series_index
        elif self.testing:
            mi.series, mi.series_index = 'Dummy series for testing', 1
    except Exception:
        self.log.exception('Error parsing series for url: %r'%self.url)

    try:
        mi.tags = self.parse_tags(root)
    except Exception:
        self.log.exception('Error parsing tags for url: %r'%self.url)

    try:
        self.cover_url = self.parse_cover(root, raw)
    except Exception:
        self.log.exception('Error parsing cover for url: %r'%self.url)
    mi.has_cover = bool(self.cover_url)

    non_hero = tuple(self.selector('div#bookDetails_container_div div#nonHeroSection'))
    if non_hero:
        # New style markup
        try:
            self.parse_new_details(root, mi, non_hero[0])
        except Exception:
            self.log.exception('Failed to parse new-style book details section')
    else:
        pd = root.xpath(self.pd_xpath)
        if pd:
            pd = pd[0]

            try:
                isbn = self.parse_isbn(pd)
                if isbn:
                    self.isbn = mi.isbn = isbn
            except Exception:
                self.log.exception('Error parsing ISBN for url: %r'%self.url)

            try:
                mi.publisher = self.parse_publisher(pd)
            except Exception:
                self.log.exception('Error parsing publisher for url: %r'%self.url)

            try:
                mi.pubdate = self.parse_pubdate(pd)
            except Exception:
                self.log.exception('Error parsing publish date for url: %r'%self.url)

            try:
                lang = self.parse_language(pd)
                if lang:
                    mi.language = lang
            except Exception:
                self.log.exception('Error parsing language for url: %r'%self.url)

        else:
            self.log.warning('Failed to find product description for url: %r'%self.url)

    mi.source_relevance = self.relevance

    if self.amazon_id:
        # Remember the ISBN -> ASIN and ASIN -> cover mappings in the plugin caches.
        if self.isbn:
            self.plugin.cache_isbn_to_identifier(self.isbn, self.amazon_id)
        if self.cover_url:
            self.plugin.cache_identifier_to_cover_url(self.amazon_id, self.cover_url)

    self.plugin.clean_downloaded_metadata(mi)

    self.result_queue.put(mi)
def identify(log, abort,  # {{{
             title=None, authors=None, identifiers=None, timeout=30):
    """Query every configured identify plugin and return the merged results.

    One ``Worker`` is started per plugin.  Results are polled until all workers
    have finished, or until ``msprefs['wait_after_first_identify_result']``
    seconds have passed since the first result arrived, at which point *abort*
    is set.  Per-plugin results are sorted, de-duplicated on (title, authors),
    stripped of the plugin's ignored fields and finally merged across sources.

    :param log: log object; called directly and via ``warn``/``debug``.
    :param abort: event-like object with ``set()`` and ``is_set()``.
    :param title: title to search for; the localized 'Unknown' counts as None.
    :param authors: list of authors; ``[_('Unknown')]`` counts as None.
    :param identifiers: dict of identifiers; ``None`` means an empty dict.
    :param timeout: timeout handed to the plugins.
    :return: the merged results from ``merge_identify_results``.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if identifiers is None:
        identifiers = {}
    if title == _('Unknown'):
        title = None
    if authors == [_('Unknown')]:
        authors = None
    start_time = time.time()

    plugins = [p for p in metadata_plugins(['identify']) if p.is_configured()]

    kwargs = {
        'title': title,
        'authors': authors,
        'identifiers': identifiers,
        'timeout': timeout,
    }

    log('Running identify query with parameters:')
    log(kwargs)
    log('Using plugins:', ', '.join([p.name for p in plugins]))
    log('The log from individual plugins is below')

    workers = [Worker(p, kwargs, abort) for p in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    results = {p: [] for p in plugins}
    logs = {w.plugin: w.buf for w in workers}

    def get_results():
        """Take at most one queued result from each worker; True if any was found."""
        found = False
        for w in workers:
            try:
                result = w.rq.get_nowait()
            except Empty:
                pass
            else:
                results[w.plugin].append(result)
                found = True
        return found

    wait_time = msprefs['wait_after_first_identify_result']
    while True:
        time.sleep(0.2)

        if get_results() and first_result_at is None:
            first_result_at = time.time()

        if not is_worker_alive(workers):
            break

        if (first_result_at is not None and
                time.time() - first_result_at > wait_time):
            log.warn('Not waiting any longer for more results. Still running'
                     ' sources:')
            for worker in workers:
                if worker.is_alive():
                    log.debug('\t' + worker.name)
            abort.set()
            break

    # Drain whatever is still queued, unless we aborted.
    while not abort.is_set() and get_results():
        pass

    # Only these keys are passed to the plugins' result sort-key generators.
    sort_kwargs = {k: v for k, v in kwargs.items()
                   if k in ('title', 'authors', 'identifiers')}

    longest, lp = -1, ''
    # Iterate over a snapshot because results[plugin] is rebound in the loop.
    for plugin, presults in list(results.items()):
        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))

        # Throw away lower priority results from the same source that have exactly the same
        # title and authors as a higher priority result
        filter_results = set()
        filtered_results = []
        for r in presults:
            key = (r.title, tuple(r.authors))
            if key not in filter_results:
                filtered_results.append(r)
                filter_results.add(key)
        results[plugin] = presults = filtered_results

        plog = logs[plugin].getvalue().strip()
        log('\n' + '*' * 30, plugin.name, '*' * 30)
        log('Request extra headers:', plugin.browser.addheaders)
        log('Found %d results' % len(presults))
        time_spent = getattr(plugin, 'dl_time_spent', None)
        if time_spent is None:
            log('Downloading was aborted')
            longest, lp = -1, plugin.name
        else:
            log('Downloading from', plugin.name, 'took', time_spent)
            if time_spent > longest:
                longest, lp = time_spent, plugin.name
        for r in presults:
            log('\n\n---')
            log(unicode(r))
        if plog:
            log(plog)
        log('\n' + '*' * 80)

        # Ignored fields are reset to the values of a blank Metadata object.
        dummy = Metadata(_('Unknown'))
        for i, result in enumerate(presults):
            for f in plugin.prefs['ignore_fields']:
                if ':' not in f:
                    setattr(result, f, getattr(dummy, f))
                if f == 'series':
                    result.series_index = dummy.series_index
            result.relevance_in_source = i
            result.has_cached_cover_url = (
                plugin.cached_cover_url_is_reliable and
                plugin.get_cached_cover_url(result.identifiers) is not None)
            result.identify_plugin = plugin
            if msprefs['txt_comments']:
                if plugin.has_html_comments and result.comments:
                    result.comments = html2text(result.comments)

    log('The identify phase took %.2f seconds' % (time.time() - start_time))
    log('The longest time (%f) was taken by:' % longest, lp)
    log('Merging results from different sources and finding earliest ',
        'publication dates from the worldcat.org service')
    start_time = time.time()
    results = merge_identify_results(results, log)

    log('We have %d merged results, merging took: %.2f seconds' %
        (len(results), time.time() - start_time))

    max_tags = msprefs['max_tags']
    for r in results:
        r.tags = r.tags[:max_tags]
        if getattr(r.pubdate, 'year', 2000) <= UNDEFINED_DATE.year:
            r.pubdate = None

    if msprefs['swap_author_names']:
        def swap_to_ln_fn(a):
            """Turn 'First Last' into 'Last, First'; names containing a comma are kept."""
            if ',' in a:
                return a
            parts = a.split(None)
            if len(parts) <= 1:
                return a
            surname = parts[-1]
            return '%s, %s' % (surname, ' '.join(parts[:-1]))

        for r in results:
            r.authors = [swap_to_ln_fn(a) for a in r.authors]

    return results
def _GoodreadsBook_to_Metadata(self, book):
    # type: (_GoodreadsBook) -> Metadata
    """
    Convert a Goodreads book record into a Metadata object.

    Identifiers, ISBN, publisher, publication date, comments, series and
    rating are copied when present, subject to the ``NEVER_REPLACE_ISBN`` and
    ``NEVER_REPLACE_AMAZONID`` preferences.  The cover URL is cached against
    the Goodreads id.

    :param book: _GoodreadsBook: book
    :return: Metadata: Metadata
    """
    mi = Metadata(book.title, book.authors)
    mi.source_relevance = 0
    mi.set_identifier('goodreads', book.id)

    if self.prefs['NEVER_REPLACE_ISBN'] and mi.get_identifiers().get('isbn'):
        mi.set_identifier('isbn', '')

    if book.asin and not self.prefs['NEVER_REPLACE_AMAZONID']:
        mi.set_identifier('amazon', book.asin)

    if book.isbn and not self.prefs['NEVER_REPLACE_ISBN']:
        try:
            # 10-character ISBNs are converted before validation as ISBN-13.
            if len(book.isbn) == 10:
                mi.isbn = check_isbn13(_ISBNConvert.convert(book.isbn))
            else:
                mi.isbn = check_isbn13(book.isbn)
        except Exception:
            self.log.error("ISBN CONVERSION ERROR:", book.isbn)
            self.log.exception()

    if book.image_url:
        # NOTE(review): the log line prints book.asin but the cache key is
        # book.id -- confirm which one is intended.
        self.log.info('cache_identifier_to_cover_url:', book.asin, ':', book.image_url)
        self.cache_identifier_to_cover_url(book.id, book.image_url)

    if book.publisher:
        self.log.info('book.publisher is:', book.publisher)
        mi.publisher = book.publisher

    if book.pubdate:
        self.log.info('book.pubdate is:', book.pubdate.strftime('%Y-%m-%d'))
        mi.pubdate = book.pubdate

    if book.comments:
        self.log.info('book.editorial_review is:', book.comments)
        mi.comments = book.comments

    # NOTE(review): ``tags`` is built but never assigned to ``mi.tags``; the
    # mapping code that consumed it is commented out below.
    tags = self.prefs['ADD_THESE_TAGS'].split(',')
    tags.extend(book.tags)
    # tag_mappings = JSONConfig('plugins/GenreMappings')['genreMappings']
    # mi.tags = list(set(sorted(filter(lambda x: tag_mappings.get(x, x), tags))))

    if book.series:
        mi.series = book.series
        self.log.info(u'series:', book.series)
        if book.series_index:
            mi.series_index = book.series_index
            self.log.info(u'series_index:', "{0:.2f}".format(book.series_index))
        else:
            mi.series_index = 0

    if book.average_rating:
        mi.rating = book.average_rating

    self.clean_downloaded_metadata(mi)

    return mi
def validate(self, x):
    """Run ``vformat`` on *x* against a blank book and return its result.

    ``self.book`` is replaced by a ``Metadata`` object with an empty title
    before formatting, and ``vformat`` receives no positional or keyword
    values.
    """
    from calibre.ebooks.metadata.book.base import Metadata

    blank_book = Metadata('')
    self.book = blank_book
    positional, named = [], {}
    return self.vformat(x, positional, named)
def test_legacy_direct(self):  # {{{
    'Test read-only methods that are directly equivalent in the old and new interface'
    from calibre.ebooks.metadata.book.base import Metadata
    from datetime import timedelta
    # ndb is the new-API legacy wrapper, db the old implementation; every
    # check below calls the same method on both and compares the results.
    ndb = self.init_legacy(self.cloned_library)
    db = self.init_old()
    newstag = ndb.new_api.get_item_id('tags', 'news')

    self.assertEqual(dict(db.prefs), dict(ndb.prefs))

    # Maps method name -> list of argument tuples to call it with.  A leading
    # '!' means both results are passed through dict() before comparison, a
    # leading '@' means they are passed through frozenset().
    for meth, args in {
        'find_identical_books': [(Metadata('title one', ['author one']), ), (Metadata('unknown'), ), (Metadata('xxxx'), )],
        'get_books_for_category': [('tags', newstag), ('#formats', 'FMT1')],
        'get_next_series_num_for': [('A Series One', )],
        'get_id_from_uuid': [('ddddd', ), (db.uuid(1, True), )],
        'cover': [(0, ), (1, ), (2, )],
        'get_author_id': [('author one', ), ('unknown', ), ('xxxxx', )],
        'series_id': [(0, ), (1, ), (2, )],
        'publisher_id': [(0, ), (1, ), (2, )],
        '@tags_older_than': [
            ('News', None), ('Tag One', None), ('xxxx', None), ('Tag One', None, 'News'), ('News', None, 'xxxx'),
            ('News', None, None, ['xxxxxxx']), ('News', None, 'Tag One', ['Author Two', 'Author One']),
            ('News', timedelta(0), None, None), ('News', timedelta(100000)),
        ],
        'format': [(1, 'FMT1', True), (2, 'FMT1', True), (0, 'xxxxxx')],
        'has_format': [(1, 'FMT1', True), (2, 'FMT1', True), (0, 'xxxxxx')],
        'sizeof_format': [(1, 'FMT1', True), (2, 'FMT1', True), (0, 'xxxxxx')],
        '@format_files': [(0, ), (1, ), (2, )],
        'formats': [(0, ), (1, ), (2, )],
        'max_size': [(0, ), (1, ), (2, )],
        'format_hash': [(1, 'FMT1'), (1, 'FMT2'), (2, 'FMT1')],
        'author_sort_from_authors': [(['Author One', 'Author Two', 'Unknown'], )],
        'has_book': [(Metadata('title one'), ), (Metadata('xxxx1111'), )],
        'has_id': [(1, ), (2, ), (3, ), (9999, )],
        'id': [
            (1, ), (2, ), (0, ),
        ],
        'index': [
            (1, ), (2, ), (3, ),
        ],
        'row': [
            (1, ), (2, ), (3, ),
        ],
        'is_empty': [()],
        'count': [()],
        'all_author_names': [()],
        'all_tag_names': [()],
        'all_series_names': [()],
        'all_publisher_names': [()],
        '!all_authors': [()],
        '!all_tags2': [()],
        '@all_tags': [()],
        '@get_all_identifier_types': [()],
        '!all_publishers': [()],
        '!all_titles': [()],
        '!all_series': [()],
        'standard_field_keys': [()],
        'all_field_keys': [()],
        'searchable_fields': [()],
        'search_term_to_field_key': [('author', ), ('tag', )],
        'metadata_for_field': [('title', ), ('tags', )],
        'sortable_field_keys': [()],
        'custom_field_keys': [(True, ), (False, )],
        '!get_usage_count_by_id': [('authors', ), ('tags', ), ('series', ), ('publisher', ), ('#tags', ), ('languages', )],
        'get_field': [(1, 'title'), (2, 'tags'), (0, 'rating'), (1, 'authors'), (2, 'series'), (1, '#tags')],
        'all_formats': [()],
        'get_authors_with_ids': [()],
        '!get_tags_with_ids': [()],
        '!get_series_with_ids': [()],
        '!get_publishers_with_ids': [()],
        '!get_ratings_with_ids': [()],
        '!get_languages_with_ids': [()],
        'tag_name': [(3, )],
        'author_name': [(3, )],
        'series_name': [(3, )],
        'authors_sort_strings': [(0, ), (1, ), (2, )],
        'author_sort_from_book': [(0, ), (1, ), (2, )],
        'authors_with_sort_strings': [(0, ), (1, ), (2, )],
        'book_on_device_string': [(1, ), (2, ), (3, )],
        'books_in_series_of': [(0, ), (1, ), (2, )],
        'books_with_same_title': [(Metadata(db.title(0)), ), (Metadata(db.title(1)), ), (Metadata('1234'), )],
    }.iteritems():
        # Default normaliser: compare the raw return values.
        fmt = lambda x: x
        if meth[0] in {'!', '@'}:
            # Strip the marker and pick the matching normaliser.
            fmt = {'!': dict, '@': frozenset}[meth[0]]
            meth = meth[1:]
        elif meth == 'get_authors_with_ids':
            # Key each row by its first element so row order does not matter.
            fmt = lambda val: {x[0]: tuple(x[1:]) for x in val}
        for a in args:
            self.assertEqual(
                fmt(getattr(db, meth)(*a)), fmt(getattr(ndb, meth)(*a)),
                'The method: %s() returned different results for argument %s' % (meth, a))

    def f(x, y):  # get_top_level_move_items is broken in the old db on case-insensitive file systems
        x.discard('metadata_db_prefs_backup.json')
        return x, y

    self.assertEqual(f(*db.get_top_level_move_items()), f(*ndb.get_top_level_move_items()))

    # Cover and format data copied out of both databases must be identical.
    d1, d2 = BytesIO(), BytesIO()
    db.copy_cover_to(1, d1, True)
    ndb.copy_cover_to(1, d2, True)
    self.assertTrue(d1.getvalue() == d2.getvalue())
    d1, d2 = BytesIO(), BytesIO()
    db.copy_format_to(1, 'FMT1', d1, True)
    ndb.copy_format_to(1, 'FMT1', d2, True)
    self.assertTrue(d1.getvalue() == d2.getvalue())

    old = db.get_data_as_dict(prefix='test-prefix')
    new = ndb.get_data_as_dict(prefix='test-prefix')
    for o, n in zip(old, new):
        # Lists become sets so element order is ignored; bytes keys coming from
        # the old db are converted with type('') before comparing.
        o = {
            type('')(k) if isinstance(k, bytes) else k: set(v) if isinstance(v, list) else v
            for k, v in o.iteritems()
        }
        n = {
            k: set(v) if isinstance(v, list) else v
            for k, v in n.iteritems()
        }
        self.assertEqual(o, n)

    # After the same search on both, row() must agree for id 3 and raise
    # ValueError for id 2 on both.
    ndb.search('title:Unknown')
    db.search('title:Unknown')
    self.assertEqual(db.row(3), ndb.row(3))
    self.assertRaises(ValueError, ndb.row, 2)
    self.assertRaises(ValueError, db.row, 2)
    db.close()
def reset_info(self):
    """Show a blank record: a ``Metadata`` whose title is the localized 'Unknown'."""
    placeholder = Metadata(_('Unknown'))
    self.show_data(placeholder)
def read_metadata_kfx(stream, read_cover=True):
    """Read the metadata.kfx file found in the sdr book folder of a KFX book.

    :param stream: file-like object whose full contents are the container.
    :param read_cover: when true, also decode the embedded cover, if any.
    :return: a ``Metadata`` object built from the extracted fields.
    """
    container = Container(stream.read())
    m = extract_metadata(container.decode())
    # dump_metadata(m)

    def has(key):
        return m[key] and m[key][0]

    def get(key, single=True):
        values = m[key]
        if not single:
            return [clean_xml_chars(item) for item in values]
        if values:
            return clean_xml_chars(values[0])
        return ''

    title = get('title') or _('Unknown')
    authors = get('author', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(name):
        # Unless the sort string is a plain copy, 'Last, First' is flipped
        # back to 'First Last'.
        if tweaks['author_sort_copy_method'] != 'copy':
            found = auth_pat.match(name.strip())
            if found is not None:
                return found.group(2) + ' ' + found.group(1)
        return name

    # remove duplicates while retaining order
    unique_authors = []
    for fixed in [fix_author(name) for name in authors]:
        if fixed in unique_authors:
            continue
        unique_authors.append(fixed)

    mi = Metadata(title, unique_authors)

    if has('author'):
        mi.author_sort = get('author')

    if has('ASIN'):
        mi.set_identifier('mobi-asin', get('ASIN'))
    elif has('content_id'):
        mi.set_identifier('mobi-asin', get('content_id'))

    if has('languages'):
        langs = [
            code for code in
            (canonicalize_lang(raw_lang) for raw_lang in get('languages', False))
            if code
        ]
        if langs:
            mi.languages = langs

    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass

    if has('publisher') and get('publisher') != 'Unknown':
        mi.publisher = get('publisher')

    if read_cover and m[COVER_KEY]:
        try:
            data = from_base64_bytes(m[COVER_KEY])
            fmt, w, h = identify(data)
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi
def mi(self):
    """Build a ``Metadata`` object from the title, authors and languages widgets.

    An empty title or author field falls back to the localized 'Unknown';
    when no language codes are selected ``get_lang()`` is used.
    """
    title_text = unicode(self.title.text()).strip()
    ans = Metadata(title_text or _('Unknown'))

    authors_text = unicode(self.authors.text()).strip()
    ans.authors = string_to_authors(authors_text) or [_('Unknown')]

    ans.languages = self.languages.lang_codes or [get_lang()]
    return ans
def to_metadata(log, gmetadata, ExHentai_Status):  # {{{
    """Build a Metadata object from a gallery metadata dict.

    :param log: logger; called directly and via ``error``.
    :param gmetadata: dict providing the keys ``title``, ``title_jpn``,
        ``tags``, ``rating``, ``category``, ``gid``, ``token`` and ``thumb``.
    :param ExHentai_Status: value converted with ``int()`` and stored as the
        last component of the ``ehentai`` identifier.
    :return: the populated ``Metadata`` object.
    """
    title = gmetadata['title']
    title_jpn = gmetadata['title_jpn']
    tags = gmetadata['tags']
    rating = gmetadata['rating']
    category = gmetadata['category']
    gid = gmetadata['gid']
    token = gmetadata['token']
    thumb = gmetadata['thumb']

    # title
    raw_title = title_jpn if title_jpn else title
    pat1 = re.compile(r'(?P<comments>.*?\[(?P<author>(?:(?!汉化|漢化)[^\[\]])*)\](?:\s*(?:\[[^\(\)]+\]|\([^\[\]\(\)]+\))\s*)*(?P<title>[^\[\]\(\)]+).*)')
    m = pat1.search(raw_title)
    if m:
        title_ = m.group('title').strip()
        author = m.group('author').strip()
    else:
        title_ = raw_title.strip()
        author = 'Unknown'
        # No exception is active here, so report with error() instead of
        # exception(), which would append a meaningless traceback.
        log.error('Title match failed. Title is %s' % raw_title)

    authors = [author]

    mi = Metadata(title_, authors)
    mi.identifiers = {'ehentai': '%s_%s_%d' % (str(gid), str(token), int(ExHentai_Status))}

    # publisher
    pat2 = re.compile(r'^\(([^\[\]\(\)]*)\)')
    publisher_match = pat2.search(raw_title)
    if publisher_match:
        mi.publisher = publisher_match.group(1).strip()
    else:
        mi.publisher = 'Unknown'
        log.error('Not Found publisher.')

    # Tags
    tags_ = []
    for tag in tags:
        if tag.startswith('language'):
            tag_ = tag.replace('language:', '')
            # 'translated' is kept as an ordinary tag; any other language tag
            # sets the book language instead.
            if tag_ != 'translated':
                mi.language = tag_
            else:
                tags_.append(tag_)
        # elif re.match('parody|group|character|artist', tag):
        #     log('drop tag %s' % tag)
        #     continue
        elif ':' not in tag:
            # Tags without a namespace are dropped.
            log('drop tag %s' % tag)
            continue
        else:
            tags_.append(tag)
    tags_.append(category)
    mi.tags = tags_

    # rating
    mi.rating = float(rating)

    # cover
    mi.has_ehentai_cover = None
    if thumb:
        mi.has_ehentai_cover = thumb

    return mi
def to_metadata(browser, log, entry_, timeout):  # {{{
    """Convert one Douban Atom search-result entry into a Metadata object.

    :param browser: browser object handed to ``get_details`` to fetch the
        per-book details feed.
    :param log: logger providing ``exception`` and ``error``.
    :param entry_: the ``atom:entry`` element taken from the search results.
    :param timeout: timeout passed through to ``get_details``.
    :return: a ``Metadata`` instance, or ``None`` when the entry has no id URL
        or no title.  When the details feed cannot be fetched or parsed, the
        partially filled ``Metadata`` (title, authors, douban id) is returned.
    """
    from lxml import etree
    from calibre.ebooks.chardet import xml_to_unicode
    from calibre.utils.date import parse_date, utcnow
    from calibre.utils.cleantext import clean_ascii_chars

    XPath = partial(etree.XPath, namespaces=NAMESPACES)
    entry = XPath('//atom:entry')
    entry_id = XPath('descendant::atom:id')
    title = XPath('descendant::atom:title')
    description = XPath('descendant::atom:summary')
    publisher = XPath("descendant::db:attribute[@name='publisher']")
    isbn = XPath("descendant::db:attribute[@name='isbn13']")
    date = XPath("descendant::db:attribute[@name='pubdate']")
    creator = XPath("descendant::db:attribute[@name='author']")
    booktag = XPath("descendant::db:tag/attribute::name")
    rating = XPath("descendant::gd:rating/attribute::average")
    cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")

    def get_text(extra, x):
        """Return the stripped text of the first node matched by *x*, or None."""
        try:
            ans = x(extra)
            if ans:
                ans = ans[0].text
                if ans and ans.strip():
                    return ans.strip()
        except Exception:
            log.exception('Programming error:')
        return None

    id_url = entry_id(entry_)[0].text
    title_ = ': '.join([x.text for x in title(entry_)]).strip()
    authors = [x.text.strip() for x in creator(entry_) if x.text]
    if not authors:
        authors = [_('Unknown')]
    # The guard must test the extracted title text (title_); ``title`` is the
    # compiled XPath object and is always truthy.
    if not id_url or not title_:
        # Silently discard this entry
        return None
    # Split only after the guard so an empty id is discarded, not a crash.
    douban_id = id_url.split('/')[-1]

    mi = Metadata(title_, authors)
    mi.identifiers = {'douban': douban_id}
    try:
        raw = get_details(browser, id_url, timeout)
        feed = etree.fromstring(
            xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0])
        extra = entry(feed)[0]
    except Exception:
        log.exception('Failed to get additional details for', mi.title)
        return mi
    mi.comments = get_text(extra, description)
    mi.publisher = get_text(extra, publisher)

    # ISBN
    isbns = []
    for x in [t.text for t in isbn(extra)]:
        if check_isbn(x):
            isbns.append(x)
    if isbns:
        # Prefer the longest validated ISBN.
        mi.isbn = sorted(isbns, key=len)[-1]
    mi.all_isbns = isbns

    # Tags
    try:
        btags = [x for x in booktag(extra) if x]
        tags = []
        for t in btags:
            # Flatten '/'-separated tags and de-duplicate, keeping order.
            atags = [y.strip() for y in t.split('/')]
            for tag in atags:
                if tag not in tags:
                    tags.append(tag)
    except Exception:
        log.exception('Failed to parse tags:')
        tags = []
    if tags:
        mi.tags = [x.replace(',', ';') for x in tags]

    # pubdate
    pubdate = get_text(extra, date)
    if pubdate:
        try:
            # Missing date components default to the 15th of the current month.
            default = utcnow().replace(day=15)
            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
        except Exception:
            log.error('Failed to parse pubdate %r' % pubdate)

    # Ratings
    averages = rating(extra)
    if averages:
        try:
            # The feed's average is halved before being stored.
            mi.rating = float(averages[0]) / 2.0
        except Exception:
            log.exception('Failed to parse rating')
            mi.rating = 0

    # Cover
    mi.has_douban_cover = None
    u = cover_url(extra)
    if u:
        u = u[0].replace('/spic/', '/lpic/')
        # If URL contains "book-default", the book doesn't have a cover
        if u.find('book-default') == -1:
            mi.has_douban_cover = u
    return mi
def _metadata(self, baike):
    """Build a Metadata object from a Baidu Baike entry.

    :param baike: object providing ``get_info()``, ``get_tags()``,
        ``get_image()``, ``get_summary()`` and ``http.url``.
    :return: the populated ``Metadata`` object.  When ``self.copy_image`` is
        set, the cover image is downloaded and attached as ``cover_data``.
    """
    from calibre.ebooks.metadata.book.base import Metadata
    from cStringIO import StringIO

    info = baike.get_info()
    # Parenthesised single argument: prints the same text on Python 2 and 3.
    print("\n".join("%s:\t%s" % v for v in info.items()))

    mi = Metadata(info['title'])
    # Publisher: later keys take precedence over earlier ones.
    plat = "網絡小說平台"
    plat = info.get(u'首發狀態', plat)
    plat = info.get(u'首發網站', plat)
    plat = plat.replace(u'首發', '')
    mi.publisher = info.get(u'連載平台', plat)
    mi.authors = [info.get(u'作者', u'佚名')]
    mi.author_sort = mi.authors[0]
    mi.isbn = BAIKE_ISBN
    mi.tags = baike.get_tags()
    mi.pubdate = datetime.datetime.now()
    mi.timestamp = datetime.datetime.now()
    mi.cover_url = baike.get_image()
    # Drop a trailing footnote marker such as "[12]" from the summary.
    mi.comments = re.sub(r'\[\d+\]$', "", baike.get_summary())
    mi.website = baike.http.url
    mi.source = u'百度百科'

    if self.copy_image:
        response = urlopen(mi.cover_url)
        try:
            img = StringIO(response.read())
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        img_fmt = mi.cover_url.split(".")[-1]
        mi.cover_data = (img_fmt, img)

    if u'完結' in info.get(u'連載狀態', ""):
        day = re.findall(r'\d*-\d*-\d*', info[u'連載狀態'])
        try:
            mi.pubdate = datetime.datetime.strptime(day[0], '%Y-%m-%d')
        except (IndexError, ValueError):
            # No date found, or not in YYYY-MM-DD form: keep the default.
            pass
    return mi
if fmt == 'azw3': with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir): for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)): with open(name, 'wb') as f: f.write(data) c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull()) opf_to_azw3(opf_name, path, c) else: with ZipFile(path, 'w', compression=ZIP_STORED) as zf: zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED) zf.writestr('META-INF/', b'', 0o755) zf.writestr('META-INF/container.xml', CONTAINER) zf.writestr(opf_name, opf) zf.writestr(html_name, HTML) zf.writestr(toc_name, ncx) if __name__ == '__main__': from calibre.ebooks.metadata.book.base import Metadata mi = Metadata('Test book', authors=('Kovid Goyal', )) path = sys.argv[-1] ext = path.rpartition('.')[-1].lower() if ext not in valid_empty_formats: print(('Unsupported format:', ext)) raise SystemExit(1) create_book(mi, path, fmt=ext)
def build_meta(log, issue_id):
    '''Build metadata record based on comicvine issue_id'''
    wanted_fields = [
        'id', 'name', 'volume', 'issue_number', 'person_credits',
        'description', 'store_date', 'cover_date',
    ]
    issue = pycomicvine.Issue(issue_id, field_list=wanted_fields)
    if not (issue and issue.volume):
        log.warn('Unable to load Issue(%d)' % issue_id)
        return None

    # Title is "<volume> #<number>", with ": <issue name>" when the issue is named.
    title = '%s #%s' % (issue.volume.name, issue.issue_number)
    if issue.name:
        title += ': %s' % (issue.name)

    credited = [person.name for person in issue.person_credits]
    record = Metadata(title, credited)

    record.series = issue.volume.name
    record.series_index = str(issue.issue_number)

    record.set_identifier('comicvine', str(issue.id))
    record.set_identifier('comicvine-volume', str(issue.volume.id))

    record.comments = issue.description
    record.has_cover = False

    if issue.volume.publisher:
        record.publisher = issue.volume.publisher.name

    # The store date wins over the cover date when both exist.
    record.pubdate = issue.store_date or issue.cover_date
    return record
def parse(self, raw):
    """Parse a product page and return the metadata found on it.

    :param raw: the raw HTML of the page.
    :return: a ``Metadata`` object with title, authors, identifiers, tags,
        publisher, publication date, comments and the cover flag filled in
        where the page provides them.
    """
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date, UNDEFINED_DATE
    from css_selectors import Select
    root = parse_html(raw)
    selector = Select(root)
    sku = next(selector('div.sku.attGroup'))
    info = sku.getparent()
    top = info.getparent().getparent()
    banner = top.find('div')
    spans = banner.findall('span')
    title = ''
    # The title is the first span plus any following spans styled at 12pt.
    for i, span in enumerate(spans):
        if i == 0 or '12pt' in span.get('style', ''):
            title += astext(span)
        else:
            break
    # The last span lists the authors; parenthesised notes are removed.
    authors = [re.sub(r'\(.*\)', '', x).strip() for x in astext(spans[-1]).split(',')]
    mi = Metadata(title.strip(), authors)

    # Identifiers
    isbns = [check_isbn(x.strip()) for x in astext(sku).split(',')]
    for isbn in isbns:
        if isbn:
            self.plugin.cache_isbn_to_identifier(isbn, self.sku)
    # Longest ISBN first; invalid entries (falsy) sort last.
    isbns = sorted(isbns, key=lambda x: len(x) if x else 0, reverse=True)
    if isbns and isbns[0]:
        mi.isbn = isbns[0]
    mi.set_identifier('edelweiss', self.sku)

    # Tags
    bisac = tuple(selector('div.bisac.attGroup'))
    if bisac:
        bisac = astext(bisac[0])
        mi.tags = [x.strip() for x in bisac.split(',')]
        mi.tags = [t[1:].strip() if t.startswith('&') else t for t in mi.tags]

    # Publisher
    pub = tuple(selector('div.supplier.attGroup'))
    if pub:
        pub = astext(pub[0])
        mi.publisher = pub

    # Pubdate
    pub = tuple(selector('div.shipDate.attGroupItem'))
    if pub:
        pub = astext(pub[0])
        # Use the text after the first ':' when there is any, else the whole text.
        parts = pub.partition(':')[0::2]
        pub = parts[1] or parts[0]
        try:
            if ', Ship Date:' in pub:
                pub = pub.partition(', Ship Date:')[0]
            q = parse_only_date(pub, assume_utc=True)
            # Compare years: comparing the int year with the UNDEFINED_DATE
            # datetime itself was always unequal.
            if q.year != UNDEFINED_DATE.year:
                mi.pubdate = q
        except Exception:
            self.log.exception('Error parsing published date: %r'%pub)

    # Comments
    comm = ''
    general = tuple(selector('div#pd-general-overview-content'))
    if general:
        q = self.render_comments(general[0])
        if q != '<p>No title summary available. </p>':
            comm += q
    general = tuple(selector('div#pd-general-contributor-content'))
    if general:
        comm += self.render_comments(general[0])
    general = tuple(selector('div#pd-general-quotes-content'))
    if general:
        comm += self.render_comments(general[0])
    if comm:
        mi.comments = comm

    # Cover
    img = tuple(selector('img.title-image[src]'))
    if img:
        href = img[0].get('src').replace('jacket_covers/medium/', 'jacket_covers/flyout/')
        self.plugin.cache_identifier_to_cover_url(self.sku, href)

    mi.has_cover = self.plugin.cached_identifier_to_cover_url(self.sku) is not None

    return mi
def get_metadata_(src, encoding=None):
    """Extract metadata from the head of an HTML document.

    Values are read from comment tags first and from meta tags second.

    :param src: the document as bytes or text; only the first 150000
        characters are examined.
    :param encoding: encoding used to decode *src* when it is a bytestring;
        when omitted, ``xml_to_unicode`` does the conversion.
    :return: a ``Metadata`` object.
    """
    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    if isbytestring(src):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, 'replace')
    src = src[:150000]  # Searching shouldn't take too long
    comment_tags = parse_comment_tags(src)
    meta_tags = parse_meta_tags(src)

    def get(field):
        """Return the stripped value of *field*, or None if missing or blank."""
        ans = comment_tags.get(field, meta_tags.get(field, None))
        if ans:
            ans = ans.strip()
        if not ans:
            ans = None
        return ans

    # Title
    title = get('title')
    if not title:
        pat = re.compile('<title>([^<>]+?)</title>', re.IGNORECASE)
        match = pat.search(src)
        if match:
            title = replace_entities(match.group(1))

    # Author
    authors = get('authors') or _('Unknown')

    # Create MetaInformation with Title and Author
    mi = Metadata(title or _('Unknown'), string_to_authors(authors))

    for field in ('publisher', 'isbn', 'language', 'comments'):
        val = get(field)
        if val:
            setattr(mi, field, val)

    for field in ('pubdate', 'timestamp'):
        try:
            val = parse_date(get(field))
        except Exception:
            # Missing or unparseable dates are skipped.
            pass
        else:
            if not is_date_undefined(val):
                setattr(mi, field, val)

    # SERIES
    series = get('series')
    if series:
        # A trailing "[n]" on the series name carries the series index.
        pat = re.compile(r'\[([.0-9]+)\]$')
        match = pat.search(series)
        series_index = None
        if match is not None:
            try:
                series_index = float(match.group(1))
            except ValueError:
                pass
            series = series.replace(match.group(), '').strip()
        mi.series = series
        if series_index is None:
            try:
                series_index = float(get('series_index'))
            except (TypeError, ValueError):
                # Missing or non-numeric: leave the index unset instead of
                # storing the raw string.
                series_index = None
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = get('rating')
    if rating:
        try:
            value = float(rating)
        except (TypeError, ValueError):
            pass
        else:
            if value < 0:
                value = 0
            # Values above 5 are halved; if still above 5 the rating is cleared.
            if value > 5:
                value /= 2.
            if value > 5:
                value = 0
            mi.rating = value

    # TAGS
    tags = get('tags')
    if tags:
        tags = [x.strip() for x in tags.split(',') if x.strip()]
        if tags:
            mi.tags = tags

    return mi
def read_metadata(root, ver=None, return_extra_data=False):
    """Read the metadata stored in the parsed document *root*.

    :param root: the parsed document handed to every ``read_*`` helper.
    :param ver: not interpreted here; only echoed back in the extra data.
    :param return_extra_data: when true, return a tuple of
        ``(metadata, ver, raster cover, first spine item)`` instead of the
        bare metadata object.
    :return: the Metadata object, or the 4-tuple described above.
    """
    mi = Metadata(_('Unknown'), [_('Unknown')])
    prefixes, refines = read_prefixes(root), read_refines(root)
    args = (root, prefixes, refines)

    def fill(field, reader):
        # Keep the current value of field when the reader finds nothing.
        setattr(mi, field, reader(*args) or getattr(mi, field))

    # Identifiers: 'calibre' and 'uuid' map to dedicated attributes, the
    # rest go into the identifiers mapping (first value of each only).
    other_ids = {}
    for scheme, values in iteritems(read_identifiers(*args)):
        first = values[0]
        if scheme == 'calibre':
            mi.application_id = first
        elif scheme == 'uuid':
            mi.uuid = first
        else:
            other_ids[scheme] = first
    mi.set_identifiers(other_ids)

    fill('title', read_title)
    fill('title_sort', read_title_sort)
    fill('languages', read_languages)

    names, sorts = [], []
    for author in read_authors(*args):
        names.append(author.name)
        sorts.append(author.sort)
    mi.authors = names or mi.authors
    mi.author_sort = authors_to_string(sorts) or mi.author_sort

    producers = read_book_producers(*args)
    if producers and producers[0]:
        mi.book_producer = producers[0]

    # Dates are only stored when they are actually defined
    date_readers = (
        ('pubdate', read_pubdate),
        ('timestamp', read_timestamp),
        ('last_modified', read_last_modified),
    )
    for field, reader in date_readers:
        value = reader(*args)
        if not is_date_undefined(value):
            setattr(mi, field, value)

    fill('comments', read_comments)
    fill('publisher', read_publisher)
    fill('tags', read_tags)
    fill('rating', read_rating)

    series, series_index = read_series(*args)
    if series:
        mi.series, mi.series_index = series, series_index

    fill('author_link_map', read_author_link_map)
    fill('user_categories', read_user_categories)

    user_metadata = read_user_metadata(*args) or {}
    for name, fm in iteritems(user_metadata):
        mi.set_user_metadata(name, fm)

    if not return_extra_data:
        return mi
    return mi, ver, read_raster_cover(*args), first_spine_item(*args)
def merge(self, results, min_year, do_asr=True):
    """Merge several metadata results into one Metadata object.

    :param results: the metadata objects to merge.
    :param min_year: when truthy, the publication year to force onto the
        merged result; otherwise the earliest pubdate in *results* is used.
    :param do_asr: when true, store the mean ``relevance_in_source`` of
        *results* as ``average_source_relevance``.
    :return: the merged Metadata object.
    """
    merged = Metadata(_('Unknown'))

    def by_length(field, **options):
        # Delegate to length_merge, using the current value as null value.
        return self.length_merge(
            field, results, null_value=getattr(merged, field), **options)

    # We assume the shortest title has the least cruft in it
    merged.title = by_length('title')

    # No harm in having extra authors, maybe something useful like an
    # editor or translator
    merged.authors = by_length('authors', shortest=False)

    # We assume the shortest publisher has the least cruft in it
    merged.publisher = by_length('publisher')

    # We assume the smallest set of tags has the least cruft in it
    merged.tags = by_length('tags', shortest=msprefs['fewer_tags'])

    # We assume the longest series has the most info in it
    merged.series = by_length('series', shortest=False)
    for res in results:
        if res.series and res.series == merged.series:
            merged.series_index = res.series_index
            break

    # Average the rating over all sources
    ratings = []
    for res in results:
        score = res.rating
        if score and 0 < score <= 5:
            ratings.append(score)
    if ratings:
        merged.rating = sum(ratings) / len(ratings)

    # Smallest language is likely to be valid
    merged.language = by_length('language')

    # Choose longest comments
    merged.comments = by_length('comments', shortest=False)

    # Published date
    if min_year:
        # Prefer the month/day of a result published in min_year
        for res in results:
            if getattr(res.pubdate, 'year', None) == min_year:
                merged.pubdate = res.pubdate
                break
        if getattr(merged.pubdate, 'year', None) == min_year:
            month, day = merged.pubdate.month, merged.pubdate.day
        else:
            month, day = 1, 2
        merged.pubdate = datetime(min_year, month, day, tzinfo=utc_tz)
    else:
        # Year 3001 is a sentinel meaning "no date found"
        earliest = datetime(3001, 1, 1, tzinfo=utc_tz)
        for res in results:
            if res.pubdate is None:
                continue
            when = as_utc(res.pubdate)
            if when < earliest:
                earliest = when
        if earliest.year < 3000:
            merged.pubdate = earliest

    # Identifiers
    for res in results:
        merged.identifiers.update(res.identifiers)

    # Cover URL
    merged.has_cached_cover_url = any(
        getattr(res, 'has_cached_cover_url', False) for res in results)

    # Merge any other fields with no special handling (random merge)
    touched_fields = set()
    for res in results:
        if hasattr(res, 'identify_plugin'):
            touched_fields |= res.identify_plugin.touched_fields

    for field in touched_fields:
        if not field.startswith('identifier:') and merged.is_null(field):
            setattr(merged, field, self.random_merge(
                field, results, null_value=getattr(merged, field)))

    if do_asr:
        relevances = [res.relevance_in_source for res in results]
        merged.average_source_relevance = sum(relevances) / len(relevances)

    return merged
def __init__(self, parent, text, mi=None, fm=None, color_field=None,
             icon_field_key=None, icon_rule_kind=None):
    """Build the template editor dialog.

    :param parent: parent widget passed to ``QDialog.__init__``.
    :param text: initial template text; ``None`` leaves the editor empty.
    :param mi: metadata object the template is evaluated against. When
        falsy, a placeholder Metadata with sample values is built.
    :param fm: field metadata; supplies the selectable columns and, when a
        placeholder *mi* is built, the custom column definitions.
    :param color_field: when not ``None`` the dialog is in coloring mode
        and this column is pre-selected.
    :param icon_field_key: when not ``None`` (and *color_field* is
        ``None``) the dialog is in icon mode and this column is
        pre-selected.
    :param icon_rule_kind: icon rule kind to pre-select in icon mode.
    """
    QDialog.__init__(self, parent)
    Ui_TemplateDialog.__init__(self)
    self.setupUi(self)

    self.coloring = color_field is not None
    self.iconing = icon_field_key is not None

    cols = []
    if fm is not None:
        # Plain single-argument lambda: the parenthesised ``lambda (k):``
        # form is tuple-parameter syntax, which Python 3 rejects.
        for key in sorted(
                displayable_columns(fm),
                key=lambda k: sort_key(fm[k]['name']) if k != color_row_key else 0):
            if key == color_row_key and not self.coloring:
                continue
            from calibre.gui2.preferences.coloring import all_columns_string
            name = all_columns_string if key == color_row_key else fm[key]['name']
            if name:
                cols.append((name, key))

    self.color_layout.setVisible(False)
    self.icon_layout.setVisible(False)

    if self.coloring:
        self.color_layout.setVisible(True)
        for n1, k1 in cols:
            self.colored_field.addItem(n1, k1)
        self.colored_field.setCurrentIndex(
            self.colored_field.findData(color_field))
    elif self.iconing:
        self.icon_layout.setVisible(True)
        for n1, k1 in cols:
            self.icon_field.addItem(n1, k1)
        # Collect the .png files available in the cc_icons config folder
        self.icon_file_names = []
        d = os.path.join(config_dir, 'cc_icons')
        if os.path.exists(d):
            for icon_file in os.listdir(d):
                icon_file = icu_lower(icon_file)
                if os.path.exists(os.path.join(d, icon_file)):
                    if icon_file.endswith('.png'):
                        self.icon_file_names.append(icon_file)
        self.icon_file_names.sort(key=sort_key)
        self.update_filename_box()

        dex = 0
        from calibre.gui2.preferences.coloring import icon_rule_kinds
        for i, tup in enumerate(icon_rule_kinds):
            txt, val = tup
            self.icon_kind.addItem(txt, userData=QVariant(val))
            if val == icon_rule_kind:
                dex = i
        self.icon_kind.setCurrentIndex(dex)
        self.icon_field.setCurrentIndex(
            self.icon_field.findData(icon_field_key))

    if mi:
        self.mi = mi
    else:
        # Placeholder book used to evaluate the template
        self.mi = Metadata(_('Title'), [_('Author')])
        self.mi.author_sort = _('Author Sort')
        self.mi.series = _('Series')
        self.mi.series_index = 3
        self.mi.rating = 4.0
        self.mi.tags = [_('Tag 1'), _('Tag 2')]
        self.mi.languages = ['eng']
        if fm is not None:
            self.mi.set_all_user_metadata(fm.custom_field_metadata())
        else:
            # No field metadata. Grab a copy from the current library so
            # that we can validate any custom column names. The values for
            # the columns will all be empty, which in some very unusual
            # cases might cause formatter errors. We can live with that.
            from calibre.gui2.ui import get_gui
            self.mi.set_all_user_metadata(
                get_gui().current_db.new_api.field_metadata.custom_field_metadata())

    # Remove help icon on title bar
    icon = self.windowIcon()
    self.setWindowFlags(self.windowFlags() & (~Qt.WindowContextHelpButtonHint))
    self.setWindowIcon(icon)

    self.last_text = ''
    self.highlighter = TemplateHighlighter(self.textbox.document())
    self.textbox.cursorPositionChanged.connect(self.text_cursor_changed)
    self.textbox.textChanged.connect(self.textbox_changed)

    self.textbox.setTabStopWidth(10)
    self.source_code.setTabStopWidth(10)
    self.documentation.setReadOnly(True)
    self.source_code.setReadOnly(True)

    if text is not None:
        self.textbox.setPlainText(text)
    self.buttonBox.button(QDialogButtonBox.Ok).setText(_('&OK'))
    self.buttonBox.button(QDialogButtonBox.Cancel).setText(_('&Cancel'))
    self.color_copy_button.clicked.connect(self.color_to_clipboard)
    self.filename_button.clicked.connect(self.filename_button_clicked)
    self.icon_copy_button.clicked.connect(self.icon_to_clipboard)

    try:
        # No ``encoding`` keyword: json.load already assumes UTF-8, and the
        # keyword is rejected by newer Python versions, which would
        # silently empty this dict through the handler below.
        with open(P('template-functions.json'), 'rb') as f:
            self.builtin_source_dict = json.load(f)
    except Exception:
        # Best effort: without the file, built-in sources are simply not
        # available for display.
        self.builtin_source_dict = {}

    self.funcs = formatter_functions().get_functions()
    self.builtins = formatter_functions().get_builtins()

    func_names = sorted(self.funcs)
    self.function.clear()
    self.function.addItem('')
    self.function.addItems(func_names)
    self.function.setCurrentIndex(0)
    self.function.currentIndexChanged[str].connect(self.function_changed)
    self.textbox_changed()
    self.rule = (None, '')

    tt = _('Template language tutorial')
    self.template_tutorial.setText(
        '<a href="http://manual.calibre-ebook.com/template_lang.html">'
        '%s</a>' % tt)
    tt = _('Template function reference')
    self.template_func_reference.setText(
        '<a href="http://manual.calibre-ebook.com/template_ref.html">'
        '%s</a>' % tt)

    self.font_size_box.setValue(gprefs['gpm_template_editor_font_size'])
    self.font_size_box.valueChanged.connect(self.font_size_changed)
def convert(self, oeb_book, output, input_plugin, opts, logger):
    """Convert from calibre's internal format to KePub.

    The book is first written as an ePub by ``self.epub_output_plugin``
    and the result is then modified in place by ``common.modify_epub``.
    DRM-encumbered containers are left untouched.

    :param oeb_book: the book to convert.
    :param output: path of the output file.
    :param input_plugin: the input plugin, forwarded to the ePub plugin.
    :param opts: conversion options; the ``kepub_*`` options are read here.
    :param logger: unused in this method; ``common.log`` is used instead.
    :raises Exception: whatever ``common.modify_epub`` raises, after
        logging it.
    """
    common.log.debug("Running ePub conversion")
    self.epub_output_plugin.convert(
        oeb_book, output, input_plugin, opts, common.log
    )
    common.log.debug("Done ePub conversion")
    container = KEPubContainer(output, common.log, opts.kepub_clean_markup)

    if container.is_drm_encumbered:
        common.log.error("DRM-encumbered container, skipping conversion")
        return

    # Write the details file
    o = {
        "kepub_output_version": ".".join([str(n) for n in self.version]),
        "kepub_output_currenttime": datetime.utcnow().ctime(),
    }
    kte_data_file = self.temporary_file("_KePubOutputPluginInfo")
    try:
        kte_data_file.write(json.dumps(o).encode("UTF-8"))
    finally:
        # Always release the handle, even when the write fails
        kte_data_file.close()
    container.copy_file_to_container(
        kte_data_file.name, name="plugininfo.kte", mt="application/json"
    )

    # Rebuild minimal metadata from the OPF, falling back to the null values
    title = container.opf_xpath("./opf:metadata/dc:title/text()")
    if len(title) > 0:
        title = title[0]
    else:
        title = NULL_VALUES["title"]

    authors = container.opf_xpath(
        './opf:metadata/dc:creator[@opf:role="aut"]/text()'
    )
    if len(authors) < 1:
        authors = NULL_VALUES["authors"]

    mi = Metadata(title, authors)
    languages = container.opf_xpath("./opf:metadata/dc:language/text()")
    if len(languages) > 0:
        mi.languages = languages
    else:
        mi.languages = NULL_VALUES["languages"]

    try:
        common.modify_epub(
            container,
            output,
            metadata=mi,
            opts={
                "hyphenate": opts.kepub_hyphenate,
                "hyphen_min_chars": opts.kepub_hyphenate_chars,
                "hyphen_min_chars_before": opts.kepub_hyphenate_chars_before,
                "hyphen_min_chars_after": opts.kepub_hyphenate_chars_after,
                "hyphen_limit_lines": opts.kepub_hyphenate_limit_lines,
                "no-hyphens": opts.kepub_disable_hyphenation,
                "smarten_punctuation": False,
                "extended_kepub_features": True,
            },
        )
    except Exception:
        common.log.exception("Failed converting!")
        raise
def __init__(self, prefix, lpath, title=None, authors=None, mime=None,
             date=None, ContentType=None, thumbnail_name=None, size=None,
             other=None):
    """Create a book record for a Kobo device.

    :param prefix: forwarded to the base class constructor.
    :param lpath: forwarded to the base class constructor.
    :param title: book title; also copied onto *other* when given.
    :param authors: author string, parsed with ``authors_from_string``.
    :param mime: stored as ``self.mime``.
    :param date: date string; only parsed when *ContentType* is ``'6'``.
        Also copied onto *other* as ``published_date``.
    :param ContentType: content type code as a string.
    :param thumbnail_name: when given, wrapped in an ``ImageWrapper``.
    :param size: stored as ``self.size`` (may be set later if ``None``).
    :param other: optional metadata object forwarded to the base class.
    """
    from calibre.utils.date import parse_date
    # debug_print('Book::__init__ - title=', title)
    show_debug = title is not None and title.lower().find("xxxxx") >= 0
    if other is not None:
        other.title = title
        other.published_date = date
    if show_debug:
        debug_print("Book::__init__ - title=", title, 'authors=', authors)
        debug_print("Book::__init__ - other=", other)
    super(Book, self).__init__(prefix, lpath, size, other)

    if title is not None and len(title) > 0:
        self.title = title

    if authors is not None and len(authors) > 0:
        self.authors_from_string(authors)
        if self.author_sort is None or self.author_sort == "Unknown":
            self.author_sort = author_to_author_sort(authors)

    self.mime = mime

    self.size = size  # will be set later if None

    if ContentType == '6' and date is not None:
        # Ordered fallbacks: the first one that succeeds wins.
        attempts = (
            lambda: time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f"),
            lambda: time.strptime(date.split('+')[0], "%Y-%m-%dT%H:%M:%S"),
            lambda: time.strptime(date.split('+')[0], "%Y-%m-%d"),
            lambda: parse_date(date, assume_utc=True).timetuple(),
            lambda: time.gmtime(os.path.getctime(self.path)),
        )
        for attempt in attempts:
            try:
                self.datetime = attempt()
            except Exception:
                continue
            else:
                break
        else:
            # Nothing worked: fall back to the current time
            self.datetime = time.gmtime()

    self.kobo_metadata = Metadata(title, self.authors)
    self.contentID = None
    self.current_shelves = []
    self.kobo_collections = []
    self.can_put_on_shelves = True
    self.kobo_series = None
    # Kobo stores the series number as string. And it can have a leading "#".
    self.kobo_series_number = None
    self.kobo_subtitle = None

    if thumbnail_name is not None:
        self.thumbnail = ImageWrapper(thumbnail_name)

    if show_debug:
        debug_print("Book::__init__ end - self=", self)
        debug_print("Book::__init__ end - title=", title, 'authors=', authors)
def sample_results(self):
    """Return two sample Metadata objects for the same book.

    The first is richly populated (comments, tags, rating, cached cover);
    the second has an extra long title, used to test resizing.
    """
    rich = Metadata('The Great Gatsby', ['Francis Scott Fitzgerald'])
    rich.has_cached_cover_url = True
    rich.comments = 'Some comments ' * 10
    rich.tags = ['tag%d' % num for num in range(20)]
    rich.rating = 4.4
    rich.language = 'en'
    rich.pubdate = utcnow()
    rich.publisher = 'Publisher 1'

    sparse = Metadata(
        'The Great Gatsby - An extra long title to test resizing',
        ['F. Scott Fitzgerald'])
    sparse.has_cached_cover_url = False
    sparse.language = 'fr'
    sparse.pubdate = fromordinal(1000000)
    sparse.publisher = 'Publisher 2'

    return [rich, sparse]
def test_legacy_adding_books(self):  # {{{
    'Test various adding/deleting books methods'
    # Runs the same add/import/delete operations against the old database
    # interface (``old``) and the legacy wrapper (``legacy``), both opened
    # on ``self.cloned_library``. Statement order matters: later
    # assertions rely on books and formats created earlier.
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ptempfile import TemporaryFile
    legacy, old = self.init_legacy(self.cloned_library), self.init_old(
        self.cloned_library)
    mi = Metadata('Added Book0', authors=('Added Author', ))
    with TemporaryFile(suffix='.aff') as name:
        with open(name, 'wb') as f:
            f.write(b'xxx')
        # add_books, with and without returned ids / duplicate checking
        T = partial(ET, 'add_books', ([name], ['AFF'], [mi]), old=old,
                    legacy=legacy)
        T()(self)
        book_id = T(kwargs={'return_ids': True})(self)[1][0]
        self.assertEqual(legacy.new_api.formats(book_id), ('AFF', ))
        T(kwargs={'add_duplicates': False})(self)
        mi.title = 'Added Book1'
        mi.uuid = 'uuu'
        # import_book: the uuids are asserted to differ by default and to
        # match when preserve_uuid is passed
        T = partial(ET, 'import_book', (mi, [name]), old=old, legacy=legacy)
        book_id = T()(self)
        self.assertNotEqual(legacy.uuid(book_id, index_is_id=True),
                            old.uuid(book_id, index_is_id=True))
        book_id = T(kwargs={'preserve_uuid': True})(self)
        self.assertEqual(legacy.uuid(book_id, index_is_id=True),
                         old.uuid(book_id, index_is_id=True))
        self.assertEqual(legacy.new_api.formats(book_id), ('AFF', ))

        # add_format, repeated and with replace
        T = partial(ET, 'add_format', old=old, legacy=legacy)
        T((0, 'AFF', BytesIO(b'fffff')))(self)
        T((0, 'AFF', BytesIO(b'fffff')))(self)
        T((0, 'AFF', BytesIO(b'fffff')), {'replace': True})(self)
    with TemporaryFile(suffix='.opf') as name:
        with open(name, 'wb') as f:
            f.write(b'zzzz')
        # Importing only an .opf file is expected to leave the book
        # without formats
        T = partial(ET, 'import_book', (mi, [name]), old=old, legacy=legacy)
        book_id = T()(self)
        self.assertFalse(legacy.new_api.formats(book_id))

    mi.title = 'Added Book2'
    T = partial(ET, 'create_book_entry', (mi, ), old=old, legacy=legacy)
    # NOTE(review): unlike the calls above, these three results are never
    # invoked with ``(self)``, so they look like they only construct the
    # test object without running it -- confirm whether that is intended.
    T()
    T({'add_duplicates': False})
    T({'force_id': 1000})

    with TemporaryFile(suffix='.txt') as name:
        with open(name, 'wb') as f:
            f.write(b'tttttt')
        # add_catalog: same title yields the same book id in both
        # interfaces; a different title yields a larger id
        bid = legacy.add_catalog(name, 'My Catalog')
        self.assertEqual(old.add_catalog(name, 'My Catalog'), bid)
        cache = legacy.new_api
        self.assertEqual(cache.formats(bid), ('TXT', ))
        self.assertEqual(cache.field_for('title', bid), 'My Catalog')
        self.assertEqual(cache.field_for('authors', bid), ('calibre', ))
        self.assertEqual(cache.field_for('tags', bid), (_('Catalog'), ))
        self.assertTrue(bid < legacy.add_catalog(name, 'Something else'))
        self.assertEqual(legacy.add_catalog(name, 'My Catalog'), bid)
        self.assertEqual(old.add_catalog(name, 'My Catalog'), bid)

        # add_news
        bid = legacy.add_news(
            name, {
                'title': 'Events',
                'add_title_tag': True,
                'custom_tags': ('one', 'two')
            })
        self.assertEqual(cache.formats(bid), ('TXT', ))
        self.assertEqual(cache.field_for('authors', bid), ('calibre', ))
        self.assertEqual(cache.field_for('tags', bid),
                         (_('News'), 'Events', 'one', 'two'))

    # Cover handling: remove the cover of book 1, then set it on book 3
    self.assertTrue(legacy.cover(1, index_is_id=True))
    origcov = legacy.cover(1, index_is_id=True)
    self.assertTrue(legacy.has_cover(1))
    legacy.remove_cover(1)
    self.assertFalse(legacy.has_cover(1))
    self.assertFalse(legacy.cover(1, index_is_id=True))
    legacy.set_cover(3, origcov)
    self.assertEqual(legacy.cover(3, index_is_id=True), origcov)
    self.assertTrue(legacy.has_cover(3))

    # Format removal
    self.assertTrue(legacy.format(1, 'FMT1', index_is_id=True))
    legacy.remove_format(1, 'FMT1', index_is_id=True)
    self.assertIsNone(legacy.format(1, 'FMT1', index_is_id=True))

    # Book deletion
    legacy.delete_book(1)
    old.delete_book(1)
    self.assertNotIn(1, legacy.all_ids())
    legacy.dump_metadata((2, 3))
    old.close()
class TemplateDialog(QDialog, Ui_TemplateDialog):
    """Dialog for editing a template and previewing its value.

    Depending on the constructor arguments the dialog also lets the user
    pick the column for a coloring rule or the column, kind and icon file
    for an icon rule. After ``accept()`` the outcome is in ``self.rule``.
    """

    def __init__(self, parent, text, mi=None, fm=None, color_field=None,
                 icon_field_key=None, icon_rule_kind=None):
        """Build the dialog.

        :param parent: parent widget passed to ``QDialog.__init__``.
        :param text: initial template text; ``None`` leaves the editor
            empty.
        :param mi: metadata object the template is evaluated against. When
            falsy, a placeholder Metadata with sample values is built.
        :param fm: field metadata; supplies the selectable columns and,
            when a placeholder *mi* is built, the custom column
            definitions.
        :param color_field: when not ``None`` the dialog is in coloring
            mode and this column is pre-selected.
        :param icon_field_key: when not ``None`` (and *color_field* is
            ``None``) the dialog is in icon mode and this column is
            pre-selected.
        :param icon_rule_kind: icon rule kind to pre-select in icon mode.
        """
        QDialog.__init__(self, parent)
        Ui_TemplateDialog.__init__(self)
        self.setupUi(self)

        self.coloring = color_field is not None
        self.iconing = icon_field_key is not None

        cols = []
        if fm is not None:
            # Plain single-argument lambda: the parenthesised
            # ``lambda (k):`` form is tuple-parameter syntax, which
            # Python 3 rejects.
            for key in sorted(
                    displayable_columns(fm),
                    key=lambda k: sort_key(fm[k]['name']) if k != color_row_key else 0):
                if key == color_row_key and not self.coloring:
                    continue
                from calibre.gui2.preferences.coloring import all_columns_string
                name = all_columns_string if key == color_row_key else fm[key]['name']
                if name:
                    cols.append((name, key))

        self.color_layout.setVisible(False)
        self.icon_layout.setVisible(False)

        if self.coloring:
            self.color_layout.setVisible(True)
            for n1, k1 in cols:
                self.colored_field.addItem(n1, k1)
            self.colored_field.setCurrentIndex(
                self.colored_field.findData(color_field))
        elif self.iconing:
            self.icon_layout.setVisible(True)
            for n1, k1 in cols:
                self.icon_field.addItem(n1, k1)
            # Collect the .png files available in the cc_icons config folder
            self.icon_file_names = []
            d = os.path.join(config_dir, 'cc_icons')
            if os.path.exists(d):
                for icon_file in os.listdir(d):
                    icon_file = icu_lower(icon_file)
                    if os.path.exists(os.path.join(d, icon_file)):
                        if icon_file.endswith('.png'):
                            self.icon_file_names.append(icon_file)
            self.icon_file_names.sort(key=sort_key)
            self.update_filename_box()

            dex = 0
            from calibre.gui2.preferences.coloring import icon_rule_kinds
            for i, tup in enumerate(icon_rule_kinds):
                txt, val = tup
                self.icon_kind.addItem(txt, userData=QVariant(val))
                if val == icon_rule_kind:
                    dex = i
            self.icon_kind.setCurrentIndex(dex)
            self.icon_field.setCurrentIndex(
                self.icon_field.findData(icon_field_key))

        if mi:
            self.mi = mi
        else:
            # Placeholder book used to evaluate the template
            self.mi = Metadata(_('Title'), [_('Author')])
            self.mi.author_sort = _('Author Sort')
            self.mi.series = _('Series')
            self.mi.series_index = 3
            self.mi.rating = 4.0
            self.mi.tags = [_('Tag 1'), _('Tag 2')]
            self.mi.languages = ['eng']
            if fm is not None:
                self.mi.set_all_user_metadata(fm.custom_field_metadata())
            else:
                # No field metadata. Grab a copy from the current library so
                # that we can validate any custom column names. The values for
                # the columns will all be empty, which in some very unusual
                # cases might cause formatter errors. We can live with that.
                from calibre.gui2.ui import get_gui
                self.mi.set_all_user_metadata(
                    get_gui().current_db.new_api.field_metadata.custom_field_metadata())

        # Remove help icon on title bar
        icon = self.windowIcon()
        self.setWindowFlags(self.windowFlags() & (~Qt.WindowContextHelpButtonHint))
        self.setWindowIcon(icon)

        self.last_text = ''
        self.highlighter = TemplateHighlighter(self.textbox.document())
        self.textbox.cursorPositionChanged.connect(self.text_cursor_changed)
        self.textbox.textChanged.connect(self.textbox_changed)

        self.textbox.setTabStopWidth(10)
        self.source_code.setTabStopWidth(10)
        self.documentation.setReadOnly(True)
        self.source_code.setReadOnly(True)

        if text is not None:
            self.textbox.setPlainText(text)
        self.buttonBox.button(QDialogButtonBox.Ok).setText(_('&OK'))
        self.buttonBox.button(QDialogButtonBox.Cancel).setText(_('&Cancel'))
        self.color_copy_button.clicked.connect(self.color_to_clipboard)
        self.filename_button.clicked.connect(self.filename_button_clicked)
        self.icon_copy_button.clicked.connect(self.icon_to_clipboard)

        try:
            # No ``encoding`` keyword: json.load already assumes UTF-8, and
            # the keyword is rejected by newer Python versions, which would
            # silently empty this dict through the handler below.
            with open(P('template-functions.json'), 'rb') as f:
                self.builtin_source_dict = json.load(f)
        except Exception:
            # Best effort: without the file, built-in sources are simply
            # not available for display.
            self.builtin_source_dict = {}

        self.funcs = formatter_functions().get_functions()
        self.builtins = formatter_functions().get_builtins()

        func_names = sorted(self.funcs)
        self.function.clear()
        self.function.addItem('')
        self.function.addItems(func_names)
        self.function.setCurrentIndex(0)
        self.function.currentIndexChanged[str].connect(self.function_changed)
        self.textbox_changed()
        self.rule = (None, '')

        tt = _('Template language tutorial')
        self.template_tutorial.setText(
            '<a href="http://manual.calibre-ebook.com/template_lang.html">'
            '%s</a>' % tt)
        tt = _('Template function reference')
        self.template_func_reference.setText(
            '<a href="http://manual.calibre-ebook.com/template_ref.html">'
            '%s</a>' % tt)

        self.font_size_box.setValue(gprefs['gpm_template_editor_font_size'])
        self.font_size_box.valueChanged.connect(self.font_size_changed)

    def font_size_changed(self, toWhat):
        """Persist the new editor font size and re-run the highlighter."""
        gprefs['gpm_template_editor_font_size'] = toWhat
        self.highlighter.initializeFormats()
        self.highlighter.rehighlight()

    def filename_button_clicked(self):
        """Let the user pick an image and register it as a rule icon.

        The chosen image is rendered to a 128x128 pixmap and saved as a
        PNG in the ``cc_icons`` config folder, unless a file with that
        name already exists there. Errors are printed, never raised.
        """
        try:
            path = choose_files(
                self, 'choose_category_icon', _('Select Icon'),
                filters=[('Images', ['png', 'gif', 'jpg', 'jpeg'])],
                all_files=False, select_only_single_file=True)
            if path:
                icon_path = path[0]
                icon_name = sanitize_file_name_unicode(
                    os.path.splitext(os.path.basename(icon_path))[0] + '.png')
                if icon_name not in self.icon_file_names:
                    self.icon_file_names.append(icon_name)
                    self.update_filename_box()
                    try:
                        p = QIcon(icon_path).pixmap(QSize(128, 128))
                        d = os.path.join(config_dir, 'cc_icons')
                        if not os.path.exists(os.path.join(d, icon_name)):
                            if not os.path.exists(d):
                                os.makedirs(d)
                            with open(os.path.join(d, icon_name), 'wb') as f:
                                f.write(pixmap_to_data(p, format='PNG'))
                    except Exception:
                        traceback.print_exc()
                self.icon_files.setCurrentIndex(
                    self.icon_files.findText(icon_name))
                self.icon_files.adjustSize()
        except Exception:
            traceback.print_exc()
        return

    def update_filename_box(self):
        """Repopulate the icon file combo box from ``icon_file_names``."""
        self.icon_files.clear()
        self.icon_file_names.sort(key=sort_key)
        self.icon_files.addItem('')
        self.icon_files.addItems(self.icon_file_names)
        for i, filename in enumerate(self.icon_file_names):
            icon = QIcon(os.path.join(config_dir, 'cc_icons', filename))
            # Offset by one: index 0 is the empty entry added above
            self.icon_files.setItemIcon(i + 1, icon)

    def color_to_clipboard(self):
        """Copy the currently selected color name to the clipboard."""
        app = QApplication.instance()
        c = app.clipboard()
        c.setText(unicode(self.color_name.color))

    def icon_to_clipboard(self):
        """Copy the currently selected icon file name to the clipboard."""
        app = QApplication.instance()
        c = app.clipboard()
        c.setText(unicode(self.icon_files.currentText()))

    def textbox_changed(self):
        """Re-evaluate the template when the editor text actually changed."""
        cur_text = unicode(self.textbox.toPlainText())
        if self.last_text != cur_text:
            self.last_text = cur_text
            self.highlighter.regenerate_paren_positions()
            self.text_cursor_changed()
            self.template_value.setText(SafeFormat().safe_format(
                cur_text, self.mi, _('EXCEPTION: '), self.mi))

    def text_cursor_changed(self):
        """Tell the highlighter which character precedes the cursor."""
        cursor = self.textbox.textCursor()
        position = cursor.position()
        t = unicode(self.textbox.toPlainText())
        if position > 0 and position <= len(t):
            block_number = cursor.blockNumber()
            pos_in_block = cursor.positionInBlock() - 1
            self.highlighter.check_cursor_pos(t[position - 1], block_number,
                                              pos_in_block)

    def function_changed(self, toWhat):
        """Show documentation and source for the selected function."""
        name = unicode(toWhat)
        self.source_code.clear()
        self.documentation.clear()
        if name in self.funcs:
            self.documentation.setPlainText(self.funcs[name].doc)
            # Built-ins take their source from the loaded JSON dict when
            # available; everything else uses the function's program text
            if name in self.builtins and name in self.builtin_source_dict:
                self.source_code.setPlainText(self.builtin_source_dict[name])
            else:
                self.source_code.setPlainText(self.funcs[name].program_text)

    def accept(self):
        """Validate the input, store the result in ``self.rule`` and close.

        ``self.rule`` becomes ``(column, template)`` in coloring mode,
        ``(kind, column, template)`` in icon mode and ``('', template)``
        otherwise. In coloring mode a missing column or empty template
        shows an error dialog and keeps the dialog open.
        """
        txt = unicode(self.textbox.toPlainText()).rstrip()
        if self.coloring:
            if self.colored_field.currentIndex() == -1:
                error_dialog(self, _('No column chosen'),
                             _('You must specify a column to be colored'),
                             show=True)
                return
            if not txt:
                error_dialog(self, _('No template provided'),
                             _('The template box cannot be empty'),
                             show=True)
                return

            self.rule = (unicode(
                self.colored_field.itemData(
                    self.colored_field.currentIndex()).toString()), txt)
        elif self.iconing:
            rt = unicode(
                self.icon_kind.itemData(
                    self.icon_kind.currentIndex()).toString())
            self.rule = (rt, unicode(
                self.icon_field.itemData(
                    self.icon_field.currentIndex()).toString()), txt)
        else:
            self.rule = ('', txt)
        QDialog.accept(self)
def test_legacy_setters(self):  # {{{
    'Test methods that are directly equivalent in the old and new interface'
    # Each run_funcs() call receives the old database (db), the legacy
    # wrapper (ndb) and an ordered table of calls. The first element of
    # each entry is a method name or a callable, the rest are arguments.
    # NOTE(review): the '+', '@' and '#' name prefixes are interpreted by
    # run_funcs, which is not visible here -- see its definition for
    # their meaning.
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import now
    n = now()
    ndb = self.init_legacy(self.cloned_library)
    amap = ndb.new_api.get_id_map('authors')
    sorts = [(aid, 's%d' % aid) for aid in amap]
    db = self.init_old(self.cloned_library)
    # Tags, last-modified stamps and per-author sort/link fields
    run_funcs(self, db, ndb, (
        ('+format_metadata', 1, 'FMT1', itemgetter('size')),
        ('+format_metadata', 1, 'FMT2', itemgetter('size')),
        ('+format_metadata', 2, 'FMT1', itemgetter('size')),
        ('get_tags', 0), ('get_tags', 1), ('get_tags', 2),
        ('is_tag_used', 'News'), ('is_tag_used', 'xchkjgfh'),
        ('bulk_modify_tags', (1, ), ['t1'], ['News']),
        ('bulk_modify_tags', (2, ), ['t1'], ['Tag One', 'Tag Two']),
        ('bulk_modify_tags', (3, ), ['t1', 't2', 't3']),
        (db.clean, ),
        ('@all_tags', ), ('@tags', 0), ('@tags', 1), ('@tags', 2),
        ('unapply_tags', 1, ['t1']),
        ('unapply_tags', 2, ['xxxx']),
        ('unapply_tags', 3, ['t2', 't3']),
        (db.clean, ),
        ('@all_tags', ), ('@tags', 0), ('@tags', 1), ('@tags', 2),
        ('update_last_modified', (1, ), True, n),
        ('update_last_modified', (3, ), True, n),
        ('metadata_last_modified', 1, True),
        ('metadata_last_modified', 3, True),
        ('set_sort_field_for_author', sorts[0][0], sorts[0][1]),
        ('set_sort_field_for_author', sorts[1][0], sorts[1][1]),
        ('set_sort_field_for_author', sorts[2][0], sorts[2][1]),
        ('set_link_field_for_author', sorts[0][0], sorts[0][1]),
        ('set_link_field_for_author', sorts[1][0], sorts[1][1]),
        ('set_link_field_for_author', sorts[2][0], sorts[2][1]),
        (db.refresh, ),
        ('author_sort', 0), ('author_sort', 1), ('author_sort', 2),
    ))
    # The author sort and link maps must agree between both interfaces
    omi = [db.get_metadata(x) for x in (0, 1, 2)]
    nmi = [ndb.get_metadata(x) for x in (0, 1, 2)]
    self.assertEqual([x.author_sort_map for x in omi],
                     [x.author_sort_map for x in nmi])
    self.assertEqual([x.author_link_map for x in omi],
                     [x.author_link_map for x in nmi])
    db.close()

    # Per-field setters followed by the matching getters
    ndb = self.init_legacy(self.cloned_library)
    db = self.init_old(self.cloned_library)
    run_funcs(self, db, ndb, (
        ('set_authors', 1, ('author one', ), ),
        ('set_authors', 2, ('author two', ), True, True, True),
        ('set_author_sort', 3, 'new_aus'),
        ('set_comment', 1, ''), ('set_comment', 2, None),
        ('set_comment', 3, '<p>a comment</p>'),
        ('set_has_cover', 1, True), ('set_has_cover', 2, True),
        ('set_has_cover', 3, 1),
        ('set_identifiers', 2, {'test': '', 'a': 'b'}),
        ('set_identifiers', 3, {'id': '1', 'isbn': '9783161484100'}),
        ('set_identifiers', 1, {}),
        ('set_languages', 1, ('en', )),
        ('set_languages', 2, ()),
        ('set_languages', 3, ('deu', 'spa', 'fra')),
        ('set_pubdate', 1, None), ('set_pubdate', 2, '2011-1-7'),
        ('set_series', 1, 'a series one'),
        ('set_series', 2, 'another series [7]'),
        ('set_series', 3, 'a third series'),
        ('set_publisher', 1, 'publisher two'),
        ('set_publisher', 2, None),
        ('set_publisher', 3, 'a third puB'),
        ('set_rating', 1, 2.3), ('set_rating', 2, 0), ('set_rating', 3, 8),
        ('set_timestamp', 1, None), ('set_timestamp', 2, '2011-1-7'),
        ('set_uuid', 1, None), ('set_uuid', 2, 'a test uuid'),
        ('set_title', 1, 'title two'), ('set_title', 2, None),
        ('set_title', 3, 'The Test Title'),
        ('set_tags', 1, ['a1', 'a2'], True),
        ('set_tags', 2, ['b1', 'tag one'], False, False, False, True),
        ('set_tags', 3, ['A1']),
        (db.refresh, ),
        ('title', 0), ('title', 1), ('title', 2),
        ('title_sort', 0), ('title_sort', 1), ('title_sort', 2),
        ('authors', 0), ('authors', 1), ('authors', 2),
        ('author_sort', 0), ('author_sort', 1), ('author_sort', 2),
        ('has_cover', 3), ('has_cover', 1), ('has_cover', 2),
        ('get_identifiers', 0), ('get_identifiers', 1),
        ('get_identifiers', 2),
        ('pubdate', 0), ('pubdate', 1), ('pubdate', 2),
        ('timestamp', 0), ('timestamp', 1), ('timestamp', 2),
        ('publisher', 0), ('publisher', 1), ('publisher', 2),
        ('rating', 0), ('+rating', 1, lambda x: x or 0), ('rating', 2),
        ('series', 0), ('series', 1), ('series', 2),
        ('series_index', 0), ('series_index', 1), ('series_index', 2),
        ('uuid', 0), ('uuid', 1), ('uuid', 2),
        ('isbn', 0), ('isbn', 1), ('isbn', 2),
        ('@tags', 0), ('@tags', 1), ('@tags', 2),
        ('@all_tags', ),
        ('@get_all_identifier_types', ),

        ('set_title_sort', 1, 'Title Two'), ('set_title_sort', 2, None),
        ('set_title_sort', 3, 'The Test Title_sort'),
        ('set_series_index', 1, 2.3), ('set_series_index', 2, 0),
        ('set_series_index', 3, 8),
        ('set_identifier', 1, 'moose', 'val'),
        ('set_identifier', 2, 'test', ''),
        ('set_identifier', 3, '', ''),
        (db.refresh, ),
        ('series_index', 0), ('series_index', 1), ('series_index', 2),
        ('title_sort', 0), ('title_sort', 1), ('title_sort', 2),
        ('get_identifiers', 0), ('get_identifiers', 1),
        ('get_identifiers', 2),
        ('@get_all_identifier_types', ),

        ('set_metadata', 1, Metadata(
            'title', ('a1', )), False, False, False, True, True),
        ('set_metadata', 3, Metadata('title', ('a1', ))),
        (db.refresh, ),
        ('title', 0), ('title', 1), ('title', 2),
        ('title_sort', 0), ('title_sort', 1), ('title_sort', 2),
        ('authors', 0), ('authors', 1), ('authors', 2),
        ('author_sort', 0), ('author_sort', 1), ('author_sort', 2),
        ('@tags', 0), ('@tags', 1), ('@tags', 2),
        ('@all_tags', ),
        ('@get_all_identifier_types', ),
    ))
    db.close()

    # Generic set() and delete_tag()
    ndb = self.init_legacy(self.cloned_library)
    db = self.init_old(self.cloned_library)
    run_funcs(self, db, ndb, (
        ('set', 0, 'title', 'newtitle'),
        ('set', 0, 'tags', 't1,t2,tag one', True),
        ('set', 0, 'authors', 'author one & Author Two', True),
        ('set', 0, 'rating', 3.2),
        ('set', 0, 'publisher', 'publisher one', False),
        (db.refresh, ),
        ('title', 0),
        ('rating', 0),
        ('#tags', 0), ('#tags', 1), ('#tags', 2),
        ('authors', 0), ('authors', 1), ('authors', 2),
        ('publisher', 0), ('publisher', 1), ('publisher', 2),
        ('delete_tag', 'T1'), ('delete_tag', 'T2'),
        ('delete_tag', 'Tag one'), ('delete_tag', 'News'),
        (db.clean, ), (db.refresh, ),
        ('@all_tags', ),
        ('#tags', 0), ('#tags', 1), ('#tags', 2),
    ))
    db.close()

    # remove_all_tags()
    ndb = self.init_legacy(self.cloned_library)
    db = self.init_old(self.cloned_library)
    run_funcs(self, db, ndb, (
        ('remove_all_tags', (1, 2, 3)),
        (db.clean, ),
        ('@all_tags', ),
        ('@tags', 0), ('@tags', 1), ('@tags', 2),
    ))
    db.close()

    # Renaming items: invert the id maps to find each item's id by name
    ndb = self.init_legacy(self.cloned_library)
    db = self.init_old(self.cloned_library)
    a = {v: k for k, v in ndb.new_api.get_id_map('authors').iteritems()
         }['Author One']
    t = {v: k for k, v in ndb.new_api.get_id_map('tags').iteritems()}['Tag One']
    s = {v: k for k, v in ndb.new_api.get_id_map('series').iteritems()
         }['A Series One']
    p = {v: k for k, v in ndb.new_api.get_id_map('publisher').iteritems()
         }['Publisher One']
    run_funcs(self, db, ndb, (
        ('rename_author', a, 'Author Two'),
        ('rename_tag', t, 'News'),
        ('rename_series', s, 'ss'),
        ('rename_publisher', p, 'publisher one'),
        (db.clean, ),
        (db.refresh, ),
        ('@all_tags', ),
        ('tags', 0), ('tags', 1), ('tags', 2),
        ('series', 0), ('series', 1), ('series', 2),
        ('publisher', 0), ('publisher', 1), ('publisher', 2),
        ('series_index', 0), ('series_index', 1), ('series_index', 2),
        ('authors', 0), ('authors', 1), ('authors', 2),
        ('author_sort', 0), ('author_sort', 1), ('author_sort', 2),
    ))
    db.close()
def parse_details(self, root):
    """Build a Metadata record from a Kyobobook details page and queue it.

    Title, authors and the Kyobobook id are mandatory: if any of them cannot
    be parsed, the failure is logged and nothing is queued.  Every other
    field (ISBN, rating, comments, cover, tags, publisher/date, language) is
    best-effort -- a failure is logged and the remaining fields are still
    processed.

    Only ``Exception`` subclasses are treated as parse failures, so
    ``SystemExit``/``KeyboardInterrupt`` are no longer swallowed.

    :param root: the page object handed unchanged to each ``parse_*`` helper
        (presumably a parsed HTML tree -- confirm against the caller).
    :return: ``None``; the result is delivered through ``self.result_queue``.
    """
    # --- mandatory fields -------------------------------------------------
    try:
        kyobobook_id = self.parse_kyobobook_id(self.url)
    except Exception:
        self.log.exception('Error parsing Kyobobook id for url: %r' % self.url)
        kyobobook_id = None

    try:
        (title, series, series_index) = self.parse_title_series(root)
    except Exception:
        self.log.exception('Error parsing title and series for url: %r' % self.url)
        title = series = series_index = None

    try:
        authors = self.parse_authors(root)
    except Exception:
        self.log.exception('Error parsing authors for url: %r' % self.url)
        authors = []

    if not title or not authors or not kyobobook_id:
        self.log.error('Could not find title/authors/kyobobook id for %r' % self.url)
        self.log.error('Kyobobook: %r Title: %r Authors: %r' % (kyobobook_id, title, authors))
        return

    mi = Metadata(title, authors)
    if series:
        mi.series = series
        mi.series_index = series_index
    mi.set_identifier('kyobobook', kyobobook_id)
    self.kyobobook_id = kyobobook_id

    # --- optional fields: log and carry on when one of them fails ---------
    try:
        isbn = self.parse_isbn(root)
        if isbn:
            self.isbn = mi.isbn = isbn
    except Exception:
        self.log.exception('Error parsing ISBN for url: %r' % self.url)

    try:
        mi.rating = self.parse_rating(root)
    except Exception:
        self.log.exception('Error parsing ratings for url: %r' % self.url)

    try:
        mi.comments = self.parse_comments(root)
    except Exception:
        self.log.exception('Error parsing comments for url: %r' % self.url)

    try:
        self.cover_url = self.parse_cover(root)
    except Exception:
        self.log.exception('Error parsing cover for url: %r' % self.url)
    # NOTE(review): relies on self.cover_url / self.isbn having been
    # initialised elsewhere (e.g. in __init__) when the parse above failed.
    mi.has_cover = bool(self.cover_url)

    try:
        tags = self.parse_tags(root)
        if tags:
            mi.tags = tags
    except Exception:
        self.log.exception('Error parsing tags for url: %r' % self.url)

    try:
        mi.publisher, mi.pubdate = self.parse_publisher_and_date(root)
    except Exception:
        self.log.exception('Error parsing publisher and date for url: %r' % self.url)

    try:
        lang = self._parse_language(root)
        if lang:
            mi.language = lang
    except Exception:
        self.log.exception('Error parsing language for url: %r' % self.url)

    mi.source_relevance = self.relevance

    # Record the id mappings with the plugin so later lookups can reuse them.
    if self.kyobobook_id:
        if self.isbn:
            self.plugin.cache_isbn_to_identifier(self.isbn, self.kyobobook_id)
        if self.cover_url:
            self.plugin.cache_identifier_to_cover_url(
                self.kyobobook_id, self.cover_url)

    self.plugin.clean_downloaded_metadata(mi)
    self.result_queue.put(mi)
def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
    """Turn an ISBNDb response into a list of Metadata candidates.

    A ``BookData`` entry is kept only if it has a valid ISBN or ISBN-13,
    matches the ISBN in ``identifiers`` (when one was given), has a title
    and at least one author, has not been seen before, matches the query's
    title/author tokens, and is not from a publisher whose name contains
    "audio".

    :param feed: response element; must contain a ``BookList`` child.
    :param seen: set of ``(title, tuple(authors))`` keys already handled;
        new keys are added to it in place.
    :param orig_title: title from the query, used to derive title tokens.
    :param orig_authors: authors from the query, used to derive author tokens.
    :param identifiers: mapping of identifiers; only ``'isbn'`` is read.
    :return: ``(total_results, shown_results, results)`` where the two counts
        come from the ``BookList`` attributes and ``results`` is the list of
        Metadata objects built here.
    :raises ValueError: if the response has no ``BookList`` element.
    """
    from lxml import etree

    def tostring(x):
        # Text content of an element, or '' when the element is absent.
        if x is None:
            return ''
        return etree.tostring(x, method='text', encoding=unicode).strip()

    orig_isbn = identifiers.get('isbn', None)
    title_tokens = list(self.get_title_tokens(orig_title))
    author_tokens = list(self.get_author_tokens(orig_authors))
    results = []

    def ismatch(title, authors):
        # An empty token list matches everything; otherwise one hit suffices.
        authors = lower(' '.join(authors))
        title = lower(title)
        title_ok = not title_tokens or any(
            lower(t) in title for t in title_tokens)
        author_ok = not author_tokens or any(
            lower(a) in authors for a in author_tokens)
        return title_ok and author_ok

    bl = feed.find('BookList')
    if bl is None:
        err = tostring(feed.find('errormessage'))
        raise ValueError('ISBNDb query failed:' + err)
    total_results = int(bl.get('total_results'))
    shown_results = int(bl.get('shown_results'))

    for bd in bl.xpath('.//BookData'):
        isbn = check_isbn(bd.get('isbn', None))
        isbn13 = check_isbn(bd.get('isbn13', None))
        if not isbn and not isbn13:
            continue
        if orig_isbn and orig_isbn not in {isbn, isbn13}:
            continue

        title = tostring(bd.find('Title'))
        if not title:
            continue

        authors = []
        for au in bd.xpath('.//Authors/Person'):
            au = tostring(au)
            if au:
                if ',' in au:
                    # "Last, First" -> "First Last".  Avoid binding ``_``
                    # here so the translation function is not shadowed.
                    last, first = au.split(',', 1)
                    au = first.strip() + ' ' + last.strip()
                authors.append(au)
        if not authors:
            continue

        comments = tostring(bd.find('Summary'))

        id_ = (title, tuple(authors))
        if id_ in seen:
            continue
        seen.add(id_)
        if not ismatch(title, authors):
            continue

        publisher = tostring(bd.find('PublisherText'))
        if not publisher:
            publisher = None
        if publisher and 'audio' in publisher.lower():
            continue

        mi = Metadata(title, authors)
        # Entries carrying only a valid ISBN-13 pass the filter above, so
        # fall back to it instead of storing an empty ISBN.
        mi.isbn = isbn or isbn13
        mi.publisher = publisher
        mi.comments = comments
        results.append(mi)

    return total_results, shown_results, results
def start(self, title, authors, identifiers):
    """Start the covers widget for the given book and run this dialog.

    A Metadata object is built from ``title`` and ``authors``, tagged with
    ``identifiers``, and handed to ``self.covers_widget.start`` together
    with ``self.current_cover``.

    :return: whatever ``self.exec_()`` returns.
    """
    mi = Metadata(title, authors)
    mi.identifiers = identifiers
    # NOTE(review): the trailing empty dict is passed as-is; its meaning is
    # defined by covers_widget.start -- confirm against that signature.
    self.covers_widget.start(mi, self.current_cover, title, authors, {})
    outcome = self.exec_()
    return outcome