def do_map(self, book_ids, selected):
    from calibre.ebooks.metadata.author_mapper import compile_rules, map_authors
    from calibre.gui2.author_mapper import RulesDialog
    from calibre.gui2.device import BusyCursor
    d = RulesDialog(self.gui)
    d.setWindowTitle(ngettext(
        'Map authors for one book in the library',
        'Map authors for {} books in the library',
        len(book_ids)).format(len(book_ids)))
    d.rules = gprefs.get('library-author-mapper-ruleset', ())
    txt = ngettext(
        'The changes will be applied to the <b>selected book</b>',
        'The changes will be applied to the <b>{} selected books</b>',
        len(book_ids)) if selected else ngettext(
        'The changes will be applied to <b>one book in the library</b>',
        'The changes will be applied to <b>{} books in the library</b>',
        len(book_ids))
    d.edit_widget.msg_label.setText(d.edit_widget.msg_label.text() + '<p>' + txt.format(len(book_ids)))
    if d.exec_() != QDialog.DialogCode.Accepted:
        return
    with BusyCursor():
        rules = d.rules
        gprefs.set('library-author-mapper-ruleset', rules)
        rules = compile_rules(rules)
        db = self.gui.current_db.new_api
        author_map = db.all_field_for('authors', book_ids)
        changed_author_map = {}
        changed_author_sort_map = {}
        for book_id, authors in iteritems(author_map):
            authors = list(authors)
            new_authors = map_authors(authors, rules)
            if authors != new_authors:
                changed_author_map[book_id] = new_authors
                changed_author_sort_map[book_id] = db.author_sort_from_authors(new_authors)
        if changed_author_map:
            db.set_field('authors', changed_author_map)
            db.set_field('author_sort', changed_author_sort_map)
            self.gui.library_view.model().refresh_ids(
                tuple(changed_author_map),
                current_row=self.gui.library_view.currentIndex().row())
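
# A minimal sketch of driving the author mapper without the dialog. The rule
# dictionary assumes the shape RulesDialog produces (keys such as
# 'match_type', 'query', 'action', 'replace', mirroring the tag mapper);
# treat the exact keys and values as an assumption, not a documented contract.
def example_map_authors():
    from calibre.ebooks.metadata.author_mapper import compile_rules, map_authors
    rules = compile_rules((
        {'match_type': 'one_of', 'query': 'Unknown', 'action': 'replace', 'replace': 'Anonymous'},
    ))
    # map_authors() returns a new list; the input list is not modified
    return map_authors(['Unknown', 'Jane Doe'], rules)
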
def process_result(self, group_id, result):
    if result.err:
        mi = self.report_metadata_failure(group_id, result.traceback)
        paths = self.file_groups[group_id]
        has_cover = False
        duplicate_info = set() if self.add_formats_to_existing else False
    else:
        paths, opf, has_cover, duplicate_info = result.value
        try:
            mi = OPF(BytesIO(opf), basedir=self.tdir,
                     populate_spine=False, try_to_guess_cover=False).to_book_metadata()
            mi.read_metadata_failed = False
        except Exception:
            mi = self.report_metadata_failure(group_id, traceback.format_exc())

    if mi.is_null('title'):
        for path in paths:
            mi.title = os.path.splitext(os.path.basename(path))[0]
            break
    if mi.application_id == '__calibre_dummy__':
        mi.application_id = None
    if gprefs.get('tag_map_on_add_rules'):
        from calibre.ebooks.metadata.tag_mapper import map_tags
        mi.tags = map_tags(mi.tags, gprefs['tag_map_on_add_rules'])
    if self.author_map_rules:
        from calibre.ebooks.metadata.author_mapper import map_authors
        new_authors = map_authors(mi.authors, self.author_map_rules)
        if new_authors != mi.authors:
            mi.authors = new_authors
            if self.db is None:
                mi.author_sort = authors_to_sort_string(mi.authors)
            else:
                mi.author_sort = self.db.author_sort_from_authors(mi.authors)

    self.pd.msg = mi.title

    cover_path = os.path.join(self.tdir, '%s.cdata' % group_id) if has_cover else None

    if self.db is None:
        if paths:
            self.items.append((mi, cover_path, paths))
        return

    if self.add_formats_to_existing:
        identical_book_ids = find_identical_books(mi, self.find_identical_books_data)
        if identical_book_ids:
            try:
                self.merge_books(mi, cover_path, paths, identical_book_ids)
            except Exception:
                a = self.report.append
                a(''), a('-' * 70)
                a(_('Failed to merge the book: ') + mi.title)
                for f in paths:
                    a('\t' + f)
                a(_('With error:')), a(traceback.format_exc())
        else:
            self.add_book(mi, cover_path, paths)
    else:
        if duplicate_info or icu_lower(mi.title or _('Unknown')) in self.added_duplicate_info:
            self.duplicates.append((mi, cover_path, paths))
        else:
            self.add_book(mi, cover_path, paths)
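
# A sketch of the duplicate lookup used above, outside this class. It assumes
# the helper find_identical_books() from calibre.db.utils and the new-API
# method data_for_find_identical_books(); verify both names against your
# calibre version before relying on them.
def example_identical_lookup(db, mi):
    from calibre.db.utils import find_identical_books
    data = db.data_for_find_identical_books()  # pre-computed once, reused per book
    return find_identical_books(mi, data)  # set of matching book ids, possibly empty
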
def identify(log, abort,  # {{{
        title=None, authors=None, identifiers={}, timeout=30,
        allowed_plugins=None):
    if title == _('Unknown'):
        title = None
    if authors == [_('Unknown')]:
        authors = None
    start_time = time.time()

    plugins = [p for p in metadata_plugins(['identify'])
               if p.is_configured() and (allowed_plugins is None or p.name in allowed_plugins)]

    kwargs = {
        'title': title,
        'authors': authors,
        'identifiers': identifiers,
        'timeout': timeout,
    }

    log('Running identify query with parameters:')
    log(kwargs)
    log('Using plugins:', ', '.join(['%s %s' % (p.name, p.version) for p in plugins]))
    log('The log from individual plugins is below')

    workers = [Worker(p, kwargs, abort) for p in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    results = {}
    for p in plugins:
        results[p] = []
    logs = {w.plugin: w.buf for w in workers}

    def get_results():
        found = False
        for w in workers:
            try:
                result = w.rq.get_nowait()
            except Empty:
                pass
            else:
                results[w.plugin].append(result)
                found = True
        return found

    wait_time = msprefs['wait_after_first_identify_result']
    while True:
        time.sleep(0.2)

        if get_results() and first_result_at is None:
            first_result_at = time.time()

        if not is_worker_alive(workers):
            break

        if (first_result_at is not None and
                time.time() - first_result_at > wait_time):
            log.warn('Not waiting any longer for more results. Still running'
                     ' sources:')
            for worker in workers:
                if worker.is_alive():
                    log.debug('\t' + worker.name)
            abort.set()
            break

    while not abort.is_set() and get_results():
        pass

    sort_kwargs = dict(kwargs)
    for k in list(sort_kwargs):
        if k not in ('title', 'authors', 'identifiers'):
            sort_kwargs.pop(k)

    longest, lp = -1, ''
    for plugin, presults in results.items():
        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))

        # Throw away lower priority results from the same source that have
        # exactly the same title and authors as a higher priority result
        filter_results = set()
        filtered_results = []
        for r in presults:
            key = (r.title, tuple(r.authors))
            if key not in filter_results:
                filtered_results.append(r)
                filter_results.add(key)
        results[plugin] = presults = filtered_results

        plog = logs[plugin].getvalue().strip()
        log('\n' + '*' * 30, plugin.name, '%s' % (plugin.version,), '*' * 30)
        log('Found %d results' % len(presults))
        time_spent = getattr(plugin, 'dl_time_spent', None)
        if time_spent is None:
            log('Downloading was aborted')
            longest, lp = -1, plugin.name
        else:
            log('Downloading from', plugin.name, 'took', time_spent)
            if time_spent > longest:
                longest, lp = time_spent, plugin.name
        for r in presults:
            log('\n\n---')
            try:
                log(str(r))
            except TypeError:
                log(repr(r))
        if plog:
            log(plog)
        log('\n' + '*' * 80)

        dummy = Metadata(_('Unknown'))
        for i, result in enumerate(presults):
            for f in plugin.prefs['ignore_fields']:
                if ':' not in f:
                    setattr(result, f, getattr(dummy, f))
                if f == 'series':
                    result.series_index = dummy.series_index
            result.relevance_in_source = i
            result.has_cached_cover_url = (
                plugin.cached_cover_url_is_reliable and
                plugin.get_cached_cover_url(result.identifiers) is not None)
            result.identify_plugin = plugin
            if msprefs['txt_comments']:
                if plugin.has_html_comments and result.comments:
                    result.comments = html2text(result.comments)

    log('The identify phase took %.2f seconds' % (time.time() - start_time))
    log('The longest time (%f) was taken by:' % longest, lp)
    log('Merging results from different sources')
    start_time = time.time()
    results = merge_identify_results(results, log)

    log('We have %d merged results, merging took: %.2f seconds' %
        (len(results), time.time() - start_time))
    tm_rules = msprefs['tag_map_rules']
    if tm_rules:
        from calibre.ebooks.metadata.tag_mapper import map_tags
    am_rules = msprefs['author_map_rules']
    if am_rules:
        from calibre.ebooks.metadata.author_mapper import compile_rules, map_authors
        am_rules = compile_rules(am_rules)

    max_tags = msprefs['max_tags']
    for r in results:
        if tm_rules:
            r.tags = map_tags(r.tags, tm_rules)
        r.tags = r.tags[:max_tags]
        if getattr(r.pubdate, 'year', 2000) <= UNDEFINED_DATE.year:
            r.pubdate = None

    if msprefs['swap_author_names']:
        def swap_to_ln_fn(a):
            if ',' in a:
                return a
            parts = a.split()
            if len(parts) <= 1:
                return a
            surname = parts[-1]
            return '%s, %s' % (surname, ' '.join(parts[:-1]))

        for r in results:
            r.authors = [swap_to_ln_fn(a) for a in r.authors]

    if am_rules:
        for r in results:
            new_authors = map_authors(r.authors, am_rules)
            if new_authors != r.authors:
                r.authors = new_authors
                r.author_sort = authors_to_sort_string(r.authors)

    return results
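
# A hedged usage sketch for identify(): it takes a Log-like object and a
# threading.Event as the abort flag, plus optional title/authors/identifiers.
# GUILog is from calibre.utils.logging; confirm the import in your tree.
def example_identify():
    from threading import Event
    from calibre.utils.logging import GUILog
    log, abort = GUILog(), Event()
    # Returns the merged Metadata results, most relevant first
    return identify(log, abort, title='A Christmas Carol', authors=['Charles Dickens'])
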
def do_test(self):
    authors = string_to_authors(self.value.strip())
    ans = map_authors(authors, compile_rules(self.rules))
    self.result.setText(authors_to_string(ans))
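
# Context for do_test() above: calibre separates multiple authors with '&'
# and escapes a literal ampersand as '&&'. A small round-trip illustration:
def example_author_round_trip():
    from calibre.ebooks.metadata import authors_to_string, string_to_authors
    authors = string_to_authors('Jane Doe & John Smith')  # -> ['Jane Doe', 'John Smith']
    return authors_to_string(authors)  # -> 'Jane Doe & John Smith'
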
def do_add(self, data):
    from calibre.ebooks.metadata.opf2 import OPF
    gui = self.parent()
    if gui is None:
        return
    m = gui.library_view.model()
    count = 0
    needs_rescan = False
    duplicates = []
    added_ids = set()

    for fname, tdir in data:
        path_to_remove = os.path.join(self.worker.path, fname)
        paths = [path_to_remove]
        fpath = os.path.join(tdir, 'file_changed_by_plugins')
        if os.path.exists(fpath):
            with open(fpath) as f:
                paths[0] = f.read()
        sz = os.path.join(tdir, 'size.txt')
        try:
            with open(sz, 'rb') as f:
                sz = int(f.read())
            if sz != os.stat(paths[0]).st_size:
                raise Exception('Looks like the file was written to after'
                                ' we tried to read metadata')
        except Exception:
            needs_rescan = True
            try:
                self.worker.staging.remove(fname)
            except KeyError:
                pass
            continue

        mi = os.path.join(tdir, 'metadata.opf')
        if not os.access(mi, os.R_OK):
            continue
        mi = OPF(open(mi, 'rb'), tdir, populate_spine=False).to_book_metadata()
        if gprefs.get('tag_map_on_add_rules'):
            from calibre.ebooks.metadata.tag_mapper import map_tags
            mi.tags = map_tags(mi.tags, gprefs['tag_map_on_add_rules'])
        if gprefs.get('author_map_on_add_rules'):
            from calibre.ebooks.metadata.author_mapper import compile_rules, map_authors
            new_authors = map_authors(mi.authors, compile_rules(gprefs['author_map_on_add_rules']))
            if new_authors != mi.authors:
                mi.authors = new_authors
                mi.author_sort = gui.current_db.new_api.author_sort_from_authors(mi.authors)
        mi = [mi]
        dups, ids = m.add_books(
            paths, [os.path.splitext(fname)[1][1:].upper()], mi,
            add_duplicates=not gprefs['auto_add_check_for_duplicates'], return_ids=True)
        added_ids |= set(ids)
        num = len(ids)
        if dups:
            path = dups[0][0]
            with open(os.path.join(tdir, 'dup_cache.' + dups[1][0].lower()), 'wb') as dest, \
                    open(path, 'rb') as src:
                shutil.copyfileobj(src, dest)
            dups[0][0] = dest.name
            duplicates.append(dups)
        try:
            os.remove(path_to_remove)
            self.worker.staging.remove(fname)
        except Exception:
            import traceback
            traceback.print_exc()
        count += num

    if duplicates:
        paths, formats, metadata = [], [], []
        for p, f, mis in duplicates:
            paths.extend(p)
            formats.extend(f)
            metadata.extend(mis)
        dups = [(mic, mic.cover, [p]) for mic, p in zip(metadata, paths)]
        d = DuplicatesQuestion(m.db, dups, parent=gui)
        dups = tuple(d.duplicates)
        if dups:
            paths, formats, metadata = [], [], []
            for mi, cover, book_paths in dups:
                paths.extend(book_paths)
                formats.extend([p.rpartition('.')[-1] for p in book_paths])
                metadata.extend([mi for i in book_paths])
            ids = m.add_books(paths, formats, metadata, add_duplicates=True, return_ids=True)[1]
            added_ids |= set(ids)
            num = len(ids)
            count += num

    for fname, tdir in data:
        try:
            shutil.rmtree(tdir)
        except Exception:
            pass

    if added_ids and gprefs['auto_add_auto_convert']:
        self.auto_convert.emit(added_ids)

    if count > 0:
        m.books_added(count)
        gui.status_bar.show_message(
            (_('Added a book automatically from {src}') if count == 1 else
             _('Added {num} books automatically from {src}')).format(
                 num=count, src=self.worker.path), 2000)
        gui.refresh_cover_browser()

    if needs_rescan:
        QTimer.singleShot(2000, self.dir_changed)
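
# The size.txt comparison in do_add() guards against files that were still
# being written when metadata was read. The same check in isolation, with
# recorded_size standing in for the value the worker wrote to size.txt
# (a hypothetical parameter name, for illustration only):
def file_changed_since_scan(book_path, recorded_size):
    import os
    # True when the on-disk size no longer matches, i.e. the add should be
    # deferred to the next rescan of the auto-add directory
    return os.stat(book_path).st_size != recorded_size
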