def single_identify(title, authors, identifiers):
    '''
    Run a single identify query and package everything the caller needs.

    :return: A 4-tuple of: the OPF serialization of every result, a parallel
             list holding each result's ``has_cached_cover_url`` value, the
             value returned by ``dump_caches()`` and the dumped log.
    '''
    log = GUILog()
    patch_plugins()
    results = identify(
        log, Event(), title=title, authors=authors, identifiers=identifiers)
    # Build the two parallel lists in separate passes, OPF data first
    opfs = list(map(metadata_to_opf, results))
    cover_flags = [r.has_cached_cover_url for r in results]
    caches = dump_caches()
    return opfs, cover_flags, caches, log.dump()
def main(args=sys.argv):
    '''
    Command line entry point: identify a book and print the best match.

    The query is built from ``opts.title``, ``opts.authors`` and ``opts.isbn``.
    The first result returned by ``identify`` is printed, as OPF when
    ``opts.opf`` is set and as UTF-8 encoded text otherwise. When
    ``opts.cover`` is set, a cover is downloaded and saved to that path.

    :param args: Argument list handed to the option parser.
    :return: 0 on success.
    :raises SystemExit: With status 1 when no results are found.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    buf = BytesIO()
    log = create_log(buf)
    abort = Event()
    patch_plugins()

    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)

    identifiers = {}
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    allowed_plugins = frozenset(opts.allowed_plugin)

    results = identify(log, abort, title=opts.title, authors=authors,
            identifiers=identifiers, timeout=int(opts.timeout),
            allowed_plugins=allowed_plugins or None)

    if not results:
        # Show the captured log text; printing ``log`` itself would only
        # output the log object, not what was written to it
        prints(buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    if opts.cover and results:
        cover = download_cover(log, title=opts.title, authors=authors,
                identifiers=result.identifiers, timeout=int(opts.timeout))
        if cover is None:
            # A missing cover must never reach the save branch below, even
            # when the message is suppressed because OPF output was requested
            if not opts.opf:
                prints('No cover found', file=sys.stderr)
        else:
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover

    log = buf.getvalue()

    result = (metadata_to_opf(result) if opts.opf else
            unicode(result).encode('utf-8'))

    if opts.verbose:
        print(log, file=sys.stderr)

    print(result)
    if not opts.opf and opts.cover:
        prints('Cover :', cf)

    return 0
def main(args=sys.argv):
    '''
    Command line entry point: identify a book and print the best match.

    The query is built from ``opts.title``, ``opts.authors``, ``opts.isbn``
    and any ``key:value`` entries in ``opts.identifier``. The first result
    returned by ``identify`` is printed, as OPF when ``opts.opf`` is set and
    as UTF-8 encoded text otherwise. When ``opts.cover`` is set, a cover is
    downloaded and saved to that path.

    :param args: Argument list handed to the option parser.
    :return: 0 on success.
    :raises SystemExit: With a message for a malformed identifier, or with
                        status 1 when no results are found.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    buf = BytesIO()
    log = create_log(buf)
    abort = Event()
    patch_plugins()

    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)

    identifiers = {}
    for idspec in opts.identifier:
        # Split on the first colon only, so values may contain colons
        k, v = idspec.partition(':')[::2]
        if not k or not v:
            raise SystemExit('Not a valid identifier: {}'.format(idspec))
        identifiers[k] = v
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    allowed_plugins = frozenset(opts.allowed_plugin)

    results = identify(log, abort, title=opts.title, authors=authors,
            identifiers=identifiers, timeout=int(opts.timeout),
            allowed_plugins=allowed_plugins or None)

    if not results:
        # Show the captured log text; printing ``log`` itself would only
        # output the log object, not what was written to it
        prints(buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    if opts.cover and results:
        cover = download_cover(log, title=opts.title, authors=authors,
                identifiers=result.identifiers, timeout=int(opts.timeout))
        if cover is None:
            # A missing cover must never reach the save branch below, even
            # when the message is suppressed because OPF output was requested
            if not opts.opf:
                prints('No cover found', file=sys.stderr)
        else:
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover

    log = buf.getvalue()

    result = (metadata_to_opf(result) if opts.opf else
            unicode_type(result).encode('utf-8'))

    if opts.verbose:
        print(log, file=sys.stderr)

    print(result)
    if not opts.opf and opts.cover:
        prints('Cover :', cf)

    return 0
def main(args=sys.argv):
    '''
    Identify a book from command line options and print the top result.

    Output is OPF (written as bytes to stdout) when ``opts.opf`` is set,
    otherwise the textual form of the result, followed by the cover path
    when a cover was requested.

    :param args: Argument list handed to the option parser.
    :return: 0 on success.
    :raises SystemExit: With a message for a malformed identifier, or with
                        status 1 when no results are found.
    '''
    opts, args = option_parser().parse_args(args)

    log_stream = BytesIO()
    log = create_log(log_stream)
    abort = Event()
    patch_plugins()

    authors = string_to_authors(opts.authors) if opts.authors else []

    identifiers = {}
    for idspec in opts.identifier:
        key, _sep, value = idspec.partition(':')
        if not (key and value):
            raise SystemExit('Not a valid identifier: {}'.format(idspec))
        identifiers[key] = value
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    allowed_plugins = frozenset(opts.allowed_plugin)
    timeout = int(opts.timeout)

    results = identify(
        log, abort, title=opts.title, authors=authors,
        identifiers=identifiers, timeout=timeout,
        allowed_plugins=allowed_plugins or None)

    if not results:
        prints(log_stream.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)

    best = results[0]
    cover_path = None

    # results is known to be non-empty here, so only the option matters
    if opts.cover:
        cover = download_cover(
            log, title=opts.title, authors=authors,
            identifiers=best.identifiers, timeout=timeout)
        if cover is not None:
            save_cover_data_to(cover[-1], opts.cover)
            best.cover = cover_path = opts.cover
        elif not opts.opf:
            prints('No cover found', file=sys.stderr)

    if opts.verbose:
        prints(log_stream.getvalue(), file=sys.stderr)

    if opts.opf:
        # OPF data is bytes, so prefer the binary stdout when there is one
        out = getattr(sys.stdout, 'buffer', sys.stdout)
        out.write(metadata_to_opf(best))
        print()
    else:
        prints(str(best))
        if opts.cover:
            prints('Cover :', cover_path)

    return 0
def main(do_identify, covers, metadata, ensure_fields, tdir):
    '''
    Download metadata and/or covers for a batch of books.

    For every book the outputs are written into ``tdir``: ``<id>.mi`` (merged
    metadata as OPF), ``<id>.cover`` (cover data) and ``<id>.log`` (the log
    for that book).

    :param do_identify: If true, run ``identify`` for each book.
    :param covers: If true, download a cover for each book.
    :param metadata: Mapping of book id to OPF data for that book.
    :param ensure_fields: Passed through to ``merge_result``.
    :param tdir: Directory used as the OPF base dir and for all output files.
    :return: ``(failed_ids, failed_covers, all_failed)`` where the first two
             are sets of book ids and ``all_failed`` is True only if nothing
             at all was downloaded.
    '''
    import traceback

    failed_ids = set()
    failed_covers = set()
    all_failed = True
    log = GUILog()
    patch_plugins()

    for book_id, mi in iteritems(metadata):
        mi = OPF(BytesIO(mi), basedir=tdir, populate_spine=False).to_book_metadata()
        title, authors, identifiers = mi.title, mi.authors, mi.identifiers
        log.clear()

        if do_identify:
            results = []
            try:
                results = identify(log, Event(), title=title, authors=authors,
                        identifiers=identifiers)
            except Exception:
                # Still best effort: one failing book must not abort the
                # batch, but keep the traceback in this book's log instead of
                # discarding it silently
                log.error('Identify raised an exception for', title)
                log.error(traceback.format_exc())
            if results:
                all_failed = False
                mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
                # Use the merged identifiers for the cover download below
                identifiers = mi.identifiers
                if not mi.is_null('rating'):
                    # set_metadata expects a rating out of 10
                    mi.rating *= 2
                with open(os.path.join(tdir, '%d.mi' % book_id), 'wb') as f:
                    f.write(metadata_to_opf(mi, default_lang='und'))
            else:
                log.error('Failed to download metadata for', title)
                failed_ids.add(book_id)

        if covers:
            cdata = download_cover(log, title=title, authors=authors,
                    identifiers=identifiers)
            if cdata is None:
                failed_covers.add(book_id)
            else:
                with open(os.path.join(tdir, '%d.cover' % book_id), 'wb') as f:
                    f.write(cdata[-1])
                all_failed = False

        with open(os.path.join(tdir, '%d.log' % book_id), 'wb') as f:
            f.write(log.plain_text.encode('utf-8'))

    return failed_ids, failed_covers, all_failed
def main(args=sys.argv):
    '''
    Command line entry point: identify a book and print the best match.

    The query is built from ``opts.title``, ``opts.authors`` and ``opts.isbn``.
    The first result returned by ``identify`` is printed, as OPF when
    ``opts.opf`` is set and as UTF-8 encoded text otherwise. When
    ``opts.cover`` is set, a cover is downloaded and saved to that path.

    :param args: Argument list handed to the option parser.
    :return: 0 on success.
    :raises SystemExit: With status 1 when no results are found.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    buf = BytesIO()
    log = create_log(buf)
    abort = Event()

    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)

    identifiers = {}
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    results = identify(log, abort, title=opts.title, authors=authors,
            identifiers=identifiers, timeout=int(opts.timeout))

    if not results:
        # Show the captured log text; printing ``log`` itself would only
        # output the log object, not what was written to it
        prints(buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    if opts.cover and results:
        cover = download_cover(log, title=opts.title, authors=authors,
                identifiers=result.identifiers, timeout=int(opts.timeout))
        if cover is None:
            prints('No cover found', file=sys.stderr)
        else:
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover

    log = buf.getvalue()

    result = (metadata_to_opf(result) if opts.opf else
            unicode(result).encode('utf-8'))

    if opts.verbose:
        print(log, file=sys.stderr)

    print(result)
    if not opts.opf and opts.cover:
        prints('Cover :', cf)

    return 0
def main(do_identify, covers, metadata, ensure_fields, tdir):
    '''
    Download metadata and/or covers for a batch of books.

    For every book the outputs are written into ``tdir``: ``<id>.mi`` (merged
    metadata as OPF), ``<id>.cover`` (cover data) and ``<id>.log`` (the log
    for that book).

    :param do_identify: If true, run ``identify`` for each book.
    :param covers: If true, download a cover for each book.
    :param metadata: Mapping of book id to OPF data for that book.
    :param ensure_fields: Passed through to ``merge_result``.
    :param tdir: Directory used as the OPF base dir and for all output files.
    :return: ``(failed_ids, failed_covers, all_failed)`` where the first two
             are sets of book ids and ``all_failed`` is True only if nothing
             at all was downloaded.
    '''
    import traceback

    failed_ids = set()
    failed_covers = set()
    all_failed = True
    log = GUILog()
    patch_plugins()

    for book_id, mi in metadata.iteritems():
        mi = OPF(BytesIO(mi), basedir=tdir, populate_spine=False).to_book_metadata()
        title, authors, identifiers = mi.title, mi.authors, mi.identifiers
        log.clear()

        if do_identify:
            results = []
            try:
                results = identify(log, Event(), title=title, authors=authors,
                        identifiers=identifiers)
            except Exception:
                # Still best effort: one failing book must not abort the
                # batch, but keep the traceback in this book's log instead of
                # discarding it silently
                log.error('Identify raised an exception for', title)
                log.error(traceback.format_exc())
            if results:
                all_failed = False
                mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
                # Use the merged identifiers for the cover download below
                identifiers = mi.identifiers
                if not mi.is_null('rating'):
                    # set_metadata expects a rating out of 10
                    mi.rating *= 2
                with open(os.path.join(tdir, '%d.mi'%book_id), 'wb') as f:
                    f.write(metadata_to_opf(mi, default_lang='und'))
            else:
                log.error('Failed to download metadata for', title)
                failed_ids.add(book_id)

        if covers:
            cdata = download_cover(log, title=title, authors=authors,
                    identifiers=identifiers)
            if cdata is None:
                failed_covers.add(book_id)
            else:
                with open(os.path.join(tdir, '%d.cover'%book_id), 'wb') as f:
                    f.write(cdata[-1])
                all_failed = False

        with open(os.path.join(tdir, '%d.log'%book_id), 'wb') as f:
            f.write(log.plain_text.encode('utf-8'))

    return failed_ids, failed_covers, all_failed
def test_identify(tests):  # {{{
    '''
    Run full identify queries and check the results against test callables.

    :param tests: List of 2-tuples of the form (kwargs, test_funcs). kwargs
                  is a dict of keyword arguments passed to identify.
                  test_funcs are callables that take a Metadata object and
                  return True iff that object passes the test.

    Exits with status 1 if no result passes every test, or if the most
    relevant result is not the first one that passes.
    '''
    from calibre.ebooks.metadata.sources.identify import identify

    tdir, lf, log, abort = init_test('Full Identify')
    prints('Log saved to', lf)

    durations = []
    separator = '#' * 80

    for kwargs, test_funcs in tests:
        log('')
        log(separator)
        log('### Running test with:', kwargs)
        log(separator)
        prints('Running test with:', kwargs)

        began = time.time()
        results = identify(log, abort, **kwargs)
        durations.append(time.time() - began)

        if not results:
            prints('identify failed to find any results')
            break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        for relevance, candidate in enumerate(results):
            prints('*' * 30, 'Relevance:', relevance, '*' * 30)
            if candidate.rating:
                candidate.rating *= 2
            prints(candidate)
            cover_names = [x[0].name for x in get_cached_cover_urls(candidate)]
            prints('\nCached cover URLs :', cover_names)
            prints('*' * 75, '\n\n')

        # Keep only the results for which every test callable succeeds
        possibles = [
            candidate for candidate in results
            if all(tfunc(candidate) for tfunc in test_funcs)
        ]

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            log.close()
            dump_log(lf)
            raise SystemExit(1)

        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        log('\n\n')

    prints('Average time per query', sum(durations) / len(durations))
    prints('Full log is at:', lf)
def isbn_api_add(self, isbn):
    '''
    Add a book known only by its ISBN, then try to fill in its metadata.

    The book is imported with just the ISBN set. Metadata is then looked up
    synchronously via ``identify`` and, if found, stored on the new book.

    :param isbn: The ISBN as a string; all non-digit characters are ignored.
    :return: A human readable status message. On full success it contains
             the ISBN, the new book id and the downloaded metadata.
    :raises cherrypy.HTTPError: 404 when the ISBN has no digits or is not
                                10 or 13 digits long.
    '''
    try:
        digits = re.sub(r'[^\d]+', '', isbn)
        # int() rejects input without digits; zfill() puts back the leading
        # zeros it drops, so ISBNs such as 0306406152 keep their full length
        isbn = str(int(digits)).zfill(len(digits))
    except (TypeError, ValueError):
        raise cherrypy.HTTPError(404, 'Invalid isbn %s is not a number: '%isbn)
    # NOTE(review): an ISBN-10 whose check digit is "X" loses that character
    # above and is rejected by the length check - confirm whether such ISBNs
    # need to be supported
    isbn_len = len(isbn)
    if isbn_len != 10 and isbn_len != 13:
        raise cherrypy.HTTPError(404, 'Invalid isbn {0} has a wrong length of {1}'.format(isbn, isbn_len))

    mi = MetaInformation(None)
    mi.isbn = isbn
    fmts = []
    new_id = 0
    try:
        new_id = self.db.import_book(mi, fmts)
    except Exception:
        return 'could not add new book with isdb {0}'.format(isbn)

    # Look up the metadata for the freshly added book. This runs in the
    # calling thread; no cover is downloaded.
    result = ''
    try:
        buf = BytesIO()
        log = create_log(buf)
        abort = Event()
        authors = []
        identifiers = {'isbn': mi.isbn}

        results = identify(log, abort, title=None, authors=authors,
                identifiers=identifiers, timeout=30000)

        if not results:
            # Show the captured log text rather than the log object
            prints(buf.getvalue(), file=sys.stderr)
            prints('No results found', file=sys.stderr)
            return 'Could not find metadata for isbn {0}'.format(isbn)

        result = results[0]
        self.db.set_metadata(new_id, result)
        result = unicode(result).encode('utf-8')
    except Exception as e:
        # The book itself has already been added; only report the failure
        return 'Error getting metadata {0}'.format(e)

    return 'Added new book with isbn {0} with new id {1} and metadata {2}'.format(isbn, new_id, result)

# }}}
def test_identify(tests):  # {{{
    '''
    Run full identify queries and check the results against test callables.

    :param tests: List of 2-tuples of the form (kwargs, test_funcs). kwargs
                  is a dict of keyword arguments passed to identify.
                  test_funcs are callables that take a Metadata object and
                  return True iff that object passes the test.

    Exits with status 1 if no result passes every test, or if the most
    relevant result is not the first one that passes.
    '''
    from calibre.ebooks.metadata.sources.identify import identify

    tdir, lf, log, abort = init_test('Full Identify')
    prints('Log saved to', lf)

    durations = []
    separator = '#'*80

    for kwargs, test_funcs in tests:
        log(separator)
        log('### Running test with:', kwargs)
        log(separator)
        prints('Running test with:', kwargs)

        began = time.time()
        results = identify(log, abort, **kwargs)
        durations.append(time.time() - began)

        if not results:
            prints('identify failed to find any results')
            break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        for relevance, candidate in enumerate(results):
            prints('*'*30, 'Relevance:', relevance, '*'*30)
            prints(candidate)
            cover_names = [x[0].name for x in get_cached_cover_urls(candidate)]
            prints('\nCached cover URLs :', cover_names)
            prints('*'*75, '\n\n')

        # Keep only the results for which every test callable succeeds
        possibles = [
            candidate for candidate in results
            if all(tfunc(candidate) for tfunc in test_funcs)
        ]

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)

        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        log('\n\n')

    prints('Average time per query', sum(durations)/len(durations))
    prints('Full log is at:', lf)