def run_work_finder(i):
    """Re-run work finding for the master author of one author-merge entry.

    Prints a progress report to stdout, looks up the works of the merge's
    master author, passes them to ``update_works`` with ``do_updates=True``
    and appends the elapsed time (as measured by ``time()``) to the
    module-level ``update_times`` list.

    Args:
        i: Log entry mapping. ``i['timestamp']`` and ``i['data']`` are read;
            ``i['data']`` must provide ``'author'`` and ``'result'`` and may
            provide ``'changeset'``. The changeset's ``'data'`` must have
            exactly two entries, ``'master'`` and ``'duplicates'``.

    Returns:
        None. When the entry carries no changeset the function returns right
        after the report header and records no timing.

    Raises:
        AssertionError: If the changeset data is not a ``master`` /
            ``duplicates`` pair. The changeset is printed before re-raising.
    """
    started = time()
    d = i['data']
    print('timestamp:', i['timestamp'])
    print('author:', d['author'])
    print('%d records updated:' % len(d['result']))

    if 'changeset' not in d:
        print('no changeset in author merge')
        print()
        return

    changeset = d['changeset']
    try:
        merge = changeset['data']
        # Explicit check instead of `assert`, which is stripped under -O.
        if not (len(merge) == 2 and 'master' in merge
                and 'duplicates' in merge):
            raise AssertionError(
                'changeset data must hold exactly master and duplicates')
    except Exception:
        # Show the offending changeset, then let the failure propagate.
        print(changeset)
        raise

    akey = merge['master']
    dup_keys = merge['duplicates']
    print('dups:', dup_keys)

    title_redirects = find_title_redirects(akey)
    books = get_books(akey, books_query(akey))
    works = find_works(books, existing=title_redirects)
    print('author:', akey)
    print('works:', works)

    updated = update_works(akey, works, do_updates=True)
    print('%d records updated' % len(updated))

    elapsed = time() - started
    update_times.append(elapsed)
    print('update takes: %d seconds' % elapsed)
    print()
def run_work_finder(i):
    """Re-run work finding for the master author of one author-merge entry.

    Prints a progress report to stdout (including the raw changeset), looks
    up the works of the merge's master author, passes them to
    ``update_works`` with ``do_updates=True`` and appends the elapsed time
    (as measured by ``time()``) to the module-level ``update_times`` list.

    Every ``print`` call takes a single pre-formatted argument so the output
    is the same whether or not ``print`` is a statement (Python 2) or a
    function (Python 3).

    Args:
        i: Log entry mapping. ``i['timestamp']`` and ``i['data']`` are read;
            ``i['data']`` must provide ``'author'`` and ``'result'`` and may
            provide ``'changeset'``. The changeset's ``'data'`` must have
            exactly two entries, ``'master'`` and ``'duplicates'``.

    Returns:
        None. When the entry carries no changeset the function returns right
        after the report header and records no timing.

    Raises:
        AssertionError: If the changeset data is not a ``master`` /
            ``duplicates`` pair. The changeset is printed before re-raising.
    """
    started = time()
    d = i['data']
    print('timestamp: %s' % (i['timestamp'],))
    print('author: %s' % (d['author'],))
    print('%d records updated:' % len(d['result']))

    if 'changeset' not in d:
        print('no changeset in author merge')
        print('')
        return

    changeset = d['changeset']
    try:
        merge = changeset['data']
        # Explicit check instead of `assert`, which is stripped under -O.
        if not (len(merge) == 2 and 'master' in merge
                and 'duplicates' in merge):
            raise AssertionError(
                'changeset data must hold exactly master and duplicates')
    except Exception:
        # Show the offending changeset, then let the failure propagate.
        print(changeset)
        raise

    akey = merge['master']
    dup_keys = merge['duplicates']
    print(changeset)
    print('dups: %s' % (dup_keys,))

    title_redirects = find_title_redirects(akey)
    books = get_books(akey, books_query(akey))
    works = find_works(akey, books, existing=title_redirects)
    print('author: %s' % (akey,))
    print('works: %s' % (works,))

    updated = update_works(akey, works, do_updates=True)
    print('%d records updated' % len(updated))

    elapsed = time() - started
    update_times.append(elapsed)
    print('update takes: %d seconds' % elapsed)
    print('')
def run_work_finder(i):
    """Re-run work finding for the master author of one author-merge entry.

    Prints a progress report to stdout, looks up the works of the merge's
    master author, passes them to ``update_works`` with ``do_updates=True``
    and appends the elapsed time (as measured by ``time()``) to the
    module-level ``update_times`` list.

    Every ``print`` call takes a single pre-formatted argument so the output
    is the same whether or not ``print`` is a statement (Python 2) or a
    function (Python 3).

    Args:
        i: Log entry mapping. ``i["timestamp"]`` and ``i["data"]`` are read;
            ``i["data"]`` must provide ``"author"`` and ``"result"`` and may
            provide ``"changeset"``. The changeset's ``"data"`` must have
            exactly two entries, ``"master"`` and ``"duplicates"``.

    Returns:
        None. When the entry carries no changeset the function returns right
        after the report header and records no timing.

    Raises:
        AssertionError: If the changeset data is not a ``master`` /
            ``duplicates`` pair. The changeset is printed before re-raising.
    """
    started = time()
    d = i["data"]
    print("timestamp: %s" % (i["timestamp"],))
    print("author: %s" % (d["author"],))
    print("%d records updated:" % len(d["result"]))

    if "changeset" not in d:
        print("no changeset in author merge")
        print("")
        return

    changeset = d["changeset"]
    try:
        merge = changeset["data"]
        # Explicit check instead of `assert`, which is stripped under -O.
        if not (len(merge) == 2 and "master" in merge
                and "duplicates" in merge):
            raise AssertionError(
                "changeset data must hold exactly master and duplicates")
    except Exception:
        # Show the offending changeset, then let the failure propagate.
        print(changeset)
        raise

    akey = merge["master"]
    dup_keys = merge["duplicates"]
    print("dups: %s" % (dup_keys,))

    title_redirects = find_title_redirects(akey)
    books = get_books(akey, books_query(akey))
    works = find_works(books, existing=title_redirects)
    print("author: %s" % (akey,))
    print("works: %s" % (works,))

    updated = update_works(akey, works, do_updates=True)
    print("%d records updated" % len(updated))

    elapsed = time() - started
    update_times.append(elapsed)
    print("update takes: %d seconds" % elapsed)
    print("")
#!/usr/bin/python
"""Find and update the works of one author, printing each stage.

The author key is taken from the first command-line argument.
"""
from __future__ import print_function

import sys
from pprint import pprint

from openlibrary.catalog.works.find_works import (
    books_query,
    find_title_redirects,
    find_works,
    get_books,
    update_works,
)

akey = sys.argv[1]

# Stage 1: title redirects already recorded for this author.
title_redirects = find_title_redirects(akey)
print('title_redirects:')
pprint(title_redirects)
print()

# Stage 2: works derived from the author's books. The result is turned into
# a list so it can be both printed and handed on to update_works.
works = list(
    find_works(akey, get_books(akey, books_query(akey)),
               existing=title_redirects)
)
print('works:')
pprint(works)
print()

# Stage 3: call update_works with do_updates=True and show what it returned.
updated = update_works(akey, works, do_updates=True)
print('updated works:')
pprint(updated)
elif action == 'save_many': author_merge = i['data']['comment'] == 'merge authors' if author_merge and skip_author_merge: continue if author_merge and only_author_merge: continue if handle_author_merge and not i['data']['author'].endswith( 'Bot') and author_merge: first_redirect = i['data']['query'][0] assert first_redirect['type']['key'] == '/type/redirect' akey = first_redirect['location'] if akey.startswith('/authors/'): akey = '/a/' + akey[len('/authors/'):] title_redirects = find_title_redirects(akey) works = find_works(akey, get_books(akey, books_query(akey)), existing=title_redirects) updated = update_works(akey, works, do_updates=True) works_to_update.update(w['key'] for w in updated) for query in i['data']['query']: key = query.pop('key') process_save(key, query) # store.put gets called when any document is updated in the store. Borrowing/Returning a book triggers one. elif action == 'store.put': # A sample record looks like this: # { # "action": "store.put", # "timestamp": "2011-12-01T00:00:44.241604", # "data": { # "data": {"borrowed": "false", "_key": "ebooks/books/OL5854888M", "_rev": "975708", "type": "ebook", "book_key": "/books/OL5854888M"}, # "key": "ebooks/books/OL5854888M"