def load(rec, account_key=None):
    """Given a record, tries to add/match that edition in the system.

    Record is a dictionary containing all the metadata of the edition.
    The following fields are mandatory:

        * title: str
        * source_records: list

    If no existing edition matches, the record is handed to load_data().
    Otherwise the matched edition (and its work) are enriched with data from
    ``rec`` and saved only if something changed.

    Note that ``rec`` is modified in place (``source_records`` is wrapped in
    a list when a single value is given, ``title``/``subtitle`` may be split,
    and ``full_title`` may be added).

    :param dict rec: Edition record to add
    :param account_key: passed through unchanged to load_data() and add_cover()
    :rtype: dict
    :return: a dict to be converted into a JSON HTTP response, same as load_data()
    :raises RequiredField: if a mandatory field is missing or empty
    """
    required_fields = ['title', 'source_records']  # ['authors', 'publishers', 'publish_date']
    for field in required_fields:
        if not rec.get(field):
            raise RequiredField(field)
    if not isinstance(rec['source_records'], list):
        rec['source_records'] = [rec['source_records']]

    # Split subtitle if required and not already present
    if ':' in rec.get('title') and not rec.get('subtitle'):
        title, subtitle = split_subtitle(rec.get('title'))
        if subtitle:
            rec['title'] = title
            rec['subtitle'] = subtitle

    rec = normalize_record_isbns(rec)

    edition_pool = build_pool(rec)
    if not edition_pool:
        # No match candidates found, add edition
        return load_data(rec, account_key=account_key)

    # Try progressively more expensive matching strategies.
    match = early_exit(rec)
    if not match:
        match = find_exact_match(rec, edition_pool)

    if not match:
        rec['full_title'] = rec['title']
        if rec.get('subtitle'):
            rec['full_title'] += ' ' + rec['subtitle']
        e1 = build_marc(rec)
        add_db_name(e1)
        match = find_match(e1, edition_pool)

    if not match:
        # No match found, add edition
        return load_data(rec, account_key=account_key)

    # We have an edition match at this point
    need_work_save = need_edition_save = False
    e = web.ctx.site.get(match)

    # check for, and resolve, author redirects.
    # Iterate over a snapshot: the loop body removes from and appends to
    # e.authors, and mutating a list while iterating over it makes the
    # iterator skip the element that follows each removal, which left
    # some redirects unresolved.
    for a in list(e.authors):
        while is_redirect(a):
            if a in e.authors:
                e.authors.remove(a)
            a = web.ctx.site.get(a.location)
            if not is_redirect(a):
                e.authors.append(a)
    # NOTE(review): resolving redirects does not set need_edition_save, so
    # the updated author list is only persisted if another change triggers
    # an edition save -- confirm this is intended.

    if e.get('works'):
        w = e.works[0].dict()
        work_created = False
    else:
        # Found an edition without a work
        work_created = need_work_save = need_edition_save = True
        w = new_work(e.dict(), rec)
        e.works = [{'key': w['key']}]

    # Add subjects to work, if not already present
    if 'subjects' in rec:
        work_subjects = list(w.get('subjects', []))
        for s in rec['subjects']:
            if s not in work_subjects:
                work_subjects.append(s)
                need_work_save = True
        if need_work_save and work_subjects:
            w['subjects'] = work_subjects

    # Add cover to edition
    if 'cover' in rec and not e.get_covers():
        cover_url = rec['cover']
        cover_id = add_cover(cover_url, e.key, account_key=account_key)
        if cover_id:
            e['covers'] = [cover_id]
            need_edition_save = True

    # Add cover to work, if needed
    if not w.get('covers') and e.get_covers():
        w['covers'] = [e['covers'][0]]
        need_work_save = True

    # Add description to work, if needed
    if not w.get('description') and e.get('description'):
        w['description'] = e['description']
        need_work_save = True

    # Add authors to work, if needed
    if not w.get('authors'):
        authors = [import_author(a) for a in rec.get('authors', [])]
        # Only authors that already have a key can be referenced from the work.
        w['authors'] = [
            {'type': {'key': '/type/author_role'}, 'author': a.key}
            for a in authors
            if a.get('key')
        ]
        if w.get('authors'):
            need_work_save = True

    # Add ocaid to edition (str), if needed
    if 'ocaid' in rec and not e.ocaid:
        e['ocaid'] = rec['ocaid']
        need_edition_save = True

    # Add list fields to edition as needed
    edition_fields = [
        'local_id',
        'lccn',
        'lc_classifications',
        'source_records',
    ]
    for f in edition_fields:
        if f not in rec:
            continue
        # ensure values is a list
        values = rec[f] if isinstance(rec[f], list) else [rec[f]]
        if f in e:
            # get values from rec that are not currently on the edition
            to_add = [v for v in values if v not in e[f]]
            e[f] += to_add
        else:
            e[f] = to_add = values
        if to_add:
            need_edition_save = True

    edits = []
    reply = {
        'success': True,
        'edition': {'key': match, 'status': 'matched'},
        'work': {'key': w['key'], 'status': 'matched'},
    }
    if need_edition_save:
        reply['edition']['status'] = 'modified'
        edits.append(e.dict())
    if need_work_save:
        reply['work']['status'] = 'created' if work_created else 'modified'
        edits.append(w)
    if edits:
        web.ctx.site.save_many(edits, comment='import existing book', action='edit-book')
    if 'ocaid' in rec:
        # The edition's OLID is the last path segment of its key.
        update_ia_metadata_for_ol_edition(match.split('/')[-1])
    return reply
def test_import_author_name_unchanged(author, new_import):
    """import_author() must return the author's name exactly as supplied.

    ``author`` and ``new_import`` are provided by the test framework
    (presumably fixtures defined elsewhere -- not visible here).
    """
    # Capture the name first, before import_author() gets to see the record.
    original_name = author['name']
    imported = import_author(author)
    assert imported['name'] == original_name
def load_data(rec, account_key=None):
    """
    Adds a new Edition to Open Library. Checks for existing Works.
    Creates a new Work, and Author, if required,
    otherwise associates the new Edition with the existing Work.

    Note that the ``cover`` entry, if present, is removed from ``rec``
    before the edition is built.

    :param dict rec: Edition record to add (no further checks at this point)
    :param account_key: passed through unchanged to add_cover()
    :rtype: dict
    :return:
        {
            "success": False,
            "error": <error msg>
        }
      OR
        {
            "success": True,
            "work": {"key": <key>, "status": "created" | "modified" | "matched"},
            "edition": {"key": <key>, "status": "created"},
            "authors": [{"status": "matched", "name": "John Smith", "key": <key>}, ...]
        }
    """
    # Take the cover URL out of the record before building the edition.
    cover_url = rec.pop('cover', None)
    try:
        # get an OL style edition dict
        edition = build_query(rec)
    except InvalidLanguage as e:
        return {
            'success': False,
            'error': str(e),
        }

    ekey = web.ctx.site.new_key('/type/edition')
    cover_id = None
    if cover_url:
        cover_id = add_cover(cover_url, ekey, account_key=account_key)
    if cover_id:
        edition['covers'] = [cover_id]

    edits = []  # Things (Edition, Work, Authors) to be saved
    reply = {}
    # TOFIX: edition.authors has already been processed by import_authors() in build_query(), following line is a NOP?
    author_in = [
        import_author(a, eastern=east_in_by_statement(rec, a))
        for a in edition.get('authors', [])
    ]
    # build_author_reply() adds authors to edits
    (authors, author_reply) = build_author_reply(author_in, edits, rec['source_records'][0])

    if authors:
        edition['authors'] = authors
        reply['authors'] = author_reply

    wkey = None
    work_state = 'created'
    # Look for an existing work
    if 'authors' in edition:
        wkey = find_matching_work(edition)
    if wkey:
        w = web.ctx.site.get(wkey)
        work_state = 'matched'
        need_update = False
        for k in subject_fields:
            if k not in rec:
                continue
            # Normalize the work's existing values once per field, and keep
            # the list in sync as new values are appended, instead of
            # re-normalizing every existing value for every incoming subject.
            known = [normalize(existing) for existing in w.get(k, [])]
            for s in rec[k]:
                normalized = normalize(s)
                if normalized not in known:
                    w.setdefault(k, []).append(s)
                    known.append(normalized)
                    need_update = True
        if cover_id:
            w.setdefault('covers', []).append(cover_id)
            need_update = True
        if need_update:
            work_state = 'modified'
            edits.append(w.dict())
    else:
        # Create new work
        w = new_work(edition, rec, cover_id)
        wkey = w['key']
        edits.append(w)

    assert wkey
    edition['works'] = [{'key': wkey}]
    edition['key'] = ekey
    edits.append(edition)

    web.ctx.site.save_many(edits, comment='import new book', action='add-book')

    # Writes back `openlibrary_edition` and `openlibrary_work` to
    # archive.org item after successful import:
    if 'ocaid' in rec:
        update_ia_metadata_for_ol_edition(ekey.split('/')[-1])

    reply['success'] = True
    reply['edition'] = {'key': ekey, 'status': 'created'}
    reply['work'] = {'key': wkey, 'status': work_state}
    return reply
def test_import_author_name_natural_order(author, new_import):
    """import_author() must return the name as 'Forename Surname'.

    ``author`` and ``new_import`` are provided by the test framework
    (presumably fixtures defined elsewhere -- not visible here).
    """
    imported_name = import_author(author)['name']
    assert imported_name == 'Forename Surname'