def try_merge(edition, ekey, thing):
    """Decide whether an Amazon edition record matches the existing record `thing`.

    :param edition: mapping describing the Amazon edition; must provide
        ``isbn_10`` or ``asin`` and may provide ``authors`` (a list of
        mappings with a ``name`` entry).
    :param ekey: key of the existing record, passed to ``get_mc``.
    :param thing: the existing record; ``thing['type']['key']`` must be
        ``'/type/edition'``.
    :return: ``True`` when ``'amazon:' + asin`` is already listed in
        ``thing['source_records']`` or equals the value of ``get_mc(ekey)``;
        ``False`` when there are no source records and ``get_mc`` yields
        nothing; otherwise the result of ``source_records_match`` or
        ``amazon_merge.attempt_merge``.
    :raises AssertionError: if the ASIN is not a string or `thing` is not an
        edition.
    """
    # `basestring` only exists on Python 2; fall back to `str` on Python 3 so
    # the type check below works on either interpreter.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    thing_type = thing['type']['key']
    if 'isbn_10' not in edition:
        # Diagnostic output for editions that only carry an ASIN.
        print(edition)
    asin = edition.get('isbn_10', None) or edition['asin']
    if 'authors' in edition:
        authors = [i['name'] for i in edition['authors']]
    else:
        authors = []
    a = amazon_merge.build_amazon(edition, authors)
    assert isinstance(asin, string_types)
    assert thing_type == '/type/edition'

    amazon_id = 'amazon:' + asin
    if 'source_records' in thing:
        if amazon_id in thing['source_records']:
            return True
        return source_records_match(a, thing)

    # No source records on the existing record: fall back to get_mc().
    mc = get_mc(ekey)
    if mc == amazon_id:
        return True
    if not mc:
        return False
    data = get_from_local(mc)
    e1 = build_marc(fast_parse.read_edition(data))
    # `threshold` is expected to be defined at module level.
    return amazon_merge.attempt_merge(a, e1, threshold, debug=False)
def add_source_records(key, new, thing):
    """Add `new` to the ``source_records`` of the record at `key` and save it.

    The record is re-fetched through ``ol.get(key)``. When it already has
    ``source_records`` and `new` is among them, nothing is changed or saved.
    When it has none, an initial list is derived from ``get_mc(key)`` before
    `new` is appended. The table of contents (via ``fix_toc``) and subjects
    flagged by ``has_dot`` are also cleaned up before the save.

    :param key: key of the record to update.
    :param new: source record identifier to add.
    :param thing: unused by this function; kept for caller compatibility.
    :raises AssertionError: if `new` is already in the derived source list, or
        if the saved table of contents has no ``title`` in its first entry.
    """
    e = ol.get(key)
    if 'source_records' in e:
        if new in e['source_records']:
            return  # already recorded; nothing to save
        e['source_records'].append(new)
    else:
        existing = get_mc(key)
        amazon = 'amazon:'
        if not existing:
            # get_mc() may yield nothing (other callers guard for this);
            # start from an empty list instead of failing on .startswith().
            sr = []
        elif existing.startswith(amazon):
            sr = amazon_source_records(existing[len(amazon):]) or [existing]
        else:
            m = re_meta_mrc.match(existing)
            sr = ['ia:' + m.group(1) if m else 'marc:' + existing]
        assert new not in sr
        e['source_records'] = sr + [new]

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects', None) and any(has_dot(s) for s in e['subjects']):
        # Drop the final character of every subject that has_dot() flags.
        e['subjects'] = [s[:-1] if has_dot(s) else s for s in e['subjects']]

    print(ol.save(key, e, 'found a matching MARC record'))

    if new_toc:
        # Re-read the saved record to confirm the new table of contents stuck.
        new_edition = ol.get(key)
        # [{u'type': <ref: u'/type/toc_item'>}, ...]
        assert 'title' in new_edition['table_of_contents'][0]
def get_src(key):
    """Return the list of source identifiers for the record at `key`.

    The record's own ``source_records`` value is returned when present.
    Otherwise the value of ``get_mc(key)`` is wrapped in a one-element list,
    or ``None`` is returned when ``get_mc`` yields a falsy value.
    """
    record = withKey(key)
    if 'source_records' not in record:
        fallback = get_mc(key)
        return [fallback] if fallback else None
    return record['source_records']
def try_merge(edition, ekey, thing):
    """Decide whether an Amazon edition record matches the existing record `thing`.

    Returns ``True`` when ``'amazon:' + asin`` already appears in
    ``thing['source_records']`` or equals ``get_mc(ekey)``; ``False`` when
    there are no source records and ``get_mc`` yields nothing; otherwise the
    result of ``source_records_match`` or ``amazon_merge.attempt_merge``.

    Asserts that the ASIN is a string and that `thing` is a ``/type/edition``.
    """
    kind = thing['type']['key']
    if 'isbn_10' not in edition:
        # Diagnostic output for editions that only carry an ASIN.
        print(edition)
    asin = edition.get('isbn_10', None) or edition['asin']
    author_names = (
        [author['name'] for author in edition['authors']]
        if 'authors' in edition
        else []
    )
    amazon_rec = amazon_merge.build_amazon(edition, author_names)
    assert isinstance(asin, six.string_types)
    assert kind == '/type/edition'

    amazon_id = 'amazon:' + asin
    if 'source_records' in thing:
        # A direct hit short-circuits; otherwise compare against the sources.
        return (
            amazon_id in thing['source_records']
            or source_records_match(amazon_rec, thing)
        )

    # No source records: fall back to the value from get_mc().
    mc = get_mc(ekey)
    if mc == amazon_id:
        return True
    if not mc:
        return False
    parsed = fast_parse.read_edition(get_from_local(mc))
    return amazon_merge.attempt_merge(
        amazon_rec, build_marc(parsed), threshold, debug=False
    )
def add_source_records(key, new, thing, data):
    """Add `new` to the ``source_records`` of the record at `key` and save it.

    The record is re-fetched through ``ol.get(key)``. When it already has
    ``source_records`` and `new` is among them, nothing is changed or saved.
    When it has none, an initial list is derived from ``get_mc(key)`` before
    `new` is appended. Before saving, the table of contents (``fix_toc``),
    subjects flagged by ``has_dot`` and the author list are cleaned up.

    :param key: key of the record to update.
    :param new: source record identifier to add.
    :param thing: unused by this function; kept for caller compatibility.
    :param data: passed to ``author_from_data`` when the record's only author
        is the placeholder string ``'None'``.
    :raises AssertionError: if `new` is already in the derived source list, if
        a ``'None'`` author is not the sole author, or if the saved table of
        contents has no ``title`` in its first entry.
    """
    e = ol.get(key)
    if 'source_records' in e:
        if new in e['source_records']:
            return  # already recorded; nothing to save
        e['source_records'].append(new)
    else:
        existing = get_mc(key)
        amazon = 'amazon:'
        if not existing:
            # get_mc() may yield nothing (other callers guard for this);
            # start from an empty list instead of failing on .startswith().
            sr = []
        elif existing.startswith('ia:'):
            sr = [existing]
        elif existing.startswith(amazon):
            sr = amazon_source_records(existing[len(amazon):]) or [existing]
        else:
            m = re_meta_mrc.match(existing)
            sr = ['ia:' + m.group(1) if m else 'marc:' + existing]
        assert new not in sr
        e['source_records'] = sr + [new]

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc
    if e.get('subjects', None) and any(has_dot(s) for s in e['subjects']):
        # Drop the final character of every subject that has_dot() flags.
        e['subjects'] = [s[:-1] if has_dot(s) else s for s in e['subjects']]

    if 'authors' in e:
        if any(a == 'None' for a in e['authors']):
            # Placeholder author: rebuild it from the new source record.
            assert len(e['authors']) == 1
            new_author = author_from_data(new, data)
            e['authors'] = [new_author]
        else:
            print(e['authors'])
            authors = [ol.get(akey) for akey in e['authors']]
            # Follow redirects so the record points at the target authors.
            authors = [
                ol.get(a['location']) if a['type'] == '/type/redirect' else a
                for a in authors
            ]
            e['authors'] = [a['key'] for a in authors]
            undelete_authors(authors)

    try:
        print(ol.save(key, e, 'found a matching MARC record'))
    except Exception:
        # Dump the record that failed to save, then propagate the error.
        print(e)
        raise

    if new_toc:
        # Re-read the saved record to confirm the new table of contents stuck.
        new_edition = ol.get(key)
        # [{u'type': <ref: u'/type/toc_item'>}, ...]
        assert 'title' in new_edition['table_of_contents'][0]
def try_merge(e1, edition_key, thing): thing_type = thing['type']['key'] if thing_type == '/type/delete': # return False assert thing_type == '/type/edition' if 'source_records' in thing: if fix_source_records(edition_key, thing): thing = withKey(edition_key) # reload return source_records_match(e1, thing) ia = thing.get('ocaid', None) print edition_key mc = get_mc(edition_key) print mc if mc: if mc.startswith('ia:'): ia = mc[3:] elif mc.endswith('.xml') or mc.endswith('.mrc'): ia = mc[:mc.find('/')] if '_meta.mrc:' in mc: assert 'ocaid' in thing ia = thing['ocaid'] rec2 = None if ia: if is_dark_or_bad(ia): return False try: loc2, rec2 = get_ia(ia) except xml.parsers.expat.ExpatError: return False except urllib2.HTTPError, error: print error.code assert error.code in (404, 403) if not rec2: return True
rec2 = None if ia: if is_dark_or_bad(ia): return False try: loc2, rec2 = get_ia(ia) except xml.parsers.expat.ExpatError: return False except urllib2.HTTPError, error: print error.code assert error.code in (404, 403) if not rec2: return True if not rec2: if not mc: mc = get_mc(thing['key']) if not mc or mc == 'initial import': return False if mc.startswith('amazon:'): try: a = try_amazon(thing) except IndexError: print thing['key'] raise except AttributeError: return False if not a: return False try: return amazon.attempt_merge(a, e1, threshold, debug=False) except: