def ia_match(a, ia):
    """Attempt to merge ``a`` with the record fetched for identifier ``ia``.

    Returns ``False`` when ``get_ia`` raises ``urllib.error.HTTPError``, or
    when the fetched record is ``None`` or has no ``'full_title'`` key.
    Otherwise returns whatever ``amazon_merge.attempt_merge`` returns for
    ``a`` and the output of ``build_marc``.

    A ``TypeError`` raised by ``build_marc`` is propagated after the
    offending record has been printed.
    """
    try:
        _location, record = get_ia(ia)
    except urllib.error.HTTPError:
        return False

    usable = record is not None and 'full_title' in record
    if not usable:
        return False

    try:
        candidate = build_marc(record)
    except TypeError:
        # Show the record build_marc could not handle, then let the error out.
        print(record)
        raise
    return amazon_merge.attempt_merge(a, candidate, threshold, debug=False)
# Fragment of a loop body. The enclosing loop and the `if` that owns the
# leading `continue` are outside this view, and the original indentation was
# lost, so the nesting of the steps below cannot be confirmed from this line.
# Visible steps, in order:
#   1. Parse `marc_data` with fast_parse.read_edition(accept_electronic=True);
#      on any exception set bad_binary = "MARC parse error".
#      NOTE(review): the bare `except:` also traps KeyboardInterrupt and
#      SystemExit, unlike the explicit re-raise used around get_ia below.
#      NOTE(review): bad_binary is only assigned here on failure; presumably
#      it is initialised earlier in the loop -- confirm.
#   2. If bad_binary is set and formats['xml'] is falsy, report via
#      load_error_mail and skip the item.
#   3. If use_binary is falsy and formats['xml'] is truthy: when
#      bad_ia_xml(ia) and bad_binary both hold, report via load_error_mail
#      and skip.
#   4. Call get_ia(ia): KeyboardInterrupt/NameError are re-raised; NoMARCXML
#      and urllib2.HTTPError are recorded with write_log and the item skipped.
#      NOTE(review): presumably still inside the step-3 branch -- confirm.
#      NOTE(review): the result is bound to `rec` without unpacking --
#      confirm get_ia's return shape.
#   5. If neither binary nor XML MARC is usable, print 'skipping, no MARC'
#      and skip; if `rec` is falsy, log "error: no rec" and skip.
continue try: rec = fast_parse.read_edition(marc_data, accept_electronic=True) except: bad_binary = "MARC parse error" if bad_binary and not formats['xml']: load_error_mail(ia, bad_binary, 'bad MARC binary, no MARC XML') continue if not use_binary and formats['xml']: if bad_ia_xml(ia) and bad_binary: load_error_mail(ia, bad_binary, 'bad MARC binary, bad MARC XML') continue try: rec = get_ia(ia) except (KeyboardInterrupt, NameError): raise except NoMARCXML: write_log(ia, when, "no MARCXML") continue except urllib2.HTTPError as error: write_log(ia, when, "error: HTTPError: " + str(error)) continue if not use_binary and not formats['xml']: print('skipping, no MARC') continue if not rec: write_log(ia, when, "error: no rec") continue
# Iterate over Cornell University Library scans from the metadata table in
# order of last update, skip identifiers that already have an edition, and
# fetch the MARC location/record for the rest.
iter = db.query(
    "select identifier, updated from metadata where contributor='Cornell University Library' and scanner is not null and noindex is null and mediatype='texts' and (curatestate='approved' or curatestate is null) and scandate is not null order by updated",
    {'start': start},
)
t_start = time()

for row in iter:
    ia = row.identifier
    print((repr(ia), row.updated))
    when = str(row.updated)

    # An edition may reference the item through either field; the second
    # lookup only runs when the first finds nothing.
    if (
        query({'type': '/type/edition', 'ocaid': ia})
        or query({'type': '/type/edition', 'source_records': 'ia:' + ia})
    ):
        print('already loaded')
        continue

    try:
        loc, rec = get_ia(ia)
    except (KeyboardInterrupt, NameError):
        # Never log these as per-item failures; let them stop the run.
        raise
    except NoMARCXML:
        write_log(ia, when, "no MARCXML")
        continue
    except urllib2.HTTPError as error:
        write_log(ia, when, "error: HTTPError: " + str(error))
        continue

    # A missing location is logged but does not by itself skip the item.
    if loc is None:
        write_log(ia, when, "error: no loc ")
    if rec is None:
        write_log(ia, when, "error: no rec")
        continue

    print(loc, rec)
# Fragment of a loop body. It uses `continue`, but the enclosing loop is
# outside this view, and the original indentation was lost, so the nesting of
# the steps below cannot be confirmed from this line.
# NOTE(review): `print 'not a book!'` and `print 'skipping, no MARC'` are
# Python 2 print statements; they are syntax errors on Python 3.
# Visible steps, in order:
#   1. Skip the item unless characters 6-7 of str(marc_data) equal 'am'
#      (the inline comment says "only want books").
#   2. Parse `marc_data` with fast_parse.read_edition(accept_electronic=True);
#      on any exception set bad_binary = "MARC parse error".
#      NOTE(review): the bare `except:` also traps KeyboardInterrupt and
#      SystemExit, unlike the explicit re-raise used around get_ia below.
#      NOTE(review): bad_binary is only assigned here on failure; presumably
#      it is initialised earlier in the loop -- confirm.
#   3. If bad_binary is set and formats['xml'] is falsy, report via
#      load_error_mail and skip the item.
#   4. If use_binary is falsy and formats['xml'] is truthy: when
#      bad_ia_xml(ia) and bad_binary both hold, report via load_error_mail
#      and skip.
#   5. Call get_ia(ia): KeyboardInterrupt/NameError are re-raised; NoMARCXML
#      and urllib2.HTTPError are recorded with write_log and the item skipped.
#      NOTE(review): presumably still inside the step-4 branch -- confirm.
#   6. If neither binary nor XML MARC is usable, print a notice and skip;
#      if `rec` is falsy, log "error: no rec" and skip.
if str(marc_data)[6:8] != 'am': # only want books print 'not a book!' continue try: rec = fast_parse.read_edition(marc_data, accept_electronic = True) except: bad_binary = "MARC parse error" if bad_binary and not formats['xml']: load_error_mail(ia, bad_binary, 'bad MARC binary, no MARC XML') continue if not use_binary and formats['xml']: if bad_ia_xml(ia) and bad_binary: load_error_mail(ia, bad_binary, 'bad MARC binary, bad MARC XML') continue try: rec = get_ia(ia) except (KeyboardInterrupt, NameError): raise except NoMARCXML: write_log(ia, when, "no MARCXML") continue except urllib2.HTTPError as error: write_log(ia, when, "error: HTTPError: " + str(error)) continue if not use_binary and not formats['xml']: print 'skipping, no MARC' continue if not rec: write_log(ia, when, "error: no rec") continue
# Iterate over Cornell University Library scans from the metadata table in
# order of last update, skip identifiers that already have an edition, and
# fetch the MARC location/record for the rest.
#
# Commented-out alternative query (no contributor filter, restricted to
# rows with updated > $start):
#iter = db.query("select identifier, updated from metadata where scanner is not null and noindex is null and mediatype='texts' and (curatestate='approved' or curatestate is null) and scandate is not null and updated > $start order by updated", {'start': start})
# NOTE(review): the active query text below contains no $start placeholder,
# although {'start': start} is still passed to db.query.
iter = db.query("select identifier, updated from metadata where contributor='Cornell University Library' and scanner is not null and noindex is null and mediatype='texts' and (curatestate='approved' or curatestate is null) and scandate is not null order by updated", {'start': start})
t_start = time()
for row in iter:
    ia = row.identifier
    # A single pre-formatted argument parses both as a Python 2 print
    # statement and as a Python 3 print() call, and emits the same text as
    # the former "print `ia`, row.updated" (repr of ia, space, updated).
    print('%r %s' % (ia, row.updated))
    when = str(row.updated)

    # Skip items already attached to an edition, by ocaid or source record.
    if query({'type': '/type/edition', 'ocaid': ia}):
        print('already loaded')
        continue
    if query({'type': '/type/edition', 'source_records': 'ia:' + ia}):
        print('already loaded')
        continue

    try:
        loc, rec = get_ia(ia)
    except (KeyboardInterrupt, NameError):
        # Never log these as per-item failures; let them stop the run.
        raise
    except NoMARCXML:
        write_log(ia, when, "no MARCXML")
        continue
    except urllib2.HTTPError as error:
        write_log(ia, when, "error: HTTPError: " + str(error))
        continue

    # A missing location is logged but does not by itself skip the item.
    if loc is None:
        write_log(ia, when, "error: no loc ")
    if rec is None:
        write_log(ia, when, "error: no rec")
        continue

    # Same output as the former "print loc, rec" on either Python version.
    print('%s %s' % (loc, rec))