Exemplo n.º 1
0
def ia_match(a, ia):
    """Attempt to merge record ``a`` with the record fetched for item ``ia``.

    The item is fetched with ``get_ia``, converted with ``build_marc`` and
    then passed, together with ``a``, to ``amazon_merge.attempt_merge``
    using the ``threshold`` name from the enclosing scope.

    Returns ``False`` when ``get_ia`` raises ``urllib.error.HTTPError``,
    when the fetched record is ``None``, or when it has no ``'full_title'``
    key; otherwise returns the result of ``attempt_merge``.

    A ``TypeError`` raised by ``build_marc`` is re-raised after the
    offending record has been printed.
    """
    try:
        _loc, marc_rec = get_ia(ia)
    except urllib.error.HTTPError:
        return False

    # Nothing usable came back: no record at all, or one without a title.
    if marc_rec is None:
        return False
    if 'full_title' not in marc_rec:
        return False

    try:
        candidate = build_marc(marc_rec)
    except TypeError:
        # Show the record that could not be converted, then propagate.
        print(marc_rec)
        raise

    return amazon_merge.attempt_merge(a, candidate, threshold, debug=False)
Exemplo n.º 2
0
                        continue
                    try:
                        rec = fast_parse.read_edition(marc_data,
                                                      accept_electronic=True)
                    except:
                        bad_binary = "MARC parse error"
            if bad_binary and not formats['xml']:
                load_error_mail(ia, bad_binary, 'bad MARC binary, no MARC XML')
                continue
            if not use_binary and formats['xml']:
                if bad_ia_xml(ia) and bad_binary:
                    load_error_mail(ia, bad_binary,
                                    'bad MARC binary, bad MARC XML')
                    continue
                try:
                    rec = get_ia(ia)
                except (KeyboardInterrupt, NameError):
                    raise
                except NoMARCXML:
                    write_log(ia, when, "no MARCXML")
                    continue
                except urllib2.HTTPError as error:
                    write_log(ia, when, "error: HTTPError: " + str(error))
                    continue
            if not use_binary and not formats['xml']:
                print('skipping, no MARC')
                continue

            if not rec:
                write_log(ia, when, "error: no rec")
                continue
Exemplo n.º 3
0
# Select Cornell University Library text scans, ordered by `updated`, and
# fetch the record for every identifier that has no edition yet.
iter = db.query(
    "select identifier, updated from metadata"
    " where contributor='Cornell University Library'"
    " and scanner is not null"
    " and noindex is null"
    " and mediatype='texts'"
    " and (curatestate='approved' or curatestate is null)"
    " and scandate is not null"
    " order by updated",
    # NOTE(review): the SQL text above contains no `$start` placeholder, so
    # this binding looks unused -- confirm before removing it.
    {'start': start},
)
t_start = time()
for row in iter:
    ia = row.identifier
    print((repr(ia), row.updated))
    when = str(row.updated)

    # Skip identifiers already attached to an edition, either through
    # `ocaid` or through an `ia:` source record.
    if (query({'type': '/type/edition', 'ocaid': ia})
            or query({'type': '/type/edition', 'source_records': 'ia:' + ia})):
        print('already loaded')
        continue

    try:
        loc, rec = get_ia(ia)
    except (KeyboardInterrupt, NameError):
        # Never swallow these; let them propagate unchanged.
        raise
    except NoMARCXML:
        write_log(ia, when, "no MARCXML")
        continue
    except urllib2.HTTPError as error:
        write_log(ia, when, "error: HTTPError: " + str(error))
        continue

    # A missing location is only logged; a missing record skips the row.
    if loc is None:
        write_log(ia, when, "error: no loc ")
    if rec is None:
        write_log(ia, when, "error: no rec")
        continue
    print(loc, rec)
Exemplo n.º 4
0
                    if str(marc_data)[6:8] != 'am': # only want books
                        print 'not a book!'
                        continue
                    try:
                        rec = fast_parse.read_edition(marc_data, accept_electronic = True)
                    except:
                        bad_binary = "MARC parse error"
            if bad_binary and not formats['xml']:
                load_error_mail(ia, bad_binary, 'bad MARC binary, no MARC XML')
                continue
            if not use_binary and formats['xml']:
                if bad_ia_xml(ia) and bad_binary:
                    load_error_mail(ia, bad_binary, 'bad MARC binary, bad MARC XML')
                    continue
                try:
                    rec = get_ia(ia)
                except (KeyboardInterrupt, NameError):
                    raise
                except NoMARCXML:
                    write_log(ia, when, "no MARCXML")
                    continue
                except urllib2.HTTPError as error:
                    write_log(ia, when, "error: HTTPError: " + str(error))
                    continue
            if not use_binary and not formats['xml']:
                print 'skipping, no MARC'
                continue

            if not rec:
                write_log(ia, when, "error: no rec")
                continue
Exemplo n.º 5
0
#iter = db.query("select identifier, updated from metadata where scanner is not null and noindex is null and mediatype='texts' and (curatestate='approved' or curatestate is null) and scandate is not null and updated > $start order by updated", {'start': start})
iter = db.query("select identifier, updated from metadata where contributor='Cornell University Library' and scanner is not null and noindex is null and mediatype='texts' and (curatestate='approved' or curatestate is null) and scandate is not null order by updated", {'start': start})
t_start = time()
for row in iter:
    ia = row.identifier
    print `ia`, row.updated
    when = str(row.updated)
    if query({'type': '/type/edition', 'ocaid': ia}):
        print 'already loaded'
        continue
    if query({'type': '/type/edition', 'source_records': 'ia:' + ia}):
        print 'already loaded'
        continue
    try:
        loc, rec = get_ia(ia)
    except (KeyboardInterrupt, NameError):
        raise
    except NoMARCXML:
        write_log(ia, when, "no MARCXML")
        continue
    except urllib2.HTTPError as error:
        write_log(ia, when, "error: HTTPError: " + str(error))
        continue
    if loc is None:
        write_log(ia, when, "error: no loc ")
    if rec is None:
        write_log(ia, when, "error: no rec")
        continue
    print loc, rec