# Log the bot account in to the dev Open Library instance.
# rc['EdwardBot'] is passed as the second argument to login()
# (presumably the account password -- confirm against read_rc()).
rc = read_rc()
ol = OpenLibrary("http://dev.openlibrary.org")
ol.login('EdwardBot', rc['EdwardBot'])

# Wrap stdout so that unicode text written to it is encoded as UTF-8.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

# Matches a string that ends in a single capital letter or one of the listed
# abbreviations followed by a period (e.g. "J." or "Dr.").
# The pattern must be a raw string: in a plain literal '\b' is the backspace
# character (0x08), not the regex word-boundary assertion, so the original
# pattern could never match ordinary text.
re_skip = re.compile(
    r'\b([A-Z]|Co|Dr|Jr|Capt|Mr|Mrs|Ms|Prof|Rev|Revd|Hon|etc)\.$')

base_url = "http://dev.openlibrary.org"
query_url = base_url + "/query.json?query="

# First work number probed by find_new_work_key(); that function advances it
# past every number whose key is already present on the server.
work_num = 184076

# NOTE(review): presumably switches the catalog query helpers to the staging
# server -- confirm against set_staging()'s definition.
set_staging(True)


def withKey(key):
    """Fetch ``base_url + key + ".json"`` and return the raw response body.

    ``key`` is a site-relative key such as "/w/OL184076W". No decoding or
    JSON parsing is done; the caller receives whatever the server sent.
    """
    url = base_url + key + ".json"
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection explicitly rather than relying on
        # garbage collection of the response object.
        response.close()


def find_new_work_key():
    """Return the first work number, at or after ``work_num``, that is unused.

    Probes "/w/OL<n>W" keys one at a time and stops at the first one whose
    response body starts with "Not Found:". Note that the return value is the
    integer work number, not the key string. The module-level ``work_num`` is
    advanced past every key found to exist and is left equal to the returned
    number, so a later call starts probing from that same number.
    """
    global work_num
    while True:
        key = "/w/OL%dW" % work_num
        ret = withKey(key)
        if ret.startswith("Not Found:"):
            return work_num
        work_num += 1
from __future__ import print_function from openlibrary.catalog.utils.query import query_iter, set_staging, get_mc from openlibrary.catalog.get_ia import get_data from openlibrary.catalog.marc.fast_parse import get_tag_lines, get_all_subfields, get_subfields from pprint import pprint from identify_people import read_people from build_object import build_person_object import sys from collections import defaultdict set_staging(True) def work_and_marc(): i = 0 skip = True for w in query_iter({'type': '/type/work', 'title': None}): if skip: if w['key'] == '/w/OL56814W': skip = False else: continue marc = set() q = {'type': '/type/edition', 'works': w['key'], 'title': None, 'source_records': None} for e in query_iter(q): if e.get('source_records', []): marc.update(i[5:] for i in e['source_records'] if i.startswith('marc:')) mc = get_mc(e['key']) if mc and not mc.startswith('ia:') and not mc.startswith('amazon:'): marc.add(mc) if marc: