def get_work_title(e, mc):
    """Return the first work title found in the edition's source records.

    Sources are taken from ``get_marc_src(e, mc)`` as ``(src_type, src)``
    pairs and tried in order:

    * ``'ia'`` sources are resolved with ``get_ia_work_title``.
    * ``'marc'`` sources are loaded with ``get_data``; the ``a`` subfield
      values of the first 240 field are joined with spaces.

    In both cases leading/trailing dots and spaces are stripped.

    Args:
        e: Edition mapping; ``e['key']`` is printed in diagnostics.
        mc: Passed through unchanged to ``get_marc_src``.

    Returns:
        The title string from the first source that provides one, or
        ``None`` if no source does. A MARC 240 field whose ``a`` subfields
        contain only dots/spaces ends the search with an empty string.
    """
    wt = None
    for src_type, src in get_marc_src(e, mc):
        if src_type == 'ia':
            wt = get_ia_work_title(src)
            if wt:
                wt = wt.strip('. ')
            if wt:
                break
            continue
        assert src_type == 'marc'
        try:
            data = get_data(src)
        except ValueError:
            print('bad record source:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        except urllib2.HTTPError as error:
            print('HTTP error:', error.code, error.msg)
            print(e['key'])
            # Nothing was fetched, so move on to the next source.
            continue
        if not data:
            continue
        # Position 9 of the record is compared against 'a'; any other
        # value is treated as MARC-8 (per the flag name).
        is_marc8 = data[9] != 'a'
        try:
            line = get_first_tag(data, {'240'})
        except BadDictionary:
            print('bad dictionary:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        if line:
            wt = ' '.join(get_subfield_values(line, ['a'], is_marc8)).strip('. ')
            break
    # The title used to be computed and then discarded; hand it back.
    return wt
def get_work_title(e, mc):
    """Pick a work title for edition ``e``.

    The source records listed by ``get_marc_src(e, mc)`` are tried in order.
    An ``'ia'`` source is asked for its title through ``get_ia_work_title``;
    a ``'marc'`` source is loaded with ``get_data`` and the ``a`` subfields
    of its first 240 field are joined with spaces. The search stops at the
    first source that yields a title (or a 240 field).

    If that produces nothing, the edition's own ``work_titles`` and then
    ``work_title`` entries are consulted and the first element of the first
    non-empty list is used.

    Every returned title has leading/trailing dots and spaces removed.
    Returns ``None`` when no title is available anywhere.
    """
    title = None
    for kind, source in get_marc_src(e, mc):
        if kind == 'ia':
            title = get_ia_work_title(source)
            if title:
                title = title.strip('. ')
            if not title:
                continue
            break

        assert kind == 'marc'
        record = None
        try:
            record = get_data(source)
        except ValueError:
            print('bad record source:', source)
            print('http://openlibrary.org' + e['key'])
            continue
        except urllib.error.HTTPError as error:
            # record is still None here, so the check below skips the source.
            print('HTTP error:', error.code, error.msg)
            print(e['key'])
        if not record:
            continue

        # Position 9 of the record is compared against 'a'; any other
        # value is treated as MARC-8 (per the flag name).
        is_marc8 = record[9] != 'a'
        try:
            field_240 = get_first_tag(record, {'240'})
        except BadDictionary:
            print('bad dictionary:', source)
            print('http://openlibrary.org' + e['key'])
            continue
        if not field_240:
            continue
        subfield_values = get_subfield_values(field_240, ['a'], is_marc8)
        title = ' '.join(subfield_values).strip('. ')
        break

    if title:
        return title

    # Fall back to whatever the edition itself records.
    for field in ('work_titles', 'work_title'):
        candidates = e.get(field, [])
        if not candidates:
            continue
        assert isinstance(candidates, list)
        return candidates[0].strip('. ')
    return None
def get_work_title(e, mc):
    """Pick a work title for edition ``e``.

    The source records listed by ``get_marc_src(e, mc)`` are tried in order.
    An ``'ia'`` source is asked for its title through ``get_ia_work_title``;
    a ``'marc'`` source is loaded with ``get_data`` and the ``a`` subfields
    of its first 240 field are joined with spaces. The search stops at the
    first source that yields a title (or a 240 field).

    If that produces nothing, the edition's own ``work_titles`` and then
    ``work_title`` entries are consulted and the first element of the first
    non-empty list is used.

    Every returned title has leading/trailing dots and spaces removed.
    Returns ``None`` when no title is available anywhere.
    """
    title = None
    for kind, source in get_marc_src(e, mc):
        if kind == 'ia':
            title = get_ia_work_title(source)
            if title:
                title = title.strip('. ')
            if not title:
                continue
            break

        assert kind == 'marc'
        record = None
        try:
            record = get_data(source)
        except ValueError:
            print('bad record source:', source)
            print('http://openlibrary.org' + e['key'])
            continue
        except urllib2.HTTPError as error:
            # record is still None here, so the check below skips the source.
            print('HTTP error:', error.code, error.msg)
            print(e['key'])
        if not record:
            continue

        # Position 9 of the record is compared against 'a'; any other
        # value is treated as MARC-8 (per the flag name).
        is_marc8 = record[9] != 'a'
        try:
            field_240 = get_first_tag(record, {'240'})
        except BadDictionary:
            print('bad dictionary:', source)
            print('http://openlibrary.org' + e['key'])
            continue
        if not field_240:
            continue
        subfield_values = get_subfield_values(field_240, ['a'], is_marc8)
        title = ' '.join(subfield_values).strip('. ')
        break

    if title:
        return title

    # Fall back to whatever the edition itself records.
    for field in ('work_titles', 'work_title'):
        candidates = e.get(field, [])
        if not candidates:
            continue
        assert isinstance(candidates, list)
        return candidates[0].strip('. ')
    return None
def get_work_title(e):
    """Return a work title for edition ``e``.

    The source records listed by ``get_marc_src(e)`` are tried in order.
    An ``'ia'`` source is resolved with ``get_ia_work_title`` (returned
    as-is); a ``'marc'`` source is loaded with ``get_data`` and the ``a``
    subfields of its first 240 field are joined with spaces, with
    leading/trailing dots and spaces stripped.

    If no source record gives a title, the first entry of
    ``e['work_titles']`` is returned unmodified and a diagnostic is printed.

    Args:
        e: Edition mapping; ``e['key']`` is printed in diagnostics.

    Returns:
        The title string, or ``None`` when neither the source records nor
        the edition carry one.
    """
    wt = None
    for src_type, src in get_marc_src(e):
        if src_type == 'ia':
            wt = get_ia_work_title(src)
            if wt:
                break
            continue
        assert src_type == 'marc'
        try:
            data = get_data(src)
        except ValueError:
            print('bad record source:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        except urllib2.HTTPError as error:
            print('HTTP error:', error.code, error.msg)
            print(e['key'])
            # Nothing was fetched, so move on to the next source.
            continue
        if not data:
            continue
        try:
            line = get_first_tag(data, {'240'})
        except BadDictionary:
            print('bad dictionary:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        if line:
            wt = ' '.join(get_subfield_values(line, ['a'])).strip('. ')
            break
    if wt:
        return wt
    if not e.get('work_titles', []):
        return None
    # This branch runs when the source records gave no title but the
    # edition has one, so the message names the edition (OL) as the
    # side that has it. The old wording stated the opposite.
    print('work title in OL, but not in MARC')
    print('http://openlibrary.org' + e['key'])
    return e['work_titles'][0]
def get_marc_subjects(w):
    """Yield the subject tag lines found in each source record of a work.

    For every source returned by ``get_marc_source(w)`` the record is loaded
    with ``get_data`` and the lines for ``subject_fields`` are collected
    with ``get_tag_lines``. Sources that cannot be loaded or parsed are
    reported on stdout and skipped.

    Args:
        w: Work mapping; ``w['key']`` is printed in diagnostics.

    Yields:
        One non-empty list of tag lines per source record that has any.
    """
    for src in get_marc_source(w):
        try:
            data = get_data(src)
        except ValueError:
            print('bad record source:', src)
            print('http://openlibrary.org' + w['key'])
            continue
        except urllib2.HTTPError as error:
            print('HTTP error:', error.code, error.msg)
            print('http://openlibrary.org' + w['key'])
            # Nothing was fetched, so move on to the next source.
            continue
        if not data:
            continue
        try:
            lines = list(get_tag_lines(data, subject_fields))
        except BadDictionary:
            print('bad dictionary:', src)
            print('http://openlibrary.org' + w['key'])
            continue
        if lines:
            yield lines
def get_marc_subjects(w):
    """Yield the subject tag lines found in each source record of a work.

    For every source returned by ``get_marc_source(w)`` the record is loaded
    with ``get_data`` and the lines for ``subject_fields`` are collected
    with ``get_tag_lines``. Sources that cannot be loaded or parsed are
    reported on stdout and skipped.

    Args:
        w: Work mapping; ``w['key']`` is printed in diagnostics.

    Yields:
        One non-empty list of tag lines per source record that has any.
    """
    # Kept function-local as in the original, but imported once instead of
    # on every loop iteration.
    from openlibrary.catalog.get_ia import get_data

    for src in get_marc_source(w):
        try:
            data = get_data(src)
        except ValueError:
            print('bad record source:', src)
            print('http://openlibrary.org' + w['key'])
            continue
        except urllib2.HTTPError as error:
            print('HTTP error:', error.code, error.msg)
            print('http://openlibrary.org' + w['key'])
            # Nothing was fetched, so move on to the next source.
            continue
        if not data:
            continue
        try:
            lines = list(get_tag_lines(data, subject_fields))
        except BadDictionary:
            print('bad dictionary:', src)
            print('http://openlibrary.org' + w['key'])
            continue
        if lines:
            yield lines
def get_marc_subjects(w):
    """Generate subject tag lines, one list per usable source record.

    Walks the sources reported by ``get_marc_source(w)``, loads each with
    ``get_data`` and extracts the ``subject_fields`` lines through
    ``get_tag_lines``. Load and parse failures are printed together with
    the work's URL and the source is skipped; sources without data or
    without matching lines produce nothing.
    """
    for source in get_marc_source(w):
        from openlibrary.catalog.get_ia import get_data

        record = None
        try:
            record = get_data(source)
        except ValueError:
            print('bad record source:', source)
            print('http://openlibrary.org' + w['key'])
            continue
        except urllib2.HTTPError as error:
            # record stays None, so the source is skipped below.
            print('HTTP error:', error.code, error.msg)
            print('http://openlibrary.org' + w['key'])

        if record:
            try:
                tag_lines = list(get_tag_lines(record, subject_fields))
            except BadDictionary:
                print('bad dictionary:', source)
                print('http://openlibrary.org' + w['key'])
                continue
            if tag_lines:
                yield tag_lines
def read_works():
    """Link works to the people they are about, creating person pages.

    For each ``(work, marc)`` pair from ``work_and_marc()``:

    1. Collect the values of the 600 fields from every MARC location that
       ``get_data`` can load; works without any are skipped.
    2. Pass those values to ``read_people``; if it raises
       ``AssertionError`` or ``KeyError`` the collected lines are printed
       and the work is skipped.
    3. For every person, append a ``/subjects/people/<key>`` reference to
       the work's ``subject_people`` list, saving a new ``/type/person``
       page through ``ol.save`` the first time a key is seen.
    4. Save the updated work through ``ol.save``.

    Progress and the results of the save calls are printed to stdout.
    """
    i = 0
    pages = {}      # person key -> person object already created
    page_marc = {}  # marc form of a person -> person key
    for work, marc in work_and_marc():
        lines = []
        for loc in marc:
            data = get_data(loc)
            if not data:
                continue
            found = [v for _, v in get_tag_lines(data, {'600'})]
            if found:
                lines.append((loc, found))
        if not lines:
            continue
        work['lines'] = lines
        i += 1
        print(i, work['key'], work['title'])
        try:
            people, marc_alt = read_people(j[1] for j in lines)
        except (AssertionError, KeyError):
            print(work['lines'])
            continue

        # Invert marc_alt so the keys that map to a person can be looked up.
        marc_alt_reverse = defaultdict(set)
        for k, v in marc_alt.items():
            marc_alt_reverse[v].add(k)

        w = ol.get(work['key'])
        w['subject_people'] = []
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for p, num in people.items():
            print(' %2d %s' % (num, ' '.join("%s: %s" % (k, v) for k, v in p)))
            print(' ', p)
            if p in page_marc:
                w['subject_people'].append({'key': '/subjects/people/' + page_marc[p]})
                continue
            obj = build_person_object(p, marc_alt_reverse.get(p, []))
            key = obj['name'].replace(' ', '_')
            full_key = '/subjects/people/' + key
            w['subject_people'].append({'key': full_key})
            if key in pages:
                # Page already exists: just remember this extra marc form.
                print(key)
                pages[key]['marc'].append(p)
                continue
            for m in obj['marc']:
                page_marc[m] = key
            pages[key] = obj
            # The 'marc' entry is bookkeeping only and is not stored.
            obj_for_db = obj.copy()
            del obj_for_db['marc']
            obj_for_db['key'] = full_key
            obj_for_db['type'] = '/type/person'
            # NOTE(review): on Python 3 encode() yields bytes - confirm
            # that ol.save accepts a bytes key.
            print(ol.save(full_key.encode('utf-8'), obj_for_db, 'create a new person page'))
        print(w)
        print(ol.save(w['key'], w, 'add links to people that this work is about'))
def read_works():
    """Link works to the people they are about, creating person pages.

    For each ``(work, marc)`` pair from ``work_and_marc()``:

    1. Collect the values of the 600 fields from every MARC location that
       ``get_data`` can load; works without any are skipped.
    2. Pass those values to ``read_people``; if it raises
       ``AssertionError`` or ``KeyError`` the collected lines are printed
       and the work is skipped.
    3. For every person, append a ``/subjects/people/<key>`` reference to
       the work's ``subject_people`` list, saving a new ``/type/person``
       page through ``ol.save`` the first time a key is seen.
    4. Save the updated work through ``ol.save``.

    Progress and the results of the save calls are printed to stdout.
    """
    i = 0
    pages = {}      # person key -> person object already created
    page_marc = {}  # marc form of a person -> person key
    for work, marc in work_and_marc():
        lines = []
        for loc in marc:
            data = get_data(loc)
            if not data:
                continue
            found = [v for _, v in get_tag_lines(data, {'600'})]
            if found:
                lines.append((loc, found))
        if not lines:
            continue
        work['lines'] = lines
        i += 1
        print(i, work['key'], work['title'])
        try:
            people, marc_alt = read_people(j[1] for j in lines)
        except (AssertionError, KeyError):
            print(work['lines'])
            continue

        # Invert marc_alt so the keys that map to a person can be looked up.
        marc_alt_reverse = defaultdict(set)
        for k, v in marc_alt.items():
            marc_alt_reverse[v].add(k)

        w = ol.get(work['key'])
        w['subject_people'] = []
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for p, num in people.items():
            print(' %2d %s' % (num, ' '.join("%s: %s" % (k, v) for k, v in p)))
            print(' ', p)
            if p in page_marc:
                w['subject_people'].append({'key': '/subjects/people/' + page_marc[p]})
                continue
            obj = build_person_object(p, marc_alt_reverse.get(p, []))
            key = obj['name'].replace(' ', '_')
            full_key = '/subjects/people/' + key
            w['subject_people'].append({'key': full_key})
            if key in pages:
                # Page already exists: just remember this extra marc form.
                print(key)
                pages[key]['marc'].append(p)
                continue
            for m in obj['marc']:
                page_marc[m] = key
            pages[key] = obj
            # The 'marc' entry is bookkeeping only and is not stored.
            obj_for_db = obj.copy()
            del obj_for_db['marc']
            obj_for_db['key'] = full_key
            obj_for_db['type'] = '/type/person'
            # NOTE(review): on Python 3 encode() yields bytes - confirm
            # that ol.save accepts a bytes key.
            print(ol.save(full_key.encode('utf-8'), obj_for_db, 'create a new person page'))
        print(w)
        print(ol.save(w['key'], w, 'add links to people that this work is about'))