Пример #1
0
def get_work_title(e):
    # use first work title we find in source MARC records
    wt = None
    for src_type, src in get_marc_src(e):
        if src_type == 'ia':
            wt = get_ia_work_title(src)
            if wt:
                break
            continue
        assert src_type == 'marc'
        try:
            data = get_from_archive(src)
        except ValueError:
            print 'bad record source:', src
            print 'http://openlibrary.org' + e['key']
            continue
        if not data:
            continue
        try:
            line = get_first_tag(data, set(['240']))
        except BadDictionary:
            print 'bad dictionary:', src
            print 'http://openlibrary.org' + e['key']
            continue
        if line:
            wt = ' '.join(get_subfield_values(line, ['a'])).strip('. ')
            break
    if wt:
        return wt
    if not e.get('work_titles', []):
        return
    print 'work title in MARC, but not in OL'
    print 'http://openlibrary.org' + e['key']
    return e['work_titles'][0]
Пример #2
0
def get_work_subjects(w, do_get_mc=True):
    found = set()
    for e in w['editions']:
        sr = e.get('source_records', [])
        if sr:
            for i in sr:
                if i.endswith('initial import'):
                    continue
                if i.startswith(('ia:', 'marc:')):
                    found.add(i)
                    continue
        else:
            mc = None
            if do_get_mc:
                m = re_edition_key.match(e['key'])
                mc = get_mc('/b/' + m.group(1))
            if mc:
                if mc.endswith('initial import'):
                    continue
                if not mc.startswith('amazon:') and not re_ia_marc.match(mc):
                    found.add('marc:' + mc)
    subjects = []
    for sr in found:
        if sr.startswith('marc:ia:'):
            subjects.append(get_subjects_from_ia(sr[8:]))
        elif sr.startswith('marc:'):
            loc = sr[5:]
            data = get_from_archive(loc)
            rec = MarcBinary(data)
            subjects.append(read_subjects(rec))
        else:
            assert sr.startswith('ia:')
            subjects.append(get_subjects_from_ia(sr[3:]))
    return combine_subjects(subjects)
Пример #3
0
def get_work_subjects(w):
    found = set()
    for e in w['editions']:
        sr = e.get('source_records', [])
        if sr:
            for i in sr:
                if i.endswith('initial import'):
                    bad_source_record(e, i)
                    continue
                if i.startswith('ia:') or i.startswith('marc:'):
                    found.add(i)
                    continue
        else:
            m = re_edition_key.match(e['key'])
            mc = get_mc('/b/' + m.group(1))
            if mc:
                if mc.endswith('initial import'):
                    bad_source_record(e, mc)
                    continue
                if not mc.startswith('amazon:') and not re_ia_marc.match(mc):
                    found.add('marc:' + mc)
    subjects = []
    for sr in found:
        if sr.startswith('marc:ia:'):
            subjects.append(get_subjects_from_ia(sr[8:]))
        elif sr.startswith('marc:'):
            loc = sr[5:]
            data = get_from_archive(loc)
            rec = MarcBinary(data)
            subjects.append(read_subjects(rec))
        else:
            assert sr.startswith('ia:')
            subjects.append(get_subjects_from_ia(sr[3:]))
    return combine_subjects(subjects)
Пример #4
0
def get_record(key, mc):
    data = get_from_archive(mc)
    try:
        rec = fast_parse.read_edition(data)
    except (fast_parse.SoundRecording, IndexError, AssertionError):
        print(mc)
        print(key)
        return False
    try:
        return marc.build_marc(rec)
    except TypeError:
        print(rec)
        raise
Пример #5
0
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

re_subtag = re.compile('\x1f(.)([^\x1f]*)')


def fmt_subfields(line):
    def bold(s):
        return ''.join(c + "\b" + c for c in s)

    assert line[-1] == '\x1e'
    return ''.join(' ' + bold('$' + m.group(1)) + ' ' + translate(m.group(2))
                   for m in re_subtag.finditer(line[2:-1]))


def show_book(data):
    print 'leader:', data[:24]
    for tag, line in get_all_tag_lines(data):
        if tag.startswith('00'):
            print tag, line[:-1]
        else:
            print tag, line[0:2], fmt_subfields(line)


if __name__ == '__main__':
    source = sys.argv[1]
    if ':' in source:
        data = get_from_archive(source)
    else:
        data = open(source).read()
    show_book(data)
Пример #6
0
sys.stdout = codecs.getwriter("utf-8")(sys.stdout)

re_subtag = re.compile("\x1f(.)([^\x1f]*)")


def fmt_subfields(line):
    def bold(s):
        return "".join(c + "\b" + c for c in s)

    assert line[-1] == "\x1e"
    return "".join(" " + bold("$" + m.group(1)) + " " + translate(m.group(2)) for m in re_subtag.finditer(line[2:-1]))


def show_book(data):
    print "leader:", data[:24]
    for tag, line in get_all_tag_lines(data):
        if tag.startswith("00"):
            print tag, line[:-1]
        else:
            print tag, line[0:2], fmt_subfields(line)


if __name__ == "__main__":
    source = sys.argv[1]
    if ":" in source:
        data = get_from_archive(source)
    else:
        data = open(source).read()
    show_book(data)