示例#1
0
def try_merge(e1, edition_key, existing):
    """
    Build a comparison record from ``existing`` and test it against ``e1``.

    :param e1: record passed unchanged to attempt_merge() as the first side
    :param edition_key: not referenced inside this function
    :param existing: object exposing ``type.key``, ``title``, ``subtitle``,
        ``authors``, ``get()`` and item access
    :return: False when ``existing`` has type '/type/delete', otherwise the
        result of attempt_merge()
    :raises AssertionError: when ``existing`` is neither deleted nor an
        edition, or when an author does not resolve to a named '/type/author'
    """
    kind = existing.type.key
    if kind == '/type/delete':
        return False
    assert kind == '/type/edition'

    title = existing.title
    if existing.subtitle:
        title += ' ' + existing.subtitle
    rec2 = {'full_title': title}

    # Copy over only the fields that hold a truthy value on the edition.
    copied_fields = (
        'isbn', 'isbn_10', 'isbn_13', 'lccn',
        'publish_country', 'publishers', 'publish_date',
    )
    for field in copied_fields:
        if existing.get(field):
            rec2[field] = existing[field]

    if existing.authors:
        resolved = rec2['authors'] = []
        for author in existing.authors:
            author_kind = author.type.key
            # Keep resolving until the author is neither deleted nor a redirect.
            while author_kind in ('/type/delete', '/type/redirect'):
                if author_kind == '/type/delete':
                    author = undelete_author(author)
                else:
                    author = web.ctx.site.get(author.location)
                author_kind = author.type.key
            assert author_kind == '/type/author'
            assert author['name']
            resolved.append({'name': author['name'], 'db_name': db_name(author)})

    e2 = build_marc(rec2)
    return attempt_merge(e1, e2, threshold, debug=False)
def test_merge():
    """
    Two records with identical authors but different title punctuation,
    ISBN lists and publishers must give an exact author match (score 125)
    and merge at a threshold of 875.
    """
    bpl_rec = {
        'authors': [{
            'birth_date': u'1897',
            'db_name': u'Green, Constance McLaughlin 1897-',
            'entity_type': 'person',
            'name': u'Green, Constance McLaughlin',
            'personal_name': u'Green, Constance McLaughlin',
        }],
        'full_title': u'Eli Whitney and the birth of American technology',
        'isbn': [u'188674632X'],
        'normalized_title': u'eli whitney and the birth of american technology',
        'number_of_pages': 215,
        'publish_date': '1956',
        'publishers': [u'HarperCollins', u'[distributed by Talman Pub.]'],
        'short_title': u'eli whitney and the birth',
        'source_record_loc': 'bpl101.mrc:0:1226',
        'titles': [
            u'Eli Whitney and the birth of American technology',
            u'eli whitney and the birth of american technology',
        ],
    }
    lc_rec = {
        'authors': [{
            'birth_date': u'1897',
            'db_name': u'Green, Constance McLaughlin 1897-',
            'entity_type': 'person',
            'name': u'Green, Constance McLaughlin',
            'personal_name': u'Green, Constance McLaughlin',
        }],
        'full_title': u'Eli Whitney and the birth of American technology.',
        'isbn': [],
        'normalized_title': u'eli whitney and the birth of american technology',
        'number_of_pages': 215,
        'publish_date': '1956',
        'publishers': ['Little, Brown'],
        'short_title': u'eli whitney and the birth',
        'source_record_loc': 'marc_records_scriblio_net/part04.dat:119539872:591',
        'titles': [
            u'Eli Whitney and the birth of American technology.',
            u'eli whitney and the birth of american technology',
        ],
    }

    expected_author_result = ('authors', 'exact match', 125)
    assert compare_authors(bpl_rec, lc_rec) == expected_author_result

    merge_threshold = 875
    assert attempt_merge(bpl_rec, lc_rec, merge_threshold) is True
示例#3
0
def try_merge(e1, edition_key, existing):
    """
    Build a comparison record from ``existing`` and test it against ``e1``,
    printing both records for debugging.

    :param e1: record passed unchanged to attempt_merge() as the first side
    :param edition_key: not referenced inside this function
    :param existing: object exposing ``type.key``, ``title``, ``subtitle``,
        ``lccn``, ``authors``, ``publishers`` and ``publish_date``
    :return: False when ``existing`` has type '/type/delete', otherwise the
        result of attempt_merge() run with debug=True
    :raises AssertionError: when ``existing`` is neither deleted nor an edition
    """
    thing_type = existing.type.key
    if thing_type == '/type/delete':
        return False
    assert thing_type == '/type/edition'

    rec2 = {}
    rec2['full_title'] = existing.title
    if existing.subtitle:
        rec2['full_title'] += ' ' + existing.subtitle
    if existing.lccn:
        rec2['lccn'] = existing.lccn
    rec2['authors'] = [
        {'name': a.name, 'db_name': db_name(a)}
        for a in existing.authors
    ]
    if existing.publishers:
        rec2['publishers'] = existing.publishers
    if existing.publish_date:
        # The record key is 'publish_date'; the previous 'publisher_date'
        # spelling stored the value under a name no comparison record uses.
        rec2['publish_date'] = existing.publish_date

    e2 = build_marc(rec2)
    # Single-argument print calls produce the same output under the Python 2
    # print statement and the Python 3 print function.
    print('')
    print('e1: %s' % (e1,))
    print('e2: %s' % (e2,))
    return attempt_merge(e1, e2, threshold, debug=True)
示例#4
0
文件: merge.py 项目: yzou/openlibrary
def try_merge(e1, edition_key, existing):
    """
    Turn ``existing`` into a comparison record and test it against ``e1``.

    :param e1: record passed unchanged to attempt_merge() as the first side
    :param edition_key: not referenced inside this function
    :param existing: object exposing ``type.key``, ``title``, ``subtitle``,
        ``authors``, ``get()`` and item access
    :return: False when ``existing`` has type '/type/delete', otherwise the
        result of attempt_merge()
    :raises AssertionError: when ``existing`` is neither deleted nor an
        edition, or when an author does not resolve to a named '/type/author'
    """
    existing_type = existing.type.key
    if existing_type == '/type/delete':
        return False
    assert existing_type == '/type/edition'

    full_title = existing.title
    if existing.subtitle:
        full_title += ' ' + existing.subtitle
    rec2 = {'full_title': full_title}

    # Only fields with a truthy value on the edition are carried across.
    for name in ('isbn', 'isbn_10', 'isbn_13', 'lccn',
                 'publish_country', 'publishers', 'publish_date'):
        if existing.get(name):
            rec2[name] = existing[name]

    if existing.authors:
        author_entries = rec2['authors'] = []
        for a in existing.authors:
            a_type = a.type.key
            # Deleted authors are undeleted and redirects followed, repeatedly,
            # until some other type is reached.
            while a_type in ('/type/delete', '/type/redirect'):
                if a_type == '/type/delete':
                    a = undelete_author(a)
                else:
                    a = web.ctx.site.get(a.location)
                a_type = a.type.key
            assert a_type == '/type/author'
            assert a['name']
            author_entries.append({'name': a['name'], 'db_name': db_name(a)})

    e2 = build_marc(rec2)
    return attempt_merge(e1, e2, threshold, debug=False)
示例#5
0
def try_merge(e1, edition_key, existing):
    """
    Turn the existing edition into a comparable dict and run a thresholded
    comparison against e1 to decide whether both describe the same edition.

    :param dict e1: record compared against the existing edition
    :param str edition_key: key of ``existing`` (not referenced in the body)
    :param Thing existing: Edition object that most likely matches e1
    :rtype: bool
    :return: Whether e1 is sufficiently the same as the 'existing' edition;
        always False when ``existing`` has type '/type/delete'
    """
    kind = existing.type.key
    if kind == '/type/delete':
        return False
    assert kind == '/type/edition'

    title = existing.title
    if existing.subtitle:
        title += ' ' + existing.subtitle
    rec2 = {'full_title': title}

    copied_fields = (
        'isbn', 'isbn_10', 'isbn_13', 'lccn',
        'publish_country', 'publishers', 'publish_date',
    )
    for field in copied_fields:
        if existing.get(field):
            rec2[field] = existing[field]

    if existing.authors:
        authors = rec2['authors'] = []
        for a in existing.authors:
            # Follow redirects until a non-redirect object is reached.
            while a.type.key == '/type/redirect':
                a = web.ctx.site.get(a.location)
            # Anything that is not an author after resolution is skipped.
            if a.type.key != '/type/author':
                continue
            assert a['name']
            authors.append({'name': a['name'], 'db_name': db_name(a)})

    e2 = build_marc(rec2)
    return attempt_merge(e1, e2, threshold)
def test_merge2():
    """
    Two records sharing an ISBN-10 but differing in title wording, publish
    year and author name form are expected to merge at a threshold of 875
    once both have been passed through build_marc().
    """
    amazon_rec = {
        'publishers': [u'Collins'],
        'isbn_10': ['0002167530'],
        'number_of_pages': 287,
        'short_title': u'sea birds britain ireland',
        'normalized_title': u'sea birds britain ireland',
        'full_title': u'Sea Birds Britain Ireland',
        'titles': [u'Sea Birds Britain Ireland', u'sea birds britain ireland'],
        'publish_date': u'1975',
        'authors': [{'name': 'Stanley Cramp', 'db_name': 'Cramp, Stanley'}],
    }

    # NOTE(review): this record uses the key 'publisher' while the other
    # fixture uses 'publishers' -- confirm whether that is intentional.
    marc_rec = {
        'publisher': [u'Collins'],
        'isbn_10': [u'0002167530'],
        'short_title': u'seabirds of britain and i',
        'normalized_title': u'seabirds of britain and ireland',
        'full_title': u'seabirds of Britain and Ireland',
        'titles': [
            u'seabirds of Britain and Ireland',
            u'seabirds of britain and ireland',
        ],
        'publish_date': '1974',
        'authors': [{
            'db_name': u'Cramp, Stanley.',
            'entity_type': 'person',
            'name': u'Cramp, Stanley.',
            'personal_name': u'Cramp, Stanley.',
        }],
        'source_record_loc': 'marc_records_scriblio_net/part08.dat:61449973:855',
    }

    merge_threshold = 875
    # build_marc() will place all isbn_ types in the 'isbn' field.
    # compare_author_fields() expects all authors to have a db_name
    first = build_marc(amazon_rec)
    second = build_marc(marc_rec)
    assert attempt_merge(first, second, merge_threshold, debug=True)
示例#7
0
def try_merge(e1, edition_key, existing):
    """
    Turn the existing edition into a comparable dict and run a thresholded
    comparison against e1 to decide whether both describe the same edition.
    Used by add_book.load() -> add_book.find_match() to check whether two
    editions match.

    :param dict e1: Output of build_marc(import record candidate)
    :param str edition_key: edition key of existing (not referenced in the body)
    :param Thing existing: Edition object to be tested against e1, the object of edition_key
    :rtype: bool
    :return: Whether e1 is sufficiently the same as the 'existing' edition;
        always False when ``existing`` has type '/type/delete'
    """
    existing_type = existing.type.key
    if existing_type == '/type/delete':
        return False
    # FIXME: will fail if existing is a redirect.
    assert existing_type == '/type/edition'

    full_title = existing.title
    if existing.subtitle:
        full_title += ' ' + existing.subtitle
    rec2 = {'full_title': full_title}

    # Only fields with a truthy value on the edition are carried across.
    for name in ('isbn', 'isbn_10', 'isbn_13', 'lccn',
                 'publish_country', 'publishers', 'publish_date'):
        if existing.get(name):
            rec2[name] = existing[name]

    if existing.authors:
        author_entries = rec2['authors'] = []
        for a in existing.authors:
            # Follow redirects until a non-redirect object is reached.
            while a.type.key == '/type/redirect':
                a = web.ctx.site.get(a.location)
            # Entries that do not end up as an author are left out.
            if a.type.key != '/type/author':
                continue
            assert a['name']
            author_entries.append({'name': a['name'], 'db_name': db_name(a)})

    e2 = build_marc(rec2)
    return attempt_merge(e1, e2, threshold)
def test_author_contrib():
    """
    The author of the first record appears only under 'contribs' in the
    second; the pair is still expected to give an exact author match
    (score 125) and to merge at a threshold of 875.
    """
    first_rec = {
        'authors': [
            {'db_name': u'Bruner, Jerome S.', 'name': u'Bruner, Jerome S.'},
        ],
        'full_title': u'Contemporary approaches to cognition a symposium held at the University of Colorado.',
        'number_of_pages': 210,
        'publish_country': 'xxu',
        'publish_date': '1957',
        'publishers': [u'Harvard U.P'],
    }

    second_rec = {
        'authors': [{
            'db_name': u'University of Colorado (Boulder campus). Dept. of Psychology.',
            'name': u'University of Colorado (Boulder campus). Dept. of Psychology.',
        }],
        'contribs': [
            {'db_name': u'Bruner, Jerome S.', 'name': u'Bruner, Jerome S.'},
        ],
        'full_title': u'Contemporary approaches to cognition a symposium held at the University of Colorado',
        'lccn': ['57012963'],
        'number_of_pages': 210,
        'publish_country': 'mau',
        'publish_date': '1957',
        'publishers': [u'Harvard University Press'],
    }

    e1 = build_marc(first_rec)
    e2 = build_marc(second_rec)

    expected_author_result = ('authors', 'exact match', 125)
    assert compare_authors(e1, e2) == expected_author_result

    merge_threshold = 875
    assert attempt_merge(e1, e2, merge_threshold) is True
示例#9
0
def try_merge(e1, edition_key, existing):
    """
    Build a comparison record from ``existing`` and test it against ``e1``,
    printing both records for debugging.

    :param e1: record passed unchanged to attempt_merge() as the first side
    :param edition_key: not referenced inside this function
    :param existing: object exposing ``type.key``, ``title``, ``subtitle``,
        ``lccn``, ``authors``, ``publishers`` and ``publish_date``
    :return: False when ``existing`` has type '/type/delete', otherwise the
        result of attempt_merge() run with debug=True
    :raises AssertionError: when ``existing`` is neither deleted nor an edition
    """
    thing_type = existing.type.key
    if thing_type == '/type/delete':
        return False
    assert thing_type == '/type/edition'

    rec2 = {}
    rec2['full_title'] = existing.title
    if existing.subtitle:
        rec2['full_title'] += ' ' + existing.subtitle
    if existing.lccn:
        rec2['lccn'] = existing.lccn
    rec2['authors'] = [{'name': a.name, 'db_name': db_name(a)}
        for a in existing.authors]
    if existing.publishers:
        rec2['publishers'] = existing.publishers
    if existing.publish_date:
        # The record key is 'publish_date'; the previous 'publisher_date'
        # spelling stored the value under a name no comparison record uses.
        rec2['publish_date'] = existing.publish_date

    e2 = build_marc(rec2)
    # Single-argument print calls produce the same output under the Python 2
    # print statement and the Python 3 print function.
    print('')
    print('e1: %s' % (e1,))
    print('e2: %s' % (e2,))
    return attempt_merge(e1, e2, threshold, debug=True)