Python metadata_comparison_print示例，invenio.bibauthorid_general_utils.metadata_comparison_print Python示例

示例#1

0

显示文件

def _compare_key_words(bib1, bib2):
    """Score the keyword overlap of two bibrefrecs.

    The keywords of each side are looked up with ``_find_key_words`` and
    both collections are handed to ``jaccard``; its result is returned
    unchanged.
    """
    metadata_comparison_print("Comparing key words.")
    left_words = _find_key_words(bib1)
    right_words = _find_key_words(bib2)
    score = jaccard(left_words, right_words)
    summary = " key words got (%s vs %s) for %s" % (left_words, right_words, score)
    metadata_comparison_print(summary)
    return score

示例#2

0

显示文件

def _compare_papers(bib1, bib2):
    """Check whether two bibrefrecs refer to the same paper.

    Element 2 of each argument is compared. Returns '-' when the two are
    equal and '?' otherwise.
    """
    metadata_comparison_print(
        "Checking if the two bib refs are in the same paper...")
    same_paper = bib1[2] == bib2[2]
    if not same_paper:
        return '?'
    metadata_comparison_print("  ... Yes they are! Are you crazy, man?")
    return '-'

示例#3

0

显示文件

文件： bibauthorid_comparison.py 项目： aw-bib/tind-invenio

def _compare_key_words(bib1, bib2):
    """Compare the keywords attached to two bibrefrecs.

    Returns whatever ``jaccard`` yields for the two keyword collections
    obtained from ``_find_key_words``.
    """
    metadata_comparison_print("Comparing key words.")
    kw_first, kw_second = _find_key_words(bib1), _find_key_words(bib2)
    similarity = jaccard(kw_first, kw_second)
    metadata_comparison_print(
        " key words got (%s vs %s) for %s" % (kw_first, kw_second, similarity)
    )
    return similarity

示例#4

0

显示文件

文件： bibauthorid_comparison.py 项目： aw-bib/tind-invenio

def _compare_coauthors(bib1, bib2):
    """Compare the coauthor collections of two bibrefrecs.

    Both collections come from ``_find_coauthors``; the value returned is
    the result of ``jaccard`` on them.
    """
    metadata_comparison_print("Comparing authors.")

    coauthors_a, coauthors_b = _find_coauthors(bib1), _find_coauthors(bib2)
    overlap = jaccard(coauthors_a, coauthors_b)

    metadata_comparison_print("   coauthors lists as %s" % overlap)
    return overlap

示例#5

0

显示文件

文件： bibauthorid_comparison.py 项目： aw-bib/tind-invenio

def _compare_citations(bib1, bib2):
    """Compare the citation collections of two bibrefrecs.

    Both collections come from ``_find_citations``; the value returned is
    the result of ``jaccard`` on them.
    """
    metadata_comparison_print("Comparing citations.")

    cited_a, cited_b = _find_citations(bib1), _find_citations(bib2)
    overlap = jaccard(cited_a, cited_b)

    metadata_comparison_print(" citations as %s" % overlap)
    return overlap

示例#6

0

显示文件

def _compare_coauthors(bib1, bib2):
    """Score how much the coauthors of two bibrefrecs overlap.

    Fetches each side with ``_find_coauthors`` and returns the ``jaccard``
    result for the pair.
    """
    metadata_comparison_print("Comparing authors.")

    first_authors = _find_coauthors(bib1)
    second_authors = _find_coauthors(bib2)
    score = jaccard(first_authors, second_authors)

    message = "   coauthors lists as %s" % score
    metadata_comparison_print(message)
    return score

示例#7

0

显示文件

def _compare_citations(bib1, bib2):
    """Score how much the citations of two bibrefrecs overlap.

    Fetches each side with ``_find_citations`` and returns the ``jaccard``
    result for the pair.
    """
    metadata_comparison_print("Comparing citations.")

    first_cites = _find_citations(bib1)
    second_cites = _find_citations(bib2)
    score = jaccard(first_cites, second_cites)

    message = " citations as %s" % score
    metadata_comparison_print(message)
    return score

示例#8

0

显示文件

def compare_bibrefrecs(bibref1, bibref2):
    '''
    Compare two bibrefrecs (100:123,456) using all metadata functions listed
    in cbrr_func_weight.

    Returns one of:
        * whatever _compare_papers returns when that is not '?'. In
          particular '-' when both bibrefrecs are in the same paper, so they
          do not belong together for sure.
        * a pair of numbers in [0, 1]: the weighted average of the results
          of the functions that managed to compare the bibrefrecs, and the
          ratio of the weight of those functions to the total weight of all
          functions.
        * (0, 0) when no function managed to compare the bibrefrecs (there
          was insufficient metadata). The first value carries no meaning.

        Example:
            '(0.7, 0.4)' - functions holding 40% of the total weight managed
                to compare the bibrefrecs and their weighted average is 0.7.
            '-' - the two bibrefrecs are in the same paper.
            '(0, 0)' - there was insufficient metadata to compare them.

    Raises AssertionError if a metadata function returns something other
    than '?' or a number in [0, 1], or if a computed value is out of range.
    '''

    metadata_comparison_print("")
    metadata_comparison_print("Started comparing %s vs %s"% (str(bibref1),str(bibref2)))
    # try first the metrics, which might return + or -
    papers = _compare_papers(bibref1, bibref2)
    if papers != '?':
        return papers

#    if bconfig.CFG_INSPIRE_SITE:
#        insp_ids = _compare_inspireid(bibref1, bibref2)
#        if insp_ids != '?':
#            return insp_ids

    results = []
    for func, weight, fname in cbrr_func_weight:
        r = func(bibref1, bibref2)
        assert r == '?' or (r <= 1 and r >= 0), 'COMPARISON %s returned %s for %s' % (fname, str(r), str(len(results)))
        results.append((r, weight))

    total_weights = sum(weight for _, weight in results)

    metadata_comparison_print("Final comparison vector: %s." % str(results))

    # Build a real list instead of calling filter(): on Python 3 filter()
    # returns a lazy iterator that is always truthy (so the emptiness check
    # below would never fire) and that can only be consumed once (so the
    # second sum would see nothing).
    results = [(value, weight) for value, weight in results if value != '?']

    if not results:
        metadata_comparison_print("Final result: Skipped all tests, returning 0,0")
        return 0, 0

    cert = sum(value * weight for value, weight in results)
    prob = sum(weight for _, weight in results)
    # float() keeps Python 2 from truncating if integer weights are used.
    vals = cert / float(prob), prob / float(total_weights)
    assert vals[0] >= 0 and vals[0] <= 1, 'COMPARISON: RETURNING VAL out of range'
    assert vals[1] >= 0 and vals[1] <= 1, 'COMPARISON: RETURNING PROB out of range'

    metadata_comparison_print("Final result: %s" % str(vals))

    return vals

示例#9

0

显示文件

文件： bibauthorid_comparison.py 项目： aw-bib/tind-invenio

def compare_bibrefrecs(bibref1, bibref2):
    """
    Compare two bibrefrecs (100:123,456) using all metadata functions listed
    in cbrr_func_weight.

    Returns one of:
        * whatever _compare_papers returns when that is not '?'. In
          particular '-' when both bibrefrecs are in the same paper, so they
          do not belong together for sure.
        * a pair of numbers in [0, 1]: the weighted average of the results
          of the functions that managed to compare the bibrefrecs, and the
          ratio of the weight of those functions to the total weight of all
          functions.
        * (0, 0) when no function managed to compare the bibrefrecs (there
          was insufficient metadata). The first value carries no meaning.

        Example:
            '(0.7, 0.4)' - functions holding 40% of the total weight managed
                to compare the bibrefrecs and their weighted average is 0.7.
            '-' - the two bibrefrecs are in the same paper.
            '(0, 0)' - there was insufficient metadata to compare them.

    Raises AssertionError if a metadata function returns something other
    than '?' or a number in [0, 1], or if a computed value is out of range.
    """

    metadata_comparison_print("")
    metadata_comparison_print("Started comparing %s vs %s" % (str(bibref1), str(bibref2)))
    # try first the metrics, which might return + or -
    papers = _compare_papers(bibref1, bibref2)
    if papers != "?":
        return papers

    #    if bconfig.CFG_INSPIRE_SITE:
    #        insp_ids = _compare_inspireid(bibref1, bibref2)
    #        if insp_ids != '?':
    #            return insp_ids

    results = []
    for func, weight, fname in cbrr_func_weight:
        r = func(bibref1, bibref2)
        assert r == "?" or (r <= 1 and r >= 0), "COMPARISON %s returned %s for %s" % (fname, str(r), str(len(results)))
        results.append((r, weight))

    total_weights = sum(weight for _, weight in results)

    metadata_comparison_print("Final comparison vector: %s." % str(results))

    # Build a real list instead of calling filter(): on Python 3 filter()
    # returns a lazy iterator that is always truthy (so the emptiness check
    # below would never fire) and that can only be consumed once (so the
    # second sum would see nothing).
    results = [(value, weight) for value, weight in results if value != "?"]

    if not results:
        metadata_comparison_print("Final result: Skipped all tests, returning 0,0")
        return (0, 0)

    cert = sum(value * weight for value, weight in results)
    prob = sum(weight for _, weight in results)
    # float() keeps Python 2 from truncating if integer weights are used.
    vals = cert / float(prob), prob / float(total_weights)
    assert vals[0] >= 0 and vals[0] <= 1, "COMPARISON: RETURNING VAL out of range"
    assert vals[1] >= 0 and vals[1] <= 1, "COMPARISON: RETURNING PROB out of range"

    metadata_comparison_print("Final result: %s" % str(vals))

    return vals

示例#10

0

显示文件

def _compare_unified_affiliations(bib1, bib2):
    """Compare the affiliations of two bibrefrecs.

    The affiliations of each side are fetched with ``_find_affiliation`` and
    the result of ``jaccard`` on the two collections is returned.
    """
    metadata_comparison_print("Comparing unified affiliations.")

    aff1 = _find_affiliation(bib1)
    aff2 = _find_affiliation(bib2)

    ret = jaccard(aff1, aff2)

    # The format string must be interpolated with %; passing the tuple as a
    # second argument left the "%s" placeholders unfilled in the message.
    metadata_comparison_print("Affiliations: %s %s %s" % (str(aff1), str(aff2), str(ret)))
    return ret

示例#11

0

显示文件

文件： bibauthorid_comparison.py 项目： aw-bib/tind-invenio

def _compare_unified_affiliations(bib1, bib2):
    """Compare the affiliations of two bibrefrecs.

    The affiliations of each side are fetched with ``_find_affiliation`` and
    the result of ``jaccard`` on the two collections is returned.
    """
    metadata_comparison_print("Comparing unified affiliations.")

    aff1 = _find_affiliation(bib1)
    aff2 = _find_affiliation(bib2)

    ret = jaccard(aff1, aff2)

    # The format string must be interpolated with %; passing the tuple as a
    # second argument left the "%s" placeholders unfilled in the message.
    metadata_comparison_print("Affiliations: %s %s %s" % (str(aff1), str(aff2), str(ret)))
    return ret

示例#12

0

显示文件

文件： bibauthorid_comparison.py 项目： aw-bib/tind-invenio

def _compare_collaboration(bib1, bib2):
    """Compare the collaborations attached to two bibrefrecs.

    Returns "?" unless each side has exactly one collaboration; otherwise
    1.0 when the two collections are equal and 0.0 when they differ.
    """
    metadata_comparison_print("Comparing collaboration.")

    first = _find_collaboration(bib1)
    second = _find_collaboration(bib2)
    n_first, n_second = len(first), len(second)

    metadata_comparison_print("Found %d, %d different collaborations for the two sets." % (n_first, n_second))

    # A verdict is only given when both sides name a single collaboration.
    if not (n_first == 1 and n_second == 1):
        return "?"
    return 1.0 if first == second else 0.0

示例#13

0

显示文件

def _compare_collaboration(bib1, bib2):
    """Decide whether two bibrefrecs share their collaboration.

    Returns '?' unless each side has exactly one collaboration; otherwise
    1.0 for equal collections and 0.0 for different ones.
    """
    metadata_comparison_print("Comparing collaboration.")

    found1, found2 = _find_collaboration(bib1), _find_collaboration(bib2)
    sizes = (len(found1), len(found2))

    metadata_comparison_print("Found %d, %d different collaborations for the two sets." % sizes)

    # Only a one-to-one comparison is considered meaningful here.
    if sizes != (1, 1):
        return '?'
    if found1 == found2:
        return 1.
    return 0.

示例#14

0

显示文件

def jaccard(set1, set2):
    '''
    Ratio of the items shared by two sets to the distinct items in both.

    Returns '?' when either set is empty. Otherwise returns, as a float,
    len(set1 & set2) divided by the number of distinct items in both sets.
    '''
    size1, size2 = len(set1), len(set2)
    # NOTE(review): "nthe" in this message looks like a typo; it is left
    # untouched so the emitted text stays identical.
    metadata_comparison_print("Jaccard: Found %d items in the first set and %d in nthe second set" % (size1, size2))

    if not (set1 and set2):
        return '?'

    common = len(set1 & set2)
    # Shared items are counted in both sizes, so subtract them once.
    distinct = size1 + size2 - common
    ratio = common / float(distinct)

    metadata_comparison_print("Jaccard: %d common items; returning %f" % (common, ratio))
    return ratio

示例#15

0

显示文件

文件： bibauthorid_comparison.py 项目： pombredanne/invenio-op

def jaccard(set1, set2):
    '''
    Overlap score of two sets: shared items over distinct items.

    Returns '?' if one of the sets is empty; otherwise the float
    len(set1 & set2) / (len(set1) + len(set2) - len(set1 & set2)).
    '''
    count_a = len(set1)
    count_b = len(set2)
    # NOTE(review): "nthe" in this message looks like a typo; it is left
    # untouched so the emitted text stays identical.
    metadata_comparison_print("Jaccard: Found %d items in the first set and %d in nthe second set" % (count_a, count_b))

    if not (set1 and set2):
        return '?'

    shared = len(set1 & set2)
    score = shared / float(count_a + count_b - shared)

    metadata_comparison_print("Jaccard: %d common items; returning %f" % (shared, score))
    return score

示例#16

0

显示文件

文件： bibauthorid_comparison.py 项目： aw-bib/tind-invenio

def _compare_email(bib1, bib2):
    """Compare the email addresses attached to two bibrefrecs.

    Returns "?" unless each side has exactly one address; otherwise 1.0
    when the two collections are equal and 0.3 when they differ.
    """
    metadata_comparison_print("Comparing email addresses.")

    emails1 = _find_email(bib1)
    emails2 = _find_email(bib2)
    count1, count2 = len(emails1), len(emails2)

    metadata_comparison_print("Found %d, %d different email addresses for the two sets." % (count1, count2))

    # A verdict is only given when both sides carry a single address.
    if not (count1 == 1 and count2 == 1):
        return "?"

    if emails1 == emails2:
        metadata_comparison_print("The addresses are the same.")
        return 1.0

    metadata_comparison_print("The addresses are there, but different.")
    return 0.3

示例#17

0

显示文件

文件： bibauthorid_comparison.py 项目： aw-bib/tind-invenio

def _compare_inspireid(bib1, bib2):
    """Compare the inspire ids attached to two bibrefrecs.

    Returns "?" unless each side has exactly one id; otherwise 1 when the
    two collections are equal and 0 when they differ.
    """
    metadata_comparison_print("Comparing inspire ids.")

    ids1 = _find_inspireid(bib1)
    ids2 = _find_inspireid(bib2)
    count1, count2 = len(ids1), len(ids2)

    metadata_comparison_print("Found %d, %d different inspire ids for the two sets." % (count1, count2))

    # A verdict is only given when both sides carry a single id.
    if not (count1 == 1 and count2 == 1):
        return "?"

    if ids1 == ids2:
        metadata_comparison_print("The ids are the same.")
        return 1

    metadata_comparison_print("The ids are different.")
    return 0

示例#18

0

显示文件

def _compare_email(bib1, bib2):
    """Decide whether two bibrefrecs share their email address.

    Returns '?' unless each side has exactly one address; otherwise 1.0
    for equal collections and 0.3 for different ones.
    """
    metadata_comparison_print("Comparing email addresses.")

    found1, found2 = _find_email(bib1), _find_email(bib2)
    sizes = (len(found1), len(found2))

    metadata_comparison_print("Found %d, %d different email addresses for the two sets." % sizes)

    # Only a one-to-one comparison is considered meaningful here.
    if sizes != (1, 1):
        return '?'

    identical = found1 == found2
    if identical:
        metadata_comparison_print("The addresses are the same.")
        return 1.0

    metadata_comparison_print("The addresses are there, but different.")
    return 0.3

示例#19

0

显示文件

def _compare_inspireid(bib1, bib2):
    """Decide whether two bibrefrecs share their inspire id.

    Returns '?' unless each side has exactly one id; otherwise 1 for equal
    collections and 0 for different ones.
    """
    metadata_comparison_print("Comparing inspire ids.")

    found1, found2 = _find_inspireid(bib1), _find_inspireid(bib2)
    sizes = (len(found1), len(found2))

    metadata_comparison_print("Found %d, %d different inspire ids for the two sets." % sizes)

    # Only a one-to-one comparison is considered meaningful here.
    if sizes != (1, 1):
        return '?'

    identical = found1 == found2
    if identical:
        metadata_comparison_print("The ids are the same.")
        return 1

    metadata_comparison_print("The ids are different.")
    return 0

示例#20

0

显示文件

def _compare_names(bib1, bib2):
    """Compare the names behind two bibrefrecs.

    Each name is looked up with ``get_name_by_bibrecref``. Returns '?' when
    either name is falsy, otherwise the result of
    ``compare_names(name1, name2, False)``.
    """
    metadata_comparison_print("Comparing names.")

    first_name = get_name_by_bibrecref(bib1)
    second_name = get_name_by_bibrecref(bib2)

    metadata_comparison_print(" Found %s and %s" % (first_name, second_name))

    # Without both names there is nothing to compare.
    if not (first_name and second_name):
        return '?'

    score = compare_names(first_name, second_name, False)
    metadata_comparison_print(" cmp(%s,%s) = %s" % (first_name, second_name, str(score)))
    return score

示例#21

0

显示文件

文件： bibauthorid_comparison.py 项目： aw-bib/tind-invenio

def _compare_names(bib1, bib2):
    """Compare the names behind two bibrefrecs.

    Each name is looked up with ``cached_get_name_by_bibrecref``. Returns
    "?" when either name is falsy, otherwise the result of
    ``cached_compare_names`` for the two names.
    """
    metadata_comparison_print("Comparing names.")

    first_name = cached_get_name_by_bibrecref(bib1)
    second_name = cached_get_name_by_bibrecref(bib2)

    metadata_comparison_print(" Found %s and %s" % (first_name, second_name))

    # Without both names there is nothing to compare.
    if not (first_name and second_name):
        return "?"

    score = cached_compare_names(first_name, second_name)
    metadata_comparison_print(" cmp(%s,%s) = %s" % (first_name, second_name, str(score)))
    return score

示例#22

0

显示文件

文件： bibauthorid_comparison.py 项目： aw-bib/tind-invenio

def _compare_papers(bib1, bib2):
    """Check whether two bibrefrecs belong to the same paper.

    Compares element 2 of each argument and returns "-" when they are
    equal, "?" otherwise.
    """
    metadata_comparison_print("Checking if the two bib refs are in the same paper...")
    verdict = "?"
    if bib1[2] == bib2[2]:
        metadata_comparison_print("  ... Yes they are! Are you crazy, man?")
        verdict = "-"
    return verdict