示例#1
0
def get_author_year_score_for_input_fields(result_record, hypothesis):
    """
    Collect evidences from the author and year comparisons only.

    For most sources, you should rather use get_basic_score_for_input_fields
    -- see there for more information.

    :param result_record: mapping for the candidate record; the keys
        'author_norm' and 'first_author_norm' are required, 'year' is read
        with .get() and may be missing.
    :param hypothesis: object whose get_detail() provides 'input_fields',
        'normalized_authors' and 'has_etal'.
    :return: the Evidences instance after it has been handed to
        add_author_evidence and add_year_evidence.
    """
    fields = hypothesis.get_detail('input_fields')
    collected = Evidences()

    reference_authors = hypothesis.get_detail('normalized_authors')
    if reference_authors is None:
        # The hypothesis carries no pre-normalized authors, so normalize
        # the raw author field (empty string when that is absent as well).
        raw_authors = fields.get('author', '')
        reference_authors = normalize_author_list(raw_authors)

    record_authors = result_record['author_norm']
    record_first_author = result_record['first_author_norm']
    etal_flag = hypothesis.get_detail('has_etal')
    add_author_evidence(
        collected,
        reference_authors,
        record_authors,
        record_first_author,
        has_etal=etal_flag)

    reference_year = fields.get('year')
    record_year = result_record.get('year')
    add_year_evidence(collected, reference_year, record_year)

    return collected
示例#2
0
def get_author_year_score_for_input_fields(result_record, hypothesis):
    """
    Build the evidences obtainable from author and year alone.

    :param result_record: mapping describing the candidate record;
        'author_norm' and 'first_author_norm' must be present, 'year' is
        looked up with .get().
    :param hypothesis: object whose get_detail() supplies 'input_fields',
        'normalized_authors' and 'has_etal'.
    :return: the Evidences instance after it has been passed through
        add_author_evidence and add_year_evidence.
    """
    input_fields = hypothesis.get_detail('input_fields')

    stored_authors = hypothesis.get_detail('normalized_authors')
    # Only normalize the raw author field when the hypothesis carries no
    # ready-made normalized authors.
    reference_authors = (
        normalize_author_list(input_fields.get('author', ''))
        if stored_authors is None else stored_authors)

    evidences = Evidences()
    add_author_evidence(
        evidences,
        reference_authors,
        result_record['author_norm'],
        result_record['first_author_norm'],
        has_etal=hypothesis.get_detail('has_etal'))
    add_year_evidence(
        evidences, input_fields.get('year'), result_record.get('year'))

    return evidences
示例#3
0
def get_thesis_score_for_input_fields(result_record, hypothesis):
    """
    returns Evidences for result_record being some sort of thesis matching
    hypothesis.

    This involves matching of author (including, to some extent, an initial),
    and year.

    We could try to match institutions, but for now we don't.

    The evidences recorded are:

    * -0.1 ("thesis with multiple authors?") when the record lists more than
      one author;
    * "author": the upper end of EVIDENCE_SCORE_RANGE when both the last name
      and the first initial agree, the lower end otherwise;
    * whatever add_year_evidence contributes for the two years;
    * "thesisString": the upper end of EVIDENCE_SCORE_RANGE when
      has_thesis_indicators(pub_raw) is truthy, the lower end otherwise.

    :param result_record: mapping for the candidate record; 'author_norm'
        (a sequence whose first entry is compared) and 'pub_raw' are
        required, 'year' is read with .get().
    :param hypothesis: object whose get_detail() supplies 'input_fields' and
        'normalized_authors' (a "last, first" string).
    :return: the populated Evidences instance.
    :raises ValueError: if either author string does not contain exactly one
        comma once whitespace and dots are stripped.
    """
    evidences = Evidences()

    # Theses should only have one author
    if len(result_record["author_norm"]) > 1:
        evidences.add_evidence(-0.1, "thesis with multiple authors?")

    input_fields = hypothesis.get_detail("input_fields")

    # compare authors manually to have initials included.
    ref_last, ref_first = re.sub(
        r"[\s.]", "",
        hypothesis.get_detail("normalized_authors")).lower().split(",")
    # Only the first initial takes part in the comparison.  Slice rather than
    # index so a reference author without any first name ("Smith,") yields an
    # empty initial instead of raising IndexError.
    ref_first_init = ref_first[:1]
    ads_last, ads_first_init = re.sub(
        r"[\s.]", "", result_record["author_norm"][0].lower()).split(",")

    score_range = current_app.config["EVIDENCE_SCORE_RANGE"]

    if ref_last == ads_last and ref_first_init == ads_first_init:
        evidences.add_evidence(score_range[1], "author")
    else:
        evidences.add_evidence(score_range[0], "author")

    add_year_evidence(evidences, input_fields.get('year'),
                      result_record.get('year'))

    if has_thesis_indicators(result_record["pub_raw"]):
        evidences.add_evidence(score_range[1], "thesisString")
    else:
        evidences.add_evidence(score_range[0], "thesisString")
    # XXX TODO: When we have pub_raw, we could also check for places;
    # ideally, there would be -1 for a place in refstring not in ADS
    # and +1 for a place in ADS that's also in the refstring.  We'd
    # need a list of places then, though.  I'd have that as a separate
    # evidence.

    return evidences
示例#4
0
def get_thesis_score_for_input_fields(result_record, hypothesis):
    """
    returns Evidences for result_record being some sort of thesis matching
    hypothesis.

    This involves matching of doctype, author (including, to some extent, an
    initial), year, and affiliation.

    The evidences recorded are:

    * "doctype": the upper end of EVIDENCE_SCORE_RANGE for "phdthesis" or
      "mastersthesis" records, the lower end otherwise;
    * "author": for single-author records, 0.7 of the upper end for a
      matching last name plus 0.3 of it for a matching first-name part; the
      lower end for any other author count;
    * whatever add_year_evidence contributes for the two years;
    * "affiliation": the fraction of whitespace-separated affiliation words
      found in the reference string (checked with ``in``), or 0.0 when the
      record has no affiliation words or there is no reference string.

    :param result_record: mapping for the candidate record; 'doctype' and
        'author_norm' are required, 'year' and 'aff_raw' are read with
        .get().
    :param hypothesis: object whose get_detail() supplies 'input_fields'
        (read for 'year' and 'refstr') and 'normalized_authors' (a
        "last, first" string).
    :return: the populated Evidences instance.
    :raises ValueError: for single-author records, if either author string
        does not contain exactly one comma once whitespace and dots are
        stripped.
    """
    evidences = Evidences()

    score_range = current_app.config["EVIDENCE_SCORE_RANGE"]
    lowest, highest = score_range[0], score_range[1]

    # consider only thesis records
    if result_record["doctype"] in ("phdthesis", "mastersthesis"):
        evidences.add_evidence(highest, "doctype")
    else:
        evidences.add_evidence(lowest, "doctype")

    input_fields = hypothesis.get_detail("input_fields")

    # consider number of authors
    if len(result_record["author_norm"]) == 1:
        # compare authors manually to have initials included.
        ref_lastname, ref_first_init = re.sub(
            r"[\s.]", "",
            hypothesis.get_detail("normalized_authors")).lower().split(",")
        ads_lastname, ads_first_init = re.sub(
            r"[\s.]", "", result_record["author_norm"][0].lower()).split(",")
        # lastname match is worth 0.7, first initial 0.3
        author_score = (
            int(ref_lastname == ads_lastname) * highest * 0.7
            + int(ref_first_init == ads_first_init) * highest * 0.3)
    else:
        author_score = lowest
    evidences.add_evidence(author_score, "author")

    add_year_evidence(evidences, input_fields.get('year'),
                      result_record.get('year'))

    # count how many words of affiliation is in reference string
    # A missing reference string or missing affiliation is treated like
    # "no word matched": it must not raise TypeError/KeyError, and the
    # division below must not run with a zero denominator.
    ref_str = input_fields.get('refstr') or ''
    aff_words = ' '.join(result_record.get("aff_raw") or []).split()
    if aff_words:
        aff_score = sum(
            1.0 for word in aff_words if word in ref_str) / len(aff_words)
    else:
        aff_score = 0.0
    evidences.add_evidence(aff_score, "affiliation")

    return evidences