Example #1
import logging

# `Candidate` and `confusion_matrix` are assumed to be importable from the
# package under test; the exact module paths depend on the project layout.


def test_confusion_matrix(caplog):
    """Test the confusion matrix."""
    caplog.set_level(logging.INFO)

    # Synthesize candidates
    cand1 = Candidate(id=1, type="type")
    cand2 = Candidate(id=2, type="type")
    cand3 = Candidate(id=3, type="type")
    cand4 = Candidate(id=4, type="type")

    # pred and gold as sets
    pred = {cand1, cand2, cand3}
    gold = {cand1, cand2, cand4}
    (TP, FP, FN) = confusion_matrix(pred, gold)

    assert TP == {cand1, cand2}
    assert FP == {cand3}
    assert FN == {cand4}

    # pred as a list
    pred = [cand1, cand2, cand3]
    (TP, FP, FN) = confusion_matrix(pred, gold)

    assert TP == {cand1, cand2}
    assert FP == {cand3}
    assert FN == {cand4}

    # Test that element order does not affect the output
    pred = [cand3, cand2, cand1]
    (TP, FP, FN) = confusion_matrix(pred, gold)

    assert TP == {cand1, cand2}
    assert FP == {cand3}
    assert FN == {cand4}

    # Assume the following are entities
    pred = {"1", "2", "3"}
    gold = {"1", "2", "4"}
    (TP, FP, FN) = confusion_matrix(pred, gold)

    assert TP == {"1", "2"}
    assert FP == {"3"}
    assert FN == {"4"}
Example #2
from tqdm import tqdm

# `confusion_matrix` and `get_gold_dict` are assumed to be defined in, or
# imported into, the surrounding utils module.


def entity_level_f1(candidates, gold_file, corpus=None):
    """Checks entity-level recall of candidates compared to gold.

    Turns a CandidateSet into a normal set of entity-level tuples
    (doc, president_name, birthplace)
    then compares this to the entity-level tuples found in the gold.

    Example Usage:
        from hardware_utils import entity_level_f1
        candidates = ...  # CandidateSet of all candidates you want to consider
        gold_file = 'tutorials/tables/data/hardware/hardware_gold.csv'
        entity_level_f1(candidates, gold_file)
    """
    docs = [(doc.name).upper() for doc in corpus] if corpus else None
    gold_set = get_gold_dict(gold_file, docs=docs)
    if len(gold_set) == 0:
        print("Gold File: {gold_file}")
        print("Gold set is empty.")
        return
    # Turn CandidateSet into set of tuples
    print("Preparing candidates...")
    entities = set()
    for c in tqdm(candidates):
        doc = c[0].context.sentence.document.name.upper()
        president_name = c[0].context.get_span().upper()
        birthplace = c[1].context.get_span().upper()
        entities.add((doc, president_name, birthplace))

    (TP_set, FP_set, FN_set) = confusion_matrix(entities, gold_set)
    TP = len(TP_set)
    FP = len(FP_set)
    FN = len(FN_set)

    prec = TP / (TP + FP) if TP + FP > 0 else float("nan")
    rec = TP / (TP + FN) if TP + FN > 0 else float("nan")
    f1 = 2 * (prec * rec) / (prec + rec) if prec + rec > 0 else float("nan")
    print("========================================")
    print("Scoring on Entity-Level Gold Data")
    print("========================================")
    print(f"Corpus Precision {prec:.3}")
    print(f"Corpus Recall    {rec:.3}")
    print(f"Corpus F1        {f1:.3}")
    print("----------------------------------------")
    print(f"TP: {TP} | FP: {FP} | FN: {FN}")
    print("========================================\n")
    return [sorted(list(x)) for x in [TP_set, FP_set, FN_set]]
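
get_gold_dict is not shown above. A minimal sketch that would satisfy this call site, assuming a headerless CSV with columns (doc, president_name, birthplace); the column layout is an assumption:

import csv

def get_gold_dict(gold_file, docs=None):
    """Load gold (doc, president_name, birthplace) tuples, uppercased.

    `docs`, if given, restricts the gold set to those document names.
    Assumes a headerless CSV; adjust the parsing to your gold file.
    """
    gold = set()
    with open(gold_file) as f:
        for row in csv.reader(f):
            doc, president_name, birthplace = (x.strip().upper() for x in row[:3])
            if docs is None or doc in docs:
                gold.add((doc, president_name, birthplace))
    return gold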
Example #3
import logging
import re

from tqdm import tqdm

# `confusion_matrix`, `get_gold_dict`, and the other helpers referenced
# below are assumed to be defined in the surrounding module.

logger = logging.getLogger(__name__)


def entity_level_f1(
    candidates, gold_file, attribute=None, corpus=None, parts_by_doc=None
):
    """Checks entity-level recall of candidates compared to gold.

    Turns a CandidateSet into a normal set of entity-level tuples
    (doc, part, [attribute_value])
    then compares this to the entity-level tuples found in the gold.

    Example Usage:
        from hardware_utils import entity_level_f1
        candidates = ...  # CandidateSet of all candidates you want to consider
        gold_file = 'tutorials/tables/data/hardware/hardware_gold.csv'
        entity_level_f1(candidates, gold_file, 'stg_temp_min')
    """
    docs = [(doc.name).upper() for doc in corpus] if corpus else None
    val_on = attribute is not None
    gold_set = get_gold_dict(
        gold_file,
        docs=docs,
        doc_on=True,
        part_on=True,
        val_on=val_on,
        attribute=attribute,
    )
    if len(gold_set) == 0:
        logger.info(f"Gold File: {gold_file}\n Attribute: {attribute}")
        logger.error("Gold set is empty.")
        return
    # Turn CandidateSet into set of tuples
    logger.info("Preparing candidates...")
    entities = set()
    for c in tqdm(candidates):
        part = c[0].context.get_span()
        doc = c[0].context.sentence.document.name.upper()
        if attribute:
            val = c[1].context.get_span()
        for p in get_implied_parts(part, doc, parts_by_doc):
            if attribute:
                entities.add((doc, p, val))
            else:
                entities.add((doc, p))

    (TP_set, FP_set, FN_set) = confusion_matrix(entities, gold_set)
    TP = len(TP_set)
    FP = len(FP_set)
    FN = len(FN_set)

    prec = TP / (TP + FP) if TP + FP > 0 else float("nan")
    rec = TP / (TP + FN) if TP + FN > 0 else float("nan")
    f1 = 2 * (prec * rec) / (prec + rec) if prec + rec > 0 else float("nan")
    logger.info("========================================")
    logger.info("Scoring on Entity-Level Gold Data")
    logger.info("========================================")
    logger.info(f"Corpus Precision {prec:.3}")
    logger.info(f"Corpus Recall    {rec:.3}")
    logger.info(f"Corpus F1        {f1:.3}")
    logger.info("----------------------------------------")
    logger.info(f"TP: {TP} | FP: {FP} | FN: {FN}")
    logger.info("========================================\n")
    return [sorted(list(x)) for x in [TP_set, FP_set, FN_set]]
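
# get_implied_parts is not defined above. A minimal placeholder consistent
# with how it is called (the suffix-expansion rule below is a guess):
def get_implied_parts(part, doc, parts_by_doc):
    # The extracted part number itself is always implied.
    yield part
    if parts_by_doc:
        for p in parts_by_doc.get(doc, []):
            # Hypothetical rule: also count document-specific part numbers
            # that extend the extracted one (e.g. suffixed variants).
            if p != part and p.startswith(part):
                yield p
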
def entity_level_f1(
    candidates, gold_file, attribute=None, corpus=None, stations_mapping_dict=None
):
    """Checks entity-level recall of candidates compared to gold.

    Turns a CandidateSet into a normal set of entity-level tuples
    (doc, part, [attribute_value])
    then compares this to the entity-level tuples found in the gold.

    Example Usage:
        from electricity_utils import entity_level_f1
        candidates = ...  # CandidateSet of all candidates you want to consider
        gold_file = 'tutorials/tables/data/electricity/electricity_gold.csv'
        entity_level_f1(candidates, gold_file, 'elec_price_vol')
    """
    docs = [(re.sub("Document ", "", doc.name)).upper()
            for doc in corpus] if corpus else None
    price_on = attribute is not None
    gold_set = get_gold_dict(
        gold_file,
        docs=docs,
        doc_on=True,
        station_on=True,
        price_on=price_on,
        attribute=attribute,
        stations_mapping_dict=stations_mapping_dict
    )
    if len(gold_set) == 0:
        print(f"Gold File: {gold_file}\n Attribute: {attribute}")
        print("Gold set is empty.")
        return
    # Turn CandidateSet into set of tuples
    print("Preparing candidates...")
    entities = set()
    for c in tqdm(candidates):
        station = c[0].context.get_span().upper()
        doc = c[0].context.sentence.document.name.upper()
        price = c[1].context.get_span()

        # Account for all station abbreviations: we do not tackle the
        # entity-linking problem (the same entity may have multiple surface
        # forms), so we only keep an entity under the name used in the
        # gold_dict.
        stations = (
            stations_mapping_dict[station.lower()]
            if stations_mapping_dict is not None
            else [station]
        )
        added_any = False
        for station_abbr in stations:
            if (doc, station_abbr.upper(), price) in gold_set:
                entities.add((doc, station_abbr.upper(), price))
                added_any = True
        if not added_any:
            entities.add((doc, station, price))
    
    (TP_set, FP_set, FN_set) = confusion_matrix(entities, gold_set)
    TP = len(TP_set)
    FP = len(FP_set)
    FN = len(FN_set)

    prec = TP / (TP + FP) if TP + FP > 0 else float("nan")
    rec = TP / (TP + FN) if TP + FN > 0 else float("nan")
    f1 = 2 * (prec * rec) / (prec + rec) if prec + rec > 0 else float("nan")
    print("========================================")
    print("Scoring on Entity-Level Gold Data")
    print("========================================")
    print(f"Corpus Precision {prec:.3}")
    print(f"Corpus Recall    {rec:.3}")
    print(f"Corpus F1        {f1:.3}")
    print("----------------------------------------")
    print(f"TP: {TP} | FP: {FP} | FN: {FN}")
    print("========================================\n")
    return [sorted(list(x)) for x in [TP_set, FP_set, FN_set]]
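
# Illustrative shape of stations_mapping_dict for the variant above: keys
# are lowercased station spans, values list alternative names (all names
# here are made up):
stations_mapping_dict_example = {
    "north station": ["NST", "North", "N. Station"],
    "east hub": ["EH", "East Hub"],
}
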
def entity_level_f1(candidates,
                    gold_file,
                    attribute=None,
                    corpus=None,
                    row_on=True,
                    col_on=False):
    """Checks entity-level recall of candidates compared to gold.

    Turns a CandidateSet into a normal set of entity-level tuples
    (doc, data, [attribute_value])
    then compares this to the entity-level tuples found in the gold.
    """
    docs = [(re.sub("Document ", "", doc.name)).upper()
            for doc in corpus] if corpus else None
    gold_set = get_gold_dict(
        gold_file,
        docs=docs,
        row_on=row_on,
        col_on=col_on,
    )
    if len(gold_set) == 0:
        print(f"Gold File: {gold_file}\n Attribute: {attribute}")
        print("Gold set is empty.")
        return
    # Turn CandidateSet into set of tuples
    print("Preparing candidates...")
    entities = set()
    for c in tqdm(candidates):
        doc = (c[0].context.sentence.document.name).upper()
        data = (c[0].context.get_span()).upper()
        align = (c[1].context.get_span()).upper()
        if row_on and col_on:
            align2 = (c[2].context.get_span()).upper()

        # Account for multiple labels given with "|" separators (one match suffices)
        matches = [
            x for x in gold_set
            if (x[0] == doc and data_matches_gold(data, x[1])
                and align_matches_gold(align, x[2]) and
                (not (row_on and col_on) or align_matches_gold(align2, x[3])))
        ]
        if matches:
            for match in matches:
                align_complete = match[2]
                data = match[1]
                if row_on and col_on:
                    entities.add((doc, data, align_complete, match[3]))
                else:
                    entities.add((doc, data, align_complete))
        else:
            if row_on and col_on:
                entities.add((doc, data, align, align2))
            else:
                entities.add((doc, data, align))

    (TP_set, FP_set, FN_set) = confusion_matrix(entities, gold_set)
    TP = len(TP_set)
    FP = len(FP_set)
    FN = len(FN_set)

    prec = TP / (TP + FP) if TP + FP > 0 else float("nan")
    rec = TP / (TP + FN) if TP + FN > 0 else float("nan")
    f1 = 2 * (prec * rec) / (prec + rec) if prec + rec > 0 else float("nan")
    print("========================================")
    print("Scoring on Entity-Level Gold Data")
    print("========================================")
    print(f"Corpus Precision {prec:.3}")
    print(f"Corpus Recall    {rec:.3}")
    print(f"Corpus F1        {f1:.3}")
    print("----------------------------------------")
    print(f"TP: {TP} | FP: {FP} | FN: {FN}")
    print("========================================\n")
    return [sorted(list(x)) for x in [TP_set, FP_set, FN_set]]
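
The helpers data_matches_gold and align_matches_gold are not shown. A minimal sketch consistent with the "one match suffices" comment above, assuming gold cells hold "|"-separated alternative labels:

def align_matches_gold(align, gold_align):
    """True if align equals any of the '|'-separated gold labels."""
    return align in (label.strip() for label in gold_align.split("|"))


def data_matches_gold(data, gold_data):
    """Same one-of-many rule for the data span."""
    return data in (label.strip() for label in gold_data.split("|"))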