def test_confusion_matrix(caplog):
    """Test the confusion matrix."""
    caplog.set_level(logging.INFO)

    # Synthesize candidates
    cand1 = Candidate(id=1, type="type")
    cand2 = Candidate(id=2, type="type")
    cand3 = Candidate(id=3, type="type")
    cand4 = Candidate(id=4, type="type")

    # pred and gold as sets
    pred = {cand1, cand2, cand3}
    gold = {cand1, cand2, cand4}
    (TP, FP, FN) = confusion_matrix(pred, gold)
    assert TP == {cand1, cand2}
    assert FP == {cand3}
    assert FN == {cand4}

    # pred as a list
    pred = [cand1, cand2, cand3]
    (TP, FP, FN) = confusion_matrix(pred, gold)
    assert TP == {cand1, cand2}
    assert FP == {cand3}
    assert FN == {cand4}

    # The order of elements must not affect the output
    pred = [cand3, cand2, cand1]
    (TP, FP, FN) = confusion_matrix(pred, gold)
    assert TP == {cand1, cand2}
    assert FP == {cand3}
    assert FN == {cand4}

    # Assume the following strings are entities
    pred = {"1", "2", "3"}
    gold = {"1", "2", "4"}
    (TP, FP, FN) = confusion_matrix(pred, gold)
    assert TP == {"1", "2"}
    assert FP == {"3"}
    assert FN == {"4"}
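
# The test above exercises confusion_matrix without showing it. A minimal
# sketch consistent with the asserted behavior (not necessarily the actual
# implementation): coerce pred/gold to sets and return (TP, FP, FN) as set
# intersection and differences, so element order never matters.
def confusion_matrix_sketch(pred, gold):
    """Hypothetical reference implementation; assumes hashable entities."""
    pred, gold = set(pred), set(gold)
    TP = pred & gold   # predicted and in gold
    FP = pred - gold   # predicted but not in gold
    FN = gold - pred   # in gold but missed by the predictions
    return (TP, FP, FN)
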
def entity_level_f1(candidates, gold_file, corpus=None):
    """Checks entity-level recall of candidates compared to gold.

    Turns a CandidateSet into a normal set of entity-level tuples
    (doc, president_name, birthplace), then compares this to the
    entity-level tuples found in the gold.

    Example Usage:
        from hardware_utils import entity_level_f1
        candidates = ...  # CandidateSet of all candidates you want to consider
        gold_file = 'tutorials/tables/data/hardware/hardware_gold.csv'
        entity_level_f1(candidates, gold_file)
    """
    docs = [(doc.name).upper() for doc in corpus] if corpus else None
    gold_set = get_gold_dict(gold_file, docs=docs)
    if len(gold_set) == 0:
        print(f"Gold File: {gold_file}")
        print("Gold set is empty.")
        return

    # Turn the CandidateSet into a set of entity-level tuples
    print("Preparing candidates...")
    entities = set()
    for i, c in enumerate(tqdm(candidates)):
        doc = c[0].context.sentence.document.name.upper()
        president_name = c[0].context.get_span().upper()
        birthplace = c[1].context.get_span().upper()
        entities.add((doc, president_name, birthplace))

    (TP_set, FP_set, FN_set) = confusion_matrix(entities, gold_set)
    TP = len(TP_set)
    FP = len(FP_set)
    FN = len(FN_set)

    prec = TP / (TP + FP) if TP + FP > 0 else float("nan")
    rec = TP / (TP + FN) if TP + FN > 0 else float("nan")
    f1 = 2 * (prec * rec) / (prec + rec) if prec + rec > 0 else float("nan")

    print("========================================")
    print("Scoring on Entity-Level Gold Data")
    print("========================================")
    print(f"Corpus Precision {prec:.3}")
    print(f"Corpus Recall {rec:.3}")
    print(f"Corpus F1 {f1:.3}")
    print("----------------------------------------")
    print(f"TP: {TP} | FP: {FP} | FN: {FN}")
    print("========================================\n")

    return [sorted(list(x)) for x in [TP_set, FP_set, FN_set]]
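
# get_gold_dict is not shown in this snippet. Based on how its result is
# compared against uppercased (doc, president_name, birthplace) tuples above,
# a plausible sketch follows; the CSV layout and column order are assumptions.
import csv

def get_gold_dict_sketch(gold_file, docs=None):
    """Hypothetical loader returning a set of uppercased gold tuples."""
    gold = set()
    with open(gold_file, newline="") as f:
        for row in csv.reader(f):
            doc, president_name, birthplace = (x.strip().upper() for x in row[:3])
            # Optionally restrict the gold set to the documents in the corpus
            if docs is None or doc in docs:
                gold.add((doc, president_name, birthplace))
    return gold
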
def entity_level_f1(
    candidates, gold_file, attribute=None, corpus=None, parts_by_doc=None
):
    """Checks entity-level recall of candidates compared to gold.

    Turns a CandidateSet into a normal set of entity-level tuples
    (doc, part, [attribute_value]), then compares this to the
    entity-level tuples found in the gold.

    Example Usage:
        from hardware_utils import entity_level_f1
        candidates = ...  # CandidateSet of all candidates you want to consider
        gold_file = 'tutorials/tables/data/hardware/hardware_gold.csv'
        entity_level_f1(candidates, gold_file, 'stg_temp_min')
    """
    docs = [(doc.name).upper() for doc in corpus] if corpus else None
    val_on = attribute is not None
    gold_set = get_gold_dict(
        gold_file,
        docs=docs,
        doc_on=True,
        part_on=True,
        val_on=val_on,
        attribute=attribute,
    )
    if len(gold_set) == 0:
        logger.info(f"Gold File: {gold_file}\n Attribute: {attribute}")
        logger.error("Gold set is empty.")
        return

    # Turn the CandidateSet into a set of entity-level tuples
    logger.info("Preparing candidates...")
    entities = set()
    for i, c in enumerate(tqdm(candidates)):
        part = c[0].context.get_span()
        doc = c[0].context.sentence.document.name.upper()
        if attribute:
            val = c[1].context.get_span()
        for p in get_implied_parts(part, doc, parts_by_doc):
            if attribute:
                entities.add((doc, p, val))
            else:
                entities.add((doc, p))

    (TP_set, FP_set, FN_set) = confusion_matrix(entities, gold_set)
    TP = len(TP_set)
    FP = len(FP_set)
    FN = len(FN_set)

    prec = TP / (TP + FP) if TP + FP > 0 else float("nan")
    rec = TP / (TP + FN) if TP + FN > 0 else float("nan")
    f1 = 2 * (prec * rec) / (prec + rec) if prec + rec > 0 else float("nan")

    logger.info("========================================")
    logger.info("Scoring on Entity-Level Gold Data")
    logger.info("========================================")
    logger.info(f"Corpus Precision {prec:.3}")
    logger.info(f"Corpus Recall {rec:.3}")
    logger.info(f"Corpus F1 {f1:.3}")
    logger.info("----------------------------------------")
    logger.info(f"TP: {TP} | FP: {FP} | FN: {FN}")
    logger.info("========================================\n")

    return [sorted(list(x)) for x in [TP_set, FP_set, FN_set]]
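
# get_implied_parts is external to this snippet. In the hardware tutorial it
# expands an extracted part number into the part numbers it implies for that
# document; the exact expansion rule used here is an assumption. A rough sketch:
def get_implied_parts_sketch(part, doc, parts_by_doc):
    """Hypothetical expansion: the part itself plus longer doc-specific variants."""
    yield part
    if parts_by_doc:
        for p in parts_by_doc.get(doc, []):
            # Treat any catalogued part number that extends the extracted span
            # (e.g. a suffixed variant) as implied by it.
            if p.startswith(part):
                yield p
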
def entity_level_f1(
    candidates, gold_file, attribute=None, corpus=None, stations_mapping_dict=None
):
    """Checks entity-level recall of candidates compared to gold.

    Turns a CandidateSet into a normal set of entity-level tuples
    (doc, station, [attribute_value]), then compares this to the
    entity-level tuples found in the gold.

    Example Usage:
        from electricity_utils import entity_level_f1
        candidates = ...  # CandidateSet of all candidates you want to consider
        gold_file = 'tutorials/tables/data/electricity/electricity_gold.csv'
        entity_level_f1(candidates, gold_file, 'elec_price_vol')
    """
    docs = (
        [(re.sub("Document ", "", doc.name)).upper() for doc in corpus]
        if corpus
        else None
    )
    price_on = attribute is not None
    gold_set = get_gold_dict(
        gold_file,
        docs=docs,
        doc_on=True,
        station_on=True,
        price_on=price_on,
        attribute=attribute,
        stations_mapping_dict=stations_mapping_dict,
    )
    if len(gold_set) == 0:
        print(f"Gold File: {gold_file}\n Attribute: {attribute}")
        print("Gold set is empty.")
        return

    # Turn the CandidateSet into a set of entity-level tuples
    print("Preparing candidates...")
    entities = set()
    for i, c in enumerate(tqdm(candidates)):
        station = c[0].context.get_span().upper()
        doc = c[0].context.sentence.document.name.upper()
        price = c[1].context.get_span()
        # Account for all station abbreviations, since we do not solve the
        # entity-linking problem (the same entity may appear under multiple
        # identity descriptors); we only keep the entity under the name used
        # in the gold dict.
        stations = (
            stations_mapping_dict[station.lower()]
            if stations_mapping_dict is not None
            else [station]
        )
        added_any = False
        for station_abbr in stations:
            if (doc, station_abbr.upper(), price) in gold_set:
                entities.add((doc, station_abbr.upper(), price))
                added_any = True
        if not added_any:
            entities.add((doc, station, price))

    (TP_set, FP_set, FN_set) = confusion_matrix(entities, gold_set)
    TP = len(TP_set)
    FP = len(FP_set)
    FN = len(FN_set)

    prec = TP / (TP + FP) if TP + FP > 0 else float("nan")
    rec = TP / (TP + FN) if TP + FN > 0 else float("nan")
    f1 = 2 * (prec * rec) / (prec + rec) if prec + rec > 0 else float("nan")

    print("========================================")
    print("Scoring on Entity-Level Gold Data")
    print("========================================")
    print(f"Corpus Precision {prec:.3}")
    print(f"Corpus Recall {rec:.3}")
    print(f"Corpus F1 {f1:.3}")
    print("----------------------------------------")
    print(f"TP: {TP} | FP: {FP} | FN: {FN}")
    print("========================================\n")

    return [sorted(list(x)) for x in [TP_set, FP_set, FN_set]]
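
# stations_mapping_dict is indexed with station.lower() and must return a list
# of alternative names/abbreviations for that station. A toy example of the
# expected shape (the station names below are made up for illustration):
stations_mapping_dict_example = {
    "northern grid station": ["NGS", "NORTHERN GRID", "NORTH STATION"],
    "ngs": ["NGS", "NORTHERN GRID", "NORTH STATION"],
}
# Hypothetical call, assuming `candidates`, `gold_file`, and `corpus` exist:
# entity_level_f1(candidates, gold_file, "elec_price_vol",
#                 corpus=corpus, stations_mapping_dict=stations_mapping_dict_example)
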
def entity_level_f1(
    candidates, gold_file, attribute=None, corpus=None, row_on=True, col_on=False
):
    """Checks entity-level recall of candidates compared to gold.

    Turns a CandidateSet into a normal set of entity-level tuples
    (doc, data, [attribute_value]), then compares this to the
    entity-level tuples found in the gold.
    """
    docs = (
        [(re.sub("Document ", "", doc.name)).upper() for doc in corpus]
        if corpus
        else None
    )
    price_on = attribute is not None
    gold_set = get_gold_dict(
        gold_file,
        docs=docs,
        row_on=row_on,
        col_on=col_on,
    )
    if len(gold_set) == 0:
        print(f"Gold File: {gold_file}\n Attribute: {attribute}")
        print("Gold set is empty.")
        return

    # Turn the CandidateSet into a set of entity-level tuples
    print("Preparing candidates...")
    entities = set()
    for i, c in enumerate(tqdm(candidates)):
        doc = (c[0].context.sentence.document.name).upper()
        data = (c[0].context.get_span()).upper()
        align = (c[1].context.get_span()).upper()
        if row_on and col_on:
            align2 = (c[2].context.get_span()).upper()
        # Gold labels may contain multiple alternatives separated by "|";
        # one match suffices.
        matches = [
            x
            for x in gold_set
            if x[0] == doc
            and data_matches_gold(data, x[1])
            and align_matches_gold(align, x[2])
            and (not (row_on and col_on) or align_matches_gold(align2, x[3]))
        ]
        if len(matches) > 0:
            for match in matches:
                align_complete = match[2]
                data = match[1]
                if row_on and col_on:
                    entities.add((doc, data, align_complete, match[3]))
                else:
                    entities.add((doc, data, align_complete))
        else:
            if row_on and col_on:
                entities.add((doc, data, align, align2))
            else:
                entities.add((doc, data, align))

    (TP_set, FP_set, FN_set) = confusion_matrix(entities, gold_set)
    TP = len(TP_set)
    FP = len(FP_set)
    FN = len(FN_set)

    prec = TP / (TP + FP) if TP + FP > 0 else float("nan")
    rec = TP / (TP + FN) if TP + FN > 0 else float("nan")
    f1 = 2 * (prec * rec) / (prec + rec) if prec + rec > 0 else float("nan")

    print("========================================")
    print("Scoring on Entity-Level Gold Data")
    print("========================================")
    print(f"Corpus Precision {prec:.3}")
    print(f"Corpus Recall {rec:.3}")
    print(f"Corpus F1 {f1:.3}")
    print("----------------------------------------")
    print(f"TP: {TP} | FP: {FP} | FN: {FN}")
    print("========================================\n")

    return [sorted(list(x)) for x in [TP_set, FP_set, FN_set]]
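
# data_matches_gold and align_matches_gold are not defined in this snippet.
# Given the comment about "|"-separated alternatives in the gold labels, a
# plausible (assumed) behavior for both is a case-insensitive match against
# any one of the alternatives:
def matches_gold_sketch(extracted, gold_label):
    """Hypothetical matcher: any one of the '|'-separated gold alternatives suffices."""
    alternatives = [alt.strip().upper() for alt in gold_label.split("|")]
    return extracted.strip().upper() in alternatives
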