Пример #1
0
def concept_mismatch(all_amr_list, comparison_amr_list, concept):
    """Classify comparison AMRs by whether they agree with the reference on ``concept``.

    Entries of both lists are indexed positionally: ``entry[0]`` is the AMR
    id, ``entry[1]`` is the value reported back to the caller, and
    ``entry[2]`` is the AMR string given to ``AMR.parse_AMR_line``.

    Every comparison entry is paired with the *first* entry of
    ``all_amr_list`` that has the same id; comparison entries without a
    partner are ignored.

    The id is parsed as ``<prefix>.<number>`` (anything after ``::`` is
    dropped); the integer ``<number>`` is what is reported and sorted on.

    Args:
        all_amr_list: reference entries.
        comparison_amr_list: entries to check against the reference.
        concept: node value to look for in the parsed AMRs.

    Returns:
        ``(correct, missing, spurious)`` -- three sorted lists of
        ``(id_number, entry[1])`` pairs, where
        ``correct`` holds entries in which the concept is present in both
        AMRs or absent from both, ``missing`` those where only the reference
        has it, and ``spurious`` those where only the comparison has it.
    """
    # Build the id -> first reference entry index once instead of rescanning
    # the whole reference list for every comparison entry.
    first_by_id = {}
    for base_tuple in all_amr_list:
        first_by_id.setdefault(base_tuple[0], base_tuple)

    missing = []
    spurious = []
    correct = []
    for comparison_amr in comparison_amr_list:
        amr_id = comparison_amr[0]
        if amr_id not in first_by_id:
            continue
        match = first_by_id[amr_id]
        match_nodes = AMR.parse_AMR_line(match[2]).node_values
        comparison_nodes = AMR.parse_AMR_line(comparison_amr[2]).node_values
        id_number = int(amr_id.split('::')[0].split('.')[1])

        in_reference = concept in match_nodes
        in_comparison = concept in comparison_nodes
        record = (id_number, comparison_amr[1])
        if in_reference and not in_comparison:
            missing.append(record)
        elif in_comparison and not in_reference:
            spurious.append(record)
        else:
            correct.append(record)
    return sorted(correct), sorted(missing), sorted(spurious)
Пример #2
0
def main(data):
    """Accumulate SEMA statistics over two parallel AMR files and print P/R/F1.

    Args:
        data: object with ``test`` and ``gold`` attributes, each a path to a
            file that is opened as UTF-8 text.

    The files are read in lockstep, one AMR per file per iteration. Each pair
    is parsed, its nodes are renamed with the prefixes ``a`` (test) and ``b``
    (gold), and the pair is passed to ``Sema.compute_sema``. Reading stops
    when both files are exhausted.

    If one file ends before the other, or either AMR fails to parse, the
    problem is logged at ERROR level, processing stops, and no score is
    printed. Otherwise a single ``SEMA: P .. R .. F1 ..`` line is printed.
    """
    logging.basicConfig(level=logging.ERROR)
    logger = logging.getLogger(__name__)
    aborted = False
    # The context managers guarantee both handles are closed on every exit
    # path, including exceptions raised while reading or scoring.
    with codecs.open(data.test, 'r', 'utf-8') as test, \
            codecs.open(data.gold, 'r', 'utf-8') as gold:
        sema = Sema()
        while True:
            cur_amr1 = AMR.get_amr_line(test)
            cur_amr2 = AMR.get_amr_line(gold)

            if cur_amr1 == '' and cur_amr2 == '':
                break
            if cur_amr1 == '':
                logger.error('Error: File 1 has less AMRs than file 2')
                logger.error('Ignoring remaining AMRs')
                aborted = True
                break
            if cur_amr2 == '':
                logger.error('Error: File 2 has less AMRs than file 1')
                logger.error('Ignoring remaining AMRs')
                aborted = True
                break
            try:
                amr1 = AMR.parse_AMR_line(cur_amr1)
            except Exception as e:
                logger.error('Error in parsing amr 1: %s', cur_amr1)
                logger.error(
                    "Please check if the AMR is ill-formatted. Ignoring remaining AMRs"
                )
                logger.error("Error message: %s", e)
                aborted = True
                break
            try:
                amr2 = AMR.parse_AMR_line(cur_amr2)
            except Exception as e:
                logger.error("Error in parsing amr 2: %s", cur_amr2)
                logger.error(
                    "Please check if the AMR is ill-formatted. Ignoring remaining AMRs"
                )
                logger.error("Error message: %s", e)
                aborted = True
                break
            # Distinct prefixes keep the variable names of the two graphs apart.
            amr1.rename_node('a')
            amr2.rename_node('b')
            sema.compute_sema(amr1, amr2)
    if not aborted:
        precision, recall, f1 = sema.get_sema_value()
        print(f'SEMA: P {precision:.2f} R {recall:.2f} F1 {f1:.2f}')
Пример #3
0
def score_amr_pair(ref_amr_line, rec_amr_line, restart_num, justinstance=False,
                   justattribute=False, justrelation=False):
    """Match one pair of AMR strings and return the match and triple counts.

    Args:
        ref_amr_line: AMR string parsed as the first graph (nodes renamed
            with prefix ``a``).
        rec_amr_line: AMR string parsed as the second graph (nodes renamed
            with prefix ``b``).
        restart_num: forwarded unchanged to ``smatch.get_best_match``.
        justinstance: restrict matching and counting to instance triples.
        justattribute: restrict matching and counting to attribute triples.
        justrelation: restrict matching and counting to relation triples.

    Returns:
        ``(best_match_num, first_count, second_count)`` where the counts are
        the number of triples of the selected kind(s) in the first and second
        graph respectively.
    """
    first_prefix, second_prefix = "a", "b"

    first_graph = AMR.parse_AMR_line(ref_amr_line)
    second_graph = AMR.parse_AMR_line(rec_amr_line)

    # Give the two graphs disjoint variable names: a1, a2, ... / b1, b2, ...
    first_graph.rename_node(first_prefix)
    second_graph.rename_node(second_prefix)

    first_inst, first_attr, first_rel = first_graph.get_triples()
    second_inst, second_attr, second_rel = second_graph.get_triples()

    # A triple kind stays enabled only if no *other* "just..." flag is set.
    use_instance = not (justattribute or justrelation)
    use_attribute = not (justinstance or justrelation)
    use_relation = not (justinstance or justattribute)

    _mapping, matched = smatch.get_best_match(
        first_inst, first_attr, first_rel,
        second_inst, second_attr, second_rel,
        first_prefix, second_prefix,
        restart_num,
        doinstance=use_instance,
        doattribute=use_attribute,
        dorelation=use_relation
    )

    # The first flag that is set decides which triples are counted.
    if justinstance:
        first_counted, second_counted = (first_inst,), (second_inst,)
    elif justattribute:
        first_counted, second_counted = (first_attr,), (second_attr,)
    elif justrelation:
        first_counted, second_counted = (first_rel,), (second_rel,)
    else:
        first_counted = (first_inst, first_attr, first_rel)
        second_counted = (second_inst, second_attr, second_rel)

    first_total = sum(len(group) for group in first_counted)
    second_total = sum(len(group) for group in second_counted)
    return matched, first_total, second_total
Пример #4
0
def get_named_entities(all_amr_file=None):
    """Print, for each AMR in a file, the concepts that are the source of a ``name`` triple.

    Args:
        all_amr_file: value passed to ``read_amrz``; ``GOLD_AMRS`` is used
            when it is None.

    Returns:
        None. For every AMR one list of concept strings is printed.
    """
    if all_amr_file is None:
        all_amr_file = GOLD_AMRS
    # read_amrz returns (comment_list, amr_list); only the AMR strings are used.
    amrs = read_amrz(all_amr_file)[1]
    for amr_line in amrs:
        amr_graph = AMR.parse_AMR_line(amr_line)
        # Map each variable to its concept.
        v2c = dict(zip(amr_graph.nodes, amr_graph.node_values))
        # Elements 1 and 2 of get_triples() are the triples inspected for the
        # "name" label; get_triples() is assumed to be side-effect free, so
        # it is called once rather than once per element.
        triple_groups = amr_graph.get_triples()
        triples = list(triple_groups[1])
        triples.extend(triple_groups[2])
        named_entities = [
            str(v2c[source]) for (label, source, _target) in triples
            if label == "name"
        ]
        print(named_entities)
Пример #5
0
def _smatch(cur_amr1, cur_amr2, n_iter):
    """Match two AMR strings and return the match count and both triple totals.

    Args:
        cur_amr1: AMR string for the first graph (nodes renamed with ``a``).
        cur_amr2: AMR string for the second graph (nodes renamed with ``b``).
        n_iter: accepted for interface compatibility; the body does not use it.

    Returns:
        ``(best_match_num, first_total, second_total)`` where each total is
        the number of instance, attribute and relation triples of that graph.
    """
    # Start from a clean match cache for this pair.
    clear_match_triple_dict()

    first_prefix, second_prefix = "a", "b"
    first_graph = AMR.parse_AMR_line(cur_amr1)
    second_graph = AMR.parse_AMR_line(cur_amr2)

    first_graph.rename_node(first_prefix)
    second_graph.rename_node(second_prefix)

    first_triples = first_graph.get_triples()
    second_triples = second_graph.get_triples()
    first_inst, first_attr, first_rel = first_triples
    second_inst, second_attr, second_rel = second_triples

    _mapping, matched = get_best_match(
        first_inst, first_attr, first_rel,
        second_inst, second_attr, second_rel,
        first_prefix, second_prefix)

    first_total = len(first_inst) + len(first_attr) + len(first_rel)
    second_total = len(second_inst) + len(second_attr) + len(second_rel)
    return matched, first_total, second_total
Пример #6
0
def get_amrs_with_concept(concept, all_amr_file=None):
    """Collect every AMR in a file whose node values contain ``concept``.

    Inputs:
        concept: node value to look for
        all_amr_file: value passed to ``read_amrz``; ``GOLD_AMRS`` when None
    Returns:
        list of (id, snt, amr) tuples, ordered by the integer that follows
        the first '.' in the part of the id before its first space.
        The number of matches is also printed.
    """
    source = GOLD_AMRS if all_amr_file is None else all_amr_file
    loaded = read_amrz(source)  # (comment_list, amr_list)
    comments = loaded[0]  # per-AMR dicts with 'snt' and 'id'
    amrs = loaded[1]

    def id_number(entry):
        # entry[0] is the id; keep the text before the first space and take
        # the numeric field after the first '.'
        return int(entry[0].split(' ')[0].split('.')[1])

    hits = []
    for position, amr_text in enumerate(amrs):
        if concept not in AMR.parse_AMR_line(amr_text).node_values:
            continue
        meta = comments[position]
        hits.append((meta['id'], meta['snt'], amr_text))

    print("Total number of AMRs with '{}': {}".format(concept, len(hits)))
    hits.sort(key=id_number)
    return hits
Пример #7
0
def compute_subscores(pred, gold):
    """Compute precision/recall/F1 per sub-metric over paired AMR strings.

    Args:
        pred: iterable of predicted AMR strings.
        gold: iterable of gold AMR strings, paired with ``pred`` by position
            (pairing stops at the shorter of the two).

    For each pair both strings are parsed (newlines removed first). A pair is
    skipped, with an error logged, when either parse yields None. For every
    sub-metric the extractor's output is reduced to a set for the prediction
    and for the gold graph; the sizes of the two sets and of their
    intersection are accumulated over all pairs.

    Returns:
        An ``OrderedDict`` mapping metric name to ``(precision, recall, f1)``,
        in the order Non_sense_frames, Wikification, Named Ent., Negations,
        IgnoreVars, Concepts, Frames. A ratio whose denominator is not
        positive is reported as 0. The dict is empty when no pair was scored.
    """
    # One row per metric. Lambdas give every extractor the same
    # (var2concept dict, triples) call shape and defer the name lookup to
    # call time.
    extractors = (
        ("Non_sense_frames", lambda v2c, triples: non_sense_frames(v2c)),
        ("Wikification", lambda v2c, triples: wikification(triples)),
        ("Named Ent.", lambda v2c, triples: namedent(v2c, triples)),
        ("Negations", lambda v2c, triples: negations(v2c, triples)),
        ("IgnoreVars", lambda v2c, triples: everything(v2c, triples)),
        ("Concepts", lambda v2c, triples: concepts(v2c)),
        ("Frames", lambda v2c, triples: frames(v2c)),
    )

    def graph_features(graph):
        # Variable->concept mapping plus elements 1 and 2 of get_triples()
        # merged into one list (get_triples() is assumed side-effect free,
        # so it is called once).
        v2c = var2concept(graph)
        triple_groups = graph.get_triples()
        merged = list(triple_groups[1])
        merged.extend(triple_groups[2])
        return v2c, merged

    inters = defaultdict(int)
    golds = defaultdict(int)
    preds = defaultdict(int)
    for pred_line, gold_line in zip(pred, gold):
        pred_graph = AMR.parse_AMR_line(pred_line.replace("\n", ""))
        if pred_graph is None:
            logger.error('Empty amr_pred entry')
            continue
        dict_pred, triples_pred = graph_features(pred_graph)

        gold_graph = AMR.parse_AMR_line(gold_line.replace("\n", ""))
        if gold_graph is None:
            logger.error('Empty amr_gold entry')
            continue
        dict_gold, triples_gold = graph_features(gold_graph)

        for name, extract in extractors:
            pred_items = set(extract(dict_pred, triples_pred))
            gold_items = set(extract(dict_gold, triples_gold))
            inters[name] += len(pred_items & gold_items)
            preds[name] += len(pred_items)
            golds[name] += len(gold_items)

    # Metrics appear in the order they were first accumulated.
    rdict = OrderedDict()
    for score in preds:
        pr = 0 if preds[score] <= 0 else inters[score] / float(preds[score])
        rc = 0 if golds[score] <= 0 else inters[score] / float(golds[score])
        f = 0 if pr + rc <= 0 else 2 * (pr * rc) / (pr + rc)
        rdict[score] = (pr, rc, f)
    return rdict