Exemplo n.º 1
0
def eval_proj(xc):
    """
    Compare projected POS tags with manual POS tags for every instance in *xc*.

    For each instance, ``fix_ctn_gloss_line`` is first applied with the
    projection tag method. The manual and projected tag tiers for the gloss
    words are then fetched, and one token pair is produced per manual tag:
    the manual tag itself, and either the projected tag found via the manual
    tag's alignment attribute or an ``UNALIGNED`` placeholder. The collected
    sequences are handed to ``poseval`` with ``details=True``.

    :param xc: An iterable of instances to evaluate.
    """

    def token_for(tag_item):
        # Both the token text and its label are the tag's value.
        return POSToken(tag_item.value(), label=tag_item.value())

    def unaligned_token():
        return POSToken('UNALIGNED', label='UNALIGNED')

    projected_sents = []
    manual_sents = []

    for inst in xc:
        fix_ctn_gloss_line(inst, tag_method=INTENT_POS_PROJ)

        # Fetch both tag tiers for the projection comparison.
        manual_tier = inst.get_pos_tags(GLOSS_WORD_ID, tag_method=INTENT_POS_MANUAL)
        projected_tier = inst.get_pos_tags(GLOSS_WORD_ID, tag_method=INTENT_POS_PROJ)

        manual_tokens = []
        projected_tokens = []

        for manual_tag in manual_tier:
            manual_tokens.append(token_for(manual_tag))

            # With no projected tier there is nothing to look up; otherwise
            # search it for the tag sharing this manual tag's alignment.
            match = (projected_tier.find(alignment=manual_tag.attributes[ALIGNMENT])
                     if projected_tier else None)

            projected_tokens.append(token_for(match) if match else unaligned_token())

        manual_sents.append(manual_tokens)
        projected_sents.append(projected_tokens)

    poseval(projected_sents, manual_sents, details=True)
Exemplo n.º 2
0
def eval_classifier(c, inst_list, context_feats=False, posdict=None):
    """
    Tag the gloss words of each instance with a classifier and score the result.

    Every instance is copied, the copy is passed through ``strip_pos`` and
    then classified. The classifier tags on the copy are compared with the
    manual tags on the original instance. All classified copies are collected
    into a new ``XigtCorpus`` and written to ``./enriched_ctn_dev.xml``.

    :param c: The classifier
    :param inst_list: A list of Igt instances to test against. Must already have POS tags.
    :param context_feats: Passed as both ``feat_next_gram`` and
                          ``feat_prev_gram`` to ``classify_gloss_pos``.
    :param posdict: Passed through unchanged to ``classify_gloss_pos``.
    :return: The result of ``poseval`` on the collected tag sequences.
    """

    gold_sents = []
    eval_sents = []

    to_dump = XigtCorpus()

    for inst in inst_list:

        # Work on a copy so the original keeps its manual (gold) tags.
        to_tag = inst.copy()
        strip_pos(to_tag)

        # Do the classification.
        to_tag.classify_gloss_pos(c, lowercase=True,
                                  feat_next_gram=context_feats,
                                  feat_prev_gram=context_feats,
                                  posdict=posdict)

        # The classified copy is dumped even if it is later skipped for
        # evaluation because of a length mismatch.
        to_dump.append(to_tag)

        # NOTE(review): the tag fix-up below was disabled in the original.
        # fix_ctn_gloss_line(to_tag, tag_method=INTENT_POS_CLASS)

        # Now, retrieve eval/gold.
        eval_tags = [v.value() for v in to_tag.get_pos_tags(GLOSS_WORD_ID, tag_method=INTENT_POS_CLASS)]
        gold_tags = [v.value() for v in inst.get_pos_tags(GLOSS_WORD_ID, tag_method=INTENT_POS_MANUAL)]

        # The token text is a constant placeholder; only the labels differ.
        tag_tokens = [POSToken('a', label=l) for l in eval_tags]
        gold_tokens = [POSToken('a', label=l) for l in gold_tags]

        # Sequences of different lengths cannot be compared tag-by-tag.
        if len(tag_tokens) != len(gold_tokens):
            print("LENGTH OF SEQUENCE IS MISMATCHED")
            continue

        gold_sents.append(gold_tokens)
        eval_sents.append(tag_tokens)

    # Use a context manager so the output file is flushed and closed
    # deterministically, even if dumping raises.
    with open('./enriched_ctn_dev.xml', 'w') as dump_file:
        xigtxml.dump(dump_file, to_dump)

    return poseval(eval_sents, gold_sents, details=True, csv=True, matrix=True)