Exemplo n.º 1
0
def evaluate_classifier_on_instances(inst_list, classifier, feat_list, pos_class_matrix, gold_tagmap=None):
    """
    Classify the gloss line of each instance and score the result against
    the manually-provided gloss POS tags.

    Instances without a manual gloss POS tier, or for which no classifier
    tier can be retrieved after classification, are skipped.

    :param inst_list: Iterable of instances to evaluate.
    :param classifier: Classifier object handed to ``classify_gloss_pos``.
    :param feat_list: Collection of ``CLASS_FEATS_*`` flags selecting which
                      features are switched on for classification.
    :param pos_class_matrix: Object whose ``add(gold, predicted)`` method is
                             called once per compared tag.
    :param gold_tagmap: Optional object whose ``get`` method is applied to
                        each gold tag value before it is compared.
    :return: ``(matches, compares, accuracy)`` where accuracy is a
             percentage; it is ``0.0`` when no tags were compared.
    """

    # The POS dictionary is only loaded when a dictionary-based feature is requested.
    needs_posdict = any(flag in feat_list
                        for flag in (CLASS_FEATS_DICT, CLASS_FEATS_PDICT, CLASS_FEATS_NDICT))
    pd = load_posdict() if needs_posdict else False

    matches = 0
    compares = 0

    for inst in inst_list:
        sup_postier = gloss_tag_tier(inst, tag_method=INTENT_POS_MANUAL)
        if sup_postier is None:
            continue

        # The original flow fetched the gloss tier here; the call is kept although
        # its result is not needed.
        # NOTE(review): confirm gloss() has no required side effect before removing.
        gloss(inst)

        classify_gloss_pos(inst, classifier,
                           posdict=pd,
                           feat_prev_gram=CLASS_FEATS_PRESW in feat_list,
                           feat_next_gram=CLASS_FEATS_NEXSW in feat_list,
                           feat_dict=CLASS_FEATS_DICT in feat_list,
                           feat_prev_gram_dict=CLASS_FEATS_PDICT in feat_list,
                           feat_next_gram_dict=CLASS_FEATS_NDICT in feat_list,
                           feat_suffix=CLASS_FEATS_SUF in feat_list,
                           feat_prefix=CLASS_FEATS_PRE in feat_list,
                           feat_morph_num=CLASS_FEATS_NUMSW in feat_list,
                           feat_has_number=CLASS_FEATS_NUM in feat_list,
                           feat_basic=CLASS_FEATS_SW in feat_list)

        cls_postier = gloss_tag_tier(inst, tag_method=INTENT_POS_CLASS)
        if cls_postier is None:
            # Nothing to compare for this instance; iterating None would raise.
            continue

        for cls_tag in cls_postier:
            sup_tag = xigt_find(sup_postier, alignment=cls_tag.alignment)
            if sup_tag is None:
                continue

            sup_tag_v = sup_tag.value()
            if gold_tagmap is not None:
                sup_tag_v = gold_tagmap.get(sup_tag_v)

            cls_tag_v = cls_tag.value()
            pos_class_matrix.add(sup_tag_v, cls_tag_v)
            if cls_tag_v == sup_tag_v:
                matches += 1
            compares += 1

    # Guard against an empty evaluation set instead of raising ZeroDivisionError.
    accuracy = matches / compares * 100 if compares else 0.0
    return matches, compares, accuracy
Exemplo n.º 2
0
def gather_gloss_pos_stats(inst, subword_dict, feat_list):
    """
    Record (gloss word, POS tag) observations from a single instance so
    that they can later be filtered by how often each was seen.

    If the instance has POS tags on the language line but none on the gloss
    line, the language tags are projected onto the gloss line first. Each
    gloss word for which an aligned item is found is then reported to
    ``subword_dict`` together with its neighbouring gloss words, but only
    when ``CLASS_FEATS_ALN`` is present in ``feat_list``.

    :param inst: Instance to process.
    :type inst: RGIgt
    :param subword_dict: Receives ``add_word_tag(word, tag, prev_word, next_word)``
                         for each tagged gloss word.
    :type subword_dict: SubwordDict
    :param feat_list: Collection of ``CLASS_FEATS_*`` flags; only
                      ``CLASS_FEATS_ALN`` is consulted here.
    """

    # Fetch the tiers up front, before any alignment/projection step runs.
    gloss_tags = gloss_tag_tier(inst)
    lang_tags = lang_tag_tier(inst)
    gloss_words = gloss(inst)

    use_alignment = CLASS_FEATS_ALN in feat_list
    if use_alignment:
        heur_align_inst(inst)
        get_trans_glosses_alignment(inst, aln_method=INTENT_ALN_HEUR)

    # Language-line tags but no gloss-line tags: project them across and re-fetch.
    if gloss_tags is None and lang_tags is not None:
        add_gloss_lang_alignments(inst)
        project_lang_to_gloss(inst)
        gloss_tags = gloss_tag_tier(inst)

    # Without a gloss POS tier there is nothing to count.
    if gloss_tags is None:
        return

    for idx, gloss_word in enumerate(gloss_words):
        # The lookup is done on the whole instance, keyed by the gloss word's id.
        tag = xigt_find(inst, alignment=gloss_word.id)
        if tag is None:
            continue

        # Lower-cased neighbours, or None at either edge of the gloss line.
        if idx > 0:
            prev_word = gloss_words[idx - 1].value().lower()
        else:
            prev_word = None

        if idx < len(gloss_words) - 1:
            next_word = gloss_words[idx + 1].value().lower()
        else:
            next_word = None

        if use_alignment:
            subword_dict.add_word_tag(gloss_word.value().lower(), tag.value(), prev_word, next_word)
Exemplo n.º 3
0
 def test_proj_method(method):
     """
     Project translation POS tags onto the gloss line using the alignment
     method ``method``, then check that a projected gloss tag tier exists
     and that it reports ``method`` as its alignment method.

     Relies on ``self`` and ``inst`` from the enclosing scope.
     """
     project_trans_pos_to_gloss(inst, aln_method=method)
     gtt = gloss_tag_tier(inst, tag_method=INTENT_POS_PROJ)
     # assertIsNotNone takes (obj, msg); the tier itself must be the object
     # under test, otherwise a missing tier goes unnoticed.
     self.assertIsNotNone(gtt)
     self.assertEqual(get_intent_proj_aln_method(gtt), method)
Exemplo n.º 4
0
def evaluate_pos_projections_on_file(lang, xc, plma, pos_proj_matrix, tagger, gold_tagmap=None, trans_tagmap=None, outstream=sys.stdout):
    """
    Score projected gloss POS tags against the manual gloss POS tags for
    every usable instance in a corpus.

    An instance is used only if it has manual gloss tags, manual translation
    tags and a manual translation/gloss alignment. For each such instance
    the projection is run once per combination of alignment method
    (manual, heuristic) and translation tag source (manual, tagger), and
    the match/compare counts are reported to ``plma``.

    :param lang: Key under which results are recorded in ``plma``.
    :param xc: Corpus to iterate over; its ``id`` is used for the returned corpus.
    :type plma: PerLangMethodAccuracies
    :type pos_proj_matrix: POSMatrix
    :param tagger: Tagger handed to ``tag_trans_pos``.
    :param gold_tagmap: Optional object whose ``get`` remaps gold tags; a tag
                        is kept unchanged if ``TagMapException`` is raised.
    :param trans_tagmap: Optional object whose ``get`` remaps projected tags;
                         a tag is kept unchanged if ``TagMapException`` is raised.
    :param outstream: Stream that receives the formatted ``plma`` at the end.
    :return: A new ``XigtCorpus`` created from ``xc.id``; nothing is added to
             it in this function.
    """
    result_corpus = XigtCorpus(xc.id)

    for inst in xc:
        manual_gloss_tags = gloss_tag_tier(inst, INTENT_POS_MANUAL)
        manual_trans_tags = trans_tag_tier(inst, INTENT_POS_MANUAL)
        manual_aln = get_trans_gloss_alignment(inst, INTENT_ALN_MANUAL)

        # All three manual annotations are required for an instance to count.
        required = (manual_gloss_tags, manual_aln, manual_trans_tags)
        if any(item is None for item in required):
            continue

        # Produce the heuristic alignment and tagger output used below.
        heur_align_inst(inst)
        tag_trans_pos(inst, tagger=tagger)

        for aln_method in (INTENT_ALN_MANUAL, INTENT_ALN_HEUR):
            for trans_tag_method in (INTENT_POS_MANUAL, INTENT_POS_TAGGER):
                project_trans_pos_to_gloss(inst, aln_method=aln_method, trans_tag_method=trans_tag_method)
                projected_tags = gloss_tag_tier(inst, tag_method=INTENT_POS_PROJ)

                n_correct = 0
                n_scored = 0
                for gloss_word in gloss(inst):
                    gold_item = xigt_find(manual_gloss_tags, alignment=gloss_word.id)
                    proj_item = xigt_find(projected_tags, alignment=gloss_word.id)

                    # Only gloss words carrying a gold tag are scored.
                    if gold_item is None:
                        continue

                    gold_value = gold_item.value()
                    if gold_tagmap is not None:
                        # Keep the raw gold tag when it cannot be remapped.
                        try:
                            gold_value = gold_tagmap.get(gold_value)
                        except TagMapException:
                            pass

                    if proj_item is not None:
                        proj_value = proj_item.value()
                        if trans_tagmap is not None:
                            # Keep the raw projected tag when it cannot be remapped.
                            try:
                                proj_value = trans_tagmap.get(proj_value)
                            except TagMapException:
                                pass
                    else:
                        # Placeholder recorded when no tag was projected onto this word.
                        proj_value = '**UNK'

                    pos_proj_matrix.add(gold_value, proj_value)

                    n_scored += 1
                    if proj_item is not None and proj_value == gold_value:
                        n_correct += 1

                plma.add(lang, '{}:{}'.format(aln_method, trans_tag_method), n_correct, n_scored)

    outstream.write('{}\n'.format(plma))

    return result_corpus