Exemplo n.º 1
0
 def test_gloss_projection_unaligned(self):
     # Load the first instance of the gloss->lang projection fixture.
     fixture_path = os.path.join(testfile_dir, "xigt/project_gloss_lang_tests.xml")
     first_igt = xc_load(fixture_path)[0]

     # Project the gloss POS tags onto the language tier, keeping unknowns.
     project_gloss_pos_to_lang(first_igt, tag_method=INTENT_POS_PROJ, unk_handling='keep')

     # The final tag on the projected language POS tier is expected to be 'UNK'.
     projected_tags = pos_tag_tier(first_igt, lang(first_igt).id, INTENT_POS_PROJ)
     self.assertEqual('UNK', projected_tags[-1].value())
Exemplo n.º 2
0
def filter_xc(xc, require_lang=False, require_gloss=False, require_trans=False, require_aln=False, require_gloss_pos=False, require_grammatical=False, max_instances=0, prev_good_instances=0):
    """
    Build a new corpus holding only the instances of ``xc`` that pass every
    requested check.

    Each instance is examined in order. The first check it fails is logged to
    ``FILTER_LOG`` (as a "FAIL" line carrying the reason) and the instance is
    skipped; instances that pass everything are appended to the new corpus,
    logged as "SUCCESS", and have ``sort_tiers()`` called on them.

    :param xc: Iterable of ``Igt`` instances to filter.
    :param require_lang: Reject instances for which ``lang(inst)`` yields None
        or raises ``NoNormLineException``.
    :param require_gloss: Same as ``require_lang``, for ``gloss(inst)``.
    :param require_trans: Same as ``require_lang``, for ``trans(inst)``.
    :param require_aln: Reject instances lacking a gloss or lang tier, or for
        which ``word_align(gloss, lang)`` raises ``GlossLangAlignException``.
    :param require_gloss_pos: Reject instances with no gloss tier or for which
        ``pos_tag_tier(inst, gloss.id)`` is None.
    :param require_grammatical: Reject instances where any lang line, the gloss
        line, or any trans line carries a truthy ``ODIN_JUDGMENT_ATTRIBUTE``.
        Tiers that are missing (or falsy) are simply not inspected.
    :param max_instances: Stop as soon as ``prev_good_instances`` plus the
        number of successes in this call reaches this value; 0 means no limit.
    :param prev_good_instances: Count of good instances found before this call,
        used only for the ``max_instances`` cutoff.
    :return: Tuple ``(new_corpus, examined, successes, failures)``.
    """
    new_corp = XigtCorpus()

    examined = 0
    failures = 0
    successes = 0

    for inst in xc:
        examined += 1
        assert isinstance(inst, Igt)

        def fail(reason):
            # Build the message before counting so a formatting error does not
            # leave the failure counter out of step with the log.
            nonlocal failures
            message = filter_string(inst).format("FAIL", '['+reason+']')
            failures += 1
            FILTER_LOG.info(message)

        def trytier(f):
            # A missing normalized line is treated the same as a missing tier;
            # whether that is fatal is decided by the require_* flags below.
            try:
                return f(inst)
            except NoNormLineException:
                return None

        lt = trytier(lang)
        gt = trytier(gloss)
        tt = trytier(trans)

        if require_lang and lt is None:
            fail("LANG")
            continue
        if require_gloss and gt is None:
            fail("GLOSS")
            continue
        if require_trans and tt is None:
            fail("TRANS")
            continue

        if require_aln:
            if gt is None:
                fail("ALIGN-GLOSS")
                continue
            if lt is None:
                fail("ALIGN-LANG")
                continue

            try:
                word_align(gt, lt)
            except GlossLangAlignException:
                fail("ALIGN")
                continue

        if require_grammatical:
            # Reset per instance: without these defaults a missing tier would
            # either raise UnboundLocalError or reuse the previous instance's
            # judgment results.
            grammatical_ll = []
            grammatical_gl = None
            grammatical_tl = []

            if lt:
                grammatical_ll = [l for l in lang_lines(inst) if l.get_attribute(ODIN_JUDGMENT_ATTRIBUTE)]
            if gt:
                grammatical_gl = gloss_line(inst).get_attribute(ODIN_JUDGMENT_ATTRIBUTE)
            if tt:
                grammatical_tl = [l for l in trans_lines(inst) if l.get_attribute(ODIN_JUDGMENT_ATTRIBUTE)]

            if grammatical_ll or grammatical_gl or grammatical_tl:
                fail("UNGRAMMATICAL")
                continue

        if require_gloss_pos:
            # No gloss tier means there cannot be gloss POS tags; report it as
            # a filter failure instead of crashing on ``gt.id``.
            if gt is None or pos_tag_tier(inst, gt.id) is None:
                fail("GLOSS_POS")
                continue

        # Otherwise, attach to the new corpus.
        new_corp.append(inst)

        success_message = filter_string(inst).format("SUCCESS", "")
        successes += 1
        FILTER_LOG.info(success_message)
        inst.sort_tiers()

        # -------------------------------------------
        # Break out of the loop if we've hit the maximum
        # number of good instances.
        # -------------------------------------------
        if max_instances and prev_good_instances+successes >= max_instances:
            break

    return new_corp, examined, successes, failures
Exemplo n.º 3
0
    def test_add_pos_tags(self):
        # Attach the fixture tags to the instance under the 'gw' tier id.
        add_pos_tags(self.igt, 'gw', self.tags)

        # Reading the tags back from the POS tier should give the same sequence.
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(tier_tokens(pos_tag_tier(self.igt, 'gw')), self.tags)