def set_bilingual_align_test(self):
    """
    Set the bilingual alignment manually, and ensure that it is read back correctly.

    A hand-built alignment is stored on ``self.igt`` (passing ``trans(self.igt)``
    and ``glosses(self.igt)``) under the ``INTENT_ALN_MANUAL`` method, then
    fetched again with the same method and compared against what was stored.
    """
    expected = Alignment([(1, 1), (1, 2), (2, 8), (4, 3), (5, 7), (6, 5)])
    set_bilingual_alignment(self.igt, trans(self.igt), glosses(self.igt), expected, INTENT_ALN_MANUAL)

    # The fetched alignment used to be discarded, so this test could never
    # fail on a broken round trip; compare it with what was stored.
    # NOTE(review): assumes the enclosing class is a unittest.TestCase — confirm.
    read_back = get_trans_glosses_alignment(self.igt, INTENT_ALN_MANUAL)
    self.assertEqual(expected, read_back)
def gather_gloss_pos_stats(inst, subword_dict, feat_list):
    """
    Collect statistics about the tags attached to the gloss words of an
    instance, so that they can later be filtered by how often each was seen.

    :param inst: Instance to process.
    :type inst: RGIgt
    :param subword_dict: Receives one ``add_word_tag(word, tag, prev_word, next_word)``
                         call per tagged gloss word (words are lowercased).
    :type subword_dict: SubwordDict
    :param feat_list: Container of enabled features. Only membership of
                      ``CLASS_FEATS_ALN`` is checked here: when present, the
                      instance is heuristically aligned first and the
                      word/tag pairs are recorded; when absent, nothing is
                      added to ``subword_dict``.
    """
    gloss_pos = gloss_tag_tier(inst)
    lang_pos = lang_tag_tier(inst)
    gloss_words = gloss(inst)

    # feat_list is not modified in this function, so test membership once.
    use_aln_feats = CLASS_FEATS_ALN in feat_list

    if use_aln_feats:
        heur_align_inst(inst)
        # Return value is unused; presumably called for its effect on inst.
        get_trans_glosses_alignment(inst, aln_method=INTENT_ALN_HEUR)

    # Tags exist on the language line only: project them onto the gloss
    # line and look the gloss tag tier up again.
    if gloss_pos is None and lang_pos is not None:
        add_gloss_lang_alignments(inst)
        project_lang_to_gloss(inst)
        gloss_pos = gloss_tag_tier(inst)

    # Without a gloss tag tier there is nothing to count.
    if gloss_pos is None:
        return

    for idx, gloss_word in enumerate(gloss_words):
        tag = xigt_find(inst, alignment=gloss_word.id)
        if tag is None:
            continue

        # Neighbouring gloss words (lowercased), or None at either edge.
        if idx > 0:
            prev_word = gloss_words[idx - 1].value().lower()
        else:
            prev_word = None

        if idx < len(gloss_words) - 1:
            next_word = gloss_words[idx + 1].value().lower()
        else:
            next_word = None

        if use_aln_feats:
            subword_dict.add_word_tag(gloss_word.value().lower(), tag.value(), prev_word, next_word)
def giza_align_test(self):
    """
    Run ``giza_align_t_g`` on a copy of ``self.xc`` and check the
    ``INTENT_ALN_GIZA`` alignment of the first instance against the
    expected pairs.
    """
    # The aligner receives the result of copy_xigt(self.xc), not self.xc itself.
    corpus_copy = copy_xigt(self.xc)
    giza_align_t_g(corpus_copy)

    first_inst = corpus_copy[0]
    observed = get_trans_glosses_alignment(first_inst, aln_method=INTENT_ALN_GIZA)
    print(observed)

    expected = Alignment([(5, 7), (1, 1), (4, 3), (6, 5)])
    self.assertEqual(observed, expected)
def heur_align_test(self):
    """
    Run ``heur_align_corp`` on a copy of ``self.xc`` and check the
    ``INTENT_ALN_HEUR`` alignment of the first instance against the
    expected pairs.
    """
    # The aligner receives the result of copy_xigt(self.xc), not self.xc itself.
    new_c = copy_xigt(self.xc)
    heur_align_corp(new_c)

    aln = get_trans_glosses_alignment(new_c[0], aln_method=INTENT_ALN_HEUR)
    expected = Alignment([(5, 7), (6, 5), (1, 1), (4, 3)])

    # assertEquals is a deprecated alias that was removed in Python 3.12;
    # assertEqual is the supported spelling.
    self.assertEqual(expected, aln)