def ordering_test(self):
    """
    Regression test for child replacement during iteration.

    With this tree and alignment, projection changes a child of a tree
    while that tree's children are being iterated over. The fix was to
    restart the iteration over the children whenever such a change is
    detected, so that no stale pointer is kept around.
    """
    source_tree = IdTree.fromstring('''(ROOT (FRAG (ADVP (RB Probably)) (SBAR (S (NP (PRP you)) (VP (VBP find) (ADJP (JJ something)) ) ) ) ))''')
    expected_tree = IdTree.fromstring('''(ROOT (FRAG (VBP chitt-u-m) (ADVP (RB hola)) (UNK ni) (UNK hou) (VBP chitt-u-m) ))''')
    target_words = create_words_tier_from_string('''chitt-u-m hola ni hou chitt-u-m''')
    alignment = Alignment([(1,2),(3,1),(3,5)])

    projected = project_ps(source_tree, target_words, alignment)

    # The projected tree only has to be "similar" to the expected one,
    # not strictly equal to it.
    is_similar = expected_tree.similar(projected)
    self.assertTrue(is_similar)
def test_projection(self):
    """
    Project the fixture tree ``self.t`` onto the Welsh sentence through
    ``self.aln`` and compare the outcome with the stored ``self.proj``.
    """
    target_words = create_words_tier_from_string("rhoddodd yr athro lyfr i'r bachgen ddoe")
    projected = project_ps(self.t, target_words, self.aln)

    # Nodes have been moved around by the projection, so the ids are
    # reassigned before comparing against the expected tree.
    projected.assign_ids()

    self.assertEqual(self.proj, projected)
def test_projection_2(self):
    """
    Project the dependency structure parsed from ``self.ds3str`` onto a
    German sentence and check it against the expected dependency tree.
    """
    source_ds = DepTree.fromstring(self.ds3str)

    # Word indices used by the alignment pairs (source, target):
    #
    # English:  Tomorrow Mary will meet Hans
    #           1        2    3    4    5
    #
    # German:   Den Hans wird Maria morgen treffen
    #           1   2    3    4     5      6
    alignment = Alignment([(1,5),(2,4),(3,3),(4,6),(5,2)])
    target_words = create_words_tier_from_string("Den Hans wird Maria morgen treffen")

    projected_ds = project_ds(source_ds, target_words, alignment)

    expected_ds = DepTree.fromstring(
        """ (ROOT[0] (treffen[6] (Hans[2] (Den[1])) (wird[3]) (Maria[4]) (morgen[5]) )) """,
        stype=DEPSTR_PTB,
    )

    same_structure = projected_ds.structurally_eq(expected_ds)
    self.assertTrue(same_structure)
def ctn_merge_2_test(self):
    """
    Project a phrase structure whose alignment maps several source words
    onto the same target word, then check that the projected tree has
    exactly six leaves (the target string holds six space-separated tokens).
    """
    source_tree = IdTree.fromstring('''(ROOT (UCP (S (NP (PRP They)) (VP (VBP are) (RB also) (ADJP (RB too) (JJ lazy) (S (VP (TO to) (VP (VB take) (NP (PRP it)) (ADVP (RB out,)))))))) (CC and) (SBAR (IN so) (S (NP (PRP they)) (VP (VBP do) (RB not) (VP (VB drink) (NP (NN it.))))))))''')
    target_words = create_words_tier_from_string('loĩs-ma yaŋ hunci-suma kat-a-ŋs-e kina u-tus-u-kV-nɨŋ')
    alignment_pairs = [
        (16, 6), (3, 2), (7, 1), (15, 3), (9, 3), (11, 3),
        (12, 6), (14, 6), (13, 6), (4, 3), (5, 3),
    ]
    alignment = Alignment(alignment_pairs)

    projected = project_ps(source_tree, target_words, alignment)

    leaf_count = len(projected.leaves())
    self.assertEqual(leaf_count, 6)
def failed_insertion_test(self):
    """
    Smoke test: run the phrase-structure projection on this tree and
    alignment. Nothing is asserted about the result, so the test passes
    as long as ``project_ps`` returns without raising.
    """
    source_tree = IdTree.fromstring('''(ROOT (SBARQ (WHNP (WDT What) (NP (NN kind) (PP (IN of) (NP (NNP work,))))) (SQ (VP (VBZ then?)))))''')
    target_words = create_words_tier_from_string('kam-a na them lis-no-kha hou')
    alignment = Alignment([(1, 3), (2, 5), (4, 1), (5, 5)])

    # The return value is intentionally discarded.
    project_ps(source_tree, target_words, alignment)
def test_duplicates(self):
    """
    Check projection when a single English word is aligned to more than
    one word of the language line.
    """
    source_tree = IdTree.fromstring('(ROOT (SBARQ (WHNP (WP Who)) (SQ (VP (VBZ else?)))))')
    target_words = create_words_tier_from_string('sa-lo sa-lo')
    expected_tree = IdTree.fromstring('(ROOT (SBARQ (WHNP (WP sa-lo) (WP sa-lo))))')

    # Source word 1 is aligned to both target words.
    alignment = Alignment([(1,1),(1,2)])

    projected = project_ps(source_tree, target_words, alignment)

    is_similar = expected_tree.similar(projected)
    self.assertTrue(is_similar)
def test_projection_1(self):
    """
    Test case for the DS projection example in Fei/Will's paper.
    """
    source_ds = DepTree.fromstring(self.ds1str)
    expected_ds = DepTree.fromstring(self.ds2str, stype=DEPSTR_PTB)

    # Word indices used by the alignment pairs (source, target):
    #
    # Target:
    # 1        2   3       4    5      6       7
    # Rhoddodd yr  athro   lyfr i'r    bachgen ddoe
    # gave-3sg the teacher book to-the boy     yesterday
    #
    # Source:
    # The teacher gave a book to the boy yesterday
    # 1   2       3    4 5    6  7   8   9
    target_words = create_words_tier_from_string("Rhoddodd yr athro lyfr i'r bachgen ddoe")
    alignment = Alignment([(1,2),(2,3),(3,1),(5,4),(6,5),(7,5),(8,6),(9,7)])

    # Run the projection and compare it structurally with the expectation.
    projected_ds = project_ds(source_ds, target_words, alignment)

    same_structure = expected_ds.structurally_eq(projected_ds)
    self.assertTrue(same_structure)
def test_direct_pos_heur(self):
    """
    Run the heuristic aligner with POS tags supplied for both the gloss
    and translation tokens, flip the result, and compare it with the
    expected alignment ``self.a2``.
    """
    gloss_tags = tier_tokens(create_words_tier_from_string('NOUN PRON VERB DET'))
    trans_tags = tier_tokens(create_words_tier_from_string('NOUN VERB DET NOUN'))

    heuristic_aln = heur_alignments(
        self.gloss_tokens,
        self.trans_tokens,
        gloss_pos=gloss_tags,
        trans_pos=trans_tags,
    )
    flipped = heuristic_aln.flip()

    self.assertEqual(self.a2, flipped)