def test_best_model2_alignment_handles_fertile_words(self):
    # Checks that several target words may share one source position:
    # both occurrences of 'really' (target positions 2 and 4) are expected
    # in the same cept.
    #
    # NOTE(review): a method with this exact name is defined again further
    # down in this view; if both sit in the same class, the later
    # definition shadows this one -- confirm this copy is still needed.

    # arrange
    trg_words = ['i', 'really', ',', 'really', 'love', 'ham']
    pair = AlignedSent(trg_words, TestIBMModel.__TEST_SRC_SENTENCE)

    # One row per target word; the columns follow `sources`.
    # 'bien' produces 2 target words: 'really' and another 'really'
    sources = ("j'", 'aime', 'bien', 'jambon', None)
    rows = {
        'i': (0.9, 0.05, 0.02, 0.03, 0),
        'really': (0, 0, 0.9, 0.01, 0.09),
        ',': (0, 0, 0.3, 0, 0.7),
        'love': (0.05, 0.9, 0.01, 0.01, 0.03),
        'ham': (0, 0.01, 0, 0.99, 0),
    }
    lexical_table = {
        trg: dict(zip(sources, row))
        for trg, row in rows.items()
    }

    # Four nested defaultdict levels; every leaf defaults to 0.2.
    def leaf():
        return 0.2

    def third_level():
        return defaultdict(leaf)

    def second_level():
        return defaultdict(third_level)

    def first_level():
        return defaultdict(second_level)

    uniform_alignment = defaultdict(first_level)

    model = IBMModel([])
    model.translation_table = lexical_table
    model.alignment_table = uniform_alignment

    # act
    best = model.best_model2_alignment(pair)

    # assert
    expected_alignment = (1, 3, 0, 3, 2, 4)
    expected_cepts = [[3], [1], [5], [2, 4], [6]]
    self.assertEqual(best.alignment[1:], expected_alignment)
    self.assertEqual(best.cepts, expected_cepts)
def test_best_model2_alignment_does_not_change_pegged_alignment(self):
    # Checks that a pegged alignment point is kept: target position 2 is
    # forced onto source position 4 and the expected result still has it
    # there, even though the table scores 'aime' highest for 'love'.
    #
    # NOTE(review): a method with this exact name is defined again further
    # down in this view; if both sit in the same class, the later
    # definition shadows this one -- confirm this copy is still needed.

    # arrange
    pair = AlignedSent(
        TestIBMModel.__TEST_TRG_SENTENCE,
        TestIBMModel.__TEST_SRC_SENTENCE,
    )

    # One row per target word; the columns follow `sources`.
    sources = ("j'", 'aime', 'bien', 'jambon', None)
    rows = {
        'i': (0.9, 0.05, 0.02, 0.03, 0),
        'love': (0.05, 0.9, 0.01, 0.01, 0.03),
        'ham': (0, 0.01, 0, 0.99, 0),
    }
    lexical_table = {
        trg: dict(zip(sources, row))
        for trg, row in rows.items()
    }

    # Four nested defaultdict levels; every leaf defaults to 0.2.
    def leaf():
        return 0.2

    def third_level():
        return defaultdict(leaf)

    def second_level():
        return defaultdict(third_level)

    def first_level():
        return defaultdict(second_level)

    uniform_alignment = defaultdict(first_level)

    model = IBMModel([])
    model.translation_table = lexical_table
    model.alignment_table = uniform_alignment

    # act: force 'love' to be pegged to 'jambon'
    best = model.best_model2_alignment(pair, 2, 4)

    # assert
    expected_alignment = (1, 4, 4)
    expected_cepts = [[], [1], [], [], [2, 3]]
    self.assertEqual(best.alignment[1:], expected_alignment)
    self.assertEqual(best.cepts, expected_cepts)
def test_best_model2_alignment(self):
    # Baseline case: each target word is expected to land on the source
    # position with the highest table entry, leaving two cepts empty.
    #
    # NOTE(review): a method with this exact name is defined again further
    # down in this view; if both sit in the same class, the later
    # definition shadows this one -- confirm this copy is still needed.

    # arrange
    pair = AlignedSent(
        TestIBMModel.__TEST_TRG_SENTENCE,
        TestIBMModel.__TEST_SRC_SENTENCE,
    )

    # One row per target word; the columns follow `sources`.
    # None and 'bien' have zero fertility
    sources = ("j'", 'aime', 'bien', 'jambon', None)
    rows = {
        'i': (0.9, 0.05, 0.02, 0.03, 0),
        'love': (0.05, 0.9, 0.01, 0.01, 0.03),
        'ham': (0, 0.01, 0, 0.99, 0),
    }
    lexical_table = {
        trg: dict(zip(sources, row))
        for trg, row in rows.items()
    }

    # Four nested defaultdict levels; every leaf defaults to 0.2.
    def leaf():
        return 0.2

    def third_level():
        return defaultdict(leaf)

    def second_level():
        return defaultdict(third_level)

    def first_level():
        return defaultdict(second_level)

    uniform_alignment = defaultdict(first_level)

    model = IBMModel([])
    model.translation_table = lexical_table
    model.alignment_table = uniform_alignment

    # act
    best = model.best_model2_alignment(pair)

    # assert
    expected_alignment = (1, 2, 4)  # 0th element unused
    expected_cepts = [[], [1], [2], [], [3]]
    self.assertEqual(best.alignment[1:], expected_alignment)
    self.assertEqual(best.cepts, expected_cepts)
def test_best_model2_alignment_handles_fertile_words(self):
    # Checks that one source position can collect several target words:
    # the two 'really' tokens (target positions 2 and 4) are expected to
    # end up in the same cept.

    # arrange
    sentence = AlignedSent(
        ['i', 'really', ',', 'really', 'love', 'ham'],
        TestIBMModel.__TEST_SRC_SENTENCE,
    )

    src_vocab = ("j'", 'aime', 'bien', 'jambon', None)

    def scores(*values):
        # Pair each value with the source entry in the same column.
        return dict(zip(src_vocab, values))

    # 'bien' produces 2 target words: 'really' and another 'really'
    t_table = {
        'i': scores(0.9, 0.05, 0.02, 0.03, 0),
        'really': scores(0, 0, 0.9, 0.01, 0.09),
        ',': scores(0, 0, 0.3, 0, 0.7),
        'love': scores(0.05, 0.9, 0.01, 0.01, 0.03),
        'ham': scores(0, 0.01, 0, 0.99, 0),
    }

    def nest(inner):
        # Factory producing a defaultdict whose missing values come
        # from `inner`.
        return lambda: defaultdict(inner)

    # Wrap the constant 0.2 three times, then once more below, giving a
    # four-level defaultdict with 0.2 at every leaf.
    factory = lambda: 0.2
    for _ in range(3):
        factory = nest(factory)
    a_table = defaultdict(factory)

    model = IBMModel([])
    model.translation_table = t_table
    model.alignment_table = a_table

    # act
    result = model.best_model2_alignment(sentence)

    # assert
    self.assertEqual(result.alignment[1:], (1, 3, 0, 3, 2, 4))
    self.assertEqual(result.cepts, [[3], [1], [5], [2, 4], [6]])
def test_best_model2_alignment_handles_empty_trg_sentence(self):
    # With no target words there is nothing to align: the alignment after
    # index 0 is expected to be empty, and all five cepts stay empty.

    # arrange
    no_target_words = []
    pair = AlignedSent(no_target_words, TestIBMModel.__TEST_SRC_SENTENCE)
    model = IBMModel([])

    # act
    best = model.best_model2_alignment(pair)

    # assert
    expected_cepts = [[] for _ in range(5)]
    self.assertEqual(best.alignment[1:], ())
    self.assertEqual(best.cepts, expected_cepts)
def test_best_model2_alignment_handles_empty_src_sentence(self):
    # With an empty source sentence every target position is expected to
    # map to index 0 (presumably the NULL source slot -- confirm), so the
    # single cept collects target positions 1, 2 and 3.

    # arrange
    no_source_words = []
    pair = AlignedSent(TestIBMModel.__TEST_TRG_SENTENCE, no_source_words)
    model = IBMModel([])

    # act
    best = model.best_model2_alignment(pair)

    # assert
    expected_alignment = (0, 0, 0)
    expected_cepts = [[1, 2, 3]]
    self.assertEqual(best.alignment[1:], expected_alignment)
    self.assertEqual(best.cepts, expected_cepts)
def test_best_model2_alignment(self):
    # Baseline case: each target word is expected to be aligned to the
    # source position with the highest table entry; two cepts stay empty.

    # arrange
    sentence = AlignedSent(
        TestIBMModel.__TEST_TRG_SENTENCE,
        TestIBMModel.__TEST_SRC_SENTENCE,
    )

    src_vocab = ("j'", 'aime', 'bien', 'jambon', None)

    def scores(*values):
        # Pair each value with the source entry in the same column.
        return dict(zip(src_vocab, values))

    # None and 'bien' have zero fertility
    t_table = {
        'i': scores(0.9, 0.05, 0.02, 0.03, 0),
        'love': scores(0.05, 0.9, 0.01, 0.01, 0.03),
        'ham': scores(0, 0.01, 0, 0.99, 0),
    }

    def nest(inner):
        # Factory producing a defaultdict whose missing values come
        # from `inner`.
        return lambda: defaultdict(inner)

    # Wrap the constant 0.2 three times, then once more below, giving a
    # four-level defaultdict with 0.2 at every leaf.
    factory = lambda: 0.2
    for _ in range(3):
        factory = nest(factory)
    a_table = defaultdict(factory)

    model = IBMModel([])
    model.translation_table = t_table
    model.alignment_table = a_table

    # act
    result = model.best_model2_alignment(sentence)

    # assert
    # 0th element unused
    self.assertEqual(result.alignment[1:], (1, 2, 4))
    self.assertEqual(result.cepts, [[], [1], [2], [], [3]])
def test_best_model2_alignment_does_not_change_pegged_alignment(self):
    # Checks that a pegged alignment point is kept: target position 2 is
    # forced onto source position 4 and the expected result still has it
    # there, even though the table scores 'aime' highest for 'love'.

    # arrange
    sentence = AlignedSent(
        TestIBMModel.__TEST_TRG_SENTENCE,
        TestIBMModel.__TEST_SRC_SENTENCE,
    )

    src_vocab = ("j'", 'aime', 'bien', 'jambon', None)

    def scores(*values):
        # Pair each value with the source entry in the same column.
        return dict(zip(src_vocab, values))

    t_table = {
        'i': scores(0.9, 0.05, 0.02, 0.03, 0),
        'love': scores(0.05, 0.9, 0.01, 0.01, 0.03),
        'ham': scores(0, 0.01, 0, 0.99, 0),
    }

    def nest(inner):
        # Factory producing a defaultdict whose missing values come
        # from `inner`.
        return lambda: defaultdict(inner)

    # Wrap the constant 0.2 three times, then once more below, giving a
    # four-level defaultdict with 0.2 at every leaf.
    factory = lambda: 0.2
    for _ in range(3):
        factory = nest(factory)
    a_table = defaultdict(factory)

    model = IBMModel([])
    model.translation_table = t_table
    model.alignment_table = a_table

    # act: force 'love' to be pegged to 'jambon'
    result = model.best_model2_alignment(sentence, 2, 4)

    # assert
    self.assertEqual(result.alignment[1:], (1, 4, 4))
    self.assertEqual(result.cepts, [[], [1], [], [], [2, 3]])