Exemplo n.º 1
0
    def test_best_model2_alignment_handles_fertile_words(self):
        # arrange
        sentence_pair = AlignedSent(
            ['i', 'really', ',', 'really', 'love', 'ham'],
            TestIBMModel.__TEST_SRC_SENTENCE)
        # 'bien' produces 2 target words: 'really' and another 'really'
        translation_table = {
            'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, None: 0},
            'really': {"j'": 0, 'aime': 0, 'bien': 0.9, 'jambon': 0.01, None: 0.09},
            ',': {"j'": 0, 'aime': 0, 'bien': 0.3, 'jambon': 0, None: 0.7},
            'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03},
            'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0}
        }
        alignment_table = defaultdict(
            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(
                lambda: 0.2))))

        ibm_model = IBMModel([])
        ibm_model.translation_table = translation_table
        ibm_model.alignment_table = alignment_table

        # act
        a_info = ibm_model.best_model2_alignment(sentence_pair)

        # assert
        self.assertEqual(a_info.alignment[1:], (1, 3, 0, 3, 2, 4))
        self.assertEqual(a_info.cepts, [[3], [1], [5], [2, 4], [6]])
Exemplo n.º 2
0
    def test_best_model2_alignment_does_not_change_pegged_alignment(self):
        # arrange
        sentence_pair = AlignedSent(
            TestIBMModel.__TEST_TRG_SENTENCE,
            TestIBMModel.__TEST_SRC_SENTENCE)
        translation_table = {
            'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03,
                  None: 0},
            'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01,
                     None: 0.03},
            'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0}
        }
        alignment_table = defaultdict(
            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(
                lambda: 0.2))))

        ibm_model = IBMModel([])
        ibm_model.translation_table = translation_table
        ibm_model.alignment_table = alignment_table

        # act: force 'love' to be pegged to 'jambon'
        a_info = ibm_model.best_model2_alignment(sentence_pair, 2, 4)
        # assert
        self.assertEqual(a_info.alignment[1:], (1, 4, 4))
        self.assertEqual(a_info.cepts, [[], [1], [], [], [2, 3]])
Exemplo n.º 3
0
    def test_best_model2_alignment(self):
        # arrange
        sentence_pair = AlignedSent(
            TestIBMModel.__TEST_TRG_SENTENCE,
            TestIBMModel.__TEST_SRC_SENTENCE)
        # None and 'bien' have zero fertility
        translation_table = {
            'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03,
                  None: 0},
            'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01,
                     None: 0.03},
            'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99,
                    None: 0}
        }
        alignment_table = defaultdict(
            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(
                lambda: 0.2))))

        ibm_model = IBMModel([])
        ibm_model.translation_table = translation_table
        ibm_model.alignment_table = alignment_table

        # act
        a_info = ibm_model.best_model2_alignment(sentence_pair)

        # assert
        self.assertEqual(a_info.alignment[1:], (1, 2, 4))  # 0th element unused
        self.assertEqual(a_info.cepts, [[], [1], [2], [], [3]])
    def test_best_model2_alignment_handles_fertile_words(self):
        # arrange
        sentence_pair = AlignedSent(
            ['i', 'really', ',', 'really', 'love', 'ham'],
            TestIBMModel.__TEST_SRC_SENTENCE)
        # 'bien' produces 2 target words: 'really' and another 'really'
        translation_table = {
            'i': {
                "j'": 0.9,
                'aime': 0.05,
                'bien': 0.02,
                'jambon': 0.03,
                None: 0
            },
            'really': {
                "j'": 0,
                'aime': 0,
                'bien': 0.9,
                'jambon': 0.01,
                None: 0.09
            },
            ',': {
                "j'": 0,
                'aime': 0,
                'bien': 0.3,
                'jambon': 0,
                None: 0.7
            },
            'love': {
                "j'": 0.05,
                'aime': 0.9,
                'bien': 0.01,
                'jambon': 0.01,
                None: 0.03
            },
            'ham': {
                "j'": 0,
                'aime': 0.01,
                'bien': 0,
                'jambon': 0.99,
                None: 0
            }
        }
        alignment_table = defaultdict(lambda: defaultdict(lambda: defaultdict(
            lambda: defaultdict(lambda: 0.2))))

        ibm_model = IBMModel([])
        ibm_model.translation_table = translation_table
        ibm_model.alignment_table = alignment_table

        # act
        a_info = ibm_model.best_model2_alignment(sentence_pair)

        # assert
        self.assertEqual(a_info.alignment[1:], (1, 3, 0, 3, 2, 4))
        self.assertEqual(a_info.cepts, [[3], [1], [5], [2, 4], [6]])
    def test_best_model2_alignment(self):
        # arrange
        sentence_pair = AlignedSent(TestIBMModel.__TEST_TRG_SENTENCE,
                                    TestIBMModel.__TEST_SRC_SENTENCE)
        # None and 'bien' have zero fertility
        translation_table = {
            'i': {
                "j'": 0.9,
                'aime': 0.05,
                'bien': 0.02,
                'jambon': 0.03,
                None: 0
            },
            'love': {
                "j'": 0.05,
                'aime': 0.9,
                'bien': 0.01,
                'jambon': 0.01,
                None: 0.03
            },
            'ham': {
                "j'": 0,
                'aime': 0.01,
                'bien': 0,
                'jambon': 0.99,
                None: 0
            }
        }
        alignment_table = defaultdict(lambda: defaultdict(lambda: defaultdict(
            lambda: defaultdict(lambda: 0.2))))

        ibm_model = IBMModel([])
        ibm_model.translation_table = translation_table
        ibm_model.alignment_table = alignment_table

        # act
        a_info = ibm_model.best_model2_alignment(sentence_pair)

        # assert
        self.assertEqual(a_info.alignment[1:], (1, 2, 4))  # 0th element unused
        self.assertEqual(a_info.cepts, [[], [1], [2], [], [3]])
    def test_best_model2_alignment_does_not_change_pegged_alignment(self):
        # arrange
        sentence_pair = AlignedSent(TestIBMModel.__TEST_TRG_SENTENCE,
                                    TestIBMModel.__TEST_SRC_SENTENCE)
        translation_table = {
            'i': {
                "j'": 0.9,
                'aime': 0.05,
                'bien': 0.02,
                'jambon': 0.03,
                None: 0
            },
            'love': {
                "j'": 0.05,
                'aime': 0.9,
                'bien': 0.01,
                'jambon': 0.01,
                None: 0.03
            },
            'ham': {
                "j'": 0,
                'aime': 0.01,
                'bien': 0,
                'jambon': 0.99,
                None: 0
            }
        }
        alignment_table = defaultdict(lambda: defaultdict(lambda: defaultdict(
            lambda: defaultdict(lambda: 0.2))))

        ibm_model = IBMModel([])
        ibm_model.translation_table = translation_table
        ibm_model.alignment_table = alignment_table

        # act: force 'love' to be pegged to 'jambon'
        a_info = ibm_model.best_model2_alignment(sentence_pair, 2, 4)
        # assert
        self.assertEqual(a_info.alignment[1:], (1, 4, 4))
        self.assertEqual(a_info.cepts, [[], [1], [], [], [2, 3]])