示例#1
0
 def test_alignment_no_target(self):
     """Expect NoDataError when the object has no 'target' entry."""
     extractor = AlignmentFeatureExtractor()
     # Only the source sentence is supplied; 'target' is deliberately absent.
     source_only = {
         'token': u'hits',
         'index': 2,
         'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
     }
     self.assertRaises(NoDataError, extractor.get_features, source_only)
示例#2
0
 def test_alignment_no_source(self):
     """Expect NoDataError when the object has no 'source' entry."""
     extractor = AlignmentFeatureExtractor()
     # Only the target sentence is supplied; 'source' is deliberately absent.
     target_only = {
         'token': u'hits',
         'index': 2,
         'target': [u'a', u'boy', u'hits', u'a', u'dog'],
     }
     self.assertRaises(NoDataError, extractor.get_features, target_only)
示例#3
0
 def setUp(self):
     """Record the fixture paths and build the extractors shared by the tests."""
     # Directory containing this test module; fixture paths are relative to it.
     here = os.path.dirname(os.path.realpath(__file__))
     self.module_path = here
     # Source- and target-side corpus files used as src_file / tg_file.
     self.src_name = os.path.join(
         here, '../../preprocessing/tests/test_data/corpus.de.1000')
     self.tg_name = os.path.join(
         here, '../../preprocessing/tests/test_data/corpus.en.1000')
     # Extractors built without any model: default settings and context_size=2.
     self.aligner_no_model = AlignmentFeatureExtractor()
     self.aligner_no_model_2 = AlignmentFeatureExtractor(context_size=2)
示例#4
0
 def test_align_model_in_extractor(self):
     """Check feature extraction when the extractor is given ``align_model``.

     The input object carries no 'alignments' entry. After ``get_features``
     the entry must be present in the object, and the first returned feature
     for target token 'boy' (index 1) must be u'junge'.
     """
     obj = {
         'token': u'boy',
         'index': 1,
         'source': [u'ein', u'junge', u'schlägt', u'einen', u'Hund'],
         'target': [u'a', u'boy', u'hits', u'a', u'dog']
     }
     aligner_model = AlignmentFeatureExtractor(align_model=os.path.join(
         self.module_path, 'test_data/alignments/align_model'))
     # Unpacking still checks that exactly three features come back; the two
     # context features are not asserted on in this test.
     cont_word, _left, _right = aligner_model.get_features(obj)
     # assertIn shows the container in the failure message, unlike
     # assertTrue(x in y) which only reports "False is not true".
     self.assertIn('alignments', obj)
     self.assertEqual(cont_word, u'junge')
 def test_align_model_in_extractor(self):
     """Check feature extraction when the extractor is given ``align_model``.

     The input object carries no "alignments" entry. After ``get_features``
     the entry must be present in the object, and the first returned feature
     for target token "boy" (index 1) must be u"junge".
     """
     obj = {
         "token": u"boy",
         "index": 1,
         "source": [u"ein", u"junge", u"schlägt", u"einen", u"Hund"],
         "target": [u"a", u"boy", u"hits", u"a", u"dog"],
     }
     aligner_model = AlignmentFeatureExtractor(
         align_model=os.path.join(self.module_path, "test_data/alignments/align_model")
     )
     # Unpacking still checks that exactly three features come back; the two
     # context features are not asserted on in this test.
     cont_word, _left, _right = aligner_model.get_features(obj)
     # assertIn shows the container in the failure message, unlike
     # assertTrue(x in y) which only reports "False is not true".
     self.assertIn("alignments", obj)
     self.assertEqual(cont_word, u"junge")
 def test_alignment_on_the_fly(self):
     """Check feature extraction when the extractor is built from corpus files.

     The input object carries no "alignments" entry. After ``get_features``
     the entry must be present in the object, and the first returned feature
     for target token "boy" (index 1) must be u"junge".

     Files in the current working directory matching ``align_model.*`` or
     ``<src basename>_<tg basename>*`` are deleted afterwards, whether or not
     the assertions pass.
     """
     obj = {
         "token": u"boy",
         "index": 1,
         "source": [u"ein", u"junge", u"schlägt", u"einen", u"Hund"],
         "target": [u"a", u"boy", u"hits", u"a", u"dog"],
     }
     # NOTE(review): presumably these are by-products written by the
     # extractor while it builds its alignment model -- confirm.
     generated_patterns = (
         "align_model.*",
         os.path.basename(self.src_name) + "_" + os.path.basename(self.tg_name) + "*",
     )
     try:
         aligner_corpus = AlignmentFeatureExtractor(src_file=self.src_name, tg_file=self.tg_name)
         # Unpacking still checks that exactly three features come back.
         cont_word, _left, _right = aligner_corpus.get_features(obj)
         self.assertIn("alignments", obj)
         self.assertEqual(cont_word, u"junge")
     finally:
         # Previously the cleanup sat after the assertions, so a failing
         # assertion left the generated files behind for later runs.
         for pattern in generated_patterns:
             for a_file in glob.glob(pattern):
                 os.remove(a_file)
示例#7
0
 def test_alignment_on_the_fly(self):
     """Check feature extraction when the extractor is built from corpus files.

     The input object carries no 'alignments' entry. After ``get_features``
     the entry must be present in the object, and the first returned feature
     for target token 'boy' (index 1) must be u'junge'.

     Files in the current working directory matching ``align_model.*`` or
     ``<src basename>_<tg basename>*`` are deleted afterwards, whether or not
     the assertions pass.
     """
     obj = {
         'token': u'boy',
         'index': 1,
         'source': [u'ein', u'junge', u'schlägt', u'einen', u'Hund'],
         'target': [u'a', u'boy', u'hits', u'a', u'dog']
     }
     # NOTE(review): presumably these are by-products written by the
     # extractor while it builds its alignment model -- confirm.
     generated_patterns = (
         'align_model.*',
         os.path.basename(self.src_name) + '_' +
         os.path.basename(self.tg_name) + '*',
     )
     try:
         aligner_corpus = AlignmentFeatureExtractor(src_file=self.src_name,
                                                    tg_file=self.tg_name)
         # Unpacking still checks that exactly three features come back.
         cont_word, _left, _right = aligner_corpus.get_features(obj)
         self.assertIn('alignments', obj)
         self.assertEqual(cont_word, u'junge')
     finally:
         # Previously the cleanup sat after the assertions, so a failing
         # assertion left the generated files behind for later runs.
         for pattern in generated_patterns:
             for a_file in glob.glob(pattern):
                 os.remove(a_file)
示例#8
0
 def test_alignment_no_alignments(self):
     """Expect NoDataError when 'alignments' is absent and no model was given."""
     extractor = AlignmentFeatureExtractor()
     # Both sentences are present, but the object has no 'alignments' entry
     # and the extractor was constructed without any arguments.
     unaligned_pair = {
         'token': u'hits',
         'index': 2,
         'target': [u'a', u'boy', u'hits', u'a', u'dog'],
         'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
     }
     self.assertRaises(NoDataError, extractor.get_features, unaligned_pair)
示例#9
0
class AlignmentFeatureExtractorTests(unittest.TestCase):
    """Tests for ``AlignmentFeatureExtractor.get_features``.

    Every test passes a dict describing one target token ('token', 'index',
    'target', 'source' and, in some tests, 'alignments' plus POS tags) and
    checks the returned triple, unpacked here as ``(cont_word, left, right)``.
    """

    def setUp(self):
        """Record the fixture paths and build the extractors shared by the tests."""
        self.module_path = os.path.dirname(os.path.realpath(__file__))
        self.src_name = os.path.join(
            self.module_path,
            '../../preprocessing/tests/test_data/corpus.de.1000')
        self.tg_name = os.path.join(
            self.module_path,
            '../../preprocessing/tests/test_data/corpus.en.1000')
        # Extractors built without any model: default settings and context_size=2.
        self.aligner_no_model = AlignmentFeatureExtractor()
        self.aligner_no_model_2 = AlignmentFeatureExtractor(context_size=2)

    def test_alignment_in_obj(self):
        """Use alignments supplied in the object, with default and size-2 context.

        Target index 2 is aligned to source index 3, so the expected word is
        u'un' with neighbours u'frappe' / u'chien'. With ``context_size=2``
        the expected contexts are u'garcon|frappe' and u'chien|_END_'.
        """
        obj = {
            'token': u'hits',
            'index': 2,
            'target': [u'a', u'boy', u'hits', u'a', u'dog'],
            'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
            'target_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'source_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'alignments': [[0], [1], [3], [2], [4]]
        }
        (cont_word, left, right) = self.aligner_no_model.get_features(obj)
        self.assertEqual(cont_word, u'un')
        self.assertEqual(left, u'frappe')
        self.assertEqual(right, u'chien')
        # Only the context features are checked for the size-2 extractor.
        (cont_word, left, right) = self.aligner_no_model_2.get_features(obj)
        self.assertEqual(left, u'garcon|frappe')
        self.assertEqual(right, u'chien|_END_')

    def test_alignment_on_the_fly(self):
        """Check feature extraction when the extractor is built from corpus files.

        The object carries no 'alignments' entry; it must be present after
        ``get_features`` and the first feature for 'boy' must be u'junge'.
        Files in the current working directory matching ``align_model.*`` or
        ``<src basename>_<tg basename>*`` are deleted afterwards, whether or
        not the assertions pass.
        """
        obj = {
            'token': u'boy',
            'index': 1,
            'source': [u'ein', u'junge', u'schlägt', u'einen', u'Hund'],
            'target': [u'a', u'boy', u'hits', u'a', u'dog']
        }
        # NOTE(review): presumably these are by-products written by the
        # extractor while it builds its alignment model -- confirm.
        generated_patterns = (
            'align_model.*',
            os.path.basename(self.src_name) + '_' +
            os.path.basename(self.tg_name) + '*',
        )
        try:
            aligner_corpus = AlignmentFeatureExtractor(src_file=self.src_name,
                                                       tg_file=self.tg_name)
            # Unpacking still checks that exactly three features come back.
            cont_word, _left, _right = aligner_corpus.get_features(obj)
            self.assertIn('alignments', obj)
            self.assertEqual(cont_word, u'junge')
        finally:
            # Previously the cleanup sat after the assertions, so a failing
            # assertion left the generated files behind for later runs.
            for pattern in generated_patterns:
                for a_file in glob.glob(pattern):
                    os.remove(a_file)

    def test_align_model_in_extractor(self):
        """Check feature extraction when the extractor is given ``align_model``.

        The object carries no 'alignments' entry; it must be present after
        ``get_features`` and the first feature for 'boy' must be u'junge'.
        """
        obj = {
            'token': u'boy',
            'index': 1,
            'source': [u'ein', u'junge', u'schlägt', u'einen', u'Hund'],
            'target': [u'a', u'boy', u'hits', u'a', u'dog']
        }
        aligner_model = AlignmentFeatureExtractor(align_model=os.path.join(
            self.module_path, 'test_data/alignments/align_model'))
        # Unpacking still checks that exactly three features come back.
        cont_word, _left, _right = aligner_model.get_features(obj)
        # assertIn shows the container in the failure message, unlike
        # assertTrue(x in y) which only reports "False is not true".
        self.assertIn('alignments', obj)
        self.assertEqual(cont_word, u'junge')

    def test_unaligned(self):
        """An empty alignment list for the token yields u'__unaligned__' for all three features."""
        obj = {
            'token': u'hits',
            'index': 2,
            'target': [u'a', u'boy', u'hits', u'a', u'dog'],
            'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
            'target_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'source_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'alignments': [[0], [1], [], [2], [4]]
        }
        (cont_word, left, right) = self.aligner_no_model.get_features(obj)
        self.assertEqual(cont_word, u'__unaligned__')
        self.assertEqual(left, u'__unaligned__')
        self.assertEqual(right, u'__unaligned__')

    def test_align_two_adjacent(self):
        """A token aligned to adjacent source indices 1 and 2.

        Expected word is u'garcon|frappe'; left and right contexts are both
        'un' (source indices 0 and 3).
        """
        obj = {
            'token': u'hits',
            'index': 2,
            'target': [u'a', u'boy', u'hits', u'a', u'dog'],
            'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
            'target_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'source_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'alignments': [[0], [1], [1, 2], [3], [4]]
        }
        (cont_word, left, right) = self.aligner_no_model.get_features(obj)
        self.assertEqual(cont_word, u'garcon|frappe')
        self.assertEqual(left, u'un')
        self.assertEqual(right, 'un')

    def test_align_two_gap(self):
        """A token aligned to non-adjacent source indices 2 and 4.

        Expected word is u'frappe|chien', left context u'garcon' and right
        context u'_END_' (index 4 is the last source word).
        """
        obj = {
            'token': u'hits',
            'index': 2,
            'target': [u'a', u'boy', u'hits', u'a', u'dog'],
            'source': [u'un', u'garcon', u'frappe', u'un', u'chien'],
            'target_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'source_pos': ['DT', 'NN', 'VBZ', 'DT', 'NN'],
            'alignments': [[0], [1], [2, 4], [3], [4]]
        }
        (cont_word, left, right) = self.aligner_no_model.get_features(obj)
        self.assertEqual(cont_word, u'frappe|chien')
        self.assertEqual(left, u'garcon')
        self.assertEqual(right, u'_END_')
 def setUp(self):
     """Record the fixture paths and build the extractors shared by the tests."""
     # Directory containing this test module; fixture paths are relative to it.
     here = os.path.dirname(os.path.realpath(__file__))
     self.module_path = here
     # Source- and target-side corpus files used as src_file / tg_file.
     self.src_name = os.path.join(here, "../../preprocessing/tests/test_data/corpus.de.1000")
     self.tg_name = os.path.join(here, "../../preprocessing/tests/test_data/corpus.en.1000")
     # Extractors built without any model: default settings and context_size=2.
     self.aligner_no_model = AlignmentFeatureExtractor()
     self.aligner_no_model_2 = AlignmentFeatureExtractor(context_size=2)
class AlignmentFeatureExtractorTests(unittest.TestCase):
    """Tests for ``AlignmentFeatureExtractor.get_features``.

    Every test passes a dict describing one target token ("token", "index",
    "target", "source" and, in some tests, "alignments" plus POS tags) and
    checks the returned triple, unpacked here as ``(cont_word, left, right)``.
    """

    def setUp(self):
        """Record the fixture paths and build the extractors shared by the tests."""
        self.module_path = os.path.dirname(os.path.realpath(__file__))
        self.src_name = os.path.join(self.module_path, "../../preprocessing/tests/test_data/corpus.de.1000")
        self.tg_name = os.path.join(self.module_path, "../../preprocessing/tests/test_data/corpus.en.1000")
        # Extractors built without any model: default settings and context_size=2.
        self.aligner_no_model = AlignmentFeatureExtractor()
        self.aligner_no_model_2 = AlignmentFeatureExtractor(context_size=2)

    def test_alignment_in_obj(self):
        """Use alignments supplied in the object, with default and size-2 context.

        Target index 2 is aligned to source index 3, so the expected word is
        u"un" with neighbours u"frappe" / u"chien". With ``context_size=2``
        the expected contexts are u"garcon|frappe" and u"chien|_END_".
        """
        obj = {
            "token": u"hits",
            "index": 2,
            "target": [u"a", u"boy", u"hits", u"a", u"dog"],
            "source": [u"un", u"garcon", u"frappe", u"un", u"chien"],
            "target_pos": ["DT", "NN", "VBZ", "DT", "NN"],
            "source_pos": ["DT", "NN", "VBZ", "DT", "NN"],
            "alignments": [[0], [1], [3], [2], [4]],
        }
        (cont_word, left, right) = self.aligner_no_model.get_features(obj)
        self.assertEqual(cont_word, u"un")
        self.assertEqual(left, u"frappe")
        self.assertEqual(right, u"chien")
        # Only the context features are checked for the size-2 extractor.
        (cont_word, left, right) = self.aligner_no_model_2.get_features(obj)
        self.assertEqual(left, u"garcon|frappe")
        self.assertEqual(right, u"chien|_END_")

    def test_alignment_on_the_fly(self):
        """Check feature extraction when the extractor is built from corpus files.

        The object carries no "alignments" entry; it must be present after
        ``get_features`` and the first feature for "boy" must be u"junge".
        Files in the current working directory matching ``align_model.*`` or
        ``<src basename>_<tg basename>*`` are deleted afterwards, whether or
        not the assertions pass.
        """
        obj = {
            "token": u"boy",
            "index": 1,
            "source": [u"ein", u"junge", u"schlägt", u"einen", u"Hund"],
            "target": [u"a", u"boy", u"hits", u"a", u"dog"],
        }
        # NOTE(review): presumably these are by-products written by the
        # extractor while it builds its alignment model -- confirm.
        generated_patterns = (
            "align_model.*",
            os.path.basename(self.src_name) + "_" + os.path.basename(self.tg_name) + "*",
        )
        try:
            aligner_corpus = AlignmentFeatureExtractor(src_file=self.src_name, tg_file=self.tg_name)
            # Unpacking still checks that exactly three features come back.
            cont_word, _left, _right = aligner_corpus.get_features(obj)
            self.assertIn("alignments", obj)
            self.assertEqual(cont_word, u"junge")
        finally:
            # Previously the cleanup sat after the assertions, so a failing
            # assertion left the generated files behind for later runs.
            for pattern in generated_patterns:
                for a_file in glob.glob(pattern):
                    os.remove(a_file)

    def test_align_model_in_extractor(self):
        """Check feature extraction when the extractor is given ``align_model``.

        The object carries no "alignments" entry; it must be present after
        ``get_features`` and the first feature for "boy" must be u"junge".
        """
        obj = {
            "token": u"boy",
            "index": 1,
            "source": [u"ein", u"junge", u"schlägt", u"einen", u"Hund"],
            "target": [u"a", u"boy", u"hits", u"a", u"dog"],
        }
        aligner_model = AlignmentFeatureExtractor(
            align_model=os.path.join(self.module_path, "test_data/alignments/align_model")
        )
        # Unpacking still checks that exactly three features come back.
        cont_word, _left, _right = aligner_model.get_features(obj)
        # assertIn shows the container in the failure message, unlike
        # assertTrue(x in y) which only reports "False is not true".
        self.assertIn("alignments", obj)
        self.assertEqual(cont_word, u"junge")

    def test_unaligned(self):
        """An empty alignment list for the token yields u"__unaligned__" for all three features."""
        obj = {
            "token": u"hits",
            "index": 2,
            "target": [u"a", u"boy", u"hits", u"a", u"dog"],
            "source": [u"un", u"garcon", u"frappe", u"un", u"chien"],
            "target_pos": ["DT", "NN", "VBZ", "DT", "NN"],
            "source_pos": ["DT", "NN", "VBZ", "DT", "NN"],
            "alignments": [[0], [1], [], [2], [4]],
        }
        (cont_word, left, right) = self.aligner_no_model.get_features(obj)
        self.assertEqual(cont_word, u"__unaligned__")
        self.assertEqual(left, u"__unaligned__")
        self.assertEqual(right, u"__unaligned__")

    def test_align_two_adjacent(self):
        """A token aligned to adjacent source indices 1 and 2.

        Expected word is u"garcon|frappe"; left and right contexts are both
        "un" (source indices 0 and 3).
        """
        obj = {
            "token": u"hits",
            "index": 2,
            "target": [u"a", u"boy", u"hits", u"a", u"dog"],
            "source": [u"un", u"garcon", u"frappe", u"un", u"chien"],
            "target_pos": ["DT", "NN", "VBZ", "DT", "NN"],
            "source_pos": ["DT", "NN", "VBZ", "DT", "NN"],
            "alignments": [[0], [1], [1, 2], [3], [4]],
        }
        (cont_word, left, right) = self.aligner_no_model.get_features(obj)
        self.assertEqual(cont_word, u"garcon|frappe")
        self.assertEqual(left, u"un")
        self.assertEqual(right, "un")

    def test_align_two_gap(self):
        """A token aligned to non-adjacent source indices 2 and 4.

        Expected word is u"frappe|chien", left context u"garcon" and right
        context u"_END_" (index 4 is the last source word).
        """
        obj = {
            "token": u"hits",
            "index": 2,
            "target": [u"a", u"boy", u"hits", u"a", u"dog"],
            "source": [u"un", u"garcon", u"frappe", u"un", u"chien"],
            "target_pos": ["DT", "NN", "VBZ", "DT", "NN"],
            "source_pos": ["DT", "NN", "VBZ", "DT", "NN"],
            "alignments": [[0], [1], [2, 4], [3], [4]],
        }
        (cont_word, left, right) = self.aligner_no_model.get_features(obj)
        self.assertEqual(cont_word, u"frappe|chien")
        self.assertEqual(left, u"garcon")
        self.assertEqual(right, u"_END_")