示例#1
0
    def test_updating_from_scratch_memory(self):
        """Check the engine state after the memory folder is wiped and the engine restarted.

        Imports the ``Memory.en__it.cpt`` compact corpus, stops the engine,
        removes the folder at ``self.mmt.memory.path`` and starts the engine
        again. ``get_channels()`` must report ``ModernMT.Channels(7, 0)`` both
        before and after the restart, and the index integrity check must pass.
        """
        corpus_file = os.path.join(RES_FOLDER, 'Memory.en__it.cpt')
        self.mmt.import_corpus(compact=CompactCorpus(corpus_file).path)

        channels_before = self.mmt.get_channels()
        self.assertEqual(channels_before, ModernMT.Channels(7, 0))

        # Drop the on-disk memory while the engine is down, then bring it back up
        self.mmt.stop()
        shutil.rmtree(self.mmt.memory.path)
        self.mmt.start()

        channels_after = self.mmt.get_channels()
        self.assertEqual(channels_after, ModernMT.Channels(7, 0))
        self._verify_index_integrity()
示例#2
0
class _OnlineLearningTest(unittest.TestCase):
    """Shared fixture and assertion helpers for online-learning test cases.

    Before every test the engine is deleted, re-created by unpacking
    ``engine.tar.gz`` into the parent folder of ``mmt.engine_path`` and then
    started; after every test it is stopped and deleted again.
    """
    # Class-level attributes: a single ModernMT handle is shared by all tests
    mmt = ModernMT('OnlineLearningTest')
    _engine_tar = os.path.join(RES_FOLDER, 'engine.tar.gz')

    def setUp(self):
        """Recreate the engine from the reference archive and start it."""
        self.mmt.delete_engine()

        # The context manager closes the archive even when extraction fails,
        # so a broken fixture does not leak an open file handle.
        with tarfile.open(self._engine_tar, 'r:gz') as tar:
            tar.extractall(
                os.path.abspath(os.path.join(self.mmt.engine_path, os.pardir)))

        self.mmt.start()

    def tearDown(self):
        """Stop the engine and remove it from disk."""
        self.mmt.stop()
        self.mmt.delete_engine()

    # Assertion

    def assertInContent(self, content, element):
        """Assert that ``element`` is one of the lines of ``content``.

        All whitespace is removed from both sides before comparing, so the
        check is insensitive to spacing and tokenization differences.

        :param content: iterable of strings
        :param element: the string expected to be found in ``content``
        """
        element = ''.join(element.split())
        content = [''.join(line.split()) for line in content]

        self.assertIn(element, content)

    def assertInParallelContent(self, content, sentence, translation):
        """Assert that the pair ``(sentence, translation)`` is in ``content``.

        All whitespace is removed from every string before comparing.

        :param content: iterable of ``(source, target)`` string pairs
        :param sentence: expected source side of the pair
        :param translation: expected target side of the pair
        """
        sentence = ''.join(sentence.split())
        translation = ''.join(translation.split())
        content = [(''.join(s.split()), ''.join(t.split()))
                   for s, t in content]

        self.assertIn((sentence, translation), content)
示例#3
0
class _OnlineLearningTest(unittest.TestCase):
    """
    Shared fixture and assertion helpers for online-learning test cases.

    Before every test the engine is deleted, re-created by unpacking
    ``engine.tar.gz`` into the parent folder of ``mmt.engine_path`` and then
    started; after every test it is stopped and deleted again.

    Content of engine.xconf:

    <engine type="neural">
        <languages>
            <pair source="en" target="fr" />
            <pair source="fr" target="en" />

            <pair source="en" target="it" />

            <pair source="en" target="es-ES" />
            <pair source="en" target="es-MX" />

            <pair source="en" target="zh-TW" />
            <pair source="en" target="zh-CN" />
            <pair source="zh" target="en" />

            <rules>
                <rule lang="zh" from="zh-HK" to="zh-TW" />
                <rule lang="zh" from="*" to="zh-CN" />

                <rule lang="es" from="es" to="es-ES" />
                <rule lang="es" from="*" to="es-MX" />
            </rules>
        </languages>
    </engine>
    """
    # Class-level attributes: a single ModernMT handle is shared by all tests
    mmt = ModernMT('OnlineLearningTest')
    _engine_tar = os.path.join(RES_FOLDER, 'engine.tar.gz')

    def setUp(self):
        """Recreate the engine from the reference archive and start it."""
        self.mmt.delete_engine()

        # The context manager closes the archive even when extraction fails,
        # so a broken fixture does not leak an open file handle.
        with tarfile.open(self._engine_tar, 'r:gz') as tar:
            tar.extractall(os.path.abspath(os.path.join(self.mmt.engine_path, os.pardir)))

        self.mmt.start()

    def tearDown(self):
        """Stop the engine and remove it from disk."""
        self.mmt.stop()
        self.mmt.delete_engine()

    # Assertion

    def assertInContent(self, content, element):
        """Assert that ``element`` is one of the lines of ``content``.

        All whitespace is removed from both sides before comparing, so the
        check is insensitive to spacing and tokenization differences.

        :param content: iterable of strings
        :param element: the string expected to be found in ``content``
        """
        element = ''.join(element.split())
        content = [''.join(line.split()) for line in content]

        self.assertIn(element, content)

    def assertInParallelContent(self, content, sentence, translation):
        """Assert that the pair ``(sentence, translation)`` is in ``content``.

        All whitespace is removed from every string before comparing.

        :param content: iterable of ``(source, target)`` string pairs
        :param sentence: expected source side of the pair
        :param translation: expected target side of the pair
        """
        sentence = ''.join(sentence.split())
        translation = ''.join(translation.split())
        content = [(''.join(s.split()), ''.join(t.split())) for s, t in content]

        self.assertIn((sentence, translation), content)
示例#4
0
class _PrivacyTest(unittest.TestCase):
    """Shared fixture for memory-ownership (privacy) test cases.

    Before every test the engine is deleted, re-created by unpacking
    ``engine.tar.gz`` into the parent folder of ``mmt.engine_path`` and
    started with ``verbosity=2``; after every test it is stopped and deleted.
    The ``_setup_with_*`` helpers populate three memories: one without owner,
    one owned by ``USER_1`` and one owned by ``USER_2``.
    """
    USER_1 = '00000000-0000-0000-0000-000000000001'
    USER_2 = '00000000-0000-0000-0000-000000000002'

    # Class-level attributes: a single ModernMT handle is shared by all tests
    mmt = ModernMT('PrivacyTest')
    _engine_tar = os.path.join(RES_FOLDER, 'engine.tar.gz')

    # Corpus files loaded, in order, into the memories from _create_memories()
    _MEMORY_FILES = ('Memory.A.cpt', 'Memory.B.cpt', 'Memory.C.cpt')

    def setUp(self):
        """Recreate the engine from the reference archive and start it."""
        self.mmt.delete_engine()

        # The context manager closes the archive even when extraction fails,
        # so a broken fixture does not leak an open file handle.
        with tarfile.open(self._engine_tar, 'r:gz') as tar:
            tar.extractall(os.path.abspath(os.path.join(self.mmt.engine_path, os.pardir)))

        self.mmt.start(verbosity=2)

    def tearDown(self):
        """Stop the engine and remove it from disk."""
        self.mmt.stop()
        self.mmt.delete_engine()

    def _create_memories(self):
        """Create the three test memories and check their ownership.

        :return: tuple ``(a, b, c)`` of the objects returned by
            ``api.create_memory``: ``a`` has no owner, ``b`` belongs to
            ``USER_1`` and ``c`` belongs to ``USER_2``
        """
        a = self.mmt.api.create_memory('A')
        b = self.mmt.api.create_memory('B', owner=self.USER_1)
        # NOTE(review): this memory is also named 'B' although it receives
        # Memory.C.cpt - confirm whether the duplicated name is intentional.
        c = self.mmt.api.create_memory('B', owner=self.USER_2)

        self.assertNotIn('owner', a)
        self.assertEqual(self.USER_1, b['owner'])
        self.assertEqual(self.USER_2, c['owner'])

        return a, b, c

    def _setup_with_memories(self):
        """Create the test memories and fill each one by bulk corpus import."""
        for memory, filename in zip(self._create_memories(), self._MEMORY_FILES):
            corpus = CompactCorpus(os.path.join(RES_FOLDER, filename))
            self.mmt.import_corpus(compact=corpus.path, memory=memory['id'])

    def _setup_with_contributions(self):
        """Create the test memories and fill each one by single contributions.

        Every corpus line is sent as a separate ``add_contributions`` call.
        The target memory is the id returned by ``create_memory`` rather than
        a literal 1/2/3, so this helper does not rely on the id numbering of
        a fresh engine and matches ``_setup_with_memories``.
        """
        for memory, filename in zip(self._create_memories(), self._MEMORY_FILES):
            with CompactCorpus(os.path.join(RES_FOLDER, filename)).reader() as reader:
                for s, t, sentence, translation in reader:
                    self.mmt.add_contributions(s, t, [(sentence, translation)], memory=memory['id'])
示例#5
0
    def test_single_contribution(self):
        """Add one en-it contribution and check where it ends up.

        After the contribution, the context analyzer content for id 1 must
        hold exactly one line per direction and the memory dump for id 1 must
        hold exactly the contributed pair; ``get_channels()`` is expected to
        equal ``ModernMT.Channels(0, 0)``.
        """
        pair = (u'Hello world', u'Ciao mondo')
        self.mmt.add_contributions('en', 'it', [pair])

        source_lines = self.mmt.context_analyzer.get_content(1, 'en', 'it')
        target_lines = self.mmt.context_analyzer.get_content(1, 'it', 'en')
        memory_pairs = self.mmt.memory.dump().get_content(1, 'en', 'it')

        channels = self.mmt.get_channels()
        self.assertEqual(channels, ModernMT.Channels(0, 0))

        # Exactly one entry must be present in each of the three views
        for entries in (source_lines, target_lines, memory_pairs):
            self.assertEqual(1, len(entries))

        self.assertInContent(source_lines, pair[0])
        self.assertInContent(target_lines, pair[1])
        self.assertInParallelContent(memory_pairs, pair[0], pair[1])
示例#6
0
class TrainingTest(unittest.TestCase):
    """Trains an en-zh engine and checks the stored data and the translations.

    The engine is stopped and deleted after every test.
    """
    mmt = ModernMT('TrainingTest')

    def tearDown(self):
        """Stop the engine and remove it from disk."""
        self.mmt.stop()
        self.mmt.delete_engine()

    @staticmethod
    def _get_tmx_content(path):
        """Return the UTF-8 text of ``path`` with every whitespace run collapsed to one space."""
        with open(path, 'rb') as stream:
            raw = stream.read()
        return ' '.join(raw.decode('utf-8').split())

    @staticmethod
    def _squash(text):
        """Return ``text`` with all whitespace removed."""
        return ''.join(text.split())

    # Assertion

    def assertInContent(self, content, element):
        """Assert that ``element`` is one of the lines of ``content``, ignoring all whitespace."""
        squash = self._squash
        wanted = squash(element)
        lines = [squash(line) for line in content]

        self.assertIn(wanted, lines)

    def assertInParallelContent(self, content, sentence, translation):
        """Assert that ``(sentence, translation)`` is one of the pairs of ``content``, ignoring all whitespace."""
        squash = self._squash
        wanted = (squash(sentence), squash(translation))
        pairs = [(squash(s), squash(t)) for s, t in content]

        self.assertIn(wanted, pairs)

    def assertTranslateMatch(self, source, target, sentence, chars):
        """Translate ``sentence`` and fail unless the output contains at least one item of ``chars``."""
        translation = self.mmt.translate(source, target, sentence)

        if not any(c in translation for c in chars):
            raise self.failureException(u'Translation "%s" does not contain any of %s' % (translation, repr(chars)))

    def assertTranslateFail(self, source, target, sentence):
        """Fail unless translating raises an ``ApiException`` whose message reports HTTP code 400."""
        try:
            self.mmt.translate(source, target, sentence)
        except ApiException as e:
            self.assertIn('HTTP request failed with code 400', e.message)
        else:
            # The request was accepted although it was expected to be rejected
            raise self.failureException('Invalid translation request: %s %s' % (source, target))

    # Tests

    def test_train_chinese(self):
        """Train en-zh, verify the content stored under ids 1 and 2, then translate and add a contribution."""
        self.mmt.create('en zh %s --neural --debug --no-split --validation-corpora %s' % (TRAIN_FOLDER, DEV_FOLDER))

        dump = self.mmt.memory.dump()

        self.assertEqual({1, 2}, self.mmt.context_analyzer.get_domains())
        self.assertEqual({1, 2}, dump.get_domains())

        # Content stored under id 1: four en__zh pairs
        en_zh_pairs = [
            (u'The en__zh example one', u'en__zh例子之一'),
            (u'This is en__zh example two', u'这是en__zh例子二'),
            (u'This is en__zh example three', u'這是en__zh例子三'),
            (u'This is en__zh example four', u'這是en__zh例子四'),
        ]

        source_lines = self.mmt.context_analyzer.get_content(1, 'en', 'zh')
        target_lines = self.mmt.context_analyzer.get_content(1, 'zh', 'en')
        memory_pairs = dump.get_content(1, 'en', 'zh')

        self.assertEqual(4, len(source_lines))
        self.assertEqual(4, len(target_lines))

        for english, _ in en_zh_pairs:
            self.assertInContent(source_lines, english)
        for _, chinese in en_zh_pairs:
            self.assertInContent(target_lines, chinese)

        self.assertEqual(4, len(memory_pairs))
        for english, chinese in en_zh_pairs:
            self.assertInParallelContent(memory_pairs, english, chinese)

        # Content stored under id 2: the single zh__en pair
        zh_en_english = u'The zh__en example one'
        zh_en_chinese = u'zh__en例子之一'

        source_lines = self.mmt.context_analyzer.get_content(2, 'en', 'zh')
        target_lines = self.mmt.context_analyzer.get_content(2, 'zh', 'en')
        memory_pairs = dump.get_content(2, 'en', 'zh')

        self.assertEqual(1, len(source_lines))
        self.assertEqual(1, len(target_lines))

        self.assertInContent(source_lines, zh_en_english)
        self.assertInContent(target_lines, zh_en_chinese)

        self.assertEqual(1, len(memory_pairs))
        self.assertInParallelContent(memory_pairs, zh_en_english, zh_en_chinese)

        # Translations on the running engine, then a live contribution
        self.mmt.start()

        expected_chars = {u'这', u'這', u'是', u'例', u'子'}
        for target_language in ('zh', 'zh-CN', 'zh-TW'):
            self.assertTranslateMatch('en', target_language, u'This is example', expected_chars)

        new_english = u'The en__zh example five'
        new_chinese = u'en__zh例子五'
        self.mmt.add_contributions('en', 'zh', [(new_english, new_chinese)], 1)

        source_lines = self.mmt.context_analyzer.get_content(1, 'en', 'zh')
        memory_pairs = self.mmt.memory.dump().get_content(1, 'en', 'zh')

        self.assertInContent(source_lines, new_english)
        self.assertInParallelContent(memory_pairs, new_english, new_chinese)
示例#7
0
 def test_updating_partial_memory(self):
     """Run the partial preparation with ``context=False, memory=True`` and verify the result.

     ``get_channels()`` must equal ``ModernMT.Channels(0, 7)`` and the index
     integrity check must pass.
     """
     self._prepare_partial(context=False, memory=True)

     channels = self.mmt.get_channels()
     self.assertEqual(channels, ModernMT.Channels(0, 7))

     self._verify_index_integrity()
示例#8
0
 def test_updating_partial_all(self):
     """Run the partial preparation with its default arguments and verify the result.

     ``get_channels()`` must equal ``ModernMT.Channels(0, 7)`` and the index
     integrity check must pass.
     """
     self._prepare_partial()

     channels = self.mmt.get_channels()
     self.assertEqual(channels, ModernMT.Channels(0, 7))

     self._verify_index_integrity()
示例#9
0
    def test_upload_domain(self):
        """Import the ``Memory.en__it.cpt`` compact corpus and verify the result.

        ``get_channels()`` must equal ``ModernMT.Channels(7, 0)`` and the index
        integrity check must pass.
        """
        corpus_file = os.path.join(RES_FOLDER, 'Memory.en__it.cpt')
        self.mmt.import_corpus(compact=CompactCorpus(corpus_file).path)

        channels = self.mmt.get_channels()
        self.assertEqual(channels, ModernMT.Channels(7, 0))

        self._verify_index_integrity()