def test_updating_from_scratch_memory(self):
    # Import a corpus, wipe the memory folder while the engine is stopped,
    # restart, and check that the reported channels are the same as before
    # and that the indexes are still consistent.
    memory_corpus = CompactCorpus(os.path.join(RES_FOLDER, 'Memory.en__it.cpt'))
    self.mmt.import_corpus(compact=memory_corpus.path)

    channels_before = self.mmt.get_channels()
    self.assertEqual(channels_before, ModernMT.Channels(7, 0))

    # The memory directory is removed only after the engine has been stopped.
    self.mmt.stop()
    memory_dir = self.mmt.memory.path
    shutil.rmtree(memory_dir)
    self.mmt.start()

    channels_after = self.mmt.get_channels()
    self.assertEqual(channels_after, ModernMT.Channels(7, 0))

    self._verify_index_integrity()
class _OnlineLearningTest(unittest.TestCase):
    """Fixture and assertion helpers for the online-learning tests.

    Every test starts from a fresh copy of the engine unpacked from
    ``engine.tar.gz`` and removes it again when it finishes.
    """

    mmt = ModernMT('OnlineLearningTest')
    _engine_tar = os.path.join(RES_FOLDER, 'engine.tar.gz')

    def setUp(self):
        """Replace any existing engine with the packaged one and start it."""
        self.mmt.delete_engine()

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(self._engine_tar, 'r:gz') as tar:
            # Extract into the parent of the engine folder.
            tar.extractall(
                os.path.abspath(os.path.join(self.mmt.engine_path, os.pardir)))

        self.mmt.start()

    def tearDown(self):
        """Stop the engine and delete it from disk."""
        self.mmt.stop()
        self.mmt.delete_engine()

    # Assertion

    def assertInContent(self, content, element):
        """Assert that ``element`` is one of the lines in ``content``.

        All whitespace is removed from both sides before comparing.
        """
        element = ''.join(element.split())
        content = [''.join(line.split()) for line in content]

        self.assertIn(element, content)

    def assertInParallelContent(self, content, sentence, translation):
        """Assert that ``(sentence, translation)`` is one of the pairs in ``content``.

        All whitespace is removed from every string before comparing.
        """
        sentence = ''.join(sentence.split())
        translation = ''.join(translation.split())
        content = [(''.join(s.split()), ''.join(t.split())) for s, t in content]

        self.assertIn((sentence, translation), content)
class _OnlineLearningTest(unittest.TestCase):
    """Fixture and assertion helpers for the online-learning tests.

    Content of engine.xconf:

    <engine type="neural">
        <languages>
            <pair source="en" target="fr" />
            <pair source="fr" target="en" />
            <pair source="en" target="it" />
            <pair source="en" target="es-ES" />
            <pair source="en" target="es-MX" />
            <pair source="en" target="zh-TW" />
            <pair source="en" target="zh-CN" />
            <pair source="zh" target="en" />

            <rules>
                <rule lang="zh" from="zh-HK" to="zh-TW" />
                <rule lang="zh" from="*" to="zh-CN" />
                <rule lang="es" from="es" to="es-ES" />
                <rule lang="es" from="*" to="es-MX" />
            </rules>
        </languages>
    </engine>
    """

    mmt = ModernMT('OnlineLearningTest')
    _engine_tar = os.path.join(RES_FOLDER, 'engine.tar.gz')

    def setUp(self):
        """Replace any existing engine with the packaged one and start it."""
        self.mmt.delete_engine()

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(self._engine_tar, 'r:gz') as tar:
            # Extract into the parent of the engine folder.
            tar.extractall(os.path.abspath(os.path.join(self.mmt.engine_path, os.pardir)))

        self.mmt.start()

    def tearDown(self):
        """Stop the engine and delete it from disk."""
        self.mmt.stop()
        self.mmt.delete_engine()

    # Assertion

    def assertInContent(self, content, element):
        """Assert that ``element`` is one of the lines in ``content``.

        All whitespace is removed from both sides before comparing.
        """
        element = ''.join(element.split())
        content = [''.join(line.split()) for line in content]

        self.assertIn(element, content)

    def assertInParallelContent(self, content, sentence, translation):
        """Assert that ``(sentence, translation)`` is one of the pairs in ``content``.

        All whitespace is removed from every string before comparing.
        """
        sentence = ''.join(sentence.split())
        translation = ''.join(translation.split())
        content = [(''.join(s.split()), ''.join(t.split())) for s, t in content]

        self.assertIn((sentence, translation), content)
class _PrivacyTest(unittest.TestCase):
    """Fixture for the privacy tests: one memory without owner, two with owners.

    Every test starts from a fresh copy of the engine unpacked from
    ``engine.tar.gz`` and removes it again when it finishes.
    """

    USER_1 = '00000000-0000-0000-0000-000000000001'
    USER_2 = '00000000-0000-0000-0000-000000000002'

    mmt = ModernMT('PrivacyTest')
    _engine_tar = os.path.join(RES_FOLDER, 'engine.tar.gz')

    # Corpus files loaded, in order, into the three memories created by
    # _create_memories().
    _memory_corpora = ('Memory.A.cpt', 'Memory.B.cpt', 'Memory.C.cpt')

    def setUp(self):
        """Replace any existing engine with the packaged one and start it."""
        self.mmt.delete_engine()

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(self._engine_tar, 'r:gz') as tar:
            # Extract into the parent of the engine folder.
            tar.extractall(os.path.abspath(os.path.join(self.mmt.engine_path, os.pardir)))

        self.mmt.start(verbosity=2)

    def tearDown(self):
        """Stop the engine and delete it from disk."""
        self.mmt.stop()
        self.mmt.delete_engine()

    def _create_memories(self):
        """Create the three test memories and check their ownership.

        Returns the API results for: memory 'A' (no owner), memory 'B' owned
        by USER_1, and a second memory 'B' owned by USER_2.
        """
        a = self.mmt.api.create_memory('A')
        b = self.mmt.api.create_memory('B', owner=self.USER_1)
        c = self.mmt.api.create_memory('B', owner=self.USER_2)

        self.assertNotIn('owner', a)
        self.assertEqual(self.USER_1, b['owner'])
        self.assertEqual(self.USER_2, c['owner'])

        return a, b, c

    def _setup_with_memories(self):
        """Create the memories and fill each one by importing its corpus."""
        memories = self._create_memories()

        for filename, memory in zip(self._memory_corpora, memories):
            corpus = CompactCorpus(os.path.join(RES_FOLDER, filename))
            self.mmt.import_corpus(compact=corpus.path, memory=memory['id'])

    def _setup_with_contributions(self):
        """Create the memories and fill each one a contribution at a time."""
        memories = self._create_memories()

        for filename, memory in zip(self._memory_corpora, memories):
            # Use the id returned by the API rather than assuming 1, 2, 3,
            # consistently with _setup_with_memories().
            memory_id = memory['id']

            with CompactCorpus(os.path.join(RES_FOLDER, filename)).reader() as reader:
                for s, t, sentence, translation in reader:
                    self.mmt.add_contributions(s, t, [(sentence, translation)], memory=memory_id)
def test_single_contribution(self):
    # Add one en->it contribution and check that it shows up, alone, in
    # both sides of the context analyzer and in the memory dump for id 1.
    source_text = u'Hello world'
    target_text = u'Ciao mondo'

    self.mmt.add_contributions('en', 'it', [(source_text, target_text)])

    analyzer = self.mmt.context_analyzer
    ctx_source = analyzer.get_content(1, 'en', 'it')
    ctx_target = analyzer.get_content(1, 'it', 'en')
    memory_dump = self.mmt.memory.dump()
    mem_data = memory_dump.get_content(1, 'en', 'it')

    channels = self.mmt.get_channels()
    self.assertEqual(channels, ModernMT.Channels(0, 0))

    for stored in (ctx_source, ctx_target, mem_data):
        self.assertEqual(1, len(stored))

    self.assertInContent(ctx_source, source_text)
    self.assertInContent(ctx_target, target_text)
    self.assertInParallelContent(mem_data, source_text, target_text)
class TrainingTest(unittest.TestCase):
    """Engine training tests: create an engine, inspect it, then query it."""

    mmt = ModernMT('TrainingTest')

    def tearDown(self):
        """Stop the engine and delete it from disk."""
        self.mmt.stop()
        self.mmt.delete_engine()

    @staticmethod
    def _get_tmx_content(path):
        """Return the UTF-8 text of ``path`` with whitespace runs collapsed to one space."""
        with open(path, 'rb') as stream:
            return ' '.join(stream.read().decode('utf-8').split())

    # Assertion

    def assertInContent(self, content, element):
        """Assert that ``element`` is one of the lines in ``content``.

        All whitespace is removed from both sides before comparing.
        """
        element = ''.join(element.split())
        content = [''.join(line.split()) for line in content]

        self.assertIn(element, content)

    def assertInParallelContent(self, content, sentence, translation):
        """Assert that ``(sentence, translation)`` is one of the pairs in ``content``.

        All whitespace is removed from every string before comparing.
        """
        sentence = ''.join(sentence.split())
        translation = ''.join(translation.split())
        content = [(''.join(s.split()), ''.join(t.split())) for s, t in content]

        self.assertIn((sentence, translation), content)

    def assertTranslateMatch(self, source, target, sentence, chars):
        """Assert that translating ``sentence`` yields at least one of ``chars``."""
        translation = self.mmt.translate(source, target, sentence)

        if not any(c in translation for c in chars):
            raise self.failureException(u'Translation "%s" does not contain any of %s' % (translation, repr(chars)))

    def assertTranslateFail(self, source, target, sentence):
        """Assert that the translation request is rejected with an HTTP 400 error."""
        try:
            self.mmt.translate(source, target, sentence)
        except ApiException as e:
            self.assertIn('HTTP request failed with code 400', e.message)
        else:
            # Reaching this point means the request was accepted.
            raise self.failureException('Invalid translation request: %s %s' % (source, target))

    # Tests

    def test_train_chinese(self):
        # Create an en/zh engine, check the context analyzer and memory
        # content for domains 1 and 2, then start the engine and check
        # translation and the effect of adding a contribution.
        self.mmt.create('en zh %s --neural --debug --no-split --validation-corpora %s' % (TRAIN_FOLDER, DEV_FOLDER))

        tm_content = self.mmt.memory.dump()

        self.assertEqual({1, 2}, self.mmt.context_analyzer.get_domains())
        self.assertEqual({1, 2}, tm_content.get_domains())

        # Direct TM test
        ctx_source = self.mmt.context_analyzer.get_content(1, 'en', 'zh')
        ctx_target = self.mmt.context_analyzer.get_content(1, 'zh', 'en')
        mem_data = tm_content.get_content(1, 'en', 'zh')

        self.assertEqual(4, len(ctx_source))
        self.assertEqual(4, len(ctx_target))
        self.assertInContent(ctx_source, u'The en__zh example one')
        self.assertInContent(ctx_source, u'This is en__zh example two')
        self.assertInContent(ctx_source, u'This is en__zh example three')
        self.assertInContent(ctx_source, u'This is en__zh example four')
        self.assertInContent(ctx_target, u'en__zh例子之一')
        self.assertInContent(ctx_target, u'这是en__zh例子二')
        self.assertInContent(ctx_target, u'這是en__zh例子三')
        self.assertInContent(ctx_target, u'這是en__zh例子四')

        self.assertEqual(4, len(mem_data))
        self.assertInParallelContent(mem_data, u'The en__zh example one', u'en__zh例子之一')
        self.assertInParallelContent(mem_data, u'This is en__zh example two', u'这是en__zh例子二')
        self.assertInParallelContent(mem_data, u'This is en__zh example three', u'這是en__zh例子三')
        self.assertInParallelContent(mem_data, u'This is en__zh example four', u'這是en__zh例子四')

        # Reverse TM test
        ctx_source = self.mmt.context_analyzer.get_content(2, 'en', 'zh')
        ctx_target = self.mmt.context_analyzer.get_content(2, 'zh', 'en')
        mem_data = tm_content.get_content(2, 'en', 'zh')

        self.assertEqual(1, len(ctx_source))
        self.assertEqual(1, len(ctx_target))
        self.assertInContent(ctx_source, u'The zh__en example one')
        self.assertInContent(ctx_target, u'zh__en例子之一')

        self.assertEqual(1, len(mem_data))
        self.assertInParallelContent(mem_data, u'The zh__en example one', u'zh__en例子之一')

        # Runtime test
        self.mmt.start()

        self.assertTranslateMatch('en', 'zh', u'This is example', {u'这', u'這', u'是', u'例', u'子'})
        self.assertTranslateMatch('en', 'zh-CN', u'This is example', {u'这', u'這', u'是', u'例', u'子'})
        self.assertTranslateMatch('en', 'zh-TW', u'This is example', {u'这', u'這', u'是', u'例', u'子'})

        self.mmt.add_contributions('en', 'zh', [(u'The en__zh example five', u'en__zh例子五')], 1)

        ctx_source = self.mmt.context_analyzer.get_content(1, 'en', 'zh')
        mem_data = self.mmt.memory.dump().get_content(1, 'en', 'zh')

        self.assertInContent(ctx_source, u'The en__zh example five')
        self.assertInParallelContent(mem_data, u'The en__zh example five', u'en__zh例子五')
def test_updating_partial_memory(self):
    # Run the partial preparation for the memory only (context disabled),
    # then check the reported channels and the index integrity.
    self._prepare_partial(context=False, memory=True)

    channels = self.mmt.get_channels()
    self.assertEqual(channels, ModernMT.Channels(0, 7))

    self._verify_index_integrity()
def test_updating_partial_all(self):
    # Run the partial preparation with its default arguments, then check
    # the reported channels and the index integrity.
    self._prepare_partial()

    channels = self.mmt.get_channels()
    self.assertEqual(channels, ModernMT.Channels(0, 7))

    self._verify_index_integrity()
def test_upload_domain(self):
    # Import the en__it memory corpus, then check the reported channels
    # and the index integrity.
    corpus_path = os.path.join(RES_FOLDER, 'Memory.en__it.cpt')
    memory_corpus = CompactCorpus(corpus_path)
    self.mmt.import_corpus(compact=memory_corpus.path)

    channels = self.mmt.get_channels()
    self.assertEqual(channels, ModernMT.Channels(7, 0))

    self._verify_index_integrity()