示例#1
0
 def test_spanish_tune(self):
     """Compare temp_files/tune.en-es.es with the expected Spanish tune lines.

     create_hybrid_corpora is run on the fixture config first; the file is
     presumably produced by that call (its implementation is not shown here).
     """
     expected = (
         "s16-l\ns17-l\ns18-l\ns19-l\n"
         "s4-m\ns5-m\ns6-m\ns7-m\n"
         "s4-m\ns5-m"
     )
     create_hybrid_corpora(self.cconf)
     tune_path = os.path.join(TEST_PATH, "temp_files", "tune.en-es.es")
     with open(tune_path) as handle:
         # Surrounding whitespace is ignored in the comparison.
         self.assertEqual(handle.read().strip(), expected)
示例#2
0
 def test_english_tune(self):
     """Compare temp_files/tune.en-es.en with the expected English tune lines.

     create_hybrid_corpora is run on the fixture config first; the file is
     presumably produced by that call (its implementation is not shown here).
     """
     expected = (
         "e16-l\ne17-l\ne18-l\ne19-l\n"
         "e4-m\ne5-m\ne6-m\ne7-m\n"
         "e4-m\ne5-m"
     )
     create_hybrid_corpora(self.cconf)
     tune_path = os.path.join(TEST_PATH, "temp_files", "tune.en-es.en")
     with open(tune_path) as handle:
         # Surrounding whitespace is ignored in the comparison.
         self.assertEqual(handle.read().strip(), expected)
示例#3
0
 def test_spanish_train(self):
     """Compare temp_files/train.en-es.es with the expected Spanish train lines.

     The expected content is fifteen "-l" lines followed by the three "-m"
     lines repeated five times.
     """
     # Adjacent literals concatenate into one string identical to the
     # single-line form; they are split only for readability.
     expected = (
         "s1-l\ns2-l\ns3-l\ns4-l\ns5-l\ns6-l\ns7-l\ns8-l\ns9-l\ns10-l\n"
         "s11-l\ns12-l\ns13-l\ns14-l\ns15-l\n"
         "s1-m\ns2-m\ns3-m\n"
         "s1-m\ns2-m\ns3-m\n"
         "s1-m\ns2-m\ns3-m\n"
         "s1-m\ns2-m\ns3-m\n"
         "s1-m\ns2-m\ns3-m"
     )
     create_hybrid_corpora(self.cconf)
     train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.es")
     with open(train_path) as handle:
         self.assertEqual(handle.read().strip(), expected)
示例#4
0
 def test_english_train(self):
     """Compare temp_files/train.en-es.en with the expected English train lines.

     create_hybrid_corpora is run on the fixture config (self.cconf) and the
     stripped file content is compared against fifteen "-l" lines followed by
     the three "-m" lines repeated five times.
     """
     create_hybrid_corpora(self.cconf)
     with open(os.path.join(TEST_PATH, "temp_files",
                            "train.en-es.en")) as f:
         self.assertEqual(
             f.read().strip(),
             "e1-l\ne2-l\ne3-l\ne4-l\ne5-l\ne6-l\ne7-l\ne8-l\ne9-l\ne10-l\ne11-l\ne12-l\ne13-l\ne14-l\ne15-l\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m"
         )
示例#5
0
def create_corpora(args):
    """Build hybrid corpora from the default or an overriding corpora config.

    args is passed to fetch_config and must expose ``t_corpora_config``.
    When that attribute is None, the corpora config comes from
    ``conf.system.files.data.corpora``; otherwise it must name an existing
    file, which is loaded with ingest_yaml_doc and wrapped in CorporaConfig.

    Logs an error and returns without creating anything when the override
    file is missing or when the config's container path already exists.
    Always returns None.
    """
    conf = fetch_config(args)
    override = args.t_corpora_config

    # Guard: an explicit override must point at a real file.
    if override is not None and not os.path.isfile(override):
        logger.error(override + " doesn't exist")
        return

    if override is None:
        cconf = conf.system.files.data.corpora
    else:
        cconf = CorporaConfig(ingest_yaml_doc(override))

    # Refuse to proceed when the container path is already present.
    container = cconf.container_path
    if os.path.exists(container):
        logger.error(container + " already exists. Please delete it or change the container and try again")
        return

    create_hybrid_corpora(cconf)
示例#6
0
def create_corpora(args):
    """Build hybrid corpora from the default or an overriding corpora config.

    args is passed to fetch_config and must expose ``t_corpora_config``.
    When that attribute is None, the corpora config comes from
    ``conf.system.files.data.corpora``; otherwise it must name an existing
    file, whose path is handed directly to CorporaConfig.

    Logs an error and returns without creating anything when the override
    file is missing or when the config's container path already exists.
    Always returns None.
    """
    conf = fetch_config(args)
    override = args.t_corpora_config

    # Guard: an explicit override must point at a real file.
    if override is not None and not os.path.isfile(override):
        logger.error(override + " doesn't exist")
        return

    if override is None:
        cconf = conf.system.files.data.corpora
    else:
        cconf = CorporaConfig(override)

    # Refuse to proceed when the container path is already present.
    container = cconf.container_path
    if os.path.exists(container):
        logger.error(container + " already exists. Please remove and try again")
        return

    create_hybrid_corpora(cconf)
示例#7
0
 def test_english_test(self):
     """Compare temp_files/test.en-es.en with the expected English test lines."""
     expected = "e20-l\ne20-l\ne8-m\ne9-m\ne10-m"
     create_hybrid_corpora(self.cconf)
     test_path = os.path.join(TEST_PATH, "temp_files", "test.en-es.en")
     with open(test_path) as handle:
         # Surrounding whitespace is ignored in the comparison.
         self.assertEqual(handle.read().strip(), expected)
示例#8
0
 def test_english_train(self):
     """Compare temp_files/train.en-es.en with "hello" plus three "-m" lines."""
     expected = "hello\n" "e1-m\ne2-m\ne3-m"
     create_hybrid_corpora(self.cconf)
     train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.en")
     with open(train_path) as handle:
         # Surrounding whitespace is ignored in the comparison.
         self.assertEqual(handle.read().strip(), expected)
示例#9
0
 def test_spanish_train(self):
     """Compare temp_files/train.en-es.es with the expected Spanish train lines.

     The expected content is fifteen "-l" lines followed by the three "-m"
     lines repeated five times.
     """
     # Adjacent literals concatenate into one string identical to the
     # single-line form; they are split only for readability.
     expected = (
         "s1-l\ns2-l\ns3-l\ns4-l\ns5-l\ns6-l\ns7-l\ns8-l\ns9-l\ns10-l\n"
         "s11-l\ns12-l\ns13-l\ns14-l\ns15-l\n"
         "s1-m\ns2-m\ns3-m\n"
         "s1-m\ns2-m\ns3-m\n"
         "s1-m\ns2-m\ns3-m\n"
         "s1-m\ns2-m\ns3-m\n"
         "s1-m\ns2-m\ns3-m"
     )
     create_hybrid_corpora(self.cconf)
     train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.es")
     with open(train_path) as handle:
         self.assertEqual(handle.read().strip(), expected)
示例#10
0
 def test_spanish_test(self):
     """Check that temp_files/test.en-es.es is empty apart from whitespace."""
     create_hybrid_corpora(self.cconf)
     test_path = os.path.join(TEST_PATH, "temp_files", "test.en-es.es")
     with open(test_path) as handle:
         # After stripping, nothing should remain.
         self.assertEqual(handle.read().strip(), "")
示例#11
0
 def test_english_train(self):
     """Check that temp_files/train.en-es.en holds only the line "hello"."""
     expected = "hello"
     create_hybrid_corpora(self.cconf)
     train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.en")
     with open(train_path) as handle:
         # Surrounding whitespace is ignored in the comparison.
         self.assertEqual(handle.read().strip(), expected)
示例#12
0
 def test_english_test(self):
     """Compare temp_files/test.en-es.en with the expected English test lines."""
     expected = "e20-l\ne20-l\n" "e8-m\ne9-m\ne10-m"
     create_hybrid_corpora(self.cconf)
     test_path = os.path.join(TEST_PATH, "temp_files", "test.en-es.en")
     with open(test_path) as handle:
         # Surrounding whitespace is ignored in the comparison.
         self.assertEqual(handle.read().strip(), expected)
示例#13
0
 def test_spanish_tune(self):
     """Compare temp_files/tune.en-es.es with the expected Spanish tune lines."""
     expected = (
         "s16-l\ns17-l\ns18-l\ns19-l\n"
         "s4-m\ns5-m\ns6-m\ns7-m\n"
         "s4-m\ns5-m"
     )
     create_hybrid_corpora(self.cconf)
     tune_path = os.path.join(TEST_PATH, "temp_files", "tune.en-es.es")
     with open(tune_path) as handle:
         # Surrounding whitespace is ignored in the comparison.
         self.assertEqual(handle.read().strip(), expected)
示例#14
0
 def test_english_tune(self):
     """Compare temp_files/tune.en-es.en with the expected English tune lines."""
     expected = (
         "e16-l\ne17-l\ne18-l\ne19-l\n"
         "e4-m\ne5-m\ne6-m\ne7-m\n"
         "e4-m\ne5-m"
     )
     create_hybrid_corpora(self.cconf)
     tune_path = os.path.join(TEST_PATH, "temp_files", "tune.en-es.en")
     with open(tune_path) as handle:
         # Surrounding whitespace is ignored in the comparison.
         self.assertEqual(handle.read().strip(), expected)
示例#15
0
 def test_spanish_test(self):
     """Compare temp_files/test.en-es.es with the expected Spanish test lines."""
     expected = "s20-l\ns20-l\n" "s8-m\ns9-m\ns10-m"
     create_hybrid_corpora(self.cconf)
     test_path = os.path.join(TEST_PATH, "temp_files", "test.en-es.es")
     with open(test_path) as handle:
         # Surrounding whitespace is ignored in the comparison.
         self.assertEqual(handle.read().strip(), expected)
示例#16
0
 def test_spanish_train(self):
     """Check that temp_files/train.en-es.es holds only the line "hola"."""
     expected = "hola"
     create_hybrid_corpora(self.cconf)
     train_path = os.path.join(TEST_PATH, "temp_files", "train.en-es.es")
     with open(train_path) as handle:
         # Surrounding whitespace is ignored in the comparison.
         self.assertEqual(handle.read().strip(), expected)
示例#17
0
 def test_english_train(self):
     """Compare temp_files/train.en-es.en with the expected English train lines.

     create_hybrid_corpora is run on the fixture config (self.cconf) and the
     stripped file content is compared against fifteen "-l" lines followed by
     the three "-m" lines repeated five times.
     """
     create_hybrid_corpora(self.cconf)
     with open(os.path.join(TEST_PATH, "temp_files", "train.en-es.en")) as f:
         self.assertEqual(f.read().strip(), "e1-l\ne2-l\ne3-l\ne4-l\ne5-l\ne6-l\ne7-l\ne8-l\ne9-l\ne10-l\ne11-l\ne12-l\ne13-l\ne14-l\ne15-l\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m\ne1-m\ne2-m\ne3-m")