def test_trainer_with_data(self):
    """Run the HTK training recipe on a small French corpus.

    The corpus is configured with the French dictionary and the French
    monophone mapping file, then filled with two explicitly listed
    (annotation, audio) pairs stored next to this test and with the
    "samples-fra" directory of SAMPLES_PATH.

    No assertion is made on the result of the recipe: the test passes as
    long as none of the calls raises.
    """
    # Optional logging setup, disabled by default: setup_logging(1, None)
    french_dict = os.path.join(RESOURCES_PATH, "dict", "fra.dict")
    french_mapping = os.path.join(
        RESOURCES_PATH, "models", "models-fra", "monophones.repl"
    )
    # (annotation file, audio file) pairs, both looked up in HERE.
    annotated_pairs = (
        ("F_F_B003-P8-palign.TextGrid", "F_F_B003-P8.wav"),
        ("track_0001-phon.xra", "track_0001.wav"),
    )

    corpus = TrainingCorpus()
    corpus.fix_resources(dictfile=french_dict, mappingfile=french_mapping)
    corpus.lang = "fra"
    corpus.datatrainer.protodir = os.path.join(HERE, "protos")

    for annotation_name, audio_name in annotated_pairs:
        corpus.add_file(
            os.path.join(HERE, annotation_name),
            os.path.join(HERE, audio_name),
        )
    corpus.add_corpus(os.path.join(SAMPLES_PATH, "samples-fra"))

    # The returned acoustic model is not inspected by this test.
    HTKModelTrainer(corpus).training_recipe(delete=True)
def test_trainingcorpus(self):
    """Check resource loading and file registration of a TrainingCorpus.

    Verified, in order:
      - before any mapping file is given, the phoneme map maps '#' to '#';
      - after loading "nan.dict" alone, the monophone set has 44 entries;
      - after loading "nan.dict" with the "models-nan" mapping file,
        '#' is mapped to "sil";
      - add_file() returns a false value for the ("toto", "toto") pair
        and a true value for the TextGrid/WAV pair stored in HERE.

    The data trainer is deleted in a ``finally`` clause so that the
    cleanup also happens when one of the assertions fails.
    """
    nan_dict = os.path.join(RESOURCES_PATH, "dict", "nan.dict")
    nan_mapping = os.path.join(
        RESOURCES_PATH, "models", "models-nan", "monophones.repl"
    )

    corpus = TrainingCorpus()
    try:
        self.assertEqual(corpus.phonemap.map_entry('#'), "#")

        corpus.fix_resources(dictfile=nan_dict)
        self.assertEqual(corpus.monophones.get_size(), 44)

        corpus.fix_resources(dictfile=nan_dict, mappingfile=nan_mapping)
        self.assertEqual(corpus.phonemap.map_entry('#'), "sil")

        self.assertFalse(corpus.add_file("toto", "toto"))
        self.assertTrue(
            corpus.add_file(
                os.path.join(HERE, "F_F_B003-P8-palign.TextGrid"),
                os.path.join(HERE, "F_F_B003-P8.wav"),
            )
        )
    finally:
        # Always release what the data trainer created, even on failure.
        # NOTE(review): presumably this removes its working directory;
        # confirm against DataTrainer.delete().
        corpus.datatrainer.delete()
# - protodir=None (in) # - protofilename=DEFAULT_PROTO_FILENAME (out) datatrainer = DataTrainer() datatrainer.create( workdir=args.t, protodir=args.p ) # --------------------------------- # 2. Create a Corpus Manager # it manages the set of training data: # - establishes the list of phonemes (from the dict); # - converts the input annotated data into the HTK-specific data format; # - codes the audio data. corpus = TrainingCorpus( datatrainer, lang=args.l ) corpus.fix_resources( dictfile=args.r, mappingfile=args.m ) if args.i: for entry in args.i: if os.path.isdir( entry ): corpus.add_corpus( entry ) else: logging.info('[ WARNING ] Ignore the given entry: %s'%entry) # --------------------------------- # 3. Acoustic Model Training trainer = HTKModelTrainer( corpus ) DELETE = False if args.t is None: