def test_load(config_directory):
    """Check that training configs load from YAML and bad ordering is rejected.

    The basic config must yield four training blocks whose first, second and
    last entries are monophone, triphone and SAT trainers respectively; the
    out-of-order config must raise ``ConfigError``.
    """
    basic_path = os.path.join(config_directory, 'basic_train_config.yaml')
    train, align = train_yaml_to_config(basic_path)

    assert len(train.training_configs) == 4
    expected_types = (
        (0, MonophoneTrainer),
        (1, TriphoneTrainer),
        (-1, SatTrainer),
    )
    for position, trainer_type in expected_types:
        assert isinstance(train.training_configs[position], trainer_type)

    out_of_order_path = os.path.join(config_directory, 'out_of_order_config.yaml')
    with pytest.raises(ConfigError):
        train, align = train_yaml_to_config(out_of_order_path)
def align_corpus(args):
    """Train an aligner on a corpus and export the resulting TextGrids.

    A working directory named after the corpus is kept under the temporary
    directory, next to a ``config.yml`` that records the run that produced it.
    The working directory is wiped when ``args.clean`` is set, when the
    previous run was marked dirty, or when the recorded run does not match the
    current one (type, corpus directory, version or dictionary path).

    Args:
        args: Parsed command-line namespace. Reads ``temp_directory``,
            ``corpus_directory``, ``dictionary_path``, ``output_directory``,
            ``speaker_characters``, ``config_path``, ``verbose`` and
            ``output_model_path``; ``clean``, ``num_jobs``, ``debug`` and
            ``ignore_exceptions`` are optional. ``corpus_directory`` is
            rewritten in place when it ends with a path separator.

    Raises:
        Whatever the corpus, dictionary or aligner raise; the run is marked
        dirty in ``config.yml`` before the exception propagates.
    """
    if not args.temp_directory:
        temp_dir = TEMP_DIR
    else:
        temp_dir = os.path.expanduser(args.temp_directory)

    # A trailing separator yields an empty basename; strip it so the corpus
    # gets a usable name.
    corpus_name = os.path.basename(args.corpus_directory)
    if corpus_name == '':
        args.corpus_directory = os.path.dirname(args.corpus_directory)
        corpus_name = os.path.basename(args.corpus_directory)
    data_directory = os.path.join(temp_dir, corpus_name)
    conf_path = os.path.join(data_directory, 'config.yml')

    fresh_conf = {
        'dirty': False,
        'begin': time.time(),
        'version': __version__,
        'type': 'train_and_align',
        'corpus_directory': args.corpus_directory,
        'dictionary_path': args.dictionary_path,
    }
    if os.path.exists(conf_path):
        with open(conf_path, 'r') as f:
            # safe_load: yaml.load() without a Loader fails on PyYAML >= 6,
            # and the stored config only holds plain scalars.
            conf = yaml.safe_load(f)
    else:
        conf = fresh_conf

    tracked_keys = ('type', 'corpus_directory', 'version', 'dictionary_path')
    stale = (
        getattr(args, 'clean', False)
        # An empty or corrupt config file cannot be trusted.
        or not isinstance(conf, dict)
        or conf.get('dirty', True)
        or any(conf.get(key) != fresh_conf[key] for key in tracked_keys)
    )
    if stale:
        shutil.rmtree(data_directory, ignore_errors=True)
        # Start from a fresh record; otherwise the old dirty flag or mismatch
        # is written back below and every later run wipes the cache again.
        conf = fresh_conf

    os.makedirs(data_directory, exist_ok=True)
    os.makedirs(args.output_directory, exist_ok=True)
    try:
        corpus = Corpus(args.corpus_directory, data_directory,
                        speaker_characters=args.speaker_characters,
                        num_jobs=getattr(args, 'num_jobs', 3),
                        debug=getattr(args, 'debug', False),
                        ignore_exceptions=getattr(args, 'ignore_exceptions', False))
        if corpus.issues_check:
            print('WARNING: Some issues parsing the corpus were detected. '
                  'Please run the validator to get more information.')
        dictionary = Dictionary(args.dictionary_path, data_directory,
                                word_set=corpus.word_set)

        # Copy any out-of-vocabulary reports next to the output.
        utt_oov_path = os.path.join(corpus.split_directory(), 'utterance_oovs.txt')
        if os.path.exists(utt_oov_path):
            shutil.copy(utt_oov_path, args.output_directory)
        oov_path = os.path.join(corpus.split_directory(), 'oovs_found.txt')
        if os.path.exists(oov_path):
            shutil.copy(oov_path, args.output_directory)

        if args.config_path:
            train_config, align_config = train_yaml_to_config(args.config_path)
        else:
            train_config, align_config = load_basic_train()
        a = TrainableAligner(corpus, dictionary, train_config, align_config,
                             args.output_directory,
                             temp_directory=data_directory)
        a.verbose = args.verbose
        a.train()
        a.export_textgrids()
        if args.output_model_path is not None:
            a.save(args.output_model_path)
    except BaseException:
        # Deliberately broad: an interrupted run (Ctrl-C included) leaves
        # partial state behind and must be cleaned on the next invocation.
        conf['dirty'] = True
        raise
    finally:
        with open(conf_path, 'w') as f:
            yaml.dump(conf, f)
def nnet_ivectors_train_config(config_directory):
    """Load ``nnet_ivectors_train.yaml`` from ``config_directory``.

    Returns the result of ``train_yaml_to_config`` for that file.
    """
    yaml_path = os.path.join(config_directory, 'nnet_ivectors_train.yaml')
    return train_yaml_to_config(yaml_path)
def ivector_train_config(config_directory):
    """Load ``ivector_train.yaml`` from ``config_directory``.

    Returns the result of ``train_yaml_to_config`` for that file.
    """
    yaml_path = os.path.join(config_directory, 'ivector_train.yaml')
    return train_yaml_to_config(yaml_path)
def lda_sat_train_config(config_directory):
    """Load ``lda_sat_train.yaml`` from ``config_directory``.

    Returns the result of ``train_yaml_to_config`` for that file.
    """
    yaml_path = os.path.join(config_directory, 'lda_sat_train.yaml')
    return train_yaml_to_config(yaml_path)
def nnet_ivectors_train_config(config_directory):
    """Build the configuration described by ``nnet_ivectors_train.yaml``.

    The file is looked up inside ``config_directory`` and handed to
    ``train_yaml_to_config``, whose result is returned unchanged.
    """
    config_file = os.path.join(config_directory, 'nnet_ivectors_train.yaml')
    loaded = train_yaml_to_config(config_file)
    return loaded
def ivector_train_config(config_directory):
    """Build the configuration described by ``ivector_train.yaml``.

    The file is looked up inside ``config_directory`` and handed to
    ``train_yaml_to_config``, whose result is returned unchanged.
    """
    config_file = os.path.join(config_directory, 'ivector_train.yaml')
    loaded = train_yaml_to_config(config_file)
    return loaded
def lda_sat_train_config(config_directory):
    """Build the configuration described by ``lda_sat_train.yaml``.

    The file is looked up inside ``config_directory`` and handed to
    ``train_yaml_to_config``, whose result is returned unchanged.
    """
    config_file = os.path.join(config_directory, 'lda_sat_train.yaml')
    loaded = train_yaml_to_config(config_file)
    return loaded
def align_corpus(args):
    """Train an aligner on a corpus and export the resulting TextGrids.

    A working directory named after the corpus is kept under the temporary
    directory, next to a ``config.yml`` that records the run that produced it.
    The working directory is wiped when ``args.clean`` is set, when the
    previous run was marked dirty, or when the recorded run does not match the
    current one (type, corpus directory, version or dictionary path).

    Args:
        args: Parsed command-line namespace. Reads ``temp_directory``,
            ``corpus_directory``, ``dictionary_path``, ``output_directory``,
            ``speaker_characters``, ``config_path``, ``verbose`` and
            ``output_model_path``; ``clean``, ``num_jobs``, ``debug`` and
            ``ignore_exceptions`` are optional. ``corpus_directory`` is
            rewritten in place when it ends with a path separator.

    Raises:
        Whatever the corpus, dictionary or aligner raise; the run is marked
        dirty in ``config.yml`` before the exception propagates.
    """
    if not args.temp_directory:
        temp_dir = TEMP_DIR
    else:
        temp_dir = os.path.expanduser(args.temp_directory)

    # A trailing separator yields an empty basename; strip it so the corpus
    # gets a usable name.
    corpus_name = os.path.basename(args.corpus_directory)
    if corpus_name == "":
        args.corpus_directory = os.path.dirname(args.corpus_directory)
        corpus_name = os.path.basename(args.corpus_directory)
    data_directory = os.path.join(temp_dir, corpus_name)
    conf_path = os.path.join(data_directory, "config.yml")

    fresh_conf = {
        "dirty": False,
        "begin": time.time(),
        "version": __version__,
        "type": "train_and_align",
        "corpus_directory": args.corpus_directory,
        "dictionary_path": args.dictionary_path,
    }
    if os.path.exists(conf_path):
        with open(conf_path, "r") as f:
            # safe_load: yaml.load() without a Loader fails on PyYAML >= 6,
            # and the stored config only holds plain scalars.
            conf = yaml.safe_load(f)
    else:
        conf = fresh_conf

    tracked_keys = ("type", "corpus_directory", "version", "dictionary_path")
    stale = (
        getattr(args, "clean", False)
        # An empty or corrupt config file cannot be trusted.
        or not isinstance(conf, dict)
        or conf.get("dirty", True)
        or any(conf.get(key) != fresh_conf[key] for key in tracked_keys)
    )
    if stale:
        shutil.rmtree(data_directory, ignore_errors=True)
        # Start from a fresh record; otherwise the old dirty flag or mismatch
        # is written back below and every later run wipes the cache again.
        conf = fresh_conf

    os.makedirs(data_directory, exist_ok=True)
    os.makedirs(args.output_directory, exist_ok=True)
    try:
        corpus = Corpus(
            args.corpus_directory,
            data_directory,
            speaker_characters=args.speaker_characters,
            num_jobs=getattr(args, "num_jobs", 3),
            debug=getattr(args, "debug", False),
            ignore_exceptions=getattr(args, "ignore_exceptions", False),
        )
        if corpus.issues_check:
            print(
                "WARNING: Some issues parsing the corpus were detected. "
                "Please run the validator to get more information."
            )
        dictionary = Dictionary(
            args.dictionary_path, data_directory, word_set=corpus.word_set
        )

        # Copy any out-of-vocabulary reports next to the output.
        utt_oov_path = os.path.join(corpus.split_directory(), "utterance_oovs.txt")
        if os.path.exists(utt_oov_path):
            shutil.copy(utt_oov_path, args.output_directory)
        oov_path = os.path.join(corpus.split_directory(), "oovs_found.txt")
        if os.path.exists(oov_path):
            shutil.copy(oov_path, args.output_directory)

        if args.config_path:
            train_config, align_config = train_yaml_to_config(args.config_path)
        else:
            train_config, align_config = load_basic_train()
        a = TrainableAligner(
            corpus,
            dictionary,
            train_config,
            align_config,
            args.output_directory,
            temp_directory=data_directory,
        )
        a.verbose = args.verbose
        a.train()
        a.export_textgrids()
        if args.output_model_path is not None:
            a.save(args.output_model_path)
    except BaseException:
        # Deliberately broad: an interrupted run (Ctrl-C included) leaves
        # partial state behind and must be cleaned on the next invocation.
        conf["dirty"] = True
        raise
    finally:
        with open(conf_path, "w") as f:
            yaml.dump(conf, f)
def align_corpus(args):
    """Train an aligner on a corpus and export the resulting TextGrids.

    A working directory named after the corpus is kept under the temporary
    directory, next to a ``config.yml`` that records the run that produced it.
    The working directory is wiped when ``args.clean`` is set, when the
    previous run was marked dirty, or when the recorded run does not match the
    current one (type, corpus directory, version or dictionary path).

    Args:
        args: Parsed command-line namespace. Reads ``temp_directory``,
            ``corpus_directory``, ``dictionary_path``, ``output_directory``,
            ``speaker_characters``, ``config_path``, ``verbose`` and
            ``output_model_path``; ``clean``, ``num_jobs``, ``debug`` and
            ``ignore_exceptions`` are optional. ``corpus_directory`` is
            rewritten in place when it ends with a path separator.

    Raises:
        Whatever the corpus, dictionary or aligner raise; the run is marked
        dirty in ``config.yml`` before the exception propagates.
    """
    if not args.temp_directory:
        temp_dir = TEMP_DIR
    else:
        temp_dir = os.path.expanduser(args.temp_directory)

    # A trailing separator yields an empty basename; strip it so the corpus
    # gets a usable name.
    corpus_name = os.path.basename(args.corpus_directory)
    if corpus_name == '':
        args.corpus_directory = os.path.dirname(args.corpus_directory)
        corpus_name = os.path.basename(args.corpus_directory)
    data_directory = os.path.join(temp_dir, corpus_name)
    conf_path = os.path.join(data_directory, 'config.yml')

    fresh_conf = {'dirty': False,
                  'begin': time.time(),
                  'version': __version__,
                  'type': 'train_and_align',
                  'corpus_directory': args.corpus_directory,
                  'dictionary_path': args.dictionary_path}
    if os.path.exists(conf_path):
        with open(conf_path, 'r') as f:
            # safe_load: yaml.load() without a Loader fails on PyYAML >= 6,
            # and the stored config only holds plain scalars.
            conf = yaml.safe_load(f)
    else:
        conf = fresh_conf

    tracked_keys = ('type', 'corpus_directory', 'version', 'dictionary_path')
    stale = (
        getattr(args, 'clean', False)
        # An empty or corrupt config file cannot be trusted.
        or not isinstance(conf, dict)
        or conf.get('dirty', True)
        or any(conf.get(key) != fresh_conf[key] for key in tracked_keys)
    )
    if stale:
        shutil.rmtree(data_directory, ignore_errors=True)
        # Start from a fresh record; otherwise the old dirty flag or mismatch
        # is written back below and every later run wipes the cache again.
        conf = fresh_conf

    os.makedirs(data_directory, exist_ok=True)
    os.makedirs(args.output_directory, exist_ok=True)
    try:
        corpus = Corpus(args.corpus_directory, data_directory,
                        speaker_characters=args.speaker_characters,
                        num_jobs=getattr(args, 'num_jobs', 3),
                        debug=getattr(args, 'debug', False),
                        ignore_exceptions=getattr(args, 'ignore_exceptions', False))
        if corpus.issues_check:
            print('WARNING: Some issues parsing the corpus were detected. '
                  'Please run the validator to get more information.')
        dictionary = Dictionary(args.dictionary_path, data_directory,
                                word_set=corpus.word_set)

        # Copy any out-of-vocabulary reports next to the output.
        utt_oov_path = os.path.join(corpus.split_directory(), 'utterance_oovs.txt')
        if os.path.exists(utt_oov_path):
            shutil.copy(utt_oov_path, args.output_directory)
        oov_path = os.path.join(corpus.split_directory(), 'oovs_found.txt')
        if os.path.exists(oov_path):
            shutil.copy(oov_path, args.output_directory)

        if args.config_path:
            train_config, align_config = train_yaml_to_config(args.config_path)
        else:
            train_config, align_config = load_basic_train()
        a = TrainableAligner(corpus, dictionary, train_config, align_config,
                             args.output_directory,
                             temp_directory=data_directory)
        a.verbose = args.verbose
        a.train()
        a.export_textgrids()
        if args.output_model_path is not None:
            a.save(args.output_model_path)
    except BaseException:
        # Deliberately broad: an interrupted run (Ctrl-C included) leaves
        # partial state behind and must be cleaned on the next invocation.
        conf['dirty'] = True
        raise
    finally:
        with open(conf_path, 'w') as f:
            yaml.dump(conf, f)