def train(opts, dirs):
    """Collect the corpus files selected by *opts* and train a voice on them.

    Corpus directories are taken from ``opts.command_line_corpus`` when it is
    set (each entry is asserted to be an existing directory). Otherwise the
    speaker's ``txt`` and ``wav`` directories under ``dirs['CORPUS']`` are
    used, plus the text corpus named by ``opts.text_corpus_name`` if given.

    Each directory listing is sorted; when ``opts.shuffle`` is set, the
    module-level ``random`` generator is re-seeded with 1 and the listing is
    shuffled. Entries whose name contains ``'._'`` are skipped. When
    ``opts.file_num`` is set, at most ``int(opts.file_num)`` files are taken
    from each directory.

    The collected paths are wrapped in ``Corpus.Corpus`` and passed to
    ``Voice.train``. Nothing is returned.
    """
    ## Handle corpus:
    print(" -- Gather corpus")

    ## Work out which directories hold the corpus data (all txt and wav):
    if opts.command_line_corpus:
        corpora = []
        for location in opts.command_line_corpus:
            assert os.path.isdir(location)
            corpora.append(location)
    else:
        speaker_root = os.path.join(
            dirs['CORPUS'], opts.lang, fname.SPEAKERS, opts.speaker)
        corpora = [
            os.path.join(speaker_root, "txt"),
            os.path.join(speaker_root, "wav"),
        ]
        ## Optional additional large text corpus:
        if opts.text_corpus_name:
            text_corpus_dir = os.path.join(
                dirs['CORPUS'], opts.lang, fname.TEXT_CORPORA,
                opts.text_corpus_name)
            corpora.append(text_corpus_dir)

    ## Per-directory cap on the number of files (unlimited when unset):
    max_files = int(opts.file_num) if opts.file_num else float("inf")

    ## Collect the individual txt and wav file paths:
    voice_data = []
    for corpus_dir in corpora:
        entries = os.listdir(corpus_dir)
        entries.sort()
        if opts.shuffle:
            ## Re-seeding for every directory gives listings of equal
            ## length the same permutation.
            random.seed(1)
            random.shuffle(entries)

        taken = 0
        for entry in entries:
            if '._' in entry:
                continue
            voice_data.append(os.path.join(corpus_dir, entry))
            taken += 1
            ## Stop collecting from this directory once the cap is reached
            if taken >= max_files:
                break

    corpus = Corpus.Corpus(voice_data)

    print(" -- Train voice")
    voice = Voice(
        opts.speaker,
        opts.lang,
        opts.config,
        opts.stage,
        dirs,
        clear_old_data=opts.clear,
        max_cores=opts.max_cores,
    )

    ## Train the voice (i.e. train processors in pipeline context):
    voice.train(corpus)
def train(opts, dirs):
    """Collect the corpus files selected by *opts* and train a voice on them.

    Corpus directories are taken from ``opts.command_line_corpus`` when it is
    set (each entry is asserted to be an existing directory). Otherwise the
    speaker's ``txt`` and ``wav`` directories under ``dirs['CORPUS']`` are
    used, plus the text corpus named by ``opts.text_corpus_name`` if given.

    Every entry of every corpus directory is collected, in sorted order per
    directory, wrapped in ``Corpus.Corpus`` and passed to ``Voice.train``.
    Nothing is returned.
    """
    ## Handle corpus:
    ## Parenthesised print of a single string behaves the same on
    ## Python 2 and Python 3.
    print(" -- Gather corpus")

    ## Get names of directories containing corpus data (all txt and wav):
    corpora = []
    if opts.command_line_corpus:
        for location in opts.command_line_corpus:
            assert os.path.isdir(location)
            corpora.append(location)
    else:
        corpora.append(
            os.path.join(dirs['CORPUS'], opts.lang, fname.SPEAKERS,
                         opts.speaker, "txt"))
        corpora.append(
            os.path.join(dirs['CORPUS'], opts.lang, fname.SPEAKERS,
                         opts.speaker, "wav"))
        ## additional large text corpus:
        if opts.text_corpus_name:
            corpora.append(
                os.path.join(dirs['CORPUS'], opts.lang, fname.TEXT_CORPORA,
                             opts.text_corpus_name))

    ## Get names of individual txt and wav files. os.listdir returns entries
    ## in arbitrary order, so sort to keep the file list deterministic.
    voice_data = []
    for c in corpora:
        for f in sorted(os.listdir(c)):
            voice_data.append(os.path.join(c, f))

    corpus = Corpus.Corpus(voice_data)

    print(" -- Train voice")
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage,
                  dirs, clear_old_data=opts.clear, max_cores=opts.max_cores)

    ## Train the voice (i.e. train processors in pipeline context):
    voice.train(corpus)