def train_post():
    """Handle the training form: launch a background training run, then show progress.

    Reads the dataset name and hyperparameters from the submitted form; an
    optionally uploaded checkpoint is stored and used for transfer learning.
    """
    form = request.form
    name = form["path"]
    use_early_stopping = form.get("early_stopping") is not None
    dataset_dir = os.path.join(paths["datasets"], name)
    model_dir = os.path.join(paths["models"], name)
    # Optional transfer-learning checkpoint uploaded alongside the form.
    uploaded = request.files.get("pretrained_model")
    if uploaded:
        pretrained_dir = os.path.join(paths["pretrained"], name)
        os.makedirs(pretrained_dir, exist_ok=True)
        transfer_learning_path = os.path.join(pretrained_dir, "pretrained.pt")
        uploaded.save(transfer_learning_path)
    else:
        transfer_learning_path = None
    # Training runs on a worker thread so this request can return immediately.
    start_progress_thread(
        train,
        metadata_path=os.path.join(dataset_dir, METADATA_FILE),
        dataset_directory=os.path.join(dataset_dir, AUDIO_FOLDER),
        output_directory=model_dir,
        transfer_learning_path=transfer_learning_path,
        epochs=int(form["epochs"]),
        batch_size=int(form["batch_size"]),
        early_stopping=use_early_stopping,
    )
    return render_template("progress.html", next_url=get_next_url(URLS, request.path))
def create_dataset_post():
    """Start a background job that builds a new dataset or extends an existing one.

    A non-empty "name" field means a brand-new dataset; otherwise the uploaded
    text/audio pair is appended to the dataset selected via the "path" field.
    """
    raw_confidence = request.form["confidence"]
    if request.form["name"]:
        target_dir = os.path.join(paths["datasets"], request.form["name"])
        if os.path.exists(target_dir):
            # Drop the pending upload reference before aborting the request.
            request.files = None
            raise Exception("Dataset name taken")
        os.makedirs(target_dir, exist_ok=True)
        text_destination = os.path.join(target_dir, TEXT_FILE)
        audio_destination = os.path.join(target_dir, AUDIO_FILE)
        request.files["text_file"].save(text_destination)
        request.files["audio_file"].save(audio_destination)
        start_progress_thread(
            create_dataset,
            text_path=text_destination,
            audio_path=audio_destination,
            forced_alignment_path=os.path.join(target_dir, ALIGNMENT_FILE),
            output_path=os.path.join(target_dir, AUDIO_FOLDER),
            label_path=os.path.join(target_dir, METADATA_FILE),
            info_path=os.path.join(target_dir, INFO_FILE),
            min_confidence=float(raw_confidence),
        )
    else:
        target_dir = os.path.join(paths["datasets"], request.form["path"])
        # New uploads are suffixed so they never clobber earlier additions.
        suffix = get_suffix()
        text_destination = os.path.join(target_dir, f"text-{suffix}.txt")
        audio_destination = os.path.join(target_dir, f"audio-{suffix}.mp3")
        request.files["text_file"].save(text_destination)
        request.files["audio_file"].save(audio_destination)
        start_progress_thread(
            extend_existing_dataset,
            text_path=text_destination,
            audio_path=audio_destination,
            forced_alignment_path=os.path.join(target_dir, f"align-{suffix}.json"),
            output_path=os.path.join(target_dir, AUDIO_FOLDER),
            label_path=os.path.join(target_dir, METADATA_FILE),
            suffix=suffix,
            info_path=os.path.join(target_dir, INFO_FILE),
            min_confidence=float(raw_confidence),
        )
    return render_template("progress.html", next_url=get_next_url(URLS, request.path))
def train_post():
    """Validate the training form and launch Tacotron training on a worker thread.

    Datasets either ship a single metadata file or explicit train/validation
    lists; whichever exists is forwarded to ``train``. Renders the progress
    page, which polls the background job.
    """
    language = request.form["language"]
    symbols = get_symbols(language)
    dataset_name = request.form["dataset"]
    epochs = request.form["epochs"]
    batch_size = request.form["batch_size"]
    early_stopping = request.form.get("early_stopping") is not None
    iters_per_checkpoint = request.form["checkpoint_frequency"]
    iters_per_backup_checkpoint = request.form["backup_checkpoint_frequency"]
    train_size = 1 - float(request.form["validation_size"])
    alignment_sentence = request.form["alignment_sentence"]
    multi_gpu = request.form.get("multi_gpu") is not None
    # Optional checkpoint to resume from, relative to the dataset's model folder.
    checkpoint_path = (
        os.path.join(paths["models"], dataset_name, request.form["checkpoint"])
        if request.form.get("checkpoint")
        else None
    )
    metadata_path = os.path.join(paths["datasets"], dataset_name, METADATA_FILE)
    use_metadata = os.path.isfile(metadata_path)
    trainlist_path = os.path.join(paths["datasets"], dataset_name, TRAIN_FILE)
    vallist_path = os.path.join(paths["datasets"], dataset_name, VALIDATION_FILE)
    audio_folder = os.path.join(paths["datasets"], dataset_name, AUDIO_FOLDER)
    checkpoint_folder = os.path.join(paths["models"], dataset_name)
    if request.files.get("pretrained_model"):
        # FIX: the hard-coded "data" folder may not exist yet; create it before
        # saving the upload (mirrors the makedirs done for other upload paths).
        os.makedirs("data", exist_ok=True)
        transfer_learning_path = os.path.join("data", "pretrained.pt")
        request.files["pretrained_model"].save(transfer_learning_path)
    else:
        transfer_learning_path = None
    start_progress_thread(
        train,
        metadata_path=metadata_path if use_metadata else None,
        trainlist_path=trainlist_path if not use_metadata else None,
        vallist_path=vallist_path if not use_metadata else None,
        audio_directory=audio_folder,
        output_directory=checkpoint_folder,
        symbols=symbols,
        checkpoint_path=checkpoint_path,
        transfer_learning_path=transfer_learning_path,
        epochs=int(epochs),
        batch_size=int(batch_size),
        early_stopping=early_stopping,
        multi_gpu=multi_gpu,
        iters_per_checkpoint=int(iters_per_checkpoint),
        iters_per_backup_checkpoint=int(iters_per_backup_checkpoint),
        train_size=train_size,
        alignment_sentence=alignment_sentence,
    )
    return render_template(
        "progress.html",
        next_url=get_next_url(URLS, request.path),
        voice=Path(checkpoint_folder).stem,
    )
def upload_dataset():
    """Import an uploaded dataset archive into a new dataset folder.

    The heavy import work runs on a background thread; this handler only
    validates the target name and stages the upload.

    Raises:
        Exception: if a dataset folder with the requested name already exists.
    """
    dataset = request.files["dataset"]
    dataset_name = request.values["name"]
    dataset_directory = os.path.join(paths["datasets"], dataset_name)
    # FIX: was an `assert`, which is stripped under `python -O`; also validate
    # *before* persisting the potentially large upload to disk.
    if os.path.isdir(dataset_directory):
        raise Exception("Output folder already exists")
    dataset.save(TEMP_DATASET_UPLOAD)
    audio_folder = os.path.join(dataset_directory, AUDIO_FOLDER)
    start_progress_thread(
        import_dataset,
        dataset=TEMP_DATASET_UPLOAD,
        dataset_directory=dataset_directory,
        audio_folder=audio_folder,
    )
    return render_template("progress.html", next_url="/import-export")
def train_hifigan_post():
    """Launch HiFi-GAN vocoder training on a worker thread.

    Checkpoint resolution order: resume from an existing local checkpoint pair
    (``checkpoint_iteration``), else use an uploaded pretrained generator +
    discriminator pair, else train from scratch.
    """
    dataset_name = request.form["dataset"]
    epochs = request.form["epochs"]
    batch_size = request.form["batch_size"]
    iters_per_checkpoint = request.form["checkpoint_frequency"]
    iters_per_backup_checkpoint = request.form["backup_checkpoint_frequency"]
    train_size = 1 - float(request.form["validation_size"])
    audio_folder = os.path.join(paths["datasets"], dataset_name, AUDIO_FOLDER)
    output_directory = os.path.join(paths["hifigan_training"], dataset_name)
    if request.form.get("checkpoint_iteration"):
        # Resume from an existing generator ("g_N") / discriminator ("do_N") pair.
        checkpoint_g = os.path.join(
            paths["hifigan_training"], dataset_name,
            f"g_{request.form['checkpoint_iteration']}")
        checkpoint_do = os.path.join(
            paths["hifigan_training"], dataset_name,
            f"do_{request.form['checkpoint_iteration']}")
    elif request.files.get("pretrained_model_g") and request.files.get("pretrained_model_do"):
        # FIX: the original only checked the generator upload, so a missing
        # discriminator file raised a KeyError on save; require both uploads.
        os.makedirs("data", exist_ok=True)  # FIX: ensure hard-coded folder exists
        checkpoint_g = os.path.join("data", "pretrained_model_g.pt")
        checkpoint_do = os.path.join("data", "pretrained_model_do.pt")
        request.files["pretrained_model_g"].save(checkpoint_g)
        request.files["pretrained_model_do"].save(checkpoint_do)
    else:
        checkpoint_g = None
        checkpoint_do = None
    start_progress_thread(
        train_hifigan,
        audio_folder=audio_folder,
        output_directory=output_directory,
        checkpoint_g=checkpoint_g,
        checkpoint_do=checkpoint_do,
        epochs=int(epochs),
        batch_size=int(batch_size),
        iters_per_checkpoint=int(iters_per_checkpoint),
        iters_per_backup_checkpoint=int(iters_per_backup_checkpoint),
        train_size=train_size,
    )
    return render_template("progress.html", next_url="/synthesis")
def _save_text_upload(text_path):
    """Write the uploaded "text_file" to *text_path*, normalizing CRLF to LF.

    Undecodable bytes are dropped ("ignore") so a stray byte in a transcript
    cannot abort dataset creation.
    """
    with open(text_path, "w", encoding=CHARACTER_ENCODING) as f:
        f.write(request.files["text_file"].read().decode(
            CHARACTER_ENCODING, "ignore").replace("\r\n", "\n"))


def create_dataset_post():
    """Start a background job that creates a new dataset or extends an existing one.

    A non-empty "name" field means a brand-new dataset; otherwise the upload
    extends the dataset selected via the "dataset" field. The transcript is
    treated as subtitles when the uploaded file ends in ".srt".

    Raises:
        Exception: if the requested new dataset name is already taken.
    """
    min_confidence = float(request.form["confidence"])
    language = request.form["language"]
    combine_clips = request.form.get("combine_clips") is not None
    min_length = float(request.form["min_length"])
    max_length = float(request.form["max_length"])
    # Prefer the Silero recognizer when available for the language; otherwise
    # fall back to the per-language DeepSpeech model on disk.
    transcription_model = (
        Silero(language) if language in SILERO_LANGUAGES else DeepSpeech(
            os.path.join(paths["languages"], language, TRANSCRIPTION_MODEL)))
    symbols = get_symbols(language)
    text_file = SUBTITLE_FILE if request.files["text_file"].filename.endswith(
        ".srt") else TEXT_FILE
    if request.form["name"]:
        output_folder = os.path.join(paths["datasets"], request.form["name"])
        if os.path.exists(output_folder):
            request.files = None  # drop the pending upload before aborting
            raise Exception("Dataset name taken")
        os.makedirs(output_folder, exist_ok=True)
        text_path = os.path.join(output_folder, text_file)
        audio_path = os.path.join(output_folder,
                                  request.files["audio_file"].filename)
        # Duplicated write logic factored into _save_text_upload.
        _save_text_upload(text_path)
        request.files["audio_file"].save(audio_path)
        start_progress_thread(
            create_dataset,
            text_path=text_path,
            audio_path=audio_path,
            transcription_model=transcription_model,
            output_folder=output_folder,
            min_length=min_length,
            max_length=max_length,
            min_confidence=min_confidence,
            combine_clips=combine_clips,
            symbols=symbols,
        )
    else:
        output_folder = os.path.join(paths["datasets"], request.form["dataset"])
        # Suffix new files so they never clobber earlier additions.
        suffix = get_suffix()
        text_path = os.path.join(output_folder, add_suffix(text_file, suffix))
        audio_path = os.path.join(
            output_folder, add_suffix(request.files["audio_file"].filename, suffix))
        _save_text_upload(text_path)
        request.files["audio_file"].save(audio_path)
        start_progress_thread(
            extend_existing_dataset,
            text_path=text_path,
            audio_path=audio_path,
            transcription_model=transcription_model,
            output_folder=output_folder,
            suffix=suffix,
            min_length=min_length,
            max_length=max_length,
            min_confidence=min_confidence,
            combine_clips=combine_clips,
            symbols=symbols,
        )
    return render_template("progress.html", next_url=get_next_url(URLS, request.path))