import os
import time

import numba
import tensorflow as tf
from pathlib import Path
from tqdm import tqdm

# Repo-internal names used below (Tacotron2, hparams, log, hparams_debug_string)
# are assumed to come from this project's synthesizer package.


def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, "eval")
    log_dir = os.path.join(output_dir, "logs-eval")

    # Create the output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, "wavs"), exist_ok=True)
    os.makedirs(os.path.join(log_dir, "plots"), exist_ok=True)

    log(hparams_debug_string())
    synth = Tacotron2(checkpoint_path, hparams)

    # Set inputs batch-wise
    sentences = [sentences[i: i + hparams.tacotron_synthesis_batch_size]
                 for i in range(0, len(sentences), hparams.tacotron_synthesis_batch_size)]

    log("Starting Synthesis")
    with open(os.path.join(eval_dir, "map.txt"), "w") as file:
        for i, texts in enumerate(tqdm(sentences)):
            start = time.time()
            basenames = ["batch_{}_sentence_{}".format(i, j) for j in range(len(texts))]
            mel_filenames, speaker_ids = synth.synthesize(texts, basenames, eval_dir, log_dir, None)

            for elems in zip(texts, mel_filenames, speaker_ids):
                file.write("|".join([str(x) for x in elems]) + "\n")
    log("Synthesized mel spectrograms at {}".format(eval_dir))
    return eval_dir
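# Hedged usage sketch (not part of the repo): drives run_eval on a couple of
# test sentences. The checkpoint and output paths below are placeholders.
def _demo_run_eval(hparams):
    sentences = [
        "Scientists at the CERN laboratory say they have discovered a new particle.",
        "Generative adversarial network or variational auto-encoder.",
    ]
    # run_eval accepts `args` but does not use it, so None is fine here.
    return run_eval(args=None,
                    checkpoint_path="logs-tacotron/taco_pretrained/tacotron_model.ckpt-100000",
                    output_dir="tacotron_output",
                    hparams=hparams,
                    sentences=sentences)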
def load(self):
    """
    Effectively loads the model to GPU memory given the weights file that
    was passed in the constructor.
    """
    if self._low_mem:
        raise Exception("Cannot load the synthesizer permanently in low mem mode")
    tf.reset_default_graph()
    self._model = Tacotron2(self.checkpoint_fpath, hparams)
def _one_shot_synthesize_spectrograms(checkpoint_fpath, embeddings, texts):
    # Load the model and forward the inputs
    tf.reset_default_graph()
    model = Tacotron2(checkpoint_fpath, hparams)
    specs, alignments = model.my_synthesize(embeddings, texts)

    # Detach the outputs (not doing so will cause the process to hang)
    specs, alignments = [spec.copy() for spec in specs], alignments.copy()

    model.session.close()
    return specs, alignments
def run_synthesis(in_dir, out_dir, model_dir, hparams):
    synth_dir = os.path.join(out_dir, "mels_gta")
    os.makedirs(synth_dir, exist_ok=True)
    metadata_filename = os.path.join(in_dir, "train.txt")
    print(hparams_debug_string())

    # Load the model in memory
    weights_dir = os.path.join(model_dir, "taco_pretrained")
    checkpoint_fpath = tf.train.get_checkpoint_state(weights_dir).model_checkpoint_path
    # Rewrite the absolute training-machine path into a local relative path
    checkpoint_fpath = checkpoint_fpath.replace('/ssd_scratch/cvit/rudra/SV2TTS/', '')
    checkpoint_fpath = checkpoint_fpath.replace('logs-', '')
    synth = Tacotron2(checkpoint_fpath, hparams, gta=True)

    # Load the metadata
    with open(metadata_filename, encoding="utf-8") as f:
        metadata = [line.strip().split("|") for line in f][:149736]  # hard-coded cap on the number of examples
        frame_shift_ms = hparams.hop_size / hparams.sample_rate  # seconds per frame, despite the name
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / 3600
        print("Loaded metadata for {} examples ({:.2f} hours)".format(len(metadata), hours))

    # Set inputs batch-wise
    metadata = [metadata[i: i + hparams.tacotron_synthesis_batch_size]
                for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)]
    # TODO: come on big boy, fix this
    # Quick and dirty fix to make sure that all batches have the same size
    metadata = metadata[:-1]

    print("Starting Synthesis")
    mel_dir = os.path.join(in_dir, "mels")
    embed_dir = os.path.join(in_dir, "embeds")
    meta_out_fpath = os.path.join(out_dir, "synthesized.txt")
    with open(meta_out_fpath, "w") as file:
        for i, meta in enumerate(tqdm(metadata)):
            texts = [m[5] for m in meta]
            mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
            embed_filenames = [os.path.join(embed_dir, m[2]) for m in meta]
            basenames = [os.path.basename(m).replace(".npy", "").replace("mel-", "")
                         for m in mel_filenames]
            synth.synthesize(texts, basenames, synth_dir, None, mel_filenames, embed_filenames)

            for elems in meta:
                file.write("|".join([str(x) for x in elems]) + "\n")

    print("Synthesized mel spectrograms at {}".format(synth_dir))
    return meta_out_fpath
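# Hedged usage sketch (not part of the repo): run_synthesis expects the
# SV2TTS-style preprocessed layout (train.txt, mels/, embeds/) under in_dir
# and a trained model under model_dir/taco_pretrained. Paths are placeholders.
def _demo_run_synthesis(hparams):
    return run_synthesis(in_dir="SV2TTS/synthesizer",
                         out_dir="tacotron_output",
                         model_dir="logs-tacotron",
                         hparams=hparams)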
def _one_shot_synthesize_spectrograms(checkpoint_fpath, embeddings, texts, seed=None):
    # Load the model and forward the inputs
    tf.compat.v1.reset_default_graph()
    model = Tacotron2(checkpoint_fpath, hparams, seed=seed)
    specs, alignments = model.my_synthesize(embeddings, texts)

    # Detach the outputs (not doing so will cause the process to hang)
    specs, alignments = [spec.copy() for spec in specs], alignments.copy()

    # Close cuda for this process
    model.session.close()
    numba.cuda.select_device(0)
    numba.cuda.close()

    return specs, alignments
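# Hedged sketch (an assumption, not code from the repo) of how the low-mem
# path can be driven: running the one-shot synthesis in a throwaway worker
# process guarantees that all GPU memory is released when that process exits.
def synthesize_spectrograms_low_mem(checkpoint_fpath, embeddings, texts):
    from multiprocessing import Pool
    with Pool(1) as pool:
        return pool.apply(_one_shot_synthesize_spectrograms,
                          (checkpoint_fpath, embeddings, texts))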
def __init__(self, checkpoints_dir: Path, verbose=True, low_mem=False):
    """
    Creates a synthesizer ready for inference. Unless <low_mem> is set, the model is loaded
    into memory immediately; in low-mem mode it is instead loaded on demand, in a separate
    process, for each synthesis call.

    :param checkpoints_dir: path to the directory containing the checkpoint file as well as the
    weight files (.data, .index and .meta files)
    :param verbose: if False, only tensorflow's output will be printed TODO: suppress them too
    :param low_mem: if True, the model will be loaded in a separate process and its resources
    will be released after each usage. Adds a large overhead, only recommended if your GPU
    memory is low (<= 2gb).
    """
    self.verbose = verbose
    self._low_mem = low_mem

    # Prepare the model
    self._model = None  # type: Tacotron2
    checkpoint_state = tf.train.get_checkpoint_state(checkpoints_dir)
    if checkpoint_state is None:
        raise Exception("Could not find any synthesizer weights under %s" % checkpoints_dir)
    self.checkpoint_fpath = checkpoint_state.model_checkpoint_path
    if not self._low_mem:
        tf.reset_default_graph()
        self._model = Tacotron2(self.checkpoint_fpath, hparams, session=None)
    if verbose:
        model_name = checkpoints_dir.parent.name.replace("logs-", "")
        step = int(self.checkpoint_fpath[self.checkpoint_fpath.rfind('-') + 1:])
        print("Found synthesizer \"%s\" trained to step %d" % (model_name, step))
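# Hedged usage sketch (not part of the repo): assumes the enclosing class is
# named Synthesizer and that pretrained weights live under the placeholder
# path below. With low_mem=False the constructor loads the model right away;
# with low_mem=True, load() must not be called and each synthesis instead
# runs in a separate process.
def _demo_synthesizer():
    checkpoints_dir = Path("synthesizer/saved_models/logs-pretrained/taco_pretrained")
    return Synthesizer(checkpoints_dir, verbose=True, low_mem=False)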