def __init__(self, data_utterance_paths, hparams):
    """Set up the PPG->Mel data loader and fill its feature lists.

    The PPG, acoustic and d-vector sequences are either restored from a
    pickle cache or extracted from every listed utterance, in which case
    they can optionally be written to that cache afterwards.

    Args:
        data_utterance_paths: A text file containing a list of file paths.
        hparams: The hyper-parameters. The attributes read here are
            max_wav_value, sampling_rate, is_full_ppg, is_append_f0,
            is_cache_feats, load_feats_from_disk, feats_cache_path,
            ppg_subsampling_factor, encoder_model_fpath, filter_length,
            hop_length, win_length, n_acoustic_feat_dims, mel_fmin,
            mel_fmax and seed.

    Raises:
        ValueError: If is_cache_feats and load_feats_from_disk are both
            enabled.
    """
    self.data_utterance_paths = load_filepaths(data_utterance_paths)

    # Mirror the plain settings from hparams onto the instance, in order.
    for setting in (
            'max_wav_value',
            'sampling_rate',
            'is_full_ppg',
            'is_append_f0',
            'is_cache_feats',
            'load_feats_from_disk',
            'feats_cache_path',
            'ppg_subsampling_factor',
    ):
        setattr(self, setting, getattr(hparams, setting))
    self.ppg_deps = DependenciesPPG()
    self.encoder_model_fpath = hparams.encoder_model_fpath

    # Reading the cache and writing it back in the same run is rejected.
    if self.is_cache_feats and self.load_feats_from_disk:
        raise ValueError('If you are loading feats from the disk, do not '
                         'rewrite them back!')

    self.stft = layers.TacotronSTFT(
        hparams.filter_length,
        hparams.hop_length,
        hparams.win_length,
        hparams.n_acoustic_feat_dims,
        hparams.sampling_rate,
        hparams.mel_fmin,
        hparams.mel_fmax)

    # Seeded shuffle: the utterance order is reproducible for a given seed.
    random.seed(hparams.seed)
    random.shuffle(self.data_utterance_paths)

    self.ppg_sequences = []
    self.acoustic_sequences = []
    self.dvec_sequences = []

    if not self.load_feats_from_disk:
        # Extract the features of every utterance from scratch.
        for path in self.data_utterance_paths:
            feats = self.extract_utterance_feats_spkr(path, self.is_full_ppg)
            self.ppg_sequences.append(feats[0].astype(np.float32))
            self.acoustic_sequences.append(feats[1])
            self.dvec_sequences.append(feats[2])
    else:
        print('Loading data from %s.' % self.feats_cache_path)
        with open(self.feats_cache_path, 'rb') as cache_file:
            cached = pickle.load(cache_file)
        # The cache is a sequence ordered as [ppg, acoustic, d-vector].
        self.ppg_sequences = cached[0]
        self.acoustic_sequences = cached[1]
        self.dvec_sequences = cached[2]

    if self.is_cache_feats:
        print('Caching data to %s.' % self.feats_cache_path)
        with open(self.feats_cache_path, 'wb') as cache_file:
            pickle.dump(
                [self.ppg_sequences,
                 self.acoustic_sequences,
                 self.dvec_sequences],
                cache_file)
def __init__(self, data_utterance_paths, cache_path, hparams, bs, loop):
    """Data loader for the PPG->Mel task that builds its cache chunk by chunk.

    Each instance extracts features for one chunk of ``self.b`` (5)
    utterances taken from the seeded-shuffled path list and appends them to
    the sequences an earlier chunk stored in ``cache_path``. The first
    chunk (``int(bs) == 1``) starts from empty lists, so it replaces any
    existing cache file.

    Args:
        data_utterance_paths: A text file containing a list of file paths.
        cache_path: Pickle file holding ``[ppg_sequences,
            acoustic_sequences]``. It is read when ``int(bs) > 1`` and is
            always rewritten once the chunk has been processed.
        hparams: The hyper-parameters. The attributes read here are
            sampling_rate, filter_length, hop_length, win_length,
            n_acoustic_feat_dims, mel_fmin, mel_fmax and seed.
        bs: 1-based chunk index; converted with ``int()``.
        loop: 1-based loop index; converted with ``int()``. Every loop is
            offset by 20 utterances in the shuffled list.

    Raises:
        ValueError: If ``bs`` or ``loop`` is smaller than 1. Such values
            would make the slice bounds negative and silently select the
            wrong utterances.
    """
    chunk_index = int(bs) - 1
    loop_index = int(loop) - 1
    if chunk_index < 0 or loop_index < 0:
        raise ValueError(
            'bs and loop are 1-based and must be >= 1, got bs=%r, loop=%r'
            % (bs, loop))

    self.data_utterance_paths = load_filepaths(data_utterance_paths)
    self.max_wav_value = 32768.0
    self.sampling_rate = hparams.sampling_rate
    self.is_full_ppg = True
    self.is_append_f0 = False
    self.is_cache_feats = True
    self.feats_cache_path = cache_path
    self.ppg_subsampling_factor = 1
    self.ppg_deps = DependenciesPPG()

    # 20 data = n(4) * b(5)
    self.n = chunk_index
    self.b = 5
    self.l = loop_index
    utterances_per_loop = 4 * self.b

    self.stft = layers.TacotronSTFT(
        hparams.filter_length, hparams.hop_length, hparams.win_length,
        hparams.n_acoustic_feat_dims, self.sampling_rate,
        hparams.mel_fmin, hparams.mel_fmax)

    # The seeded shuffle gives the same order for a given seed, which the
    # index-based chunk slicing below relies on.
    random.seed(hparams.seed)
    random.shuffle(self.data_utterance_paths)

    if self.n > 0:
        # Continue from what the previous chunks stored.
        # NOTE: pickle can execute arbitrary code while loading; only point
        # cache_path at files this pipeline wrote itself.
        with open(self.feats_cache_path, 'rb') as f:
            data = pickle.load(f)
        self.ppg_sequences = data[0]
        self.acoustic_sequences = data[1]
    else:
        self.ppg_sequences = []
        self.acoustic_sequences = []

    start = self.n * self.b + self.l * utterances_per_loop
    stop = start + self.b
    for utterance_path in self.data_utterance_paths[start:stop]:
        ppg_feat_pair = self.extract_utterance_feats(utterance_path,
                                                     self.is_full_ppg)
        self.ppg_sequences.append(ppg_feat_pair[0].astype(np.float32))
        self.acoustic_sequences.append(ppg_feat_pair[1])

    if self.is_cache_feats:
        print('Caching data to %s.' % self.feats_cache_path)
        # The file is only written here, so plain 'wb' is sufficient.
        with open(self.feats_cache_path, 'wb') as f:
            pickle.dump([self.ppg_sequences, self.acoustic_sequences], f)