def __init__(self, data_utterance_paths, hparams):
        """Set up the PPG->Mel dataset and populate its feature lists.

        The PPG, acoustic and d-vector sequences are either read back from
        the pickle cache at ``hparams.feats_cache_path`` or extracted one
        utterance at a time. Freshly extracted features are written to that
        same cache when ``hparams.is_cache_feats`` is set.

        Args:
            data_utterance_paths: A text file containing a list of file paths.
            hparams: The hyper-parameters.

        Raises:
            ValueError: If ``hparams.is_cache_feats`` and
                ``hparams.load_feats_from_disk`` are both truthy.
        """
        self.data_utterance_paths = load_filepaths(data_utterance_paths)

        # Copy the settings this loader needs off the hyper-parameter object.
        self.max_wav_value = hparams.max_wav_value
        self.sampling_rate = hparams.sampling_rate
        self.is_full_ppg = hparams.is_full_ppg
        self.is_append_f0 = hparams.is_append_f0
        self.is_cache_feats = hparams.is_cache_feats
        self.load_feats_from_disk = hparams.load_feats_from_disk
        self.feats_cache_path = hparams.feats_cache_path
        self.ppg_subsampling_factor = hparams.ppg_subsampling_factor
        self.ppg_deps = DependenciesPPG()
        self.encoder_model_fpath = hparams.encoder_model_fpath

        # Loading the cache and then writing it straight back makes no sense,
        # so the two flags are treated as mutually exclusive.
        if self.is_cache_feats and self.load_feats_from_disk:
            raise ValueError('If you are loading feats from the disk, do not '
                             'rewrite them back!')

        self.stft = layers.TacotronSTFT(
            hparams.filter_length,
            hparams.hop_length,
            hparams.win_length,
            hparams.n_acoustic_feat_dims,
            hparams.sampling_rate,
            hparams.mel_fmin,
            hparams.mel_fmax)

        # Seeding the module-level RNG first makes the shuffle order depend
        # only on hparams.seed.
        random.seed(hparams.seed)
        random.shuffle(self.data_utterance_paths)

        self.ppg_sequences = []
        self.acoustic_sequences = []
        self.dvec_sequences = []

        if self.load_feats_from_disk:
            print('Loading data from %s.' % self.feats_cache_path)
            # NOTE: unpickling can execute arbitrary code; only point
            # feats_cache_path at cache files you produced yourself.
            with open(self.feats_cache_path, 'rb') as cache_file:
                cached = pickle.load(cache_file)
            # Cache layout: [ppg_sequences, acoustic_sequences, dvec_sequences].
            self.ppg_sequences = cached[0]
            self.acoustic_sequences = cached[1]
            self.dvec_sequences = cached[2]
        else:
            extract = self.extract_utterance_feats_spkr
            for path in self.data_utterance_paths:
                feats = extract(path, self.is_full_ppg)
                # Only the PPG entry is cast to float32; the other two are
                # stored exactly as returned.
                self.ppg_sequences.append(feats[0].astype(np.float32))
                self.acoustic_sequences.append(feats[1])
                self.dvec_sequences.append(feats[2])

        if not self.is_cache_feats:
            return

        print('Caching data to %s.' % self.feats_cache_path)
        payload = [self.ppg_sequences,
                   self.acoustic_sequences,
                   self.dvec_sequences]
        with open(self.feats_cache_path, 'wb') as cache_file:
            pickle.dump(payload, cache_file)
示例#2
0
    def __init__(self, data_utterance_paths, cache_path, hparams, bs, loop):
        """Data loader for the PPG->Mel task that extracts one chunk per call.

        Each construction extracts features for a single chunk of ``self.b``
        (5) utterances from the shuffled path list and stores the accumulated
        result in the pickle file at ``cache_path``. For every chunk after
        the first (``int(bs) > 1``) the existing cache is loaded first and
        the new features are appended to it.

        Args:
            data_utterance_paths: A text file containing a list of file paths.
            cache_path: Pickle file holding
                ``[ppg_sequences, acoustic_sequences]``. It is read when
                ``int(bs) > 1`` and always rewritten at the end.
            hparams: The hyper-parameters (STFT settings, sampling rate and
                random seed are read from it).
            bs: Chunk number; ``int(bs) - 1`` is used as the index of the
                5-utterance chunk inside the current group.
            loop: Group number; ``int(loop) - 1`` is used as the index of the
                20-utterance group.
        """
        self.data_utterance_paths = load_filepaths(data_utterance_paths)

        # Unlike the sampling rate, these settings are fixed for this loader
        # rather than taken from hparams.
        self.max_wav_value = 32768.0
        self.sampling_rate = hparams.sampling_rate
        self.is_full_ppg = True
        self.is_append_f0 = False
        self.is_cache_feats = True
        self.feats_cache_path = cache_path
        self.ppg_subsampling_factor = 1
        self.ppg_deps = DependenciesPPG()

        # A group of 20 utterances is handled as 4 chunks of b = 5 utterances:
        # n is the chunk index, l the group index.
        self.n = int(bs) - 1
        self.b = 5
        self.l = int(loop) - 1

        self.stft = layers.TacotronSTFT(
            hparams.filter_length,
            hparams.hop_length,
            hparams.win_length,
            hparams.n_acoustic_feat_dims,
            self.sampling_rate,
            hparams.mel_fmin,
            hparams.mel_fmax)

        # Seeding before the shuffle keeps the path order dependent only on
        # hparams.seed, so chunk boundaries line up between constructions.
        random.seed(hparams.seed)
        random.shuffle(self.data_utterance_paths)

        if self.n <= 0:
            # First chunk: start with empty feature lists.
            self.ppg_sequences = []
            self.acoustic_sequences = []
        else:
            # Later chunks: continue from what earlier chunks cached.
            # NOTE: unpickling can execute arbitrary code; only use cache
            # files you produced yourself.
            with open(self.feats_cache_path, 'rb') as cache_file:
                cached = pickle.load(cache_file)
            self.ppg_sequences = cached[0]
            self.acoustic_sequences = cached[1]

        # Slice bounds of this chunk within the shuffled path list.
        group_offset = self.l * 20
        chunk_start = self.n * self.b + group_offset
        chunk_stop = chunk_start + self.b

        for path in self.data_utterance_paths[chunk_start:chunk_stop]:
            feats = self.extract_utterance_feats(path, self.is_full_ppg)
            self.ppg_sequences.append(feats[0].astype(np.float32))
            self.acoustic_sequences.append(feats[1])

        if not self.is_cache_feats:
            return

        print('Caching data to %s.' % self.feats_cache_path)
        payload = [self.ppg_sequences, self.acoustic_sequences]
        with open(self.feats_cache_path, 'wb+') as cache_file:
            pickle.dump(payload, cache_file)