Exemplo n.º 1
0
def setup_loader(ap, is_val=False, verbose=False):
    loader = None
    if not is_val or c.run_eval:
        dataset = GANDataset(ap=ap,
                             items=eval_data if is_val else train_data,
                             seq_len=c.seq_len,
                             hop_len=ap.hop_length,
                             pad_short=c.pad_short,
                             conv_pad=c.conv_pad,
                             is_training=not is_val,
                             return_segments=not is_val,
                             use_noise_augment=c.use_noise_augment,
                             use_cache=c.use_cache,
                             verbose=verbose)
        dataset.shuffle_mapping()
        sampler = DistributedSampler(dataset,
                                     shuffle=True) if num_gpus > 1 else None
        loader = DataLoader(dataset,
                            batch_size=1 if is_val else c.batch_size,
                            shuffle=num_gpus == 0,
                            drop_last=False,
                            sampler=sampler,
                            num_workers=c.num_val_loader_workers
                            if is_val else c.num_loader_workers,
                            pin_memory=False)
    return loader
Exemplo n.º 2
0
Arquivo: gan.py Projeto: gerazov/TTS
    def get_data_loader(  # pylint: disable=no-self-use
        self,
        config: Coqpit,
        ap: AudioProcessor,
        is_eval: True,
        data_items: List,
        verbose: bool,
        num_gpus: int,
    ):
        """Initiate and return the GAN dataloader.

        Args:
            config (Coqpit): Model config.
            ap (AudioProcessor): Audio processor.
            is_eval (True): Set the dataloader for evaluation if true.
            data_items (List): Data samples.
            verbose (bool): Log information if true.
            num_gpus (int): Number of GPUs in use.

        Returns:
            DataLoader: Torch dataloader.
        """
        dataset = GANDataset(
            ap=ap,
            items=data_items,
            seq_len=config.seq_len,
            hop_len=ap.hop_length,
            pad_short=config.pad_short,
            conv_pad=config.conv_pad,
            return_pairs=config.diff_samples_for_G_and_D
            if "diff_samples_for_G_and_D" in config else False,
            is_training=not is_eval,
            return_segments=not is_eval,
            use_noise_augment=config.use_noise_augment,
            use_cache=config.use_cache,
            verbose=verbose,
        )
        dataset.shuffle_mapping()
        sampler = DistributedSampler(dataset,
                                     shuffle=True) if num_gpus > 1 else None
        loader = DataLoader(
            dataset,
            batch_size=1 if is_eval else config.batch_size,
            shuffle=num_gpus == 0,
            drop_last=False,
            sampler=sampler,
            num_workers=config.num_eval_loader_workers
            if is_eval else config.num_loader_workers,
            pin_memory=False,
        )
        return loader
Exemplo n.º 3
0
def setup_dataset(config: Coqpit, ap: AudioProcessor, is_eval: bool, data_items: List, verbose: bool) -> Dataset:
    if config.model.lower() in "gan":
        dataset = GANDataset(
            ap=ap,
            items=data_items,
            seq_len=config.seq_len,
            hop_len=ap.hop_length,
            pad_short=config.pad_short,
            conv_pad=config.conv_pad,
            return_pairs=config.diff_samples_for_G_and_D if "diff_samples_for_G_and_D" in config else False,
            is_training=not is_eval,
            return_segments=not is_eval,
            use_noise_augment=config.use_noise_augment,
            use_cache=config.use_cache,
            verbose=verbose,
        )
        dataset.shuffle_mapping()
    elif config.model.lower() == "wavegrad":
        dataset = WaveGradDataset(
            ap=ap,
            items=data_items,
            seq_len=config.seq_len,
            hop_len=ap.hop_length,
            pad_short=config.pad_short,
            conv_pad=config.conv_pad,
            is_training=not is_eval,
            return_segments=True,
            use_noise_augment=False,
            use_cache=config.use_cache,
            verbose=verbose,
        )
    elif config.model.lower() == "wavernn":
        dataset = WaveRNNDataset(
            ap=ap,
            items=data_items,
            seq_len=config.seq_len,
            hop_len=ap.hop_length,
            pad=config.model_params.pad,
            mode=config.model_params.mode,
            mulaw=config.model_params.mulaw,
            is_training=not is_eval,
            verbose=verbose,
        )
    else:
        raise ValueError(f" [!] Dataset for model {config.model.lower()} cannot be found.")
    return dataset
Exemplo n.º 4
0
def gan_dataset_case(batch_size, seq_len, hop_len, conv_pad, return_segments,
                     use_noise_augment, use_cache, num_workers):
    ''' run dataloader with given parameters and check conditions '''
    ap = AudioProcessor(**C.audio)
    _, train_items = load_wav_data(test_data_path, 10)
    dataset = GANDataset(ap,
                         train_items,
                         seq_len=seq_len,
                         hop_len=hop_len,
                         pad_short=2000,
                         conv_pad=conv_pad,
                         return_segments=return_segments,
                         use_noise_augment=use_noise_augment,
                         use_cache=use_cache)
    loader = DataLoader(dataset=dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=num_workers,
                        pin_memory=True,
                        drop_last=True)

    max_iter = 10
    count_iter = 0

    # return random segments or return the whole audio
    if return_segments:
        for item1, _ in loader:
            feat1, wav1 = item1
            # feat2, wav2 = item2
            expected_feat_shape = (batch_size, ap.num_mels,
                                   seq_len // hop_len + conv_pad * 2)

            # check shapes
            assert np.all(feat1.shape == expected_feat_shape
                          ), f" [!] {feat1.shape} vs {expected_feat_shape}"
            assert (feat1.shape[2] - conv_pad * 2) * hop_len == wav1.shape[2]

            # check feature vs audio match
            if not use_noise_augment:
                for idx in range(batch_size):
                    audio = wav1[idx].squeeze()
                    feat = feat1[idx]
                    mel = ap.melspectrogram(audio)
                    # the first 2 and the last frame is skipped due to the padding
                    # applied in spec. computation.
                    assert (feat - mel[:, :feat1.shape[-1]])[:, 2:-1].sum(
                    ) == 0, f' [!] {(feat - mel[:, :feat1.shape[-1]])[:, 2:-1].sum()}'

            count_iter += 1
            # if count_iter == max_iter:
            #     break
    else:
        for item in loader:
            feat, wav = item
            expected_feat_shape = (batch_size, ap.num_mels,
                                   (wav.shape[-1] // hop_len) + (conv_pad * 2))
            assert np.all(feat.shape == expected_feat_shape
                          ), f" [!] {feat.shape} vs {expected_feat_shape}"
            assert (feat.shape[2] - conv_pad * 2) * hop_len == wav.shape[2]
            count_iter += 1
            if count_iter == max_iter:
                break
def gan_dataset_case(batch_size, seq_len, hop_len, conv_pad, return_pairs,
                     return_segments, use_noise_augment, use_cache,
                     num_workers):
    """Run dataloader with given parameters and check conditions"""
    ap = AudioProcessor(**C.audio)
    _, train_items = load_wav_data(test_data_path, 10)
    dataset = GANDataset(
        ap,
        train_items,
        seq_len=seq_len,
        hop_len=hop_len,
        pad_short=2000,
        conv_pad=conv_pad,
        return_pairs=return_pairs,
        return_segments=return_segments,
        use_noise_augment=use_noise_augment,
        use_cache=use_cache,
    )
    loader = DataLoader(dataset=dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=num_workers,
                        pin_memory=True,
                        drop_last=True)

    max_iter = 10
    count_iter = 0

    def check_item(feat, wav):
        """Pass a single pair of features and waveform"""
        feat = feat.numpy()
        wav = wav.numpy()
        expected_feat_shape = (batch_size, ap.num_mels,
                               seq_len // hop_len + conv_pad * 2)

        # check shapes
        assert np.all(feat.shape == expected_feat_shape
                      ), f" [!] {feat.shape} vs {expected_feat_shape}"
        assert (feat.shape[2] - conv_pad * 2) * hop_len == wav.shape[2]

        # check feature vs audio match
        if not use_noise_augment:
            for idx in range(batch_size):
                audio = wav[idx].squeeze()
                feat = feat[idx]
                mel = ap.melspectrogram(audio)
                # the first 2 and the last 2 frames are skipped due to the padding
                # differences in stft
                max_diff = abs((feat - mel[:, :feat.shape[-1]])[:, 2:-2]).max()
                assert max_diff <= 1e-6, f" [!] {max_diff}"

    # return random segments or return the whole audio
    if return_segments:
        if return_pairs:
            for item1, item2 in loader:
                feat1, wav1 = item1
                feat2, wav2 = item2
                check_item(feat1, wav1)
                check_item(feat2, wav2)
                count_iter += 1
        else:
            for item1 in loader:
                feat1, wav1 = item1
                check_item(feat1, wav1)
                count_iter += 1
    else:
        for item in loader:
            feat, wav = item
            expected_feat_shape = (batch_size, ap.num_mels,
                                   (wav.shape[-1] // hop_len) + (conv_pad * 2))
            assert np.all(feat.shape == expected_feat_shape
                          ), f" [!] {feat.shape} vs {expected_feat_shape}"
            assert (feat.shape[2] - conv_pad * 2) * hop_len == wav.shape[2]
            count_iter += 1
            if count_iter == max_iter:
                break