Exemplo n.º 1
0
 def test_fade(self, fade_shape):
     """Run ``assert_grad`` on ``T.Fade`` configured with ``fade_shape``.

     The input is a short two-channel white-noise waveform.
     """
     fade = T.Fade(fade_shape=fade_shape)
     noise_kwargs = {"sample_rate": 8000, "duration": 0.05, "n_channels": 2}
     noise = get_whitenoise(**noise_kwargs)
     self.assert_grad(fade, [noise], nondet_tol=1e-10)
Exemplo n.º 2
0
    def test_dither_noise_shaping(self):
        """Pass ``F.dither`` with ``noise_shaping=True`` to ``_assert_consistency``."""
        # Kept as a named nested function (not a lambda/partial) so the helper
        # receives the same kind of callable as before.
        def func(waveform):
            return F.dither(waveform, noise_shaping=True)

        noise = common_utils.get_whitenoise(n_channels=2)
        self._assert_consistency(func, noise)
Exemplo n.º 3
0
 def test_resample(self, orig_freq, new_freq):
     """Run ``assert_grad`` on ``T.Resample`` for the given frequency pair.

     The input is a short two-channel white-noise waveform.
     """
     resampler = T.Resample(orig_freq=orig_freq, new_freq=new_freq)
     noise_kwargs = {"sample_rate": 8000, "duration": 0.05, "n_channels": 2}
     noise = get_whitenoise(**noise_kwargs)
     self.assert_grad(resampler, [noise])
Exemplo n.º 4
0
 def test_Fade(self):
     """Pass ``T.Fade`` with 3000-sample fade-in and fade-out to ``_assert_consistency``."""
     noise = common_utils.get_whitenoise()
     fade_in, fade_out = 3000, 3000
     fade = T.Fade(fade_in, fade_out)
     self._assert_consistency(fade, noise)
Exemplo n.º 5
0
    def test_dither_GPDF(self):
        """Pass ``F.dither`` with the ``'GPDF'`` argument to ``_assert_consistency``.

        The check is invoked with ``shape_only=True``.
        """
        # Kept as a named nested function (not a lambda/partial) so the helper
        # receives the same kind of callable as before.
        def func(waveform):
            return F.dither(waveform, 'GPDF')

        noise = common_utils.get_whitenoise(n_channels=2)
        self._assert_consistency(func, noise, shape_only=True)
Exemplo n.º 6
0
def get_mock_dataset(dataset_dir):
    """Build a mocked TEDLIUM directory tree and return the expected samples.

    For each of ``release1``, ``release2`` and ``release3`` this writes one
    ``.sph`` audio file (under ``sph/``), one ``.stm`` transcript file (under
    ``stm/``) and one ``.dic`` dictionary file, then records the tuples a
    reader of those files is expected to produce.

    Args:
        dataset_dir: directory of the mocked dataset

    Returns:
        dict mapping each release name to a list of tuples
        ``(waveform_slice, sample_rate, transcript, talk_id, speaker_id,
        identifier)``, one per entry of ``_UTTERANCES``.
    """
    mocked_samples = {}
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 16000  # 16kHz

    # Each release gets its own noise seed (0, 1, 2) so the audio differs.
    for seed, release in enumerate(["release1", "release2", "release3"]):
        data = get_whitenoise(sample_rate=sample_rate,
                              duration=10.00,
                              n_channels=1,
                              dtype="float32",
                              seed=seed)

        # The first two releases are located through the "subset" config entry,
        # the third one through "data_path".
        config = tedlium._RELEASE_CONFIGS[release]
        leaf_key = "subset" if release in ("release1", "release2") else "data_path"
        release_dir = os.path.join(dataset_dir,
                                   config["folder_in_archive"],
                                   config[leaf_key])
        transcript_dir = os.path.join(release_dir, "stm")  # Subfolder for transcripts
        audio_dir = os.path.join(release_dir, "sph")  # Subfolder for audio files
        for directory in (release_dir, transcript_dir, audio_dir):
            os.makedirs(directory, exist_ok=True)

        save_wav(os.path.join(audio_dir, f"{release}.sph"), data, sample_rate)

        with open(os.path.join(transcript_dir, f"{release}.stm"), "w") as f:
            f.write("".join(_UTTERANCES))

        with open(os.path.join(release_dir, f"{release}.dic"), "w") as f:
            f.write("\n".join(_PHONEME))

        # Create a samples list to compare with
        samples = []
        for utterance in _UTTERANCES:
            # maxsplit=6 keeps the transcript (which may contain spaces) whole.
            talk_id, _, speaker_id, start_time, end_time, identifier, transcript = utterance.split(
                " ", 6)
            # Scale to frames BEFORE truncating, so fractional-second
            # timestamps are not rounded down to whole seconds.
            # NOTE(review): presumes the dataset loader converts timestamps
            # the same way -- confirm against the loader implementation.
            start_frame = int(float(start_time) * sample_rate)
            end_frame = int(float(end_time) * sample_rate)
            samples.append((
                data[:, start_frame:end_frame],
                sample_rate,
                transcript,
                talk_id,
                speaker_id,
                identifier,
            ))
        mocked_samples[release] = samples
    return mocked_samples
Exemplo n.º 7
0
 def test_mfcc(self, log_mels):
     """Run ``assert_grad`` on ``T.MFCC`` built with the given ``log_mels`` value.

     The input is a short two-channel white-noise waveform at 8000 Hz.
     """
     rate = 8000
     mfcc = T.MFCC(sample_rate=rate, log_mels=log_mels)
     noise_kwargs = {"sample_rate": rate, "duration": 0.05, "n_channels": 2}
     noise = get_whitenoise(**noise_kwargs)
     self.assert_grad(mfcc, [noise])
Exemplo n.º 8
0
 def test_Vol(self):
     """Pass ``T.Vol(1.1)`` and a white-noise waveform to ``_assert_consistency``."""
     noise = common_utils.get_whitenoise()
     volume = T.Vol(1.1)
     self._assert_consistency(volume, noise)
Exemplo n.º 9
0
 def test_SpectralCentroid(self):
     """Pass ``T.SpectralCentroid`` at 44100 Hz to ``_assert_consistency``.

     The white-noise input is generated at the same sample rate.
     """
     rate = 44100
     noise = common_utils.get_whitenoise(sample_rate=rate)
     centroid = T.SpectralCentroid(sample_rate=rate)
     self._assert_consistency(centroid, noise)
Exemplo n.º 10
0
    def test_resample_identity(self, resampling_method, sample_rate):
        """Check that resampling to the same rate returns the input unchanged.

        Args:
            resampling_method: resampling method name forwarded to
                ``F.resample``.
            sample_rate: rate used both to generate the waveform and as the
                source and target frequency.
        """
        waveform = get_whitenoise(sample_rate=sample_rate, duration=1)

        # Forward the parameterized method; it was previously ignored, so every
        # parameterized case exercised only the default resampling method.
        resampled = F.resample(waveform, sample_rate, sample_rate,
                               resampling_method=resampling_method)
        self.assertEqual(waveform, resampled)
Exemplo n.º 11
0
def get_mock_dataset(root_dir):
    """Build a mocked DR-VCTK directory tree and return the expected samples.

    For every subset this creates one audio directory per condition and a
    ``<subset>_ch_log.txt`` configuration file, then writes one white-noise
    wav file per condition for each (speaker, source, channel) combination.

    Args:
        root_dir: root directory of the mocked data

    Returns:
        dict mapping each subset to a list of tuples
        ``(first-condition waveform, sample_rate, second-condition waveform,
        sample_rate, speaker label, utterance label, source, channel_id)``.
    """
    base_dir = Path(root_dir) / "DR-VCTK" / "DR-VCTK"
    base_dir.mkdir(parents=True, exist_ok=True)

    cfg_dir = base_dir / "configurations"
    cfg_dir.mkdir(parents=True, exist_ok=True)

    rate = 16000
    next_seed = 0  # advanced once per generated waveform
    expected = {}

    for subset in _SUBSETS:
        expected[subset] = []

        # One audio directory per condition, created up front for this subset.
        wav_dirs = {}
        for condition in _CONDITIONS:
            wav_dirs[condition] = base_dir / f"{condition}_{subset}set_wav_16k"
            wav_dirs[condition].mkdir(parents=True, exist_ok=True)

        with open(cfg_dir / f"{subset}_ch_log.txt", "w") as log:
            # The "train" log starts with an extra blank line before the header.
            if subset == "train":
                log.write("\n")
            log.write("File Name\tMain Source\tChannel Idx\n")

            for speaker_id in _SPEAKER_IDS:
                # Utterance numbering restarts at 1 for every speaker.
                utterance_no = 0
                for source in _SOURCES:
                    for channel_id in _CHANNEL_IDS:
                        utterance_no += 1
                        utterance_tag = f"{utterance_no:03d}"
                        wav_name = f"p{speaker_id}_{utterance_tag}.wav"
                        log.write(f"{wav_name}\t{source}\t{channel_id}\n")

                        waveforms = {}
                        for condition in _CONDITIONS:
                            waveforms[condition] = get_whitenoise(
                                sample_rate=rate,
                                duration=0.01,
                                n_channels=1,
                                dtype='float32',
                                seed=next_seed)
                            save_wav(wav_dirs[condition] / wav_name,
                                     waveforms[condition], rate)
                            next_seed += 1

                        expected[subset].append((
                            waveforms[_CONDITIONS[0]],
                            rate,
                            waveforms[_CONDITIONS[1]],
                            rate,
                            f"p{speaker_id}",
                            utterance_tag,
                            source,
                            channel_id,
                        ))

    return expected
Exemplo n.º 12
0
 def test_vol(self, gain, gain_type):
     """Run ``assert_grad`` on ``T.Vol`` built with the given gain settings.

     The input is a short two-channel white-noise waveform at 8000 Hz.
     """
     rate = 8000
     volume = T.Vol(gain=gain, gain_type=gain_type)
     noise_kwargs = {"sample_rate": rate, "duration": 0.05, "n_channels": 2}
     noise = get_whitenoise(**noise_kwargs)
     self.assert_grad(volume, [noise])
Exemplo n.º 13
0
 def test_amplitude_to_db(self):
     """Run ``assert_grad`` on a default-constructed ``T.AmplitudeToDB``.

     The input is a short two-channel white-noise waveform at 8000 Hz.
     """
     rate = 8000
     to_db = T.AmplitudeToDB()
     noise_kwargs = {"sample_rate": rate, "duration": 0.05, "n_channels": 2}
     noise = get_whitenoise(**noise_kwargs)
     self.assert_grad(to_db, [noise])
Exemplo n.º 14
0
 def test_spectral_centroid(self):
     """Run ``assert_grad`` on ``T.SpectralCentroid`` at 8000 Hz.

     The input is a short two-channel white-noise waveform; the check is
     invoked with ``nondet_tol=1e-10``.
     """
     rate = 8000
     centroid = T.SpectralCentroid(sample_rate=rate)
     noise_kwargs = {"sample_rate": rate, "duration": 0.05, "n_channels": 2}
     noise = get_whitenoise(**noise_kwargs)
     self.assert_grad(centroid, [noise], nondet_tol=1e-10)
Exemplo n.º 15
0
 def test_lfilter_all_inputs(self):
     """Run ``assert_grad`` on ``F.lfilter`` with waveform and both coefficient tensors.

     The torch RNG is seeded first; the waveform is two-channel white noise.
     """
     torch.random.manual_seed(2434)
     signal = get_whitenoise(sample_rate=22050, duration=0.01, n_channels=2)
     a_coeffs = torch.tensor([0.7, 0.2, 0.6])
     b_coeffs = torch.tensor([0.4, 0.2, 0.9])
     grad_inputs = (signal, a_coeffs, b_coeffs)
     self.assert_grad(F.lfilter, grad_inputs)
Exemplo n.º 16
0
 def test_PitchShift(self):
     """Pass ``T.PitchShift`` (8000 Hz, ``n_steps=4``) to ``_assert_consistency``.

     The white-noise input is generated at the same sample rate.
     """
     rate = 8000
     steps = 4
     noise = common_utils.get_whitenoise(sample_rate=rate)
     pitch_shift = T.PitchShift(sample_rate=rate, n_steps=steps)
     self._assert_consistency(pitch_shift, noise)
Exemplo n.º 17
0
 def test_biquad(self):
     """Run ``assert_grad`` on ``F.biquad`` with a waveform and six scalar coefficients.

     The torch RNG is seeded first; the waveform is single-channel white
     noise. Coefficients are passed as the three ``b`` entries followed by
     the three ``a`` entries.
     """
     torch.random.manual_seed(2434)
     signal = get_whitenoise(sample_rate=22050, duration=0.01, n_channels=1)
     a_coeffs = torch.tensor([0.7, 0.2, 0.6])
     b_coeffs = torch.tensor([0.4, 0.2, 0.9])
     grad_inputs = (
         signal,
         b_coeffs[0], b_coeffs[1], b_coeffs[2],
         a_coeffs[0], a_coeffs[1], a_coeffs[2],
     )
     self.assert_grad(F.biquad, grad_inputs)
Exemplo n.º 18
0
 def test_MuLawEncoding(self):
     """Pass a default ``T.MuLawEncoding`` and white noise to ``_assert_consistency``."""
     noise = common_utils.get_whitenoise()
     encoder = T.MuLawEncoding()
     self._assert_consistency(encoder, noise)
Exemplo n.º 19
0
 def test_Resample(self):
     """Pass ``T.Resample`` (16000 -> 8000, given as floats) to ``_assert_consistency``.

     The white-noise input is generated at the source rate.
     """
     source_rate = 16000
     target_rate = 8000
     noise = common_utils.get_whitenoise(sample_rate=source_rate)
     resampler = T.Resample(float(source_rate), float(target_rate))
     self._assert_consistency(resampler, noise)
Exemplo n.º 20
0
 def test_compute_kaldi_pitch(self):
     """Pass ``F.compute_kaldi_pitch`` to ``assert_batch_consistencies`` at 44100 Hz.

     The white-noise input is generated at the same sample rate, which is
     also forwarded as the ``sample_rate`` keyword.
     """
     rate = 44100
     noise = common_utils.get_whitenoise(sample_rate=rate)
     self.assert_batch_consistencies(
         F.compute_kaldi_pitch, noise, sample_rate=rate)