Example #1
    def test_batch_Resample(self):
        waveform = torch.randn(2, 2786)

        # Single then transform then batch
        expected = transforms.Resample()(waveform).repeat(3, 1, 1)

        # Batch then transform
        computed = transforms.Resample()(waveform.repeat(3, 1, 1))

        self.assertTrue(computed.shape == expected.shape, (computed.shape, expected.shape))
        self.assertTrue(torch.allclose(computed, expected))
Example #2
def get_train_transforms(
        config: object,
        transforms_set: TformsSet = TformsSet.Audtorch) -> object:
    if config.use_mels:
        if transforms_set == TformsSet.TorchAudio:
            trans = tforms_vision.Compose([
                tforms_torch.Resample(orig_freq=44100,
                                      new_freq=config.resampling_rate),
                tforms_torch.MelSpectrogram(sample_rate=config.resampling_rate,
                                            n_fft=config.n_fft,
                                            win_length=config.hop_length,
                                            hop_length=config.hop_length,
                                            f_min=float(config.fmin),
                                            f_max=float(config.fmax),
                                            pad=0,
                                            n_mels=config.n_mels),
                tforms_torch.AmplitudeToDB(stype='power', top_db=80),
                #tforms_aud.RandomCrop(config.max_length_frames),  # Raises "Can't call numpy() on Variable that requires grad. Use var.detach().numpy() instead."
            ])
        elif transforms_set == TformsSet.MySet:  # this works
            trans = tforms_aud.Compose([
                tforms_torch.Resample(orig_freq=44100,
                                      new_freq=config.resampling_rate),
                tforms_mine.Spectrogram(config),
                tforms_aud.RandomCrop(config.max_length_frames)
            ])
    else:
        if transforms_set == TformsSet.TorchAudio:  # this works
            trans = tforms_aud.Compose([
                tforms_torch.Resample(orig_freq=44100,
                                      new_freq=config.resampling_rate),
                tforms_torch.Spectrogram(n_fft=config.n_fft,
                                         win_length=config.hop_length,
                                         hop_length=config.hop_length,
                                         pad=0,
                                         power=2,
                                         normalized=True),
                tforms_torch.AmplitudeToDB(stype='power', top_db=80),
                tforms_aud.RandomCrop(config.max_length_frames)
            ])
        elif transforms_set == TformsSet.MySet:  # this works
            trans = tforms_aud.Compose([
                tforms_torch.Resample(orig_freq=44100,
                                      new_freq=config.resampling_rate),
                tforms_mine.Spectrogram(config),
                tforms_aud.RandomCrop(config.max_length_frames)
            ])
    return trans
Example #3
    def test_resample_identity(self, resampling_method, sample_rate):
        """When sampling rate is not changed, the transform returns an identical Tensor"""
        waveform = get_whitenoise(sample_rate=sample_rate, duration=1)

        resampler = T.Resample(sample_rate, sample_rate, resampling_method)
        resampled = resampler(waveform)
        self.assertEqual(waveform, resampled)
Example #4
    def __call__(self, waveform, sample_rate):
        '''
        Args:
            waveform: torch tensor of shape [num_audio_channels, num_time_steps]
            sample_rate: samples per second
        Returns:
            batched torch tensor of shape [num_chunks, 1, window_size_in_frames, num_freq_bins]
        '''
        x = waveform.mean(axis=0, keepdims=True)  # average over channels
        resampler = ta_trans.Resample(sample_rate,
                                      CommonParams.TARGET_SAMPLE_RATE)
        x = resampler(x)
        x = self.mel_trans_ope(x)
        x = x.squeeze(dim=0).T  # [1, C, T] -> [T, C]

        window_size_in_frames = int(
            round(CommonParams.PATCH_WINDOW_IN_SECONDS /
                  CommonParams.STFT_HOP_LENGTH_SECONDS))
        num_chunks = x.shape[0] // window_size_in_frames

        # reshape into chunks of non-overlapping sliding window
        num_frames_to_use = num_chunks * window_size_in_frames
        x = x[:num_frames_to_use]
        # [num_chunks, 1, window_size, num_freq]
        x = x.reshape(num_chunks, 1, window_size_in_frames, x.shape[-1])
        return x
Example #5
def benchmark_resample(
    method,
    waveform,
    sample_rate,
    resample_rate,
    lowpass_filter_width=DEFAULT_LOWPASS_FILTER_WIDTH,
    rolloff=DEFAULT_ROLLOFF,
    resampling_method=DEFAULT_RESAMPLING_METHOD,
    beta=None,
    librosa_type=None,
    iters=5
):
  if method == "functional":
    begin = time.time()
    for _ in range(iters):
      F.resample(waveform, sample_rate, resample_rate, lowpass_filter_width=lowpass_filter_width,
                 rolloff=rolloff, resampling_method=resampling_method)
    elapsed = time.time() - begin
    return elapsed / iters
  elif method == "transforms":
    resampler = T.Resample(sample_rate, resample_rate, lowpass_filter_width=lowpass_filter_width,
                           rolloff=rolloff, resampling_method=resampling_method, dtype=waveform.dtype)
    begin = time.time()
    for _ in range(iters):
      resampler(waveform)
    elapsed = time.time() - begin
    return elapsed / iters
  elif method == "librosa":
    waveform_np = waveform.squeeze().numpy()
    begin = time.time()
    for _ in range(iters):
      librosa.resample(waveform_np, sample_rate, resample_rate, res_type=librosa_type)
    elapsed = time.time() - begin
    return elapsed / iters
Example #6
    def __init__(self, fs):
        super(ApplyReverb, self).__init__()

        dir = '/m/cs/scratch/sequentialml/datasets/RIRs/razr'
        fn = 'BRIRs_23-Nov-2019_19-35-31.mat'
        fn = os.path.join(dir, fn)

        self.resampler = tforms_torch.Resample(orig_freq=48000, new_freq=fs)

        import h5py
        tmp_rirs = []
        with h5py.File(fn, 'r') as f:
            # for k, v in f.items():
            #     arrays[k] = np.array(v)

            for i in range(0, len(f['allIRs'])):
                tmp = np.array(f[f['allIRs'][i][0]]).transpose()
                tmp = np.mean(tmp, axis=1)  # mono
                tmp = torch.Tensor(tmp)

                if len(tmp.shape) < 2:
                    tmp = tmp.unsqueeze(0)  # shape is (channels, timesteps)

                tmp = self.resampler(tmp)
                tmp_rirs.append(tmp)

        self.rirs = tmp_rirs

        print("Loaded {} RIRs ", len(self.rirs))
Example #7
def get_train_transforms_audio_only(
        config: object,
        transforms_set: TformsSet = TformsSet.Audtorch) -> object:
    trans = tforms_torch.Resample(orig_freq=44100,
                                  new_freq=config.resampling_rate)

    return trans
Example #8
def preprocess(file_path='../DATASETS/LJSpeech-1.1/metadata.csv',
               root_dir='../DATASETS/LJSpeech-1.1'):
    with open(file_path, encoding='utf8') as file:
        data_ = [line.strip().split('|') for line in file]
    root_dir = root_dir
    sample_rate = 8000
    resample = transforms.Resample(orig_freq=22050, new_freq=sample_rate)
    spectogram = transforms.Spectrogram(n_fft=1024, hop_length=256)
    to_mel = transforms.MelScale(n_mels=80,
                                 sample_rate=sample_rate,
                                 n_stft=1024 // 2 + 1)

    mel_data = torch.zeros(len(data_), 316, 80)
    mel_len = torch.empty(len(data_), dtype=torch.int)

    for idx, data in enumerate(tqdm(data_)):
        path, text = data[0], data[1]
        path = f'{root_dir}/wavs/{path}.wav'

        data, sample_rate = torchaudio.load(path)
        data = resample(data)
        data = spectogram(data)
        data = to_mel(data)
        data = data.transpose(1, 2).squeeze(0)
        mel_data[idx, :data.size(0)] = data
        mel_len[idx] = data.size(0)

    torch.save(mel_data, f'{root_dir}/mel_data.pt')
    torch.save(mel_len, f'{root_dir}/mel_len.pt')
Example #9
    def test_vctk_transform_pipeline(self):
        test_filepath_vctk = common_utils.get_asset_path(
            'VCTK-Corpus', 'wav48', 'p224', 'p224_002.wav')
        wf_vctk, sr_vctk = torchaudio.load(test_filepath_vctk)

        # rate
        sample = T.Resample(sr_vctk,
                            16000,
                            resampling_method='sinc_interpolation')
        wf_vctk = sample(wf_vctk)
        # dither
        wf_vctk = F.dither(wf_vctk, noise_shaping=True)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(test_filepath_vctk)
        E.append_effect_to_chain("gain", ["-h"])
        E.append_effect_to_chain("channels", [1])
        E.append_effect_to_chain("rate", [16000])
        E.append_effect_to_chain("gain", ["-rh"])
        E.append_effect_to_chain("dither", ["-s"])
        wf_vctk_sox = E.sox_build_flow_effects()[0]

        torch.testing.assert_allclose(wf_vctk,
                                      wf_vctk_sox,
                                      rtol=1e-03,
                                      atol=1e-03)
Example #10
    def test_vctk_transform_pipeline(self):
        test_filepath_vctk = os.path.join(self.test_dirpath,
                                          "assets/VCTK-Corpus/wav48/p224/",
                                          "p224_002.wav")
        wf_vctk, sr_vctk = torchaudio.load(test_filepath_vctk)

        # rate
        sample = T.Resample(sr_vctk,
                            16000,
                            resampling_method='sinc_interpolation')
        wf_vctk = sample(wf_vctk)
        # dither
        wf_vctk = F.dither(wf_vctk, noise_shaping=True)

        E = torchaudio.sox_effects.SoxEffectsChain()
        E.set_input_file(test_filepath_vctk)
        E.append_effect_to_chain("gain", ["-h"])
        E.append_effect_to_chain("channels", [1])
        E.append_effect_to_chain("rate", [16000])
        E.append_effect_to_chain("gain", ["-rh"])
        E.append_effect_to_chain("dither", ["-s"])
        wf_vctk_sox = E.sox_build_flow_effects()[0]

        self.assertTrue(
            torch.allclose(wf_vctk, wf_vctk_sox, rtol=1e-03, atol=1e-03))
Example #11
def load_data(path, f, sr=22050, normalize=False, transforms=None):
    '''
    Load in audio, sample-rate and x (either MFCC or spectrogram).
    '''

    #if t1 is None and t2 is None:
    #Load the entire thing.
    audio, sr0 = torchaudio.load(path, normalize=normalize)
    #else:
    #Load from t1 to t2.
    #    sr0 = torchaudio.info(path).sample_rate
    #    frame_offset = int(np.round(t1 * sr0))
    #    num_frames = int(np.round((t2-t1)*sr0))
    #    audio, sr0 = torchaudio.load(path, normalize = normalize, frame_offset = frame_offset, num_frames = num_frames)

    #    assert audio.shape[-1] != 0, f'audio.shape[-1] found to have size 0.'

    #if normalize:
    #    audio, sr0 = torchaudio.sox_effects.apply_effects_tensor(audio, sr0, [['gain', '-n']], channels_first=True)

    #Resample to the desired sample rate.

    if sr0 != sr:
        audio = T.Resample(sr0, sr)(audio)

    if transforms:
        if type(transforms) is not list:
            transforms = [transforms]

        for t in transforms:
            audio, sr = t(audio, sr)

    x = f(audio)
    return audio, sr, x
Example #12
    def _decode_example_with_torchaudio(self, value):
        try:
            import torchaudio
            import torchaudio.transforms as T
        except ImportError as err:
            raise ImportError(
                "To support decoding 'mp3' audio files, please install 'torchaudio'."
            ) from err
        try:
            torchaudio.set_audio_backend("sox_io")
        except RuntimeError as err:
            raise ImportError(
                "To support decoding 'mp3' audio files, please install 'sox'."
            ) from err

        array, sampling_rate = torchaudio.load(value)
        if self.sampling_rate and self.sampling_rate != sampling_rate:
            if not hasattr(self, "_resampler"):
                self._resampler = T.Resample(sampling_rate, self.sampling_rate)
            array = self._resampler(array)
            sampling_rate = self.sampling_rate
        array = array.numpy()
        if self.mono:
            array = array.mean(axis=0)
        return array, sampling_rate
Example #13
def get_resampling_transform(config):
    '''
    Torchaudio has no support for batches when resampling.
    :param config: object exposing original_fs and new_fs attributes
    :return: an nn.Sequential wrapping the Resample transform
    '''
    return nn.Sequential(
        tforms_torch.Resample(orig_freq=config.original_fs,
                              new_freq=config.new_fs))
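
A minimal usage sketch for the helper above, assuming a hypothetical config object that exposes original_fs and new_fs; since the docstring treats inputs as single (channels, time) clips, a batch can be resampled one clip at a time:

import torch

class _Config:  # hypothetical stand-in for the real config object
    original_fs = 44100
    new_fs = 16000

resample = get_resampling_transform(_Config())  # nn.Sequential wrapping Resample(44100 -> 16000)
batch = torch.randn(8, 2, 44100)  # (batch, channels, time), one second per clip
resampled = torch.stack([resample(clip) for clip in batch])  # -> (8, 2, 16000)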
Example #14
def load_wav_to_torch(full_path, sr):
    sampling_rate, data = read(full_path)
    if sampling_rate != sr:
        data = torch.FloatTensor(data.astype(np.float32))
        data = transforms.Resample(orig_freq=sampling_rate, new_freq=sr)(data)
        return data
    # assert sr == sampling_rate, "{} SR doesn't match {} on path {}".format(
    #     sr, sampling_rate, full_path)
    return torch.FloatTensor(data.astype(np.float32))
Example #15
 def __init__(self, orig_fs=44100, **kargs):
     super().__init__(**kargs)
     self.orig_fs = orig_fs
     self.trans = [
         atrans.Resample(self.orig_fs, self.fs),
         atrans.MelSpectrogram(sample_rate=self.fs,
                               n_fft=self.n_fft,
                               hop_length=self.hop_length,
                               n_mels=self.n_mels)
     ]
Example #16
 def __init__(self, file_path, root_dir, mel_scale):
     with open(file_path, encoding='utf8') as file:
         self.data = [line.strip().split('|') for line in file]
     self.root_dir = root_dir
     self.resample = transforms.Resample(orig_freq=22050, new_freq=16000)
     self.to_mel = transforms.MelSpectrogram(n_mels=80,
                                             sample_rate=16000,
                                             n_fft=1024,
                                             hop_length=256,
                                             f_max=8000.)
     self.mel_scale = mel_scale
     self.text_pad = _symbol_to_id[' ']
Example #17
    def _process(self, wav_path):
        waveform, tmp_sr = torchaudio.load(wav_path, normalization=True)
        if tmp_sr != self.sr:
            waveform = AT.Resample(orig_freq=tmp_sr,
                                   new_freq=self.sr)(waveform)

        name = basename(wav_path).replace(".wav", ".pt").replace(".sph", ".pt")
        if self.vad_mask:
            vad_percent = torch.load(join(self.root, "VAD", name))
            vad_samples = percent_to_onehot(vad_percent, waveform.shape[-1])
            waveform *= vad_samples

        pitch = self._F0(waveform)
        torch.save(pitch, join(self.savepath, name))
Example #18
    def wavform_to_log_mel(self, waveform, sample_rate):
        '''
        Args:
            waveform: torch tensor of shape [num_audio_channels, num_time_steps]
            sample_rate: samples per second
        Returns:
            batched torch tensor of shape [num_chunks, 1, window_size_in_frames, num_freq_bins],
            plus a numpy copy of the log-mel spectrogram
        '''
        x = waveform.mean(axis=0, keepdims=True)  # average over channels
        resampler = ta_trans.Resample(sample_rate,
                                      CommonParams.TARGET_SAMPLE_RATE)
        x = resampler(x)
        x = self.mel_trans_ope(x)
        x = x.squeeze(dim=0).T  # [1, C, T] -> [T, C]
        spectrogram = x.cpu().numpy().copy()

        window_size_in_frames = int(
            round(CommonParams.PATCH_WINDOW_IN_SECONDS /
                  CommonParams.STFT_HOP_LENGTH_SECONDS))

        if YAMNetParams.PATCH_HOP_SECONDS == YAMNetParams.PATCH_WINDOW_SECONDS:
            num_chunks = x.shape[0] // window_size_in_frames

            # reshape into chunks of non-overlapping sliding window
            num_frames_to_use = num_chunks * window_size_in_frames
            x = x[:num_frames_to_use]
            # [num_chunks, 1, window_size, num_freq]
            x = x.reshape(num_chunks, 1, window_size_in_frames, x.shape[-1])
        else:  # generate chunks with custom sliding window length `patch_hop_seconds`
            patch_hop_in_frames = int(
                round(YAMNetParams.PATCH_HOP_SECONDS /
                      CommonParams.STFT_HOP_LENGTH_SECONDS))
            # TODO performance optimization with zero copy
            patch_hop_num_chunks = (
                x.shape[0] - window_size_in_frames) // patch_hop_in_frames + 1
            num_frames_to_use = window_size_in_frames + (
                patch_hop_num_chunks - 1) * patch_hop_in_frames
            x = x[:num_frames_to_use]
            x_in_frames = x.reshape(-1, x.shape[-1])
            x_output = np.empty(
                (patch_hop_num_chunks, window_size_in_frames, x.shape[-1]))
            for i in range(patch_hop_num_chunks):
                start_frame = i * patch_hop_in_frames
                x_output[i] = x_in_frames[start_frame:start_frame +
                                          window_size_in_frames]
            x = x_output.reshape(patch_hop_num_chunks, 1,
                                 window_size_in_frames, x.shape[-1])
            x = torch.tensor(x, dtype=torch.float32)
        return x, spectrogram
Example #19
 def load(
     path: str,
     sample_rate: int,
     mono: bool = True,
     device: torch.device = torch.device("cpu")) -> Tensor:
     waveform, original_sr = ta.load(path)
     waveform = waveform.to(device)
     if original_sr != sample_rate:
         resample = _transf.Resample(original_sr, sample_rate).to(device)
         waveform = resample(waveform)
     if mono:
         channels_dim = 0
         channels_count = waveform.shape[channels_dim]
         waveform = waveform.sum(dim=channels_dim) / channels_count
     return waveform
Example #20
def main():

    # ==============================================================
    # Config and parameters
    config = get_params()

    dataset = get_dataset(config)

    if config.resampling_rate != -1:
        tforms = transforms.Resample(orig_freq=dataset.fs,
                                     new_freq=config.resampling_rate)
    else:
        tforms = None

    start_id = config.start_id * config.chunk_size
    stop_id = start_id + config.chunk_size
    full_output_path = os.path.join(dataset.root, dataset.processed_directory)

    if not os.path.exists(full_output_path):
        os.mkdir(full_output_path)

    # ==============================================================
    # Start

    print("========== WORKER %04d ---> %04d" % (start_id, stop_id))

    result = []
    pbar = tqdm(range(start_id, stop_id, 1))
    ctr = 0
    for i in pbar:
        ctr += 1
        audio, _, fname = dataset[i]

        result.append(
            worker_proccess_audio(audio, fname, full_output_path,
                                  config.filetype))

        pbar.set_description("Processing ids [{} -- {}], Step [{}/{}]".format(
            start_id, stop_id, ctr, config.chunk_size))

    print("========== WORKER %04d ---> %04d         Processed  %d files" %
          (start_id, stop_id, len(result)))
Example #21
def collate_fn(batch, resample_rate):

    # A data tuple has the form:
    # waveform, sample_rate, label, speaker_id, utterance_number

    tensors, targets = [], []

    # resampling func
    transform = transforms.Resample(orig_freq=16000, new_freq=resample_rate)

    # Gather in lists, and encode labels as indices
    for waveform, _, label, *_ in batch:
        tensors += [transform(waveform)]
        targets += [label_to_index(label)]

    # Group the list of tensors into a batched tensor
    tensors = pad_sequence(tensors)
    targets = torch.stack(targets)

    return tensors, targets
Example #22
 def __getitem__(self, index):
     audio, sr = load(self.file_paths[index])
     audio = torch.mean(audio, dim=0, keepdim=True)
     if self.sr != sr:
         audio = transforms.Resample(sr, self.sr)(audio)
     mel_spectrogram = transforms.MelSpectrogram(sample_rate=self.sr,
                                                 n_fft=self.n_fft,
                                                 win_length=self.win_length,
                                                 hop_length=self.hop_length,
                                                 n_mels=self.n_mels,
                                                 f_max=self.sr / 2)(audio)
     if self.log_mel:
         offset = 1e-6
         mel_spectrogram = torch.log(mel_spectrogram + offset)
     else:
         mel_spectrogram = transforms.AmplitudeToDB(
             stype="power", top_db=80)(mel_spectrogram)
     if self.augment:
         # apply the masking to the spectrogram; masking the raw waveform here
         # would be discarded, since only mel_spectrogram is returned
         mel_spectrogram = transforms.FrequencyMasking(freq_mask_param=20)(mel_spectrogram)
         mel_spectrogram = transforms.TimeMasking(time_mask_param=10)(mel_spectrogram)
     label = self.labels[index]
     return mel_spectrogram, label
Example #23
 def test_resample(self, orig_freq, new_freq):
     transform = T.Resample(orig_freq=orig_freq, new_freq=new_freq)
     waveform = get_whitenoise(sample_rate=8000, duration=0.05, n_channels=2)
     self.assert_grad(transform, [waveform])
Example #24
 def test_Resample(self):
     sr1, sr2 = 16000, 8000
     tensor = common_utils.get_whitenoise(sample_rate=sr1)
     self._assert_consistency(T.Resample(float(sr1), float(sr2)), tensor)
Example #25
    def __init__(self,
                 root_dir=constants.DATA_BASE_DIR,
                 result_mode=False,
                 chunk_size=constants.CHUNK_SIZE,
                 model_type='all'):

        assert chunk_size == 1, 'current implementation only supports 1 second chunks'

        fs = [
            f for f in os.listdir(root_dir)
            if f.endswith(constants.VISUAL_SUFFIX)
        ]
        self.data = []
        self.meta = []
        if not result_mode:
            labels = json.loads(
                open(os.path.join(root_dir, 'labels.json'), 'r').read())

        self.ensemble_audio_transforms = [
            IT.Compose([
                ReduceAudioChannels(),
                NormalizeAudio(),
                AT.Resample(constants.AUDIO_SAMPLE_RATE,
                            constants.RESAMPLED_AUDIO_SAMPLE_RATE),
                AT.MFCC(sample_rate=constants.RESAMPLED_AUDIO_SAMPLE_RATE,
                        n_mfcc=constants.N_MFCCS)
            ]),
            IT.Compose([
                ReduceAudioChannels(),
                NormalizeAudio(),
                AT.Resample(constants.AUDIO_SAMPLE_RATE,
                            constants.RESAMPLED_AUDIO_SAMPLE_RATE),
                AT.MFCC(sample_rate=constants.RESAMPLED_AUDIO_SAMPLE_RATE,
                        n_mfcc=constants.N_MFCCS)
            ])
        ]

        self.ensemble_video_transforms = [
            IT.Compose([
                VideoTransform(IT.ToPILImage()),
                VideoTransform(
                    IT.Resize((constants.INPUT_FRAME_WIDTH,
                               constants.INPUT_FRAME_WIDTH))),
                VideoTransform(IT.ToTensor()),
                VideoTransform(
                    IT.Normalize(mean=constants.MEAN, std=constants.STD)),
            ]),
            IT.Compose([
                VideoTransform(IT.ToPILImage()),
                VideoTransform(
                    IT.Resize((constants.INPUT_FRAME_WIDTH,
                               constants.INPUT_FRAME_WIDTH))),
                VideoTransform(IT.ToTensor()),
                VideoTransform(
                    IT.Normalize(mean=constants.MEAN, std=constants.STD)),
            ])
        ]

        self.ensemble_video_post_transforms = [
            lambda x: x.permute([1, 0, 2, 3]),
            lambda x: x.permute([1, 0, 2, 3])
        ]

        self.ensemble_audio_post_transforms = [lambda x: x, lambda x: x]

        if model_type == 'conv3D_MFCCs':
            self.ensemble_video_transforms = [
                self.ensemble_video_transforms[0]
            ]
            self.ensemble_audio_transforms = [
                self.ensemble_audio_transforms[0]
            ]
            self.ensemble_video_post_transforms = [
                self.ensemble_video_post_transforms[0]
            ]
            self.ensemble_audio_post_transforms = [
                self.ensemble_audio_post_transforms[0]
            ]

        for f in fs:
            # break in 1 sec chunks and add label
            audio_file = f[:-len(constants.VISUAL_SUFFIX
                                 )] + constants.AUDIO_SUFFIX
            chunks, meta = self.break_in_chunks(
                os.path.join(root_dir, f), os.path.join(root_dir, audio_file),
                [] if result_mode else labels[f], chunk_size)
            self.data += chunks
            self.meta += meta

        if not result_mode:
            self.print_data_stats()
Example #26
 def test_Resample(self):
     tensor = torch.rand((2, 1000))
     sample_rate = 100.
     sample_rate_2 = 50.
     self._assert_consistency(T.Resample(sample_rate, sample_rate_2),
                              tensor)
Example #27
# load dataset
batch_size = 32
batchsize_for_val = 128
(train_loader, val_loader, test_loader) = fgnh.SpeechCommands_Dataloaders(
    resample_rate=8000,
    batch_size=batch_size,
    batchsize_for_val=batchsize_for_val,
    num_workers=5,
    pin_memory=True)

val_set = val_loader.dataset  # note these are not resampled

# necessary re-sampling
from torchaudio import transforms

transform = transforms.Resample(orig_freq=16000, new_freq=8000)

### values to iterate over
# max distance allowed based on epsilon
# precomputed bounds min and max input values
min_bound = -1.3844940662384033
max_bound = 1.3773366212844849
epsilons = [(max_bound - min_bound) * x * (1. / 256.) for x in [
    1. / 512,
    3. / 1024,
    1. / 256,
    3. / 512,
    1. / 128,
    3. / 256,
    1. / 64,
    3. / 128,
Example #28
    def test_resample_cache_dtype(self, resampling_method, dtype):
        """Providing dtype changes the kernel cache dtype"""
        transform = T.Resample(16000, 44100, resampling_method, dtype=dtype)

        assert transform.kernel.dtype == (dtype if dtype is not None else torch.float32)
Example #29
#
# The spectrograms below show the frequency representation of the signal,
# where the x-axis corresponds to the frequency of the original
# waveform (in log scale), y-axis the frequency of the
# plotted waveform, and color intensity the amplitude.
#


sample_rate = 48000
resample_rate = 32000

waveform = get_sine_sweep(sample_rate)
plot_sweep(waveform, sample_rate, title="Original Waveform")
play_audio(waveform, sample_rate)

resampler = T.Resample(sample_rate, resample_rate, dtype=waveform.dtype)
resampled_waveform = resampler(waveform)
plot_sweep(resampled_waveform, resample_rate, title="Resampled Waveform")
play_audio(resampled_waveform, resample_rate)


######################################################################
# Controlling resampling quality with parameters
# ----------------------------------------------
#
# Lowpass filter width
# ~~~~~~~~~~~~~~~~~~~~
#
# Because the filter used for interpolation extends infinitely, the
# ``lowpass_filter_width`` parameter is used to control the width of
# the filter used to window the interpolation. It is also referred to as the
# number of zero crossings, since the interpolation passes through zero at
# every time unit.
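#
# A short sketch reusing the names defined above (sample_rate, resample_rate,
# waveform, plot_sweep): comparing a narrow and a wide ``lowpass_filter_width``
# illustrates the sharper transition band that a wider (and more expensive)
# filter buys.
#

resampler_narrow = T.Resample(sample_rate, resample_rate, lowpass_filter_width=6, dtype=waveform.dtype)
resampler_wide = T.Resample(sample_rate, resample_rate, lowpass_filter_width=128, dtype=waveform.dtype)
plot_sweep(resampler_narrow(waveform), resample_rate, title="lowpass_filter_width=6")
plot_sweep(resampler_wide(waveform), resample_rate, title="lowpass_filter_width=128")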
Example #30
    def test_resample_identity(self, resampling_method, sample_rate):
        waveform = get_whitenoise(sample_rate=sample_rate, duration=1)

        resampler = T.Resample(sample_rate, sample_rate)
        resampled = resampler(waveform)
        self.assertEqual(waveform, resampled)