示例#1
0
 def test_save_channels_first(self, channels_first):
     """Saved audio honours the ``channels_first`` flag.

     Two-channel int16 data is generated in the requested layout, saved with
     the same ``channels_first`` value, and read back with ``load_wav``. The
     loaded tensor must equal the input unchanged when ``channels_first`` is
     true, and the input with its two axes swapped otherwise.
     """
     wav_path = self.get_temp_path('data.wav')
     tensor = get_wav_data(
         'int16', 2, normalize=False, channels_first=channels_first)
     sox_io_backend.save(
         wav_path, tensor, 8000, channels_first=channels_first)
     found, _ = load_wav(wav_path, normalize=False)
     if channels_first:
         expected = tensor
     else:
         expected = tensor.transpose(1, 0)
     self.assertEqual(found, expected)
示例#2
0
 def read(self, frames, dtype, always_2d):
     """Return a ``frames``-long slice of generated data as a NumPy array.

     The data is produced by ``get_wav_data`` (channels-last, unnormalized)
     using the channel and frame counts stored in ``self._params``; the
     slice starts at ``self._start``. ``always_2d`` must be truthy.
     """
     assert always_2d
     params = self._params
     generated = get_wav_data(
         dtype,
         params["num_channels"],
         normalize=False,
         num_frames=params["num_frames"],
         channels_first=False,
     )
     begin = self._start
     return generated.numpy()[begin:begin + frames]
示例#3
0
 def assert_wav(self, dtype, sample_rate, num_channels, num_frames):
     """Check that `soundfile_backend.save` writes wav data that reads back unchanged.

     Unnormalized data is generated, saved to a temporary wav file, and
     loaded again with ``load_wav``; both the sample rate and the samples
     must match what was written.
     """
     wav_path = self.get_temp_path("data.wav")
     expected = get_wav_data(
         dtype, num_channels, normalize=False, num_frames=num_frames)
     soundfile_backend.save(wav_path, expected, sample_rate)
     loaded = load_wav(wav_path, normalize=False)
     found, sr = loaded
     assert sample_rate == sr
     self.assertEqual(found, expected)
示例#4
0
 def test_save_noncontiguous(self, dtype):
     """A non-contiguous tensor is saved with the right sample values.

     Every other element along both axes of generated 4-channel data is
     taken, which yields a non-contiguous view (asserted below). That view
     is saved and the file content is compared against it.
     """
     wav_path = self.get_temp_path('data.wav')
     encoding, bits_per_sample = get_enc_params(dtype)
     full = get_wav_data(dtype, 4, normalize=False)
     expected = full[::2, ::2]
     # Guard the premise of the test: the strided view must not be contiguous.
     assert not expected.is_contiguous()
     sox_io_backend.save(
         wav_path,
         expected,
         8000,
         encoding=encoding,
         bits_per_sample=bits_per_sample,
     )
     found, _ = load_wav(wav_path, normalize=False)
     self.assertEqual(found, expected)
示例#5
0
 def test_wav(self, dtype, sample_rate, num_channels):
     """Repeated wav save/load cycles must leave the data untouched.

     The output of each load is fed into the next save, ten times over;
     after every cycle the loaded data is compared against the original.
     """
     original = get_wav_data(dtype, num_channels, normalize=False)
     current = original
     for cycle in range(10):
         wav_path = self.get_temp_path(f'{cycle}.wav')
         sox_io_backend.save(wav_path, current, sample_rate)
         loaded = sox_io_backend.load(wav_path, normalize=False)
         current, sr = loaded
         assert sr == sample_rate
         self.assertEqual(original, current)
示例#6
0
    def test_apply_no_effect(self, dtype, sample_rate, num_channels, channels_first):
        """`apply_effects_file` with an empty effect list returns the file's data as-is.

        Generated data is written to a wav file and passed through
        `apply_effects_file` with no effects; the returned sample rate and
        samples must equal what was written.
        """
        wav_path = self.get_temp_path('input.wav')
        expected = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(wav_path, expected, sample_rate, channels_first=channels_first)

        result = sox_effects.apply_effects_file(
            wav_path,
            [],
            normalize=False,
            channels_first=channels_first,
        )
        found, output_sample_rate = result

        assert output_sample_rate == sample_rate
        self.assertEqual(expected, found)
示例#7
0
 def test_wav_multiple_channels(self, dtype, sample_rate, num_channels):
     """`sox_io_backend.info` reports correct metadata for multi-channel wav files.

     A one-second wav file is written and the reported sample rate, frame
     count, channel count, bit depth and encoding are checked.
     """
     duration = 1
     wav_path = self.get_temp_path('data.wav')
     samples = get_wav_data(
         dtype,
         num_channels,
         normalize=False,
         num_frames=duration * sample_rate,
     )
     save_wav(wav_path, samples, sample_rate)
     metadata = sox_io_backend.info(wav_path)
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == sample_rate * duration
     assert metadata.num_channels == num_channels
     assert metadata.bits_per_sample == sox_utils.get_bit_depth(dtype)
     assert metadata.encoding == get_encoding('wav', dtype)
示例#8
0
    def assert_wav(self, dtype, sample_rate, num_channels, normalize, duration):
        """Check that `sox_io_backend.load` reads wav files correctly.

        A reference wav file is written with ``save_wav``; the tensor loaded
        by `sox_io_backend.load` must equal the one returned by the
        reference loader ``load_wav`` for the same ``normalize`` setting.
        """
        wav_path = self.get_temp_path('reference.wav')
        num_frames = duration * sample_rate
        source = get_wav_data(
            dtype, num_channels, normalize=normalize, num_frames=num_frames)
        save_wav(wav_path, source, sample_rate)
        expected, _ = load_wav(wav_path, normalize=normalize)
        loaded, sr = sox_io_backend.load(wav_path, normalize=normalize)
        assert sr == sample_rate
        self.assertEqual(loaded, expected)
示例#9
0
    def _test_fileobj(self, ext):
        """Check that `soundfile_backend.load` accepts an open binary file object.

        A two-channel float32 file with extension ``ext`` is written with
        ``soundfile.write`` and read back with ``soundfile.read`` to obtain
        the reference; the backend then loads the same file through a file
        handle and must return the same sample rate and (transposed) data.
        """
        sample_rate = 16000
        audio_path = self.get_temp_path(f'test.{ext}')

        tensor = get_wav_data('float32', num_channels=2)
        frames_by_channels = tensor.numpy().T
        soundfile.write(audio_path, frames_by_channels, sample_rate)
        reference, _ = soundfile.read(audio_path, dtype='float32')
        expected = reference.T

        with open(audio_path, 'rb') as stream:
            found, sr = soundfile_backend.load(stream)
        assert sr == sample_rate
        self.assertEqual(expected, found)
示例#10
0
 def test_wav_multiple_channels(self, dtype, sample_rate, num_channels):
     """`soundfile_backend.info` reports correct metadata for multi-channel wav files.

     A one-second wav file is written and the reported sample rate, frame
     count and channel count are checked.
     """
     duration = 1
     wav_path = self.get_temp_path("data.wav")
     samples = get_wav_data(
         dtype, num_channels, normalize=False, num_frames=duration * sample_rate)
     save_wav(wav_path, samples, sample_rate)
     metadata = soundfile_backend.info(wav_path)
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == sample_rate * duration
     assert metadata.num_channels == num_channels
示例#11
0
 def test_flac(self, sample_rate, num_channels, compression_level):
     """Repeated flac save/load cycles must leave the data untouched.

     The output of each load is fed into the next save, ten times over,
     using the given compression level; after every cycle the loaded data
     is compared against the original.
     """
     original = get_wav_data('float32', num_channels)
     current = original
     for cycle in range(10):
         flac_path = self.get_temp_path(f'{cycle}.flac')
         sox_io_backend.save(
             flac_path, current, sample_rate, compression=compression_level)
         loaded = sox_io_backend.load(flac_path)
         current, sr = loaded
         assert sr == sample_rate
         self.assertEqual(original, current)
示例#12
0
 def test_wav(self, dtype, sample_rate, num_channels):
     """`sox_io_backend.info` reports correct metadata for wav files.

     A one-second wav file is written and the reported sample rate, frame
     count and channel count are checked.
     """
     duration = 1
     wav_path = self.get_temp_path('data.wav')
     samples = get_wav_data(
         dtype, num_channels, normalize=False, num_frames=duration * sample_rate)
     save_wav(wav_path, samples, sample_rate)
     metadata = sox_io_backend.info(wav_path)
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == sample_rate * duration
     assert metadata.num_channels == num_channels
示例#13
0
    def test_apply_no_effect(self, dtype, sample_rate, num_channels, channels_first):
        """`apply_effects_tensor` with an empty effect list returns data equal to its input.

        Besides the value check, this verifies that the input tensor is left
        unmodified and that the result is a distinct tensor object.
        """
        pristine = get_wav_data(dtype, num_channels, channels_first=channels_first)
        expected = pristine.clone()
        result = sox_effects.apply_effects_tensor(
            expected, sample_rate, [], channels_first)
        found, output_sample_rate = result

        assert output_sample_rate == sample_rate
        # The tensor handed to the function must still match the untouched copy.
        self.assertEqual(pristine, expected)
        # The function must hand back a new object rather than its argument.
        assert expected is not found
        # The values of the result must equal those of the input.
        self.assertEqual(expected, found)
示例#14
0
    def test_apply_effects_file(self, args):
        """Run `apply_effects_file` with the effects described by ``args``.

        ``args`` must contain ``'effects'`` and may contain ``"num_channels"``
        (default 2) and ``"input_sample_rate"`` (default 8000). The result of
        the call is discarded, so this only checks that the call completes
        without raising.
        """
        dtype = 'int32'
        channels_first = True
        effects = args['effects']
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)

        wav_path = self.get_temp_path('input.wav')
        samples = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(wav_path, samples, input_sr, channels_first=channels_first)

        # Return value intentionally unused.
        _found, _sr = sox_effects.apply_effects_file(
            wav_path,
            effects,
            normalize=False,
            channels_first=channels_first,
        )
示例#15
0
    def _assert_vorbis(self, sample_rate, num_channels, quality_level,
                       duration):
        """Check that `sox_io_backend.save` can write vorbis.

        The same reference wav is encoded to vorbis twice, once through
        `sox_io_backend.save` and once through `sox_utils.convert_audio_file`.
        Each vorbis file is converted back to wav and loaded, and the two
        decoded signals are compared while tolerating a small share of
        outlier samples.
        """
        reference_wav = self.get_temp_path('1.reference.wav')
        ta_vorbis = self.get_temp_path('2.1.torchaudio.vorbis')
        ta_wav = self.get_temp_path('2.2.torchaudio.wav')
        sox_vorbis = self.get_temp_path('3.1.sox.vorbis')
        sox_wav = self.get_temp_path('3.2.sox.wav')

        # Step 1: write the reference wav.
        num_frames = duration * sample_rate
        source = get_wav_data(
            'int16', num_channels, normalize=False, num_frames=num_frames)
        save_wav(reference_wav, source, sample_rate)

        # Step 2: wav -> vorbis via torchaudio, then vorbis -> wav via sox, then load.
        reference_data = load_wav(reference_wav)[0]
        sox_io_backend.save(
            ta_vorbis,
            reference_data,
            sample_rate,
            compression=quality_level,
            dtype=None,
        )
        sox_utils.convert_audio_file(ta_vorbis, ta_wav)
        found = load_wav(ta_wav)[0]

        # Step 3: wav -> vorbis via sox, then vorbis -> wav via sox, then load.
        sox_utils.convert_audio_file(
            reference_wav, sox_vorbis, compression=quality_level)
        sox_utils.convert_audio_file(sox_vorbis, sox_wav)
        expected = load_wav(sox_wav)[0]

        # Per the original author's note: sox's vorbis encoding has a random
        # boundary effect that gives a few samples a larger discrepancy than
        # the rest, so a small portion of samples may fall outside the absolute
        # tolerance. Callers should pass a reasonably long duration.
        atol = 1.0e-4
        max_failure_allowed = 0.01  # fraction of samples allowed outside atol
        abs_diff = (found - expected).abs()
        num_outliers = (abs_diff > atol).sum().item()
        failure_ratio = num_outliers / found.numel()
        if failure_ratio > max_failure_allowed:
            # Already a failure; assertEqual is called for its detailed message.
            self.assertEqual(found, expected, atol=atol, rtol=1.3e-6)
示例#16
0
 def test_wav(self, dtype, sample_rate, num_channels):
     """`soundfile_backend.info` reports correct metadata for wav files.

     A one-second wav file is written and the reported sample rate, frame
     count, channel count, bits per sample and encoding are checked.
     """
     duration = 1
     wav_path = self.get_temp_path("data.wav")
     samples = get_wav_data(
         dtype, num_channels, normalize=False, num_frames=duration * sample_rate)
     save_wav(wav_path, samples, sample_rate)
     metadata = soundfile_backend.info(wav_path)
     assert metadata.sample_rate == sample_rate
     assert metadata.num_frames == sample_rate * duration
     assert metadata.num_channels == num_channels
     assert metadata.bits_per_sample == get_bits_per_sample("wav", dtype)
     assert metadata.encoding == get_encoding("wav", dtype)
示例#17
0
    def test_frame(self, frame_offset, num_frames):
        """``frame_offset`` and ``num_frames`` select the expected region when loading over HTTP.

        The expected region is the slice of the original data along its
        second axis starting at ``frame_offset``; ``num_frames == -1`` means
        the slice runs to the end.
        """
        sample_rate = 8000
        audio_file = 'test.wav'
        audio_path = self.get_temp_path(audio_file)

        original = get_wav_data('float32', num_channels=2)
        save_wav(audio_path, original, sample_rate)
        if num_frames == -1:
            frame_end = None
        else:
            frame_end = frame_offset + num_frames
        expected = original[:, frame_offset:frame_end]

        url = self.get_url(audio_file)
        with requests.get(url, stream=True) as response:
            loaded = sox_io_backend.load(response.raw, frame_offset, num_frames)
            found, sr = loaded

        assert sr == sample_rate
        self.assertEqual(expected, found)
示例#18
0
    def test_wav(self, dtype, sample_rate, num_channels):
        """`apply_effects_file` handles wav input of the given dtype, rate and channel count.

        A fixed ``band`` effect is applied both through
        `sox_utils.run_sox_effect` (producing the reference file) and through
        `apply_effects_file`; sample rates and data must agree.
        """
        channels_first = True
        effects = [['band', '300', '10']]

        source_path = self.get_temp_path('input.wav')
        reference_path = self.get_temp_path('reference.wav')
        samples = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(source_path, samples, sample_rate, channels_first=channels_first)
        sox_utils.run_sox_effect(source_path, reference_path, effects)

        expected, expected_sr = load_wav(reference_path)
        result = sox_effects.apply_effects_file(
            source_path,
            effects,
            normalize=False,
            channels_first=channels_first,
        )
        found, sr = result

        assert sr == expected_sr
        self.assertEqual(found, expected)
示例#19
0
    def _assert_non_wav(self, fmt, dtype, sample_rate, num_channels):
        """Check that `soundfile_backend.save` can write a non-wav format.

        Only metadata is validated (format name, frame count, channel count
        and sample rate as reported by ``soundfile.info``). The original
        author notes that a precision mismatch and the lack of another
        decoder for these files make a sample-level comparison impractical.
        """
        num_frames = sample_rate * 3
        out_path = self.get_temp_path(f"data.{fmt}")
        expected = get_wav_data(
            dtype, num_channels, normalize=False, num_frames=num_frames)
        soundfile_backend.save(out_path, expected, sample_rate)
        file_info = soundfile.info(out_path)
        assert file_info.format == fmt.upper()
        assert file_info.frames == num_frames
        assert file_info.channels == num_channels
        assert file_info.samplerate == sample_rate
示例#20
0
    def _test_fileobj(self, ext):
        """Check that `soundfile_backend.save` can write into a file-like object.

        The reference is produced by writing the same data to disk with
        ``soundfile.write`` (subtype ``'FLOAT'`` for wav, default otherwise)
        and reading it back. The backend then saves into an in-memory buffer,
        which is read with ``soundfile.read`` and compared to the reference
        within a tolerance.
        """
        sample_rate = 16000
        disk_path = self.get_temp_path(f'test.{ext}')

        if ext == 'wav':
            subtype = 'FLOAT'
        else:
            subtype = None
        tensor = get_wav_data('float32', num_channels=2)
        soundfile.write(disk_path, tensor.numpy().T, sample_rate, subtype=subtype)
        expected, _ = soundfile.read(disk_path, dtype='float32')

        buffer = io.BytesIO()
        soundfile_backend.save(buffer, tensor, sample_rate, format=ext)
        # Rewind so the reader starts at the beginning of what was written.
        buffer.seek(0)
        found, sr = soundfile.read(buffer, dtype='float32')

        assert sr == sample_rate
        self.assertEqual(expected, found, atol=1e-4, rtol=1e-8)
示例#21
0
    def test_info_wav(self, dtype, sample_rate, num_channels):
        """The ``torch_script``-wrapped info function agrees with the plain Python one.

        Both versions are run on the same wav file and their sample rate,
        frame count and channel count are compared.
        """
        file_name = f'{dtype}_{sample_rate}_{num_channels}.wav'
        audio_path = self.get_temp_path(file_name)
        samples = get_wav_data(
            dtype, num_channels, normalize=False, num_frames=1 * sample_rate)
        save_wav(audio_path, samples, sample_rate)

        ts_info_func = torch_script(py_info_func)

        eager_info = py_info_func(audio_path)
        scripted_info = ts_info_func(audio_path)

        assert eager_info.sample_rate == scripted_info.sample_rate
        assert eager_info.num_frames == scripted_info.num_frames
        assert eager_info.num_channels == scripted_info.num_channels
0
    def _test_tarfile(self, ext):
        """Check that `soundfile_backend.load` can read a member of a tar archive.

        An audio file with extension ``ext`` is written and added to a tar
        archive; the member is then opened via ``extractfile`` and handed to
        the backend as a file-like object. The result must match what
        ``soundfile.read`` returns for the file on disk.
        """
        sample_rate = 16000
        audio_file = f'test.{ext}'
        audio_path = self.get_temp_path(audio_file)
        archive_path = self.get_temp_path('archive.tar.gz')

        tensor = get_wav_data('float32', num_channels=2)
        soundfile.write(audio_path, tensor.numpy().T, sample_rate)
        reference, _ = soundfile.read(audio_path, dtype='float32')
        expected = reference.T

        with tarfile.TarFile(archive_path, 'w') as writer:
            writer.add(audio_path, arcname=audio_file)
        with tarfile.TarFile(archive_path, 'r') as reader:
            member_stream = reader.extractfile(audio_file)
            found, sr = soundfile_backend.load(member_stream)

        assert sr == sample_rate
        self.assertEqual(expected, found)
示例#23
0
    def test_save_wav(self, dtype, sample_rate, num_channels):
        """The ``torch_script``-wrapped save function writes the same wav as the Python one.

        The same data is saved through both versions into separate files;
        each file is loaded with ``load_wav`` and must reproduce the sample
        rate and the original data.
        """
        ts_save_func = torch_script(py_save_func)

        expected = get_wav_data(dtype, num_channels, normalize=False)
        suffix = f'{dtype}_{sample_rate}_{num_channels}.wav'
        py_path = self.get_temp_path(f'test_save_py_{suffix}')
        ts_path = self.get_temp_path(f'test_save_ts_{suffix}')
        encoding, bits_per_sample = get_enc_params(dtype)

        py_save_func(
            py_path, expected, sample_rate, True, None, encoding, bits_per_sample)
        ts_save_func(
            ts_path, expected, sample_rate, True, None, encoding, bits_per_sample)

        py_loaded = load_wav(py_path, normalize=False)
        ts_loaded = load_wav(ts_path, normalize=False)
        py_data, py_sr = py_loaded
        ts_data, ts_sr = ts_loaded

        self.assertEqual(sample_rate, py_sr)
        self.assertEqual(sample_rate, ts_sr)
        self.assertEqual(expected, py_data)
        self.assertEqual(expected, ts_data)
示例#24
0
    def assert_flac(self, sample_rate, num_channels, compression_level,
                    duration):
        """Check that `sox_io_backend.save` can write flac.

        The same reference wav is encoded to flac twice, once through
        `sox_io_backend.save` and once through `sox_utils.convert_audio_file`.
        Each flac file is converted back to a 32-bit wav and loaded, and the
        two decoded signals must be equal.
        """
        reference_wav = self.get_temp_path('1.reference.wav')
        ta_flac = self.get_temp_path('2.1.torchaudio.flac')
        ta_wav = self.get_temp_path('2.2.torchaudio.wav')
        sox_flac = self.get_temp_path('3.1.sox.flac')
        sox_wav = self.get_temp_path('3.2.sox.wav')

        # Step 1: write the reference wav.
        num_frames = duration * sample_rate
        source = get_wav_data(
            'float32', num_channels, normalize=True, num_frames=num_frames)
        save_wav(reference_wav, source, sample_rate)

        # Step 2: wav -> flac via torchaudio.
        reference_data = load_wav(reference_wav)[0]
        sox_io_backend.save(
            ta_flac,
            reference_data,
            sample_rate,
            compression=compression_level,
            dtype=None,
        )
        # flac -> wav via sox, at 32 bit: the original author notes the flac
        # file has 24 bit depth, which scipy cannot handle.
        sox_utils.convert_audio_file(ta_flac, ta_wav, bit_depth=32)
        found = load_wav(ta_wav)[0]

        # Step 3: wav -> flac via sox.
        sox_utils.convert_audio_file(
            reference_wav, sox_flac, compression=compression_level)
        # flac -> wav via sox, again at 32 bit for the same reason as above.
        sox_utils.convert_audio_file(sox_flac, sox_wav, bit_depth=32)
        expected = load_wav(sox_wav)[0]

        self.assertEqual(found, expected)
示例#25
0
    def test_save_wav(self, dtype, sample_rate, num_channels):
        """A scripted save function that was serialized and reloaded writes the same wav.

        ``py_save_func`` is scripted, saved to disk and loaded back with
        ``torch.jit.load``. The same data is then saved through both the
        Python and the reloaded version; each file is loaded with
        ``load_wav`` and must reproduce the sample rate and the data.
        """
        script_path = self.get_temp_path('save_func.zip')
        scripted = torch.jit.script(py_save_func)
        scripted.save(script_path)
        ts_save_func = torch.jit.load(script_path)

        expected = get_wav_data(dtype, num_channels)
        suffix = f'{dtype}_{sample_rate}_{num_channels}.wav'
        py_path = self.get_temp_path(f'test_save_py_{suffix}')
        ts_path = self.get_temp_path(f'test_save_ts_{suffix}')

        py_save_func(py_path, expected, sample_rate, True, None)
        ts_save_func(ts_path, expected, sample_rate, True, None)

        py_loaded = load_wav(py_path)
        ts_loaded = load_wav(ts_path)
        py_data, py_sr = py_loaded
        ts_data, ts_sr = ts_loaded

        self.assertEqual(sample_rate, py_sr)
        self.assertEqual(sample_rate, ts_sr)
        self.assertEqual(expected, py_data)
        self.assertEqual(expected, ts_data)
示例#26
0
    def assert_non_wav(
        self,
        fmt,
        dtype,
        sample_rate,
        num_channels,
        channels_first,
        mocked_write,
        encoding=None,
        bits_per_sample=None,
    ):
        """Check the arguments `soundfile_backend.save` forwards to the mocked write call.

        ``mocked_write`` is presumably a mock standing in for
        ``soundfile.write`` (confirm against the caller). The keyword
        arguments of its recorded call are inspected: ``file`` and
        ``samplerate`` must match the inputs, ``format`` must be ``"NIST"``
        for the ``sph``/``nist``/``nis`` extensions and ``None`` otherwise,
        and ``data`` must equal the expected tensor.
        """
        filepath = f"foo.{fmt}"
        generated = get_wav_data(
            dtype,
            num_channels,
            num_frames=3 * sample_rate,
            normalize=False,
            channels_first=channels_first,
        )
        input_tensor = generated.t()
        if channels_first:
            expected_data = input_tensor.t()
        else:
            expected_data = input_tensor

        soundfile_backend.save(
            filepath,
            input_tensor,
            sample_rate,
            channels_first,
            encoding=encoding,
            bits_per_sample=bits_per_sample,
        )

        # Index 1 of call_args is the kwargs dict; on Python 3.8+ the more
        # descriptive call_args.kwargs is available.
        write_kwargs = mocked_write.call_args[1]
        assert write_kwargs["file"] == filepath
        assert write_kwargs["samplerate"] == sample_rate
        if fmt in ["sph", "nist", "nis"]:
            assert write_kwargs["format"] == "NIST"
        else:
            assert write_kwargs["format"] is None
        self.assertEqual(write_kwargs["data"], expected_data)
示例#27
0
    def test_load_wav(self, dtype, sample_rate, num_channels, normalize,
                      channels_first):
        """The ``torch_script``-wrapped load function agrees with the plain Python one.

        Both versions load the same one-second wav file with identical
        ``normalize`` and ``channels_first`` settings; the returned sample
        rates and tensors must be equal.
        """
        file_name = f'test_load_{dtype}_{sample_rate}_{num_channels}_{normalize}.wav'
        audio_path = self.get_temp_path(file_name)
        samples = get_wav_data(
            dtype, num_channels, normalize=False, num_frames=1 * sample_rate)
        save_wav(audio_path, samples, sample_rate)

        ts_load_func = torch_script(py_load_func)

        eager_result = py_load_func(
            audio_path, normalize=normalize, channels_first=channels_first)
        scripted_result = ts_load_func(
            audio_path, normalize=normalize, channels_first=channels_first)
        py_data, py_sr = eager_result
        ts_data, ts_sr = scripted_result

        self.assertEqual(py_sr, ts_sr)
        self.assertEqual(py_data, ts_data)
示例#28
0
    def test_apply_effects_str(self, args):
        """`apply_effects_file` given a string path matches the output of the sox helper.

        ``args`` must contain ``'effects'`` and may contain ``"num_channels"``
        (default 2), ``"input_sample_rate"`` (default 8000) and
        ``"output_sample_rate"`` (default ``None``). The reference is
        produced by `sox_utils.run_sox_effect`.
        """
        dtype = 'int32'
        channels_first = True
        effects = args['effects']
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)
        output_sr = args.get("output_sample_rate")

        source_path = self.get_temp_path('input.wav')
        reference_path = self.get_temp_path('reference.wav')
        samples = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(source_path, samples, input_sr, channels_first=channels_first)
        sox_utils.run_sox_effect(
            source_path,
            reference_path,
            effects,
            output_sample_rate=output_sr,
        )

        expected, expected_sr = load_wav(reference_path)
        result = sox_effects.apply_effects_file(
            source_path,
            effects,
            normalize=False,
            channels_first=channels_first,
        )
        found, sr = result

        assert sr == expected_sr
        self.assertEqual(found, expected)
示例#29
0
    def test_apply_effects_path(self):
        """`apply_effects_file` given a ``Path`` object matches the output of the sox helper.

        A fixed ``hilbert`` effect is applied to two-channel int32 data at
        8000 Hz; the reference is produced by `sox_utils.run_sox_effect`.
        """
        dtype = 'int32'
        channels_first = True
        effects = [["hilbert"]]
        num_channels = 2
        input_sr = 8000
        output_sr = 8000

        source_path = self.get_temp_path('input.wav')
        reference_path = self.get_temp_path('reference.wav')
        samples = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(source_path, samples, input_sr, channels_first=channels_first)
        sox_utils.run_sox_effect(
            source_path,
            reference_path,
            effects,
            output_sample_rate=output_sr,
        )

        expected, expected_sr = load_wav(reference_path)
        result = sox_effects.apply_effects_file(
            Path(source_path),
            effects,
            normalize=False,
            channels_first=channels_first,
        )
        found, sr = result

        assert sr == expected_sr
        self.assertEqual(found, expected)
示例#30
0
 def assert_flac(
     self,
     dtype,
     sample_rate,
     num_channels,
     channels_first=True,
     duration=1,
 ):
     """Check that `soundfile_backend.load` reads FLAC files correctly.

     Channels-last, unnormalized data is written with ``soundfile.write``.
     The expected result is that data passed through ``normalize_wav``,
     transposed first when ``channels_first`` is true. The comparison uses
     a tolerance.
     """
     flac_path = self.get_temp_path("reference.flac")
     num_frames = duration * sample_rate
     raw = get_wav_data(
         dtype,
         num_channels,
         num_frames=num_frames,
         normalize=False,
         channels_first=False,
     )
     soundfile.write(flac_path, raw, sample_rate)
     if channels_first:
         oriented = raw.t()
     else:
         oriented = raw
     expected = normalize_wav(oriented)
     loaded = soundfile_backend.load(flac_path, channels_first=channels_first)
     data, sr = loaded
     assert sr == sample_rate
     self.assertEqual(data, expected, atol=1e-4, rtol=1e-8)