def test_freeze_and_unfreeze_parameters(self):
        """Check freeze_parameters()/unfreeze_parameters() on a Compose.

        The output produced after freezing must equal the output produced
        just before it, and the first output value after unfreezing must
        differ from that earlier output.
        """
        torch.manual_seed(42)

        waveform = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]],
                            dtype=np.float32)
        rate = 16000

        pipeline = Compose(
            transforms=[
                Gain(min_gain_in_db=-16.000001, max_gain_in_db=-2, p=1.0),
                PolarityInversion(p=1.0),
            ],
            output_type="dict",
        )

        def run_pipeline():
            # A fresh tensor wrapper around the NumPy input is made per call.
            result = pipeline(samples=torch.from_numpy(waveform),
                              sample_rate=rate)
            return result.samples.numpy()

        before_freeze = run_pipeline()
        pipeline.freeze_parameters()
        while_frozen = run_pipeline()
        assert_array_equal(before_freeze, while_frozen)

        pipeline.unfreeze_parameters()
        after_unfreeze = run_pipeline()
        self.assertNotEqual(before_freeze[0, 0, 0], after_unfreeze[0, 0, 0])
Exemplo n.º 2
0
    def setUp(self):
        """Create the sample rate, a random audio tensor and the transform list."""
        self.sample_rate = 16000
        # Random tensor with shape (1, 1, 16000).
        self.audio = torch.randn(1, 1, 16000)

        gain = Gain(min_gain_in_db=-6.000001, max_gain_in_db=-2, p=1.0)
        invert = PolarityInversion(p=1.0)
        normalize = PeakNormalization(p=1.0)
        self.transforms = [gain, invert, normalize]
Exemplo n.º 3
0
 def test_ndim_check(self):
     """1D, 2D and 4D input tensors must each raise RuntimeError."""
     augment = PolarityInversion(p=1.0)
     values = [1.0, 0.5, 0.25, 0.125]
     # The same values nested as a 1D, a 2D and a 4D input; none is allowed.
     rejected_inputs = (values, [values], [[[values]]])
     for nested in rejected_inputs:
         with pytest.raises(RuntimeError):
             augment(torch.tensor(nested, dtype=torch.float32))
Exemplo n.º 4
0
    def test_polarity_inversion_cuda(self):
        """Invert polarity on a CUDA tensor and compare the result on the CPU."""
        sample_rate = 16000
        samples = np.array([[1.0, 0.5, -0.25, -0.125, 0.0]], dtype=np.float32)
        expected = np.array([[-1.0, -0.5, 0.25, 0.125, 0.0]], dtype=np.float32)

        augment = PolarityInversion(p=1.0).cuda()
        gpu_input = torch.from_numpy(samples).cuda()
        gpu_output = augment(samples=gpu_input, sample_rate=sample_rate)
        # Bring the result back to host memory before comparing with NumPy.
        inverted_samples = gpu_output.cpu().numpy()

        assert_almost_equal(inverted_samples, expected)
        self.assertEqual(inverted_samples.dtype, np.float32)
Exemplo n.º 5
0
    def test_polarity_inversion_zero_probability(self):
        """With p=0.0 the output must equal the input values and stay float32."""
        sample_rate = 16000
        values = [[1.0, 0.5, -0.25, -0.125, 0.0]]
        samples = np.array(values, dtype=np.float32)
        # Separate array, so the expectation never aliases the input buffer.
        untouched = np.array(values, dtype=np.float32)

        augment = PolarityInversion(p=0.0)
        output_tensor = augment(samples=torch.from_numpy(samples),
                                sample_rate=sample_rate)
        processed_samples = output_tensor.numpy()

        assert_almost_equal(processed_samples, untouched)
        self.assertEqual(processed_samples.dtype, np.float32)
Exemplo n.º 6
0
    def test_polarity_inversion(self):
        """With p=1.0 every sample is negated; the dict-style output keeps float32."""
        sample_rate = 16000
        samples = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]],
                           dtype=np.float32)
        expected = np.array([[[-1.0, -0.5, 0.25, 0.125, 0.0]]],
                            dtype=np.float32)

        augment = PolarityInversion(p=1.0, output_type="dict")
        output = augment(samples=torch.from_numpy(samples),
                         sample_rate=sample_rate)
        # With output_type="dict" the tensor is read from the .samples attribute.
        inverted_samples = output.samples.numpy()

        assert_almost_equal(inverted_samples, expected)
        self.assertEqual(inverted_samples.dtype, np.float32)
Exemplo n.º 7
0
    def test_compose_with_p_zero(self):
        """A Compose with p=0.0 must return the samples unchanged.

        Both member transforms use p=1.0, so any change in the output would
        mean the Compose-level probability was not honoured.
        """
        sample_rate = 16000
        samples = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]],
                           dtype=np.float32)

        member_transforms = [
            Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
            PolarityInversion(p=1.0),
        ]
        augment = Compose(transforms=member_transforms, p=0.0)

        output_tensor = augment(samples=torch.from_numpy(samples),
                                sample_rate=sample_rate)
        assert_array_equal(samples, output_tensor.numpy())
Exemplo n.º 8
0
    def test_compose_with_torchaudio_transform(self):
        """Compose a Vol transform with PolarityInversion and check the result.

        The expected output is the input scaled by the negated amplitude
        ratio of -6 dB.
        """
        sample_rate = 16000
        samples = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]],
                           dtype=np.float32)

        volume = Vol(gain=-6, gain_type="db")
        inversion = PolarityInversion(p=1.0)
        augment = Compose([volume, inversion])

        output_tensor = augment(samples=torch.from_numpy(samples),
                                sample_rate=sample_rate)
        processed_samples = output_tensor.numpy()

        # Negative sign: the polarity inversion flips the attenuated signal.
        expected_factor = -convert_decibels_to_amplitude_ratio(-6)
        reference = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]],
                             dtype=np.float32)
        assert_almost_equal(processed_samples,
                            expected_factor * reference,
                            decimal=6)
        self.assertEqual(processed_samples.dtype, np.float32)
Exemplo n.º 9
0
    def test_compose(self):
        """Chain Gain (about -6 dB) with PolarityInversion and check the result.

        The expected output is the input scaled by the negated amplitude
        ratio of -6 dB.
        """
        sample_rate = 16000
        samples = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]],
                           dtype=np.float32)

        gain = Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0)
        inversion = PolarityInversion(p=1.0)
        augment = Compose([gain, inversion])

        output_tensor = augment(samples=torch.from_numpy(samples),
                                sample_rate=sample_rate)
        processed_samples = output_tensor.numpy()

        # Negative sign: the polarity inversion flips the attenuated signal.
        expected_factor = -convert_decibels_to_amplitude_ratio(-6)
        reference = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]],
                             dtype=np.float32)
        assert_almost_equal(processed_samples,
                            expected_factor * reference,
                            decimal=6)
        self.assertEqual(processed_samples.dtype, np.float32)
Exemplo n.º 10
0
    def test_polarity_inversion_multichannel(self):
        """With p=1.0 both rows of a two-row input are negated; dtype stays float32."""
        sample_rate = 16000
        row = [1.0, 0.5, -0.25, -0.125, 0.0]
        negated_row = [-1.0, -0.5, 0.25, 0.125, 0.0]
        samples = np.array([row, row], dtype=np.float32)
        expected = np.array([negated_row, negated_row], dtype=np.float32)

        augment = PolarityInversion(p=1.0)
        output_tensor = augment(samples=torch.from_numpy(samples),
                                sample_rate=sample_rate)
        inverted_samples = output_tensor.numpy()

        assert_almost_equal(inverted_samples, expected)
        self.assertEqual(inverted_samples.dtype, np.float32)
    def test_compose_without_specifying_output_type(self):
        """Compose without an explicit output_type must return a plain tensor.

        Besides the return type, the values are checked against the input
        scaled by the negated amplitude ratio of -6 dB.
        """
        samples = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]],
                           dtype=np.float32)
        sample_rate = 16000

        augment = Compose([
            Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
            PolarityInversion(p=1.0),
        ])
        processed_samples = augment(samples=torch.from_numpy(samples),
                                    sample_rate=sample_rate)
        # The return type should be exactly torch.Tensor until ObjectDict
        # becomes the default. An identity check on the type keeps the
        # exact-type semantics and, unlike a bare assert, survives `python -O`.
        self.assertIs(type(processed_samples), torch.Tensor)
        processed_samples = processed_samples.numpy()
        # Negative sign: the polarity inversion flips the attenuated signal.
        expected_factor = -convert_decibels_to_amplitude_ratio(-6)
        assert_almost_equal(
            processed_samples,
            expected_factor *
            np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32),
            decimal=6,
        )
        self.assertEqual(processed_samples.dtype, np.float32)
Exemplo n.º 12
0
    def test_polarity_inversion_variability_within_batch(self):
        """With p=0.5 only part of a 10000-row batch should be inverted.

        The number of inverted rows must lie strictly between 2000 and 8000.
        """
        samples = np.array([1.0, 0.5, 0.25, 0.125, 0.0], dtype=np.float32)
        samples_batch = np.vstack([samples] * 10000)
        sample_rate = 16000

        augment = PolarityInversion(p=0.5)
        processed_samples = augment(samples=torch.from_numpy(samples_batch),
                                    sample_rate=sample_rate).numpy()

        # Every input value is non-negative and the row sum is positive, so a
        # row whose sum is still positive was left untouched; any other row
        # counts as processed. One vectorised reduction replaces the
        # Python-level loop over all rows.
        row_sums = processed_samples.sum(axis=1)
        num_unprocessed_examples = int(np.count_nonzero(row_sums > 0))
        num_processed_examples = (processed_samples.shape[0] -
                                  num_unprocessed_examples)

        self.assertEqual(num_unprocessed_examples + num_processed_examples,
                         10000)

        self.assertGreater(num_processed_examples, 2000)
        self.assertLess(num_processed_examples, 8000)
Exemplo n.º 13
0
         shuffle=True,
     ),
     "name":
     "Shuffled Compose with Gain and PeakNormalization",
     "num_runs":
     5,
 },
 {
     "get_instance":
     lambda: Compose(
         transforms=[
             Gain(min_gain_in_db=-18.0,
                  max_gain_in_db=-16.0,
                  mode=mode,
                  p=0.5),
             PolarityInversion(mode=mode, p=0.5),
         ],
         shuffle=True,
     ),
     "name":
     "Compose with Gain and PolarityInversion",
     "num_runs":
     5,
 },
 {
     "get_instance": lambda: Gain(mode=mode, p=1.0),
     "num_runs": 5
 },
 {
     "get_instance": lambda: HighPassFilter(mode=mode, p=1.0),
     "num_runs": 5
Exemplo n.º 14
0
 def test_parameters(self):
     """The transform's parameters() call must return a generator object."""
     # parameters() is the function provided by nn.Module.
     module = PolarityInversion(p=1.0)
     assert isinstance(module.parameters(), types.GeneratorType)
Exemplo n.º 15
0
                            min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=1.0
                        ),
                        PeakNormalization(mode=mode, p=1.0),
                    ],
                    shuffle=True,
                ),
                "name": "Shuffled Compose with Gain and PeakNormalization",
                "num_runs": 5,
            },
            {
                "get_instance": lambda: Compose(
                    transforms=[
                        Gain(
                            min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=0.5
                        ),
                        PolarityInversion(mode=mode, p=0.5),
                    ],
                    shuffle=True,
                ),
                "name": "Compose with Gain and PolarityInversion",
                "num_runs": 5,
            },
            {"get_instance": lambda: Gain(mode=mode, p=1.0), "num_runs": 5},
            {"get_instance": lambda: PolarityInversion(mode=mode, p=1.0), "num_runs": 1},
            {"get_instance": lambda: PeakNormalization(mode=mode, p=1.0), "num_runs": 1},
            {"get_instance": lambda: Shift(mode=mode, p=1.0), "num_runs": 5},
            {"get_instance": lambda: ShuffleChannels(mode=mode, p=1.0), "num_runs": 5},
        ]

        execution_times = {}
Exemplo n.º 16
0
if __name__ == "__main__":
    """
    For each transformation, apply it to an example sound and write the transformed sounds to
    an output folder. Also crudely measure and print execution time.
    """
    # Create the "output" folder inside SCRIPTS_DIR; an existing folder is fine.
    output_dir = os.path.join(SCRIPTS_DIR, "output")
    os.makedirs(output_dir, exist_ok=True)

    # Seed both NumPy's and Python's global random generators.
    np.random.seed(42)
    random.seed(42)

    # Load the example sound from the test fixtures at SAMPLE_RATE; the second
    # value returned by librosa.load is discarded.
    samples, _ = librosa.load(
        os.path.join(TEST_FIXTURES_DIR, "acoustic_guitar_0.wav"), sr=SAMPLE_RATE
    )

    # Each entry holds an augmenter instance and the number of runs; an
    # optional "name" key is looked up below to label the run.
    transforms = [{"instance": PolarityInversion(p=1.0), "num_runs": 1}]

    # Maps each run name to a list that starts out empty.
    execution_times = {}

    for transform in transforms:
        augmenter = transform["instance"]
        # Use the entry's "name" when it is present and truthy; otherwise fall
        # back to the class name of the instance.
        run_name = (
            transform.get("name")
            if transform.get("name")
            else transform["instance"].__class__.__name__
        )
        execution_times[run_name] = []
        for i in range(transform["num_runs"]):
            # Output files are named <run_name>_<zero-padded 3-digit index>.wav.
            output_file_path = os.path.join(
                output_dir, "{}_{:03d}.wav".format(run_name, i)
            )
)

BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg"
IR_PATH = TEST_FIXTURES_DIR / "ir"


@pytest.mark.parametrize(
    "augment",
    [
        # Differentiable transforms:
        AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0, output_type="dict"),
        ApplyImpulseResponse(IR_PATH, p=1.0, output_type="dict"),
        Compose(
            transforms=[
                Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0),
                PolarityInversion(p=1.0),
            ],
            output_type="dict",
        ),
        Gain(min_gain_in_db=-6.000001,
             max_gain_in_db=-6,
             p=1.0,
             output_type="dict"),
        PolarityInversion(p=1.0, output_type="dict"),
        Shift(p=1.0, output_type="dict"),
        # Non-differentiable transforms:
        # RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:
        # [torch.DoubleTensor [1, 1, 5]], which is output 0 of IndexBackward, is at version 1; expected version 0 instead.
        # Hint: enable anomaly detection to find the operation that failed to compute its gradient,
        # with torch.autograd.set_detect_anomaly(True).
        pytest.param(
Exemplo n.º 18
0
         shuffle=True,
     ),
     "name":
     "Shuffled Compose with Gain and PeakNormalization",
     "num_runs":
     5,
 },
 {
     "instance":
     Compose(
         transforms=[
             Gain(min_gain_in_db=-18.0,
                  max_gain_in_db=-16.0,
                  mode=mode,
                  p=0.5),
             PolarityInversion(mode=mode, p=0.5),
         ],
         shuffle=True,
     ),
     "name":
     "Compose with Gain and PolarityInversion",
     "num_runs":
     5,
 },
 {
     "instance": Gain(mode=mode, p=1.0),
     "num_runs": 5
 },
 {
     "instance": PolarityInversion(mode=mode, p=1.0),
     "num_runs": 1
Exemplo n.º 19
0
                            min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=1.0
                        ),
                        PeakNormalization(mode=mode, p=1.0),
                    ],
                    shuffle=True,
                ),
                "name": "Shuffled Compose with Gain and PeakNormalization",
                "num_runs": 5,
            },
            {
                "instance": Compose(
                    transforms=[
                        Gain(
                            min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=0.5
                        ),
                        PolarityInversion(mode=mode, p=0.5),
                    ],
                    shuffle=True,
                ),
                "name": "Compose with Gain and PolarityInversion",
                "num_runs": 5,
            },
            {"instance": Gain(mode=mode, p=1.0), "num_runs": 5},
            {"instance": PolarityInversion(mode=mode, p=1.0), "num_runs": 1},
            {"instance": PeakNormalization(mode=mode, p=1.0), "num_runs": 1},
            {"instance": Shift(mode=mode, p=1.0), "num_runs": 5},
        ]

        execution_times = {}

        for transform in transforms:
Exemplo n.º 20
0
import torch
from torch_audiomentations import Compose, Gain, PolarityInversion
import torchaudio

# Initialize augmentation callable.
# Gain is always applied (p=1.0); PolarityInversion is disabled here (p=0.0).
apply_augmentation = Compose(transforms=[
    Gain(
        min_gain_in_db=-15.0,
        max_gain_in_db=40.0,
        p=1.0,
    ),
    PolarityInversion(p=0.0)
])

# NOTE: torch_device is not used below; the loaded audio is processed as-is.
torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load an example recording. torchaudio.load returns a
# (waveform, sample_rate) tuple, so unpack it instead of indexing it and
# hard-coding the rate.
audio_samples = torchaudio.load(
    "/Users/bdubel/Documents/ZHAW/BA/data/eth_ch_dialects/ag/ch_ag_0107.wav")
waveform, source_sample_rate = audio_samples

# Apply augmentation using the sample rate reported by the file itself.
# NOTE(review): the waveform is passed without a batch dimension - confirm
# that the installed torch_audiomentations version accepts this input shape.
perturbed_audio_samples = apply_augmentation(waveform,
                                             sample_rate=source_sample_rate)
# Save with the source sample rate so the playback speed is preserved.
torchaudio.save(
    '/Users/bdubel/Documents/ZHAW/BA/data/swiss_all/perturbation/test1.flac',
    perturbed_audio_samples,
    sample_rate=source_sample_rate)
    HighPassFilter,
)

BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg"
IR_PATH = TEST_FIXTURES_DIR / "ir"


@pytest.mark.parametrize(
    "augment",
    [
        # Differentiable transforms:
        AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0),
        ApplyImpulseResponse(IR_PATH, p=1.0),
        Compose(transforms=[
            Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0),
            PolarityInversion(p=1.0),
        ]),
        Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
        PolarityInversion(p=1.0),
        Shift(p=1.0),
        # Non-differentiable transforms:
        # RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:
        # [torch.DoubleTensor [1, 1, 5]], which is output 0 of IndexBackward, is at version 1; expected version 0 instead.
        # Hint: enable anomaly detection to find the operation that failed to compute its gradient,
        # with torch.autograd.set_detect_anomaly(True).
        pytest.param(HighPassFilter(p=1.0),
                     marks=pytest.mark.skip("Not differentiable")),
        pytest.param(LowPassFilter(p=1.0),
                     marks=pytest.mark.skip("Not differentiable")),
        pytest.param(PeakNormalization(p=1.0),
                     marks=pytest.mark.skip("Not differentiable")),