def test_freeze_and_unfreeze_parameters(self):
        """Verify that freezing pins the random parameters and unfreezing releases them.

        The same input is augmented three times: once normally, once after
        ``freeze_parameters()`` (output must match the first run exactly) and
        once after ``unfreeze_parameters()`` (first sample must differ from
        the first run).
        """
        torch.manual_seed(42)

        samples = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )
        sample_rate = 16000

        gain = Gain(min_gain_in_db=-16.000001, max_gain_in_db=-2, p=1.0)
        inversion = PolarityInversion(p=1.0)
        augment = Compose(transforms=[gain, inversion], output_type="dict")

        def run_augment():
            # Wrap the NumPy input in a new tensor for every call and return
            # the augmented samples as a NumPy array.
            output = augment(
                samples=torch.from_numpy(samples), sample_rate=sample_rate
            )
            return output.samples.numpy()

        baseline = run_augment()

        augment.freeze_parameters()
        frozen = run_augment()
        assert_array_equal(baseline, frozen)

        augment.unfreeze_parameters()
        unfrozen = run_augment()
        self.assertNotEqual(baseline[0, 0, 0], unfrozen[0, 0, 0])
    def test_shuffle(self):
        """Verify that ``shuffle=True`` makes both transform orders occur.

        The pipeline is run 100 times; the first output sample reveals which
        transform ran last, and each order must be seen more than 10 times.
        """
        random.seed(42)
        samples = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )
        sample_rate = 16000

        gain = Gain(min_gain_in_db=-18.0, max_gain_in_db=-16.0, p=1.0)
        peak_normalization = PeakNormalization(p=1.0)
        augment = Compose(
            transforms=[gain, peak_normalization],
            shuffle=True,
            output_type="dict",
        )

        gain_last_count = 0
        peak_normalization_last_count = 0
        for _ in range(100):
            output = augment(
                samples=torch.from_numpy(samples), sample_rate=sample_rate
            )
            first_value = output.samples.numpy()[0, 0, 0]

            # Either PeakNormalization or Gain was applied last: a small first
            # value is counted as "Gain last", exactly 1.0 as
            # "PeakNormalization last". Anything else is unexpected.
            if first_value < 0.2:
                gain_last_count += 1
            elif first_value == 1.0:
                peak_normalization_last_count += 1
            else:
                raise AssertionError("Unexpected value!")

        self.assertGreater(peak_normalization_last_count, 10)
        self.assertGreater(gain_last_count, 10)
Exemplo n.º 3
0
    def setUp(self):
        """Store a sample rate, a random audio tensor and a transform list on ``self``."""
        self.sample_rate = 16000
        # Random tensor created with dimensions (1, 1, 16000).
        self.audio = torch.randn(1, 1, 16000)

        gain = Gain(min_gain_in_db=-6.000001, max_gain_in_db=-2, p=1.0)
        polarity_inversion = PolarityInversion(p=1.0)
        peak_normalization = PeakNormalization(p=1.0)
        self.transforms = [gain, polarity_inversion, peak_normalization]
Exemplo n.º 4
0
 def training_step(self, batch, batch_nb):
     """Augment the source part of the batch with a gain transform and return the loss.

     ``batch`` is unpacked into ``(mix, source)``; only ``source`` is passed
     through the augmentation. The loss comes from ``self.common_step`` and
     is logged under the key ``"loss"``.
     """
     mix, source = batch

     # The augmentation pipeline is rebuilt on every call.
     gain = Gain(
         min_gain_in_db=-15.0,
         max_gain_in_db=5.0,
         p=0.5,
         mode="per_channel",
     )
     augmenter = Compose(transforms=[gain])
     augmented_source = augmenter(source, sample_rate=22050)

     loss = self.common_step((mix, augmented_source), batch_nb, train=True)
     self.log("loss", loss, logger=True)
     return loss
Exemplo n.º 5
0
    def test_compose_with_p_zero(self):
        """Verify that a ``Compose`` built with ``p=0.0`` returns its input unchanged."""
        samples = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )
        sample_rate = 16000

        # The inner transforms use p=1.0; only the Compose itself has p=0.0.
        gain = Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0)
        inversion = PolarityInversion(p=1.0)
        augment = Compose(transforms=[gain, inversion], p=0.0)

        output = augment(
            samples=torch.from_numpy(samples), sample_rate=sample_rate
        )
        assert_array_equal(samples, output.numpy())
Exemplo n.º 6
0
    def test_compose(self):
        """Verify that Gain followed by PolarityInversion scales the input as expected.

        The expected output is the input multiplied by the negated amplitude
        ratio for -6 dB, compared to 6 decimals, and the result must remain
        float32.
        """
        samples = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )
        sample_rate = 16000

        gain = Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0)
        inversion = PolarityInversion(p=1.0)
        augment = Compose([gain, inversion])

        output = augment(
            samples=torch.from_numpy(samples), sample_rate=sample_rate
        )
        processed_samples = output.numpy()

        # Negative sign accounts for the polarity inversion.
        expected_factor = -convert_decibels_to_amplitude_ratio(-6)
        reference = np.array(
            [[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32
        )
        assert_almost_equal(
            processed_samples, expected_factor * reference, decimal=6
        )
        self.assertEqual(processed_samples.dtype, np.float32)
Exemplo n.º 7
0
    def training_step(self, batch, batch_nb):
        """Augment the whole batch, then compute, log and return the training loss.

        The batch is passed through Gain, ShuffleChannels and PitchShift
        (in that order) before being handed to ``self.common_step``. The loss
        is logged under the key ``"loss"``.
        """
        # The augmentation pipeline is rebuilt on every call.
        gain = Gain(
            min_gain_in_db=-15.0,
            max_gain_in_db=5.0,
            p=0.5,
            mode="per_channel",
        )
        channel_shuffle = ShuffleChannels(mode="per_example")
        pitch_shift = PitchShift(
            min_transpose_semitones=-2,
            max_transpose_semitones=2,
            p=0.5,
            mode="per_example",
            sample_rate=44100,
        )
        augmenter = Compose(transforms=[gain, channel_shuffle, pitch_shift])

        augmented_batch = augmenter(batch, sample_rate=44100)
        loss = self.common_step(augmented_batch, batch_nb, train=True)
        self.log("loss", loss, logger=True)
        return loss
    def test_compose_without_specifying_output_type(self):
        """Verify the default return type and values of ``Compose`` without ``output_type``.

        When ``output_type`` is not given, the call is expected to return a
        ``torch.Tensor``. The values must equal the input scaled by the
        negated amplitude ratio for -6 dB (Gain followed by
        PolarityInversion), compared to 6 decimals, and stay float32.
        """
        samples = np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]],
                           dtype=np.float32)
        sample_rate = 16000

        augment = Compose([
            Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
            PolarityInversion(p=1.0),
        ])
        processed_samples = augment(samples=torch.from_numpy(samples),
                                    sample_rate=sample_rate)
        # The return type should be torch.Tensor until we switch to ObjectDict
        # as default. A unittest assertion is used instead of a bare `assert`
        # so the check is not stripped when Python runs with -O.
        self.assertIsInstance(processed_samples, torch.Tensor)
        processed_samples = processed_samples.numpy()

        # Negative sign accounts for the polarity inversion.
        expected_factor = -convert_decibels_to_amplitude_ratio(-6)
        assert_almost_equal(
            processed_samples,
            expected_factor *
            np.array([[[1.0, 0.5, -0.25, -0.125, 0.0]]], dtype=np.float32),
            decimal=6,
        )
        self.assertEqual(processed_samples.dtype, np.float32)
    HighPassFilter,
)

# Fixture locations under TEST_FIXTURES_DIR, passed to AddBackgroundNoise
# ("bg") and ApplyImpulseResponse ("ir") in the parametrized cases below.
BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg"
IR_PATH = TEST_FIXTURES_DIR / "ir"


@pytest.mark.parametrize(
    "augment",
    [
        # Differentiable transforms:
        AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0, output_type="dict"),
        ApplyImpulseResponse(IR_PATH, p=1.0, output_type="dict"),
        Compose(
            transforms=[
                Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0),
                PolarityInversion(p=1.0),
            ],
            output_type="dict",
        ),
        Gain(min_gain_in_db=-6.000001,
             max_gain_in_db=-6,
             p=1.0,
             output_type="dict"),
        PolarityInversion(p=1.0, output_type="dict"),
        Shift(p=1.0, output_type="dict"),
        # Non-differentiable transforms:
        # RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:
        # [torch.DoubleTensor [1, 1, 5]], which is output 0 of IndexBackward, is at version 1; expected version 0 instead.
        # Hint: enable anomaly detection to find the operation that failed to compute its gradient,
        # with torch.autograd.set_detect_anomaly(True).
    LowPassFilter,
    HighPassFilter,
)

# Fixture locations under TEST_FIXTURES_DIR, passed to AddBackgroundNoise
# ("bg") and ApplyImpulseResponse ("ir") in the parametrized cases below.
BG_NOISE_PATH = TEST_FIXTURES_DIR / "bg"
IR_PATH = TEST_FIXTURES_DIR / "ir"


@pytest.mark.parametrize(
    "augment",
    [
        # Differentiable transforms:
        AddBackgroundNoise(BG_NOISE_PATH, 20, p=1.0),
        ApplyImpulseResponse(IR_PATH, p=1.0),
        Compose(transforms=[
            Gain(min_gain_in_db=-15.0, max_gain_in_db=5.0, p=1.0),
            PolarityInversion(p=1.0),
        ]),
        Gain(min_gain_in_db=-6.000001, max_gain_in_db=-6, p=1.0),
        PolarityInversion(p=1.0),
        Shift(p=1.0),
        # Non-differentiable transforms:
        # RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:
        # [torch.DoubleTensor [1, 1, 5]], which is output 0 of IndexBackward, is at version 1; expected version 0 instead.
        # Hint: enable anomaly detection to find the operation that failed to compute its gradient,
        # with torch.autograd.set_detect_anomaly(True).
        pytest.param(HighPassFilter(p=1.0),
                     marks=pytest.mark.skip("Not differentiable")),
        pytest.param(LowPassFilter(p=1.0),
                     marks=pytest.mark.skip("Not differentiable")),
        pytest.param(PeakNormalization(p=1.0),
Exemplo n.º 11
0
     "instance": AddBackgroundNoise(
         background_paths=TEST_FIXTURES_DIR / "bg", mode=mode, p=1.0
     ),
     "num_runs": 5,
 },
 {
     "instance": ApplyImpulseResponse(
         ir_paths=TEST_FIXTURES_DIR / "ir", mode=mode, p=1.0
     ),
     "num_runs": 1,
 },
 {
     "instance": Compose(
         transforms=[
             Gain(
                 min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=1.0
             ),
             PeakNormalization(mode=mode, p=1.0),
         ],
         shuffle=True,
     ),
     "name": "Shuffled Compose with Gain and PeakNormalization",
     "num_runs": 5,
 },
 {
     "instance": Compose(
         transforms=[
             Gain(
                 min_gain_in_db=-18.0, max_gain_in_db=-16.0, mode=mode, p=0.5
             ),
             PolarityInversion(mode=mode, p=0.5),
Exemplo n.º 12
0
        channels=[1, 2, 4, 8],
        durations=[1, 2, 4, 8, 16],
        sample_rates=[16000],
        devices=["cpu", "cuda"],
    )

    if not torch.cuda.is_available():
        params["devices"].remove("cuda")

    devices = {
        device_name: torch.device(device_name)
        for device_name in params["devices"]
    }

    transforms = [
        Gain(p=1.0),
        PolarityInversion(p=1.0),
        PeakNormalization(p=1.0),
        Shift(p=1.0),
    ]

    perf_objects = []

    for device_name in params["devices"]:
        device = devices[device_name]
        for batch_size in tqdm(params["batch_sizes"]):
            for num_channels in params["channels"]:
                for duration in params["durations"]:
                    for sample_rate in params["sample_rates"]:
                        for transform in transforms:
                            perf_objects += measure_execution_time(
Exemplo n.º 13
0
                               mono=False)
    samples2, _ = librosa.load(os.path.join(TEST_FIXTURES_DIR, filenames[1]),
                               sr=SAMPLE_RATE,
                               mono=False)
    samples = np.stack((samples1, samples2), axis=0)
    samples = torch.from_numpy(samples)

    modes = ["per_batch", "per_example", "per_channel"]
    for mode in modes:
        transforms = [
            {
                "instance":
                Compose(
                    transforms=[
                        Gain(min_gain_in_db=-18.0,
                             max_gain_in_db=-16.0,
                             mode=mode,
                             p=1.0),
                        PeakNormalization(mode=mode, p=1.0),
                    ],
                    shuffle=True,
                ),
                "name":
                "Shuffled Compose with Gain and PeakNormalization",
                "num_runs":
                5,
            },
            {
                "instance":
                Compose(
                    transforms=[
                        Gain(min_gain_in_db=-18.0,
Exemplo n.º 14
0
    filenames = ["perfect-alley1.ogg", "perfect-alley2.ogg"]
    samples1, _ = librosa.load(os.path.join(TEST_FIXTURES_DIR, filenames[0]),
                               sr=SAMPLE_RATE,
                               mono=False)
    samples2, _ = librosa.load(os.path.join(TEST_FIXTURES_DIR, filenames[1]),
                               sr=SAMPLE_RATE,
                               mono=False)
    samples = np.stack((samples1, samples2), axis=0)
    samples = torch.from_numpy(samples)

    modes = ["per_batch", "per_example", "per_channel"]
    for mode in modes:
        transforms = [
            {
                "instance": Gain(mode=mode, p=1.0),
                "num_runs": 5
            },
            {
                "instance": PolarityInversion(mode=mode, p=1.0),
                "num_runs": 1
            },
            {
                "instance": PeakNormalization(mode=mode, p=1.0),
                "num_runs": 1
            },
        ]

        execution_times = {}

        for transform in transforms:
Exemplo n.º 15
0
     1,
 },
 {
     "get_instance": lambda: BandPassFilter(mode=mode, p=1.0),
     "num_runs": 5
 },
 {
     "get_instance": lambda: BandStopFilter(mode=mode, p=1.0),
     "num_runs": 5
 },
 {
     "get_instance":
     lambda: Compose(
         transforms=[
             Gain(min_gain_in_db=-18.0,
                  max_gain_in_db=-16.0,
                  mode=mode,
                  p=1.0),
             PeakNormalization(mode=mode, p=1.0),
         ],
         shuffle=True,
     ),
     "name":
     "Shuffled Compose with Gain and PeakNormalization",
     "num_runs":
     5,
 },
 {
     "get_instance":
     lambda: Compose(
         transforms=[
             Gain(min_gain_in_db=-18.0,
Exemplo n.º 16
0
import torch
from torch_audiomentations import Compose, Gain, PolarityInversion
import torchaudio

# Initialize augmentation callable.
# Gain is always applied (p=1.0) with the configured -15 dB .. +40 dB range;
# PolarityInversion is present in the pipeline but disabled (p=0.0).
apply_augmentation = Compose(transforms=[
    Gain(
        min_gain_in_db=-15.0,
        max_gain_in_db=40.0,
        p=1.0,
    ),
    PolarityInversion(p=0.0)
])

# NOTE(review): torch_device is selected here but not used anywhere below;
# the loaded audio stays on whatever device torchaudio.load returns it on.
torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Alternative input for quick experiments: a white-noise tensor representing
# 8 audio snippets with 2 channels (stereo) and 2 s of 16 kHz audio.
# audio_samples = torch.rand(size=(8, 2, 32000), dtype=torch.float32, device=torch_device) - 0.5
audio_samples = torchaudio.load(
    "/Users/bdubel/Documents/ZHAW/BA/data/eth_ch_dialects/ag/ch_ag_0107.wav")

# torchaudio.load returns (waveform, sample_rate). Use the file's real sample
# rate instead of assuming 16 kHz, so the saved file is not written with a
# wrong rate when the input uses a different one.
waveform, sample_rate = audio_samples

# Apply augmentation. torch-audiomentations documents its input as
# (batch_size, num_channels, num_samples), while the loaded waveform is
# (num_channels, num_samples): add a batch dimension of 1 for the call and
# drop it again afterwards, because torchaudio.save expects a 2-D tensor.
perturbed_audio_samples = apply_augmentation(
    waveform.unsqueeze(0), sample_rate=sample_rate).squeeze(0)
torchaudio.save(
    '/Users/bdubel/Documents/ZHAW/BA/data/swiss_all/perturbation/test1.flac',
    perturbed_audio_samples,
    sample_rate=sample_rate)