def test_crop_time():
    # Crop (or pad) a 3 s clip to 1 s, 2 s and 5 s targets: the output duration
    # should round to the target, and the width should scale by (target / original duration).
    for i in [1, 2, 5]:
        a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
        audio = test_audio_tensor(seconds=3)
        crop = CropTime(i * 1000)  # CropTime takes the target length in milliseconds
        inp, out = apply_transform(crop, a2s(audio))
        _test_eq(i, round(out.duration))
        _test_close(out.width, int((i / inp.duration) * inp.width), eps=1.01)
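The width assertion above encodes a simple proportion: keeping i seconds of a clip keeps i / duration of the spectrogram frames (padding past the original length scales it the same way). A worked example with illustrative numbers, not taken from the test fixture:

# A 3 s input that produced 300 spectrogram frames, cropped to 2 s, should keep
# roughly (2 / 3) * 300 = 200 frames. Float truncation lands int() on 199 here,
# which is presumably why the test compares with eps=1.01 rather than exact equality.
inp_duration, inp_width = 3.0, 300
expected_width = int((2 / inp_duration) * inp_width)
assert expected_width in (199, 200)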
Example #2
def test_mask_time():
    c, f, t = 2, 120, 80

    min_size = 5
    max_size = 7

    sg = AudioSpectrogram(torch.rand([c, f, t]))
    val = 10  # Use a value not in the original spectrogram
    gradient_sg = AudioSpectrogram(
        torch.linspace(0, 1, t).view(1, 1, t).repeat([c, f, 1]))
    ones = torch.ones_like(sg)

    # Test patching with mean
    with patch(
            "fastaudio.augment.functional.region_mask",
            side_effect=[
                torch.BoolTensor([[1] * 10 + [0] * (t - 10)]),
            ],
    ):
        mask_with_mean = MaskTimeGPU(min_size=min_size,
                                     max_size=max_size,
                                     mask_val=None)
        # Use a gradient so we can be sure the mean will never show up outside the mask
        inp, out = apply_transform(mask_with_mean, gradient_sg)
        channelwise_mean = inp[..., :10].mean(dim=(-2, -1)).reshape(-1, 1, 1)
        _test_close(
            out[..., :10],
            channelwise_mean * ones[..., :10],
        )
        assert not (out[..., 10:]
                    == channelwise_mean).any(), out == channelwise_mean

    # Test multiple masks (and patching with value)
    with patch(
            "fastaudio.augment.functional.region_mask",
            side_effect=[
                torch.BoolTensor([[1] * 10 + [0] * (t - 10),
                                  [0] * (t - 10) + [1] * 10]),
            ],
    ):
        mask_with_val = MaskTimeGPU(min_size=min_size,
                                    num_masks=2,
                                    max_size=max_size,
                                    mask_val=val)
        inp, out = apply_transform(mask_with_val, sg)
        _test_eq(
            out[..., :10],
            val * ones[..., :10],
        )
        _test_eq(
            out[..., t - 10:],
            val * ones[..., t - 10:],
        )
        matches = out[..., 10:t - 10] == val
        assert not matches.any(), matches
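Both branches above patch fastaudio.augment.functional.region_mask with hand-built boolean masks, so the only contract the test relies on is the shape: a [num_masks, time] boolean tensor with one contiguous True run per row. A minimal stand-in with that shape (an illustrative sketch, not the library's actual implementation) could look like:

import torch

def fake_region_mask(num_masks, min_size, max_size, length):
    # One contiguous True region per row, each between min_size and max_size steps long.
    sizes = torch.randint(min_size, max_size + 1, (num_masks, 1))
    starts = (torch.rand(num_masks, 1) * (length - sizes)).long()
    positions = torch.arange(length).unsqueeze(0)  # shape [1, length]
    return (positions >= starts) & (positions < starts + sizes)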
Example #3
def test_upsample(audio):
    """
    Make sure that the Upsampling is possible. This can
    take a while depending on the target sample rate
    """
    for _ in range(10):
        random_sr = random.randint(16000, 72000)
        random_upsample = Resample(random_sr)(audio)
        num_samples = random_upsample.nsamples
        _test_close(num_samples,
                    abs(audio.nsamples // (audio.sr / random_sr)),
                    eps=1.1)
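The expected sample count here is just the resampling ratio: nsamples scales by roughly target_sr / original_sr, and eps=1.1 absorbs the rounding from the floor division. Illustrative numbers, not taken from the audio fixture:

# Resampling a 16 kHz clip of 32000 samples (2 s) up to 48 kHz should give
# about 32000 * 48000 / 16000 = 96000 samples.
orig_sr, orig_nsamples, target_sr = 16000, 32000, 48000
assert orig_nsamples // (orig_sr / target_sr) == 96000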