Exemplo n.º 1
0
 def encodes(self, sg: AudioSpectrogram) -> AudioSpectrogram:
     """Overwrite `self.num_masks` horizontal bands of `sg` with a fill value.

     Each band spans `self.size` consecutive indices along dim 1 of the
     (c, y, x) spectrogram (presumably the frequency axis -- confirm against
     the enclosing class) and the whole of dim 2. The fill value is
     `self.val` when set, otherwise the per-channel mean of `sg`.

     The first band starts at `self.start` when set, otherwise at a random
     position; every following band starts at a fresh random position.

     `sg` is modified in place and also returned.

     Raises:
         ValueError: if `self.size` is larger than dim 1 of `sg`, or if the
             start position does not leave room for a full band.
     """
     c, y, x = sg.shape
     max_start = y - self.size
     if max_start < 0:
         # Without this check random.randint() below fails with an opaque
         # "empty range" ValueError; same exception type, clearer message.
         raise ValueError(
             f"Mask size '{self.size}' out of range for AudioSpectrogram of shape {sg.shape}"
         )
     # Per-channel mean, shaped (c, 1, 1) so it broadcasts over a band.
     channel_mean = sg.contiguous().view(sg.size(0), -1).mean(-1)[:, None, None]
     mask_val = ifnone(self.val, channel_mean)
     # The band content does not depend on its position, so build it once.
     mask = torch.ones(self.size, x) * mask_val
     # Position of the first mask. The random draw is evaluated even when
     # self.start is set, so the random stream does not depend on that setting.
     start = ifnone(self.start, random.randint(0, max_start))
     for _ in range(self.num_masks):
         if not 0 <= start <= max_start:
             raise ValueError(
                 f"Start value '{start}' out of range for AudioSpectrogram of shape {sg.shape}"
             )
         sg[:, start : start + self.size, :] = mask
         # Setting start position for next mask
         start = random.randint(0, max_start)
     return sg
Exemplo n.º 2
0
 def encodes(self, ai: AudioTensor) -> AudioTensor:
     """Force `ai` to hold exactly the number of samples implied by `self.duration`.

     The target length is `self.duration / 1000 * ai.sr` truncated to an int,
     i.e. `self.duration` is treated as milliseconds (assuming `ai.sr` is in
     samples per second).

     * Already the right length: `ai` is returned untouched.
     * Too short: `ai.data` is replaced by the result of `_tfm_pad_signal`
       using `self.pad_mode`.
     * Too long: `ai.data` is replaced by a window of the target length
       starting at a random offset along dim 1.

     `ai` is updated in place and returned.
     """
     signal = ai.data
     n_have = ai.nsamples
     n_want = int((self.duration / 1000) * ai.sr)

     if n_have == n_want:
         return ai

     if n_have < n_want:
         ai.data = _tfm_pad_signal(signal, n_want, pad_mode=self.pad_mode)
         return ai

     # Longer than requested: keep a randomly placed window of n_want samples.
     offset = random.randint(0, int(n_have - n_want))
     ai.data = signal[:, offset : offset + n_want]
     return ai
Exemplo n.º 3
0
def _tfm_pad_signal(sig, width, pad_mode=AudioPadType.Zeros):
    """Pad signal to specified width, using specified pad mode

    `sig` is a 2-D tensor of shape (c, x); the result has shape (c, width).

    Pad modes:
        AudioPadType.Zeros: zeros on both sides; the number placed in front
            is drawn at random from [0, width - x], the rest go at the back.
        AudioPadType.Zeros_After: all zeros are appended at the back.
        AudioPadType.Repeat: `sig` is tiled along dim 1 and cut to `width`.

    Raises:
        ValueError: if `pad_mode` is none of the modes listed above.
    """
    c, x = sig.shape
    if pad_mode in [AudioPadType.Zeros, AudioPadType.Zeros_After]:
        zeros_front = (random.randint(0, width - x)
                       if pad_mode == AudioPadType.Zeros else 0)
        pad_front = torch.zeros((c, zeros_front))
        pad_back = torch.zeros((c, width - x - zeros_front))
        return torch.cat((pad_front, sig, pad_back), 1)
    elif pad_mode == AudioPadType.Repeat:
        # One repeat more than the integer quotient guarantees at least
        # `width` samples before truncating.
        repeats = width // x + 1
        return sig.repeat(1, repeats)[:, :width]
    else:
        # Fail loudly instead of implicitly returning None, which would only
        # surface later as a confusing error in the caller.
        raise ValueError(
            f"pad_mode {pad_mode} not currently supported, "
            "only AudioPadType.Zeros, AudioPadType.Zeros_After, "
            "or AudioPadType.Repeat"
        )
Exemplo n.º 4
0
 def encodes(self, sg: AudioSpectrogram) -> AudioSpectrogram:
     """Crop or pad `sg` along its last axis to match `self.duration`.

     The target width in frames is `sr * self.duration / (1000 * hop)`
     truncated to an int, plus one; `self.duration` is therefore treated as
     milliseconds (assuming `sg.sr` is in samples per second).

     * Same width: the data is left as is.
     * Narrower: the data is padded by `_tfm_pad_spectro` using
       `self.pad_mode`.
     * Wider: a window of the target width starting at a random frame is
       kept, and `sample_start` / `sample_end` (in samples) are recorded on
       the cropped slice.

     `sg.data` is replaced with the result and `sg` is returned.
     """
     sr, hop = sg.sr, sg.hop_length
     w_crop = int((sr * self.duration) / (1000 * hop)) + 1
     w_sg = sg.shape[-1]
     if w_sg == w_crop:
         sg_crop = sg
     elif w_sg < w_crop:
         sg_crop = _tfm_pad_spectro(sg, w_crop, pad_mode=self.pad_mode)
     else:
         crop_start = random.randint(0, int(w_sg - w_crop))
         sg_crop = sg[:, :, crop_start : crop_start + w_crop]
         # NOTE(review): these attributes are set on the slice, not on `sg`;
         # whether they survive the `sg.data = sg_crop` assignment below is
         # not visible from here -- confirm.
         sg_crop.sample_start = int(crop_start * hop)
         # duration is in milliseconds (see w_crop above), so convert to
         # seconds before scaling by the sample rate.
         sg_crop.sample_end = sg_crop.sample_start + int(sr * self.duration / 1000)
     sg.data = sg_crop
     return sg
Exemplo n.º 5
0
def _tfm_pad_spectro(sg, width, pad_mode=AudioPadType.Zeros):
    """Widen a (c, y, x) spectrogram to `width` along its last axis.

    Pad modes:
        AudioPadType.Zeros: `sg` is written into an all-zero tensor at an
            offset drawn at random from [0, width - x].
        AudioPadType.Zeros_After: `sg` is written at offset 0, so all zeros
            end up after it.
        AudioPadType.Repeat: `sg` is tiled along the last axis and cut to
            `width`.

    Returns a tensor of shape (c, y, width).

    Raises:
        ValueError: if `pad_mode` is none of the modes listed above.
    """
    n_chan, n_rows, n_cols = sg.shape

    if pad_mode == AudioPadType.Repeat:
        # One tile more than the integer quotient guarantees enough columns.
        n_tiles = width // n_cols + 1
        return sg.repeat(1, 1, n_tiles)[:, :, :width]

    if pad_mode not in [AudioPadType.Zeros, AudioPadType.Zeros_After]:
        raise ValueError(
            f"pad_mode {pad_mode} not currently supported,\n"
            "            only AudioPadType.Zeros, AudioPadType.Zeros_After,\n"
            "            or AudioPadType.Repeat"
        )

    canvas = torch.zeros((n_chan, n_rows, width))
    if pad_mode == AudioPadType.Zeros:
        offset = random.randint(0, width - n_cols)
    else:
        offset = 0
    canvas[:, :, offset:offset + n_cols] = sg.data
    return canvas