def __getitem__(self, idx):
        """Return ``(log_spectrogram, label)`` for row ``idx`` of ``self.data``.

        Column 0 of the row is used as the audio path and column 1 as the
        label.  The clip is loaded with ``torchaudio.load``; when a coin flip
        comes up 1 the waveform is passed through a TimeStretch + Shift
        pipeline before the spectrogram is computed and converted to
        decibels.

        Every intermediate value (path, label, signal, sample rate,
        spectrogram, log-spectrogram) is also stored on ``self``.
        """
        # Draw the coin flip before anything else so the RNG is consumed in
        # the same order on every call.
        apply_augmentation = random.randint(0, 1) == 1

        waveform_augmenter = Compose([
            TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
            Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5, rollover=False),
        ])

        self.wavPath = str(self.data.iloc[idx, 0])
        self.label = self.data.iloc[idx, 1]

        self.signal, self.sr = torchaudio.load(self.wavPath)

        if apply_augmentation:
            # The augmenter works on NumPy arrays, so round-trip the tensor.
            augmented = waveform_augmenter(samples=self.signal.numpy(),
                                           sample_rate=self.sr)
            self.signal = torch.from_numpy(augmented)

        to_spectrogram = torchaudio.transforms.Spectrogram(n_fft=320,
                                                           win_length=320,
                                                           hop_length=160)
        self.spectogram = to_spectrogram(self.signal)

        to_decibels = torchaudio.transforms.AmplitudeToDB()
        self.logSpectogram = to_decibels(self.spectogram)

        return self.logSpectogram, self.label
def build_transforms(train=True):
    """Build the waveform augmentation pipeline.

    Args:
        train: Not consulted by the current implementation; the same
            pipeline is returned whatever value is passed.

    Returns:
        A ``Compose`` holding AddGaussianNoise, TimeStretch, PitchShift and
        Shift, each configured with ``p=0.5``.
    """
    stage_probability = 0.5
    stages = [
        AddGaussianNoise(min_amplitude=0.001,
                         max_amplitude=0.015,
                         p=stage_probability),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=stage_probability),
        PitchShift(min_semitones=-4, max_semitones=4, p=stage_probability),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=stage_probability),
    ]
    return Compose(stages)
Пример #3
0
def augmented_feature_engineering(wavFile, settings):
    """Load a WAV file, augment it, and turn it into model input features.

    Args:
        wavFile: Path handed to ``scipy.io.wavfile.read``.
        settings: Mapping providing ``'CHANNELS'`` (when equal to 2 only the
            first channel is kept) and ``'FEATURE_ENGINEERING_TYPE'``
            (selects the output representation).

    Returns:
        * normalised-MFCC type: a flat list of the MFCC coefficients;
        * raw-wave type: the augmented sample array;
        * any other type: an empty list, after printing a notice.
    """
    fs, rawWav = scipy.io.wavfile.read(wavFile)
    mono = rawWav[:, 0] if settings['CHANNELS'] == 2 else rawWav

    augmenter = Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
    ])
    # NOTE(review): the samples are cast to float32 without rescaling; if the
    # file holds integer PCM the noise amplitudes above are tiny relative to
    # the signal -- confirm this is intended.
    as_float = np.array(mono, dtype="float32")
    augmented = augmenter(samples=as_float, sample_rate=fs)

    feature_type = settings['FEATURE_ENGINEERING_TYPE']

    if feature_type == TYPE_FEATURE_ENGINEERING_NORM_MFCC:
        coefficients = mfcc(augmented,
                            samplerate=fs,
                            nfft=1103,
                            numcep=30,
                            nfilt=40,
                            preemph=0.5,
                            winstep=0.005,
                            winlen=0.015,
                            appendEnergy=False)
        return list(coefficients.ravel())

    if feature_type == TYPE_FEATURE_ENGINEERING_RAW_WAVE:
        return augmented

    print("OLD MFCC TYPE IS NOT SUPPORTED FOR TRAINING PYTORCH")
    return []
Пример #4
0
 def __init__(self, dataset):
     """Keep a reference to ``dataset`` and prepare the augmentation pipeline.

     The sample rate is read from ``TRAINING_CONFIG['audio_sample_rate']``.
     The augmenter chains AddGaussianNoise, TimeStretch, PitchShift and
     Shift, each configured with ``p=0.5``.
     """
     self.dataset = dataset
     self.sample_rate = TRAINING_CONFIG['audio_sample_rate']

     stage_probability = 0.5
     stages = [
         AddGaussianNoise(min_amplitude=0.001,
                          max_amplitude=0.015,
                          p=stage_probability),
         TimeStretch(min_rate=0.8, max_rate=1.25, p=stage_probability),
         PitchShift(min_semitones=-4, max_semitones=4, p=stage_probability),
         Shift(min_fraction=-0.5, max_fraction=0.5, p=stage_probability),
     ]
     self.augmenter = Compose(stages)
  def __getitem__(self,idx):
    """Return log-spectrograms for the triplet stored in row ``idx``.

    Columns 0, 1 and 2 of ``self.data`` supply the anchor, positive and
    negative audio paths.  Each clip is loaded with ``torchaudio.load``,
    augmented (TimeStretch + Shift) when its own coin flip comes up 1, then
    converted to a spectrogram and on to decibels.  Paths, signals, sample
    rates and both spectrogram stages are also stored on ``self``.

    Returns:
      Tuple ``(anchor, positive, negative)`` of log-spectrogram tensors.
    """
    waveform_augmenter = Compose([
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5,rollover=False)
    ])

    def augmented(signal, sample_rate):
      # The augmenter works on NumPy arrays, so round-trip the tensor.
      return torch.from_numpy(
          waveform_augmenter(samples=signal.numpy(), sample_rate=sample_rate))

    def to_spectrogram(signal):
      return torchaudio.transforms.Spectrogram(
          n_fft=320, hop_length=160, win_length=320)(signal)

    def to_decibels(spectrogram):
      return torchaudio.transforms.AmplitudeToDB()(spectrogram)

    # One independent coin flip per clip, drawn before any file is loaded.
    augment_anchor = random.randint(0,1) == 1
    augment_positive = random.randint(0,1) == 1
    augment_negative = random.randint(0,1) == 1

    self.anchor = str(self.data.iloc[idx,0])
    self.positive = self.data.iloc[idx,1]
    self.negative = self.data.iloc[idx,2]

    self.signalAnchor, self.srAnchor = torchaudio.load(self.anchor)
    self.signalPositive, self.srPositive = torchaudio.load(self.positive)
    self.signalNegative, self.srNegative = torchaudio.load(self.negative)

    if augment_anchor:
      self.signalAnchor = augmented(self.signalAnchor, self.srAnchor)

    if augment_positive:
      self.signalPositive = augmented(self.signalPositive, self.srPositive)

    if augment_negative:
      self.signalNegative = augmented(self.signalNegative, self.srNegative)

    self.spectogramAnchor = to_spectrogram(self.signalAnchor)
    self.logSpectogramAnchor = to_decibels(self.spectogramAnchor)

    self.spectogramPositive = to_spectrogram(self.signalPositive)
    self.logSpectogramPositive = to_decibels(self.spectogramPositive)

    self.spectogramNegative = to_spectrogram(self.signalNegative)
    self.logSpectogramNegative = to_decibels(self.spectogramNegative)

    return (self.logSpectogramAnchor,
            self.logSpectogramPositive,
            self.logSpectogramNegative)
Пример #6
0
def compose_without_noise(ir_path='data/impulse'):
    """Build the augmentation pipeline that omits background-noise mixing.

    Args:
        ir_path: Location passed to ``MyAddImpulseResponse`` as ``ir_path``.

    Returns:
        A ``MyCompose`` created with ``p=1.0`` and ``max_augs=3`` over nine
        transforms.  Seven use a base probability of 0.25; TimeStretch and
        PitchShift use one tenth and one twenty-fifth of it respectively.

    NOTE(review): AddGaussianNoise and AddGaussianSNR are still part of the
    list, so "without noise" presumably refers to the absence of
    AddBackgroundNoise -- confirm.
    """
    base_probability = 0.25

    return MyCompose(
        [
            AddGaussianNoise(p=base_probability),
            Shift(p=base_probability, min_fraction=-0.2, max_fraction=0.2),
            FrequencyMask(p=base_probability),
            TimeMask(p=base_probability, max_band_part=0.25),
            AddGaussianSNR(p=base_probability),
            ClippingDistortion(p=base_probability,
                               max_percentile_threshold=20),
            MyAddImpulseResponse(p=base_probability, ir_path=ir_path),
            TimeStretch(p=base_probability / 10),
            PitchShift(p=base_probability / 25),
        ],
        p=1.0,
        max_augs=3,
    )
Пример #7
0
def compose(sounds_path):
    """Build the augmentation pipeline that includes background noise.

    Args:
        sounds_path: Location passed to ``AddBackgroundNoise`` as
            ``sounds_path``.

    Returns:
        A ``Compose`` created with ``p=0.4`` and ``shuffle=True`` over ten
        transforms.  Eight use a base probability of 0.2; TimeStretch and
        PitchShift use one tenth and one thirtieth of it respectively.
    """
    base_probability = 0.2

    return Compose(
        [
            MyGain(p=base_probability),
            AddGaussianNoise(p=base_probability),
            Shift(p=base_probability, min_fraction=-0.25, max_fraction=0.25),
            FrequencyMask(p=base_probability),
            TimeMask(p=base_probability, max_band_part=0.25),
            AddGaussianSNR(p=base_probability),
            ClippingDistortion(p=base_probability,
                               max_percentile_threshold=20),
            AddBackgroundNoise(sounds_path=sounds_path, p=base_probability),
            TimeStretch(p=base_probability / 10),
            PitchShift(p=base_probability / 30),
        ],
        p=0.4,
        shuffle=True,
    )
Пример #8
0
def raw_audio_process(transform_fn):
    """Decorate ``transform_fn`` so raw audio is randomly augmented first.

    Args:
        transform_fn: Callable invoked as
            ``transform_fn(audio, win_length=..., hop_length=...)``.

    Returns:
        A wrapper ``augment_audio(audio, **kwargs)``.  It reads the optional
        keyword arguments ``sr`` (sample rate, default 22050) and ``n_win``
        (window length in milliseconds, default 20), augments ``audio`` and
        forwards it to ``transform_fn`` with a hop of a quarter window.
        Other keyword arguments are not forwarded.
    """
    augment_fn = Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5)
    ])

    @wraps(transform_fn)
    def augment_audio(audio, **kwargs):
        sr = kwargs.get('sr', 22050)
        n_win = kwargs.get('n_win', 20)
        win_length = int(n_win * sr / 1000)

        # The augmenter needs the sample rate as well as the samples;
        # calling it with the audio alone fails with a missing-argument
        # TypeError.
        audio = augment_fn(samples=audio, sample_rate=sr)
        return transform_fn(audio,
                            win_length=win_length,
                            hop_length=win_length // 4)

    return augment_audio
Пример #9
0
def process_fn(output='stft', spec_aug=False, p=0.5, sr=22050):
    """Build a feature-extraction callable that augments raw audio first.

    Args:
        output: ``'stft'`` for a log-magnitude STFT or ``'lms'`` for a log
            mel spectrogram.  Any other value makes this function return
            ``None``.
        spec_aug: When true, the returned callable passes its features
            through ``spec_augment`` as a last step.
        p: Probability given to each of the four waveform augmentations.
        sr: Sample rate in Hz.  It sizes the 20 ms analysis window and is
            handed to the augmenter and to the mel filterbank.

    Returns:
        A callable ``f(feats)`` or ``None``.  One-dimensional input is
        treated as a raw waveform: it is augmented and converted to the
        requested representation (transposed, with ``1e-12`` added before
        the logarithm).  Input of any other rank skips both steps and only
        receives the optional ``spec_augment``.
    """
    augment_fn = Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=p),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=p),
        PitchShift(min_semitones=-4, max_semitones=4, p=p),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=p)
    ])

    win_length = int(20 * sr / 1000)
    if output == 'stft':

        def stft_transform(feats):
            if feats.ndim == 1:
                feats = augment_fn(samples=feats, sample_rate=sr)
                # n_fft is passed by keyword: recent librosa releases
                # reject it as a positional argument.
                magnitude = np.abs(
                    librosa.stft(feats, n_fft=1023, win_length=win_length))
                feats = np.log(magnitude.T + 1e-12)
            if spec_aug:
                feats = spec_augment(feats)
            return feats

        return stft_transform
    if output == 'lms':

        def lms_transform(feats):
            if feats.ndim == 1:
                feats = augment_fn(samples=feats, sample_rate=sr)
                hop_length = win_length // 4
                # y is passed by keyword (required by recent librosa), and
                # sr is forwarded so the mel filterbank matches the real
                # sample rate instead of librosa's 22050 default.
                mel = librosa.feature.melspectrogram(
                    y=feats,
                    sr=sr,
                    n_fft=win_length,
                    hop_length=hop_length,
                    win_length=win_length)
                feats = np.log(np.abs(mel).T + 1e-12)
            if spec_aug:
                feats = spec_augment(feats)
            return feats

        return lms_transform

    # Unrecognised ``output``: keep the historical behaviour of returning
    # None rather than raising.
    return None
Пример #10
0
 def __init__(self, path_audio, y, resample_freq = 32000, max_length=3, augmentation=[], validation=False, num_class=264, pseudo_labels=None):
     """Store the sample list, one-hot encode the labels and build the augmenter.

     Args:
         path_audio: Stored unchanged as ``self.samples``.
         y: Iterable of label strings; each must be a key of
             ``self.labels2idx``.
         resample_freq: Stored as ``self.resample_freq``.
         max_length: Stored as ``self.max_length``.
         augmentation: Iterable copied into the set ``self.augmentation``.
         validation: Stored as ``self.validation``.
         num_class: Width of each one-hot label row.
         pseudo_labels: When not ``None``, its first two items are passed
             to ``self.add_pl``.

     Raises:
         KeyError: If a label in ``y`` is missing from ``self.labels2idx``.
     """
     # NOTE(review): ``augmentation=[]`` is a mutable default; it is only
     # read here (copied into a set), never mutated.
     # Label string -> class index.  The table holds 193 entries (0..192).
     self.labels2idx = {'Pump': 0, 'Spinach': 1,  'abalimi': 2,  'afukirira': 3,  'agriculture': 4, 'akammwanyi': 5,  'akamonde': 6, 'akasaanyi': 7, 'akatunda': 8, 'akatungulu': 9,
   'akawuka': 10, 'amakoola': 11, 'amakungula': 12, 'amalagala': 13, 'amappapaali': 14, 'amatooke': 15, 'banana': 16, 'beans': 17, 'bibala': 18, 'bulimi': 19, 'butterfly': 20, 'cabbages': 21,
   'cassava': 22, 'caterpillar': 23, 'caterpillars': 24, 'coffee': 25, 'crop': 26, 'ddagala': 27, 'dig': 28, 'disease': 29, 'doodo': 30, 'drought': 31, 'ebbugga': 32, 'ebibala': 33, 'ebigimusa': 34,
   'ebijanjaalo': 35, 'ebijjanjalo': 36, 'ebikajjo': 37, 'ebikolo': 38, 'ebikongoliro': 39, 'ebikoola': 40, 'ebimera': 41, 'ebinyebwa': 42, 'ebirime': 43, 'ebisaanyi': 44, 'ebisooli': 45,
   'ebisoolisooli': 46, 'ebitooke': 47, 'ebiwojjolo': 48, 'ebiwuka': 49, 'ebyobulimi': 50, 'eddagala': 51, 'eggobe': 52, 'ejjobyo': 53, 'ekibala': 54, 'ekigimusa': 55, 'ekijanjaalo': 56,
   'ekikajjo': 57, 'ekikolo': 58, 'ekikoola': 59, 'ekimera': 60, 'ekirime': 61, 'ekirwadde': 62, 'ekisaanyi': 63, 'ekitooke': 64, 'ekiwojjolo': 65, 'ekyeya': 66, 'emboga': 67, 'emicungwa': 68,
   'emisiri': 69, 'emiyembe': 70, 'emmwanyi': 71, 'endagala': 72, 'endokwa': 73, 'endwadde': 74, 'enkota': 75, 'ennima': 76, 'ennimiro': 77, 'ennyaanya': 78, 'ensigo': 79, 'ensiringanyi': 80, 'ensujju': 81,
   'ensuku': 82, 'ensukusa': 83, 'enva endiirwa': 84, 'eppapaali': 85, 'faamu': 86, 'farm': 87, 'farmer': 88, 'farming instructor': 89, 'fertilizer': 90, 'fruit': 91, 'fruit picking': 92,
   'garden': 93, 'greens': 94, 'ground nuts': 95, 'harvest': 96, 'harvesting': 97, 'insect': 98, 'insects': 99, 'irish potatoes': 100, 'irrigate': 101, 'kaamulali': 102, 'kasaanyi': 103, 'kassooli': 104,
   'kikajjo': 105, 'kikolo': 106, 'kisaanyi': 107, 'kukungula': 108, 'leaf': 109, 'leaves': 110, 'lumonde': 111, 'lusuku': 112, 'maize': 113, 'maize stalk borer': 114, 'maize streak virus': 115, 'mango': 116, 'mangoes': 117, 'matooke': 118,
   'matooke seedlings': 119, 'medicine': 120, 'miceere': 121, 'micungwa': 122, 'mpeke': 123, 'muceere': 124, 'mucungwa': 125, 'mulimi': 126, 'munyeera': 127, 'muwogo': 128,
   'nakavundira': 129, 'nambaale': 130, 'namuginga': 131, 'ndwadde': 132, 'nfukirira': 133, 'nnakati': 134, 'nnasale beedi': 135, 'nnimiro': 136, 'nnyaanya': 137, 'npk': 138, 'nursery bed': 139,
   'obulimi': 140, 'obulwadde': 141, 'obumonde': 142, 'obusaanyi': 143, 'obutunda': 144, 'obutungulu': 145, 'obuwuka': 146, 'okufukirira': 147, 'okufuuyira': 148, 'okugimusa': 149, 'okukkoola': 150,
   'okukungula': 151, 'okulima': 152, 'okulimibwa': 153, 'okunnoga': 154, 'okusaasaana': 155, 'okusaasaanya': 156, 'okusiga': 157,
   'okusimba': 158, 'okuzifuuyira': 159, 'olusuku': 160, 'omuceere': 161, 'omucungwa': 162, 'omulimi': 163, 'omulimisa': 164, 'omusiri': 165, 'omuyembe': 166,
   'onion': 167, 'orange': 168, 'pampu': 169, 'passion fruit': 170, 'pawpaw': 171, 'pepper': 172, 'plant': 173, 'plantation': 174, 'ppaapaali': 175, 'pumpkin': 176, 'rice': 177, 'seed': 178,
   'sikungula': 179, 'sow': 180, 'spray': 181, 'spread': 182, 'suckers': 183, 'sugarcane': 184, 'sukumawiki': 185, 'super grow': 186, 'sweet potatoes': 187, 'tomatoes': 188, 'vegetables': 189,
   'watermelon': 190, 'weeding': 191, 'worm': 192}
     
     # Inverse table: class index -> label string.
     self.idx2labels = {k:v for v,k in self.labels2idx.items()}
     # Rows of the identity matrix serve as one-hot vectors of width num_class.
     # NOTE(review): the default num_class (264) is larger than the 193
     # labels in the table above -- confirm the extra columns are intended.
     identity = np.eye(num_class)
     self.augmentation = set(augmentation)
     self.samples = path_audio #+ path_augment
     self.max_length = max_length # 99% are shorter than 3 sec
     self.resample_freq=resample_freq
     self.validation = validation
     # One float32 one-hot row per entry of y.
     self.y = np.array([identity[self.labels2idx[t]] for t in y]).astype(np.float32) #+ [self.labels2idx[t] for t in y_aug]
     self.num_class = num_class
     # Waveform augmentation pipeline: noise, time stretch, pitch shift,
     # shift and gain, each with its own probability.
     self.noise = Compose([AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.6),
                             TimeStretch(min_rate=0.8, max_rate=1.25, p=0.6),
                             PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
                             Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
                             Gain(min_gain_in_db=-12, max_gain_in_db=12, p=0.6), 
                             ])
 
     # Optional pseudo-labelled data is handed to add_pl (defined outside
     # this view) as its two leading items.
     if pseudo_labels is not None:
         self.add_pl(pseudo_labels[0], pseudo_labels[1])
Пример #11
0
    def __init__(
        self,
        sound_file_paths,
        batch_size=8,
        augment=True,
        save_augmented_sounds_to_path=None,
        fixed_sound_length=FIXED_SOUND_LENGTH,
        num_mels=NUM_MELS,
        preprocessing_fn=None,
    ):
        """Record the configuration, split the paths and build the augmenter.

        Args:
            sound_file_paths: Mapping from category name to a collection of
                file paths; it must contain the key ``"laughter"``.
            batch_size: Stored as ``self.batch_size``.
            augment: Stored as ``self.augment``.
            save_augmented_sounds_to_path: When truthy, the directory is
                created if it does not exist yet.
            fixed_sound_length: Stored, and used to derive
                ``self.min_num_samples`` as
                ``(fixed_sound_length + 3) * HOP_LENGTH``.
            num_mels: Stored as ``self.num_mels``.
            preprocessing_fn: Stored as ``self.preprocessing_fn``.
        """
        self.sound_file_paths = sound_file_paths
        self.batch_size = batch_size
        self.augment = augment
        self.save_augmented_sounds_to_path = save_augmented_sounds_to_path
        self.fixed_sound_length = fixed_sound_length
        self.min_num_samples = HOP_LENGTH * (fixed_sound_length + 3)
        self.num_mels = num_mels
        self.preprocessing_fn = preprocessing_fn

        self.laughter_paths = self.sound_file_paths["laughter"]
        # Every path whose category is not a laughter category, flattened
        # in category iteration order.
        self.non_laughter_paths = [
            path
            for category in self.sound_file_paths
            if not is_laughter_category(category)
            for path in self.sound_file_paths[category]
        ]

        if save_augmented_sounds_to_path:
            os.makedirs(save_augmented_sounds_to_path, exist_ok=True)

        stages = [
            AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.002, p=0.1),
            TimeStretch(min_rate=0.8, max_rate=1.25, p=0.02),
            PitchShift(min_semitones=-3, max_semitones=3, p=0.02),
            Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
        ]
        self.augmenter = Compose(stages)
Пример #12
0
def shift(data_path,
          file_info,
          n_repeats=3,
          min_fraction=-0.5,
          max_fraction=0.5):
    """Write time-shifted copies of every clip labelled 1.

    Args:
        data_path: Directory prefix.  It is joined by plain string
            concatenation, so it must end with a path separator.  Clips are
            read from ``<data_path>Clean/`` and written to
            ``<data_path>Augmented/``.
        file_info: Table with a ``label`` column and an ``fname`` column.
        n_repeats: Number of shifted copies written per clip.
        min_fraction: Lower bound passed to ``Shift``.
        max_fraction: Upper bound passed to ``Shift``.

    Output files are named ``Shift_<repeat index, 3 digits>_<fname>``.
    """
    # Create the augmenter
    augmenter = Compose(
        [Shift(min_fraction=min_fraction, max_fraction=max_fraction, p=1.0)])

    positive_rows = file_info[file_info.label == 1].index
    if n_repeats <= 0:
        # Nothing to write; avoid touching the source files at all.
        return

    # Iterate through the Gibbon audio files only
    for j in positive_rows:
        fname = file_info.at[j, 'fname']
        # Read the clip once; the same samples feed every repeat.
        # NOTE(review): relies on the augmenter returning a new array rather
        # than modifying ``samples`` in place.
        rate, samples = wavfile.read(data_path + 'Clean/' + fname)
        for i in range(n_repeats):
            # Set the output path
            output_file_path = data_path + 'Augmented/Shift_{:03d}_'.format(
                i) + fname
            # Perform the time shift
            augmented_samples = augmenter(samples=samples, sample_rate=rate)
            # Save the new audio
            wavfile.write(filename=output_file_path,
                          rate=rate,
                          data=augmented_samples)
Пример #13
0
 def __init__(self, augment_type, p, cross_valid=False):
     """Configure the augmentation selected by ``augment_type``.

     Args:
         augment_type: One of ``'wham_weak'``, ``'wham_strong'``,
             ``'reverb_weak'``, ``'reverb_strong'``, ``'cascade'``,
             ``'distort'`` or ``'none'``.
         p: Per-transform probability used by the ``'cascade'`` and
             ``'distort'`` pipelines.
         cross_valid: Selects the ``cv`` noise directory instead of ``tr``.

     Raises:
         ValueError: If ``augment_type`` is not one of the values above.

     For the reverb types the three reverb settings are drawn once, here,
     and then fixed for the lifetime of the object.
     """
     self.cross_valid = cross_valid
     self.sample_rate = 8000
     self.type = augment_type
     self.p = p

     if self.cross_valid:
         wham_path = '../../../librimix/data/wham_noise/cv'
     else:
         wham_path = '../../../librimix/data/wham_noise/tr'

     kind = self.type
     prob = self.p

     if kind == 'wham_weak':
         background = AddBackgroundNoise(sounds_path=wham_path,
                                         min_snr_in_db=5,
                                         max_snr_in_db=15,
                                         p=1)
         self.augment = Compose([background])
     elif kind == 'wham_strong':
         background = AddBackgroundNoise(sounds_path=wham_path,
                                         min_snr_in_db=2,
                                         max_snr_in_db=7,
                                         p=1)
         self.augment = Compose([background])
     elif kind == 'reverb_weak':
         # Drawn in the order reverberance, room scale, stereo depth.
         reverberance = random.randrange(0, 50)
         room_scale = random.randrange(0, 50)
         stereo_depth = random.randrange(0, 50)
         self.augment = AudioEffectsChain().reverb(
             reverberance=reverberance,
             room_scale=room_scale,
             stereo_depth=stereo_depth,
         )
     elif kind == 'reverb_strong':
         reverberance = random.randrange(50, 100)
         room_scale = random.randrange(50, 100)
         stereo_depth = random.randrange(50, 100)
         self.augment = AudioEffectsChain().reverb(
             reverberance=reverberance,
             room_scale=room_scale,
             stereo_depth=stereo_depth,
         )
     elif kind == 'cascade':
         stages = [
             AddBackgroundNoise(sounds_path=wham_path,
                                min_snr_in_db=0,
                                max_snr_in_db=5,
                                p=prob),
             AddGaussianSNR(min_SNR=0.001, max_SNR=0.25, p=prob),
             ClippingDistortion(min_percentile_threshold=0,
                                max_percentile_threshold=40,
                                p=prob),
             FrequencyMask(min_frequency_band=0.0,
                           max_frequency_band=0.5,
                           p=prob),
             PolarityInversion(p=prob),
             Shift(min_fraction=-0.5,
                   max_fraction=0.5,
                   rollover=True,
                   p=prob),
             TimeMask(min_band_part=0.0,
                      max_band_part=0.2,
                      fade=False,
                      p=prob),
         ]
         self.augment = Compose(stages)
     elif kind == 'distort':
         stages = [
             PitchShift(min_semitones=-4, max_semitones=4, p=prob),
             TimeStretch(min_rate=0.8,
                         max_rate=1.25,
                         leave_length_unchanged=True,
                         p=prob),
         ]
         self.augment = Compose(stages)
     elif kind == 'none':
         self.augment = None
     else:
         raise ValueError(
             "Did not recognize augmentation type. Received %s, expected 'wham_weak', 'wham_strong', 'reverb_weak', 'reverb_strong', 'cascade', 'distort', or 'none'."
             % kind)
    # NOTE(review): this excerpt has no enclosing definition in view;
    # ``audio_file`` is supplied by code outside the visible source.
    sample_rate, sound_np = wavfile.read(audio_file)
    # Bring the samples to float32.  int16 data is divided by 32768; any
    # other non-float32 dtype trips the assertion.
    if sound_np.dtype != np.float32:
        assert sound_np.dtype == np.int16
        sound_np = np.divide(
            sound_np, 32768, dtype=np.float32
        )
    # File name without its last four characters (presumably the ".wav"
    # extension -- TODO confirm).
    number = os.path.split(audio_file)[-1][:-4]

    # Each entry pairs a transform instance (all built with p=1.0) with the
    # number of output variants to produce for it.
    transforms = [
        {"instance": AddGaussianSNR(p=1.0), "num_runs": 3},
        {"instance": TimeStretch(min_rate=0.4, max_rate=1.25, p=1.0), "num_runs": 5},
        {
            "instance": PitchShift(min_semitones=-5, max_semitones=5, p=1.0),
            "num_runs": 6,
        },
        {"instance": Shift(min_fraction=-0.85, max_fraction=0.85, p=1.0), "num_runs": 4},
        {"instance": Resample(p=1.0), "num_runs": 5},
        {"instance": ClippingDistortion(p=1.0), "num_runs": 3},
    ]

    for transform in transforms:
        augmenter = Compose([transform["instance"]])
        # Use the optional "name" entry when present and truthy; otherwise
        # fall back to the transform's class name.
        run_name = (
            transform.get("name")
            if transform.get("name")
            else transform["instance"].__class__.__name__
        )
        for i in range(transform["num_runs"]):
            # Target: augmented/<number>_<run_name>_<3-digit index>.wav
            # NOTE(review): the excerpt ends here; nothing visible applies
            # the augmenter or writes the file.
            output_file_path = os.path.join(
                'augmented', "{}_{}_{:03d}.wav".format(number, run_name, i)
            )
def _write_augmented_variants(augmenter, samples, output_dir, name,
                              filename_template, runs):
    """Apply ``augmenter`` ``runs`` times and write each result as a WAV file.

    ``filename_template`` is formatted with ``(name, run_index)``.
    """
    for i in range(runs):
        output_file_path = os.path.join(output_dir,
                                        filename_template.format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path,
                      rate=SAMPLE_RATE,
                      data=augmented_samples)


def applyTransformations(fileName, output_dir, auxiliarSoundsDir):
    """Write augmented variants of one sound file, one transform at a time.

    Args:
        fileName: Path of the input file, loaded with ``load_wav_file``.
        output_dir: Directory that receives the generated WAV files.
        auxiliarSoundsDir: Directory containing ``helperSounds/ir``,
            ``helperSounds/background_noises`` and
            ``helperSounds/short_noises``.

    Files are named ``<name>_<Transform>_<3-digit index>.wav`` where
    ``<name>`` is the input's base name up to its first dot.
    AddImpulseResponse and Normalize produce a single file each; every other
    transform produces five.
    """
    # Take the base name first: splitting the whole path on "." gave an
    # empty or wrong name whenever a directory part contained a dot
    # (for example "./clip.wav").
    name = os.path.basename(fileName).split(".")[0]
    samples = load_wav_file(fileName)

    # Each augmenter is built immediately before it is used, so a transform
    # whose helper sounds are missing only fails once the earlier
    # transforms have already written their files.

    # AddImpulseResponse
    _write_augmented_variants(
        Compose([
            AddImpulseResponse(p=1.0,
                               ir_path=os.path.join(auxiliarSoundsDir,
                                                    "helperSounds/ir"))
        ]), samples, output_dir, name,
        "{}_AddImpulseResponse_{:03d}.wav", 1)

    # FrequencyMask
    _write_augmented_variants(
        Compose([FrequencyMask(p=1.0)]), samples, output_dir, name,
        "{}_FrequencyMask_{:03d}.wav", 5)

    # TimeMask
    _write_augmented_variants(
        Compose([TimeMask(p=1.0)]), samples, output_dir, name,
        "{}_TimeMask_{:03d}.wav", 5)

    # AddGaussianSNR
    _write_augmented_variants(
        Compose([AddGaussianSNR(p=1.0)]), samples, output_dir, name,
        "{}_AddGaussianSNR_{:03d}.wav", 5)

    # AddGaussianNoise
    _write_augmented_variants(
        Compose([
            AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0)
        ]), samples, output_dir, name,
        "{}_AddGaussianNoise_{:03d}.wav", 5)

    # TimeStretch
    _write_augmented_variants(
        Compose([TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0)]),
        samples, output_dir, name,
        "{}_TimeStretch_{:03d}.wav", 5)

    # PitchShift (the file name previously lacked the leading "P")
    _write_augmented_variants(
        Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)]),
        samples, output_dir, name,
        "{}_PitchShift_{:03d}.wav", 5)

    # Shift
    _write_augmented_variants(
        Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)]),
        samples, output_dir, name,
        "{}_Shift_{:03d}.wav", 5)

    # Shift without rollover
    _write_augmented_variants(
        Compose([
            Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0)
        ]), samples, output_dir, name,
        "{}_ShiftWithoutRollover_{:03d}.wav", 5)

    # Normalize
    _write_augmented_variants(
        Compose([Normalize(p=1.0)]), samples, output_dir, name,
        "{}_Normalize_{:03d}.wav", 1)

    # ClippingDistortion
    _write_augmented_variants(
        Compose([ClippingDistortion(p=1.0)]), samples, output_dir, name,
        "{}_ClippingDistortion_{:03d}.wav", 5)

    # AddBackgroundNoise
    _write_augmented_variants(
        Compose([
            AddBackgroundNoise(sounds_path=os.path.join(
                auxiliarSoundsDir, "helperSounds/background_noises"),
                               p=1.0)
        ]), samples, output_dir, name,
        "{}_AddBackgroundNoise_{:03d}.wav", 5)

    # AddShortNoises
    _write_augmented_variants(
        Compose([
            AddShortNoises(
                sounds_path=os.path.join(auxiliarSoundsDir,
                                         "helperSounds/short_noises"),
                min_snr_in_db=0,
                max_snr_in_db=8,
                min_time_between_sounds=2.0,
                max_time_between_sounds=4.0,
                burst_probability=0.4,
                min_pause_factor_during_burst=0.01,
                max_pause_factor_during_burst=0.95,
                min_fade_in_time=0.005,
                max_fade_in_time=0.08,
                min_fade_out_time=0.01,
                max_fade_out_time=0.1,
                p=1.0,
            )
        ]), samples, output_dir, name,
        "{}_AddShortNoises_{:03d}.wav", 5)
Пример #16
0
        # NOTE(review): this excerpt starts inside a loop body whose header
        # and enclosing definition are outside the visible source.
        wavfile.write(output_file_path,
                      rate=SAMPLE_RATE,
                      data=augmented_samples)

    # PitchShift: five files, transform configured for -4..4 semitones
    augmenter = Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(output_dir,
                                        "PitchShift_{:03d}.wav".format(i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path,
                      rate=SAMPLE_RATE,
                      data=augmented_samples)

    # Shift: five files, transform configured for fractions -0.5..0.5
    augmenter = Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(output_dir,
                                        "Shift_{:03d}.wav".format(i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path,
                      rate=SAMPLE_RATE,
                      data=augmented_samples)

    # Shift without rollover
    augmenter = Compose(
        [Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "ShiftWithoutRollover_{:03d}.wav".format(i))
        # NOTE(review): the excerpt ends after this line; the matching
        # write call is not visible.
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
Пример #17
0
    def generate(self, wave_file, output_dir):
        """
        Write augmented variants of ``wave_file`` into ``output_dir``.

        A transform runs only when the first item of the attribute of the
        same name on ``self`` is truthy (``self.ShiftWithoutRoll`` for the
        no-rollover shift).  AddImpulseResponse and Normalize produce one
        file each; every other transform produces five.  Files are named
        ``<input base name up to its first dot>_<Transform><3-digit index>.wav``.
        """
        samples = load_wav_file(wave_file)
        _filename = os.path.basename(wave_file).split('.')[0]

        def _render(augmenter, suffix_template, runs):
            # Apply the augmenter `runs` times, writing one file per run.
            for i in range(runs):
                output_file_path = os.path.join(
                    output_dir, _filename + suffix_template.format(i))
                augmented_samples = augmenter(samples=samples,
                                              sample_rate=SAMPLE_RATE)
                wavfile.write(output_file_path,
                              rate=SAMPLE_RATE,
                              data=augmented_samples)

        # AddImpulseResponse
        if self.AddImpulseResponse[0]:
            _render(
                Compose([
                    AddImpulseResponse(p=1.0,
                                       ir_path=os.path.join(DEMO_DIR, "ir"))
                ]), "_AddImpulseResponse{:03d}.wav", 1)

        # FrequencyMask
        if self.FrequencyMask[0]:
            _render(Compose([FrequencyMask(p=1.0)]),
                    "_FrequencyMask{:03d}.wav", 5)

        # TimeMask
        if self.TimeMask[0]:
            _render(Compose([TimeMask(p=1.0)]), "_TimeMask{:03d}.wav", 5)

        # AddGaussianSNR
        if self.AddGaussianSNR[0]:
            _render(Compose([AddGaussianSNR(p=1.0)]),
                    "_AddGaussianSNR{:03d}.wav", 5)

        # AddGaussianNoise
        if self.AddGaussianNoise[0]:
            _render(
                Compose([
                    AddGaussianNoise(min_amplitude=0.001,
                                     max_amplitude=0.015,
                                     p=1.0)
                ]), "_AddGaussianNoise{:03d}.wav", 5)

        # TimeStretch
        if self.TimeStretch[0]:
            _render(
                Compose([TimeStretch(min_rate=0.5, max_rate=1.5, p=1.0)]),
                "_TimeStretch{:03d}.wav", 5)

        # PitchShift
        if self.PitchShift[0]:
            _render(
                Compose(
                    [PitchShift(min_semitones=-6, max_semitones=12, p=1.0)]),
                "_PitchShift{:03d}.wav", 5)

        # Shift
        if self.Shift[0]:
            _render(
                Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)]),
                "_Shift{:03d}.wav", 5)

        # Shift without rollover
        if self.ShiftWithoutRoll[0]:
            _render(
                Compose([
                    Shift(min_fraction=-0.2,
                          max_fraction=0.2,
                          rollover=False,
                          p=1.0)
                ]), "_ShiftWithoutRollover{:03d}.wav", 5)

        # Normalize
        if self.Normalize[0]:
            _render(Compose([Normalize(p=1.0)]), "_Normalize{:03d}.wav", 1)

        # Resample
        if self.Resample[0]:
            augmenter = Compose([
                Resample(min_sample_rate=12000, max_sample_rate=44100, p=1.0)
            ])
            for i in range(5):
                output_file_path = os.path.join(
                    output_dir, _filename + "_Resample{:03d}.wav".format(i))
                augmented_samples = augmenter(samples=samples,
                                              sample_rate=SAMPLE_RATE)
                wavfile.write(output_file_path,
                              rate=SAMPLE_RATE,
                              data=augmented_samples)

        # ClippingDistortion
        if self.ClippingDistortion[0]:
            augmenter = Compose(
                [ClippingDistortion(max_percentile_threshold=10, p=1.0)])
            for i in range(5):
                output_file_path = os.path.join(
                    output_dir,
                    _filename + "_ClippingDistortion{:03d}.wav".format(i))
                augmented_samples = augmenter(samples=samples,
                                              sample_rate=SAMPLE_RATE)
                wavfile.write(output_file_path,
                              rate=SAMPLE_RATE,
                              data=augmented_samples)

        # AddBackgroundNoise
        if self.AddBackgroundNoise[0]:
            augmenter = Compose([
                AddBackgroundNoise(sounds_path=os.path.join(
                    DEMO_DIR, "background_noises"),
                                   p=1.0)
            ])
            for i in range(5):
                output_file_path = os.path.join(
                    output_dir,
                    _filename + "_AddBackgroundNoise{:03d}.wav".format(i))
                augmented_samples = augmenter(samples=samples,
                                              sample_rate=SAMPLE_RATE)
                wavfile.write(output_file_path,
                              rate=SAMPLE_RATE,
                              data=augmented_samples)
        # AddWhiteNoise
        if self.AddWhiteNoise[0]:
            augmenter = Compose([
                AddBackgroundNoise(sounds_path=os.path.join(
                    DEMO_DIR, "white_noises"),
                                   p=1.0)
            ])
            for i in range(self.AddWhiteNoise[1]):
                output_file_path = os.path.join(
                    output_dir,
                    _filename + "_AddWhiteNoise{:03d}.wav".format(i))
                augmented_samples = augmenter(samples=samples,
                                              sample_rate=SAMPLE_RATE)
                wavfile.write(output_file_path,
                              rate=SAMPLE_RATE,
                              data=augmented_samples)
        # AddPinkNoise
        if self.AddPinkNoise[0]:
            augmenter = Compose([
                AddBackgroundNoise(sounds_path=os.path.join(
                    DEMO_DIR, "pink_noises"),
                                   p=1.0)
            ])
            for i in range(self.AddPinkNoise[1]):
                output_file_path = os.path.join(
                    output_dir,
                    _filename + "_AddPinkNoise{:03d}.wav".format(i))
                augmented_samples = augmenter(samples=samples,
                                              sample_rate=SAMPLE_RATE)
                wavfile.write(output_file_path,
                              rate=SAMPLE_RATE,
                              data=augmented_samples)
        # AddShortNoises
        if self.AddShortNoises[0]:
            augmenter = Compose([
                AddShortNoises(
                    sounds_path=os.path.join(DEMO_DIR, "short_noises"),
                    min_snr_in_db=0,
                    max_snr_in_db=8,
                    min_time_between_sounds=2.0,
                    max_time_between_sounds=4.0,
                    burst_probability=0.4,
                    min_pause_factor_during_burst=0.01,
                    max_pause_factor_during_burst=0.95,
                    min_fade_in_time=0.005,
                    max_fade_in_time=0.08,
                    min_fade_out_time=0.01,
                    max_fade_out_time=0.1,
                    p=1.0,
                )
            ])
            for i in range(5):
                output_file_path = os.path.join(
                    output_dir,
                    _filename + "_AddShortNoises{:03d}.wav".format(i))
                augmented_samples = augmenter(samples=samples,
                                              sample_rate=SAMPLE_RATE)
                wavfile.write(output_file_path,
                              rate=SAMPLE_RATE,
                              data=augmented_samples)
Пример #18
0
def transform(file_path, output_folder, iterations):
    """
    Apply each demo transformation to one example sound and write the results.

    For every transformation listed in the body, `iterations` augmented
    versions of the input are written to `output_folder`, named
    "<transform name>_<input base name>_<i>.wav".

    :param file_path: Path of the input WAV file (read via load_wav_file).
    :param output_folder: Directory that receives the generated files.
    :param iterations: Number of files to write per transformation.
    """

    samples = load_wav_file(file_path)
    # Drop only the trailing extension; str.replace('.wav', '') would also
    # remove a ".wav" occurring in the middle of the file name.
    file_name = os.path.splitext(os.path.basename(file_path))[0]

    def produce(augmenter, name):
        """Run `augmenter` on the samples `iterations` times, saving each result."""
        for i in range(iterations):
            output_file_path = os.path.join(
                output_folder, "{}_{}_{}.wav".format(name, file_name, i))
            augmented_samples = augmenter(samples=samples,
                                          sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path,
                          rate=SAMPLE_RATE,
                          data=augmented_samples)

    # Every transform below uses p=1.0 so that each written file really has
    # the named transformation applied.

    # TimeMask
    produce(Compose([TimeMask(p=1.0)]), 'TimeMask')

    # FrequencyMask
    produce(Compose([FrequencyMask(p=1.0)]), 'FrequencyMask')

    # AddGaussianSNR
    produce(Compose([AddGaussianSNR(p=1.0)]), 'AddGaussianSNR')

    # PitchShift
    produce(
        Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)]),
        'PitchShift')

    # TimeStretch (was p=0.5, which left roughly half of the outputs as
    # unmodified copies of the input)
    produce(
        Compose([TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0)]),
        'TimeStretch')

    # AddGaussianNoise
    produce(
        Compose([
            AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0)
        ]), 'AddGaussianNoise')

    # Shift
    produce(
        Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)]),
        'Shift')

    # Shift without rollover
    produce(
        Compose([
            Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0)
        ]), 'Shift without rollover')

    # Normalize
    produce(Compose([Normalize(p=1.0)]), 'Normalize')

    # AddImpulseResponse
    produce(
        Compose(
            [AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir"))]),
        'AddImpulseResponse')

    # Resample
    produce(Compose([Resample(p=1.0)]), 'Resample')

    # ClippingDistortion
    produce(Compose([ClippingDistortion(p=1.0)]), 'ClippingDistortion')

    # AddBackgroundNoise
    produce(
        Compose([
            AddBackgroundNoise(sounds_path=os.path.join(
                DEMO_DIR, "background_noises"),
                               p=1.0)
        ]), 'AddBackgroundNoise')

    # AddShortNoises
    produce(
        Compose([
            AddShortNoises(
                sounds_path=os.path.join(DEMO_DIR, "short_noises"),
                min_snr_in_db=0,
                max_snr_in_db=8,
                min_time_between_sounds=2.0,
                max_time_between_sounds=4.0,
                burst_probability=0.4,
                min_pause_factor_during_burst=0.01,
                max_pause_factor_during_burst=0.95,
                min_fade_in_time=0.005,
                max_fade_in_time=0.08,
                min_fade_out_time=0.01,
                max_fade_out_time=0.1,
                p=1.0,
            )
        ]), 'AddShortNoises')
Пример #19
0
def load_wav_file(sound_file_path):
    """
    Read a WAV file and return its samples as a float32 numpy array.

    :param sound_file_path: Path of the WAV file to read.
    :return: The samples as float32. float32 input is returned as read;
        int16 input is scaled by 1/32768 so that it lies within [-1, 1).
    :raises Exception: If the file's sample rate differs from SAMPLE_RATE.
    :raises ValueError: If the sample dtype is neither float32 nor int16.
    """
    sample_rate, sound_np = wavfile.read(sound_file_path)
    if sample_rate != SAMPLE_RATE:
        raise Exception("Unexpected sample rate {} (expected {})".format(
            sample_rate, SAMPLE_RATE))

    if sound_np.dtype == np.float32:
        return sound_np

    # Explicit check instead of `assert`: assertions are stripped under -O,
    # which would let other integer widths be scaled with the wrong factor.
    if sound_np.dtype != np.int16:
        raise ValueError(
            "Unsupported sample dtype {} (expected float32 or int16)".format(
                sound_np.dtype))

    # Convert first so the result is float32 rather than float64; dividing by
    # 32768 (the int16 magnitude limit) keeps every sample within [-1, 1).
    return sound_np.astype(np.float32) / 32768


# Demo pipeline: four transforms chained together, each one configured with
# p=0.5.
augmenter = Compose([
    AddGaussianNoise(p=0.5, min_amplitude=0.001, max_amplitude=0.015),
    TimeStretch(p=0.5, min_rate=0.8, max_rate=1.25),
    PitchShift(p=0.5, min_semitones=-4, max_semitones=4),
    Shift(p=0.5, min_fraction=-0.5, max_fraction=0.5),
])

# Results go into an "output" directory next to this script.
current_dir = os.path.dirname(__file__)
output_dir = os.path.join(current_dir, "output")
os.makedirs(output_dir, exist_ok=True)

# Write 20 augmented renditions of the example sound: 000.wav ... 019.wav.
samples = load_wav_file(os.path.join(current_dir, "acoustic_guitar_0.wav"))
for i in tqdm(range(20)):
    wavfile.write(
        os.path.join(output_dir, "{:03d}.wav".format(i)),
        rate=SAMPLE_RATE,
        data=augmenter(samples=samples, sample_rate=SAMPLE_RATE),
    )
Пример #20
0
        )
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # TimeStretch
    augmenter = Compose([TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(output_dir, "TimeStretch_{:03d}.wav".format(i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # PitchShift
    augmenter = Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(output_dir, "PitchShift_{:03d}.wav".format(i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # Shift
    augmenter = Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(output_dir, "Shift_{:03d}.wav".format(i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # Normalize
    augmenter = Compose([Normalize(p=1.0)])
    output_file_path = os.path.join(output_dir, "Normalize_{:03d}.wav".format(0))
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)
Пример #21
0
     "name": "Mp3CompressionPydub",
 },
 {
     "instance": Normalize(p=1.0),
     "num_runs": 1
 },
 {
     "instance": PolarityInversion(p=1.0),
     "num_runs": 1
 },
 {
     "instance": Resample(p=1.0),
     "num_runs": 5
 },
 {
     "instance": Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0),
     "num_runs": 5
 },
 {
     "instance":
     Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0),
     "num_runs":
     5,
     "name":
     "ShiftWithoutRollover",
 },
 {
     "instance": TimeMask(p=1.0),
     "num_runs": 5
 },
 {
Пример #22
0
 },
 {
     "instance": Normalize(p=1.0),
     "num_runs": 1
 },
 {
     "instance": PolarityInversion(p=1.0),
     "num_runs": 1
 },
 {
     "instance": Resample(p=1.0),
     "num_runs": 5
 },
 {
     "instance":
     Shift(min_fraction=-0.5, max_fraction=0.5, fade=False, p=1.0),
     "num_runs":
     5,
     "name":
     "ShiftWithoutFade",
 },
 {
     "instance":
     Shift(min_fraction=-0.5, max_fraction=0.5, fade=True, p=1.0),
     "num_runs":
     5,
     "name":
     "ShiftWithShortFade",
 },
 {
     "instance":