def __getitem__(self, idx):
    # Randomly decide whether to augment this sample
    temp = random.randint(0, 1)
    augment = Compose([
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5, rollover=False)
    ])
    self.wavPath = str(self.data.iloc[idx, 0])
    self.label = self.data.iloc[idx, 1]
    self.signal, self.sr = torchaudio.load(self.wavPath)
    if temp == 1:
        self.signal = torch.from_numpy(
            augment(samples=self.signal.numpy(), sample_rate=self.sr))
    self.spectogram = torchaudio.transforms.Spectrogram(
        n_fft=320, win_length=320, hop_length=160)(self.signal)
    self.logSpectogram = torchaudio.transforms.AmplitudeToDB()(self.spectogram)
    # self.tempImg = torchvision.transforms.ToPILImage()(self.logSpectogram)
    # self.tempImg = self.tempImg.convert("RGB")
    # self.spectogramImageTensor = self.vision_transform(self.tempImg)
    return self.logSpectogram, self.label
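# Hedged usage sketch (not from the original source): assuming the __getitem__ above
# belongs to a torch Dataset wrapping a pandas DataFrame of (wav path, label) rows,
# it could be consumed like this. The class name SpectrogramDataset, the CSV file
# name, and the batch size are illustrative assumptions; variable-length clips would
# need a padding collate_fn before batch_size could be raised above 1.
import pandas as pd
from torch.utils.data import DataLoader

df = pd.read_csv("train.csv")            # assumed layout: column 0 = wav path, column 1 = label
dataset = SpectrogramDataset(df)          # hypothetical Dataset class containing the method above
loader = DataLoader(dataset, batch_size=1, shuffle=True)
for log_spec, label in loader:
    print(log_spec.shape, label)          # (1, channels, freq_bins, time_frames), label tensor
    break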
def build_transforms(train=True):
    return Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
    ])
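# Hedged usage sketch (illustrative, not from the original source): applying the
# Compose returned by build_transforms to a mono float32 waveform. The 16 kHz
# sample rate and the synthetic sine input are assumptions for demonstration only.
import numpy as np

sr = 16000
wave = np.sin(2 * np.pi * 440 * np.arange(sr) / sr).astype(np.float32)  # 1 s of 440 Hz
augment = build_transforms(train=True)
augmented = augment(samples=wave, sample_rate=sr)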
def augmented_feature_engineering(wavFile, settings):
    fs, rawWav = scipy.io.wavfile.read(wavFile)
    wavData = rawWav
    if settings['CHANNELS'] == 2:
        wavData = rawWav[:, 0]
    augmenter = Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
    ])
    wavData = augmenter(samples=np.array(wavData, dtype="float32"), sample_rate=fs)
    data_row = []
    input_type = settings['FEATURE_ENGINEERING_TYPE']
    if input_type == TYPE_FEATURE_ENGINEERING_NORM_MFCC:
        mfcc_result1 = mfcc(wavData, samplerate=fs, nfft=1103, numcep=30, nfilt=40,
                            preemph=0.5, winstep=0.005, winlen=0.015,
                            appendEnergy=False)
        data_row.extend(mfcc_result1.ravel())
    elif input_type == TYPE_FEATURE_ENGINEERING_RAW_WAVE:
        data_row = wavData
    else:
        print("OLD MFCC TYPE IS NOT SUPPORTED FOR TRAINING PYTORCH")
    return data_row
def __init__(self, dataset):
    self.dataset = dataset
    self.sample_rate = TRAINING_CONFIG['audio_sample_rate']
    self.augmenter = Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
    ])
def __getitem__(self, idx):
    augment = Compose([
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5, rollover=False)
    ])
    temp1 = random.randint(0, 1)
    temp2 = random.randint(0, 1)
    temp3 = random.randint(0, 1)
    self.anchor = str(self.data.iloc[idx, 0])
    self.positive = self.data.iloc[idx, 1]
    self.negative = self.data.iloc[idx, 2]
    self.signalAnchor, self.srAnchor = torchaudio.load(self.anchor)
    self.signalPositive, self.srPositive = torchaudio.load(self.positive)
    self.signalNegative, self.srNegative = torchaudio.load(self.negative)
    if temp1 == 1:
        self.signalAnchor = torch.from_numpy(
            augment(samples=self.signalAnchor.numpy(), sample_rate=self.srAnchor))
    if temp2 == 1:
        self.signalPositive = torch.from_numpy(
            augment(samples=self.signalPositive.numpy(), sample_rate=self.srPositive))
    if temp3 == 1:
        self.signalNegative = torch.from_numpy(
            augment(samples=self.signalNegative.numpy(), sample_rate=self.srNegative))
    self.spectogramAnchor = torchaudio.transforms.Spectrogram(
        n_fft=320, hop_length=160, win_length=320)(self.signalAnchor)
    self.logSpectogramAnchor = torchaudio.transforms.AmplitudeToDB()(self.spectogramAnchor)
    self.spectogramPositive = torchaudio.transforms.Spectrogram(
        n_fft=320, hop_length=160, win_length=320)(self.signalPositive)
    self.logSpectogramPositive = torchaudio.transforms.AmplitudeToDB()(self.spectogramPositive)
    self.spectogramNegative = torchaudio.transforms.Spectrogram(
        n_fft=320, hop_length=160, win_length=320)(self.signalNegative)
    self.logSpectogramNegative = torchaudio.transforms.AmplitudeToDB()(self.spectogramNegative)
    # self.tempImgAnchor = torchvision.transforms.ToPILImage()(self.logSpectogramAnchor)
    # self.tempImgAnchor = self.tempImgAnchor.convert("RGB")
    # self.spectogramAnchorImageTensor = self.vision_transform(self.tempImgAnchor)
    # self.tempImgPositive = torchvision.transforms.ToPILImage()(self.logSpectogramPositive)
    # self.tempImgPositive = self.tempImgPositive.convert("RGB")
    # self.spectogramPositiveImageTensor = self.vision_transform(self.tempImgPositive)
    # self.tempImgNegative = torchvision.transforms.ToPILImage()(self.logSpectogramNegative)
    # self.tempImgNegative = self.tempImgNegative.convert("RGB")
    # self.spectogramNegativeImageTensor = self.vision_transform(self.tempImgNegative)
    return self.logSpectogramAnchor, self.logSpectogramPositive, self.logSpectogramNegative
def compose_without_noise(ir_path='data/impulse'):
    _p = 0.25
    transforms = [
        AddGaussianNoise(p=_p),
        Shift(p=_p, min_fraction=-0.2, max_fraction=0.2),
        FrequencyMask(p=_p),
        TimeMask(p=_p, max_band_part=0.25),
        AddGaussianSNR(p=_p),
        ClippingDistortion(p=_p, max_percentile_threshold=20),
        MyAddImpulseResponse(p=_p, ir_path=ir_path),
        TimeStretch(p=_p / 10),
        PitchShift(p=_p / 25),
    ]
    return MyCompose(transforms, p=1.0, max_augs=3)
def compose(sounds_path):
    _p = 0.2
    transforms = [
        MyGain(p=_p),
        AddGaussianNoise(p=_p),
        Shift(p=_p, min_fraction=-0.25, max_fraction=0.25),
        FrequencyMask(p=_p),
        TimeMask(p=_p, max_band_part=0.25),
        AddGaussianSNR(p=_p),
        ClippingDistortion(p=_p, max_percentile_threshold=20),
        AddBackgroundNoise(sounds_path=sounds_path, p=_p),
        TimeStretch(p=_p / 10),
        PitchShift(p=_p / 30),
    ]
    return Compose(transforms, p=0.4, shuffle=True)
def raw_audio_process(transform_fn):
    augment_fn = Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
        PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5)
    ])

    @wraps(transform_fn)
    def augment_audio(audio, **kwargs):
        sr = kwargs.setdefault('sr', 22050)
        n_win = kwargs.setdefault('n_win', 20)
        win_length = int(n_win * sr / 1000)
        # Compose expects the waveform and its sample rate as keyword arguments
        audio = augment_fn(samples=audio, sample_rate=sr)
        return transform_fn(audio, win_length=win_length, hop_length=win_length // 4)

    return augment_audio
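# Hedged usage sketch (illustrative, not part of the original source): decorating a
# simple log-STFT feature function with raw_audio_process. The function name
# to_log_stft, the librosa-based feature, and the random input are assumptions
# chosen for demonstration.
import librosa
import numpy as np

@raw_audio_process
def to_log_stft(audio, win_length, hop_length):
    stft = librosa.stft(audio, n_fft=1024, win_length=win_length, hop_length=hop_length)
    return np.log(np.abs(stft) + 1e-12)

features = to_log_stft(np.random.randn(22050).astype(np.float32), sr=22050)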
def process_fn(output='stft', spec_aug=False, p=0.5, sr=22050):
    augment_fn = Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=p),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=p),
        PitchShift(min_semitones=-4, max_semitones=4, p=p),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=p)
    ])
    win_length = int(20 * sr / 1000)

    if output == 'stft':
        def stft_transform(feats):
            if feats.ndim == 1:
                feats = augment_fn(samples=feats, sample_rate=sr)
            feats = np.log(
                np.abs(librosa.stft(feats, 1023, win_length=win_length)).T + 1e-12)
            if spec_aug:
                feats = spec_augment(feats)
            return feats
        return stft_transform

    if output == 'lms':
        def lms_transform(feats):
            if feats.ndim == 1:
                feats = augment_fn(samples=feats, sample_rate=sr)
            hop_length = win_length // 4
            feats = np.log(
                np.abs(librosa.feature.melspectrogram(
                    feats, n_fft=win_length, hop_length=hop_length,
                    win_length=win_length)).T + 1e-12)
            if spec_aug:
                feats = spec_augment(feats)
            return feats
        return lms_transform
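# Hedged usage sketch (illustrative): obtaining the log-mel transform from process_fn
# and applying it to a one-second random mono waveform. The random input is an
# assumption for demonstration; this also assumes a librosa version that accepts the
# positional waveform argument used in the code above (librosa < 0.10).
import numpy as np

lms = process_fn(output='lms', spec_aug=False, sr=22050)
log_mel = lms(np.random.randn(22050).astype(np.float32))  # shape: (frames, n_mels)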
def __init__(self, path_audio, y, resample_freq=32000, max_length=3, augmentation=[],
             validation=False, num_class=264, pseudo_labels=None):
    self.labels2idx = {
        'Pump': 0, 'Spinach': 1, 'abalimi': 2, 'afukirira': 3, 'agriculture': 4,
        'akammwanyi': 5, 'akamonde': 6, 'akasaanyi': 7, 'akatunda': 8, 'akatungulu': 9,
        'akawuka': 10, 'amakoola': 11, 'amakungula': 12, 'amalagala': 13,
        'amappapaali': 14, 'amatooke': 15, 'banana': 16, 'beans': 17, 'bibala': 18,
        'bulimi': 19, 'butterfly': 20, 'cabbages': 21, 'cassava': 22, 'caterpillar': 23,
        'caterpillars': 24, 'coffee': 25, 'crop': 26, 'ddagala': 27, 'dig': 28,
        'disease': 29, 'doodo': 30, 'drought': 31, 'ebbugga': 32, 'ebibala': 33,
        'ebigimusa': 34, 'ebijanjaalo': 35, 'ebijjanjalo': 36, 'ebikajjo': 37,
        'ebikolo': 38, 'ebikongoliro': 39, 'ebikoola': 40, 'ebimera': 41,
        'ebinyebwa': 42, 'ebirime': 43, 'ebisaanyi': 44, 'ebisooli': 45,
        'ebisoolisooli': 46, 'ebitooke': 47, 'ebiwojjolo': 48, 'ebiwuka': 49,
        'ebyobulimi': 50, 'eddagala': 51, 'eggobe': 52, 'ejjobyo': 53, 'ekibala': 54,
        'ekigimusa': 55, 'ekijanjaalo': 56, 'ekikajjo': 57, 'ekikolo': 58,
        'ekikoola': 59, 'ekimera': 60, 'ekirime': 61, 'ekirwadde': 62, 'ekisaanyi': 63,
        'ekitooke': 64, 'ekiwojjolo': 65, 'ekyeya': 66, 'emboga': 67, 'emicungwa': 68,
        'emisiri': 69, 'emiyembe': 70, 'emmwanyi': 71, 'endagala': 72, 'endokwa': 73,
        'endwadde': 74, 'enkota': 75, 'ennima': 76, 'ennimiro': 77, 'ennyaanya': 78,
        'ensigo': 79, 'ensiringanyi': 80, 'ensujju': 81, 'ensuku': 82, 'ensukusa': 83,
        'enva endiirwa': 84, 'eppapaali': 85, 'faamu': 86, 'farm': 87, 'farmer': 88,
        'farming instructor': 89, 'fertilizer': 90, 'fruit': 91, 'fruit picking': 92,
        'garden': 93, 'greens': 94, 'ground nuts': 95, 'harvest': 96, 'harvesting': 97,
        'insect': 98, 'insects': 99, 'irish potatoes': 100, 'irrigate': 101,
        'kaamulali': 102, 'kasaanyi': 103, 'kassooli': 104, 'kikajjo': 105,
        'kikolo': 106, 'kisaanyi': 107, 'kukungula': 108, 'leaf': 109, 'leaves': 110,
        'lumonde': 111, 'lusuku': 112, 'maize': 113, 'maize stalk borer': 114,
        'maize streak virus': 115, 'mango': 116, 'mangoes': 117, 'matooke': 118,
        'matooke seedlings': 119, 'medicine': 120, 'miceere': 121, 'micungwa': 122,
        'mpeke': 123, 'muceere': 124, 'mucungwa': 125, 'mulimi': 126, 'munyeera': 127,
        'muwogo': 128, 'nakavundira': 129, 'nambaale': 130, 'namuginga': 131,
        'ndwadde': 132, 'nfukirira': 133, 'nnakati': 134, 'nnasale beedi': 135,
        'nnimiro': 136, 'nnyaanya': 137, 'npk': 138, 'nursery bed': 139,
        'obulimi': 140, 'obulwadde': 141, 'obumonde': 142, 'obusaanyi': 143,
        'obutunda': 144, 'obutungulu': 145, 'obuwuka': 146, 'okufukirira': 147,
        'okufuuyira': 148, 'okugimusa': 149, 'okukkoola': 150, 'okukungula': 151,
        'okulima': 152, 'okulimibwa': 153, 'okunnoga': 154, 'okusaasaana': 155,
        'okusaasaanya': 156, 'okusiga': 157, 'okusimba': 158, 'okuzifuuyira': 159,
        'olusuku': 160, 'omuceere': 161, 'omucungwa': 162, 'omulimi': 163,
        'omulimisa': 164, 'omusiri': 165, 'omuyembe': 166, 'onion': 167, 'orange': 168,
        'pampu': 169, 'passion fruit': 170, 'pawpaw': 171, 'pepper': 172, 'plant': 173,
        'plantation': 174, 'ppaapaali': 175, 'pumpkin': 176, 'rice': 177, 'seed': 178,
        'sikungula': 179, 'sow': 180, 'spray': 181, 'spread': 182, 'suckers': 183,
        'sugarcane': 184, 'sukumawiki': 185, 'super grow': 186, 'sweet potatoes': 187,
        'tomatoes': 188, 'vegetables': 189, 'watermelon': 190, 'weeding': 191,
        'worm': 192}
    self.idx2labels = {k: v for v, k in self.labels2idx.items()}
    identity = np.eye(num_class)
    self.augmentation = set(augmentation)
    self.samples = path_audio  # + path_augment
    self.max_length = max_length  # 99% are shorter than 3 sec
    self.resample_freq = resample_freq
    self.validation = validation
    self.y = np.array([identity[self.labels2idx[t]]
                       for t in y]).astype(np.float32)  # + [self.labels2idx[t] for t in y_aug]
    self.num_class = num_class
    self.noise = Compose([
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.6),
        TimeStretch(min_rate=0.8, max_rate=1.25, p=0.6),
        PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
        Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
        Gain(min_gain_in_db=-12, max_gain_in_db=12, p=0.6),
    ])
    if pseudo_labels is not None:
        self.add_pl(pseudo_labels[0], pseudo_labels[1])
def __init__(
    self,
    sound_file_paths,
    batch_size=8,
    augment=True,
    save_augmented_sounds_to_path=None,
    fixed_sound_length=FIXED_SOUND_LENGTH,
    num_mels=NUM_MELS,
    preprocessing_fn=None,
):
    self.sound_file_paths = sound_file_paths
    self.batch_size = batch_size
    self.augment = augment
    self.save_augmented_sounds_to_path = save_augmented_sounds_to_path
    self.fixed_sound_length = fixed_sound_length
    self.min_num_samples = (fixed_sound_length + 3) * HOP_LENGTH
    self.num_mels = num_mels
    self.preprocessing_fn = preprocessing_fn
    self.laughter_paths = self.sound_file_paths["laughter"]
    self.non_laughter_paths = []
    for category in self.sound_file_paths:
        if not is_laughter_category(category):
            self.non_laughter_paths += self.sound_file_paths[category]
    if save_augmented_sounds_to_path:
        os.makedirs(save_augmented_sounds_to_path, exist_ok=True)
    self.augmenter = Compose(
        [
            AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.002, p=0.1),
            TimeStretch(min_rate=0.8, max_rate=1.25, p=0.02),
            PitchShift(min_semitones=-3, max_semitones=3, p=0.02),
            Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
        ]
    )
def shift(data_path, file_info, n_repeats=3, min_fraction=-0.5, max_fraction=0.5):
    # Create the augmenter
    augmenter = Compose(
        [Shift(min_fraction=min_fraction, max_fraction=max_fraction, p=1.0)])
    # Iterate through the Gibbon audio files only
    for j in file_info[file_info.label == 1].index:
        for i in range(n_repeats):
            # Read audio file
            rate, samples = wavfile.read(data_path + 'Clean/' + file_info.at[j, 'fname'])
            # Set the output path
            output_file_path = (data_path + 'Augmented/Shift_{:03d}_'.format(i)
                                + file_info.at[j, 'fname'])
            # Apply the time shift
            augmented_samples = augmenter(samples=samples, sample_rate=rate)
            # Save the new audio
            wavfile.write(filename=output_file_path, rate=rate, data=augmented_samples)
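# Hedged usage sketch (illustrative): the column names 'fname' and 'label' and the
# 'Clean'/'Augmented' folder layout come from the function above; the concrete data
# path and file names here are assumptions, and both folders are expected to exist.
import pandas as pd

file_info = pd.DataFrame({
    'fname': ['gibbon_001.wav', 'noise_001.wav'],
    'label': [1, 0],  # only rows with label == 1 are augmented
})
shift('data/', file_info, n_repeats=3)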
def __init__(self, augment_type, p, cross_valid=False):
    self.cross_valid = cross_valid
    self.sample_rate = 8000
    self.type = augment_type
    self.p = p
    wham_path = ('../../../librimix/data/wham_noise/cv' if self.cross_valid
                 else '../../../librimix/data/wham_noise/tr')
    if self.type == 'wham_weak':
        self.augment = Compose([
            AddBackgroundNoise(sounds_path=wham_path, min_snr_in_db=5,
                               max_snr_in_db=15, p=1)
        ])
    elif self.type == 'wham_strong':
        self.augment = Compose([
            AddBackgroundNoise(sounds_path=wham_path, min_snr_in_db=2,
                               max_snr_in_db=7, p=1)
        ])
    elif self.type == 'reverb_weak':
        self.augment = AudioEffectsChain().reverb(
            reverberance=random.randrange(0, 50),
            room_scale=random.randrange(0, 50),
            stereo_depth=random.randrange(0, 50),
        )
    elif self.type == 'reverb_strong':
        self.augment = AudioEffectsChain().reverb(
            reverberance=random.randrange(50, 100),
            room_scale=random.randrange(50, 100),
            stereo_depth=random.randrange(50, 100),
        )
    elif self.type == 'cascade':
        self.augment = Compose([
            AddBackgroundNoise(sounds_path=wham_path, min_snr_in_db=0,
                               max_snr_in_db=5, p=self.p),
            AddGaussianSNR(min_SNR=0.001, max_SNR=0.25, p=self.p),
            ClippingDistortion(min_percentile_threshold=0,
                               max_percentile_threshold=40, p=self.p),
            FrequencyMask(min_frequency_band=0.0, max_frequency_band=0.5, p=self.p),
            PolarityInversion(p=self.p),
            Shift(min_fraction=-0.5, max_fraction=0.5, rollover=True, p=self.p),
            TimeMask(min_band_part=0.0, max_band_part=0.2, fade=False, p=self.p)
        ])
    elif self.type == 'distort':
        self.augment = Compose([
            PitchShift(min_semitones=-4, max_semitones=4, p=self.p),
            TimeStretch(min_rate=0.8, max_rate=1.25,
                        leave_length_unchanged=True, p=self.p)
        ])
    elif self.type == 'none':
        self.augment = None
    else:
        raise ValueError(
            "Did not recognize augmentation type. Received %s, expected 'wham_weak', "
            "'wham_strong', 'reverb_weak', 'reverb_strong', 'cascade', 'distort', "
            "or 'none'." % self.type)
sample_rate, sound_np = wavfile.read(audio_file)
if sound_np.dtype != np.float32:
    assert sound_np.dtype == np.int16
    sound_np = np.divide(sound_np, 32768, dtype=np.float32)
number = os.path.split(audio_file)[-1][:-4]

transforms = [
    {"instance": AddGaussianSNR(p=1.0), "num_runs": 3},
    {"instance": TimeStretch(min_rate=0.4, max_rate=1.25, p=1.0), "num_runs": 5},
    {
        "instance": PitchShift(min_semitones=-5, max_semitones=5, p=1.0),
        "num_runs": 6,
    },
    {"instance": Shift(min_fraction=-0.85, max_fraction=0.85, p=1.0), "num_runs": 4},
    {"instance": Resample(p=1.0), "num_runs": 5},
    {"instance": ClippingDistortion(p=1.0), "num_runs": 3},
]

for transform in transforms:
    augmenter = Compose([transform["instance"]])
    run_name = (
        transform.get("name")
        if transform.get("name")
        else transform["instance"].__class__.__name__
    )
    for i in range(transform["num_runs"]):
        output_file_path = os.path.join(
            'augmented', "{}_{}_{:03d}.wav".format(number, run_name, i)
        )
def applyTransformations(fileName, output_dir, auxiliarSoundsDir):
    name = fileName.split(".")[0].split("/")[-1]
    samples = load_wav_file(fileName)

    # AddImpulseResponse
    augmenter = Compose([
        AddImpulseResponse(p=1.0,
                           ir_path=os.path.join(auxiliarSoundsDir, "helperSounds/ir"))
    ])
    output_file_path = os.path.join(
        output_dir, "{}_AddImpulseResponse_{:03d}.wav".format(name, 0))
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # FrequencyMask
    augmenter = Compose([FrequencyMask(p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "{}_FrequencyMask_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # TimeMask
    augmenter = Compose([TimeMask(p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "{}_TimeMask_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddGaussianSNR
    augmenter = Compose([AddGaussianSNR(p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "{}_AddGaussianSNR_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddGaussianNoise
    augmenter = Compose(
        [AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "{}_AddGaussianNoise_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # TimeStretch
    augmenter = Compose([TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "{}_TimeStretch_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # PitchShift
    augmenter = Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "{}_PitchShift_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # Shift
    augmenter = Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(output_dir,
                                        "{}_Shift_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # Shift without rollover
    augmenter = Compose(
        [Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "{}_ShiftWithoutRollover_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # Normalize
    augmenter = Compose([Normalize(p=1.0)])
    output_file_path = os.path.join(output_dir,
                                    "{}_Normalize_{:03d}.wav".format(name, 0))
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # ClippingDistortion
    augmenter = Compose([ClippingDistortion(p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "{}_ClippingDistortion_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddBackgroundNoise
    augmenter = Compose([
        AddBackgroundNoise(sounds_path=os.path.join(
            auxiliarSoundsDir, "helperSounds/background_noises"), p=1.0)
    ])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "{}_AddBackgroundNoise_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddShortNoises
    augmenter = Compose([
        AddShortNoises(
            sounds_path=os.path.join(auxiliarSoundsDir, "helperSounds/short_noises"),
            min_snr_in_db=0,
            max_snr_in_db=8,
            min_time_between_sounds=2.0,
            max_time_between_sounds=4.0,
            burst_probability=0.4,
            min_pause_factor_during_burst=0.01,
            max_pause_factor_during_burst=0.95,
            min_fade_in_time=0.005,
            max_fade_in_time=0.08,
            min_fade_out_time=0.01,
            max_fade_out_time=0.1,
            p=1.0,
        )
    ])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "{}_AddShortNoises_{:03d}.wav".format(name, i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)
wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

# PitchShift
augmenter = Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)])
for i in range(5):
    output_file_path = os.path.join(output_dir, "PitchShift_{:03d}.wav".format(i))
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

# Shift
augmenter = Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)])
for i in range(5):
    output_file_path = os.path.join(output_dir, "Shift_{:03d}.wav".format(i))
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

# Shift without rollover
augmenter = Compose(
    [Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0)])
for i in range(5):
    output_file_path = os.path.join(
        output_dir, "ShiftWithoutRollover_{:03d}.wav".format(i))
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
def generate(self, wave_file, output_dir):
    """
    For each transformation, apply it to an example sound and
    write the transformed sounds to an output folder.
    """
    samples = load_wav_file(wave_file)
    _filename = os.path.basename(wave_file).split('.')[0]

    # AddImpulseResponse
    if self.AddImpulseResponse[0]:
        augmenter = Compose([
            AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir"))
        ])
        output_file_path = os.path.join(
            output_dir, _filename + "_AddImpulseResponse{:03d}.wav".format(0))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # FrequencyMask
    if self.FrequencyMask[0]:
        augmenter = Compose([FrequencyMask(p=1.0)])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_FrequencyMask{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # TimeMask
    if self.TimeMask[0]:
        augmenter = Compose([TimeMask(p=1.0)])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_TimeMask{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddGaussianSNR
    if self.AddGaussianSNR[0]:
        augmenter = Compose([AddGaussianSNR(p=1.0)])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_AddGaussianSNR{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddGaussianNoise
    if self.AddGaussianNoise[0]:
        augmenter = Compose([
            AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0)
        ])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_AddGaussianNoise{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # TimeStretch
    if self.TimeStretch[0]:
        augmenter = Compose([TimeStretch(min_rate=0.5, max_rate=1.5, p=1.0)])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_TimeStretch{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # PitchShift
    if self.PitchShift[0]:
        augmenter = Compose([PitchShift(min_semitones=-6, max_semitones=12, p=1.0)])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_PitchShift{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # Shift
    if self.Shift[0]:
        augmenter = Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_Shift{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # Shift without rollover
    if self.ShiftWithoutRoll[0]:
        augmenter = Compose([
            Shift(min_fraction=-0.2, max_fraction=0.2, rollover=False, p=1.0)
        ])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_ShiftWithoutRollover{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # Normalize
    if self.Normalize[0]:
        augmenter = Compose([Normalize(p=1.0)])
        output_file_path = os.path.join(
            output_dir, _filename + "_Normalize{:03d}.wav".format(0))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # Resample
    if self.Resample[0]:
        augmenter = Compose([
            Resample(min_sample_rate=12000, max_sample_rate=44100, p=1.0)
        ])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_Resample{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # ClippingDistortion
    if self.ClippingDistortion[0]:
        augmenter = Compose(
            [ClippingDistortion(max_percentile_threshold=10, p=1.0)])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_ClippingDistortion{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddBackgroundNoise
    if self.AddBackgroundNoise[0]:
        augmenter = Compose([
            AddBackgroundNoise(
                sounds_path=os.path.join(DEMO_DIR, "background_noises"), p=1.0)
        ])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_AddBackgroundNoise{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddWhiteNoise
    if self.AddWhiteNoise[0]:
        augmenter = Compose([
            AddBackgroundNoise(
                sounds_path=os.path.join(DEMO_DIR, "white_noises"), p=1.0)
        ])
        for i in range(self.AddWhiteNoise[1]):
            output_file_path = os.path.join(
                output_dir, _filename + "_AddWhiteNoise{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddPinkNoise
    if self.AddPinkNoise[0]:
        augmenter = Compose([
            AddBackgroundNoise(
                sounds_path=os.path.join(DEMO_DIR, "pink_noises"), p=1.0)
        ])
        for i in range(self.AddPinkNoise[1]):
            output_file_path = os.path.join(
                output_dir, _filename + "_AddPinkNoise{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # AddShortNoises
    if self.AddShortNoises[0]:
        augmenter = Compose([
            AddShortNoises(
                sounds_path=os.path.join(DEMO_DIR, "short_noises"),
                min_snr_in_db=0,
                max_snr_in_db=8,
                min_time_between_sounds=2.0,
                max_time_between_sounds=4.0,
                burst_probability=0.4,
                min_pause_factor_during_burst=0.01,
                max_pause_factor_during_burst=0.95,
                min_fade_in_time=0.005,
                max_fade_in_time=0.08,
                min_fade_out_time=0.01,
                max_fade_out_time=0.1,
                p=1.0,
            )
        ])
        for i in range(5):
            output_file_path = os.path.join(
                output_dir, _filename + "_AddShortNoises{:03d}.wav".format(i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)
def transform(file_path, output_folder, iterations):
    """
    For each transformation, apply it to an example sound and
    write the transformed sounds to an output folder.
    """
    samples = load_wav_file(file_path)
    file_name = os.path.basename(file_path).replace('.wav', '')

    def produce(augmenter, name):
        for i in range(iterations):
            output_file_path = '{}/{}'.format(
                output_folder, "{}_{}_{}.wav".format(name, file_name, i))
            augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

    # TimeMask
    augmenter = Compose([TimeMask(p=1.0)])
    produce(augmenter, 'TimeMask')
    # FrequencyMask
    augmenter = Compose([FrequencyMask(p=1.0)])
    produce(augmenter, 'FrequencyMask')
    # AddGaussianSNR
    augmenter = Compose([AddGaussianSNR(p=1.0)])
    produce(augmenter, 'AddGaussianSNR')
    # PitchShift
    augmenter = Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)])
    produce(augmenter, 'PitchShift')
    # TimeStretch
    augmenter = Compose([TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5)])
    produce(augmenter, 'TimeStretch')
    # AddGaussianNoise
    augmenter = Compose(
        [AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0)])
    produce(augmenter, 'AddGaussianNoise')
    # Shift
    augmenter = Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)])
    produce(augmenter, 'Shift')
    # Shift without rollover
    augmenter = Compose(
        [Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0)])
    produce(augmenter, 'Shift without rollover')
    # Normalize
    augmenter = Compose([Normalize(p=1.0)])
    produce(augmenter, 'Normalize')
    # AddImpulseResponse
    augmenter = Compose(
        [AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir"))])
    produce(augmenter, 'AddImpulseResponse')
    # Resample
    augmenter = Compose([Resample(p=1.0)])
    produce(augmenter, 'Resample')
    # ClippingDistortion
    augmenter = Compose([ClippingDistortion(p=1.0)])
    produce(augmenter, 'ClippingDistortion')
    # AddBackgroundNoise
    augmenter = Compose([
        AddBackgroundNoise(
            sounds_path=os.path.join(DEMO_DIR, "background_noises"), p=1.0)
    ])
    produce(augmenter, 'AddBackgroundNoise')
    # AddShortNoises
    augmenter = Compose([
        AddShortNoises(
            sounds_path=os.path.join(DEMO_DIR, "short_noises"),
            min_snr_in_db=0,
            max_snr_in_db=8,
            min_time_between_sounds=2.0,
            max_time_between_sounds=4.0,
            burst_probability=0.4,
            min_pause_factor_during_burst=0.01,
            max_pause_factor_during_burst=0.95,
            min_fade_in_time=0.005,
            max_fade_in_time=0.08,
            min_fade_out_time=0.01,
            max_fade_out_time=0.1,
            p=1.0,
        )
    ])
    produce(augmenter, 'AddShortNoises')
def load_wav_file(sound_file_path):
    sample_rate, sound_np = wavfile.read(sound_file_path)
    if sample_rate != SAMPLE_RATE:
        raise Exception("Unexpected sample rate {} (expected {})".format(
            sample_rate, SAMPLE_RATE))
    if sound_np.dtype != np.float32:
        assert sound_np.dtype == np.int16
        sound_np = sound_np / 32767  # ends up roughly between -1 and 1
    return sound_np


augmenter = Compose([
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
])

current_dir = os.path.dirname(__file__)
output_dir = os.path.join(current_dir, "output")
os.makedirs(output_dir, exist_ok=True)

samples = load_wav_file(os.path.join(current_dir, "acoustic_guitar_0.wav"))

for i in tqdm(range(20)):
    output_file_path = os.path.join(output_dir, "{:03d}.wav".format(i))
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)
    )
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

# TimeStretch
augmenter = Compose([TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0)])
for i in range(5):
    output_file_path = os.path.join(output_dir, "TimeStretch_{:03d}.wav".format(i))
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

# PitchShift
augmenter = Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)])
for i in range(5):
    output_file_path = os.path.join(output_dir, "PitchShift_{:03d}.wav".format(i))
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

# Shift
augmenter = Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)])
for i in range(5):
    output_file_path = os.path.join(output_dir, "Shift_{:03d}.wav".format(i))
    augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
    wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)

# Normalize
augmenter = Compose([Normalize(p=1.0)])
output_file_path = os.path.join(output_dir, "Normalize_{:03d}.wav".format(0))
augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples)
"name": "Mp3CompressionPydub", }, { "instance": Normalize(p=1.0), "num_runs": 1 }, { "instance": PolarityInversion(p=1.0), "num_runs": 1 }, { "instance": Resample(p=1.0), "num_runs": 5 }, { "instance": Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0), "num_runs": 5 }, { "instance": Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0), "num_runs": 5, "name": "ShiftWithoutRollover", }, { "instance": TimeMask(p=1.0), "num_runs": 5 }, {
    },
    {
        "instance": Normalize(p=1.0),
        "num_runs": 1
    },
    {
        "instance": PolarityInversion(p=1.0),
        "num_runs": 1
    },
    {
        "instance": Resample(p=1.0),
        "num_runs": 5
    },
    {
        "instance": Shift(min_fraction=-0.5, max_fraction=0.5, fade=False, p=1.0),
        "num_runs": 5,
        "name": "ShiftWithoutFade",
    },
    {
        "instance": Shift(min_fraction=-0.5, max_fraction=0.5, fade=True, p=1.0),
        "num_runs": 5,
        "name": "ShiftWithShortFade",
    },
    {
        "instance":