def compose_without_noise(ir_path='data/impulse'):
    """Build the augmentation pipeline that needs no background-sound folder.

    Most transforms share one base probability (0.25); ``TimeStretch`` and
    ``PitchShift`` get that base probability divided by 10 and 25.

    Args:
        ir_path: Forwarded to ``MyAddImpulseResponse`` as its ``ir_path``.

    Returns:
        ``MyCompose`` over the transform list, built with ``p=1.0`` and
        ``max_augs=3``.
    """
    base_prob = 0.25
    stretch_prob = base_prob / 10
    pitch_prob = base_prob / 25

    pipeline = [
        AddGaussianNoise(p=base_prob),
        Shift(p=base_prob, min_fraction=-0.2, max_fraction=0.2),
        FrequencyMask(p=base_prob),
        TimeMask(p=base_prob, max_band_part=0.25),
        AddGaussianSNR(p=base_prob),
        ClippingDistortion(p=base_prob, max_percentile_threshold=20),
        MyAddImpulseResponse(p=base_prob, ir_path=ir_path),
        TimeStretch(p=stretch_prob),
        PitchShift(p=pitch_prob),
    ]
    return MyCompose(pipeline, p=1.0, max_augs=3)
def compose(sounds_path):
    """Build the augmentation pipeline that includes background noise.

    Most transforms share one base probability (0.2); ``TimeStretch`` and
    ``PitchShift`` get that base probability divided by 10 and 30.

    Args:
        sounds_path: Forwarded to ``AddBackgroundNoise`` as its
            ``sounds_path``.

    Returns:
        ``Compose`` over the transform list, built with ``p=0.4`` and
        ``shuffle=True``.
    """
    base_prob = 0.2
    stretch_prob = base_prob / 10
    pitch_prob = base_prob / 30

    pipeline = [
        MyGain(p=base_prob),
        AddGaussianNoise(p=base_prob),
        Shift(p=base_prob, min_fraction=-0.25, max_fraction=0.25),
        FrequencyMask(p=base_prob),
        TimeMask(p=base_prob, max_band_part=0.25),
        AddGaussianSNR(p=base_prob),
        ClippingDistortion(p=base_prob, max_percentile_threshold=20),
        AddBackgroundNoise(sounds_path=sounds_path, p=base_prob),
        TimeStretch(p=stretch_prob),
        PitchShift(p=pitch_prob),
    ]
    return Compose(pipeline, p=0.4, shuffle=True)
min_time_between_sounds=2.0, max_time_between_sounds=4.0, burst_probability=0.4, min_pause_factor_during_burst=0.01, max_pause_factor_during_burst=0.95, min_fade_in_time=0.005, max_fade_in_time=0.08, min_fade_out_time=0.01, max_fade_out_time=0.1, p=1.0, ), "num_runs": 5, }, { "instance": ClippingDistortion(p=1.0), "num_runs": 5 }, { "instance": FrequencyMask(min_frequency_band=0.5, max_frequency_band=0.6, p=1.0), "num_runs": 5, }, { "instance": Gain(min_gain_in_db=-6, max_gain_in_db=6, p=1.0), "num_runs": 5 }, {
def generate(self, wave_file, output_dir):
    """
    For each enabled transformation, apply it to an example sound and write
    the transformed sounds to an output folder.

    Each transformation is controlled by an indexable attribute of the same
    name on ``self`` (``self.FrequencyMask``, ``self.TimeMask``, ...):
    element ``[0]`` is the on/off switch. For ``AddWhiteNoise`` and
    ``AddPinkNoise`` element ``[1]`` is also read as the number of files to
    produce; every other transformation writes a fixed number of files
    (one for ``AddImpulseResponse`` and ``Normalize``, five otherwise).

    Output files are named ``<input stem>_<Transformation><NNN>.wav`` and are
    written with ``SAMPLE_RATE``.

    Args:
        wave_file: Path of the input sound, loaded with ``load_wav_file``.
        output_dir: Existing directory that receives the generated files.
    """
    samples = load_wav_file(wave_file)
    # splitext only strips the final extension, so "take.01.wav" keeps the
    # stem "take.01" instead of collapsing to "take" and colliding with
    # "take.02.wav".
    stem = os.path.splitext(os.path.basename(wave_file))[0]
    default_runs = 5

    def render(augmenter, label, runs):
        # Run the augmenter `runs` times on the same input; each call draws
        # fresh random parameters, so every file is a different variation.
        for i in range(runs):
            output_file_path = os.path.join(
                output_dir, "{}_{}{:03d}.wav".format(stem, label, i))
            augmented_samples = augmenter(samples=samples,
                                          sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path,
                          rate=SAMPLE_RATE,
                          data=augmented_samples)

    # Augmenters are built only inside their enabled branch so that a
    # disabled transformation never touches its resource directory.

    # AddImpulseResponse
    if self.AddImpulseResponse[0]:
        render(
            Compose([
                AddImpulseResponse(p=1.0,
                                   ir_path=os.path.join(DEMO_DIR, "ir"))
            ]),
            "AddImpulseResponse",
            1,
        )

    # FrequencyMask
    if self.FrequencyMask[0]:
        render(Compose([FrequencyMask(p=1.0)]), "FrequencyMask", default_runs)

    # TimeMask
    if self.TimeMask[0]:
        render(Compose([TimeMask(p=1.0)]), "TimeMask", default_runs)

    # AddGaussianSNR
    if self.AddGaussianSNR[0]:
        render(Compose([AddGaussianSNR(p=1.0)]), "AddGaussianSNR",
               default_runs)

    # AddGaussianNoise
    if self.AddGaussianNoise[0]:
        render(
            Compose([
                AddGaussianNoise(min_amplitude=0.001,
                                 max_amplitude=0.015,
                                 p=1.0)
            ]),
            "AddGaussianNoise",
            default_runs,
        )

    # TimeStretch
    if self.TimeStretch[0]:
        render(
            Compose([TimeStretch(min_rate=0.5, max_rate=1.5, p=1.0)]),
            "TimeStretch",
            default_runs,
        )

    # PitchShift
    if self.PitchShift[0]:
        render(
            Compose([PitchShift(min_semitones=-6, max_semitones=12, p=1.0)]),
            "PitchShift",
            default_runs,
        )

    # Shift
    if self.Shift[0]:
        render(
            Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)]),
            "Shift",
            default_runs,
        )

    # Shift without rollover
    if self.ShiftWithoutRoll[0]:
        render(
            Compose([
                Shift(min_fraction=-0.2,
                      max_fraction=0.2,
                      rollover=False,
                      p=1.0)
            ]),
            "ShiftWithoutRollover",
            default_runs,
        )

    # Normalize
    if self.Normalize[0]:
        render(Compose([Normalize(p=1.0)]), "Normalize", 1)

    # Resample
    if self.Resample[0]:
        render(
            Compose([
                Resample(min_sample_rate=12000,
                         max_sample_rate=44100,
                         p=1.0)
            ]),
            "Resample",
            default_runs,
        )

    # ClippingDistortion
    if self.ClippingDistortion[0]:
        render(
            Compose([ClippingDistortion(max_percentile_threshold=10, p=1.0)]),
            "ClippingDistortion",
            default_runs,
        )

    # AddBackgroundNoise
    if self.AddBackgroundNoise[0]:
        render(
            Compose([
                AddBackgroundNoise(sounds_path=os.path.join(
                    DEMO_DIR, "background_noises"),
                                   p=1.0)
            ]),
            "AddBackgroundNoise",
            default_runs,
        )

    # AddWhiteNoise (run count comes from the attribute, not the default)
    if self.AddWhiteNoise[0]:
        render(
            Compose([
                AddBackgroundNoise(sounds_path=os.path.join(
                    DEMO_DIR, "white_noises"),
                                   p=1.0)
            ]),
            "AddWhiteNoise",
            self.AddWhiteNoise[1],
        )

    # AddPinkNoise (run count comes from the attribute, not the default)
    if self.AddPinkNoise[0]:
        render(
            Compose([
                AddBackgroundNoise(sounds_path=os.path.join(
                    DEMO_DIR, "pink_noises"),
                                   p=1.0)
            ]),
            "AddPinkNoise",
            self.AddPinkNoise[1],
        )

    # AddShortNoises
    if self.AddShortNoises[0]:
        render(
            Compose([
                AddShortNoises(
                    sounds_path=os.path.join(DEMO_DIR, "short_noises"),
                    min_snr_in_db=0,
                    max_snr_in_db=8,
                    min_time_between_sounds=2.0,
                    max_time_between_sounds=4.0,
                    burst_probability=0.4,
                    min_pause_factor_during_burst=0.01,
                    max_pause_factor_during_burst=0.95,
                    min_fade_in_time=0.005,
                    max_fade_in_time=0.08,
                    min_fade_out_time=0.01,
                    max_fade_out_time=0.1,
                    p=1.0,
                )
            ]),
            "AddShortNoises",
            default_runs,
        )
assert sound_np.dtype == np.int16 sound_np = np.divide( sound_np, 32768, dtype=np.float32 ) number = os.path.split(audio_file)[-1][:-4] transforms = [ {"instance": AddGaussianSNR(p=1.0), "num_runs": 3}, {"instance": TimeStretch(min_rate=0.4, max_rate=1.25, p=1.0), "num_runs": 5}, { "instance": PitchShift(min_semitones=-5, max_semitones=5, p=1.0), "num_runs": 6, }, {"instance": Shift(min_fraction=-0.85, max_fraction=0.85, p=1.0), "num_runs": 4}, {"instance": Resample(p=1.0), "num_runs": 5}, {"instance": ClippingDistortion(p=1.0), "num_runs": 3}, ] for transform in transforms: augmenter = Compose([transform["instance"]]) run_name = ( transform.get("name") if transform.get("name") else transform["instance"].__class__.__name__ ) for i in range(transform["num_runs"]): output_file_path = os.path.join( 'augmented', "{}_{}_{:03d}.wav".format(number, run_name, i) ) augmented_samples = augmenter(samples=sound_np, sample_rate=sample_rate) wavfile.write(output_file_path, rate=sample_rate, data=augmented_samples)
def __init__(self, augment_type, p, cross_valid=False):
    """Configure the augmentation selected by ``augment_type``.

    Stores the arguments on the instance (``type``, ``p``, ``cross_valid``),
    fixes ``sample_rate`` at 8000 and sets ``self.augment`` to the chosen
    pipeline, or to ``None`` for ``'none'``.

    Args:
        augment_type: One of ``'wham_weak'``, ``'wham_strong'``,
            ``'reverb_weak'``, ``'reverb_strong'``, ``'cascade'``,
            ``'distort'`` or ``'none'``.
        p: Per-transform probability, used only by the ``'cascade'`` and
            ``'distort'`` pipelines.
        cross_valid: Selects the ``cv`` noise folder instead of ``tr``.

    Raises:
        ValueError: If ``augment_type`` is not one of the values above.
    """
    self.cross_valid = cross_valid
    self.sample_rate = 8000
    self.type = augment_type
    self.p = p

    if self.cross_valid:
        wham_path = '../../../librimix/data/wham_noise/cv'
    else:
        wham_path = '../../../librimix/data/wham_noise/tr'

    def wham_noise(snr_floor, snr_ceiling, prob):
        # Background noise drawn from the selected WHAM folder.
        return AddBackgroundNoise(sounds_path=wham_path,
                                  min_snr_in_db=snr_floor,
                                  max_snr_in_db=snr_ceiling,
                                  p=prob)

    def random_reverb(low, high):
        # The three reverb settings are drawn once, here, so they stay fixed
        # for the lifetime of this instance.
        chain = AudioEffectsChain()
        return chain.reverb(
            reverberance=random.randrange(low, high),
            room_scale=random.randrange(low, high),
            stereo_depth=random.randrange(low, high),
        )

    kind = self.type
    if kind == 'wham_weak':
        self.augment = Compose([wham_noise(5, 15, 1)])
    elif kind == 'wham_strong':
        self.augment = Compose([wham_noise(2, 7, 1)])
    elif kind == 'reverb_weak':
        self.augment = random_reverb(0, 50)
    elif kind == 'reverb_strong':
        self.augment = random_reverb(50, 100)
    elif kind == 'cascade':
        prob = self.p
        self.augment = Compose([
            wham_noise(0, 5, prob),
            AddGaussianSNR(min_SNR=0.001, max_SNR=0.25, p=prob),
            ClippingDistortion(min_percentile_threshold=0,
                               max_percentile_threshold=40,
                               p=prob),
            FrequencyMask(min_frequency_band=0.0,
                          max_frequency_band=0.5,
                          p=prob),
            PolarityInversion(p=prob),
            Shift(min_fraction=-0.5, max_fraction=0.5, rollover=True,
                  p=prob),
            TimeMask(min_band_part=0.0, max_band_part=0.2, fade=False,
                     p=prob),
        ])
    elif kind == 'distort':
        prob = self.p
        self.augment = Compose([
            PitchShift(min_semitones=-4, max_semitones=4, p=prob),
            TimeStretch(min_rate=0.8,
                        max_rate=1.25,
                        leave_length_unchanged=True,
                        p=prob),
        ])
    elif kind == 'none':
        self.augment = None
    else:
        raise ValueError(
            "Did not recognize augmentation type. Received %s, expected 'wham_weak', 'wham_strong', 'reverb_weak', 'reverb_strong', 'cascade', 'distort', or 'none'." % self.type)
def transform(file_path, output_folder, iterations):
    """
    Run every demo transformation on one sound and save the results.

    Each transformation is applied ``iterations`` times to the sound loaded
    from ``file_path``; every result is written to ``output_folder`` as
    ``<Transformation>_<input name>_<i>.wav`` using ``SAMPLE_RATE``.

    Args:
        file_path: Path of the input wav file, loaded with ``load_wav_file``.
        output_folder: Directory that receives the generated files.
        iterations: Number of files to write per transformation.
    """
    samples = load_wav_file(file_path)
    file_name = os.path.basename(file_path).replace('.wav', '')

    # (output label, factory) pairs. Factories are called one at a time inside
    # the loop so each transform is only constructed right before it is used.
    # NOTE(review): TimeStretch is the only entry with p=0.5 instead of 1.0 -
    # presumably some of its outputs are left unstretched; confirm intended.
    recipes = (
        ('TimeMask', lambda: TimeMask(p=1.0)),
        ('FrequencyMask', lambda: FrequencyMask(p=1.0)),
        ('AddGaussianSNR', lambda: AddGaussianSNR(p=1.0)),
        ('PitchShift',
         lambda: PitchShift(min_semitones=-4, max_semitones=4, p=1.0)),
        ('TimeStretch',
         lambda: TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5)),
        ('AddGaussianNoise',
         lambda: AddGaussianNoise(
             min_amplitude=0.001, max_amplitude=0.015, p=1.0)),
        ('Shift',
         lambda: Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)),
        ('Shift without rollover',
         lambda: Shift(
             min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0)),
        ('Normalize', lambda: Normalize(p=1.0)),
        ('AddImpulseResponse',
         lambda: AddImpulseResponse(
             p=1.0, ir_path=os.path.join(DEMO_DIR, "ir"))),
        ('Resample', lambda: Resample(p=1.0)),
        ('ClippingDistortion', lambda: ClippingDistortion(p=1.0)),
        ('AddBackgroundNoise',
         lambda: AddBackgroundNoise(
             sounds_path=os.path.join(DEMO_DIR, "background_noises"),
             p=1.0)),
        ('AddShortNoises',
         lambda: AddShortNoises(
             sounds_path=os.path.join(DEMO_DIR, "short_noises"),
             min_snr_in_db=0,
             max_snr_in_db=8,
             min_time_between_sounds=2.0,
             max_time_between_sounds=4.0,
             burst_probability=0.4,
             min_pause_factor_during_burst=0.01,
             max_pause_factor_during_burst=0.95,
             min_fade_in_time=0.005,
             max_fade_in_time=0.08,
             min_fade_out_time=0.01,
             max_fade_out_time=0.1,
             p=1.0,
         )),
    )

    for label, build_transform in recipes:
        pipeline = Compose([build_transform()])
        for run in range(iterations):
            leaf = "{}_{}_{}.wav".format(label, file_name, run)
            target = '{}/{}'.format(output_folder, leaf)
            result = pipeline(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(target, rate=SAMPLE_RATE, data=result)
"Normalize_{:03d}.wav".format(0)) augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE) wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples) # Resample augmenter = Compose([Resample(p=1.0)]) for i in range(5): output_file_path = os.path.join(output_dir, "Resample_{:03d}.wav".format(i)) augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE) wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples) # ClippingDistortion augmenter = Compose([ClippingDistortion(p=1.0)]) for i in range(5): output_file_path = os.path.join( output_dir, "ClippingDistortion_{:03d}.wav".format(i)) augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE) wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples) # AddBackgroundNoise augmenter = Compose([ AddBackgroundNoise(sounds_path=os.path.join(DEMO_DIR, "background_noises"), p=1.0) ]) for i in range(5):
def applyTransformations(fileName, output_dir, auxiliarSoundsDir):
    """
    Apply every demo transformation to one sound and save the results.

    Output files are written to ``output_dir`` as
    ``<input stem>_<Transformation>_<NNN>.wav`` using ``SAMPLE_RATE``.
    ``AddImpulseResponse`` and ``Normalize`` produce one file each; every
    other transformation produces five.

    Args:
        fileName: Path of the input sound, loaded with ``load_wav_file``.
        output_dir: Directory that receives the generated files.
        auxiliarSoundsDir: Directory containing the ``helperSounds``
            folder (``ir``, ``background_noises``, ``short_noises``).
    """
    # Derive the stem from the basename only. Splitting the whole path on "."
    # returned an empty name for paths such as "./a.wav" or "../x/a.wav" and
    # truncated at dots in directory or file names.
    name = os.path.splitext(os.path.basename(fileName))[0]
    samples = load_wav_file(fileName)
    default_runs = 5

    def render(augmenter, label, runs):
        # Each call of the augmenter draws new random parameters, so every
        # file written for one label is a different variation.
        for i in range(runs):
            output_file_path = os.path.join(
                output_dir, "{}_{}_{:03d}.wav".format(name, label, i))
            augmented_samples = augmenter(samples=samples,
                                          sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path,
                          rate=SAMPLE_RATE,
                          data=augmented_samples)

    # AddImpulseResponse
    render(
        Compose([
            AddImpulseResponse(p=1.0,
                               ir_path=os.path.join(auxiliarSoundsDir,
                                                    "helperSounds/ir"))
        ]),
        "AddImpulseResponse",
        1,
    )

    # FrequencyMask
    render(Compose([FrequencyMask(p=1.0)]), "FrequencyMask", default_runs)

    # TimeMask
    render(Compose([TimeMask(p=1.0)]), "TimeMask", default_runs)

    # AddGaussianSNR
    render(Compose([AddGaussianSNR(p=1.0)]), "AddGaussianSNR", default_runs)

    # AddGaussianNoise
    render(
        Compose(
            [AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015,
                              p=1.0)]),
        "AddGaussianNoise",
        default_runs,
    )

    # TimeStretch
    render(
        Compose([TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0)]),
        "TimeStretch",
        default_runs,
    )

    # PitchShift (label previously misspelled "itchShift" in the file name)
    render(
        Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)]),
        "PitchShift",
        default_runs,
    )

    # Shift
    render(
        Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)]),
        "Shift",
        default_runs,
    )

    # Shift without rollover
    render(
        Compose(
            [Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False,
                   p=1.0)]),
        "ShiftWithoutRollover",
        default_runs,
    )

    # Normalize
    render(Compose([Normalize(p=1.0)]), "Normalize", 1)

    # ClippingDistortion
    render(Compose([ClippingDistortion(p=1.0)]), "ClippingDistortion",
           default_runs)

    # AddBackgroundNoise
    render(
        Compose([
            AddBackgroundNoise(sounds_path=os.path.join(
                auxiliarSoundsDir, "helperSounds/background_noises"),
                               p=1.0)
        ]),
        "AddBackgroundNoise",
        default_runs,
    )

    # AddShortNoises
    render(
        Compose([
            AddShortNoises(
                sounds_path=os.path.join(auxiliarSoundsDir,
                                         "helperSounds/short_noises"),
                min_snr_in_db=0,
                max_snr_in_db=8,
                min_time_between_sounds=2.0,
                max_time_between_sounds=4.0,
                burst_probability=0.4,
                min_pause_factor_during_burst=0.01,
                max_pause_factor_during_burst=0.95,
                min_fade_in_time=0.005,
                max_fade_in_time=0.08,
                min_fade_out_time=0.01,
                max_fade_out_time=0.1,
                p=1.0,
            )
        ]),
        "AddShortNoises",
        default_runs,
    )