def compose_augmentations(rir_path):
    """Build the augmentation pipeline backed by the data under ``rir_path``.

    ``rir_path`` must contain two entries: ``simulated_rirs`` (handed to
    ``AddImpulseResponse``) and ``pointsource_noises`` (handed to both
    ``AddBackgroundNoise`` and ``AddShortNoises``).

    Args:
        rir_path: Directory holding the ``simulated_rirs`` and
            ``pointsource_noises`` entries.

    Returns:
        A ``Compose`` chaining Gaussian-SNR noise, impulse response,
        background noise and short-noise transforms, each configured with
        its own ``p`` value.

    Raises:
        ValueError: If either required entry is missing under ``rir_path``.
            The message lists exactly which path(s) could not be found.
    """
    impulse_path = os.path.join(rir_path, 'simulated_rirs')
    noise_path = os.path.join(rir_path, 'pointsource_noises')

    # Report the entries that are actually missing: rir_path itself may well
    # exist while only one of its required sub-entries is absent.
    missing = [
        path for path in (impulse_path, noise_path)
        if not os.path.exists(path)
    ]
    if missing:
        raise ValueError(
            'Unable to augment signal, rir_path "{}" is missing required '
            'path(s): {}.'.format(rir_path, ', '.join(missing)))

    return Compose([
        AddGaussianSNR(min_SNR=0.2, max_SNR=0.5, p=0.5),
        AddImpulseResponse(impulse_path, leave_length_unchanged=True, p=0.3),
        AddBackgroundNoise(noise_path, p=0.3),
        AddShortNoises(noise_path, max_snr_in_db=80, p=0.3),
    ])
"num_runs": 5, }, { "instance": AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0), "num_runs": 5, }, { "instance": AddGaussianSNR(p=1.0), "num_runs": 5 }, { "instance": AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir")), "num_runs": 1, }, { "instance": AddShortNoises( sounds_path=os.path.join(DEMO_DIR, "short_noises"), min_snr_in_db=0, max_snr_in_db=8, min_time_between_sounds=2.0, max_time_between_sounds=4.0, burst_probability=0.4, min_pause_factor_during_burst=0.01, max_pause_factor_during_burst=0.95, min_fade_in_time=0.005,
def generate(self, wave_file, output_dir):
    """
    Render every enabled transformation of ``wave_file`` into ``output_dir``.

    A transformation runs only when the first element of the same-named
    attribute on ``self`` is truthy (``ShiftWithoutRoll`` guards the
    non-rollover shift). Outputs are named
    ``<stem>_<Transformation><NNN>.wav``, where ``<stem>`` is the input's
    base name up to its first dot. AddWhiteNoise and AddPinkNoise read
    their run count from the second element of their attribute; every
    other transformation uses the fixed count listed in the table below.
    """
    samples = load_wav_file(wave_file)
    stem = os.path.basename(wave_file).partition('.')[0]

    # One row per transformation, in execution order:
    # (attribute on self, filename suffix template, lazy transform factory,
    #  run count -- None means "take it from the attribute's second element").
    # Factories are lazy so a transform is only constructed when enabled.
    recipes = (
        ("AddImpulseResponse", "_AddImpulseResponse{:03d}.wav",
         lambda: AddImpulseResponse(p=1.0,
                                    ir_path=os.path.join(DEMO_DIR, "ir")),
         1),
        ("FrequencyMask", "_FrequencyMask{:03d}.wav",
         lambda: FrequencyMask(p=1.0),
         5),
        ("TimeMask", "_TimeMask{:03d}.wav",
         lambda: TimeMask(p=1.0),
         5),
        ("AddGaussianSNR", "_AddGaussianSNR{:03d}.wav",
         lambda: AddGaussianSNR(p=1.0),
         5),
        ("AddGaussianNoise", "_AddGaussianNoise{:03d}.wav",
         lambda: AddGaussianNoise(min_amplitude=0.001,
                                  max_amplitude=0.015,
                                  p=1.0),
         5),
        ("TimeStretch", "_TimeStretch{:03d}.wav",
         lambda: TimeStretch(min_rate=0.5, max_rate=1.5, p=1.0),
         5),
        ("PitchShift", "_PitchShift{:03d}.wav",
         lambda: PitchShift(min_semitones=-6, max_semitones=12, p=1.0),
         5),
        ("Shift", "_Shift{:03d}.wav",
         lambda: Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0),
         5),
        ("ShiftWithoutRoll", "_ShiftWithoutRollover{:03d}.wav",
         lambda: Shift(min_fraction=-0.2,
                       max_fraction=0.2,
                       rollover=False,
                       p=1.0),
         5),
        ("Normalize", "_Normalize{:03d}.wav",
         lambda: Normalize(p=1.0),
         1),
        ("Resample", "_Resample{:03d}.wav",
         lambda: Resample(min_sample_rate=12000,
                          max_sample_rate=44100,
                          p=1.0),
         5),
        ("ClippingDistortion", "_ClippingDistortion{:03d}.wav",
         lambda: ClippingDistortion(max_percentile_threshold=10, p=1.0),
         5),
        ("AddBackgroundNoise", "_AddBackgroundNoise{:03d}.wav",
         lambda: AddBackgroundNoise(
             sounds_path=os.path.join(DEMO_DIR, "background_noises"),
             p=1.0),
         5),
        ("AddWhiteNoise", "_AddWhiteNoise{:03d}.wav",
         lambda: AddBackgroundNoise(
             sounds_path=os.path.join(DEMO_DIR, "white_noises"),
             p=1.0),
         None),
        ("AddPinkNoise", "_AddPinkNoise{:03d}.wav",
         lambda: AddBackgroundNoise(
             sounds_path=os.path.join(DEMO_DIR, "pink_noises"),
             p=1.0),
         None),
        ("AddShortNoises", "_AddShortNoises{:03d}.wav",
         lambda: AddShortNoises(
             sounds_path=os.path.join(DEMO_DIR, "short_noises"),
             min_snr_in_db=0,
             max_snr_in_db=8,
             min_time_between_sounds=2.0,
             max_time_between_sounds=4.0,
             burst_probability=0.4,
             min_pause_factor_during_burst=0.01,
             max_pause_factor_during_burst=0.95,
             min_fade_in_time=0.005,
             max_fade_in_time=0.08,
             min_fade_out_time=0.01,
             max_fade_out_time=0.1,
             p=1.0,
         ),
         5),
    )

    for attr, suffix_template, build, runs in recipes:
        if not getattr(self, attr)[0]:
            continue

        pipeline = Compose([build()])
        if runs is None:
            runs = getattr(self, attr)[1]

        for index in range(runs):
            destination = os.path.join(
                output_dir, stem + suffix_template.format(index))
            rendered = pipeline(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(destination, rate=SAMPLE_RATE, data=rendered)
"num_runs": 5, }, { "instance": AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0), "num_runs": 5, }, { "instance": AddGaussianSNR(p=1.0), "num_runs": 5 }, { "instance": AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir")), "num_runs": 1, }, { "instance": AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir"), leave_length_unchanged=True), "num_runs": 1, "name": "AddImpulseResponseLeaveLengthUnchanged", }, { "instance":
def transform(file_path, output_folder, iterations):
    """
    Write ``iterations`` augmented copies of ``file_path`` per transformation.

    Every transformation in the table below is applied to the loaded
    samples ``iterations`` times; each result is written to
    ``<output_folder>/<Transformation>_<file_name>_<i>.wav``, where
    ``<file_name>`` is the input's base name with ``.wav`` removed.
    """
    samples = load_wav_file(file_path)
    file_name = os.path.basename(file_path).replace('.wav', '')

    # (output label, lazy transform factory), in execution order. Factories
    # keep construction interleaved with writing, one transformation at a
    # time.
    catalogue = (
        ('TimeMask', lambda: TimeMask(p=1.0)),
        ('FrequencyMask', lambda: FrequencyMask(p=1.0)),
        ('AddGaussianSNR', lambda: AddGaussianSNR(p=1.0)),
        ('PitchShift',
         lambda: PitchShift(min_semitones=-4, max_semitones=4, p=1.0)),
        # NOTE(review): p=0.5 here, unlike the p=1.0 used by every other
        # entry -- confirm this is intended.
        ('TimeStretch',
         lambda: TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5)),
        ('AddGaussianNoise',
         lambda: AddGaussianNoise(min_amplitude=0.001,
                                  max_amplitude=0.015,
                                  p=1.0)),
        ('Shift',
         lambda: Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)),
        ('Shift without rollover',
         lambda: Shift(min_fraction=-0.5,
                       max_fraction=0.5,
                       rollover=False,
                       p=1.0)),
        ('Normalize', lambda: Normalize(p=1.0)),
        ('AddImpulseResponse',
         lambda: AddImpulseResponse(p=1.0,
                                    ir_path=os.path.join(DEMO_DIR, "ir"))),
        ('Resample', lambda: Resample(p=1.0)),
        ('ClippingDistortion', lambda: ClippingDistortion(p=1.0)),
        ('AddBackgroundNoise',
         lambda: AddBackgroundNoise(
             sounds_path=os.path.join(DEMO_DIR, "background_noises"),
             p=1.0)),
        ('AddShortNoises',
         lambda: AddShortNoises(
             sounds_path=os.path.join(DEMO_DIR, "short_noises"),
             min_snr_in_db=0,
             max_snr_in_db=8,
             min_time_between_sounds=2.0,
             max_time_between_sounds=4.0,
             burst_probability=0.4,
             min_pause_factor_during_burst=0.01,
             max_pause_factor_during_burst=0.95,
             min_fade_in_time=0.005,
             max_fade_in_time=0.08,
             min_fade_out_time=0.01,
             max_fade_out_time=0.1,
             p=1.0,
         )),
    )

    for label, build in catalogue:
        pipeline = Compose([build()])
        for run in range(iterations):
            wav_name = "{}_{}_{}.wav".format(label, file_name, run)
            destination = '{}/{}'.format(output_folder, wav_name)
            rendered = pipeline(samples=samples, sample_rate=SAMPLE_RATE)
            wavfile.write(destination, rate=SAMPLE_RATE, data=rendered)
def applyTransformations(fileName, output_dir, auxiliarSoundsDir):
    """Apply each demo transformation to ``fileName`` and save the results.

    Outputs are written to ``output_dir`` as
    ``<name>_<Transformation>_<NNN>.wav``, where ``<name>`` is the input's
    base name without its extension.

    Args:
        fileName: Path of the wav file to augment.
        output_dir: Directory that receives the augmented wav files.
        auxiliarSoundsDir: Directory containing the ``helperSounds/ir``,
            ``helperSounds/background_noises`` and
            ``helperSounds/short_noises`` folders used by the impulse
            response, background noise and short-noise transformations.
    """
    # Derive the stem from the base name only. Splitting the whole path on
    # "." returned an empty name for inputs such as "./sounds/a.wav".
    name = os.path.splitext(os.path.basename(fileName))[0]
    samples = load_wav_file(fileName)

    def save_runs(augmenter, label, num_runs=5):
        # Run the augmenter num_runs times, writing one numbered file per run.
        for i in range(num_runs):
            output_file_path = os.path.join(
                output_dir, "{}_{}_{:03d}.wav".format(name, label, i))
            augmented_samples = augmenter(samples=samples,
                                          sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path,
                          rate=SAMPLE_RATE,
                          data=augmented_samples)

    # Each augmenter is built immediately before it is used, so a failing
    # constructor does not prevent the earlier transformations from being
    # written.
    save_runs(
        Compose([
            AddImpulseResponse(p=1.0,
                               ir_path=os.path.join(auxiliarSoundsDir,
                                                    "helperSounds/ir"))
        ]),
        "AddImpulseResponse",
        num_runs=1)

    save_runs(Compose([FrequencyMask(p=1.0)]), "FrequencyMask")

    save_runs(Compose([TimeMask(p=1.0)]), "TimeMask")

    save_runs(Compose([AddGaussianSNR(p=1.0)]), "AddGaussianSNR")

    save_runs(
        Compose([
            AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0)
        ]),
        "AddGaussianNoise")

    save_runs(
        Compose([TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0)]),
        "TimeStretch")

    # The label used to be misspelled "itchShift" in the output file name.
    save_runs(
        Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)]),
        "PitchShift")

    save_runs(
        Compose([Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0)]),
        "Shift")

    save_runs(
        Compose([
            Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0)
        ]),
        "ShiftWithoutRollover")

    save_runs(Compose([Normalize(p=1.0)]), "Normalize", num_runs=1)

    save_runs(Compose([ClippingDistortion(p=1.0)]), "ClippingDistortion")

    save_runs(
        Compose([
            AddBackgroundNoise(sounds_path=os.path.join(
                auxiliarSoundsDir, "helperSounds/background_noises"),
                               p=1.0)
        ]),
        "AddBackgroundNoise")

    save_runs(
        Compose([
            AddShortNoises(
                sounds_path=os.path.join(auxiliarSoundsDir,
                                         "helperSounds/short_noises"),
                min_snr_in_db=0,
                max_snr_in_db=8,
                min_time_between_sounds=2.0,
                max_time_between_sounds=4.0,
                burst_probability=0.4,
                min_pause_factor_during_burst=0.01,
                max_pause_factor_during_burst=0.95,
                min_fade_in_time=0.005,
                max_fade_in_time=0.08,
                min_fade_out_time=0.01,
                max_fade_out_time=0.1,
                p=1.0,
            )
        ]),
        "AddShortNoises")