def __init__(self):
    """Initialise the parent class and build the ``Augment_Time`` pipeline.

    Sets ``self.p`` to 0.5 and stores in ``self.augmenter`` a ``Compose``
    chain made of, in order: Gaussian noise, time stretch, pitch shift,
    frequency mask and time mask.
    """
    super(Augment_Time, self).__init__()
    self.p = 0.5

    # Stages are instantiated in the same order in which they are chained.
    gaussian_noise = AddGaussianNoise(
        min_amplitude=0.001, max_amplitude=0.01, p=0.3)
    time_stretch = TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5)
    pitch_shift = PitchShift(min_semitones=-4, max_semitones=4, p=0.5)
    # The two masks are created without arguments, i.e. with whatever
    # defaults the transform classes define.
    frequency_mask = FrequencyMask()
    time_mask = TimeMask()

    # Disabled stage, kept for reference:
    # Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5)
    stages = [
        gaussian_noise,
        time_stretch,
        pitch_shift,
        frequency_mask,
        time_mask,
    ]
    self.augmenter = Compose(stages)
def compose_without_noise(ir_path='data/impulse'):
    """Build the augmentation pipeline that includes an impulse-response stage.

    Args:
        ir_path: Forwarded to ``MyAddImpulseResponse`` as its ``ir_path``.

    Returns:
        A ``MyCompose`` wrapping the nine transforms below, created with
        ``p=1.0`` and ``max_augs=3``.
    """
    base_prob = 0.25
    # Time stretch and pitch shift get a fraction of the base probability.
    stretch_prob = base_prob / 10
    pitch_prob = base_prob / 25

    stages = [
        AddGaussianNoise(p=base_prob),
        Shift(min_fraction=-0.2, max_fraction=0.2, p=base_prob),
        FrequencyMask(p=base_prob),
        TimeMask(max_band_part=0.25, p=base_prob),
        AddGaussianSNR(p=base_prob),
        ClippingDistortion(max_percentile_threshold=20, p=base_prob),
        MyAddImpulseResponse(ir_path=ir_path, p=base_prob),
        TimeStretch(p=stretch_prob),
        PitchShift(p=pitch_prob),
    ]
    return MyCompose(stages, p=1.0, max_augs=3)
def compose(sounds_path):
    """Build the augmentation pipeline that includes background noise.

    Args:
        sounds_path: Forwarded to ``AddBackgroundNoise`` as ``sounds_path``.

    Returns:
        A ``Compose`` over the ten transforms below, created with ``p=0.4``
        and ``shuffle=True``.
    """
    base_prob = 0.2
    # Time stretch and pitch shift get a fraction of the base probability.
    stretch_prob = base_prob / 10
    pitch_prob = base_prob / 30

    stages = [
        MyGain(p=base_prob),
        AddGaussianNoise(p=base_prob),
        Shift(min_fraction=-0.25, max_fraction=0.25, p=base_prob),
        FrequencyMask(p=base_prob),
        TimeMask(max_band_part=0.25, p=base_prob),
        AddGaussianSNR(p=base_prob),
        ClippingDistortion(max_percentile_threshold=20, p=base_prob),
        AddBackgroundNoise(sounds_path=sounds_path, p=base_prob),
        TimeStretch(p=stretch_prob),
        PitchShift(p=pitch_prob),
    ]
    return Compose(stages, p=0.4, shuffle=True)
def __init__(
    self,
    manifest_path,
    sample_rate,
    max_sample_size=None,
    min_sample_size=None,
    shuffle=True,
    min_length=0,
    pad=False,
    normalize=False,
):
    """Initialise the base dataset and attach the two augmentation stages.

    Every argument is forwarded unchanged, by keyword, to the parent
    class initialiser. Afterwards two attributes are set:

    * ``self.pre_transform``: a ``Compose`` of a frequency mask and a
      time mask, each with ``p=0.5``.
    * ``self.post_transform``: an ``augment.EffectChain`` with reverb,
      ``channels(1)`` and clip applied in that order.
    """
    parent_kwargs = dict(
        manifest_path=manifest_path,
        sample_rate=sample_rate,
        max_sample_size=max_sample_size,
        min_sample_size=min_sample_size,
        shuffle=shuffle,
        min_length=min_length,
        pad=pad,
        normalize=normalize,
    )
    super(AugmentedFileAudioDataset, self).__init__(**parent_kwargs)

    # Disabled stages, kept for reference:
    #   AddGaussianNoise(min_amplitude=1e-3, max_amplitude=5e-2, p=0.8)
    #   PitchShift(min_semitones=-4, max_semitones=4, p=0.8)
    #   ClippingDistortion(min_percentile_threshold=10,
    #                      max_percentile_threshold=40, p=0.2)
    frequency_mask = FrequencyMask(
        min_frequency_band=0.0, max_frequency_band=0.05, p=0.5)
    time_mask = TimeMask(min_band_part=0.0, max_band_part=0.05, p=0.5)
    self.pre_transform = Compose([frequency_mask, time_mask])

    reverb_params = RandomReverb()
    clip_params = RandomClip()
    # Instantiated but not wired into the chain below; the
    # `.time_dropout(200)` stage is currently disabled.
    time_dropout_params = RandomTimeDropout()

    chain = augment.EffectChain()
    chain = chain.reverb(reverb_params)
    chain = chain.channels(1)
    self.post_transform = chain.clip(clip_params)
"num_runs": 5 }, { "instance": Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0), "num_runs": 5 }, { "instance": Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0), "num_runs": 5, "name": "ShiftWithoutRollover", }, { "instance": TimeMask(p=1.0), "num_runs": 5 }, { "instance": TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0), "num_runs": 5 }, { "instance": Trim(p=1.0), "num_runs": 1 }, ] for sound_file_path in sound_file_paths: samples, sample_rate = load_sound_file(sound_file_path, sample_rate=None,
def generate(self, wave_file, output_dir):
    """
    For each enabled transformation, apply it to an example sound and write
    the transformed sounds to an output folder.

    Each transform is controlled by an attribute of the same name on
    ``self`` (e.g. ``self.TimeMask``). Element ``[0]`` of that attribute is
    tested for truthiness to enable the transform. For ``AddWhiteNoise``
    and ``AddPinkNoise`` element ``[1]`` gives the number of files to
    write; ``AddImpulseResponse`` and ``Normalize`` write one file and
    every other transform writes five.

    Args:
        wave_file: Path handed to ``load_wav_file``; its base name without
            the final extension prefixes every output file name.
        output_dir: Directory the ``.wav`` files are written into.
    """
    samples = load_wav_file(wave_file)
    # Strip only the final extension. Splitting on the first "." would turn
    # "take.01.wav" and "take.02.wav" into the same stem and make their
    # outputs overwrite each other.
    _filename = os.path.splitext(os.path.basename(wave_file))[0]

    def _render(transform, suffix_template, runs):
        # Apply `transform` `runs` times and write one numbered file per run.
        augmenter = Compose([transform])
        for i in range(runs):
            output_file_path = os.path.join(
                output_dir, _filename + suffix_template.format(i))
            augmented_samples = augmenter(samples=samples,
                                          sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path,
                          rate=SAMPLE_RATE,
                          data=augmented_samples)

    # AddImpulseResponse
    if self.AddImpulseResponse[0]:
        _render(
            AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir")),
            "_AddImpulseResponse{:03d}.wav", 1)

    # FrequencyMask
    if self.FrequencyMask[0]:
        _render(FrequencyMask(p=1.0), "_FrequencyMask{:03d}.wav", 5)

    # TimeMask
    if self.TimeMask[0]:
        _render(TimeMask(p=1.0), "_TimeMask{:03d}.wav", 5)

    # AddGaussianSNR
    if self.AddGaussianSNR[0]:
        _render(AddGaussianSNR(p=1.0), "_AddGaussianSNR{:03d}.wav", 5)

    # AddGaussianNoise
    if self.AddGaussianNoise[0]:
        _render(
            AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0),
            "_AddGaussianNoise{:03d}.wav", 5)

    # TimeStretch
    if self.TimeStretch[0]:
        _render(TimeStretch(min_rate=0.5, max_rate=1.5, p=1.0),
                "_TimeStretch{:03d}.wav", 5)

    # PitchShift
    if self.PitchShift[0]:
        _render(PitchShift(min_semitones=-6, max_semitones=12, p=1.0),
                "_PitchShift{:03d}.wav", 5)

    # Shift
    if self.Shift[0]:
        _render(Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0),
                "_Shift{:03d}.wav", 5)

    # Shift without rollover
    if self.ShiftWithoutRoll[0]:
        _render(
            Shift(min_fraction=-0.2, max_fraction=0.2, rollover=False, p=1.0),
            "_ShiftWithoutRollover{:03d}.wav", 5)

    # Normalize
    if self.Normalize[0]:
        _render(Normalize(p=1.0), "_Normalize{:03d}.wav", 1)

    # Resample
    # NOTE(review): the resampled output is still written with
    # rate=SAMPLE_RATE, like every other transform - confirm this is intended.
    if self.Resample[0]:
        _render(
            Resample(min_sample_rate=12000, max_sample_rate=44100, p=1.0),
            "_Resample{:03d}.wav", 5)

    # ClippingDistortion
    if self.ClippingDistortion[0]:
        _render(ClippingDistortion(max_percentile_threshold=10, p=1.0),
                "_ClippingDistortion{:03d}.wav", 5)

    # AddBackgroundNoise
    if self.AddBackgroundNoise[0]:
        _render(
            AddBackgroundNoise(
                sounds_path=os.path.join(DEMO_DIR, "background_noises"),
                p=1.0),
            "_AddBackgroundNoise{:03d}.wav", 5)

    # AddWhiteNoise (run count comes from the setting itself)
    if self.AddWhiteNoise[0]:
        _render(
            AddBackgroundNoise(
                sounds_path=os.path.join(DEMO_DIR, "white_noises"), p=1.0),
            "_AddWhiteNoise{:03d}.wav", self.AddWhiteNoise[1])

    # AddPinkNoise (run count comes from the setting itself)
    if self.AddPinkNoise[0]:
        _render(
            AddBackgroundNoise(
                sounds_path=os.path.join(DEMO_DIR, "pink_noises"), p=1.0),
            "_AddPinkNoise{:03d}.wav", self.AddPinkNoise[1])

    # AddShortNoises
    if self.AddShortNoises[0]:
        _render(
            AddShortNoises(
                sounds_path=os.path.join(DEMO_DIR, "short_noises"),
                min_snr_in_db=0,
                max_snr_in_db=8,
                min_time_between_sounds=2.0,
                max_time_between_sounds=4.0,
                burst_probability=0.4,
                min_pause_factor_during_burst=0.01,
                max_pause_factor_during_burst=0.95,
                min_fade_in_time=0.005,
                max_fade_in_time=0.08,
                min_fade_out_time=0.01,
                max_fade_out_time=0.1,
                p=1.0,
            ),
            "_AddShortNoises{:03d}.wav", 5)
output_dir, "AddImpulseResponse_{:03d}.wav".format(0) ) augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE) wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples) # FrequencyMask augmenter = Compose([FrequencyMask(p=1.0)]) for i in range(5): output_file_path = os.path.join( output_dir, "FrequencyMask_{:03d}.wav".format(i) ) augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE) wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples) # TimeMask augmenter = Compose([TimeMask(p=1.0)]) for i in range(5): output_file_path = os.path.join(output_dir, "TimeMask_{:03d}.wav".format(i)) augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE) wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples) # AddGaussianSNR augmenter = Compose([AddGaussianSNR(p=1.0)]) for i in range(5): output_file_path = os.path.join( output_dir, "AddGaussianSNR_{:03d}.wav".format(i) ) augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE) wavfile.write(output_file_path, rate=SAMPLE_RATE, data=augmented_samples) # AddGaussianNoise
def __init__(self, augment_type, p, cross_valid=False):
    """Configure ``self.augment`` for the requested augmentation type.

    Args:
        augment_type: One of 'wham_weak', 'wham_strong', 'reverb_weak',
            'reverb_strong', 'cascade', 'distort' or 'none'.
        p: Probability passed to every transform of the 'cascade' and
            'distort' pipelines (the 'wham_*' pipelines always use ``p=1``).
        cross_valid: Selects the ``cv`` noise directory when true and the
            ``tr`` directory otherwise.

    Raises:
        ValueError: If ``augment_type`` is not one of the names above.
    """
    self.cross_valid = cross_valid
    self.sample_rate = 8000
    self.type = augment_type
    self.p = p

    if self.cross_valid:
        wham_path = '../../../librimix/data/wham_noise/cv'
    else:
        wham_path = '../../../librimix/data/wham_noise/tr'

    kind = self.type

    if kind in ('wham_weak', 'wham_strong'):
        # The weak variant mixes noise at a higher SNR range than the
        # strong one.
        snr_low, snr_high = (5, 15) if kind == 'wham_weak' else (2, 7)
        background = AddBackgroundNoise(sounds_path=wham_path,
                                        min_snr_in_db=snr_low,
                                        max_snr_in_db=snr_high,
                                        p=1)
        self.augment = Compose([background])
        return

    if kind in ('reverb_weak', 'reverb_strong'):
        # The three reverb settings are drawn once, here at construction.
        low, high = (0, 50) if kind == 'reverb_weak' else (50, 100)
        self.augment = AudioEffectsChain().reverb(
            reverberance=random.randrange(low, high),
            room_scale=random.randrange(low, high),
            stereo_depth=random.randrange(low, high),
        )
        return

    if kind == 'cascade':
        prob = self.p
        cascade_stages = [
            AddBackgroundNoise(sounds_path=wham_path, min_snr_in_db=0,
                               max_snr_in_db=5, p=prob),
            AddGaussianSNR(min_SNR=0.001, max_SNR=0.25, p=prob),
            ClippingDistortion(min_percentile_threshold=0,
                               max_percentile_threshold=40, p=prob),
            FrequencyMask(min_frequency_band=0.0, max_frequency_band=0.5,
                          p=prob),
            PolarityInversion(p=prob),
            Shift(min_fraction=-0.5, max_fraction=0.5, rollover=True,
                  p=prob),
            TimeMask(min_band_part=0.0, max_band_part=0.2, fade=False,
                     p=prob),
        ]
        self.augment = Compose(cascade_stages)
        return

    if kind == 'distort':
        prob = self.p
        distort_stages = [
            PitchShift(min_semitones=-4, max_semitones=4, p=prob),
            TimeStretch(min_rate=0.8, max_rate=1.25,
                        leave_length_unchanged=True, p=prob),
        ]
        self.augment = Compose(distort_stages)
        return

    if kind == 'none':
        self.augment = None
        return

    raise ValueError(
        "Did not recognize augmentation type. Received %s, expected 'wham_weak', 'wham_strong', 'reverb_weak', 'reverb_strong', 'cascade', 'distort', or 'none'." % self.type)
def transform(file_path, output_folder, iterations):
    """
    For each transformation, apply it to an example sound and write the
    transformed sounds to an output folder.

    Args:
        file_path: Path handed to ``load_wav_file``; its base name without
            the final extension becomes part of every output file name.
        output_folder: Directory the ``.wav`` files are written into.
        iterations: Number of files written per transformation.

    Output files are named ``<Transform>_<file name>_<index>.wav``.
    """
    samples = load_wav_file(file_path)
    # Strip only the final extension; replacing every ".wav" substring
    # would also alter names that contain it elsewhere and would miss
    # differently-cased extensions.
    file_name = os.path.splitext(os.path.basename(file_path))[0]

    def produce(transform_instance, name):
        # Apply one transform `iterations` times, one numbered file per run.
        augmenter = Compose([transform_instance])
        for i in range(iterations):
            # os.path.join avoids the "/name.wav" (filesystem root) result
            # that a plain "{}/{}" format yields for an empty folder.
            output_file_path = os.path.join(
                output_folder, "{}_{}_{}.wav".format(name, file_name, i))
            augmented_samples = augmenter(samples=samples,
                                          sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path,
                          rate=SAMPLE_RATE,
                          data=augmented_samples)

    # TimeMask
    produce(TimeMask(p=1.0), 'TimeMask')

    # FrequencyMask
    produce(FrequencyMask(p=1.0), 'FrequencyMask')

    # AddGaussianSNR
    produce(AddGaussianSNR(p=1.0), 'AddGaussianSNR')

    # PitchShift
    produce(PitchShift(min_semitones=-4, max_semitones=4, p=1.0),
            'PitchShift')

    # TimeStretch
    # p=1.0 like every other stage: with p=0.5 roughly half of the files
    # labelled "TimeStretch" would be unmodified copies of the input.
    produce(TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0), 'TimeStretch')

    # AddGaussianNoise
    produce(AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0),
            'AddGaussianNoise')

    # Shift
    produce(Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0), 'Shift')

    # Shift without rollover
    produce(
        Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0),
        'Shift without rollover')

    # Normalize
    produce(Normalize(p=1.0), 'Normalize')

    # AddImpulseResponse
    produce(AddImpulseResponse(p=1.0, ir_path=os.path.join(DEMO_DIR, "ir")),
            'AddImpulseResponse')

    # Resample
    produce(Resample(p=1.0), 'Resample')

    # ClippingDistortion
    produce(ClippingDistortion(p=1.0), 'ClippingDistortion')

    # AddBackgroundNoise
    produce(
        AddBackgroundNoise(
            sounds_path=os.path.join(DEMO_DIR, "background_noises"), p=1.0),
        'AddBackgroundNoise')

    # AddShortNoises
    produce(
        AddShortNoises(
            sounds_path=os.path.join(DEMO_DIR, "short_noises"),
            min_snr_in_db=0,
            max_snr_in_db=8,
            min_time_between_sounds=2.0,
            max_time_between_sounds=4.0,
            burst_probability=0.4,
            min_pause_factor_during_burst=0.01,
            max_pause_factor_during_burst=0.95,
            min_fade_in_time=0.005,
            max_fade_in_time=0.08,
            min_fade_out_time=0.01,
            max_fade_out_time=0.1,
            p=1.0,
        ),
        'AddShortNoises')
def applyTransformations(fileName, output_dir, auxiliarSoundsDir):
    """Apply every demo transformation to one sound and write the results.

    Args:
        fileName: Path handed to ``load_wav_file``; its base name without
            the final extension prefixes every output file name.
        output_dir: Directory the ``.wav`` files are written into.
        auxiliarSoundsDir: Directory containing ``helperSounds/ir``,
            ``helperSounds/background_noises`` and
            ``helperSounds/short_noises``, which are passed to the
            impulse-response, background-noise and short-noise transforms.

    Output files are named ``<name>_<Transform>_<index>.wav``.
    ``AddImpulseResponse`` and ``Normalize`` write one file; every other
    transform writes five.
    """
    # Derive the stem from the base name. Splitting the whole path on "."
    # first returned an empty name for any path starting with "./" or "../".
    name = os.path.splitext(os.path.basename(fileName))[0]
    samples = load_wav_file(fileName)

    def render(transform, name_template, runs):
        # Apply `transform` `runs` times and write one numbered file per run.
        augmenter = Compose([transform])
        for i in range(runs):
            output_file_path = os.path.join(output_dir,
                                            name_template.format(name, i))
            augmented_samples = augmenter(samples=samples,
                                          sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path,
                          rate=SAMPLE_RATE,
                          data=augmented_samples)

    # AddImpulseResponse
    render(
        AddImpulseResponse(p=1.0,
                           ir_path=os.path.join(auxiliarSoundsDir,
                                                "helperSounds/ir")),
        "{}_AddImpulseResponse_{:03d}.wav", 1)

    # FrequencyMask
    render(FrequencyMask(p=1.0), "{}_FrequencyMask_{:03d}.wav", 5)

    # TimeMask
    render(TimeMask(p=1.0), "{}_TimeMask_{:03d}.wav", 5)

    # AddGaussianSNR
    render(AddGaussianSNR(p=1.0), "{}_AddGaussianSNR_{:03d}.wav", 5)

    # AddGaussianNoise
    render(AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0),
           "{}_AddGaussianNoise_{:03d}.wav", 5)

    # TimeStretch
    render(TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0),
           "{}_TimeStretch_{:03d}.wav", 5)

    # PitchShift (file name previously misspelled as "itchShift")
    render(PitchShift(min_semitones=-4, max_semitones=4, p=1.0),
           "{}_PitchShift_{:03d}.wav", 5)

    # Shift
    render(Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0),
           "{}_Shift_{:03d}.wav", 5)

    # Shift without rollover
    render(Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0),
           "{}_ShiftWithoutRollover_{:03d}.wav", 5)

    # Normalize
    render(Normalize(p=1.0), "{}_Normalize_{:03d}.wav", 1)

    # ClippingDistortion
    render(ClippingDistortion(p=1.0), "{}_ClippingDistortion_{:03d}.wav", 5)

    # AddBackgroundNoise
    render(
        AddBackgroundNoise(sounds_path=os.path.join(
            auxiliarSoundsDir, "helperSounds/background_noises"),
                           p=1.0),
        "{}_AddBackgroundNoise_{:03d}.wav", 5)

    # AddShortNoises
    render(
        AddShortNoises(
            sounds_path=os.path.join(auxiliarSoundsDir,
                                     "helperSounds/short_noises"),
            min_snr_in_db=0,
            max_snr_in_db=8,
            min_time_between_sounds=2.0,
            max_time_between_sounds=4.0,
            burst_probability=0.4,
            min_pause_factor_during_burst=0.01,
            max_pause_factor_during_burst=0.95,
            min_fade_in_time=0.005,
            max_fade_in_time=0.08,
            min_fade_out_time=0.01,
            max_fade_out_time=0.1,
            p=1.0,
        ),
        "{}_AddShortNoises_{:03d}.wav", 5)