def test_jam_pack():
    jam = jams.JAMS()

    sr = 22050
    y = np.zeros(sr)

    muda.jam_pack(jam, y=y, sr=sr)

    # Make sure the jam now has a mudabox
    assert hasattr(jam.sandbox, 'muda')
    assert hasattr(jam.sandbox.muda, 'history')
    assert hasattr(jam.sandbox.muda, 'state')

    assert jam.sandbox.muda['y'] is y
    assert jam.sandbox.muda['sr'] == sr
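# Usage sketch (not from the original source): the test above packs arbitrary
# keys, but muda's deformers expect the waveform under the '_audio' key, which
# is the convention the remaining snippets follow. A minimal, assumed example
# with a silent placeholder signal:
import jams
import muda
import numpy as np

sr = 22050
y = np.zeros(sr)
jam = muda.jam_pack(jams.JAMS(), _audio=dict(y=y, sr=sr))

# Deformers read and write the waveform through sandbox.muda._audio
assert jam.sandbox.muda._audio['sr'] == sr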
def transformations(y, sample_rate, n_transforms):
    """Generate transformations for the given audio data.

    Args:
        y (np.ndarray): Input audio data.
        sample_rate (number): Sampling rate of audio.
        n_transforms (tuple): Number of transformations to apply, given as
            ``(n_pitches, n_drc)``.

    Returns:
        Iterator over the transformed audio data (np.ndarray).
    """
    # Return empty iterator if number of transforms is zero
    if n_transforms == (0, 0):
        return iter(())

    drc_presets = ['radio', 'film standard']
    n_pitches, n_drc = n_transforms

    # Create deformer for applying transformations
    # It is assumed that n_pitches is non-zero at this point
    deformer = LinearPitchShift(n_samples=n_pitches, lower=-3.5, upper=3.5)
    if n_drc > 0:
        drc = DynamicRangeCompression(preset=drc_presets[:n_drc])
        deformer = Pipeline(steps=[('pitch_shift', Bypass(deformer)),
                                   ('drc', Bypass(drc))])

    # Create JAMS object for input audio and return iterable transforms
    jam = muda.jam_pack(jams.JAMS(), _audio=dict(y=y, sr=sample_rate))
    return map(lambda x: x.sandbox.muda._audio['y'], deformer.transform(jam))
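# Usage sketch (not from the original source): calling transformations() on a
# loaded clip. The file path is a placeholder, and LinearPitchShift,
# DynamicRangeCompression, Pipeline, and Bypass are assumed to be imported
# from muda / muda.deformers at module level.
import librosa

y, sr = librosa.load('clip.wav', sr=22050)  # placeholder path
for y_aug in transformations(y, sr, n_transforms=(4, 2)):
    # Each y_aug is one pitch-shifted (and optionally compressed) variant of y
    print(y_aug.shape)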
def _make_noisy(self, audio):
    if np.random.uniform() > 0.5:
        # Pack the audio into the pre-built (blank) JAMS object
        jam_packed_audio = muda.jam_pack(
            self.JAM_OBJECT,
            _audio=dict(y=audio, sr=self.SAMPLING_RATE))
        output_jam = [
            x for x in self.noise_deformer.transform(jam_packed_audio)
        ]
        output = output_jam[0].sandbox.muda._audio['y']
        sr = output_jam[0].sandbox.muda._audio['sr']

        # Resample back to the target rate in case the deformer changed it
        output = librosa.core.resample(output, sr, self.SAMPLING_RATE)
        return output
    else:
        return audio
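# Setup sketch (not from the original source): one way the attributes used by
# _make_noisy could be initialized. The class name, noise file paths, clip
# duration, and BackgroundNoise parameter values are assumptions for
# illustration only.
import jams
import muda

class NoisyAugmenter:  # hypothetical owner of _make_noisy
    def __init__(self, noise_files, sampling_rate=22050, clip_duration=4.0):
        self.SAMPLING_RATE = sampling_rate
        self.JAM_OBJECT = jams.JAMS()
        # BackgroundNoise needs a duration on the JAMS metadata to know how
        # much noise to sample
        self.JAM_OBJECT.file_metadata.duration = clip_duration
        self.noise_deformer = muda.deformers.BackgroundNoise(
            n_samples=1,
            files=noise_files,   # paths to background-noise recordings
            weight_min=0.1,
            weight_max=0.5)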
def do_augmentation(self, item_data):
    n_augmentations = self.augmentation_samples
    n_segments = item_data.shape[0]
    sgmt_duration = item_data.shape[1]
    n_channels = item_data.shape[2]

    augmented_data = numpy.zeros(
        (n_segments + n_augmentations, sgmt_duration, n_channels))

    # Create a jam for each row and channel in item_data
    for s_idx, segment in enumerate(item_data):
        for channel, sgmt_ch in enumerate(segment.T):
            duration_smp = len(sgmt_ch)
            duration_sec = duration_smp / self.desired_fs

            jam = jams.JAMS()
            jam.file_metadata.duration = duration_sec
            j_orig = muda.jam_pack(jam,
                                   _audio=dict(y=sgmt_ch, sr=self.desired_fs))

            for aug_idx, j_new in enumerate(
                    self.augmentation_chain.transform(j_orig)):
                # Make sure output duration is the same as input
                augmented_audio = j_new['sandbox']['muda']['_audio']['y']
                len_diff = duration_smp - len(augmented_audio)
                if len_diff > 0:
                    # Pad with zeros if the augmented audio is shorter
                    augmented_audio = numpy.concatenate(
                        (augmented_audio, numpy.zeros(len_diff)))
                if len_diff < 0:
                    # Trim if the augmented audio is longer
                    augmented_audio = augmented_audio[:len_diff]

                augmented_data[s_idx + aug_idx, :, channel] = augmented_audio

    return augmented_data
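# Setup sketch (not from the original source): one plausible way to build the
# augmentation_chain used above. The deformer choice and shift range are
# assumptions; any muda deformer or Pipeline exposing transform() would fit.
import muda

augmentation_samples = 4
augmentation_chain = muda.deformers.LinearPitchShift(
    n_samples=augmentation_samples, lower=-2.0, upper=2.0)
# In the class above these would be stored as self.augmentation_samples and
# self.augmentation_chain, with self.desired_fs holding the working sample rate.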
def jam_impulse():
    sr = 22050
    impulse = np.zeros(round(1.5 * sr))
    impulse[len(impulse) // 2] = 1.0

    # Make a jam object for this audio - for testing purposes
    freq_dict = {
        50.0: [(0.0, 0.6), (0.8, 1.2)],
        100.0: [(0.0, 0.6), (1.0, 1.1)],
        400.0: [(0.0, 0.9), (1.0, 1.1)],
        800.0: [(0.5, 0.9), (1.2, 1.5)],
        1200.0: [(1.2, 1.5)],
    }
    jam = make_jam(freq_dict, sr, 1.5)
    if jam.file_metadata.duration is None:
        jam.file_metadata.duration = 1.5

    return muda.jam_pack(jam, _audio=dict(y=impulse, sr=sr))
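# Usage sketch (not from the original source): feeding the impulse fixture
# through a muda deformer and pulling the augmented audio back out. The
# LinearPitchShift parameters are illustrative assumptions.
import muda

deformer = muda.deformers.LinearPitchShift(n_samples=3, lower=-2.0, upper=2.0)
for jam_out in deformer.transform(jam_impulse()):
    y_aug = jam_out.sandbox.muda._audio['y']
    assert len(y_aug) > 0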
in_path = os.path.join(original_dataset_h5_dir, fname)
# TODO do I need os.path.basename(in_path)
out_fname = os.path.basename(in_path).replace(
    'original', aug_str + '-' + instance_str)
out_path = os.path.join(aug_dataset_h5_dir, out_fname)
if os.path.exists(out_path):
    continue

with h5py.File(in_path, 'r') as f_in:
    with h5py.File(out_path, 'w') as f_out:
        f_out["sample_rate"] = localmodule.get_sample_rate()
        waveform_group = f_out.create_group("waveforms")

        for clip_name, data in f_in['waveforms'].items():
            jam_in = create_jams(clip_name, in_path, data.value.flatten(),
                                 f_in['sample_rate'].value)
            jam_in = muda.jam_pack(jam_in, _audio=dict(
                y=data.value.flatten(),
                sr=localmodule.get_sample_rate()))

            # Apply data augmentation.
            jam_tf = deformer.transform(jam_in)

            # Get jam from jam iterator. The iterator has only one element.
            jam_out = next(jam_tf)

            # Add audio to new h5 file
            waveform_group[clip_name] = jam_out.sandbox.muda._audio.pop('y')

            # Add augmentation parameters to attrs
            for k, v in localmodule.flatten_dict(
                    jam_out.sandbox.muda).items():
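# Setup sketch (not from the original source): a deformer whose transform()
# yields exactly one jam per clip, matching the "only one element" comment in
# the excerpt above. The semitone value and naming strings are illustrative
# assumptions, not taken from the original script.
import muda

deformer = muda.deformers.PitchShift(n_semitones=1.0)  # single value -> single output
aug_str = 'pitch'   # hypothetical augmentation name used in out_fname
instance_str = '0'  # hypothetical instance index used in out_fname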