Example #1
import jams
import muda
import numpy as np


def test_jam_pack():

    jam = jams.JAMS()

    sr = 22050
    y = np.zeros(sr)

    muda.jam_pack(jam, y=y, sr=sr)

    # Make sure the jam now has a mudabox
    assert hasattr(jam.sandbox, 'muda')
    assert hasattr(jam.sandbox.muda, 'history')
    assert hasattr(jam.sandbox.muda, 'state')

    assert jam.sandbox.muda['y'] is y
    assert jam.sandbox.muda['sr'] == sr
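The test above only checks the packed container. As a minimal usage sketch (assuming muda's stock PitchShift deformer and the _audio packing convention used in the examples below), the packed audio flows through a transform like this:

import jams
import muda
import numpy as np

# Pack a one-second 440 Hz tone; deformers read audio from the _audio field
sr = 22050
y = np.sin(2 * np.pi * 440.0 * np.arange(sr) / sr)
jam = jams.JAMS()
jam.file_metadata.duration = 1.0  # deformers expect a known duration
jam = muda.jam_pack(jam, _audio=dict(y=y, sr=sr))

# transform() yields one new jam per deformation, each carrying its own audio
shifter = muda.deformers.PitchShift(n_semitones=2)
for jam_out in shifter.transform(jam):
    y_shifted = jam_out.sandbox.muda._audio['y']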
Example #2
import jams
import muda
from muda import Pipeline
from muda.deformers import (Bypass, DynamicRangeCompression,
                            LinearPitchShift)


def transformations(y, sample_rate, n_transforms):
    """Generate transformations for the given audio data.

    Args:
        y (np.ndarray): Input audio data.
        sample_rate (number): Sampling rate of the audio.
        n_transforms (tuple): Pair (n_pitches, n_drc) giving the number of
            pitch-shift and dynamic-range-compression transforms to apply.

    Returns:
        Iterator of np.ndarray: The transformed audio data.
    """
    # Return empty iterator if number of transforms is zero
    if n_transforms == (0, 0):
        return iter(())

    drc_presets = ['radio', 'film standard']
    n_pitches, n_drc = n_transforms

    # Create deformer for applying transformations
    # It is assumed that n_pitches is non-zero at this point
    deformer = LinearPitchShift(n_samples=n_pitches, lower=-3.5, upper=3.5)
    if n_drc > 0:
        drc = DynamicRangeCompression(preset=drc_presets[:n_drc])
        deformer = Pipeline(steps=[('pitch_shift',
                                    Bypass(deformer)), ('drc', Bypass(drc))])

    # Create JAMS object for input audio and return iterable transforms
    jam = muda.jam_pack(jams.JAMS(), _audio=dict(y=y, sr=sample_rate))
    return map(lambda x: x.sandbox.muda._audio['y'], deformer.transform(jam))
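A hypothetical call with one second of noise: five pitch shifts crossed with both DRC presets. Note that the Bypass wrappers also pass each stage's input through unmodified, so the iterator includes partially deformed and undeformed versions as well:

import numpy as np

y = np.random.randn(22050)
for y_aug in transformations(y, sample_rate=22050, n_transforms=(5, 2)):
    print(y_aug.shape)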
Example #3
def _make_noisy(self, audio):
    # Add noise to roughly half of the inputs at random
    if np.random.uniform() > 0.5:
        # Pack the raw audio into the pre-built JAMS container
        jam_packed_audio = muda.jam_pack(self.JAM_OBJECT,
                                         _audio=dict(
                                             y=audio,
                                             sr=self.SAMPLING_RATE))
        output_jam = list(self.noise_deformer.transform(jam_packed_audio))
        output = output_jam[0].sandbox.muda._audio['y']
        sr = output_jam[0].sandbox.muda._audio['sr']
        # Resample in case the deformer changed the sample rate
        output = librosa.core.resample(output, sr, self.SAMPLING_RATE)
        return output
    else:
        return audio
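self.noise_deformer is constructed elsewhere in this class; a plausible sketch (hypothetical file name and weights, using muda's BackgroundNoise deformer) would be:

import muda

# Hypothetical: mix one noise recording into the signal at a random weight
noise_deformer = muda.deformers.BackgroundNoise(n_samples=1,
                                                files=['noise.wav'],
                                                weight_min=0.1,
                                                weight_max=0.5)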
Example #4
def do_augmentation(self, item_data):

    n_augmentations = self.augmentation_samples
    n_segments = item_data.shape[0]
    sgmt_duration = item_data.shape[1]
    n_channels = item_data.shape[2]
    augmented_data = numpy.zeros(
        (n_segments + n_augmentations, sgmt_duration, n_channels))

    # Create a jam for each segment and channel in item_data
    for s_idx, segment in enumerate(item_data):
        for channel, sgmt_ch in enumerate(segment.T):

            duration_smp = len(sgmt_ch)
            duration_sec = duration_smp / self.desired_fs

            jam = jams.JAMS()
            jam.file_metadata.duration = duration_sec

            j_orig = muda.jam_pack(jam,
                                   _audio=dict(y=sgmt_ch,
                                               sr=self.desired_fs))

            for aug_idx, j_new in enumerate(
                    self.augmentation_chain.transform(j_orig)):
                # Make sure the output duration matches the input:
                # pad with zeros or truncate as needed
                augmented_audio = j_new['sandbox']['muda']['_audio']['y']

                len_diff = duration_smp - len(augmented_audio)
                if len_diff > 0:
                    augmented_audio = numpy.concatenate(
                        (augmented_audio, numpy.zeros(len_diff)))
                if len_diff < 0:
                    augmented_audio = augmented_audio[:len_diff]

                augmented_data[s_idx + aug_idx, :,
                               channel] = augmented_audio

    return augmented_data
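Likewise, self.augmentation_chain is assumed to be a muda Pipeline producing several jams per input; the deformer choices below are illustrative, not the author's:

import muda

# Hypothetical chain: four random pitch shifts, each compressed with the
# 'radio' preset, giving four augmented outputs per input jam
augmentation_chain = muda.Pipeline(steps=[
    ('pitch', muda.deformers.RandomPitchShift(n_samples=4, sigma=1.0)),
    ('drc', muda.deformers.DynamicRangeCompression(preset='radio')),
])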
Example #5
import muda
import numpy as np


def jam_impulse():
    sr = 22050

    # A 1.5-second signal containing a single unit impulse at its midpoint
    impulse = np.zeros(round(1.5 * sr))
    impulse[len(impulse) // 2] = 1.0

    # Make a JAMS object for this audio, for testing purposes.
    # Keys are frequencies in Hz; values are (start, end) intervals in seconds.
    freq_dict = {
        50.0: [(0.0, 0.6), (0.8, 1.2)],
        100.0: [(0.0, 0.6), (1.0, 1.1)],
        400.0: [(0.0, 0.9), (1.0, 1.1)],
        800.0: [(0.5, 0.9), (1.2, 1.5)],
        1200.0: [(1.2, 1.5)],
    }

    # make_jam is a project-local helper that builds the annotated JAMS
    jam = make_jam(freq_dict, sr, 1.5)

    if jam.file_metadata.duration is None:
        jam.file_metadata.duration = 1.5

    return muda.jam_pack(jam, _audio=dict(y=impulse, sr=sr))
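The packed jam can then be driven through any deformer; for instance (hypothetically) a time stretch, which should move the annotated intervals along with the audio:

import muda

# Doubling the rate roughly halves the number of samples
stretcher = muda.deformers.TimeStretch(rate=2.0)
jam_fast = next(stretcher.transform(jam_impulse()))
y_fast = jam_fast.sandbox.muda._audio['y']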
Example #6
    # Excerpt from inside a loop over input file names (fname)
    in_path = os.path.join(original_dataset_h5_dir, fname)
    # TODO: is os.path.basename(in_path) needed here?
    out_fname = os.path.basename(in_path).replace('original',
                                                  aug_str + '-' + instance_str)
    out_path = os.path.join(aug_dataset_h5_dir, out_fname)
    # Skip files that have already been augmented
    if os.path.exists(out_path):
        continue
    with h5py.File(in_path, 'r') as f_in:
        with h5py.File(out_path, 'w') as f_out:
            f_out["sample_rate"] = localmodule.get_sample_rate()
            waveform_group = f_out.create_group("waveforms")
            for clip_name, data in f_in['waveforms'].items():
                jam_in = create_jams(clip_name, in_path, data.value.flatten(),
                                     f_in['sample_rate'].value)
                jam_in = muda.jam_pack(jam_in,
                                       _audio=dict(
                                           y=data.value.flatten(),
                                           sr=localmodule.get_sample_rate()))

                # Apply data augmentation.
                jam_tf = deformer.transform(jam_in)

                # Get jam from jam iterator. The iterator has only one element.
                jam_out = next(jam_tf)

                # Add audio to the new h5 file
                waveform_group[clip_name] = jam_out.sandbox.muda._audio.pop(
                    'y')

                # Add the augmentation parameters to the dataset attributes
                for k, v in localmodule.flatten_dict(
                        jam_out.sandbox.muda).items():
                    waveform_group[clip_name].attrs[k] = v