def spectrogram_features(song, fs, block_size, hop_size, spectrogram_type, to_log=True):
    """
    Computes spectrogram-like features of an audio signal.

    The signal is split into blocks, a window is created for the block size
    and the spectrogram function selected by spectrogram_type is applied to
    the blocks.

    Input:
      song - audio samples, passed as-is to split_to_blocks()
      fs - sampling rate, only used by the 'chromagram' type
      block_size, hop_size - blocking parameters passed to split_to_blocks()
      spectrogram_type - one of 'stft', 'reassigned', 'chromagram'
      to_log - forwarded to the selected spectrogram function
    Output: the value returned by the selected spectrogram function
    Raises: ValueError if spectrogram_type is not one of the supported names
    """
    supported_types = ('stft', 'reassigned', 'chromagram')
    # Fail early with a clear message; previously an unknown type surfaced
    # only later as an UnboundLocalError on the dispatch variable.
    if spectrogram_type not in supported_types:
        raise ValueError('unknown spectrogram_type: {!r}, expected one of {}'.format(
            spectrogram_type, ', '.join(supported_types)))

    # the block times are not needed for the features
    x, _ = split_to_blocks(song, block_size, hop_size=hop_size)
    w = create_window(block_size)

    if spectrogram_type == 'stft':
        return stft_spectrogram(x, w, to_log)
    if spectrogram_type == 'reassigned':
        return reassigned_spectrogram(x, w, to_log)
    # 'chromagram' is the only type that also needs the sampling rate
    return chromagram(x, w, fs, to_log=to_log)
 def transform(self, X, **transform_params):
     """
     Computes the chromagram of audio clip X and rescales it.

     Input: X - mono audio clip - numpy array of shape (samples,)
     Output: numpy array of shape (blocks, bins) with the rescaled chromagram

     The blocking and chromagram parameters are read from the instance
     (block_size, hop_size, sample_rate, window, bin_range, bin_division).
     Extra keyword arguments in transform_params are accepted and ignored.
     """
     blocks, _block_times = split_to_blocks(
         X, self.block_size, self.hop_size, self.sample_rate)

     chroma_db = chromagram(
         blocks,
         self.window,
         self.sample_rate,
         to_log=True,
         bin_range=self.bin_range,
         bin_division=self.bin_division)

     # Linear map of the raw dB range [-120.0, bin_count] to [0.0, 1.0];
     # bin_count is read from the second dimension of the blocks array.
     bin_count = blocks.shape[1]
     return (chroma_db + 120) / (120 + bin_count)
Exemplo n.º 3
0
### Chord labels

# df_labels = pd.read_csv(labels_file, sep='\t')
# labels_pcs = df_labels[df_labels.columns[1:]].as_matrix()

# blocking parameters; the hop is half of the block size
block_size = 4096
hop_size = 2048

print('loading audio:', audio_file)
x, fs = load_wav(audio_file)
print('splitting audio to blocks')
x_blocks, times = split_to_blocks(x, block_size, hop_size)
w = create_window(block_size)
print('computing chromagram')
X_chromagram = chromagram(x_blocks, w, fs, to_log=True)
features = X_chromagram

## Data preprocessing

### Features

print('scaling the input features')
# scaler = MinMaxScaler()
# X = scaler.fit_transform(features).astype('float32')
# Linear map of the assumed raw dB range [-120, features.shape[1]] to [0, 1].
# The offset has to be +120 in both places: with -120 the lower bound is not
# mapped to 0 and the denominator can become zero or negative.
# NOTE(review): models trained on the previous (mis-scaled) features expect
# the old scaling - retrain them or confirm before reusing saved weights.
X = (features.astype('float32') + 120) / (features.shape[1] + 120)

# 1D convolution expects a trailing channel axis
def conv_reshape(X):
    """
    Appends a singleton third axis to a 2D array.

    Input: X - array of shape (rows, cols)
    Output: array with the same data and shape (rows, cols, 1)
    """
    row_count = X.shape[0]
    col_count = X.shape[1]
    return X.reshape(row_count, col_count, 1)
def prepare_chomagram_and_labels(album, song_title, block_size, hop_size,
                                 bin_range, bin_division):
    """
    Prepares per-block chord labels and chromagram features for one song
    and stores both on disk.

    Input:
      album, song_title - identify the song below 'The_Beatles/' in the
        data directory 'data/beatles'
      block_size, hop_size - blocking parameters passed to split_to_blocks()
      bin_range - indexable with [0] and [1]; forwarded to chromagram() and
        used in the output file name
      bin_division - forwarded to chromagram() and used in the output
        file name
    Output: None

    Reads:
      data/beatles/audio-cd/<song>.wav
      data/beatles/chordlab/<song>.lab.pcs.tsv - tab-separated table with
        columns 'start', 'end', 'label', 'root', 'bass' and the twelve
        pitch classes
    Writes (parent directories are created when missing):
      data/beatles/chord-pcs/<block_size>_<hop_size>/<song>.pcs
        - tab-separated label and pitch classes for each block
      data/beatles/chromagram/block=.._hop=.._bins=..,.._div=../<song>.npz
        - compressed arrays X (chromagram) and times (block times)

    Progress information is printed to the standard output.
    """
    song = 'The_Beatles/' + album + '/' + song_title
    data_dir = 'data/beatles'
    audio_file = data_dir + '/audio-cd/' + song + '.wav'
    chord_file = data_dir + '/chordlab/' + song + '.lab.pcs.tsv'

    # ## Load audio
    print('loading audio:', audio_file)

    x, fs = load_wav(audio_file)

    print('sampling rate:', fs, 'Hz')
    print('number of samples:', len(x))
    print('duration in audio:', len(x) / fs, 'sec')

    # ## Load chords
    print('loading chords:', chord_file)
    chords = pd.read_csv(chord_file, sep='\t')
    print('shape:', chords.shape)
    print('duration in chords:', chords['end'].iloc[-1])

    pcs_cols = [
        'C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B'
    ]
    label_cols = ['label', 'root', 'bass'] + pcs_cols

    # ## Split audio to blocks

    x_blocks, x_times = split_to_blocks(x, block_size, hop_size, fs)
    print('blocks shape:', x_blocks.shape)
    print('number of blocks:', len(x_blocks))
    # start times for each block
    print('last block starts at:', x_times[-1], 'sec')

    # ## Mapping of chords to blocks

    def time_to_samples(time):
        # time * sampling rate, rounded to the nearest whole sample
        return np.round(time * fs)

    # express both the chord segments and the blocks in samples
    chords['start_sample'] = time_to_samples(chords['start'])
    chords['end_sample'] = time_to_samples(chords['end'])
    df_blocks = pd.DataFrame(
        {'start': time_to_samples(x_times).astype(np.int64)})
    df_blocks['end'] = df_blocks['start'] + block_size

    # lookup table: label -> root, bass and pitch classes
    label_dict = chords[label_cols].drop_duplicates().set_index('label')

    # rename() returns a new frame, so the chords table is left untouched
    df_labels = chords[['start_sample', 'end_sample', 'label']].rename(
        columns={'start_sample': 'start', 'end_sample': 'end'})

    # block_labels() is expected to return a frame with a 'label' column,
    # presumably one row per block - the assert below checks the row count
    df_labelled_blocks = block_labels(df_blocks, df_labels)

    df_block_pcs = df_labelled_blocks[['label']].join(
        label_dict, on='label')[['label'] + pcs_cols]

    # sanity check: labelling and the lookup must not add or drop blocks
    assert len(df_block_pcs) == len(df_blocks)

    block_labels_file = '{}/chord-pcs/{}_{}/{}.pcs'.format(
        data_dir, block_size, hop_size, song)
    print('block labels file:', block_labels_file)

    os.makedirs(os.path.dirname(block_labels_file), exist_ok=True)
    df_block_pcs.to_csv(block_labels_file, sep='\t', index=False)

    # ## Chromagram features

    w = create_window(block_size)
    X_chromagram = chromagram(x_blocks,
                              w,
                              fs,
                              to_log=True,
                              bin_range=bin_range,
                              bin_division=bin_division)

    chromagram_file = '{}/chromagram/block={}_hop={}_bins={},{}_div={}/{}.npz'.format(
        data_dir, block_size, hop_size, bin_range[0], bin_range[1],
        bin_division, song)

    print('chomagram file:', chromagram_file)

    os.makedirs(os.path.dirname(chromagram_file), exist_ok=True)
    np.savez_compressed(chromagram_file, X=X_chromagram, times=x_times)
def prepare_chomagram_and_labels(
    album,
    song_title,
    block_size,
    hop_size,
    bin_range,
    bin_division):
    """
    Computes and stores block-wise chord labels and a chromagram for a song.

    Input:
      album, song_title - locate the song as 'The_Beatles/<album>/<song_title>'
        within the data directory 'data/beatles'
      block_size, hop_size - blocking parameters for split_to_blocks()
      bin_range - pair-like (read via [0] and [1]), passed to chromagram()
        and encoded in the chromagram file name
      bin_division - passed to chromagram() and encoded in the chromagram
        file name
    Output: None - the results are written to files

    Input files:
      <data_dir>/audio-cd/<song>.wav
      <data_dir>/chordlab/<song>.lab.pcs.tsv (tab-separated; columns 'start',
        'end', 'label', 'root', 'bass' and the twelve pitch classes)
    Output files (directories are created as needed):
      <data_dir>/chord-pcs/<block_size>_<hop_size>/<song>.pcs
      <data_dir>/chromagram/block=.._hop=.._bins=..,.._div=../<song>.npz

    Progress is reported via print().
    """
    song = 'The_Beatles/'+album+'/'+song_title
    data_dir = 'data/beatles'
    audio_file = data_dir + '/audio-cd/' + song + '.wav'
    chord_file = data_dir  + '/chordlab/' + song + '.lab.pcs.tsv'

    # ## Load audio
    print('loading audio:', audio_file)

    x, fs = load_wav(audio_file)

    print('sampling rate:', fs, 'Hz')
    print('number of samples:', len(x))
    print('duration in audio:', len(x) / fs, 'sec')

    # ## Load chords
    print('loading chords:', chord_file)
    chords = pd.read_csv(chord_file, sep='\t')
    print('shape:', chords.shape)
    print('duration in chords:', chords['end'].iloc[-1])

    pcs_cols = ['C','Db','D','Eb','E','F','Gb','G','Ab','A','Bb','B']
    label_cols = ['label','root','bass'] + pcs_cols

    # ## Split audio to blocks

    x_blocks, x_times = split_to_blocks(x, block_size, hop_size, fs)
    print('blocks shape:', x_blocks.shape)
    print('number of blocks:', len(x_blocks))
    # start times for each block
    print('last block starts at:', x_times[-1], 'sec')

    # ## Mapping of chords to blocks

    def time_to_samples(time):
        # scale by the sampling rate and round to the nearest whole sample
        return np.round(time * fs)

    # chord segment boundaries and block boundaries, both in samples
    chords['start_sample'] = time_to_samples(chords['start'])
    chords['end_sample'] = time_to_samples(chords['end'])
    df_blocks = pd.DataFrame({'start': time_to_samples(x_times).astype(np.int64)})
    df_blocks['end'] = df_blocks['start'] + block_size

    # one row per distinct label with its root, bass and pitch classes
    label_dict = chords[label_cols].drop_duplicates().set_index('label')

    # rename() yields a new frame, no explicit copy is needed
    df_labels = chords[['start_sample', 'end_sample', 'label']].rename(
        columns={'start_sample': 'start', 'end_sample': 'end'})

    # the result must contain a 'label' column; presumably a row per block,
    # which is verified by the assert below
    df_labelled_blocks = block_labels(df_blocks, df_labels)

    df_block_pcs = df_labelled_blocks[['label']].join(label_dict, on='label')[['label'] + pcs_cols]

    # sanity check: no block may be lost or duplicated by the labelling
    assert len(df_block_pcs) == len(df_blocks)

    block_labels_file = '{}/chord-pcs/{}_{}/{}.pcs'.format(data_dir, block_size, hop_size, song)
    print('block labels file:', block_labels_file)

    os.makedirs(os.path.dirname(block_labels_file), exist_ok=True)
    df_block_pcs.to_csv(block_labels_file, sep='\t', index=False)

    # ## Chromagram features

    w = create_window(block_size)
    X_chromagram = chromagram(x_blocks, w, fs, to_log=True, bin_range=bin_range, bin_division=bin_division)

    chromagram_file = '{}/chromagram/block={}_hop={}_bins={},{}_div={}/{}.npz'.format(
        data_dir, block_size, hop_size, bin_range[0], bin_range[1], bin_division, song)

    print('chomagram file:', chromagram_file)

    os.makedirs(os.path.dirname(chromagram_file), exist_ok=True)
    np.savez_compressed(chromagram_file, X=X_chromagram, times=x_times)