def spectrogram(filename, block_size=2048, hop_size=512, to_log=True):
    song, fs = load_wav(filename)
    x, times = split_to_blocks(song, block_size, hop_size=hop_size)
    w = create_window(block_size)
    X = magnitude_spectrum(x * w) ** 2
    if to_log:
        # dBFS-like log scale, floored to avoid log(0)
        X = 20 * np.log10(np.maximum(1e-6, X))
    # imshow(X.T, interpolation='nearest', cmap='gray')
    image_filename = os.path.basename(filename).replace('.wav', '.png')
    # note: scipy.misc.imsave was removed in recent SciPy; imageio.imwrite is a common replacement
    scipy.misc.imsave('spectrogram/' + image_filename, X.T[::-1])
    return X, x, times
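# Hypothetical usage sketch for spectrogram(): 'test.wav' is a placeholder path, and the
# 'spectrogram/' output directory is created up front so the image can be written into it.
os.makedirs('spectrogram', exist_ok=True)
X, x, times = spectrogram('test.wav', block_size=2048, hop_size=512, to_log=True)
print('spectrogram shape (blocks, bins):', X.shape)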
def spectrogram_features(song, fs, block_size, hop_size, spectrogram_type, to_log=True):
    x, times = split_to_blocks(song, block_size, hop_size=hop_size)
    w = create_window(block_size)
    if spectrogram_type == 'stft':
        spectrogram_func = stft_spectrogram
    elif spectrogram_type == 'reassigned':
        spectrogram_func = reassigned_spectrogram
    elif spectrogram_type == 'chromagram':
        spectrogram_func = lambda x, w, to_log: chromagram(x, w, fs, to_log=to_log)
    else:
        raise ValueError('unknown spectrogram type: %s' % spectrogram_type)
    X = spectrogram_func(x, w, to_log)
    return X
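# Hypothetical usage sketch for spectrogram_features(): compares the three supported
# spectrogram types on the same audio ('test.wav' is a placeholder path).
song, fs = load_wav('test.wav')
for kind in ('stft', 'reassigned', 'chromagram'):
    features = spectrogram_features(song, fs, block_size=4096, hop_size=2048,
                                    spectrogram_type=kind)
    print(kind, features.shape)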
def transform(self, X, **transform_params):
    """
    Transforms audio clip X into a normalized chromagram.

    Input: X - mono audio clip - numpy array of shape (samples,)
    Output: X_chromagram - numpy array of shape (blocks, bins)
    """
    X_blocks, X_times = split_to_blocks(X, self.block_size, self.hop_size, self.sample_rate)
    X_chromagram = chromagram(X_blocks, self.window, self.sample_rate, to_log=True,
                              bin_range=self.bin_range, bin_division=self.bin_division)
    # map from raw dB [-120.0, bin_count] to [0.0, 1.0]
    bin_count = X_blocks.shape[1]
    X_chromagram = (X_chromagram + 120) / (120 + bin_count)
    return X_chromagram
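# Hypothetical usage sketch: transform() above is written in the style of a scikit-learn
# transformer, so an instance could be dropped into a Pipeline. The class name
# ChromagramTransformer and all constructor values below are assumptions/placeholders;
# the arguments only mirror the attributes the method reads.
# transformer = ChromagramTransformer(block_size=4096, hop_size=2048, sample_rate=44100,
#                                     bin_range=(-48, 67), bin_division=1)
# X_chromagram = transformer.transform(x)  # x: mono audio, shape (samples,)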
# features = data['X']
# times = data['times']

# ### Chord labels

# df_labels = pd.read_csv(labels_file, sep='\t')
# labels_pcs = df_labels[df_labels.columns[1:]].as_matrix()

block_size = 4096
hop_size = 2048

print('loading audio:', audio_file)
x, fs = load_wav(audio_file)

print('splitting audio to blocks')
x_blocks, times = split_to_blocks(x, block_size, hop_size)
w = create_window(block_size)

print('computing chromagram')
X_chromagram = chromagram(x_blocks, w, fs, to_log=True)
features = X_chromagram

# ## Data preprocessing

# ### Features

print('scaling the input features')
# scaler = MinMaxScaler()
# X = scaler.fit_transform(features).astype('float32')
X = (features.astype('float32') - 120) / (features.shape[1] - 120)

# reshape for 1D convolution
import matplotlib.pyplot as plt
from scipy.signal import chirp

from analysis import split_to_blocks
from synthesis import generate_and_save
from reassignment import compute_spectra

block_size = 2048
fs = 44100
f1, f2 = 440, 880
duration = block_size / fs

times, x = generate_and_save(lambda t: chirp(t, f1, duration, f2),
                             duration=duration, fade_ends=False)
x_blocks = split_to_blocks(x, block_size, block_size)
# `w` is the analysis window of length block_size; create_window() is used for this in the
# other snippets (its module is not shown here).
w = create_window(block_size)
# split_to_blocks() returns (blocks, times); [0] selects the blocks array
X, X_cross_time, X_cross_freq, X_inst_freqs, X_group_delays = compute_spectra(
    x_blocks[0], w)
# keep only bins whose instantaneous frequency falls within the chirp's sweep range
idx = (X_inst_freqs >= f1 / fs) & (X_inst_freqs <= f2 / fs)
plt.scatter(X_group_delays[idx], X_inst_freqs[idx], alpha=0.5,
            s=abs(X)[idx], c=abs(X)[idx])
def prepare_chomagram_and_labels(album, song_title, block_size, hop_size,
                                 bin_range, bin_division):
    song = 'The_Beatles/' + album + '/' + song_title
    data_dir = 'data/beatles'
    audio_file = data_dir + '/audio-cd/' + song + '.wav'
    chord_file = data_dir + '/chordlab/' + song + '.lab.pcs.tsv'

    # ## Load audio

    print('loading audio:', audio_file)
    x, fs = load_wav(audio_file)
    print('sampling rate:', fs, 'Hz')
    print('number of samples:', len(x))
    print('duration in audio:', len(x) / fs, 'sec')

    # ## Load chords

    print('loading chords:', chord_file)
    chords = pd.read_csv(chord_file, sep='\t')
    print('shape:', chords.shape)
    print('duration in chords:', chords['end'].iloc[-1])

    pcs_cols = ['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B']
    label_cols = ['label', 'root', 'bass'] + pcs_cols

    # ## Split audio to blocks

    x_blocks, x_times = split_to_blocks(x, block_size, hop_size, fs)
    print('blocks shape:', x_blocks.shape)
    print('number of blocks:', len(x_blocks))
    # start times for each block
    print('last block starts at:', x_times[-1], 'sec')

    # ## Mapping of chords to blocks

    # (unused helper - block labelling is done via block_labels() below)
    def chords_to_blocks(chords, block_center_times):
        chord_ix = 0
        for t in block_center_times:
            # advance to the chord segment that contains time t
            while chord_ix < len(chords) - 1 and chords['end'].iloc[chord_ix] <= t:
                chord_ix += 1
            yield chords.iloc[chord_ix][pcs_cols]

    def time_to_samples(time):
        return np.round(time * fs)

    chords['start_sample'] = time_to_samples(chords['start'])
    chords['end_sample'] = time_to_samples(chords['end'])

    df_blocks = pd.DataFrame({'start': time_to_samples(x_times).astype(np.int64)})
    df_blocks['end'] = df_blocks['start'] + block_size

    label_dict = chords[label_cols].drop_duplicates().set_index('label')
    df_labels = chords[['start_sample', 'end_sample', 'label']].copy()
    df_labels.rename(columns={'start_sample': 'start', 'end_sample': 'end'},
                     inplace=True)

    df_labelled_blocks = block_labels(df_blocks, df_labels)
    df_block_pcs = df_labelled_blocks[['label']].join(label_dict, on='label')[['label'] + pcs_cols]
    assert len(df_block_pcs) == len(df_blocks)

    block_labels_file = '{}/chord-pcs/{}_{}/{}.pcs'.format(
        data_dir, block_size, hop_size, song)
    print('block labels file:', block_labels_file)
    os.makedirs(os.path.dirname(block_labels_file), exist_ok=True)
    df_block_pcs.to_csv(block_labels_file, sep='\t', index=False)

    # ## Chromagram features

    w = create_window(block_size)
    X_chromagram = chromagram(x_blocks, w, fs, to_log=True,
                              bin_range=bin_range, bin_division=bin_division)
    chromagram_file = '{}/chromagram/block={}_hop={}_bins={},{}_div={}/{}.npz'.format(
        data_dir, block_size, hop_size, bin_range[0], bin_range[1], bin_division, song)
    print('chromagram file:', chromagram_file)
    os.makedirs(os.path.dirname(chromagram_file), exist_ok=True)
    np.savez_compressed(chromagram_file, X=X_chromagram, times=x_times)
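# Hypothetical usage sketch for prepare_chomagram_and_labels(): all argument values below
# are placeholders and would have to match the actual directory layout under data/beatles/.
# prepare_chomagram_and_labels(album='SomeAlbum', song_title='SomeSong',
#                              block_size=4096, hop_size=2048,
#                              bin_range=(-48, 67), bin_division=1)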
def analyze_mean_energy(file, block_size=1024):
    x, fs = load_wav(file)
    blocks, t = split_to_blocks(x, block_size)
    y = mean_energy(blocks)
    plt.semilogy(t, y)
    plt.ylim(0, 1)
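# Hypothetical usage sketch for analyze_mean_energy(): plots block-wise mean energy of a
# placeholder file on a logarithmic scale.
analyze_mean_energy('test.wav', block_size=1024)
plt.show()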
def open_file(filename, block_size, hop_size):
    song, fs = load_wav(filename)
    x, times = split_to_blocks(song, block_size, hop_size=hop_size)
    return x, times, fs
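# Hypothetical usage sketch for open_file(): 'test.wav' is a placeholder path.
x, times, fs = open_file('test.wav', block_size=2048, hop_size=512)
print('blocks shape:', x.shape, 'sampling rate:', fs)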