def main():
    """Thumbnail the labelled 't3_train' clip with onset segmentation and plot the results."""
    track = 't3_train'
    data_dir = "./bin/labelled"
    audio, fs = fileutils.load_audio(track, audio_dir=data_dir)
    track_labels = fileutils.load_labels(track, label_dir=data_dir)

    # Retired shazam-fingerprint variant of this experiment:
    # audio_hashes, audio_pairs, _ = shazam.fingerprint(audio)
    # _, matches, segments = thumbnail_shazam(audio, fs, audio_pairs)
    # ax = vis.plot_similarity_curve(matches, segments, labels=track_labels)
    # ax.set_title('Similarity Curve for {}'.format(track))

    _, sim_curve, seg_times, sim_matrix = thumbnail(audio, fs,
                                                    seg_method='onset',
                                                    length=2)

    ax = vis.plot_similarity_matrix(sim_matrix)
    ax.set_title('Regular Segmentation Similarity Matrix')

    ax = vis.plot_similarity_curve(sim_curve,
                                   segment_times=seg_times,
                                   labels=track_labels)
    ax.set_title('Regular Segmentation Similarity')
    ax.legend()

    # Retired segment-overlap plot:
    # ax = vis.plot_window_overlap(seg_times, np.ones(seg_times.shape) * 2, tick_step=3)
    # ax.set_title('Regular Segmentation Overlap')
    # ax.grid()

    vis.show()
    return
def main():
    """Run the threshold analysis on 't20' and write out the motifs it finds."""
    track = 't20'
    in_dir = './bin/'
    out_dir = './bin/results'
    audio, fs = fileutils.load_audio(track, audio_dir=in_dir)
    # track_labels = fileutils.load_labels(track, label_dir=in_dir)

    # Segment length should probably scale with track duration (and with k):
    # roughly 3 s per 30 s of audio, so ~18 s for a 3 min song.
    # length = tune_length_with_audio(audio, fs)
    length = cfg.SEGMENT_LENGTH

    # Other analyses retired from this driver:
    # explots.draw_results_reference(audio, fs, track_labels, name=track,
    #                                show_plot=('motif',))
    # segmentation_analysis(audio, fs, length, num_motifs=3, name=track,
    #                       show_plot=('arc',))
    # k_means_analysis(audio, fs, length, name=track, k_clusters=(5, 25, 50),
    #                  show_plot=('motif',))

    thresh = 11
    audio_sample = audio
    results, _graphs = threshold_analysis(audio_sample, fs, length,
                                          name=track,
                                          show_plot=('motif', 'matrix'),
                                          threshold=thresh)
    write_motifs(audio_sample, fs, track, out_dir, results[thresh])
    visutils.show()
    return
def main():
    """Fingerprint 'genre_test_3' and visualize its spectrogram peaks and pairs."""
    track = 'genre_test_3'
    data_dir = "./bin/labelled"
    audio, fs = fileutils.load_audio(track, audio_dir=data_dir)

    pairs_hash, pairs, peaks = fingerprint(audio)

    # Log-magnitude spectrogram for plotting
    hop = int(cfg.WINDOW_SIZE * cfg.OVERLAP_RATIO)
    sxx = stft(audio,
               n_fft=cfg.WINDOW_SIZE,
               win_length=cfg.WINDOW_SIZE,
               hop_length=hop,
               window='hann')
    sxx = 10 * np.log10(np.abs(sxx))
    sxx[sxx == -np.inf] = 0  # log10(0) yields -inf; zero those bins out

    # NOTE(review): both hashes are built from the identical [0s, 3s) slice,
    # so this prints a self-match; confirm the second slice wasn't meant to
    # cover a different window.
    seg_hash_one, _, _ = fingerprint(audio[0 * fs:3 * fs])
    seg_hash_two, _, _ = fingerprint(audio[0 * fs:3 * fs])
    print(hash_search(seg_hash_one, seg_hash_two))

    vis.plot_stft(sxx, fs=fs, frames=False)
    vis.plot_peaks(peaks)
    vis.plot_pairs(peaks, pairs)
    vis.plot_stft_with_pairs(sxx, peaks, pairs)
    vis.show()
    return None
def setUp(self):
    """Build a random-weighted DiGraph whose nodes are beat-delimited segments of 't1'.

    Stores the graph on ``self.G``. Nodes carry ``start``/``end``/``length``
    attributes; edges connect distinct segments of exactly equal length with a
    random ``value`` weight.
    """
    # Read in the audio
    name = 't1'
    directory = "../bin/"
    audio, fs = fileutils.load_audio(name, audio_dir=directory)

    # Identify beats in the audio (units='time' -> beat positions in seconds)
    tempo, beats = lb.beat.beat_track(y=audio, sr=fs, units='time')
    # NOTE(review): audio.shape[0] is a sample count while the other
    # boundaries are in seconds, so the final segment's length looks
    # inconsistent -- confirm whether audio.shape[0] / fs was intended.
    beats = np.concatenate([[0], beats, [audio.shape[0]]])

    # Construct graph from beats: one node per inter-beat segment
    G = nx.DiGraph(tempo=tempo)
    for i in range(0, beats.shape[0] - 1):
        G.add_node(i, start=beats[i], end=beats[i + 1],
                   length=(beats[i + 1] - beats[i]))

    # Create randomly-weighted edges for segments with exactly the same length.
    # Only i > j pairs are considered, which also rules out self loops.
    for i in range(0, G.number_of_nodes()):
        for j in range(0, G.number_of_nodes()):
            if i <= j:
                continue
            # Fix: networkx >= 2.0 deprecated Graph.node (removed in 2.4);
            # Graph.nodes is the supported node-attribute accessor.
            if G.nodes[i]['length'] == G.nodes[j]['length']:
                e_weight = np.random.randn()
                G.add_edge(i, j, value=e_weight)
    self.G = G
def main():
    """Print onset-based and beat-based segmentations of 't1'."""
    track = 't1'
    audio, fs = fileutils.load_audio(track, audio_dir="./bin/")
    seg_len, seg_overlap = 2, 0.5
    print(segment_onset(audio, fs, length=seg_len, overlap=seg_overlap))
    print(segment_onset(audio, fs, length=seg_len, overlap=seg_overlap,
                        method='beat'))
def main():
    """Run the regular-segmentation thumbnail example on 't1' and plot it."""
    track = 't1'
    test_dir = "./bin/test"
    audio, fs = fileutils.load_audio(track, audio_dir=test_dir)
    track_labels = fileutils.load_labels(track, label_dir=test_dir)

    _, sim_curve, seg_times, sim_matrix = thumbnail(audio, fs,
                                                    seg_method='regular',
                                                    length=2)

    ax = vis.plot_similarity_matrix(sim_matrix)
    ax.set_title('Regular Segmentation Similarity Matrix')

    ax = vis.plot_similarity_curve(sim_curve,
                                   segment_times=seg_times,
                                   labels=track_labels)
    ax.set_title('Regular Segmentation Similarity')
    ax.legend()
    vis.show()
def main():
    """Cluster 'Repeat' with beat segmentation and save its SSM-to-arc figure."""
    track = 'Repeat'
    in_dir = './bin/test'
    audio, fs = fileutils.load_audio(track, audio_dir=in_dir)
    length = cfg.SEGMENT_LENGTH

    # fig = draw_segmentation_evolution(audio, fs)
    # vis.save_fig(fig, './bin/graphs/', 'SEG_{audio_name}'.format(audio_name=track))

    # thresh = 0.95
    thresh = 0.985
    starts, ends, labels, G = analyzer.analyze(audio, fs, seg_length=length,
                                               threshold=thresh,
                                               seg_method='beat')

    # Arc-graph clustering figure, retired:
    # fig = vis.get_fig()
    # fig.suptitle('Arc Graph Clustering')
    # ax = draw_super_axis(fig)
    # ax = draw_simple_arc(G, with_labels=True, with_color=True, ax=ax)
    # vis.save_fig(fig, './bin/graphs/', 'ARC_{audio_name}_clustered'.format(audio_name=track))

    fig = draw_matrix_arc_chord_demo(G, track, with_chord=False)
    vis.save_fig(fig, './bin/graphs/',
                 'SSM2ARC_{audio_name}'.format(audio_name=track))

    # Retired experiments kept as a summary (full code in VCS history):
    #  * draw_matrix_evolution on 't3'                      -> 'SSM_<name>'
    #  * hand-labelled reference plot of 'Avril'            -> 'IDEAL_<name>'
    #  * 'With Merging' vs 'With Join' motif comparison on 'Avril'
    #    (analyzer.analyze with with_join=False/True, motif.pack_motif,
    #     draw_motif_group over subplots=(2, 1))            -> 'MOTIF_<name>'
    #  * shazam fingerprint + plot_stft_with_pairs on 't1'  -> 'SHAZAM_<name>'

    vis.show()
    return
def _experiment(exp_name, audio_name, in_dir, out_dir, methods,
                write_motifs=False, show_plot=()):
    """Run one named experiment on an audio file and return its metrics.

    Parameters
    ----------
    exp_name : str
        One of 'Segmentation', 'Similarity', 'K-Means', 'Clustering';
        anything else prints a warning and returns None.
    audio_name : str
        Base name of the audio/label files under ``in_dir``.
    in_dir, out_dir : str
        Input directory for audio/labels; output directory for written motifs.
    methods : tuple
        Experiment-specific method names (for 'K-Means' this is the tuple of
        cluster counts, passed as ``k_clusters``).
    write_motifs : bool
        When True, write the resulting motifs via ``exputils.write_results``.
    show_plot : tuple
        Any of 'bar' and/or 'group' to produce the corresponding figures.

    Returns
    -------
    dict or None
        Mapping produced by ``results_to_metrics``, or None for an
        unrecognized ``exp_name``.
    """
    audio, fs = fileutils.load_audio(audio_name, audio_dir=in_dir)
    audio_labels = fileutils.load_labels(audio_name, label_dir=in_dir)
    # Hand-labelled reference motifs used as ground truth for the metrics
    ref_starts, ref_ends, ref_labels = motif.df_to_motif(audio_labels)
    ref_motifs = motif.pack_motif(ref_starts, ref_ends, ref_labels)
    length = cfg.SEGMENT_LENGTH

    # Dispatch to the analysis matching the experiment name.
    # NOTE(review): cfg.N_ClUSTERS is spelled with a lowercase 'l'; presumably
    # it matches the constant's definition in cfg -- verify.
    if exp_name == 'Segmentation':
        results, _ = exputils.segmentation_analysis(audio, fs, length,
                                                    audio_name,
                                                    methods=methods,
                                                    k=cfg.N_ClUSTERS,
                                                    show_plot=show_plot)
    elif exp_name == 'Similarity':
        results, _ = exputils.similarity_analysis(audio, fs, length,
                                                  audio_name,
                                                  methods=methods,
                                                  k=cfg.N_ClUSTERS,
                                                  show_plot=show_plot)
    elif exp_name == 'K-Means':
        # For K-Means, `methods` carries the cluster counts to sweep.
        results, _ = exputils.k_means_analysis(audio, fs, length, audio_name,
                                               k_clusters=methods,
                                               show_plot=show_plot)
    elif exp_name == 'Clustering':
        results, _ = exputils.clustering_analysis(audio, fs, length,
                                                  audio_name,
                                                  methods=methods,
                                                  k=cfg.N_ClUSTERS,
                                                  show_plot=show_plot)
    else:
        print("Unrecognized experiment name: {exp_name}".format(
            exp_name=exp_name))
        return

    metric_dict = results_to_metrics(results, methods, ref_motifs)

    # Output Plots
    # Axis-label prefix: K-Means methods are cluster counts, so label as 'k=N'
    if exp_name == 'K-Means':
        lp = 'k='
    else:
        lp = ''

    # Plot the recall, precision, f-measure, boundary measure, and edit
    # distance as bar plots.
    if 'bar' in show_plot:
        fig = vis.get_fig()
        ax = fig.add_subplot(1, 1, 1)
        ax = explots.draw_results_rpf(methods, metric_dict, label_prefix=lp,
                                      ax=ax)
        fig.suptitle('{exp_name} Performance for {audio_name}'.format(
            exp_name=exp_name, audio_name=audio_name))
        vis.save_fig(fig, './bin/graphs/',
                     'RPF_{}_{}'.format(audio_name, exp_name))

        fig = vis.get_fig()
        explots.draw_results_bed(methods, metric_dict, audio_name, exp_name,
                                 fig=fig)
        fig.suptitle("{exp_name} Accuracy on {audio_name}".format(
            exp_name=exp_name, audio_name=audio_name), fontsize=24)
        if exp_name == 'K-Means':
            # Relabel both subplot x-axes with the swept cluster counts
            ax = fig.get_axes()[0]
            ax.set_xlabel('Number of clusters')
            ax = fig.get_axes()[1]
            ax.set_xlabel('Number of clusters')
        vis.save_fig(fig, './bin/graphs/',
                     'BED_{}_{}'.format(audio_name, exp_name))

    # Plot the motif segmentations as subplots in a larger figure
    if 'group' in show_plot:
        label_key = 'Ideal'
        # Prepend the hand-labelled reference so it appears alongside methods
        methods_grp = (label_key, ) + methods
        results[label_key] = ref_motifs
        fig = visualizations.draw_motif_group(audio, fs, results, methods_grp,
                                              title='', subplots=(2, 2),
                                              label_prefix=lp)
        fig.suptitle('{exp_name} Motifs on {audio_name}'.format(
            exp_name=exp_name, audio_name=audio_name))
        vis.save_fig(fig, './bin/graphs/',
                     'GRP_{}_{}'.format(audio_name, exp_name))
        if exp_name == 'K-Means':
            # NOTE(review): this retitles an axis after save_fig, so it only
            # affects the figure shown later, not the saved file -- confirm
            # that is intended.
            ax = fig.get_axes()[1]
            ax.set_title(label_key, fontsize=18)

    if write_motifs:
        exputils.write_results(audio, fs, audio_name, out_dir, methods,
                               results)
    return metric_dict