# Example 1
def main():
    """Thumbnail a labelled track with onset segmentation and plot results."""
    track = 't3_train'
    bin_dir = "./bin/labelled"
    audio, fs = fileutils.load_audio(track, audio_dir=bin_dir)
    labels = fileutils.load_labels(track, label_dir=bin_dir)

    # Onset-based segmentation, 2-second segments.
    thumb, similarity, segments, sim_matrix = thumbnail(
        audio, fs, seg_method='onset', length=2)

    matrix_ax = vis.plot_similarity_matrix(sim_matrix)
    matrix_ax.set_title('Regular Segmentation Similarity Matrix')

    curve_ax = vis.plot_similarity_curve(similarity,
                                         segment_times=segments,
                                         labels=labels)
    curve_ax.set_title('Regular Segmentation Similarity')
    curve_ax.legend()

    vis.show()

    return
# Example 2
def main():
    """Run a threshold analysis on one track and write the motifs found."""
    track = 't20'
    src_dir = './bin/'
    dst_dir = './bin/results'
    audio, fs = fileutils.load_audio(track, audio_dir=src_dir)

    # Segment length from config. Ideally this would scale with track
    # duration (roughly 3 s of segment per 30 s of audio).
    seg_len = cfg.SEGMENT_LENGTH

    thresh = 11
    results, G_set = threshold_analysis(audio,
                                        fs,
                                        seg_len,
                                        name=track,
                                        show_plot=('motif', 'matrix'),
                                        threshold=thresh)
    # Results are keyed by the threshold used.
    write_motifs(audio, fs, track, dst_dir, results[thresh])

    visutils.show()
    return
# Example 3
def main():
    """Fingerprint a track and visualize its spectrogram, peaks, and pairs."""
    track = 'genre_test_3'
    bin_dir = "./bin/labelled"
    audio, fs = fileutils.load_audio(track, audio_dir=bin_dir)
    pairs_hash, pairs, peaks = fingerprint(audio)

    # Log-magnitude STFT for plotting.
    spec = stft(audio,
                n_fft=cfg.WINDOW_SIZE,
                win_length=cfg.WINDOW_SIZE,
                hop_length=int(cfg.WINDOW_SIZE * cfg.OVERLAP_RATIO),
                window='hann')
    spec = 10 * np.log10(np.abs(spec))
    spec[spec == -np.inf] = 0  # log10(0) yields -inf; zero those bins

    # NOTE(review): both slices below cover the same 0-3 s window, so this
    # matches a segment's hashes against themselves — confirm intended.
    seg_hash_one, _, _ = fingerprint(audio[0 * fs:3 * fs])
    seg_hash_two, _, _ = fingerprint(audio[0 * fs:3 * fs])
    print(hash_search(seg_hash_one, seg_hash_two))

    vis.plot_stft(spec, fs=fs, frames=False)
    vis.plot_peaks(peaks)
    vis.plot_pairs(peaks, pairs)
    vis.plot_stft_with_pairs(spec, peaks, pairs)
    vis.show()
    return None
# Example 4
    def setUp(self):
        """Build a beat-synchronous segment graph for the tests.

        Loads a test track, tracks its beats, creates one node per
        inter-beat segment, and adds randomly-weighted edges between
        segments of exactly equal length. Stores the graph on ``self.G``.
        """
        # Read in the audio
        name = 't1'
        directory = "../bin/"
        audio, fs = fileutils.load_audio(name, audio_dir=directory)

        # Identify beats in the audio, padded with start/end boundaries.
        # NOTE(review): beats are reported in seconds (units='time') but
        # audio.shape[0] is a sample count — the final boundary mixes
        # units; confirm this is intended.
        tempo, beats = lb.beat.beat_track(y=audio, sr=fs, units='time')
        beats = np.concatenate([[0], beats, [audio.shape[0]]])

        # Construct graph from beats: node i spans beats[i]..beats[i+1].
        G = nx.DiGraph(tempo=tempo)
        for i in range(0, beats.shape[0] - 1):
            G.add_node(i,
                       start=beats[i],
                       end=beats[i + 1],
                       length=(beats[i + 1] - beats[i]))

        # Create randomly-weighted edges for segments with exactly the
        # same length. Only i > j pairs are visited, which skips self
        # loops and duplicate (i, j)/(j, i) edges.
        for i in range(0, G.number_of_nodes()):
            for j in range(0, G.number_of_nodes()):
                if i <= j:
                    continue
                # BUG FIX: Graph.node was removed in networkx 2.4;
                # Graph.nodes is the supported node-attribute accessor.
                if G.nodes[i]['length'] == G.nodes[j]['length']:
                    e_weight = np.random.randn()
                    G.add_edge(i, j, value=e_weight)
        self.G = G
# Example 5
def main():
    """Print onset- and beat-based segmentations of a test track."""
    track = 't1'
    bin_dir = "./bin/"
    audio, fs = fileutils.load_audio(track, audio_dir=bin_dir)
    seg_len = 2
    seg_overlap = 0.5
    # Default (onset) method, then the beat-tracking variant.
    print(segment_onset(audio, fs, length=seg_len, overlap=seg_overlap))
    print(segment_onset(audio, fs, length=seg_len, overlap=seg_overlap,
                        method='beat'))
# Example 6
def main():
    """Thumbnail a test track with regular (fixed-length) segmentation."""
    track = 't1'
    bin_dir = "./bin/test"
    audio, fs = fileutils.load_audio(track, audio_dir=bin_dir)
    labels = fileutils.load_labels(track, label_dir=bin_dir)

    # Regular segmentation, 2-second segments.
    thumb, similarity, segments, sim_matrix = thumbnail(
        audio, fs, seg_method='regular', length=2)

    matrix_ax = vis.plot_similarity_matrix(sim_matrix)
    matrix_ax.set_title('Regular Segmentation Similarity Matrix')

    curve_ax = vis.plot_similarity_curve(similarity,
                                         segment_times=segments,
                                         labels=labels)
    curve_ax.set_title('Regular Segmentation Similarity')
    curve_ax.legend()

    vis.show()
# Example 7
def main():
    """Cluster a track's beat segments and save its SSM-to-arc figure.

    Earlier variants of this demo (segmentation evolution, arc-graph
    clustering, matrix evolution, motif joining, and Shazam pair plots)
    were removed; see version history if they are needed again.
    """
    track = 'Repeat'
    src_dir = './bin/test'
    audio, fs = fileutils.load_audio(track, audio_dir=src_dir)
    seg_len = cfg.SEGMENT_LENGTH

    # Similarity threshold for beat-synchronous segment clustering.
    thresh = 0.985
    starts, ends, labels, G = analyzer.analyze(audio,
                                               fs,
                                               seg_length=seg_len,
                                               threshold=thresh,
                                               seg_method='beat')

    # Matrix/arc demo figure (chord diagram disabled), saved to disk.
    fig = draw_matrix_arc_chord_demo(G, track, with_chord=False)
    vis.save_fig(fig, './bin/graphs/',
                 'SSM2ARC_{audio_name}'.format(audio_name=track))

    vis.show()

    return
# Example 8
def _experiment(exp_name,
                audio_name,
                in_dir,
                out_dir,
                methods,
                write_motifs=False,
                show_plot=()):
    """Run one named analysis experiment on a track and return its metrics.

    Parameters
    ----------
    exp_name : str
        One of 'Segmentation', 'Similarity', 'K-Means', or 'Clustering';
        selects which exputils analysis is run. Any other value prints an
        error and returns None.
    audio_name : str
        Track name passed to the fileutils loaders.
    in_dir : str
        Directory containing the audio file and its label file.
    out_dir : str
        Directory that motifs are written to when write_motifs is True.
    methods
        Analysis methods to compare; for 'K-Means' this is interpreted as
        the tuple of cluster counts instead.
    write_motifs : bool
        When True, write the resulting motifs via exputils.write_results.
    show_plot : tuple
        Plot-type flags forwarded to the analyses; 'bar' and 'group'
        additionally gate the figures produced below.

    Returns
    -------
    dict
        Mapping of each method to its metrics against the hand labels,
        or None for an unrecognized exp_name.
    """
    audio, fs = fileutils.load_audio(audio_name, audio_dir=in_dir)

    # Hand-labelled reference motifs, used as ground truth for metrics.
    audio_labels = fileutils.load_labels(audio_name, label_dir=in_dir)
    ref_starts, ref_ends, ref_labels = motif.df_to_motif(audio_labels)
    ref_motifs = motif.pack_motif(ref_starts, ref_ends, ref_labels)

    length = cfg.SEGMENT_LENGTH

    # Dispatch to the requested analysis. Each returns (results, _).
    # NOTE: 'N_ClUSTERS' (lowercase l) mirrors the cfg constant's spelling.
    if exp_name == 'Segmentation':
        results, _ = exputils.segmentation_analysis(audio,
                                                    fs,
                                                    length,
                                                    audio_name,
                                                    methods=methods,
                                                    k=cfg.N_ClUSTERS,
                                                    show_plot=show_plot)
    elif exp_name == 'Similarity':
        results, _ = exputils.similarity_analysis(audio,
                                                  fs,
                                                  length,
                                                  audio_name,
                                                  methods=methods,
                                                  k=cfg.N_ClUSTERS,
                                                  show_plot=show_plot)
    elif exp_name == 'K-Means':
        # For K-Means, `methods` carries the cluster counts to sweep.
        results, _ = exputils.k_means_analysis(audio,
                                               fs,
                                               length,
                                               audio_name,
                                               k_clusters=methods,
                                               show_plot=show_plot)

    elif exp_name == 'Clustering':
        results, _ = exputils.clustering_analysis(audio,
                                                  fs,
                                                  length,
                                                  audio_name,
                                                  methods=methods,
                                                  k=cfg.N_ClUSTERS,
                                                  show_plot=show_plot)
    else:
        print("Unrecognized experiment name: {exp_name}".format(
            exp_name=exp_name))
        return

    metric_dict = results_to_metrics(results, methods, ref_motifs)

    # Output Plots. For K-Means the "methods" are cluster counts, so the
    # bar labels get a 'k=' prefix.
    if exp_name == 'K-Means':
        lp = 'k='
    else:
        lp = ''

    # Plot the recall, precision, f-measure, boundary measure, and edit distance as bar plots.
    if 'bar' in show_plot:
        fig = vis.get_fig()
        ax = fig.add_subplot(1, 1, 1)
        ax = explots.draw_results_rpf(methods,
                                      metric_dict,
                                      label_prefix=lp,
                                      ax=ax)
        fig.suptitle('{exp_name} Performance for {audio_name}'.format(
            exp_name=exp_name, audio_name=audio_name))
        vis.save_fig(fig, './bin/graphs/',
                     'RPF_{}_{}'.format(audio_name, exp_name))

        # Boundary-measure / edit-distance figure.
        fig = vis.get_fig()
        explots.draw_results_bed(methods,
                                 metric_dict,
                                 audio_name,
                                 exp_name,
                                 fig=fig)
        fig.suptitle("{exp_name} Accuracy on {audio_name}".format(
            exp_name=exp_name, audio_name=audio_name),
                     fontsize=24)
        if exp_name == 'K-Means':
            # Relabel both axes since the x values are cluster counts.
            ax = fig.get_axes()[0]
            ax.set_xlabel('Number of clusters')
            ax = fig.get_axes()[1]
            ax.set_xlabel('Number of clusters')
        vis.save_fig(fig, './bin/graphs/',
                     'BED_{}_{}'.format(audio_name, exp_name))

    # Plot the motif segmentations as subplots in a larger figure,
    # with the hand-labelled ('Ideal') motifs as the first panel.
    if 'group' in show_plot:
        label_key = 'Ideal'
        methods_grp = (label_key, ) + methods
        results[label_key] = ref_motifs
        fig = visualizations.draw_motif_group(audio,
                                              fs,
                                              results,
                                              methods_grp,
                                              title='',
                                              subplots=(2, 2),
                                              label_prefix=lp)
        fig.suptitle('{exp_name} Motifs on {audio_name}'.format(
            exp_name=exp_name, audio_name=audio_name))
        vis.save_fig(fig, './bin/graphs/',
                     'GRP_{}_{}'.format(audio_name, exp_name))

        # NOTE(review): this retitle happens after save_fig, so the saved
        # file does not include it — confirm intended.
        if exp_name == 'K-Means':
            ax = fig.get_axes()[1]
            ax.set_title(label_key, fontsize=18)

    if write_motifs:
        exputils.write_results(audio, fs, audio_name, out_dir, methods,
                               results)

    return metric_dict