示例#1
0
    def as_figure(self, dpi=300, autoshow=True, title=None, export_png=True):
        """
        Creates a time-aligned figure from all the elements in the MgList.

        Every MgImage becomes one subplot showing the image file. Every audio
        MgFigure ('audio.tempogram', 'audio.descriptors', 'audio.spectrogram',
        'audio.waveform') is re-plotted from the values stored in its `data` dict,
        and all of these time-based subplots share the x-axis of the first one.
        MgFigures of type 'layers' and nested MgLists are expanded recursively.
        MgFigures with any other `figure_type` are ignored.

        The output file name starts from the first element (the image path without
        extension, or `data['of']` plus a tag) and is extended with a tag for every
        further element.

        Args:
            dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
            autoshow (bool, optional): Whether to show the resulting figure automatically. Defaults to True.
            title (str, optional): Optionally add a title to the figure. Defaults to None (no title).
            export_png (bool, optional): Whether to export a png image of the resulting figure automatically. Defaults to True.

        Returns:
            MgFigure: The MgFigure with all the elements from the MgList as layers.

        Raises:
            ValueError: If the MgList contains nothing that can be plotted.
        """
        import os
        import librosa
        import librosa.display
        import matplotlib.pyplot as plt
        import matplotlib.image as mpimg
        import matplotlib
        # imported explicitly: `import matplotlib` alone does not promise the submodule
        import matplotlib.ticker
        import numpy as np

        # Titles for images whose file name ends with a known tag.
        image_titles = {
            'mgx': 'Motiongram X',
            'mgy': 'Motiongram Y',
            'vgx': 'Videogram X',
            'vgy': 'Videogram Y',
        }

        # State shared by the nested helpers while the figure is being built.
        plot_counter = 0  # 0-based index of the next free subplot slot
        of = None  # "running" output file name, without extension
        index_of_first_plot = None  # slot of the first time-based subplot
        first_image_path = None  # path of the image drawn in slot 0, if any
        there_were_layers = False  # whether a 'layers' MgFigure was expanded

        def new_axis(time_based):
            """Adds the next subplot to the figure, stores it in `ax` and returns it.

            Time-based subplots share their x-axis with the first time-based
            subplot; image subplots never take part in the sharing.
            """
            nonlocal plot_counter, index_of_first_plot
            slot = plot_counter
            if time_based and index_of_first_plot is not None:
                axis = fig.add_subplot(elem_count, 1, slot + 1,
                                       sharex=ax[index_of_first_plot])
            else:
                axis = fig.add_subplot(elem_count, 1, slot + 1)
                if time_based:
                    index_of_first_plot = slot
            ax[slot] = axis
            plot_counter += 1
            return axis

        def draw_image(axis, image_path):
            """Shows an image file on `axis`, without ticks, titled after its content."""
            axis.imshow(mpimg.imread(image_path))
            axis.set_aspect('auto')
            axis.axes.xaxis.set_visible(False)
            axis.axes.yaxis.set_visible(False)

            # add title based on content
            last_tag = os.path.splitext(image_path)[0].split('_')[-1]
            axis.set(title=image_titles.get(last_tag,
                                            os.path.basename(image_path)))

        def set_time_ticks(axis, length):
            """Puts 20 evenly spaced ticks on the x-axis, covering `length`."""
            axis.set(xticks=np.arange(0, length + 0.1, length / 20))

        def draw_mel_spectrogram(axis, data):
            """Draws the mel spectrogram stored in `data` on `axis` and sets its ticks."""
            librosa.display.specshow(
                librosa.power_to_db(data['S'], ref=np.max, top_db=120),
                sr=data['sr'],
                y_axis='mel',
                fmax=data['sr'] / 2,
                x_axis='time',
                hop_length=data['hop_size'],
                ax=axis)
            # get rid of "default" ticks
            axis.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())
            set_time_ticks(axis, data['length'])

            # frequency ticks: 250, then 500 multiplied by 1.5 until sr / 2
            freq_ticks = [250]
            freq = 500
            while freq < data['sr'] / 2:
                freq_ticks.append(freq)
                freq *= 1.5

            freq_ticks = [round(elem, -1) for elem in freq_ticks]
            freq_ticks_labels = [
                str(round(elem / 1000, 1)) +
                'k' if elem > 1000 else int(round(elem))
                for elem in freq_ticks
            ]

            axis.set(yticks=freq_ticks)
            axis.set(yticklabels=freq_ticks_labels)

        def draw_tempogram(data):
            """Plots onset strength and tempogram into two subplots."""
            onset_axis = new_axis(time_based=True)
            onset_axis.plot(data['times'],
                            data['onset_env'],
                            label='Onset strength')
            onset_axis.label_outer()
            onset_axis.legend(frameon=True)

            tempogram_axis = new_axis(time_based=True)
            librosa.display.specshow(data['tempogram'],
                                     sr=data['sr'],
                                     hop_length=data['hop_size'],
                                     x_axis='time',
                                     y_axis='tempo',
                                     cmap='magma',
                                     ax=tempogram_axis)
            tempogram_axis.axhline(
                data['tempo'],
                color='w',
                linestyle='--',
                alpha=1,
                label='Estimated tempo={:g}'.format(data['tempo']))
            tempogram_axis.legend(loc='upper right')
            tempogram_axis.set(title='Tempogram')

        def draw_descriptors(data):
            """Plots RMS, flatness and the annotated spectrogram into three subplots."""
            rms_axis = new_axis(time_based=True)
            rms_axis.semilogy(data['times'],
                              data['rms'][0],
                              label='RMS Energy')
            rms_axis.legend(loc='upper right')

            flatness_axis = new_axis(time_based=True)
            flatness_axis.plot(data['times'],
                               data['flatness'].T,
                               label='Flatness',
                               color='y')
            flatness_axis.legend(loc='upper right')

            # spectrogram overlaid with centroid, bandwidth and rolloff
            spectral_axis = new_axis(time_based=True)
            draw_mel_spectrogram(spectral_axis, data)
            spectral_axis.fill_between(
                data['times'],
                data['cent'][0] - data['spec_bw'][0],
                data['cent'][0] + data['spec_bw'][0],
                alpha=0.5,
                label='Centroid +- bandwidth')
            spectral_axis.plot(data['times'],
                               data['cent'].T,
                               label='Centroid',
                               color='y')
            spectral_axis.plot(data['times'],
                               data['rolloff'][0],
                               label='Roll-off frequency (0.99)')
            spectral_axis.plot(data['times'],
                               data['rolloff_min'][0],
                               color='r',
                               label='Roll-off frequency (0.01)')
            spectral_axis.legend(loc='upper right')

        def draw_spectrogram(data):
            """Plots the mel spectrogram into one subplot."""
            axis = new_axis(time_based=True)
            draw_mel_spectrogram(axis, data)
            axis.set(title='Spectrogram')

        def draw_waveform(data):
            """Plots the waveform into one subplot."""
            axis = new_axis(time_based=True)
            # Newer librosa releases dropped `waveplot` in favour of `waveshow`;
            # keep using `waveplot` where it exists so existing output is unchanged.
            show_wave = getattr(librosa.display, 'waveplot', None)
            if show_wave is None:
                show_wave = librosa.display.waveshow
            show_wave(data['y'], sr=data['sr'], ax=axis)
            set_time_ticks(axis, data['length'])
            axis.set(title='Waveform')

        # figure_type -> (number of subplots, output file name tag, drawing function)
        figure_types = {
            'audio.tempogram': (2, '_tempogram', draw_tempogram),
            'audio.descriptors': (3, '_descriptors', draw_descriptors),
            'audio.spectrogram': (1, '_spectrogram', draw_spectrogram),
            'audio.waveform': (1, '_waveform', draw_waveform),
        }

        def count_elems(elems_list, elem_count):
            """
            Counts the subplots needed for all elements in a list recursively.

            Args:
                elems_list (list): The list to count.
                elem_count (int): The current count. (Pass 0 on the top level.)

            Returns:
                int: The number of subplots needed for `elems_list`.
            """
            _count = elem_count

            for obj in elems_list:
                if isinstance(obj, MgImage):
                    _count += 1
                elif isinstance(obj, MgFigure):
                    if obj.figure_type == 'layers':
                        _count = count_elems(obj.layers, _count)
                    elif obj.figure_type in figure_types:
                        _count += figure_types[obj.figure_type][0]
                elif isinstance(obj, MgList):
                    _count = count_elems(obj.objectlist, _count)

            return _count

        def build_figure(elems_list):
            """
            Recursively crawls through the list of objects and draws each of them
            into the next free subplot(s) of the single top-level figure, updating
            the shared state (subplot counter, output file name) along the way.

            Args:
                elems_list (list): List of MgImage, MgFigure or MgList objects.
            """
            nonlocal of, first_image_path, there_were_layers

            for obj in elems_list:
                if isinstance(obj, MgImage):
                    name_root = os.path.splitext(obj.filename)[0]
                    # increment output filename
                    if plot_counter == 0:
                        first_image_path = obj.filename
                        of = name_root
                    else:
                        of += '_' + name_root.split('_')[-1]
                    draw_image(new_axis(time_based=False), obj.filename)

                elif isinstance(obj, MgFigure):
                    if obj.figure_type == 'layers':
                        there_were_layers = True
                        build_figure(obj.layers)
                    elif obj.figure_type in figure_types:
                        _, tag, draw = figure_types[obj.figure_type]
                        # increment output filename
                        if plot_counter == 0:
                            of = obj.data['of'] + tag
                        else:
                            of += tag
                        draw(obj.data)

                elif isinstance(obj, MgList):
                    build_figure(obj.objectlist)

        elem_count = count_elems(self.objectlist, 0)
        if elem_count == 0:
            # fail early with a clear message instead of a zero-height figure
            raise ValueError(
                'Nothing to plot: the MgList contains no images or supported figures.'
            )

        fig = plt.figure(dpi=dpi, figsize=(10, 3 * elem_count))

        # make sure background is white
        fig.patch.set_facecolor('white')
        fig.patch.set_alpha(1)

        if isinstance(title, str):
            # add title
            fig.suptitle(title, fontsize=16, y=0.99)

        ax = [None] * elem_count

        build_figure(self.objectlist)

        # Safeguard kept from the earlier implementation: when a layered figure
        # was expanded and slot 0 shows an image, delete and redo that slot so it
        # is guaranteed not to share its x-axis with the time-based subplots.
        if first_image_path is not None and there_were_layers:
            ax[0].remove()
            ax[0] = fig.add_subplot(elem_count, 1, 1)
            draw_image(ax[0], first_image_path)

        fig.tight_layout()

        # save figure as png
        if export_png:
            fig.savefig(of + '.png', format='png', transparent=False)

        if not autoshow:
            plt.close(fig)

        # create MgFigure
        # NOTE(review): `image` names the png even when export_png is False and
        # no file was written - confirm whether callers rely on this.
        mgf = MgFigure(figure=fig,
                       figure_type='layers',
                       data=None,
                       layers=self.objectlist,
                       image=of + '.png')

        return mgf
示例#2
0
def mg_audio_tempogram(filename=None,
                       window_size=4096,
                       overlap=8,
                       mel_filters=512,
                       power=2,
                       dpi=300,
                       autoshow=True):
    """
    Renders a figure with plots of the onset strength and the tempogram of the video/audio file.

    Parameters
    ----------
    - filename : str, optional

        Path to the audio/video file to be processed.

    - window_size : int, optional

        The size of the FFT frame. Only used here to derive the hop size. Default is 4096.

    - overlap : int, optional

        The window overlap. The hop size is window_size / overlap.
        Example: window_size=1024, overlap=4 -> hop=256

    - mel_filters : int, optional

        Not used by this function. Default is 512.

    - power : int, float, optional

        Not used by this function. Default is 2.

    - dpi : int, optional

        Image quality of the rendered figure. Default is 300 DPI.

    - autoshow: bool, optional

        Whether to show the resulting figure automatically. Default is `True` (figure is shown).

    Outputs
    -------

    - `filename` (without its extension) + '_tempogram.png'

    Returns
    -------
    - MgFigure

        An MgFigure object referring to the internal figure and its data.
        `None` is returned (after printing a message) when no filename was given
        or when the file has no audio track.
    """
    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    # output files are named after the input file, without its extension
    of = os.path.splitext(filename)[0]

    hop_size = int(window_size / overlap)

    # sr=None keeps the file's own sample rate
    y, sr = librosa.load(filename, sr=None)

    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_size)

    tempogram = librosa.feature.tempogram(onset_envelope=oenv,
                                          sr=sr,
                                          hop_length=hop_size)

    # Estimate the global tempo for display purposes.
    # Newer librosa releases moved `beat.tempo` to `feature.tempo`; keep using
    # the original location where it exists and fall back otherwise.
    estimate_tempo = getattr(librosa.beat, 'tempo', None)
    if estimate_tempo is None:
        estimate_tempo = librosa.feature.tempo
    tempo = estimate_tempo(onset_envelope=oenv, sr=sr,
                           hop_length=hop_size)[0]

    fig, ax = plt.subplots(nrows=2, figsize=(10, 6), dpi=dpi, sharex=True)

    times = librosa.times_like(oenv, sr=sr, hop_length=hop_size)

    # upper plot: onset strength
    ax[0].plot(times, oenv, label='Onset strength')
    ax[0].label_outer()
    ax[0].legend(frameon=True)

    # lower plot: tempogram with the estimated tempo marked
    librosa.display.specshow(tempogram,
                             sr=sr,
                             hop_length=hop_size,
                             x_axis='time',
                             y_axis='tempo',
                             cmap='magma',
                             ax=ax[1])
    ax[1].axhline(tempo,
                  color='w',
                  linestyle='--',
                  alpha=1,
                  label='Estimated tempo={:g}'.format(tempo))
    ax[1].legend(loc='upper right')
    ax[1].set(title='Tempogram')

    # act on this figure explicitly rather than on pyplot's "current" figure
    fig.savefig('%s_tempogram.png' % of, format='png')

    if not autoshow:
        plt.close(fig)

    # create MgFigure; `data` keeps what is needed to re-plot this figure later
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "onset_env": oenv,
        "tempogram": tempogram,
        "tempo": tempo
    }

    mgf = MgFigure(figure=fig,
                   figure_type='audio.tempogram',
                   data=data,
                   layers=None,
                   image=of + '_tempogram.png')

    return mgf
示例#3
0
def mg_audio_descriptors(filename=None,
                         window_size=4096,
                         overlap=8,
                         mel_filters=512,
                         power=2,
                         dpi=300,
                         autoshow=True):
    """
    Renders a figure of plots showing spectral/loudness descriptors, including RMS energy, spectral flatness,
    centroid, bandwidth, rolloff of the video/audio file.

    The figure has three rows sharing the time axis: RMS energy (logarithmic y-axis),
    spectral flatness, and the mel-scaled spectrogram overlaid with the centroid,
    centroid +- bandwidth and the 0.99 / 0.01 roll-off frequencies.

    Parameters
    ----------
    - filename : str, optional

        Path to the audio/video file to be processed.

    - window_size : int, optional

        The size of the FFT frame. Default is 4096.

    - overlap : int, optional

        The window overlap. The hop size is window_size / overlap.
        Example: window_size=1024, overlap=4 -> hop=256

    - mel_filters : int, optional

        The number of filters to use for filtering the frequency domain. Affects the
        vertical resolution (sharpness) of the spectrogram. NB: Too high values with
        relatively small window sizes can result in artifacts (typically black lines)
        in the resulting image. Default is 512.

    - power : int, float, optional

        The steepness of the curve for the color mapping. Default is 2.

    - dpi : int, optional

        Image quality of the rendered figure. Default is 300 DPI.

    - autoshow: bool, optional

        Whether to show the resulting figure automatically. Default is `True` (figure is shown).

    Outputs
    -------

    - `filename` (without its extension) + '_descriptors.png'

    Returns
    -------
    - MgFigure

        An MgFigure object referring to the internal figure and its data.
        `None` is returned (after printing a message) when no filename was given
        or when the file has no audio track.
    """

    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    # output files are named after the input file, without its extension
    of = os.path.splitext(filename)[0]

    hop_size = int(window_size / overlap)

    # sr=None keeps the file's own sample rate
    y, sr = librosa.load(filename, sr=None)

    cent = librosa.feature.spectral_centroid(y=y,
                                             sr=sr,
                                             n_fft=window_size,
                                             hop_length=hop_size)
    spec_bw = librosa.feature.spectral_bandwidth(y=y,
                                                 sr=sr,
                                                 n_fft=window_size,
                                                 hop_length=hop_size)
    flatness = librosa.feature.spectral_flatness(y=y,
                                                 n_fft=window_size,
                                                 hop_length=hop_size)
    rolloff = librosa.feature.spectral_rolloff(y=y,
                                               sr=sr,
                                               n_fft=window_size,
                                               hop_length=hop_size,
                                               roll_percent=0.99)
    rolloff_min = librosa.feature.spectral_rolloff(y=y,
                                                   sr=sr,
                                                   n_fft=window_size,
                                                   hop_length=hop_size,
                                                   roll_percent=0.01)
    rms = librosa.feature.rms(y=y,
                              frame_length=window_size,
                              hop_length=hop_size)

    S = librosa.feature.melspectrogram(y=y,
                                       sr=sr,
                                       n_mels=mel_filters,
                                       fmax=sr / 2,
                                       n_fft=window_size,
                                       hop_length=hop_size,
                                       power=power)

    fig, ax = plt.subplots(figsize=(12, 8), dpi=dpi, nrows=3, sharex=True)

    # bottom row: mel spectrogram in dB
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max, top_db=120),
                             sr=sr,
                             y_axis='mel',
                             fmax=sr / 2,
                             x_axis='time',
                             hop_length=hop_size,
                             ax=ax[2])

    # get rid of "default" ticks
    ax[2].yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    ax[0].set(title=os.path.basename(filename))
    length = get_length(filename)
    # 20 evenly spaced ticks along the time axis
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    ax[2].set(xticks=plot_xticks)

    # frequency ticks: 250, then 500 multiplied by 1.5 until sr / 2
    freq_ticks = [250]
    freq = 500
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.5

    freq_ticks = [round(elem, -1) for elem in freq_ticks]
    freq_ticks_labels = [
        str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem))
        for elem in freq_ticks
    ]

    ax[2].set(yticks=freq_ticks)
    ax[2].set(yticklabels=freq_ticks_labels)

    times = librosa.times_like(cent,
                               sr=sr,
                               n_fft=window_size,
                               hop_length=hop_size)

    # overlay the spectral descriptors on the spectrogram
    ax[2].fill_between(times,
                       cent[0] - spec_bw[0],
                       cent[0] + spec_bw[0],
                       alpha=0.5,
                       label='Centroid +- bandwidth')
    ax[2].plot(times, cent.T, label='Centroid', color='y')
    ax[2].plot(times, rolloff[0], label='Roll-off frequency (0.99)')
    ax[2].plot(times,
               rolloff_min[0],
               color='r',
               label='Roll-off frequency (0.01)')

    ax[2].legend(loc='upper right')

    # middle row: spectral flatness
    ax[1].plot(times, flatness.T, label='Flatness', color='y')
    ax[1].legend(loc='upper right')

    # top row: RMS energy on a logarithmic y-axis
    ax[0].semilogy(times, rms[0], label='RMS Energy')
    ax[0].legend(loc='upper right')

    # act on this figure explicitly rather than on pyplot's "current" figure
    fig.tight_layout()
    fig.savefig('%s_descriptors.png' % of, format='png')

    if not autoshow:
        plt.close(fig)

    # create MgFigure; `data` keeps what is needed to re-plot this figure later
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "S": S,
        "length": length,
        "cent": cent,
        "spec_bw": spec_bw,
        "rolloff": rolloff,
        "rolloff_min": rolloff_min,
        "flatness": flatness,
        "rms": rms
    }

    mgf = MgFigure(figure=fig,
                   figure_type='audio.descriptors',
                   data=data,
                   layers=None,
                   image=of + '_descriptors.png')

    return mgf
示例#4
0
def mg_audio_spectrogram(filename=None,
                         window_size=4096,
                         overlap=8,
                         mel_filters=512,
                         power=2,
                         dpi=300,
                         autoshow=True):
    """
    Renders a figure showing the mel-scaled spectrogram of the video/audio file.

    Parameters
    ----------
    - filename : str, optional

        Path to the audio/video file to be processed.

    - window_size : int, optional

        The size of the FFT frame. Default is 4096.

    - overlap : int, optional

        The window overlap. The hop size is window_size / overlap.
        Example: window_size=1024, overlap=4 -> hop=256

    - mel_filters : int, optional

        The number of filters to use for filtering the frequency domain. Affects the
        vertical resolution (sharpness) of the spectrogram. NB: Too high values with
        relatively small window sizes can result in artifacts (typically black lines)
        in the resulting image. Default is 512.

    - power : int, float, optional

        The steepness of the curve for the color mapping. Default is 2.

    - dpi : int, optional

        Image quality of the rendered figure. Default is 300 DPI.

    - autoshow: bool, optional

        Whether to show the resulting figure automatically. Default is `True` (figure is shown).

    Outputs
    -------

    - `filename` (without its extension) + '_spectrogram.png'

    Returns
    -------
    - MgFigure

        An MgFigure object referring to the internal figure and its data.
        `None` is returned (after printing a message) when no filename was given
        or when the file has no audio track.
    """
    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    # output files are named after the input file, without its extension
    of = os.path.splitext(filename)[0]

    hop_size = int(window_size / overlap)

    # sr=None keeps the file's own sample rate
    y, sr = librosa.load(filename, sr=None)

    S = librosa.feature.melspectrogram(y=y,
                                       sr=sr,
                                       n_mels=mel_filters,
                                       fmax=sr / 2,
                                       n_fft=window_size,
                                       hop_length=hop_size,
                                       power=power)

    fig, ax = plt.subplots(figsize=(12, 6), dpi=dpi)

    img = librosa.display.specshow(librosa.power_to_db(S,
                                                       ref=np.max,
                                                       top_db=120),
                                   sr=sr,
                                   y_axis='mel',
                                   fmax=sr / 2,
                                   x_axis='time',
                                   hop_length=hop_size,
                                   ax=ax)

    # colorbar from -120 dB to 0 dB in steps of 10 (matches top_db=120 above)
    colorbar_ticks = range(-120, 1, 10)
    fig.colorbar(img, format='%+2.0f dB', ticks=colorbar_ticks)

    # get rid of "default" ticks
    ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    ax.set(title=os.path.basename(filename))
    length = get_length(filename)
    # 20 evenly spaced ticks along the time axis
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    ax.set(xticks=plot_xticks)

    # frequency ticks: start at 100 and multiply by 1.3 until sr / 2
    freq_ticks = []
    freq = 100
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.3

    freq_ticks = [round(elem, -2) for elem in freq_ticks]
    # always label the top of the frequency range
    freq_ticks.append(sr / 2)
    freq_ticks_labels = [
        str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem))
        for elem in freq_ticks
    ]

    ax.set(yticks=freq_ticks)
    ax.set(yticklabels=freq_ticks_labels)

    # act on this figure explicitly rather than on pyplot's "current" figure
    fig.tight_layout()

    fig.savefig('%s_spectrogram.png' % of, format='png')

    if not autoshow:
        plt.close(fig)

    # create MgFigure; `data` keeps what is needed to re-plot this figure later
    data = {"hop_size": hop_size, "sr": sr, "of": of, "S": S, "length": length}

    mgf = MgFigure(figure=fig,
                   figure_type='audio.spectrogram',
                   data=data,
                   layers=None,
                   image=of + '_spectrogram.png')

    return mgf
示例#5
0
def mg_audio_tempogram(filename=None,
                       window_size=4096,
                       overlap=8,
                       mel_filters=512,
                       power=2,
                       dpi=300,
                       autoshow=True,
                       title=None):
    """
    Renders a figure with plots of the onset strength and the tempogram of the video/audio file.

    Args:
        filename (str, optional): Path to the audio/video file to be processed. Defaults to None.
        window_size (int, optional): The size of the FFT frame. Only used here to derive the hop size. Defaults to 4096.
        overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
        mel_filters (int, optional): Not used by this function; kept so the signature stays parallel to the spectrogram functions. Defaults to 512.
        power (float, optional): Not used by this function; kept so the signature stays parallel to the spectrogram functions. Defaults to 2.
        dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
        autoshow (bool, optional): Whether to show the resulting figure automatically. When False the figure is closed after it has been saved. Defaults to True.
        title (str, optional): Optionally add title to the figure. Defaults to None, which uses the file name as a title.

    Outputs:
        `filename` (without its extension) + `_tempogram.png`

    Returns:
        MgFigure: An MgFigure object referring to the internal figure and its data.
        None is returned (after printing a message) when no filename is given or
        when `has_audio(filename)` is false.
    """

    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    # path of the input without its extension; used as the output prefix
    of = os.path.splitext(filename)[0]

    hop_size = int(window_size / overlap)

    # sr=None keeps the native sampling rate of the file
    y, sr = librosa.load(filename, sr=None)

    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_size)

    tempogram = librosa.feature.tempogram(onset_envelope=oenv,
                                          sr=sr,
                                          hop_length=hop_size)

    # Estimate the global tempo for display purposes.
    # NOTE(review): newer librosa releases are understood to expose the tempo
    # estimator as `librosa.feature.tempo` and to deprecate
    # `librosa.beat.tempo`; prefer the new location when it exists and fall
    # back otherwise -- confirm against the installed librosa version.
    estimate_tempo = getattr(librosa.feature, 'tempo', None)
    if estimate_tempo is None:
        estimate_tempo = librosa.beat.tempo
    tempo = estimate_tempo(onset_envelope=oenv, sr=sr,
                           hop_length=hop_size)[0]

    fig, ax = plt.subplots(nrows=2, figsize=(10, 6), dpi=dpi, sharex=True)

    # make sure background is white
    fig.patch.set_facecolor('white')
    fig.patch.set_alpha(1)

    # add title
    if title is None:
        title = os.path.basename(filename)
    fig.suptitle(title, fontsize=16)

    times = librosa.times_like(oenv, sr=sr, hop_length=hop_size)

    # upper panel: onset strength envelope over time
    ax[0].plot(times, oenv, label='Onset strength')
    ax[0].label_outer()
    ax[0].legend(frameon=True)

    # lower panel: tempogram with the estimated global tempo marked
    librosa.display.specshow(tempogram,
                             sr=sr,
                             hop_length=hop_size,
                             x_axis='time',
                             y_axis='tempo',
                             cmap='magma',
                             ax=ax[1])
    ax[1].axhline(tempo,
                  color='w',
                  linestyle='--',
                  alpha=1,
                  label='Estimated tempo={:g}'.format(tempo))
    ax[1].legend(loc='upper right')
    ax[1].set(title='Tempogram')

    # save/close this specific figure rather than whichever figure pyplot
    # currently considers "current"
    fig.savefig('%s_tempogram.png' % of, format='png', transparent=False)

    if not autoshow:
        plt.close(fig)

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "onset_env": oenv,
        "tempogram": tempogram,
        "tempo": tempo
    }

    mgf = MgFigure(figure=fig,
                   figure_type='audio.tempogram',
                   data=data,
                   layers=None,
                   image=of + '_tempogram.png')

    return mgf
示例#6
0
    def spectrogram(self,
                    window_size=4096,
                    overlap=8,
                    mel_filters=512,
                    power=2,
                    dpi=300,
                    autoshow=True,
                    title=None):
        """
        Renders a figure showing the mel-scaled spectrogram of the video/audio file.

        Args:
            window_size (int, optional): The size of the FFT frame. Defaults to 4096.
            overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
            mel_filters (int, optional): The number of filters to use for filtering the frequency domain. Affects the vertical resolution (sharpness) of the spectrogram. NB: Too high values with relatively small window sizes can result in artifacts (typically black lines) in the resulting image. Defaults to 512.
            power (float, optional): The steepness of the curve for the color mapping. Defaults to 2.
            dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
            autoshow (bool, optional): Whether to show the resulting figure automatically. When False the figure is closed after it has been saved. Defaults to True.
            title (str, optional): Optionally add title to the figure. Defaults to None, which uses the file name as a title.

        Outputs:
            `self.of` + `_spectrogram.png`

        Returns:
            MgFigure: An MgFigure object referring to the internal figure and its data.
            None is returned (after printing a message) when
            `has_audio(self.filename)` is false.
        """

        if not has_audio(self.filename):
            print('The video has no audio track.')
            return

        hop_size = int(window_size / overlap)

        # sr=None keeps the native sampling rate of the file
        y, sr = librosa.load(self.filename, sr=None)

        S = librosa.feature.melspectrogram(y=y,
                                           sr=sr,
                                           n_mels=mel_filters,
                                           fmax=sr / 2,
                                           n_fft=window_size,
                                           hop_length=hop_size,
                                           power=power)

        # honor the caller-supplied dpi (was previously hard-coded to 300)
        fig, ax = plt.subplots(figsize=(12, 6), dpi=dpi)

        # make sure background is white
        fig.patch.set_facecolor('white')
        fig.patch.set_alpha(1)

        # add title
        if title is None:
            title = os.path.basename(self.filename)
        fig.suptitle(title, fontsize=16)

        img = librosa.display.specshow(librosa.power_to_db(S,
                                                           ref=np.max,
                                                           top_db=120),
                                       sr=sr,
                                       y_axis='mel',
                                       fmax=sr / 2,
                                       x_axis='time',
                                       hop_length=hop_size,
                                       ax=ax)

        # colorbar ticks every 10 dB from -120 dB up to 0 dB
        colorbar_ticks = range(-120, 1, 10)
        fig.colorbar(img, format='%+2.0f dB', ticks=colorbar_ticks)

        # get rid of "default" ticks
        ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

        # 20 evenly spaced steps along the time axis
        length = get_length(self.filename)
        plot_xticks = np.arange(0, length + 0.1, length / 20)
        ax.set(xticks=plot_xticks)

        # geometric series of frequency ticks starting at 100 Hz (factor 1.3),
        # stopping below sr / 2
        freq_ticks = []
        freq = 100
        while freq < sr / 2:
            freq_ticks.append(freq)
            freq *= 1.3

        # snap ticks to the nearest 100 Hz and add sr / 2 as the topmost tick
        freq_ticks = [round(elem, -2) for elem in freq_ticks]
        freq_ticks.append(sr / 2)

        # values above 1000 are labelled in kHz with a 'k' suffix, the rest
        # as plain integers
        freq_ticks_labels = [
            str(round(elem / 1000, 1)) +
            'k' if elem > 1000 else int(round(elem)) for elem in freq_ticks
        ]

        ax.set(yticks=(freq_ticks))
        ax.set(yticklabels=(freq_ticks_labels))

        plt.tight_layout()

        # save/close this specific figure rather than whichever figure pyplot
        # currently considers "current"
        fig.savefig('%s_spectrogram.png' % self.of,
                    format='png',
                    transparent=False)

        if not autoshow:
            plt.close(fig)

        # create MgFigure
        data = {
            "hop_size": hop_size,
            "sr": sr,
            "of": self.of,
            "S": S,
            "length": length
        }

        mgf = MgFigure(figure=fig,
                       figure_type='audio.spectrogram',
                       data=data,
                       layers=None,
                       image=self.of + '_spectrogram.png')

        return mgf