Exemplo n.º 1
0
def skip_frames_ffmpeg(filename, skip=0):
    """
    Speed up a video by discarding `skip` frames for every frame kept.

    With `skip=1` every other frame is dropped, doubling the playback speed.
    The audio track (if present) is time-stretched by the same factor so it
    stays in sync. With `skip=0` the function is a no-op.

    Args:
        filename (str): Path to the video to process.
        skip (int, optional): Number of frames to discard before keeping one. Defaults to 0.

    Outputs:
        `filename`_skip.<file extension>
    """
    if skip == 0:
        return

    import os

    base, ext = os.path.splitext(filename)
    out_file = base + '_skip' + ext

    # Keeping 1 frame out of every (skip + 1) compresses the timeline by that factor.
    speed_factor = skip + 1
    video_filter = f'[0:v]setpts={1 / speed_factor}*PTS[v]'

    if has_audio(filename):
        cmd = [
            'ffmpeg', '-y', '-i', filename, '-filter_complex',
            video_filter + f';[0:a]atempo={speed_factor}[a]',
            '-map', '[v]', '-map', '[a]', '-q:v', '3', '-shortest', out_file
        ]
    else:
        cmd = [
            'ffmpeg', '-y', '-i', filename, '-filter_complex',
            video_filter, '-map', '[v]', '-q:v', '3', out_file
        ]

    ffmpeg_cmd(cmd, get_length(filename), pb_prefix='Skipping frames:')
Exemplo n.º 2
0
def skip_frames_ffmpeg(filename, skip=0):
    """
    Time-shrinks the video by skipping (discarding) every n frames determined by `skip`.
    To discard half of the frames (ie. double the speed of the video) use `skip=1`.

    Args:
        filename (str): Path to the video to process.
        skip (int, optional): Discard `skip` frames before keeping one. Defaults to 0.

    Outputs:
        `filename`_skip.<file extension>
    """
    # Nothing to do when no frames are skipped.
    if skip == 0:
        return

    import os

    name, extension = os.path.splitext(filename)
    target = name + '_skip' + extension

    # Video timestamps shrink by 1/(skip+1); audio tempo grows by (skip+1).
    shrink = 1 / (skip + 1)
    stretch = skip + 1

    if has_audio(filename):
        filter_graph = f'[0:v]setpts={shrink}*PTS[v];[0:a]atempo={stretch}[a]'
        mapping = ['-map', '[v]', '-map', '[a]', '-q:v', '3', '-shortest']
    else:
        filter_graph = f'[0:v]setpts={shrink}*PTS[v]'
        mapping = ['-map', '[v]', '-q:v', '3']

    cmd = (['ffmpeg', '-y', '-i', filename, '-filter_complex', filter_graph]
           + mapping + [target])

    ffmpeg_cmd(cmd, get_length(filename), pb_prefix='Skipping frames:')
Exemplo n.º 3
0
def mg_videoreader(filename,
                   starttime=0,
                   endtime=0,
                   skip=0,
                   rotate=0,
                   contrast=0,
                   brightness=0,
                   crop='None',
                   color=True,
                   keep_all=False,
                   returned_by_process=False):
    """
    Reads in a video file, and optionally applies several different processes on it:
    - trimming,
    - skipping,
    - rotating,
    - applying brightness and contrast,
    - cropping,
    - converting to grayscale.

    Args:
        filename (str): Path to the input video file.
        starttime (int or float, optional): Trims the video from this start time (s). Defaults to 0.
        endtime (int or float, optional): Trims the video until this end time (s). Defaults to 0 (which makes the algorithm use the full length of the input video instead).
        skip (int, optional): Time-shrinks the video by skipping (discarding) every n frames determined by `skip`. Defaults to 0.
        rotate (int or float, optional): Rotates the video by `rotate` degrees. Defaults to 0.
        contrast (int or float, optional): Applies +/- 100 contrast to video. Defaults to 0.
        brightness (int or float, optional): Applies +/- 100 brightness to video. Defaults to 0.
        crop (str, optional): If 'manual', opens a window displaying the first frame of the input video file, where the user can draw a rectangle to which cropping is applied. If 'auto' the cropping function attempts to determine the area of significant motion and applies the cropping to that area. Defaults to 'None'.
        color (bool, optional): If False, converts the video to grayscale and sets every method in grayscale mode. Defaults to True.
        keep_all (bool, optional): If True, preserves an output video file after each used preprocessing stage. Defaults to False.
        returned_by_process (bool, optional): This parameter is only for internal use, do not use it. Defaults to False.

    Outputs:
        A video file with the applied processes. The name of the file will be `filename` + a suffix for each process.

    Returns:
        int: The number of frames in the output video file.
        int: The pixel width of the output video file.
        int: The pixel height of the output video file.
        int: The FPS (frames per second) of the output video file.
        float: The length of the output video file in seconds.
        str: The path to the output video file without its extension. The file name gets a suffix for each used process.
        str: The file extension of the output video file.
        bool: Whether the output video file has an audio track.
    """
    # Separate filename from file extension
    of, fex = os.path.splitext(filename)

    # Flags recording which preprocessing stages have run, so that
    # intermediate files can be cleaned up when keep_all is False.
    trimming = False
    skipping = False
    rotating = False
    cbing = False
    cropping = False

    # Cut out relevant bit of video using starttime and endtime
    if starttime != 0 or endtime != 0:
        extract_subclip(filename,
                        starttime,
                        endtime,
                        targetname=of + '_trim' + fex)
        of = of + '_trim'
        trimming = True

    # To skip ahead a few frames before the next sample set skip to a value above 0
    if skip != 0:
        skip_frames_ffmpeg(of + fex, skip)
        if not keep_all and trimming:
            os.remove(of + fex)

        of = of + '_skip'
        skipping = True

    # Frame count and FPS are fixed after trimming/skipping; the remaining
    # stages (rotate, contrast/brightness, crop, grayscale) do not change them.
    length = get_framecount(of + fex)
    fps = get_fps(of + fex)

    # Overwrite the input value for endtime not to cut the video at 0
    if endtime == 0:
        endtime = length / fps

    if rotate != 0:
        rotate_video(of + fex, rotate)
        if not keep_all and (skipping or trimming):
            os.remove(of + fex)
        of = of + '_rot'
        rotating = True

    # Apply contrast/brightness before the motion analysis
    if contrast != 0 or brightness != 0:
        contrast_brightness_ffmpeg(of + fex,
                                   contrast=contrast,
                                   brightness=brightness)

        if not keep_all and (rotating or skipping or trimming):
            os.remove(of + fex)
        of = of + '_cb'
        cbing = True

    # Crops video either manually or automatically
    if crop.lower() != 'none':
        mg_cropvideo_ffmpeg(of + fex, crop_movement=crop)

        if not keep_all and (cbing or rotating or skipping or trimming):
            os.remove(of + fex)
        of = of + '_crop'
        cropping = True

    if color == False and returned_by_process == False:
        of_gray, fex = convert_to_grayscale(of + fex)
        if not keep_all and (cropping or cbing or rotating or skipping
                             or trimming):
            os.remove(of + fex)
        of = of_gray

    width, height = get_widthheight(of + fex)
    video_has_audio_track = has_audio(of + fex)

    return length, width, height, fps, endtime, of, fex, video_has_audio_track
Exemplo n.º 4
0
def mg_audio_tempogram(filename=None,
                       window_size=4096,
                       overlap=8,
                       mel_filters=512,
                       power=2,
                       dpi=300,
                       autoshow=True):
    """
    Renders a figure with plots of onset strength and tempogram of the video/audio file.

    Args:
        filename (str, optional): Path to the audio/video file to be processed. Defaults to None.
        window_size (int, optional): The size of the FFT frame. Defaults to 4096.
        overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
        mel_filters (int, optional): The number of filters to use for filtering the frequency domain. Affects the vertical resolution (sharpness) of the spectrogram. NB: Too high values with relatively small window sizes can result in artifacts (typically black lines) in the resulting image. Defaults to 512.
        power (int or float, optional): The steepness of the curve for the color mapping. Defaults to 2.
        dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
        autoshow (bool, optional): Whether to show the resulting figure automatically. Defaults to True.

    Outputs:
        `filename`_tempogram.png

    Returns:
        MgFigure: An MgFigure object referring to the internal figure and its data.
    """
    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    of, fex = os.path.splitext(filename)

    hop_size = int(window_size / overlap)

    # sr=None keeps the file's native sampling rate.
    y, sr = librosa.load(filename, sr=None)

    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_size)

    tempogram = librosa.feature.tempogram(onset_envelope=oenv,
                                          sr=sr,
                                          hop_length=hop_size)

    # Estimate the global tempo for display purposes
    tempo = librosa.beat.tempo(onset_envelope=oenv, sr=sr,
                               hop_length=hop_size)[0]

    fig, ax = plt.subplots(nrows=2, figsize=(10, 6), dpi=dpi, sharex=True)

    times = librosa.times_like(oenv, sr=sr, hop_length=hop_size)

    ax[0].plot(times, oenv, label='Onset strength')
    ax[0].label_outer()
    ax[0].legend(frameon=True)

    librosa.display.specshow(tempogram,
                             sr=sr,
                             hop_length=hop_size,
                             x_axis='time',
                             y_axis='tempo',
                             cmap='magma',
                             ax=ax[1])
    # Dashed white line marks the estimated global tempo on the tempogram.
    ax[1].axhline(tempo,
                  color='w',
                  linestyle='--',
                  alpha=1,
                  label='Estimated tempo={:g}'.format(tempo))
    ax[1].legend(loc='upper right')
    ax[1].set(title='Tempogram')

    plt.savefig(f'{of}_tempogram.png', format='png')

    if not autoshow:
        plt.close()

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "onset_env": oenv,
        "tempogram": tempogram,
        "tempo": tempo
    }

    mgf = MgFigure(figure=fig,
                   figure_type='audio.tempogram',
                   data=data,
                   layers=None,
                   image=of + '_tempogram.png')

    return mgf
Exemplo n.º 5
0
def mg_audio_descriptors(filename=None,
                         window_size=4096,
                         overlap=8,
                         mel_filters=512,
                         power=2,
                         dpi=300,
                         autoshow=True):
    """
    Renders a figure of plots showing spectral/loudness descriptors, including RMS energy,
    spectral flatness, centroid, bandwidth, rolloff of the video/audio file.

    Args:
        filename (str, optional): Path to the audio/video file to be processed. Defaults to None.
        window_size (int, optional): The size of the FFT frame. Defaults to 4096.
        overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
        mel_filters (int, optional): The number of filters to use for filtering the frequency domain. Affects the vertical resolution (sharpness) of the spectrogram. NB: Too high values with relatively small window sizes can result in artifacts (typically black lines) in the resulting image. Defaults to 512.
        power (int or float, optional): The steepness of the curve for the color mapping. Defaults to 2.
        dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
        autoshow (bool, optional): Whether to show the resulting figure automatically. Defaults to True.

    Outputs:
        `filename`_descriptors.png

    Returns:
        MgFigure: An MgFigure object referring to the internal figure and its data.
    """

    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    of, fex = os.path.splitext(filename)

    hop_size = int(window_size / overlap)

    # sr=None keeps the file's native sampling rate.
    y, sr = librosa.load(filename, sr=None)

    cent = librosa.feature.spectral_centroid(y=y,
                                             sr=sr,
                                             n_fft=window_size,
                                             hop_length=hop_size)
    spec_bw = librosa.feature.spectral_bandwidth(y=y,
                                                 sr=sr,
                                                 n_fft=window_size,
                                                 hop_length=hop_size)
    flatness = librosa.feature.spectral_flatness(y=y,
                                                 n_fft=window_size,
                                                 hop_length=hop_size)
    rolloff = librosa.feature.spectral_rolloff(y=y,
                                               sr=sr,
                                               n_fft=window_size,
                                               hop_length=hop_size,
                                               roll_percent=0.99)
    rolloff_min = librosa.feature.spectral_rolloff(y=y,
                                                   sr=sr,
                                                   n_fft=window_size,
                                                   hop_length=hop_size,
                                                   roll_percent=0.01)
    rms = librosa.feature.rms(y=y,
                              frame_length=window_size,
                              hop_length=hop_size)

    S = librosa.feature.melspectrogram(y=y,
                                       sr=sr,
                                       n_mels=mel_filters,
                                       fmax=sr / 2,
                                       n_fft=window_size,
                                       hop_length=hop_size,
                                       power=power)

    fig, ax = plt.subplots(figsize=(12, 8), dpi=dpi, nrows=3, sharex=True)

    librosa.display.specshow(librosa.power_to_db(S,
                                                 ref=np.max,
                                                 top_db=120),
                             sr=sr,
                             y_axis='mel',
                             fmax=sr / 2,
                             x_axis='time',
                             hop_length=hop_size,
                             ax=ax[2])

    # get rid of "default" ticks
    ax[2].yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    ax[0].set(title=os.path.basename(filename))
    length = get_length(filename)
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    ax[2].set(xticks=plot_xticks)

    # Build log-spaced frequency ticks starting at 250 Hz, multiplying by
    # 1.5 up to the Nyquist frequency, then round to the nearest 10 Hz.
    freq_ticks = [250]
    freq = 500
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.5

    freq_ticks = [round(elem, -1) for elem in freq_ticks]
    freq_ticks_labels = [
        str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem))
        for elem in freq_ticks
    ]

    ax[2].set(yticks=(freq_ticks))
    ax[2].set(yticklabels=(freq_ticks_labels))

    times = librosa.times_like(cent,
                               sr=sr,
                               n_fft=window_size,
                               hop_length=hop_size)

    ax[2].fill_between(times,
                       cent[0] - spec_bw[0],
                       cent[0] + spec_bw[0],
                       alpha=0.5,
                       label='Centroid +- bandwidth')
    ax[2].plot(times, cent.T, label='Centroid', color='y')
    ax[2].plot(times, rolloff[0], label='Roll-off frequency (0.99)')
    ax[2].plot(times,
               rolloff_min[0],
               color='r',
               label='Roll-off frequency (0.01)')

    ax[2].legend(loc='upper right')

    ax[1].plot(times, flatness.T, label='Flatness', color='y')
    ax[1].legend(loc='upper right')

    ax[0].semilogy(times, rms[0], label='RMS Energy')
    ax[0].legend(loc='upper right')

    plt.tight_layout()
    plt.savefig(f'{of}_descriptors.png', format='png')

    if not autoshow:
        plt.close()

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "S": S,
        "length": length,
        "cent": cent,
        "spec_bw": spec_bw,
        "rolloff": rolloff,
        "rolloff_min": rolloff_min,
        "flatness": flatness,
        "rms": rms
    }

    mgf = MgFigure(figure=fig,
                   figure_type='audio.descriptors',
                   data=data,
                   layers=None,
                   image=of + '_descriptors.png')

    return mgf
Exemplo n.º 6
0
def mg_audio_spectrogram(filename=None,
                         window_size=4096,
                         overlap=8,
                         mel_filters=512,
                         power=2,
                         dpi=300,
                         autoshow=True):
    """
    Renders a figure showing the mel-scaled spectrogram of the video/audio file.

    Args:
        filename (str, optional): Path to the audio/video file to be processed. Defaults to None.
        window_size (int, optional): The size of the FFT frame. Defaults to 4096.
        overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
        mel_filters (int, optional): The number of filters to use for filtering the frequency domain. Affects the vertical resolution (sharpness) of the spectrogram. NB: Too high values with relatively small window sizes can result in artifacts (typically black lines) in the resulting image. Defaults to 512.
        power (int or float, optional): The steepness of the curve for the color mapping. Defaults to 2.
        dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
        autoshow (bool, optional): Whether to show the resulting figure automatically. Defaults to True.

    Outputs:
        `filename`_spectrogram.png

    Returns:
        MgFigure: An MgFigure object referring to the internal figure and its data.
    """
    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    of, fex = os.path.splitext(filename)

    hop_size = int(window_size / overlap)

    # sr=None keeps the file's native sampling rate.
    y, sr = librosa.load(filename, sr=None)

    S = librosa.feature.melspectrogram(y=y,
                                       sr=sr,
                                       n_mels=mel_filters,
                                       fmax=sr / 2,
                                       n_fft=window_size,
                                       hop_length=hop_size,
                                       power=power)

    fig, ax = plt.subplots(figsize=(12, 6), dpi=dpi)

    img = librosa.display.specshow(librosa.power_to_db(S,
                                                       ref=np.max,
                                                       top_db=120),
                                   sr=sr,
                                   y_axis='mel',
                                   fmax=sr / 2,
                                   x_axis='time',
                                   hop_length=hop_size,
                                   ax=ax)

    colorbar_ticks = range(-120, 1, 10)
    fig.colorbar(img, format='%+2.0f dB', ticks=colorbar_ticks)

    # get rid of "default" ticks
    ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    ax.set(title=os.path.basename(filename))
    length = get_length(filename)
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    ax.set(xticks=plot_xticks)

    # Build log-spaced frequency ticks from 100 Hz, multiplying by 1.3 up to
    # the Nyquist frequency, then round to the nearest 100 Hz.
    freq_ticks = []
    freq = 100
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.3

    freq_ticks = [round(elem, -2) for elem in freq_ticks]
    freq_ticks.append(sr / 2)
    freq_ticks_labels = [
        str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem))
        for elem in freq_ticks
    ]

    ax.set(yticks=(freq_ticks))
    ax.set(yticklabels=(freq_ticks_labels))

    plt.tight_layout()

    plt.savefig(f'{of}_spectrogram.png', format='png')

    if not autoshow:
        plt.close()

    # create MgFigure
    data = {"hop_size": hop_size, "sr": sr, "of": of, "S": S, "length": length}

    mgf = MgFigure(figure=fig,
                   figure_type='audio.spectrogram',
                   data=data,
                   layers=None,
                   image=of + '_spectrogram.png')

    return mgf
Exemplo n.º 7
0
def mg_videoreader(filename,
                   starttime=0,
                   endtime=0,
                   skip=0,
                   rotate=0,
                   contrast=0,
                   brightness=0,
                   crop='None',
                   color=True,
                   keep_all=False,
                   returned_by_process=False):
    """
    Reads in a video file and optionally runs a chain of preprocessing stages
    on it: trimming, frame skipping, rotation, brightness/contrast adjustment,
    cropping, and grayscale conversion. Each stage writes a new file whose
    name is the previous name plus a stage suffix.

    Args:
        filename (str): Path to the input video file.
        starttime (int or float, optional): Trims the video from this start time (s). Defaults to 0.
        endtime (int or float, optional): Trims the video until this end time (s). Defaults to 0 (which will make the algorithm use the full length of the input video instead).
        skip (int, optional): Time-shrinks the video by skipping (discarding) every n frames determined by `skip`. Defaults to 0.
        rotate (int or float, optional): Rotates the video by a `rotate` degrees. Defaults to 0.
        contrast (int or float, optional): Applies +/- 100 contrast to video. Defaults to 0.
        brightness (int or float, optional): Applies +/- 100 brightness to video. Defaults to 0.
        crop (str, optional): If 'manual', opens a window displaying the first frame of the input video file, where the user can draw a rectangle to which cropping is applied. If 'auto' the cropping function attempts to determine the area of significant motion and applies the cropping to that area. Defaults to 'None'.
        color (bool, optional): If False, converts the video to grayscale and sets every method in grayscale mode. Defaults to True.
        keep_all (bool, optional): If True, preserves an output video file after each used preprocessing stage. Defaults to False.
        returned_by_process (bool, optional): This parameter is only for internal use, do not use it. Defaults to False.

    Outputs:
        A video file with the applied processes. The name of the file will be `filename` + a suffix for each process.

    Returns:
        int: The number of frames in the output video file.
        int: The pixel width of the output video file.
        int: The pixel height of the output video file.
        int: The FPS (frames per second) of the output video file.
        float: The length of the output video file in seconds.
        str: The path to the output video file without its extension. The file name gets a suffix for each used process.
        str: The file extension of the output video file.
        bool: Whether the video has an audio track.
    """

    # Split the path into base name and extension.
    of, fex = os.path.splitext(filename)

    # Stage flags: used to decide which intermediate files may be deleted.
    did_trim = False
    did_skip = False
    did_rotate = False
    did_cb = False
    did_crop = False

    # Stage 1: trim to the [starttime, endtime] window.
    if starttime != 0 or endtime != 0:
        extract_subclip(filename,
                        starttime,
                        endtime,
                        targetname=of + '_trim' + fex)
        of += '_trim'
        did_trim = True

    # Stage 2: drop frames to time-shrink the video.
    if skip != 0:
        skip_frames_ffmpeg(of + fex, skip)
        if not keep_all and did_trim:
            os.remove(of + fex)

        of += '_skip'
        did_skip = True

    length = get_framecount(of + fex)
    fps = get_fps(of + fex)

    # 0 means full length
    if endtime == 0:
        endtime = length / fps

    # Stage 3: rotate.
    if rotate != 0:
        rotate_video(of + fex, rotate)
        if not keep_all and (did_skip or did_trim):
            os.remove(of + fex)
        of += '_rot'
        did_rotate = True

    # Stage 4: contrast/brightness (applied before the motion analysis).
    if contrast != 0 or brightness != 0:
        contrast_brightness_ffmpeg(of + fex,
                                   contrast=contrast,
                                   brightness=brightness)

        if not keep_all and (did_rotate or did_skip or did_trim):
            os.remove(of + fex)
        of += '_cb'
        did_cb = True

    # Stage 5: crop, either manually or automatically.
    if crop.lower() != 'none':
        mg_cropvideo_ffmpeg(of + fex, crop_movement=crop)

        if not keep_all and (did_cb or did_rotate or did_skip or did_trim):
            os.remove(of + fex)
        of += '_crop'
        did_crop = True

    # Stage 6: grayscale conversion (skipped for internal process calls).
    if color == False and returned_by_process == False:
        of_gray, fex = convert_to_grayscale(of + fex)
        if not keep_all and (did_crop or did_cb or did_rotate or did_skip
                             or did_trim):
            os.remove(of + fex)
        of = of_gray

    width, height = get_widthheight(of + fex)
    video_has_audio_track = has_audio(of + fex)
    return length, width, height, fps, endtime, of, fex, video_has_audio_track
Exemplo n.º 8
0
def mg_audio_tempogram(filename=None,
                       window_size=4096,
                       overlap=8,
                       mel_filters=512,
                       power=2,
                       dpi=300,
                       autoshow=True,
                       title=None):
    """
    Renders a figure with plots of onset strength and tempogram of the video/audio file.

    Args:
        filename (str, optional): Path to the audio/video file to be processed. Defaults to None.
        window_size (int, optional): The size of the FFT frame. Defaults to 4096.
        overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
        mel_filters (int, optional): Unused here; kept for signature compatibility with the other audio plotting functions. Defaults to 512.
        power (float, optional): Unused here; kept for signature compatibility with the other audio plotting functions. Defaults to 2.
        dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
        autoshow (bool, optional): Whether to show the resulting figure automatically. Defaults to True.
        title (str, optional): Optionally add title to the figure. Defaults to None, which uses the file name as a title.

    Outputs:
        `filename`_tempogram.png

    Returns:
        MgFigure: An MgFigure object referring to the internal figure and its data.
    """

    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    of, fex = os.path.splitext(filename)

    # Hop size is derived from the FFT frame size and the overlap factor.
    hop_size = int(window_size / overlap)

    # sr=None keeps the file's native sample rate instead of resampling.
    y, sr = librosa.load(filename, sr=None)

    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_size)

    tempogram = librosa.feature.tempogram(onset_envelope=oenv,
                                          sr=sr,
                                          hop_length=hop_size)

    # Estimate the global tempo for display purposes
    tempo = librosa.beat.tempo(onset_envelope=oenv, sr=sr,
                               hop_length=hop_size)[0]

    fig, ax = plt.subplots(nrows=2, figsize=(10, 6), dpi=dpi, sharex=True)

    # make sure background is white
    fig.patch.set_facecolor('white')
    fig.patch.set_alpha(1)

    # add title (fall back to the file name when none was given)
    if title is None:
        title = os.path.basename(filename)
    fig.suptitle(title, fontsize=16)

    times = librosa.times_like(oenv, sr=sr, hop_length=hop_size)

    # Top panel: onset strength envelope over time.
    ax[0].plot(times, oenv, label='Onset strength')
    ax[0].label_outer()
    ax[0].legend(frameon=True)

    # Bottom panel: tempogram with the estimated global tempo overlaid.
    librosa.display.specshow(tempogram,
                             sr=sr,
                             hop_length=hop_size,
                             x_axis='time',
                             y_axis='tempo',
                             cmap='magma',
                             ax=ax[1])
    ax[1].axhline(tempo,
                  color='w',
                  linestyle='--',
                  alpha=1,
                  label='Estimated tempo={:g}'.format(tempo))
    ax[1].legend(loc='upper right')
    ax[1].set(title='Tempogram')

    plt.savefig('%s_tempogram.png' % of, format='png', transparent=False)

    if not autoshow:
        plt.close()

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "onset_env": oenv,
        "tempogram": tempogram,
        "tempo": tempo
    }

    mgf = MgFigure(figure=fig,
                   figure_type='audio.tempogram',
                   data=data,
                   layers=None,
                   image=of + '_tempogram.png')

    return mgf
Exemplo n.º 9
0
    def spectrogram(self,
                    window_size=4096,
                    overlap=8,
                    mel_filters=512,
                    power=2,
                    dpi=300,
                    autoshow=True,
                    title=None):
        """
        Renders a figure showing the mel-scaled spectrogram of the video/audio file.

        Args:
            window_size (int, optional): The size of the FFT frame. Defaults to 4096.
            overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
            mel_filters (int, optional): The number of filters to use for filtering the frequency domain. Affects the vertical resolution (sharpness) of the spectrogram. NB: Too high values with relatively small window sizes can result in artifacts (typically black lines) in the resulting image. Defaults to 512.
            power (float, optional): The steepness of the curve for the color mapping. Defaults to 2.
            dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
            autoshow (bool, optional): Whether to show the resulting figure automatically. Defaults to True.
            title (str, optional): Optionally add title to the figure. Defaults to None, which uses the file name as a title.

        Outputs:
            `self.filename`_spectrogram.png

        Returns:
            MgFigure: An MgFigure object referring to the internal figure and its data.
        """

        if not has_audio(self.filename):
            print('The video has no audio track.')
            return

        # Hop size is derived from the FFT frame size and the overlap factor.
        hop_size = int(window_size / overlap)

        # sr=None keeps the file's native sample rate instead of resampling.
        y, sr = librosa.load(self.filename, sr=None)

        S = librosa.feature.melspectrogram(y=y,
                                           sr=sr,
                                           n_mels=mel_filters,
                                           fmax=sr / 2,
                                           n_fft=window_size,
                                           hop_length=hop_size,
                                           power=power)

        # Use the dpi argument rather than a hardcoded value.
        fig, ax = plt.subplots(figsize=(12, 6), dpi=dpi)

        # make sure background is white
        fig.patch.set_facecolor('white')
        fig.patch.set_alpha(1)

        # add title (fall back to the file name when none was given)
        if title is None:
            title = os.path.basename(self.filename)
        fig.suptitle(title, fontsize=16)

        img = librosa.display.specshow(librosa.power_to_db(S,
                                                           ref=np.max,
                                                           top_db=120),
                                       sr=sr,
                                       y_axis='mel',
                                       fmax=sr / 2,
                                       x_axis='time',
                                       hop_length=hop_size,
                                       ax=ax)

        # Colorbar ticks every 10 dB across the displayed 120 dB range.
        colorbar_ticks = range(-120, 1, 10)
        fig.colorbar(img, format='%+2.0f dB', ticks=colorbar_ticks)

        # get rid of "default" ticks
        ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

        # Place 21 evenly spaced time ticks along the x axis.
        length = get_length(self.filename)
        plot_xticks = np.arange(0, length + 0.1, length / 20)
        ax.set(xticks=plot_xticks)

        # Build logarithmically spaced frequency ticks (factor 1.3 per step)
        # from 100 Hz up to the Nyquist frequency, rounded to whole hundreds.
        freq_ticks = []
        freq = 100
        while freq < sr / 2:
            freq_ticks.append(freq)
            freq *= 1.3

        freq_ticks = [round(elem, -2) for elem in freq_ticks]
        freq_ticks.append(sr / 2)
        freq_ticks_labels = [
            str(round(elem / 1000, 1)) +
            'k' if elem > 1000 else int(round(elem)) for elem in freq_ticks
        ]

        ax.set(yticks=(freq_ticks))
        ax.set(yticklabels=(freq_ticks_labels))

        plt.tight_layout()

        plt.savefig('%s_spectrogram.png' % self.of,
                    format='png',
                    transparent=False)

        if not autoshow:
            plt.close()

        # create MgFigure
        data = {
            "hop_size": hop_size,
            "sr": sr,
            "of": self.of,
            "S": S,
            "length": length
        }

        mgf = MgFigure(figure=fig,
                       figure_type='audio.spectrogram',
                       data=data,
                       layers=None,
                       image=self.of + '_spectrogram.png')

        return mgf