def as_figure(self, dpi=300, autoshow=True, title=None, export_png=True):
    """
    Creates a time-aligned figure from all the elements in the MgList.

    Every MgImage contributes one subplot, every audio MgFigure contributes one to
    three subplots depending on its `figure_type`, and nested MgLists as well as
    layered MgFigures are flattened recursively. All subplots are stacked in a
    single column, in the order they are encountered.

    Args:
        dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
        autoshow (bool, optional): Whether to show the resulting figure automatically. When False,
            the figure is closed after rendering. Defaults to True.
        title (str, optional): Optionally add a title to the figure. Defaults to None (no title).
        export_png (bool, optional): Whether to export a png image of the resulting figure
            automatically. Defaults to True.

    Raises:
        ValueError: If the list contains no element that can be plotted.

    Returns:
        MgFigure: The MgFigure with all the elements from the MgList as layers.
    """
    # How many subplots each audio figure type occupies.
    subplots_per_type = {
        'audio.tempogram': 2,
        'audio.descriptors': 3,
        'audio.spectrogram': 1,
        'audio.waveform': 1,
    }

    # What each audio figure type appends to the output file name.
    name_suffixes = {
        'audio.tempogram': '_tempogram',
        'audio.descriptors': '_descriptors',
        'audio.spectrogram': '_spectrogram',
        'audio.waveform': '_waveform',
    }

    # Titles for images whose file name ends in a known tag.
    image_titles = {
        'mgx': 'Motiongram X',
        'mgy': 'Motiongram Y',
        'vgx': 'Videogram X',
        'vgy': 'Videogram Y',
    }

    def count_elems(elems_list, elem_count):
        """
        Counts the subplots needed for all the elements in a list recursively.

        Args:
            elems_list (list): The list to count.
            elem_count (int): The current count. (Pass 0 on the top level.)

        Returns:
            int: The number of subplots needed for `elems_list`.
        """
        _count = elem_count
        for obj in elems_list:
            if isinstance(obj, MgImage):
                _count += 1
            elif isinstance(obj, MgFigure):
                if obj.figure_type == 'layers':
                    _count = count_elems(obj.layers, _count)
                else:
                    # unknown figure types take no slot
                    _count += subplots_per_type.get(obj.figure_type, 0)
            elif isinstance(obj, MgList):
                _count = count_elems(obj.objectlist, _count)
        return _count

    elem_count = count_elems(self.objectlist, 0)

    # Fail early with a clear message (and before loading the plotting libraries):
    # without any subplot there is no output file name to derive either.
    if elem_count == 0:
        raise ValueError('Cannot create a figure: the list contains no plottable elements.')

    import os
    import librosa
    import librosa.display
    import matplotlib.pyplot as plt
    import matplotlib.image as mpimg
    import matplotlib
    import numpy as np

    def add_axis(fig, ax, rows, slot, **kwargs):
        """Creates the subplot for `slot` (0-based), stores it in `ax` and returns it."""
        ax[slot] = fig.add_subplot(rows, 1, slot + 1, **kwargs)
        return ax[slot]

    def draw_image(axis, path):
        """Shows the image at `path` on `axis` without tick axes, titled after its content."""
        axis.imshow(mpimg.imread(path))
        axis.set_aspect('auto')
        axis.axes.xaxis.set_visible(False)
        axis.axes.yaxis.set_visible(False)
        # add title based on content
        last_tag = os.path.splitext(path)[0].split('_')[-1]
        axis.set(title=image_titles.get(last_tag, os.path.basename(path)))

    def draw_mel_spectrogram(axis, data):
        """Draws the mel spectrogram stored in `data` on `axis` and sets up its ticks."""
        librosa.display.specshow(
            librosa.power_to_db(data['S'], ref=np.max, top_db=120),
            sr=data['sr'],
            y_axis='mel',
            fmax=data['sr'] / 2,
            x_axis='time',
            hop_length=data['hop_size'],
            ax=axis)

        # get rid of "default" ticks
        axis.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

        plot_xticks = np.arange(0, data['length'] + 0.1, data['length'] / 20)
        axis.set(xticks=plot_xticks)

        # frequency ticks: 250 Hz, then 500 Hz growing by a factor of 1.5 up to sr / 2
        freq_ticks = [250]
        freq = 500
        while freq < data['sr'] / 2:
            freq_ticks.append(freq)
            freq *= 1.5

        freq_ticks = [round(elem, -1) for elem in freq_ticks]
        freq_ticks_labels = [
            str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem))
            for elem in freq_ticks
        ]

        axis.set(yticks=(freq_ticks))
        axis.set(yticklabels=(freq_ticks_labels))

    def build_figure(elems_list, elem_count, fig, ax, index_of_first_plot, plot_counter, of):
        """
        Recursively crawls through the list of objects, and builds a single top-level figure from them.

        Args:
            elems_list (list): List of MgImage, MgFigure or MgList objects.
            elem_count (int): The total number of subplots to make.
            fig (matplotlib.pyplot.figure): The figure to fill.
            ax (list): The list of subplots (or their placeholders).
            index_of_first_plot (int): The index of the first plot.
            plot_counter (int): The running count of subplots (increments while crawling through all levels and building layers).
            of (str): The "running" string for the final output file name (each subplot increments it).

        Returns:
            str: The final output file name in the current level.
            int: The final count of subplots including the current level.
            bool: Whether there were deeper levels inside the current one.
            bool: Whether the first slot in the figure will come from an MgImage.
            str: The path to the image from the MgImage on the first slot.
        """
        there_were_layers, first_slot_was_img, img_to_redo = None, None, None

        for obj in elems_list:
            if isinstance(obj, MgImage):
                if plot_counter == 0:
                    first_slot_was_img = True
                    img_to_redo = obj.filename

                draw_image(add_axis(fig, ax, elem_count, plot_counter), obj.filename)

                # increment output filename
                stem = os.path.splitext(obj.filename)[0]
                if plot_counter == 0:
                    of = stem
                else:
                    of += '_' + stem.split('_')[-1]

                plot_counter += 1

            elif isinstance(obj, MgFigure):
                # NOTE(review): this index is local to the current recursion level; a value
                # set inside a nested MgList is not reported back to the caller.
                first_plot = index_of_first_plot is None
                if first_plot:
                    index_of_first_plot = plot_counter  # 0-based!

                kind = obj.figure_type

                if kind == 'layers':
                    there_were_layers = True
                    started_on_first_slot = plot_counter == 0
                    of, plot_counter, _, nested_first_img, nested_img = build_figure(
                        obj.layers, elem_count, fig, ax, index_of_first_plot, plot_counter, of)
                    # only a layer starting on the very first slot can decide what that slot shows
                    if started_on_first_slot:
                        first_slot_was_img, img_to_redo = nested_first_img, nested_img
                    continue

                if kind not in name_suffixes:
                    # unknown figure type: nothing to draw
                    continue

                data = obj.data

                # increment output filename
                suffix = name_suffixes[kind]
                of = data['of'] + suffix if plot_counter == 0 else of + suffix

                # the first plot owns the x axis, every later one shares it
                share = {} if first_plot else {'sharex': ax[index_of_first_plot]}
                axis = add_axis(fig, ax, elem_count, plot_counter, **share)

                if kind == 'audio.tempogram':
                    # make plot for onset strength
                    axis.plot(data['times'], data['onset_env'], label='Onset strength')
                    axis.label_outer()
                    axis.legend(frameon=True)
                    plot_counter += 1

                    # make plot for tempogram
                    axis = add_axis(fig, ax, elem_count, plot_counter,
                                    sharex=ax[index_of_first_plot])
                    librosa.display.specshow(
                        data['tempogram'],
                        sr=data['sr'],
                        hop_length=data['hop_size'],
                        x_axis='time',
                        y_axis='tempo',
                        cmap='magma',
                        ax=axis)
                    axis.axhline(
                        data['tempo'],
                        color='w',
                        linestyle='--',
                        alpha=1,
                        label='Estimated tempo={:g}'.format(data['tempo']))
                    axis.legend(loc='upper right')
                    axis.set(title='Tempogram')
                    plot_counter += 1

                elif kind == 'audio.descriptors':
                    # make plot for rms
                    axis.semilogy(data['times'], data['rms'][0], label='RMS Energy')
                    axis.legend(loc='upper right')
                    plot_counter += 1

                    # make plot for flatness
                    axis = add_axis(fig, ax, elem_count, plot_counter,
                                    sharex=ax[index_of_first_plot])
                    axis.plot(data['times'], data['flatness'].T, label='Flatness', color='y')
                    axis.legend(loc='upper right')
                    plot_counter += 1

                    # make plot for spectrogram, centroid, bandwidth and rolloff
                    axis = add_axis(fig, ax, elem_count, plot_counter,
                                    sharex=ax[index_of_first_plot])
                    draw_mel_spectrogram(axis, data)
                    axis.fill_between(
                        data['times'],
                        data['cent'][0] - data['spec_bw'][0],
                        data['cent'][0] + data['spec_bw'][0],
                        alpha=0.5,
                        label='Centroid +- bandwidth')
                    axis.plot(data['times'], data['cent'].T, label='Centroid', color='y')
                    axis.plot(data['times'], data['rolloff'][0],
                              label='Roll-off frequency (0.99)')
                    axis.plot(data['times'], data['rolloff_min'][0], color='r',
                              label='Roll-off frequency (0.01)')
                    axis.legend(loc='upper right')
                    plot_counter += 1

                elif kind == 'audio.spectrogram':
                    draw_mel_spectrogram(axis, data)
                    axis.set(title='Spectrogram')
                    plot_counter += 1

                elif kind == 'audio.waveform':
                    # `waveplot` is missing from newer librosa releases; fall back to
                    # `waveshow` there and keep using `waveplot` wherever it still exists.
                    draw_wave = getattr(librosa.display, 'waveplot', None)
                    if draw_wave is None:
                        draw_wave = librosa.display.waveshow
                    draw_wave(data['y'], sr=data['sr'], ax=axis)
                    plot_xticks = np.arange(0, data['length'] + 0.1, data['length'] / 20)
                    axis.set(xticks=plot_xticks)
                    axis.set(title='Waveform')
                    plot_counter += 1

            elif isinstance(obj, MgList):
                of, plot_counter, _, _, _ = build_figure(
                    obj.objectlist, elem_count, fig, ax, index_of_first_plot, plot_counter, of)

        return of, plot_counter, there_were_layers, first_slot_was_img, img_to_redo

    fig = plt.figure(dpi=dpi, figsize=(10, 3 * elem_count))

    # make sure background is white
    fig.patch.set_facecolor('white')
    fig.patch.set_alpha(1)

    if isinstance(title, str):
        # add title
        fig.suptitle(title, fontsize=16, y=0.99)

    ax = [None] * elem_count

    of, _, there_were_layers, first_slot_was_img, img_to_redo = build_figure(
        self.objectlist, elem_count, fig, ax, None, 0, None)

    # workaround matplotlib bug: if there was a layered figure where the first slot shows an image, delete and redo that slot
    if first_slot_was_img and there_were_layers:
        ax[0].remove()
        draw_image(add_axis(fig, ax, elem_count, 0), img_to_redo)

    fig.tight_layout()

    image_path = of + '.png'

    # save figure as png (explicitly this figure, not whichever one pyplot considers current)
    if export_png:
        fig.savefig(image_path, format='png', transparent=False)

    if not autoshow:
        plt.close(fig)

    # create MgFigure
    mgf = MgFigure(
        figure=fig,
        figure_type='layers',
        data=None,
        layers=self.objectlist,
        image=image_path)

    return mgf
def mg_audio_tempogram(filename=None, window_size=4096, overlap=8, mel_filters=512, power=2,
                       dpi=300, autoshow=True):
    """
    Renders a figure with plots of onset strength and tempogram of the video/audio file.

    Parameters
    ----------
    - filename : str, optional

        Path to the audio/video file to be processed.
    - window_size : int, optional

        The size of the FFT frame. Default is 4096. Here it is only used to derive the hop size.
    - overlap : int, optional

        The window overlap. The hop size is window_size / overlap.
        Example: window_size=1024, overlap=4 -> hop=256
    - mel_filters : int, optional

        Not used by this function. Default is 512.
    - power : int, float, optional

        Not used by this function. Default is 2.
    - dpi : int, optional

        Image quality of the rendered figure. Default is 300 DPI.
    - autoshow: bool, optional

        Whether to show the resulting figure automatically. Default is `True` (figure is shown).
        When `False`, the figure is closed after it has been saved.

    Outputs
    -------
    - `filename` + '_tempogram.png'

    Returns
    -------
    - MgFigure

        An MgFigure object referring to the internal figure and its data.
        `None` if no filename was given or the file has no audio track.
    """
    # NOTE(review): another definition of `mg_audio_tempogram` (with an extra `title`
    # parameter) appears later in this source; if both live in the same module, the
    # later one replaces this one at import time - confirm which is intended.
    if filename is None:
        print("No filename was given.")
        return None

    if not has_audio(filename):
        print('The video has no audio track.')
        return None

    of = os.path.splitext(filename)[0]
    image_path = of + '_tempogram.png'

    hop_size = int(window_size / overlap)

    y, sr = librosa.load(filename, sr=None)

    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_size)

    tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr, hop_length=hop_size)

    # Estimate the global tempo for display purposes
    tempo = librosa.beat.tempo(onset_envelope=oenv, sr=sr, hop_length=hop_size)[0]

    fig, ax = plt.subplots(nrows=2, figsize=(10, 6), dpi=dpi, sharex=True)

    times = librosa.times_like(oenv, sr=sr, hop_length=hop_size)

    # upper plot: onset strength
    ax[0].plot(times, oenv, label='Onset strength')
    ax[0].label_outer()
    ax[0].legend(frameon=True)

    # lower plot: tempogram with the estimated tempo marked by a dashed line
    librosa.display.specshow(tempogram, sr=sr, hop_length=hop_size,
                             x_axis='time', y_axis='tempo', cmap='magma', ax=ax[1])
    ax[1].axhline(tempo, color='w', linestyle='--', alpha=1,
                  label='Estimated tempo={:g}'.format(tempo))
    ax[1].legend(loc='upper right')
    ax[1].set(title='Tempogram')

    # save/close explicitly this figure, not whichever one pyplot considers current
    fig.savefig(image_path, format='png')

    if not autoshow:
        plt.close(fig)

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "onset_env": oenv,
        "tempogram": tempogram,
        "tempo": tempo
    }

    mgf = MgFigure(
        figure=fig,
        figure_type='audio.tempogram',
        data=data,
        layers=None,
        image=image_path)

    return mgf
def mg_audio_descriptors(filename=None, window_size=4096, overlap=8, mel_filters=512, power=2,
                         dpi=300, autoshow=True):
    """
    Renders a figure of plots showing spectral/loudness descriptors, including RMS energy,
    spectral flatness, centroid, bandwidth, rolloff of the video/audio file.

    The figure has three rows sharing the time axis: RMS energy (logarithmic y axis),
    spectral flatness, and a mel spectrogram overlaid with centroid, bandwidth and the
    0.99 / 0.01 roll-off frequencies.

    Parameters
    ----------
    - filename : str, optional

        Path to the audio/video file to be processed.
    - window_size : int, optional

        The size of the FFT frame. Default is 4096.
    - overlap : int, optional

        The window overlap. The hop size is window_size / overlap.
        Example: window_size=1024, overlap=4 -> hop=256
    - mel_filters : int, optional

        The number of filters to use for filtering the frequency domain. Affects the vertical
        resolution (sharpness) of the spectrogram. NB: Too high values with relatively small
        window sizes can result in artifacts (typically black lines) in the resulting image.
        Default is 512.
    - power : int, float, optional

        The steepness of the curve for the color mapping (passed as `power` to the mel
        spectrogram computation). Default is 2.
    - dpi : int, optional

        Image quality of the rendered figure. Default is 300 DPI.
    - autoshow: bool, optional

        Whether to show the resulting figure automatically. Default is `True` (figure is shown).
        When `False`, the figure is closed after it has been saved.

    Outputs
    -------
    - `filename` + '_descriptors.png'

    Returns
    -------
    - MgFigure

        An MgFigure object referring to the internal figure and its data.
        `None` if no filename was given or the file has no audio track.
    """
    if filename is None:
        print("No filename was given.")
        return None

    if not has_audio(filename):
        print('The video has no audio track.')
        return None

    of = os.path.splitext(filename)[0]
    image_path = of + '_descriptors.png'

    hop_size = int(window_size / overlap)

    y, sr = librosa.load(filename, sr=None)

    cent = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=window_size, hop_length=hop_size)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=window_size,
                                                 hop_length=hop_size)
    flatness = librosa.feature.spectral_flatness(y=y, n_fft=window_size, hop_length=hop_size)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=window_size,
                                               hop_length=hop_size, roll_percent=0.99)
    rolloff_min = librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=window_size,
                                                   hop_length=hop_size, roll_percent=0.01)
    rms = librosa.feature.rms(y=y, frame_length=window_size, hop_length=hop_size)

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_filters, fmax=sr / 2,
                                       n_fft=window_size, hop_length=hop_size, power=power)

    fig, ax = plt.subplots(figsize=(12, 8), dpi=dpi, nrows=3, sharex=True)
    rms_ax, flatness_ax, spec_ax = ax

    librosa.display.specshow(librosa.power_to_db(S, ref=np.max, top_db=120), sr=sr,
                             y_axis='mel', fmax=sr / 2, x_axis='time',
                             hop_length=hop_size, ax=spec_ax)

    # get rid of "default" ticks
    spec_ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    rms_ax.set(title=os.path.basename(filename))

    length = get_length(filename)
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    spec_ax.set(xticks=plot_xticks)

    # frequency ticks: 250 Hz, then 500 Hz growing by a factor of 1.5 up to sr / 2
    freq_ticks = [250]
    freq = 500
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.5

    freq_ticks = [round(elem, -1) for elem in freq_ticks]
    freq_ticks_labels = [
        str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem))
        for elem in freq_ticks
    ]

    spec_ax.set(yticks=(freq_ticks))
    spec_ax.set(yticklabels=(freq_ticks_labels))

    times = librosa.times_like(cent, sr=sr, n_fft=window_size, hop_length=hop_size)

    # overlay centroid, bandwidth and roll-off frequencies on the spectrogram
    spec_ax.fill_between(times, cent[0] - spec_bw[0], cent[0] + spec_bw[0],
                         alpha=0.5, label='Centroid +- bandwidth')
    spec_ax.plot(times, cent.T, label='Centroid', color='y')
    spec_ax.plot(times, rolloff[0], label='Roll-off frequency (0.99)')
    spec_ax.plot(times, rolloff_min[0], color='r', label='Roll-off frequency (0.01)')
    spec_ax.legend(loc='upper right')

    flatness_ax.plot(times, flatness.T, label='Flatness', color='y')
    flatness_ax.legend(loc='upper right')

    rms_ax.semilogy(times, rms[0], label='RMS Energy')
    rms_ax.legend(loc='upper right')

    # lay out/save/close explicitly this figure, not whichever one pyplot considers current
    fig.tight_layout()
    fig.savefig(image_path, format='png')

    if not autoshow:
        plt.close(fig)

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "S": S,
        "length": length,
        "cent": cent,
        "spec_bw": spec_bw,
        "rolloff": rolloff,
        "rolloff_min": rolloff_min,
        "flatness": flatness,
        "rms": rms
    }

    mgf = MgFigure(
        figure=fig,
        figure_type='audio.descriptors',
        data=data,
        layers=None,
        image=image_path)

    return mgf
def mg_audio_spectrogram(filename=None, window_size=4096, overlap=8, mel_filters=512, power=2,
                         dpi=300, autoshow=True):
    """
    Renders a figure showing the mel-scaled spectrogram of the video/audio file.

    Parameters
    ----------
    - filename : str, optional

        Path to the audio/video file to be processed.
    - window_size : int, optional

        The size of the FFT frame. Default is 4096.
    - overlap : int, optional

        The window overlap. The hop size is window_size / overlap.
        Example: window_size=1024, overlap=4 -> hop=256
    - mel_filters : int, optional

        The number of filters to use for filtering the frequency domain. Affects the vertical
        resolution (sharpness) of the spectrogram. NB: Too high values with relatively small
        window sizes can result in artifacts (typically black lines) in the resulting image.
        Default is 512.
    - power : int, float, optional

        The steepness of the curve for the color mapping (passed as `power` to the mel
        spectrogram computation). Default is 2.
    - dpi : int, optional

        Image quality of the rendered figure. Default is 300 DPI.
    - autoshow: bool, optional

        Whether to show the resulting figure automatically. Default is `True` (figure is shown).
        When `False`, the figure is closed after it has been saved.

    Outputs
    -------
    - `filename` + '_spectrogram.png'

    Returns
    -------
    - MgFigure

        An MgFigure object referring to the internal figure and its data.
        `None` if no filename was given or the file has no audio track.
    """
    if filename is None:
        print("No filename was given.")
        return None

    if not has_audio(filename):
        print('The video has no audio track.')
        return None

    of = os.path.splitext(filename)[0]
    image_path = of + '_spectrogram.png'

    hop_size = int(window_size / overlap)

    y, sr = librosa.load(filename, sr=None)

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_filters, fmax=sr / 2,
                                       n_fft=window_size, hop_length=hop_size, power=power)

    fig, ax = plt.subplots(figsize=(12, 6), dpi=dpi)

    img = librosa.display.specshow(librosa.power_to_db(S, ref=np.max, top_db=120), sr=sr,
                                   y_axis='mel', fmax=sr / 2, x_axis='time',
                                   hop_length=hop_size, ax=ax)

    # colorbar in 10 dB steps over the displayed range (-120 dB .. 0 dB)
    colorbar_ticks = range(-120, 1, 10)
    fig.colorbar(img, format='%+2.0f dB', ticks=colorbar_ticks)

    # get rid of "default" ticks
    ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    ax.set(title=os.path.basename(filename))

    length = get_length(filename)
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    ax.set(xticks=plot_xticks)

    # frequency ticks: from 100 Hz growing by a factor of 1.3, rounded to hundreds,
    # plus a final tick at sr / 2
    freq_ticks = []
    freq = 100
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.3

    freq_ticks = [round(elem, -2) for elem in freq_ticks]
    freq_ticks.append(sr / 2)
    freq_ticks_labels = [
        str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem))
        for elem in freq_ticks
    ]

    ax.set(yticks=(freq_ticks))
    ax.set(yticklabels=(freq_ticks_labels))

    # lay out/save/close explicitly this figure, not whichever one pyplot considers current
    fig.tight_layout()
    fig.savefig(image_path, format='png')

    if not autoshow:
        plt.close(fig)

    # create MgFigure
    data = {"hop_size": hop_size, "sr": sr, "of": of, "S": S, "length": length}

    mgf = MgFigure(
        figure=fig,
        figure_type='audio.spectrogram',
        data=data,
        layers=None,
        image=image_path)

    return mgf
def mg_audio_tempogram(filename=None, window_size=4096, overlap=8, mel_filters=512, power=2,
                       dpi=300, autoshow=True, title=None):
    """
    Renders a figure with plots of onset strength and tempogram of the video/audio file.

    Args:
        filename (str, optional): Path to the audio/video file to be processed. Defaults to None.
        window_size (int, optional): The size of the FFT frame. Here it is only used to derive the hop size. Defaults to 4096.
        overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
        mel_filters (int, optional): Not used by this function. Defaults to 512.
        power (float, optional): Not used by this function. Defaults to 2.
        dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
        autoshow (bool, optional): Whether to show the resulting figure automatically. When False, the figure is closed after it has been saved. Defaults to True.
        title (str, optional): Optionally add title to the figure. Defaults to None, which uses the file name as a title.

    Outputs:
        `filename`_tempogram.png

    Returns:
        MgFigure: An MgFigure object referring to the internal figure and its data. None if no filename was given or the file has no audio track.
    """
    if filename is None:
        print("No filename was given.")
        return None

    if not has_audio(filename):
        print('The video has no audio track.')
        return None

    of = os.path.splitext(filename)[0]
    image_path = of + '_tempogram.png'

    hop_size = int(window_size / overlap)

    y, sr = librosa.load(filename, sr=None)

    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_size)

    tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr, hop_length=hop_size)

    # Estimate the global tempo for display purposes
    tempo = librosa.beat.tempo(onset_envelope=oenv, sr=sr, hop_length=hop_size)[0]

    fig, ax = plt.subplots(nrows=2, figsize=(10, 6), dpi=dpi, sharex=True)

    # make sure background is white
    fig.patch.set_facecolor('white')
    fig.patch.set_alpha(1)

    # add title
    if title is None:
        title = os.path.basename(filename)
    fig.suptitle(title, fontsize=16)

    times = librosa.times_like(oenv, sr=sr, hop_length=hop_size)

    # upper plot: onset strength
    ax[0].plot(times, oenv, label='Onset strength')
    ax[0].label_outer()
    ax[0].legend(frameon=True)

    # lower plot: tempogram with the estimated tempo marked by a dashed line
    librosa.display.specshow(tempogram, sr=sr, hop_length=hop_size,
                             x_axis='time', y_axis='tempo', cmap='magma', ax=ax[1])
    ax[1].axhline(tempo, color='w', linestyle='--', alpha=1,
                  label='Estimated tempo={:g}'.format(tempo))
    ax[1].legend(loc='upper right')
    ax[1].set(title='Tempogram')

    # save/close explicitly this figure, not whichever one pyplot considers current
    fig.savefig(image_path, format='png', transparent=False)

    if not autoshow:
        plt.close(fig)

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "onset_env": oenv,
        "tempogram": tempogram,
        "tempo": tempo
    }

    mgf = MgFigure(
        figure=fig,
        figure_type='audio.tempogram',
        data=data,
        layers=None,
        image=image_path)

    return mgf
def spectrogram(self, window_size=4096, overlap=8, mel_filters=512, power=2, dpi=300,
                autoshow=True, title=None):
    """
    Renders a figure showing the mel-scaled spectrogram of the video/audio file.

    Args:
        window_size (int, optional): The size of the FFT frame. Defaults to 4096.
        overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
        mel_filters (int, optional): The number of filters to use for filtering the frequency domain. Affects the vertical resolution (sharpness) of the spectrogram. NB: Too high values with relatively small window sizes can result in artifacts (typically black lines) in the resulting image. Defaults to 512.
        power (float, optional): The steepness of the curve for the color mapping (passed as `power` to the mel spectrogram computation). Defaults to 2.
        dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
        autoshow (bool, optional): Whether to show the resulting figure automatically. When False, the figure is closed after it has been saved. Defaults to True.
        title (str, optional): Optionally add title to the figure. Defaults to None, which uses the file name as a title.

    Outputs:
        `self.filename`_spectrogram.png

    Returns:
        MgFigure: An MgFigure object referring to the internal figure and its data. None if the file has no audio track.
    """
    if not has_audio(self.filename):
        print('The video has no audio track.')
        return None

    image_path = self.of + '_spectrogram.png'

    hop_size = int(window_size / overlap)

    y, sr = librosa.load(self.filename, sr=None)

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_filters, fmax=sr / 2,
                                       n_fft=window_size, hop_length=hop_size, power=power)

    # honour the requested DPI (it used to be hard-coded to 300 here)
    fig, ax = plt.subplots(figsize=(12, 6), dpi=dpi)

    # make sure background is white
    fig.patch.set_facecolor('white')
    fig.patch.set_alpha(1)

    # add title
    if title is None:
        title = os.path.basename(self.filename)
    fig.suptitle(title, fontsize=16)

    img = librosa.display.specshow(librosa.power_to_db(S, ref=np.max, top_db=120), sr=sr,
                                   y_axis='mel', fmax=sr / 2, x_axis='time',
                                   hop_length=hop_size, ax=ax)

    # colorbar in 10 dB steps over the displayed range (-120 dB .. 0 dB)
    colorbar_ticks = range(-120, 1, 10)
    fig.colorbar(img, format='%+2.0f dB', ticks=colorbar_ticks)

    # get rid of "default" ticks
    ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    length = get_length(self.filename)
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    ax.set(xticks=plot_xticks)

    # frequency ticks: from 100 Hz growing by a factor of 1.3, rounded to hundreds,
    # plus a final tick at sr / 2
    freq_ticks = []
    freq = 100
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.3

    freq_ticks = [round(elem, -2) for elem in freq_ticks]
    freq_ticks.append(sr / 2)
    freq_ticks_labels = [
        str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem))
        for elem in freq_ticks
    ]

    ax.set(yticks=(freq_ticks))
    ax.set(yticklabels=(freq_ticks_labels))

    # lay out/save/close explicitly this figure, not whichever one pyplot considers current
    fig.tight_layout()
    fig.savefig(image_path, format='png', transparent=False)

    if not autoshow:
        plt.close(fig)

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": self.of,
        "S": S,
        "length": length
    }

    mgf = MgFigure(
        figure=fig,
        figure_type='audio.spectrogram',
        data=data,
        layers=None,
        image=image_path)

    return mgf