def mg_cropvideo_ffmpeg(filename, crop_movement='Auto', motion_box_thresh=0.1, motion_box_margin=12):
    """
    Crops the video using ffmpeg.

    Args:
        filename (str): Path to the video file.
        crop_movement (str, optional): 'Auto' finds the bounding box that contains the total motion in the video. The motion threshold is given by motion_box_thresh. 'Manual' opens a simple GUI that is used to crop the video manually by looking at the first frame. Defaults to 'Auto'.
        motion_box_thresh (float, optional): Only meaningful if `crop_movement='Auto'`. Takes floats between 0 and 1, where 0 includes all the motion and 1 includes none. Defaults to 0.1.
        motion_box_margin (int, optional): Only meaningful if `crop_movement='Auto'`. Adds margin to the bounding box. Defaults to 12.

    Returns:
        str: Path to the cropped video.
    """
    global w, h, x, y

    pb = MgProgressbar(total=get_length(filename), prefix='Rendering cropped video:')

    if crop_movement.lower() == 'manual':
        scale_ratio = get_screen_video_ratio(filename)
        width, height = get_widthheight(filename)
        scaled_width, scaled_height = [int(elem * scale_ratio) for elem in [width, height]]
        first_frame_as_image = get_first_frame_as_image(filename, pict_format='.jpg')
        # The cropping UI runs in a separate thread to avoid cv2.waitKey crashing
        # Python with a segmentation fault on Linux in Terminal. run_cropping_window
        # sets the global w, h, x and y, so the thread gets a dedicated name to
        # avoid clobbering the global x.
        import threading
        ui_thread = threading.Thread(target=run_cropping_window, args=(first_frame_as_image, scale_ratio, scaled_width, scaled_height))
        ui_thread.start()
        ui_thread.join()
    elif crop_movement.lower() == 'auto':
        w, h, x, y = find_motion_box_ffmpeg(filename, motion_box_thresh=motion_box_thresh, motion_box_margin=motion_box_margin)

    cropped_video = crop_ffmpeg(filename, w, h, x, y)

    if crop_movement.lower() == 'manual':
        cv2.destroyAllWindows()
        os.remove(first_frame_as_image)

    return cropped_video
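# A minimal usage sketch of automatic cropping (assumptions: 'dance.avi' is a
# hypothetical input file, and ffmpeg is available on the PATH; the exact output
# path is whatever crop_ffmpeg returns):
def _demo_cropvideo():
    # a higher threshold tracks only the strongest motion; the margin widens the box
    cropped = mg_cropvideo_ffmpeg('dance.avi', crop_movement='Auto',
                                  motion_box_thresh=0.2, motion_box_margin=24)
    print(cropped)  # path to the cropped video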
def contrast_brightness_ffmpeg(filename, contrast=0, brightness=0):
    """
    Applies contrast and brightness adjustments on the source video using ffmpeg.

    Args:
        filename (str): Path to the video to process.
        contrast (int or float, optional): Increase or decrease contrast. Values range from -100 to 100. Defaults to 0.
        brightness (int or float, optional): Increase or decrease brightness. Values range from -100 to 100. Defaults to 0.

    Outputs:
        `filename`_cb.<file extension>
    """
    if contrast == 0 and brightness == 0:
        return

    import os
    import numpy as np

    of, fex = os.path.splitext(filename)

    # keep the values in a sensible range
    contrast = np.clip(contrast, -100.0, 100.0)
    brightness = np.clip(brightness, -100.0, 100.0)

    # The ranges are "handpicked" so that the results are close to those of
    # contrast_brightness_cv2 (deprecated).
    if contrast == 0:
        # neutral values for ffmpeg's eq filter (saturation and contrast default to 1)
        p_saturation, p_contrast, p_brightness = 1, 1, 0
    elif contrast > 0:
        p_saturation = scale_num(contrast, 0, 100, 1, 1.9)
        p_contrast = scale_num(contrast, 0, 100, 1, 2.3)
        p_brightness = scale_num(contrast, 0, 100, 0, 0.04)
    else:
        p_saturation = scale_num(contrast, 0, -100, 1, 0)
        p_contrast = scale_num(contrast, 0, -100, 1, 0)
        p_brightness = 0

    if brightness != 0:
        p_brightness += brightness / 100

    outname = of + '_cb' + fex

    cmd = ['ffmpeg', '-y', '-i', filename, '-vf',
           f'eq=saturation={p_saturation}:contrast={p_contrast}:brightness={p_brightness}',
           '-q:v', '3', '-c:a', 'copy', outname]

    ffmpeg_cmd(cmd, get_length(filename), pb_prefix='Adjusting contrast and brightness:')
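# A minimal usage sketch (assumption: 'dance.avi' is a hypothetical input file).
# Both parameters take the familiar -100..100 range and are mapped internally
# onto ffmpeg's eq filter:
def _demo_contrast_brightness():
    contrast_brightness_ffmpeg('dance.avi', contrast=30, brightness=-10)
    # -> writes 'dance_cb.avi' next to the input, audio copied unchanged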
def skip_frames_ffmpeg(filename, skip=0): """ Time-shrinks the video by skipping (discarding) every n frames determined by `skip`. To discard half of the frames (ie. double the speed of the video) use `skip=1`. Args: filename (str): Path to the video to process. skip (int, optional): Discard `skip` frames before keeping one. Defaults to 0. Outputs: `filename`_skip.<file extension> """ if skip == 0: return import os of, fex = os.path.splitext(filename) pts_ratio = 1 / (skip + 1) atempo_ratio = skip + 1 outname = of + '_skip' + fex if has_audio(filename): cmd = [ 'ffmpeg', '-y', '-i', filename, '-filter_complex', f'[0:v]setpts={pts_ratio}*PTS[v];[0:a]atempo={atempo_ratio}[a]', '-map', '[v]', '-map', '[a]', '-q:v', '3', '-shortest', outname ] else: cmd = [ 'ffmpeg', '-y', '-i', filename, '-filter_complex', f'[0:v]setpts={pts_ratio}*PTS[v]', '-map', '[v]', '-q:v', '3', outname ] ffmpeg_cmd(cmd, get_length(filename), pb_prefix='Skipping frames:')
def videograms_ffmpeg(self): """ Usees FFMPEG as backend. Averages videoframes by axes, and creates two images of the horizontal-axis and vertical-axis stacks. In these stacks, a single row or column corresponds to a frame from the source video, and the index of the row or column corresponds to the index of the source frame. Outputs ------- - `filename`_vgx.png A horizontal videogram of the source video. - `filename`_vgy.png A vertical videogram of the source video. Returns ------- - list(MgImage, MgImage) A tuple with the string paths to the horizontal and vertical videograms respectively. """ width, height = get_widthheight(self.filename) framecount = get_framecount(self.filename) length = get_length(self.filename) outname = self.of + '_vgy.png' cmd = ['ffmpeg', '-y', '-i', self.filename, '-frames', '1', '-vf', f'scale=1:{height}:sws_flags=area,normalize,tile={framecount}x1', outname] ffmpeg_cmd(cmd, length, pb_prefix="Rendering horizontal videogram:") outname = self.of + '_vgx.png' cmd = ['ffmpeg', '-y', '-i', self.filename, '-frames', '1', '-vf', f'scale={width}:1:sws_flags=area,normalize,tile=1x{framecount}', outname] ffmpeg_cmd(cmd, length, pb_prefix="Rendering vertical videogram:") return MgList([MgImage(self.of+'_vgx.png'), MgImage(self.of+'_vgy.png')])
def mg_cropvideo_ffmpeg(filename, crop_movement='Auto', motion_box_thresh=0.1, motion_box_margin=12):
    global frame_mask, drawing, g_val, x_start, x_stop, y_start, y_stop

    x_start, y_start = -1, -1
    x_stop, y_stop = -1, -1
    drawing = False

    pb = MgProgressbar(total=get_length(filename), prefix='Rendering cropped video:')

    if crop_movement.lower() == 'manual':
        scale_ratio = get_screen_video_ratio(filename)
        width, height = get_widthheight(filename)
        scaled_width, scaled_height = [int(elem * scale_ratio) for elem in [width, height]]
        first_frame_as_image = get_first_frame_as_image(filename, pict_format='.jpg')
        frame = cv2.imread(first_frame_as_image)
        frame_scaled = cv2.resize(frame, (scaled_width, scaled_height))
        frame_mask = np.zeros(frame_scaled.shape)
        name_str = 'Draw rectangle and press "C" to crop'
        cv2.namedWindow(name_str, cv2.WINDOW_AUTOSIZE)
        cv2.setMouseCallback(name_str, draw_rectangle, param=frame_scaled)
        g_val = 220
        while True:
            cv2.imshow(name_str, frame_scaled * (frame_mask != g_val) + frame_mask.astype(np.uint8))
            k = cv2.waitKey(1) & 0xFF
            if k in (ord('c'), ord('C')):
                break
        cv2.destroyAllWindows()
        # normalize the rectangle so (x_start, y_start) is the top left corner
        if x_stop < x_start:
            x_start, x_stop = x_stop, x_start
        if y_stop < y_start:
            y_start, y_stop = y_stop, y_start
        w, h, x, y = x_stop - x_start, y_stop - y_start, x_start, y_start
        # scale the selection back up to source resolution if the preview was downscaled
        if scale_ratio < 1:
            w, h, x, y = [int(elem / scale_ratio) for elem in [w, h, x, y]]
    elif crop_movement.lower() == 'auto':
        w, h, x, y = find_motion_box_ffmpeg(filename, motion_box_thresh=motion_box_thresh, motion_box_margin=motion_box_margin)

    cropped_video = crop_ffmpeg(filename, w, h, x, y)

    if crop_movement.lower() == 'manual':
        cv2.destroyAllWindows()
        os.remove(first_frame_as_image)

    return cropped_video
def history_ffmpeg(self, filename='', history_length=10, weights=1, normalize=False, norm_strength=1, norm_smooth=0):
    """
    Creates a video where each frame is the average of the n previous frames, where n is determined by `history_length`.
    The history frames are summed up and normalized, and added to the current frame to show the history. Based on ffmpeg.

    Parameters
    ----------
    - filename : str, optional

        Path to the input video file. If not specified the video file pointed to by the MgObject is used.
    - history_length : int, optional

        Default is 10. Number of frames to be saved in the history tail.
    - weights : int, float, str or list, optional

        Default is 1. Defines the weight or weights applied to the frames in the history tail. If given as a list, the first element corresponds to the weight of the newest frame in the tail. If given as a str - like "3 1.2 1" - it is automatically converted to a list - like [3, 1.2, 1].
    - normalize : bool, optional

        Default is `False` (no normalization). If `True`, the history video will be normalized. This can be useful when processing motion (frame difference) videos.
    - norm_strength : int or float, optional

        Default is 1. Defines the strength of the normalization, where 1 represents full strength.
    - norm_smooth : int, optional

        Default is 0 (no smoothing). Defines the number of previous frames to use for temporal smoothing. The input range of each channel is smoothed using a rolling average over the current frame and the `norm_smooth` previous frames.

    Outputs
    -------
    - `filename`_history.<file extension>

    Returns
    -------
    - MgObject

        A new MgObject pointing to the output '_history' video file.
    """
    if filename == '':
        filename = self.filename

    of, fex = os.path.splitext(filename)

    if type(weights) in [int, float]:
        weights_map = np.ones(history_length)
        weights_map[-1] = weights
        str_weights = ' '.join([str(weight) for weight in weights_map])
    elif type(weights) == list:
        typecheck_list = [type(item) in [int, float] for item in weights]
        if False in typecheck_list:
            raise ParameterError('Found wrong type(s) in the list of weights. Use ints and floats.')
        elif len(weights) > history_length:
            raise ParameterError('history_length must be greater than or equal to the number of weights specified in weights.')
        else:
            weights_map = list(np.ones(history_length - len(weights)))
            weights.reverse()
            weights_map += weights
            str_weights = ' '.join([str(weight) for weight in weights_map])
    elif type(weights) == str:
        # parse a space-separated string of numbers, like "3 1.2 1"
        try:
            weights_as_list = [float(item) for item in weights.split()]
        except ValueError:
            raise ParameterError('Found wrong type(s) in the list of weights. Use ints and floats.')
        if len(weights_as_list) > history_length:
            raise ParameterError('history_length must be greater than or equal to the number of weights specified in weights.')
        else:
            weights_map = list(np.ones(history_length - len(weights_as_list)))
            weights_as_list.reverse()
            weights_map += weights_as_list
            str_weights = ' '.join([str(weight) for weight in weights_map])
    else:
        raise ParameterError('Wrong type used for weights. Use int, float, str, or list.')

    if type(normalize) != bool:
        raise ParameterError('Wrong type used for normalize. Use only bool.')

    if normalize:
        if type(norm_strength) not in [int, float]:
            raise ParameterError('Wrong type used for norm_strength. Use int or float.')
        if type(norm_smooth) != int:
            raise ParameterError('Wrong type used for norm_smooth. Use only int.')

    outname = of + '_history' + fex

    if normalize:
        if norm_smooth != 0:
            cmd = ['ffmpeg', '-y', '-i', filename, '-filter_complex',
                   f'tmix=frames={history_length}:weights={str_weights},normalize=independence=0:strength={norm_strength}:smoothing={norm_smooth}',
                   '-q:v', '3', '-c:a', 'copy', outname]
        else:
            cmd = ['ffmpeg', '-y', '-i', filename, '-filter_complex',
                   f'tmix=frames={history_length}:weights={str_weights},normalize=independence=0:strength={norm_strength}',
                   '-q:v', '3', '-c:a', 'copy', outname]
    else:
        cmd = ['ffmpeg', '-y', '-i', filename, '-vf',
               f'tmix=frames={history_length}:weights={str_weights}',
               '-q:v', '3', '-c:a', 'copy', outname]

    ffmpeg_cmd(cmd, get_length(filename), pb_prefix='Rendering history video:')

    destination_video = self.of + '_history' + self.fex
    return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)
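# A minimal usage sketch (assumptions: history_ffmpeg is bound as a method of
# musicalgestures.MgObject, and 'dance.avi' is a hypothetical input file):
def _demo_history():
    import musicalgestures
    mg = musicalgestures.MgObject('dance.avi')
    # weight the newest tail frame 3x; normalization helps on motion videos
    history = mg.history_ffmpeg(history_length=25, weights=[3, 1.2, 1], normalize=True)
    # -> writes 'dance_history.avi' and returns a new MgObject pointing to it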
def videograms_ffmpeg(self): """ Renders horizontal and vertical videograms of the source video using ffmpeg. Averages videoframes by axes, and creates two images of the horizontal-axis and vertical-axis stacks. In these stacks, a single row or column corresponds to a frame from the source video, and the index of the row or column corresponds to the index of the source frame. Outputs: `self.filename`_vgx.png `self.filename`_vgy.png Returns: MgList(MgImage, MgImage): An MgList with the MgImage objects referring to the horizontal and vertical videograms respectively. """ width, height = get_widthheight(self.filename) framecount = get_framecount(self.filename) def calc_skipfactor(width, height, framecount): """ Helper function to calculate the necessary frame-skipping to avoid integer overflow. This makes sure that we can succesfully create videograms even on many-hours-long videos as well. Args: width (int): The width of the video. height (int): The height of the video. framecount (int): The number of frames in the video. Returns: list(int, int): The necessary dilation factors to apply on the video for the horizontal and vertical videograms, respectively. """ intmax = 2147483647 skipfactor_x = int( math.ceil(framecount * 8 / (intmax / (height + 128) - 1024))) skipfactor_y = int( math.ceil(framecount / (intmax / ((width * 8) + 1024) - 128))) return skipfactor_x, skipfactor_y testx, testy = calc_skipfactor(width, height, framecount) if testx > 1 or testy > 1: necessary_skipfactor = max([testx, testy]) print( f'{os.path.basename(self.filename)} is too large to process. Applying minimal skipping necessary...' ) skip_frames_ffmpeg(self.filename, skip=necessary_skipfactor - 1) shortened_file = self.of + '_skip' + self.fex framecount = get_framecount(shortened_file) length = get_length(shortened_file) outname = self.of + '_skip_vgy.png' cmd = [ 'ffmpeg', '-y', '-i', shortened_file, '-vf', f'scale=1:{height}:sws_flags=area,normalize,tile={framecount}x1', '-aspect', f'{framecount}:{height}', '-frames', '1', outname ] ffmpeg_cmd(cmd, length, stream=False, pb_prefix="Rendering horizontal videogram:") outname = self.of + '_skip_vgx.png' cmd = [ 'ffmpeg', '-y', '-i', shortened_file, '-vf', f'scale={width}:1:sws_flags=area,normalize,tile=1x{framecount}', '-aspect', f'{width}:{framecount}', '-frames', '1', outname ] ffmpeg_cmd(cmd, length, stream=False, pb_prefix="Rendering vertical videogram:") return MgList([ MgImage(self.of + '_skip_vgx.png'), MgImage(self.of + '_skip_vgy.png') ]) else: length = get_length(self.filename) outname = self.of + '_vgy.png' cmd = [ 'ffmpeg', '-y', '-i', self.filename, '-frames', '1', '-vf', f'scale=1:{height}:sws_flags=area,normalize,tile={framecount}x1', '-aspect', f'{framecount}:{height}', outname ] ffmpeg_cmd(cmd, length, stream=False, pb_prefix="Rendering horizontal videogram:") outname = self.of + '_vgx.png' cmd = [ 'ffmpeg', '-y', '-i', self.filename, '-frames', '1', '-vf', f'scale={width}:1:sws_flags=area,normalize,tile=1x{framecount}', '-aspect', f'{width}:{framecount}', outname ] ffmpeg_cmd(cmd, length, stream=False, pb_prefix="Rendering vertical videogram:") return MgList( [MgImage(self.of + '_vgx.png'), MgImage(self.of + '_vgy.png')])
def mg_audio_descriptors(filename=None, window_size=4096, overlap=8, mel_filters=512, power=2, dpi=300, autoshow=True):
    """
    Renders a figure of plots showing spectral/loudness descriptors, including RMS energy, spectral flatness,
    centroid, bandwidth and rolloff of the video/audio file.

    Parameters
    ----------
    - filename : str, optional

        Path to the audio/video file to be processed.
    - window_size : int, optional

        The size of the FFT frame. Default is 4096.
    - overlap : int, optional

        The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256.
    - mel_filters : int, optional

        The number of filters to use for filtering the frequency domain. Affects the vertical resolution (sharpness) of the spectrogram. NB: Too high values with relatively small window sizes can result in artifacts (typically black lines) in the resulting image. Default is 512.
    - power : int, float, optional

        The steepness of the curve for the color mapping. Default is 2.
    - dpi : int, optional

        Image quality of the rendered figure. Default is 300 DPI.
    - autoshow : bool, optional

        Whether to show the resulting figure automatically. Default is `True` (figure is shown).

    Outputs
    -------
    - `filename`_descriptors.png

    Returns
    -------
    - MgFigure

        An MgFigure object referring to the internal figure and its data.
    """
    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    of, fex = os.path.splitext(filename)
    hop_size = int(window_size / overlap)

    y, sr = librosa.load(filename, sr=None)

    cent = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=window_size, hop_length=hop_size)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=window_size, hop_length=hop_size)
    flatness = librosa.feature.spectral_flatness(y=y, n_fft=window_size, hop_length=hop_size)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=window_size, hop_length=hop_size, roll_percent=0.99)
    rolloff_min = librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=window_size, hop_length=hop_size, roll_percent=0.01)
    rms = librosa.feature.rms(y=y, frame_length=window_size, hop_length=hop_size)

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_filters, fmax=sr / 2, n_fft=window_size, hop_length=hop_size, power=power)

    fig, ax = plt.subplots(figsize=(12, 8), dpi=dpi, nrows=3, sharex=True)

    img = librosa.display.specshow(librosa.power_to_db(S, ref=np.max, top_db=120), sr=sr, y_axis='mel', fmax=sr / 2, x_axis='time', hop_length=hop_size, ax=ax[2])

    # get rid of "default" ticks
    ax[2].yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    ax[0].set(title=os.path.basename(filename))

    length = get_length(filename)
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    ax[2].set(xticks=plot_xticks)

    freq_ticks = [250]
    freq = 500
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.5
    freq_ticks = [round(elem, -1) for elem in freq_ticks]
    freq_ticks_labels = [str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem)) for elem in freq_ticks]

    ax[2].set(yticks=freq_ticks)
    ax[2].set(yticklabels=freq_ticks_labels)

    times = librosa.times_like(cent, sr=sr, n_fft=window_size, hop_length=hop_size)

    ax[2].fill_between(times, cent[0] - spec_bw[0], cent[0] + spec_bw[0], alpha=0.5, label='Centroid +- bandwidth')
    ax[2].plot(times, cent.T, label='Centroid', color='y')
    ax[2].plot(times, rolloff[0], label='Roll-off frequency (0.99)')
    ax[2].plot(times, rolloff_min[0], color='r', label='Roll-off frequency (0.01)')
    ax[2].legend(loc='upper right')

    ax[1].plot(times, flatness.T, label='Flatness', color='y')
    ax[1].legend(loc='upper right')

    ax[0].semilogy(times, rms[0], label='RMS Energy')
    ax[0].legend(loc='upper right')

    plt.tight_layout()
    plt.savefig('%s_descriptors.png' % of, format='png')

    if not autoshow:
        plt.close()

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": of,
        "times": times,
        "S": S,
        "length": length,
        "cent": cent,
        "spec_bw": spec_bw,
        "rolloff": rolloff,
        "rolloff_min": rolloff_min,
        "flatness": flatness,
        "rms": rms
    }

    mgf = MgFigure(figure=fig, figure_type='audio.descriptors', data=data, layers=None, image=of + '_descriptors.png')

    return mgf
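# A minimal usage sketch (assumption: 'dance.avi' is a hypothetical file with an
# audio track). window_size=4096 with overlap=8 gives a hop size of 512 samples:
def _demo_audio_descriptors():
    mgf = mg_audio_descriptors('dance.avi', window_size=4096, overlap=8, autoshow=False)
    # -> renders 'dance_descriptors.png'; the MgFigure carries the raw descriptor arrays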
def mg_audio_spectrogram(filename=None, window_size=4096, overlap=8, mel_filters=512, power=2, dpi=300, autoshow=True):
    """
    Renders a figure showing the mel-scaled spectrogram of the video/audio file.

    Parameters
    ----------
    - filename : str, optional

        Path to the audio/video file to be processed.
    - window_size : int, optional

        The size of the FFT frame. Default is 4096.
    - overlap : int, optional

        The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256.
    - mel_filters : int, optional

        The number of filters to use for filtering the frequency domain. Affects the vertical resolution (sharpness) of the spectrogram. NB: Too high values with relatively small window sizes can result in artifacts (typically black lines) in the resulting image. Default is 512.
    - power : int, float, optional

        The steepness of the curve for the color mapping. Default is 2.
    - dpi : int, optional

        Image quality of the rendered figure. Default is 300 DPI.
    - autoshow : bool, optional

        Whether to show the resulting figure automatically. Default is `True` (figure is shown).

    Outputs
    -------
    - `filename`_spectrogram.png

    Returns
    -------
    - MgFigure

        An MgFigure object referring to the internal figure and its data.
    """
    if filename is None:
        print("No filename was given.")
        return

    if not has_audio(filename):
        print('The video has no audio track.')
        return

    of, fex = os.path.splitext(filename)
    hop_size = int(window_size / overlap)

    y, sr = librosa.load(filename, sr=None)

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_filters, fmax=sr / 2, n_fft=window_size, hop_length=hop_size, power=power)

    fig, ax = plt.subplots(figsize=(12, 6), dpi=dpi)

    img = librosa.display.specshow(librosa.power_to_db(S, ref=np.max, top_db=120), sr=sr, y_axis='mel', fmax=sr / 2, x_axis='time', hop_length=hop_size, ax=ax)

    colorbar_ticks = range(-120, 1, 10)
    fig.colorbar(img, format='%+2.0f dB', ticks=colorbar_ticks)

    # get rid of "default" ticks
    ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    ax.set(title=os.path.basename(filename))

    length = get_length(filename)
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    ax.set(xticks=plot_xticks)

    freq_ticks = []
    freq = 100
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.3
    freq_ticks = [round(elem, -2) for elem in freq_ticks]
    freq_ticks.append(sr / 2)
    freq_ticks_labels = [str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem)) for elem in freq_ticks]

    ax.set(yticks=freq_ticks)
    ax.set(yticklabels=freq_ticks_labels)

    plt.tight_layout()
    plt.savefig('%s_spectrogram.png' % of, format='png')

    if not autoshow:
        plt.close()

    # create MgFigure
    data = {"hop_size": hop_size, "sr": sr, "of": of, "S": S, "length": length}

    mgf = MgFigure(figure=fig, figure_type='audio.spectrogram', data=data, layers=None, image=of + '_spectrogram.png')

    return mgf
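# A minimal usage sketch (assumptions: 'dance.avi' is a hypothetical file with an
# audio track, and MgFigure keeps its data dict accessible as mgf.data):
def _demo_audio_spectrogram():
    mgf = mg_audio_spectrogram('dance.avi', mel_filters=512, power=2, autoshow=False)
    # -> renders 'dance_spectrogram.png'; mgf.data['S'] is the mel spectrogram matrix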
def find_motion_box_ffmpeg(filename, motion_box_thresh=0.1, motion_box_margin=12):
    """
    Helper function to find the area of motion in a video, using ffmpeg.

    Args:
        filename (str): Path to the video file.
        motion_box_thresh (float, optional): Pixel threshold to apply to the video before assessing the area of motion. Defaults to 0.1.
        motion_box_margin (int, optional): Margin (in pixels) to add to the detected motion box. Defaults to 12.

    Raises:
        KeyboardInterrupt: In case we stop the process manually.

    Returns:
        int: The width of the motion box.
        int: The height of the motion box.
        int: The X coordinate of the top left corner of the motion box.
        int: The Y coordinate of the top left corner of the motion box.
    """
    import subprocess
    import os
    import matplotlib
    import numpy as np

    total_time = get_length(filename)
    width, height = get_widthheight(filename)
    crop_str = ''

    thresh_color = matplotlib.colors.to_hex([motion_box_thresh, motion_box_thresh, motion_box_thresh])
    thresh_color = '0x' + thresh_color[1:]

    pb = MgProgressbar(total=total_time, prefix='Finding area of motion:')

    command = ['ffmpeg', '-y', '-i', filename,
               '-f', 'lavfi', '-i', f'color={thresh_color},scale={width}:{height}',
               '-f', 'lavfi', '-i', f'color=black,scale={width}:{height}',
               '-f', 'lavfi', '-i', f'color=white,scale={width}:{height}',
               '-lavfi', 'format=gray,tblend=all_mode=difference,threshold,cropdetect=round=2:limit=0:reset=0',
               '-f', 'null', '/dev/null']

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)

    try:
        while True:
            out = process.stdout.readline()
            if out == '':
                process.wait()
                break
            else:
                out_list = out.split()
                crop_and_time = sorted([elem for elem in out_list if elem.startswith('t:') or elem.startswith('crop=')])
                if len(crop_and_time) != 0:
                    crop_str = crop_and_time[0]
                    time_float = float(crop_and_time[1][2:])
                    pb.progress(time_float)

        pb.progress(total_time)

        crop_width, crop_height, crop_x, crop_y = [int(elem) for elem in crop_str[5:].split(':')]

        motion_box_margin = roundup(motion_box_margin, 4)
        crop_width = np.clip(crop_width + motion_box_margin, 4, width)
        crop_height = np.clip(crop_height + motion_box_margin, 4, height)
        crop_x = np.clip(crop_x - (motion_box_margin / 2), 4, width)
        crop_y = np.clip(crop_y - (motion_box_margin / 2), 4, height)

        if crop_x + crop_width > width:
            crop_x = width - crop_width
        else:
            crop_x = np.clip(crop_x, 0, width)
        if crop_y + crop_height > height:
            crop_y = height - crop_height
        else:
            crop_y = np.clip(crop_y, 0, height)

        crop_width, crop_height, crop_x, crop_y = [int(elem) for elem in [crop_width, crop_height, crop_x, crop_y]]

        return crop_width, crop_height, crop_x, crop_y

    except KeyboardInterrupt:
        try:
            process.terminate()
        except OSError:
            pass
        process.wait()
        raise KeyboardInterrupt
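# A minimal usage sketch (assumption: 'dance.avi' is a hypothetical input file).
# A higher threshold shrinks the box to the strongest motion only:
def _demo_find_motion_box():
    w, h, x, y = find_motion_box_ffmpeg('dance.avi', motion_box_thresh=0.2, motion_box_margin=24)
    print(f'motion box: {w}x{h} at ({x}, {y})')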
def spectrogram(self, window_size=4096, overlap=8, mel_filters=512, power=2, dpi=300, autoshow=True, title=None):
    """
    Renders a figure showing the mel-scaled spectrogram of the video/audio file.

    Args:
        window_size (int, optional): The size of the FFT frame. Defaults to 4096.
        overlap (int, optional): The window overlap. The hop size is window_size / overlap. Example: window_size=1024, overlap=4 -> hop=256. Defaults to 8.
        mel_filters (int, optional): The number of filters to use for filtering the frequency domain. Affects the vertical resolution (sharpness) of the spectrogram. NB: Too high values with relatively small window sizes can result in artifacts (typically black lines) in the resulting image. Defaults to 512.
        power (float, optional): The steepness of the curve for the color mapping. Defaults to 2.
        dpi (int, optional): Image quality of the rendered figure in DPI. Defaults to 300.
        autoshow (bool, optional): Whether to show the resulting figure automatically. Defaults to True.
        title (str, optional): Optionally add title to the figure. Defaults to None, which uses the file name as a title.

    Outputs:
        `self.filename`_spectrogram.png

    Returns:
        MgFigure: An MgFigure object referring to the internal figure and its data.
    """
    if not has_audio(self.filename):
        print('The video has no audio track.')
        return

    hop_size = int(window_size / overlap)

    y, sr = librosa.load(self.filename, sr=None)

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_filters, fmax=sr / 2, n_fft=window_size, hop_length=hop_size, power=power)

    fig, ax = plt.subplots(figsize=(12, 6), dpi=dpi)

    # make sure background is white
    fig.patch.set_facecolor('white')
    fig.patch.set_alpha(1)

    # add title
    if title is None:
        title = os.path.basename(self.filename)
    fig.suptitle(title, fontsize=16)

    img = librosa.display.specshow(librosa.power_to_db(S, ref=np.max, top_db=120), sr=sr, y_axis='mel', fmax=sr / 2, x_axis='time', hop_length=hop_size, ax=ax)

    colorbar_ticks = range(-120, 1, 10)
    fig.colorbar(img, format='%+2.0f dB', ticks=colorbar_ticks)

    # get rid of "default" ticks
    ax.yaxis.set_minor_locator(matplotlib.ticker.NullLocator())

    length = get_length(self.filename)
    plot_xticks = np.arange(0, length + 0.1, length / 20)
    ax.set(xticks=plot_xticks)

    freq_ticks = []
    freq = 100
    while freq < sr / 2:
        freq_ticks.append(freq)
        freq *= 1.3
    freq_ticks = [round(elem, -2) for elem in freq_ticks]
    freq_ticks.append(sr / 2)
    freq_ticks_labels = [str(round(elem / 1000, 1)) + 'k' if elem > 1000 else int(round(elem)) for elem in freq_ticks]

    ax.set(yticks=freq_ticks)
    ax.set(yticklabels=freq_ticks_labels)

    plt.tight_layout()
    plt.savefig('%s_spectrogram.png' % self.of, format='png', transparent=False)

    if not autoshow:
        plt.close()

    # create MgFigure
    data = {
        "hop_size": hop_size,
        "sr": sr,
        "of": self.of,
        "S": S,
        "length": length
    }

    mgf = MgFigure(figure=fig, figure_type='audio.spectrogram', data=data, layers=None, image=self.of + '_spectrogram.png')

    return mgf
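# A minimal usage sketch (assumptions: spectrogram is bound as a method of
# musicalgestures.MgObject, and 'dance.avi' is a hypothetical input file):
def _demo_spectrogram_method():
    import musicalgestures
    mg = musicalgestures.MgObject('dance.avi')
    mgf = mg.spectrogram(title='Dance excerpt', autoshow=False)
    # -> renders 'dance_spectrogram.png' at the requested dpi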