def read_video(self, path):
    # Return: Numpy.ndarray
    # 5-d tensor with shape (1, <No. of frames>, <height>, <width>, <channels>)
    capt = FFmpegReader(filename=path)
    self.fps = int(capt.inputfps)
    list_of_frames = []
    for index, frame in enumerate(capt.nextFrame()):
        # frame -> (<height>, <width>, 3)
        capture_frame = True
        if self.required_fps is not None:
            is_valid = range(self.required_fps)
            capture_frame = (index % self.fps) in is_valid
        if capture_frame:
            if self.target_size is not None:
                temp_image = image.array_to_img(frame)
                frame = image.img_to_array(
                    temp_image.resize(self.target_size,
                                      Image.ANTIALIAS)).astype('uint8')
            list_of_frames.append(frame)
    temp_video = np.stack(list_of_frames)
    capt.close()
    if self.to_gray:
        temp_video = rgb2gray(temp_video)
    if self.max_frames is not None:
        temp_video = self.process_video(video=temp_video)
    return np.expand_dims(temp_video, axis=0)
def get_frames(self, filename, wanted):
    v = FFmpegReader(filename)  # , outputdict={'-pix_fmt': 'yuv444p'})
    frames = None
    n_frames = 0
    for n, frame in enumerate(v.nextFrame()):
        # the FFmpegReader API actually renders every frame; so it's rather
        # slow; but it ensures that every frame is rendered, not just
        # i-frames... getting i-frames would be faster, but might increase
        # false-negative rate due to picking out different frames from
        # different encodings
        if n not in wanted:
            continue
        if frames is None:
            frames = np.ndarray(shape=(self.grab_n_frames, ) + frame.shape,
                                dtype=np.float64)
        frames[n_frames] = frame
        n_frames += 1
        if n_frames == self.grab_n_frames:
            break
    v.close()
    if n_frames != self.grab_n_frames:
        # report the number of frames actually read (len(frames) would only
        # echo the pre-allocated size, or fail when no frame was read at all)
        raise RuntimeError(
            'Video has invalid number of frames: {}: {}'.format(
                filename, n_frames))
    frames = self._crop_bars(frames)
    return [
        self.process_frame(n, filename, frame)
        for n, frame in enumerate(frames)
    ]
def iterate_video(filename, x1, y1, x2, y2, x3, y3, x4, y4, down_scale=True):
    """
    Iterate over all frames of the video and count the number of cars that
    drive through either of the two rectangles.
    The visualisation is saved as a video in traffic.avi.
    Press 'q' to stop.
    :param filename: file name of the video
    :param x1, y1, x2, y2: two corner points of the first rectangle
    :param x3, y3, x4, y4: two corner points of the second rectangle
    :param down_scale: boolean: if True, the resolution of the video is halved
    :return: None
    """
    queue = collections.deque()
    if not os.path.isfile(filename):
        raise Exception("file not found")
    reader = FFmpegReader(filename)
    shape = reader.getShape()[1:3]
    if down_scale:
        shape = [shape[0] // 2, shape[1] // 2]
    stepsize = 5
    video_writer = cv2.VideoWriter('traffic.avi',
                                   cv2.VideoWriter_fourcc(*'XVID'), 30.0,
                                   (shape[1], shape[0]))
    for frame in reader.nextFrame():
        if down_scale:
            frame = cv2.resize(frame, None, fx=0.5, fy=0.5,
                               interpolation=cv2.INTER_AREA)
        queue.append(frame[:, :, ::-1])
        if len(queue) > 2 * stepsize:
            res = traffic(queue[0], queue[stepsize], queue[stepsize * 2],
                          x1, y1, x2, y2, x3, y3, x4, y4)
            cv2.imshow("Traffic", res)
            k = cv2.waitKey(1)
            queue.popleft()
            video_writer.write(res)
            if k == 113:  # press 'q' (key code 113) to stop
                break
    video_writer.release()
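# A minimal usage sketch with hypothetical file name and coordinates: each
# counting rectangle is given by two opposite corner points.
iterate_video('intersection.mp4',
              100, 200, 300, 400,   # first counting rectangle
              500, 200, 700, 400,   # second counting rectangle
              down_scale=True)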
def PreProcessVideo(fmt, filename, output, start=250, n_frames=5):
    info = ffprobe(filename)
    vinfo = info['video']
    v = FFmpegReader(filename, outputdict={'-pix_fmt': fmt})
    # one row per pixel per sampled frame: (t, x, y, c1, c2, c3)
    X = np.ndarray((int(vinfo['@height']) * int(vinfo['@width']) * n_frames, 6))
    n = 0
    t = 0
    frames = v.nextFrame()
    for t, frame in enumerate(frames):
        if t < start:
            continue
        if t >= start + n_frames:
            break
        print(t)
        sys.stdout.flush()
        for row_n, line in enumerate(frame):
            for col_n, pixel in enumerate(line):
                c1, c2, c3 = pixel
                t_scaled = (float(t - start) / float(vinfo['@width'])) * 255.0
                x_scaled = (float(col_n) / float(vinfo['@width'])) * 255.0
                # rows are scaled by the frame height
                y_scaled = (float(row_n) / float(vinfo['@height'])) * 255.0
                X[n] = np.array([t_scaled, x_scaled, y_scaled, c1, c2, c3])
                n += 1
    print("Done with the encode part")
    np.save(output, X, allow_pickle=False, fix_imports=False)
def read_video(video_path):
    """
    Read a video file as a list of frames
    Resizes frames so that the minimum side is 256 pixels

    Args:
        video_path: Path to video file

    Returns:
        frames: List of frames, each a numpy array of shape (height, width, 3)
    """
    vinfo = ffprobe(video_path)['video']
    width = int(vinfo['@width'])
    height = int(vinfo['@height'])

    scaling = 256.0 / min(width, height)
    new_width = int(math.ceil(scaling * width))
    new_height = int(math.ceil(scaling * height))

    # Resize frames
    reader = FFmpegReader(video_path,
                          outputdict={'-s': "{}x{}".format(new_width,
                                                           new_height)})
    frames = []
    for frame in reader.nextFrame():
        frames.append(frame)
    reader.close()
    return frames
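# A minimal usage sketch (hypothetical path): the function returns a plain
# list of HxWx3 frames, which can be stacked into a single array if needed.
import numpy as np

frames = read_video('example.mp4')
video = np.stack(frames)  # shape: (n_frames, new_height, new_width, 3)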
def __init__(
    self,
    filename: str,
    trim: Tuple[int, int],
    crop: Tuple[int, int, int, int],
    frame_rate: float = 15,
) -> None:
    super().__init__()

    # Get video frames with scikit-video
    reader = FFmpegReader(
        filename + ".mp4",
        inputdict={"-r": str(frame_rate)},
        outputdict={"-r": str(frame_rate)},
    )
    self.frames: np.ndarray = []
    for frame_idx, frame in enumerate(reader.nextFrame()):
        # Trim video (time)
        if frame_idx < trim[0]:
            continue
        if frame_idx >= trim[1]:
            break
        # Crop frames (space)
        frame = frame[crop[1]:crop[3], crop[0]:crop[2], :]
        self.frames.append(cv2.resize(frame, (140, 140)))

    # Change to NumPy array with PyTorch dimension format
    self.frames = np.array(self.frames, dtype=float)
    self.frames = np.transpose(self.frames, axes=(0, 3, 1, 2))

    y, _ = librosa.load(filename + ".wav", sr=2000)
    D = librosa.core.stft(y, n_fft=510)
    self.samples = np.abs(D)
def _get_frame(self, seek, video_idx, last):
    opened_video = None  # handle to opened target video
    if self.opened_videos[video_idx]:  # if handle(s) exists for target video
        current = self.opened_videos[video_idx]  # get handles list
        opened_video = next((ov for ov in current if ov[0] == seek),
                            None)  # look for matching seek
    if opened_video is None:  # no (matching) handle found
        video_path = join(self.root,
                          self.videos[video_idx][1][0])  # build video path
        video_file = FFmpegReader(video_path)  # get a video file pointer
        video_iter = video_file.nextFrame()  # get an iterator
        opened_video = [seek, islice(video_iter, seek, None),
                        video_file]  # seek video and create o.v. item
        self.opened_videos[video_idx].append(
            opened_video)  # add opened video object to o.v. list

    opened_video[0] = seek + 1  # update seek pointer
    frame = next(opened_video[1])  # cache output frame

    if last:
        opened_video[2]._close()  # close video file (private method?!)
        self.opened_videos[video_idx].remove(
            opened_video)  # remove o.v. item

    return frame
def read_mj2_frames(fname):
    from skvideo.io import FFmpegReader
    sq = FFmpegReader(fname, outputdict={'-pix_fmt': 'gray16le'})
    imgs = []
    # FFmpegReader exposes frames through the nextFrame() generator
    for s in sq.nextFrame():
        imgs.append(s)
    sq.close()
    return np.stack(imgs).squeeze()
def __getitem__(self, index):
    item = self.json_data[index]

    framerate_sampled = self.augmentor.jitter_fps(FRAMERATE)

    optional_args = {"-r": "%d" % framerate_sampled}
    duration = self.get_duration(item.path)
    if duration is not None:
        nframes = int(duration * framerate_sampled)
        optional_args["-vframes"] = "%d" % nframes

    # Open video file
    reader = FFmpegReader(item.path, inputdict={}, outputdict=optional_args)

    try:
        imgs = []
        for img in reader.nextFrame():
            imgs.append(img)
    except (RuntimeError, ZeroDivisionError) as exception:
        print('{}: WEBM reader cannot open {}. Empty '
              'list returned.'.format(type(exception).__name__, item.path))

    imgs = self.transform_pre(imgs)
    imgs, label = self.augmentor(imgs, item.label)
    imgs = self.transform_post(imgs)

    num_frames = len(imgs)
    target_idx = self.classes_dict[label]

    if self.nclips > -1:
        num_frames_necessary = self.clip_size * self.nclips * self.step_size
    else:
        num_frames_necessary = num_frames
    offset = 0
    if num_frames_necessary < num_frames:
        # If there are more frames, then sample starting offset.
        diff = (num_frames - num_frames_necessary)
        # temporal augmentation
        if not self.is_val:
            offset = np.random.randint(0, diff)

    imgs = imgs[offset:num_frames_necessary + offset:self.step_size]
    if len(imgs) < (self.clip_size * self.nclips):
        imgs.extend([imgs[-1]] *
                    ((self.clip_size * self.nclips) - len(imgs)))

    # format data to torch
    data = torch.stack(imgs)
    data = data.permute(1, 0, 2, 3)
    if self.get_item_id:
        return (data, target_idx, item.id)
    else:
        return (data, target_idx)
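# Worked example of the sampling above, with hypothetical numbers: with
# clip_size=8, nclips=1 and step_size=2, num_frames_necessary = 16; from a
# 40-frame video, diff = 24, a random offset in [0, 24) is drawn (training
# only), and every second frame of imgs[offset:offset + 16] is kept, giving
# exactly 8 frames, so no padding with the last frame is needed.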
def __init__(
    self,
    filenames: List[str],
    trims: List[Tuple[int, int]],
    crops: List[Tuple[int, int, int, int]],
    frame_rate: float = 15,
):
    # TDCCMCDataset is an unconventional dataset, where each data point is
    # dynamically sampled whenever needed instead of a static dataset.
    # Therefore, in `__init__`, we do not define a static dataset. Instead,
    # we simply preprocess the video and audio for faster `__getitem__`.
    super().__init__()
    self.sources: List[Tuple[np.ndarray, np.ndarray]] = []
    for filename, trim, crop in zip(filenames, trims, crops):
        # Get video frames with scikit-video
        reader = FFmpegReader(
            filename + ".mp4",
            inputdict={"-r": str(frame_rate)},
            outputdict={"-r": str(frame_rate)},
        )
        frames = []
        for frame_idx, frame in enumerate(reader.nextFrame()):
            # Trim video (time)
            if frame_idx < trim[0]:
                continue
            if frame_idx >= trim[1]:
                break
            # Crop frames (space)
            frame = frame[crop[1]:crop[3], crop[0]:crop[2], :]
            frames.append(cv2.resize(frame, (140, 140)))

        # Change to NumPy array with PyTorch dimension format
        frames = np.array(frames, dtype=float)
        frames = np.transpose(frames, axes=(0, 3, 1, 2))

        # STFT audio
        # TODO Magic number sr=2000, n_fft=510
        y, _ = librosa.load(filename + ".wav", sr=2000)
        D = librosa.core.stft(y, n_fft=510)
        D = np.abs(D)

        # Save video frames and audio
        self.sources.append((frames, D))
def _read_video(self, path):
    """
    Parameters:
        path (str): Required
            Path of the video to be read

    Returns:
        Numpy.ndarray
            A 5-d tensor with shape (1, <No. of frames>, <height>, <width>, <channels>)
    """
    cap = FFmpegReader(filename=path)
    list_of_frames = []
    self.fps = int(cap.inputfps)  # Frame Rate

    for index, frame in enumerate(cap.nextFrame()):
        capture_frame = True
        if self.required_fps is not None:
            is_valid = range(self.required_fps)
            capture_frame = (index % self.fps) in is_valid

        if capture_frame:
            if self.target_size is not None:
                temp_image = image.array_to_img(frame)
                frame = image.img_to_array(
                    temp_image.resize(
                        self.target_size,
                        Image.ANTIALIAS)).astype('uint8')

            # Shape of each frame -> (<height>, <width>, 3)
            list_of_frames.append(frame)

    temp_video = np.stack(list_of_frames)
    cap.close()

    if self.to_gray:
        temp_video = rgb2gray(temp_video)

    if self.max_frames is not None:
        temp_video = self._process_video(video=temp_video)

    return temp_video
def image_streamer(sources, start=0, remap_func=None):
    """A generator that produces image frames from multiple sources.
    Currently accepts video, images and COCO datasets and globs of these.

    sources: list of str; The file paths to the image sources. Can be an
        image, video or COCO json, globs accepted.
    start: int (optional); Start from this position in the list.
    remap_func: lambda or function; A function that accepts a filename
        parameter and outputs the path to the file. Used to change relative
        directories of COCO datasets.
    """
    from warnings import warn
    from glob import glob
    from skvideo.io import FFmpegReader
    from contextlib import closing, redirect_stdout

    def is_image(path):
        return path.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff'))

    def is_video(path):
        return path.lower().endswith(('.avi', '.mpg', '.mp4'))

    remap_func = remap_func or (lambda x: x)

    # Expand any globbed paths, but not for images since we want to keep the sequence
    full_sources = []
    for source in sources:
        if '*' in source and not is_image(source):
            full_sources += glob(source, recursive=True)
        else:
            full_sources.append(source)

    for source in full_sources[start:]:
        if is_video(source):
            with closing(FFmpegReader(source)) as reader:
                for frame_no, frame in enumerate(reader.nextFrame()):
                    yield source, frame_no, frame
        elif is_image(source):
            for frame_no, image_path in enumerate(glob(source, recursive=True)):
                yield image_path, frame_no, imread(remap_func(image_path))
        elif source.endswith('.json'):
            # COCO database
            with redirect_stdout(None):
                coco = COCO(source)
            for frame_no, image in enumerate(coco.loadImgs(coco.getImgIds())):
                # TODO: It's not clear how to address relative paths
                image_path = image['path'] if 'path' in image else remap_func(
                    image['file_name'])
                yield image_path, frame_no, imread(image_path)
            del coco
        else:
            warn("Skipped an unknown source type {}.".format(source))
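# A minimal usage sketch with hypothetical sources: the generator yields
# (source_path, frame_no, frame) triples for videos, image globs and COCO
# json files alike.
for path, frame_no, frame in image_streamer(['clip.mp4', 'images/*.jpg']):
    print(path, frame_no, frame.shape)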
def get_frame_count(self, paths):
    """
    Can be used to determine the value of `max_frames`

    Parameters:
        paths (list): Required
            A list of paths of the videos to be read

    Returns:
        dict (python dictionary)
            For each video, the total number of frames in that video is
            stored in the dictionary.
    """
    frame_count = {}
    for path in paths:
        cap = FFmpegReader(filename=path)
        frame_count[path] = cap.inputframenum
        cap.close()

    return frame_count
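# A minimal usage sketch, with a hypothetical instance `videos` of the
# enclosing class and hypothetical paths: take the largest count so that
# `max_frames` covers every video.
counts = videos.get_frame_count(['videos/a.mp4', 'videos/b.mp4'])
max_frames = max(counts.values())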
def __init__(
        self,
        filenames,
        extensions=['.avi', '.mov', '.mj2'],
        # this will try this extension first and then .tif, .TIFF and .TIF
        extension=None,
        nchannels=None):
    '''
    Select a stack from a sequence of mov stack files
    '''
    self.extension = extension
    if type(filenames) is str:
        # check if it is a folder
        if os.path.isdir(filenames):
            dirname = filenames
            filenames = []
            for extension in extensions:
                if not len(filenames):  # try other
                    self.extension = extension
                    filenames = natsorted(
                        glob(pjoin(dirname, '*' + self.extension)))
            if not len(filenames):
                raise (OSError('Could not find files.'))
    super(VideoStack, self).__init__(filenames, extension)
    from skvideo.io import FFmpegReader
    self.reader = FFmpegReader
    offsets = [0]
    for fname in self.filenames:
        # Parse all files in the stack
        with FFmpegReader(fname) as f:
            dims = f.getShape()[:-1]
            if f.pix_fmt == 'gray16le':
                dtype = 'uint16'
            else:
                dtype = 'uint8'
            self.pix_fmt = f.pix_fmt
            offsets.append(dims[0])
            self.framerate = f.inputfps
    # offset for each file
    self.frames_offset = np.cumsum(offsets)
    if nchannels is None:
        nchannels = 1
    self.frames_offset = (self.frames_offset / nchannels).astype(int)
    self.dims = dims[1:]
    self.dims = [nchannels, *self.dims]
    self.dtype = dtype
    self.nframes = self.frames_offset[-1]
    self.shape = tuple([self.nframes, *self.dims])

    self.current_fileidx = -1
    self.current_frameidx = 0
def __init__(
    self,
    filenames: List[str],
    trims: List[Tuple[int, int]],
    crops: List[Tuple[int, int, int, int]],
    frame_rate: float = 15,
):
    # TDCCMCDataset is an unconventional dataset, where each data point is
    # dynamically sampled whenever needed instead of a static dataset.
    # Therefore, in `__init__`, we do not define a static dataset. Instead,
    # we simply preprocess the video and audio for faster `__getitem__`.
    super().__init__()
    self.filenames = filenames
    self.trims = trims
    self.crops = crops
    self.audios: List[np.ndarray] = []
    self.readers: List[Any] = []
    for filename in filenames:
        # Get video frames with scikit-video
        reader = FFmpegReader(
            filename + ".mp4",
            inputdict={"-r": str(frame_rate)},
            outputdict={"-r": str(frame_rate)},
        )
        self.readers.append(reader)

        # STFT audio
        # TODO Magic number sr=2000, n_fft=510
        y, _ = librosa.load(filename + ".wav", sr=2000)
        D = librosa.core.stft(y, n_fft=510)
        D = np.abs(D)

        # Save audio
        self.audios.append(D)
                    type=float,
                    default=0,
                    help="seconds to skip at the beginning of the video. Default 0.")
parser.add_argument("--ouput",
                    type=str,
                    default='output.csv',
                    help="Name of the output file. Default output.csv.")
args = parser.parse_args()

logging.basicConfig(level=logging.DEBUG)

file_path = args.file_path
output_file_name = args.ouput

with open(output_file_name, 'w') as csvfile:
    fieldnames = ['frame', 'x', 'y']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    metadata = ffprobe(file_path)['video']
    fr = float(metadata['@r_frame_rate'].split('/')[0]) / float(
        metadata['@r_frame_rate'].split('/')[1])
    nframes = float(metadata['@duration_ts'])
    time_length = float(metadata['@duration'])
    frame_shape = (int(metadata['@height']), int(metadata['@width']), 3)
    skip = args.skip
    SIZE = frame_shape[:-1][::-1]
    secs = skip / fr

    video = enumerate(FFmpegReader(file_path, inputdict={'-ss': str(secs)}),
                      skip)
    main(video)
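# For reference, a minimal sketch (hypothetical path): passing '-ss' in
# inputdict makes ffmpeg seek before decoding, so nextFrame() starts
# yielding frames from that timestamp onward.
from skvideo.io import FFmpegReader

reader = FFmpegReader('example.mp4', inputdict={'-ss': '5.0'})
for frame in reader.nextFrame():
    pass  # process frames starting about 5 seconds into the video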
def infer_video(paths):
    item_path, item_ann_path = paths
    vreader = FFmpegReader(item_path)
    return (vreader.getShape(), )
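# For reference, a minimal sketch (hypothetical path): getShape() returns a
# (num_frames, height, width, channels) tuple, which is what infer_video
# wraps in a 1-tuple above.
from skvideo.io import FFmpegReader

nframes, height, width, channels = FFmpegReader('clip.mp4').getShape()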
import os
# import numpy as np

for num in range(1, 31):
    fnm = '/home/alex/Downloads/train/%d.mp4' % num
    print(fnm)
    try:
        shutil.rmtree('/mnt/data/pigs/imgs/{}'.format(num))
        shutil.rmtree('/mnt/data/pigs/val_imgs/{}'.format(num))
    except:
        pass
    os.mkdir('/mnt/data/pigs/imgs/{}'.format(num))
    os.mkdir('/mnt/data/pigs/val_imgs/{}'.format(num))

    vid = FFmpegReader(fnm)
    # vid = cv2.VideoCapture(fnm)
    # success, images = vid.read()
    # print(success)
    # images = [images]
    # images = []
    # split_num = 30
    # frame_num = 2950
    # split_size = frame_num // split_num
    val_split = 2200
    count = 0
    for frame_num, frame in enumerate(vid.nextFrame()):
def get_mp4_frames(mp4_path, skip_frames, num_frames_per_event, do_flip,
                   brighten_val, is_high_res, do_aug):
    # Get mp4 reader
    try:
        reader = FFmpegReader(mp4_path)
    except Exception as e:
        if PRINT_ERRORS:
            print(e)
        return None

    # Get starting frame and offsets
    frame_shape = EXPECTED_HIGH_RES if is_high_res else EXPECTED_LOW_RES
    start_frame = (reader.inputframenum -
                   (num_frames_per_event * skip_frames)) // 2
    if start_frame <= 0:
        reader.close()
        return None
    start_x = int((frame_shape[0] - reader.outputheight) // 2)
    if start_x < 0:
        reader.close()
        return None
    start_y = int((frame_shape[1] - reader.outputwidth) // 2)
    if start_y < 0:
        reader.close()
        return None
    start_z = int((frame_shape[2] - reader.outputdepth) // 2)
    if start_z < 0:
        reader.close()
        return None

    # Put middle (num_frames_per_event * skip_frames) input frames in numpy array
    cur_i = 0
    cur_frame = 0
    frame_array = np.zeros(shape=((num_frames_per_event, ) + frame_shape),
                           dtype=np.uint8)
    for frame in reader.nextFrame():
        if cur_frame >= start_frame:
            cur_offset = cur_frame - start_frame
            if cur_i < num_frames_per_event and (cur_offset % skip_frames) == 0:
                frame_array[cur_i,
                            start_x:start_x + reader.outputheight,
                            start_y:start_y + reader.outputwidth,
                            start_z:start_z + reader.outputdepth] = frame
                if brighten_val < 1.0:
                    frame_array[cur_i, :, :, :] = adj_brightness(
                        frame_array[cur_i, :, :, :], brighten_val)
                if do_flip:
                    frame_array[cur_i, :, :, :] = hflip_img(
                        frame_array[cur_i, :, :, :])
                cur_i += 1
        cur_frame += 1
    reader.close()

    # Return array with frames
    return frame_array
if os.path.isdir(os.path.join(rootDirLoad, subDir)):  # Check if it is a folder
    classes.append(subDir)  # Create a path
    files = os.listdir(os.path.join(rootDirLoad, subDir))
    nVideos = 0  # Videos in class X
    pbar2 = trange(len(files), ncols=100, position=2,
                   desc='Within-class progress ')

    for file in files:  # Get all the videos
        if file.lower().endswith('.avi') or file.lower().endswith('.mp4'):
            filename = os.path.join(rootDirLoad, subDir, file)
            reader = FFmpegReader(filename)
            nFrames = reader.getShape()[0]
            nVideos += 1
            dataList.write('\n{:<8} {:12} {:<12} {:02}.pyt'.format(
                nClasses, subDir, nFrames, nVideos))

            # Create class directories if they do not exist
            classDir = os.path.join(rootDirSave, subDir)
            if not os.path.exists(classDir):
                os.makedirs(classDir)

            pbar3 = trange(nFrames, ncols=100, position=4,
                           desc='Video progress ')