def loadvideo_decord(self, sample, sample_rate_scale=1):
    """Load the frames of one video with Decord.

    Args:
        sample: Video path suffix; it is concatenated as-is to
            ``self.data_path`` to form the file name.
        sample_rate_scale: Stride applied to the sampled frame indices in the
            segment-sampling path. Not used when ``self.mode == 'test'``.

    Returns:
        The result of ``get_batch(...).asnumpy()`` for the selected frame
        indices, or an empty list when the file is missing, is smaller than
        1 KiB, cannot be opened by Decord, or reports zero frames.
    """
    fname = self.data_path + sample

    if not os.path.exists(fname):
        return []

    # Tiny files are rejected up front to avoid the hanging issue seen when
    # they are handed to the decoder.
    file_size = os.path.getsize(fname)
    if file_size < 1 * 1024:
        print('SKIP: ', fname, " - ", file_size)
        return []

    try:
        if self.keep_aspect_ratio:
            vr = VideoReader(fname, num_threads=1, ctx=cpu(0))
        else:
            vr = VideoReader(fname,
                             width=self.new_width,
                             height=self.new_height,
                             num_threads=1,
                             ctx=cpu(0))
    except Exception:  # any decoder failure means "unreadable video"
        print("video cannot be loaded by decord: ", fname)
        return []

    total_frames = len(vr)
    if total_frames == 0:
        # Nothing to sample from; the padding below would index an empty list.
        return []

    if self.mode == 'test':
        all_index = list(range(0, total_frames, self.frame_sample_rate))
        # Repeat the last sampled index until the clip is long enough.
        missing = self.clip_len - len(all_index)
        if missing > 0:
            all_index.extend([all_index[-1]] * missing)
        vr.seek(0)
        return vr.get_batch(all_index).asnumpy()

    # handle temporal segments
    converted_len = int(self.clip_len * self.frame_sample_rate)
    seg_len = total_frames // self.num_segment

    all_index = []
    for i in range(self.num_segment):
        if seg_len <= converted_len:
            # Segment too short for a full clip: spread what is available
            # and pad the tail with the segment end (clipped to seg_len - 1).
            available = seg_len // self.frame_sample_rate
            index = np.linspace(0, seg_len, num=available)
            index = np.concatenate(
                (index, np.ones(self.clip_len - available) * seg_len))
            index = np.clip(index, 0, seg_len - 1).astype(np.int64)
        else:
            # Pick a random window of converted_len frames inside the segment.
            end_idx = np.random.randint(converted_len, seg_len)
            str_idx = end_idx - converted_len
            index = np.linspace(str_idx, end_idx, num=self.clip_len)
            index = np.clip(index, str_idx, end_idx - 1).astype(np.int64)
        # Shift segment-local indices to absolute frame positions.
        index = index + i * seg_len
        all_index.extend(index)

    all_index = all_index[::int(sample_rate_scale)]
    vr.seek(0)
    return vr.get_batch(all_index).asnumpy()
def load_real_and_fake_frame(real, fake, random):
    """Decode the same randomly chosen frames from a real and a fake video.

    Args:
        real: Source of the real video, passed to ``decord.VideoReader``.
        fake: Source of the fake video, passed to ``decord.VideoReader``.
        random: Number of frame indices to draw with ``np.random.choice``.

    Returns:
        Tuple ``(real_frames, fake_frames, frame_index)``: the two decoded
        batches (``asnumpy()`` results) and the list of indices used for both.
    """
    real_reader = decord.VideoReader(real, ctx=decord.cpu())
    fake_reader = decord.VideoReader(fake, ctx=decord.cpu())

    # Draw only from frames that exist in BOTH videos; sampling from the real
    # video's length alone goes out of range when the fake video is shorter.
    shared_len = min(len(real_reader), len(fake_reader))
    frame_index = list(np.random.choice(range(shared_len), random))

    real_frames = real_reader.get_batch(frame_index).asnumpy()
    fake_frames = fake_reader.get_batch(frame_index).asnumpy()
    return real_frames, fake_frames, frame_index
def test_video(video_name):
    """Load the given video and feed sampled frames through the inference engine.

    Results are cached as ``<cachedir>/<video basename without extension>.npy``;
    when that file already exists it is loaded and returned without decoding.

    Args:
        video_name: Path of the video to classify.

    Returns:
        A ``uint32`` array with one row of 4 values per sampled frame, filled
        from the results of ``inferLocal``.
    """
    f = os.path.join(cachedir, os.path.basename(os.path.splitext(video_name)[0]))
    cache_file = f + ".npy"
    if os.path.isfile(cache_file):
        print(f"FOUND EXISTING CLASSIFICATIONS: {f}.npy")
        return np.load(cache_file)

    vr = VideoReader(video_name, ctx=cpu(0))
    total_frames = len(vr)
    print("video frames:", total_frames)
    decord.bridge.set_bridge('tensorflow')

    # Assuming 60 fps: one sample every 60 frames.
    sample_rate = 60
    images_per_batch = 32
    samples = total_frames // sample_rate
    # Only whole batches are processed; a trailing partial batch is dropped.
    batches = samples // images_per_batch
    persample = np.empty((batches * images_per_batch, 4), dtype=np.uint32)

    for i in range(batches):
        print("batch", i, "of", batches)
        # Create a collection of frame indexes at each sample rate within the batch
        batch_start = i * images_per_batch * sample_rate
        frame_idxs = [batch_start + x * sample_rate for x in range(images_per_batch)]
        batch_frames = vr.get_batch(frame_idxs)
        res = inferLocal(frame_idxs, batch_frames)
        persample[i * images_per_batch:(i + 1) * images_per_batch, :] = res

    print("saving to", f)
    # np.save appends ".npy", matching the cache file checked above.
    np.save(f, persample)
    return persample
def __iter__(self):
    """Yield ``(batch_frames, label)`` for every batch of every bag.

    Each entry of ``self.bag_files`` is handed to ``de.VideoLoader`` as its
    list of videos. ``label`` is ``"<file>|<first_frame>:<last_frame>"``,
    where ``<file>`` is ``bag[file_id]``.

    Raises:
        NotImplementedError: If a batch mixes frames from several files or
            its frame ids are not strictly increasing.
    """
    for bag in self.bag_files:
        vl_batches = de.VideoLoader(bag,
                                    ctx=[de.cpu(0)],
                                    shape=(self.window_size, self.frame_size, self.frame_size, 3),
                                    interval=0,
                                    skip=0,
                                    shuffle=0)
        vl_batches.reset()
        for _ in range(len(vl_batches)):
            # Re-selected before every read because this is a generator;
            # presumably the consumer may switch bridges between yields.
            # TODO confirm before hoisting it out of the loop.
            de.bridge.set_bridge('native')
            vl_batch = vl_batches.next()

            # Column 0 holds the file id, column 1 the frame id.
            batch_index = vl_batch[1].asnumpy()
            file_ids = batch_index[:, 0]
            frame_ids = batch_index[:, 1]

            # make sure all frames in a batch come from same video
            # make sure frame_ids strictly increase
            file_id = set(file_ids)
            is_strict_increase = all(
                i < j for i, j in zip(frame_ids, frame_ids[1:]))
            if len(file_id) != 1 or not is_strict_increase:
                raise NotImplementedError(
                    "batches spanning several files or with non-increasing "
                    "frame ids are not supported")

            file_id = file_id.pop()
            frame_rng = '{}:{}'.format(frame_ids[0], frame_ids[-1])
            file = bag[file_id]
            label = '|'.join([file, frame_rng])
            batch_frames = vl_batch[0].asnumpy()
            yield batch_frames, label
def worker_func(idx, data_queue, msg_queue, anno_lst):
    """Decode annotated frames on request and push them to ``data_queue``.

    The worker blocks on ``msg_queue`` and reacts to three messages:

    * ``'stop'``: leave the loop and return.
    * ``'new_epoch'``: for every annotation, decode ``anno['FrameIDs']`` from
      ``anno['Video']``, resize each frame to ``Cfg.input_frame_shape`` and
      put a shallow copy of the annotation, extended with a ``'Frames'`` list
      of pickled images, on ``data_queue``.
    * ``('update', new_list)``: replace the annotation list.

    Any other message is ignored.

    Args:
        idx: Worker index; also used as the device id for ``cpu(idx)``.
        data_queue: Queue receiving the processed annotations.
        msg_queue: Queue delivering control messages.
        anno_lst: Initial list of annotation dicts.
    """
    while True:
        msg = msg_queue.get()
        if msg == 'stop':
            break

        if msg == 'new_epoch':
            for anno in anno_lst:
                if Enable_Time_Log:
                    t1 = time.time()

                # Shallow copy so the shared annotation is not mutated.
                anno_copy = dict(anno)
                vr = VideoReader(anno['Video'], ctx=cpu(idx))
                h, w, _ = Cfg.input_frame_shape
                frames = vr.get_batch(anno['FrameIDs']).asnumpy()
                # The channel axis is reversed before resizing (presumably
                # RGB -> BGR for OpenCV consumers; TODO confirm).
                anno_copy['Frames'] = [
                    pickle.dumps(cv2.resize(img[:, :, ::-1], (w, h)))
                    for img in frames
                ]
                data_queue.put(anno_copy)

                if Enable_Time_Log:
                    t2 = time.time()
                    print('Decord reader takes {:.3f}s'.format(t2 - t1))
        elif len(msg) == 2 and msg[0] == 'update':
            anno_lst = msg[1]
def __init__(self, video_file, img_size=(416, 416), gpu=None, num_threads=8, offset=0, is_torch=True):
    """Open ``video_file`` with decord, decoding frames at ``img_size``.

    Args:
        video_file: Video source passed to ``VideoReader``.
        img_size: Either a single int (used for both dimensions) or a
            two-element sequence whose first item is passed as ``width`` and
            second as ``height``.
        gpu: GPU device id; ``None`` selects the CPU context.
        num_threads: Forwarded to ``VideoReader``.
        offset: Stored on the instance as ``self.offset``.
        is_torch: When true, switch the decord bridge to ``'torch'``.
    """
    self.is_torch = is_torch
    if is_torch:
        decord.bridge.set_bridge('torch')

    # Normalise once. A list such as [416, 416] previously ended up stored as
    # ([416, 416], [416, 416]) because only exact tuples were recognised.
    if isinstance(img_size, int):
        img_size = (img_size, img_size)
    else:
        img_size = tuple(img_size)
    self.img_size = img_size

    self.offset = offset
    ctx = decord.cpu() if gpu is None else decord.gpu(gpu)
    self._vr = VideoReader(video_file,
                           ctx=ctx,
                           width=img_size[0],
                           height=img_size[1],
                           num_threads=num_threads)
def extract_frames(video, hi_dir, hi_size, times):
    """Write one resized PNG thumbnail per entry of ``times``.

    For each ``t`` in ``times`` the frame at index
    ``int(framerate * 2 * (t + 1))`` (clamped to the last frame) is saved as
    ``thumb-<t+1, zero padded to 4>.png`` inside ``hi_dir``. The output size
    keeps the video's aspect ratio and fits within ``hi_size``.

    Args:
        video: Path of the video.
        hi_dir: Directory receiving the thumbnails.
        hi_size: ``(width, height)`` bounding box for the thumbnails.
        times: Sequence of numeric positions; see the index formula above.
    """
    if len(times) == 0:
        return

    info = get_video_info(video)
    w, h = info['coded_width'], info['coded_height']
    aspect_ratio = w / h
    if aspect_ratio > hi_size[0] / hi_size[1]:
        # Wide format
        wo, ho = hi_size[0], int(hi_size[0] // aspect_ratio)
    else:
        wo, ho = int(hi_size[1] * aspect_ratio), hi_size[1]

    framerate = int(info['nb_frames']) / float(info['duration'])

    vr = VideoReader(video, ctx=cpu(0))
    last_frame = len(vr) - 1
    frame_ids = [min(last_frame, int(framerate * (2 * (t + 1)))) for t in times]
    frames = vr.get_batch(frame_ids).asnumpy()

    for t, frame in zip(times, frames):
        # decord yields RGB while cv2.imwrite expects BGR, hence the swap.
        bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imwrite(os.path.join(hi_dir, f'thumb-{t + 1:04}.png'),
                    cv2.resize(bgr, (wo, ho)))
def get(self, record, indices, path):
    """Load the frames for ``indices`` and return them transformed with a label.

    When ``self.video_source`` is false the frames come from
    ``self._load_image(path, index)``; otherwise they are decoded from the
    video at ``self.root_path / record.path``, treating ``indices`` as
    1-based (frame ``index - 1`` is read).

    Args:
        record: Object providing ``path``, ``label`` and ``mlabel``.
        indices: Iterable of frame indices; each is cast to ``int``.
        path: Location handed to ``self._load_image`` (image mode only).

    Returns:
        ``(self.transform(images), label)`` where ``label`` is
        ``record.mlabel`` when ``self.multi_class`` is set, else
        ``record.label``.
    """
    images = []
    if not self.video_source:
        for seg_ind in indices:
            images.extend(self._load_image(path, int(seg_ind)))
    else:
        vr = VideoReader(os.path.join(self.root_path, record.path), ctx=cpu(0))
        for seg_ind in indices:
            # Cast like the image branch does: a float index would otherwise
            # raise and be silently replaced by frame 0 below.
            frame_no = int(seg_ind) - 1
            try:
                images.append(Image.fromarray(vr[frame_no].asnumpy()))
            except Exception:
                # Best effort: fall back to the first frame when the
                # requested one cannot be decoded.
                images.append(Image.fromarray(vr[0].asnumpy()))

    process_data = self.transform(images)
    if self.multi_class:
        return process_data, record.mlabel
    return process_data, record.label
def extractSlides(videoPath):
    """Collect one image per detected slide change in a video.

    Roughly one frame per second (every ``int(fps)`` frames) is hashed with
    ``imagehash.average_hash``. Once the difference to the previous sample
    exceeds ``DIFF_THRESHOLD`` a change is pending; the first later sample
    whose difference drops below the threshold is stored as a slide.

    Args:
        videoPath: Path object of the video (``as_posix()`` is called on it).

    Returns:
        List of PIL images, one per settled slide change.
    """
    print(f"Reading {videoPath.as_posix()}...")
    vr = VideoReader(videoPath.as_posix(), ctx=cpu(0))
    fps = vr.get_avg_fps()
    print(f"Successfully read. FPS: {fps}")

    # int(fps) is 0 for videos below 1 fps, which is not a valid range step.
    step = max(1, int(fps))

    slides = []
    prev_hash = None
    image_changed = False

    for i in trange(0, len(vr), step):
        pil_image = Image.fromarray(vr[i].asnumpy())
        current_hash = imagehash.average_hash(pil_image)
        # The first sample has nothing to compare against: difference 0.
        image_diff = 0 if prev_hash is None else current_hash - prev_hash
        prev_hash = current_hash

        if image_changed and image_diff < DIFF_THRESHOLD:
            # The picture has settled after a change: keep it.
            slides.append(pil_image)
            image_changed = False

        if image_diff > DIFF_THRESHOLD:
            image_changed = True

    return slides
def get_decord(path):
    """Decode every frame of ``path`` with decord and print the frame count.

    Args:
        path: Video source passed to ``VideoReader``.
    """
    vr = VideoReader(path, ctx=cpu(0))
    # the video reader will handle seeking and skipping in the most efficient manner
    images_d = [vr[i] for i in range(len(vr))]
    print("decord", len(images_d))
def __getitem__(self, i):
    """Return a random window of ``self.NUMFRAMES`` consecutive frames.

    Args:
        i: Index into ``self.videos``.

    Returns:
        ``(vid, start)`` with the decoded frames (``asnumpy()`` result) and
        the index of the first frame, or ``0`` when anything goes wrong
        (unreadable video, video shorter than ``NUMFRAMES``, ...).
    """
    try:
        vr = decord.VideoReader(self.videos[i], ctx=decord.cpu())
        # +1 so the window ending on the final frame can be drawn and a video
        # with exactly NUMFRAMES frames is accepted instead of rejected.
        start = np.random.choice(len(vr) - self.NUMFRAMES + 1)
        vid = vr.get_batch(list(range(start, start + self.NUMFRAMES))).asnumpy()
    except Exception:
        # Callers get the sentinel 0 for unusable samples.
        return 0
    return (vid, start)
def load_video(filepath, num_frames, scale_factor):
    """Load the first ``num_frames`` frames of a video, optionally rescaled.

    Args:
        filepath: Video source passed to ``decord.VideoReader``.
        num_frames: Number of leading frames to decode.
        scale_factor: Spatial zoom factor applied to the two middle axes of
            the decoded array; ``1`` leaves the frames untouched.

    Returns:
        The decoded frames (``asnumpy()`` result), zoomed with order-0
        interpolation when ``scale_factor != 1``.
    """
    vr = decord.VideoReader(filepath, ctx=decord.cpu())
    # Honour the argument; the global NUMFRAMES was used here by mistake.
    vid = vr[:num_frames].asnumpy()
    if scale_factor != 1:
        vid = zoom(vid, [1, scale_factor, scale_factor, 1], prefilter=False, order=0)
    return vid
def __init__(self, url, num_threads=1, batch=64):
    """Open ``url`` with decord and record basic reader settings.

    Args:
        url: Video source passed to ``VideoReader``.
        num_threads: Accepted but not used; ``self.num_threads`` is always
            set to ``multiprocessing.cpu_count()``.
        batch: Stored as ``self.batch``.
    """
    cpu_total = multiprocessing.cpu_count()
    self.num_threads = cpu_total
    print("cpu count ", cpu_total)

    reader = VideoReader(url, ctx=cpu(0))
    self.vr = reader
    self.img_size = 640
    self.n = len(reader)
    self.batch = batch
def __getitem__(self, i):
    """Return the first ``self.NUMFRAMES`` frames of video ``i``.

    Args:
        i: Index into ``self.videos``.

    Returns:
        ``(vid, 0)`` with the decoded frames (``asnumpy()`` result) and the
        start index, or ``0`` after printing the error when the video cannot
        be read.
    """
    try:
        vr = decord.VideoReader(self.videos[i], ctx=decord.cpu())
        # One batched decode instead of NUMFRAMES single-frame reads.
        vid = vr.get_batch(list(range(self.NUMFRAMES))).asnumpy()
        start = 0
    except Exception as e:
        print(e)
        # Callers get the sentinel 0 for unusable samples.
        return 0
    return (vid, start)
def __init__(self, video_url=None):
    """Open ``video_url`` with decord and initialise the playback flags.

    Args:
        video_url: Video source passed to ``de.VideoReader``.
    """
    super().__init__()
    self.video_url = video_url

    # Playback state: running, not stepping back, not paused.
    self.run_flag = True
    self.back_flag = False
    self.pause_flag = False

    cpu_ctx = de.cpu(0)
    self.vr = de.VideoReader(self.video_url, ctx=cpu_ctx)
    frame_total = len(self.vr)
    self.frame_numbers = range(frame_total)
def run():
    """Time decord reader creation and a few random frame reads per file.

    For every path in ``files`` the file is first copied to
    ``/dev/shm/a.mp4`` and opened once from there (untimed); then opening the
    original path and decoding six fixed frames are timed and printed in
    microseconds.
    """
    one_us = datetime.timedelta(microseconds=1)
    for f in files:
        with open(f, "rb") as src:
            bs = src.read()

        # Untimed pass over a shared-memory copy of the file.
        with open('/dev/shm/a.mp4', 'wb') as bf:
            bf.write(bs)
        vr = VideoReader('/dev/shm/a.mp4', ctx=cpu(0), num_threads=0)

        a = datetime.datetime.now()
        vr = VideoReader(f, ctx=cpu(0), num_threads=0)
        b = datetime.datetime.now()
        # timedelta.microseconds is only the sub-second component; divide by
        # a 1 us timedelta to get the full elapsed time.
        print("init: ", (b - a) // one_us, "us")

        for i in np.array([10, 12, 14, 60, 62, 64]) + 0:
            vr[i]
        c = datetime.datetime.now()
        print(f, "decode: ", (c - b) // one_us, "us")
def test_rotated_video():
    """Check that rotation metadata is applied to decoded frame shapes."""
    # Input videos are all h=320 w=568 in metadata, but
    # rotation should be applied to recover correctly
    # displayed image (from rotation metadata).
    landscape_rotations = (0, 180)
    portrait_rotations = (90, 270)

    # shot in landscape; correct video orientation has
    # same shape as "original" frame
    for rot in landscape_rotations:
        vr = _get_rotated_test_video(rot, ctx=cpu(0))
        assert vr[0].shape == (320, 568, 3)
        assert vr[:].shape == (3, 320, 568, 3)

    # shot in portrait mode; correct video orientation has
    # swapped width and height (height>>width)
    for rot in portrait_rotations:
        vr = _get_rotated_test_video(rot, ctx=cpu(0))
        assert vr[0].shape == (568, 320, 3), vr[0].shape
        assert vr[:].shape == (3, 568, 320, 3)

        # resize is applied in target shape
        vr = _get_rotated_test_video(rot, height=300, width=200, ctx=cpu(0))
        assert vr[0].shape == (300, 200, 3), vr[0].shape
def __getitem__(self, idx):
    """Sample ``tuple_len`` frames from one video and shuffle their order.

    Frames are taken ``self.interval`` apart, starting at a random position.
    In test mode the global ``random`` module is re-seeded with ``idx``
    before both the start selection and the shuffle, which makes them
    repeatable.

    Returns:
        tuple_frame (tensor): the stacked frames, in shuffled order;
            [tuple_len x channel x height x width] when ``transforms_``
            produces C x H x W tensors.
        tuple_order (tensor): [tuple_len], original position of each frame.
    """
    videoname = self.train_split[idx] if self.train else self.test_split[idx]
    filename = os.path.join(self.root_dir, 'video', videoname)
    videodata = VideoReader(filename, ctx=cpu(0))
    length = len(videodata)

    # random select frame for train, deterministic random select for test
    if not self.train:
        random.seed(idx)
    tuple_start = random.randint(0, length - self.tuple_total_frames)

    tuple_frame = [
        videodata[tuple_start + k * self.interval]
        for k in range(self.tuple_len)
    ]
    tuple_order = list(range(0, self.tuple_len))
    frame_and_order = list(zip(tuple_frame, tuple_order))

    # random shuffle for train, the same shuffle for test
    if not self.train:
        # Seeding the global RNG is deliberate and kept as-is: anything in
        # transforms_ that draws from `random` sees the same seeded state.
        random.seed(idx)
    random.shuffle(frame_and_order)
    tuple_frame, tuple_order = zip(*frame_and_order)

    if self.transforms_:
        trans_tuple = []
        for frame in tuple_frame:
            frame = self.toPIL(frame)  # PIL image
            frame = self.transforms_(frame)  # tensor [C x H x W]
            trans_tuple.append(frame)
        tuple_frame = trans_tuple
    else:
        tuple_frame = [torch.tensor(frame) for frame in tuple_frame]

    return torch.stack(tuple_frame), torch.tensor(tuple_order)
def get_decord_loader(path):
    """Read every batch of ``path`` through a decord ``VideoLoader``.

    Prints the number of batches collected.

    Args:
        path: Video path; wrapped in a one-element list for the loader.
    """
    loader = VideoLoader([path],
                         shape=(20, 256, 340, 3),
                         ctx=cpu(0),
                         interval=0,
                         skip=0,
                         shuffle=1)
    batch_total = len(loader)
    images = []
    for _ in range(batch_total):
        # Each batch is a (frames, indices) pair; only the frames are kept.
        batch_frames, _batch_indices = loader.next()
        images.append(batch_frames)
    print("decord VL", len(images))
def __init__(self, video_file, frame_idxs=None):
    """
    Open the video and decide which frames will be processed.

    :param video_file: video file path
    :param frame_idxs: frames that are to be processed, a list of integers;
        when omitted, every frame of the video is selected
    """
    self.vr = VideoReader(video_file, ctx=cpu(0))
    self._rotation = check_rotation(video_file)

    if frame_idxs is not None:
        # Explicit selection: processed in ascending order.
        self._frame_idxs = sorted(frame_idxs)
    else:
        self._frame_idxs = np.arange(len(self.vr))
def load_video(self, path):
    '''
    Open a video with decord and keep the reader on the instance.

    Thin wrapper written after the decord instructions:
    https://github.com/dmlc/decord#installation
    https://github.com/dmlc/decord/blob/master/examples/video_reader.ipynb

    Args:
        path: the path to the video file
    Returns:
        none; the reader is stored as ``self.vr`` and decodes frames
        at width 320 and height 240
    '''
    reader = VideoReader(path, width=320, height=240, ctx=cpu(0))
    self.vr = reader
def decord_sequential_cpu_benchmark(config):
    """Benchmark the decord library with sequential reads on the CPU.

    For each timer yielded by ``_TIME.measure_many`` the function reads
    ``config["n_frames"]`` frames one by one, converts each with
    ``cv2.cvtColor``, optionally shows it, and runs ``blocking_call`` with
    the consumer settings. The reader is re-created after every repeat.

    Args:
        config: Mapping with the keys ``video_path``, ``resize_shape`` (must
            be ``False``), ``downsample`` (must be ``1``), ``repeats``,
            ``n_frames``, ``show_img`` and ``consumer_blocking_config``
            (with ``io_limited`` and ``duration``).

    Raises:
        AssertionError: If resizing or downsampling is requested (not
            implemented), or if fewer than ``n_frames`` frames were read
            (end of video, or the user pressed "q").
    """
    # CPU benchmark: the decode context is always the CPU.
    ctx = decord.cpu()
    video_reader = decord.VideoReader(config["video_path"], ctx)

    assert config["resize_shape"] is False, "TODO: implement tranformation of image size for " \
                                            "decord_sequential_cpu_benchmark; note it has inbuilt" \
                                            "support for this. "
    assert config["downsample"] == 1, "TODO: implement downsampling," \
                                      " note that decord has options " \
                                      "to sample frames every N frames" \
                                      " https://github.com/dmlc/decord#videoloader" \
                                      "Also the video reader has " \
                                      "video_reader.skip_frames(N) function"

    for timer in tqdm(
            _TIME.measure_many(inspect.currentframe().f_code.co_name,
                               samples=config["repeats"])):
        frames_read = 0
        # Pre-bind so the `del img` below is valid even if no frame was read.
        img = None
        with tqdm(total=config["n_frames"]) as pbar:
            while frames_read < config["n_frames"]:
                try:
                    img = video_reader.next()
                except StopIteration:
                    break

                img = cv2.cvtColor(img.asnumpy(), cv2.COLOR_BGR2RGB)
                if config["show_img"]:
                    cv2.imshow("img", img)
                    k = cv2.waitKey(1)
                    if ord("q") == k:
                        break

                blocking_call(config["consumer_blocking_config"]["io_limited"],
                              config["consumer_blocking_config"]["duration"])

                frames_read += 1
                pbar.update()

        assert frames_read == config["n_frames"]
        timer.stop()

        # Release the last frame and the reader, then start the next repeat
        # from a fresh reader positioned at the first frame.
        del img
        del video_reader
        video_reader = decord.VideoReader(config["video_path"], ctx)
def __getitem__(self, idx):
    """Sample ``self.sample_num`` evenly spaced clips from one video.

    Clip centres are spread with ``np.linspace`` between ``clip_len / 2`` and
    ``length - clip_len / 2``. With ``transforms_`` set, every frame of a
    clip is transformed after re-seeding ``random`` with the same value, and
    the clip's mean over the time axis is subtracted.

    Returns:
        clips (tensor): the stacked clips; each transformed clip is
            [channel x time x height x width]
        class_idx (tensor): the class index (label value minus 1),
            repeated once per clip
    """
    videoname = self.train_split[idx] if self.train else self.test_split[idx]
    # The class label is the part of the name before the first '/'.
    class_idx = self.class_label2idx[videoname[:videoname.find('/')]] - 1
    filename = os.path.join(self.root_dir, 'video', videoname)
    videodata = VideoReader(filename, ctx=cpu(0))
    length = len(videodata)

    half_clip = self.clip_len / 2
    all_clips = []
    all_idx = []
    for i in np.linspace(half_clip, length - half_clip, self.sample_num):
        clip_start = int(i - half_clip)
        clip = videodata[clip_start:clip_start + self.clip_len]

        if self.transforms_:
            trans_clip = []
            # fix seed, apply the sample `random transformation` for all frames in the clip
            seed = random.random()
            for frame in clip.asnumpy():
                random.seed(seed)
                frame = self.toPIL(frame)  # PIL image
                frame = self.transforms_(frame)  # tensor [C x H x W]
                trans_clip.append(frame)
            # (T x C X H x W) to (C X T x H x W)
            clip = torch.stack(trans_clip).permute([1, 0, 2, 3])
            # Remove the per-clip temporal mean.
            clip_mean = torch.mean(clip, 1, keepdim=True)
            clip = clip - clip_mean
        else:
            clip = torch.tensor(clip)

        all_clips.append(clip)
        all_idx.append(torch.tensor(int(class_idx)))

    return torch.stack(all_clips), torch.stack(all_idx)
def __init__(self, dir: str, width: int, height: int, limit: int = None):
    """Open a decord reader for the ``.mp4`` files found in ``dir``.

    Args:
        dir: Directory scanned (non-recursively) with ``os.listdir``.
        width: Decode width forwarded to every ``VideoReader``.
        height: Decode height forwarded to every ``VideoReader``.
        limit: Maximum number of videos to open; ``None`` means no limit.
    """
    super(VideoDataset, self).__init__()

    mp4_names = []
    for name in os.listdir(dir):
        # Stop scanning as soon as the requested number of videos is reached.
        if not (limit is None or len(mp4_names) < limit):
            break
        if name.endswith('.mp4'):
            mp4_names.append(name)

    readers = []
    for name in mp4_names:
        full_path = os.path.join(dir, name)
        readers.append(VideoReader(full_path, ctx=cpu(0), width=width, height=height))
    self.vr = readers

    # Total number of frames across all opened videos.
    self.n = sum(len(reader) for reader in self.vr)
def extract_frames_from_video(video_file, video_id, target_dir):
    """Sample about one frame per second, score it, and save the good ones.

    Every ``int(avg_fps)``-th frame whose target file
    ``<target_dir>/<frame index, 10 digits>.jpg`` does not exist yet is run
    through object detection and captioning; frames scoring above 2 are
    written to disk and recorded.

    csv item:
    video_id;path_to_frame;frame_index;avg_fps;yolo3_classes;caption;score;

    Args:
        video_file: Path of the video to read.
        video_id: Identifier stored with every recorded frame.
        target_dir: Directory the JPEG files are written to.

    Returns:
        A flat list in which the seven fields above follow each other for
        every recorded frame.
    """
    results = []

    # a file like object works as well, for in-memory decoding
    with open(video_file, 'rb') as f:
        vr = VideoReader(f, ctx=cpu(0))
    print('video frames:', len(vr))

    avg_fps = int(vr.get_avg_fps())
    print('get_avg_fps=', vr.get_avg_fps())
    # A video below 1 fps truncates to 0, which is not a valid range step.
    step = max(avg_fps, 1)

    for i in range(0, len(vr), step):
        save_path = os.path.join(target_dir, "{:010d}.jpg".format(i))
        if os.path.exists(save_path):
            # Already extracted earlier; skip the decode entirely.
            continue

        # the video reader will handle seeking and skipping in the most efficient manner
        frame = vr[i]
        print(frame.shape)
        img = frame.asnumpy()
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        detected_classes_list = detect_objects_single_image(img)
        words = get_caption_single_image(img)
        score = get_score(words, detected_classes_list)
        if score > 2:
            frame_result = [
                video_id,
                save_path,
                i,
                avg_fps,
                detected_classes_list,
                words,
                score,
            ]
            cv2.imwrite(save_path, img)
            # NOTE(review): extend() flattens the record into `results`;
            # kept for compatibility, but append() may have been intended.
            results.extend(frame_result)

    return results
def get_frames(video_path: Path, num_frames: int, resize_coeff: Tuple[int, int], transform: albu.Compose,
               decode_gpu: bool) -> Dict[str, Any]:
    """Decode evenly spaced frames of a video and prepare them for a model.

    Args:
        video_path: Path of the video.
        num_frames: Number of frames to keep; ``None`` keeps every frame.
        resize_coeff: Forwarded to ``prepare_frames``.
        transform: Forwarded to ``prepare_frames``.
        decode_gpu: Decode on ``gpu(0)`` when true, else on ``cpu(0)``.

    Returns:
        A dict with ``torched_frames``, ``resize_factor``, ``video_path``,
        ``frame_ids`` and ``frames``; an empty dict when decord raises
        ``DECORDError``.
    """
    try:
        device_ctx = gpu(0) if decode_gpu else cpu(0)
        video = VideoReader(str(video_path), ctx=device_ctx)
        len_video = len(video)

        if num_frames is not None:
            # Use a stride that spreads num_frames over the whole video
            # (stride 1 when the video is shorter than requested).
            step = 1 if len_video < num_frames else int(len_video / num_frames)
            frame_ids = list(range(0, len_video, step))[:num_frames]
        else:
            frame_ids = list(range(len_video))

        frames = video.get_batch(frame_ids).asnumpy()
        torched_frames, resize_factor = prepare_frames(frames, resize_coeff, transform)

        return {
            "torched_frames": torched_frames,
            "resize_factor": resize_factor,
            "video_path": video_path,
            "frame_ids": np.array(frame_ids),
            "frames": frames,
        }
    except DECORDError:
        print(f"{video_path} is broken")
        return {}
def __init__(self,
             dir: str,
             batch_size: int,
             num_frames: int,
             width: int,
             height: int,
             interval: int = 0,
             skip: int = 0,
             shuffle: int = 1,
             limit: int = None):
    """Create a decord ``VideoLoader`` over the ``.mp4`` files in ``dir``.

    Args:
        dir: Directory scanned (non-recursively) with ``os.listdir``.
        batch_size: Stored as ``self.batch_size``.
        num_frames: Number of frames per loader batch.
        width: Frame width of the returned batches.
        height: Frame height of the returned batches.
        interval: Forwarded to ``VideoLoader``.
        skip: Forwarded to ``VideoLoader``.
        shuffle: Forwarded to ``VideoLoader``.
        limit: Keep only the first ``limit`` listed videos; ``None`` keeps
            them all.
    """
    super(BatchVideoDataLoader, self).__init__()
    self.batch_size = batch_size

    videos = [
        os.path.join(dir, f) for f in os.listdir(dir) if f.endswith('.mp4')
    ]
    if limit is not None:
        videos = videos[:limit]

    # decord documents `shape` as (Batch, H, W, 3): height comes before
    # width. The two were previously passed in swapped order.
    self.vl = VideoLoader(videos,
                          ctx=[cpu(0)],
                          shape=(num_frames, height, width, 3),
                          interval=interval,
                          skip=skip,
                          shuffle=shuffle)
import os

from decord import VideoReader
from decord import cpu, gpu

path = "../videos/SOX5yA1l24A.mp4"

# Repeatedly open the same video and decode every frame, printing the number
# of frames read on each pass.
for run_no in range(1000):
    images_d = []
    vr = VideoReader(path, ctx=cpu(0))
    # Distinct loop variables: the frame loop used to redefine the outer
    # loop's variable.
    for frame_no in range(len(vr)):
        # the video reader will handle seeking and skipping in the most efficient manner
        images_d.append(vr[frame_no])
    print(len(images_d))
# Command-line options of the decord benchmark.
parser = argparse.ArgumentParser("Decord benchmark")
parser.add_argument('--gpu', type=int, default=-1,
                    help='context to run, use --gpu=-1 to use cpu only')
parser.add_argument('--file', type=str, default='/tmp/testsrc_h264_100s_default.mp4',
                    help='Test video')
parser.add_argument('--seed', type=int, default=666,
                    help='numpy random seed for random access indices')
parser.add_argument('--random-frames', type=int, default=300,
                    help='number of random frames to run')
parser.add_argument('--width', type=int, default=320,
                    help='resize frame width')
parser.add_argument('--height', type=int, default=240,
                    help='resize frame height')
args = parser.parse_args()

test_video = args.file

# A non-negative --gpu selects that GPU; anything else decodes on the CPU.
ctx = de.gpu(args.gpu) if args.gpu > -1 else de.cpu()

vr = de.VideoReader(test_video, ctx, width=args.width, height=args.height)

# Sequential read: pull frames until the reader is exhausted.
cnt = 0
tic = time.time()
try:
    while True:
        frame = vr.next()
        cnt += 1
except StopIteration:
    pass
print(cnt, ' frames, elapsed time for sequential read: ', time.time() - tic)

np.random.seed(args.seed)  # fix seed for all random tests
# Shuffled list of every frame index, for the random-access tests.
acc_indices = np.arange(len(vr))
np.random.shuffle(acc_indices)
def get_frames(self, sample, video_root="/home/asandygulova/dock/krsl_173_1708/videos/"):
    """Open the video belonging to ``sample`` with decord.

    Args:
        sample: Mapping whose ``"video"`` entry is the video file name.
        video_root: Directory containing the videos. Defaults to the
            location that was previously hard-coded here.

    Returns:
        A ``VideoReader`` over ``video_root/sample["video"]`` using the CPU
        context.
    """
    video_path = os.path.join(video_root, sample["video"])
    return VideoReader(video_path, ctx=cpu(0))