def __init__(self, video_file, img_size=(416, 416), gpu=None, num_threads=8, offset=0, is_torch=True):
    """Open ``video_file`` with decord and have it resize decoded frames.

    Args:
        video_file: Video source handed straight to ``VideoReader``.
        img_size: Target frame size. Either a single number (used for both
            width and height) or a ``(width, height)`` tuple/list.
        gpu: Device index for ``decord.gpu``; ``None`` selects ``decord.cpu()``.
        num_threads: Forwarded to ``VideoReader`` as ``num_threads``.
        offset: Stored unchanged as ``self.offset``; not otherwise used here.
        is_torch: When true, decord's bridge is switched to ``'torch'``.
            NOTE(review): ``set_bridge`` is called on the decord module, so
            this presumably affects every reader in the process - confirm.
    """
    self.is_torch = is_torch
    if is_torch:
        decord.bridge.set_bridge('torch')

    # Normalise the size exactly once so the value stored on the instance and
    # the width/height given to the reader can never disagree (a list used to
    # be stored as ([w, h], [w, h]) while the reader received w and h).
    if isinstance(img_size, (tuple, list)):
        self.img_size = tuple(img_size)
    else:
        self.img_size = (img_size, img_size)
    width, height = self.img_size[0], self.img_size[1]

    self.offset = offset

    ctx = decord.cpu() if gpu is None else decord.gpu(gpu)
    self._vr = VideoReader(video_file, ctx=ctx, width=width, height=height, num_threads=num_threads)
def decord_sequential_cpu_benchmark(config):
    """Benchmark decord's ``VideoReader`` with sequential, frame-by-frame reads.

    For each of ``config["repeats"]`` timed samples, up to
    ``config["n_frames"]`` frames are pulled with ``video_reader.next()``,
    channel-swapped with OpenCV, optionally displayed, and handed to
    ``blocking_call`` to simulate a consumer. The reader is re-created after
    every sample so each one starts from the beginning of the video.

    Config keys read: ``video_path``, ``resize_shape``, ``downsample``,
    ``repeats``, ``n_frames``, ``show_img`` and
    ``consumer_blocking_config`` (``io_limited``, ``duration``).

    Raises:
        AssertionError: if ``resize_shape`` is not ``False``, if
            ``downsample`` is not 1, or if a sample ended before
            ``n_frames`` frames were read (end of video, or "q" pressed in
            the preview window).
    """
    # This benchmark is CPU-only; the old ``device == "gpu"`` branch was
    # unreachable because ``device`` was hard-coded to "cpu".
    ctx = decord.cpu()
    video_reader = decord.VideoReader(config["video_path"], ctx)

    assert config["resize_shape"] is False, (
        "TODO: implement transformation of image size for "
        "decord_sequential_cpu_benchmark; note it has inbuilt "
        "support for this. "
    )
    assert config["downsample"] == 1, (
        "TODO: implement downsampling, "
        "note that decord has options "
        "to sample frames every N frames "
        "https://github.com/dmlc/decord#videoloader . "
        "Also the video reader has "
        "video_reader.skip_frames(N) function"
    )

    # Must be evaluated in this frame so the timer is labelled with this
    # function's name.
    benchmark_name = inspect.currentframe().f_code.co_name
    for timer in tqdm(_TIME.measure_many(benchmark_name, samples=config["repeats"])):
        frames_read = 0
        # Bound up front so the cleanup below cannot hit a NameError when the
        # loop body never runs (e.g. n_frames == 0).
        img = None
        with tqdm(total=config["n_frames"]) as pbar:
            while frames_read < config["n_frames"]:
                try:
                    img = video_reader.next()
                except StopIteration:
                    break
                # NOTE(review): BGR2RGB and RGB2BGR perform the same channel
                # swap; decord presumably yields RGB, so this gives the BGR
                # order cv2.imshow expects - confirm.
                img = cv2.cvtColor(img.asnumpy(), cv2.COLOR_BGR2RGB)
                if config["show_img"]:
                    cv2.imshow("img", img)
                    k = cv2.waitKey(1)
                    if ord("q") == k:
                        break
                blocking_call(config["consumer_blocking_config"]["io_limited"],
                              config["consumer_blocking_config"]["duration"])
                frames_read += 1
                pbar.update()
        assert frames_read == config["n_frames"]
        timer.stop()

        # Drop the last frame and the exhausted reader, then start the next
        # sample with a fresh reader positioned at frame 0.
        del img
        del video_reader
        video_reader = decord.VideoReader(config["video_path"], ctx)
def get_frames(video_path: Path, num_frames: int, resize_coeff: Tuple[int, int], transform: albu.Compose, decode_gpu: bool) -> Dict[str, Any]:
    """Decode a subset of frames from ``video_path`` and prepare them for inference.

    Args:
        video_path: Video file to open with decord.
        num_frames: Maximum number of frames to return. ``None`` selects every
            frame; otherwise frames are taken at a fixed stride of
            ``int(len_video / num_frames)`` (stride 1 when the video is
            shorter than ``num_frames``).
        resize_coeff: Forwarded to ``prepare_frames``.
        transform: Forwarded to ``prepare_frames``.
        decode_gpu: Decode on ``gpu(0)`` when true, on ``cpu(0)`` otherwise.

    Returns:
        A dict with keys ``torched_frames``, ``resize_factor``, ``video_path``,
        ``frame_ids`` (numpy array) and ``frames`` (decoded numpy frames), or
        an empty dict when decord raises ``DECORDError``.
    """
    try:
        reader = VideoReader(str(video_path), ctx=gpu(0) if decode_gpu else cpu(0))
        total = len(reader)

        if num_frames is None:
            frame_ids = list(range(total))
        else:
            stride = 1 if total < num_frames else int(total / num_frames)
            frame_ids = list(range(0, total, stride))[:num_frames]

        frames = reader.get_batch(frame_ids).asnumpy()
        torched_frames, resize_factor = prepare_frames(frames, resize_coeff, transform)

        return {
            "torched_frames": torched_frames,
            "resize_factor": resize_factor,
            "video_path": video_path,
            "frame_ids": np.array(frame_ids),
            "frames": frames,
        }
    except DECORDError:
        # Unreadable videos are reported and skipped rather than aborting.
        print(f"{video_path} is broken")
        return {}
# Make the in-tree decord python package importable before importing it.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../python'))
import decord as de

# Command-line options as (flag, type, default, help), registered in order.
parser = argparse.ArgumentParser("Decord benchmark")
for _flag, _type, _default, _help in (
    ('--gpu', int, -1, 'context to run, use --gpu=-1 to use cpu only'),
    ('--file', str, '/tmp/testsrc_h264_100s_default.mp4', 'Test video'),
    ('--seed', int, 666, 'numpy random seed for random access indices'),
    ('--random-frames', int, 300, 'number of random frames to run'),
    ('--width', int, 320, 'resize frame width'),
    ('--height', int, 240, 'resize frame height'),
):
    parser.add_argument(_flag, type=_type, default=_default, help=_help)
args = parser.parse_args()

test_video = args.file
# Any non-negative --gpu value selects that GPU; otherwise decode on the CPU.
ctx = de.gpu(args.gpu) if args.gpu > -1 else de.cpu()
vr = de.VideoReader(test_video, ctx, width=args.width, height=args.height)

# Sequential read: pull frames until the reader signals the end of the video.
cnt = 0
tic = time.time()
try:
    while True:
        frame = vr.next()
        cnt += 1
except StopIteration:
    pass
print(cnt, ' frames, elapsed time for sequential read: ', time.time() - tic)

np.random.seed(args.seed)  # fix seed for all random tests
def get_decord_gpu(path):
    """Decode every frame of the video at ``path`` with decord on ``gpu(0)``.

    Frames are fetched one by one by index and kept in a local list for the
    duration of the call. Nothing is returned.
    """
    reader = VideoReader(path, ctx=gpu(0))
    # the video reader will handle seeking and skipping in the most efficient manner
    images_av = [reader[frame_idx] for frame_idx in range(len(reader))]
loaders = [] times = [] times_random_seek = [] video = [] num_frames = [] lib_version = [] for i in range(args.n): for file in os.listdir("../videos"): if file in ["README", ".ipynb_checkpoints", "avadl.py"]: print(f"Skipping {file}") continue path = os.path.join("../videos/", file) images_av = [] vr = VideoReader(path, ctx=gpu(0)) for i in range(len(vr)): # the video reader will handle seeking and skipping in the most efficient manner images_av.append(vr[i]) nframes = len(images_av) print(path, nframes) video.append(file) loaders.append("decord_gpu") num_frames.append(nframes) lib_version.append(decord.__version__) times.append( timeit.timeit(
def decord_batch_cpu_benchmark(config, buffer_size):
    """Benchmark decord's ``VideoLoader`` reading the video sequentially in batches.

    For each of ``config["repeats"]`` timed samples, batches of
    ``buffer_size`` frames are pulled from the loader and every frame is
    optionally displayed and handed to ``blocking_call`` to simulate a
    consumer, until ``config["n_frames"]`` frames have been consumed. The
    loader is re-created after every sample.

    Config keys read: ``video_path``, ``resize_shape``, ``downsample``,
    ``repeats``, ``n_frames``, ``show_img`` and
    ``consumer_blocking_config`` (``io_limited``, ``duration``).

    Args:
        config: Benchmark configuration mapping (see keys above).
        buffer_size: Number of frames per batch; used as the leading
            dimension of the loader's ``shape``.

    Raises:
        AssertionError: if ``resize_shape`` is not ``False``, if
            ``downsample`` is not 1, or if a sample ended with a frame count
            different from ``n_frames``.
    """
    # This benchmark is CPU-only; the old ``device == "gpu"`` branch was
    # unreachable because ``device`` was hard-coded to "cpu".
    ctx = decord.cpu()
    np_arr_shape = get_video_shape(config["video_path"])

    def make_loader():
        # One place for the loader settings, used both for the first sample
        # and for the fresh loader created after each sample.
        return decord.VideoLoader([config["video_path"]], ctx,
                                  shape=(buffer_size, *np_arr_shape),
                                  interval=1, skip=1, shuffle=0)

    video_loader = make_loader()

    assert config["resize_shape"] is False, (
        "TODO: implement transformation of image size for "
        "decord_batch_cpu_benchmark; note it has inbuilt "
        "support for this. "
    )
    assert config["downsample"] == 1, (
        "TODO: implement downsampling, "
        "note that decord has options "
        "to sample frames every N frames "
        "https://github.com/dmlc/decord#videoloader . "
        "Also the video reader has "
        "video_reader.skip_frames(N) function"
    )

    # Must be evaluated in this frame (not inside make_loader) so the timer
    # is labelled with this function's name.
    benchmark_name = inspect.currentframe().f_code.co_name
    for timer in tqdm(_TIME.measure_many(benchmark_name, samples=config["repeats"])):
        frames_read = 0
        # Bound up front so the cleanup below never needs a NameError guard.
        img = None
        with tqdm(total=config["n_frames"]) as pbar:
            for batch in video_loader:
                if frames_read >= config["n_frames"]:
                    break
                data = batch[0].asnumpy()
                for img in data:
                    # Frames are shown as delivered; no colour conversion is
                    # applied in this benchmark.
                    if config["show_img"]:
                        cv2.imshow("img", img)
                        k = cv2.waitKey(1)
                        if ord("q") == k:
                            # Only abandons the rest of the current batch;
                            # the outer loop moves on to the next batch.
                            break
                    blocking_call(
                        config["consumer_blocking_config"]["io_limited"],
                        config["consumer_blocking_config"]["duration"])
                    frames_read += 1
                    pbar.update()
                    if frames_read >= config["n_frames"]:
                        break
        assert frames_read == config["n_frames"]
        timer.stop()

        # Reset, drop the last frame and the used loader, then start the next
        # sample with a fresh loader.
        video_loader.reset()
        del img
        del video_loader
        video_loader = make_loader()
def _select_frame_ids(len_video, num_frames):
    """Return the indices of the frames to decode from a ``len_video``-frame video.

    ``None`` selects every frame, 1 selects only frame 0, and a larger value
    selects at most ``num_frames`` frames at a fixed stride.
    """
    if num_frames is None:
        # Previously this evaluated range(None) and raised TypeError.
        return list(range(len_video))
    if num_frames == 1:
        return [0]
    if num_frames > 1:
        step = 1 if len_video < num_frames else len_video // num_frames
        return list(range(0, len_video, step))[:num_frames]
    raise ValueError(f"Expect None or integer >= 1 for args.num_frames, but got {num_frames}")


def main():
    """Run RetinaFace face detection over the ``*.mp4`` files under ``args.input_path``.

    For every video a subset of frames is decoded with decord (on CPU or
    GPU, per ``args.video_decoder``), each frame is passed through the
    network, boxes and landmarks are decoded, filtered by
    ``args.confidence_threshold`` and reduced with NMS. Depending on
    ``args.save_crops`` / ``args.save_boxes`` the detected crops are written
    as JPEGs under ``<output_path>/images/<video_id>/`` and the detections as
    ``<output_path>/labels/<video_id>.json``.

    Raises:
        NotImplementedError: for an unknown ``args.network`` or
            ``args.video_decoder``.
        ValueError: when ``args.num_frames`` is neither ``None`` nor an
            integer >= 1.
    """
    args = get_args()
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        raise NotImplementedError("Only mobile0.25 and resnet50 are supported.")

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    if args.fp16:
        net = net.half()
    print("Finished loading model!")
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    file_paths = sorted(args.input_path.rglob("*.mp4"))[: args.num_videos]
    if args.num_gpu is not None:
        # Each worker processes only its own slice of the file list.
        start, end = split_array(len(file_paths), args.num_gpu, args.gpu_id)
        file_paths = file_paths[start:end]

    output_path = args.output_path
    if args.save_boxes:
        output_label_path = output_path / "labels"
        output_label_path.mkdir(exist_ok=True, parents=True)
    if args.save_crops:
        output_image_path = output_path / "images"
        output_image_path.mkdir(exist_ok=True, parents=True)

    if args.video_decoder == "cpu":
        decode_device = cpu(0)
    elif args.video_decoder == "gpu":
        decode_device = gpu(0)
    else:
        raise NotImplementedError(
            f"Only CPU and GPU devices are supported by decord, but got {args.video_decoder}"
        )

    transform = albu.Compose(
        [albu.Normalize(p=1, mean=(104, 117, 123), std=(1.0, 1.0, 1.0), max_pixel_value=1)], p=1
    )

    with torch.no_grad():
        for video_path in tqdm(file_paths):
            labels = []
            video_id = video_path.stem

            with video_reader(str(video_path), ctx=decode_device) as video:
                frame_ids = _select_frame_ids(len(video), args.num_frames)
                frames = video.get_batch(frame_ids)
                if args.video_decoder == "cpu":
                    frames = frames.asnumpy()
                elif args.video_decoder == "gpu":
                    frames = dlpack.from_dlpack(frames.to_dlpack())

            if args.video_decoder == "gpu":
                # Release the decoder before inference starts.
                del video
                torch.cuda.empty_cache()
                gc.collect()

            image_height = frames.shape[1]
            image_width = frames.shape[2]

            # Multipliers that map decoded coordinates to pixels: 5 (x, y)
            # pairs for landmarks, 2 pairs for a box.
            scale1 = torch.Tensor([image_width, image_height] * 5).to(device)
            scale = torch.Tensor([image_width, image_height] * 2).to(device)

            priorbox = PriorBox(cfg, image_size=(image_height, image_width))
            priors = priorbox.forward().to(device)
            prior_data = priors.data

            if args.resize_coeff is not None:
                target_size = min(args.resize_coeff)
                max_size = max(args.resize_coeff)
                image_size_min = min(image_width, image_height)
                image_size_max = max(image_width, image_height)
                resize = float(target_size) / float(image_size_min)
                # Cap the factor so the longer side does not exceed max_size.
                if np.round(resize * image_size_max) > max_size:
                    resize = float(max_size) / float(image_size_max)
            else:
                resize = 1

            for frame_id, frame in zip(frame_ids, frames):
                torched_image = prepare_image(frame, transform, args.video_decoder).to(device)
                if args.fp16:
                    torched_image = torched_image.half()

                loc, conf, land = net(torched_image)  # forward pass

                boxes = decode(loc.data[0], prior_data, cfg["variance"])
                boxes *= scale / resize
                boxes = boxes.cpu().numpy()

                scores = conf[0].data.cpu().numpy()[:, 1]

                landmarks = decode_landm(land.data[0], prior_data, cfg["variance"])
                landmarks *= scale1 / resize
                landmarks = landmarks.cpu().numpy()

                # ignore low scores
                valid_index = np.where(scores > args.confidence_threshold)[0]
                boxes = boxes[valid_index]
                landmarks = landmarks[valid_index]
                scores = scores[valid_index]

                # sort by descending score before NMS (no top-K cut applied)
                order = scores.argsort()[::-1]
                boxes = boxes[order]
                landmarks = landmarks[order]
                scores = scores[order]

                # do NMS; detection columns: x_min, y_min, x_max, y_max, score
                detection = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
                keep = py_cpu_nms(detection, args.nms_threshold)
                detection = detection[keep, :]
                landmarks = landmarks[keep].astype(int)

                if detection.shape[0] == 0:
                    continue

                bboxes = detection[:, :4].astype(int)
                confidence = detection[:, 4].astype(np.float64)

                for crop_id in range(len(detection)):
                    bbox = bboxes[crop_id]
                    labels.append(
                        {
                            "frame_id": int(frame_id),
                            "crop_id": crop_id,
                            "bbox": bbox.tolist(),
                            "score": confidence[crop_id],
                            "landmarks": landmarks[crop_id].tolist(),
                        }
                    )

                    if args.save_crops:
                        x_min, y_min, x_max, y_max = bbox
                        # Negative coordinates would wrap around when slicing.
                        x_min = max(0, x_min)
                        y_min = max(0, y_min)
                        crop = frame[y_min:y_max, x_min:x_max]

                        target_folder = output_image_path / f"{video_id}"
                        target_folder.mkdir(exist_ok=True, parents=True)

                        crop_file_path = target_folder / f"{frame_id}_{crop_id}.jpg"
                        if crop_file_path.exists():
                            continue

                        cv2.imwrite(
                            str(crop_file_path),
                            cv2.cvtColor(crop, cv2.COLOR_BGR2RGB),
                            [int(cv2.IMWRITE_JPEG_QUALITY), 90],
                        )

            if args.save_boxes:
                result = {
                    "file_path": str(video_path),
                    "file_id": video_id,
                    "bboxes": labels,
                }
                with open(output_label_path / f"{video_id}.json", "w") as f:
                    json.dump(result, f, indent=2)
def extract_frames(video_path, frames_dir, custom_coordinates, start=-1, end=-1, seconds=0.1, meet=True):
    """
    Extract cropped, de-duplicated frames from a video using decord's VideoReader

    Frames are sampled every ``seconds`` seconds, cropped, and compared with
    the previously sampled frame via ``CheckSimilarity``. One image is written
    per run of similar frames (the last frame of the run), including the run
    that ends the video. Images go to
    ``<frames_dir>/<video file name>/frame<N>.jpg``.

    :param video_path: path of the video
    :param frames_dir: the directory to save the frames
    :param custom_coordinates: mapping with "top", "bottom", "left" and "right"
        fractions cut away from the respective sides; only read when ``meet``
        is false
    :param start: start frame (negative means 0)
    :param end: end frame, exclusive (negative means the end of the video)
    :param seconds: frame spacing in seconds; spacings shorter than one frame
        are treated as one frame
    :param meet: when true, use the fixed Google Meet crop (drop the top and
        bottom 12.5% and the right 25%) instead of ``custom_coordinates``
    :return: False when the frame rate reported by OpenCV is 0, True otherwise
    """
    video_path = os.path.normpath(video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the paths OS (Windows) compatible
    video_filename = os.path.basename(video_path)

    assert os.path.exists(video_path)  # assert the video file exists

    # OpenCV is used only to query the frame rate; release the capture again.
    vidcap = cv2.VideoCapture(video_path)
    try:
        fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    finally:
        vidcap.release()
    if fps == 0:
        return False

    # A spacing shorter than one frame would give range() a step of 0.
    frame_step = max(1, int(seconds * fps))

    try:
        vr = VideoReader(video_path, ctx=gpu(0))
    except Exception:
        # Fall back to CPU decoding when the GPU reader cannot be created.
        vr = VideoReader(video_path, ctx=cpu(0))

    if start < 0:  # if start isn't specified lets assume 0
        start = 0
    if end < 0:  # if end isn't specified assume the end of the video
        end = len(vr)

    frames_list = list(range(start, end, frame_step))
    if not frames_list:
        return True

    output_dir = os.path.join(frames_dir, video_filename)
    os.makedirs(output_dir, exist_ok=True)

    saved_count = 0
    pending_frame = None  # last cropped frame seen, not yet written to disk

    frames = vr.get_batch(frames_list).asnumpy()
    for frame in frames:
        new_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        h, w = new_frame.shape[:2]

        if meet:
            # crop Google meet slides frame only and ignore the speaker part of screen
            top, bottom = int(0.125 * h), int(0.875 * h)
            left, right = 0, int(0.75 * w)
        else:
            top = int(custom_coordinates["top"] * h)
            bottom = int((1 - custom_coordinates["bottom"]) * h)
            left = int(custom_coordinates["left"] * w)
            right = int((1 - custom_coordinates["right"]) * w)
        new_frame = new_frame[top:bottom, left:right]

        # A change of content closes the current run: write its last frame.
        # The path is built at write time so every image gets its own index.
        if pending_frame is not None and not CheckSimilarity(pending_frame, new_frame):
            cv2.imwrite(os.path.join(output_dir, f"frame{saved_count}.jpg"), pending_frame)
            saved_count += 1
        pending_frame = new_frame

    # The run that is still open when the video ends has not been written yet.
    if pending_frame is not None:
        cv2.imwrite(os.path.join(output_dir, f"frame{saved_count}.jpg"), pending_frame)
        saved_count += 1

    return True