class SmartReadFilter(Filter):
    def __init__(self, map_from_dir, map_to_ppm_dir):
        super(SmartReadFilter, self).__init__()
        self.ss_client = SmartStorageClient(map_from_dir, map_to_ppm_dir)

    def __call__(self, item):
        content = self.ss_client.read(item.src)
        item.data = content
        self.session_stats['bytes_from_disk'] += len(content)
        return True
class SmartDecodeFilter(Filter):
    def __init__(self, map_from_dir, map_to_ppm_dir):
        super(SmartDecodeFilter, self).__init__(map_from_dir)
        self.ss_client = SmartStorageClient(map_from_dir, map_to_ppm_dir)

    def __call__(self, item):
        path = item.src
        arr = self.ss_client.read_decode(path)
        item.array = arr
        # count the decoded array's element count as bytes read from disk
        self.session_stats['bytes_from_disk'] += arr.size
        return True
class SmartFaceFilter(Filter):
    def __init__(self, map_from_dir, map_to_ppm_dir, min_faces=1):
        super(SmartFaceFilter, self).__init__(map_from_dir, min_faces)
        self.ss_client = SmartStorageClient(map_from_dir, map_to_ppm_dir)
        self.min_faces = min_faces

    def __call__(self, item):
        path = item.src
        arr, boxes = self.ss_client.read_decode_face(path)
        item.array = arr
        item['face_detection'] = boxes
        # account 3 bytes (RGB) per pixel of each detected face box
        self.session_stats['bytes_from_disk'] += sum(
            map(lambda b: abs(3 * (b[0] - b[2]) * (b[1] - b[3])), boxes))
        return len(boxes) >= self.min_faces
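# The Filter/Item harness is not shown above. As a rough, hypothetical sketch of how
# these filters compose: each filter is called on an Item and returns False to drop
# it, so a driver can short-circuit with all(). The run_filters helper and the
# directory paths below are illustrative, not part of the original code.
def run_filters(paths, filters):
    passed = []
    for path in paths:
        item = Item(path)  # Item wraps a source path, exposed as item.src
        if all(f(item) for f in filters):  # stop at the first filter that rejects
            passed.append(item)
    return passed

# e.g. keep only images with at least one detected face:
# kept = run_filters(image_paths,
#                    [SmartFaceFilter('/mnt/hdd/fast20/images',  # map_from_dir (illustrative)
#                                     '/mnt/ssd/fast20/ppm',     # map_to_ppm_dir (illustrative)
#                                     min_faces=1)])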
def run(video_path='/mnt/hdd/fast20/video/VIRAT/mp4/VIRAT_S_000200_02_000479_000635.mp4',
        diff_threshold=100.,
        delta_frames=30,
        every_frame=10,
        detect=False,
        confidence=0.95,
        num_workers=8,
        smart=False,
        expname=None,
        verbose=False):
    """Run NoScope-style frame skipping + image difference detection on videos.

    Optionally, run a DNN object detector on frames that pass the diff detector.

    Keyword Arguments:
        video_path {str} -- Path of a video or a directory of videos
        diff_threshold {float} -- Threshold for the diff detector to fire (default: {100.})
        delta_frames {int} -- Diff detector compares with the frame `delta_frames` ago (default: {30})
        every_frame {int} -- Frame skipping: run the diff detector every `every_frame` frames (default: {10})
        detect {bool} -- If True, run the DNN on passing frames (default: {False})
        confidence {float} -- Detection confidence threshold (default: {0.95})
        num_workers {int} -- Number of parallel workers (default: {8})
        smart {bool} -- Use the smart disk or not (default: {False})
        expname {str} -- If not None, store results to the DB under this experiment name (default: {None})
        verbose {bool} -- Enable debug logging (default: {False})

    Raises:
        ValueError: If video_path is neither a file nor a directory.
    """
    if verbose:
        logzero.loglevel(logging.DEBUG)

    # expand paths
    if os.path.isfile(video_path):
        paths = [video_path]
    elif os.path.isdir(video_path):
        paths = list(recursive_glob(video_path, '*.mp4'))
    else:
        raise ValueError("Invalid: {}".format(video_path))
    logger.info("Found {} files".format(len(paths)))

    # set CPU affinity: split workers evenly across the two CPU sockets
    assert num_workers == 1 or num_workers % 2 == 0, \
        "Must give an even number for num_workers or 1: {}".format(num_workers)
    if num_workers > 1:
        cpuset = list(range(CPU_START[0], CPU_START[0] + num_workers // 2)) + \
            list(range(CPU_START[1], CPU_START[1] + num_workers // 2))
    else:
        cpuset = [CPU_START[0], ]
    logger.info("cpuset: {}".format(cpuset))
    psutil.Process().cpu_affinity(cpuset)

    # Set up and start workers
    context = Context()
    workers = []
    for _ in range(num_workers):
        w = threading.Thread(target=worker,
                             args=(context, diff_threshold, detect),
                             kwargs={
                                 'targets': ['person', ],
                                 'confidence': confidence
                             })  # search for persons
        w.daemon = True
        w.start()
        workers.append(w)

    # Exclude preload time from measurement
    if smart:
        ss_client = SmartStorageClient(
            map_from_dir='/mnt/hdd/fast20/video/VIRAT/mp4', preload=True)

    tic = time.time()
    tic_cpu = time.clock()
    total_frames = 0

    for path in paths:
        num_frames = get_num_video_frames(path)
        logger.info("Processing {} with {} frames".format(path, num_frames))

        window = []
        if smart:
            gen = smart_decoder(ss_client, path, context, every_frame)
        else:
            gen = cv2_decoder(path, context, every_frame)

        for i, (frame, frame_id) in enumerate(gen):
            window.append(frame)
            # compare against the frame `delta_frames` ago once enough frames are buffered
            reference = window.pop(0) if frame_id > delta_frames else None
            item = Item('{}-{}'.format(path, frame_id))
            item.array = frame
            context.q.put((item, reference))
            logger.debug("Pushed {}".format(item.src))

            if frame_id % 100 == 0:
                logger.info("Processed {} frames, frame id {}, {}".format(
                    i, frame_id, path))

        total_frames += num_frames

    # push sentinels
    for _ in workers:
        context.q.put(None)

    logger.info("All frames pushed, waiting for queue join")
    context.q.join()
    for w in workers:
        w.join()

    elapsed = time.time() - tic
    elapsed_cpu = time.clock() - tic_cpu
    logger.info("Elapsed {:.2f} s, Elapsed CPU {:.2f} s".format(
        elapsed, elapsed_cpu))
    logger.info(str(context.stats))
    logger.info("Total frames: {}".format(total_frames))

    keys_dict = {
        'expname': expname,
        'basedir': str(video_path),
        'ext': 'video',
        'num_workers': num_workers,
        'hostname': this_hostname
    }
    vals_dict = {
        'num_items': total_frames,  # different from images because we have frame skipping
        'avg_wall_ms': 1e3 * elapsed / total_frames,
        'avg_cpu_ms': 1e3 * elapsed_cpu / total_frames,
        'avg_mbyteps': context.stats['bytes_from_disk'] * 1e-6 / elapsed,
    }
    logger.info(str(keys_dict))
    logger.info(str(vals_dict))

    if expname is not None:
        sess = dbutils.get_session()
        dbutils.insert_or_update_one(sess,
                                     dbmodels.EurekaExp,
                                     keys_dict=keys_dict,
                                     vals_dict=vals_dict)
        sess.commit()
        sess.close()
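# cv2_decoder / smart_decoder are not shown above; the loop only needs a generator
# that yields (frame, frame_id) pairs at a stride of `every_frame`. A minimal sketch
# of the OpenCV path under that assumption (the name and the unused `context`
# argument are kept for symmetry; this is illustrative, not the original code):
def cv2_decoder_sketch(path, context, every_frame):
    import cv2
    cap = cv2.VideoCapture(path)
    frame_id = 0
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            if frame_id % every_frame == 0:
                yield frame, frame_id  # same (frame, frame_id) contract run() consumes
            frame_id += 1
    finally:
        cap.release()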
class ImageDataset(torch.utils.data.Dataset):
    def __init__(self,
                 files,
                 context,
                 transform=None,
                 smart=False,
                 sort_fie=False,
                 image_loader=default_loader):
        self.files = files
        self.transform = transform
        self.smart = smart
        self.context = context
        self.ss_client = None
        self.sort_fie = sort_fie
        self.image_loader = image_loader
        # According to https://pytorch.org/docs/stable/data.html#multi-process-data-loading
        # this object is handed to worker processes when num_workers > 0.
        # So we defer creating the client until after the worker process is created;
        # otherwise ZMQ doesn't work properly.

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        import cv2
        tic_cpu = time.clock()
        if torch.is_tensor(idx):
            idx = idx.tolist()
        logger.debug("[{}] getitem {}".format(os.getpid(), idx))

        if self.smart or self.sort_fie:
            # idx is ignored. Pop a path from the (pre-sorted) queue
            img_path = self.context.q.get()
        else:
            img_path = self.files[idx]

        # initialize the smart client on first use
        if self.smart and self.ss_client is None:
            logger.info("[Worker {}] Creating a SmartStorageClient".format(
                torch.utils.data.get_worker_info().id))
            self.ss_client = SmartStorageClient()

        # get the decoded image
        if self.smart:
            # PPM
            tic = time.time()
            arr = self.ss_client.read_decode(img_path)
            image = cv2.resize(arr, RESOL)  # resize here rather than in transform
            disk_read = arr.size
            elapsed = time.time() - tic
            logger.debug("Smart decode {:.3f} ms".format(1000 * elapsed))
        else:
            image = self.image_loader(img_path)
            disk_read = os.path.getsize(img_path)

        # transform
        if self.transform:
            tic = time.time()
            image_tensor = self.transform(image)
            elapsed = time.time() - tic
            logger.debug("Transform {:.3f} ms".format(1000 * elapsed))
        else:
            image_tensor = image  # no transform: return the (possibly resized) image as-is

        # high-overhead locking
        with self.context.lock:
            elapsed_cpu = time.clock() - tic_cpu
            # logger.debug("[{}] Writing to global context: {}, {}".format(os.getpid(), elapsed_cpu, disk_read))
            self.context.stats['cpu_time'] += elapsed_cpu
            self.context.stats['bytes_from_disk'] += disk_read

        return image_tensor

    def __del__(self):
        logger.info("Destroying ImageDataset Worker")
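# The Context class is not shown above; __getitem__ only relies on context.q,
# context.lock, and context.stats. A hypothetical wiring sketch (SketchContext,
# demo_loader, and the DataLoader settings are illustrative stand-ins, not the
# original implementation):
import multiprocessing

import torch
import torchvision.transforms as transforms


class SketchContext(object):
    def __init__(self):
        self._manager = multiprocessing.Manager()
        self.q = multiprocessing.Queue()
        self.lock = multiprocessing.Lock()
        self.stats = self._manager.dict({'cpu_time': 0.0, 'bytes_from_disk': 0})


def demo_loader(files):
    context = SketchContext()
    for f in sorted(files):  # pre-sorted queue, consumed when smart or sort_fie is set
        context.q.put(f)
    dataset = ImageDataset(files, context,
                           transform=transforms.ToTensor(),
                           smart=False, sort_fie=True)
    loader = torch.utils.data.DataLoader(dataset, batch_size=32, num_workers=4)
    for _ in loader:
        pass  # per-item CPU time and byte counts accumulate in context.stats
    return dict(context.stats)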