def test_dataset_cache_speedup():
    """Check that a cached dataset is much quicker to traverse the second time.

    The mapped dataset is traversed twice inside one timer; the first
    checkpoint must exceed 5 and the second must stay below 0.2 (units are
    whatever ``torchfunc.Timer`` reports -- presumably seconds).
    """
    slow_cached = ExampleDataset(0, 5).map(artificial_slowdown).cache()

    with torchfunc.Timer() as stopwatch:
        # First traversal: nothing is cached yet, so the slowdown applies.
        index_is_sample(slow_cached)
        uncached_elapsed = stopwatch.checkpoint()
        assert uncached_elapsed > 5

        # Second traversal: samples should now be served from the cache.
        index_is_sample(slow_cached)
        cached_elapsed = stopwatch.checkpoint()
        assert cached_elapsed < 0.2
def test_timer_context_manager():
    """Check that a ``torchfunc.Timer`` stops advancing once its block exits.

    NOTE(review): the block boundaries below were reconstructed from a
    collapsed one-line source; confirm that only the first checkpoint
    belongs inside the ``with`` body.
    """
    with torchfunc.Timer() as stopwatch:
        time.sleep(1)
        inside_reading = stopwatch.checkpoint()  # register checkpoint

    after_exit = stopwatch.checkpoint()
    time.sleep(1)

    # Sleeping after the block must not change what the timer reports.
    repeated_reading = stopwatch.checkpoint()
    total_reading = stopwatch.time()
    assert after_exit == repeated_reading
    assert repeated_reading == total_reading

    assert inside_reading != after_exit
def test_pickle_cache_slowdown():
    """Check first-pass vs. second-pass timing with a pickle-backed cache.

    The cacher is created over ``./disk`` and used as a context manager for
    the whole test. The first traversal must take more than 5 and the
    second less than 0.2 (units as reported by ``torchfunc.Timer`` --
    presumably seconds).
    """
    cache_dir = pathlib.Path("./disk")

    with torchdatasets.cachers.Pickle(cache_dir) as disk_cacher:
        slowed = ExampleDataset(0, 5).map(artificial_slowdown)
        cached_dataset = slowed.cache(disk_cacher)

        with torchfunc.Timer() as stopwatch:
            # Cold pass: every sample goes through the slow mapping.
            index_is_sample(cached_dataset)
            cold_elapsed = stopwatch.checkpoint()
            assert cold_elapsed > 5

            # Warm pass: samples are expected to come from the cacher.
            index_is_sample(cached_dataset)
            warm_elapsed = stopwatch.checkpoint()
            assert warm_elapsed < 0.2
def test_memory_cache():
    """Check that a second DataLoader pass over a memory-cached dataset is faster.

    Two maps are chained on a 1000-element tensor dataset, the result is
    cached with ``torchdatasets.cachers.Memory`` and iterated twice with a
    single-process DataLoader.
    """
    doubled = ExampleTensorDataset(1000).map(lambda t: t * 2)
    summed = doubled.map(lambda t: t + t)
    cached = summed.cache(torchdatasets.cachers.Memory())

    loader = torch.utils.data.DataLoader(cached, num_workers=0, batch_size=10)

    with torchfunc.Timer() as stopwatch:
        # First sweep fills the cache.
        for _batch in loader:
            pass
        initial_pass = stopwatch.checkpoint()

        # Second sweep should be served from the cache.
        for _batch in loader:
            pass
        cached_pass = stopwatch.checkpoint()

        assert cached_pass < initial_pass
def shared_subprocess(cache, refs):
    """Exercise a memory cacher built on ``cache`` with a multi-worker DataLoader.

    Args:
        cache: object handed to ``torchdatasets.cachers.Memory`` as its
            backing store.
        refs: mapping keyed by ``id`` of cache objects; the entry matching
            this cacher's cache (if present) is incremented by one.

    Asserts that the second pass over the loader is faster than the first
    and that the cacher's cache is non-empty afterwards.
    """
    cacher = torchdatasets.cachers.Memory(cache)

    # Count this use only when the cache object is already being tracked.
    cache_key = id(cacher.cache)
    if cache_key in refs.keys():
        refs[cache_key] += 1

    doubled = ExampleTensorDataset(1000).map(lambda t: t * 2)
    cached = doubled.map(lambda t: t + t).cache(cacher)
    loader = torch.utils.data.DataLoader(cached, num_workers=4, batch_size=10)

    with torchfunc.Timer() as stopwatch:
        for _batch in loader:
            pass
        initial_pass = stopwatch.checkpoint()

        for _batch in loader:
            pass
        cached_pass = stopwatch.checkpoint()

        assert cached_pass < initial_pass
        assert len(cacher.cache) > 0
], [ torch.zeros([1, 64, 28, 28]).cuda(), torch.zeros([1, 128, 14, 14]).cuda(), torch.zeros([1, 128, 14, 14]).cuda(), torch.zeros([1, 128, 14, 14]).cuda(), torch.zeros([1, 128, 14, 14]).cuda(), torch.zeros([1, 128, 14, 14]).cuda(), ], [ torch.zeros([1, 128, 14, 14]).cuda(), torch.zeros([1, 256, 7, 7]).cuda(), torch.zeros([1, 256, 7, 7]).cuda(), torch.zeros([1, 256, 7, 7]).cuda(), ]] with torchfunc.Timer() as timer: for i in range(100): _, shift_buffer = model([torch.rand(1, 3, 1, 224, 224).cuda()], shift_buffer=shift_buffer) # model([torch.rand(1,3,8,224,224), torch.rand(1,3,32,224,224)]) total += timer.checkpoint() print(total, total / 100.0) elif cfg.MODEL.MODEL_NAME == 'ResNet' or cfg.MODEL.MODEL_NAME == 'ResNetTSM': with torchfunc.Timer() as timer: for i in range(100): model([torch.rand(1, 3, 8, 224, 224).cuda()]) total += timer.checkpoint() print(total, total / 100.0) # misc.log_model_info(model, cfg, is_train = False)
def perform_test(test_loader, model, test_meter, cfg, writer=None):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from a video
    along its temporal axis. For each clip, it takes 3 crops to cover the
    spatial dimension, followed by averaging the softmax scores across all
    Nx3 views to form a video-level prediction. All video predictions are
    compared to ground-truth labels and the final testing performance is
    logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.

    In addition to the meter statistics, this function accumulates the time
    spent in the model forward pass, writes one
    "<path>, <true class>, <predicted class>" line per prediction to
    ``<cfg.OUTPUT_DIR>/result.txt`` (master process only, classification
    only), and optionally pickles ``[all_preds, all_labels]`` to
    ``cfg.TEST.SAVE_RESULTS_PATH``.

    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the
            testing results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter object, optional): TensorboardWriter
            object to write Tensorboard log.
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    inference_time = 0
    result = []

    # The label list is only read, so open it read-only; "r+" would require
    # write permission on the dataset directory for no benefit.
    class_list_path = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, 'class_list.txt')
    with open(class_list_path, 'r') as f:
        cls_label = [line.strip() for line in f]

    for cur_iter, (inputs, labels, video_idx, meta, path) in enumerate(test_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i, tensor in enumerate(inputs):
                    inputs[i] = tensor.cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            labels = labels.cuda()
            video_idx = video_idx.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i, item in enumerate(val):
                        val[i] = item.cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            # NOTE(review): no explicit CUDA synchronization happens around
            # the timed region, so GPU timings may under-report -- confirm.
            with torchfunc.Timer() as timer:
                preds = model(inputs, meta["boxes"])
                inference_time += timer.checkpoint()
            ori_boxes = meta["ori_boxes"]
            metadata = meta["metadata"]

            preds = preds.detach().cpu() if cfg.NUM_GPUS else preds.detach()
            ori_boxes = (ori_boxes.detach().cpu()
                         if cfg.NUM_GPUS else ori_boxes.detach())
            metadata = (metadata.detach().cpu()
                        if cfg.NUM_GPUS else metadata.detach())

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes), dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds, ori_boxes, metadata)
            test_meter.log_iter_stats(None, cur_iter)
        else:
            # Perform the forward pass.
            with torchfunc.Timer() as timer:
                preds = model(inputs)
                inference_time += timer.checkpoint()

            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds, labels, video_idx = du.all_gather(
                    [preds, labels, video_idx])
            if cfg.NUM_GPUS:
                preds = preds.cpu()
                labels = labels.cpu()
                video_idx = video_idx.cpu()

            # Output to file
            if du.is_master_proc():
                # Convert once per batch instead of once per sample.
                label_ids = labels.detach().cpu().numpy()
                pred_ids = preds.detach().cpu().numpy().argmax(axis=1)
                # NOTE(review): every row uses path[0]; with a batch size
                # above 1 (or after all_gather) this presumably mislabels
                # rows -- confirm the intended index before changing it.
                for i in range(len(preds)):
                    result.append(
                        f'{path[0]}, {cls_label[label_ids[i]]}, {cls_label[pred_ids[i]]}'
                    )

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds.detach(), labels.detach(),
                                    video_idx.detach())
            test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    if not cfg.DETECTION.ENABLE:
        all_preds = test_meter.video_preds.clone().detach()
        all_labels = test_meter.video_labels
        if cfg.NUM_GPUS:
            all_preds = all_preds.cpu()
            all_labels = all_labels.cpu()
        if writer is not None:
            writer.plot_eval(preds=all_preds, labels=all_labels)

        if cfg.TEST.SAVE_RESULTS_PATH != "":
            save_path = os.path.join(cfg.OUTPUT_DIR, cfg.TEST.SAVE_RESULTS_PATH)

            with PathManager.open(save_path, "wb") as f:
                # Save predictions alongside labels (previously the labels
                # were dumped twice and the predictions were lost).
                pickle.dump([all_preds, all_labels], f)

            logger.info("Successfully saved prediction results to {}".format(
                save_path))

    test_meter.finalize_metrics()
    test_meter.reset()

    # `result` is only populated on the master process, so only the master
    # writes the file; otherwise other ranks could truncate it with their
    # empty result list.
    if du.is_master_proc():
        with open(os.path.join(cfg.OUTPUT_DIR, 'result.txt'), 'w') as f:
            f.write('\n'.join(result))

    # Guard against an empty loader to avoid a ZeroDivisionError.
    num_batches = len(test_loader)
    mean_inference_time = inference_time / num_batches if num_batches else 0.0
    logger.info('=> Mean inference time for %d video clips: %.3fs',
                num_batches, mean_inference_time)