def inference(cfg):
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)
    cu.load_test_checkpoint(cfg, model)

    # Create video loaders.
    video_loader = loader.construct_loader(cfg, "test")

    # Create saver.
    saver = Saver(cfg.DATA.PATH_TO_DATA_DIR, video_loader.dataset)

    model.eval()
    for _, (inputs, index) in tqdm(
        enumerate(video_loader), total=len(video_loader)
    ):
        # Transfer the inputs to the current GPU device.
        for i in range(len(inputs)):
            inputs[i] = inputs[i].cuda(non_blocking=True)
        index = index.cuda()
        feats = model(inputs)
        # Gather the features across all the devices before saving.
        if cfg.NUM_GPUS > 1:
            feats, index = du.all_gather([feats, index])
        saver.save(feats, index)
    saver.merge()
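# `Saver` is not defined in this snippet; the sketch below is a minimal,
# assumed implementation consistent with how it is used above (save() is
# called per batch with features and dataset indices, merge() finalizes).
# The class body, file layout, and output name "features.npy" are
# illustrative assumptions, not the original code.
class Saver:
    def __init__(self, out_dir, dataset):
        self.out_dir = out_dir
        self.num_videos = len(dataset)
        self.feats = {}

    def save(self, feats, index):
        # Move features off the GPU so memory is freed between batches.
        feats = feats.detach().cpu()
        for feat, idx in zip(feats, index.cpu()):
            self.feats[int(idx)] = feat.numpy()

    def merge(self):
        # Stack features in dataset order into a single array on disk.
        arr = np.stack([self.feats[i] for i in sorted(self.feats)])
        np.save(os.path.join(self.out_dir, "features.npy"), arr)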
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)
    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            test_loader.dataset.num_videos
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            test_loader.dataset.num_videos
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
            cfg.DATA.MULTI_LABEL,
            cfg.DATA.ENSEMBLE_METHOD,
        )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
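# A worked example of the multi-view bookkeeping above, assuming the usual
# PySlowFast convention that clip i of the test set belongs to video
# i // (NUM_ENSEMBLE_VIEWS * NUM_SPATIAL_CROPS):
#
#     num_views = 10 * 3        # NUM_ENSEMBLE_VIEWS=10, NUM_SPATIAL_CROPS=3
#     clip_idx = 95
#     video_idx = clip_idx // num_views   # -> 3
#
# which is why num_videos must divide evenly by num_views in the assert.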
def __init__(self, cfg, gpu_id=None):
    """
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        gpu_id (Optional[int]): GPU id.
    """
    if cfg.NUM_GPUS:
        self.gpu_id = (
            torch.cuda.current_device() if gpu_id is None else gpu_id
        )

    # Build the video model and print model statistics.
    # self.model = build_model(cfg, gpu_id=gpu_id)
    self.model = build_model(cfg, gpu_id=None)
    self.model.eval()
    self.cfg = cfg

    if cfg.DETECTION.ENABLE:
        # self.object_detector = Detectron2Predictor(cfg, gpu_id=self.gpu_id)
        self.object_detector = Detectron2Predictor(cfg, gpu_id=None)

    logger.info("Start loading model weights.")
    cu.load_test_checkpoint(cfg, self.model)
    logger.info("Finish loading model weights")
def __init__(self, cfg):
    """
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Build the video model and print model statistics.
    self.model = build_model(cfg)
    self.model.eval()
    self.cfg = cfg

    logger.info("Start loading model info")
    misc.log_model_info(self.model, cfg, use_train_input=False)
    logger.info("Start loading model weights")
    cu.load_test_checkpoint(cfg, self.model)
    logger.info("Finish loading model weights")
def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and cfg.TENSORBOARD.MODEL_VIS.ENABLE:
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, is_train=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")
        logger.info(
            "Visualize model for {} data points".format(len(vis_loader))
        )

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None

        # Run visualization on the model.
        run_visualization(vis_loader, model, cfg, writer)
        if writer is not None:
            writer.close()
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    # Create meters for loss tracking.
    test_meter = TrainMeter(test_loader.dataset.num_videos, cfg)

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
def main():
    args = parser_args()
    print(args)
    cfg_file = args.cfg_file
    checkpoint_file = args.checkpoint
    save_checkpoint_file = args.save
    half_flag = args.half

    cfg = get_cfg()
    cfg.merge_from_file(cfg_file)
    cfg.TEST.CHECKPOINT_FILE_PATH = checkpoint_file

    print("Simplifying model!\n")
    with torch.no_grad():
        model = build_model(cfg)
        model.eval()
        cu.load_test_checkpoint(cfg, model)
        if half_flag:
            model.half()
        # Save only the weights, so the checkpoint can be loaded without
        # optimizer or scheduler state.
        with open(save_checkpoint_file, 'wb') as file:
            torch.save({"model_state": model.state_dict()}, file)
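# A minimal sketch of consuming the simplified checkpoint written by main().
# `load_simplified` is a hypothetical helper (not part of this repo); the
# path is whatever was passed as args.save. load_state_dict copies the stored
# (possibly fp16) weights into the freshly built model's parameters.
def load_simplified(cfg, path):
    checkpoint = torch.load(path, map_location="cpu")
    model = build_model(cfg)
    model.load_state_dict(checkpoint["model_state"])
    model.eval()
    return model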
def test(cfg):
    # Build model.
    model = build_model(cfg)
    optimizer = optim.construct_optimizer(model, cfg)

    # Load checkpoint.
    start_epoch = cu.load_test_checkpoint(cfg, model)
    print("Load model epoch", start_epoch)

    # Build data loader.
    test_loader = dataloader.construct_loader(cfg, "test")

    # Perform test.
    results = perform_test(test_loader, model, cfg)
def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and (
        cfg.TENSORBOARD.MODEL_VIS.ENABLE
        or cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE
    ):
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        model.eval()
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, use_train_input=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None

        if cfg.TENSORBOARD.PREDICTIONS_PATH != "":
            assert not cfg.DETECTION.ENABLE, "Detection is not supported."
            logger.info(
                "Visualizing class-level performance from saved results..."
            )
            if writer is not None:
                with g_pathmgr.open(
                    cfg.TENSORBOARD.PREDICTIONS_PATH, "rb"
                ) as f:
                    preds, labels = pickle.load(f, encoding="latin1")
                writer.plot_eval(preds, labels)

        if cfg.TENSORBOARD.MODEL_VIS.ENABLE:
            if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
                assert (
                    not cfg.DETECTION.ENABLE
                ), "Detection task is currently not supported for Grad-CAM visualization."
                if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 1
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST)
                    )
                elif cfg.MODEL.ARCH in cfg.MODEL.MULTI_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 2
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST)
                    )
                else:
                    raise NotImplementedError(
                        "Model arch {} is not in {}".format(
                            cfg.MODEL.ARCH,
                            cfg.MODEL.SINGLE_PATHWAY_ARCH
                            + cfg.MODEL.MULTI_PATHWAY_ARCH,
                        )
                    )
            logger.info(
                "Visualize model analysis for {} iterations".format(
                    len(vis_loader)
                )
            )
            # Run visualization on the model.
            run_visualization(vis_loader, model, cfg, writer)

        if cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE:
            logger.info(
                "Visualize Wrong Predictions for {} iterations".format(
                    len(vis_loader)
                )
            )
            perform_wrong_prediction_vis(vis_loader, model, cfg)

        if writer is not None:
            writer.close()
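# A hypothetical config fragment that satisfies the Grad-CAM checks above for
# a two-pathway architecture (the layer names are examples of the
# "stage/block" form used by upstream PySlowFast, not values from this repo):
#
#     TENSORBOARD:
#       MODEL_VIS:
#         GRAD_CAM:
#           ENABLE: True
#           LAYER_LIST: ["s5/pathway0_res2", "s5/pathway1_res2"]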
def get_predictions(self):
    """
    Predict and append prediction results to each box in each keyframe in
    `self.pred_boxes` dictionary.
    """
    # Set random seed from configs.
    np.random.seed(self.cfg.RNG_SEED)
    torch.manual_seed(self.cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(self.cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Run demo with config:")
    logger.info(self.cfg)
    assert (
        self.cfg.NUM_GPUS <= 1
    ), "Cannot run demo visualization on multiple GPUs."

    # Build the video model and print model statistics.
    model = build_model(self.cfg)
    model.eval()
    logger.info("Start loading model info")
    misc.log_model_info(model, self.cfg, use_train_input=False)
    logger.info("Start loading model weights")
    cu.load_test_checkpoint(self.cfg, model)
    logger.info("Finish loading model weights")
    logger.info("Start making predictions for precomputed boxes.")

    for keyframe_idx, boxes_and_labels in tqdm.tqdm(
        self.pred_boxes.items()
    ):
        inputs = self.get_input_clip(keyframe_idx)
        boxes = boxes_and_labels[0]
        boxes = torch.from_numpy(np.array(boxes)).float()

        box_transformed = scale_boxes(
            self.cfg.DATA.TEST_CROP_SIZE,
            boxes,
            self.display_height,
            self.display_width,
        )

        # Pad frame index for each box.
        box_inputs = torch.cat(
            [
                torch.full((box_transformed.shape[0], 1), float(0)),
                box_transformed,
            ],
            axis=1,
        )

        if self.cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            box_inputs = box_inputs.cuda()

        preds = model(inputs, box_inputs)

        preds = preds.detach()
        if self.cfg.NUM_GPUS:
            preds = preds.cpu()
        boxes_and_labels[1] = preds
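# `scale_boxes` is imported from elsewhere; the sketch below is consistent
# with upstream PySlowFast's cv2_transform.scale_boxes, which rescales box
# coordinates as if the frame's short side were resized to `size`. Treat it
# as an assumed reference, not this repo's verified implementation.
def scale_boxes(size, boxes, height, width):
    # Short side already matches the target size: nothing to do.
    if (width <= height and width == size) or (
        height <= width and height == size
    ):
        return boxes
    # Scale all box coordinates by the factor applied to the short side.
    if width < height:
        boxes *= float(size) / width
    else:
        boxes *= float(size) / height
    return boxes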
def test(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    de.bridge.set_bridge('native')

    gpu_id = 1

    # sample_rate = 1
    # resize_h = 270
    # resize_w = 360
    # augment = ['FiveCrop', 'TenCrop', 'None'][1]

    sample_rate = 1
    resize_h = 270
    resize_w = 360
    augment = ['FiveCrop', 'TenCrop', 'None'][0]

    crop_h = cfg.DATA.TEST_CROP_SIZE  # 256
    crop_w = cfg.DATA.TEST_CROP_SIZE  # 256

    if 'SLOWFAST' in cfg.TEST.CHECKPOINT_FILE_PATH and \
            'I3D' not in cfg.TEST.CHECKPOINT_FILE_PATH:
        model_type = 'slowfast'
        feature_dim = 2304
    elif 'SLOWFAST' not in cfg.TEST.CHECKPOINT_FILE_PATH and \
            'I3D' in cfg.TEST.CHECKPOINT_FILE_PATH:
        model_type = 'i3d'
        feature_dim = 2048
    else:
        raise Exception('Invalid Model.')

    video_dir = cfg.DATA.PATH_TO_DATA_DIR
    if augment == 'FiveCrop':
        feature_dir = os.path.join(
            cfg.OUTPUT_DIR,
            'feature_{}_{}x{}_{}x{}_{}_5'.format(
                model_type, resize_h, resize_w, crop_h, crop_w, sample_rate))
    elif augment == 'TenCrop':
        feature_dir = os.path.join(
            cfg.OUTPUT_DIR,
            'feature_{}_{}x{}_{}x{}_{}_10'.format(
                model_type, resize_h, resize_w, crop_h, crop_w, sample_rate))
    elif augment == 'None':
        feature_dir = os.path.join(
            cfg.OUTPUT_DIR,
            'feature_{}_{}x{}_{}_1'.format(
                model_type, resize_h, resize_w, sample_rate))
    else:
        raise Exception('Invalid Augment.')

    norm_transform = transforms.Normalize(mean=cfg.DATA.MEAN,
                                          std=cfg.DATA.STD)
    if augment == 'FiveCrop':
        frame_transform = transforms.Compose([
            transforms.Resize(size=(resize_h, resize_w)),
            transforms.FiveCrop(size=(crop_h, crop_w)),
            transforms.Lambda(
                lambda crops: [transforms.ToTensor()(crop) for crop in crops]),
            transforms.Lambda(
                lambda crops: [norm_transform(crop) for crop in crops]),
            transforms.Lambda(lambda crops: torch.stack(crops))
        ])
    elif augment == 'TenCrop':
        frame_transform = transforms.Compose([
            transforms.Resize(size=(resize_h, resize_w)),
            transforms.TenCrop(size=(crop_h, crop_w)),
            transforms.Lambda(
                lambda crops: [transforms.ToTensor()(crop) for crop in crops]),
            transforms.Lambda(
                lambda crops: [norm_transform(crop) for crop in crops]),
            transforms.Lambda(lambda crops: torch.stack(crops))
        ])
    elif augment == 'None':
        frame_transform = transforms.Compose([
            transforms.Resize(size=(resize_h, resize_w)),
            transforms.ToTensor(),
            norm_transform,
            transforms.Lambda(lambda img: img.unsqueeze(0))
        ])
    else:
        raise Exception('Invalid Augment.')

    # Build the video model and print model statistics.
    model = build_model(cfg)
    print(model)
    cu.load_test_checkpoint(cfg, model)
    model.eval()
    model.to(torch.device('cuda:{}'.format(gpu_id)))

    if not os.path.exists(feature_dir):
        os.makedirs(feature_dir)

    video_files = os.listdir(video_dir)
    video_files.sort()
    for video_file in video_files:
        video_name = video_file[:-4]
        video_file = os.path.join(video_dir, video_file)
        feature_file = '{}.npy'.format(video_name)
        if feature_file in os.listdir(feature_dir):
            print('Skipped.')
            continue
        feature_file = os.path.join(feature_dir, feature_file)
        print(video_file)
        print(feature_file)

        video_feature = []
        vr = de.VideoReader(video_file, ctx=de.cpu(0))
        frame_num = len(vr)
        video_meta = skvideo.io.ffprobe(video_file)
        assert (frame_num == int(video_meta['video']['@nb_frames']))
        sample_idxs = np.arange(0, frame_num, sample_rate)
        clip_size = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
        # Cache decoded frames so overlapping clips are not re-decoded.
        frame_buffer = {}
        buffer_size = 128
        with torch.no_grad():
            for _, sample_idx in enumerate(tqdm(sample_idxs)):
                fast_pathway_idxs = np.arange(
                    sample_idx - clip_size // 2,
                    sample_idx - clip_size // 2 + clip_size,
                    cfg.DATA.SAMPLING_RATE)
                # Clamp indices that fall outside the video.
                fast_pathway_idxs[fast_pathway_idxs < 0] = 0
                fast_pathway_idxs[fast_pathway_idxs > frame_num - 1] = \
                    frame_num - 1
                assert (fast_pathway_idxs.size == cfg.DATA.NUM_FRAMES)
                fast_pathway_frames = []
                for idx in fast_pathway_idxs:
                    if idx not in frame_buffer:
                        frame = vr[idx].asnumpy()  # e.g. (540, 960, 3)
                        frame = Image.fromarray(frame)
                        frame = frame_transform(frame)
                        frame = frame.to(
                            torch.device('cuda:{}'.format(gpu_id)))
                        if augment == 'FiveCrop':
                            assert (frame.shape[0] == 5)
                            assert (frame.shape[1] == 3)
                            assert (frame.shape[2] == crop_h)
                            assert (frame.shape[3] == crop_w)
                        elif augment == 'TenCrop':
                            assert (frame.shape[0] == 10)
                            assert (frame.shape[1] == 3)
                            assert (frame.shape[2] == crop_h)
                            assert (frame.shape[3] == crop_w)
                        elif augment == 'None':
                            assert (frame.shape[0] == 1)
                            assert (frame.shape[1] == 3)
                            assert (frame.shape[2] == resize_h)
                            assert (frame.shape[3] == resize_w)
                        else:
                            raise Exception('Invalid Augment.')
                        frame_buffer[idx] = frame
                        if len(frame_buffer) > buffer_size:
                            frame_buffer.pop(min(list(frame_buffer.keys())))
                    fast_pathway_frames.append(
                        frame_buffer[idx].unsqueeze(2))
                fast_pathway_frames = torch.cat(fast_pathway_frames, 2)

                if model_type == 'slowfast':
                    slow_pathway_idxs = fast_pathway_idxs[::cfg.SLOWFAST.ALPHA]
                    assert (slow_pathway_idxs.size ==
                            cfg.DATA.NUM_FRAMES / cfg.SLOWFAST.ALPHA)
                    slow_pathway_frames = []
                    for idx in slow_pathway_idxs:
                        if idx not in frame_buffer:
                            frame = vr[idx].asnumpy()  # e.g. (540, 960, 3)
                            frame = Image.fromarray(frame)
                            frame = frame_transform(frame)
                            frame = frame.to(
                                torch.device('cuda:{}'.format(gpu_id)))
                            if augment == 'FiveCrop':
                                assert (frame.shape[0] == 5)
                                assert (frame.shape[1] == 3)
                                assert (frame.shape[2] == crop_h)
                                assert (frame.shape[3] == crop_w)
                            elif augment == 'TenCrop':
                                assert (frame.shape[0] == 10)
                                assert (frame.shape[1] == 3)
                                assert (frame.shape[2] == crop_h)
                                assert (frame.shape[3] == crop_w)
                            elif augment == 'None':
                                assert (frame.shape[0] == 1)
                                assert (frame.shape[1] == 3)
                                assert (frame.shape[2] == resize_h)
                                assert (frame.shape[3] == resize_w)
                            else:
                                raise Exception('Invalid Augment.')
                            frame_buffer[idx] = frame
                            if len(frame_buffer) > buffer_size:
                                frame_buffer.pop(
                                    min(list(frame_buffer.keys())))
                        slow_pathway_frames.append(
                            frame_buffer[idx].unsqueeze(2))
                    slow_pathway_frames = torch.cat(slow_pathway_frames, 2)

                if model_type == 'slowfast':
                    frame_feature = model(
                        [slow_pathway_frames, fast_pathway_frames],
                        extract_feature=True)
                elif model_type == 'i3d':
                    frame_feature = model([fast_pathway_frames],
                                          extract_feature=True)
                else:
                    raise Exception('Invalid Model.')
                # Example shapes with FiveCrop:
                #   fast_pathway_frames: torch.Size([5, 3, 32, 256, 256])
                #   slow_pathway_frames: torch.Size([5, 3, 8, 256, 256])
                assert (frame_feature.shape[1] == feature_dim)
                if augment == 'FiveCrop':
                    assert (frame_feature.shape[0] == 5)
                elif augment == 'TenCrop':
                    assert (frame_feature.shape[0] == 10)
                elif augment == 'None':
                    assert (frame_feature.shape[0] == 1)
                else:
                    raise Exception('Invalid Augment.')
                # NOTE: SlowFast is pretrained on 30-fps video; be careful
                # with other frame rates, as features may need to be
                # re-extracted after resampling.
                frame_feature = torch.unsqueeze(frame_feature, dim=0)
                frame_feature = frame_feature.cpu().numpy()
                video_feature.append(frame_feature)

        video_feature = np.concatenate(video_feature, axis=0)
        print(video_feature.shape)
        np.save(feature_file, video_feature)
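# Reading the saved features back, e.g. for a SlowFast model with FiveCrop
# augmentation (the file name below is a placeholder):
#
#     feats = np.load('feature_file.npy')  # shape (num_samples, 5, 2304)
#
# Axis 0 indexes the sampled frames, axis 1 the crops, axis 2 the channels.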
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    out_str_prefix = "lin" if cfg.MODEL.DETACH_FINAL_FC else ""

    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    if (
        cfg.TASK == "ssl"
        and cfg.MODEL.MODEL_NAME == "ContrastiveModel"
        and cfg.CONTRASTIVE.KNN_ON
    ):
        train_loader = loader.construct_loader(cfg, "train")
        out_str_prefix = "knn"
        if hasattr(model, "module"):
            model.module.init_knn_labels(train_loader)
        else:
            model.init_knn_labels(train_loader)

    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            test_loader.dataset.num_videos
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            test_loader.dataset.num_videos
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES
            if not cfg.TASK == "ssl"
            else cfg.CONTRASTIVE.NUM_CLASSES_DOWNSTREAM,
            len(test_loader),
            cfg.DATA.MULTI_LABEL,
            cfg.DATA.ENSEMBLE_METHOD,
        )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()

    result_string = (
        "_a{}{}{} Top1 Acc: {} Top5 Acc: {} MEM: {:.2f} dataset: {}{}"
        "".format(
            out_str_prefix,
            cfg.TEST.DATASET[0],
            test_meter.stats["top1_acc"],
            test_meter.stats["top1_acc"],
            test_meter.stats["top5_acc"],
            misc.gpu_mem_usage(),
            cfg.TEST.DATASET[0],
            cfg.MODEL.NUM_CLASSES,
        )
    )
    logger.info("testing done: {}".format(result_string))

    return result_string
def test(cfg):
    """
    Perform multi-view testing on the pretrained audio model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the audio model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg)

    cu.load_test_checkpoint(cfg, model)

    # Create audio testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    assert (
        len(test_loader.dataset) % cfg.TEST.NUM_ENSEMBLE_VIEWS == 0
    )
    # Create meters for multi-view testing.
    if cfg.TEST.DATASET == 'epickitchens':
        test_meter = EPICTestMeter(
            len(test_loader.dataset) // cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
            cfg.DATA.ENSEMBLE_METHOD,
        )
    else:
        test_meter = TestMeter(
            len(test_loader.dataset) // cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.MODEL.NUM_CLASSES[0],
            len(test_loader),
            cfg.DATA.MULTI_LABEL,
            cfg.DATA.ENSEMBLE_METHOD,
        )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter, preds, preds_clips, labels, metadata = perform_test(
        test_loader, model, test_meter, cfg, writer
    )

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {
                'verb_output': preds[0],
                'noun_output': preds[1],
                'narration_id': metadata,
            }
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            file_path = os.path.join(
                scores_path, cfg.EPICKITCHENS.TEST_SPLIT + '.pkl'
            )
            pickle.dump(results, open(file_path, 'wb'))
        else:
            if cfg.TEST.DATASET == 'vggsound':
                get_stats(preds, labels)
            results = {'scores': preds, 'labels': labels}
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            file_path = os.path.join(scores_path, 'test.pkl')
            pickle.dump(results, open(file_path, 'wb'))

    if writer is not None:
        writer.close()
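# The pickled scores written above can be read back directly; a brief sketch
# for the non-EPIC branch (same os.path.join layout as the code above):
#
#     with open(os.path.join(cfg.OUTPUT_DIR, 'scores', 'test.pkl'), 'rb') as f:
#         results = pickle.load(f)
#     preds, labels = results['scores'], results['labels']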
        inputs = inputs.cuda(non_blocking=True)
        out = model(inputs)
        slow_ft = out[0].detach().cpu().numpy()
        fast_ft = out[1].detach().cpu().numpy()
        slow_dset[i_batch * batch_size:(i_batch + 1) * batch_size] = slow_ft
        fast_dset[i_batch * batch_size:(i_batch + 1) * batch_size] = fast_ft
    slow_h5.close()
    fast_h5.close()


if __name__ == "__main__":
    args = parse_args()
    cfg = load_config(args)
    logger = logging.get_logger(__name__)
    logging.setup_logging(cfg.OUTPUT_DIR)
    use_gpu = cfg.NUM_GPUS > 0

    # Set model.
    model = SlowFast(cfg)
    if use_gpu:
        cur_device = torch.cuda.current_device()
        model = model.cuda(device=cur_device)
    cu.load_test_checkpoint(cfg, model)
    model.forward = forward.__get__(model, SlowFast)
    model.eval()

    # Process datasets.
    root = cfg.DATA.PATH_TO_DATA_DIR
    gen_dataset(cfg, 'train', root)
    gen_dataset(cfg, 'val', root)
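# `model.forward = forward.__get__(model, SlowFast)` binds a module-level
# function as this instance's forward method, replacing the classification
# forward with a feature-extracting one that returns per-pathway features
# (out[0] slow, out[1] fast, as consumed above). A sketch of such a function,
# assuming the stage layout of upstream PySlowFast's SlowFast module; the
# exact cut point before the head is an assumption:
def forward(self, x):
    x = self.s1(x)
    x = self.s1_fuse(x)
    x = self.s2(x)
    x = self.s2_fuse(x)
    for pathway in range(self.num_pathways):
        pool = getattr(self, "pathway{}_pool".format(pathway))
        x[pathway] = pool(x[pathway])
    x = self.s3(x)
    x = self.s3_fuse(x)
    x = self.s4(x)
    x = self.s4_fuse(x)
    x = self.s5(x)
    # Average-pool each pathway to a (batch, channels) feature instead of
    # running the classification head.
    return [feat.mean(dim=[2, 3, 4]) for feat in x]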
def test(cfg):
    """
    Perform multi-view testing/feature extraction on the pretrained video
    model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    cu.load_test_checkpoint(cfg, model)

    vid_root = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, cfg.DATA.PATH_PREFIX)
    videos_list_file = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, "vid_list.csv")

    print("Loading Video List ...")
    with open(videos_list_file) as f:
        videos = sorted(
            [x.strip() for x in f.readlines() if len(x.strip()) > 0]
        )
    print("Done")
    print("----------------------------------------------------------")

    if cfg.DATA.READ_VID_FILE:
        rejected_vids = []

    print("{} videos to be processed...".format(len(videos)))
    print("----------------------------------------------------------")

    start_time = time.time()
    for vid_no, vid in enumerate(videos):
        # Create video testing loaders.
        path_to_vid = os.path.join(vid_root, os.path.split(vid)[0])
        vid_id = os.path.split(vid)[1]

        if cfg.DATA.READ_VID_FILE:
            try:
                _ = VideoFileClip(
                    os.path.join(path_to_vid, vid_id) + cfg.DATA.VID_FILE_EXT,
                    audio=False,
                    fps_source="fps",
                )
            except Exception as e:
                print("{}. {} cannot be read with error {}".format(
                    vid_no, vid, e
                ))
                print("----------------------------------------------------------")
                rejected_vids.append(vid)
                continue

        out_path = os.path.join(cfg.OUTPUT_DIR, os.path.split(vid)[0])
        out_file = vid_id.split(".")[0] + "_{}.npy".format(
            cfg.DATA.NUM_FRAMES
        )
        if os.path.exists(os.path.join(out_path, out_file)):
            print("{}. {} already exists".format(vid_no, out_file))
            print("----------------------------------------------------------")
            continue

        print("{}. Processing {}...".format(vid_no, vid))

        dataset = VideoSet(
            cfg, path_to_vid, vid_id, read_vid_file=cfg.DATA.READ_VID_FILE
        )
        test_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=cfg.TEST.BATCH_SIZE,
            shuffle=False,
            sampler=None,
            num_workers=cfg.DATA_LOADER.NUM_WORKERS,
            pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
            drop_last=False,
        )

        # Perform multi-view test on the entire dataset.
        feat_arr = perform_inference(test_loader, model, cfg)

        os.makedirs(out_path, exist_ok=True)
        np.save(os.path.join(out_path, out_file), feat_arr)
        print("Done.")
        print("----------------------------------------------------------")

    if cfg.DATA.READ_VID_FILE:
        print("Rejected Videos: {}".format(rejected_vids))

    end_time = time.time()
    hours, minutes, seconds = calculate_time_taken(start_time, end_time)
    print(
        "Time taken: {} hour(s), {} minute(s) and {} second(s)".format(
            hours, minutes, seconds
        )
    )
    print("----------------------------------------------------------")
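# `calculate_time_taken` is defined elsewhere in this repo; a minimal
# implementation consistent with how it is used above:
def calculate_time_taken(start_time, end_time):
    # Split the elapsed wall-clock seconds into hours/minutes/seconds.
    elapsed = int(end_time - start_time)
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    return hours, minutes, seconds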