def build_model(cfg):
    """
    Build slowfast model
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        print("Testing with random initialization. Only for debugging.")

    return model

def load_checkpoint(cfg, model):
    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

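# A minimal usage sketch for the helper above, assuming `cfg` is a SlowFast
# CfgNode and `model_builder` is imported as elsewhere in this file.
# `run_eval` is a hypothetical entry point, not part of the codebase.
def run_eval(cfg):
    model = model_builder.build_model(cfg)
    load_checkpoint(cfg, model)  # resolves TEST path -> last in OUTPUT_DIR -> TRAIN path
    model.eval()
    return model
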
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        writer = SummaryWriter(log_dir=cfg.OUTPUT_DIR)
    else:
        writer = None

    if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS) and not cfg.DEBUG:
        tags = []
        if 'TAGS' in cfg and cfg.TAGS != []:
            tags = list(cfg.TAGS)
        neptune.set_project('Serre-Lab/motion')
        ######################
        overrides = sys.argv[1:]
        overrides_dict = {}
        for i in range(len(overrides) // 2):
            overrides_dict[overrides[2 * i]] = overrides[2 * i + 1]
        overrides_dict['dir'] = cfg.OUTPUT_DIR
        ######################
        if 'NEP_ID' in cfg and cfg.NEP_ID != "":
            session = Session()
            project = session.get_project(project_qualified_name='Serre-Lab/motion')
            nep_experiment = project.get_experiments(id=cfg.NEP_ID)[0]
        else:
            nep_experiment = neptune.create_experiment(
                name=cfg.NAME, params=overrides_dict, tags=tags)
    else:
        nep_experiment = None

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        misc.log_model_info(model, cfg, is_train=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(
            last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                    writer, nep_experiment, cfg)

        # Compute precise BN stats.
        # if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
        #     calculate_and_update_precise_bn(
        #         train_loader, model, cfg.BN.NUM_BATCHES_PRECISE
        #     )

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, nep_experiment, cfg)

        if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS) and not cfg.DEBUG:
            nep_experiment.log_metric('epoch', cur_epoch)

def demo(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    if cfg.DETECTION.ENABLE:
        # Load object detector from detectron2.
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
        dtron2_cfg.MODEL.WEIGHTS = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS
        object_predictor = DefaultPredictor(dtron2_cfg)
        # Load the labels of the AVA dataset.
        with open(cfg.DEMO.LABEL_FILE_PATH) as f:
            labels = f.read().split('\n')[:-1]
        palette = np.random.randint(64, 128, (len(labels), 3)).tolist()
        boxes = []
    else:
        # Load the labels of the Kinetics-400 dataset.
        labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
        labels = labels_df['name'].values

    frame_provider = VideoReader(cfg)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    frames = []
    pred_labels = []
    s = 0.
    for able_to_read, frame in frame_provider:
        if not able_to_read:
            # When the end frame is reached, clear the buffer and continue
            # with the next one.
            frames = []
            continue

        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            if cfg.DETECTION.ENABLE and len(frames) == seq_len // 2 - 1:
                mid_frame = frame

        if len(frames) == seq_len:
            start = time()
            if cfg.DETECTION.ENABLE:
                outputs = object_predictor(mid_frame)
                fields = outputs["instances"]._fields
                pred_classes = fields["pred_classes"]
                selection_mask = pred_classes == 0
                # Acquire person boxes.
                pred_classes = pred_classes[selection_mask]
                pred_boxes = fields["pred_boxes"].tensor[selection_mask]
                scores = fields["scores"][selection_mask]
                boxes = cv2_transform.scale_boxes(
                    cfg.DATA.TEST_CROP_SIZE, pred_boxes,
                    frame_provider.display_height,
                    frame_provider.display_width)
                boxes = torch.cat(
                    [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes],
                    axis=1)

            inputs = torch.as_tensor(frames).float()
            inputs = inputs / 255.0
            # Perform color normalization.
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)
            # 1 C T H W.
            inputs = inputs.unsqueeze(0)
            # Sample frames for the fast pathway.
            index = torch.linspace(0, inputs.shape[2] - 1,
                                   cfg.DATA.NUM_FRAMES).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            # logger.info('fast_pathway.shape={}'.format(fast_pathway.shape))
            # Sample frames for the slow pathway.
            index = torch.linspace(0, fast_pathway.shape[2] - 1,
                                   fast_pathway.shape[2] // cfg.SLOWFAST.ALPHA).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            # logger.info('slow_pathway.shape={}'.format(slow_pathway.shape))
            inputs = [slow_pathway, fast_pathway]

            """
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            """

            # Perform the forward pass.
            if cfg.DETECTION.ENABLE:
                # When there is nothing in the scene, use a dummy variable
                # to disable all computations below.
                if not len(boxes):
                    preds = torch.tensor([])
                else:
                    preds = model(inputs, boxes)
            else:
                preds = model(inputs)

            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            if cfg.DETECTION.ENABLE:
                # This post-processing is intentionally assigned to the CPU
                # since my laptop GPU (RTX 2080) runs out of memory; if your
                # GPU is more powerful, I'd recommend changing this section
                # to let CUDA do the processing.
                preds = preds.cpu().detach().numpy()
                pred_masks = preds > .1
                label_ids = [np.nonzero(pred_mask)[0] for pred_mask in pred_masks]
                pred_labels = [
                    [labels[label_id] for label_id in perbox_label_ids]
                    for perbox_label_ids in label_ids
                ]
                # I'm unsure how detectron2 rescales boxes to the original
                # image size, so I use the input boxes of SlowFast and rescale
                # them back instead; it's safer, and even if the boxes were
                # not rescaled by cv2_transform.rescale_boxes, it still works.
                boxes = boxes.cpu().detach().numpy()
                ratio = np.min([
                    frame_provider.display_height,
                    frame_provider.display_width
                ]) / cfg.DATA.TEST_CROP_SIZE
                boxes = boxes[:, 1:] * ratio
            else:
                ## Option 1: single-label inference selected from the highest
                ## probability entry.
                # label_id = preds.argmax(-1).cpu()
                # pred_label = labels[label_id]
                # Option 2: multi-label inference selected from probability
                # entries > threshold.
                label_ids = torch.nonzero(
                    preds.squeeze() > .1).reshape(-1).cpu().detach().numpy()
                pred_labels = labels[label_ids]
                logger.info(pred_labels)
                if not list(pred_labels):
                    pred_labels = ['Unknown']

            # # Option 1: remove the oldest frame in the buffer to make room
            # # for the new one.
            # frames.pop(0)
            # Option 2: empty the buffer.
            frames = []
            s = time() - start

        if cfg.DETECTION.ENABLE and pred_labels and boxes.any():
            for box, box_labels in zip(boxes.astype(int), pred_labels):
                cv2.rectangle(frame, tuple(box[:2]), tuple(box[2:]),
                              (0, 255, 0), thickness=2)
                label_origin = box[:2]
                for label in box_labels:
                    label_origin[-1] -= 5
                    (label_width, label_height), _ = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, .5, 2)
                    cv2.rectangle(
                        frame,
                        (label_origin[0], label_origin[1] + 5),
                        (label_origin[0] + label_width,
                         label_origin[1] - label_height - 5),
                        palette[labels.index(label)], -1)
                    cv2.putText(frame, label, tuple(label_origin),
                                cv2.FONT_HERSHEY_SIMPLEX, .5,
                                (255, 255, 255), 1)
                    label_origin[-1] -= label_height + 5

        if not cfg.DETECTION.ENABLE:
            # Display predicted labels on the frame.
            y_offset = 50
            cv2.putText(frame, 'Action:', (10, y_offset),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=.65, color=(0, 235, 0), thickness=2)
            for pred_label in pred_labels:
                y_offset += 30
                cv2.putText(frame, '{}'.format(pred_label), (20, y_offset),
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale=.65, color=(0, 235, 0), thickness=2)

        # Display prediction speed.
        cv2.putText(frame, 'Speed: {:.2f}s'.format(s), (10, 25),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=.65, color=(0, 235, 0), thickness=2)
        # Display the frame.
        cv2.imshow('SlowFast', frame)
        # Hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break

    frame_provider.clean()

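# A small sketch of the box preprocessing step used in the demo above.
# Assumption: the detection head consumes ROIAlign-style boxes of the form
# [batch_idx, x1, y1, x2, y2]; `prepend_batch_index` is a hypothetical helper
# that factors out the `torch.cat` call repeated in these demos.
def prepend_batch_index(boxes, batch_idx=0):
    """Prefix an (N, 4) box tensor with a constant batch-index column -> (N, 5)."""
    idx_col = torch.full((boxes.shape[0], 1), float(batch_idx), device=boxes.device)
    return torch.cat([idx_col, boxes], dim=1)
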
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    assert (
        len(test_loader.dataset)
        % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
        == 0
    )
    # Create meters for multi-view testing.
    test_meter = TestMeter(
        len(test_loader.dataset)
        // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
        cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
        cfg.MODEL.NUM_CLASSES,
        len(test_loader),
        cfg.DATA.MULTI_LABEL,
        cfg.DATA.ENSEMBLE_METHOD,
    )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()

def demo(cfg, backbone):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    darknetlib_path = '/home/ubuntu/hanhbd/SlowFast/detector/libdarknet.so'
    config_path = '/home/ubuntu/hanhbd/SlowFast/detector/yolov4.cfg'
    meta_path = '/home/ubuntu/hanhbd/SlowFast/detector/coco.data'
    classes_path = '/home/ubuntu/hanhbd/SlowFast/detector/coco.names'
    weight_path = '/home/ubuntu/hanhbd/SlowFast/detector/yolov4.weights'

    if backbone == 'yolo':
        object_predictor = YOLO.get_instance(darknetlib_path, config_path,
                                             meta_path, classes_path,
                                             weight_path)
    else:
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
        dtron2_cfg.MODEL.WEIGHTS = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS
        object_predictor = DefaultPredictor(dtron2_cfg)

    with open(cfg.DEMO.LABEL_FILE_PATH) as f:
        labels = f.read().split('\n')[:-1]
    palette = np.random.randint(64, 128, (len(labels), 3)).tolist()

    count_xxx = 0
    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    frames = []
    org_frames = []
    mid_frame = None
    pred_labels = []
    draw_imgs = []

    cap = cv2.VideoCapture(cfg.DEMO.DATA_SOURCE)
    was_read, frame = cap.read()
    display_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    display_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    videowriter = cv2.VideoWriter('./result/testset_fighting_05.avi', fourcc,
                                  fps, (display_width, display_height))

    while was_read:
        was_read, frame = cap.read()
        if not was_read:
            videowriter.release()
            break
        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            org_frames.append(frame)
        else:
            # Predict all person boxes in all frames.
            start = time()
            mid_frame = org_frames[seq_len // 2 - 2]
            # Only draw half of the frames because we slide by half the
            # sequence length.
            if cfg.DETECTION.ENABLE and len(draw_imgs) == 0:
                for idx in range(seq_len // 2 - 1):
                    image = org_frames[idx]
                    boxes = detector(object_predictor, image, backbone, cfg,
                                     display_height, display_width)
                    # boxes = object_predictor.detect_image(img)
                    # boxes = torch.as_tensor(boxes).float().cuda()
                    boxes = torch.cat(
                        [torch.full((boxes.shape[0], 1), float(0)).cuda(),
                         boxes], axis=1)
                    boxes = boxes.cpu().detach().numpy()
                    if backbone == 'yolo':
                        boxes = boxes[:, 1:]
                    else:
                        ratio = np.min([display_height, display_width]) / cfg.DATA.TEST_CROP_SIZE
                        boxes = boxes[:, 1:] * ratio
                    for box in boxes:
                        xmin, ymin, xmax, ymax = box
                        cv2.rectangle(image, (xmin, ymin), (xmax, ymax),
                                      (0, 255, 0), thickness=2)
                    draw_imgs.append(image)

            # Detect boxes in the mid frame.
            if cfg.DETECTION.ENABLE:
                boxes = detector(object_predictor, mid_frame, backbone, cfg,
                                 display_height,
                                 display_width)
                boxes = torch.cat(
                    [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes],
                    axis=1)

            inputs = torch.from_numpy(np.array(frames)).float()
            inputs = inputs / 255.0
            # Perform color normalization.
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)
            # 1 C T H W.
            inputs = inputs.unsqueeze(0)

            # Sample frames for the fast pathway.
            index = torch.linspace(0, inputs.shape[2] - 1,
                                   cfg.DATA.NUM_FRAMES).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            # Sample frames for the slow pathway.
            index = torch.linspace(0, fast_pathway.shape[2] - 1,
                                   fast_pathway.shape[2] // cfg.SLOWFAST.ALPHA).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            inputs = [slow_pathway, fast_pathway]

            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # When there is nothing in the scene, use a dummy variable to
            # disable all computations below.
            if not len(boxes):
                preds = torch.tensor([])
            else:
                preds = model(inputs, boxes)

            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            # Post-processing.
            preds = preds.cpu().detach().numpy()
            pred_masks = preds > .1
            label_ids = [np.nonzero(pred_mask)[0] for pred_mask in pred_masks]
            pred_labels = [
                [labels[label_id] for label_id in perbox_label_ids]
                for perbox_label_ids in label_ids
            ]
            print(pred_labels)

            boxes = boxes.cpu().detach().numpy()
            if backbone == 'yolo':
                boxes = boxes[:, 1:]
            else:
                ratio = np.min([display_height, display_width]) / cfg.DATA.TEST_CROP_SIZE
                boxes = boxes[:, 1:] * ratio

            # Draw the result on the mid frame.
            if pred_labels and boxes.any():
                for box, box_labels in zip(boxes.astype(int), pred_labels):
                    xmin, ymin, xmax, ymax = box
                    cv2.rectangle(mid_frame, (xmin, ymin), (xmax, ymax),
                                  (0, 255, 0), thickness=2)
                    label_origin = box[:2]
                    for label in box_labels:
                        label_origin[-1] -= 5
                        (label_width, label_height), _ = cv2.getTextSize(
                            label, cv2.FONT_HERSHEY_SIMPLEX, .5, 2)
                        cv2.rectangle(
                            mid_frame,
                            (label_origin[0], label_origin[1] + 5),
                            (label_origin[0] + label_width,
                             label_origin[1] - label_height - 5),
                            palette[labels.index(label)], -1
                        )
                        cv2.putText(
                            mid_frame, label, tuple(label_origin),
                            cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1
                        )
                        label_origin[-1] -= label_height + 5

            # Append the mid frame to the draw array.
            draw_imgs.append(mid_frame)

            # Write the images to the video.
            for img_ in draw_imgs:
                videowriter.write(img_)
            print("time process", (time() - start) / 64)

            # Clean the buffers of frames and org_frames, sliding by half of
            # seq_len.
            # frames = frames[seq_len//2 - 1:]
            # org_frames = org_frames[seq_len//2 - 1:]
            frames = frames[1:]
            org_frames = org_frames[1:]
            draw_imgs = draw_imgs[-1:]
            count_xxx += 1

def test(cfg, cnt=-1):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Perform multi-view test on the entire dataset.
    scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
    if not os.path.exists(scores_path):
        os.makedirs(scores_path)
    filename_root = cfg.EPICKITCHENS.TEST_LIST.split('.')[0]
    if cnt >= 0:
        file_name = '{}_{}_{}.pkl'.format(filename_root, cnt, cfg.MODEL.MODEL_NAME)
    else:
        file_name = '{}_{}_{}.pkl'.format(filename_root, 'test_only', cfg.MODEL.MODEL_NAME)
    file_path = os.path.join(scores_path, file_name)
    logger.info(file_path)

    # Print config.
    # if cnt < 0:
    #     logger.info("Test with config:")
    #     logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if cfg.EPICKITCHENS.USE_BBOX:
        model.module.load_weight_slowfast()
    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    if cfg.TEST.EXTRACT_FEATURES_MODE != "" and cfg.TEST.EXTRACT_FEATURES_MODE in ["test", "train", "val"]:
        test_loader = loader.construct_loader(cfg, cfg.TEST.EXTRACT_FEATURES_MODE)
    else:
        test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        if cfg.TEST.DATASET == 'epickitchens':
            test_meter = EPICTestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )
        else:
            test_meter = TestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )

    pickle.dump([], open(file_path, 'wb+'))
    if cfg.TEST.EXTRACT_FEATURES:
        preds, labels, metadata, x_feat_list = perform_test(test_loader, model,
                                                            test_meter, cfg)
    else:
        preds, labels, metadata = perform_test(test_loader, model, test_meter, cfg)

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {'verb_output': preds[0],
                       'noun_output': preds[1],
                       'verb_gt': labels[0],
                       'noun_gt': labels[1],
                       'narration_id': metadata}
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            pickle.dump(results, open(file_path, 'wb'))
        if cfg.TEST.EXTRACT_FEATURES:
            pid = cfg.EPICKITCHENS.FEATURE_VID.split("_")[0]
            if not os.path.exists(os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid)):
                os.mkdir(os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid))
            if not cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                arr_slow = torch.cat(x_feat_list[0], dim=0).numpy()
                arr_fast = torch.cat(x_feat_list[1], dim=0).numpy()
                print(arr_slow.shape, arr_fast.shape)
                fpath_feat = os.path.join(
                    cfg.TEST.EXTRACT_FEATURES_PATH, pid,
                    '{}.pkl'.format(cfg.EPICKITCHENS.FEATURE_VID))
                with open(fpath_feat, 'wb+') as f:
                    pickle.dump([arr_slow, arr_fast], f)
            elif cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                fpath_feat = os.path.join(
                    cfg.TEST.EXTRACT_FEATURES_PATH, pid,
                    '{}.npy'.format(cfg.EPICKITCHENS.FEATURE_VID))
                with open(fpath_feat, 'wb+') as f:
                    arr = torch.cat(x_feat_list, dim=0).numpy()
                    print(arr.shape)
                    np.save(f, arr)

def main():
    """
    Main function to spawn the train and test process.
    """
    args = parse_args()
    cfg = load_config(args)
    last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    cfg.NUM_GPUS = 1  # Fix to a single GPU for the demo.
    model = model_builder.build_model(cfg)
    cu.load_checkpoint(last_checkpoint, model, False)
    path = args.path
    cat = {1: 'walking', 2: 'standing', 3: 'rising', 4: 'lying', 5: 'falling'}
    model.eval().cuda()
    with torch.no_grad():
        for inputs in load(path, cfg.DATA.TEST_CROP_SIZE, False,
                           cfg.DATA.MEAN, cfg.DATA.STD, cfg.SLOWFAST.ALPHA):
            if isinstance(inputs, (list, tuple)) and isinstance(
                    inputs[0], list) and torch.is_tensor(inputs[0][0]):
                data, frame = inputs
                startTime = time.time()
                for i in range(len(data)):
                    data[i] = data[i].cuda(non_blocking=True)
                results = model(data)
                endTime = time.time() - startTime
                print(endTime)
                scores = results[0].get_field('scores')
                index = scores > 0.3
                results = results[0][index]
                bbox = results.bbox.int()
                scores = results.get_field("scores").tolist()
                labels = results.get_field("labels").tolist()
                labels = [cat[i] for i in labels]
                # bbox = results[0].bbox.int()
                # print(data[0].shape)
                # print(data[1].shape)
                # frame = data[1][0, :, -1].permute(1, 2, 0).cpu().numpy()
                # print(frame.shape)
                template = "{}: {:.2f}"
                if bbox.shape[0] > 0:
                    for box, score, label in zip(bbox, scores, labels):
                        x, y = box[:2]
                        s = template.format(label, score)
                        top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
                        frame = cv2.rectangle(frame, tuple(top_left),
                                              tuple(bottom_right),
                                              (255, 0, 0), 2)
                        frame = cv2.putText(frame, s, (x, y),
                                            cv2.FONT_HERSHEY_SIMPLEX, .5,
                                            (255, 255, 255), 1)
                    cv2.imshow('show', frame)
                else:
                    cv2.imshow('show', frame)
                cv2.waitKey(5)
                # print(results[0].bbox)
                # print(results[0].get_field('scores'))
            else:
                break

def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            len(test_loader.dataset)
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
        )

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg)

def demo(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    # Load the labels of the Kinetics-400 dataset.
    labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
    labels = labels_df['name'].values

    img_provider = VideoReader(cfg)
    frames = []
    # # Option 1
    # pred_label = ''
    # Option 2
    pred_labels = []
    s = 0.
    for able_to_read, frame in img_provider:
        if not able_to_read:
            # When the end frame is reached, clear the buffer and continue
            # with the next one.
            frames = []
            continue

        if len(frames) != cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(256, frame_processed)
            frames.append(frame_processed)

        if len(frames) == cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE:
            start = time()
            # Perform color normalization.
            inputs = torch.tensor(frames).float()
            inputs = inputs / 255.0
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)
            # 1 C T H W.
            inputs = inputs[None, :, :, :, :]
            # Sample frames for the fast pathway.
            index = torch.linspace(0, inputs.shape[2] - 1,
                                   cfg.DATA.NUM_FRAMES).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            logger.info('fast_pathway.shape={}'.format(fast_pathway.shape))
            # Sample frames for the slow pathway.
            index = torch.linspace(0, fast_pathway.shape[2] - 1,
                                   fast_pathway.shape[2] // cfg.SLOWFAST.ALPHA).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            logger.info('slow_pathway.shape={}'.format(slow_pathway.shape))
            inputs = [slow_pathway, fast_pathway]

            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # Perform the forward pass.
            preds = model(inputs)
            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            ## Option 1: single-label inference selected from the highest
            ## probability entry.
            # label_id = preds.argmax(-1).cpu()
            # pred_label = labels[label_id]
            # Option 2: multi-label inference selected from probability
            # entries > threshold.
            label_ids = torch.nonzero(
                preds.squeeze() > .1).reshape(-1).cpu().detach().numpy()
            pred_labels = labels[label_ids]
            logger.info(pred_labels)
            if not list(pred_labels):
                pred_labels = ['Unknown']

            # Remove the oldest frame in the buffer to make room for the new one.
            # frames.pop(0)
            frames = []
            s = time() - start

        # #************************************************************
        # # Option 1
        # #************************************************************
        # # Display prediction speed to frame.
        # cv2.putText(frame, 'Speed: {:.2f}s'.format(s), (20, 30),
        #             fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        #             fontScale=1, color=(0, 235, 0), thickness=3)
        # # Display predicted label to frame.
        # cv2.putText(frame, 'Action: {}'.format(pred_label), (20, 60),
        #             fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        #             fontScale=1, color=(0, 235, 0), thickness=3)

        #************************************************************
        # Option 2
        #************************************************************
        # Display prediction speed to frame.
        cv2.putText(frame, 'Speed: {:.2f}s'.format(s), (20, 30),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1, color=(0, 235, 0), thickness=3)
        # Display predicted labels to frame.
        y_offset = 60
        cv2.putText(frame, 'Action:', (20, y_offset),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1, color=(0, 235, 0), thickness=3)
        for pred_label in pred_labels:
            y_offset += 30
            cv2.putText(frame, '{}'.format(pred_label), (20, y_offset),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=1, color=(0, 235, 0), thickness=3)
        # Display the frame.
        cv2.imshow('SlowFast', frame)
        # Hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break

    img_provider.clean()

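# A minimal, self-contained sketch of the two-pathway sampling repeated in the
# demos above, assuming torch is imported as in the rest of this file.
# Assumptions: `frames` is a (T, H, W, C) clip of NUM_FRAMES * SAMPLING_RATE
# frames; `num_frames` and `alpha` mirror cfg.DATA.NUM_FRAMES and
# cfg.SLOWFAST.ALPHA; `pack_pathways` is a hypothetical helper name.
def pack_pathways(frames, num_frames, alpha, mean, std):
    """Normalize a clip and split it into SlowFast's [slow, fast] inputs."""
    inputs = torch.as_tensor(frames).float() / 255.0
    inputs = (inputs - torch.tensor(mean)) / torch.tensor(std)
    inputs = inputs.permute(3, 0, 1, 2).unsqueeze(0)  # 1 C T H W
    # Fast pathway: num_frames frames sampled evenly over the clip.
    index = torch.linspace(0, inputs.shape[2] - 1, num_frames).long()
    fast = torch.index_select(inputs, 2, index)
    # Slow pathway: 1/alpha of the fast pathway's frames.
    index = torch.linspace(0, fast.shape[2] - 1, fast.shape[2] // alpha).long()
    slow = torch.index_select(fast, 2, index)
    return [slow, fast]
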
def test(cfg):
    """
    Perform multi-view testing/feature extraction on the pretrained video
    model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    vid_root = cfg.DATA.PATH_TO_DATA_DIR
    videos_list_file = os.path.join(vid_root, "vid_list.csv")

    print("Loading Video List ...")
    with open(videos_list_file) as f:
        videos = sorted([x.strip() for x in f.readlines() if len(x.strip()) > 0])
    print("Done")
    print("----------------------------------------------------------")
    print("{} videos to be processed...".format(len(videos)))
    print("----------------------------------------------------------")

    start_time = time.time()
    for vid in videos:
        # Create video testing loaders.
        path_to_vid = os.path.join(vid_root, os.path.split(vid)[0])
        vid_id = os.path.split(vid)[1]
        out_path = os.path.join(cfg.OUTPUT_DIR, os.path.split(vid)[0])
        out_file = vid_id.split(".")[0] + "_{}.npy".format(cfg.DATA.NUM_FRAMES)
        if os.path.exists(os.path.join(out_path, out_file)):
            print("{} already exists".format(out_file))
            continue
        print("Processing {}...".format(vid))
        dataset = VideoSet(cfg, path_to_vid, vid_id)
        test_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=cfg.TEST.BATCH_SIZE,
            shuffle=False,
            sampler=None,
            num_workers=cfg.DATA_LOADER.NUM_WORKERS,
            pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
            drop_last=False,
        )
        # Perform multi-view test on the entire dataset.
        feat_arr = multi_view_test(test_loader, model, cfg)
        os.makedirs(out_path, exist_ok=True)
        np.save(os.path.join(out_path, out_file), feat_arr)
        print("Done.")
        print("----------------------------------------------------------")

    end_time = time.time()
    hours, minutes, seconds = calculate_time_taken(start_time, end_time)
    print("Time taken: {} hour(s), {} minute(s) and {} second(s)".format(
        hours, minutes, seconds))
    print("----------------------------------------------------------")

def feature_extract(kwargs):
    """
    Perform feature extraction with the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    cfg = kwargs.pop('cfg')
    path_to_video_list = kwargs.pop('path_to_video_list')
    path_to_video_dir = kwargs.pop('path_to_video_dir')
    path_to_feat_dir = kwargs.pop('path_to_feat_dir')
    num_features = kwargs.pop('num_features')

    if not os.path.isdir(path_to_feat_dir):
        os.makedirs(path_to_feat_dir)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Extract feature with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg, feature_extraction=True)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Extracting features with random initialization. "
                    "Only for debugging.")

    feature_extract_fn = (perform_bbox_feature_extract
                          if cfg.DETECTION.ENABLE else perform_feature_extract)

    # Create video feature extraction loaders.
    if osp.isfile(path_to_video_list):
        path_to_videos = []
        with open(path_to_video_list, 'r') as f:
            video_list = json.load(f)
        for video_name in video_list:
            path_to_video = osp.join(path_to_video_dir, video_name)
            if osp.isfile(path_to_video):
                path_to_videos.append(path_to_video)
    else:
        path_to_videos = glob.glob(osp.join(path_to_video_dir, '*'))

    for video_idx, path_to_video in enumerate(tqdm(path_to_videos)):
        path_to_feature = osp.join(
            path_to_feat_dir,
            osp.splitext(osp.basename(path_to_video))[0] + '.json')
        if osp.isfile(path_to_feature) and \
                json.load(open(path_to_feature))['num_features'] == num_features:
            continue
        video_extraction_loader = loader.construct_loader(cfg, path_to_video)
        video_data = {'num_features': 0, 'video_features': {}}
        if len(video_extraction_loader) > 0:
            video_features = feature_extract_fn(video_extraction_loader, model, cfg)
            assert all([feature is not None for feature in video_features]), \
                'Missing some features!'
            video_data['num_features'] = len(video_features)
            video_data['video_features'] = video_features
        if video_data['num_features'] != num_features:
            print('Warning! Video %s has %d features.'
                  % (path_to_video, video_data['num_features']))
        with open(path_to_feature, 'w') as f:
            json.dump(video_data, f)

def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Init multigrid.
    multigrid = None
    if cfg.MULTIGRID.LONG_CYCLE or cfg.MULTIGRID.SHORT_CYCLE:
        multigrid = MultigridSchedule()
        cfg = multigrid.init_multigrid(cfg)
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, _ = multigrid.update_long_cycle(cfg, cur_epoch=0)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)
    # Create a GradScaler for mixed precision training.
    scaler = torch.cuda.amp.GradScaler(enabled=cfg.TRAIN.MIXED_PRECISION)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR, task=cfg.TASK)
        if last_checkpoint is not None:
            checkpoint_epoch = cu.load_checkpoint(
                last_checkpoint,
                model,
                cfg.NUM_GPUS > 1,
                optimizer,
                scaler if cfg.TRAIN.MIXED_PRECISION else None,
            )
            start_epoch = checkpoint_epoch + 1
        elif "ssl_eval" in cfg.TASK:
            last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR, task="ssl")
            checkpoint_epoch = cu.load_checkpoint(
                last_checkpoint,
                model,
                cfg.NUM_GPUS > 1,
                optimizer,
                scaler if cfg.TRAIN.MIXED_PRECISION else None,
                epoch_reset=True,
                clear_name_pattern=cfg.TRAIN.CHECKPOINT_CLEAR_NAME_PATTERN,
            )
            start_epoch = checkpoint_epoch + 1
        else:
            start_epoch = 0
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            scaler if cfg.TRAIN.MIXED_PRECISION else None,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
            epoch_reset=cfg.TRAIN.CHECKPOINT_EPOCH_RESET,
            clear_name_pattern=cfg.TRAIN.CHECKPOINT_CLEAR_NAME_PATTERN,
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")
    precise_bn_loader = (
        loader.construct_loader(cfg, "train", is_precise_bn=True)
        if cfg.BN.USE_PRECISE_STATS
        else None
    )

    # if (
    #     cfg.TASK == "ssl"
    #     and cfg.MODEL.MODEL_NAME == "ContrastiveModel"
    #     and cfg.CONTRASTIVE.KNN_ON
    # ):
    #     if hasattr(model, "module"):
    #         model.module.init_knn_labels(train_loader)
    #     else:
    #         model.init_knn_labels(train_loader)

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(1e6, cfg)
        val_meter = ValMeter(1e6, cfg)

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform the training loop.
logger.info("Start epoch: {}".format(start_epoch + 1)) epoch_timer = EpochTimer() for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH): if cur_epoch > 0 and cfg.DATA.LOADER_CHUNK_SIZE > 0: num_chunks = math.ceil(cfg.DATA.LOADER_CHUNK_OVERALL_SIZE / cfg.DATA.LOADER_CHUNK_SIZE) skip_rows = (cur_epoch) % num_chunks * cfg.DATA.LOADER_CHUNK_SIZE logger.info( f"=================+++ num_chunks {num_chunks} skip_rows {skip_rows}" ) cfg.DATA.SKIP_ROWS = skip_rows logger.info(f"|===========| skip_rows {skip_rows}") train_loader = loader.construct_loader(cfg, "train") loader.shuffle_dataset(train_loader, cur_epoch) if cfg.MULTIGRID.LONG_CYCLE: cfg, changed = multigrid.update_long_cycle(cfg, cur_epoch) if changed: ( model, optimizer, train_loader, val_loader, precise_bn_loader, train_meter, val_meter, ) = build_trainer(cfg) # Load checkpoint. if cu.has_checkpoint(cfg.OUTPUT_DIR): last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR, task=cfg.TASK) assert "{:05d}.pyth".format(cur_epoch) in last_checkpoint else: last_checkpoint = cfg.TRAIN.CHECKPOINT_FILE_PATH logger.info("Load from {}".format(last_checkpoint)) cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer) # Shuffle the dataset. loader.shuffle_dataset(train_loader, cur_epoch) if hasattr(train_loader.dataset, "_set_epoch_num"): train_loader.dataset._set_epoch_num(cur_epoch) # Train for one epoch. epoch_timer.epoch_tic() train_epoch( train_loader, model, optimizer, scaler, train_meter, cur_epoch, cfg, writer, ) epoch_timer.epoch_toc() logger.info( f"Epoch {cur_epoch} takes {epoch_timer.last_epoch_time():.2f}s. Epochs " f"from {start_epoch} to {cur_epoch} take " f"{epoch_timer.avg_epoch_time():.2f}s in average and " f"{epoch_timer.median_epoch_time():.2f}s in median.") logger.info( f"For epoch {cur_epoch}, each iteraction takes " f"{epoch_timer.last_epoch_time()/len(train_loader):.2f}s in average. " f"From epoch {start_epoch} to {cur_epoch}, each iteraction takes " f"{epoch_timer.avg_epoch_time()/len(train_loader):.2f}s in average." ) is_checkp_epoch = (cu.is_checkpoint_epoch( cfg, cur_epoch, None if multigrid is None else multigrid.schedule, ) or cur_epoch == cfg.SOLVER.MAX_EPOCH - 1) is_eval_epoch = misc.is_eval_epoch( cfg, cur_epoch, None if multigrid is None else multigrid.schedule) # Compute precise BN stats. if ((is_checkp_epoch or is_eval_epoch) and cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0): calculate_and_update_precise_bn( precise_bn_loader, model, min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)), cfg.NUM_GPUS > 0, ) _ = misc.aggregate_sub_bn_stats(model) # Save a checkpoint. if is_checkp_epoch: cu.save_checkpoint( cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg, scaler if cfg.TRAIN.MIXED_PRECISION else None, ) # Evaluate the model on validation set. if is_eval_epoch: eval_epoch( val_loader, model, val_meter, cur_epoch, cfg, train_loader, writer, ) if writer is not None: writer.close() result_string = "Top1 Acc: {:.2f} Top5 Acc: {:.2f} MEM: {:.2f}" "".format( 100 - val_meter.min_top1_err, 100 - val_meter.min_top5_err, misc.gpu_mem_usage(), ) logger.info("training done: {}".format(result_string)) return result_string
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # ADD Demo
    if cfg.TEST.DEMO_PATH != "":
        generate_subclip(cfg)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            len(test_loader.dataset)
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
            (cfg.TEST.DEMO_PATH != ""),
        )

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg)

    if cfg.TEST.DEMO_PATH != "":
        end_time_dir = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, "end_time.npy")
        end_time_array = np.load(end_time_dir)
        cwd = os.getcwd()
        probability_dir = os.path.join(cwd, "tmp/probability.npy")
        p_array = np.load(probability_dir)
        print(end_time_array)
        print(p_array)

        end_time_dir = os.path.join(cfg.DATA.PATH_TO_DATA_DIR)
        if end_time_dir.split("/")[-1] == "demo":
            try:
                shutil.rmtree(end_time_dir)
            except OSError as e:
                print("Error: %s - %s." % (e.filename, e.strerror))

        json_list = []
        for i in range(len(end_time_array)):
            json_list.append([end_time_array[i], p_array[i]])
        print(json_list)
        with open('timeLabel.json', 'w') as f:
            json.dump({"jogging": str(json_list)}, f)

        plt.plot(end_time_array, p_array, 'ro-')
        plt.axis([0, end_time_array[len(end_time_array) - 1] + 0.5, -0.01, 1.01])
        plt.xlabel('time(s)')
        plt.ylabel('probability')
        plt.suptitle("Jogging/Running")
        plt.savefig("fig.png")

def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                              cfg.NUM_GPUS > 1, optimizer)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg)

        # Compute precise BN stats.
        if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
            calculate_and_update_precise_bn(train_loader, model,
                                            cfg.BN.NUM_BATCHES_PRECISE)

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)

def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        if cfg.TEST.DATASET == 'epickitchens':
            test_meter = EPICTestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )
        else:
            test_meter = TestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )

    # Perform multi-view test on the entire dataset.
    preds, labels, metadata = perform_test(test_loader, model, test_meter, cfg)

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {
                'scores': {'verb': preds[0], 'noun': preds[1]},
                'labels': {'verb': labels[0], 'noun': labels[1]},
                'narration_id': metadata
            }
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            file_path = os.path.join(scores_path,
                                     cfg.EPICKITCHENS.TEST_SPLIT + '.pkl')
            pickle.dump(results, open(file_path, 'wb'))

def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    if du.is_master_proc():
        if cfg.ENABLE_WANDB:
            wandb.login()
            wandb.init(project='bbox', entity='slowfast')

    # Print config.
    # logger.info("Train with config:")
    # logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if cfg.EPICKITCHENS.USE_BBOX and not cu.has_checkpoint(cfg.OUTPUT_DIR):
        slow_fast_model = SlowFast(cfg)
        if cfg.EPICKITCHENS.USE_BBOX and cfg.EPICKITCHENS.LOAD_SLOWFAST_PRETRAIN:
            if cfg.EPICKITCHENS.SLOWFAST_PRETRAIN_CHECKPOINT_FILE_PATH != "":
                _ = cu.load_checkpoint(
                    cfg.EPICKITCHENS.SLOWFAST_PRETRAIN_CHECKPOINT_FILE_PATH,
                    slow_fast_model,
                    False,
                    optimizer=None,
                    inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
                    convert_from_caffe2=False,  # cfg.TRAIN.CHECKPOINT_TYPE == "caffe2"
                )
                logger.info("Load from slowfast.")
        if cfg.NUM_GPUS > 1:
            model.module.load_weight_slowfast(slow_fast_model)
        else:
            model.load_weight_slowfast(slow_fast_model)

    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=True)

    if cfg.BN.FREEZE:
        model.freeze_fn('bn_parameters')
    if cfg.EPICKITCHENS.USE_BBOX and cfg.EPICKITCHENS.FREEZE_BACKBONE:
        model.freeze_fn('slowfast_bbox')

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                              cfg.NUM_GPUS > 1, optimizer)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "" and not cfg.TRAIN.FINETUNE:
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "" and cfg.TRAIN.FINETUNE:
        logger.info("Load from given checkpoint file. Finetuning.")
        _ = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = 0
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    if cfg.TRAIN.DATASET != 'epickitchens' or not cfg.EPICKITCHENS.TRAIN_PLUS_VAL:
        train_loader = loader.construct_loader(cfg, "train")
        val_loader = loader.construct_loader(cfg, "val")
        logger.info("Train loader size: {}".format(len(train_loader)))
        logger.info("Val loader size: {}".format(len(val_loader)))
    else:
        train_loader = loader.construct_loader(cfg, "train+val")
        val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        if cfg.TRAIN.DATASET == 'epickitchens':
            train_meter = EPICTrainMeterSimple(len(train_loader), cfg)
            val_meter = EPICValMeterSimple(len(val_loader), cfg)
        else:
            train_meter = TrainMeter(len(train_loader), cfg)
            val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
logger.info("Start epoch: {}".format(start_epoch + 1)) cnt = 0 # eval_epoch(val_loader, model, val_meter, 0, cfg, cnt) # test_from_train(model, cfg, cnt=cnt) for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH): # Shuffle the dataset. loader.shuffle_dataset(train_loader, cur_epoch) # Train for one epoch. cnt = train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg, cnt) # Compute precise BN stats. if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0: calculate_and_update_precise_bn(train_loader, model, cfg.BN.NUM_BATCHES_PRECISE) # Save a checkpoint. if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD): cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg) # Evaluate the model on validation set. if misc.is_eval_epoch(cfg, cur_epoch): is_best_epoch = eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, cnt) if is_best_epoch: cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg, is_best_epoch=is_best_epoch)
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val
    set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Init multigrid.
    multigrid = None
    if cfg.MULTIGRID.LONG_CYCLE or cfg.MULTIGRID.SHORT_CYCLE:
        multigrid = MultigridSchedule()
        cfg = multigrid.init_multigrid(cfg)
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, _ = multigrid.update_long_cycle(cfg, cur_epoch=0)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")
    precise_bn_loader = (
        loader.construct_loader(cfg, "train", is_precise_bn=True)
        if cfg.BN.USE_PRECISE_STATS
        else None
    )

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, changed = multigrid.update_long_cycle(cfg, cur_epoch)
            if changed:
                (
                    model,
                    optimizer,
                    train_loader,
                    val_loader,
                    precise_bn_loader,
                    train_meter,
                    val_meter,
                ) = build_trainer(cfg)

                # Load checkpoint.
                if cu.has_checkpoint(cfg.OUTPUT_DIR):
                    last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
                    assert "{:05d}.pyth".format(cur_epoch) in last_checkpoint
                else:
                    last_checkpoint = cfg.TRAIN.CHECKPOINT_FILE_PATH
                logger.info("Load from {}".format(last_checkpoint))
                cu.load_checkpoint(
                    last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer
                )

        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(
            train_loader, model, optimizer, train_meter, cur_epoch, cfg, writer
        )

        is_checkp_epoch = cu.is_checkpoint_epoch(
            cfg,
            cur_epoch,
            None if multigrid is None else multigrid.schedule,
        )
        is_eval_epoch = misc.is_eval_epoch(
            cfg, cur_epoch, None if multigrid is None else multigrid.schedule
        )

        # Compute precise BN stats.
        if (
            (is_checkp_epoch or is_eval_epoch)
            and cfg.BN.USE_PRECISE_STATS
            and len(get_bn_modules(model)) > 0
        ):
            calculate_and_update_precise_bn(
                precise_bn_loader,
                model,
                min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)),
                cfg.NUM_GPUS > 0,
            )
        _ = misc.aggregate_sub_bn_stats(model)

        # Save a checkpoint.
        if is_checkp_epoch:
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer)

    if writer is not None:
        writer.close()
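# `calculate_and_update_precise_bn` re-estimates BN running statistics over a
# larger sample than the per-iteration momentum update sees (upstream SlowFast
# wraps fvcore's `update_bn_stats` for this). A minimal sketch of the idea,
# assuming the loader yields `(inputs, ...)` batches the model accepts: reset
# the running stats, switch BN to cumulative averaging, and forward
# `num_batches` of training data without gradients.
#
# import torch
# import torch.nn as nn
#
# @torch.no_grad()
# def recompute_bn_stats(model: nn.Module, loader, num_batches: int):
#     bn_layers = [
#         m for m in model.modules()
#         if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d))
#     ]
#     momenta = [bn.momentum for bn in bn_layers]
#     for bn in bn_layers:
#         bn.reset_running_stats()
#         bn.momentum = None  # None => cumulative moving average in PyTorch
#     model.train()
#     for i, (inputs, *_rest) in enumerate(loader):
#         if i >= num_batches:
#             break
#         model(inputs)
#     for bn, momentum in zip(bn_layers, momenta):
#         bn.momentum = momentum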
def test_videos(cfg):
    """
    Outputs a reconstruction of one video from each class.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    assert cfg.PREDICTIVE.ENABLE, "Model doesn't generate frames"

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create the video testing set. The dataset is used directly (instead of a
    # loader) so that one example per class can be fetched.
    # test_loader = loader.construct_loader(cfg, "test")
    test_set = build_dataset(cfg.TEST.DATASET, cfg, "test")
    logger.info("Testing model on {} examples".format(len(test_set)))

    # # Perform multi-view test on the entire dataset.
    # perform_test(test_loader, model, test_meter, cfg)

    model.eval()

    # For each class: fetch one video, run the model, and save a GIF of the
    # input and predicted frames side by side, with the predictive and CPC
    # losses embedded in the file name.
    for i in range(cfg.MODEL.NUM_CLASSES):
        frames, label, index, _ = test_set.get_example_by_class(i)
        input_frames = frames.cuda(non_blocking=True)
        label = label.cuda()

        preds = model(input_frames[None, :], return_frames=True)
        class_name = test_set._classes[i]

        loss_pred = 0.0
        loss_cpc = 0.0
        if cfg.PREDICTIVE.ENABLE:
            # Assumes the per-frame errors reduce to a scalar by averaging.
            errors = preds['pred_errors']
            loss_pred = errors.mean().item()
        if cfg.PREDICTIVE.CPC:
            cpc_loss = preds['cpc_loss']
            loss_cpc = cpc_loss.mean().item()
        preds = preds['frames']

        # [B, C, T, H, W] -> [B, T, H, W, C] for image export.
        frames = input_frames[None, :].permute(0, 2, 3, 4, 1)
        preds = preds.permute(0, 2, 3, 4, 1)

        # Stitch ground truth and prediction side by side along the width.
        images = np.concatenate(
            [
                frames.squeeze(0).cpu().numpy(),
                preds.squeeze(0).detach().cpu().numpy(),
            ],
            axis=2,
        )
        images = [
            Image.fromarray(image.astype(np.uint8), 'RGB') for image in images
        ]
        images[0].save(
            os.path.join(
                cfg.OUTPUT_DIR,
                '%s_pred-%f_cpc-%d.gif' % (class_name, loss_pred, loss_cpc),
            ),
            save_all=True,
            append_images=images[1:],
            optimize=False,
            duration=40,
            loop=0,
        )
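# The GIF export above relies on PIL's animated-GIF API: save the first frame
# with save_all=True and pass the remaining frames via append_images; duration
# is milliseconds per frame, loop=0 means loop forever. A self-contained
# minimal example (file name and frame contents are illustrative):
#
# import numpy as np
# from PIL import Image
#
# frames = [
#     Image.fromarray(np.full((64, 64, 3), c, dtype=np.uint8), 'RGB')
#     for c in (0, 128, 255)
# ]
# frames[0].save('demo.gif', save_all=True, append_images=frames[1:],
#                duration=40, loop=0)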