def test(cfg): """ Perform multi-view testing on the pretrained video model. Args: cfg (CfgNode): configs. Details can be found in slowfast/config/defaults.py """ # Set up environment. du.init_distributed_training(cfg) # Set random seed from configs. np.random.seed(cfg.RNG_SEED) torch.manual_seed(cfg.RNG_SEED) # Setup logging format. logging.setup_logging(cfg.OUTPUT_DIR) # Print config. logger.info("Test with config:") logger.info(cfg) # Build the video model and print model statistics. model = build_model(cfg) if du.is_master_proc() and cfg.LOG_MODEL_INFO: misc.log_model_info(model, cfg, use_train_input=False) cu.load_test_checkpoint(cfg, model) # Create video testing loaders. test_loader = loader.construct_loader(cfg, "test") logger.info("Testing model for {} iterations".format(len(test_loader))) if cfg.DETECTION.ENABLE: assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0 test_meter = AVAMeter(len(test_loader), cfg, mode="test") else: assert ( test_loader.dataset.num_videos % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS) == 0) # Create meters for multi-view testing. test_meter = TestMeter( test_loader.dataset.num_videos // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS), cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS, cfg.MODEL.NUM_CLASSES, len(test_loader), cfg.DATA.MULTI_LABEL, cfg.DATA.ENSEMBLE_METHOD, ) # Set up writer for logging to Tensorboard format. if cfg.TENSORBOARD.ENABLE and du.is_master_proc( cfg.NUM_GPUS * cfg.NUM_SHARDS): writer = tb.TensorboardWriter(cfg) else: writer = None # # Perform multi-view test on the entire dataset. test_meter = perform_test(test_loader, model, test_meter, cfg, writer) if writer is not None: writer.close()
def video_extract(cfg):
    ctx = multiprocessing.get_context("spawn")
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("Extract with config:")
    logger.info(cfg)

    # Initialize the model.
    name = cfg.MODEL.MODEL_NAME
    model = MODEL_REGISTRY.get(name)(cfg)
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model).cuda()
    model.eval()

    # Initialize the data loader.
    dataloader = Extractor(cfg)
    logger.info("Testing model for {} videos".format(len(dataloader)))

    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            path_to_checkpoint=cfg.TEST.CHECKPOINT_FILE_PATH,
            model=model,
            data_parallel=True,
            optimizer=None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        logger.info("Testing with random initialization. Only for debugging.")

    index_queue = ctx.Queue()
    result_queue = ctx.Queue()
    workers = [
        ctx.Process(target=get_video, args=(dataloader, index_queue, result_queue))
        for i in range(cfg.TEST.WORKERS)
    ]
    for w in workers:
        w.daemon = True
        w.start()

    num_video = len(dataloader)
    for i in range(num_video):
        index_queue.put(i)

    # NUM_FRAMES must be divisible by ALPHA.
    num_frames = cfg.DATA.NUM_FRAMES
    step_frames = num_frames

    fout = open(cfg.TEST.OUTPUT_FEATURE_FILE, "w")
    start_time = time.time()
    for i in range(num_video):
        video_data = result_queue.get()
        run(cfg, model, video_data, num_frames, step_frames, fout)
        period = time.time() - start_time
        logger.info(
            "video index: %d, period: %.2f sec, speed: %.2f sec/video."
            % (i, period, period / (i + 1))
        )
    fout.close()
def train_des(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_clevrer_model(cfg)

    # Construct the optimizer.
    optimizer = AdamW(model.parameters(), lr=cfg.SOLVER.BASE_LR, eps=1e-8)
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Create the video train and val loaders.
    train_loader = build_dataloader(cfg, "train")
    val_loader = build_dataloader(cfg, "val")

    total_steps = len(train_loader) * cfg.SOLVER.MAX_EPOCH
    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # Default value in run_glue.py
        num_training_steps=total_steps,
    )

    # Create meters.
    train_meter = ClevrerTrainMeter(len(train_loader), cfg)
    val_meter = ClevrerValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))
    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Train for one epoch.
        train_epoch(
            train_loader, model, optimizer, scheduler, train_meter, cur_epoch, cfg
        )

        is_checkp_epoch = cu.is_checkpoint_epoch(cfg, cur_epoch, None)
        is_eval_epoch = misc.is_eval_epoch(cfg, cur_epoch, None)

        # Save a checkpoint.
        if is_checkp_epoch:
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)
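# --- Minimal sketch (assumption, not part of the original source) of the
# AdamW + linear-warmup schedule used above, on a toy model. It assumes the
# scheduler comes from HuggingFace `transformers` and that `scheduler.step()`
# is called once per batch, after `optimizer.step()`.
import torch
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

toy_model = torch.nn.Linear(8, 2)
toy_optimizer = AdamW(toy_model.parameters(), lr=1e-4, eps=1e-8)
steps_per_epoch, max_epoch = 100, 3
toy_scheduler = get_linear_schedule_with_warmup(
    toy_optimizer, num_warmup_steps=0, num_training_steps=steps_per_epoch * max_epoch
)
for _ in range(steps_per_epoch * max_epoch):
    loss = toy_model(torch.randn(4, 8)).sum()
    loss.backward()
    toy_optimizer.step()
    toy_scheduler.step()  # decay the LR linearly toward zero over training
    toy_optimizer.zero_grad()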
def extract(cfg):
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("Extract with config:")
    logger.info(cfg)

    # Initialize the model.
    name = cfg.MODEL.MODEL_NAME
    model = MODEL_REGISTRY.get(name)(cfg)
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model).cuda()

    # Initialize the data loader.
    dataloader = Extractor(cfg)
    logger.info("Testing model for {} videos".format(len(dataloader)))

    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            path_to_checkpoint=cfg.TEST.CHECKPOINT_FILE_PATH,
            model=model,
            data_parallel=True,
            optimizer=None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        logger.info("Testing with random initialization. Only for debugging.")

    run(dataloader, model, cfg)
def run_demo(cfg, frame_provider):
    """
    Run demo visualization.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        frame_provider (iterator): Python iterator that returns task objects
            filled with the necessary information, such as `frames`, `id` and
            `num_buffer_frames`, for the prediction and visualization pipeline.
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    assert cfg.NUM_GPUS <= 1, "Cannot run demo on multiple GPUs."

    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.DEMO.LABEL_FILE_PATH,
        cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )

    if cfg.DETECTION.ENABLE:
        object_detector = Detectron2Predictor(cfg)

    model = ActionPredictor(cfg)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    assert (
        cfg.DEMO.BUFFER_SIZE <= seq_len // 2
    ), "Buffer size cannot be greater than half of sequence length."

    init_task_info(
        frame_provider.display_height,
        frame_provider.display_width,
        cfg.DATA.TEST_CROP_SIZE,
        cfg.DEMO.CLIP_VIS_SIZE,
    )

    for able_to_read, task in frame_provider:
        if not able_to_read:
            break
        if cfg.DETECTION.ENABLE:
            task = object_detector(task)
        task = model(task)
        frames = draw_predictions(task, video_vis)
        # Hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break
        yield frames
def run_demo(cfg, progress_callback=None):
    """
    Run the multi-process demo.
    :param cfg: (CfgNode) the demo configs.
    :param progress_callback: optional callback used to report progress back
        to the GUI.
    :return: None
    """
    # Set up environment.
    setup_environment()
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    logger.info("=== Demo started ===")
    multi_process_demo = MultiProcessDemo(cfg, progress_callback)
    multi_process_demo.run_demo()
    logger.info("=== Demo finished ===")
def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and cfg.TENSORBOARD.MODEL_VIS.ENABLE:
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, is_train=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")
        logger.info("Visualize model for {} data points".format(len(vis_loader)))

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None

        # Run visualization on the model.
        run_visualization(vis_loader, model, cfg, writer)

        if writer is not None:
            writer.close()
def init_model(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)

    model = build_and_switch_demo_model(cfg)
    load_checkpoint(cfg, model)
    return model
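# --- Usage sketch (assumption, not part of the original source): `init_model`
# pairs with a downstream inference call; every name below other than
# `init_model` is hypothetical and stands in for the demo pipeline.
#
#     cfg = load_config(parse_args())
#     model = init_model(cfg)
#     model.eval()
#     with torch.no_grad():
#         preds = model(inputs)  # `inputs` prepared by the demo pipeline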
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    # Create meters for loss tracking.
    test_meter = TrainMeter(test_loader.dataset.num_videos, cfg)

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)

    if writer is not None:
        writer.close()
def experiment(cfg):
    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Infer with config:")
    logger.info(cfg)

    # Build the SlowFast model and print its statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load weights.
    if cfg.INFERENCE.WEIGHTS_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.INFERENCE.WEIGHTS_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.INFERENCE.WEIGHTS_TYPE == "caffe2",
        )
    else:
        raise FileNotFoundError("Model weights file could not be found")

    perform_inference(model, cfg)
def video_extract(cfg):
    ctx = multiprocessing.get_context("spawn")
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("Extract with config:")
    logger.info(cfg)

    # Initialize the data loader.
    dataloader = Extractor(cfg)
    logger.info("Testing model for {} videos".format(len(dataloader)))

    index_queue = ctx.Queue()
    video_queue = ctx.Queue()
    result_queue = ctx.Queue()

    video_workers = [
        ctx.Process(target=get_video, args=(dataloader, index_queue, video_queue))
        for i in range(cfg.TEST.WORKERS)
    ]
    for w in video_workers:
        w.daemon = True
        w.start()

    result_workers = [
        ctx.Process(target=get_result, args=(cfg, gpu_id, video_queue, result_queue))
        for gpu_id in range(cfg.NUM_GPUS)
    ]
    for w in result_workers:
        w.daemon = True
        w.start()

    num_video = len(dataloader)
    for i in range(num_video):
        index_queue.put(i)

    # NUM_FRAMES must be divisible by ALPHA.
    start_time = time.time()
    fout = open(cfg.TEST.OUTPUT_FEATURE_FILE, "w")
    for i in range(num_video):
        result = result_queue.get()
        fout.write(result + "\n")
        period = time.time() - start_time
        logger.info(
            "video index: %d, period: %.2f sec, speed: %.2f sec/video."
            % (i, period, period / (i + 1))
        )
    fout.close()
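# --- Minimal sketch (assumption, not part of the original source) of the
# spawn-context producer/consumer pattern used above, with a dummy worker in
# place of `get_video`/`get_result`.
import multiprocessing

def _worker(in_q, out_q):
    while True:
        idx = in_q.get()
        out_q.put(idx * idx)  # stand-in for decoding a video / running the model

if __name__ == "__main__":
    ctx = multiprocessing.get_context("spawn")
    in_q, out_q = ctx.Queue(), ctx.Queue()
    workers = [ctx.Process(target=_worker, args=(in_q, out_q)) for _ in range(2)]
    for w in workers:
        w.daemon = True  # daemons exit with the parent; no explicit join needed
        w.start()
    for i in range(8):
        in_q.put(i)
    results = [out_q.get() for _ in range(8)]  # order may differ from submission
    print(sorted(results))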
    DATA.PATH_TO_DATA_DIR /datasets/clevrer \
    DATA.PATH_PREFIX /datasets/clevrer \
    MONET.CHECKPOINT_LOAD ./monet_checkpoints/checkpoint_epoch_00140.pyth
"""

# https://discuss.pytorch.org/t/how-do-i-check-the-number-of-parameters-of-a-model/4325
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

args = parse_args()
cfg = load_config(args)
logger = logging.get_logger(__name__)
logging.setup_logging(cfg.OUTPUT_DIR)

dataset = Clevrer(cfg, 'train')
print("Dataset len = {}".format(len(dataset)))

# Test DataLoader
dataloader = DataLoader(
    dataset, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=True, num_workers=0
)

vocab_len = dataset.get_vocab_len()
ans_vocab_len = dataset.get_ans_vocab_len()
model = ClevrerMain(cfg, vocab_len, ans_vocab_len)
if cfg.NUM_GPUS:
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(
            last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))
    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg)

        # Compute precise BN stats.
        if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
            calculate_and_update_precise_bn(
                train_loader, model, cfg.BN.NUM_BATCHES_PRECISE
            )

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)
def visualize_activations(cfg):
    """
    Visualize example inputs and intermediate activations of a video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Setup logging format.
    logging.setup_logging(cfg)
    # Print config.
    logger.info("Visualize activations")
    # logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)

    # Construct the optimizer.
    # optimizer = optim.construct_optimizer(model, cfg)

    logger.info("Load from given checkpoint file.")
    checkpoint_epoch = cu.load_checkpoint(
        cfg.TRAIN.CHECKPOINT_FILE_PATH,
        model,
        cfg.NUM_GPUS > 1,
        optimizer=None,
        inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
        convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
    )

    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=True)

    # Create the video train and val loaders.
    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")
    train_set = build_dataset(cfg.TEST.DATASET, cfg, "train")

    for i in np.random.choice(len(train_set), 5):
        # frames, label, _, _ = train_set.get_augmented_examples(i)
        frames, label, _, _ = train_set[i]
        inputs = frames
        inputs[0] = inputs[0][None, :]
        logger.info(frames[0].shape)
        # frames = frames[0].permute(0, 2, 3, 4, 1)
        frames = frames[0].squeeze().transpose(0, 1)  # .permute(1, 2, 3, 0)
        logger.info(frames.shape)
        tv.utils.save_image(
            frames,
            os.path.join(cfg.OUTPUT_DIR, 'example_%d.jpg' % i),
            nrow=18,
            normalize=True,
        )

        for j in range(len(inputs)):
            inputs[j] = inputs[j].cuda(non_blocking=True)

        with torch.no_grad():
            # logger.info(inputs[i].shape)
            # sys.stdout.flush()
            inputs[0] = inputs[0][:min(3, len(inputs[0]))]
            output = model(inputs, extra=['frames'])

        # frames = frames[0].transpose(0, 1)  # .permute(1, 2, 3, 0)
        # tv.utils.save_image(frames, os.path.join(cfg.OUTPUT_DIR, 'example_target_%d.jpg' % i), nrow=18, normalize=True)

        input_aug = output['input_aug']
        logger.info(input_aug.shape)
        input_aug = input_aug[0].transpose(0, 1)
        tv.utils.save_image(
            input_aug,
            os.path.join(cfg.OUTPUT_DIR, 'example_input_%d.jpg' % i),
            nrow=18,
            normalize=True,
        )

        # mix_layer [1, timesteps, layers, activations]
        mix_out = output['mix_layer']  # .cpu().data.numpy().squeeze()
        for layer in range(len(mix_out)):
            logger.info('mix layer %d' % layer)
            logger.info(mix_out[layer].view([18, -1]).mean(1))
            images = mix_out[layer].transpose(1, 2).transpose(0, 1)
            logger.info(images.shape)
            images = images.reshape((-1,) + images.shape[2:])
            images = images - images.min()
            images = images / images.max()
            tv.utils.save_image(
                images,
                os.path.join(cfg.OUTPUT_DIR, 'example_%d_mix_layer_l%d.jpg' % (i, layer)),
                nrow=18,
                normalize=True,
            )

        # BU errors per timestep per layer (choose a random activation or the
        # mean); also write out the mean/norm.
        # [1, timesteps, layers, channels, height, width]
        bu_errors = output['bu_errors']  # .cpu().data.numpy().squeeze()
        for layer in range(len(bu_errors)):
            images = bu_errors[layer].transpose(1, 2).transpose(0, 1)
            images = images - images.min()
            images = images / images.max()
            logger.info(images.shape)
            images = images.reshape((-1,) + images.shape[2:])
            tv.utils.save_image(
                images,
                os.path.join(cfg.OUTPUT_DIR, 'example_%d_bu_errors_l%d.jpg' % (i, layer)),
                nrow=18,
                normalize=True,
            )

        # Horizontal inhibition per timestep per layer (choose a random
        # activation or the mean); also write out the mean/norm.
        # [1, timesteps, layers, channels, height, width]
        inhibition = output['H_inh']  # .cpu().data.numpy().squeeze()
        for layer in range(len(inhibition)):
            images = inhibition[layer].transpose(1, 2).transpose(0, 1)
            images = images - images.min()
            images = images / images.max()
            logger.info(images.shape)
            images = images.reshape((-1,) + images.shape[2:])
            tv.utils.save_image(
                images,
                os.path.join(cfg.OUTPUT_DIR, 'example_%d_H_inh_l%d.jpg' % (i, layer)),
                nrow=18,
                normalize=True,
            )

        # Persistent state in between timesteps.
        # [1, timesteps, layers, channels, height, width]
        hidden = output['hidden']  # .cpu().data.numpy().squeeze()
        for layer in range(len(hidden)):
            images = hidden[layer].transpose(1, 2).transpose(0, 1)
            images = images - images.min()
            images = images / images.max()
            logger.info(images.shape)
            images = images.reshape((-1,) + images.shape[2:])
            tv.utils.save_image(
                images,
                os.path.join(cfg.OUTPUT_DIR, 'example_%d_hidden_l%d.jpg' % (i, layer)),
                nrow=18,
                normalize=True,
            )
def benchmark_data_loading(cfg):
    """
    Benchmark the speed of data loading in PySlowFast.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Benchmark data loading with config:")
    logger.info(pprint.pformat(cfg))

    timer = Timer()
    dataloader = loader.construct_loader(cfg, "train")
    logger.info("Initialize loader using {:.2f} seconds.".format(timer.seconds()))

    # Total batch size across different machines.
    batch_size = cfg.TRAIN.BATCH_SIZE * cfg.NUM_SHARDS
    log_period = cfg.BENCHMARK.LOG_PERIOD
    epoch_times = []

    # Test for a few epochs.
    for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
        timer = Timer()
        timer_epoch = Timer()
        iter_times = []
        for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
            if cur_iter > 0 and cur_iter % log_period == 0:
                iter_times.append(timer.seconds())
                ram_usage, ram_total = misc.cpu_mem_usage()
                logger.info(
                    "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
                    "RAM Usage: {:.2f}/{:.2f} GB.".format(
                        cur_epoch,
                        log_period,
                        log_period * batch_size,
                        iter_times[-1],
                        ram_usage,
                        ram_total,
                    )
                )
                timer.reset()
        epoch_times.append(timer_epoch.seconds())
        ram_usage, ram_total = misc.cpu_mem_usage()
        logger.info(
            "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
            "RAM Usage: {:.2f}/{:.2f} GB.".format(
                cur_epoch,
                len(dataloader),
                len(dataloader) * batch_size,
                epoch_times[-1],
                ram_usage,
                ram_total,
            )
        )
        logger.info(
            "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} "
            "(avg/std) seconds.".format(
                cur_epoch,
                log_period,
                log_period * batch_size,
                np.mean(iter_times),
                np.std(iter_times),
            )
        )
    logger.info(
        "On average every epoch ({} videos) takes {:.2f}/{:.2f} "
        "(avg/std) seconds.".format(
            len(dataloader) * batch_size,
            np.mean(epoch_times),
            np.std(epoch_times),
        )
    )
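# --- Minimal sketch (assumption, not part of the original source) of the
# per-iteration timing pattern above, using time.perf_counter in place of the
# repo's Timer helper.
import time
import numpy as np

iter_times, log_period = [], 10
t0 = time.perf_counter()
for cur_iter in range(100):
    _ = sum(range(10000))  # stand-in for fetching one batch
    if cur_iter > 0 and cur_iter % log_period == 0:
        iter_times.append(time.perf_counter() - t0)
        t0 = time.perf_counter()  # restart the window, mirroring timer.reset()
print(
    "avg/std per %d iters: %.4f/%.4f s"
    % (log_period, np.mean(iter_times), np.std(iter_times))
)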
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    assert (
        len(test_loader.dataset)
        % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
        == 0
    )
    # Create meters for multi-view testing.
    test_meter = TestMeter(
        len(test_loader.dataset)
        // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
        cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
        cfg.MODEL.NUM_CLASSES,
        len(test_loader),
        cfg.DATA.MULTI_LABEL,
        cfg.DATA.ENSEMBLE_METHOD,
    )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg, writer)

    if writer is not None:
        writer.close()
def visualize(cfg):
    """
    Visualize dataset examples (frames and masks) from the train set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Setup logging format.
    logging.setup_logging(cfg)
    # Print config.
    logger.info("Visualize with config:")
    # logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    # model = build_model(cfg)
    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=True)

    # Create the video train and val loaders.
    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")
    train_set = build_dataset(cfg.TEST.DATASET, cfg, "train")

    for i in np.random.choice(len(train_set), 100):
        i = 14693
        # frames, label, _, _ = train_set.get_augmented_examples(i)
        logger.info(i)
        frames, label, _, meta = train_set[i]
        # logger.info(frames[0].shape)
        logger.info('done')
        # frames = frames[0].permute(0, 2, 3, 4, 1)
        frames = frames[0].transpose(0, 1)  # .permute(1, 2, 3, 0)

        # logger.info('### Z score ##########')
        # logger.info('min')
        # logger.info(frames.min())
        # logger.info('max')
        # logger.info(frames.max())
        # logger.info('mean')
        # logger.info(frames.mean())
        # logger.info('var')
        # logger.info(frames.var())

        # Undo the color normalization.
        frames = frames * torch.tensor(cfg.DATA.STD)[None, :, None, None]  # [None,:,None,None,None]
        frames = frames + torch.tensor(cfg.DATA.MEAN)[None, :, None, None]  # [None,:,None,None,None]

        # logger.info('### normal ##########')
        # logger.info('min')
        # logger.info(frames.min())
        # logger.info('max')
        # logger.info(frames.max())
        # logger.info('mean')
        # logger.info(frames.mean())
        # logger.info('var')
        # logger.info(frames.var())

        masks = meta['masks']
        masks = torch.cat([masks] * 3, 0).transpose(0, 1) / masks.max()
        frames = frames / frames.max()
def run_demo(cfg, frame_provider):
    """
    Run demo visualization.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        frame_provider (iterator): Python iterator that returns task objects
            filled with the necessary information, such as `frames`, `id` and
            `num_buffer_frames`, for the prediction and visualization pipeline.
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)

    common_classes = (
        cfg.DEMO.COMMON_CLASS_NAMES
        if len(cfg.DEMO.LABEL_FILE_PATH) != 0
        else None
    )

    video_vis = VideoVisualizer(
        num_classes=cfg.MODEL.NUM_CLASSES,
        class_names_path=cfg.DEMO.LABEL_FILE_PATH,
        top_k=cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        thres=cfg.DEMO.COMMON_CLASS_THRES,
        lower_thres=cfg.DEMO.UNCOMMON_CLASS_THRES,
        common_class_names=common_classes,
        colormap=cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
        mode=cfg.DEMO.VIS_MODE,
    )

    # VA edits begin
    # async_vis = AsyncVis(video_vis, n_workers=cfg.DEMO.NUM_VIS_INSTANCES)
    async_vis = AsyncVis(
        video_vis,
        n_workers=cfg.DEMO.NUM_VIS_INSTANCES,
        label_filepath=cfg.DEMO.LABEL_FILE_PATH,
    )
    # VA edits end

    if cfg.NUM_GPUS <= 1:
        model = ActionPredictor(cfg=cfg, async_vis=async_vis)
    else:
        model = AsyncDemo(cfg=cfg, async_vis=async_vis)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    assert (
        cfg.DEMO.BUFFER_SIZE <= seq_len // 2
    ), "Buffer size cannot be greater than half of sequence length."

    num_task = 0
    # Start reading frames.
    frame_provider.start()
    for able_to_read, task in frame_provider:
        if not able_to_read:
            break
        if task is None:
            time.sleep(0.02)
            continue
        num_task += 1

        model.put(task)
        try:
            task = model.get()
            num_task -= 1
            yield task
        except IndexError:
            continue

    while num_task != 0:
        try:
            task = model.get()
            num_task -= 1
            yield task
        except IndexError:
            continue
def __init__(self, cfg, progress_callback):
    # Set up environment.
    setup_environment()
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("Demo with config:")
    logger.info(pprint.pformat(cfg))

    # Prepare the input video for best demo results.
    cfg.DEMO.VIDEO_SOURCE_PATH_AT_FPS = self.create_demo_video_at_target_framerate(
        cfg.DEMO.VIDEO_SOURCE_PATH, cfg.CUSTOM_DATASET.FRAME_RATE
    )
    self.cfg = cfg

    # An output folder for all demo-related output.
    output_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    self.cfg.DEMO.OUTPUT_FOLDER = os.path.join(
        self.cfg.CUSTOM_DATASET.DEMO_DIR, output_datetime
    )
    create_folder(self.cfg.DEMO.OUTPUT_FOLDER)
    logger.info(
        "Created output-folder for demo results at: "
        + self.cfg.DEMO.OUTPUT_FOLDER
    )

    # (pyqtSignal) used for signaling back the progress for the GUI.
    # We currently take the progress as the percentage of distributed images.
    self.progress_callback = progress_callback

    # Used for extracting the data frames from the video file.
    self.file_video_stream = FileVideoStream(self.cfg.DEMO.VIDEO_SOURCE_PATH_AT_FPS)
    self.video_file_name = Path(self.cfg.DEMO.VIDEO_SOURCE_PATH).stem

    # Whether we display our results.
    self.use_video_visualizer = (
        self.cfg.DEMO.VIDEO_SHOW_VIDEO_ENABLE
        or self.cfg.DEMO.VIDEO_EXPORT_VIDEO_ENABLE
    )
    # Whether we export our output.
    self.export_output = self.cfg.DEMO.EXPORT_EXPORT_RESULTS

    # The fps of the video source.
    self.frames_per_second = self.file_video_stream.frames_per_second
    self.video_length_seconds = self.file_video_stream.video_length_seconds

    # Information on the sampling requirements for the video data.
    self.sample_rate = self.cfg.DATA.SAMPLING_RATE
    self.num_frames = self.cfg.DATA.NUM_FRAMES
    self.seq_len = self.sample_rate * self.num_frames
    self.half_seq_len = int(self.seq_len / 2)
    self.half_seq_len_seconds = self.half_seq_len / self.frames_per_second

    # The seconds in the video that are suited for inference.
    self.earliest_full_start_second = np.math.ceil(self.half_seq_len_seconds)
    self.final_full_second = math.floor(self.video_length_seconds) - math.ceil(
        self.half_seq_len_seconds
    )
    # Set the current second to start. The current second is the second for
    # which we make the prediction.
    self.current_video_second = self.earliest_full_start_second

    # Used for telling the GUI the progress of our distribute-images function
    # over [0, final_full_second] seconds.
    self.number_of_relevant_frames = (
        self.final_full_second + 1
    ) * self.frames_per_second

    # The corresponding frame index to any middle_frame_timestamp of interest.
    self.first_middle_frame_index = (
        sec_to_frame(self.earliest_full_start_second, self.cfg, mode="demo") - 1
    )
    # Used to determine whether an index is a middle frame index for which
    # action recognition is done.
    self.current_middle_frame_index = self.first_middle_frame_index
    # The inference frame indices are sampled around the middle frame as
    # defined for slowfast when using ava_dataset.
    # Here we have indices: index = frame number - 1.
    self.inference_frame_indices = list(
        range(
            self.current_middle_frame_index + 1 - self.half_seq_len,
            self.current_middle_frame_index + 1 + self.half_seq_len,
            self.sample_rate,
        )
    )
    # Indicates whether the main process should put the next image in the
    # input_detection_queue.
    self.next_image_in_relevant_range = (
        self.current_video_second <= self.final_full_second
    )

    # Multiprocessing configs:
    # How many CPUs we have.
    self.num_cpu = mp.cpu_count()
    # We have 5 processes in parallel in the simplest case of the demo:
    # 1. Main, 2. Object Predictor, 3. Deep Sort Tracker, 4. Video Visualizer,
    # 5. Action Recognizer.
    self.num_occupied_processes = 5
    assert self.num_cpu >= self.num_occupied_processes, (
        "You need at least "
        + str(self.num_occupied_processes)
        + " cores for the multiprocessing demo"
    )
    self.free_cpu_cores = self.num_cpu - self.num_occupied_processes

    # How many GPUs we have for the demo.
    self.num_gpu = self.cfg.NUM_GPUS
    # How many GPUs should be used for object detection (increasing number).
    self.num_gpu_object_detection = min(self.free_cpu_cores, self.num_gpu)
    # The gpuid for action recognition (decreasing, or in our case the last
    # gpuid). We take the last possible gpuid for action recognition because
    # this is beneficial if we have fewer processes than free_cpu_cores
    # (object detection and action recognition are separated this way).
    self.gpuid_action_recognition = self.num_gpu - 1

    # The queue sizes as specified in the config files.
    self.queue_size = self.cfg.DEMO.QSIZE_SECONDS * self.cfg.CUSTOM_DATASET.FRAME_RATE

    # Queues
    # Contains the original images with an idx each:
    # 1. img_idx (int)
    # 2. image of shape (H, W, C) (in BGR order and [0, 255])
    self.input_detection_queue = mp.Queue(maxsize=self.queue_size)
    # Queue containing the detections per image in the form:
    # 1. img_idx (int)
    # 2. image of shape (H, W, C) (in BGR order and [0, 255])
    # 3. predictions (dict): a dict with the following keys:
    #    pred_boxes: tensor of shape (num_predictions, 4) = the coordinates of
    #    the predicted boxes [x1, y1, x2, y2] --> if empty it is []
    #    scores: tensor of shape (num_predictions) containing the confidence
    #    scores [0, 1] --> if empty it is []
    self.output_detection_queue = mp.Queue(maxsize=self.queue_size)
    # Contains the images with the corresponding ids and
    # person_tracking_outputs -> used for visualization:
    # 1. img_idx (int)
    # 2. image of shape (H, W, C) (in BGR order and [0, 255])
    # 3. person_tracking_outputs: ndarray with shape
    #    (num_identities, 5(int) = x1, y1, x2, y2, identity_number)
    #    --> if empty it is a list []
    self.output_tracker_queue_visualization = mp.Queue(maxsize=self.queue_size)
    # Contains the images with the corresponding ids and
    # person_tracking_outputs -> used for action recognition:
    # 1. img_idx (int)
    # 2. person_tracking_outputs: ndarray with shape
    #    (num_identities, 5(int) = x1, y1, x2, y2, identity_number)
    #    --> if empty it is a list []
    self.output_tracker_queue_action_recognition = mp.Queue(maxsize=self.queue_size)
    # Contains the input for action recognition (only for img_idxs that are
    # middle frames):
    # 1. current_video_second (int): the current video second for which the
    #    prediction data is given
    # 2. img_idxs = current_middle_frame_index (int): the image img_idx, which
    #    is always the next middle_frame_index
    # 3. img_idx (int) = the idx of the current middle frame
    # 4. image of shape (H, W, C) (in BGR order and [0, 255])
    # It is bigger than the other queues.
    self.input_action_recognition_queue = mp.Queue(
        maxsize=int(self.queue_size * 1.5)
    )
    # Contains the output of action recognition (only for img_idxs that are
    # middle frames):
    # 1. img_idx (int), only for middle frames
    # 2. person_tracking_outputs: ndarray with shape
    #    (num_identities, 5(int) = x1, y1, x2, y2, identity_number)
    #    --> if empty it is a list []
    # 3. pred_action_category_scores (ndarray float32) with shape
    #    (num_person_ids, num_categories): the scores for each person and each
    #    action category --> if empty it is a list []
    self.output_action_recognition_queue_visualization = mp.Queue(
        maxsize=self.queue_size
    )
    # Contains the output of action recognition (only for img_idxs that are
    # middle frames):
    # 1. current_video_second (int): the current video second for which the
    #    prediction data is given
    # 2. person_tracking_outputs: ndarray with shape
    #    (num_identities, 5(int) = x1, y1, x2, y2, identity_number)
    #    --> if empty it is a list []
    # 3. pred_action_category_scores (ndarray float32) with shape
    #    (num_person_ids, num_categories): the scores for each person and each
    #    action category --> if empty it is a list []
    self.output_action_recognition_queue_result_export = mp.Queue(
        maxsize=int(self.video_length_seconds * self.frames_per_second)
    )

    # A list of dicts that contains detected middle_frame_seconds.
    self.middle_frame_seconds = []

    # The detectron2_object_predictor_class for person detection.
    self.object_predictor = DemoDetectron2ObjectPredictor(
        self.cfg,
        self.file_video_stream.height,
        self.file_video_stream.width,
        parallel=True,
        num_gpu=self.num_gpu_object_detection,
        input_queue=self.input_detection_queue,
        output_queue=self.output_detection_queue,
        gpuid_action_recognition=self.gpuid_action_recognition,
    )
    # The deep sort tracker class for person tracking.
    self.deep_sort_tracker = DeepSortTracker(
        self.cfg,
        input_queue=self.output_detection_queue,
        output_queue_vis=self.output_tracker_queue_visualization,
        output_queue_action_pred=self.output_tracker_queue_action_recognition,
        show_video=self.use_video_visualizer,
    )
    # The action recognition class.
    self.action_recognizer = ActionRecognizer(
        self.cfg,
        self.file_video_stream.height,
        self.file_video_stream.width,
        model_device=self.gpuid_action_recognition,
        first_middle_frame_index=self.first_middle_frame_index,
        sample_rate=self.sample_rate,
        half_seq_len=self.half_seq_len,
        current_video_second=self.current_video_second,
        input_queue_tracker=self.output_tracker_queue_action_recognition,
        input_queue_images=self.input_action_recognition_queue,
        output_queue=self.output_action_recognition_queue_visualization,
        output_action_recognition_queue_result_export=self.output_action_recognition_queue_result_export,
    )

    if self.export_output:
        # Our demo meter to store and finally print the results.
        self.demo_meter = DemoMeter(
            self.cfg, self.file_video_stream.height, self.file_video_stream.width
        )
        # Used to control the completeness of our export.
        self.current_export_second = self.earliest_full_start_second - 1

    if self.use_video_visualizer:
        self.demo_visualizer = VideoVisualizer(
            self.cfg,
            self.file_video_stream.height,
            self.first_middle_frame_index,
            self.frames_per_second,
            input_detection_queue=self.input_detection_queue,
            output_detection_queue=self.output_detection_queue,
            output_tracker_queue_visualization=self.output_tracker_queue_visualization,
            output_tracker_queue_action_recognition=self.output_tracker_queue_action_recognition,
            input_action_recognition_queue=self.input_action_recognition_queue,
            output_action_recognition_queue_visualization=self.output_action_recognition_queue_visualization,
            output_action_recognition_queue_result_export=self.output_action_recognition_queue_result_export,
        )
def test_implementation_des(cfg):
    """
    Simulates a train and a val epoch to check that the gradients are being
    updated and that metrics are being calculated correctly.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test implementation")

    # Build the video model and print model statistics.
    model = build_clevrer_model(cfg)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Create the video train and val loaders.
    if cfg.TRAIN.DATASET != 'Clevrer_des':
        print(
            "This train script does not support your dataset: -{}-. "
            "Only Clevrer_des".format(cfg.TRAIN.DATASET)
        )
        exit()
    train_loader = build_dataloader(cfg, "train")
    val_loader = build_dataloader(cfg, "val")

    # Create meters.
    train_meter = ClevrerTrainMeter(len(train_loader), cfg)
    val_meter = ClevrerValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    # Train for one epoch.
    model_before = copy.deepcopy(model)
    cur_epoch = start_epoch
    train_epoch(
        train_loader, model, optimizer, train_meter, cur_epoch, cfg, test_imp=True
    )
    print("Check how much parameters changed")
    for (p_b_name, p_b), (p_name, p) in zip(
        model_before.named_parameters(), model.named_parameters()
    ):
        if p.requires_grad:
            print("Parameter requires grad:")
            print(p_name, p_b_name)
            # Calculate ratio of change.
            change = torch.abs(torch.norm(p) - torch.norm(p_b))
            print(
                "Ratio of change = {}".format(
                    torch.true_divide(change, torch.norm(p_b))
                )
            )
            if (p_b != p).any():
                print("--Check--")
            else:
                print("ALERT - WEIGHTS DID NOT CHANGE WITH TRAINING.")
        else:
            print("Parameter does not require grad:")
            print(p_name)
            print(p)
    print("Val epoch")
    eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, test_imp=True)
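# --- Minimal sketch (assumption, not part of the original source) of the
# weight-change check above, on a toy model: snapshot the parameters, take one
# optimizer step, then compare norms.
import copy
import torch

toy = torch.nn.Linear(4, 2)
toy_before = copy.deepcopy(toy)
opt = torch.optim.SGD(toy.parameters(), lr=0.1)
loss = toy(torch.randn(8, 4)).pow(2).mean()
loss.backward()
opt.step()
for (_, p_b), (name, p) in zip(toy_before.named_parameters(), toy.named_parameters()):
    change = torch.abs(torch.norm(p) - torch.norm(p_b))
    ratio = torch.true_divide(change, torch.norm(p_b))
    status = "changed" if (p_b != p).any() else "DID NOT CHANGE"
    print("%s: ratio of change = %.2e (%s)" % (name, ratio, status))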
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        writer = SummaryWriter(log_dir=cfg.OUTPUT_DIR)
    else:
        writer = None

    if (
        du.get_rank() == 0
        and du.is_master_proc(num_gpus=cfg.NUM_GPUS)
        and not cfg.DEBUG
    ):
        tags = []
        if 'TAGS' in cfg and cfg.TAGS != []:
            tags = list(cfg.TAGS)
        neptune.set_project('Serre-Lab/motion')

        ######################
        overrides = sys.argv[1:]
        overrides_dict = {}
        for i in range(len(overrides) // 2):
            overrides_dict[overrides[2 * i]] = overrides[2 * i + 1]
        overrides_dict['dir'] = cfg.OUTPUT_DIR
        ######################

        if 'NEP_ID' in cfg and cfg.NEP_ID != "":
            session = Session()
            project = session.get_project(project_qualified_name='Serre-Lab/motion')
            nep_experiment = project.get_experiments(id=cfg.NEP_ID)[0]
        else:
            nep_experiment = neptune.create_experiment(
                name=cfg.NAME, params=overrides_dict, tags=tags
            )
    else:
        nep_experiment = None

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        misc.log_model_info(model, cfg, is_train=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(
            last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))
    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(
            train_loader, model, optimizer, train_meter, cur_epoch, writer,
            nep_experiment, cfg
        )

        # Compute precise BN stats.
        # if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
        #     calculate_and_update_precise_bn(
        #         train_loader, model, cfg.BN.NUM_BATCHES_PRECISE
        #     )

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, nep_experiment, cfg)

        if (
            du.get_rank() == 0
            and du.is_master_proc(num_gpus=cfg.NUM_GPUS)
            and not cfg.DEBUG
        ):
            nep_experiment.log_metric('epoch', cur_epoch)
def get_predictions(self):
    """
    Predict and append prediction results to each box in each keyframe in
    `self.pred_boxes` dictionary.
    """
    # Set random seed from configs.
    np.random.seed(self.cfg.RNG_SEED)
    torch.manual_seed(self.cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(self.cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Run demo with config:")
    logger.info(self.cfg)
    assert (
        self.cfg.NUM_GPUS <= 1
    ), "Cannot run demo visualization on multiple GPUs."

    # Build the video model and print model statistics.
    model = build_model(self.cfg)
    model.eval()
    logger.info("Start loading model info")
    misc.log_model_info(model, self.cfg, use_train_input=False)
    logger.info("Start loading model weights")
    cu.load_test_checkpoint(self.cfg, model)
    logger.info("Finish loading model weights")
    logger.info("Start making predictions for precomputed boxes.")

    for keyframe_idx, boxes_and_labels in tqdm.tqdm(self.pred_boxes.items()):
        inputs = self.get_input_clip(keyframe_idx)
        boxes = boxes_and_labels[0]
        boxes = torch.from_numpy(np.array(boxes)).float()

        box_transformed = scale_boxes(
            self.cfg.DATA.TEST_CROP_SIZE,
            boxes,
            self.display_height,
            self.display_width,
        )

        # Pad frame index for each box.
        box_inputs = torch.cat(
            [
                torch.full((box_transformed.shape[0], 1), float(0)),
                box_transformed,
            ],
            axis=1,
        )

        if self.cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            box_inputs = box_inputs.cuda()

        preds = model(inputs, box_inputs)

        preds = preds.detach()
        if self.cfg.NUM_GPUS:
            preds = preds.cpu()

        boxes_and_labels[1] = preds
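# --- Minimal sketch (assumption, not part of the original source) of the box
# padding above: detection-style heads here expect boxes as
# [batch_index, x1, y1, x2, y2], so a zero batch-index column is prepended to
# the (N, 4) box tensor.
import torch

example_boxes = torch.tensor(
    [[10.0, 20.0, 50.0, 80.0],
     [30.0, 15.0, 90.0, 60.0]]
)  # (N, 4) in x1, y1, x2, y2
padded = torch.cat(
    [torch.full((example_boxes.shape[0], 1), 0.0), example_boxes], dim=1
)  # (N, 5): batch index 0 for a single clip
print(padded)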
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            len(test_loader.dataset)
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
        )

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg)
def __init__(self, cfg, img_height, first_middle_frame_index, frames_per_second,
             input_detection_queue=None, output_detection_queue=None,
             output_tracker_queue_visualization=None,
             output_tracker_queue_action_recognition=None,
             input_action_recognition_queue=None,
             output_action_recognition_queue_visualization=None,
             output_action_recognition_queue_result_export=None):
    """
    Initialize the object.
    :param cfg: our demo config
    :param img_height: (int) the height of the image
    :param first_middle_frame_index: (int) the index of the first middle_frame index
    :param frames_per_second: (float) the fps of the video -> required for
        determining middle frames
    :param input_detection_queue: please refer to class MultiProcessDemo
    :param output_detection_queue: please refer to class MultiProcessDemo
    :param output_tracker_queue_visualization: please refer to class MultiProcessDemo
    :param output_tracker_queue_action_recognition: please refer to class MultiProcessDemo
    :param input_action_recognition_queue: please refer to class MultiProcessDemo
    :param output_action_recognition_queue_visualization: please refer to class MultiProcessDemo
    :param output_action_recognition_queue_result_export: please refer to class MultiProcessDemo
    """
    setup_environment()
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    self.cfg = cfg

    # The name of the input video.
    self.demo_video_name = Path(self.cfg.DEMO.VIDEO_SOURCE_PATH).stem

    # Whether we will export a video.
    self.export_video = self.cfg.DEMO.VIDEO_EXPORT_VIDEO_ENABLE
    if self.export_video:
        # Number of digits for exporting the images (determines how many
        # images can be stored).
        self.number_of_digits_for_image_export = 10
        # The path of the to-be-created video.
        self.export_video_path = os.path.join(
            self.cfg.DEMO.OUTPUT_FOLDER,
            self.demo_video_name + "_annotated.mp4",
        )

    # Whether we will display an image.
    self.display_video = self.cfg.DEMO.VIDEO_SHOW_VIDEO_ENABLE
    self.cv2_display_name = "Demo: " + self.demo_video_name

    # Whether we will display the meta information (queue sizes and img idx).
    self.display_meta_info = cfg.DEMO.VIDEO_SHOW_VIDEO_DEBUGGING_INFO
    # Used for finding the position of meta info.
    self.img_height = img_height

    # Used for determining middle_frame_indices (they have the action prediction).
    self.first_middle_frame_index = first_middle_frame_index
    self.frames_per_second = frames_per_second

    # Additional options for displaying the video.
    self.video_display_scaling_factor = cfg.DEMO.VIDEO_DISPLAY_SCALING_FACTOR
    self.video_action_display_duration_milliseconds = (
        cfg.DEMO.VIDEO_ACTION_DISPLAY_DURATION_MILLISECONDS
    )

    # The queues containing relevant information.
    self.input_detection_queue = input_detection_queue
    self.output_detection_queue = output_detection_queue
    self.output_tracker_queue_visualization = output_tracker_queue_visualization
    self.output_tracker_queue_action_recognition = output_tracker_queue_action_recognition
    self.input_action_recognition_queue = input_action_recognition_queue
    self.output_action_recognition_queue_visualization = output_action_recognition_queue_visualization
    self.output_action_recognition_queue_result_export = output_action_recognition_queue_result_export

    # The queue sizes as specified in the config files.
    self.queue_size = self.cfg.DEMO.QSIZE_SECONDS * self.cfg.CUSTOM_DATASET.FRAME_RATE

    # Used for terminating the process successfully.
    self.action_recognition_input_finished = False

    # The information for displaying actions.
    # Load the categories:
    self.path_to_label_map_file = (
        os.path.join(cfg.CUSTOM_DATASET.ANNOTATION_DIR, cfg.CUSTOM_DATASET.LABEL_MAP_FILE)
        if not os.path.isfile(cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE)
        else cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE
    )
    # List of dicts (id, name).
    self.action_categories, _ = read_labelmap(self.path_to_label_map_file)
    # A color value for every category.
    self.palette_actions = np.random.randint(
        64, 128, (len(self.action_categories), 3)
    ).tolist()
    # The information required for displaying person_tracking info.
    self.palette_person_ids = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

    # The process for displaying and/or exporting frames.
    self.display_next_frame_process = mp.Process(
        target=self.display_and_or_export_next_frame, args=()
    )
    # Used to test the correct order of images.
    self.display_img_idx = -1

    # The information for action info display.
    self.current_action_output_img_idx = ""
    self.current_pred_action_category_scores = ""
def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and (
        cfg.TENSORBOARD.MODEL_VIS.ENABLE or cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE
    ):
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        model.eval()
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, use_train_input=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None

        if cfg.TENSORBOARD.PREDICTIONS_PATH != "":
            assert not cfg.DETECTION.ENABLE, "Detection is not supported."
            logger.info("Visualizing class-level performance from saved results...")
            if writer is not None:
                with g_pathmgr.open(cfg.TENSORBOARD.PREDICTIONS_PATH, "rb") as f:
                    preds, labels = pickle.load(f, encoding="latin1")
                writer.plot_eval(preds, labels)

        if cfg.TENSORBOARD.MODEL_VIS.ENABLE:
            if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
                assert (
                    not cfg.DETECTION.ENABLE
                ), "Detection task is currently not supported for Grad-CAM visualization."
                if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 1
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST)
                    )
                elif cfg.MODEL.ARCH in cfg.MODEL.MULTI_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 2
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST)
                    )
                else:
                    raise NotImplementedError(
                        "Model arch {} is not in {}".format(
                            cfg.MODEL.ARCH,
                            cfg.MODEL.SINGLE_PATHWAY_ARCH + cfg.MODEL.MULTI_PATHWAY_ARCH,
                        )
                    )
            logger.info(
                "Visualize model analysis for {} iterations".format(len(vis_loader))
            )
            # Run visualization on the model.
            run_visualization(vis_loader, model, cfg, writer)
        if cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE:
            logger.info(
                "Visualize Wrong Predictions for {} iterations".format(len(vis_loader))
            )
            perform_wrong_prediction_vis(vis_loader, model, cfg)

        if writer is not None:
            writer.close()
def __init__(self, cfg, img_height, img_width, parallel=False, num_gpu=None,
             input_queue=None, output_queue=None, gpuid_action_recognition=None):
    """
    Creates a Detectron2-based prediction class which is optimized for the
    demo and should be used for it. The code is slightly modified from the
    original detectron2 demo content.
    :param cfg: the config file for the prototype
    :param img_height: (int) the height of the input images
    :param img_width: (int) the width of the input images
    :param parallel: (boolean) whether we will do asynchronous computation
    :param num_gpu: (int) number of gpus we will use for asynchronous computation
    :param input_queue: (multiprocessing.Queue) containing the input images
        (img_idx, image of shape (H, W, C) (in BGR order) and [0, 255])
    :param output_queue: (multiprocessing.Queue) containing the computed predictions
    :param gpuid_action_recognition: (int) the gpuid for object tracking
    """
    setup_environment()
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # The cfg file for the prototype.
    self.cfg = cfg

    # The original image resolution: used for resizing provided images.
    self.img_height = img_height
    self.img_width = img_width

    # We only use the demo config.
    self.detectron2_cfg_file = self.cfg.DETECTRON.DETECTION_MODEL_CFG
    self.detectron2_model_weights = self.cfg.DETECTRON.MODEL_WEIGHTS
    self.detectron2_score_tresh_test = self.cfg.DETECTRON.DEMO_PERSON_SCORE_THRESH

    # Load the detectron config.
    self.detectron_config = self.setup_detectron_config()
    # Can be useful for displaying the object classes.
    self.metadata = MetadataCatalog.get(
        self.detectron_config.DATASETS.TEST[0]
        if len(self.detectron_config.DATASETS.TEST)
        else "__unused"
    )
    self.cpu_device = torch.device("cpu")

    # Determines whether we will use async processing.
    self.parallel = parallel
    if self.parallel:
        # Used for async processing.
        self.predictor = AsyncPredictor(
            self.cfg,
            self.detectron_config,
            self.img_height,
            self.img_width,
            num_gpus=num_gpu,
            input_queue=input_queue,
            output_queue=output_queue,
            gpuid_action_recognition=gpuid_action_recognition,
        )
        # Used to count the frames provided for detect_persons.
        self.provided_image_count = 0
        self.buffer_size = self.predictor.default_buffer_size
        # In the original version this attribute was used to store the images
        # in chronological order, as well as a counter that represents the
        # size of the task_queue attribute. Since we do not return the images,
        # we only use it as a counter representing the task_queue and thus
        # insert a dummy int variable instead of an image, because it is more
        # memory efficient.
        self.frame_data = deque()
    else:
        # Use the modified predictor for the demo.
        self.predictor = DemoDefaultPredictor(
            self.cfg, self.detectron_config, self.img_height, self.img_width
        )
def demo(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load the checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    if cfg.DETECTION.ENABLE:
        # Load the object detector from detectron2.
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
        dtron2_cfg.MODEL.WEIGHTS = (
            cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS
        )
        object_predictor = DefaultPredictor(dtron2_cfg)
        # Load the labels of the AVA dataset.
        with open(cfg.DEMO.LABEL_FILE_PATH) as f:
            labels = f.read().split('\n')[:-1]
        palette = np.random.randint(64, 128, (len(labels), 3)).tolist()
        boxes = []
    else:
        # Load the labels of the Kinetics-400 dataset.
        labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
        labels = labels_df['name'].values

    frame_provider = VideoReader(cfg)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    frames = []
    pred_labels = []
    s = 0.
    for able_to_read, frame in frame_provider:
        if not able_to_read:
            # When the end frame is reached, clear the buffer and continue
            # with the next video.
            frames = []
            continue

        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            if cfg.DETECTION.ENABLE and len(frames) == seq_len // 2 - 1:
                mid_frame = frame

        if len(frames) == seq_len:
            start = time()
            if cfg.DETECTION.ENABLE:
                outputs = object_predictor(mid_frame)
                fields = outputs["instances"]._fields
                pred_classes = fields["pred_classes"]
                selection_mask = pred_classes == 0
                # Acquire person boxes.
                pred_classes = pred_classes[selection_mask]
                pred_boxes = fields["pred_boxes"].tensor[selection_mask]
                scores = fields["scores"][selection_mask]
                boxes = cv2_transform.scale_boxes(
                    cfg.DATA.TEST_CROP_SIZE,
                    pred_boxes,
                    frame_provider.display_height,
                    frame_provider.display_width,
                )
                boxes = torch.cat(
                    [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes],
                    axis=1,
                )
            inputs = torch.as_tensor(frames).float()
            inputs = inputs / 255.0
            # Perform color normalization.
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)
            # 1 C T H W.
            inputs = inputs.unsqueeze(0)

            # Sample frames for the fast pathway.
            index = torch.linspace(
                0, inputs.shape[2] - 1, cfg.DATA.NUM_FRAMES
            ).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            # logger.info('fast_pathway.shape={}'.format(fast_pathway.shape))

            # Sample frames for the slow pathway.
            index = torch.linspace(
                0,
                fast_pathway.shape[2] - 1,
                fast_pathway.shape[2] // cfg.SLOWFAST.ALPHA,
            ).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            # logger.info('slow_pathway.shape={}'.format(slow_pathway.shape))
            inputs = [slow_pathway, fast_pathway]

            """
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            """

            # Perform the forward pass.
            if cfg.DETECTION.ENABLE:
                # When there is nothing in the scene, use a dummy variable to
                # disable all computations below.
                if not len(boxes):
                    preds = torch.tensor([])
                else:
                    preds = model(inputs, boxes)
            else:
                preds = model(inputs)

            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            if cfg.DETECTION.ENABLE:
                # This post-processing is intentionally done on the CPU, since
                # the author's laptop GPU (an RTX 2080) runs out of memory; if
                # your GPU has more memory, consider moving this section to
                # CUDA.
                preds = preds.cpu().detach().numpy()
                pred_masks = preds > .1
                label_ids = [
                    np.nonzero(pred_mask)[0] for pred_mask in pred_masks
                ]
                pred_labels = [
                    [labels[label_id] for label_id in perbox_label_ids]
                    for perbox_label_ids in label_ids
                ]
                # It is unclear how detectron2 rescales boxes to the original
                # image size, so the input boxes of SlowFast are rescaled back
                # instead; this is safer, and it still works even if the boxes
                # were not rescaled by cv2_transform.rescale_boxes.
                boxes = boxes.cpu().detach().numpy()
                ratio = np.min([
                    frame_provider.display_height,
                    frame_provider.display_width,
                ]) / cfg.DATA.TEST_CROP_SIZE
                boxes = boxes[:, 1:] * ratio
            else:
                # Option 1: single-label inference selected from the highest
                # probability entry.
                # label_id = preds.argmax(-1).cpu()
                # pred_label = labels[label_id]
                # Option 2: multi-label inference selected from probability
                # entries > threshold.
                label_ids = (
                    torch.nonzero(preds.squeeze() > .1)
                    .reshape(-1)
                    .cpu()
                    .detach()
                    .numpy()
                )
                pred_labels = labels[label_ids]
                logger.info(pred_labels)
                if not list(pred_labels):
                    pred_labels = ['Unknown']

            # # Option 1: remove the oldest frame in the buffer to make room
            # # for the new one.
            # frames.pop(0)
            # Option 2: empty the buffer.
            frames = []
            s = time() - start

        if cfg.DETECTION.ENABLE and pred_labels and boxes.any():
            for box, box_labels in zip(boxes.astype(int), pred_labels):
                cv2.rectangle(
                    frame, tuple(box[:2]), tuple(box[2:]), (0, 255, 0),
                    thickness=2,
                )
                label_origin = box[:2]
                for label in box_labels:
                    label_origin[-1] -= 5
                    (label_width, label_height), _ = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, .5, 2
                    )
                    cv2.rectangle(
                        frame,
                        (label_origin[0], label_origin[1] + 5),
                        (
                            label_origin[0] + label_width,
                            label_origin[1] - label_height - 5,
                        ),
                        palette[labels.index(label)],
                        -1,
                    )
                    cv2.putText(
                        frame, label, tuple(label_origin),
                        cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1,
                    )
                    label_origin[-1] -= label_height + 5

        if not cfg.DETECTION.ENABLE:
            # Display predicted labels on the frame.
            y_offset = 50
            cv2.putText(
                frame, 'Action:', (10, y_offset),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=.65, color=(0, 235, 0), thickness=2,
            )
            for pred_label in pred_labels:
                y_offset += 30
                cv2.putText(
                    frame, '{}'.format(pred_label), (20, y_offset),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=.65, color=(0, 235, 0), thickness=2,
                )

        # Display prediction speed.
        cv2.putText(
            frame, 'Speed: {:.2f}s'.format(s), (10, 25),
            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=.65, color=(0, 235, 0), thickness=2,
        )
        # Display the frame.
        cv2.imshow('SlowFast', frame)
        # Hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break

    frame_provider.clean()
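# A self-contained sketch of the two-pathway sampling performed in demo()
# above: the fast pathway uniformly samples NUM_FRAMES frames from the clip,
# and the slow pathway keeps every ALPHA-th of those. The tensor shapes and
# the num_frames/alpha values below are illustrative assumptions.
def example_pathway_sampling():
    import torch

    num_frames, alpha = 32, 4
    # Dummy clip of shape 1 x C x T x H x W.
    clip = torch.rand(1, 3, 64, 224, 224)

    # Fast pathway: uniform temporal sampling down to num_frames.
    index = torch.linspace(0, clip.shape[2] - 1, num_frames).long()
    fast_pathway = torch.index_select(clip, 2, index)  # T = 32

    # Slow pathway: subsample the fast pathway by a factor of alpha.
    index = torch.linspace(
        0, fast_pathway.shape[2] - 1, fast_pathway.shape[2] // alpha
    ).long()
    slow_pathway = torch.index_select(fast_pathway, 2, index)  # T = 8

    return [slow_pathway, fast_pathway]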
def setup_environment():
    # Setup logging format.
    logging.setup_logging()
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val
    set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Init multigrid.
    multigrid = None
    if cfg.MULTIGRID.LONG_CYCLE or cfg.MULTIGRID.SHORT_CYCLE:
        multigrid = MultigridSchedule()
        cfg = multigrid.init_multigrid(cfg)
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, _ = multigrid.update_long_cycle(cfg, cur_epoch=0)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")
    precise_bn_loader = (
        loader.construct_loader(cfg, "train", is_precise_bn=True)
        if cfg.BN.USE_PRECISE_STATS
        else None
    )

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, changed = multigrid.update_long_cycle(cfg, cur_epoch)
            if changed:
                (
                    model,
                    optimizer,
                    train_loader,
                    val_loader,
                    precise_bn_loader,
                    train_meter,
                    val_meter,
                ) = build_trainer(cfg)

                # Load checkpoint.
                if cu.has_checkpoint(cfg.OUTPUT_DIR):
                    last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
                    assert "{:05d}.pyth".format(cur_epoch) in last_checkpoint
                else:
                    last_checkpoint = cfg.TRAIN.CHECKPOINT_FILE_PATH
                logger.info("Load from {}".format(last_checkpoint))
                cu.load_checkpoint(
                    last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer
                )

        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(
            train_loader, model, optimizer, train_meter, cur_epoch, cfg, writer
        )

        is_checkp_epoch = cu.is_checkpoint_epoch(
            cfg,
            cur_epoch,
            None if multigrid is None else multigrid.schedule,
        )
        is_eval_epoch = misc.is_eval_epoch(
            cfg, cur_epoch, None if multigrid is None else multigrid.schedule
        )

        # Compute precise BN stats.
        if (
            (is_checkp_epoch or is_eval_epoch)
            and cfg.BN.USE_PRECISE_STATS
            and len(get_bn_modules(model)) > 0
        ):
            calculate_and_update_precise_bn(
                precise_bn_loader,
                model,
                min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)),
                cfg.NUM_GPUS > 0,
            )
        _ = misc.aggregate_sub_bn_stats(model)

        # Save a checkpoint.
        if is_checkp_epoch:
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer)

    if writer is not None:
        writer.close()
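# A minimal sketch of the period-based gating that is_checkpoint_epoch /
# is_eval_epoch perform in the training loop above (ignoring the multigrid
# schedule). The helper internals and the period config keys they read are
# assumptions based on common SlowFast-style configs, not verified here.
def is_period_epoch(cur_epoch, period, max_epoch):
    # Fire every `period` epochs, counting from 1, and always on the final
    # epoch so the last model is checkpointed/evaluated.
    return (cur_epoch + 1) % period == 0 or cur_epoch + 1 == max_epoch

# Example: with period=10 and max_epoch=196, the 0-indexed epochs
# 9, 19, ..., 189 and 195 would trigger checkpointing/evaluation.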
def train_des(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val
    set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_clevrer_model(cfg)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)
    # Load a checkpoint to resume training if applicable.
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Create the video train and val loaders.
    if cfg.TRAIN.DATASET != 'Clevrer_des':
        print(
            "This train script does not support your dataset: -{}-. "
            "Only Clevrer_des".format(cfg.TRAIN.DATASET)
        )
        exit()
    train_loader = build_dataloader(cfg, "train")
    val_loader = build_dataloader(cfg, "val")

    # Create meters.
    train_meter = ClevrerTrainMeter(len(train_loader), cfg)
    val_meter = ClevrerValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        # loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(
            train_loader, model, optimizer, train_meter, cur_epoch, cfg
        )

        is_checkp_epoch = cu.is_checkpoint_epoch(
            cfg,
            cur_epoch,
            None,
        )
        is_eval_epoch = misc.is_eval_epoch(cfg, cur_epoch, None)

        # Save a checkpoint.
        # if is_checkp_epoch:
        #     cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)
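# Hypothetical entry-point sketch for train_des() above: the only hard
# requirement visible in the function is TRAIN.DATASET == 'Clevrer_des'.
# get_cfg() is the assumed config helper from slowfast/config/defaults.py,
# and the output directory below is purely illustrative.
def example_train_des_launch():
    from slowfast.config.defaults import get_cfg  # assumed helper

    cfg = get_cfg()
    cfg.TRAIN.DATASET = 'Clevrer_des'    # required by the guard in train_des
    cfg.OUTPUT_DIR = './output_clevrer'  # illustrative output directory
    train_des(cfg)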