Example No. 1
def export_pytorch_model():
    weights = 'checkpoints/checkpoint_epoch_00028.pyth'
    args = parse_args()
    cfg = load_config(args)

    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    device = torch.device("cpu")

    model = build_model(cfg).to(device)

    # chkpt = torch.load(weights, map_location="cpu")
    chkpt = torch.load(weights, map_location=device)

    model.load_state_dict(chkpt['model_state'])

    # try:
    #     model_dict = model.module.state_dict()
    # except AttributeError:
    #     # Read the original state dict; multi-GPU training stores the model
    #     # with a "module." prefix on every key, hence the fallback.
    #     model_dict = model.state_dict()
    # # Drop keys from the pretrained dict that are not present in model_dict.
    #     chkpt = {k: v for k, v in chkpt.items() if k in model_dict}
    # print("load pretrain model")
    # model_dict.update(chkpt)
    # # model.state_dict(model_dict)
    # model.load_state_dict(model_dict)

    # Switch the model to evaluation mode.
    model.eval()
    # e1 = torch.rand(1, 3, 8, 224, 224).cuda()
    # e2 = torch.rand(1, 3, 32, 224, 224).cuda()
    e1 = torch.rand(8, 3, 256, 455)  # .fill_(0)
    e2 = torch.rand(32, 3, 256, 455)  # .fill_(0)
    e3 = [e1, e2]
    # e4 = torch.rand(1, 5).cuda()
    # cuda()
    e4 = torch.rand(1, 1, 1, 5)  # .fill_(0)

    import numpy as np

    # Persist the example inputs so the traced model can be re-checked later.
    np.save("input0.npy", e3[0].numpy())
    np.save("input1.npy", e3[1].numpy())
    np.save("input2.npy", e4.numpy())
    input0 = torch.from_numpy(np.load("input0.npy"))
    input1 = torch.from_numpy(np.load("input1.npy"))
    input2 = torch.from_numpy(np.load("input2.npy"))

    pred = model(e3, e4)
    print(pred)
    # exit(0)
    input3 = [input0, input1]

    # traced_script_module = torch.jit.trace(model, (e3, e4))
    traced_script_module = torch.jit.trace(model, (input3, input2))
    # print(traced_script_module.graph)
    print(traced_script_module(input3, input2))  # .forward

    traced_script_module.save("weights/sf18_pytorch_cpu4503.pt")
    print("out put save")
Example No. 2
    def saveOnnxModel(self):
        model = build_model(self.cfg)
        optimizer = optim.construct_optimizer(model, self.cfg)
        start_epoch = cu.load_train_checkpoint(self.cfg, model, optimizer,
                                               self.logger)

        self.cfg.TRAIN['BATCH_SIZE'] = self.cfg.ONNX.BATCH_SIZE
        dl = loader.construct_loader(self.cfg, "train")

        inputs, labels, _, _ = next(iter(dl))
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                # Tensor.to() is not in-place; assign the result back.
                inputs[i] = inputs[i].to(self.onnxDevice)

        model.to(torch.device(self.onnxDevice))
        model.eval()
        onnxPath, _ = self.getOnnxModelPath()

        with torch.no_grad():
            torch.onnx.export(
                model,
                inputs,
                onnxPath,
                opset_version=self.cfg.ONNX.OPSET_VER,
                verbose=True,
                input_names=self.cfg.ONNX.INPUT_NAMES,
                output_names=self.cfg.ONNX.OUTPUT_NAMES,
            )

        self.logger.info("Exported {}".format(onnxPath))
Example No. 3
def inference(cfg):
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    cu.load_test_checkpoint(cfg, model)

    # Create video loaders.
    video_loader = loader.construct_loader(cfg, "test")

    # Create saver
    saver = Saver(cfg.DATA.PATH_TO_DATA_DIR, video_loader.dataset)

    model.eval()
    for i, (inputs, index) in tqdm(enumerate(video_loader),
                                   total=len(video_loader)):
        # Use a separate index so the outer loop counter is not shadowed.
        for j in range(len(inputs)):
            inputs[j] = inputs[j].cuda(non_blocking=True)
        index = index.cuda()
        feats = model(inputs)

        # Gather all the predictions across all the devices to perform ensemble.
        if cfg.NUM_GPUS > 1:
            feats, index = du.all_gather([feats, index])

        saver.save(feats, index)

    saver.merge()
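Saver here is project-specific and not shown; a minimal sketch of what such a feature saver might look like, assuming it buffers per-batch features and writes one ordered file at the end (all names hypothetical):

import os
import torch

class Saver:
    """Hypothetical sketch: collect per-batch features keyed by dataset index."""

    def __init__(self, out_dir, dataset):
        self.out_dir = out_dir
        self.chunks = []

    def save(self, feats, index):
        # Keep CPU copies so GPU memory is freed between batches.
        self.chunks.append((index.cpu(), feats.cpu()))

    def merge(self):
        index = torch.cat([idx for idx, _ in self.chunks])
        feats = torch.cat([f for _, f in self.chunks])
        # Restore dataset order before writing a single file.
        order = torch.argsort(index)
        torch.save(feats[order], os.path.join(self.out_dir, "features.pt"))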
Example No. 4
    def __init__(self, cfg, gpu_id=None):
        """
        Args:
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py
            gpu_id (Optional[int]): GPU id.
        """
        if cfg.NUM_GPUS:
            self.gpu_id = (
                torch.cuda.current_device() if gpu_id is None else gpu_id
            )

        # Build the video model and print model statistics.
        # self.model = build_model(cfg, gpu_id=gpu_id)
        self.model = build_model(cfg, gpu_id=None)
        self.model.eval()
        self.cfg = cfg

        if cfg.DETECTION.ENABLE:
            # self.object_detector = Detectron2Predictor(cfg, gpu_id=self.gpu_id)
            self.object_detector = Detectron2Predictor(cfg, gpu_id=None)

        logger.info("Start loading model weights.")
        cu.load_test_checkpoint(cfg, self.model)
        logger.info("Finish loading model weights")
Example No. 5
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            test_loader.dataset.num_videos %
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS) == 0)
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            test_loader.dataset.num_videos //
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
            cfg.DATA.MULTI_LABEL,
            cfg.DATA.ENSEMBLE_METHOD,
        )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
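The divisibility assert above reflects how multi-view testing enumerates clips: the test dataset contains every (video, temporal view, spatial crop) combination, so its length must be a multiple of the clips per video. A worked example with illustrative numbers:

# Illustrative numbers only, not from any real config.
num_ensemble_views = 10   # temporal clips sampled per video
num_spatial_crops = 3     # spatial crops per temporal clip
clips_per_video = num_ensemble_views * num_spatial_crops  # 30

dataset_len = 9000        # e.g. 300 videos x 30 clips each
assert dataset_len % clips_per_video == 0
num_unique_videos = dataset_len // clips_per_video  # 300 rows in the TestMeter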
Example No. 6
    def load_model(self):
        #logger.info("Model Config")
        #logger.info(self.cfg)
        self.model = build_model(self.cfg)
        self.model.eval()
        #if du.is_master_proc():
        misc.log_model_info(self.model, self.cfg, is_train=False)

        model_path = self.cfg.TRAIN.CHECKPOINT_FILE_PATH
        assert os.path.exists(
            model_path), "Model path not found: %s" % model_path

        cu.load_checkpoint(model_path, self.model, self.cfg.NUM_GPUS > 1)
Example No. 7
def test(cfg):
    # Build model
    model = build_model(cfg)
    optimizer = optim.construct_optimizer(model, cfg)
    # load checkpoint
    start_epoch = cu.load_test_checkpoint(cfg, model)
    print("Load model epoch", start_epoch)

    # Build data loader
    test_loader = dataloader.construct_loader(cfg, "test")

    # Perform test
    results = perform_test(test_loader, model, cfg)
Example No. 8
def main():
    """
    Main function to spawn the train and test process.
    """
    args = parse_args()
    cfg = load_config(args)
    ####################################################################################################

    # overrides = sys.argv[1:]

    # overrides_dict = {}
    # for i in range(len(overrides)//2):
    #     overrides_dict[overrides[2*i]] = overrides[2*i+1]
    # overrides_dict['dir'] = cfg.OUTPUT_DIR

    # print(overrides_dict)

    ####################################################################################################
    import torch

    import time

    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")

    # start = time.perf_counter()
    # sample = next(iter(train_loader))
    # print('data time' , time.perf_counter()-start)
    # print(sample[0].shape)
    # print(sample[-1])

    cfg.NUM_GPUS = 1
    # Build the video model and print model statistics.
    model = build_model(cfg)
    # misc.log_model_info(model, cfg, is_train=True)

    input_ = torch.rand([2, 1, 16, 96, 96]).cuda()
    #input_[input_<0.5] = 0
    #input_[input_>=0.5] = 1

    # input_ = sample[0].transpose(1,2).cuda()
    #input_ = sample[0][:10]
    start = time.perf_counter()

    output = model(input_)

    print('model time', time.perf_counter() - start)

    # print(output.keys())
    for k, v in output.items():
        print(k, v)
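One caveat with the timing above: CUDA kernels launch asynchronously, so time.perf_counter() can return before the forward pass has actually finished. A sketch of a more faithful measurement using torch.cuda.synchronize():

import time
import torch

# Synchronize before and after so the timer spans the whole forward pass.
torch.cuda.synchronize()
start = time.perf_counter()
output = model(input_)
torch.cuda.synchronize()
print('model time (synced)', time.perf_counter() - start)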
Example No. 9
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py
        """
        # Build the video model and print model statistics.
        self.model = build_model(cfg)
        self.model.eval()
        self.cfg = cfg
        logger.info("Start loading model info")
        misc.log_model_info(self.model, cfg, use_train_input=False)
        logger.info("Start loading model weights")
        cu.load_test_checkpoint(cfg, self.model)
        logger.info("Finish loading model weights")
Example No. 10
def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and cfg.TENSORBOARD.MODEL_VIS.ENABLE:
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, is_train=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")
        logger.info(
            "Visualize model for {} data points".format(len(vis_loader))
        )

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None

        # Run visualization on the model
        run_visualization(vis_loader, model, cfg, writer)

        if writer is not None:
            writer.close()
Example No. 11
def build_trainer(cfg):
    """
    Build training model and its associated tools, including optimizer,
    dataloaders and meters.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    Returns:
        model (nn.Module): training model.
        optimizer (Optimizer): optimizer.
        precise_bn_loader (DataLoader): training data loader for computing
            precise BN.
        train_meter (TrainMeter): tool for measuring training stats.
        val_meter (ValMeter): tool for measuring validation stats.
    """
    # Build the video model and print model statistics.
    model = build_model(cfg)
    # if du.is_master_proc() and cfg.LOG_MODEL_INFO:
    # misc.log_model_info(model, cfg, use_train_input=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Create the video train and val loaders.
    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")
    precise_bn_loader = loader.construct_loader(cfg,
                                                "train",
                                                is_precise_bn=True)
    # Create meters.
    # train_meter = TrainMeter(1000, cfg)
    # val_meter = ValMeter(1000, cfg)

    train_meter = TrainMeter(1e6, cfg)
    val_meter = ValMeter(1e6, cfg)

    return (
        model,
        optimizer,
        precise_bn_loader,
        train_meter,
        val_meter,
    )
Example No. 12
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)

    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    # Create meters for loss tracking
    test_meter = TrainMeter(test_loader.dataset.num_videos, cfg)

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
Example No. 13
def train(cfg):
    # Build model
    model = build_model(cfg)
    optimizer = optim.construct_optimizer(model, cfg)
    # load checkpoint
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Build data loader
    train_loader = dataloader.construct_loader(cfg, "train")
    val_loader = dataloader.construct_loader(cfg, "val")
    precise_bn_loader = dataloader.construct_loader(cfg, "train")

    best_accuracy = 0
    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, cur_epoch, cfg)

        is_eval_epoch = cur_epoch > 0
        # Compute precise BN stats.
        if (is_eval_epoch and cfg.BN.USE_PRECISE_STATS
                and len(get_bn_modules(model)) > 0):
            calculate_and_update_precise_bn(
                precise_bn_loader,
                model,
                min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)),
                cfg.NUM_GPUS > 0,
            )
        # Aggregate SubBatchNorm3d stats; must be called before eval.
        _ = aggregate_sub_bn_stats(model)

        # Evaluate the model on validation set.
        if is_eval_epoch:
            results = eval_epoch(val_loader, model, cur_epoch, cfg)
            accuracy = results['top1']
            if accuracy > best_accuracy:
                print("*** Saving best ****")
                best_accuracy = accuracy
                torch.save(
                    {
                        'epoch': cur_epoch + 1,
                        'model_state': model.state_dict(),
                        'optimizer_state': optimizer.state_dict()
                    }, os.path.join(cfg.OUTPUT_DIR, 'best_ckpt.pth'))
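The dictionary written to best_ckpt.pth can be restored symmetrically; a minimal sketch, assuming the same build_model and construct_optimizer calls as above:

import os
import torch

# Restore the best checkpoint saved by the loop above.
ckpt = torch.load(os.path.join(cfg.OUTPUT_DIR, 'best_ckpt.pth'),
                  map_location='cpu')
model.load_state_dict(ckpt['model_state'])
optimizer.load_state_dict(ckpt['optimizer_state'])
start_epoch = ckpt['epoch']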
Example No. 14
def main():
    args = parser_args()
    print(args)
    cfg_file = args.cfg_file
    checkpoint_file = args.checkpoint
    save_checkpoint_file = args.save
    half_flag = args.half
    cfg = get_cfg()
    cfg.merge_from_file(cfg_file)
    cfg.TEST.CHECKPOINT_FILE_PATH = checkpoint_file

    print("simplifier model!\n")
    with torch.no_grad():
        model = build_model(cfg)
        model.eval()
        cu.load_test_checkpoint(cfg, model)
        if half_flag:
            model.half()
        with open(save_checkpoint_file, 'wb') as file:
            torch.save({"model_state": model.state_dict()}, file)
Example No. 15
def infer(cfg):
    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Infer with config:")
    logger.info(cfg)

    # Build the SlowFast model and print its statistics
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # load weights
    if cfg.INFERENCE.WEIGHTS_FILE_PATH != "":
        cu.load_checkpoint(cfg.INFERENCE.WEIGHTS_FILE_PATH, model, cfg.NUM_GPUS > 1, None,
                           inflation=False, convert_from_caffe2=cfg.INFERENCE.WEIGHTS_TYPE == "caffe2")
    else:
        raise FileNotFoundError("Model weights file could not be found")

    inference_loader = loader.construct_loader(cfg, "inference")

    perform_inference(inference_loader, model, cfg)
Example No. 16
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=True)

    if cfg.BN.FREEZE:
        model.freeze_fn('bn_parameters')

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                              cfg.NUM_GPUS > 1, optimizer)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "" and not cfg.TRAIN.FINETUNE:
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "" and cfg.TRAIN.FINETUNE:
        logger.info("Load from given checkpoint file. Finetuning.")
        _ = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = 0
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    if cfg.TRAIN.DATASET != 'epickitchens' or not cfg.EPICKITCHENS.TRAIN_PLUS_VAL:
        train_loader = loader.construct_loader(cfg, "train")
        val_loader = loader.construct_loader(cfg, "val")
    else:
        train_loader = loader.construct_loader(cfg, "train+val")
        val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        if cfg.TRAIN.DATASET == 'epickitchens':
            train_meter = EPICTrainMeter(len(train_loader), cfg)
            val_meter = EPICValMeter(len(val_loader), cfg)
        else:
            train_meter = TrainMeter(len(train_loader), cfg)
            val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                    cfg)

        # Compute precise BN stats.
        if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
            calculate_and_update_precise_bn(train_loader, model,
                                            cfg.BN.NUM_BATCHES_PRECISE)

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            is_best_epoch = eval_epoch(val_loader, model, val_meter, cur_epoch,
                                       cfg)
            if is_best_epoch:
                cu.save_checkpoint(cfg.OUTPUT_DIR,
                                   model,
                                   optimizer,
                                   cur_epoch,
                                   cfg,
                                   is_best_epoch=is_best_epoch)
Example No. 17
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """


    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        writer = SummaryWriter(log_dir=cfg.OUTPUT_DIR)
    else:
        writer = None

    if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS) and not cfg.DEBUG:
        tags = []
        if 'TAGS' in cfg and cfg.TAGS != []:
            tags = list(cfg.TAGS)
        neptune.set_project('Serre-Lab/motion')

        ######################
        overrides = sys.argv[1:]

        overrides_dict = {}
        for i in range(len(overrides)//2):
            overrides_dict[overrides[2*i]] = overrides[2*i+1]
        overrides_dict['dir'] = cfg.OUTPUT_DIR
        ######################


        if 'NEP_ID' in cfg and cfg.NEP_ID != "":
            session = Session()
            project = session.get_project(project_qualified_name='Serre-Lab/motion')
            nep_experiment = project.get_experiments(id=cfg.NEP_ID)[0]

        else:
            nep_experiment = neptune.create_experiment(name=cfg.NAME,
                                                       params=overrides_dict,
                                                       tags=tags)
    else:
        nep_experiment = None

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        misc.log_model_info(model, cfg, is_train=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(
            last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, writer, nep_experiment, cfg)

        # Compute precise BN stats.
        # if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
        #     calculate_and_update_precise_bn(
        #         train_loader, model, cfg.BN.NUM_BATCHES_PRECISE
        #     )

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, nep_experiment, cfg)

        if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS) and not cfg.DEBUG:
            nep_experiment.log_metric('epoch', cur_epoch)
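The override parsing in this example pairs consecutive argv tokens into key/value entries; a quick illustration with hypothetical command-line arguments:

# Hypothetical invocation: python train.py SOLVER.MAX_EPOCH 100 DATA.BATCH_SIZE 16
overrides = ['SOLVER.MAX_EPOCH', '100', 'DATA.BATCH_SIZE', '16']
overrides_dict = {}
for i in range(len(overrides) // 2):
    overrides_dict[overrides[2 * i]] = overrides[2 * i + 1]
# -> {'SOLVER.MAX_EPOCH': '100', 'DATA.BATCH_SIZE': '16'}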
Example No. 18
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
    # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            len(test_loader.dataset)
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
        )

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg)
Example No. 19
    def get_predictions(self):
        """
        Predict and append prediction results to each box in each keyframe in
        `self.pred_boxes` dictionary.
        """
        # Set random seed from configs.
        np.random.seed(self.cfg.RNG_SEED)
        torch.manual_seed(self.cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(self.cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Run demo with config:")
        logger.info(self.cfg)
        assert (self.cfg.NUM_GPUS <=
                1), "Cannot run demo visualization on multiple GPUs."

        # Build the video model and print model statistics.
        model = build_model(self.cfg)
        model.eval()
        logger.info("Start loading model info")
        misc.log_model_info(model, self.cfg, use_train_input=False)
        logger.info("Start loading model weights")
        cu.load_test_checkpoint(self.cfg, model)
        logger.info("Finish loading model weights")
        logger.info("Start making predictions for precomputed boxes.")
        for keyframe_idx, boxes_and_labels in tqdm.tqdm(
                self.pred_boxes.items()):
            inputs = self.get_input_clip(keyframe_idx)
            boxes = boxes_and_labels[0]
            boxes = torch.from_numpy(np.array(boxes)).float()

            box_transformed = scale_boxes(
                self.cfg.DATA.TEST_CROP_SIZE,
                boxes,
                self.display_height,
                self.display_width,
            )

            # Pad frame index for each box.
            box_inputs = torch.cat(
                [
                    torch.full((box_transformed.shape[0], 1), float(0)),
                    box_transformed,
                ],
                dim=1,
            )
            if self.cfg.NUM_GPUS:
                # Transfer the data to the current GPU device.
                if isinstance(inputs, (list, )):
                    for i in range(len(inputs)):
                        inputs[i] = inputs[i].cuda(non_blocking=True)
                else:
                    inputs = inputs.cuda(non_blocking=True)

                box_inputs = box_inputs.cuda()

            preds = model(inputs, box_inputs)

            preds = preds.detach()

            if self.cfg.NUM_GPUS:
                preds = preds.cpu()

            boxes_and_labels[1] = preds
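Two details worth noting above: the zero column prepended to each box is the batch index that ROIAlign-style detection heads expect, and scale_boxes maps boxes from the display resolution to the model's test crop size. scale_boxes itself is project code; a hedged sketch of the rescaling it presumably performs (name and convention are assumptions):

def scale_boxes_sketch(crop_size, boxes, height, width):
    """Hypothetical sketch: rescale XYXY pixel boxes from a (height, width)
    display frame to a frame whose short side equals crop_size."""
    scale = crop_size / min(height, width)
    return boxes * scale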
Example No. 20
def train_epoch(
    train_loader,
    model,
    optimizer,
    scaler,
    train_meter,
    cur_epoch,
    cfg,
    writer=None,
):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        scaler (GradScaler): gradient scaler for mixed-precision training.
        train_meter (TrainMeter): training meters to log the training performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to write Tensorboard logs.
    """
    print(model)
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)

    if cfg.MIXUP.ENABLE:
        mixup_fn = MixUp(
            mixup_alpha=cfg.MIXUP.ALPHA,
            cutmix_alpha=cfg.MIXUP.CUTMIX_ALPHA,
            mix_prob=cfg.MIXUP.PROB,
            switch_prob=cfg.MIXUP.SWITCH_PROB,
            label_smoothing=cfg.MIXUP.LABEL_SMOOTH_VALUE,
            num_classes=cfg.MODEL.NUM_CLASSES,
        )

    # print(model.patch_embed.proj.weight.device)
    # if cfg.NUM_GPUS >= 2 and not cfg.MODEL.DDP:
    #     blk_size = int(16/cfg.NUM_GPUS)
    #     start = blk_size
    #     for g in range(cfg.NUM_GPUS-1):
    #         dev = f"cuda:{g+1}"
    #         for i in range(start, start + blk_size):
    #             model.blocks[i] = model.blocks[i].to(dev)
    #         start += blk_size
    #     model.norm = model.norm.to(dev)
    #     model.head = model.head.to(dev)

    profiler.log_tic("loop_time")
    # extra_model = Mlp(400, 1000000, 400)
    # print(extra_model)
    # extra_model = extra_model.to("cuda:4")

    if cfg.MODEL.MODEL_NAME == "MViTHybridP1":
        cfg.MODEL.MODEL_NAME = "MViTHybridP2"
        original_ddp = cfg.MODEL.DDP
        cfg.MODEL.DDP = False
        model_p2 = build_model(cfg)
        model_p2 = model_p2.to("cuda:2")  # cuda()
        # because the rest of the logic is about the P1 model
        cfg.MODEL.MODEL_NAME = "MViTHybridP1"
        cfg.MODEL.DDP = original_ddp

    for cur_iter, (inputs, labels, index, time,
                   meta) in enumerate(train_loader):
        print(f"Iteration: {cur_iter}, {inputs.shape}")
        # print(inputs.shape)
        # batchsize = 18
        # inputs = [
        #     torch.rand((batchsize, 3, 16, 224, 224)),
        # ]
        # labels = torch.zeros(batchsize)
        # meta =
        # Transfer the data to the current GPU device.
        if cfg.MODEL.MODEL_NAME in ["MViT", "MViTHybridP1"] and cfg.NUM_GPUS:

            print("in MViT model if statement")

            # if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            # for key, val in meta.items():
            #     if isinstance(val, (list,)):
            #         for i in range(len(val)):
            #             val[i] = val[i].cuda(non_blocking=True)
            #     else:
            #         meta[key] = val.cuda(non_blocking=True)
        # else:
        # inputs[0] = inputs[0].to("cuda:0")
        # inputs = inputs.to("cuda:0")
        # labels = labels.to("cuda:0")
        # print(inputs.shape)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)

        train_meter.data_toc()
        if cfg.MIXUP.ENABLE:
            samples, labels = mixup_fn(inputs[0], labels)
            inputs[0] = samples

        with torch.cuda.amp.autocast(enabled=cfg.TRAIN.MIXED_PRECISION):
            # if cfg.DETECTION.ENABLE:
            #     preds = model(inputs, meta["boxes"])
            # else:

            profiler.log_tic("model_time")

            if cfg.MODEL.MODEL_NAME == "MViTHybridP1":
                preds, thw = model(inputs)
                preds = preds.to("cuda:2")
                # import ipdb; ipdb.set_trace()
                preds = model_p2(preds, thw)

            else:
                preds = model(inputs)

            # preds = preds.to("cuda:4")
            # pred = extra_model(preds)

            profiler.log_toc("model_time", shape=inputs.shape)
            # Explicitly declare reduction to mean.
            # loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")

            # Compute the loss.
            # loss = loss_fun(preds, labels)
            loss = preds.norm()
            # loss = loss_fun(preds, labels)

        # check Nan Loss.
        misc.check_nan_losses(loss)

        # Perform the backward pass.
        optimizer.zero_grad()
        profiler.log_tic("backward_time")
        scaler.scale(loss).backward()

        # Unscales the gradients of optimizer's assigned params in-place
        scaler.unscale_(optimizer)
        # Clip gradients if necessary
        if cfg.SOLVER.CLIP_GRAD_VAL:
            torch.nn.utils.clip_grad_value_(model.parameters(),
                                            cfg.SOLVER.CLIP_GRAD_VAL)
        elif cfg.SOLVER.CLIP_GRAD_L2NORM:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           cfg.SOLVER.CLIP_GRAD_L2NORM)
        # Update the parameters.
        scaler.step(optimizer)
        scaler.update()
        profiler.log_toc("backward_time", shape=inputs.shape)

        if cfg.MIXUP.ENABLE:
            _top_max_k_vals, top_max_k_inds = torch.topk(labels,
                                                         2,
                                                         dim=1,
                                                         largest=True,
                                                         sorted=True)
            idx_top1 = torch.arange(labels.shape[0]), top_max_k_inds[:, 0]
            idx_top2 = torch.arange(labels.shape[0]), top_max_k_inds[:, 1]
            preds = preds.detach()
            preds[idx_top1] += preds[idx_top2]
            preds[idx_top2] = 0.0
            labels = top_max_k_inds[:, 0]

        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()

            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )

        # else:
        #     top1_err, top5_err = None, None
        #     if cfg.DATA.MULTI_LABEL:
        #         # Gather all the predictions across all the devices.
        #         if cfg.NUM_GPUS > 1:
        #             [loss] = du.all_reduce([loss])
        #         loss = loss.item()
        #     else:
        #         Compute the errors.
        #         num_topks_correct = metrics.topks_correct(preds, labels, (1, 5))
        #         top1_err, top5_err = [
        #             (1.0 - x / preds.size(0)) * 100.0 for x in num_topks_correct
        #         ]
        #         Gather all the predictions across all the devices.
        #         if cfg.NUM_GPUS > 1:
        #             loss, top1_err, top5_err = du.all_reduce([loss, top1_err, top5_err])

        #         # Copy the stats from GPU to CPU (sync point).
        #         loss, top1_err, top5_err = (
        #             loss.item(),
        #             top1_err.item(),
        #             top5_err.item(),
        #         )

        #     # Update and log stats.
        #     train_meter.update_stats(
        #         top1_err,
        #         top5_err,
        #         loss,
        #         lr,
        #         inputs[0].size(0)
        #         * max(
        #             cfg.NUM_GPUS, 1
        #         ),  # If running  on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
        #     )
        #     write to tensorboard format if available.
        #     if writer is not None:
        #         writer.add_scalars(
        #             {
        #                 "Train/loss": loss,
        #                 "Train/lr": lr,
        #                 "Train/Top1_err": top1_err,
        #                 "Train/Top5_err": top5_err,
        #             },
        #             global_step=data_size * cur_epoch + cur_iter,
        #         )

        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        profiler.log_toc("loop_time", shape=inputs.shape)
        profiler.log_tic("loop_time")

        profiler.report(25)
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
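The scale / unscale_ / clip / step / update sequence in the loop above is the standard torch.cuda.amp recipe; a self-contained minimal sketch of the same step with a toy model (for illustration only):

import torch
import torch.nn as nn

model = nn.Linear(16, 4).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = torch.cuda.amp.GradScaler()

x = torch.randn(8, 16, device="cuda")
with torch.cuda.amp.autocast():
    loss = model(x).norm()

optimizer.zero_grad()
scaler.scale(loss).backward()
scaler.unscale_(optimizer)  # so clipping sees the true gradient magnitudes
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
scaler.step(optimizer)
scaler.update()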
Example No. 21
def test(cfg):
    """
    Perform multi-view testing on the pretrained audio model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the audio model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg)

    cu.load_test_checkpoint(cfg, model)

    # Create audio testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    assert (
        len(test_loader.dataset)
        % cfg.TEST.NUM_ENSEMBLE_VIEWS
        == 0
    )
    # Create meters for multi-view testing.
    if cfg.TEST.DATASET == 'epickitchens':
        test_meter = EPICTestMeter(
            len(test_loader.dataset)
            // cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
            cfg.DATA.ENSEMBLE_METHOD,
        )
    else:
        test_meter = TestMeter(
            len(test_loader.dataset)
            // cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.MODEL.NUM_CLASSES[0],
            len(test_loader),
            cfg.DATA.MULTI_LABEL,
            cfg.DATA.ENSEMBLE_METHOD,
        )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter, preds, preds_clips, labels, metadata = perform_test(test_loader, model, test_meter, cfg, writer)

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {'verb_output': preds[0],
                       'noun_output': preds[1],
                       'narration_id': metadata}
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            file_path = os.path.join(scores_path, cfg.EPICKITCHENS.TEST_SPLIT+'.pkl')
            pickle.dump(results, open(file_path, 'wb'))
        else:
            if cfg.TEST.DATASET == 'vggsound':
                get_stats(preds, labels)
            results = {'scores': preds, 'labels': labels}
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            file_path = os.path.join(scores_path, 'test.pkl')
            pickle.dump(results, open(file_path, 'wb'))

    if writer is not None:
        writer.close()
Example No. 22
def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and (cfg.TENSORBOARD.MODEL_VIS.ENABLE
                                   or cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE):
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        model.eval()
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, use_train_input=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None
        if cfg.TENSORBOARD.PREDICTIONS_PATH != "":
            assert not cfg.DETECTION.ENABLE, "Detection is not supported."
            logger.info(
                "Visualizing class-level performance from saved results...")
            if writer is not None:
                with g_pathmgr.open(cfg.TENSORBOARD.PREDICTIONS_PATH,
                                    "rb") as f:
                    preds, labels = pickle.load(f, encoding="latin1")

                writer.plot_eval(preds, labels)

        if cfg.TENSORBOARD.MODEL_VIS.ENABLE:
            if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
                assert (
                    not cfg.DETECTION.ENABLE
                ), "Detection task is currently not supported for Grad-CAM visualization."
                if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 1
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST))
                elif cfg.MODEL.ARCH in cfg.MODEL.MULTI_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 2
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST))
                else:
                    raise NotImplementedError(
                        "Model arch {} is not in {}".format(
                            cfg.MODEL.ARCH,
                            cfg.MODEL.SINGLE_PATHWAY_ARCH +
                            cfg.MODEL.MULTI_PATHWAY_ARCH,
                        ))
            logger.info("Visualize model analysis for {} iterations".format(
                len(vis_loader)))
            # Run visualization on the model
            run_visualization(vis_loader, model, cfg, writer)
        if cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE:
            logger.info("Visualize Wrong Predictions for {} iterations".format(
                len(vis_loader)))
            perform_wrong_prediction_vis(vis_loader, model, cfg)

        if writer is not None:
            writer.close()
Example No. 23
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
    # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset) %
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS) == 0)
        # Create meters for multi-view testing.
        if cfg.TEST.DATASET == 'epickitchens':
            test_meter = EPICTestMeter(
                len(test_loader.dataset) //
                (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )
        else:
            test_meter = TestMeter(
                len(test_loader.dataset) //
                (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )

    # Perform multi-view test on the entire dataset.
    preds, labels, metadata = perform_test(test_loader, model, test_meter, cfg)

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {
                'scores': {
                    'verb': preds[0],
                    'noun': preds[1]
                },
                'labels': {
                    'verb': labels[0],
                    'noun': labels[1]
                },
                'narration_id': metadata
            }
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            file_path = os.path.join(scores_path,
                                     cfg.EPICKITCHENS.TEST_SPLIT + '.pkl')
            pickle.dump(results, open(file_path, 'wb'))
Example No. 24
def visualize_activations(cfg):
    """
    Visualize intermediate activations of a video model on train-set samples.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """

    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Vizualize activations")
    # logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    # Construct the optimizer.
    # optimizer = optim.construct_optimizer(model, cfg)

    logger.info("Load from given checkpoint file.")
    checkpoint_epoch = cu.load_checkpoint(
        cfg.TRAIN.CHECKPOINT_FILE_PATH,
        model,
        cfg.NUM_GPUS > 1,
        optimizer=None,
        inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
        convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
    )

    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=True)

    # Create the video train and val loaders.
    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")

    train_set = build_dataset(cfg.TEST.DATASET, cfg, "train")

    for i in np.random.choice(len(train_set), 5):

        # frames, label, _, _ = train_set.get_augmented_examples(i)
        frames, label, _, _ = train_set[i]
        inputs = frames
        inputs[0] = inputs[0][None, :]
        logger.info(frames[0].shape)
        # frames = frames[0].permute(0,2,3,4,1)
        frames = frames[0].squeeze().transpose(0, 1)  #.permute(1,2,3,0)
        logger.info(frames.shape)
        tv.utils.save_image(frames,
                            os.path.join(cfg.OUTPUT_DIR, 'example_%d.jpg' % i),
                            nrow=18,
                            normalize=True)

        for j in range(len(inputs)):
            inputs[j] = inputs[j].cuda(non_blocking=True)
        with torch.no_grad():
            # logger.info(inputs[i].shape)
            # sys.stdout.flush()
            inputs[0] = inputs[0][:min(3, len(inputs[0]))]
            output = model(inputs, extra=['frames'])

            # frames = frames[0].transpose(0,1)#.permute(1,2,3,0)
            # tv.utils.save_image(frames, os.path.join(cfg.OUTPUT_DIR, 'example_target_%d.jpg'%i), nrow=18, normalize=True)

            input_aug = output['input_aug']
            logger.info(input_aug.shape)

            input_aug = input_aug[0].transpose(0, 1)
            tv.utils.save_image(input_aug,
                                os.path.join(cfg.OUTPUT_DIR,
                                             'example_input_%d.jpg' % i),
                                nrow=18,
                                normalize=True)

            # mix_layer [1, timesteps, layers, activations]
            mix_out = output['mix_layer']  #.cpu().data.numpy().squeeze()
            for layer in range(len(mix_out)):
                logger.info('mix layer %d' % layer)
                logger.info(mix_out[layer].view([18, -1]).mean(1))
                images = mix_out[layer].transpose(1, 2).transpose(0, 1)
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])
                images = (images - images.min())
                images = images / images.max()
                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_mix_layer_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)

            # BU errors per timestep per layer (choose a random activation or the mean) also write out the mean/norm
            # [1, timesteps, layers, channels, height, width]

            bu_errors = output['bu_errors']  #.cpu()#.data.numpy().squeeze()

            for layer in range(len(bu_errors)):
                images = bu_errors[layer].transpose(1, 2).transpose(0, 1)
                images = (images - images.min())
                images = images / images.max()
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])
                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_bu_errors_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)

            # horiz inhibition per timestep per layer (choose a random activation or the mean) also write out the mean/norm
            # [1, timesteps, layers, channels, height, width]
            inhibition = output['H_inh']  #.cpu()#.data.numpy().squeeze()
            for layer in range(len(inhibition)):
                images = inhibition[layer].transpose(1, 2).transpose(0, 1)
                images = (images - images.min())
                images = images / images.max()
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])

                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_H_inh_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)

            # Persistent hidden state carried between timesteps.
            # Layout: [1, timesteps, layers, channels, height, width].
            hidden = output['hidden']
            for layer in range(len(hidden)):
                images = hidden[layer].transpose(1, 2).transpose(0, 1)
                images = (images - images.min())
                images = images / images.max()
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])

                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_hidden_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)
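
The four save loops above repeat one idiom: reorder a multi-dimensional activation into a stack of single-channel images, rescale to [0, 1], and tile the stack with torchvision. Below is a minimal standalone sketch of that idiom, assuming a [C, T, H, W] activation tensor; save_activation_grid is a hypothetical helper, not part of the snippet above.

import os

import torchvision as tv


def save_activation_grid(act, out_dir, name):
    # act: [C, T, H, W] -> [C * T, 1, H, W]; each grid row then shows
    # one channel across all T timesteps.
    c, t, h, w = act.shape
    images = act.reshape(c * t, 1, h, w)
    # Rescale to [0, 1] (the clamp guards against an all-constant tensor).
    images = images - images.min()
    images = images / images.max().clamp(min=1e-8)
    tv.utils.save_image(images, os.path.join(out_dir, '%s.jpg' % name),
                        nrow=t)
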
Exemplo n.º 25
0
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    assert (len(test_loader.dataset) %
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS) == 0)
    # Create meters for multi-view testing.
    test_meter = TestMeter(
        len(test_loader.dataset) //
        (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
        cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
        cfg.MODEL.NUM_CLASSES,
        len(test_loader),
        cfg.DATA.MULTI_LABEL,
        cfg.DATA.ENSEMBLE_METHOD,
    )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
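
The first two TestMeter arguments encode the multi-view protocol: each video is sampled as NUM_ENSEMBLE_VIEWS temporal clips times NUM_SPATIAL_CROPS spatial crops, and the per-clip scores are ensembled back into one prediction per video. A toy sketch of that arithmetic with illustrative sizes (this is not the SlowFast implementation):

import torch

num_videos, num_views, num_crops, num_classes = 4, 10, 3, 400
clips_per_video = num_views * num_crops

# One score vector per clip; clip i belongs to video i // clips_per_video,
# which is why the dataset size must divide evenly (the assert above).
clip_scores = torch.randn(num_videos * clips_per_video, num_classes)

video_scores = clip_scores.view(num_videos, clips_per_video,
                                num_classes).mean(dim=1)
video_preds = video_scores.argmax(dim=1)  # one label per video
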
Exemplo n.º 26
0
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)

    out_str_prefix = "lin" if cfg.MODEL.DETACH_FINAL_FC else ""

    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    if (cfg.TASK == "ssl" and cfg.MODEL.MODEL_NAME == "ContrastiveModel"
            and cfg.CONTRASTIVE.KNN_ON):
        train_loader = loader.construct_loader(cfg, "train")
        out_str_prefix = "knn"
        if hasattr(model, "module"):
            model.module.init_knn_labels(train_loader)
        else:
            model.init_knn_labels(train_loader)

    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            test_loader.dataset.num_videos %
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS) == 0)
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            test_loader.dataset.num_videos //
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES if not cfg.TASK == "ssl" else
            cfg.CONTRASTIVE.NUM_CLASSES_DOWNSTREAM,
            len(test_loader),
            cfg.DATA.MULTI_LABEL,
            cfg.DATA.ENSEMBLE_METHOD,
        )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
    # The "_a{}{}{}" prefix consumes out_str_prefix, the dataset initial,
    # and top-1 accuracy, so top1_acc is formatted twice below.
    result_string = (
        "_a{}{}{} Top1 Acc: {} Top5 Acc: {} MEM: {:.2f} dataset: {}{}"
        "".format(
            out_str_prefix,
            cfg.TEST.DATASET[0],
            test_meter.stats["top1_acc"],
            test_meter.stats["top1_acc"],
            test_meter.stats["top5_acc"],
            misc.gpu_mem_usage(),
            cfg.TEST.DATASET[0],
            cfg.MODEL.NUM_CLASSES,
        ))
    logger.info("testing done: {}".format(result_string))

    return result_string
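
The hasattr(model, "module") branch above is the standard way to reach the underlying network whether or not it is wrapped by DistributedDataParallel. A one-line sketch of the same unwrap idiom:

def unwrap_model(model):
    # DDP / DataParallel expose the wrapped network as `.module`;
    # a bare nn.Module is returned unchanged.
    return model.module if hasattr(model, "module") else model

# e.g. unwrap_model(model).init_knn_labels(train_loader)
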
Exemplo n.º 27
0
                # Save gif
                if (i+1) % 5 == 0:
                    layer_path = self.dir_path + 'layer/'
                    if not os.path.exists(layer_path):
                        os.mkdir(layer_path)

                    path = layer_path + str(self.model_name) + '_iter' + str(i) + '_path' + str(j) + '_' + \
                           str(self.layer) + '_f' + str(self.filter) + '_lr' + str(self.initial_learning_rate) + "_wd" \
                           + str(self.weight_decay)
                    save_gif(created_video, path, stream_type="rgb")


if __name__ == '__main__':

    args = parse_args()
    cfg = load_config(args)
    # Construct the model
    model = build_model(cfg)
    # `checkpoint_path` is not defined in the original snippet; assuming
    # it comes from the test config:
    checkpoint_path = cfg.TEST.CHECKPOINT_FILE_PATH
    load_checkpoint(checkpoint_path, model, data_parallel=False,
                    optimizer=None, inflation=False,
                    convert_from_caffe2=True)
    cnn_layer = "s2.pathway0_res0.branch1"     # "conv3d_0c_1x1.conv3d"
    filter_pos = 0

    device = torch.device('cuda:0')
    model = model.to(device)

    layer_vis = CNNLayerVisualization(model, cnn_layer, filter_pos, device)

    # Layer visualization with pytorch hooks
    layer_vis.visualise_layer_with_hooks()
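
The CNNLayerVisualization class itself is not shown in this snippet. A minimal sketch of the hook mechanism such a visualiser relies on, assuming standard PyTorch APIs (capture_filter_activation is hypothetical, and the list-of-pathways call convention follows the SlowFast models above):

import torch


def capture_filter_activation(model, layer_name, filter_pos, clip):
    captured = {}

    def hook(module, inputs, output):
        # output: [N, C, T, H, W] for a 3-D conv; keep one filter's map.
        captured['act'] = output[:, filter_pos].detach()

    layer = dict(model.named_modules())[layer_name]
    handle = layer.register_forward_hook(hook)
    with torch.no_grad():
        model([clip])  # SlowFast models take a list of pathway tensors
    handle.remove()
    return captured['act']
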

Exemplo n.º 28
0
def test(cfg, cnt=-1):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # # Perform multi-view test on the entire dataset.
    scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
    if not os.path.exists(scores_path):
        os.makedirs(scores_path)
    
    filename_root = cfg.EPICKITCHENS.TEST_LIST.split('.')[0]
    if cnt >= 0:
        file_name = '{}_{}_{}.pkl'.format(filename_root, cnt, cfg.MODEL.MODEL_NAME)
    else:
        file_name = '{}_{}_{}.pkl'.format(filename_root, 'test_only', cfg.MODEL.MODEL_NAME)
    file_path = os.path.join(scores_path, file_name)
    logger.info(file_path)

    # Print config.
    # if cnt < 0:
    #     logger.info("Test with config:")
    #     logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if cfg.EPICKITCHENS.USE_BBOX:
        model.module.load_weight_slowfast()

    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=False)
    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    if cfg.TEST.EXTRACT_FEATURES_MODE in ("train", "val", "test"):
        test_loader = loader.construct_loader(cfg, cfg.TEST.EXTRACT_FEATURES_MODE)
    else:
        test_loader = loader.construct_loader(cfg, "test")

    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        if cfg.TEST.DATASET == 'epickitchens':
            test_meter = EPICTestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )
        else:
            test_meter = TestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )

    
    # Create/truncate the score file up front; use a context manager so
    # the handle is closed.
    with open(file_path, 'wb') as f:
        pickle.dump([], f)
    if cfg.TEST.EXTRACT_FEATURES:
        preds, labels, metadata, x_feat_list = perform_test(test_loader, model, test_meter, cfg)
    else:
        preds, labels, metadata = perform_test(test_loader, model, test_meter, cfg)

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {'verb_output': preds[0],
                       'noun_output': preds[1],
                       'verb_gt': labels[0],
                       'noun_gt': labels[1],
                       'narration_id': metadata}
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            pickle.dump(results, open(file_path, 'wb'))

            if cfg.TEST.EXTRACT_FEATURES:
                pid = cfg.EPICKITCHENS.FEATURE_VID.split("_")[0]
                if not os.path.exists(os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid)):
                    os.mkdir(os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid))
                if not cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                    arr_slow = torch.cat(x_feat_list[0], dim=0).numpy()
                    arr_fast = torch.cat(x_feat_list[1], dim=0).numpy()
                    print(arr_slow.shape, arr_fast.shape)
                    fpath_feat = os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid, '{}.pkl'.format(cfg.EPICKITCHENS.FEATURE_VID))
                    with open(fpath_feat,'wb+') as f:
                        pickle.dump([arr_slow, arr_fast], f)
                elif cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                    fpath_feat = os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid, '{}.npy'.format(cfg.EPICKITCHENS.FEATURE_VID))
                    with open(fpath_feat,'wb+') as f:
                        arr = torch.cat(x_feat_list, dim=0).numpy()
                        print(arr.shape)
                        np.save(f, arr)
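
A short sketch of reading the dumped EPIC-Kitchens scores back; the file layout matches the `results` dict written above, and the path is illustrative:

import pickle

file_path = 'output/scores/test_list_0_SlowFast.pkl'  # hypothetical path
with open(file_path, 'rb') as f:
    results = pickle.load(f)

# One row of verb/noun scores per clip, keyed by narration_id.
for nid, v, n in zip(results['narration_id'],
                     results['verb_output'].argmax(1),
                     results['noun_output'].argmax(1)):
    print(nid, int(v), int(n))
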
Exemplo n.º 29
0
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Init multigrid.
    multigrid = None
    if cfg.MULTIGRID.LONG_CYCLE or cfg.MULTIGRID.SHORT_CYCLE:
        multigrid = MultigridSchedule()
        cfg = multigrid.init_multigrid(cfg)
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, _ = multigrid.update_long_cycle(cfg, cur_epoch=0)
    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")
    precise_bn_loader = (
        loader.construct_loader(cfg, "train", is_precise_bn=True)
        if cfg.BN.USE_PRECISE_STATS else None
    )

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, changed = multigrid.update_long_cycle(cfg, cur_epoch)
            if changed:
                (
                    model,
                    optimizer,
                    train_loader,
                    val_loader,
                    precise_bn_loader,
                    train_meter,
                    val_meter,
                ) = build_trainer(cfg)

                # Load checkpoint.
                if cu.has_checkpoint(cfg.OUTPUT_DIR):
                    last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
                    assert "{:05d}.pyth".format(cur_epoch) in last_checkpoint
                else:
                    last_checkpoint = cfg.TRAIN.CHECKPOINT_FILE_PATH
                logger.info("Load from {}".format(last_checkpoint))
                cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1,
                                   optimizer)

        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)

        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                    cfg, writer)

        is_checkp_epoch = (cu.is_checkpoint_epoch(
            cfg,
            cur_epoch,
            None if multigrid is None else multigrid.schedule,
        ))
        is_eval_epoch = misc.is_eval_epoch(
            cfg, cur_epoch, None if multigrid is None else multigrid.schedule)

        # Compute precise BN stats.
        if ((is_checkp_epoch or is_eval_epoch) and cfg.BN.USE_PRECISE_STATS
                and len(get_bn_modules(model)) > 0):
            calculate_and_update_precise_bn(
                precise_bn_loader,
                model,
                min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)),
                cfg.NUM_GPUS > 0,
            )
        _ = misc.aggregate_sub_bn_stats(model)

        # Save a checkpoint.
        if is_checkp_epoch:
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               cfg)
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer)

    if writer is not None:
        writer.close()
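
calculate_and_update_precise_bn above re-estimates BatchNorm running statistics before checkpointing or evaluation. A minimal sketch of the underlying technique; the real implementation lives in the SlowFast/fvcore utilities, so treat this as an illustration only:

import torch


def recompute_bn_stats(model, loader, num_batches):
    # Reset BN running stats and switch to a cumulative moving average.
    for m in model.modules():
        if isinstance(m, torch.nn.modules.batchnorm._BatchNorm):
            m.reset_running_stats()
            m.momentum = None  # None => cumulative average in PyTorch
    model.train()  # BN only updates running stats in train mode
    with torch.no_grad():
        for i, (inputs, *_) in enumerate(loader):
            if i >= num_batches:
                break
            model(inputs)  # forward only; stats update as a side effect
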
Exemplo n.º 30
0
def test(cfg):

    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Use decord's native array bridge (frames are converted to numpy
    # later via .asnumpy()).
    de.bridge.set_bridge('native')

    gpu_id = 1

    #     sample_rate = 1
    #     resize_h = 270
    #     resize_w = 360
    #     augment = ['FiveCrop', 'TenCrop', 'None'][1]

    sample_rate = 1
    resize_h = 270
    resize_w = 360
    augment = ['FiveCrop', 'TenCrop', 'None'][0]

    crop_h = cfg.DATA.TEST_CROP_SIZE  # 256
    crop_w = cfg.DATA.TEST_CROP_SIZE  # 256

    if 'SLOWFAST' in cfg.TEST.CHECKPOINT_FILE_PATH and 'I3D' not in cfg.TEST.CHECKPOINT_FILE_PATH:
        model_type = 'slowfast'
        feature_dim = 2304
    elif 'SLOWFAST' not in cfg.TEST.CHECKPOINT_FILE_PATH and 'I3D' in cfg.TEST.CHECKPOINT_FILE_PATH:
        model_type = 'i3d'
        feature_dim = 2048
    else:
        raise Exception('Invalid Model.')

    video_dir = cfg.DATA.PATH_TO_DATA_DIR

    if augment == 'FiveCrop':
        feature_dir = os.path.join(
            cfg.OUTPUT_DIR,
            'feature_{}_{}x{}_{}x{}_{}_5'.format(model_type, resize_h,
                                                 resize_w, crop_h, crop_w,
                                                 sample_rate))
    elif augment == 'TenCrop':
        feature_dir = os.path.join(
            cfg.OUTPUT_DIR,
            'feature_{}_{}x{}_{}x{}_{}_10'.format(model_type, resize_h,
                                                  resize_w, crop_h, crop_w,
                                                  sample_rate))
    elif augment == 'None':
        feature_dir = os.path.join(
            cfg.OUTPUT_DIR,
            'feature_{}_{}x{}_{}_1'.format(model_type, resize_h, resize_w,
                                           sample_rate))
    else:
        raise Exception('Invalid Augment.')

    norm_transform = transforms.Normalize(mean=cfg.DATA.MEAN, std=cfg.DATA.STD)

    if augment == 'FiveCrop':
        frame_transform = transforms.Compose([
            transforms.Resize(size=(resize_h, resize_w)),
            transforms.FiveCrop(size=(crop_h, crop_w)),
            transforms.Lambda(
                lambda crops: [transforms.ToTensor()(crop) for crop in crops]),
            transforms.Lambda(
                lambda crops: [norm_transform(crop) for crop in crops]),
            transforms.Lambda(lambda crops: torch.stack(crops))
        ])
    elif augment == 'TenCrop':
        frame_transform = transforms.Compose([
            transforms.Resize(size=(resize_h, resize_w)),
            transforms.TenCrop(size=(crop_h, crop_w)),
            transforms.Lambda(
                lambda crops: [transforms.ToTensor()(crop) for crop in crops]),
            transforms.Lambda(
                lambda crops: [norm_transform(crop) for crop in crops]),
            transforms.Lambda(lambda crops: torch.stack(crops))
        ])
    elif augment == 'None':
        frame_transform = transforms.Compose([
            transforms.Resize(size=(resize_h, resize_w)),
            transforms.ToTensor(), norm_transform,
            transforms.Lambda(lambda img: img.unsqueeze(0))
        ])
    else:
        raise Exception('Invalid Augment.')

    # Build the video model and print model statistics.
    model = build_model(cfg)
    print(model)
    cu.load_test_checkpoint(cfg, model)
    model.eval()
    model.to(torch.device('cuda:{}'.format(gpu_id)))

    if not os.path.exists(feature_dir):
        os.makedirs(feature_dir)

    video_files = os.listdir(video_dir)
    video_files.sort()

    for video_file in video_files:

        video_name = video_file[:-4]
        video_file = os.path.join(video_dir, video_file)

        feature_file = '{}.npy'.format(video_name)
        if feature_file in os.listdir(feature_dir):
            print('Skipped.')
            continue
        feature_file = os.path.join(feature_dir, feature_file)

        print(video_file)
        print(feature_file)

        video_feature = []

        vr = de.VideoReader(video_file, ctx=de.cpu(0))

        frame_num = len(vr)
        video_meta = skvideo.io.ffprobe(video_file)
        assert (frame_num == int(video_meta['video']['@nb_frames']))

        sample_idxs = np.arange(0, frame_num, sample_rate)

        clip_size = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE

        #         cfg.DATA.NUM_FRAMES
        #         cfg.DATA.SAMPLING_RATE
        #         cfg.SLOWFAST.ALPHA

        frame_buffer = {}
        buffer_size = 128

        with torch.no_grad():

            for _, sample_idx in enumerate(tqdm(sample_idxs)):

                fast_pathway_idxs = np.arange(
                    sample_idx - clip_size // 2,
                    sample_idx - clip_size // 2 + clip_size,
                    cfg.DATA.SAMPLING_RATE)

                fast_pathway_idxs[fast_pathway_idxs < 0] = 0
                fast_pathway_idxs[fast_pathway_idxs > frame_num -
                                  1] = frame_num - 1
                assert (fast_pathway_idxs.size == cfg.DATA.NUM_FRAMES)
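                # Worked example (illustrative numbers): with
                # NUM_FRAMES=32 and SAMPLING_RATE=2, clip_size = 64, so
                # sample_idx=100 yields arange(68, 132, 2) -> 32 indices
                # centred on frame 100, clamped above to [0, frame_num - 1].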

                fast_pathway_frames = []

                for idx in fast_pathway_idxs:

                    if idx not in frame_buffer:
                        frame = vr[idx].asnumpy()  #(540, 960, 3)
                        frame = Image.fromarray(frame)
                        frame = frame_transform(frame)
                        frame = frame.to(torch.device(
                            'cuda:{}'.format(gpu_id)))

                        if augment == 'FiveCrop':
                            assert (frame.shape[0] == 5)
                            assert (frame.shape[1] == 3)
                            assert (frame.shape[2] == crop_h)
                            assert (frame.shape[3] == crop_w)
                        elif augment == 'TenCrop':
                            assert (frame.shape[0] == 10)
                            assert (frame.shape[1] == 3)
                            assert (frame.shape[2] == crop_h)
                            assert (frame.shape[3] == crop_w)
                        elif augment == 'None':
                            assert (frame.shape[0] == 1)
                            assert (frame.shape[1] == 3)
                            assert (frame.shape[2] == resize_h)
                            assert (frame.shape[3] == resize_w)
                        else:
                            raise Exception('Invalid Augment.')

                        frame_buffer[idx] = frame
                        if len(frame_buffer) > buffer_size:
                            frame_buffer.pop(min(list(frame_buffer.keys())))

                    fast_pathway_frames.append(frame_buffer[idx].unsqueeze(2))

                fast_pathway_frames = torch.cat(fast_pathway_frames, 2)

                if model_type == 'slowfast':

                    # The slow pathway takes every ALPHA-th fast-pathway frame.
                    slow_pathway_idxs = fast_pathway_idxs[::cfg.SLOWFAST.ALPHA]
                    assert (slow_pathway_idxs.size ==
                            cfg.DATA.NUM_FRAMES // cfg.SLOWFAST.ALPHA)
                    slow_pathway_frames = []

                    for idx in slow_pathway_idxs:

                        if idx not in frame_buffer:
                            frame = vr[idx].asnumpy()  #(540, 960, 3)
                            frame = Image.fromarray(frame)
                            frame = frame_transform(frame)
                            frame = frame.to(
                                torch.device('cuda:{}'.format(gpu_id)))

                            if augment == 'FiveCrop':
                                assert (frame.shape[0] == 5)
                                assert (frame.shape[1] == 3)
                                assert (frame.shape[2] == crop_h)
                                assert (frame.shape[3] == crop_w)
                            elif augment == 'TenCrop':
                                assert (frame.shape[0] == 10)
                                assert (frame.shape[1] == 3)
                                assert (frame.shape[2] == crop_h)
                                assert (frame.shape[3] == crop_w)
                            elif augment == 'None':
                                assert (frame.shape[0] == 1)
                                assert (frame.shape[1] == 3)
                                assert (frame.shape[2] == resize_h)
                                assert (frame.shape[3] == resize_w)
                            else:
                                raise Exception('Invalid Augment.')

                            frame_buffer[idx] = frame
                            if len(frame_buffer) > buffer_size:
                                frame_buffer.pop(min(list(
                                    frame_buffer.keys())))

                        slow_pathway_frames.append(
                            frame_buffer[idx].unsqueeze(2))

                    slow_pathway_frames = torch.cat(slow_pathway_frames, 2)

                if model_type == 'slowfast':
                    frame_feature = model(
                        [slow_pathway_frames, fast_pathway_frames],
                        extract_feature=True)
                elif model_type == 'i3d':
                    frame_feature = model([fast_pathway_frames],
                                          extract_feature=True)
                else:
                    raise Exception('Invalid Model.')

                # Shapes at this point (FiveCrop example):
                #   fast_pathway_frames: [5, 3, 32, 256, 256]
                #   slow_pathway_frames: [5, 3, 8, 256, 256]

                assert (frame_feature.shape[1] == feature_dim)
                if augment == 'FiveCrop':
                    assert (frame_feature.shape[0] == 5)
                elif augment == 'TenCrop':
                    assert (frame_feature.shape[0] == 10)
                elif augment == 'None':
                    assert (frame_feature.shape[0] == 1)
                else:
                    raise Exception('Invalid Augment.')

                # Caution: SlowFast models assume 30 fps input; features
                # from videos at other frame rates should be re-extracted.

                frame_feature = torch.unsqueeze(frame_feature, dim=0)
                frame_feature = frame_feature.cpu().numpy()

                video_feature.append(frame_feature)

        video_feature = np.concatenate(video_feature, axis=0)

        print(video_feature.shape)

        np.save(feature_file, video_feature)
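
The saved array stacks one feature per sampled frame. A short sketch of reading it back; the path and sizes are illustrative:

import numpy as np

feats = np.load('feature_slowfast_270x360_256x256_1_5/video_0001.npy')
# feats: [num_sampled_frames, num_crops, feature_dim]; with FiveCrop,
# num_crops == 5 and feature_dim == 2304 for SlowFast.
frame_feats = feats.mean(axis=1)  # average crops -> one vector per frame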