Example no. 1
def extract(cfg):
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("Extract with config:")
    logger.info(cfg)

    # initialize model
    name = cfg.MODEL.MODEL_NAME
    model = MODEL_REGISTRY.get(name)(cfg)
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model).cuda()

    # initialize data loader
    dataloader = Extractor(cfg)
    logger.info("Testing model for {} videos".format(len(dataloader)))

    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            path_to_checkpoint=cfg.TEST.CHECKPOINT_FILE_PATH,
            model=model,
            data_parallel=True,
            optimizer=None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        logger.info("Testing with random initialization. Only for debugging.")

    run(dataloader, model, cfg)
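
Every example on this page funnels into the same SlowFast checkpoint utility. As a reference, here is the call from Example no. 1 again with each argument annotated; the annotations are inferences from how the examples on this page use the parameters, not official documentation:

# Annotated sketch of the shared loading pattern (inferred, not authoritative).
cu.load_checkpoint(
    path_to_checkpoint=cfg.TEST.CHECKPOINT_FILE_PATH,  # .pyth file or Caffe2 .pkl
    model=model,                # the model to load weights into
    data_parallel=True,         # True when the model is wrapped in DataParallel
    optimizer=None,             # pass an optimizer only when resuming training
    inflation=False,            # True to inflate 2D (image) weights into 3D
    convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",  # remap Caffe2 names
)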
Example no. 2
def video_extract(cfg):
    ctx = multiprocessing.get_context("spawn")
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("Extract with config:")
    logger.info(cfg)

    # initialize model
    name = cfg.MODEL.MODEL_NAME
    model = MODEL_REGISTRY.get(name)(cfg)
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model).cuda()
    model.eval()

    # initialize data loader
    dataloader = Extractor(cfg)
    logger.info("Testing model for {} videos".format(len(dataloader)))

    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            path_to_checkpoint=cfg.TEST.CHECKPOINT_FILE_PATH,
            model=model,
            data_parallel=True,
            optimizer=None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        logger.info("Testing with random initialization. Only for debugging.")

    index_queue = ctx.Queue()
    result_queue = ctx.Queue()
    workers = [
        ctx.Process(target=get_video,
                    args=(dataloader, index_queue, result_queue))
        for _ in range(cfg.TEST.WORKERS)
    ]

    for w in workers:
        w.daemon = True
        w.start()

    num_video = len(dataloader)

    for i in range(num_video):
        index_queue.put(i)

    # NUM_FRAMES must be divisible by ALPHA.
    num_frames = cfg.DATA.NUM_FRAMES
    step_frames = num_frames
    fout = open(cfg.TEST.OUTPUT_FEATURE_FILE, "w")
    start_time = time.time()
    for i in range(num_video):
        video_data = result_queue.get()
        run(cfg, model, video_data, num_frames, step_frames, fout)
        period = time.time() - start_time
        logger.info(
            "video index: %d, period: %.2f sec, speed: %.2f sec/video." %
            (i, period, period / (i + 1)))
    fout.close()
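
Example no. 2 depends on a get_video worker that is not shown. A minimal sketch of what that worker plausibly does, inferred only from how the queues are used above (indices go in, decoded videos come out) and assuming the Extractor supports indexing:

def get_video(dataloader, index_queue, result_queue):
    # Hypothetical worker loop: fetch the index of the next video to decode,
    # let the Extractor load it, and ship the result back to the main process.
    while True:
        index = index_queue.get()
        result_queue.put(dataloader[index])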
Example no. 3
def get_result(cfg, gpu_id, video_queue, result_queue):
    torch.cuda.set_device(gpu_id)
    # initialize model
    name = cfg.MODEL.MODEL_NAME
    model = MODEL_REGISTRY.get(name)(cfg)
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            path_to_checkpoint=cfg.TEST.CHECKPOINT_FILE_PATH,
            model=model,
            data_parallel=False,
            optimizer=None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        logger.info("Testing with random initialization. Only for debugging.")

    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()
    num_frames = cfg.DATA.NUM_FRAMES
    batch_size = cfg.TEST.BATCH_SIZE
    while True:
        video_data = video_queue.get()
        result = run(cfg, model, video_data, num_frames, batch_size)
        result_queue.put(result)
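
Example no. 3 is the worker half of a multi-GPU producer/consumer setup. A hedged sketch of the driver side, assuming one spawned process per visible GPU and the same queue layout as the signature above:

ctx = multiprocessing.get_context("spawn")
video_queue, result_queue = ctx.Queue(), ctx.Queue()
workers = [
    ctx.Process(target=get_result,
                args=(cfg, gpu_id, video_queue, result_queue))
    for gpu_id in range(torch.cuda.device_count())
]
for w in workers:
    w.daemon = True  # workers die with the main process
    w.start()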
Example no. 4
    def load_model(self):
        #logger.info("Model Config")
        #logger.info(self.cfg)
        self.model = build_model(self.cfg)
        self.model.eval()
        #if du.is_master_proc():
        misc.log_model_info(self.model, self.cfg, is_train=False)

        model_path = self.cfg.TRAIN.CHECKPOINT_FILE_PATH
        assert os.path.exists(
            model_path), "%s. Model Path Not Found" % model_path

        cu.load_checkpoint(model_path, self.model, self.cfg.NUM_GPUS > 1)
Example no. 5
    def load_model_initial(self, model):
        assert self.model_id in ['c2d_1x1', 'SLOWFAST_8x8_R50']
        CHECKPOINT_FILE_PATH = CHECKPOINTS_PREFIX / CHECKPOINTS[self.model_id]
        # Load model
        # with vt_log.logging_disabled(logging.WARNING):
        sf_cu.load_checkpoint(
            CHECKPOINT_FILE_PATH,
            model,
            False,
            None,
            inflation=False,
            convert_from_caffe2=True,
        )
Example no. 6
def load_teacher_model(cfg):
    logger.info("Load teacher config: {}".format(cfg.KD.CONFIG))
    logger.info("Load teacher model: {}".format(cfg.KD.CHECKPOINT_FILE_PATH))
    teacher_model = build_teacher_model(cfg)
    if cfg.KD.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.KD.CHECKPOINT_FILE_PATH,
            teacher_model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.KD.CHECKPOINT_TYPE == "caffe2",
        )
    return teacher_model
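
A distillation teacher is normally frozen after loading; Example no. 6 leaves that to the caller. A typical follow-up (an assumption about the caller, not part of the snippet above) would be:

teacher_model = load_teacher_model(cfg)
teacher_model.eval()            # freeze dropout / batch-norm statistics
for p in teacher_model.parameters():
    p.requires_grad = False     # the teacher only provides soft targets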
Example no. 7
    def __init__(self, cfg, backbone):
        if SlowFast.__instance is not None:
            raise Exception('This class is a singleton!')
        else:
            SlowFast.__instance = self

        self.cfg = cfg
        self.backbone = backbone
        print(self.cfg.MODEL.ARCH)
        self.model = model_builder.build_model(self.cfg)
        self.model.eval()
        misc.log_model_info(self.model)

        self.checkpoint = cfg.TRAIN.CHECKPOINT_FILE_PATH

        cu.load_checkpoint(
            self.checkpoint,
            self.model,
            self.cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2="caffe2"
            in [self.cfg.TEST.CHECKPOINT_TYPE, self.cfg.TRAIN.CHECKPOINT_TYPE],
        )

        if self.backbone == 'yolo':
            self.object_predictor = YOLO.get_instance(
                self.cfg.DEMO.YOLO_LIB, self.cfg.DEMO.YOLO_CFG,
                self.cfg.DEMO.YOLO_META, self.cfg.DEMO.YOLO_CLASS,
                self.cfg.DEMO.YOLO_WEIGHTS)
        else:
            dtron2_cfg_file = self.cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
            dtron2_cfg = get_cfg()
            dtron2_cfg.merge_from_file(
                model_zoo.get_config_file(dtron2_cfg_file))
            dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
            dtron2_cfg.MODEL.WEIGHTS = self.cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS
            self.object_predictor = DefaultPredictor(dtron2_cfg)

        with open(self.cfg.DEMO.LABEL_FILE_PATH) as f:
            self.labels = f.read().split('\n')[:-1]
        self.palette = np.random.randint(64, 128,
                                         (len(self.labels), 3)).tolist()

        self.images_queue = queue.Queue()
        self.write_queue = queue.Queue()

        self.cap = cv2.VideoCapture(self.cfg.DEMO.DATA_SOURCE)
        self.seq_len = self.cfg.DATA.NUM_FRAMES * self.cfg.DATA.SAMPLING_RATE
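
The __init__ in Example no. 7 raises once an instance exists, which implies a get_instance accessor like the one used for YOLO. A minimal sketch of that companion method, assuming __instance is a class attribute initialized to None:

    @staticmethod
    def get_instance(cfg, backbone):
        # Hypothetical accessor implied by the singleton check in __init__.
        if SlowFast.__instance is None:
            SlowFast(cfg, backbone)
        return SlowFast.__instance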
Example no. 8
    def prepare(cf):
        model_id = cf['extractor.model_id']

        rel_yml_path = REL_YAML_PATHS[model_id]
        sf_cfg = basic_sf_cfg(rel_yml_path)
        if model_id == 'c2d_1x1':
            sf_cfg.DATA.NUM_FRAMES = 1
            sf_cfg.DATA.SAMPLING_RATE = 1
        sf_cfg.NUM_GPUS = 1
        norm_mean = sf_cfg.DATA.MEAN
        norm_std = sf_cfg.DATA.STD

        IMAGE_SIZE = cf['extractor.image_size']
        ffmode = cf['extractor.fullframe_mode']

        if cf['extractor.extraction_mode'] == 'roi':
            fextractor = FExtractor_roi(sf_cfg, model_id)
        elif cf['extractor.extraction_mode'] == 'fullframe':
            fextractor = FExtractor_fullframe(sf_cfg, model_id, IMAGE_SIZE, ffmode)
        else:
            raise RuntimeError()

        # Load model
        CHECKPOINT_FILE_PATH = CHECKPOINTS_PREFIX / CHECKPOINTS[model_id]
        if model_id in ['c2d_imnet']:
            sf_cu.load_checkpoint(
                CHECKPOINT_FILE_PATH, fextractor.model, False, None,
                inflation=True, convert_from_caffe2=False,)
        else:
            with vst.logging_disabled(logging.WARNING):
                sf_cu.load_checkpoint(
                    CHECKPOINT_FILE_PATH, fextractor.model, False, None,
                    inflation=False, convert_from_caffe2=True,)

        norm_mean_t = np_to_gpu(norm_mean)
        norm_std_t = np_to_gpu(norm_std)

        model_nframes = sf_cfg.DATA.NUM_FRAMES
        model_sample = sf_cfg.DATA.SAMPLING_RATE
        if (samprate := cf['extractor.sampling_rate']) is not None:
            model_sample = samprate
Example no. 9
def experiment(cfg):
    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Infer with config:")
    logger.info(cfg)

    # Build the SlowFast model and print its statistics
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # load weights
    if cfg.INFERENCE.WEIGHTS_FILE_PATH != "":
        cu.load_checkpoint(cfg.INFERENCE.WEIGHTS_FILE_PATH, model, cfg.NUM_GPUS > 1, None,
                           inflation=False, convert_from_caffe2=cfg.INFERENCE.WEIGHTS_TYPE == "caffe2")
    else:
        raise FileNotFoundError("Model weights file could not be found")

    perform_inference(model, cfg)
Example no. 10
def load_checkpoint(cfg, model):
    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )
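
Example no. 10 encapsulates the TEST -> last-saved -> TRAIN fallback that the other examples inline. A minimal usage sketch, assuming build_model from the surrounding codebase:

model = build_model(cfg)     # construct the video model from the same cfg
load_checkpoint(cfg, model)  # resolve and load the best available weights
model.eval()                 # weights are loaded for testing, not training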
Example no. 11
def build_model(cfg):
    """
    Build slowfast model
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        print("Testing with random initialization. Only for debugging.")

    return model
Example no. 12
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            len(test_loader.dataset)
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
        )

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg)
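
The divisibility assert and the TestMeter arguments in Example no. 12 encode the multi-view bookkeeping. With hypothetical numbers, assuming NUM_ENSEMBLE_VIEWS = 10 and NUM_SPATIAL_CROPS = 3:

# 300 items in the test dataset = 10 temporal views x 3 spatial crops per video:
#   300 % (10 * 3) == 0   -> the assert passes
#   300 // (10 * 3) == 10 -> TestMeter aggregates 10 unique videos, each
#                            ensembled from its 30 view-level predictions.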
Example no. 13
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """


    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        writer = SummaryWriter(log_dir=cfg.OUTPUT_DIR)
    else:
        writer = None

    if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS) and not cfg.DEBUG:
        tags = []
        if 'TAGS' in cfg and cfg.TAGS != []:
            tags = list(cfg.TAGS)
        neptune.set_project('Serre-Lab/motion')

        ######################
        overrides = sys.argv[1:]

        overrides_dict = {}
        for i in range(len(overrides)//2):
            overrides_dict[overrides[2*i]] = overrides[2*i+1]
        overrides_dict['dir'] = cfg.OUTPUT_DIR
        ######################


        if 'NEP_ID' in cfg and cfg.NEP_ID != "":
            session = Session()
            project = session.get_project(project_qualified_name='Serre-Lab/motion')
            nep_experiment = project.get_experiments(id=cfg.NEP_ID)[0]

        else:
            nep_experiment = neptune.create_experiment(name=cfg.NAME,
                                                       params=overrides_dict,
                                                       tags=tags)
    else:
        nep_experiment = None

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        misc.log_model_info(model, cfg, is_train=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(
            last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, writer, nep_experiment, cfg)

        # Compute precise BN stats.
        # if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
        #     calculate_and_update_precise_bn(
        #         train_loader, model, cfg.BN.NUM_BATCHES_PRECISE
        #     )

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, nep_experiment, cfg)

        if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS) and not cfg.DEBUG:
            nep_experiment.log_metric('epoch', cur_epoch)
Example no. 14
def demo(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    if cfg.DETECTION.ENABLE:
        # Load object detector from detectron2
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
        dtron2_cfg.MODEL.WEIGHTS = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS
        object_predictor = DefaultPredictor(dtron2_cfg)
        # Load the labels of AVA dataset
        with open(cfg.DEMO.LABEL_FILE_PATH) as f:
            labels = f.read().split('\n')[:-1]
        palette = np.random.randint(64, 128, (len(labels), 3)).tolist()
        boxes = []
    else:
        # Load the labels of Kinetics-400 dataset
        labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
        labels = labels_df['name'].values

    frame_provider = VideoReader(cfg)
    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    frames = []
    pred_labels = []
    s = 0.
    for able_to_read, frame in frame_provider:
        if not able_to_read:
            # When the end of the video is reached, clear the buffer and continue to the next one.
            frames = []
            continue

        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            if cfg.DETECTION.ENABLE and len(frames) == seq_len // 2 - 1:
                mid_frame = frame

        if len(frames) == seq_len:
            start = time()
            if cfg.DETECTION.ENABLE:
                outputs = object_predictor(mid_frame)
                fields = outputs["instances"]._fields
                pred_classes = fields["pred_classes"]
                selection_mask = pred_classes == 0
                # acquire person boxes
                pred_classes = pred_classes[selection_mask]
                pred_boxes = fields["pred_boxes"].tensor[selection_mask]
                scores = fields["scores"][selection_mask]
                boxes = cv2_transform.scale_boxes(
                    cfg.DATA.TEST_CROP_SIZE, pred_boxes,
                    frame_provider.display_height,
                    frame_provider.display_width)
                boxes = torch.cat(
                    [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes],
                    axis=1)

            inputs = torch.as_tensor(frames).float()
            inputs = inputs / 255.0
            # Perform color normalization.
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)

            # 1 C T H W.
            inputs = inputs.unsqueeze(0)

            # Sample frames for the fast pathway.
            index = torch.linspace(0, inputs.shape[2] - 1,
                                   cfg.DATA.NUM_FRAMES).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            # logger.info('fast_pathway.shape={}'.format(fast_pathway.shape))

            # Sample frames for the slow pathway.
            index = torch.linspace(0, fast_pathway.shape[2] - 1,
                                   fast_pathway.shape[2] //
                                   cfg.SLOWFAST.ALPHA).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            # logger.info('slow_pathway.shape={}'.format(slow_pathway.shape))
            inputs = [slow_pathway, fast_pathway]
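            # Worked example of the sampling above (illustrative numbers, an
            # assumption rather than this demo's actual config): with
            # NUM_FRAMES=32, SAMPLING_RATE=2 and ALPHA=4, the buffer holds
            # seq_len = 64 frames, the fast pathway keeps 32 evenly spaced
            # frames, and the slow pathway keeps 32 // 4 = 8 of those.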
            """
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            """
            # Perform the forward pass.
            if cfg.DETECTION.ENABLE:
                # When there is nothing in the scene,
                #   use a dummy variable to disable all computations below.
                if not len(boxes):
                    preds = torch.tensor([])
                else:
                    preds = model(inputs, boxes)
            else:
                preds = model(inputs)

            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            if cfg.DETECTION.ENABLE:
                # This post-processing is intentionally done on the CPU since my laptop GPU
                #   (an RTX 2080) runs out of memory; if your GPU is more powerful, I'd recommend
                #   changing this section so that CUDA does the processing.
                preds = preds.cpu().detach().numpy()
                pred_masks = preds > .1
                label_ids = [
                    np.nonzero(pred_mask)[0] for pred_mask in pred_masks
                ]
                pred_labels = [[
                    labels[label_id] for label_id in perbox_label_ids
                ] for perbox_label_ids in label_ids]
                # I'm unsure how detectron2 rescales boxes to the original image size, so I take
                #   the SlowFast input boxes and rescale them back instead; it's safer, and even if
                #   the boxes were not rescaled by cv2_transform.rescale_boxes, it still works.
                boxes = boxes.cpu().detach().numpy()
                ratio = np.min([
                    frame_provider.display_height, frame_provider.display_width
                ]) / cfg.DATA.TEST_CROP_SIZE
                boxes = boxes[:, 1:] * ratio
            else:
                # Option 1: single-label inference selected from the highest-probability entry.
                # label_id = preds.argmax(-1).cpu()
                # pred_label = labels[label_id]
                # Option 2: multi-label inference selected from probability entries > threshold.
                label_ids = torch.nonzero(
                    preds.squeeze() > .1).reshape(-1).cpu().detach().numpy()
                pred_labels = labels[label_ids]
                logger.info(pred_labels)
                if not list(pred_labels):
                    pred_labels = ['Unknown']

            # Option 1: remove the oldest frame in the buffer to make room for the new one.
            # frames.pop(0)
            # Option 2: empty the buffer.
            frames = []
            s = time() - start

        if cfg.DETECTION.ENABLE and pred_labels and boxes.any():
            for box, box_labels in zip(boxes.astype(int), pred_labels):
                cv2.rectangle(frame,
                              tuple(box[:2]),
                              tuple(box[2:]), (0, 255, 0),
                              thickness=2)
                label_origin = box[:2]
                for label in box_labels:
                    label_origin[-1] -= 5
                    (label_width, label_height), _ = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, .5, 2)
                    cv2.rectangle(frame,
                                  (label_origin[0], label_origin[1] + 5),
                                  (label_origin[0] + label_width,
                                   label_origin[1] - label_height - 5),
                                  palette[labels.index(label)], -1)
                    cv2.putText(frame, label, tuple(label_origin),
                                cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255),
                                1)
                    label_origin[-1] -= label_height + 5
        if not cfg.DETECTION.ENABLE:
            # Display predicted labels to frame.
            y_offset = 50
            cv2.putText(frame,
                        'Action:', (10, y_offset),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=.65,
                        color=(0, 235, 0),
                        thickness=2)
            for pred_label in pred_labels:
                y_offset += 30
                cv2.putText(frame,
                            '{}'.format(pred_label), (20, y_offset),
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale=.65,
                            color=(0, 235, 0),
                            thickness=2)

        # Display prediction speed
        cv2.putText(frame,
                    'Speed: {:.2f}s'.format(s), (10, 25),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=.65,
                    color=(0, 235, 0),
                    thickness=2)
        # Display the frame
        cv2.imshow('SlowFast', frame)
        # hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break

    frame_provider.clean()
Example no. 15
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Init multigrid.
    multigrid = None
    if cfg.MULTIGRID.LONG_CYCLE or cfg.MULTIGRID.SHORT_CYCLE:
        multigrid = MultigridSchedule()
        cfg = multigrid.init_multigrid(cfg)
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, _ = multigrid.update_long_cycle(cfg, cur_epoch=0)
    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)
    # Create a GradScaler for mixed precision training
    scaler = torch.cuda.amp.GradScaler(enabled=cfg.TRAIN.MIXED_PRECISION)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR, task=cfg.TASK)
        if last_checkpoint is not None:
            checkpoint_epoch = cu.load_checkpoint(
                last_checkpoint,
                model,
                cfg.NUM_GPUS > 1,
                optimizer,
                scaler if cfg.TRAIN.MIXED_PRECISION else None,
            )
            start_epoch = checkpoint_epoch + 1
        elif "ssl_eval" in cfg.TASK:
            last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR,
                                                     task="ssl")
            checkpoint_epoch = cu.load_checkpoint(
                last_checkpoint,
                model,
                cfg.NUM_GPUS > 1,
                optimizer,
                scaler if cfg.TRAIN.MIXED_PRECISION else None,
                epoch_reset=True,
                clear_name_pattern=cfg.TRAIN.CHECKPOINT_CLEAR_NAME_PATTERN,
            )
            start_epoch = checkpoint_epoch + 1
        else:
            start_epoch = 0
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            scaler if cfg.TRAIN.MIXED_PRECISION else None,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
            epoch_reset=cfg.TRAIN.CHECKPOINT_EPOCH_RESET,
            clear_name_pattern=cfg.TRAIN.CHECKPOINT_CLEAR_NAME_PATTERN,
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    precise_bn_loader = (loader.construct_loader(
        cfg, "train", is_precise_bn=True)
                         if cfg.BN.USE_PRECISE_STATS else None)

    # if (
    #     cfg.TASK == "ssl"
    #     and cfg.MODEL.MODEL_NAME == "ContrastiveModel"
    #     and cfg.CONTRASTIVE.KNN_ON
    # ):
    #     if hasattr(model, "module"):
    #         model.module.init_knn_labels(train_loader)
    #     else:
    #         model.init_knn_labels(train_loader)

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(1e6, cfg)
        val_meter = ValMeter(1e6, cfg)

    # set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    epoch_timer = EpochTimer()
    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):

        if cur_epoch > 0 and cfg.DATA.LOADER_CHUNK_SIZE > 0:
            num_chunks = math.ceil(cfg.DATA.LOADER_CHUNK_OVERALL_SIZE /
                                   cfg.DATA.LOADER_CHUNK_SIZE)
            skip_rows = (cur_epoch) % num_chunks * cfg.DATA.LOADER_CHUNK_SIZE
            logger.info(
                f"=================+++ num_chunks {num_chunks} skip_rows {skip_rows}"
            )
            cfg.DATA.SKIP_ROWS = skip_rows
            logger.info(f"|===========| skip_rows {skip_rows}")
            train_loader = loader.construct_loader(cfg, "train")
            loader.shuffle_dataset(train_loader, cur_epoch)

        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, changed = multigrid.update_long_cycle(cfg, cur_epoch)
            if changed:
                (
                    model,
                    optimizer,
                    train_loader,
                    val_loader,
                    precise_bn_loader,
                    train_meter,
                    val_meter,
                ) = build_trainer(cfg)

                # Load checkpoint.
                if cu.has_checkpoint(cfg.OUTPUT_DIR):
                    last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR,
                                                             task=cfg.TASK)
                    assert "{:05d}.pyth".format(cur_epoch) in last_checkpoint
                else:
                    last_checkpoint = cfg.TRAIN.CHECKPOINT_FILE_PATH
                logger.info("Load from {}".format(last_checkpoint))
                cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1,
                                   optimizer)

        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)

        if hasattr(train_loader.dataset, "_set_epoch_num"):
            train_loader.dataset._set_epoch_num(cur_epoch)

        # Train for one epoch.
        epoch_timer.epoch_tic()

        train_epoch(
            train_loader,
            model,
            optimizer,
            scaler,
            train_meter,
            cur_epoch,
            cfg,
            writer,
        )
        epoch_timer.epoch_toc()
        logger.info(
            f"Epoch {cur_epoch} takes {epoch_timer.last_epoch_time():.2f}s. Epochs "
            f"from {start_epoch} to {cur_epoch} take "
            f"{epoch_timer.avg_epoch_time():.2f}s on average and "
            f"{epoch_timer.median_epoch_time():.2f}s at the median.")
        logger.info(
            f"For epoch {cur_epoch}, each iteration takes "
            f"{epoch_timer.last_epoch_time()/len(train_loader):.2f}s on average. "
            f"From epoch {start_epoch} to {cur_epoch}, each iteration takes "
            f"{epoch_timer.avg_epoch_time()/len(train_loader):.2f}s on average."
        )

        is_checkp_epoch = (cu.is_checkpoint_epoch(
            cfg,
            cur_epoch,
            None if multigrid is None else multigrid.schedule,
        ) or cur_epoch == cfg.SOLVER.MAX_EPOCH - 1)
        is_eval_epoch = misc.is_eval_epoch(
            cfg, cur_epoch, None if multigrid is None else multigrid.schedule)

        # Compute precise BN stats.
        if ((is_checkp_epoch or is_eval_epoch) and cfg.BN.USE_PRECISE_STATS
                and len(get_bn_modules(model)) > 0):
            calculate_and_update_precise_bn(
                precise_bn_loader,
                model,
                min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)),
                cfg.NUM_GPUS > 0,
            )
        _ = misc.aggregate_sub_bn_stats(model)

        # Save a checkpoint.
        if is_checkp_epoch:
            cu.save_checkpoint(
                cfg.OUTPUT_DIR,
                model,
                optimizer,
                cur_epoch,
                cfg,
                scaler if cfg.TRAIN.MIXED_PRECISION else None,
            )
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(
                val_loader,
                model,
                val_meter,
                cur_epoch,
                cfg,
                train_loader,
                writer,
            )
    if writer is not None:
        writer.close()
    result_string = "Top1 Acc: {:.2f} Top5 Acc: {:.2f} MEM: {:.2f}".format(
        100 - val_meter.min_top1_err,
        100 - val_meter.min_top5_err,
        misc.gpu_mem_usage(),
    )
    logger.info("training done: {}".format(result_string))

    return result_string
Example no. 16
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    assert (len(test_loader.dataset) %
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS) == 0)
    # Create meters for multi-view testing.
    test_meter = TestMeter(
        len(test_loader.dataset) //
        (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
        cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
        cfg.MODEL.NUM_CLASSES,
        len(test_loader),
        cfg.DATA.MULTI_LABEL,
        cfg.DATA.ENSEMBLE_METHOD,
    )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
Example no. 17
def demo(cfg, backbone):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2" in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )
    
    darknetlib_path = '/home/ubuntu/hanhbd/SlowFast/detector/libdarknet.so'
    config_path = '/home/ubuntu/hanhbd/SlowFast/detector/yolov4.cfg'
    meta_path = '/home/ubuntu/hanhbd/SlowFast/detector/coco.data'
    classes_path = '/home/ubuntu/hanhbd/SlowFast/detector/coco.names'
    weight_path = '/home/ubuntu/hanhbd/SlowFast/detector/yolov4.weights'
    
    if backbone == 'yolo':
        object_predictor = YOLO.get_instance(darknetlib_path, config_path, meta_path, classes_path, weight_path)
    else:
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
        dtron2_cfg.MODEL.WEIGHTS = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS
        object_predictor = DefaultPredictor(dtron2_cfg)

    with open(cfg.DEMO.LABEL_FILE_PATH) as f:
        labels = f.read().split('\n')[:-1]
    palette = np.random.randint(64, 128, (len(labels), 3)).tolist()
    count_xxx = 0
    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE

    frames = []
    org_frames = []
    mid_frame = None
    pred_labels = []
    draw_imgs = []

    cap = cv2.VideoCapture(cfg.DEMO.DATA_SOURCE)
    was_read, frame = cap.read()
    display_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    display_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    videowriter = cv2.VideoWriter('./result/testset_fighting_05.avi', fourcc, fps, (display_width, display_height))

    while was_read:
        was_read, frame = cap.read()
        if not was_read:
            videowriter.release()
            break

        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            org_frames.append(frame)

        else:
            # Predict all person boxes in every frame.
            start = time()
            mid_frame = org_frames[seq_len//2 - 2]
            # Only draw half of the frames because we will slide by half the sequence length.
            if cfg.DETECTION.ENABLE and len(draw_imgs) == 0:
                for idx in range(seq_len//2 - 1):
                    image = org_frames[idx]
                    boxes = detector(object_predictor, image, backbone, cfg, display_height, display_width)
                    # boxes = object_predictor.detect_image(img)
                    # boxes = torch.as_tensor(boxes).float().cuda()

                    boxes = torch.cat([torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes], axis=1)
                    boxes = boxes.cpu().detach().numpy()
                    if backbone == 'yolo':
                        boxes = boxes[:, 1:]
                    else:
                        ratio = np.min(
                            [display_height, display_width]
                        ) / cfg.DATA.TEST_CROP_SIZE
                        boxes = boxes[:, 1:] * ratio

                    for box in boxes:
                        xmin, ymin, xmax, ymax = box
                        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)

                    draw_imgs.append(image)

            # detect box in mid frame
            if cfg.DETECTION.ENABLE:
                boxes = detector(object_predictor, mid_frame, backbone, cfg, display_height, display_width)
                boxes = torch.cat([torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes], axis=1)

            inputs = torch.from_numpy(np.array(frames)).float()
            inputs = inputs / 255.0
            # Perform color normalization.
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)

            # 1 C T H W.
            inputs = inputs.unsqueeze(0)

            # Sample frames for the fast pathway.
            index = torch.linspace(0, inputs.shape[2] - 1, cfg.DATA.NUM_FRAMES).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            

            # Sample frames for the slow pathway.
            index = torch.linspace(0, fast_pathway.shape[2] - 1, 
                                    fast_pathway.shape[2]//cfg.SLOWFAST.ALPHA).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            inputs = [slow_pathway, fast_pathway]

            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # use a dummy variable to disable all computations below.
            if not len(boxes):
                preds = torch.tensor([])
            else:
                preds = model(inputs, boxes)

            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]
                
            # post processing
            preds = preds.cpu().detach().numpy()
            pred_masks = preds > .1
            label_ids = [np.nonzero(pred_mask)[0] for pred_mask in pred_masks]
            pred_labels = [
                [labels[label_id] for label_id in perbox_label_ids]
                for perbox_label_ids in label_ids
            ]
            print(pred_labels)
            boxes = boxes.cpu().detach().numpy()
            if backbone == 'yolo':
                boxes = boxes[:, 1:]
            else:
                ratio = np.min(
                    [display_height, display_width]
                ) / cfg.DATA.TEST_CROP_SIZE
                boxes = boxes[:, 1:] * ratio


            # draw result on mid frame
            if pred_labels and boxes.any():
                for box, box_labels in zip(boxes.astype(int), pred_labels):
                    xmin, ymin, xmax, ymax = box
                    cv2.rectangle(mid_frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)
                
                    label_origin = box[:2]
                    for label in box_labels:
                        label_origin[-1] -= 5
                        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, .5, 2)
                        cv2.rectangle(
                            mid_frame, 
                            (label_origin[0], label_origin[1] + 5), 
                            (label_origin[0] + label_width, label_origin[1] - label_height - 5),
                            palette[labels.index(label)], -1
                        )
                        cv2.putText(
                            mid_frame, label, tuple(label_origin), 
                            cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1
                        )
                        label_origin[-1] -= label_height + 5

            # append mid frame to the draw array
            draw_imgs.append(mid_frame)

            # write image to videos
            for img_ in draw_imgs:
                videowriter.write(img_)
            print("time process", (time() - start) /64 )
            # clean the buffer of frames and org_frames with slide 1/2 seq_len
            # frames = frames[seq_len//2 - 1:]
            # org_frames = org_frames[seq_len//2 - 1:]

            frames = frames[1:]
            org_frames = org_frames[1:]
            draw_imgs = draw_imgs[-1:]

            count_xxx += 1
Example no. 18
def test(cfg, cnt=-1):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Perform multi-view test on the entire dataset.
    scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
    if not os.path.exists(scores_path):
        os.makedirs(scores_path)
    
    filename_root = cfg.EPICKITCHENS.TEST_LIST.split('.')[0]
    if cnt >= 0:
        file_name = '{}_{}_{}.pkl'.format(filename_root, cnt, cfg.MODEL.MODEL_NAME)
    else:
        file_name = '{}_{}_{}.pkl'.format(filename_root, 'test_only', cfg.MODEL.MODEL_NAME)
    file_path = os.path.join(scores_path, file_name)
    logger.info(file_path)

    # Print config.
    # if cnt < 0:
    #     logger.info("Test with config:")
    #     logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if cfg.EPICKITCHENS.USE_BBOX:
        model.module.load_weight_slowfast()

    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=False)
    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    if cfg.TEST.EXTRACT_FEATURES_MODE != "" and cfg.TEST.EXTRACT_FEATURES_MODE in ["test","train","val"]:
        test_loader = loader.construct_loader(cfg, cfg.TEST.EXTRACT_FEATURES_MODE)
    else:
        test_loader = loader.construct_loader(cfg, "test")

    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        if cfg.TEST.DATASET == 'epickitchens':
            test_meter = EPICTestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )
        else:
            test_meter = TestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )

    
    pickle.dump([], open(file_path, 'wb+'))
    if cfg.TEST.EXTRACT_FEATURES:
        preds, labels, metadata, x_feat_list = perform_test(test_loader, model, test_meter, cfg)
    else:
        preds, labels, metadata = perform_test(test_loader, model, test_meter, cfg)

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {'verb_output': preds[0],
                       'noun_output': preds[1],
                       'verb_gt': labels[0],
                       'noun_gt': labels[1],
                       'narration_id': metadata}
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            pickle.dump(results, open(file_path, 'wb'))

            if cfg.TEST.EXTRACT_FEATURES:
                pid = cfg.EPICKITCHENS.FEATURE_VID.split("_")[0]
                if not os.path.exists(os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid)):
                    os.mkdir(os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid))
                if not cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                    arr_slow = torch.cat(x_feat_list[0], dim=0).numpy()
                    arr_fast = torch.cat(x_feat_list[1], dim=0).numpy()
                    print(arr_slow.shape, arr_fast.shape)
                    fpath_feat = os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid, '{}.pkl'.format(cfg.EPICKITCHENS.FEATURE_VID))
                    with open(fpath_feat,'wb+') as f:
                        pickle.dump([arr_slow, arr_fast], f)
                elif cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                    fpath_feat = os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid, '{}.npy'.format(cfg.EPICKITCHENS.FEATURE_VID))
                    with open(fpath_feat,'wb+') as f:
                        arr = torch.cat(x_feat_list, dim=0).numpy()
                        print(arr.shape)
                        np.save(f, arr)
Example no. 19
_C.DATA.MEAN = [0.45, 0.45, 0.45]
# List of input frame channel dimensions.
_C.DATA.INPUT_CHANNEL_NUM = [3, 3]
# The std value of the video raw pixels across the R G B channels.
_C.DATA.STD = [0.225, 0.225, 0.225]
'''

yaml_location = '/media/esat/6a7c4273-8106-47bc-b992-6760dfcea9a1/tsnCoffe/two-stream-pytorch/models/SlowFast/configs/Kinetics/SLOWFAST_8x8_R50.yaml'

path_to_checkpoint = '/media/esat/6a7c4273-8106-47bc-b992-6760dfcea9a1/tsnCoffe/two-stream-pytorch/weights/SLOWFAST_8x8_R50.pkl'
parser = argparse.ArgumentParser(description='PyTorch Two-Stream Action Recognition')
#parser.add_argument('--data', metavar='DIR', default='./datasets/ucf101_frames',
#                    help='path to dataset')
parser.add_argument('--cfg_file', metavar='DIR', default=yaml_location,
                    help='path to dataset setting files')

parser.add_argument('--opts', metavar='DIR', default=None,
                    help='path to dataset setting files')

args = parser.parse_args()

cfg = load_config(args)

model = SlowFast(cfg)

load_checkpoint(path_to_checkpoint,
                model, data_parallel=False, convert_from_caffe2=True)

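Once Example no. 19 has loaded the Caffe2 weights, the model expects a two-pathway input. A hedged smoke-test sketch for SLOWFAST_8x8_R50 (8-frame slow path, ALPHA=4, so a 32-frame fast path; the 224x224 crop and the 400-class Kinetics head are assumptions from the standard config):

import torch

model.eval()
with torch.no_grad():
    slow = torch.randn(1, 3, 8, 224, 224)   # N C T H W, slow pathway
    fast = torch.randn(1, 3, 32, 224, 224)  # fast pathway, ALPHA=4 denser
    preds = model([slow, fast])
print(preds.shape)  # expected: torch.Size([1, 400])
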
Example no. 20
def visualize_activations(cfg):
    """
    Visualize activations of a video model on examples from the train set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """

    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Vizualize activations")
    # logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    # Construct the optimizer.
    # optimizer = optim.construct_optimizer(model, cfg)

    logger.info("Load from given checkpoint file.")
    checkpoint_epoch = cu.load_checkpoint(
        cfg.TRAIN.CHECKPOINT_FILE_PATH,
        model,
        cfg.NUM_GPUS > 1,
        optimizer=None,
        inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
        convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
    )

    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=True)

    # Create the video train and val loaders.
    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")

    train_set = build_dataset(cfg.TEST.DATASET, cfg, "train")

    for i in np.random.choice(len(train_set), 5):

        # frames, label, _, _ = train_set.get_augmented_examples(i)
        frames, label, _, _ = train_set[i]
        inputs = frames
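        # Add a leading batch dimension to the first pathway tensor.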
        inputs[0] = inputs[0][None, :]
        logger.info(frames[0].shape)
        # frames = frames[0].permute(0,2,3,4,1)
        frames = frames[0].squeeze().transpose(0, 1)  #.permute(1,2,3,0)
        logger.info(frames.shape)
        tv.utils.save_image(frames,
                            os.path.join(cfg.OUTPUT_DIR, 'example_%d.jpg' % i),
                            nrow=18,
                            normalize=True)

        for j in range(len(inputs)):
            inputs[j] = inputs[j].cuda(non_blocking=True)
        with torch.no_grad():
            # logger.info(inputs[i].shape)
            # sys.stdout.flush()
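            # Keep at most the first 3 clips (presumably to bound GPU memory).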
            inputs[0] = inputs[0][:min(3, len(inputs[0]))]
            output = model(inputs, extra=['frames'])

            # frames = frames[0].transpose(0,1)#.permute(1,2,3,0)
            # tv.utils.save_image(frames, os.path.join(cfg.OUTPUT_DIR, 'example_target_%d.jpg'%i), nrow=18, normalize=True)

            input_aug = output['input_aug']
            logger.info(input_aug.shape)

            input_aug = input_aug[0].transpose(0, 1)
            tv.utils.save_image(input_aug,
                                os.path.join(cfg.OUTPUT_DIR,
                                             'example_input_%d.jpg' % i),
                                nrow=18,
                                normalize=True)

            # mix_layer [1, timesteps, layers, activations]
            mix_out = output['mix_layer']  #.cpu().data.numpy().squeeze()
            for layer in range(len(mix_out)):
                logger.info('mix layer %d' % layer)
                logger.info(mix_out[layer].view([18, -1]).mean(1))
                images = mix_out[layer].transpose(1, 2).transpose(0, 1)
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])
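                # Min-max scale the activations to [0, 1] before saving the image grid.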
                images = (images - images.min())
                images = images / images.max()
                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_mix_layer_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)

            # BU errors per timestep per layer (choose a random activation or the mean) also write out the mean/norm
            # [1, timesteps, layers, channels, height, width]

            bu_errors = output['bu_errors']  #.cpu()#.data.numpy().squeeze()

            for layer in range(len(bu_errors)):
                images = bu_errors[layer].transpose(1, 2).transpose(0, 1)
                images = (images - images.min())
                images = images / images.max()
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])
                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_bu_errors_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)

            # horiz inhibition per timestep per layer (choose a random activation or the mean) also write out the mean/norm
            # [1, timesteps, layers, channels, height, width]
            inhibition = output['H_inh']  #.cpu()#.data.numpy().squeeze()
            for layer in range(len(inhibition)):
                images = inhibition[layer].transpose(1, 2).transpose(0, 1)
                images = (images - images.min())
                images = images / images.max()
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])

                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_H_inh_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)

            # persistent state in between timesteps
            # [1, timesteps, layers, channels, height, width]
            hidden = output['hidden']  #.cpu()#.data.numpy().squeeze()
            for layer in range(len(hidden)):
                images = hidden[layer].transpose(1, 2).transpose(0, 1)
                images = (images - images.min())
                images = images / images.max()
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])

                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_hidden_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)
Exemplo n.º 21
0
    def __post_init__(self):
        "Setup log file, load model if required"

        # Get rank
        self.rank = get_rank()

        self.init_log_dirs()

        self.prepare_log_keys()

        self.logger = self.init_logger()

        self.mlf_logger = MLFlowTracker(
            self.cfg,
            loss_keys=self.loss_keys,
            met_keys=self.met_keys,
            txt_log_file=self.txt_log_file,
        )
        self.prepare_log_file()

        # Set the number of iterations, epochs, best_met to 0.
        # Updated in loading if required
        self.num_it = 0
        self.num_epoch = 0
        self.best_met = 0

        # Resume if given a path
        if self.cfg.train["resume"]:
            self.load_model_dict(
                resume_path=self.cfg.train["resume_path"],
                load_opt=self.cfg.train["load_opt"],
            )
        elif self.cfg.mdl["load_sf_pretrained"]:
            if self.cfg.task_type == "vb":
                convert_from_caffe2 = self.cfg.sf_mdl.TRAIN.CHECKPOINT_TYPE == "caffe2"
                ckpt_pth = self.cfg.sf_mdl.TRAIN.CHECKPOINT_FILE_PATH
                assert Path(ckpt_pth).exists()
                if self.cfg.do_dist:
                    mdl1 = self.mdl.module.sf_mdl
                else:
                    mdl1 = self.mdl.sf_mdl
                load_checkpoint(
                    ckpt_pth,
                    model=mdl1,
                    data_parallel=False,
                    convert_from_caffe2=convert_from_caffe2,
                )
                self.logger.info(
                    f"Loaded pretrained weights from {ckpt_pth} correctly"
                )
            elif self.cfg.task_type == "vb_arg":
                if is_main_process():
                    ckpt_pth = self.cfg.train.sfbase_pret_path
                    if self.cfg.do_dist:
                        mdl1 = self.mdl.module.sf_mdl
                    else:
                        mdl1 = self.mdl.sf_mdl
                    ckpt_data = torch.load(ckpt_pth)
                    ckpt_mdl_sd = ckpt_data["model_state_dict"]

                    # ckpt_mdl_sd = ckpt_mdl.state_dict()

                    def strip_module_from_key(key):
                        assert "module" == key.split(".")[0]
                        return ".".join(key.split(".")[1:])

                    if "module" == list(ckpt_mdl_sd.keys())[0].split(".")[0]:
                        ckpt_mdl_sd = {
                            strip_module_from_key(k): v
                            for k, v in ckpt_mdl_sd.items()
                        }

                    # Keep only parameters under the "sf_mdl." prefix and strip
                    # the prefix, e.g. "sf_mdl.s1.conv.weight" -> "s1.conv.weight".
                    sf_mdl_ckpt_dct = {
                        k.split(".", 1)[1]: v
                        for k, v in ckpt_mdl_sd.items()
                        if k.split(".", 1)[0] == "sf_mdl"
                    }
                    mdl1.load_state_dict(sf_mdl_ckpt_dct)
                    self.logger.info(
                        f"Loaded pretrained SFBase weights from {ckpt_pth} correctly"
                    )

                if self.cfg.train.freeze_sfbase:
                    if self.cfg.do_dist:
                        mdl1 = self.mdl.module.sf_mdl
                    else:
                        mdl1 = self.mdl.sf_mdl
                    for param in mdl1.parameters():
                        param.requires_grad = False
                    self.logger.info("Freezing SFBase")
Exemplo n.º 22
0
def demo(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    # Load the labels of the Kinetics-400 dataset.
    labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
    labels = labels_df['name'].values
    img_provider = VideoReader(cfg)
    frames = []
    # # Option 1
    # pred_label = ''
    # Option 2
    pred_labels = []
    s = 0.
    for able_to_read, frame in img_provider:
        if not able_to_read:
            # When the reader reaches the end of the video, clear the buffer and move on.
            frames = []
            continue

        if len(frames) != cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(256, frame_processed)
            frames.append(frame_processed)

        if len(frames) == cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE:
            start = time()
            # Perform color normalization.
            inputs = torch.tensor(frames).float()
            inputs = inputs / 255.0
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)
            # 1 C T H W.
            inputs = inputs[None, :, :, :, :]
            # Sample frames for the fast pathway.
            index = torch.linspace(0, inputs.shape[2] - 1,
                                   cfg.DATA.NUM_FRAMES).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            logger.info('fast_pathway.shape={}'.format(fast_pathway.shape))
            # Sample frames for the slow pathway.
            index = torch.linspace(0, fast_pathway.shape[2] - 1,
                                   fast_pathway.shape[2] //
                                   cfg.SLOWFAST.ALPHA).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            logger.info('slow_pathway.shape={}'.format(slow_pathway.shape))
            inputs = [slow_pathway, fast_pathway]
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # Perform the forward pass.
            preds = model(inputs)
            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            ## Option 1: single label inference selected from the highest probability entry.
            # label_id = preds.argmax(-1).cpu()
            # pred_label = labels[label_id]
            # Option 2: multi-label inference selected from probability entries > threshold.
            label_ids = torch.nonzero(
                preds.squeeze() > .1).reshape(-1).cpu().detach().numpy()
            pred_labels = labels[label_ids]
            logger.info(pred_labels)
            if not list(pred_labels):
                pred_labels = ['Unknown']

            # Remove the oldest frame in the buffer to make room for the new one.
            # frames.pop(0)
            frames = []
            s = time() - start

        # #************************************************************
        # # Option 1
        # #************************************************************
        # # Display prediction speed to frame
        # cv2.putText(frame, 'Speed: {:.2f}s'.format(s), (20, 30),
        #             fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        #             fontScale=1, color=(0, 235, 0), thickness=3)
        # # Display predicted label to frame.
        # cv2.putText(frame, 'Action: {}'.format(pred_label), (20, 60),
        #             fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        #             fontScale=1, color=(0, 235, 0), thickness=3)
        #************************************************************
        # Option 2
        #************************************************************
        # Display prediction speed to frame
        cv2.putText(frame,
                    'Speed: {:.2f}s'.format(s), (20, 30),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1,
                    color=(0, 235, 0),
                    thickness=3)
        # Display predicted labels to frame.
        y_offset = 60
        cv2.putText(frame,
                    'Action:', (20, y_offset),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1,
                    color=(0, 235, 0),
                    thickness=3)
        for pred_label in pred_labels:
            y_offset += 30
            cv2.putText(frame,
                        '{}'.format(pred_label), (20, y_offset),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=1,
                        color=(0, 235, 0),
                        thickness=3)

        # Display the frame
        cv2.imshow('SlowFast', frame)
        # hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break

    img_provider.clean()
Exemplo n.º 23
0
def test(cfg):
    """
    Perform multi-view testing/feature extraction on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    vid_root = cfg.DATA.PATH_TO_DATA_DIR
    videos_list_file = os.path.join(vid_root, "vid_list.csv")

    print("Loading Video List ...")
    with open(videos_list_file) as f:
        videos = sorted(
            [x.strip() for x in f.readlines() if len(x.strip()) > 0])
    print("Done")
    print("----------------------------------------------------------")

    print("{} videos to be processed...".format(len(videos)))
    print("----------------------------------------------------------")

    start_time = time.time()
    for vid in videos:
        # Create video testing loaders.
        path_to_vid = os.path.join(vid_root, os.path.split(vid)[0])
        vid_id = os.path.split(vid)[1]

        out_path = os.path.join(cfg.OUTPUT_DIR, os.path.split(vid)[0])
        out_file = vid_id.split(".")[0] + "_{}.npy".format(cfg.DATA.NUM_FRAMES)
        if os.path.exists(os.path.join(out_path, out_file)):
            print("{} already exists".format(out_file))
            continue

        print("Processing {}...".format(vid))

        dataset = VideoSet(cfg, path_to_vid, vid_id)
        test_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=cfg.TEST.BATCH_SIZE,
            shuffle=False,
            sampler=None,
            num_workers=cfg.DATA_LOADER.NUM_WORKERS,
            pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
            drop_last=False,
        )

        # Perform multi-view test on the entire dataset.
        feat_arr = multi_view_test(test_loader, model, cfg)

        os.makedirs(out_path, exist_ok=True)
        np.save(os.path.join(out_path, out_file), feat_arr)
        print("Done.")
        print("----------------------------------------------------------")
    end_time = time.time()
    hours, minutes, seconds = calculate_time_taken(start_time, end_time)
    print("Time taken: {} hour(s), {} minute(s) and {} second(s)".format(
        hours, minutes, seconds))
    print("----------------------------------------------------------")
Exemplo n.º 24
0
def feature_extract(kwargs):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    cfg = kwargs.pop('cfg')
    path_to_video_list = kwargs.pop('path_to_video_list')
    path_to_video_dir = kwargs.pop('path_to_video_dir')
    path_to_feat_dir = kwargs.pop('path_to_feat_dir')
    num_features = kwargs.pop('num_features')

    if not os.path.isdir(path_to_feat_dir):
        os.makedirs(path_to_feat_dir)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Extract feature with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg, feature_extraction=True)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Extracting features with random initialization. Only for debugging.")

    feature_extract_fn = perform_bbox_feature_extract if cfg.DETECTION.ENABLE else perform_feature_extract

    # Create video feature extraction loaders.
    if osp.isfile(path_to_video_list):
        path_to_videos = []
        with open(path_to_video_list, 'r') as f:
            video_list = json.load(f)

            for video_name in video_list:
                path_to_video = osp.join(path_to_video_dir, video_name)
                if osp.isfile(path_to_video):
                    path_to_videos.append(path_to_video)

    else:
        path_to_videos = glob.glob(osp.join(path_to_video_dir, '*'))

    for video_idx, path_to_video in enumerate(tqdm(path_to_videos)):
        path_to_feature = osp.join(path_to_feat_dir, osp.splitext(osp.basename(path_to_video))[0] + '.json')
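        # Skip videos whose features were already fully extracted in a previous run.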
        if osp.isfile(path_to_feature) and json.load(open(path_to_feature))['num_features'] == num_features:
            continue

        video_extraction_loader = loader.construct_loader(cfg, path_to_video)

        video_data = {
            'num_features': 0,
            'video_features': {}
        }

        if len(video_extraction_loader) > 0:
            video_features = feature_extract_fn(video_extraction_loader, model, cfg)
            assert all(feature is not None for feature in video_features), 'Missing some features!'
            video_data['num_features'] = len(video_features)
            video_data['video_features'] = video_features

        if video_data['num_features'] != num_features:
            print('Warning! Video %s has %d features.' % (path_to_video, video_data['num_features']))

        with open(path_to_feature, 'w') as f:
            json.dump(video_data, f)
Exemplo n.º 25
0
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                              cfg.NUM_GPUS > 1, optimizer)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                    cfg)

        # Compute precise BN stats.
        if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
            calculate_and_update_precise_bn(train_loader, model,
                                            cfg.BN.NUM_BATCHES_PRECISE)

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)
Exemplo n.º 26
0
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()
    if du.is_master_proc():
        if cfg.ENABLE_WANDB:
            wandb.login()
            wandb.init(project='bbox', entity='slowfast')

    # Print config.
    # logger.info("Train with config:")
    # logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)

    if cfg.EPICKITCHENS.USE_BBOX and not cu.has_checkpoint(cfg.OUTPUT_DIR):
        slow_fast_model = SlowFast(cfg)
        if cfg.EPICKITCHENS.USE_BBOX and cfg.EPICKITCHENS.LOAD_SLOWFAST_PRETRAIN:
            if cfg.EPICKITCHENS.SLOWFAST_PRETRAIN_CHECKPOINT_FILE_PATH != "":
                _ = cu.load_checkpoint(
                    cfg.EPICKITCHENS.SLOWFAST_PRETRAIN_CHECKPOINT_FILE_PATH,
                    slow_fast_model,
                    False,
                    optimizer=None,
                    inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
                    convert_from_caffe2=False,
                )
        # cfg.TRAIN.CHECKPOINT_TYPE == "caffe2"
        logger.info("Load from slowfast.")
        if cfg.NUM_GPUS > 1:
            model.module.load_weight_slowfast(slow_fast_model)
        else:
            model.load_weight_slowfast(slow_fast_model)
    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=True)

    if cfg.BN.FREEZE:
        model.freeze_fn('bn_parameters')
    if cfg.EPICKITCHENS.USE_BBOX and cfg.EPICKITCHENS.FREEZE_BACKBONE:
        model.freeze_fn('slowfast_bbox')

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                              cfg.NUM_GPUS > 1, optimizer)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "" and not cfg.TRAIN.FINETUNE:
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "" and cfg.TRAIN.FINETUNE:
        logger.info("Load from given checkpoint file. Finetuning.")
        _ = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = 0
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    if cfg.TRAIN.DATASET != 'epickitchens' or not cfg.EPICKITCHENS.TRAIN_PLUS_VAL:
        train_loader = loader.construct_loader(cfg, "train")
        val_loader = loader.construct_loader(cfg, "val")
        logger.info("Train loader size: {}".format(len(train_loader)))
        logger.info("Val loader size: {}".format(len(val_loader)))
    else:
        train_loader = loader.construct_loader(cfg, "train+val")
        val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        if cfg.TRAIN.DATASET == 'epickitchens':
            train_meter = EPICTrainMeterSimple(len(train_loader), cfg)
            val_meter = EPICValMeterSimple(len(val_loader), cfg)
        else:
            train_meter = TrainMeter(len(train_loader), cfg)
            val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    cnt = 0
    # eval_epoch(val_loader, model, val_meter, 0, cfg, cnt)
    # test_from_train(model, cfg, cnt=cnt)
    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        cnt = train_epoch(train_loader, model, optimizer, train_meter,
                          cur_epoch, cfg, cnt)

        # Compute precise BN stats.
        if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
            calculate_and_update_precise_bn(train_loader, model,
                                            cfg.BN.NUM_BATCHES_PRECISE)

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            is_best_epoch = eval_epoch(val_loader, model, val_meter, cur_epoch,
                                       cfg, cnt)
            if is_best_epoch:
                cu.save_checkpoint(cfg.OUTPUT_DIR,
                                   model,
                                   optimizer,
                                   cur_epoch,
                                   cfg,
                                   is_best_epoch=is_best_epoch)
Exemplo n.º 27
0
                # Save gif
                if (i+1) % 5 == 0:
                    layer_path = self.dir_path + 'layer/'
                    if not os.path.exists(layer_path):
                        os.mkdir(layer_path)

                    path = layer_path + str(self.model_name) + '_iter' + str(i) + '_path' + str(j) + '_' + \
                           str(self.layer) + '_f' + str(self.filter) + '_lr' + str(self.initial_learning_rate) + "_wd" \
                           + str(self.weight_decay)
                    save_gif(created_video, path, stream_type="rgb")


if __name__ == '__main__':

    args = parse_args()
    cfg = load_config(args)
    # Construct the model
    model = build_model(cfg)
    load_checkpoint(checkpoint_path, model, data_parallel=False, optimizer=None, inflation=False, convert_from_caffe2=True)
    cnn_layer = "s2.pathway0_res0.branch1"     # "conv3d_0c_1x1.conv3d"
    filter_pos = 0

    device = torch.device('cuda:0')
    model = model.to(device)

    layer_vis = CNNLayerVisualization(model, cnn_layer, filter_pos, device)

    # Layer visualization with pytorch hooks
    layer_vis.visualise_layer_with_hooks()

Exemplo n.º 28
0
    'num_slots',  # Number of slots k
    'num_blocks',  # Number of blocks in the attention U-Net
    'channel_base',  # Number of channels used for the first U-Net conv layer
    'bg_sigma',  # Sigma of the decoder distributions for the first slot
    'fg_sigma',  # Sigma of the decoder distributions for all other slots
]
MonetConfig = namedtuple('MonetConfig', config_options)
clevr_conf = MonetConfig(
    num_slots=cfg.MONET.NUM_SLOTS,
    num_blocks=6,
    channel_base=64,
    bg_sigma=0.09,
    fg_sigma=0.11,
)
model = Monet(clevr_conf, cfg.DATA.RESIZE_H, cfg.DATA.RESIZE_W)
cu.load_checkpoint(cfg.MONET.CHECKPOINT_LOAD, model, data_parallel=False)
if cfg.NUM_GPUS:
    cur_device = torch.cuda.current_device()
    model = model.cuda(device=cur_device)
model.eval()
"""
Train dataset
Directory organization:
video_train 
Sub_dirs:
  video_00000-01000  video_01000-02000  video_02000-03000  video_03000-04000  video_04000-05000  
  video_05000-06000  video_06000-07000  video_07000-08000  video_08000-09000  video_09000-10000
"""
sub_dirs = [
    "video_00000-01000", "video_01000-02000", "video_02000-03000",
    "video_03000-04000", "video_04000-05000", "video_05000-06000",
    "video_06000-07000", "video_07000-08000", "video_08000-09000",
    "video_09000-10000",
]
Exemplo n.º 29
0
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Init multigrid.
    multigrid = None
    if cfg.MULTIGRID.LONG_CYCLE or cfg.MULTIGRID.SHORT_CYCLE:
        multigrid = MultigridSchedule()
        cfg = multigrid.init_multigrid(cfg)
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, _ = multigrid.update_long_cycle(cfg, cur_epoch=0)
    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")
    precise_bn_loader = (
        loader.construct_loader(cfg, "train", is_precise_bn=True)
        if cfg.BN.USE_PRECISE_STATS
        else None
    )

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, changed = multigrid.update_long_cycle(cfg, cur_epoch)
            if changed:
                (
                    model,
                    optimizer,
                    train_loader,
                    val_loader,
                    precise_bn_loader,
                    train_meter,
                    val_meter,
                ) = build_trainer(cfg)

                # Load checkpoint.
                if cu.has_checkpoint(cfg.OUTPUT_DIR):
                    last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
                    assert "{:05d}.pyth".format(cur_epoch) in last_checkpoint
                else:
                    last_checkpoint = cfg.TRAIN.CHECKPOINT_FILE_PATH
                logger.info("Load from {}".format(last_checkpoint))
                cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1,
                                   optimizer)

        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)

        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                    cfg, writer)

        is_checkp_epoch = cu.is_checkpoint_epoch(
            cfg,
            cur_epoch,
            None if multigrid is None else multigrid.schedule,
        )
        is_eval_epoch = misc.is_eval_epoch(
            cfg, cur_epoch, None if multigrid is None else multigrid.schedule)

        # Compute precise BN stats.
        if ((is_checkp_epoch or is_eval_epoch) and cfg.BN.USE_PRECISE_STATS
                and len(get_bn_modules(model)) > 0):
            calculate_and_update_precise_bn(
                precise_bn_loader,
                model,
                min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)),
                cfg.NUM_GPUS > 0,
            )
        _ = misc.aggregate_sub_bn_stats(model)

        # Save a checkpoint.
        if is_checkp_epoch:
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               cfg)
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer)

    if writer is not None:
        writer.close()
Exemplo n.º 30
0
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
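        # Every video must contribute exactly NUM_ENSEMBLE_VIEWS x NUM_SPATIAL_CROPS clips.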
        assert (
            len(test_loader.dataset) %
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS) == 0)
        # Create meters for multi-view testing.
        if cfg.TEST.DATASET == 'epickitchens':
            test_meter = EPICTestMeter(
                len(test_loader.dataset) //
                (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )
        else:
            test_meter = TestMeter(
                len(test_loader.dataset) //
                (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )

    # Perform multi-view test on the entire dataset.
    preds, labels, metadata = perform_test(test_loader, model, test_meter, cfg)

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {
                'scores': {
                    'verb': preds[0],
                    'noun': preds[1]
                },
                'labels': {
                    'verb': labels[0],
                    'noun': labels[1]
                },
                'narration_id': metadata
            }
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            file_path = os.path.join(scores_path,
                                     cfg.EPICKITCHENS.TEST_SPLIT + '.pkl')
            pickle.dump(results, open(file_path, 'wb'))
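
For later evaluation, the pickled scores can be read back, e.g. (illustrative only):

import pickle

with open(file_path, 'rb') as f:
    results = pickle.load(f)
print(results['scores']['verb'].shape, len(results['narration_id']))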