Example #1
def get_train_dataset(cfg, root_dataset_path, train_dataset_name,
                      eval_binary_path):
    # Only build the train dataset when PCA or whitening training is
    # required; otherwise return None.
    if cfg.IMG_RETRIEVAL.SHOULD_TRAIN_PCA_OR_WHITENING:
        train_data_path = f"{root_dataset_path}/{train_dataset_name}"
        assert PathManager.exists(
            train_data_path), f"Unknown path: {train_data_path}"

        num_samples = 10 if cfg.IMG_RETRIEVAL.DEBUG_MODE else None

        if is_revisited_dataset(train_dataset_name):
            train_dataset = RevisitedInstanceRetrievalDataset(
                train_dataset_name, root_dataset_path)
        elif is_whiten_dataset(train_dataset_name):
            train_dataset = WhiteningTrainingImageDataset(
                train_data_path,
                cfg.IMG_RETRIEVAL.WHITEN_IMG_LIST,
                num_samples=num_samples,
            )
        else:
            train_dataset = InstanceRetrievalDataset(train_data_path,
                                                     eval_binary_path,
                                                     num_samples=num_samples)
    else:
        train_dataset = None
    return train_dataset
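
For context, a minimal sketch of how this helper might be invoked. The config object is mocked with SimpleNamespace purely for illustration (the real VISSL config is an AttrDict), and the dataset name and paths below are hypothetical; the call assumes the module context above and real data on disk.

from types import SimpleNamespace

# Illustrative stand-in for the real VISSL config (names/values hypothetical).
cfg = SimpleNamespace(
    IMG_RETRIEVAL=SimpleNamespace(
        SHOULD_TRAIN_PCA_OR_WHITENING=True,   # False -> get_train_dataset returns None
        DEBUG_MODE=False,                     # True caps the train set at 10 samples
        WHITEN_IMG_LIST="/path/to/whiten_img_list.txt",
    )
)

train_dataset = get_train_dataset(
    cfg,
    root_dataset_path="/path/to/datasets",
    train_dataset_name="my_whitening_set",    # hypothetical dataset name
    eval_binary_path=None,
)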
Example #2
    def process_train_image(i, out_dir):
        if i % LOG_FREQUENCY == 0:
            logging.info(f"Train Image: {i}")
        fname_out = f"{out_dir}/{i}.npy"
        if PathManager.exists(fname_out):
            # Reuse the cached descriptor instead of recomputing it.
            feat = load_file(fname_out)
            train_features.append(feat)
        else:
            fname_in = train_dataset.get_filename(i)
            if is_revisited_dataset(train_dataset_name):
                img = image_helper.load_and_prepare_revisited_image(fname_in)
            elif is_whiten_dataset(train_dataset_name):
                img = image_helper.load_and_prepare_whitening_image(fname_in)
            else:
                img = image_helper.load_and_prepare_image(fname_in, roi=None)
            # torch.autograd.Variable is a no-op since PyTorch 0.4;
            # move the batched input onto the GPU directly.
            vc = img.unsqueeze(0).cuda()
            # The model output is always a list.
            activation_map = model(vc)[0].cpu()
            # once we have the features,
            # we can perform: rmac | gem pooling | l2 norm
            if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "rmac":
                descriptors = get_rmac_descriptors(activation_map,
                                                   spatial_levels)
            else:
                descriptors = activation_map
            save_file(descriptors.data.numpy(), fname_out)
            train_features.append(descriptors.data.numpy())
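
The load-or-compute logic above caches one .npy file per image so that a rerun skips feature extraction entirely. A self-contained sketch of the same pattern, independent of VISSL (the helper name and the use of plain os/numpy are my own):

import os
import numpy as np

def load_or_compute(i, out_dir, compute_fn):
    """Return the cached feature for index i, computing and saving it on a miss."""
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, f"{i}.npy")
    if os.path.exists(path):
        return np.load(path)
    feat = compute_fn(i)  # e.g. run the model and post-process
    np.save(path, feat)
    return feat

features = [load_or_compute(i, "/tmp/feats", lambda i: np.zeros(128)) for i in range(4)]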
Example #3
def get_transforms(cfg, dataset_name):
    # Set up the basic data transforms applied to the train/eval dataset.
    if is_instre_dataset(dataset_name) or is_whiten_dataset(dataset_name):
        transforms = torchvision.transforms.Compose([
            MultigrainResize(int((256 / 224) * cfg.IMG_RETRIEVAL.RESIZE_IMG)),
            torchvision.transforms.CenterCrop(cfg.IMG_RETRIEVAL.RESIZE_IMG),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
        ])
    else:
        transforms = [
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
        ]

        if cfg.IMG_RETRIEVAL.CENTER_CROP:
            transforms = [
                torchvision.transforms.Resize(
                    int((256 / 224) * cfg.IMG_RETRIEVAL.RESIZE_IMG)),
                torchvision.transforms.CenterCrop(
                    cfg.IMG_RETRIEVAL.RESIZE_IMG),
            ] + transforms

        transforms = torchvision.transforms.Compose(transforms)

    return transforms
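
To make the pipeline concrete, here is how the composed transform could be applied to a single image. RESIZE_IMG is assumed to be 224 (the usual ImageNet crop size); the input is a synthetic placeholder image.

import torchvision
from PIL import Image

RESIZE_IMG = 224  # assumed value of cfg.IMG_RETRIEVAL.RESIZE_IMG

tfm = torchvision.transforms.Compose([
    torchvision.transforms.Resize(int((256 / 224) * RESIZE_IMG)),
    torchvision.transforms.CenterCrop(RESIZE_IMG),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
])

img = Image.new("RGB", (640, 480))  # synthetic placeholder image
tensor = tfm(img)
print(tensor.shape)  # torch.Size([3, 224, 224])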
Example #4
    def process_train_image(i, out_dir, verbose=False):
        if i % LOG_FREQUENCY == 0:
            logging.info(f"Train Image: {i}")

        fname_out = None
        if out_dir:
            fname_out = f"{out_dir}/{i}.npy"

        if fname_out and PathManager.exists(fname_out):
            feat = load_file(fname_out)
            train_features.append(feat)
        else:
            fname_in = train_dataset.get_filename(i)
            if is_revisited_dataset(train_dataset_name):
                img = image_helper.load_and_prepare_revisited_image(fname_in,
                                                                    roi=None)
            elif is_whiten_dataset(train_dataset_name):
                img = image_helper.load_and_prepare_whitening_image(fname_in)
            else:
                img = image_helper.load_and_prepare_image(fname_in, roi=None)
            # torch.autograd.Variable is a no-op since PyTorch 0.4;
            # move the batched input onto the GPU directly.
            vc = img.unsqueeze(0).cuda()
            # The model output is always a list.
            activation_map = model(vc)[0].cpu()

            if verbose:
                print(
                    f"Train Image raw activation map shape: {activation_map.shape}"
                )

            # once we have the features,
            # we can perform: rmac | gem pooling | l2 norm
            if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "rmac":
                descriptors = get_rmac_descriptors(
                    activation_map,
                    spatial_levels,
                    normalize=cfg.IMG_RETRIEVAL.NORMALIZE_FEATURES,
                )
            elif cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "gem":
                descriptors = gem(
                    activation_map,
                    p=cfg.IMG_RETRIEVAL.GEM_POOL_POWER,
                    add_bias=True,
                )
            else:
                descriptors = activation_map

            # Optionally l2 normalize the features.
            if (cfg.IMG_RETRIEVAL.NORMALIZE_FEATURES
                    and cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE != "rmac"):
                # RMAC performs normalization within the algorithm, hence we skip it here.
                descriptors = l2n(descriptors, dim=1)

            if fname_out:
                save_file(descriptors.data.numpy(), fname_out, verbose=False)
            train_features.append(descriptors.data.numpy())
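
Example #4 calls l2n for the optional L2-normalization step. Assuming it matches standard row-wise L2 normalization (equivalent to torch.nn.functional.normalize), a minimal sketch:

import torch

def l2n(x: torch.Tensor, dim: int = 1, eps: float = 1e-6) -> torch.Tensor:
    """L2-normalize x along `dim` so each slice has (approximately) unit norm."""
    return x / (x.norm(p=2, dim=dim, keepdim=True) + eps)

desc = torch.randn(1, 2048)
print(l2n(desc).norm())  # ~1.0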
Example #5
def get_train_dataset(cfg, root_dataset_path, train_dataset_name, eval_binary_path):
    # Only build the train dataset when PCA or whitening training is
    # required; otherwise return None.
    if cfg.IMG_RETRIEVAL.TRAIN_PCA_WHITENING:
        train_data_path = f"{root_dataset_path}/{train_dataset_name}"

        assert PathManager.exists(train_data_path), f"Unknown path: {train_data_path}"

        num_samples = (
            None
            if cfg.IMG_RETRIEVAL.NUM_TRAINING_SAMPLES == -1
            else cfg.IMG_RETRIEVAL.NUM_TRAINING_SAMPLES
        )

        if is_revisited_dataset(train_dataset_name):
            train_dataset = RevisitedInstanceRetrievalDataset(
                train_dataset_name, root_dataset_path, num_samples=num_samples
            )
        elif is_whiten_dataset(train_dataset_name):
            train_dataset = WhiteningTrainingImageDataset(
                train_data_path,
                cfg.IMG_RETRIEVAL.WHITEN_IMG_LIST,
                num_samples=num_samples,
            )
        elif is_copdays_dataset(train_dataset_name):
            train_dataset = CopyDaysDataset(
                data_path=train_data_path,
                num_samples=num_samples,
                use_distractors=cfg.IMG_RETRIEVAL.USE_DISTRACTORS,
            )
        elif is_oxford_paris_dataset(train_dataset_name):
            train_dataset = InstanceRetrievalDataset(
                train_data_path, eval_binary_path, num_samples=num_samples
            )
        else:
            train_dataset = GenericInstanceRetrievalDataset(
                train_data_path, num_samples=num_samples
            )
    else:
        train_dataset = None
    return train_dataset
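
Note the sentinel handling above: NUM_TRAINING_SAMPLES == -1 means "use the full dataset" and is translated to None before reaching the dataset constructors. The same convention as a tiny standalone helper (the name is hypothetical):

def resolve_num_samples(num_training_samples: int):
    """Map the config sentinel -1 ('no cap') to None; pass any other count through."""
    return None if num_training_samples == -1 else num_training_samples

assert resolve_num_samples(-1) is None
assert resolve_num_samples(100) == 100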
Example #6
    def process_train_image(i, out_dir, verbose=False):
        if i % LOG_FREQUENCY == 0:
            logging.info(f"Train Image: {i}")

        fname_out = None
        if out_dir:
            fname_out = f"{out_dir}/{i}.npy"

        if fname_out and g_pathmgr.exists(fname_out):
            feat = load_file(fname_out)
            train_features.append(feat)
        else:
            with PerfTimer("read_sample", PERF_STATS):
                fname_in = train_dataset.get_filename(i)
                if is_revisited_dataset(train_dataset_name):
                    img = image_helper.load_and_prepare_revisited_image(
                        fname_in, roi=None)
                elif is_whiten_dataset(train_dataset_name):
                    img = image_helper.load_and_prepare_whitening_image(
                        fname_in)
                else:
                    img = image_helper.load_and_prepare_image(fname_in,
                                                              roi=None)

            with PerfTimer("extract_features", PERF_STATS):
                img_scalings = cfg.IMG_RETRIEVAL.IMG_SCALINGS or [1]
                activation_maps = extract_activation_maps(
                    img, model, img_scalings)

            if verbose:
                print(
                    f"Example train image raw activation map shape: {activation_maps[0].shape}"  # NOQA
                )

            with PerfTimer("post_process_features", PERF_STATS):
                # once we have the features,
                # we can perform: rmac | gem pooling | l2 norm
                if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "rmac":
                    descriptors = get_rmac_descriptors(
                        activation_maps[0],
                        spatial_levels,
                        normalize=cfg.IMG_RETRIEVAL.NORMALIZE_FEATURES,
                    )
                elif cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "gem":
                    descriptors = get_average_gem(
                        activation_maps,
                        p=cfg.IMG_RETRIEVAL.GEM_POOL_POWER,
                        add_bias=True,
                    )
                else:
                    descriptors = torch.mean(torch.stack(activation_maps),
                                             dim=0)
                    descriptors = descriptors.reshape(descriptors.shape[0], -1)

                # Optionally l2 normalize the features.
                if (cfg.IMG_RETRIEVAL.NORMALIZE_FEATURES
                        and cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE != "rmac"):
                    # RMAC performs normalization within the algorithm, hence we skip it here.
                    descriptors = l2n(descriptors, dim=1)

            if fname_out:
                save_file(descriptors.data.numpy(), fname_out, verbose=False)
            train_features.append(descriptors.data.numpy())
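
Example #6 averages GeM descriptors across several image scalings. Assuming gem follows the standard generalized-mean pooling of Radenović et al. (the add_bias flag is not modeled here, and average_gem is only a guess at what get_average_gem does), a minimal sketch of single-scale GeM and the cross-scale average:

import torch
import torch.nn.functional as F

def gem(x: torch.Tensor, p: float = 3.0, eps: float = 1e-6) -> torch.Tensor:
    """Generalized-mean pooling over the spatial dims of an NCHW activation map."""
    pooled = F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1)))
    return pooled.pow(1.0 / p)

def average_gem(activation_maps, p: float = 3.0) -> torch.Tensor:
    """Average GeM descriptors across scales (hypothetical multi-scale variant)."""
    descs = [gem(m, p=p).flatten(1) for m in activation_maps]
    return torch.stack(descs).mean(dim=0)

maps = [torch.rand(1, 2048, s, s) for s in (7, 10)]  # activation maps at two scales
print(average_gem(maps).shape)  # torch.Size([1, 2048])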