Example #1
def save_checkpoint(path_to_job, model, optimizer, epoch, cfg):
    """
    Save a checkpoint.
    Args:
        model (model): model to save the weight to the checkpoint.
        optimizer (optim): optimizer to save the historical state.
        epoch (int): current number of epoch of the model.
        cfg (CfgNode): configs to save.
    """
    # Save checkpoints only from the master process.
    if not du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
        return
    # Ensure that the checkpoint dir exists.
    g_pathmgr.mkdirs(get_checkpoint_dir(path_to_job))
    # Omit the DDP wrapper in the multi-gpu setting.
    sd = model.module.state_dict() if cfg.NUM_GPUS > 1 else model.state_dict()
    normalized_sd = sub_to_normal_bn(sd)

    # Record the state.
    checkpoint = {
        "epoch": epoch,
        "model_state": normalized_sd,
        "optimizer_state": optimizer.state_dict(),
        "cfg": cfg.dump(),
    }
    # Write the checkpoint.
    path_to_checkpoint = get_path_to_checkpoint(path_to_job, epoch + 1)
    with g_pathmgr.open(path_to_checkpoint, "wb") as f:
        torch.save(checkpoint, f)
    return path_to_checkpoint
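A note on the shared dependency: every snippet on this page relies on g_pathmgr, iopath's process-wide PathManager instance. A minimal sketch of the assumed import and call pattern (the paths are placeholders):

from iopath.common.file_io import g_pathmgr

# mkdirs is recursive and is a no-op if the directory already exists,
# analogous to os.makedirs(path, exist_ok=True).
g_pathmgr.mkdirs("/tmp/example_job/checkpoints")

# Reads and writes go through the same manager, so any registered path
# handlers (e.g. for remote storage) are applied transparently.
with g_pathmgr.open("/tmp/example_job/checkpoints/note.txt", "w") as f:
    f.write("checkpoint directory created")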
Example #2
File: io.py Project: zlapp/vissl
def makedir(dir_path):
    """
    Create the directory if it does not exist.
    """
    is_success = False
    try:
        if not g_pathmgr.exists(dir_path):
            g_pathmgr.mkdirs(dir_path)
        is_success = True
    except BaseException:
        logging.info(f"Error creating directory: {dir_path}")
    return is_success
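A short usage sketch (the path is hypothetical): makedir swallows exceptions and reports the outcome through its boolean return value, so callers should check it explicitly.

output_dir = "/checkpoint/experiments/run_01"  # hypothetical path
if not makedir(output_dir):
    raise RuntimeError(f"Could not create directory: {output_dir}")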
Example #3
def make_checkpoint_dir(path_to_job):
    """
    Creates the checkpoint directory (if not present already).
    Args:
        path_to_job (string): the path to the folder of the current job.
    """
    checkpoint_dir = os.path.join(path_to_job, "checkpoints")
    # Create the checkpoint dir from the master process
    if du.is_master_proc() and not g_pathmgr.exists(checkpoint_dir):
        try:
            g_pathmgr.mkdirs(checkpoint_dir)
        except Exception:
            pass
    return checkpoint_dir
Example #4
def load_cfg_fom_args(description="Config options."):
    """Load config from command line args and set any specified options."""
    current_time = datetime.now().strftime("%y%m%d_%H%M%S")
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--cfg",
                        dest="cfg_file",
                        type=str,
                        required=True,
                        help="Config file location")
    parser.add_argument("opts",
                        default=None,
                        nargs=argparse.REMAINDER,
                        help="See conf.py for all options")
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    merge_from_file(args.cfg_file)
    cfg.merge_from_list(args.opts)

    log_dest = os.path.basename(args.cfg_file)
    log_dest = log_dest.replace('.yaml', '_{}.txt'.format(current_time))

    g_pathmgr.mkdirs(cfg.SAVE_DIR)
    cfg.LOG_TIME, cfg.LOG_DEST = current_time, log_dest
    cfg.freeze()

    logging.basicConfig(
        level=logging.INFO,
        format="[%(asctime)s] [%(filename)s: %(lineno)4d]: %(message)s",
        datefmt="%y/%m/%d %H:%M:%S",
        handlers=[
            logging.FileHandler(os.path.join(cfg.SAVE_DIR, cfg.LOG_DEST)),
            logging.StreamHandler()
        ])

    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    random.seed(cfg.RNG_SEED)
    torch.backends.cudnn.benchmark = cfg.CUDNN.BENCHMARK

    logger = logging.getLogger(__name__)
    version = [
        torch.__version__, torch.version.cuda,
        torch.backends.cudnn.version()
    ]
    logger.info(
        "PyTorch Version: torch={}, cuda={}, cudnn={}".format(*version))
    logger.info(cfg)
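A hedged sketch of the resulting command line (the script name, config path, and override keys are placeholders): everything after --cfg is collected by argparse.REMAINDER into args.opts and applied as KEY VALUE pairs via cfg.merge_from_list.

python train.py --cfg configs/base.yaml RNG_SEED 1 SAVE_DIR ./output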
Example #5
def setup_seed(rank):
    """Sets up environment for training or testing."""
    if rank == 0:
        g_pathmgr.mkdirs(cfg.OUT_DIR)
        config.dump_cfg()

    if cfg.RNG_SEED:
        np.random.seed(cfg.RNG_SEED + rank)
        torch.manual_seed(cfg.RNG_SEED + rank)
        random.seed(cfg.RNG_SEED + rank)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
    else:
        torch.backends.cudnn.benchmark = cfg.CUDNN.BENCHMARK
        torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
Example #6
def create_sun397_disk_filelist_dataset(input_path: str, output_path: str,
                                        seed: int):
    """
    Create partitions "train", "trainval", "val", "test" from the input path of SUN397
    by allocating 70% of labels to "train", 10% to "val" and 20% to "test".
    """
    random.seed(seed)
    g_pathmgr.mkdirs(output_path)

    # List all the available classes in SUN397 and their path
    image_folder = os.path.join(input_path, "SUN397")
    class_names_file = os.path.join(image_folder, "ClassName.txt")
    class_paths = []
    with open(class_names_file, "r") as f:
        for line in f:
            path = line.strip()
            if path.startswith("/"):
                path = path[1:]
            class_paths.append(path)

    # For each label, split the samples in train/val/test and add them
    # to the list of samples associated to each split
    splits_data = {
        "train": SplitData(),
        "val": SplitData(),
        "test": SplitData(),
        "trainval": SplitData(),
    }
    for i, class_path in tqdm(enumerate(class_paths), total=len(class_paths)):
        full_class_path = os.path.join(image_folder, class_path)
        image_names = os.listdir(full_class_path)
        splits = split_sample_list(image_names)
        for split, images in splits.items():
            for image_name in images:
                image_path = os.path.join(full_class_path, image_name)
                splits_data[split].image_paths.append(image_path)
                splits_data[split].image_labels.append(i)

    # Save each split
    for split, samples in splits_data.items():
        image_output_path = os.path.join(output_path, f"{split}_images.npy")
        with g_pathmgr.open(image_output_path, mode="wb") as f:
            np.save(f, np.array(samples.image_paths))
        label_output_path = os.path.join(output_path, f"{split}_labels.npy")
        with g_pathmgr.open(label_output_path, mode="wb") as f:
            np.save(f, np.array(samples.image_labels))
Example #7
def cache_url(url_or_file, cache_dir, base_url=_PYCLS_BASE_URL):
    """Download the file specified by the URL to the cache_dir and return the path to
    the cached file. If the argument is not a URL, simply return it as is.
    """
    is_url = re.match(r"^(?:http)s?://", url_or_file,
                      re.IGNORECASE) is not None
    if not is_url:
        return url_or_file
    url = url_or_file
    assert url.startswith(base_url), "url must start with: {}".format(base_url)
    cache_file_path = url.replace(base_url, cache_dir)
    if g_pathmgr.exists(cache_file_path):
        return cache_file_path
    cache_file_dir = os.path.dirname(cache_file_path)
    if not g_pathmgr.exists(cache_file_dir):
        g_pathmgr.mkdirs(cache_file_dir)
    logger.info("Downloading remote file {} to {}".format(
        url, cache_file_path))
    download_url(url, cache_file_path)
    return cache_file_path
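A hedged usage sketch (the paths and URL are placeholders): local file paths pass through unchanged, while URLs under the base URL are mirrored into the cache directory and downloaded only once.

# A plain file path is returned as-is.
weights = cache_url("/data/models/model.pyth", "/tmp/pycls-cache")

# A URL must start with the base URL; its cached copy is reused if present.
weights = cache_url(_PYCLS_BASE_URL + "/models/model.pyth", "/tmp/pycls-cache")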
Example #8
def setup_env():
    """Sets up environment for training or testing."""
    if dist.is_master_proc():
        # Ensure that the output dir exists
        g_pathmgr.mkdirs(cfg.OUT_DIR)
        # Save the config
        config.dump_cfg()
    # Setup logging
    logging.setup_logging()
    # Log torch, cuda, and cudnn versions
    version = [torch.__version__, torch.version.cuda, torch.backends.cudnn.version()]
    logger.info("PyTorch Version: torch={}, cuda={}, cudnn={}".format(*version))
    # Log the config as both human readable and as a json
    logger.info("Config:\n{}".format(cfg)) if cfg.VERBOSE else ()
    logger.info(logging.dump_log_data(cfg, "cfg", None))
    # Fix the RNG seeds (see RNG comment in core/config.py for discussion)
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    random.seed(cfg.RNG_SEED)
    # Configure the CUDNN backend
    torch.backends.cudnn.benchmark = cfg.CUDNN.BENCHMARK
Example #9
def save_checkpoint(model, optimizer, epoch, best_acc1, best):
    """Saves a checkpoint."""
    # Save checkpoints only from the master process
    if torch.distributed.get_rank() != 0:
        return
    # Ensure that the checkpoint dir exists
    g_pathmgr.mkdirs(get_checkpoint_dir())
    # Record the state
    checkpoint = {
        "epoch": epoch,
        "state_dict": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "best_acc1": best_acc1,
    }
    # Write the checkpoint
    checkpoint_file = get_checkpoint(epoch + 1)
    torch.save(checkpoint, checkpoint_file)
    # If this is the best model so far, copy the checkpoint to the best-checkpoint file
    if best:
        shutil.copyfile(checkpoint_file, os.path.join(cfg.OUT_DIR, "best.pth.tar"))
    return checkpoint_file
Example #10
def adapt_train_database_extract_config(config, checkpoint_folder):
    config.DATA.TRAIN.DATA_SOURCES = []
    config.DATA.TRAIN.DATA_PATHS = []
    config.DATA.TRAIN.DATA_LIMIT = -1

    if config.IMG_RETRIEVAL.TRAIN_PCA_WHITENING:
        config.DATA.TRAIN.DATA_SOURCES = ["disk_filelist"]
        config.DATA.TRAIN.DATA_PATHS = [
            f"{config.IMG_RETRIEVAL.DATASET_PATH}/{config.IMG_RETRIEVAL.TRAIN_DATASET_NAME}/train_images.npy"  # NOQA
        ]

    config.DATA.TEST.DATA_SOURCES = ["disk_filelist"]
    if config.IMG_RETRIEVAL.USE_DISTRACTORS:
        config.DATA.TEST.DATA_PATHS = [
            f"{config.IMG_RETRIEVAL.DATASET_PATH}/{config.IMG_RETRIEVAL.EVAL_DATASET_NAME}/database_with_distractors_images.npy"  # NOQA
        ]
    else:
        config.DATA.TEST.DATA_PATHS = [
            f"{config.IMG_RETRIEVAL.DATASET_PATH}/{config.IMG_RETRIEVAL.EVAL_DATASET_NAME}/database_images.npy"  # NOQA
        ]

    output_dir = os.path.join(checkpoint_folder, "train_database")
    g_pathmgr.mkdirs(output_dir)
    config.EXTRACT_FEATURES.OUTPUT_DIR = output_dir

    if config.IMG_RETRIEVAL.DEBUG_MODE:
        config.DATA.TRAIN.DATA_LIMIT = 10
        config.DATA.TEST.DATA_LIMIT = 50

    # Images are all of different sizes.
    config.DATA.TRAIN.BATCHSIZE_PER_REPLICA = 1
    config.DATA.TEST.BATCHSIZE_PER_REPLICA = 1

    config.DATA.TRAIN.TRANSFORMS = get_extract_features_transforms(config)
    config.DATA.TEST.TRANSFORMS = get_extract_features_transforms(config)

    return config
Example #11
def adapt_query_extract_config(config, checkpoint_folder):
    config.DATA.TRAIN.DATA_SOURCES = []
    config.DATA.TRAIN.DATA_PATHS = []
    config.DATA.TRAIN.DATASET_NAMES = []
    config.DATA.TRAIN.DATA_LIMIT = 0

    config.DATA.TEST.DATA_SOURCES = ["disk_filelist"]
    config.DATA.TEST.DATA_PATHS = [
        f"{config.IMG_RETRIEVAL.DATASET_PATH}/{config.IMG_RETRIEVAL.EVAL_DATASET_NAME}/query_images.npy"  # NOQA
    ]

    output_dir = os.path.join(checkpoint_folder, "query")
    g_pathmgr.mkdirs(output_dir)
    config.EXTRACT_FEATURES.OUTPUT_DIR = output_dir

    if config.IMG_RETRIEVAL.DEBUG_MODE:
        config.DATA.TEST.DATA_LIMIT = 10

    # Images are all of different sizes.
    config.DATA.TEST.BATCHSIZE_PER_REPLICA = 1

    config.DATA.TEST.TRANSFORMS = get_extract_features_transforms(config)

    return config
Example #12
def save_checkpoint(model, optimizer, epoch, best):
    """Saves a checkpoint."""
    # Save checkpoints only from the master process
    if not dist.is_master_proc():
        return
    # Ensure that the checkpoint dir exists
    g_pathmgr.mkdirs(get_checkpoint_dir())
    # Record the state
    checkpoint = {
        "epoch": epoch,
        "model_state": unwrap_model(model).state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "cfg": cfg.dump(),
    }
    # Write the checkpoint
    checkpoint_file = get_checkpoint(epoch + 1)
    with g_pathmgr.open(checkpoint_file, "wb") as f:
        torch.save(checkpoint, f)
    # If this is the best model so far, copy the checkpoint to the best-checkpoint file
    if best:
        with g_pathmgr.open(checkpoint_file, "rb") as src:
            with g_pathmgr.open(get_checkpoint_best(), "wb") as dst:
                copyfileobj(src, dst)
    return checkpoint_file
Example #13
def mkdirs(path: str) -> None:
    if IOPathManager:
        return IOPathManager.mkdirs(path)
    os.makedirs(path, exist_ok=True)
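A small usage sketch, assuming IOPathManager is None when iopath is not installed: the wrapper behaves the same either way, so call sites do not need to know which backend created the directory.

mkdirs("./outputs/logs")  # delegates to IOPathManager if available, else os.makedirs
mkdirs("./outputs/logs")  # calling again is safe: both backends tolerate existing dirs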
Example #14
def _get_output_dir(self, cfg_out_dir):
    odir = f"{cfg_out_dir}/{self.layer}"
    g_pathmgr.mkdirs(odir)
    logging.info(f"Output directory for SVM results: {odir}")
    return odir
Example #15
def _create_dataset_split(cfg: AttrDict,
                          data_split: str,
                          features_dim: int,
                          kmeans,
                          pca: Optional[PCA] = None):
    """
    Scan the dataset split and create a new classification dataset out of it
    where each image is associated to the centroid the closest in feature space.
    """
    num_clusters = cfg.CLUSTERFIT.NUM_CLUSTERS
    data_name = cfg.CLUSTERFIT.FEATURES.DATASET_NAME
    layer_name = cfg.CLUSTERFIT.FEATURES.LAYER_NAME

    logging.info(
        f"Computing cluster label assignment for each sample in {data_split}..."
    )
    indices = []
    distances = []
    target_clusters = []
    shard_paths = ExtractedFeaturesLoader.get_shard_file_names(
        input_dir=cfg.CLUSTERFIT.FEATURES.PATH,
        split=data_split.lower(),
        layer=cfg.CLUSTERFIT.FEATURES.LAYER_NAME,
    )
    for shard_path in shard_paths:
        shard_content = ExtractedFeaturesLoader.load_feature_shard(shard_path)
        shard_features = shard_content.features

        # TODO: factorize this with the code above (at least the normalization)
        # Reshape and normalize the loaded features
        shard_features = shard_features.reshape(shard_features.shape[0], -1)
        shard_features_norm = np.linalg.norm(shard_features, axis=1) + 1e-5
        shard_features = shard_features / shard_features_norm[:, np.newaxis]

        if pca is not None:
            shard_features = pca.transform(shard_features)
            shard_features = np.ascontiguousarray(shard_features)
        shard_distances, shard_cluster_labels = kmeans.index.search(
            shard_features, 1)
        indices.extend(shard_content.indices)
        distances.extend(shard_distances)
        target_clusters.extend(shard_cluster_labels)

    # Step 5: save clustering data and hard cluster labels for the images
    logging.info("Saving centroids and cluster assignments to file...")
    dataset_image_paths = get_image_paths(cfg, split=data_split)
    image_paths = [dataset_image_paths[i] for i in indices]
    data_split = data_split.lower()
    clustering_output_dict = {
        "sample_indices": indices,
        "hard_labels": target_clusters,
        "centroids": kmeans.centroids,
        "distances": distances,
        "images": image_paths,
    }
    output_dir = cfg.CLUSTERFIT.OUTPUT_DIR
    g_pathmgr.mkdirs(output_dir)
    output_prefix = (
        f"{data_name}_{data_split}_{layer_name}_N{num_clusters}_D{features_dim}"
    )
    cluster_output_filepath = os.path.join(output_dir, f"{output_prefix}.pkl")
    labels_output_filepath = os.path.join(output_dir,
                                          f"{output_prefix}_labels.npy")
    image_path_filepath = os.path.join(output_dir,
                                       f"{output_prefix}_images.npy")
    out_images = np.array(image_paths)
    out_hard_labels = np.array(target_clusters, dtype=np.int64).reshape(-1)
    save_file(clustering_output_dict, cluster_output_filepath)
    save_file(out_images, image_path_filepath)
    save_file(out_hard_labels, labels_output_filepath)