Пример #1
0
def do_camelyon16_WSI_level():
    """
    Camelyon16 (WSI-level): create the train/validation folds.

    Calls `reproducibility.set_seed()`, resolves the dataset root from the
    current OS username, then hands the configuration (wrapped in `Dict2Obj`)
    to `split_valid_camelyon16_WSI_level`.

    :return: None.
    :raises ValueError: when the current username has no registered path.
    """
    # Reproducibility: seed first, before any other work.
    reproducibility.set_seed()

    # Absolute path to the dataset, per known user.
    roots = {
        "brian": "/media/brian/Seagate Backup Plus Drive/datasets/camelyon16",
        "sbelharb": "/project/6004986/sbelharb/workspace/datasets/camelyon16",
    }
    username = getpass.getuser()
    if username not in roots:
        raise ValueError("username `{}` unknown .... [NOT OK]".format(username))

    config = dict()
    config["baseurl"] = roots[username]
    config["folding"] = {"vl": 20}  # 80% for train, 20% for validation.
    config["dataset"] = "camelyon16"
    config["fold_folder"] = "folds/camelyon16/WSI-level"
    config["img_extension"] = "tif"
    config["nbr_folds"] = 5
    # How many times to perform the k-folds over the available train samples.
    config["nbr_splits"] = 2

    split_valid_camelyon16_WSI_level(Dict2Obj(config))
Пример #2
0
def do_breakhis():
    """
    BreakHis: create the train/validation folds.

    Calls `reproducibility.set_seed()`, resolves the dataset root from the
    current OS username, then hands the configuration (wrapped in `Dict2Obj`)
    to `split_valid_breakhis`.

    :return: None.
    :raises ValueError: when the current username has no registered path.
    """
    # Reproducibility: seed first, before any other work.
    reproducibility.set_seed()

    # Both known users store the dataset under the same relative sub-folder;
    # only the parent directory differs.
    subfolder = "Breast-Cancer-Histopathological-Database-BreakHis/mkfold"
    parents = {
        "brian": "/media/brian/Seagate Backup Plus Drive/datasets/",
        "sbelharb": "/project/6004986/sbelharb/workspace/datasets/",
    }
    username = getpass.getuser()
    if username not in parents:
        raise ValueError("username `{}` unknown .... [NOT OK]".format(username))

    config = dict()
    config["baseurl"] = parents[username] + subfolder
    config["folding"] = {"vl": 20}  # 80% for train, 20% for validation.
    config["dataset"] = "breakhis"
    config["fold_folder"] = "folds/breakhis"
    config["img_extension"] = "png"
    config["nbr_folds"] = 5
    config["magnification"] = ["40X", "100X", "200X", "400X"]
    # How many times to perform the k-folds over the available train samples.
    config["nbr_splits"] = 2

    split_valid_breakhis(Dict2Obj(config))
Пример #3
0
def do_Caltech_UCSD_Birds_200_2011():
    """
    Caltech-UCSD-Birds-200-2011: create the train/validation folds.

    Calls `reproducibility.set_seed()`, resolves the dataset root from the
    current OS username, then hands the configuration (wrapped in `Dict2Obj`)
    to `split_valid_Caltech_UCSD_Birds_200_2011`.

    :return: None.
    :raises ValueError: when the current username is not the configured one.
        The username and path below are anonymized placeholders and must be
        edited to match the local setup.
    """
    # ===============
    # Reproducibility
    # ===============
    reproducibility.set_seed()

    username = getpass.getuser()
    # Anonymized placeholder: replace the username and the path with your own.
    # (The original had a second `elif` testing the very same username; it
    # could never be reached and assigned the same path, so it was removed.)
    if username == "xxxx2020":
        baseurl = "xxxx2020/datasets/Caltech-UCSD-Birds-200-2011"
    else:
        raise ValueError(
            "Cause: anonymization of the code. username `{}` unknown. Set the absolute path to the Caltech-UCSD-Birds-200-2011 dataset. See above for an example .... [NOT OK]"
            .format(username))

    args = {
        "baseurl": baseurl,
        "folding": {
            "vl": 20
        },  # 80 % for train, 20% for validation.
        "dataset": "Caltech-UCSD-Birds-200-2011",
        "fold_folder": "folds/Caltech-UCSD-Birds-200-2011",
        "img_extension": "bmp",
        # How many times to perform the k-folds over the available train
        # samples.
        "nbr_splits": 2,
        "path_encoding":
        "folds/Caltech-UCSD-Birds-200-2011/encoding-origine.yaml",
        # None: use the entire dataset. Per the original note, set this to a
        # number (e.g. 5) to keep only that many random classes.
        "nbr_classes": None
    }
    # Number of folds implied by the validation percentage (100 / 20 -> 5).
    args["nbr_folds"] = math.ceil(100. / args["folding"]["vl"])
    split_valid_Caltech_UCSD_Birds_200_2011(Dict2Obj(args))
Пример #4
0
def do_glas():
    """
    GlaS: create the train/validation folds.

    Calls `reproducibility.set_seed()`, resolves the dataset root from the
    current OS username, then hands the configuration (wrapped in `Dict2Obj`)
    to `split_valid_glas`.

    :return: None.
    :raises ValueError: when the current username is not one of the
        configured ones. The path below is an anonymized placeholder and must
        be edited to match the local setup.
    """
    # ===============
    # Reproducibility
    # ===============
    reproducibility.set_seed()

    username = getpass.getuser()
    # Both configured users point at the same (anonymized) location, so the
    # two original branches are merged into one.
    if username in ("xxxx2020", "sbelharb"):
        baseurl = "xxxx2020/datasets/GlaS-2015/Warwick QU Dataset (Released 2016_07_08)"
    else:
        # The message used to name the Caltech-UCSD-Birds-200-2011 dataset
        # (copy-paste slip); this function needs the GlaS dataset.
        raise ValueError(
            "Cause: anonymization of the code. username `{}` unknown. Set the absolute path to the GlaS dataset. See above for an example .... [NOT OK]"
            .format(username))

    args = {
        "baseurl": baseurl,
        "folding": {
            "vl": 20
        },  # 80 % for train, 20% for validation.
        "dataset": "glas",
        "fold_folder": "folds/glas-test",
        "img_extension": "bmp",
        # How many times to perform the k-folds over the available train
        # samples.
        "nbr_splits": 2
    }
    # Number of folds implied by the validation percentage (100 / 20 -> 5).
    args["nbr_folds"] = math.ceil(100. / args["folding"]["vl"])
    split_valid_glas(Dict2Obj(args))
def do_glas():
    """
    GlaS: create the train/validation folds (anonymized version).

    As shipped, this function calls `reproducibility.set_seed()`, emits a
    warning and exits the process with status 0. Everything after the
    `sys.exit(0)` call is a template that only runs once the early exit is
    removed and the placeholder usernames/paths are filled in.

    :return: None (never returns while the early exit is in place).
    """
    # Reproducibility: seed first, before any other work.
    reproducibility.set_seed()

    notice = ("You are accessing an anonymized part of the code. "
              "We are going to exit. Come here and fix this according to your setup. "
              "Issue: absolute path to GlaS dataset.")
    warnings.warn(notice)
    sys.exit(0)

    # ----- Template below: unreachable until the exit above is removed. -----
    roots = {
        "XXXXXXXXXX":
            "/XXXXXXXXX/XXXXXXXXXX/XXXXXXXXXXXXXXX/datasets/GlaS-2015/Warwick QU Dataset (Released 2016_07_08)",
        "XXXXXXXXXXXX":
            "/XXXXXX/XXXXXXX/XXXXXXXXXXXXXXXXX/workspace/datasets/GlaS-2015/Warwick QU Dataset (Released 2016_07_08)",
    }
    username = getpass.getuser()
    if username not in roots:
        raise ValueError("Cause: anonymization of the code. username `{}` unknown. Set the absolute path to the GlaS dataset. See above for an example .... [NOT OK]".format(username))

    config = dict()
    config["baseurl"] = roots[username]
    config["folding"] = {"vl": 20}  # 80% for train, 20% for validation.
    config["dataset"] = "glas"
    config["fold_folder"] = "folds/glas-test"
    config["img_extension"] = "bmp"
    # How many times to perform the k-folds over the available train samples.
    config["nbr_splits"] = 2
    # Number of folds implied by the validation percentage.
    config["nbr_folds"] = math.ceil(100. / config["folding"]["vl"])

    split_valid_glas(Dict2Obj(config))
Пример #6
0
def do_bach_parta_2018():
    """
    BACH (PART A) 2018: create the k-folds csv files.

    Calls `reproducibility.set_seed()`, resolves the dataset root from the
    current OS username, then hands the configuration (wrapped in `Dict2Obj`)
    to `create_k_folds_csv_bach_part_a`.

    :return: None.
    :raises ValueError: when the current username has no registered path.
    """
    # Reproducibility: seed first, before any other work.
    reproducibility.set_seed()

    # Absolute path to the dataset, per known user.
    roots = {
        "brian": "/media/brian/Seagate Backup Plus Drive/datasets/ICIAR-2018-BACH-Challenge",
        "sbelharb": "/project/6004986/sbelharb/workspace/datasets/ICIAR-2018-BACH-Challenge",
    }
    username = getpass.getuser()
    if username not in roots:
        raise ValueError("username `{}` unknown .... [NOT OK]".format(username))

    config = dict()
    config["baseurl"] = roots[username]
    # Portion of the samples to take for test. The leftover is for train; it
    # will be divided into actual train and validation sets.
    config["test_portion"] = 0.5
    # vl/100 % of the train set will be used for validation, while the
    # leftover (100-vl)/100 % will be used for actual training.
    config["folding"] = {"vl": 20}
    config["name_classes"] = {'Normal': 0, 'Benign': 1, 'InSitu': 2, 'Invasive': 3}
    config["dataset"] = "bc18bch"
    config["fold_folder"] = "folds/bach-part-a-2018"
    config["img_extension"] = "tif"
    config["nbr_folds"] = 5
    # How many times to perform the k-folds over the available train samples.
    config["nbr_splits"] = 2

    create_k_folds_csv_bach_part_a(Dict2Obj(config))
Пример #7
0
from loader import csv_loader, PhotoDataset, default_collate, MyDataParallel

from instantiators import instantiate_models, instantiate_optimizer, instantiate_train_loss, instantiate_eval_loss

import torch
import torch.nn as nn

import reproducibility

FACTOR_MUL_WORKERS = 2  # Multiplier for the number of workers: args.num_workers * FACTOR_MUL_WORKERS.
# Useful when set_for_eval is False, the batch size is 1, and we are in evaluation mode (to go faster and
# cope with the lag between the CPU and the GPU).
DEBUG_MODE = False  # Can be activated only for the "Caltech-UCSD-Birds-200-2011" or "Oxford-flowers-102"
# datasets, to go fast. If True, only a few samples are selected for training, validation, and test.
PLOT_STATS = False  # NOTE(review): presumably toggles plotting of statistics -- confirm against its users.

reproducibility.set_seed(
    None)  # Use the default seed. Per the original note, the seed is copied into os.environ["MYSEED"].

NBRGPUS = torch.cuda.device_count()  # Number of CUDA devices reported by torch.

ALLOW_MULTIGPUS = check_if_allow_multgpu_mode()


def _init_fn(worker_id):
    """
    Init. function for the worker in dataloader.
    :param worker_id:
    :return:
    """
    pass
    # np.random.seed(int(os.environ["MYSEED"]))
    # random.seed(int(os.environ["MYSEED"]))
Пример #8
0
def do_Oxford_flowers_102():
    """
    Oxford-flowers-102.
    The train/valid/test sets are already provided.

    Calls `reproducibility.set_seed()`, resolves the dataset root from the
    current OS username, then runs, in order:
    `create_bin_mask_Oxford_flowers_102`, a second `reproducibility.set_seed()`,
    `split_Oxford_flowers_102`, and finally prints the min/max height and
    width found over the `*.jpg` files under `<baseurl>/jpg`.

    :return: None.
    :raises ValueError: when the current username is not the configured one.
        The username and path below are anonymized placeholders and must be
        edited to match the local setup.
    """
    # ===============
    # Reproducibility
    # ===============
    reproducibility.set_seed()

    username = getpass.getuser()
    # Anonymized placeholder: replace the username and the path with your own.
    # (The original had a second `elif` testing the very same username; it
    # could never be reached, so it was removed.)
    if username == "xxxx2020":
        baseurl = "xxxxx2020/datasets/Oxford-flowers-102"
    else:
        # The message used to name the Caltech-UCSD-Birds-200-2011 dataset
        # (copy-paste slip); this function needs Oxford-flowers-102.
        raise ValueError(
            "Cause: anonymization of the code. username `{}` unknown. Set the absolute path to the Oxford-flowers-102 dataset. See above for an example .... [NOT OK]"
            .format(username))

    args = {
        "baseurl": baseurl,
        "dataset": "Oxford-flowers-102",
        "fold_folder": "folds/Oxford-flowers-102",
        "img_extension": "jpg",
        "path_encoding": "folds/Oxford-flowers-102/encoding-origine.yaml"
    }
    # Convert masks into binary masks.
    create_bin_mask_Oxford_flowers_102(Dict2Obj(args))
    # Seed again so the split starts from the same seed call, whatever the
    # mask conversion above did.
    reproducibility.set_seed()
    split_Oxford_flowers_102(Dict2Obj(args))

    # Find min max size.
    def find_stats(argsx):
        """
        Print the min/max height and width over the dataset's jpg images.

        :param argsx: object exposing `baseurl` (dataset root) and `dataset`
            (name used in the printed messages).
        :return: None.
        """
        minh, maxh, minw, maxw = None, None, None, None
        fin = find_files_pattern(join(argsx.baseurl, 'jpg'), '*.jpg')
        print("Computing stats from {} dataset ...".format(argsx.dataset))
        for f in tqdm.tqdm(fin, ncols=80, total=len(fin)):
            # Close each file as soon as its size is read. The size does not
            # depend on the color mode, so no RGB conversion (which would
            # decode the whole image) is needed.
            with Image.open(f, 'r') as img:
                w, h = img.size
            if minh is None:
                minh, maxh, minw, maxw = h, h, w, w
            else:
                minh = min(minh, h)
                maxh = max(maxh, h)
                minw = min(minw, w)
                maxw = max(maxw, w)

        print('Stats {}:\n'
              'min h: {} \n'
              'max h: {} \n'
              'min w: {} \n'
              'max w: {} \n'.format(argsx.dataset, minh, maxh, minw, maxw))

    find_stats(Dict2Obj(args))
Пример #9
0
from prologues import get_eval_dataset

import torch

import reproducibility
import constants

# NOTE(review): the next two comment lines look like the tail of a longer
# comment whose beginning (and the constant it described) is not present here.
# and we are in an evaluation mode (to go faster and cope with the lag between
# the CPU and GPU).
DEBUG_MODE = False  # Can be activated only for the
# "Caltech-UCSD-Birds-200-2011" or "Oxford-flowers-102"
# datasets, to go fast. If True, only a few samples are selected for training,
# validation, and test.
PLOT_STATS = False  # NOTE(review): presumably toggles plotting of statistics -- confirm.

reproducibility.set_seed(None)  # Use the default seed.
# Per the original note, the seed is copied into os.environ["MYSEED"].

NBRGPUS = torch.cuda.device_count()  # Number of CUDA devices reported by torch.

ALLOW_MULTIGPUS = check_if_allow_multgpu_mode()

if __name__ == "__main__":

    # =============================================
    # Parse the inputs and deal with the yaml file.
    # =============================================

    parser = argparse.ArgumentParser()
    parser.add_argument("--yaml",
                        type=str,