def do_camelyon16_WSI_level():
    """
    Camelyon16 (WSI level).

    Calls `reproducibility.set_seed()`, picks the dataset root that matches
    the current username, builds the folding configuration and hands it to
    `split_valid_camelyon16_WSI_level`.

    :return:
    :raises ValueError: if the current username has no registered dataset
        root.
    """
    # ===============
    # Reproducibility
    # ===============
    reproducibility.set_seed()

    # One dataset root per known user.
    roots = {
        "brian": "/media/brian/Seagate Backup Plus Drive/datasets/camelyon16",
        "sbelharb": "/project/6004986/sbelharb/workspace/datasets/camelyon16",
    }

    username = getpass.getuser()
    if username not in roots:
        raise ValueError(
            "username `{}` unknown .... [NOT OK]".format(username))

    args = {
        "baseurl": roots[username],
        # 80 % for train, 20% for validation.
        "folding": {"vl": 20},
        "dataset": "camelyon16",
        "fold_folder": "folds/camelyon16/WSI-level",
        "img_extension": "tif",
        "nbr_folds": 5,
        # how many times to perform the k-folds over the available train
        # samples.
        "nbr_splits": 2,
    }

    split_valid_camelyon16_WSI_level(Dict2Obj(args))
def do_breakhis():
    """
    BreakHis.

    Calls `reproducibility.set_seed()`, picks the dataset root that matches
    the current username, builds the folding configuration (including the
    list of magnifications) and hands it to `split_valid_breakhis`.

    :return:
    :raises ValueError: if the current username has no registered dataset
        root.
    """
    # ===============
    # Reproducibility
    # ===============
    reproducibility.set_seed()

    # The dataset lives under the same relative folder on every machine;
    # only the prefix changes with the user.
    relative = "Breast-Cancer-Histopathological-Database-BreakHis/mkfold"
    prefixes = {
        "brian": "/media/brian/Seagate Backup Plus Drive/datasets/",
        "sbelharb": "/project/6004986/sbelharb/workspace/datasets/",
    }

    username = getpass.getuser()
    if username not in prefixes:
        raise ValueError(
            "username `{}` unknown .... [NOT OK]".format(username))
    baseurl = prefixes[username] + relative

    args = {
        "baseurl": baseurl,
        # 80% for train, 20% for validation.
        "folding": {"vl": 20},
        "dataset": "breakhis",
        "fold_folder": "folds/breakhis",
        "img_extension": "png",
        "nbr_folds": 5,
        "magnification": ["40X", "100X", "200X", "400X"],
        # how many times to perform the k-folds over the available train
        # samples.
        "nbr_splits": 2,
    }

    split_valid_breakhis(Dict2Obj(args))
def do_Caltech_UCSD_Birds_200_2011():
    """
    Caltech-UCSD-Birds-200-2011.

    Calls `reproducibility.set_seed()`, resolves the dataset root from the
    current username, builds the folding configuration and hands it to
    `split_valid_Caltech_UCSD_Birds_200_2011`. The number of folds is
    derived from the validation percentage: ceil(100 / vl).

    :return:
    :raises ValueError: if the current username is unknown (the paths have
        been anonymized; set the absolute path to the dataset here).
    """
    # ===============
    # Reproducibility
    # ===============
    reproducibility.set_seed()

    username = getpass.getuser()
    # The original code had a second `elif` testing the very same username;
    # it could never be taken, so a single branch is kept.
    if username == "xxxx2020":
        baseurl = "xxxx2020/datasets/Caltech-UCSD-Birds-200-2011"
    else:
        raise ValueError(
            "Cause: anonymization of the code. username `{}` unknown. "
            "Set the absolute path to the Caltech-UCSD-Birds-200-2011 "
            "dataset. See above for an example .... [NOT OK]".format(
                username))

    args = {
        "baseurl": baseurl,
        # 80 % for train, 20% for validation.
        "folding": {"vl": 20},
        "dataset": "Caltech-UCSD-Birds-200-2011",
        "fold_folder": "folds/Caltech-UCSD-Birds-200-2011",
        "img_extension": "bmp",
        # how many times to perform the k-folds over the available train
        # samples.
        "nbr_splits": 2,
        "path_encoding":
            "folds/Caltech-UCSD-Birds-200-2011/encoding-origine.yaml",
        # None: use the entire dataset. Set to an integer (e.g. 5) to keep
        # only that many random classes.
        "nbr_classes": None,
    }
    # vl = 20 --> 5 folds.
    args["nbr_folds"] = math.ceil(100. / args["folding"]["vl"])

    split_valid_Caltech_UCSD_Birds_200_2011(Dict2Obj(args))
def do_glas():
    """
    GlaS.

    Calls `reproducibility.set_seed()`, resolves the dataset root from the
    current username, builds the folding configuration and hands it to
    `split_valid_glas`. The number of folds is derived from the validation
    percentage: ceil(100 / vl).

    :return:
    :raises ValueError: if the current username is unknown (the paths have
        been anonymized; set the absolute path to the GlaS dataset here).
    """
    # ===============
    # Reproducibility
    # ===============
    reproducibility.set_seed()

    username = getpass.getuser()
    # Both known users point to the same (anonymized) location.
    if username in ("xxxx2020", "sbelharb"):
        baseurl = ("xxxx2020/datasets/GlaS-2015/"
                   "Warwick QU Dataset (Released 2016_07_08)")
    else:
        # The message used to name the Caltech-UCSD-Birds-200-2011 dataset,
        # which is misleading in the GlaS routine.
        raise ValueError(
            "Cause: anonymization of the code. username `{}` unknown. "
            "Set the absolute path to the GlaS dataset. "
            "See above for an example .... [NOT OK]".format(username))

    args = {
        "baseurl": baseurl,
        # 80 % for train, 20% for validation.
        "folding": {"vl": 20},
        "dataset": "glas",
        "fold_folder": "folds/glas-test",
        "img_extension": "bmp",
        # how many times to perform the k-folds over the available train
        # samples.
        "nbr_splits": 2,
    }
    # vl = 20 --> 5 folds.
    args["nbr_folds"] = math.ceil(100. / args["folding"]["vl"])

    split_valid_glas(Dict2Obj(args))
def do_glas():
    """
    GlaS.

    Calls `reproducibility.set_seed()`, then emits a warning and exits the
    process with status 0: this part of the code has been anonymized and the
    absolute path to the GlaS dataset must be set by hand. Everything after
    the `sys.exit(0)` call only runs once that guard has been removed.

    :return:
    :raises ValueError: (once the guard is removed) if the current username
        has no registered dataset root.
    """
    # ===============
    # Reproducibility
    # ===============
    reproducibility.set_seed()

    # Guard left by the anonymization: stop here until the paths are fixed.
    warnings.warn(
        "You are accessing an anonymized part of the code. We are going to "
        "exit. Come here and fix this according to your setup. Issue: "
        "absolute path to GlaS dataset.")
    sys.exit(0)

    # One (anonymized) dataset root per known user.
    roots = {
        "XXXXXXXXXX":
            "/XXXXXXXXX/XXXXXXXXXX/XXXXXXXXXXXXXXX/datasets/GlaS-2015/Warwick QU Dataset (Released 2016_07_08)",
        "XXXXXXXXXXXX":
            "/XXXXXX/XXXXXXX/XXXXXXXXXXXXXXXXX/workspace/datasets/GlaS-2015/Warwick QU Dataset (Released 2016_07_08)",
    }

    username = getpass.getuser()
    if username not in roots:
        raise ValueError(
            "Cause: anonymization of the code. username `{}` unknown. "
            "Set the absolute path to the GlaS dataset. "
            "See above for an example .... [NOT OK]".format(username))

    validation_percentage = 20  # 80 % for train, 20% for validation.
    args = {
        "baseurl": roots[username],
        "folding": {"vl": validation_percentage},
        "dataset": "glas",
        "fold_folder": "folds/glas-test",
        "img_extension": "bmp",
        # how many times to perform the k-folds over the available train
        # samples.
        "nbr_splits": 2,
    }
    args["nbr_folds"] = math.ceil(100. / validation_percentage)

    split_valid_glas(Dict2Obj(args))
def do_bach_parta_2018():
    """
    BACH (PART A) 2018.

    Calls `reproducibility.set_seed()`, picks the dataset root that matches
    the current username, builds the split configuration and hands it to
    `create_k_folds_csv_bach_part_a`.

    :return:
    :raises ValueError: if the current username has no registered dataset
        root.
    """
    # ===============
    # Reproducibility
    # ===============
    reproducibility.set_seed()

    # One dataset root per known user.
    roots = {
        "brian": "/media/brian/Seagate Backup Plus Drive/datasets/"
                 "ICIAR-2018-BACH-Challenge",
        "sbelharb": "/project/6004986/sbelharb/workspace/datasets/"
                    "ICIAR-2018-BACH-Challenge",
    }

    username = getpass.getuser()
    if username not in roots:
        raise ValueError(
            "username `{}` unknown .... [NOT OK]".format(username))

    args = {
        "baseurl": roots[username],
        # Portion of the samples to take for test. The leftover is for
        # train; it will be divided into actual train and validation sets.
        "test_portion": 0.5,
        # vl/100 % of the train set will be used for validation, while the
        # leftover (100-vl)/100 % will be used for actual training.
        "folding": {"vl": 20},
        "name_classes": {'Normal': 0, 'Benign': 1, 'InSitu': 2,
                         'Invasive': 3},
        "dataset": "bc18bch",
        "fold_folder": "folds/bach-part-a-2018",
        "img_extension": "tif",
        "nbr_folds": 5,
        # how many times to perform the k-folds over the available train
        # samples.
        "nbr_splits": 2,
    }

    create_k_folds_csv_bach_part_a(Dict2Obj(args))
from loader import csv_loader, PhotoDataset, default_collate, MyDataParallel
from instantiators import instantiate_models, instantiate_optimizer, instantiate_train_loss, instantiate_eval_loss

import torch
import torch.nn as nn

import reproducibility

# Multiplier applied to the number of workers: args.num_workers * this
# factor. Useful when setting set_for_eval to False, batch size = 1, and we
# are in an evaluation mode (to go faster and cope with the lag between the
# CPU and GPU).
FACTOR_MUL_WORKERS = 2

# Can be activated only for "Caltech-UCSD-Birds-200-2011" or
# "Oxford-flowers-102" dataset to go fast. If True, we select only few
# samples for training, validation, and test.
DEBUG_MODE = False

PLOT_STATS = False

# Use the default seed. Copy the seed into the os.environ("MYSEED").
reproducibility.set_seed(None)

NBRGPUS = torch.cuda.device_count()

ALLOW_MULTIGPUS = check_if_allow_multgpu_mode()


def _init_fn(worker_id):
    """
    Init. function for the worker in dataloader.

    Currently does nothing: the per-worker seeding of `np.random` and
    `random` from os.environ["MYSEED"] is disabled.

    :param worker_id: id of the worker (unused).
    :return:
    """
def do_Oxford_flowers_102():
    """
    Oxford-flowers-102.

    The train/valid/test sets are already provided. The routine:
      1. calls `reproducibility.set_seed()`;
      2. resolves the dataset root from the current username;
      3. converts the masks into binary masks
         (`create_bin_mask_Oxford_flowers_102`);
      4. calls `reproducibility.set_seed()` again, then
         `split_Oxford_flowers_102`;
      5. prints the min/max height and width over the '*.jpg' files found
         under `<baseurl>/jpg`.

    :return:
    :raises ValueError: if the current username is unknown (the paths have
        been anonymized; set the absolute path to the dataset here).
    """
    # ===============
    # Reproducibility
    # ===============
    reproducibility.set_seed()

    username = getpass.getuser()
    # The original code had a second `elif` testing the very same username;
    # it could never be taken, so a single branch is kept.
    if username == "xxxx2020":
        baseurl = "xxxxx2020/datasets/Oxford-flowers-102"
    else:
        # The message used to name the Caltech-UCSD-Birds-200-2011 dataset,
        # which is misleading in the Oxford-flowers-102 routine.
        raise ValueError(
            "Cause: anonymization of the code. username `{}` unknown. "
            "Set the absolute path to the Oxford-flowers-102 dataset. "
            "See above for an example .... [NOT OK]".format(username))

    args = {
        "baseurl": baseurl,
        "dataset": "Oxford-flowers-102",
        "fold_folder": "folds/Oxford-flowers-102",
        "img_extension": "jpg",
        "path_encoding": "folds/Oxford-flowers-102/encoding-origine.yaml",
    }

    # Convert masks into binary masks.
    create_bin_mask_Oxford_flowers_102(Dict2Obj(args))

    # Re-seed right before splitting.
    reproducibility.set_seed()
    split_Oxford_flowers_102(Dict2Obj(args))

    # Find min max size.
    def find_stats(argsx):
        """
        Print the min/max height and width of the '*.jpg' images found under
        `<argsx.baseurl>/jpg`.

        :param argsx: object with the attributes `baseurl` and `dataset`.
        :return:
        """
        minh, maxh, minw, maxw = None, None, None, None
        fin = find_files_pattern(join(argsx.baseurl, 'jpg'), '*.jpg')
        print("Computing stats from {} dataset ...".format(argsx.dataset))

        for f in tqdm.tqdm(fin, ncols=80, total=len(fin)):
            # Only the size is needed: read it without converting the image
            # (the conversion does not change the size) and close the file
            # right away instead of leaving it open.
            with Image.open(f, 'r') as img:
                w, h = img.size

            if minh is None:
                minh, maxh, minw, maxw = h, h, w, w
            else:
                minh = min(minh, h)
                maxh = max(maxh, h)
                minw = min(minw, w)
                maxw = max(maxw, w)

        # If no file was found, the four values are printed as None.
        print('Stats {}:\n'
              'min h: {} \n'
              'max h: {} \n'
              'min w: {} \n'
              'max w: {} \n'.format(argsx.dataset, minh, maxh, minw, maxw))

    find_stats(Dict2Obj(args))
from prologues import get_eval_dataset import torch import reproducibility import constants # and we are in an evaluation mode (to go faster and coop with the lag between # the CPU and GPU). DEBUG_MODE = False # Can be activated only for # "Caltech-UCSD-Birds-200-2011" or "Oxford-flowers-102" # dataset to go fast. If True, we select only few samples for training, # validation, and test. PLOT_STATS = False reproducibility.set_seed(None) # use the default seed. # Copy the see into the os.environ("MYSEED") NBRGPUS = torch.cuda.device_count() ALLOW_MULTIGPUS = check_if_allow_multgpu_mode() if __name__ == "__main__": # ============================================= # Parse the inputs and deal with the yaml file. # ============================================= parser = argparse.ArgumentParser() parser.add_argument("--yaml", type=str,