def custom_2head_dataloader(config):
    """Prepare paths and transforms for custom unsupervised-clustering data.

    Reads ``config.dataset_root`` and ``config.batchnorm_track`` and builds
    the three transforms with ``greyscale_make_transforms`` (the custom data
    is treated as greyscale).

    NOTE(review): the paths and transforms are computed but nothing is
    returned, so this function looks unfinished -- confirm before use.

    :param config: experiment configuration object
    :return: None
    """
    # The custom data is greyscale.
    use_greyscale = True

    train_data_path, test_val_data_path, test_data_path = (
        os.path.join(config.dataset_root, subdir)
        for subdir in ("train", "none", "none"))

    # recommended (for test time invariance to batch size)
    assert config.batchnorm_track

    # Transforms: pick the factory first, then call it once.
    make_transforms = (greyscale_make_transforms if use_greyscale
                       else sobel_make_transforms)
    tf1, tf2, tf3 = make_transforms(config)
def cluster_twohead_create_dataloaders(config):
    """Create the dataloaders used by the two-head ("IID" mode) scripts.

    Depending on ``config.dataset`` (a CIFAR variant, "STL10" or "MNIST")
    this picks the torchvision dataset class, the transforms and the data
    partitions, stores the chosen partitions on ``config``
    (``train_partitions_head_A``, ``train_partitions_head_B``,
    ``mapping_assignment_partitions``, ``mapping_test_partitions``) and then
    builds the loaders with ``_create_dataloaders`` and
    ``_create_mapping_loader``.

    :param config: experiment configuration; must have ``mode == "IID"`` and
                   a truthy ``twohead``. It is modified in place.
    :return: ``(dataloaders_head_A, dataloaders_head_B,
             mapping_assignment_dataloader, mapping_test_dataloader)``
    :raises AssertionError: if the mode/twohead preconditions do not hold, if
                            ``config.mix_train`` is falsy for STL10, or if
                            the dataset is not supported.
    """
    assert (config.mode == "IID")
    assert (config.twohead)

    target_transform = None

    if "CIFAR" in config.dataset:
        config.train_partitions_head_A = [True, False]
        config.train_partitions_head_B = config.train_partitions_head_A
        config.mapping_assignment_partitions = [True, False]
        config.mapping_test_partitions = [True, False]

        if config.dataset == "CIFAR10":
            dataset_class = torchvision.datasets.CIFAR10
        elif config.dataset == "CIFAR100":
            dataset_class = torchvision.datasets.CIFAR100
        elif config.dataset == "CIFAR20":
            # CIFAR20 is CIFAR100 with its targets remapped.
            dataset_class = torchvision.datasets.CIFAR100
            target_transform = _cifar100_to_cifar20
        else:
            # Explicit raise (not `assert False`) so the failure names the
            # dataset and is not stripped when running with -O.
            raise AssertionError(
                "unsupported CIFAR dataset: %s" % (config.dataset,))

        # datasets produce either 2 or 5 channel images based on
        # config.include_rgb
        tf1, tf2, tf3 = sobel_make_transforms(config)

    elif config.dataset == "STL10":
        assert (config.mix_train)
        if not config.stl_leave_out_unlabelled:
            print("adding unlabelled data for STL10")
            config.train_partitions_head_A = ["train+unlabeled", "test"]
        else:
            print("not using unlabelled data for STL10")
            config.train_partitions_head_A = ["train", "test"]

        config.train_partitions_head_B = ["train", "test"]
        config.mapping_assignment_partitions = ["train", "test"]
        config.mapping_test_partitions = ["train", "test"]

        dataset_class = torchvision.datasets.STL10

        # datasets produce either 2 or 5 channel images based on
        # config.include_rgb
        tf1, tf2, tf3 = sobel_make_transforms(config)

    elif config.dataset == "MNIST":
        config.train_partitions_head_A = [True, False]
        config.train_partitions_head_B = config.train_partitions_head_A
        config.mapping_assignment_partitions = [True, False]
        config.mapping_test_partitions = [True, False]

        dataset_class = torchvision.datasets.MNIST

        tf1, tf2, tf3 = greyscale_make_transforms(config)

    else:
        # Same reasoning as the CIFAR fallback above: fail loudly with the
        # offending name instead of a bare `assert False`.
        raise AssertionError("unsupported dataset: %s" % (config.dataset,))

    print("Making datasets with %s and %s" % (dataset_class, target_transform))
    sys.stdout.flush()

    dataloaders_head_A = _create_dataloaders(
        config, dataset_class, tf1, tf2,
        partitions=config.train_partitions_head_A,
        target_transform=target_transform)

    dataloaders_head_B = _create_dataloaders(
        config, dataset_class, tf1, tf2,
        partitions=config.train_partitions_head_B,
        target_transform=target_transform)

    mapping_assignment_dataloader = _create_mapping_loader(
        config, dataset_class, tf3,
        partitions=config.mapping_assignment_partitions,
        target_transform=target_transform)

    mapping_test_dataloader = _create_mapping_loader(
        config, dataset_class, tf3,
        partitions=config.mapping_test_partitions,
        target_transform=target_transform)

    return dataloaders_head_A, dataloaders_head_B, \
        mapping_assignment_dataloader, mapping_test_dataloader
def cluster_create_dataloaders(config):
    """Create the dataloaders used by the single-head ("IID+" mode) scripts.

    Depending on ``config.dataset`` (a CIFAR variant, "STL10" or "MNIST")
    this picks the torchvision dataset class, the transforms and separate
    train/test partitions, stores the partitions on ``config``
    (``train_partitions``, ``mapping_assignment_partitions``,
    ``mapping_test_partitions``) and then builds the loaders with
    ``_create_dataloaders`` and ``_create_mapping_loader``.

    :param config: experiment configuration; must have ``mode == "IID+"``
                   and a falsy ``twohead``. It is modified in place.
    :return: ``(dataloaders, mapping_assignment_dataloader,
             mapping_test_dataloader)``
    :raises AssertionError: if the mode/twohead preconditions do not hold or
                            the dataset is not supported.
    """
    assert (config.mode == "IID+")
    assert (not config.twohead)

    target_transform = None

    # separate train/test sets
    if "CIFAR" in config.dataset:
        config.train_partitions = [True]
        config.mapping_assignment_partitions = [True]
        config.mapping_test_partitions = [False]

        if config.dataset == "CIFAR10":
            dataset_class = torchvision.datasets.CIFAR10
        elif config.dataset == "CIFAR100":
            dataset_class = torchvision.datasets.CIFAR100
        elif config.dataset == "CIFAR20":
            # CIFAR20 is CIFAR100 with its targets remapped.
            dataset_class = torchvision.datasets.CIFAR100
            target_transform = _cifar100_to_cifar20
        else:
            # Explicit raise (not `assert False`) so the failure names the
            # dataset and is not stripped when running with -O.
            raise AssertionError(
                "unsupported CIFAR dataset: %s" % (config.dataset,))

        # datasets produce either 2 or 5 channel images based on
        # config.include_rgb
        tf1, tf2, tf3 = sobel_make_transforms(config)

    elif config.dataset == "STL10":
        config.train_partitions = ["train+unlabeled"]
        config.mapping_assignment_partitions = ["train"]
        config.mapping_test_partitions = ["test"]

        dataset_class = torchvision.datasets.STL10

        # datasets produce either 2 or 5 channel images based on
        # config.include_rgb
        tf1, tf2, tf3 = sobel_make_transforms(config)

    elif config.dataset == "MNIST":
        config.train_partitions = [True]
        config.mapping_assignment_partitions = [True]
        config.mapping_test_partitions = [False]

        dataset_class = torchvision.datasets.MNIST

        tf1, tf2, tf3 = greyscale_make_transforms(config)

    else:
        # Same reasoning as the CIFAR fallback above: fail loudly with the
        # offending name instead of a bare `assert False`.
        raise AssertionError("unsupported dataset: %s" % (config.dataset,))

    print("Making datasets with %s and %s" % (dataset_class, target_transform))
    sys.stdout.flush()

    dataloaders = _create_dataloaders(
        config, dataset_class, tf1, tf2,
        partitions=config.train_partitions,
        target_transform=target_transform)

    mapping_assignment_dataloader = _create_mapping_loader(
        config, dataset_class, tf3,
        partitions=config.mapping_assignment_partitions,
        target_transform=target_transform)

    mapping_test_dataloader = _create_mapping_loader(
        config, dataset_class, tf3,
        partitions=config.mapping_test_partitions,
        target_transform=target_transform)

    return dataloaders, mapping_assignment_dataloader, mapping_test_dataloader
def create_basic_clustering_dataloaders(config):
    """
    Simplified dataloader factory, intended as a drop-in replacement for
    cluster_twohead_create_dataloaders() in the scripts (the original loading
    code is complex because it covers every experiment).

    Data is read with ImageFolder, but any subclass of
    torch.utils.data.Dataset could be used instead. ImageFolder data is not
    shuffled, so every loader is given a DeterministicRandomSampler.

    NOTE(review): only test_data_path is checked for existence before both
    test_val_data_path and test_data_path are loaded; the two are currently
    the same path -- confirm this is intended if they ever diverge.

    :param config: Requires num_dataloaders and values used by
                   *make_transforms(), e.g. crop size, input size etc.
    :return: Training and testing dataloaders, as (dataloaders_head_A,
             dataloaders_head_B, mapping_assignment_dataloader,
             mapping_test_dataloader); the last two are None when the test
             data path does not exist.
    """
    # Change these according to your data:
    greyscale = False
    train_data_path = os.path.join(config.dataset_root, "train")
    test_val_data_path = os.path.join(config.dataset_root, "none")
    test_data_path = os.path.join(config.dataset_root, "none")

    # recommended (for test time invariance to batch size)
    assert config.batchnorm_track

    # Transforms:
    make_transforms = (greyscale_make_transforms if greyscale
                       else sobel_make_transforms)
    tf1, tf2, tf3 = make_transforms(config)

    def build_loader(dataset, batch_size):
        # Every loader shares these settings; only the dataset and batch
        # size differ. A fresh sampler is created for each loader.
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            sampler=DeterministicRandomSampler(),
            num_workers=0,
            drop_last=False)

    def build_train_loaders():
        # One loader over tf1 images followed by config.num_dataloaders
        # loaders over tf2 images, all reading the training folder.
        loaders = [build_loader(
            torchvision.datasets.ImageFolder(root=train_data_path,
                                             transform=tf1),
            config.dataloader_batch_sz)]
        for _ in range(config.num_dataloaders):
            loaders.append(build_loader(
                torchvision.datasets.ImageFolder(root=train_data_path,
                                                 transform=tf2),
                config.dataloader_batch_sz))
        return loaders

    # Training data:
    # main output head (B), auxiliary overclustering head (A), same data for
    # both
    dataloaders_head_B = build_train_loaders()
    dataloaders_head_A = build_train_loaders()

    # Testing data (labelled):
    mapping_assignment_dataloader = None
    mapping_test_dataloader = None
    if os.path.exists(test_data_path):
        mapping_assignment_dataloader = build_loader(
            torchvision.datasets.ImageFolder(test_val_data_path,
                                             transform=tf3),
            config.batch_sz)
        mapping_test_dataloader = build_loader(
            torchvision.datasets.ImageFolder(test_data_path, transform=tf3),
            config.batch_sz)

    return dataloaders_head_A, dataloaders_head_B, \
        mapping_assignment_dataloader, mapping_test_dataloader
def make_triplets_data(config):
    """Create original/positive/negative/test dataloaders for triplets.

    Depending on ``config.dataset`` (a CIFAR variant, "STL10" or "MNIST")
    this picks the torchvision dataset class, the transforms and the data
    partitions, and stores the partitions on ``config``
    (``train_partitions_head_A``, ``train_partitions_head_B``,
    ``mapping_assignment_partitions``, ``mapping_test_partitions``).

    ``_create_dataloaders`` is called twice on the head-A partitions: the
    first two loaders of the plain call become the "original" and "positive"
    loaders, and the first loader of the ``shuffle=True`` call becomes the
    "negative" loader.

    :param config: experiment configuration; modified in place.
    :return: ``(dataloader_original, dataloader_positive,
             dataloader_negative, dataloader_test)``
    :raises AssertionError: if ``config.mix_train`` is falsy for STL10 or the
                            dataset is not supported.
    """
    target_transform = None

    if "CIFAR" in config.dataset:
        config.train_partitions_head_A = [True, False]
        config.train_partitions_head_B = config.train_partitions_head_A
        config.mapping_assignment_partitions = [True, False]
        config.mapping_test_partitions = [True, False]

        if config.dataset == "CIFAR10":
            dataset_class = torchvision.datasets.CIFAR10
        elif config.dataset == "CIFAR100":
            dataset_class = torchvision.datasets.CIFAR100
        elif config.dataset == "CIFAR20":
            # CIFAR20 is CIFAR100 with its targets remapped.
            dataset_class = torchvision.datasets.CIFAR100
            target_transform = _cifar100_to_cifar20
        else:
            # Explicit raise (not `assert False`) so the failure names the
            # dataset and is not stripped when running with -O.
            raise AssertionError(
                "unsupported CIFAR dataset: %s" % (config.dataset,))

        # datasets produce either 2 or 5 channel images based on
        # config.include_rgb
        tf1, tf2, tf3 = sobel_make_transforms(config)

    elif config.dataset == "STL10":
        assert (config.mix_train)
        if not config.stl_leave_out_unlabelled:
            print("adding unlabelled data for STL10")
            config.train_partitions_head_A = ["train+unlabeled", "test"]
        else:
            print("not using unlabelled data for STL10")
            config.train_partitions_head_A = ["train", "test"]

        config.train_partitions_head_B = ["train", "test"]
        config.mapping_assignment_partitions = ["train", "test"]
        config.mapping_test_partitions = ["train", "test"]

        dataset_class = torchvision.datasets.STL10

        # datasets produce either 2 or 5 channel images based on
        # config.include_rgb
        tf1, tf2, tf3 = sobel_make_transforms(config)

    elif config.dataset == "MNIST":
        config.train_partitions_head_A = [True, False]
        config.train_partitions_head_B = config.train_partitions_head_A
        config.mapping_assignment_partitions = [True, False]
        config.mapping_test_partitions = [True, False]

        dataset_class = torchvision.datasets.MNIST

        tf1, tf2, tf3 = greyscale_make_transforms(config)

    else:
        # Same reasoning as the CIFAR fallback above: fail loudly with the
        # offending name instead of a bare `assert False`.
        raise AssertionError("unsupported dataset: %s" % (config.dataset,))

    dataloaders = _create_dataloaders(
        config, dataset_class, tf1, tf2,
        partitions=config.train_partitions_head_A,
        target_transform=target_transform)

    dataloader_original = dataloaders[0]
    dataloader_positive = dataloaders[1]

    shuffled_dataloaders = _create_dataloaders(
        config, dataset_class, tf1, tf2,
        partitions=config.train_partitions_head_A,
        target_transform=target_transform,
        shuffle=True)

    dataloader_negative = shuffled_dataloaders[0]

    # since this is fully unsupervised, assign dataloader = test dataloader
    dataloader_test = _create_mapping_loader(
        config, dataset_class, tf3,
        partitions=config.mapping_test_partitions,
        target_transform=target_transform)

    return dataloader_original, dataloader_positive, dataloader_negative, \
        dataloader_test
assert ("MNIST" == config.dataset) dataset_class = torchvision.datasets.MNIST assert (config.train_partitions == [True]) assert (config.mapping_assignment_partitions == [True]) assert (config.mapping_test_partitions == [False]) # append to old results if not hasattr(config, "assign_set_szs_pc_acc") or given_config.rewrite: print("resetting config.assign_set_szs_pc_acc to empty") config.assign_set_szs_pc_acc = {} for pc in new_assign_set_szs_pc: print("doing %f" % pc) sysout.flush() tf1, tf2, tf3 = greyscale_make_transforms(config) mapping_assignment_dataloader = \ _create_mapping_loader(config, dataset_class, tf3, partitions=config.mapping_assignment_partitions, truncate=True, truncate_pc=pc) mapping_test_dataloader = \ _create_mapping_loader(config, dataset_class, tf3, partitions=config.mapping_test_partitions) print("num assign batches: %d" % len(mapping_assignment_dataloader)) num_imgs = len(mapping_assignment_dataloader.dataset) print("num imgs in assign dataset: %d" % num_imgs) # networks and optimisers