def prepare_data(args, train_eval_list, test_list, resize):
    # Data Upload
    print('\n[Phase 1] : Data Preparation')
    transform_train = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])  # mean/std transformation
    transform_test = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])

    if (args.dataset == 'cifar10'):
        print("| Preparing CIFAR-10 dataset...")
        sys.stdout.write("| ")
        trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
        testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform_test)
        # Split off an evaluation set so that evalset is defined for the return below.
        train_size = int(0.8 * len(trainset))
        eval_size = len(trainset) - train_size
        trainset, evalset = torch.utils.data.random_split(trainset, [train_size, eval_size])
        outputs = 10
        inputs = 3
    elif (args.dataset == 'cifar100'):
        print("| Preparing CIFAR-100 dataset...")
        sys.stdout.write("| ")
        trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
        testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=False, transform=transform_test)
        # Split off an evaluation set so that evalset is defined for the return below.
        train_size = int(0.8 * len(trainset))
        eval_size = len(trainset) - train_size
        trainset, evalset = torch.utils.data.random_split(trainset, [train_size, eval_size])
        outputs = 100
        inputs = 3
    elif (args.dataset == 'fashion-mnist'):
        print("| Preparing Fashion-MNIST dataset...")
        sys.stdout.write("| ")
        if args.debug:
            train_eval_set = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                list_idx=train_eval_list,
                transform=transform_train)
            # Only keep a 1% subset of the original dataset for quick debugging.
            small_size = int(0.01 * len(train_eval_set))
            drop_size = len(train_eval_set) - small_size
            train_eval_set, _ = torch.utils.data.random_split(
                train_eval_set, [small_size, drop_size])
            # Split train_eval_set into trainset and evalset.
            train_size = int(0.8 * len(train_eval_set))
            eval_size = len(train_eval_set) - train_size
            trainset, evalset = torch.utils.data.random_split(
                train_eval_set, [train_size, eval_size])
            testset = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                list_idx=test_list,
                transform=transform_test)
            small_size = int(0.01 * len(testset))
            drop_size = len(testset) - small_size
            testset, _ = torch.utils.data.random_split(testset, [small_size, drop_size])
        else:
            train_eval_set = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                list_idx=train_eval_list,
                transform=transform_train)
            # Split train_eval_set into trainset and evalset.
            train_size = int(0.8 * len(train_eval_set))
            eval_size = len(train_eval_set) - train_size
            trainset, evalset = torch.utils.data.random_split(
                train_eval_set, [train_size, eval_size])
            testset = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                list_idx=test_list,
                transform=transform_test)
        outputs = 10  # number of labels
        inputs = 1    # number of input channels
    return trainset, evalset, testset, inputs, outputs
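# Example (a minimal sketch, not part of the training pipeline): how prepare_data
# could be wired into DataLoaders. The helper name, the resize value of 32, and the
# argparse-style `args` namespace are assumptions for illustration only.
def _example_prepare_data_usage(args, train_eval_list, test_list):
    trainset, evalset, testset, inputs, outputs = prepare_data(
        args, train_eval_list, test_list, resize=32)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=cf.batch_size, shuffle=True, num_workers=4)
    evalloader = torch.utils.data.DataLoader(evalset, batch_size=cf.batch_size, shuffle=False, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=cf.batch_size, shuffle=False, num_workers=4)
    return trainloader, evalloader, testloader, inputs, outputs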
transform_train = transforms.Compose([
    transforms.Resize((resize, resize)),
    transforms.ToTensor(),
    transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
])  # mean/std transformation
transform_test = transforms.Compose([
    transforms.Resize((resize, resize)),
    transforms.ToTensor(),
    transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
])

start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type

# Compare the original torchvision MNIST dataset against the refactored VGMM dataset.
trainset_org = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
trainset_refactor = refactor_dataset_class.VGMMDataset(transform=transform_train)
# num_workers: how many subprocesses to use for data loading.
# 0 means the data is loaded in the main process (default: 0).
trainloader_org = torch.utils.data.DataLoader(trainset_org, batch_size=batch_size, shuffle=True, num_workers=4)
trainloader_refactor = torch.utils.data.DataLoader(trainset_refactor, batch_size=batch_size, shuffle=False, num_workers=4)

import utils_parent
X, y = utils_parent.load_mnist("fashion-mnist")
print(type(y), y.shape)  # quick sanity check of the loaded labels
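# Sanity-check sketch (an illustrative assumption: both datasets yield (image, label)
# batches): draw one batch from each loader and compare tensor shapes to confirm the
# refactored VGMM dataset is interchangeable with torchvision's MNIST.
def _compare_loaders(loader_org, loader_refactor):
    x_org, y_org = next(iter(loader_org))
    x_ref, y_ref = next(iter(loader_refactor))
    print("original batch:  ", x_org.shape, y_org.shape)
    print("refactored batch:", x_ref.shape, y_ref.shape)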
def prepare_data_for_normal_cv(args, train_eval_list, test_list, resize):
    # Data Upload
    print('\n[Phase 1] : Data Preparation')
    transform_train = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])  # mean/std transformation
    transform_test = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])

    if (args.dataset == 'mnist'):
        print("| Preparing Fashion-MNIST dataset for random cv...")
        sys.stdout.write("| ")
        if args.debug:
            train_eval_set = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                index=train_eval_list,
                transform=transform_train,
                cluster=False)
            # Only keep a 1% subset of the original dataset for quick debugging.
            small_size = int(0.01 * len(train_eval_set))
            drop_size = len(train_eval_set) - small_size
            train_eval_set, _ = torch.utils.data.random_split(
                train_eval_set, [small_size, drop_size])
            # Split train_eval_set into trainset and evalset.
            train_size = int(0.8 * len(train_eval_set))
            eval_size = len(train_eval_set) - train_size
            trainset, evalset = torch.utils.data.random_split(
                train_eval_set, [train_size, eval_size])
            testset = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                index=test_list,
                transform=transform_test,
                cluster=False)
            small_size = int(0.01 * len(testset))
            drop_size = len(testset) - small_size
            testset, _ = torch.utils.data.random_split(testset, [small_size, drop_size])
        else:
            train_eval_set = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                index=train_eval_list,
                transform=transform_train,
                cluster=False)
            # Split train_eval_set into trainset and evalset.
            train_size = int(0.8 * len(train_eval_set))
            eval_size = len(train_eval_set) - train_size
            trainset, evalset = torch.utils.data.random_split(
                train_eval_set, [train_size, eval_size])
            testset = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                index=test_list,
                transform=transform_test,
                cluster=False)
        outputs = 10  # number of labels
        inputs = 1    # number of input channels
    return trainset, evalset, testset, inputs, outputs
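# Illustrative sketch (an assumption, not necessarily how this project builds its CV
# splits): the train_eval_list / test_list arguments of prepare_data_for_normal_cv
# could be generated with scikit-learn's KFold over the sample indices. The helper
# name and parameters are hypothetical.
import numpy as np
from sklearn.model_selection import KFold

def _make_cv_index_lists(num_samples, n_splits=5, seed=0):
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    # Each element is a (train_eval_list, test_list) pair of plain Python index lists.
    return [(train_idx.tolist(), test_idx.tolist())
            for train_idx, test_idx in kfold.split(np.arange(num_samples))]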
    outputs = 10
    inputs = 3
elif (args.dataset == 'cifar100'):
    print("| Preparing CIFAR-100 dataset...")
    sys.stdout.write("| ")
    trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
    testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=False, transform=transform_test)
    outputs = 100
    inputs = 3
elif (args.dataset == 'mnist'):
    print("| Preparing MNIST dataset...")
    sys.stdout.write("| ")
    # trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    trainset = refactor_dataset_class.VGMMDataset(transform=transform_train)
    # testset = torchvision.datasets.MNIST(root='./data', train=False, download=False, transform=transform_test)
    testset = refactor_dataset_class.VGMMDataset(transform=transform_test)
    outputs = 10  # number of labels
    inputs = 1    # number of input channels

# num_workers: how many subprocesses to use for data loading.
# 0 means the data is loaded in the main process (default: 0).
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

# Return network & file name
def getNetwork(args):
    if (args.net_type == 'lenet'):
        net = BBBLeNet(outputs, inputs)  # inputs is the number of input channels
        file_name = 'lenet'
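# Hedged usage sketch: assuming getNetwork eventually returns (net, file_name) for
# every supported net_type (only the 'lenet' branch appears above), it could be
# consumed as follows. The helper name and the CUDA move are illustrative assumptions.
def _example_build_network(args):
    net, file_name = getNetwork(args)
    if torch.cuda.is_available():
        net = net.cuda()
    return net, file_name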