def dataset_read(source, target, batch_size, scale=False, all_use='no'):
    """Build train/test loaders for single-source -> single-target adaptation.

    Relies on `return_dataset` and `UnalignedDataLoader` defined elsewhere in
    the project (not visible here) -- presumably they load (imgs, labels)
    arrays and pair source/target batches; confirm against their modules.

    Returns:
        (dataset, dataset_test) loader objects.
    """
    # USPS gets special handling inside return_dataset when it is either end.
    usps = source == 'usps' or target == 'usps'
    train_source, s_label_train, test_source, s_label_test = return_dataset(
        source, scale=scale, usps=usps, all_use=all_use)
    train_target, t_label_train, test_target, t_label_test = return_dataset(
        target, scale=scale, usps=usps, all_use=all_use)

    S = {'imgs': train_source, 'labels': s_label_train}
    T = {'imgs': train_target, 'labels': t_label_train}
    # Both "test" sides are fed target samples: source test accuracy is not
    # the quantity of interest here.
    S_test = {'imgs': test_target, 'labels': t_label_test}
    T_test = {'imgs': test_target, 'labels': t_label_test}

    # Image side length handed to the loaders.
    if source == 'synth':
        img_scale = 40
    elif source == 'usps' or target == 'usps':
        img_scale = 28
    else:
        img_scale = 32

    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size, scale=img_scale)
    dataset = train_loader.load_data()

    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size, scale=img_scale)
    dataset_test = test_loader.load_data()
    return dataset, dataset_test
def dataset_read(target, batch_size):
    """Build multi-source loaders: four source domains vs. one target.

    The four domains remaining after removing `target` from the digit-domain
    list each become one entry of the source list handed to
    `UnalignedDataLoader`.  `return_dataset` is defined elsewhere in the
    project -- assumed to yield (train, train_label, test, test_label).

    Returns:
        (dataset, dataset_test, smallest training-set size) -- the minimum is
        taken over the target and all four sources.
    """
    sources = [{} for _ in range(4)]
    sources_test = [{} for _ in range(4)]
    T = {}
    T_test = {}
    domain_all = ['mnistm', 'mnist', 'usps', 'svhn', 'syn']
    domain_all.remove(target)

    dataset_size = []
    target_train, target_train_label, target_test, target_test_label = return_dataset(
        target)
    dataset_size.append(target_train.shape[0])

    for idx, dom in enumerate(domain_all):
        src_train, src_train_lbl, src_test, src_test_lbl = return_dataset(dom)
        sources[idx]['imgs'] = src_train
        sources[idx]['labels'] = src_train_lbl
        # Source test accuracy is tracked per-domain here (unlike variants
        # that feed target data to the source test side).
        sources_test[idx]['imgs'] = src_test
        sources_test[idx]['labels'] = src_test_lbl
        dataset_size.append(src_train.shape[0])

    T['imgs'] = target_train
    T['labels'] = target_train_label
    T_test['imgs'] = target_test
    T_test['labels'] = target_test_label

    scale = 32
    train_loader = UnalignedDataLoader()
    train_loader.initialize(sources, T, batch_size, batch_size, scale=scale)
    dataset = train_loader.load_data()

    test_loader = UnalignedDataLoader()
    test_loader.initialize(sources_test, T_test, batch_size, batch_size, scale=scale)
    dataset_test = test_loader.load_data()
    return dataset, dataset_test, min(dataset_size)
def dataset_read(source, target, batch_size, scale=False, all_use='no'):
    """Build loaders for one source vs. the union of all other digit domains.

    Every domain except `source` is concatenated into a single "target" set;
    the `target` argument only influences the usps flag.  `return_dataset`
    and `UnalignedDataLoader` are defined elsewhere in the project.

    Returns:
        (dataset, dataset_test) loader objects.
    """
    S = {}
    S_test = {}
    T = {}
    T_test = {}
    usps = source == 'usps' or target == 'usps'
    domain_all = ['mnistm', 'mnist', 'usps', 'svhn', 'syn']
    domain_all.remove(source)
    train_source, s_label_train, test_source, s_label_test = return_dataset(
        source, scale=scale, usps=usps, all_use=all_use)
    # Concatenate every non-source domain into one pooled "target".
    train_target, t_label_train, test_target, t_label_test = return_dataset(
        domain_all[0], scale=scale, usps=usps, all_use=all_use)
    for dom in domain_all[1:]:
        train_target_, t_label_train_, test_target_, t_label_test_ = return_dataset(
            dom, scale=scale, usps=usps, all_use=all_use)
        train_target = np.concatenate((train_target, train_target_), axis=0)
        t_label_train = np.concatenate((t_label_train, t_label_train_), axis=0)
        test_target = np.concatenate((test_target, test_target_), axis=0)
        t_label_test = np.concatenate((t_label_test, t_label_test_), axis=0)
    print('Source Training: ', train_source.shape)
    print('Source Training label: ', s_label_train.shape)
    print('Source Test: ', test_source.shape)
    print('Source Test label: ', s_label_test.shape)
    print('Target Training: ', train_target.shape)
    print('Target Training label: ', t_label_train.shape)
    print('Target Test: ', test_target.shape)
    print('Target Test label: ', t_label_test.shape)
    S['imgs'] = train_source
    S['labels'] = s_label_train
    T['imgs'] = train_target
    T['labels'] = t_label_train
    # Target samples feed both test sides; source test accuracy is unused.
    S_test['imgs'] = test_target
    S_test['labels'] = t_label_test
    T_test['imgs'] = test_target
    T_test['labels'] = t_label_test
    # BUG FIX: the original conditional expression evaluated to 32 on every
    # branch, and the trailing `return` was severed from its value tuple
    # (so the function returned None).  Use the constant and return both.
    scale = 32
    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size, scale=scale)
    dataset = train_loader.load_data()
    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size, scale=scale)
    dataset_test = test_loader.load_data()
    return dataset, dataset_test
def dataset_read(source, target, batch_size, pixel_norm=True, scale=False, all_use=False):
    """Single source/target loaders with optional mean-pixel normalisation.

    When `pixel_norm` is true, the per-pixel mean over the combined source
    and target training images is subtracted from train/test arrays, which
    are then divided by 255.  Arrays are converted to torch tensors before
    being handed to the loaders.  NOTE(review): `test_source` is never
    normalised -- only target test data is used downstream.
    """
    usps = source == 'usps' or target == 'usps'
    train_source, s_label_train, test_source, s_label_test = return_dataset(
        source, scale=scale, usps=usps, all_use=all_use)
    train_target, t_label_train, test_target, t_label_test = return_dataset(
        target, scale=scale, usps=usps, all_use=all_use)

    if pixel_norm:
        # Mean image over the union of both training sets.
        pixel_mean = np.vstack([train_source, train_target]).mean((0,))
        train_source = (train_source - pixel_mean) / float(255)
        train_target = (train_target - pixel_mean) / float(255)
        test_target = (test_target - pixel_mean) / float(255)

    S = {'imgs': torch.from_numpy(train_source).float(),
         'labels': torch.from_numpy(s_label_train).long()}
    T = {'imgs': torch.from_numpy(train_target).float(),
         'labels': torch.from_numpy(t_label_train).long()}
    # Target samples feed both test sides.
    S_test = {'imgs': torch.from_numpy(test_target).float(),
              'labels': torch.from_numpy(t_label_test).long()}
    T_test = {'imgs': torch.from_numpy(test_target).float(),
              'labels': torch.from_numpy(t_label_test).long()}

    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size)
    dataset = train_loader.load_data()

    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size)
    dataset_test = test_loader.load_data()
    return dataset, dataset_test
def CreateDataLoader(opt):
    """Instantiate and initialize the data loader selected by `opt.align_data`.

    A positive `opt.align_data` selects the aligned (paired) loader,
    otherwise the unaligned one.  Imports are kept function-local, as in the
    original, so only the chosen backend module is loaded.
    """
    if opt.align_data > 0:
        from cyclegan_arch.data.aligned_data_loader import AlignedDataLoader
        loader = AlignedDataLoader()
    else:
        from unaligned_data_loader import UnalignedDataLoader
        loader = UnalignedDataLoader()
    print(loader.name())
    loader.initialize(opt)
    return loader
def dataset_read(target, batch_size):
    """Multi-source loaders plus a small held-out target validation split.

    5% of the target test set (shuffled with a fixed seed) is carved off for
    validation; the remainder is used for testing.  Every source's "test"
    side is fed the target test data, since source test accuracy is not of
    interest.  `return_dataset` / `UnalignedDataLoader` come from elsewhere
    in the project.

    Returns:
        (dataset, dataset_test, dataset_valid) loader objects.
    """
    S = [{} for _ in range(4)]
    S_test = [{} for _ in range(4)]
    T = {}
    T_test = {}
    T_val = {}
    domain_all = ['mnistm', 'mnist', 'usps', 'svhn', 'syn']
    domain_all.remove(target)
    target_train, target_train_label, target_test, target_test_label = return_dataset(
        target)
    # Fixed-seed shuffle so the validation split is reproducible.
    # NOTE: this reseeds the *global* numpy RNG as a side effect.
    indices_tar = np.arange(0, target_test.shape[0])
    np.random.seed(42)
    np.random.shuffle(indices_tar)
    val_split = int(0.05 * target_test.shape[0])
    target_val = target_test[indices_tar[:val_split]]
    target_val_label = target_test_label[indices_tar[:val_split]]
    target_test = target_test[indices_tar[val_split:]]
    target_test_label = target_test_label[indices_tar[val_split:]]
    for i, dom in enumerate(domain_all):
        source_train, source_train_label, source_test, source_test_label = return_dataset(
            dom)
        S[i]['imgs'] = source_train
        S[i]['labels'] = source_train_label
        # input target sample when test, source performance is not important
        S_test[i]['imgs'] = target_test
        S_test[i]['labels'] = target_test_label
    T['imgs'] = target_train
    T['labels'] = target_train_label
    T_test['imgs'] = target_test
    T_test['labels'] = target_test_label
    T_val['imgs'] = target_val
    T_val['labels'] = target_val_label
    scale = 32
    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size, scale=scale)
    dataset = train_loader.load_data()
    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size, scale=scale)
    dataset_test = test_loader.load_data()
    # Dummy source so the validation loader has the structure it expects.
    S_val = {'imgs': np.zeros((20, 3, 32, 32)), 'labels': np.zeros((20))}
    val_loader = UnalignedDataLoader()
    val_loader.initialize([S_val], T_val, batch_size, batch_size, scale=scale)
    dataset_valid = val_loader.load_data()
    # BUG FIX: the original `return` was severed from part of its value tuple
    # (returning only a truncated expression); return all three loaders.
    return dataset, dataset_test, dataset_valid
def dataset_hard_cluster(target, batch_size, num_clus):
    """Cluster the pooled source domains into `num_clus` pseudo-sources.

    All non-target digit domains are pooled at a common (3, 28, 28) geometry,
    mean-centred, projected to 50 PCA components, and hard-clustered with
    k-means; each cluster becomes one "source" for the unaligned loader.
    A 5% target-test slice (fixed seed) is held out for validation.
    `return_dataset`, `UnalignedDataLoader`, `PCA` and `KMeans` come from
    elsewhere in the project / sklearn.

    Returns:
        (dataset, dataset_test, dataset_valid) loader objects.
    """
    n_comp = 50  # number of PCA components
    T = {}
    T_test = {}
    T_val = {}
    domain_all = ['mnistm', 'mnist', 'usps', 'svhn', 'syn']
    domain_all.remove(target)
    target_train, target_train_label, target_test, target_test_label = return_dataset(
        target)

    # Reproducible 5% validation split (reseeds the global numpy RNG).
    indices_tar = np.arange(0, target_test.shape[0])
    np.random.seed(42)
    np.random.shuffle(indices_tar)
    val_split = int(0.05 * target_test.shape[0])
    target_val = target_test[indices_tar[:val_split]]
    target_val_label = target_test_label[indices_tar[:val_split]]
    target_test = target_test[indices_tar[val_split:]]
    target_test_label = target_test_label[indices_tar[val_split:]]

    T['imgs'] = target_train
    T['labels'] = target_train_label
    T_test['imgs'] = target_test
    T_test['labels'] = target_test_label
    T_val['imgs'] = target_val
    T_val['labels'] = target_val_label

    # Pool every source domain into a single array of (3, 28, 28) images.
    pooled_imgs = []
    pooled_labels = []
    for dom in domain_all:
        src_train, src_train_label, _src_test, _src_test_label = return_dataset(dom)
        if src_train.shape[1] == 1:
            # Broadcast grayscale to three channels.
            src_train = np.repeat(src_train, 3, 1)
        if src_train.shape[2] == 32:
            # Centre-crop 32x32 images down to 28x28.
            src_train = src_train[:, :, 2:30, 2:30]
        pooled_imgs.append(src_train)
        pooled_labels.append(src_train_label)
    X_combined = np.concatenate(pooled_imgs, axis=0)
    X_labels = np.concatenate(pooled_labels, axis=0)

    # Flatten, mean-centre, reduce with PCA, then hard-cluster.
    X_vec = X_combined.reshape(X_combined.shape[0], -1)
    X_vec = X_vec - X_vec.mean(0)
    pca_transformed = PCA(n_components=n_comp).fit_transform(X_vec)
    kmeans = KMeans(n_clusters=num_clus, n_init=1)
    predict = kmeans.fit(pca_transformed).predict(pca_transformed)
    print("Hard Clustering Ends")

    S = []
    S_test = []
    for c in range(num_clus):
        S.append({'imgs': X_combined[predict == c],
                  'labels': X_labels[predict == c]})
        # input target sample when test, source performance is not important
        S_test.append({'imgs': target_test, 'labels': target_test_label})

    scale = 32
    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size, scale=scale)
    dataset = train_loader.load_data()
    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size, scale=scale)
    dataset_test = test_loader.load_data()

    # Dummy source so the validation loader has the structure it expects.
    S_val = {'imgs': np.zeros((20, 3, 32, 32)), 'labels': np.zeros((20))}
    val_loader = UnalignedDataLoader()
    val_loader.initialize([S_val], T_val, batch_size, batch_size, scale=scale)
    dataset_valid = val_loader.load_data()
    return dataset, dataset_test, dataset_valid
def dataset_read(target, batch_size, args):
    """Build a training loader for the real/sketch/painting domain set.

    The active path forwards the two source loaders and the target loader
    produced by `return_dataset(args)` straight to `UnalignedDataLoader`.
    Earlier multi-source digit-domain code was disabled in the original (kept
    inside dead triple-quoted strings); it is preserved as comments below.
    Only the training dataset is returned.
    """
    # domain_all = ['mnistm', 'mnist', 'usps', 'svhn', 'syn']
    # domain_all.remove(target)
    domain_all = ['real', 'sketch', 'painting']
    domain_all.remove(target)
    print("all domains")
    print(domain_all)
    print("target domain")
    print(target)
    print()
    # Disabled digit-domain path (from an earlier revision):
    #   target_train, target_train_label, target_test, target_test_label = return_dataset2(target)
    #   for i in range(len(domain_all)):
    #       source_train, source_train_label, source_test, source_test_label = return_dataset2(domain_all[i])
    #       S[i]['imgs'] = source_train
    #       S[i]['labels'] = source_train_label
    #       S_test[i]['imgs'] = target_test
    #       S_test[i]['labels'] = target_test_label
    #   T['imgs'] = target_train
    #   T['labels'] = target_train_label
    #   T_test['imgs'] = target_test
    #   T_test['labels'] = target_test_label
    print(args)
    scale = 32
    print("Here after defining scale")
    print(args.save_epoch)
    source_loader1, source_loader2, target_loader, class_list1, class_list2 = return_dataset(
        args)
    batch_size1 = batch_size
    batch_size2 = batch_size
    train_loader = UnalignedDataLoader()
    train_loader.initialize(source_loader1, source_loader2, target_loader,
                            batch_size1, batch_size2, scale=scale)
    dataset = train_loader.load_data()
    # Test-loader construction was disabled in the original; prior revisions
    # also built dataset_test and returned (dataset, dataset_test).
    return dataset