def __init__(self, dataset_path, lr, vis_screen, save_path, l1_coef, l2_coef,
             batch_size, num_workers, epochs, gpu_id):
    self.gpu_id = gpu_id  # assign before it is used by DataParallel below

    # Generator / discriminator wrapped for multi-GPU training
    self.generator = torch.nn.DataParallel(model.generator().cuda(), device_ids=self.gpu_id)
    self.discriminator = torch.nn.DataParallel(model.discriminator().cuda(), device_ids=self.gpu_id)
    self.discriminator.apply(Utils.weights_init)
    self.generator.apply(Utils.weights_init)

    self.dataset = Train_Dataset(dataset_path, dataset_name='Market-1501')
    self.noise_dim = 100
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.lr = lr
    self.beta1 = 0.5
    self.num_epochs = epochs
    self.l1_coef = l1_coef
    self.l2_coef = l2_coef

    self.data_loader = DataLoader(self.dataset, batch_size=self.batch_size,
                                  shuffle=True, num_workers=self.num_workers)

    self.optimD = torch.optim.Adam(self.discriminator.parameters(),
                                   lr=self.lr, betas=(self.beta1, 0.999))
    self.optimG = torch.optim.Adam(self.generator.parameters(),
                                   lr=self.lr, betas=(self.beta1, 0.999))

    # self.logger = Logger(vis_screen)
    self.checkpoints_path = 'checkpoints'
    self.save_path = save_path
ax0.plot(x_epoch, y_loss['val'], 'ro-', label='val')
ax1.plot(x_epoch, y_err['train'], 'bo-', label='train')
ax1.plot(x_epoch, y_err['val'], 'ro-', label='val')
if current_epoch == 0:
    ax0.legend()
    ax1.legend()
fig.savefig(os.path.join(model_dir, 'train.jpg'))


######################################################################
# DataLoader
# ---------
image_datasets = {
    'train': Train_Dataset(data_dir, dataset_name=dataset_dict[args.dataset], train_val='train'),
    'val': Train_Dataset(data_dir, dataset_name=dataset_dict[args.dataset], train_val='query')
}
dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x], batch_size=args.batch_size,
                                   shuffle=True, num_workers=args.num_workers)
    for x in ['train', 'val']
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
def prepare_student_data(nb_teachers, save=False):
    """
    Takes a dataset name and the size of the teacher ensemble and prepares
    training data for the student model, according to parameters indicated
    in flags above.

    :param dataset: string corresponding to mnist, cifar10, or svhn
    :param nb_teachers: number of teachers (in the ensemble) to learn from
    :param save: if set to True, will dump student training labels predicted by
                 the ensemble of teachers (with Laplacian noise) as npy files.
                 It also dumps the clean votes for each class (without noise)
                 and the labels assigned by teachers.
    :return: pairs of (data, labels) to be used for student training and testing
    """
    # Load the dataset
    if config.dataset == 'celeba':
        dataset = data_manager.init_img_dataset(root=config.data_dir, name=config.dataset)
        test_data = dataset.test_data
        test_labels = dataset.test_label
        train_data = dataset.train_data
        train_labels = dataset.train_label
    elif config.dataset == 'market':
        data_dir = '../dataset/market1501'
        train_dataset = Train_Dataset(data_dir, dataset_name=dataset_dict[config.dataset],
                                      train_val='train')
        test_dataset = Test_Dataset(data_dir, dataset_name=dataset_dict[config.dataset],
                                    query_gallery='gallery')
        train_data = train_dataset.train_data
        train_labels = train_dataset.train_label
        test_data = test_dataset.data
        test_labels = test_dataset.label
        train_labels = np.array(train_labels, dtype=np.int32)
        test_labels = np.array(test_labels, dtype=np.int32)
        print('len of total test data in market', len(test_labels))
    else:
        return False

    # Make sure there is data left over to be used as a test set
    assert config.stdnt_share < len(test_data)

    # Keep the original images; extracted features are used for the neighbour search
    ori_test_data = test_data
    train_data, test_data = extract_feature(train_data, test_data)

    # Sample the student subset from the test set; the last 1000 records are
    # held out for evaluating the student after training
    share_index = np.random.choice(test_data[:-1000].shape[0], config.stdnt_share)
    stdnt_data = test_data[share_index]
    picked_stdnt_data = [ori_test_data[idx] for idx in share_index]

    num_train = train_data.shape[0]
    teachers_preds = np.zeros([stdnt_data.shape[0], config.nb_labels])
    tau_teachers_preds = []  # weighted teacher predictions with clipping

    for idx in range(len(stdnt_data)):
        if idx % 100 == 0:
            print('idx=', idx)
        query_data = stdnt_data[idx]
        # Subsample the training records that act as teachers, then keep the
        # nb_teachers nearest neighbours of the query in feature space
        select_teacher = np.random.choice(train_data.shape[0], int(prob * num_train))
        dis = np.linalg.norm(train_data[select_teacher] - query_data, axis=1)
        k_index = select_teacher[np.argsort(dis)[:config.nb_teachers]]
        # Sum over the selected teachers, which makes their votes easy to aggregate
        if config.use_tau:
            tau_teachers_preds.append(tau_limit(train_labels[k_index, :]))
        teachers_preds[idx] = np.sum(train_labels[k_index, :], axis=0)

    teachers_preds = np.asarray(teachers_preds, dtype=np.int32)

    if config.use_tau:
        preds_tau = np.asarray(tau_teachers_preds, dtype=np.float32)
        acct.compose_poisson_subsampled_mechanisms(gaussian, prob, coeff=config.stdnt_share)
        count_zero_list = config.nb_teachers * np.ones([config.stdnt_share, config.nb_labels]) - teachers_preds
        idx, stdnt_labels = aggregation.aggregation_knn(teachers_preds, config.gau_scale,
                                                        count_zero_list=count_zero_list)
    else:
        acct.compose_poisson_subsampled_mechanisms(gaussian, prob, coeff=config.stdnt_share)
        idx, stdnt_labels = aggregation.aggregation_knn(teachers_preds, config.gau_scale)

    # Compute privacy loss
    print("Composition of student subsampled Gaussian mechanisms gives ",
          (acct.get_eps(delta), delta))

    # Accuracy and precision of the aggregated (noisy) labels
    ac_ag_labels = hamming_accuracy(stdnt_labels, test_labels[share_index], torch=False)
    precision = hamming_precision(stdnt_labels, test_labels[share_index], torch=False)
    print("Accuracy of the aggregated labels: " + str(ac_ag_labels))
    print("Precision of the aggregated labels: " + str(precision))
    current_eps = acct.get_eps(config.delta)

    # Store the unused part of the test set for use as a test set after student training
    stdnt_test_data = ori_test_data[-1000:]
    stdnt_test_labels = test_labels[-1000:]

    if save:
        # Prepare filepath for numpy dump of labels produced by noisy aggregation
        dir_path = os.path.join(config.save_model, 'knn_num_neighbor_' + str(config.nb_teachers))
        utils.mkdir_if_missing(dir_path)
        filepath = dir_path + '_knn_voting.npy'  # NOLINT(long-line)
        # Dump student noisy labels array
        with open(filepath, 'wb') as file_obj:
            np.save(file_obj, teachers_preds)

    return picked_stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels
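
# A minimal usage sketch (not part of the original script): it assumes the
# module-level config, privacy accountant, and dataset globals used above are
# already initialised; the variable names on the left are illustrative only.
if __name__ == '__main__':
    stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = \
        prepare_student_data(config.nb_teachers, save=True)
    print('student training set size:', len(stdnt_data))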
train_all = ''
if opt.train_all:
    train_all = '_all'

image_datasets = {}
# image_datasets['train'] = datasets.ImageFolder(os.path.join(data_dir, 'train' + train_all),
#                                                data_transforms['train'])
# image_datasets['val'] = datasets.ImageFolder(os.path.join(data_dir, 'val'),
#                                              data_transforms['val'])
# dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize,
#                                               shuffle=True, num_workers=8, pin_memory=True)  # 8 workers may work faster
#                for x in ['train', 'val']}
image_datasets['train'] = Train_Dataset(data_dir, dataset_name=dataset_dict['market'], train_val='train')
image_datasets['val'] = Train_Dataset(data_dir, dataset_name=dataset_dict['market'], train_val='query')
dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize,
                                   shuffle=True, num_workers=8)
    for x in ['train', 'val']
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
id_class_number = image_datasets['train'].num_id()
from torch.optim import lr_scheduler
import network
import sys
sys.path.append('../dataset/duke')
from datafolder.folder import Train_Dataset
from dataset_loader import ImageDataset
from utils import Hamming_Score as hamming_accuracy
import market_config as config

config = config.config

dataset_dict = {
    'market': 'Market-1501',
    'duke': 'DukeMTMC-reID',
}
data_dir = '../dataset/market1501'

image_datasets = {}
image_datasets['train'] = Train_Dataset(data_dir, dataset_name=dataset_dict[config.dataset],
                                        train_val='train')
image_datasets['val'] = Train_Dataset(data_dir, dataset_name=dataset_dict[config.dataset],
                                      train_val='query')
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=config.batch_size,
                                              shuffle=True, num_workers=config.workers)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

images, labels = next(iter(dataloaders['train']))
num_label = image_datasets['train'].num_label()
num_id = image_datasets['train'].num_id()
labels_list = image_datasets['train'].labels()


def train_teacher():
fig.savefig(os.path.join(model_dir, 'train.jpg'))


######################################################################
# DataLoader
# ---------
image_datasets = {}
if args.dataset == 'rap':
    image_datasets['train'] = RapTrain_Dataset(
        data_dir, dataset_name=dataset_dict[args.dataset], train_val='train')
    image_datasets['test'] = RapTrain_Dataset(
        data_dir, dataset_name=dataset_dict[args.dataset], train_val='val')
else:
    image_datasets['train'] = Train_Dataset(
        data_dir, dataset_name=dataset_dict[args.dataset], train_val='train')
    image_datasets['test'] = Train_Dataset(
        data_dir, dataset_name=dataset_dict[args.dataset], train_val='val')
dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x], batch_size=args.batch_size,
                                   shuffle=True, num_workers=args.num_workers)
    for x in ['train', 'test']
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}
images, labels = next(iter(dataloaders['train']))
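
# A minimal sketch of how these loaders and dataset_sizes are typically consumed
# per epoch. `model`, `criterion`, and `optimizer` are assumed to be defined
# elsewhere in the script; they are not part of the excerpt above.
def run_epoch(model, criterion, optimizer):
    for phase in ['train', 'test']:
        model.train(phase == 'train')
        running_loss = 0.0
        for images, labels in dataloaders[phase]:
            images, labels = images.cuda(), labels.float().cuda()
            optimizer.zero_grad()
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(images)
                loss = criterion(outputs, labels)
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
            running_loss += loss.item() * images.size(0)
        print('{} loss: {:.4f}'.format(phase, running_loss / dataset_sizes[phase]))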