Example #1
    def __init__(self, dataset_path, lr, vis_screen, save_path, l1_coef,
                 l2_coef, batch_size, num_workers, epochs, gpu_id):

        # Use the gpu_id argument directly here; self.gpu_id is only assigned
        # at the end of __init__, so referencing it at this point would raise
        # AttributeError.
        self.generator = torch.nn.DataParallel(model.generator().cuda(),
                                               device_ids=gpu_id)
        self.discriminator = torch.nn.DataParallel(
            model.discriminator().cuda(), device_ids=gpu_id)

        self.discriminator.apply(Utils.weights_init)

        self.generator.apply(Utils.weights_init)

        self.dataset = Train_Dataset(dataset_path, dataset_name='Market-1501')

        self.noise_dim = 100
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.lr = lr
        self.beta1 = 0.5
        self.num_epochs = epochs

        self.l1_coef = l1_coef
        self.l2_coef = l2_coef

        self.data_loader = DataLoader(self.dataset,
                                      batch_size=self.batch_size,
                                      shuffle=True,
                                      num_workers=self.num_workers)

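        # beta1 = 0.5 (rather than Adam's default 0.9) follows the common
        # DCGAN recommendation for more stable adversarial training.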
        self.optimD = torch.optim.Adam(self.discriminator.parameters(),
                                       lr=self.lr,
                                       betas=(self.beta1, 0.999))
        self.optimG = torch.optim.Adam(self.generator.parameters(),
                                       lr=self.lr,
                                       betas=(self.beta1, 0.999))

        #self.logger = Logger(vis_screen)
        self.checkpoints_path = 'checkpoints'
        self.save_path = save_path
        self.gpu_id = gpu_id
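
For orientation, a minimal sketch of how the optimizers set up above are
typically stepped in an alternating GAN loop. Hedged: train_step, the BCE
loss, and the .view(-1) output shapes are illustrative assumptions, not the
project's actual training code.

    def train_step(self, images, noise):
        criterion = torch.nn.BCELoss()
        real = torch.ones(images.size(0), device=images.device)
        fake = torch.zeros(images.size(0), device=images.device)

        # Discriminator update: push real images toward 1, generated toward 0.
        self.optimD.zero_grad()
        fake_images = self.generator(noise)
        d_loss = (criterion(self.discriminator(images).view(-1), real) +
                  criterion(self.discriminator(fake_images.detach()).view(-1), fake))
        d_loss.backward()
        self.optimD.step()

        # Generator update: push generated images toward the "real" label.
        self.optimG.zero_grad()
        g_loss = criterion(self.discriminator(fake_images).view(-1), real)
        g_loss.backward()
        self.optimG.step()
        return d_loss.item(), g_loss.item()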
Example #2
    ax0.plot(x_epoch, y_loss['val'], 'ro-', label='val')
    ax1.plot(x_epoch, y_err['train'], 'bo-', label='train')
    ax1.plot(x_epoch, y_err['val'], 'ro-', label='val')
    if current_epoch == 0:
        ax0.legend()
        ax1.legend()
    fig.savefig(os.path.join(model_dir, 'train.jpg'))


######################################################################
# DataLoader
# ---------
image_datasets = {
    'train':
    Train_Dataset(data_dir,
                  dataset_name=dataset_dict[args.dataset],
                  train_val='train'),
    'val':
    Train_Dataset(data_dir,
                  dataset_name=dataset_dict[args.dataset],
                  train_val='query')
}
dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x],
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers)
    for x in ['train', 'val']
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
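
A short sanity-check sketch (illustrative, not from the original script):
iterating the loaders and reconciling against dataset_sizes.

    for phase in ['train', 'val']:
        seen = 0
        for images, labels in dataloaders[phase]:
            seen += images.size(0)
        # With the default drop_last=False every record appears exactly once,
        # so dataset_sizes[phase] is the usual denominator for epoch averages.
        assert seen == dataset_sizes[phase]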
Example #3
def prepare_student_data(nb_teachers, save=False):
    """
    Prepares training data for the student model from an ensemble of teachers,
    according to the parameters indicated in the config above.
    :param nb_teachers: number of teachers (in the ensemble) to learn from
    :param save: if set to True, dumps student training labels predicted by
                 the ensemble of teachers (with noise) as npy files. It also
                 dumps the clean votes for each class (without noise) and the
                 labels assigned by teachers.
    :return: pairs of (data, labels) to be used for student training and testing
    """

    # Load the dataset
    if config.dataset == 'celeba':
        dataset = data_manager.init_img_dataset(root=config.data_dir, name=config.dataset)
        test_data = dataset.test_data
        test_labels = dataset.test_label
        train_data = dataset.train_data
        train_labels = dataset.train_label

    elif config.dataset == 'market':
        data_dir = '../dataset/market1501'
        train_dataset = Train_Dataset(data_dir, dataset_name=dataset_dict[config.dataset],
                                      train_val='train')
        test_dataset = Test_Dataset(data_dir, dataset_name=dataset_dict[config.dataset],
                                    query_gallery='gallery')

        train_data = train_dataset.train_data
        train_labels = train_dataset.train_label
        test_data = test_dataset.data
        test_labels = test_dataset.label
        train_labels = np.array(train_labels, dtype=np.int32)
        test_labels = np.array(test_labels, dtype=np.int32)
        print('len of total test data in market', len(test_labels))
    else:
        return False

    # Make sure there is data left over to be used as a test set.
    assert config.stdnt_share < len(test_data)

    # Keep the raw test records; extract_feature replaces test_data with
    # feature vectors below, while the picked student images must come from
    # the originals.
    ori_test_data = test_data

    train_data, test_data = extract_feature(train_data, test_data)

    # Sample config.stdnt_share student queries from the test records,
    # excluding the last 1000, which are held out for evaluation.
    # (np.random.choice samples with replacement by default.)
    share_index = np.random.choice(test_data[:-1000].shape[0], config.stdnt_share)
    stdnt_data = test_data[share_index]
    picked_stdnt_data = [ori_test_data[idx] for idx in share_index]
    num_train = train_data.shape[0]
    teachers_preds = np.zeros([stdnt_data.shape[0], config.nb_labels])

    tau_teachers_preds = []
    # A weighted teacher prediction with clipping.
    for idx in range(len(stdnt_data)):
        if idx % 100 == 0:
            print('idx=', idx)
        query_data = stdnt_data[idx]
        select_teacher = np.random.choice(train_data.shape[0], int(prob * num_train))
        dis = np.linalg.norm(train_data[select_teacher] - query_data, axis=1)
        k_index = select_teacher[np.argsort(dis)[:config.nb_teachers]]
        # Sum over the nb_teachers nearest neighbors, which makes it easy to
        # compute their votes per class.
        if config.use_tau:
            tau_teachers_preds.append(tau_limit(train_labels[k_index, :]))
        teachers_preds[idx] = np.sum(train_labels[k_index, :], axis=0)


    teachers_preds = np.asarray(teachers_preds, dtype=np.int32)
    if config.use_tau:
        preds_tau = np.asarray(tau_teachers_preds, dtype=np.float32)
        acct.compose_poisson_subsampled_mechanisms(gaussian, prob, coeff=config.stdnt_share)
        count_zero_list = config.nb_teachers * np.ones([config.stdnt_share, config.nb_labels]) - teachers_preds
        idx, stdnt_labels = aggregation.aggregation_knn(teachers_preds, config.gau_scale,
                                                        count_zero_list=count_zero_list)
    else:
        acct.compose_poisson_subsampled_mechanisms(gaussian, prob, coeff=config.stdnt_share)
        idx, stdnt_labels = aggregation.aggregation_knn(teachers_preds, config.gau_scale)
    # Compute the privacy loss.
    print("Composition of student subsampled Gaussian mechanisms gives ", (acct.get_eps(delta), delta))

    # Print accuracy of the aggregated labels.
    #ac_ag_labels = hamming_accuracy(stdnt_labels, test_labels[:config.stdnt_share], torch=False)
    ac_ag_labels = hamming_accuracy(stdnt_labels, test_labels[share_index], torch=False)
    precision = hamming_precision(stdnt_labels, test_labels[share_index], torch=False)
    print("Accuracy of the aggregated labels: " + str(ac_ag_labels))
    print('Precision of the aggregated labels: ' + str(precision))
    current_eps = acct.get_eps(config.delta)
    # Store the unused part of the test set for evaluation after student training.
    stdnt_test_data = ori_test_data[-1000:]
    stdnt_test_labels = test_labels[-1000:]

    if save:
        # Prepare filepath for numpy dump of labels produced by noisy aggregation.
        dir_path = os.path.join(config.save_model, 'knn_num_neighbor_' + str(config.nb_teachers))
        utils.mkdir_if_missing(dir_path)
        filepath = dir_path + '_knn_voting.npy'  #NOLINT(long-line)

        # Dump student noisy labels array.
        with open(filepath, 'wb') as file_obj:
            np.save(file_obj, teachers_preds)

    return picked_stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels
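
A hedged usage sketch of how the returned tuple is typically consumed (the
call below is illustrative, not the project's training driver):

    stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = \
        prepare_student_data(config.nb_teachers, save=True)
    # stdnt_data/stdnt_labels train the student; the held-out 1000 records in
    # stdnt_test_data/stdnt_test_labels evaluate it after training.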
Example #4
train_all = ''
if opt.train_all:
    train_all = '_all'

image_datasets = {}
# image_datasets['train'] = datasets.ImageFolder(os.path.join(data_dir, 'train' + train_all),
#                                           data_transforms['train'])
# image_datasets['val'] = datasets.ImageFolder(os.path.join(data_dir, 'val'),
#                                           data_transforms['val'])

# dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize,
#                                              shuffle=True, num_workers=8, pin_memory=True)  # 8 workers may work faster
#               for x in ['train', 'val']}

image_datasets['train'] = Train_Dataset(data_dir,
                                        dataset_name=dataset_dict['market'],
                                        train_val='train')
image_datasets['val'] = Train_Dataset(data_dir,
                                      dataset_name=dataset_dict['market'],
                                      train_val='query')
dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x],
                                   batch_size=opt.batchsize,
                                   shuffle=True,
                                   num_workers=8)
    for x in ['train', 'val']
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
id_class_number = image_datasets['train'].num_id()
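
A minimal sketch of how id_class_number is typically consumed downstream.
Hedged: the 2048-dim input matches a ResNet-50 backbone and is an assumption,
as is the classification head itself.

    import torch.nn as nn
    # One logit per person identity in the training set.
    id_classifier = nn.Linear(2048, id_class_number)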
Example #5
import torch
from torch.optim import lr_scheduler
import network
import sys
sys.path.append('../dataset/duke')
from datafolder.folder import Train_Dataset
from dataset_loader import ImageDataset
from utils import Hamming_Score as hamming_accuracy
import market_config as config

# Rebind the module name to its config object.
config = config.config
dataset_dict = {
    'market': 'Market-1501',
    'duke': 'DukeMTMC-reID',
}
data_dir = '../dataset/market1501'
image_datasets = {}
image_datasets['train'] = Train_Dataset(data_dir, dataset_name=dataset_dict[config.dataset],
                                        train_val='train')

image_datasets['val'] = Train_Dataset(data_dir, dataset_name=dataset_dict[config.dataset],
                                      train_val='query')
dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x],
                                   batch_size=config.batch_size,
                                   shuffle=True,
                                   num_workers=config.workers)
    for x in ['train', 'val']
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

images, labels = next(iter(dataloaders['train']))
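# Hedged note: images is a float tensor of shape (config.batch_size, 3, H, W);
# for attribute datasets such as Market-1501, labels is typically a per-image
# multi-hot attribute vector (the exact layout depends on Train_Dataset).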

num_label = image_datasets['train'].num_label()
num_id = image_datasets['train'].num_id()
labels_list = image_datasets['train'].labels()

def train_teacher():
Example #6
    fig.savefig(os.path.join(model_dir, 'train.jpg'))


######################################################################
# DataLoader
# ---------
image_datasets = {}

if args.dataset == 'rap':
    image_datasets['train'] = RapTrain_Dataset(
        data_dir, dataset_name=dataset_dict[args.dataset], train_val='train')

    image_datasets['test'] = RapTrain_Dataset(
        data_dir, dataset_name=dataset_dict[args.dataset], train_val='val')
else:
    image_datasets['train'] = Train_Dataset(
        data_dir, dataset_name=dataset_dict[args.dataset], train_val='train')

    image_datasets['test'] = Train_Dataset(
        data_dir, dataset_name=dataset_dict[args.dataset], train_val='val')

dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x],
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers)
    for x in ['train', 'test']
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}

images, labels = next(iter(dataloaders['train']))