def compute_acc(p, args, train_data_ori, val_data_ori, test_data_ori):
    """Train a gender classifier on noised copies of the data and return its
    test accuracy (leakage).

    Deep-copies the three splits, flips each ground-truth object bit with
    probability ``p``, trains ``GenderClassifier`` on the noised train split,
    and evaluates the best checkpoint on the noised test split.

    Args:
        p: per-bit probability of flipping a ground-truth object annotation.
        args: parsed command-line namespace (batch_size, learning_rate, ...).
        train_data_ori / val_data_ori / test_data_ori: original datasets;
            they are deep-copied and never mutated.

    Returns:
        Test-set gender accuracy folded to be >= 0.5 (leakage is symmetric
        around chance, so accuracy below 0.5 is just as informative).
    """
    def _flip_labels(data):
        # Randomly flip each ground-truth object bit with probability p.
        # NOTE(review): relies on module-level `object2id` for the number of
        # object classes — presumably len(object2id) == args.num_object.
        for i in range(len(data)):
            for j in range(len(object2id)):
                if random.random() < p:
                    data.object_ann[i, j] = 1 - data.object_ann[i, j]

    train_data = copy.deepcopy(train_data_ori)
    val_data = copy.deepcopy(val_data_ori)
    test_data = copy.deepcopy(test_data_ori)

    # Previously three identical copy-pasted loops; factored into one helper.
    _flip_labels(train_data)
    _flip_labels(val_data)
    _flip_labels(test_data)

    # Data samplers
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
            shuffle=True, num_workers=6, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size,
            shuffle=False, num_workers=4, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size,
            shuffle=False, num_workers=4, pin_memory=True)

    model = GenderClassifier(args, args.num_object)
    model = model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay=1e-5)

    model_save_dir = args.save_dir
    train_genderclassifier(model, args.num_epochs, optimizer, train_loader, val_loader,
            model_save_dir, args.print_every)

    # Evaluate the best checkpoint saved by train_genderclassifier.
    model.load_state_dict(
            torch.load(model_save_dir + '/model_best.pth.tar')['state_dict'])
    loss, acc = epoch_pass(0, test_loader, model, None, False, print_every=500)

    # Fold accuracy below chance upward: leakage is symmetric around 0.5.
    acc = 0.5 + abs(acc - 0.5)
    print(' when p is {}, gender acc on test set: {}'.format(p, acc * 100))

    return acc
def main():
    """Measure dataset leakage: train a gender classifier on object
    annotations for several rounds and report mean/std test accuracy.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--save_dir', type=str, default='./dataset_leakage',
            help='path for saving checkpoints')
    parser.add_argument('--num_rounds', type=int, default=5)
    parser.add_argument('--balanced', action='store_true')
    parser.add_argument('--ratio', type=str, default='0')
    parser.add_argument('--num_object', type=int, default=79)
    parser.add_argument('--annotation_dir', type=str, default='./data',
            help='annotation files path')
    parser.add_argument('--image_dir', default='./data', help='image directory')
    parser.add_argument('--hid_size', type=int, default=300)
    parser.add_argument('--no_image', action='store_true')
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--learning_rate', type=float, default=0.00005)
    parser.add_argument('--print_every', type=int, default=500)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--image_size', type=int, default=256)
    args = parser.parse_args()

    # Force the flags the leakage measurement requires, regardless of CLI.
    args.gender_balanced = True  # always True as we want to compute the leakage
    args.no_image = True
    args.blur = False
    args.blackout_face = False
    args.blackout = False
    args.blackout_box = False
    args.grayscale = False
    args.edges = False

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize])
    test_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(),
        normalize])

    acc_list = []
    for i in range(args.num_rounds):
        # Data samplers for train set.
        train_data = CocoObjectGender(args, annotation_dir=args.annotation_dir,
                image_dir=args.image_dir, split='train', transform=train_transform)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                shuffle=True, num_workers=6, pin_memory=True)

        # Data samplers for val set.
        val_data = CocoObjectGender(args, annotation_dir=args.annotation_dir,
                image_dir=args.image_dir, split='val', transform=test_transform)
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size,
                shuffle=False, num_workers=4, pin_memory=True)

        # Data samplers for test set.
        test_data = CocoObjectGender(args, annotation_dir=args.annotation_dir,
                image_dir=args.image_dir, split='test', transform=test_transform)
        test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size,
                shuffle=False, num_workers=4, pin_memory=True)

        # initialize gender classifier
        model = GenderClassifier(args, args.num_object)
        model = model.cuda()

        optimizer = optim.Adam(model.parameters(), lr=args.learning_rate,
                weight_decay=1e-5)

        model_save_dir = os.path.join(args.save_dir, 'ratio_' + args.ratio)
        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)

        train_genderclassifier(model, args.num_epochs, optimizer, train_loader,
                val_loader, model_save_dir, args.print_every)

        # Reload the best checkpoint before evaluating.
        model.load_state_dict(
                torch.load(model_save_dir + '/model_best.pth.tar')['state_dict'])
        loss, acc = epoch_pass(0, test_loader, model, None, False, print_every=500)
        loss, val_acc = epoch_pass(0, val_loader, model, None, False, print_every=500)

        # Fold accuracies below chance upward (leakage is symmetric at 0.5).
        acc = 0.5 + abs(acc - 0.5)
        val_acc = 0.5 + abs(val_acc - 0.5)
        print('round {} acc on test set: {}, val acc: {}'.format(i, acc*100, val_acc*100))

        acc_list.append(acc)

    # BUG FIX: these were Python 2 print statements (`print acc_list`), a
    # syntax error under Python 3 and inconsistent with the print() calls
    # used everywhere else in this file.
    print(acc_list)
    acc_ = np.array(acc_list)
    mean_acc = np.mean(acc_)
    std_acc = np.std(acc_)
    print(mean_acc, std_acc)
def main():
    """Measure model leakage: extract image features from a trained encoder
    and train attacker classifiers to predict gender from them, over several
    rounds; report per-feature-type mean/std attacker accuracy.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp_id', type=str,
            help='experiment id, e.g. conv4_300_1.0_0.2_1')
    parser.add_argument('--num_rounds', type=int, default=5)
    parser.add_argument('--annotation_dir', type=str, default='./data',
            help='annotation files path')
    parser.add_argument('--image_dir', default='./data', help='image directory')
    parser.add_argument('--gender_balanced', action='store_true',
            help='use gender balanced subset for training')
    parser.add_argument('--balanced', action='store_true',
            help='use balanced subset for training')
    parser.add_argument('--ratio', type=str, default='0')
    parser.add_argument('--num_object', type=int, default=79)
    parser.add_argument('--no_image', action='store_true')
    parser.add_argument('--blackout', action='store_true')
    parser.add_argument('--blackout_face', action='store_true')
    parser.add_argument('--blackout_box', action='store_true')
    parser.add_argument('--blur', action='store_true')
    parser.add_argument('--grayscale', action='store_true')
    parser.add_argument('--edges', action='store_true')
    parser.add_argument('--noise', action='store_true',
            help='add noise to image features')
    parser.add_argument('--noise_scale', type=float, default=0.2,
            help='std in gaussian noise')
    parser.add_argument('--hid_size', type=int, default=300,
            help='linear layer dimension for attacker')
    ## training setting for attacker
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--finetune', action='store_true')
    parser.add_argument('--learning_rate', type=float, default=0.00005,
            help='attacker learning rate')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--image_size', type=int, default=256)
    parser.add_argument('--seed', type=int, default=1)
    args = parser.parse_args()

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize])
    test_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(),
        normalize])

    # Build the encoder and load the best checkpoint for this experiment.
    encoder = ObjectMultiLabelEncoder(args, args.num_object).cuda()
    model_path = os.path.join('./models', args.exp_id)
    if os.path.isfile(os.path.join(model_path, 'model_best.pth.tar')):
        print("=> loading encoder from '{}'".format(model_path))
        checkpoint = torch.load(os.path.join(model_path, 'model_best.pth.tar'))
        best_score = checkpoint['best_performance']
        encoder.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
    else:
        # NOTE(review): execution continues with a randomly initialized
        # encoder here — presumably intentional best-effort; confirm.
        print("=> no checkpoint found at '{}'".format(model_path))
    encoder.eval()

    # Data samplers.
    val_data = CocoObjectGender(args, annotation_dir=args.annotation_dir,
            image_dir=args.image_dir, split='val', transform=test_transform)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size,
            shuffle=False, num_workers=4, pin_memory=True)

    test_data = CocoObjectGender(args, annotation_dir=args.annotation_dir,
            image_dir=args.image_dir, split='test', transform=test_transform)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size,
            shuffle=False, num_workers=4, pin_memory=True)

    print('val set performance:')
    test(args, encoder, val_loader)
    print('test set performance:')
    test(args, encoder, test_loader)

    acc_list = {}
    #acc_list['image_feature'] = []
    acc_list['potential'] = []

    # BUG FIX: the '_noise' suffix used to be appended to args.exp_id INSIDE
    # the round loop, so every round after the first saved/loaded the attacker
    # under a different, repeatedly-suffixed directory
    # (e.g. '..._noise0.2_noise0.2').  Compute the save dir once, here.
    # (This runs after model_path is set, so encoder loading is unaffected.)
    if args.noise:
        args.exp_id += '_noise' + str(args.noise_scale)
    model_save_dir = os.path.join('./attacker', str(args.exp_id))
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)

    args.gender_balanced = True
    for i in range(args.num_rounds):
        # Data samplers for train set.
        train_data = CocoObjectGender(args, annotation_dir=args.annotation_dir,
                image_dir=args.image_dir, split='train', transform=train_transform)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                shuffle=True, num_workers=6, pin_memory=True)

        # Data samplers for val set.
        val_data = CocoObjectGender(args, annotation_dir=args.annotation_dir,
                image_dir=args.image_dir, split='val', transform=test_transform)
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size,
                shuffle=False, num_workers=4, pin_memory=True)

        # Data samplers for test set.
        test_data = CocoObjectGender(args, annotation_dir=args.annotation_dir,
                image_dir=args.image_dir, split='test', transform=test_transform)
        test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size,
                shuffle=False, num_workers=4, pin_memory=True)

        image_features_path = os.path.join(model_path, 'image_features')
        if not os.path.exists(image_features_path):
            os.makedirs(image_features_path)

        # get image features from encoder
        generate_image_feature('train', image_features_path, train_loader, encoder)
        generate_image_feature('val', image_features_path, val_loader, encoder)
        generate_image_feature('test', image_features_path, test_loader, encoder)

        # Re-wrap the extracted features as datasets for the attacker.
        train_data = CocoObjectGenderFeature(args, image_features_path, split='train')
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                shuffle=True, num_workers=6, pin_memory=True)

        val_data = CocoObjectGenderFeature(args, image_features_path, split='val')
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size,
                shuffle=True, num_workers=6, pin_memory=True)

        test_data = CocoObjectGenderFeature(args, image_features_path, split='test')
        test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size,
                shuffle=True, num_workers=6, pin_memory=True)

        for feature_type in acc_list.keys():
            attacker = GenderClassifier(args, args.num_object)
            attacker = attacker.cuda()

            optimizer = optim.Adam(attacker.parameters(), lr=args.learning_rate,
                    weight_decay=1e-5)

            train_attacker(args.num_epochs, optimizer, attacker, encoder,
                    train_loader, val_loader, model_save_dir, feature_type)

            # evaluate best attacker on balanced test split
            best_attacker = torch.load(model_save_dir + '/best_attacker.pth.tar')
            attacker.load_state_dict(best_attacker['state_dict'])
            _, val_acc = epoch_pass(0, val_loader, attacker, encoder, None, False,
                    feature_type)
            val_acc = 0.5 + abs(val_acc - 0.5)
            _, test_acc = epoch_pass(0, test_loader, attacker, encoder, None, False,
                    feature_type)
            test_acc = 0.5 + abs(test_acc - 0.5)
            acc_list[feature_type].append(test_acc)
            print('round {} feature type: {}, test acc: {}, val acc: {}'.format(i,
                    feature_type, test_acc, val_acc))

    for feature_type in acc_list.keys():
        print(acc_list[feature_type], np.std(np.array(acc_list[feature_type])))
        print('{} average leakage: {}'.format(feature_type,
                np.mean(np.array(acc_list[feature_type]))))