def setUp_private_model(
    self,
    noise_multiplier=1.3,
    max_grad_norm=1.0,
):
    """Build a DP twin of ``self.original_model`` and train it for one pass.

    Creates ``self.private_model`` (same weights as the original), attaches a
    PrivacyEngine to a fresh SGD optimizer, runs one epoch over ``self.dl``
    and records the post-training per-parameter gradient norms in
    ``self.private_grad_norms``.

    Args:
        noise_multiplier: sigma for the DP noise added per step.
        max_grad_norm: per-sample gradient clipping bound.
    """
    # Deep copy
    self.private_model = SampleConvNet()  # create the structure
    self.private_model.load_state_dict(
        self.original_model.state_dict())  # fill it
    self.private_optimizer = torch.optim.SGD(
        self.private_model.parameters(), lr=self.LR, momentum=0)
    privacy_engine = PrivacyEngine(
        self.private_model,
        self.dl,
        alphas=self.ALPHAS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(self.private_optimizer)
    # NOTE(review): assumes self.dl yields (input, target) batches — confirm.
    for x, y in self.dl:
        logits = self.private_model(x)
        loss = self.criterion(logits, y)
        loss.backward()  # puts grad in self.private_model.parameters()
        self.private_optimizer.step()
    # Gradient norms left over from the last backward/step, one per parameter.
    self.private_grad_norms = torch.stack(
        [p.grad.norm() for p in self.private_model.parameters()], dim=-1)
def prepare_private_training(model, train_loaders, num_workers, batch_size, alphas, lr):
    """Create per-worker private copies of ``model`` with DP-enabled optimizers.

    Args:
        model: shared model to replicate (left untouched).
        train_loaders: one DataLoader per worker; each loader's dataset length
            is used as the DP sample size.
        num_workers: number of independent worker copies to create.
        batch_size: batch size reported to every PrivacyEngine.
        alphas: Renyi orders for the privacy accountant.
        lr: SGD learning rate for every worker optimizer.

    Returns:
        (model_pool, optimizer_pool): lists of length ``num_workers``; each
        optimizer already has a PrivacyEngine attached.
    """
    # Deep copies keep the workers wholly independent of the shared model.
    # FIX: the original looped with a variable named `model`, silently
    # shadowing (and rebinding) the `model` parameter; also removed the
    # dead `priv_eng_pool` list that was never filled nor returned.
    model_pool = [copy.deepcopy(model) for _ in range(num_workers)]
    # A fresh SGD instance per copy so updates are applied to the right model.
    optimizer_pool = [optim.SGD(m.parameters(), lr=lr) for m in model_pool]
    # Attaching a PrivacyEngine replaces the optimizer's gradient step with a
    # DP-enabled one (per-sample clipping + noise).
    for i, (worker_model, worker_opt) in enumerate(zip(model_pool, optimizer_pool)):
        engine = PrivacyEngine(worker_model,
                               batch_size=batch_size,
                               sample_size=len(train_loaders[i].dataset),
                               alphas=alphas,
                               noise_multiplier=1.0,
                               max_grad_norm=1.0)
        engine.attach(worker_opt)
    return model_pool, optimizer_pool
def hybrid_model(arch="vgg16", hidden_units=4096, class_idx_mapping=None, args=None):
    """Return a pre-trained ``arch`` backbone with a fresh 2-layer classifier.

    The backbone is frozen (requires_grad=False); only the new classifier head
    is trained. When ``args.disable_dp`` is false, a PrivacyEngine is attached
    to the optimizer for differentially private training of the head.

    Args:
        arch: torchvision model constructor name, e.g. "vgg16".
        hidden_units: width of the hidden fully connected layer.
        class_idx_mapping: class->index mapping stored on the returned model.
        args: namespace providing device, learning_rate, batch_size,
            sample_size, noise, clip and disable_dp.

    Returns:
        (model, optimizer) ready for training.

    Raises:
        AttributeError: if ``arch`` is not a torchvision model name.
        ValueError: if the classifier input size cannot be inferred.
    """
    # FIX: getattr() replaces the original exec() — same dynamic lookup,
    # no arbitrary-code-execution surface and no string evaluation.
    model = getattr(models, arch)(pretrained=True)

    # Freeze existing model parameters: only the new head will be trained.
    for param in model.parameters():
        param.requires_grad = False

    # Infer the classifier input width from the backbone's last child module.
    last_child = list(model.children())[-1]
    if isinstance(last_child, nn.Linear):
        input_features = last_child.in_features
    elif isinstance(last_child, nn.Sequential):
        input_features = last_child[0].in_features
    else:
        # FIX: previously fell through and crashed later with NameError.
        raise ValueError(
            "Cannot infer classifier input size from last child of type "
            f"{type(last_child).__name__}")

    # New trainable head (for vgg16: input_features = 25088; 102 classes).
    classifier = nn.Sequential(
        OrderedDict([
            ('fc1', nn.Linear(input_features, hidden_units)),
            ('relu', nn.ReLU()),
            ('fc2', nn.Linear(hidden_units, 102)),
            ('output', nn.LogSoftmax(dim=1)),
        ]))
    model.classifier = classifier
    model.class_idx_mapping = class_idx_mapping
    model = model.to(args.device)

    # Only the (trainable) head's parameters are handed to the optimizer.
    optimizer = optim.SGD(model.classifier.parameters(), lr=args.learning_rate)
    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            classifier,  # attaching to the head only (the trained part)
            batch_size=args.batch_size,
            sample_size=args.sample_size,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.noise,
            max_grad_norm=args.clip,
        )
        privacy_engine.attach(optimizer)
    return model, optimizer
def dp_update_weights(self, model, global_round, args):
    """Run local (optionally DP) training epochs on this client's data.

    SGD gets a PrivacyEngine attached (DP-SGD); Adam trains without DP.

    Args:
        model: local model, trained in place.
        global_round: current federated round (kept for logging parity).
        args: must provide ``delta`` for the privacy accountant.

    Returns:
        (model.state_dict(), mean loss over local epochs)

    Raises:
        ValueError: if ``self.args.optimizer`` is neither 'sgd' nor 'adam'.
    """
    model.train()
    epoch_loss = []

    # Set optimizer for the local updates.
    if self.args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=self.args.lr,
                                    momentum=0.5)
        privacy_engine = PrivacyEngine(
            model,
            self.trainloader,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=0.65,
            max_grad_norm=1.0,
        )
        privacy_engine.attach(optimizer)
    elif self.args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=self.args.lr,
                                     weight_decay=1e-4)
    else:
        # FIX: fail fast instead of hitting UnboundLocalError below.
        raise ValueError(f"unsupported optimizer: {self.args.optimizer!r}")

    # FIX: loop variable renamed from `iter` (shadowed the builtin).
    for local_epoch in range(self.args.local_ep):
        batch_loss = []
        for batch_idx, (images, labels) in enumerate(self.trainloader):
            images, labels = images.to(self.device), labels.to(self.device)
            model.zero_grad()
            log_probs = model(images)
            loss = self.criterion(log_probs, labels)
            loss.backward()
            optimizer.step()
            self.logger.add_scalar('loss', loss.item())
            batch_loss.append(loss.item())
        epoch_loss.append(sum(batch_loss) / len(batch_loss))

        # FIX: the accountant only exists when an engine is attached (SGD
        # path); the original accessed optimizer.privacy_engine
        # unconditionally and crashed for Adam.
        if hasattr(optimizer, 'privacy_engine'):
            epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(
                args.delta)
            print(f"(Ɛ = {epsilon}, 𝛿 = {args.delta}) for α = {best_alpha}")

    return model.state_dict(), sum(epoch_loss) / len(epoch_loss)
def train(self):
    """ train/update the curr model of the agent """
    optimizer = optim.Adadelta(self.model.parameters(), lr=self.lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=self.gamma)
    loss_func = nn.CrossEntropyLoss()
    if self.dp:
        # Clear any leftover gradient state before attaching the engine.
        self.model.zero_grad()
        optimizer.zero_grad()
        clear_backprops(self.model)
        privacy_engine = PrivacyEngine(
            self.model,
            batch_size=self.bs,
            sample_size=self.num_train_samples,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=self.sigma,
            max_grad_norm=self.C)
        privacy_engine.attach(optimizer)
    if self.device == 'cuda':
        self.model.to('cuda')
    self.model.train()
    for _ in range(self.epochs):
        num_batches = len(self.train_loader)
        # Default: train on every batch. Under FedAvg: train on exactly one
        # batch this epoch, cycling through batch indices across calls via
        # the persistent self.random_idx counter.
        start, end = 0, num_batches
        if self.fed_avg:
            start, end = self.random_idx, self.random_idx + 1
            self.random_idx += 1
            if self.random_idx >= num_batches:
                self.random_idx = 0
        with torch.set_grad_enabled(True):
            for batch_idx, (data, target) in enumerate(self.train_loader):
                # Only the [start, end) window of batches is trained on.
                if start <= batch_idx < end:
                    if self.device == 'cuda':
                        data, target = data.to('cuda'), target.to('cuda')
                    optimizer.zero_grad()
                    output = self.model(data)
                    loss = loss_func(output, target)
                    loss.backward()
                    optimizer.step()
                    self.logs['train_loss'].append(copy.deepcopy(loss.item()))
        scheduler.step()
        self.lr = get_lr(optimizer)
    if self.fl_train is False:
        # NOTE: `eval` here is a project helper (model evaluation), not the
        # Python builtin.
        curr_acc = eval(self.model, self.test_loader, self.device)
        self.logs['val_acc'].append(copy.deepcopy(curr_acc))
def test_model_validator(self):
    """
    The engine must refuse to attach when the model contains layers it
    does not support (e.g. the BatchNorm layers inside resnet18).
    """
    engine = PrivacyEngine(
        models.resnet18(),
        batch_size=self.BATCH_SIZE,
        sample_size=self.DATA_SIZE,
        alphas=self.ALPHAS,
        noise_multiplier=1.3,
        max_grad_norm=1,
    )
    with self.assertRaises(IncompatibleModuleException):
        engine.attach(self.private_optimizer)
def train(self): """ train/update the curr model of the agent """ #optimizer = optim.Adadelta(self.model.parameters(), lr=self.lr) optimizer = optim.Adam(self.model.parameters(), lr=1e-3) #scheduler = StepLR(optimizer, step_size=1, gamma=self.gamma) if self.dp: self.model.zero_grad() optimizer.zero_grad() clear_backprops(self.model) privacy_engine = PrivacyEngine( self.model, batch_size=self.bs, sample_size=self.num_train_samples, alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)), noise_multiplier=self.sigma, max_grad_norm=self.C) privacy_engine.attach(optimizer) if self.device == 'cuda': self.model.to('cuda') self.model.train() for _ in range(self.epochs): num_batches = len(self.train_loader) default_list = list(range(num_batches)) if self.fed_avg: default_list = np.random.choice(default_list, 1, replace=False) for batch_idx, (data, target) in enumerate(self.train_loader): if batch_idx in default_list: if self.device == 'cuda': data, target = data.to('cuda'), target.to('cuda') optimizer.zero_grad() output = self.model(data) loss = F.nll_loss(output, target) loss.backward() optimizer.step() self.logs['train_loss'].append(copy.deepcopy(loss.item())) #scheduler.step() self.lr = get_lr(optimizer) if self.fl_train is False: curr_acc = eval(self.model, self.test_loader, self.device) self.logs['val_acc'].append(copy.deepcopy(curr_acc))
def test_privacy_engine_class_example(self):
    # IMPORTANT: When changing this code you also need to update
    # the docstring for torchdp.privacy_engine.PrivacyEngine
    batch_size, sample_size = 8, 64
    example_model = torch.nn.Linear(16, 32)  # An example model
    sgd = torch.optim.SGD(example_model.parameters(), lr=0.05)
    engine = PrivacyEngine(
        example_model,
        batch_size,
        sample_size,
        alphas=range(2, 32),
        noise_multiplier=1.3,
        max_grad_norm=1.0,
    )
    # That's it! Now it's business as usual.
    engine.attach(sgd)
def train_model(model, dataloader, lr, epoch_num, dldp_setting=(0.0, 5.0),
                verbose=True, testloader=None):
    """Train ``model`` with Adam, optionally under differential privacy.

    Args:
        model: module exposing ``forward`` and a ``loss(pred, target)`` method.
        dataloader: iterable of ``(inputs, binary labels)`` batches.
        lr: Adam learning rate.
        epoch_num: number of passes over ``dataloader``.
        dldp_setting: ``(noise_multiplier, max_grad_norm)``; DP is enabled
            when the noise multiplier is non-zero.
        verbose: print per-epoch loss / accuracy / AUC.
        testloader: if given, evaluate after every epoch.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    if dldp_setting[0] != 0:
        from torchdp import PrivacyEngine
        # FIX: `alphas` must be a list of Renyi orders for the moments
        # accountant, not the scalar 0.0 the original passed; use the
        # standard order grid used elsewhere in this codebase.
        privacy_engine = PrivacyEngine(
            model,
            dataloader,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=dldp_setting[0],
            max_grad_norm=dldp_setting[1])
        privacy_engine.attach(optimizer)
    for epoch in range(epoch_num):
        cum_loss = 0.0
        cum_acc = 0.0
        cum_pred = []
        cum_lab = []
        tot = 0.0
        for i, (x_in, y_in) in enumerate(dataloader):
            B = x_in.size()[0]
            pred = model(x_in).squeeze(1)
            loss = model.loss(pred, y_in)
            model.zero_grad()
            loss.backward()
            optimizer.step()
            # Running loss weighted by batch size; accuracy counts positive
            # logits (> 0) as class 1.
            cum_loss += loss.item() * B
            cum_acc += ((pred > 0).cpu().long().eq(y_in)).sum().item()
            cum_pred = cum_pred + list(pred.detach().cpu().numpy())
            cum_lab = cum_lab + list(y_in.numpy())
            tot = tot + B
        if verbose:
            print("Epoch %d, loss = %.4f, acc = %.4f, auc = %.4f" %
                  (epoch, cum_loss / tot, cum_acc / tot,
                   roc_auc_score(cum_lab, cum_pred)))
        if testloader is not None:
            print(eval_binary_model(model, testloader))
            # eval_binary_model presumably flips to eval mode; restore train.
            model.train()
def setUpOptimizer(self,
                   model: nn.Module,
                   data_loader: DataLoader,
                   privacy_engine: bool = False):
    """Return a zeroed SGD optimizer (lr=1e-2, momentum=0.9) for *model*,
    optionally wrapped by a PrivacyEngine sized from *data_loader*."""
    # sample parameter values
    sgd = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
    sgd.zero_grad()
    if not privacy_engine:
        return sgd
    # Standard grid of Renyi orders for the moments accountant.
    orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
    engine = PrivacyEngine(
        model,
        batch_size=data_loader.batch_size,
        sample_size=len(data_loader.dataset),
        alphas=orders,
        noise_multiplier=1.3,
        max_grad_norm=1,
    )
    engine.attach(sgd)
    return sgd
def setUpOptimizer(
    self, model: nn.Module, data_loader: DataLoader, privacy_engine: bool = False
):
    """Build a zeroed SGD optimizer (lr=1e-2, momentum=0.9) for *model*;
    when *privacy_engine* is true, attach a PrivacyEngine driven by the
    loader's batch and dataset sizes."""
    # sample parameter values
    sgd = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
    sgd.zero_grad()
    if not privacy_engine:
        return sgd
    engine = PrivacyEngine(
        model,
        # pyre-fixme[6]: Expected `int` for 2nd param but got `Optional[int]`.
        batch_size=data_loader.batch_size,
        # pyre-fixme[6]: Expected `Sized` for 1st param but got
        # `Dataset[typing.Any]`.
        sample_size=len(data_loader.dataset),
        # pyre-fixme[6]: `+` is not supported for operand types
        # `List[float]` and `List[int]`.
        alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        noise_multiplier=1.3,
        max_grad_norm=1,
    )
    engine.attach(sgd)
    return sgd
def setUp_init_model(self,
                     private=False,
                     state_dict=None,
                     model=None,
                     **privacy_engine_kwargs):
    """Build a (model, SGD optimizer) pair for tests.

    Uses *model* when supplied, otherwise a fresh SampleConvNet; optionally
    loads *state_dict* and, when *private*, attaches a PrivacyEngine
    (falling back to ``self.privacy_default_params`` if no kwargs given).
    """
    net = model or SampleConvNet()
    sgd = torch.optim.SGD(net.parameters(), lr=self.LR, momentum=0)
    if state_dict:
        net.load_state_dict(state_dict)
    if private:
        # Empty kwargs -> use the suite's defaults.
        engine_kwargs = privacy_engine_kwargs or self.privacy_default_params
        engine = PrivacyEngine(
            net,
            batch_size=self.BATCH_SIZE,
            sample_size=self.DATA_SIZE,
            alphas=self.ALPHAS,
            **engine_kwargs,
        )
        engine.attach(sgd)
    return net, sgd
def test_privacy_engine_virtual_step_example(self):
    # IMPORTANT: When changing this code you also need to update
    # the docstring for torchdp.privacy_engine.PrivacyEngine.virtual_step()
    model = nn.Linear(16, 2)
    batch_size = 64
    sample_size = 256
    # Synthetic dataset: 64 mini-batches of 4 samples each.
    dataloader = []
    for _ in range(64):
        dataloader.append((torch.randn(4, 16), torch.randint(0, 2, (4, ))))
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
    engine = PrivacyEngine(
        model,
        batch_size,
        sample_size,
        alphas=range(5, 64),
        noise_multiplier=0.8,
        max_grad_norm=0.5,
    )
    engine.attach(optimizer)
    for i, (X, y) in enumerate(dataloader):
        loss = criterion(model(X), y)
        loss.backward()
        if i % 16 == 15:
            # Real update every 16 batches: calls the engine's step().
            optimizer.step()
            optimizer.zero_grad()
        else:
            # Accumulate only: calls the engine's virtual_step().
            optimizer.virtual_step()
def main():
    """CLI entry point: train ResNet18 on CIFAR10, optionally with DP-SGD,
    logging clipping/accuracy stats to Tensorboard and checkpointing the
    best top-1 accuracy."""
    parser = argparse.ArgumentParser(description="PyTorch CIFAR10 DP Training")
    parser.add_argument("-j", "--workers", default=2, type=int, metavar="N",
                        help="number of data loading workers (default: 2)")
    parser.add_argument("--epochs", default=90, type=int, metavar="N",
                        help="number of total epochs to run")
    parser.add_argument("--start-epoch", default=1, type=int, metavar="N",
                        help="manual epoch number (useful on restarts)")
    parser.add_argument("-b", "--batch-size", default=256, type=int, metavar="N",
                        help="mini-batch size (default: 256), this is the total "
                        "batch size of all GPUs on the current node when "
                        "using Data Parallel or Distributed Data Parallel")
    parser.add_argument("-na", "--n_accumulation_steps", default=1, type=int,
                        metavar="N",
                        help="number of mini-batches to accumulate into an effective batch")
    parser.add_argument("--lr", "--learning-rate", default=0.001, type=float,
                        metavar="LR", help="initial learning rate", dest="lr")
    parser.add_argument("--momentum", default=0.9, type=float, metavar="M",
                        help="SGD momentum")
    parser.add_argument("--wd", "--weight-decay", default=5e-4, type=float,
                        metavar="W", help="SGD weight decay (default: 1e-4)",
                        dest="weight_decay")
    parser.add_argument("-p", "--print-freq", default=10, type=int, metavar="N",
                        help="print frequency (default: 10)")
    parser.add_argument("--resume", default="", type=str, metavar="PATH",
                        help="path to latest checkpoint (default: none)")
    parser.add_argument("-e", "--evaluate", dest="evaluate", action="store_true",
                        help="evaluate model on validation set")
    parser.add_argument("--seed", default=None, type=int,
                        help="seed for initializing training. ")
    parser.add_argument("--device", type=str, default="cuda",
                        help="GPU ID for this process (default: 'cuda')")
    parser.add_argument("--sigma", type=float, default=1.0, metavar="S",
                        help="Noise multiplier (default 1.0)")
    parser.add_argument("-c", "--max-per-sample-grad_norm", type=float,
                        default=1.0, metavar="C",
                        help="Clip per-sample gradients to this norm (default 1.0)")
    parser.add_argument("--disable-dp", action="store_true", default=False,
                        help="Disable privacy training and just train with vanilla SGD")
    parser.add_argument("--delta", type=float, default=1e-5, metavar="D",
                        help="Target delta (default: 1e-5)")
    parser.add_argument("--checkpoint-file", type=str, default="checkpoint",
                        help="path to save check points")
    parser.add_argument("--data-root", type=str, default="../cifar10",
                        help="Where CIFAR10 is/will be stored")
    parser.add_argument("--log-dir", type=str, default="",
                        help="Where Tensorboard log will be stored")
    parser.add_argument("--optim", type=str, default="Adam",
                        help="Optimizer to use (Adam, RMSprop, SGD)")
    args = parser.parse_args()

    # The following few lines, enable stats gathering about the run
    # 1. where the stats should be logged
    stats.set_global_summary_writer(
        tensorboard.SummaryWriter(os.path.join("/tmp/stat", args.log_dir)))
    # 2. enable stats
    stats.add(
        # stats about gradient norms aggregated for all layers
        stats.Stat(stats.StatType.CLIPPING, "AllLayers", frequency=0.1),
        # stats about gradient norms per layer
        stats.Stat(stats.StatType.CLIPPING, "PerLayer", frequency=0.1),
        # stats about clipping
        stats.Stat(stats.StatType.CLIPPING, "ClippingStats", frequency=0.1),
        # stats on training accuracy
        stats.Stat(stats.StatType.TRAIN, "accuracy", frequency=0.01),
        # stats on validation accuracy
        stats.Stat(stats.StatType.TEST, "accuracy"),
    )

    # The following lines enable stat gathering for the clipping process
    # and set a default of per layer clipping for the Privacy Engine
    clipping = {"clip_per_layer": False, "enable_stat": True}

    augmentations = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalize = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ]
    # NOTE(review): augmentations are applied only when DP is DISABLED
    # (conditional binds as `(augmentations + normalize) if ... else
    # normalize`) — confirm this interaction with per-sample gradients is
    # intentional.
    train_transform = transforms.Compose(
        augmentations + normalize if args.disable_dp else normalize)
    test_transform = transforms.Compose(normalize)

    train_dataset = CIFAR10(root=args.data_root, train=True, download=True,
                            transform=train_transform)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        drop_last=True,
    )
    test_dataset = CIFAR10(root=args.data_root, train=False, download=True,
                           transform=test_transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
    )

    best_acc1 = 0
    device = torch.device(args.device)
    # convert_batchnorm_modules rewrites BatchNorm layers — presumably to a
    # DP-compatible normalization; verify against the utils implementation.
    model = utils.convert_batchnorm_modules(models.resnet18(num_classes=10))
    model = model.to(device)

    if args.optim == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optim == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optim == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError(
            "Optimizer not recognized. Please check spelling")

    if not args.disable_dp:
        # Effective batch size accounts for gradient accumulation steps.
        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size * args.n_accumulation_steps,
            sample_size=len(train_dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            **clipping,
        )
        privacy_engine.attach(optimizer)

    for epoch in range(args.start_epoch, args.epochs + 1):
        train(args, model, train_loader, optimizer, epoch, device)
        top1_acc = test(args, model, test_loader, device)

        # remember best acc@1 and save checkpoint
        is_best = top1_acc > best_acc1
        best_acc1 = max(top1_acc, best_acc1)
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": "ResNet18",
                "state_dict": model.state_dict(),
                "best_acc1": best_acc1,
                "optimizer": optimizer.state_dict(),
            },
            is_best,
            filename=args.checkpoint_file + ".tar",
        )
def main():
    """CLI entry point: train SampleConvNet on MNIST with optional DP-SGD,
    averaging test accuracy over --n-runs runs and optionally saving the
    per-run results under models/."""
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument("-b", "--batch-size", type=int, default=64, metavar="B",
                        help="input batch size for training (default: 64)")
    parser.add_argument("--test-batch-size", type=int, default=1024,
                        metavar="TB",
                        help="input batch size for testing (default: 1024)")
    parser.add_argument("-n", "--epochs", type=int, default=10, metavar="N",
                        help="number of epochs to train (default: 10)")
    parser.add_argument("-r", "--n-runs", type=int, default=1, metavar="R",
                        help="number of runs to average on (default: 1)")
    parser.add_argument("--lr", type=float, default=.1, metavar="LR",
                        help="learning rate (default: .1)")
    parser.add_argument("--sigma", type=float, default=1.0, metavar="S",
                        help="Noise multiplier (default 1.0)")
    parser.add_argument("-c", "--max-per-sample-grad_norm", type=float,
                        default=1.0, metavar="C",
                        help="Clip per-sample gradients to this norm (default 1.0)")
    parser.add_argument("--delta", type=float, default=1e-5, metavar="D",
                        help="Target delta (default: 1e-5)")
    parser.add_argument("--device", type=str, default="cuda",
                        help="GPU ID for this process (default: 'cuda')")
    parser.add_argument("--save-model", action="store_true", default=False,
                        help="Save the trained model (default: false)")
    parser.add_argument("--disable-dp", action="store_true", default=False,
                        help="Disable privacy training and only train with vanilla SGD")
    parser.add_argument("--data-root", type=str, default="../mnist",
                        help="Where MNIST is/will be stored")
    args = parser.parse_args()
    device = torch.device(args.device)
    kwargs = {"num_workers": 1, "pin_memory": True}

    # Create directory for model files
    os.system("mkdir -p models")

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )

    run_results = []
    for _ in range(args.n_runs):
        # Fresh model/optimizer (and privacy engine) per run.
        model = SampleConvNet().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            privacy_engine = PrivacyEngine(
                model,
                batch_size=args.batch_size,
                sample_size=len(train_loader.dataset),
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
        run_results.append(test(args, model, device, test_loader))

    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100))

    if args.save_model:
        # File to export results
        file = (
            f"{model.name()}_lr{args.lr}_nm{args.sigma}_"
            f"cl{args.max_per_sample_grad_norm}_bs{args.batch_size}_ep{args.epochs}_"
        )
        file += "noDP_" if args.disable_dp else "DP_"
        file += str(datetime.datetime.today()).replace(' ', '_')
        torch.save(run_results, f"models/{file}.pt")
def train(architecture='softmax'):
    """Train one of four small classifiers on pre-extracted 112x92 face
    tensors (40 classes), optionally with DP when a sigma is passed as
    argv[1]; plots loss/accuracy curves, then runs model inversion."""
    # Chained conditional expressions pick the architecture.
    n = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=112 * 92, out_features=40),
    ) if architecture == 'softmax' else nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=1, kernel_size=5, padding=2,
                  stride=1),
        nn.Flatten(),
        nn.Linear(in_features=112 * 92, out_features=40),
    ) if architecture == 'conv 1 channel' else nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=3, kernel_size=5, padding=2,
                  stride=1),
        nn.Flatten(),
        nn.Linear(in_features=112 * 92 * 3, out_features=40),
    ) if architecture == 'conv 3 channel' else nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=112 * 92, out_features=1500),
        nn.ReLU(),
        nn.Linear(in_features=1500, out_features=40),
    )
    lr = 0.01
    optimizer = torch.optim.Adam(n.parameters(), lr=lr)
    # Pre-extracted feature/label tensors stored next to this script.
    train_features = torch.load(join(os.curdir, dirname(__file__),
                                     f'train_features{os.extsep}pt')).float()
    train_labels = torch.load(join(os.curdir, dirname(__file__),
                                   f'train_labels{os.extsep}pt')).long()
    test_features = torch.load(join(os.curdir, dirname(__file__),
                                    f'test_features{os.extsep}pt')).float()
    test_labels = torch.load(join(os.curdir, dirname(__file__),
                                  f'test_labels{os.extsep}pt')).long()
    if len(sys.argv) > 1:
        # Full-batch DP training: batch_size equals sample_size.
        privacy_engine = PrivacyEngine(
            n,
            batch_size=train_labels.shape[0],
            sample_size=train_labels.shape[0],
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=float(sys.argv[1]),
            max_grad_norm=1.5,
        )
        privacy_engine.attach(optimizer)
    train_losses = []
    test_losses = []
    train_accuracy = []
    test_accuracy = []
    print(f'Train Network {architecture} with learning rate {lr}' +
          (f' and sigma {float(sys.argv[1])}' if len(sys.argv) > 1 else ''))
    num_epochs = 101
    with tqdm(total=num_epochs, dynamic_ncols=True) as pbar:
        for i in range(num_epochs):
            # Full-batch forward/backward pass per epoch.
            pred_train_labels = n(train_features)
            loss = F.cross_entropy(pred_train_labels, train_labels)
            train_losses.append(loss.item())
            train_accuracy.append(
                (pred_train_labels.max(axis=1).indices == train_labels
                 ).sum().item() / len(train_labels))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 5 == 0:
                # Periodic held-out evaluation.
                n.eval()
                with torch.no_grad():
                    pred_test_labels = n(test_features)
                    loss = F.cross_entropy(pred_test_labels, test_labels)
                    test_losses.append((i, loss.item()))
                    test_accuracy.append(
                        (i, (pred_test_labels.max(axis=1).indices == test_labels
                             ).sum().item() / len(test_labels)))
                n.train()
            if len(sys.argv) > 1:
                # Report the privacy budget spent so far.
                delta = 1e-5
                epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(delta)
                pbar.set_description(
                    f'Loss = {np.mean(train_losses):.4f}, ε = {epsilon:.2f}')
            pbar.update(1)
    with torch.no_grad():
        n.eval()
        print(f'Train performance: {(n(train_features).max(axis=1).indices == train_labels).sum().item() / len(train_labels) * 100:.2f}%')
        print(f'Test performance: {(n(test_features).max(axis=1).indices == test_labels).sum().item() / len(test_labels) * 100:.2f}%')
    plt.plot(range(len(train_losses)), train_losses, label='Train loss')
    plt.plot([t[0] for t in test_losses], [t[1] for t in test_losses],
             label='Validation loss')
    plt.legend()
    plt.title('Loss of training and validation')
    plt.show()
    plt.plot(range(len(train_accuracy)), train_accuracy,
             label='Train accuracy')
    plt.plot([t[0] for t in test_accuracy], [t[1] for t in test_accuracy],
             label='Validation accuracy')
    plt.legend()
    plt.title('Accuracy of training and validation')
    plt.show()
    model_invert(1, 200, 0.01, n)
def experience_mnist(config, path, param):
    """Shadow-model attack experiment on MNIST.

    Trains a DP target model, then several shadow models, fits a LightGBM
    attack classifier on the shadow outputs, and evaluates it on the target's
    outputs. Returns macro precision/recall/accuracy overall and per class.
    """
    print("START MNIST")
    use_cuda = config.general.use_cuda and torch.cuda.is_available()
    torch.manual_seed(config.general.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    print("START TRAINING TARGET MODEL")
    data_train_target = custum_MNIST(True, 0, config, '../data', train=True,
                                     download=True,
                                     transform=transforms.Compose([
                                         transforms.ToTensor(),
                                         transforms.Normalize((0.1307, ),
                                                              (0.3081, ))
                                     ]))
    data_test_target = custum_MNIST(True, 0, config, '../data', train=False,
                                    transform=transforms.Compose([
                                        transforms.ToTensor(),
                                        transforms.Normalize((0.1307, ),
                                                             (0.3081, ))
                                    ]))
    criterion = nn.CrossEntropyLoss()
    train_loader_target = torch.utils.data.DataLoader(
        data_train_target, batch_size=config.learning.batch_size, shuffle=True)
    test_loader_target = torch.utils.data.DataLoader(
        data_test_target, batch_size=config.learning.batch_size, shuffle=True)
    dataloaders_target = {
        "train": train_loader_target,
        "val": test_loader_target
    }
    dataset_sizes_target = {
        "train": len(data_train_target),
        "val": len(data_test_target)
    }
    print("TAILLE dataset", dataset_sizes_target)
    model_target = Net_mnist().to(device)
    optimizer = optim.SGD(model_target.parameters(),
                          lr=config.learning.learning_rate,
                          momentum=config.learning.momentum)
    # Add DP noise!
    privacy_engine = PrivacyEngine(
        model_target,
        batch_size=config.learning.batch_size,
        sample_size=len(train_loader_target.dataset),
        alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        noise_multiplier=1.0,  # sigma
        max_grad_norm=1.0,  # Clip per-sample gradients to this norm
    )
    privacy_engine.attach(optimizer)
    exp_lr_scheduler = lr_scheduler.StepLR(
        optimizer,
        step_size=config.learning.decrease_lr_factor,
        gamma=config.learning.decrease_lr_every)
    model_target, best_acc_target, data_test_set, label_test_set, class_test_set = train_model(
        model_target, criterion, optimizer, exp_lr_scheduler,
        dataloaders_target, dataset_sizes_target,
        num_epochs=config.learning.epochs)
    np.save(path + "/res_train_target_" + str(param) + ".npy",
            best_acc_target)

    print("START TRAINING SHADOW MODEL")
    all_shadow_models = []
    all_dataloaders_shadow = []
    data_train_set = []
    label_train_set = []
    class_train_set = []
    for num_model_sahdow in range(config.general.number_shadow_model):
        criterion = nn.CrossEntropyLoss()
        data_train_shadow = custum_MNIST(False, num_model_sahdow, config,
                                         '../data', train=True, download=True,
                                         transform=transforms.Compose([
                                             transforms.ToTensor(),
                                             transforms.Normalize((0.1307, ),
                                                                  (0.3081, ))
                                         ]))
        data_test_shadow = custum_MNIST(False, num_model_sahdow, config,
                                        '../data', train=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                            transforms.Normalize((0.1307, ),
                                                                 (0.3081, ))
                                        ]))
        train_loader_shadow = torch.utils.data.DataLoader(
            data_train_shadow, batch_size=config.learning.batch_size,
            shuffle=True)
        test_loader_shadow = torch.utils.data.DataLoader(
            data_test_shadow, batch_size=config.learning.batch_size,
            shuffle=True)
        dataloaders_shadow = {
            "train": train_loader_shadow,
            "val": test_loader_shadow
        }
        dataset_sizes_shadow = {
            "train": len(data_train_shadow),
            "val": len(data_test_shadow)
        }
        print("TAILLE dataset", dataset_sizes_shadow)
        model_shadow = Net_mnist().to(device)
        optimizer = optim.SGD(model_shadow.parameters(),
                              lr=config.learning.learning_rate,
                              momentum=config.learning.momentum)
        exp_lr_scheduler = lr_scheduler.StepLR(
            optimizer,
            step_size=config.learning.decrease_lr_factor,
            gamma=config.learning.decrease_lr_every)
        # NOTE(review): the shadow model is trained on dataloaders_target /
        # dataset_sizes_target, not the shadow loaders built just above —
        # confirm whether this is intentional.
        model_shadow, best_acc_sh, data_train_set_unit, label_train_set_unit, class_train_set_unit = train_model(
            model_shadow, criterion, optimizer, exp_lr_scheduler,
            dataloaders_target, dataset_sizes_target,
            num_epochs=config.learning.epochs)
        data_train_set.append(data_train_set_unit)
        label_train_set.append(label_train_set_unit)
        class_train_set.append(class_train_set_unit)
        np.save(
            path + "/res_train_shadow_" + str(num_model_sahdow) + "_" +
            str(param) + ".npy", best_acc_sh)
        all_shadow_models.append(model_shadow)
        all_dataloaders_shadow.append(dataloaders_shadow)

    print("START GETTING DATASET ATTACK MODEL")
    # Stack the per-shadow-model outputs into one attack training set.
    data_train_set = np.concatenate(data_train_set)
    label_train_set = np.concatenate(label_train_set)
    class_train_set = np.concatenate(class_train_set)
    #data_test_set, label_test_set, class_test_set = get_data_for_final_eval([model_target], [dataloaders_target], device)
    #data_train_set, label_train_set, class_train_set = get_data_for_final_eval(all_shadow_models, all_dataloaders_shadow, device)
    data_train_set, label_train_set, class_train_set = shuffle(
        data_train_set,
        label_train_set,
        class_train_set,
        random_state=config.general.seed)
    data_test_set, label_test_set, class_test_set = shuffle(
        data_test_set,
        label_test_set,
        class_test_set,
        random_state=config.general.seed)
    print("Taille dataset train", len(label_train_set))
    print("Taille dataset test", len(label_test_set))

    print("START FITTING ATTACK MODEL")
    model = lgb.LGBMClassifier(objective='binary',
                               reg_lambda=config.learning.ml.reg_lambd,
                               n_estimators=config.learning.ml.n_estimators)
    model.fit(data_train_set, label_train_set)
    y_pred_lgbm = model.predict(data_test_set)
    precision_general, recall_general, _, _ = precision_recall_fscore_support(
        y_pred=y_pred_lgbm, y_true=label_test_set, average="macro")
    accuracy_general = accuracy_score(y_true=label_test_set,
                                      y_pred=y_pred_lgbm)
    # Per-class breakdown of the attack metrics.
    precision_per_class, recall_per_class, accuracy_per_class = [], [], []
    for idx_class, classe in enumerate(data_train_target.classes):
        all_index_class = np.where(class_test_set == idx_class)
        precision, recall, _, _ = precision_recall_fscore_support(
            y_pred=y_pred_lgbm[all_index_class],
            y_true=label_test_set[all_index_class],
            average="macro")
        accuracy = accuracy_score(y_true=label_test_set[all_index_class],
                                  y_pred=y_pred_lgbm[all_index_class])
        precision_per_class.append(precision)
        recall_per_class.append(recall)
        accuracy_per_class.append(accuracy)
    print("END MNIST")
    return (precision_general, recall_general, accuracy_general,
            precision_per_class, recall_per_class, accuracy_per_class)
def main():
    """Train SampleConvNet on MNIST with DP-SGD (Opacus) and report test accuracy.

    Parses CLI arguments, seeds all RNG sources for determinism, builds the
    MNIST train/test loaders, runs ``args.n_runs`` independent trainings,
    prints the averaged accuracy, and saves results (and optionally weights).
    """
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    # NOTE(review): default is 10 but the help text says 14 — confirm intended value.
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=.1,
        metavar="LR",
        help="learning rate (default: .1)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default='cuda' if torch.cuda.is_available() else 'cpu',
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=True,
        help="Save the trained model",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../mnist",
        help="Where MNIST is/will be stored",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=0,
        help="Random seed for deterministic runs",
    )
    args = parser.parse_args()
    print(dumps(vars(args), indent=4, sort_keys=True))
    device = torch.device(args.device)

    # Seed every RNG source (python, numpy, torch CPU and all GPUs) so runs
    # are reproducible for a given --seed.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    kwargs = {"num_workers": 1, "pin_memory": True}
    # MNIST_MEAN / MNIST_STD are presumably module-level constants defined
    # outside this view — confirm.
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )
    run_results = []
    for _ in range(args.n_runs):
        model = SampleConvNet().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            # Attaching the engine wraps the optimizer so every step clips
            # per-sample gradients and adds Gaussian noise.
            privacy_engine = PrivacyEngine(
                model,
                batch_size=args.batch_size,
                sample_size=len(train_loader.dataset),
                alphas=[1 + x / 10.0 for x in range(1, 100)] +
                list(range(12, 64)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
        run_results.append(test(args, model, device, test_loader))
    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100))

    # Filename tag encodes the hyper-parameters of the (last) run.
    repro_str = (
        f"{model.name()}_{args.lr}_{args.sigma}_"
        f"{args.max_per_sample_grad_norm}_{args.batch_size}_{args.epochs}")
    torch.save(run_results, f"run_results_{repro_str}.pt")

    if args.save_model:
        torch.save(model.state_dict(), f"mnist_cnn_{repro_str}.pt")
def main_worker(gpu, ngpus_per_node, args):
    """Per-process training entry point (one process per GPU when distributed).

    Builds a BatchNorm-free ResNet-18, wraps it for the requested parallelism
    mode (DDP / single-GPU / DataParallel), optionally resumes from a
    checkpoint, attaches an Opacus PrivacyEngine, then runs the
    train/validate loop and checkpoints on best top-1 accuracy.

    Args:
        gpu: GPU index assigned to this process (or None for CPU/DataParallel).
        ngpus_per_node: number of GPUs on this node.
        args: parsed CLI namespace with training/distributed settings.
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(
            backend=args.dist_backend,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank,
        )
    # create model: resnet 18
    # since our differential privacy engine does not support BatchNormXd
    # we need to replace all such blocks with DP-aware normalisation modules
    model = utils.convert_batchnorm_modules(models.resnet18(num_classes=10))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = "cuda:{}".format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint["epoch"]
            best_acc1 = checkpoint["best_acc1"]
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint["epoch"]))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, "train")
    valdir = os.path.join(args.data, "val")
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
    )
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler,
    )
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    if not args.disable_dp:
        print("PRIVACY ENGINE ON")
        # NOTE(review): `clipping` is not defined in this function —
        # presumably a module-level dict of extra clipping options; confirm
        # it exists before this runs.
        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size,
            sample_size=len(train_dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] +
            list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            **clipping,
        )
        privacy_engine.attach(optimizer)
    else:
        print("PRIVACY ENGINE OFF")

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Reshuffle the distributed sampler's partition each epoch.
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # Only rank-0 (per node) writes checkpoints in multiprocessing mode.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "arch": "SampleConvNet",
                    "state_dict": model.state_dict(),
                    "best_acc1": best_acc1,
                    "optimizer": optimizer.state_dict(),
                },
                is_best,
                filename=args.checkpoint_file + ".tar",
            )
def train(self, data):
    """Fit the DP-GAN on `data`.

    Accepts a pandas DataFrame (coerced column-wise to numeric) or a numpy
    array. The discriminator is trained under an Opacus PrivacyEngine
    (per-sample clipping + noise); the generator uses a plain Adam optimizer.
    Training stops early once the spent privacy budget exceeds
    ``self.epsilon``.

    Args:
        data: 2-D training data, one row per sample.

    Raises:
        ValueError: if `data` is neither a DataFrame nor an ndarray.
    """
    if isinstance(data, pd.DataFrame):
        for col in data.columns:
            data[col] = pd.to_numeric(data[col], errors='ignore')
        self.pd_cols = data.columns
        # FIX: DataFrames expose `.index`, not `.pd_index` — the old
        # attribute access raised AttributeError for any DataFrame input.
        self.pd_index = data.index
        data = data.to_numpy()
    elif not isinstance(data, np.ndarray):
        raise ValueError("Data must be a numpy array or pandas dataframe")

    # FIX: remember the dataset size up front. The name `data` was previously
    # shadowed by the batch-loop variable below, so `1 / data.shape[0]` at the
    # end of an epoch operated on the last batch (a TensorDataset tuple with
    # no `.shape`), not on the full dataset.
    n_samples = data.shape[0]

    dataset = TensorDataset(
        torch.from_numpy(data.astype('float32')).to(self.device))
    dataloader = DataLoader(dataset,
                            batch_size=self.batch_size,
                            shuffle=True,
                            drop_last=True)

    self.generator = Generator(self.latent_dim,
                               data.shape[1],
                               binary=self.binary).to(self.device)
    discriminator = Discriminator(data.shape[1]).to(self.device)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=4e-4)

    privacy_engine = PrivacyEngine(
        discriminator,
        batch_size=self.batch_size,
        sample_size=n_samples,
        alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        noise_multiplier=3.5,
        max_grad_norm=1.0,
        clip_per_layer=True)
    privacy_engine.attach(optimizer_d)

    optimizer_g = optim.Adam(self.generator.parameters(), lr=1e-4)
    criterion = nn.BCELoss()

    for epoch in range(self.epochs):
        for batch in dataloader:
            discriminator.zero_grad()
            real_data = batch[0].to(self.device)

            # --- train discriminator with fake data ---
            noise = torch.randn(self.batch_size,
                                self.latent_dim,
                                1,
                                1,
                                device=self.device)
            noise = noise.view(-1, self.latent_dim)
            fake_data = self.generator(noise)
            # FIX: BCELoss requires float targets; with an integer fill value
            # recent PyTorch infers a Long tensor, which BCELoss rejects.
            label_fake = torch.full((self.batch_size, ),
                                    0,
                                    dtype=torch.float,
                                    device=self.device)
            output = discriminator(fake_data.detach())
            loss_d_fake = criterion(output, label_fake)
            loss_d_fake.backward()
            optimizer_d.step()

            # --- train discriminator with real data ---
            label_true = torch.full((self.batch_size, ),
                                    1,
                                    dtype=torch.float,
                                    device=self.device)
            output = discriminator(real_data.float())
            loss_d_real = criterion(output, label_true)
            loss_d_real.backward()
            optimizer_d.step()
            loss_d = loss_d_real + loss_d_fake

            # Adapt the engine's per-layer clipping thresholds to the
            # gradient norms observed on this batch.
            max_grad_norm = []
            for p in discriminator.parameters():
                param_norm = p.grad.data.norm(2).item()
                max_grad_norm.append(param_norm)
            privacy_engine.max_grad_norm = max_grad_norm

            # --- train generator ---
            self.generator.zero_grad()
            label_g = torch.full((self.batch_size, ),
                                 1,
                                 dtype=torch.float,
                                 device=self.device)
            output_g = discriminator(fake_data)
            loss_g = criterion(output_g, label_g)
            loss_g.backward()
            optimizer_g.step()

            # Manually clear the per-sample gradients the engine accumulated
            # on the discriminator (it was stepped twice this iteration).
            for p in discriminator.parameters():
                if hasattr(p, "grad_sample"):
                    del p.grad_sample

        if self.delta is None:
            # Conventional choice: delta below 1/N for N training samples.
            self.delta = 1 / n_samples
        eps, best_alpha = optimizer_d.privacy_engine.get_privacy_spent(
            self.delta)
        if self.epsilon < eps:
            break
def main():
    """Train SampleConvNet on the LIBSVM Adult (a2a) dataset, optionally with DP-SGD."""
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch Adult Example")
    # NOTE(review): several defaults below disagree with their help text
    # (batch-size 256 vs "64", epochs 10 vs "14", lr 0.15 vs ".1",
    # sigma 0.55 vs "1.0") — confirm which values are intended.
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=256,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.15,
        metavar="LR",
        help="learning rate (default: .1)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=0.55,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    args = parser.parse_args()
    device = torch.device(args.device)
    kwargs = {"num_workers": 1, "pin_memory": True}
    """Loads ADULT a2a as in LIBSVM and preprocesses to combine training and validation data."""
    # https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html
    x = pd.read_csv('adult.csv')
    trainData, testData = train_test_split(x, test_size=0.1, random_state=218)
    # have to reset index, see https://discuss.pytorch.org/t/keyerror-when-enumerating-over-dataloader/54210/13
    trainData = trainData.reset_index()
    testData = testData.reset_index()
    # Column 0 is the old index produced by reset_index(); the last column is
    # the binary label (== 1 → positive class).
    train_data = trainData.iloc[:, 1:-1].astype('float32')
    test_data = testData.iloc[:, 1:-1].astype('float32')
    train_labels = (trainData.iloc[:, -1] == 1).astype('int32')
    test_labels = (testData.iloc[:, -1] == 1).astype('int32')
    train_loader = torch.utils.data.DataLoader(AdultDataset(
        train_data, train_labels),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(AdultDataset(
        test_data, test_labels),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)
    run_results = []
    for _ in range(args.n_runs):
        model = SampleConvNet().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            privacy_engine = PrivacyEngine(
                model,
                batch_size=args.batch_size,
                sample_size=len(train_loader.dataset),
                alphas=[1 + x / 10.0 for x in range(1, 100)] +
                list(np.arange(12, 60, 0.1)) + list(np.arange(61, 100, 1)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
            # NOTE(review): test() also runs once more after the loop below,
            # so the final model is evaluated twice — confirm this is intended.
            test(args, model, device, test_loader)
        run_results.append(test(args, model, device, test_loader))
    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100))
def main():
    """Train a character-level RNN name classifier, optionally with DP-SGD.

    NOTE(review): `parser` and `print_every` are read but never defined in
    this function — presumably module-level; confirm they exist.
    """
    args = parser.parse_args()
    device = torch.device(args.device)

    all_filenames = glob.glob(args.training_path)
    # Alphabet: ASCII letters plus a few punctuation marks used in names.
    all_letters = string.ascii_letters + " .,;'#"
    n_letters = len(all_letters)
    category_lines, all_categories, n_categories = build_category_lines(
        all_filenames, all_letters
    )
    category_lines_train, category_lines_val = split_data_train_eval(
        category_lines, args.train_eval_split
    )

    rnn = CharNNClassifier(
        n_letters, args.n_hidden, n_categories, n_letters, args.batch_size
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(rnn.parameters(), lr=args.learning_rate)
    if not args.disable_dp:
        # batch_first=False: tensors are laid out (seq, batch, feature).
        privacy_engine = PrivacyEngine(
            rnn,
            batch_size=args.batch_size,
            sample_size=get_dataset_size(category_lines_train),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            batch_first=False,
        )
        privacy_engine.attach(optimizer)

    # Measure time elapsed for profiling training
    def time_since(since):
        now = time.time()
        s = now - since
        m = math.floor(s / 60)
        s -= m * 60
        return "%dm %ds" % (m, s)

    # Keep track of losses for tracking
    current_loss = 0
    start_time = time.time()

    for iteration in tqdm(range(1, args.iterations + 1)):
        # Get a random training input and target batch
        _, _, category_tensors, line_tensors = get_random_batch(
            category_lines_train,
            args.batch_size,
            all_categories,
            all_letters,
            n_letters,
            args,
            device,
        )
        output, loss = train(
            rnn, criterion, optimizer, category_tensors, line_tensors, device
        )
        current_loss += loss

        # Print iteration number, loss, name and guess
        if iteration % print_every == 0:
            acc = get_eval_metrics(
                rnn,
                category_lines_val,
                all_categories,
                all_letters,
                n_letters,
                args.batch_size,
                args.max_seq_length,
                device,
            )
            time_elapsed = time_since(start_time)
            if not args.disable_dp:
                # Report the (epsilon, alpha) pair for the target delta.
                epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(
                    args.delta
                )
                print(
                    f"Iteration={iteration} / Time elapsed: {time_elapsed} / Loss={loss:.4f} / "
                    f"Eval Accuracy:{acc*100:.2f} / "
                    f"Ɛ = {epsilon:.2f}, 𝛿 = {args.delta:.2f}) for α = {best_alpha:.2f}"
                )
            else:
                print(
                    f"Iteration={iteration} / Time elapsed: {time_elapsed} / Loss={loss:.4f} / "
                    f"Eval Accuracy:{acc*100:.2f}"
                )
def main():
    """Train SampleNet for IMDB sentiment classification, optionally with DP-SGD."""
    parser = argparse.ArgumentParser(description="PyTorch IMDB Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 10)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.02,
        metavar="LR",
        help="learning rate (default: .02)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=0.56,
        metavar="S",
        help="Noise multiplier (default 0.56)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--vocab-size",
        type=int,
        default=10_000,
        metavar="MV",
        help="Max vocab size (default: 10000)",
    )
    parser.add_argument(
        "--sequence-length",
        type=int,
        default=256,
        metavar="SL",
        help="Longer sequences will be cut to this length, shorter sequences will be padded to this length (default: 256)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla optimizer",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../imdb",
        help="Where IMDB is/will be stored",
    )
    args = parser.parse_args()
    device = torch.device(args.device)

    # Pad/truncate every review to a fixed length so batches are rectangular.
    text_field = torchtext.data.Field(
        tokenize=get_tokenizer("basic_english"),
        init_token="<sos>",
        eos_token="<eos>",
        fix_length=args.sequence_length,
        lower=True,
    )
    label_field = torchtext.data.LabelField(dtype=torch.long)
    train_data, test_data = torchtext.datasets.imdb.IMDB.splits(
        text_field, label_field, root=args.data_root
    )
    # Vocabulary is built from the training split only.
    text_field.build_vocab(train_data, max_size=args.vocab_size)
    label_field.build_vocab(train_data)
    (train_iterator, test_iterator) = torchtext.data.BucketIterator.splits(
        (train_data, test_data), batch_size=args.batch_size, device=device
    )

    model = SampleNet(vocab_size=args.vocab_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size,
            sample_size=len(train_data),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
        )
        privacy_engine.attach(optimizer)

    for epoch in range(1, args.epochs + 1):
        train(args, model, train_iterator, optimizer, epoch)
        evaluate(args, model, test_iterator)
def main():
    """Train SampleConvNet on MNIST (new-style PrivacyEngine taking a DataLoader) and report accuracy."""
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=14,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=1.0,
        metavar="LR",
        help="learning rate (default: 1.0)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="For Saving the current Model",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../mnist",
        help="Where MNIST is/will be stored",
    )
    args = parser.parse_args()
    device = torch.device(args.device)
    kwargs = {"num_workers": 1, "pin_memory": True}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=True,
            download=True,
            transform=transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
            ),
        ),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
            ),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )
    run_results = []
    for _run in range(1, args.n_runs + 1):
        model = SampleConvNet().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            # This engine signature infers batch/sample sizes from the
            # DataLoader itself instead of explicit kwargs.
            privacy_engine = PrivacyEngine(
                model,
                train_loader,
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
        run_results.append(test(args, model, device, test_loader))
    run_results = torch.Tensor(run_results)
    print(
        f"Accuracy: {torch.mean(run_results).item()} ± {torch.std(run_results).item()}"
    )

    # Filename tag encodes the hyper-parameters of the run.
    repro_str = (
        f"{model.name()}_{args.lr}_{args.sigma}_"
        f"{args.max_per_sample_grad_norm}_{args.batch_size}_{args.epochs}"
    )
    torch.save(run_results, f"run_results_{repro_str}.pt")

    if args.save_model:
        torch.save(model.state_dict(), f"mnist_cnn_{repro_str}.pt")
def _fit_model(
        self,
        ind,
        samples,
        labels,
        weights,
        batch_size,
        num_epochs,
        learning_rate,
        max_grad_norm,
        noise_multiplier,
        delta,
):
    """Train the `ind`-th conditional model in `self.models` under DP-SGD.

    Builds a (possibly importance-weighted) DataLoader over
    `samples`/`labels`, attaches an Opacus PrivacyEngine to an Adam
    optimizer, and runs `num_epochs` epochs with periodic progress logging.
    """
    ds = TensorDataset(torch.from_numpy(samples).float(),
                       torch.from_numpy(labels))
    loader_kwargs = {"num_workers": 1, "pin_memory": True} if HAS_CUDA else {}

    # A numpy weight vector selects weighted sampling; anything else
    # (typically None) falls back to uniform shuffling.
    if type(weights).__name__ == "ndarray":
        loader = DataLoader(
            dataset=ds,
            batch_size=batch_size,
            sampler=WeightedRandomSampler(weights=weights,
                                          num_samples=len(samples)),
            **loader_kwargs)
    else:
        loader = DataLoader(dataset=ds,
                            batch_size=batch_size,
                            shuffle=True,
                            **loader_kwargs)

    model = self.models[ind]
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # DP-SGD: every optimizer step clips per-sample gradients and adds noise.
    engine = PrivacyEngine(
        model,
        batch_size,
        len(ds),
        alphas=[1, 10, 100],
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
        target_delta=delta,
        loss_reduction='sum',
    )
    engine.attach(optimizer)

    for epoch in range(1, num_epochs + 1):
        model.train()
        train_loss = 0
        for step, (data, cond) in enumerate(loader):
            data = data.to(DEVICE)
            cond = self._one_hot(cond).to(DEVICE)
            optimizer.zero_grad()
            recon_batch, mu, log_var = model(data, cond)
            loss = self._loss_function(recon_batch, data, mu, log_var)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()
            # Log progress every 100 batches.
            if step % 100 == 0:
                print("epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch,
                    step * len(data),
                    len(loader.dataset),
                    100.0 * step / len(loader),
                    loss.item() / len(data),
                ))
        print("====> epoch: {} avg loss: {:.4f}".format(
            epoch, train_loss / len(loader.dataset)))
def main():
    """Train a CIFAR-10 classifier (home-made net or VGG16 variants), optionally with DP.

    NOTE(review): `args` is read but never defined in this function —
    presumably a module-level argparse namespace; confirm.
    """
    # ===== DETECT CUDA IF AVAILABLE =====
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    print("Running on", device_name.upper())

    # ===== LOAD DATA =====
    # PIL [0, 1] images to [-1, 1] Tensors
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    trainset = torchvision.datasets.CIFAR10(root='./data',
                                            train=True,
                                            download=True,
                                            transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=4)
    testset = torchvision.datasets.CIFAR10(root='./data',
                                           train=False,
                                           download=True,
                                           transform=transform)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=4,
                                             shuffle=False,
                                             num_workers=4)
    args.sample_size = len(trainloader.dataset)
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # ===== BUILD NET MODEL =====
    # REM: to restart from a saved model
    #net = Net()  # Or another choice, then
    #net.load_state_dict(torch.load(PATH))
    # *********************************************************
    # 0: home-made net
    # 1: not pre-trained VGG16
    # 2: pre-trained VGG16
    # 3: (not frozen) pre-trained VGG16 + fully connected layer
    # 4: frozen pre-trained VGG16 + fully connected layer
    # *********************************************************
    if args.mode == 0:
        # Home made, local definition : Net or Net2
        net = Net()
        optimizer = optim.SGD(net.parameters(),
                              lr=args.learning_rate,
                              momentum=0.9)
    elif args.mode in [1, 2]:
        net = vgg16(pretrained=(args.mode == 2))
        # Adapt output to 10 classes
        input_features = net.classifier[6].in_features
        net.classifier[6] = nn.Linear(input_features, 10)
        optimizer = optim.SGD(net.parameters(),
                              lr=args.learning_rate,
                              momentum=0.9)
    else:
        # Pre-trained
        net = vgg16(pretrained=True)
        if args.mode == 4:
            # Freeze existing model parameters for training
            # (or juste first convolutional layers != "classifier")
            #for param in net.parameters():
            #    param.requires_grad = False
            for name, param in net.named_parameters():
                if name[:10] != "classifier":
                    param.requires_grad = False
        if args.mode > 2:
            # Add some neww layers to train
            # Verification (before)
            # last_child = list(net.children())[-1]
            # print("\tLAST CHILD:", last_child)
            # Just adapt to 10 categories
            #input_features = last_child[0].in_features
            input_features = net.classifier[6].in_features
            net.classifier[6] = nn.Linear(input_features, 10)
            # In mode 4 only the classifier head is optimised (the rest is frozen).
            dp_mod = net.classifier if args.mode == 4 else net
            optimizer = optim.SGD(dp_mod.parameters(),
                                  lr=args.learning_rate,
                                  momentum=0.9)
            # Verification
            # last_child = list(net.children())[-1]
            # print("\tLAST CHILD:", last_child)
        # else:  # args.mode == 2
        #     # Adapt output to 10 classes
        #     input_features = net.classifier[6].in_features
        #     net.classifier[6] = nn.Linear(input_features, 10)
        #     optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=0.9)

    net.to(device)

    # ===== DP ===============================================
    # NOTE(review): dp_mod is recomputed here (duplicating the assignment
    # above) so it is defined for every mode — looks intentional but redundant
    # for modes 3/4.
    dp_mod = net.classifier if args.mode == 4 else net
    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            dp_mod,
            batch_size=args.batch_size,
            sample_size=args.sample_size,
            alphas=[1 + x / 10.0 for x in range(1, 100)] +
            list(range(12, 64)),
            noise_multiplier=args.noise,
            max_grad_norm=args.clip,
        )
        privacy_engine.attach(optimizer)
    # ========================================================

    criterion = nn.CrossEntropyLoss()

    # Structure of network
    # param_show(net)

    # ===== TRAIN MODEL =====
    print(
        f"Dataset and network are ready (mode {args.mode}), let's train our model "
        f"(×{args.epochs} epoch" + ("s" if args.epochs > 1 else "") + ")...")
    # (Just use `testloader=None` to avoid tests after each epoch)
    accur = train(net,
                  optimizer,
                  criterion,
                  trainloader,
                  args.epochs,
                  device,
                  save=False,
                  testloader=testloader,
                  args=args)

    #===== TEST MODEL =====
    #
    # Already done during training (except details)
    # acc, categ_acc = test(net, testloader, categories=args.categories, device=device)
    # accur.append(acc)
    # print(f'Accuracy of the network on the 10000 test images: {acc:.2f} %')
    # if args.categories:
    #     for i in range(10):
    #         print(f'Accuracy of {classes[i]:5s} : {categ_acc[i]:.2f} %')

    # Final one-line summary of the hyper-parameters and per-epoch accuracies.
    print(f'size={args.sample_size}, '
          f'bs={args.batch_size}, '
          f'nm={args.noise}, '
          f'ep={args.epochs}, '
          f'd={args.delta}, '
          f'cl={args.clip}, '
          f'lr={args.learning_rate}, '
          f'hu={args.hidden_units}, '
          f'M={args.mode}\n'
          f'acc={accur}')
# Top-level DP-GAN training script fragment. `netD`, `netG`, `opt`,
# `dataloader`, `device` and `nz` are presumably defined earlier in the
# original script — confirm; this chunk's training loop is truncated here.
REAL_LABEL = 1
FAKE_LABEL = 0

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
# The engine is constructed unconditionally but only attached when DP is
# enabled; the discriminator is the privatised module.
privacy_engine = PrivacyEngine(
    netD,
    batch_size=opt.batch_size,
    sample_size=len(dataloader.dataset),
    alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
    noise_multiplier=opt.sigma,
    max_grad_norm=opt.max_per_sample_grad_norm
)
if not opt.disable_dp:
    privacy_engine.attach(optimizerD)
optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

for epoch in range(opt.epochs):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        netD.zero_grad()
        real_data = data[0].to(device)
        batch_size = real_data.size(0)

        # train with fake
        noise = torch.randn(batch_size, nz, 1, 1, device=device)
        fake = netG(noise)
def main():
    """Sweep several (lr, sigma, clip) settings on MNIST DP-SGD and plot epsilon/loss curves."""
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    # NOTE(review): default is 3 but the help text says 14 — confirm.
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=3,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=.1,
        metavar="LR",
        help="learning rate (default: .1)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../mnist",
        help="Where MNIST is/will be stored",
    )
    parser.add_argument(
        "--save_e",
        type=str,
        default="epsilon.png",
        help="Path of output chart",
    )
    parser.add_argument(
        "--save_l",
        type=str,
        default="loss.png",
        help="Path of output chart",
    )
    args = parser.parse_args()
    #fo = open(parser.savefile, 'w')
    device = torch.device(args.device)
    kwargs = {"num_workers": 1, "pin_memory": True}
    # MNIST_MEAN / MNIST_STD are presumably module-level constants — confirm.
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )
    run_results = []
    # Hyper-parameter sweep: one training run per (lr, sigma, clip) triple.
    lr_list = [0.25, 0.25, 0.25, 0.15, 0.25, 0.25]
    sigma_list = [1.3, 1.1, 0.7, 1.1, 1.0, 1.1]
    c_list = [1.5, 1.0, 1.5, 1.0, 1.0, 1.5]
    #plt.figure()
    #ax1 = plt.subplot(211)
    #ax2 = plt.subplot(212)
    fig1 = plt.figure()
    fig2 = plt.figure()
    ax1 = fig1.add_subplot(111)
    ax1.set_title('Epsilon over epochs')
    ax2 = fig2.add_subplot(111)
    ax2.set_title('Loss over epochs')
    # NOTE(review): `_` is used as a live index into the lists below —
    # unconventional naming; a descriptive name would be clearer.
    for _ in range(len(lr_list)):
        model = SampleConvNet().to(device)
        optimizer = optim.SGD(model.parameters(), lr=lr_list[_], momentum=0)
        if not args.disable_dp:
            privacy_engine = PrivacyEngine(
                model,
                batch_size=args.batch_size,
                sample_size=len(train_loader.dataset),
                alphas=[1 + x / 10.0 for x in range(1, 100)] +
                list(range(12, 64)),
                noise_multiplier=sigma_list[_],
                max_grad_norm=c_list[_],
            )
            privacy_engine.attach(optimizer)
        loss_list, epsilon_list = [], []
        for epoch in range(1, args.epochs + 1):
            # train() is expected to return (loss, epsilon) for this script.
            l, e = train(args, model, device, train_loader, optimizer, epoch)
            loss_list.append(l)
            epsilon_list.append(e)
        # Same random colour for this configuration on both charts.
        color = np.random.rand(3, )
        #plt.sca(ax1)
        ax1.plot(epsilon_list,
                 c=color,
                 label="lr={:.2f} σ={:.1f} c={:.1f}".format(
                     lr_list[_], sigma_list[_], c_list[_]))
        #plt.sca(ax2)
        ax2.plot(loss_list,
                 c=color,
                 label="lr={:.2f} σ={:.1f} c={:.1f}".format(
                     lr_list[_], sigma_list[_], c_list[_]))
        run_results.append(test(args, model, device, test_loader))
    ax1.legend()
    fig1.savefig(args.save_e)
    ax2.legend()
    fig2.savefig(args.save_l)
    '''
    plt.sca(ax1)
    plt.title('Epsilon over epochs')
    plt.legend()
    plt.sca(ax2)
    plt.title('Loss over epochs')
    plt.legend()
    plt.savefig(args.savefile)
    '''
    '''
    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100
            )
        )
    '''
    # NOTE(review): repro_str uses args.lr / args.sigma /
    # args.max_per_sample_grad_norm, but the runs above used lr_list /
    # sigma_list / c_list — the filename does not describe the sweep; confirm.
    repro_str = (
        f"{model.name()}_{args.lr}_{args.sigma}_"
        f"{args.max_per_sample_grad_norm}_{args.batch_size}_{args.epochs}")
    torch.save(run_results, f"run_results_{repro_str}.pt")

    if args.save_model:
        torch.save(model.state_dict(), f"mnist_cnn_{repro_str}.pt")
class GradientAccumulation_test(unittest.TestCase):
    """Tests for gradient accumulation under Opacus' PrivacyEngine.

    The engine is configured with ``noise_multiplier=0`` and a huge
    ``max_grad_norm`` so that clipping and noise are no-ops; the DP-computed
    gradients can then be compared directly against the plain accumulated
    gradients captured in ``setUp_model_and_optimizer``.
    """

    def setUp(self):
        self.DATA_SIZE = 64
        self.BATCH_SIZE = 16
        # LR = 0 so that optimizer.step() can be called without modifying the model
        self.LR = 0
        self.ALPHAS = [1 + x / 10.0 for x in range(1, 100, 10)]
        self.criterion = nn.CrossEntropyLoss()

        self.setUp_data()
        self.setUp_model_and_optimizer()

    def setUp_data(self):
        """Build a small fake image dataset and its DataLoader (4 batches of 16)."""
        self.ds = FakeData(
            size=self.DATA_SIZE,
            image_size=(1, 35, 35),
            num_classes=10,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ]),
        )
        self.dl = DataLoader(self.ds, batch_size=self.BATCH_SIZE)

    def setUp_model_and_optimizer(self):
        """Create the model and record the reference full-dataset gradient.

        The reference gradient is accumulated over the whole dataset without
        any privacy hooks, then rescaled by BATCH_SIZE / DATA_SIZE so it is
        comparable with a single-batch average gradient.
        """
        self.model = SampleConvNet()
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.LR,
                                         momentum=0)

        self.optimizer.zero_grad()

        # accumulate .grad over the entire dataset
        for x, y in self.dl:
            logits = self.model(x)
            loss = self.criterion(logits, y)
            loss.backward()

        self.effective_batch_grad = torch.cat([
            p.grad.reshape(-1) for p in self.model.parameters()
            if p.requires_grad
        ]) * (self.BATCH_SIZE / self.DATA_SIZE)

        self.optimizer.zero_grad()

    def setUp_privacy_engine(self, batch_size):
        """Attach a no-op PrivacyEngine (zero noise, effectively no clipping).

        Args:
            batch_size: the effective batch size the engine expects per step.
        """
        self.privacy_engine = PrivacyEngine(
            self.model,
            batch_size=batch_size,
            sample_size=self.DATA_SIZE,
            alphas=self.ALPHAS,
            noise_multiplier=0,
            max_grad_norm=999,
        )
        self.privacy_engine.attach(self.optimizer)

    def calc_per_sample_grads(self, data_iter, num_steps=1):
        """Run backward on up to ``num_steps`` mini-batches from ``data_iter``."""
        for x, y in data_iter:
            num_steps -= 1
            logits = self.model(x)
            loss = self.criterion(logits, y)
            loss.backward()
            if num_steps == 0:
                break

    def test_grad_sample_accumulation(self):
        """
        Calling loss.backward() multiple times should sum up the gradients in
        .grad and accumulate all the individual gradients in .grad-sample
        """
        self.setUp_privacy_engine(self.DATA_SIZE)
        data_iter = iter(self.dl)  # 4 batches of size 16 each
        self.calc_per_sample_grads(data_iter, num_steps=4)
        # should accumulate grads in .grad and .grad_sample

        # the accumulated per-sample gradients
        per_sample_grads = torch.cat(
            [
                p.grad_sample.reshape(self.DATA_SIZE, -1)
                for p in self.model.parameters() if p.requires_grad
            ],
            dim=-1,
        )
        # average up all the per-sample gradients
        accumulated_grad = torch.mean(per_sample_grads, dim=0)

        # the full data gradient accumulated in .grad
        grad = torch.cat([
            p.grad.reshape(-1) for p in self.model.parameters()
            if p.requires_grad
        ]) * (self.BATCH_SIZE / self.DATA_SIZE)

        self.optimizer.step()

        # the accumulated gradients in .grad without any hooks
        orig_grad = self.effective_batch_grad

        self.assertTrue(
            torch.allclose(accumulated_grad, orig_grad, atol=10e-5,
                           rtol=10e-3))
        self.assertTrue(torch.allclose(grad, orig_grad, atol=10e-5,
                                       rtol=10e-3))

    def test_clipper_accumulation(self):
        """
        Calling optimizer.virtual_step() should accumulate clipped gradients to
        form one large batch.
        """
        self.setUp_privacy_engine(self.DATA_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        for _ in range(3):  # take 3 virtual steps
            self.calc_per_sample_grads(data, num_steps=1)
            self.optimizer.virtual_step()

        # accumulate on the last step
        self.calc_per_sample_grads(data, num_steps=1)
        self.optimizer.step()

        # .grad should contain the average gradient over the entire dataset
        accumulated_grad = torch.cat([
            p.grad.reshape(-1) for p in self.model.parameters()
            if p.requires_grad
        ])

        # the accumulated gradients in .grad without any hooks
        orig_grad = self.effective_batch_grad

        self.assertTrue(
            torch.allclose(accumulated_grad, orig_grad, atol=10e-5,
                           rtol=10e-3),
            f"Values are {accumulated_grad} vs {orig_grad}."
            f"MAD is {(orig_grad - accumulated_grad).abs().mean()}")

    def test_mixed_accumulation(self):
        """
        Calling loss.backward() multiple times aggregates all per-sample
        gradients in .grad-sample. Then, calling optimizer.virtual_step()
        should clip all gradients and aggregate them into one large batch.
        """
        self.setUp_privacy_engine(self.DATA_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        # accumulate per-sample grads for two mini batches
        self.calc_per_sample_grads(data, num_steps=2)
        # take a virtual step
        self.optimizer.virtual_step()
        # accumulate another two mini batches
        self.calc_per_sample_grads(data, num_steps=2)
        # take a step
        self.optimizer.step()

        # .grad should contain the average gradient over the entire dataset
        accumulated_grad = torch.cat([
            p.grad.reshape(-1) for p in self.model.parameters()
            if p.requires_grad
        ])

        # the accumulated gradients in .grad without any hooks
        orig_grad = self.effective_batch_grad

        self.assertTrue(
            torch.allclose(accumulated_grad, orig_grad, atol=10e-5,
                           rtol=10e-3))

    def test_grad_sample_erased(self):
        """
        Calling optimizer.step() should erase any accumulated per-sample
        gradients.
        """
        self.setUp_privacy_engine(2 * self.BATCH_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        for _ in range(2):
            # accumulate per-sample gradients for two mini-batches to form an
            # effective batch of size `2*BATCH_SIZE`. Once an effective batch
            # has been accumulated, we call `optimizer.step()` to clip and
            # average the per-sample gradients. This should erase the
            # `grad_sample` fields for each parameter
            self.calc_per_sample_grads(data, num_steps=2)
            self.optimizer.step()

            for param_name, param in self.model.named_parameters():
                if param.requires_grad:
                    self.assertFalse(
                        hasattr(param, "grad_sample"),
                        f"Per-sample gradients haven't been erased "
                        f"for {param_name}",
                    )

    def test_summed_grad_erased(self):
        """
        Calling optimizer.step() should erase any accumulated clipped
        gradients.
        """
        self.setUp_privacy_engine(2 * self.BATCH_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        for idx in range(4):
            self.calc_per_sample_grads(data, num_steps=1)

            if idx % 2 == 0:
                # perform a virtual step for each mini-batch
                # this will accumulate clipped gradients in each parameter's
                # `summed_grads` field.
                self.optimizer.virtual_step()

                for param_name, param in self.model.named_parameters():
                    if param.requires_grad:
                        self.assertTrue(
                            hasattr(param, "summed_grad"),
                            f"Clipped gradients aren't accumulated "
                            f"for {param_name}",
                        )
            else:
                # accumulate gradients for two mini-batches to form an
                # effective batch of size `2*BATCH_SIZE`. Once an effective batch
                # has been accumulated, we call `optimizer.step()` to compute the
                # average gradient for the entire batch. This should erase the
                # `summed_grads` fields for each parameter.
                # take a step. The clipper will compute the mean gradient
                # for the entire effective batch and populate each parameter's
                # `.grad` field.
                self.optimizer.step()

                for param_name, param in self.model.named_parameters():
                    if param.requires_grad:
                        # NOTE: the stray '¨' that used to prefix 'for' in this
                        # message was a typo and has been removed.
                        self.assertFalse(
                            hasattr(param, "summed_grad"),
                            f"Accumulated clipped gradients haven't been erased "
                            f"for {param_name}",
                        )

    def test_throws_wrong_batch_size(self):
        """
        If we accumulate the wrong number of gradients and feed this batch to
        the privacy engine, we expect a failure.
        """
        self.setUp_privacy_engine(2 * self.BATCH_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        # consuming a batch that is smaller than expected should work
        self.calc_per_sample_grads(data, num_steps=1)
        with self.assertWarns(Warning):
            self.optimizer.step()

        self.optimizer.zero_grad()

        # consuming a larger batch than expected should fail
        for _ in range(2):
            self.calc_per_sample_grads(data, num_steps=1)
            self.optimizer.virtual_step()
        with self.assertRaises(ValueError):
            self.calc_per_sample_grads(data, num_steps=1)
            self.optimizer.step()
def main():
    """Run MNIST training (optionally with DP-SGD via Opacus) for `n_runs`
    runs while recording GPU memory usage at key phases into a CSV file.

    Parses command-line arguments, builds MNIST train/test loaders, trains
    `SampleConvNet` for `--epochs` epochs per run, and appends one CSV row
    of memory statistics per invocation to `mem_stats_mnist.csv`.
    """
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        # help text previously claimed "(default: 14)", which disagreed with
        # the actual default of 10
        help="number of epochs to train (default: 10)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=.1,
        metavar="LR",
        help="learning rate (default: .1)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../mnist",
        help="Where MNIST is/will be stored",
    )
    args = parser.parse_args()
    device = torch.device(args.device)
    kwargs = {"num_workers": 1, "pin_memory": True}

    # Create the stats file with its header the first time the script runs;
    # subsequent runs only append rows.
    file_name = 'mem_stats_mnist.csv'
    if not os.path.exists(file_name):
        with open(file_name, 'w') as f:
            f.write('Batch_size, Differential_Privacy, '
                    'Mem_Model_Loaded, Mem_Images_Loaded, Mem_Loss_Gradient, '
                    'Mem_Model_Computed\n')

    # `infos` collects the memory readings; keys '2.IL', '3.LG', '4.MU' are
    # expected to be filled in by train() — TODO confirm against train()'s
    # implementation.
    infos = dict()

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, ))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )
    run_results = []
    for _ in range(args.n_runs):
        model = SampleConvNet().to(device)
        _mem_monitor("1. HYBRID_MODEL : model loaded ",
                     args.device)  # ===== Monitoring =====

        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            # Attach the privacy engine: replaces gradient computation with
            # DP-SGD (per-sample clipping + Gaussian noise).
            privacy_engine = PrivacyEngine(
                model,
                batch_size=args.batch_size,
                sample_size=len(train_loader.dataset),
                alphas=[1 + x / 10.0 for x in range(1, 100)] +
                list(range(12, 64)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
            )
            privacy_engine.attach(optimizer)

        _mem_monitor("1.1 HYBRID_MODEL : model loaded + DP ",
                     args.device)  # ===== Monitoring =====
        infos['1.ML'] = f'{memory_allocated(device)}'

        for epoch in range(1, args.epochs + 1):
            # Add `infos` parameter for monitoring
            train(args, model, device, train_loader, optimizer, epoch, infos)
        run_results.append(test(args, model, device, test_loader))

    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100))

    ## # Memory stats
    infos['BS'] = args.batch_size
    infos['DP'] = 'N' if args.disable_dp else 'Y'
    print(
        f'\n>>>>> Train / Batch size = {infos["BS"]}, DP={infos["DP"]} >>>>>')
    print('\t', infos)

    # Append one comma-separated row of stats to the .csv file
    with open(file_name, 'a') as f:
        f.write(', '.join(
            str(infos[k]) for k in ('BS', 'DP', '1.ML', '2.IL', '3.LG',
                                    '4.MU')))
        f.write('\n')