# Imports shared by the snippets below.
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

from opacus.utils import stats


def test(args, model, test_loader, device):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []

    with torch.no_grad():
        for images, target in tqdm(test_loader):
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()
            acc1 = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc1)

    top1_avg = np.mean(top1_acc)
    stats.update(stats.StatType.TEST, acc1=top1_avg)

    print(
        f"\tTest set:"
        f"Loss: {np.mean(losses):.6f} "
        f"Acc@1: {top1_avg:.6f} "
    )
    return np.mean(top1_acc)
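# The `accuracy` helper used above is not defined in this snippet. A minimal
# sketch of what it could look like, assuming `preds` and `labels` are 1-D
# numpy arrays of class indices (hypothetical helper, not the original code):
def accuracy(preds, labels):
    # fraction of predictions that match the ground-truth labels
    return (preds == labels).mean()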
def test(args, model, test_loader, device):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []
    top5_acc = []

    with torch.no_grad():
        for images, target in tqdm(test_loader):
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            loss = criterion(output, target)
            acc1, acc5 = topk_accuracy(output, target, topk=(1, 5))

            losses.append(loss.item())
            top1_acc.append(acc1.item())
            top5_acc.append(acc5.item())

    top1_avg = np.mean(top1_acc)
    top5_avg = np.mean(top5_acc)
    stats.update(stats.StatType.TEST, acc1=top1_avg, acc5=top5_avg)

    print(
        f"\tTest set:"
        f"Loss: {np.mean(losses):.6f} "
        f"Acc@1: {top1_avg:.6f} "
        f"Acc@5: {top5_avg:.6f} "
    )
    return np.mean(top1_acc)
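# `topk_accuracy` is likewise not shown above. A hedged sketch, assuming it
# mirrors the classic torchvision-style helper that returns one tensor per
# requested k with the accuracy as a percentage (hypothetical implementation):
def topk_accuracy(output, target, topk=(1,)):
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # top-k predicted class indices per sample, transposed to (maxk, batch_size)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res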
def train(args, model, train_loader, optimizer, epoch, device):
    start_time = datetime.now()
    model.train()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []

    for i, (images, target) in enumerate(tqdm(train_loader)):
        images = images.to(device)
        target = target.to(device)

        # compute output
        output = model(images)
        loss = criterion(output, target)
        preds = np.argmax(output.detach().cpu().numpy(), axis=1)
        labels = target.detach().cpu().numpy()

        # measure accuracy and record loss
        acc1 = accuracy(preds, labels)
        losses.append(loss.item())
        top1_acc.append(acc1)
        stats.update(stats.StatType.TRAIN, acc1=acc1)

        # compute gradient and do SGD step
        loss.backward()

        # make sure we take a step after processing the last mini-batch in the
        # epoch to ensure we start the next epoch with a clean state
        if ((i + 1) % args.n_accumulation_steps == 0) or ((i + 1) == len(train_loader)):
            optimizer.step()
            optimizer.zero_grad()
        else:
            optimizer.virtual_step()

        if i % args.print_freq == 0:
            if not args.disable_dp:
                epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(
                    args.delta
                )
                print(
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc):.6f} "
                    f"(ε = {epsilon:.2f}, δ = {args.delta}) for α = {best_alpha}"
                )
            else:
                print(
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc):.6f} "
                )

    train_duration = datetime.now() - start_time
    return train_duration
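# The calls to `optimizer.virtual_step()` and
# `optimizer.privacy_engine.get_privacy_spent(...)` above assume a PrivacyEngine
# was attached to the optimizer beforehand. A rough sketch of that setup,
# assuming the legacy Opacus 0.x API (the exact argument names are illustrative
# and differ across Opacus versions):
from opacus import PrivacyEngine

privacy_engine = PrivacyEngine(
    model,
    batch_size=args.batch_size * args.n_accumulation_steps,  # logical batch size
    sample_size=len(train_loader.dataset),
    alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
    noise_multiplier=args.sigma,
    max_grad_norm=args.max_per_sample_grad_norm,
)
privacy_engine.attach(optimizer)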
def train(args, model, train_loader, optimizer, epoch, device):
    model.train()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []

    for i, (images, target) in enumerate(tqdm(train_loader)):
        images = images.to(device)
        target = target.to(device)

        # Step 1: compute per-sample-grads (provided by pytorch)
        # loss.backward() populates the grad_sample attribute of each param
        with model.compute_per_sample_grads(batch_size=images.shape[0]):
            output = model(images)
            loss = criterion(output, target)
            loss.backward()

        # Step 2: Clip the per-sample-grads, sum them to form grads, and add noise.
        # Opacus implements this, but I wrote a custom one to show how this would work.
        # This deletes the grad_sample attributes and populates the grad attributes.
        clip_and_accumulate_and_add_noise(
            model, args.max_per_sample_grad_norm, args.sigma
        )

        preds = np.argmax(output.detach().cpu().numpy(), axis=1)
        labels = target.detach().cpu().numpy()
        losses.append(loss.item())

        # measure accuracy and record loss
        acc1 = accuracy(preds, labels)
        top1_acc.append(acc1)
        stats.update(stats.StatType.TRAIN, acc1=acc1)

        # make sure we take a step after processing the last mini-batch in the
        # epoch to ensure we start the next epoch with a clean state
        if ((i + 1) % args.n_accumulation_steps == 0) or ((i + 1) == len(train_loader)):
            optimizer.step()
            optimizer.zero_grad()
        else:
            optimizer.virtual_step()

        if i % args.print_freq == 0:
            print(
                f"\tTrain Epoch: {epoch} \t"
                f"Loss: {np.mean(losses):.6f} "
                f"Acc@1: {np.mean(top1_acc):.6f} "
            )
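# A hedged sketch of what `clip_and_accumulate_and_add_noise` could look like,
# following the comment above: clip each per-sample gradient to
# `max_per_sample_grad_norm`, sum the clipped gradients into `.grad`, add
# Gaussian noise scaled by `sigma`, and drop the `.grad_sample` attributes.
# This is an illustrative implementation, not the author's original helper,
# and it ignores details such as accumulation across virtual steps.
def clip_and_accumulate_and_add_noise(model, max_per_sample_grad_norm, sigma):
    params = [p for p in model.parameters() if hasattr(p, "grad_sample")]

    # per-sample L2 norm across all parameters, shape (batch_size,)
    per_sample_norms = torch.stack(
        [p.grad_sample.flatten(start_dim=1).norm(2, dim=1) for p in params], dim=1
    ).norm(2, dim=1)

    # scale factor that brings each sample's gradient down to the max norm
    clip_factor = (max_per_sample_grad_norm / (per_sample_norms + 1e-6)).clamp(max=1.0)

    for p in params:
        # clip each per-sample gradient, sum over the batch, and add Gaussian noise
        grad = torch.einsum("i,i...->...", clip_factor, p.grad_sample)
        noise = torch.randn_like(grad) * (sigma * max_per_sample_grad_norm)
        p.grad = (grad + noise).detach()
        del p.grad_sample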
def test_stats_example(self):
    # IMPORTANT: When changing this code you also need to update
    # the docstrings for opacus.utils.stats.Stat
    class MockSummaryWriter:
        def __init__(self):
            self.logs = defaultdict(dict)

        def add_scalar(self, name, value, iter):
            self.logs[name][iter] = value

    mock_summary_writer = MockSummaryWriter()
    stats.set_global_summary_writer(mock_summary_writer)

    stat = stats.Stat(stats.StatType.GRAD, "sample_stats", frequency=0.1)
    for i in range(21):
        stat.log({"val": i})
    self.assertEqual(len(mock_summary_writer.logs["GRAD:sample_stats/val"]), 2)

    stats.add(stats.Stat(stats.StatType.TEST, "accuracy", frequency=1.0))
    stats.update(stats.StatType.TEST, acc1=1.0)
def train(args, model, train_loader, optimizer, epoch, device):
    model.train()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []

    for i, (images, target) in enumerate(tqdm(train_loader)):
        images = images.to(device)
        target = target.to(device)

        # Step 1: compute per-sample-grads
        # In order to use functional vmap+grad, we need to be able to
        # pass the weights to a model.
        weights, func_model, descriptors = make_functional(model)

        # To use vmap+grad to compute per-sample-grads, the forward pass
        # must be re-formulated on a single example.
        # We use the `grad` operator to compute forward+backward on a single example,
        # and finally `vmap` to do forward+backward on multiple examples.
        def compute_loss_and_output(weights, image, target):
            images = image.unsqueeze(0)
            targets = target.unsqueeze(0)
            output = func_model(weights, (images,))
            loss = criterion(output, targets)
            return loss, output.squeeze(0)

        # `grad(f)` is a functional API that returns a function `f'` that
        # computes gradients by running both the forward and backward pass.
        # We want to extract some intermediate values from the computation
        # (i.e. the loss and the output).
        #
        # To extract the loss, we use the `grad_and_value` API, which returns
        # the gradient of the loss w.r.t. the weights along with the loss itself.
        #
        # To extract the output, we use the `has_aux=True` flag.
        # `has_aux=True` assumes that `f` returns a tuple of two values,
        # where the first is to be differentiated and the second "auxiliary value"
        # is not to be differentiated. `f'` returns the gradient w.r.t. the loss,
        # the loss, and the auxiliary value.
        grads_loss_output = grad_and_value(compute_loss_and_output, has_aux=True)
        sample_grads, (sample_loss, output) = vmap(grads_loss_output, (None, 0, 0))(
            weights, images, target
        )
        loss = sample_loss.mean()

        # `load_state` is the inverse operation of `make_functional`. We put
        # things back into a model so that they're easier to manipulate.
        load_state(model, weights, descriptors)

        for grad_sample, weight in zip(sample_grads, model.parameters()):
            weight.grad_sample = grad_sample.detach()

        # Step 2: Clip the per-sample-grads, sum them to form grads, and add noise
        grads = clip_and_accumulate_and_add_noise(
            model, args.max_per_sample_grad_norm, args.sigma
        )

        preds = np.argmax(output.detach().cpu().numpy(), axis=1)
        labels = target.detach().cpu().numpy()
        losses.append(loss.item())

        # measure accuracy and record loss
        acc1 = accuracy(preds, labels)
        top1_acc.append(acc1)
        stats.update(stats.StatType.TRAIN, acc1=acc1)

        # make sure we take a step after processing the last mini-batch in the
        # epoch to ensure we start the next epoch with a clean state
        if ((i + 1) % args.n_accumulation_steps == 0) or ((i + 1) == len(train_loader)):
            optimizer.step()
            optimizer.zero_grad()
        else:
            optimizer.virtual_step()

        if i % args.print_freq == 0:
            print(
                f"\tTrain Epoch: {epoch} \t"
                f"Loss: {np.mean(losses):.6f} "
                f"Acc@1: {np.mean(top1_acc):.6f} "
            )
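# For reference, a minimal sketch of the same per-sample-gradient pattern using
# the newer torch.func API (functional_call + grad_and_value + vmap). This is
# an illustrative alternative, not the `make_functional`/`load_state` helpers
# used above:
import torch.nn.functional as F
from torch.func import functional_call, grad_and_value, vmap


def per_sample_grads(model, images, targets):
    params = {k: v.detach() for k, v in model.named_parameters()}
    buffers = {k: v.detach() for k, v in model.named_buffers()}

    def compute_loss(params, buffers, image, target):
        # re-formulate the forward pass on a single example
        batch = image.unsqueeze(0)
        labels = target.unsqueeze(0)
        output = functional_call(model, (params, buffers), (batch,))
        return F.cross_entropy(output, labels)

    # grad_and_value differentiates w.r.t. the first argument (params);
    # vmap maps over the batch dimension of image/target only.
    grads, losses = vmap(
        grad_and_value(compute_loss), in_dims=(None, None, 0, 0)
    )(params, buffers, images, targets)
    return grads, losses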