import time

import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

# NOTE: `stats` is the opacus (v0.x) TensorBoard stats utility used throughout;
# this import path is an assumption based on that version's layout.
from opacus.utils import stats


def test(args, model, test_loader, device):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []
    top5_acc = []

    with torch.no_grad():
        for images, target in tqdm(test_loader):
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            loss = criterion(output, target)
            acc1, acc5 = topk_accuracy(output, target, topk=(1, 5))

            losses.append(loss.item())
            top1_acc.append(acc1.item())
            top5_acc.append(acc5.item())

    top1_avg = np.mean(top1_acc)
    top5_avg = np.mean(top5_acc)
    stats.update(stats.StatType.TEST, acc1=top1_avg, acc5=top5_avg)

    print(f"\tTest set: "
          f"Loss: {np.mean(losses):.6f} "
          f"Acc@1: {top1_avg:.6f} "
          f"Acc@5: {top5_avg:.6f}")
    return top1_avg
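# `topk_accuracy` is called above (and in the DP train loop below) but not
# defined in this file. A minimal sketch, assuming it mirrors the standard
# torchvision top-k accuracy helper except that it returns one 0-dim tensor
# per k (which is why callers use `acc1.item()` rather than `acc1[0]`):
def topk_accuracy(output, target, topk=(1,)):
    """Compute precision@k for each k in `topk`, as percentages in [0, 100]."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # top-maxk predicted class indices, transposed to (maxk, batch_size)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res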
# Accumulates per-parameter clipping statistics across a module's parameters;
# flushed to StatType.CLIPPING once the module signals completion by calling
# the collector with param_name=None.
_clipping_stats = {}


def layer_stats_collector(
    param_name,
    clipping_factor,
    clipping_threshold,
    per_sample_norm,
    per_sample_grad,
    grad_before_clip,
    grad_after_clip,
):
    global _clipping_stats
    if param_name is None:
        # module is done processing all params, report all stats at once
        stats.update(stats.StatType.CLIPPING, "Clipping", **_clipping_stats)
        # clear stats for next round
        _clipping_stats = {}
        return

    _clipping_stats[f"{param_name}:max_norm"] = per_sample_norm.max()
    _clipping_stats[f"{param_name}:mean_norm"] = per_sample_norm.mean()
    _clipping_stats[f"{param_name}:median_norm"] = per_sample_norm.median()
    _clipping_stats[f"{param_name}:clip"] = clipping_threshold
    # fraction of samples whose gradient norm exceeded the clipping threshold
    _clipping_stats[f"{param_name}:percent"] = (
        (per_sample_norm > clipping_threshold).to(dtype=torch.float64).mean()
    )

    # fraction of gradient entries whose sign flipped as a result of clipping
    pre_clip_pos = grad_before_clip > 0
    post_clip_pos = grad_after_clip > 0
    _clipping_stats[f"{param_name}:switch"] = (
        torch.logical_xor(pre_clip_pos, post_clip_pos)
        .to(dtype=torch.float64)
        .mean()
    )
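# A self-contained way to see what the collector records: drive it with
# synthetic per-sample norms and gradients for two hypothetical parameter
# names, then flush with param_name=None. The tensor shapes and the
# sentinel-None flush are assumptions about the calling convention sketched
# above, not a library API.
def _demo_layer_stats_collector():
    fake_norms = torch.tensor([0.5, 1.5, 3.0])  # one norm per sample
    fake_grad_before = torch.randn(3, 4)  # per-sample gradients
    fake_grad_after = fake_grad_before * 0.5  # as if scaled down by clipping

    for name in ("conv1.weight", "fc.bias"):
        layer_stats_collector(
            param_name=name,
            clipping_factor=0.5,
            clipping_threshold=1.0,
            per_sample_norm=fake_norms,
            per_sample_grad=fake_grad_before,
            grad_before_clip=fake_grad_before,
            grad_after_clip=fake_grad_after,
        )
    # flush: reports all accumulated stats under StatType.CLIPPING and resets
    layer_stats_collector(None, None, None, None, None, None, None)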
def train(train_loader, model, criterion, optimizer, epoch, args):
    batch_time = AverageMeter("Time", ":6.3f")
    data_time = AverageMeter("Data", ":6.3f")
    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Acc@1", ":6.2f")
    top5 = AverageMeter("Acc@5", ":6.2f")
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch),
    )

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        stats.update(stats.StatType.TRAIN, acc1=acc1[0], acc5=acc5[0])
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
            if not args.disable_dp:
                epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(
                    args.delta
                )
                print(
                    f"Train Epoch: {epoch} \t"
                    f"Loss: {loss.item():.6f} "
                    f"(ε = {epsilon}, δ = {args.delta}) for α = {best_alpha}"
                )
            else:
                print(f"Train Epoch: {epoch} \t" f"Loss: {loss.item():.6f}")
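# `AverageMeter` and `accuracy` are used above but defined elsewhere in the
# repo. Minimal sketches, assuming they match the standard helpers from
# pytorch/examples (note that `accuracy` returns a list of shape-(1,) tensors,
# which is why callers index `acc1[0]`):
class AverageMeter:
    """Tracks the current value and running average of a metric."""

    def __init__(self, name, fmt=":f"):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})"
        return fmtstr.format(**self.__dict__)


def accuracy(output, target, topk=(1,)):
    """Like `topk_accuracy` above, but returns shape-(1,) tensors per k."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        return [
            correct[:k]
            .reshape(-1)
            .float()
            .sum(0, keepdim=True)
            .mul_(100.0 / batch_size)
            for k in topk
        ]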
def train(args, model, train_loader, optimizer, epoch, device):
    model.train()
    criterion = nn.CrossEntropyLoss()

    losses = []
    top1_acc = []
    top5_acc = []

    for i, (images, target) in enumerate(tqdm(train_loader)):
        images = images.to(device)
        target = target.to(device)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = topk_accuracy(output, target, topk=(1, 5))
        losses.append(loss.item())
        top1_acc.append(acc1.item())
        top5_acc.append(acc5.item())
        stats.update(stats.StatType.TRAIN, acc1=acc1.item(), acc5=acc5.item())

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()

        # make sure we take a step after processing the last mini-batch in the
        # epoch to ensure we start the next epoch with a clean state
        if ((i + 1) % args.n_accumulation_steps == 0) or (
            (i + 1) == len(train_loader)
        ):
            optimizer.step()
        else:
            optimizer.virtual_step()

        if i % args.print_freq == 0:
            if not args.disable_dp:
                epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(
                    args.delta
                )
                print(
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc):.6f} "
                    f"Acc@5: {np.mean(top5_acc):.6f} "
                    f"(ε = {epsilon:.2f}, δ = {args.delta}) for α = {best_alpha}"
                )
            else:
                print(
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc):.6f} "
                    f"Acc@5: {np.mean(top5_acc):.6f}"
                )
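# A sketch of how the DP train loop above is typically wired up, assuming the
# opacus v0.x API (PrivacyEngine attaches to the optimizer, which then exposes
# `virtual_step()` for gradient accumulation). Argument names such as
# `args.sigma` and `args.max_per_sample_grad_norm` are illustrative. The key
# point: because `virtual_step` accumulates clipped per-sample gradients, the
# engine must be told the *effective* batch size,
# args.batch_size * args.n_accumulation_steps.
def build_private_optimizer(args, model, train_dataset):
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    if not args.disable_dp:
        from opacus import PrivacyEngine

        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size * args.n_accumulation_steps,
            sample_size=len(train_dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
        )
        # after attach(), optimizer.step() clips/noises gradients, while
        # optimizer.virtual_step() accumulates without stepping
        privacy_engine.attach(optimizer)
    return optimizer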
def validate(val_loader, model, criterion, args):
    batch_time = AverageMeter("Time", ":6.3f")
    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Acc@1", ":6.2f")
    top5 = AverageMeter("Acc@5", ":6.2f")
    progress = ProgressMeter(
        len(val_loader), [batch_time, losses, top1, top5], prefix="Test: "
    )

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            progress.display(i)

    # TODO: this should also be done with the ProgressMeter
    stats.update(stats.StatType.TEST, acc1=top1.avg, acc5=top5.avg)
    print(
        " * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(top1=top1, top5=top5)
    )

    return top1.avg
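# `ProgressMeter` sketch, again assuming the pytorch/examples convention:
# a batch counter like "[ 42/391]" followed by each meter's current and
# average value.
class ProgressMeter:
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print("\t".join(entries))

    @staticmethod
    def _get_batch_fmtstr(num_batches):
        # pad the running counter to the width of the total batch count
        num_digits = len(str(num_batches))
        fmt = "{:" + str(num_digits) + "d}"
        return "[" + fmt + "/" + fmt.format(num_batches) + "]"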