def test_run_resnet101(self):
    """Run one batch through ResNet-101, before and after BatchNorm conversion.

    The unconverted model is expected to raise because the privacy engine
    does not work with batch norm; the model returned by
    ``utils.convert_batchnorm_modules`` is expected to run without raising.
    """
    input_shape = (3, 224, 224)

    # Drop this guarded call once batch norm is supported by the engine.
    with self.assertRaises(Exception):
        self.runOneBatch(models.resnet101(), input_shape)

    converted = utils.convert_batchnorm_modules(models.resnet101())
    self.runOneBatch(converted, input_shape)
def test_run_basic_case(self):
    """Run one batch through ``BasicModel``, before and after conversion.

    The unconverted model is expected to raise because the privacy engine
    does not work with batch norm; the model returned by
    ``utils.convert_batchnorm_modules`` is expected to run without raising.
    """
    input_shape = (3, 4, 5)

    # Drop this guarded call once batch norm is supported by the engine.
    with self.assertRaises(Exception):
        self.runOneBatch(BasicModel(input_shape), input_shape)

    converted = utils.convert_batchnorm_modules(BasicModel(input_shape))
    self.runOneBatch(converted, input_shape)
def test_convert_batchnorm_modules_resnet50(self):
    """Check that conversion removes BatchNorm2d from ResNet-50.

    Before conversion the model must contain ``nn.BatchNorm2d``; after
    ``utils.convert_batchnorm_modules`` it must contain no
    ``nn.BatchNorm2d`` and must contain ``nn.GroupNorm``.
    """
    resnet = models.resnet50()
    # Sanity check: the stock model really has batch norm layers.
    self.checkModulePresent(resnet, nn.BatchNorm2d)

    converted = utils.convert_batchnorm_modules(resnet)

    # Batch norm must be gone, with group norm present in the result.
    self.checkModuleNotPresent(converted, nn.BatchNorm2d)
    self.checkModulePresent(converted, nn.GroupNorm)
nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), nn.Tanh() # state size. (nc) x 64 x 64 ) def forward(self, input): if input.is_cuda and self.ngpu > 1: output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) else: output = self.main(input) return output netG = Generator(ngpu) if not opt.disable_dp: netG = utils.convert_batchnorm_modules(netG) netG = netG.to(device) netG.apply(weights_init) if opt.netG != '': netG.load_state_dict(torch.load(opt.netG)) class Discriminator(nn.Module): def __init__(self, ngpu): super(Discriminator, self).__init__() self.ngpu = ngpu self.main = nn.Sequential( # input is (nc) x 64 x 64 nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), nn.LeakyReLU(0.2, inplace=True), # state size. (ndf) x 32 x 32
def main_worker(gpu, ngpus_per_node, args):
    """Train (or only evaluate) a BatchNorm-free ResNet-18 on one worker.

    Sets up the optional distributed process group, builds the model and
    places it on the requested device(s), optionally restores a checkpoint,
    creates the ImageFolder train/val loaders, optionally attaches the
    privacy engine to the optimizer, and then runs the epoch loop, saving a
    checkpoint after every epoch.

    Args:
        gpu: GPU index for this worker, or ``None``; stored on ``args.gpu``.
        ngpus_per_node: Number of GPUs per node. Used to derive the global
            rank and to split ``args.batch_size`` / ``args.workers`` when
            one process drives one GPU.
        args: Parsed command-line namespace. This function mutates it
            (``gpu``, ``rank``, ``batch_size``, ``workers``, ``start_epoch``).

    Side effects:
        Updates the module-level ``best_acc1`` and writes checkpoints via
        ``save_checkpoint``.
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(
            backend=args.dist_backend,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank,
        )

    # create model: resnet 18
    # since our differential privacy engine does not support BatchNormXd
    # we need to replace all such blocks with DP-aware normalisation modules
    model = utils.convert_batchnorm_modules(models.resnet18(num_classes=10))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = "cuda:{}".format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint["epoch"]
            best_acc1 = checkpoint["best_acc1"]
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint["epoch"]))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, "train")
    valdir = os.path.join(args.data, "val")
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
    )

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        # shuffle and sampler are mutually exclusive in DataLoader
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler,
    )

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    if not args.disable_dp:
        print("PRIVACY ENGINE ON")
        # NOTE(review): `clipping` is not defined inside this function; it
        # must be available at module scope for this call to work - confirm.
        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size,
            sample_size=len(train_dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            **clipping,
        )
        privacy_engine.attach(optimizer)
    else:
        print("PRIVACY ENGINE OFF")

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # Save from every process unless multiprocessing-distributed, in
        # which case only the first process of each node saves.
        if (not args.multiprocessing_distributed
                or args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    # The model built above is resnet18, so record that
                    # (this previously said "SampleConvNet").
                    "arch": "ResNet18",
                    "state_dict": model.state_dict(),
                    "best_acc1": best_acc1,
                    "optimizer": optimizer.state_dict(),
                },
                is_best,
                filename=args.checkpoint_file + ".tar",
            )
def test_returns_true(self):
    """The inspector must validate a ResNet-50 after BatchNorm conversion."""
    converted = utils.convert_batchnorm_modules(models.resnet50())
    validator = dp_inspector.DPModelInspector()
    self.assertTrue(validator.validate(converted))
def main():
    """Train a BatchNorm-free ResNet-18 on CIFAR10, optionally with DP.

    Parses the command line, configures stats reporting, builds the CIFAR10
    train/test loaders, the model and the chosen optimizer, attaches a
    ``PrivacyEngine`` unless ``--disable-dp`` is given, then trains and
    tests once per epoch, saving a checkpoint after each one.

    Raises:
        NotImplementedError: If ``--optim`` is not one of ``SGD``,
            ``RMSprop`` or ``Adam``.
    """
    parser = argparse.ArgumentParser(description="PyTorch CIFAR10 DP Training")
    parser.add_argument(
        "-j",
        "--workers",
        default=2,
        type=int,
        metavar="N",
        help="number of data loading workers (default: 2)",
    )
    parser.add_argument(
        "--epochs",
        default=90,
        type=int,
        metavar="N",
        help="number of total epochs to run",
    )
    parser.add_argument(
        "--start-epoch",
        default=1,
        type=int,
        metavar="N",
        help="manual epoch number (useful on restarts)",
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        default=256,
        type=int,
        metavar="N",
        help="mini-batch size (default: 256), this is the total "
        "batch size of all GPUs on the current node when "
        "using Data Parallel or Distributed Data Parallel",
    )
    parser.add_argument(
        "-na",
        "--n_accumulation_steps",
        default=1,
        type=int,
        metavar="N",
        help="number of mini-batches to accumulate into an effective batch",
    )
    parser.add_argument(
        "--lr",
        "--learning-rate",
        default=0.001,
        type=float,
        metavar="LR",
        help="initial learning rate",
        dest="lr",
    )
    parser.add_argument("--momentum",
                        default=0.9,
                        type=float,
                        metavar="M",
                        help="SGD momentum")
    parser.add_argument(
        "--wd",
        "--weight-decay",
        default=5e-4,
        type=float,
        metavar="W",
        # Help text previously advertised 1e-4, which is not the default.
        help="SGD weight decay (default: 5e-4)",
        dest="weight_decay",
    )
    parser.add_argument(
        "-p",
        "--print-freq",
        default=10,
        type=int,
        metavar="N",
        help="print frequency (default: 10)",
    )
    parser.add_argument(
        "--resume",
        default="",
        type=str,
        metavar="PATH",
        help="path to latest checkpoint (default: none)",
    )
    parser.add_argument(
        "-e",
        "--evaluate",
        dest="evaluate",
        action="store_true",
        help="evaluate model on validation set",
    )
    parser.add_argument("--seed",
                        default=None,
                        type=int,
                        help="seed for initializing training. ")
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--checkpoint-file",
        type=str,
        default="checkpoint",
        help="path to save check points",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../cifar10",
        help="Where CIFAR10 is/will be stored",
    )
    parser.add_argument("--log-dir",
                        type=str,
                        default="",
                        help="Where Tensorboard log will be stored")
    parser.add_argument(
        "--optim",
        type=str,
        default="Adam",
        help="Optimizer to use (Adam, RMSprop, SGD)",
    )

    args = parser.parse_args()

    if args.seed is not None:
        # --seed used to be parsed and then ignored in this function; seed
        # torch's global RNG so the option has an effect.
        torch.manual_seed(args.seed)

    # The following few lines, enable stats gathering about the run
    # 1. where the stats should be logged
    stats.set_global_summary_writer(
        tensorboard.SummaryWriter(os.path.join("/tmp/stat", args.log_dir)))
    # 2. enable stats
    stats.add(
        # stats about gradient norms aggregated for all layers
        stats.Stat(stats.StatType.CLIPPING, "AllLayers", frequency=0.1),
        # stats about gradient norms per layer
        stats.Stat(stats.StatType.CLIPPING, "PerLayer", frequency=0.1),
        # stats about clipping
        stats.Stat(stats.StatType.CLIPPING, "ClippingStats", frequency=0.1),
        # stats on training accuracy
        stats.Stat(stats.StatType.TRAIN, "accuracy", frequency=0.01),
        # stats on validation accuracy
        stats.Stat(stats.StatType.TEST, "accuracy"),
    )

    # The following lines enable stat gathering for the clipping process
    # and turn per-layer clipping off for the Privacy Engine
    clipping = {"clip_per_layer": False, "enable_stat": True}

    augmentations = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalize = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ]
    # Augmentations are applied only when DP is disabled.
    train_transform = transforms.Compose(
        augmentations + normalize if args.disable_dp else normalize)

    test_transform = transforms.Compose(normalize)

    train_dataset = CIFAR10(root=args.data_root,
                            train=True,
                            download=True,
                            transform=train_transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        drop_last=True,
    )

    test_dataset = CIFAR10(root=args.data_root,
                           train=False,
                           download=True,
                           transform=test_transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
    )

    best_acc1 = 0
    device = torch.device(args.device)
    model = utils.convert_batchnorm_modules(models.resnet18(num_classes=10))
    model = model.to(device)

    if args.optim == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optim == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optim == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError(
            "Optimizer not recognized. Please check spelling")

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            # The engine is given the effective batch size, i.e. the loader
            # batch size times the number of accumulation steps.
            batch_size=args.batch_size * args.n_accumulation_steps,
            sample_size=len(train_dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            **clipping,
        )
        privacy_engine.attach(optimizer)

    for epoch in range(args.start_epoch, args.epochs + 1):
        train(args, model, train_loader, optimizer, epoch, device)
        top1_acc = test(args, model, test_loader, device)

        # remember best acc@1 and save checkpoint
        is_best = top1_acc > best_acc1
        best_acc1 = max(top1_acc, best_acc1)

        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": "ResNet18",
                "state_dict": model.state_dict(),
                "best_acc1": best_acc1,
                "optimizer": optimizer.state_dict(),
            },
            is_best,
            filename=args.checkpoint_file + ".tar",
        )