def __init__(
    self,
    data_dir: Optional[str] = "../mnist",
    test_batch_size: int = 1024,
    sample_rate: float = 0.001,
    secure_rng: bool = False,
):
    """MNIST DataModule with DP-ready batch sampling.

    Args:
        data_dir: A path where MNIST is stored.
        test_batch_size: Size of batch for predicting on test.
        sample_rate: Sample rate used for batch construction.
        secure_rng: Use secure random number generator.

    Raises:
        ImportError: If ``secure_rng`` is requested but torchcsprng is missing.
    """
    super().__init__()
    self.data_root = data_dir
    self.dataloader_kwargs = {"num_workers": 1, "pin_memory": True}
    self.save_hyperparameters()

    # Default to torch's global RNG; replaced below when secure RNG is on.
    self.generator = None
    if secure_rng:
        try:
            import torchcsprng as csprng_mod
        except ImportError as err:
            raise ImportError(
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            ) from err
        self.generator = csprng_mod.create_random_device_generator("/dev/urandom")
def test_cpu_parallel(self):
    """Check that large normal_() fills scale sub-linearly (parallel CPU path)."""
    gen = csprng.create_random_device_generator('/dev/urandom')

    def timed_fill(numel):
        # Time 20 in-place normal_() fills of a float32 CPU tensor.
        buf = torch.empty(numel, dtype=torch.float32, device='cpu')
        begin = time.time()
        for _ in range(20):
            buf.normal_(generator=gen)
        return time.time() - begin

    small_elapsed = timed_fill(1000)
    large_elapsed = timed_fill(1000000)
    # Pessimistic check: 1000x the elements should cost less than 1000/1.5x
    # the time if execution is parallelized (>= 1.5x speedup).
    self.assertTrue(large_elapsed / small_elapsed < 1000 / 1.5)
def generate_shares(secret, session: Session) -> List[ShareTensor]:
    """Split a secret into one additive share per party in the session.

    The first ``n - 1`` shares are uniformly random; the last one is chosen
    so that all shares sum to the secret.

    Args:
        secret: The secret to split; must already be a ShareTensor.
        session: MPC session providing the parties and the ring tensor type.

    Returns:
        List[ShareTensor]: one share per party.

    Raises:
        ValueError: If secret is not a ShareTensor.
    """
    if not isinstance(secret, ShareTensor):
        raise ValueError("Secret should be a ShareTensor")

    nr_parties = len(session.parties)
    shape = secret.shape
    # Use the session's ring dtype for the random shares so they live in the
    # same ring as the secret (previously hard-coded to torch.long while
    # session.tensor_type was read but never used).
    tensor_type = session.tensor_type

    generator = csprng.create_random_device_generator()
    random_shares = []
    for _ in range(nr_parties - 1):
        rand_value = torch.empty(
            size=shape, dtype=tensor_type).random_(generator=generator)
        share = ShareTensor(session=session)
        # Assign the tensor after construction so the random value is NOT
        # re-encoded by the fixed point encoder.
        share.tensor = rand_value
        random_shares.append(share)

    shares = []
    for i in range(nr_parties):
        if i == 0:
            share = random_shares[i]
        elif i < nr_parties - 1:
            share = random_shares[i] - random_shares[i - 1]
        else:
            # Last share absorbs the secret minus everything handed out so far.
            share = secret - random_shares[i - 1]
        shares.append(share)
    return shares
def main():
    """Train ResNet18 on CIFAR10, optionally with differential privacy (Opacus).

    Parses CLI arguments, sets up Tensorboard stat gathering, builds the
    train/test loaders (with a secure RNG when requested), trains for the
    configured number of epochs and checkpoints the best top-1 accuracy.
    """
    parser = argparse.ArgumentParser(description="PyTorch CIFAR10 DP Training")
    parser.add_argument("-j", "--workers", default=2, type=int, metavar="N",
                        help="number of data loading workers (default: 2)")
    parser.add_argument("--epochs", default=90, type=int, metavar="N",
                        help="number of total epochs to run")
    parser.add_argument("--start-epoch", default=1, type=int, metavar="N",
                        help="manual epoch number (useful on restarts)")
    parser.add_argument(
        "-b",
        "--batch-size",
        # This should be 256, but that OOMs using the prototype.
        default=64,
        type=int,
        metavar="N",
        help="mini-batch size (default: 64), this is the total "
        "batch size of all GPUs on the current node when "
        "using Data Parallel or Distributed Data Parallel",
    )
    parser.add_argument("-na", "--n_accumulation_steps", default=1, type=int,
                        metavar="N",
                        help="number of mini-batches to accumulate into an effective batch")
    parser.add_argument("--lr", "--learning-rate", default=0.001, type=float,
                        metavar="LR", help="initial learning rate", dest="lr")
    parser.add_argument("--momentum", default=0.9, type=float, metavar="M",
                        help="SGD momentum")
    # BUGFIX: help text said "(default: 1e-4)" while the actual default is 5e-4.
    parser.add_argument("--wd", "--weight-decay", default=5e-4, type=float,
                        metavar="W", help="SGD weight decay (default: 5e-4)",
                        dest="weight_decay")
    parser.add_argument("-p", "--print-freq", default=10, type=int, metavar="N",
                        help="print frequency (default: 10)")
    parser.add_argument("--resume", default="", type=str, metavar="PATH",
                        help="path to latest checkpoint (default: none)")
    parser.add_argument("-e", "--evaluate", dest="evaluate", action="store_true",
                        help="evaluate model on validation set")
    parser.add_argument("--seed", default=None, type=int,
                        help="seed for initializing training. ")
    parser.add_argument("--device", type=str, default="cuda",
                        help="GPU ID for this process (default: 'cuda')")
    parser.add_argument("--sigma", type=float, default=1.0, metavar="S",
                        help="Noise multiplier (default 1.0)")
    parser.add_argument("-c", "--max-per-sample-grad_norm", type=float,
                        default=1.0, metavar="C",
                        help="Clip per-sample gradients to this norm (default 1.0)")
    parser.add_argument("--disable-dp", action="store_true", default=False,
                        help="Disable privacy training and just train with vanilla SGD")
    parser.add_argument("--secure-rng", action="store_true", default=False,
                        help="Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost")
    parser.add_argument("--delta", type=float, default=1e-5, metavar="D",
                        help="Target delta (default: 1e-5)")
    parser.add_argument("--checkpoint-file", type=str, default="checkpoint",
                        help="path to save check points")
    parser.add_argument("--data-root", type=str, default="../cifar10",
                        help="Where CIFAR10 is/will be stored")
    parser.add_argument("--log-dir", type=str, default="",
                        help="Where Tensorboard log will be stored")
    parser.add_argument("--optim", type=str, default="Adam",
                        help="Optimizer to use (Adam, RMSprop, SGD)")
    args = parser.parse_args()

    # BUGFIX: removed a debugging leftover (``args.disable_dp = True``) that
    # unconditionally disabled DP training regardless of the CLI flag.
    if args.disable_dp and args.n_accumulation_steps > 1:
        raise ValueError("Virtual steps only works with enabled DP")

    # The following few lines, enable stats gathering about the run
    # 1. where the stats should be logged
    stats.set_global_summary_writer(
        tensorboard.SummaryWriter(os.path.join("/tmp/stat", args.log_dir)))
    # 2. enable stats
    stats.add(
        # stats about gradient norms aggregated for all layers
        stats.Stat(stats.StatType.GRAD, "AllLayers", frequency=0.1),
        # stats about gradient norms per layer
        stats.Stat(stats.StatType.GRAD, "PerLayer", frequency=0.1),
        # stats about clipping
        stats.Stat(stats.StatType.GRAD, "ClippingStats", frequency=0.1),
        # stats on training accuracy
        stats.Stat(stats.StatType.TRAIN, "accuracy", frequency=0.01),
        # stats on validation accuracy
        stats.Stat(stats.StatType.TEST, "accuracy"),
    )

    # The following lines enable stat gathering for the clipping process
    # and set a default of per layer clipping for the Privacy Engine
    clipping = {"clip_per_layer": False, "enable_stat": True}

    if args.secure_rng:
        # BUGFIX: removed a stray ``assert False`` debugging leftover that made
        # the --secure-rng path unreachable.
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e
        generator = prng.create_random_device_generator("/dev/urandom")
    else:
        generator = None

    augmentations = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalize = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
    # Random augmentations are only applied when DP is disabled.
    train_transform = transforms.Compose(
        augmentations + normalize if args.disable_dp else normalize)
    test_transform = transforms.Compose(normalize)

    train_dataset = CIFAR10(root=args.data_root, train=True, download=True,
                            transform=train_transform)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        drop_last=True,
        generator=generator,
    )
    test_dataset = CIFAR10(root=args.data_root, train=False, download=True,
                           transform=test_transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
    )

    best_acc1 = 0
    device = torch.device(args.device)
    # Replace BatchNorm layers with DP-compatible modules before attaching
    # the privacy engine.
    model = convert_batchnorm_modules(models.resnet18(num_classes=10))
    # model = CIFAR10Model()
    model = model.to(device)

    if args.optim == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optim == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optim == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError(
            "Optimizer not recognized. Please check spelling")

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size * args.n_accumulation_steps,
            sample_size=len(train_dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            secure_rng=args.secure_rng,
            **clipping,
        )
        privacy_engine.attach(optimizer)

    for epoch in range(args.start_epoch, args.epochs + 1):
        train(args, model, train_loader, optimizer, epoch, device)
        top1_acc = test(args, model, test_loader, device)

        # remember best acc@1 and save checkpoint
        is_best = top1_acc > best_acc1
        best_acc1 = max(top1_acc, best_acc1)
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": "ResNet18",
                "state_dict": model.state_dict(),
                "best_acc1": best_acc1,
                "optimizer": optimizer.state_dict(),
            },
            is_best,
            filename=args.checkpoint_file + ".tar",
        )
def main():
    """Train an IMDB sentiment classifier, optionally with differential privacy (Opacus)."""
    parser = argparse.ArgumentParser(description="PyTorch IMDB Example")
    parser.add_argument(
        "-b",
        "--batch-size-test",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for test (default: 64)",
    )
    parser.add_argument(
        "-sr",
        "--sample-rate",
        type=float,
        default=0.00256,
        metavar="SR",
        help="sample rate used for batch construction (default: 0.00256)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 10)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.02,
        metavar="LR",
        help="learning rate (default: .02)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=0.56,
        metavar="S",
        help="Noise multiplier (default 0.56)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--max-sequence-length",
        type=int,
        default=256,
        metavar="SL",
        help="Longer sequences will be cut to this length (default: 256)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla optimizer",
    )
    parser.add_argument(
        "--secure-rng",
        action="store_true",
        default=False,
        help="Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost",
    )
    parser.add_argument(
        "--data-root", type=str, default="../imdb", help="Where IMDB is/will be stored"
    )
    parser.add_argument(
        "-j",
        "--workers",
        default=2,
        type=int,
        metavar="N",
        help="number of data loading workers (default: 2)",
    )
    args = parser.parse_args()
    device = torch.device(args.device)

    # Load and tokenize IMDB; sequences are truncated to --max-sequence-length.
    raw_dataset = load_dataset("imdb", cache_dir=args.data_root)
    tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")
    dataset = raw_dataset.map(
        lambda x: tokenizer(
            x["text"], truncation=True, max_length=args.max_sequence_length
        ),
        batched=True,
    )
    dataset.set_format(type="torch", columns=["input_ids", "label"])
    train_dataset = dataset["train"]
    test_dataset = dataset["test"]

    # Secure RNG requires the optional torchcsprng package; fail loudly with
    # install instructions if it is missing.
    if args.secure_rng:
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e
        generator = prng.create_random_device_generator("/dev/urandom")
    else:
        generator = None

    # Poisson-style sampling (UniformWithReplacementSampler) matches the
    # sampling assumption made by the Opacus privacy accountant.
    train_loader = DataLoader(
        train_dataset,
        num_workers=args.workers,
        generator=generator,
        batch_sampler=UniformWithReplacementSampler(
            num_samples=len(train_dataset),
            sample_rate=args.sample_rate,
            generator=generator,
        ),
        collate_fn=padded_collate,
        pin_memory=True,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size_test,
        shuffle=False,
        num_workers=args.workers,
        collate_fn=padded_collate,
        pin_memory=True,
    )

    model = SampleNet(vocab_size=len(tokenizer)).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Attach the privacy engine unless DP was explicitly disabled.
    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            sample_rate=args.sample_rate,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            secure_rng=args.secure_rng,
        )
        privacy_engine.attach(optimizer)

    # Train for --epochs epochs; only the accuracy of the final epoch is saved.
    mean_accuracy = 0
    for epoch in range(1, args.epochs + 1):
        train(args, model, train_loader, optimizer, epoch)
        mean_accuracy = evaluate(args, model, test_loader)
    torch.save(mean_accuracy, "run_results_imdb_classification.pt")
# NOTE(review): this fragment sits inside a larger training script whose
# enclosing scope starts before this view; the body of the epoch loop at the
# end is cut off past this view. Block structure of the ``if args.dp:``
# branch was inferred from data flow — confirm against the original file.
net_glob.train()
loss_func = nn.CrossEntropyLoss()
optimizer = get_optimizer(args, net_glob)

if args.dp:
    # Differential privacy: force the secure RNG and wrap the optimizer.
    args.secure_rng = True
    privacy_engine = PrivacyEngine(
        net_glob,
        batch_size=args.bs,
        # NOTE(review): sample_size is usually the dataset size, not the
        # number of batches (len(train_loader)) — verify intent.
        sample_size=len(train_loader),
        alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        noise_multiplier=0.3,
        max_grad_norm=1.2,
        secure_rng=args.secure_rng)
    privacy_engine.attach(optimizer)

# Secure generator only when secure RNG was requested (set above when DP is on).
generator = (prng.create_random_device_generator("/dev/urandom")
             if args.secure_rng else None)
train_loader = prngDataloader(
    train_attributes,
    train_labels,
    [idx for idx in range(len(train_attributes))],
    batchsize=args.local_bs,
    gene=generator)

# training
loss_train = []
loss_valid = []
best_valid_loss = np.finfo(float).max
best_net_glob = copy.deepcopy(net_glob)

with memory_time_moniter() as mt:
    for iter in range(args.epochs):
def generate_shares(
    secret: Union[ShareTensor, torch.Tensor, float, int],
    nr_parties: int,
    config: Config = Config(),
    tensor_type: Optional[torch.dtype] = None,
) -> List[ShareTensor]:
    """Generate shares from secret.

    Given a secret, split it into a number of shares such that each
    party would get one.

    Args:
        secret (Union[ShareTensor, torch.Tensor, float, int]): Secret to split.
        nr_parties (int): Number of parties to split the secret.
        config (Config): Configuration used for the Share Tensor (in case it is needed).
            Use default Config if nothing provided. The ShareTensor config would have priority.
        tensor_type (torch.dtype, optional): tensor type. Defaults to None.

    Returns:
        List[ShareTensor]. List of ShareTensor.

    Raises:
        ValueError: If secret is not an expected format.

    Examples:
        >>> from sympc.tensor.mpc_tensor import MPCTensor
        >>> MPCTensor.generate_shares(secret=2, nr_parties=2)
        [[ShareTensor]
            | [FixedPointEncoder]: precision: 16, base: 2
            | Data: tensor([15511500.]), [ShareTensor]
            | [FixedPointEncoder]: precision: 16, base: 2
            | Data: tensor([-15380428.])]
        >>> MPCTensor.generate_shares(secret=2, nr_parties=2,
                encoder_base=3, encoder_precision=4)
        [[ShareTensor]
            | [FixedPointEncoder]: precision: 4, base: 3
            | Data: tensor([14933283.]), [ShareTensor]
            | [FixedPointEncoder]: precision: 4, base: 3
            | Data: tensor([-14933121.])]
    """
    # BUGFIX: validate the type before touching ``secret.config``. Previously
    # an unexpected type hit ``secret.config`` in the else-branch and raised
    # AttributeError instead of the documented ValueError.
    if isinstance(secret, (torch.Tensor, float, int)):
        # if secret is not a ShareTensor, a new instance is created
        secret = ShareTensor(secret, config=config)
    elif isinstance(secret, ShareTensor):
        config = secret.config
    else:
        raise ValueError(
            "Secret should be a ShareTensor, torchTensor, float or int.")

    op = operator.sub
    shape = secret.shape

    random_shares = []
    generator = csprng.create_random_device_generator()
    for _ in range(nr_parties - 1):
        rand_value = torch.empty(
            size=shape, dtype=tensor_type).random_(generator=generator)
        # Construct empty and assign the tensor afterwards so the random
        # value is NOT passed through the fixed point encoder (the previous
        # ``data=rand_value`` encoded it only to overwrite it right away).
        share = ShareTensor(config=config)
        share.tensor = rand_value
        random_shares.append(share)

    shares = []
    for i in range(nr_parties):
        if i == 0:
            share = random_shares[i]
        elif i < nr_parties - 1:
            share = op(random_shares[i], random_shares[i - 1])
        else:
            # Last share absorbs the secret minus everything handed out so far.
            share = op(secret, random_shares[i - 1])
        shares.append(share)
    return shares
class TestCSPRNG(unittest.TestCase):
    """Statistical and CPU/CUDA-consistency tests for the torchcsprng generators."""

    all_generators = [
        csprng.create_random_device_generator(),
        csprng.create_random_device_generator('/dev/urandom'),
        csprng.create_mt19937_generator(),
        csprng.create_mt19937_generator(42)
    ]
    int_dtypes = [
        torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64
    ]
    fp_ftypes = [torch.float, torch.double]
    num_dtypes = int_dtypes + fp_ftypes
    all_dtypes = num_dtypes + [torch.bool]
    size = 1000
    all_devices = ['cpu', 'cuda'] if (
        torch.cuda.is_available() and csprng.supports_cuda()) else ['cpu']

    def test_random_kstest(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.num_dtypes:
                    # Floats can represent integers exactly only up to the
                    # mantissa width; bound the expected range accordingly.
                    if dtype == torch.float:
                        to_inc = 2**24
                    elif dtype == torch.double:
                        to_inc = 2**53
                    else:
                        to_inc = torch.iinfo(dtype).max
                    t = torch.empty(self.size, dtype=dtype,
                                    device=device).random_(generator=gen)
                    res = stats.kstest(t.cpu(), stats.randint.cdf,
                                       args=(0, to_inc))
                    self.assertTrue(res.statistic < 0.1)

    @unittest.skipIf(
        not torch.cuda.is_available() or not csprng.supports_cuda(),
        "CUDA is not available or csprng was not compiled with CUDA support")
    def test_random_cpu_vs_cuda(self):
        for dtype in self.num_dtypes:
            gen = csprng.create_mt19937_generator(42)
            cpu_t = torch.empty(self.size, dtype=dtype,
                                device='cpu').random_(generator=gen)
            gen = csprng.create_mt19937_generator(42)
            cuda_t = torch.empty(self.size, dtype=dtype,
                                 device='cuda').random_(generator=gen)
            self.assertTrue((cpu_t == cuda_t.cpu()).all())

    def test_random_to_kstest(self):
        to_ = 42
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.num_dtypes:
                    t = torch.zeros(self.size, dtype=dtype,
                                    device=device).random_(to_, generator=gen)
                    res = stats.kstest(t.cpu(), stats.randint.cdf,
                                       args=(0, to_))
                    self.assertTrue(res.statistic < 0.1)

    @unittest.skipIf(
        not torch.cuda.is_available() or not csprng.supports_cuda(),
        "CUDA is not available or csprng was not compiled with CUDA support")
    def test_random_to_cpu_vs_cuda(self):
        to_ = 42
        for dtype in self.num_dtypes:
            gen = csprng.create_mt19937_generator(42)
            cpu_t = torch.zeros(self.size, dtype=dtype,
                                device='cpu').random_(to_, generator=gen)
            gen = csprng.create_mt19937_generator(42)
            cuda_t = torch.zeros(self.size, dtype=dtype,
                                 device='cuda').random_(to_, generator=gen)
            self.assertTrue((cpu_t == cuda_t.cpu()).all())

    def test_random_from_to_kstest(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.num_dtypes:
                    for from_ in [0, 24, 42]:
                        for to_ in [42, 99, 123]:
                            if from_ < to_:
                                t = torch.zeros(self.size, dtype=dtype,
                                                device=device).random_(
                                                    from_, to_, generator=gen)
                                res = stats.kstest(t.cpu(),
                                                   stats.randint.cdf,
                                                   args=(from_, to_))
                                self.assertTrue(res.statistic < 0.2)

    @unittest.skipIf(
        not torch.cuda.is_available() or not csprng.supports_cuda(),
        "CUDA is not available or csprng was not compiled with CUDA support")
    def test_random_from_to_cpu_vs_cuda(self):
        for dtype in self.num_dtypes:
            for from_ in [0, 24, 42]:
                for to_ in [42, 99, 123]:
                    if from_ < to_:
                        gen = csprng.create_mt19937_generator(42)
                        cpu_t = torch.zeros(self.size, dtype=dtype,
                                            device='cpu').random_(
                                                from_, to_, generator=gen)
                        gen = csprng.create_mt19937_generator(42)
                        cuda_t = torch.zeros(self.size, dtype=dtype,
                                             device='cuda').random_(
                                                 from_, to_, generator=gen)
                        self.assertTrue((cpu_t == cuda_t.cpu()).all())

    def test_random_bool(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                t = torch.empty(self.size, dtype=torch.bool, device=device)

                t.fill_(False)
                t.random_(generator=gen)
                self.assertEqual(t.min(), False)
                self.assertEqual(t.max(), True)
                self.assertTrue(
                    0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6)

                t.fill_(True)
                t.random_(generator=gen)
                self.assertEqual(t.min(), False)
                self.assertEqual(t.max(), True)
                self.assertTrue(
                    0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6)

    @unittest.skipIf(
        not torch.cuda.is_available() or not csprng.supports_cuda(),
        "CUDA is not available or csprng was not compiled with CUDA support")
    def test_random_bool_cpu_vs_cuda(self):
        gen = csprng.create_mt19937_generator(42)
        cpu_t = torch.empty(self.size, dtype=torch.bool,
                            device='cpu').random_(generator=gen)
        gen = csprng.create_mt19937_generator(42)
        cuda_t = torch.empty(self.size, dtype=torch.bool,
                             device='cuda').random_(generator=gen)
        self.assertTrue((cpu_t == cuda_t.cpu()).all())

    def test_uniform_kstest(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.fp_ftypes:
                    for from_ in [-42, 0, 4.2]:
                        for to_ in [-4.2, 0, 42]:
                            if to_ > from_:
                                t = torch.empty(self.size, dtype=dtype,
                                                device=device).uniform_(
                                                    from_, to_, generator=gen)
                                res = stats.kstest(
                                    t.cpu().to(torch.double), 'uniform',
                                    args=(from_, (to_ - from_)))
                                self.assertTrue(res.statistic < 0.1)

    @unittest.skipIf(
        not torch.cuda.is_available() or not csprng.supports_cuda(),
        "CUDA is not available or csprng was not compiled with CUDA support")
    def test_uniform_cpu_vs_cuda(self):
        for dtype in self.fp_ftypes:
            for from_ in [-42, 0, 4.2]:
                for to_ in [-4.2, 0, 42]:
                    if to_ > from_:
                        gen = csprng.create_mt19937_generator(42)
                        cpu_t = torch.empty(self.size, dtype=dtype,
                                            device='cpu').uniform_(
                                                from_, to_, generator=gen)
                        gen = csprng.create_mt19937_generator(42)
                        cuda_t = torch.empty(self.size, dtype=dtype,
                                             device='cuda').uniform_(
                                                 from_, to_, generator=gen)
                        self.assertTrue(
                            (cpu_t - cuda_t.cpu()).abs().max() < 1e-9)

    def test_normal_kstest(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.fp_ftypes:
                    for mean in [-3, 0, 7]:
                        for std in [1, 5, 7]:
                            t = torch.empty(self.size, dtype=dtype,
                                            device=device).normal_(
                                                mean=mean, std=std,
                                                generator=gen)
                            res = stats.kstest(t.cpu().to(torch.double),
                                               'norm', args=(mean, std))
                            self.assertTrue(res.statistic < 0.1)

    @unittest.skipIf(
        not torch.cuda.is_available() or not csprng.supports_cuda(),
        "CUDA is not available or csprng was not compiled with CUDA support")
    def test_normal_cpu_vs_cuda(self):
        for dtype in self.fp_ftypes:
            for mean in [-3, 0, 7]:
                for std in [1, 5, 7]:
                    gen = csprng.create_mt19937_generator(42)
                    cpu_t = torch.empty(self.size, dtype=dtype,
                                        device='cpu').normal_(
                                            mean=mean, std=std, generator=gen)
                    gen = csprng.create_mt19937_generator(42)
                    cuda_t = torch.empty(self.size, dtype=dtype,
                                         device='cuda').normal_(
                                             mean=mean, std=std, generator=gen)
                    self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9)

    def test_log_normal_kstest(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.fp_ftypes:
                    for mean in [-3, 0, 7]:
                        for std in [1, 5, 7]:
                            t = torch.empty(self.size, dtype=dtype,
                                            device=device).log_normal_(
                                                mean=mean, std=std,
                                                generator=gen)
                            # scipy's lognorm is parameterized as
                            # (shape=std, loc=0, scale=exp(mean)).
                            res = stats.kstest(t.cpu().to(torch.double),
                                               'lognorm',
                                               args=(std, 0, math.exp(mean)))
                            self.assertTrue(res.statistic < 0.1)

    @unittest.skipIf(
        not torch.cuda.is_available() or not csprng.supports_cuda(),
        "CUDA is not available or csprng was not compiled with CUDA support")
    def test_log_normal_cpu_vs_cuda(self):
        for dtype in self.fp_ftypes:
            for mean in [-3, 0, 7]:
                for std in [1, 5, 7]:
                    gen = csprng.create_mt19937_generator(42)
                    cpu_t = torch.empty(self.size, dtype=dtype,
                                        device='cpu').log_normal_(
                                            mean=mean, std=std, generator=gen)
                    gen = csprng.create_mt19937_generator(42)
                    cuda_t = torch.empty(self.size, dtype=dtype,
                                         device='cuda').log_normal_(
                                             mean=mean, std=std, generator=gen)
                    self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-4)

    def test_exponential_kstest(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.fp_ftypes:
                    for lambd in [0.5, 1.0, 5.0]:
                        t = torch.empty(self.size, dtype=dtype,
                                        device=device).exponential_(
                                            lambd=lambd, generator=gen)
                        res = stats.kstest(t.cpu().to(torch.double), 'expon',
                                           args=(0, 1 / lambd,))
                        self.assertTrue(res.statistic < 0.1)

    @unittest.skipIf(
        not torch.cuda.is_available() or not csprng.supports_cuda(),
        "CUDA is not available or csprng was not compiled with CUDA support")
    def test_exponential_cpu_vs_cuda(self):
        for dtype in self.fp_ftypes:
            for lambd in [0.5, 1.0, 5.0]:
                gen = csprng.create_mt19937_generator(42)
                cpu_t = torch.empty(self.size, dtype=dtype,
                                    device='cpu').exponential_(
                                        lambd=lambd, generator=gen)
                gen = csprng.create_mt19937_generator(42)
                cuda_t = torch.empty(self.size, dtype=dtype,
                                     device='cuda').exponential_(
                                         lambd=lambd, generator=gen)
                self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9)

    def test_cauchy_kstest(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.fp_ftypes:
                    for median in [-10, 0, 50]:
                        for sigma in [0.5, 1.0, 10.0]:
                            t = torch.empty(self.size, dtype=dtype,
                                            device=device).cauchy_(
                                                median=median, sigma=sigma,
                                                generator=gen)
                            res = stats.kstest(t.cpu().to(torch.double),
                                               'cauchy',
                                               args=(median, sigma))
                            self.assertTrue(res.statistic < 0.1)

    @unittest.skipIf(
        not torch.cuda.is_available() or not csprng.supports_cuda(),
        "CUDA is not available or csprng was not compiled with CUDA support")
    def test_cauchy_cpu_vs_cuda(self):
        for dtype in self.fp_ftypes:
            for median in [-10, 0, 50]:
                for sigma in [0.5, 1.0, 10.0]:
                    gen = csprng.create_mt19937_generator(42)
                    cpu_t = torch.empty(self.size, dtype=dtype,
                                        device='cpu').cauchy_(
                                            median=median, sigma=sigma,
                                            generator=gen)
                    gen = csprng.create_mt19937_generator(42)
                    cuda_t = torch.empty(self.size, dtype=dtype,
                                         device='cuda').cauchy_(
                                             median=median, sigma=sigma,
                                             generator=gen)
                    self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9)

    def test_geometric(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.fp_ftypes:
                    for p in [0.2, 0.5, 0.8]:
                        t = torch.empty(self.size, dtype=dtype,
                                        device=device).geometric_(
                                            p=p, generator=gen)
                        # actual = np.histogram(t.cpu().to(torch.double), np.arange(1, 100))[0]
                        # expected = stats.geom(p).pmf(np.arange(1, 99)) * self.size
                        # res = stats.chisquare(actual, expected)
                        # self.assertAlmostEqual(res.pvalue, 1.0, delta=0.5)
                        # TODO https://github.com/pytorch/csprng/issues/7

    @unittest.skipIf(
        not torch.cuda.is_available() or not csprng.supports_cuda(),
        "CUDA is not available or csprng was not compiled with CUDA support")
    def test_geometric_cpu_vs_cuda(self):
        for dtype in self.fp_ftypes:
            for p in [0.2, 0.5, 0.8]:
                gen = csprng.create_mt19937_generator(42)
                cpu_t = torch.empty(self.size, dtype=dtype,
                                    device='cpu').geometric_(p=p,
                                                             generator=gen)
                gen = csprng.create_mt19937_generator(42)
                cuda_t = torch.empty(self.size, dtype=dtype,
                                     device='cuda').geometric_(p=p,
                                                               generator=gen)
                self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9)

    def test_non_contiguous_vs_contiguous(self):
        size = 10
        for device in self.all_devices:
            for dtype in self.all_dtypes:
                for i in range(10):
                    t = torch.zeros([size, size, size], dtype=dtype,
                                    device=device)
                    x1 = random.randrange(0, size)
                    y1 = random.randrange(0, size)
                    z1 = random.randrange(0, size)
                    x2 = random.randrange(x1 + 1, max(x1 + 2, size))
                    y2 = random.randrange(y1 + 1, max(y1 + 2, size))
                    z2 = random.randrange(z1 + 1, max(z1 + 2, size))
                    maybe_non_contiguous = t[x1:x2, y1:y2, z1:z2]
                    assert (maybe_non_contiguous.numel() > 0)

                    if not maybe_non_contiguous.is_contiguous():
                        seed = random.randrange(1000)

                        non_contiguous = maybe_non_contiguous
                        gen = csprng.create_mt19937_generator(seed)
                        non_contiguous.random_(generator=gen)

                        contiguous = torch.zeros_like(non_contiguous)
                        gen = csprng.create_mt19937_generator(seed)
                        contiguous.random_(generator=gen)

                        assert (contiguous.is_contiguous())
                        self.assertTrue((non_contiguous == contiguous).all())

                        for x in range(0, size):
                            for y in range(0, size):
                                for z in range(0, size):
                                    if not x1 <= x < x2 and not y1 <= y < y2 and not z1 <= z < z2:
                                        self.assertTrue(t[x, y, z] == 0)

    @unittest.skipIf(torch.get_num_threads() < 2,
                     "requires multithreading CPU")
    def test_cpu_parallel(self):
        urandom_gen = csprng.create_random_device_generator('/dev/urandom')

        def measure(size):
            t = torch.empty(size, dtype=torch.float32, device='cpu')
            start = time.time()
            for i in range(10):
                t.normal_(generator=urandom_gen)
            finish = time.time()
            return finish - start

        time_for_1K = measure(1000)
        time_for_1M = measure(1000000)
        # Pessimistic check that parallel execution gives >= 1.5 performance boost
        self.assertTrue(
            time_for_1M / time_for_1K < 1000 / min(1.5, torch.get_num_threads()))

    @unittest.skip("Temporary disable because doesn't work on Sandcastle")
    def test_version(self):
        import torchcsprng.version as version
        self.assertTrue(version.__version__)
        self.assertTrue(version.git_version)

    def test_randperm(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.int_dtypes:
                    for size in range(0, 20):
                        expected = torch.arange(size, dtype=dtype,
                                                device=device)

                        actual = torch.randperm(size, dtype=dtype,
                                                device=device, generator=gen)

                        actual_out = torch.empty(1, dtype=dtype, device=device)
                        torch.randperm(size, out=actual_out, generator=gen)

                        if size >= 10:
                            self.assertTrue(
                                not torch.allclose(expected, actual))
                            self.assertTrue(
                                not torch.allclose(expected, actual_out))

                        actual = actual.sort()[0]
                        # BUGFIX: sort the out-variant result itself. Previously
                        # this was ``actual.sort()[0]``, which re-sorted
                        # ``actual`` and never actually checked ``actual_out``.
                        actual_out = actual_out.sort()[0]

                        self.assertTrue(torch.allclose(expected, actual))
                        self.assertTrue(torch.allclose(expected, actual_out))

    def test_aes128_key_tensor(self):
        size = 10
        for gen in self.all_generators:
            s = set()
            for _ in range(0, size):
                t = csprng.aes128_key_tensor(gen)
                s.add(str(t))
            self.assertEqual(len(s), size)

    def test_const_generator(self):
        for device in self.all_devices:
            for gen in self.all_generators:
                for dtype in self.int_dtypes:
                    key = csprng.aes128_key_tensor(gen)
                    const_gen = csprng.create_const_generator(key)
                    first = torch.empty(
                        self.size, dtype=dtype,
                        device=device).random_(generator=const_gen)
                    second = torch.empty(
                        self.size, dtype=dtype,
                        device=device).random_(generator=const_gen)
                    # A const generator must reproduce the same stream.
                    self.assertTrue((first - second).max().abs() == 0)
def main():
    """Train a small CNN on MNIST, optionally with differential privacy (Opacus).

    Runs ``--n-runs`` independent training runs, reports per-run test accuracy
    (plus mean/std across runs) and saves the results to disk.
    """
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument("-sr", "--sample-rate", type=float, default=0.001,
                        metavar="SR",
                        help="sample rate used for batch construction (default: 0.001)")
    parser.add_argument("--test-batch-size", type=int, default=1024,
                        metavar="TB",
                        help="input batch size for testing (default: 1024)")
    # BUGFIX: help text said "(default: 14)" while the actual default is 10.
    parser.add_argument("-n", "--epochs", type=int, default=10, metavar="N",
                        help="number of epochs to train (default: 10)")
    parser.add_argument("-r", "--n-runs", type=int, default=1, metavar="R",
                        help="number of runs to average on (default: 1)")
    parser.add_argument("--lr", type=float, default=0.1, metavar="LR",
                        help="learning rate (default: .1)")
    parser.add_argument("--sigma", type=float, default=1.0, metavar="S",
                        help="Noise multiplier (default 1.0)")
    parser.add_argument("-c", "--max-per-sample-grad_norm", type=float,
                        default=1.0, metavar="C",
                        help="Clip per-sample gradients to this norm (default 1.0)")
    parser.add_argument("--delta", type=float, default=1e-5, metavar="D",
                        help="Target delta (default: 1e-5)")
    parser.add_argument("--device", type=str, default="cuda",
                        help="GPU ID for this process (default: 'cuda')")
    parser.add_argument("--save-model", action="store_true", default=False,
                        help="Save the trained model (default: false)")
    parser.add_argument("--disable-dp", action="store_true", default=False,
                        help="Disable privacy training and just train with vanilla SGD")
    parser.add_argument("--secure-rng", action="store_true", default=False,
                        help="Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost")
    parser.add_argument("--data-root", type=str, default="../mnist",
                        help="Where MNIST is/will be stored")
    args = parser.parse_args()
    device = torch.device(args.device)
    kwargs = {"num_workers": 1, "pin_memory": True}

    if args.secure_rng:
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e
        generator = prng.create_random_device_generator("/dev/urandom")
    else:
        generator = None

    train_dataset = datasets.MNIST(
        args.data_root,
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, )),
        ]),
    )
    # Poisson-style sampling (UniformWithReplacementSampler) matches the
    # sampling assumption made by the Opacus privacy accountant.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        generator=generator,
        batch_sampler=UniformWithReplacementSampler(
            num_samples=len(train_dataset),
            sample_rate=args.sample_rate,
            generator=generator,
        ),
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, )),
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )

    run_results = []
    for _ in range(args.n_runs):
        model = SampleConvNet().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            privacy_engine = PrivacyEngine(
                model,
                sample_rate=args.sample_rate,
                alphas=[1 + x / 10.0 for x in range(1, 100)] +
                list(range(12, 64)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
                secure_rng=args.secure_rng,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
        run_results.append(test(args, model, device, test_loader))

    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100))

    repro_str = (
        f"{model.name()}_{args.lr}_{args.sigma}_"
        f"{args.max_per_sample_grad_norm}_{args.sample_rate}_{args.epochs}")
    torch.save(run_results, f"run_results_{repro_str}.pt")

    if args.save_model:
        torch.save(model.state_dict(), f"mnist_cnn_{repro_str}.pt")
def __init__(
    self,
    module: nn.Module,
    batch_size: int,
    sample_size: int,
    alphas: List[float],
    noise_multiplier: float,
    max_grad_norm: Union[float, List[float]],
    secure_rng: bool = False,
    grad_norm_type: int = 2,
    batch_first: bool = True,
    target_delta: float = 1e-6,
    loss_reduction: str = "mean",
    **misc_settings,
):
    r"""Initialize the privacy engine and its random-number source.

    Args:
        module: The Pytorch module to which we are attaching the privacy engine
        batch_size: Training batch size. Used in the privacy accountant.
        sample_size: The size of the sample (dataset). Used in the privacy
            accountant.
        alphas: A list of RDP orders
        noise_multiplier: The ratio of the standard deviation of the Gaussian
            noise to the L2-sensitivity of the function to which the noise is
            added
        max_grad_norm: The maximum norm of the per-sample gradients. Any
            gradient with norm higher than this will be clipped to this value.
        secure_rng: If on, it will use ``torchcsprng`` for secure random number
            generation. Comes with a significant performance cost, therefore
            it's recommended that you turn it off when just experimenting.
        grad_norm_type: The order of the norm. For instance, 2 represents
            L-2 norm, while 1 represents L-1 norm.
        batch_first: Flag to indicate if the input tensor to the corresponding
            module has the first dimension representing the batch. If set to
            True, dimensions on input tensor will be ``[batch_size, ..., ...]``.
        target_delta: The target delta
        loss_reduction: Indicates if the loss reduction (for aggregating the
            gradients) is a sum or a mean operation. Can take values "sum" or
            "mean"
        **misc_settings: Other arguments to the init
    """
    self.steps = 0  # number of privacy-consuming optimizer steps taken so far
    self.module = module
    self.secure_rng = secure_rng
    self.alphas = alphas
    # Noise will be generated on the device the model parameters live on.
    self.device = next(module.parameters()).device

    self.batch_size = batch_size
    # Sampling probability fed to the privacy accountant.
    self.sample_rate = batch_size / sample_size
    self.noise_multiplier = noise_multiplier
    self.max_grad_norm = max_grad_norm
    self.grad_norm_type = grad_norm_type
    self.batch_first = batch_first
    self.target_delta = target_delta

    if self.secure_rng:
        # Cryptographic generator: no reproducible seed is recorded.
        self.seed = None
        self.random_number_generator = csprng.create_random_device_generator(
            "/dev/urandom")
    else:
        warnings.warn(
            "Secure RNG turned off. This is perfectly fine for experimentation as it allows "
            "for much faster training performance, but remember to turn it on and retrain "
            "one last time before production with ``secure_rng`` turned on."
        )
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # Seed the (insecure) RNG from os.urandom; keeping the seed makes
            # the run reproducible if it is recorded.
            self.seed = int.from_bytes(os.urandom(8), byteorder="big", signed=True)
            self.random_number_generator = self._set_seed(self.seed)

    self.validator = DPModelInspector()
    self.clipper = None  # lazy initialization in attach
    self.misc_settings = misc_settings
    self.loss_reduction = loss_reduction
def main():
    """Entry point: (optionally distributed) DP-SGD CIFAR-10 training with Opacus."""
    args = parse_args()

    if args.debug >= 1:
        logger.setLevel(level=logging.DEBUG)

    # Sets `world_size = 1` if you run on a single GPU with `args.local_rank = -1`
    if args.device != "cpu":
        rank, local_rank, world_size = setup(args)
        device = local_rank
    else:
        device = "cpu"
        rank = 0
        world_size = 1

    # Gradient accumulation ("virtual steps") only makes sense with DP enabled.
    if args.disable_dp and args.n_accumulation_steps > 1:
        raise ValueError("Virtual steps only works with enabled DP")

    if args.dist_algo == "ddp_hook" and not args.clip_per_layer:
        raise ValueError(
            "Please enable `--clip_per_layer` if you want to use Opacus DDP")

    # The following few lines, enable stats gathering about the run
    # 1. where the stats should be logged
    stats.set_global_summary_writer(tensorboard.SummaryWriter(args.log_dir))
    # 2. enable stats
    stats.add(
        # stats about gradient norms aggregated for all layers
        stats.Stat(stats.StatType.GRAD, "AllLayers", frequency=0.1),
        # stats about gradient norms per layer
        stats.Stat(stats.StatType.GRAD, "PerLayer", frequency=0.1),
        # stats about clipping
        stats.Stat(stats.StatType.GRAD, "ClippingStats", frequency=0.1),
        # stats on training accuracy
        stats.Stat(stats.StatType.TRAIN, "accuracy", frequency=0.01),
        # stats on validation accuracy
        stats.Stat(stats.StatType.TEST, "accuracy"),
    )

    # The following lines enable stat gathering for the clipping process
    # and set a default of per layer clipping for the Privacy Engine
    clipping = {
        "clip_per_layer": args.clip_per_layer,
        "enable_stat": (rank == 0),  # only rank 0 gathers clipping stats
    }

    if args.secure_rng:
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e

        generator = prng.create_random_device_generator("/dev/urandom")
    else:
        generator = None

    augmentations = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalize = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ]
    # Random augmentations are applied only when DP is disabled.
    train_transform = transforms.Compose(
        augmentations + normalize if args.disable_dp else normalize)

    test_transform = transforms.Compose(normalize)

    train_dataset = CIFAR10(root=args.data_root,
                            train=True,
                            download=True,
                            transform=train_transform)

    if world_size > 1:
        # Poisson batch sampling partitioned across replicas.
        train_sampler = DistributedPoissonBatchSampler(
            total_size=len(train_dataset),
            sample_rate=args.sample_rate,
            num_replicas=world_size,
            rank=rank,
            generator=generator,
        )
    else:
        train_sampler = UniformWithReplacementSampler(
            num_samples=len(train_dataset),
            sample_rate=args.sample_rate,
            generator=generator,
        )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        generator=generator,
        num_workers=args.workers,
        pin_memory=True,
    )

    test_dataset = CIFAR10(root=args.data_root,
                           train=False,
                           download=True,
                           transform=test_transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size_test,
        shuffle=False,
        num_workers=args.workers,
    )

    best_acc1 = 0

    model = convnet(num_classes=10)
    model = model.to(device)

    # Use the right distributed module wrapper if distributed training is enabled
    if world_size > 1:
        if not args.disable_dp:
            if args.dist_algo == "naive":
                model = DPDDP(model)
            elif args.dist_algo == "ddp_hook":
                model = DDP(model, device_ids=[device])
            else:
                raise NotImplementedError(
                    f"Unrecognized argument for the distributed algorithm: {args.dist_algo}"
                )
        else:
            model = DDP(model, device_ids=[device])

    if args.optim == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optim == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optim == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError(
            "Optimizer not recognized. Please check spelling")

    if not args.disable_dp:
        if args.clip_per_layer:
            # Each layer has the same clipping threshold. The total grad norm
            # is still bounded by `args.max_per_sample_grad_norm`.
            n_layers = len([(n, p) for n, p in model.named_parameters()
                            if p.requires_grad])
            max_grad_norm = [
                args.max_per_sample_grad_norm / np.sqrt(n_layers)
            ] * n_layers
        else:
            max_grad_norm = args.max_per_sample_grad_norm

        privacy_engine = PrivacyEngine(
            model,
            # Accumulation multiplies the effective per-step sample rate.
            sample_rate=args.sample_rate * args.n_accumulation_steps,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=max_grad_norm,
            secure_rng=args.secure_rng,
            **clipping,
        )
        privacy_engine.attach(optimizer)

    # Store some logs
    accuracy_per_epoch = []
    time_per_epoch = []

    for epoch in range(args.start_epoch, args.epochs + 1):
        if args.lr_schedule == "cos":
            # Cosine learning-rate schedule over the full run.
            lr = args.lr * 0.5 * (1 + np.cos(np.pi * epoch / (args.epochs + 1)))
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        train_duration = train(args, model, train_loader, optimizer, epoch,
                               device)
        top1_acc = test(args, model, test_loader, device)

        # remember best acc@1 and save checkpoint
        is_best = top1_acc > best_acc1
        best_acc1 = max(top1_acc, best_acc1)

        time_per_epoch.append(train_duration)
        accuracy_per_epoch.append(float(top1_acc))

        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": "Convnet",
                "state_dict": model.state_dict(),
                "best_acc1": best_acc1,
                "optimizer": optimizer.state_dict(),
            },
            is_best,
            filename=args.checkpoint_file + ".tar",
        )

    # Only rank 0 reports aggregated metrics.
    if rank == 0:
        time_per_epoch_seconds = [t.total_seconds() for t in time_per_epoch]
        avg_time_per_epoch = sum(time_per_epoch_seconds) / len(
            time_per_epoch_seconds)
        metrics = {
            "accuracy": best_acc1,
            "accuracy_per_epoch": accuracy_per_epoch,
            "avg_time_per_epoch_str": str(timedelta(seconds=int(avg_time_per_epoch))),
            "time_per_epoch": time_per_epoch_seconds,
        }

        logger.info(
            "\nNote:\n- 'total_time' includes the data loading time, training time and testing time.\n- 'time_per_epoch' measures the training time only.\n"
        )
        logger.info(metrics)

    if world_size > 1:
        cleanup()
def __init__(
    self,
    base_model: nn.Module,
    num_replicas: int,
    L2_clip: float,
    noise_multiplier: float,
    secure_rng: bool = False,
    seed: Optional[int] = None,
    watchdog: Optional[PrivacyWatchdog] = None,
) -> None:
    """Factory class which wraps any model and returns a model suitable for
    DP-SGD learning. The class will replicate the model in a memory-efficient
    way and run the forward and backward passes in parallel over the inputs.

    Args:
        base_model (nn.Module): The model instance to wrap.
        num_replicas (int): How many times to replicate the model. Must be set
            equal to the batch size.
        L2_clip (float): Clipping norm for the DP-SGD procedure.
        noise_multiplier (float): Noise multiplier for the DP-SGD procedure.
        secure_rng (bool): Whether to use a cryptographically secure random
            number generator to produce the noise. Compare Mironov, or Gazeau
            et al. Models trained without secure RNG are not suitable for
            anything except experimentation. The secure RNG has a significant
            performance overhead from collecting entropy at each step.
        seed (optional int): The seed for the (insecure) random number
            generator. This is incompatible with the cryptographic RNG and
            will raise an error if both are set.
        watchdog (optional PrivacyWatchDog): A PrivacyWatchdog instance to
            attach to the PrivacyWrapper.

    For more information on L2_clip and noise_multiplier, see Abadi et al.,
    2016. The wrapped model is compatible with any first-order optimizer (SGD,
    Adam, etc.) without any modifications to the optimizer itself. The wrapper
    includes a sanity check to make sure that the model doesn't include any
    layers incompatible with the notion of "per-sample" gradient calculation,
    such as BatchNorm layers. If it throws an error, look to the ModelSurgeon
    to remedy these issues.

    Sample use:
    >> model = PrivacyWrapper(resnet18(), num_replicas=64, L2_clip=1., noise_multiplier=1.)
    >> optimizer = torch.optim.SGD(model.wrapped_model.parameters(), lr=0.1)
    >> y_pred = model(data)
    >> loss = criterion(y_pred, y_true)
    >> loss.backward()
    >> model.clip_and_accumulate()
    >> model.noise_gradient()
    >> optimizer.step()
    >> model.prepare_next_batch()
    >> ...(repeat)
    """
    super().__init__()
    self.L2_clip = L2_clip
    self.noise_multiplier = noise_multiplier
    self.num_replicas = num_replicas
    self.wrapped_model = base_model
    # Validate the model for per-sample-gradient compatibility; the snooper
    # is only needed for this one-time check.
    self.snooper = ModelSnooper()
    self.snooper.snoop(self.wrapped_model)
    del self.snooper  # snooped enough
    self.watchdog = watchdog
    if self.watchdog:
        # Give the watchdog a back-reference to this wrapper.
        setattr(self.watchdog, "wrapper", self)
    self.input_size = getattr(self.wrapped_model, "input_size", None)
    self.models = self._clone_model(self.wrapped_model)
    self.seed = seed
    self.secure_rng = secure_rng
    # A fixed seed would defeat the purpose of the cryptographic RNG.
    if self.seed and self.secure_rng:
        raise ValueError(
            "Setting a seed is incompatible with the secure_rng option.")
    if self.secure_rng:
        try:
            import torchcsprng as rng

            self.noise_gen = rng.create_random_device_generator(
                "/dev/urandom")
        except ImportError as e:
            raise ImportError(
                "To use the secure RNG, torchcsprng must be installed."
            ) from e
    # Internal bookkeeping for the forward -> clip -> noise -> step cycle.
    self._steps_taken = 0
    self._forward_succesful = False
    self._clip_succesful = False
    self._noise_succesful = False
    self._privacy_spent = None
partial_priv_keys.append(priv_key) for rank_idx in rank_sampling_list[:args.num_share-1]: partial_priv_keys.append(priv_keys[rank_idx]) assert len(partial_priv_keys) == args.num_share train_attributes, train_labels, valid_attributes, valid_labels, test_attributes, test_labels = data_loading(args) attrisize = list(train_attributes[0].size())[0] train_loader = DataLoader(dataset=TensorDataset(train_attributes, train_labels), batch_size=args.bs, shuffle=True) valid_loader = DataLoader(dataset=TensorDataset(valid_attributes, valid_labels), batch_size=args.bs, shuffle=True) test_loader = DataLoader(dataset=TensorDataset(test_attributes, test_labels), batch_size=args.bs, shuffle=True) local_train_idxes = [idx for idx in range(int(train_attributes.shape[0]*args.rank/args.num_users),int(train_attributes.shape[0]*(args.rank+1)/args.num_users))] local_train_loader = clientDataloader(train_attributes, train_labels, local_train_idxes, batchsize=args.local_bs) if args.dp: args.secure_rng = True generator = (prng.create_random_device_generator("/dev/urandom") if args.secure_rng else None) local_train_loader = prngDataloader(train_attributes, train_labels, local_train_idxes, batchsize=args.local_bs, gene=generator) # Initialize socket connections ip_port = read_ip_port_json('../ip_port.json') # json_path = '../json/decentralized_{}_{}.json'.format(args.dataset, args.optim.lower()) # if args.dp: # json_path = '../json/decentralized_{}_{}_dp.json'.format(args.dataset, args.optim.lower()) # if args.tphe: # json_path = '../json/decentralized_{}_{}_tphe.json'.format(args.dataset, args.optim.lower()) # if args.dp: # json_path = '../json/decentralized_{}_{}_dp_tphe.json'.format(args.dataset, args.optim.lower()) # ip_port = read_ip_port_json(json_path) print(ip_port) self_ip = ip_port[args.rank]['ip']
def main():
    """Entry point: single-machine DP-SGD CIFAR-10 training (make_private API)."""
    args = parse_args()

    if args.debug >= 1:
        logger.setLevel(level=logging.DEBUG)

    device = args.device

    if args.secure_rng:
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e

        generator = prng.create_random_device_generator("/dev/urandom")
    else:
        generator = None

    augmentations = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalize = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ]
    # Random augmentations are applied only when DP is disabled.
    train_transform = transforms.Compose(
        augmentations + normalize if args.disable_dp else normalize)

    test_transform = transforms.Compose(normalize)

    train_dataset = CIFAR10(root=args.data_root,
                            train=True,
                            download=True,
                            transform=train_transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        # Expected batch size implied by the sampling rate.
        batch_size=int(args.sample_rate * len(train_dataset)),
        generator=generator,
        num_workers=args.workers,
        pin_memory=True,
    )

    test_dataset = CIFAR10(root=args.data_root,
                           train=False,
                           download=True,
                           transform=test_transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size_test,
        shuffle=False,
        num_workers=args.workers,
    )

    best_acc1 = 0

    # Norm layers are replaced with GroupNorm — presumably because BatchNorm
    # mixes samples across the batch; confirm against the Opacus docs.
    model = models.__dict__[args.architecture](
        pretrained=False, norm_layer=(lambda c: nn.GroupNorm(args.gn_groups, c)))
    model = model.to(device)

    if args.optim == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optim == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optim == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError(
            "Optimizer not recognized. Please check spelling")

    privacy_engine = None
    if not args.disable_dp:
        if args.clip_per_layer:
            # Each layer has the same clipping threshold. The total grad norm
            # is still bounded by `args.max_per_sample_grad_norm`.
            n_layers = len([(n, p) for n, p in model.named_parameters()
                            if p.requires_grad])
            max_grad_norm = [
                args.max_per_sample_grad_norm / np.sqrt(n_layers)
            ] * n_layers
        else:
            max_grad_norm = args.max_per_sample_grad_norm

        privacy_engine = PrivacyEngine(secure_mode=args.secure_rng, )
        clipping = "per_layer" if args.clip_per_layer else "flat"
        # make_private wraps the model/optimizer/loader with DP machinery.
        model, optimizer, train_loader = privacy_engine.make_private(
            module=model,
            optimizer=optimizer,
            data_loader=train_loader,
            noise_multiplier=args.sigma,
            max_grad_norm=max_grad_norm,
            clipping=clipping,
        )

    # Store some logs
    accuracy_per_epoch = []
    time_per_epoch = []

    for epoch in range(args.start_epoch, args.epochs + 1):
        if args.lr_schedule == "cos":
            # Cosine learning-rate schedule over the full run.
            lr = args.lr * 0.5 * (1 + np.cos(np.pi * epoch / (args.epochs + 1)))
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        train_duration = train(args, model, train_loader, optimizer,
                               privacy_engine, epoch, device)
        top1_acc = test(args, model, test_loader, device)

        # remember best acc@1 and save checkpoint
        is_best = top1_acc > best_acc1
        best_acc1 = max(top1_acc, best_acc1)

        time_per_epoch.append(train_duration)
        accuracy_per_epoch.append(float(top1_acc))

        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": "Convnet",
                "state_dict": model.state_dict(),
                "best_acc1": best_acc1,
                "optimizer": optimizer.state_dict(),
            },
            is_best,
            filename=args.checkpoint_file + ".tar",
        )

    time_per_epoch_seconds = [t.total_seconds() for t in time_per_epoch]
    avg_time_per_epoch = sum(time_per_epoch_seconds) / len(
        time_per_epoch_seconds)
    metrics = {
        "accuracy": best_acc1,
        "accuracy_per_epoch": accuracy_per_epoch,
        "avg_time_per_epoch_str": str(timedelta(seconds=int(avg_time_per_epoch))),
        "time_per_epoch": time_per_epoch_seconds,
    }

    logger.info(
        "\nNote:\n- 'total_time' includes the data loading time, training time and testing time.\n- 'time_per_epoch' measures the training time only.\n"
    )
    logger.info(metrics)
def __init__(
    self,
    module: nn.Module,
    *,  # As per PEP 3102, this forces clients to specify kwargs explicitly, not positionally
    sample_rate: Optional[float] = None,
    batch_size: Optional[int] = None,
    sample_size: Optional[int] = None,
    max_grad_norm: Union[float, List[float]],
    noise_multiplier: Optional[float] = None,
    alphas: List[float] = DEFAULT_ALPHAS,
    secure_rng: bool = False,
    batch_first: bool = True,
    target_delta: float = 1e-6,
    target_epsilon: Optional[float] = None,
    epochs: Optional[float] = None,
    loss_reduction: str = "mean",
    poisson: bool = False,
    **misc_settings,
):
    r"""Initialize the privacy engine.

    Args:
        module: The Pytorch module to which we are attaching the privacy engine
        alphas: A list of RDP orders
        noise_multiplier: The ratio of the standard deviation of the Gaussian
            noise to the L2-sensitivity of the function to which the noise is
            added. If unset, it is derived from (target_epsilon, target_delta,
            epochs).
        max_grad_norm: The maximum norm of the per-sample gradients. Any
            gradient with norm higher than this will be clipped to this value.
        batch_size: Training batch size. Used in the privacy accountant.
        sample_size: The size of the sample (dataset). Used in the privacy
            accountant.
        sample_rate: Sample rate used to build batches. Used in the privacy
            accountant.
        secure_rng: If on, it will use ``torchcsprng`` for secure random number
            generation. Comes with a significant performance cost, therefore
            it's recommended that you turn it off when just experimenting.
        batch_first: Flag to indicate if the input tensor to the corresponding
            module has the first dimension representing the batch. If set to
            True, dimensions on input tensor will be ``[batch_size, ..., ...]``.
        target_delta: The target delta. If unset, we will set it for you.
        target_epsilon: Privacy budget used (with target_delta and epochs) to
            derive noise_multiplier when it is not given explicitly.
        epochs: Number of training epochs, used when deriving noise_multiplier.
        loss_reduction: Indicates if the loss reduction (for aggregating the
            gradients) is a sum or a mean operation. Can take values "sum" or
            "mean"
        poisson: If on, batches are assumed to be Poisson-sampled; requires
            sample_size to model the distribution of empty batches.
        **misc_settings: Other arguments to the init
    """
    # FIX: `self.steps = 0` and (in the secure_rng branch) `self.seed = None`
    # were each assigned twice in the original; the redundant assignments are
    # removed. Behavior is otherwise unchanged.
    self.steps = 0
    self.poisson = poisson
    self.loss_reduction = loss_reduction
    self.batch_size = batch_size
    self.sample_size = sample_size
    self.sample_rate = sample_rate
    # Derives/validates sample_rate from (batch_size, sample_size) if needed.
    self._set_sample_rate()

    if isinstance(
            module, DifferentiallyPrivateDistributedDataParallel) or isinstance(
                module, torch.nn.parallel.DistributedDataParallel):
        rank = torch.distributed.get_rank()
        n_replicas = torch.distributed.get_world_size()
        # The global sample rate scales with the number of replicas.
        self.sample_rate *= n_replicas
    else:
        rank = 0
        n_replicas = 1

    self.module = GradSampleModule(module)

    if poisson:
        # TODO: Check directly if sampler is UniformSampler when sampler gets passed to the Engine (in the future)
        if sample_size is None:
            raise ValueError(
                "If using Poisson sampling, sample_size should get passed to the PrivacyEngine."
            )

        # Number of empty batches follows a geometric distribution
        # Planck is the same distribution but its parameter is the (negative) log of the geometric's parameter
        self._poisson_empty_batches_distribution = planck(
            -math.log(1 - self.sample_rate) * self.sample_size)

    if noise_multiplier is None:
        if target_epsilon is None or target_delta is None or epochs is None:
            raise ValueError(
                "If noise_multiplier is not specified, (target_epsilon, target_delta, epochs) should be given to the engine."
            )
        self.noise_multiplier = get_noise_multiplier(
            target_epsilon, target_delta, self.sample_rate, epochs, alphas)
    else:
        self.noise_multiplier = noise_multiplier

    self.max_grad_norm = max_grad_norm
    self.alphas = alphas
    self.target_delta = target_delta
    self.secure_rng = secure_rng
    self.batch_first = batch_first
    self.misc_settings = misc_settings
    self.n_replicas = n_replicas
    self.rank = rank

    # Noise is generated on the device the model parameters live on.
    self.device = next(module.parameters()).device

    if self.noise_multiplier < 0:
        raise ValueError(
            f"noise_multiplier={self.noise_multiplier} is not a valid value. Please provide a float >= 0."
        )

    if isinstance(self.max_grad_norm, float) and self.max_grad_norm <= 0:
        raise ValueError(
            f"max_grad_norm={self.max_grad_norm} is not a valid value. Please provide a float > 0."
        )

    if not self.target_delta:
        if self.sample_size:
            warnings.warn(
                "target_delta unset. Setting it to an order of magnitude less than 1/sample_size."
            )
            self.target_delta = 0.1 * (1 / self.sample_size)
        else:
            raise ValueError("Please provide a target_delta.")

    if self.secure_rng:
        try:
            import torchcsprng as csprng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e

        # Cryptographic generator: no reproducible seed is recorded.
        self.seed = None
        self.random_number_generator = csprng.create_random_device_generator(
            "/dev/urandom")
    else:
        warnings.warn(
            "Secure RNG turned off. This is perfectly fine for experimentation as it allows "
            "for much faster training performance, but remember to turn it on and retrain "
            "one last time before production with ``secure_rng`` turned on."
        )
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # Seed the (insecure) RNG from os.urandom so runs can be
            # reproduced if the seed is recorded.
            self.seed = int.from_bytes(os.urandom(8), byteorder="big", signed=True)
            self.random_number_generator = self._set_seed(self.seed)

    self.validator = DPModelInspector()
    self.clipper = None  # lazy initialization in attach
def main():
    """Entry point: DP-SGD sentiment classification on IMDB with Opacus."""
    parser = argparse.ArgumentParser(description="PyTorch IMDB Example")
    parser.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 10)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.02,
        metavar="LR",
        help="learning rate (default: .02)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=0.56,
        metavar="S",
        help="Noise multiplier (default 0.56)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--max-sequence-length",
        type=int,
        default=256,
        metavar="SL",
        help="Longer sequences will be cut to this length (default: 256)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla optimizer",
    )
    parser.add_argument(
        "--secure-rng",
        action="store_true",
        default=False,
        help="Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost",
    )
    parser.add_argument("--data-root",
                        type=str,
                        default="../imdb",
                        help="Where IMDB is/will be stored")
    parser.add_argument(
        "-j",
        "--workers",
        default=2,
        type=int,
        metavar="N",
        help="number of data loading workers (default: 2)",
    )
    args = parser.parse_args()
    device = torch.device(args.device)

    raw_train_dataset, raw_test_dataset = RawIMDB()

    train_dataset = HuggingFaceTorchTextDataset(
        raw_train_dataset, max_len=args.max_sequence_length)
    test_dataset = HuggingFaceTorchTextDataset(
        raw_test_dataset, max_len=args.max_sequence_length)

    # Secure generator for data shuffling when trustworthy DP guarantees are required.
    generator = (prng.create_random_device_generator("/dev/urandom")
                 if args.secure_rng else None)

    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        drop_last=True,
        generator=generator,
        collate_fn=padded_collate,
        pin_memory=True,
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        collate_fn=padded_collate,
        pin_memory=True,
    )

    model = SampleNet(vocab_size=len(train_dataset.tokenizer)).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size,
            sample_size=len(train_dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            secure_rng=args.secure_rng,
        )
        privacy_engine.attach(optimizer)

    for epoch in range(1, args.epochs + 1):
        train(args, model, train_loader, optimizer, epoch)
        evaluate(args, model, test_loader)
def main():
    """Train a character-level name classifier, optionally with DP-SGD.

    Reads CLI options from the module-level ``parser``, splits the
    NamesDataset into train/test, builds a CharNNClassifier, and trains it —
    attaching an Opacus ``PrivacyEngine`` unless ``--disable-dp`` is set.
    """
    args = parser.parse_args()
    device = torch.device(args.device)

    ds = NamesDataset(args.data_root)
    train_len = int(args.train_split * len(ds))
    test_len = len(ds) - train_len

    print(f"{train_len} samples for training, {test_len} for testing")

    if args.secure_rng:
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e

        generator = prng.create_random_device_generator("/dev/urandom")
    else:
        generator = None

    model = CharNNClassifier(
        args.embedding_size,
        args.hidden_size,
        len(ds.labels),
        args.n_lstm_layers,
        args.bidirectional_lstm,
    )
    model = model.to(device)

    # FIX: random_split was previously called twice with identical arguments;
    # the first result was immediately discarded, wasting a full split (and a
    # draw from the generator). A single split is kept.
    train_ds, test_ds = torch.utils.data.random_split(
        ds, [train_len, test_len], generator=generator)

    train_loader = DataLoader(
        train_ds,
        num_workers=8,
        pin_memory=True,
        generator=generator,
        # Poisson-style batch construction required by the DP accountant.
        batch_sampler=UniformWithReplacementSampler(
            num_samples=len(train_ds),
            sample_rate=args.sample_rate,
            generator=generator),
        collate_fn=padded_collate,
    )

    test_loader = DataLoader(
        test_ds,
        batch_size=args.batch_size_test,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        collate_fn=padded_collate,
    )

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate)

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            sample_rate=args.sample_rate,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            target_delta=args.delta,
            secure_rng=args.secure_rng,
        )
        privacy_engine.attach(optimizer)
    else:
        privacy_engine = None

    print("Train stats: \n")
    for epoch in tqdm(range(args.epochs)):
        train(model, criterion, optimizer, train_loader, epoch, device=device)
        # Periodic evaluation (merged the nested ifs; behavior unchanged).
        if args.test_every and epoch % args.test_every == 0:
            test(model, test_loader, privacy_engine, device=device)

    test(model, test_loader, privacy_engine, device=device)
class TestCSPRNG(unittest.TestCase): all_generators = [ csprng.create_random_device_generator(), csprng.create_random_device_generator('/dev/urandom'), csprng.create_mt19937_generator(), csprng.create_mt19937_generator(42) ] int_dtypes = [ torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64 ] fp_ftypes = [torch.float, torch.double] num_dtypes = int_dtypes + fp_ftypes all_dtypes = num_dtypes + [torch.bool] size = 1000 all_devices = ['cpu', 'cuda'] if (torch.cuda.is_available() and csprng.supports_cuda()) else ['cpu'] def test_random_kstest(self): for device in self.all_devices: for gen in self.all_generators: for dtype in self.num_dtypes: if dtype == torch.float: to_inc = 2**24 elif dtype == torch.double: to_inc = 2**53 else: to_inc = torch.iinfo(dtype).max t = torch.empty(self.size, dtype=dtype, device=device).random_(generator=gen) res = stats.kstest(t.cpu(), stats.randint.cdf, args=(0, to_inc)) self.assertTrue(res.statistic < 0.1) @unittest.skipIf( not torch.cuda.is_available() or not csprng.supports_cuda(), "CUDA is not available or csprng was not compiled with CUDA support") def test_random_cpu_vs_cuda(self): for dtype in self.num_dtypes: gen = csprng.create_mt19937_generator(42) cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').random_(generator=gen) gen = csprng.create_mt19937_generator(42) cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').random_(generator=gen) self.assertTrue((cpu_t == cuda_t.cpu()).all()) def test_random_to_kstest(self): to_ = 42 for device in self.all_devices: for gen in self.all_generators: for dtype in self.num_dtypes: t = torch.zeros(self.size, dtype=dtype, device=device).random_(to_, generator=gen) res = stats.kstest(t.cpu(), stats.randint.cdf, args=(0, to_)) self.assertTrue(res.statistic < 0.1) @unittest.skipIf( not torch.cuda.is_available() or not csprng.supports_cuda(), "CUDA is not available or csprng was not compiled with CUDA support") def test_random_to_cpu_vs_cuda(self): to_ = 42 for dtype in 
self.num_dtypes: gen = csprng.create_mt19937_generator(42) cpu_t = torch.zeros(self.size, dtype=dtype, device='cpu').random_(to_, generator=gen) gen = csprng.create_mt19937_generator(42) cuda_t = torch.zeros(self.size, dtype=dtype, device='cuda').random_(to_, generator=gen) self.assertTrue((cpu_t == cuda_t.cpu()).all()) def test_random_from_to_kstest(self): for device in self.all_devices: for gen in self.all_generators: for dtype in self.num_dtypes: for from_ in [0, 24, 42]: for to_ in [42, 99, 123]: if from_ < to_: t = torch.zeros(self.size, dtype=dtype, device=device).random_( from_, to_, generator=gen) res = stats.kstest(t.cpu(), stats.randint.cdf, args=(from_, to_)) self.assertTrue(res.statistic < 0.2) @unittest.skipIf( not torch.cuda.is_available() or not csprng.supports_cuda(), "CUDA is not available or csprng was not compiled with CUDA support") def test_random_from_to_cpu_vs_cuda(self): for dtype in self.num_dtypes: for from_ in [0, 24, 42]: for to_ in [42, 99, 123]: if from_ < to_: gen = csprng.create_mt19937_generator(42) cpu_t = torch.zeros(self.size, dtype=dtype, device='cpu').random_( from_, to_, generator=gen) gen = csprng.create_mt19937_generator(42) cuda_t = torch.zeros(self.size, dtype=dtype, device='cuda').random_( from_, to_, generator=gen) self.assertTrue((cpu_t == cuda_t.cpu()).all()) def test_random_bool(self): for device in self.all_devices: for gen in self.all_generators: t = torch.empty(self.size, dtype=torch.bool, device=device) t.fill_(False) t.random_(generator=gen) self.assertEqual(t.min(), False) self.assertEqual(t.max(), True) self.assertTrue(0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6) t.fill_(True) t.random_(generator=gen) self.assertEqual(t.min(), False) self.assertEqual(t.max(), True) self.assertTrue(0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6) @unittest.skipIf( not torch.cuda.is_available() or not csprng.supports_cuda(), "CUDA is not available or csprng was not compiled with CUDA support") 
def test_random_bool_cpu_vs_cuda(self): gen = csprng.create_mt19937_generator(42) cpu_t = torch.empty(self.size, dtype=torch.bool, device='cpu').random_(generator=gen) gen = csprng.create_mt19937_generator(42) cuda_t = torch.empty(self.size, dtype=torch.bool, device='cuda').random_(generator=gen) self.assertTrue((cpu_t == cuda_t.cpu()).all()) def test_uniform_kstest(self): for device in self.all_devices: for gen in self.all_generators: for dtype in self.fp_ftypes: for from_ in [-42, 0, 4.2]: for to_ in [-4.2, 0, 42]: if to_ > from_: t = torch.empty(self.size, dtype=dtype, device=device).uniform_( from_, to_, generator=gen) res = stats.kstest(t.cpu().to(torch.double), 'uniform', args=(from_, (to_ - from_))) self.assertTrue(res.statistic < 0.1) @unittest.skipIf( not torch.cuda.is_available() or not csprng.supports_cuda(), "CUDA is not available or csprng was not compiled with CUDA support") def test_uniform_cpu_vs_cuda(self): for dtype in self.fp_ftypes: for from_ in [-42, 0, 4.2]: for to_ in [-4.2, 0, 42]: if to_ > from_: gen = csprng.create_mt19937_generator(42) cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').uniform_( from_, to_, generator=gen) gen = csprng.create_mt19937_generator(42) cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').uniform_( from_, to_, generator=gen) self.assertTrue( (cpu_t - cuda_t.cpu()).abs().max() < 1e-9) def test_normal_kstest(self): for device in self.all_devices: for gen in self.all_generators: for dtype in self.fp_ftypes: for mean in [-3, 0, 7]: for std in [1, 5, 7]: t = torch.empty(self.size, dtype=dtype, device=device).normal_( mean=mean, std=std, generator=gen) res = stats.kstest(t.cpu().to(torch.double), 'norm', args=(mean, std)) self.assertTrue(res.statistic < 0.1) @unittest.skipIf( not torch.cuda.is_available() or not csprng.supports_cuda(), "CUDA is not available or csprng was not compiled with CUDA support") def test_normal_cpu_vs_cuda(self): for dtype in self.fp_ftypes: for mean in [-3, 0, 7]: for std in 
[1, 5, 7]: gen = csprng.create_mt19937_generator(42) cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').normal_(mean=mean, std=std, generator=gen) gen = csprng.create_mt19937_generator(42) cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').normal_(mean=mean, std=std, generator=gen) self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9) def test_log_normal_kstest(self): for device in self.all_devices: for gen in self.all_generators: for dtype in self.fp_ftypes: for mean in [-3, 0, 7]: for std in [1, 5, 7]: t = torch.empty(self.size, dtype=dtype, device=device).log_normal_( mean=mean, std=std, generator=gen) res = stats.kstest(t.cpu().to(torch.double), 'lognorm', args=(std, 0, math.exp(mean))) self.assertTrue(res.statistic < 0.1) @unittest.skipIf( not torch.cuda.is_available() or not csprng.supports_cuda(), "CUDA is not available or csprng was not compiled with CUDA support") def test_log_normal_cpu_vs_cuda(self): for dtype in self.fp_ftypes: for mean in [-3, 0, 7]: for std in [1, 5, 7]: gen = csprng.create_mt19937_generator(42) cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').log_normal_( mean=mean, std=std, generator=gen) gen = csprng.create_mt19937_generator(42) cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').log_normal_( mean=mean, std=std, generator=gen) self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-4) def test_exponential_kstest(self): for device in self.all_devices: for gen in self.all_generators: for dtype in self.fp_ftypes: for lambd in [0.5, 1.0, 5.0]: t = torch.empty(self.size, dtype=dtype, device=device).exponential_( lambd=lambd, generator=gen) res = stats.kstest(t.cpu().to(torch.double), 'expon', args=( 0, 1 / lambd, )) self.assertTrue(res.statistic < 0.1) @unittest.skipIf( not torch.cuda.is_available() or not csprng.supports_cuda(), "CUDA is not available or csprng was not compiled with CUDA support") @unittest.skip("https://github.com/pytorch/pytorch/issues/38662") def 
test_exponential_cpu_vs_cuda(self): for dtype in self.fp_ftypes: for lambd in [0.5, 1.0, 5.0]: gen = csprng.create_mt19937_generator(42) cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').exponential_(lambd=lambd, generator=gen) gen = csprng.create_mt19937_generator(42) cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').exponential_(lambd=lambd, generator=gen) self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9) def test_cauchy_kstest(self): for device in self.all_devices: for gen in self.all_generators: for dtype in self.fp_ftypes: for median in [-10, 0, 50]: for sigma in [0.5, 1.0, 10.0]: t = torch.empty(self.size, dtype=dtype, device=device).cauchy_( median=median, sigma=sigma, generator=gen) res = stats.kstest(t.cpu().to(torch.double), 'cauchy', args=(median, sigma)) self.assertTrue(res.statistic < 0.1) @unittest.skipIf( not torch.cuda.is_available() or not csprng.supports_cuda(), "CUDA is not available or csprng was not compiled with CUDA support") def test_cauchy_cpu_vs_cuda(self): for dtype in self.fp_ftypes: for median in [-10, 0, 50]: for sigma in [0.5, 1.0, 10.0]: gen = csprng.create_mt19937_generator(42) cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').cauchy_(median=median, sigma=sigma, generator=gen) gen = csprng.create_mt19937_generator(42) cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').cauchy_(median=median, sigma=sigma, generator=gen) self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9) def test_geometric(self): for device in self.all_devices: for gen in self.all_generators: for dtype in self.fp_ftypes: for p in [0.2, 0.5, 0.8]: t = torch.empty(self.size, dtype=dtype, device=device).geometric_( p=p, generator=gen) # actual = np.histogram(t.cpu().to(torch.double), np.arange(1, 100))[0] # expected = stats.geom(p).pmf(np.arange(1, 99)) * self.size # res = stats.chisquare(actual, expected) # self.assertAlmostEqual(res.pvalue, 1.0, delta=0.5) TODO https://github.com/pytorch/csprng/issues/7 
@unittest.skipIf( not torch.cuda.is_available() or not csprng.supports_cuda(), "CUDA is not available or csprng was not compiled with CUDA support") def test_geometric_cpu_vs_cuda(self): for dtype in self.fp_ftypes: for p in [0.2, 0.5, 0.8]: gen = csprng.create_mt19937_generator(42) cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').geometric_(p=p, generator=gen) gen = csprng.create_mt19937_generator(42) cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').geometric_(p=p, generator=gen) self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9) def test_non_contiguous_vs_contiguous(self): size = 10 for device in self.all_devices: for dtype in self.all_dtypes: for i in range(10): t = torch.zeros([size, size, size], dtype=dtype, device=device) x1 = random.randrange(0, size) y1 = random.randrange(0, size) z1 = random.randrange(0, size) x2 = random.randrange(x1 + 1, max(x1 + 2, size)) y2 = random.randrange(y1 + 1, max(y1 + 2, size)) z2 = random.randrange(z1 + 1, max(z1 + 2, size)) maybe_non_contiguous = t[x1:x2, y1:y2, z1:z2] assert (maybe_non_contiguous.numel() > 0) if not maybe_non_contiguous.is_contiguous(): seed = random.randrange(1000) non_contiguous = maybe_non_contiguous gen = csprng.create_mt19937_generator(seed) non_contiguous.random_(generator=gen) contiguous = torch.zeros_like(non_contiguous) gen = csprng.create_mt19937_generator(seed) contiguous.random_(generator=gen) assert (contiguous.is_contiguous()) self.assertTrue((non_contiguous == contiguous).all()) for x in range(0, size): for y in range(0, size): for z in range(0, size): if not x1 <= x < x2 and not y1 <= y < y2 and not z1 <= z < z2: self.assertTrue(t[x, y, z] == 0) @unittest.skipIf(torch.get_num_threads() < 2, "requires multithreading CPU") def test_cpu_parallel(self): urandom_gen = csprng.create_random_device_generator('/dev/urandom') def measure(size): t = torch.empty(size, dtype=torch.float32, device='cpu') start = time.time() for i in range(20): t.normal_(generator=urandom_gen) 
finish = time.time() return finish - start time_for_1K = measure(1000) time_for_1M = measure(1000000) # Pessimistic check that parallel execution gives >= 1.5 performance boost self.assertTrue(time_for_1M / time_for_1K < 1000 / 1.5) @unittest.skipIf(IS_SANDCASTLE or IS_FBCODE, "Does not work on Sandcastle") def test_version(self): import torchcsprng.version as version self.assertTrue(version.__version__) self.assertTrue(version.git_version) def test_randperm(self): for device in self.all_devices: for gen in self.all_generators: for dtype in self.int_dtypes: for size in range(0, 20): expected = torch.arange(size, dtype=dtype, device=device) actual = torch.randperm(size, dtype=dtype, device=device, generator=gen) actual_out = torch.empty(1, dtype=dtype, device=device) torch.randperm(size, out=actual_out, generator=gen) if size >= 10: self.assertTrue( not torch.allclose(expected, actual)) self.assertTrue( not torch.allclose(expected, actual_out)) actual = actual.sort()[0] actual_out = actual.sort()[0] self.assertTrue(torch.allclose(expected, actual)) self.assertTrue(torch.allclose(expected, actual_out)) def test_encrypt_decrypt(self): key_size_bytes = 16 block_size_bytes = 16 def sizeof(dtype): if dtype == torch.bool: return 1 elif dtype.is_floating_point: return torch.finfo(dtype).bits // 8 else: return torch.iinfo(dtype).bits // 8 def pad(data, pad_size): if len(data) % pad_size == 0: return data length = pad_size - (len(data) % pad_size) return data + bytes([0]) * length def create_aes(m, k): if m == "ecb": return AES.new(k.tobytes(), AES.MODE_ECB) elif m == "ctr": ctr = Counter.new(AES.block_size * 8, initial_value=0, little_endian=True) return AES.new(k.tobytes(), AES.MODE_CTR, counter=ctr) else: return None for key_dtype in self.all_dtypes: key_size = key_size_bytes // sizeof(key_dtype) key = torch.empty(key_size, dtype=key_dtype).random_() key_np = key.numpy().view(np.int8) for initial_dtype in self.all_dtypes: for initial_size in [0, 4, 8, 15, 16, 23, 42]: 
initial = torch.empty(initial_size, dtype=initial_dtype).random_() initial_np = initial.numpy().view(np.int8) initial_size_bytes = initial_size * sizeof(initial_dtype) for encrypted_dtype in self.all_dtypes: encrypted_size = ( initial_size_bytes + block_size_bytes - 1 ) // block_size_bytes * block_size_bytes // sizeof( encrypted_dtype) encrypted = torch.zeros(encrypted_size, dtype=encrypted_dtype) for decrypted_dtype in self.all_dtypes: decrypted_size = (initial_size_bytes + sizeof(decrypted_dtype) - 1) // sizeof(decrypted_dtype) decrypted = torch.zeros(decrypted_size, dtype=decrypted_dtype) for mode in ["ecb", "ctr"]: for device in self.all_devices: key = key.to(device) initial = initial.to(device) encrypted = encrypted.to(device) decrypted = decrypted.to(device) csprng.encrypt(initial, encrypted, key, "aes128", mode) encrypted_np = encrypted.cpu().numpy( ).view(np.int8) aes = create_aes(mode, key_np) encrypted_expected = np.frombuffer( aes.encrypt( pad(initial_np.tobytes(), block_size_bytes)), dtype=np.int8) self.assertTrue( np.array_equal(encrypted_np, encrypted_expected)) csprng.decrypt(encrypted, decrypted, key, "aes128", mode) decrypted_np = decrypted.cpu().numpy( ).view(np.int8)[:initial_size_bytes] aes = create_aes(mode, key_np) decrypted_expected = np.frombuffer( aes.decrypt( pad(encrypted_np.tobytes(), block_size_bytes)), dtype=np.int8)[:initial_size_bytes] self.assertTrue( np.array_equal(decrypted_np, decrypted_expected)) self.assertTrue( np.array_equal(initial_np, decrypted_np))
from typing import Iterable from typing import List from typing import Tuple # third party import torch import torchcsprng as csprng # type: ignore from sympc.store import register_primitive_generator from sympc.store import register_primitive_store_add from sympc.store import register_primitive_store_get from sympc.tensor import MPCTensor from sympc.tensor import ShareTensor from sympc.utils import count_wraps ttp_generator = csprng.create_random_device_generator() """ Those functions should be executed by the Trusted Party """ def _get_triples( op_str: str, nr_parties: int, a_shape: Tuple[int], b_shape: Tuple[int], **kwargs: Dict[Any, Any] ) -> List[Tuple[Tuple[ShareTensor, ShareTensor, ShareTensor]]]: """Get triples. The Trusted Third Party (TTP) or Crypto Provider should provide this triples Currently, the one that orchestrates the communication provides those triples.".
def generate_shares(
    secret: Union[ShareTensor, torch.Tensor, float, int],
    nr_parties: int,
    tensor_type: Optional[torch.dtype] = None,
    **kwargs,
) -> List[ShareTensor]:
    """Given a secret, split it into a number of shares such that each party would get one.

    The shares are additive: the first party gets r_0, middle parties get
    r_i - r_{i-1}, and the last gets secret - r_{last}, so the telescoping
    sum of all shares reconstructs the secret.

    Args:
        secret (Union[ShareTensor, torch.Tensor, float, int]): secret to split
        nr_parties (int): number of parties to split the secret
        tensor_type (torch.dtype, optional): tensor type. Defaults to None.
        **kwargs: keywords arguments passed to ShareTensor

    Returns:
        List[ShareTensor]. List of ShareTensor

    Raises:
        ValueError: if the secret cannot be converted to a ShareTensor or
            nr_parties is not a positive integer.

    Examples:
        >>> from sympc.tensor.mpc_tensor import MPCTensor
        >>> MPCTensor.generate_shares(secret=2, nr_parties=2)
        [[ShareTensor]
            | [FixedPointEncoder]: precision: 16, base: 2
            | Data: tensor([15511500.]), [ShareTensor]
            | [FixedPointEncoder]: precision: 16, base: 2
            | Data: tensor([-15380428.])]
        >>> MPCTensor.generate_shares(secret=2, nr_parties=2, encoder_base=3, encoder_precision=4)
        [[ShareTensor]
            | [FixedPointEncoder]: precision: 4, base: 3
            | Data: tensor([14933283.]), [ShareTensor]
            | [FixedPointEncoder]: precision: 4, base: 3
            | Data: tensor([-14933121.])]
    """
    # NOTE(review): `Union` and `Optional` are used in this signature but are
    # not imported at the top of this file -- add `from typing import Optional`
    # and `from typing import Union` to the typing imports.
    if isinstance(secret, (torch.Tensor, float, int)):
        # Plain values are wrapped (and encoded) into a ShareTensor first so
        # the arithmetic below is uniform.
        secret = ShareTensor(secret, **kwargs)

    if not isinstance(secret, ShareTensor):
        raise ValueError(
            "Secret should be a ShareTensor, torchTensor, float or int.")

    if nr_parties < 1:
        raise ValueError("nr_parties should be a positive integer.")

    if nr_parties == 1:
        # BUG FIX: the generic path below would raise IndexError for a single
        # party (it indexes an empty list of random shares); one party simply
        # holds the secret itself.
        return [secret]

    shape = secret.shape

    random_shares = []
    # Cryptographically secure randomness: individual shares must leak
    # nothing about the secret.
    generator = csprng.create_random_device_generator()

    for _ in range(nr_parties - 1):
        rand_value = torch.empty(
            size=shape, dtype=tensor_type).random_(generator=generator)
        share = ShareTensor(session=secret.session)
        # Assign the tensor directly (after construction) so the random value
        # is NOT run through the fixed-point encoder again.
        share.tensor = rand_value
        random_shares.append(share)

    shares = []
    for i in range(nr_parties):
        if i == 0:
            share = random_shares[i]
        elif i < nr_parties - 1:
            share = random_shares[i] - random_shares[i - 1]
        else:
            # Last share closes the telescoping sum back to the secret.
            share = secret - random_shares[i - 1]
        shares.append(share)

    return shares