def test_remove_hooks(self):
        """
        Test that after calling .remove_hooks() no hooks are left
        """
        copy_of_original_model = mobilenet_v3_small()
        copy_of_original_model.load_state_dict(
            self.original_model.state_dict(),
            strict=True,
        )
        new_grad_sample_module = GradSampleModule(copy_of_original_model,
                                                  batch_first=True,
                                                  loss_reduction="mean")
        new_grad_sample_module.remove_hooks()

        remaining_forward_hooks = {
            module: module._forward_hooks
            for module in new_grad_sample_module.modules()
            if module._forward_hooks
        }
        assert (
            not remaining_forward_hooks
        ), f"Some forward hooks remain after .remove_hooks(): {remaining_forward_hooks}"

        remaining_backward_hooks = {
            module: module._backward_hooks
            for module in new_grad_sample_module.modules()
            if module._backward_hooks
        }
        assert (
            not remaining_backward_hooks
        ), f"Some backward hooks remain after .remove_hooks(): {remaining_backward_hooks}"
    def setUp_clipped_model(self, clip_value=0.003, run_clipper_step=True):
        # Deep copy
        self.clipped_model = SampleConvNet()  # create the structure
        self.clipped_model.load_state_dict(
            self.original_model.state_dict())  # fill it

        self.clipped_model = GradSampleModule(
            self.clipped_model)  # TODO change this as we refactor clipper

        # Intentionally clipping to a very small value
        norm_clipper = (ConstantFlatClipper(clip_value)
                        if not isinstance(clip_value, list) else
                        ConstantPerLayerClipper(clip_value))
        self.clipper = PerSampleGradientClipper(self.clipped_model,
                                                norm_clipper)

        for x, y in self.dl:
            logits = self.clipped_model(x)
            loss = self.criterion(logits, y)
            loss.backward()  # puts grad in self.clipped_model.parameters()
            if run_clipper_step:
                self.clipper.clip_and_accumulate()
                self.clipper.pre_step()
        self.clipped_grads_norms = torch.stack(
            [
                p.grad.norm()
                for p in self.clipped_model.parameters() if p.requires_grad
            ],
            dim=-1,
        )
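
    def _example_clipper_selection(self):
        # Hypothetical illustration (not part of the original tests): a scalar
        # clip_value selects ConstantFlatClipper (one global threshold), while
        # a list selects ConstantPerLayerClipper with one threshold per
        # trainable parameter.
        n_params = sum(1 for p in self.original_model.parameters() if p.requires_grad)
        self.setUp_clipped_model(clip_value=0.003)               # flat clipping
        self.setUp_clipped_model(clip_value=[0.003] * n_params)  # per-layer clipping
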
    def setUp(self):
        self.original_model = mobilenet_v3_small()
        copy_of_original_model = mobilenet_v3_small()
        copy_of_original_model.load_state_dict(
            self.original_model.state_dict(), strict=True)

        self.grad_sample_module = GradSampleModule(copy_of_original_model,
                                                   batch_first=True,
                                                   loss_reduction="mean")
        self.DATA_SIZE = 8
        self.setUp_data()
        self.criterion = nn.L1Loss()
Example #4
    def compute_opacus_grad_sample(
        self,
        x: Union[torch.Tensor, PackedSequence],
        module: nn.Module,
        batch_first=True,
        loss_reduction="mean",
    ) -> Dict[str, torch.Tensor]:
        """
        Runs Opacus to compute per-sample gradients and returns them for testing purposes.

        Args:
            x: The tensor in input to the ``module``
            module: The ``nn.Module`` to test; it is wrapped in a ``ModelWithLoss`` internally.
            batch_first: Whether batch size is the first dimension (as opposed to the second).
                Defaults to True.
            loss_reduction: What reduction to apply to the loss. Defaults to "mean".

        Returns:
            Dictionary mapping parameter_name -> per-sample-gradient for that parameter
        """
        torch.use_deterministic_algorithms(True)
        torch.manual_seed(0)
        np.random.seed(0)

        gs_module = GradSampleModule(clone_module(module),
                                     batch_first=batch_first,
                                     loss_reduction=loss_reduction)
        grad_sample_module = ModelWithLoss(gs_module, loss_reduction)

        grad_sample_module.zero_grad()
        loss = grad_sample_module(x)
        loss.backward()

        opacus_grad_samples = {
            name: p.grad_sample
            for name, p in
            grad_sample_module.wrapped_module._module.named_parameters()
        }

        return opacus_grad_samples
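
    def _example_grad_sample_shapes(self):
        # Hypothetical usage of compute_opacus_grad_sample (not part of the
        # original suite): for a plain nn.Linear the per-sample gradient of
        # each parameter has the batch dimension prepended to its shape.
        x = torch.randn(8, 10)  # batch of 8 samples, 10 features each
        grad_samples = self.compute_opacus_grad_sample(x, nn.Linear(10, 2))
        assert grad_samples["weight"].shape == (8, 2, 10)
        assert grad_samples["bias"].shape == (8, 2)
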
    def test_to_standard_module(self):
        copy_of_original_model = mobilenet_v3_small()
        copy_of_original_model.load_state_dict(
            self.original_model.state_dict(),
            strict=True,
        )
        new_grad_sample_module = GradSampleModule(copy_of_original_model,
                                                  batch_first=True,
                                                  loss_reduction="mean")

        new_grad_sample_module = new_grad_sample_module.to_standard_module()

        assert isinstance(new_grad_sample_module, type(self.original_model))

        original_state_dict = self.original_model.state_dict()
        gs_state_dict = new_grad_sample_module.state_dict()

        extra_keys = gs_state_dict.keys() - original_state_dict.keys()
        assert not extra_keys, f"The following keys are extra: {extra_keys}"

        missing_keys = original_state_dict.keys() - gs_state_dict.keys()
        assert not missing_keys, f"The following keys are missing: {missing_keys}"

        for key in original_state_dict:
            original_tensor = original_state_dict[key].float()
            gs_tensor = gs_state_dict[key].float()
            msg = (
                f"Param {key}: GradSample L2 norm = {gs_tensor.norm(2)}, "
                f"Original L2 norm = {original_tensor.norm(2)}, "
                f"MSE = {F.mse_loss(gs_tensor, original_tensor)}, "
                f"L1 Loss = {F.l1_loss(gs_tensor, original_tensor)}"
            )

            assert_allclose(gs_tensor,
                            original_tensor,
                            atol=1e-6,
                            rtol=1e-4,
                            msg=msg)
Example #6
    def __init__(
        self,
        module: nn.Module,
        *,  # As per PEP 3102, this forces clients to specify kwargs explicitly, not positionally
        sample_rate: Optional[float] = None,
        batch_size: Optional[int] = None,
        sample_size: Optional[int] = None,
        max_grad_norm: Union[float, List[float]],
        noise_multiplier: Optional[float] = None,
        alphas: List[float] = DEFAULT_ALPHAS,
        secure_rng: bool = False,
        batch_first: bool = True,
        target_delta: float = 1e-6,
        target_epsilon: Optional[float] = None,
        epochs: Optional[float] = None,
        loss_reduction: str = "mean",
        poisson: bool = False,
        **misc_settings,
    ):
        r"""
        Args:
            module: The Pytorch module to which we are attaching the privacy engine
            alphas: A list of RDP orders
            noise_multiplier: The ratio of the standard deviation of the Gaussian noise to
                the L2-sensitivity of the function to which the noise is added
            max_grad_norm: The maximum norm of the per-sample gradients. Any gradient with norm
                higher than this will be clipped to this value.
            batch_size: Training batch size. Used in the privacy accountant.
            sample_size: The size of the sample (dataset). Used in the privacy accountant.
            sample_rate: Sample rate used to build batches. Used in the privacy accountant.
            secure_rng: If on, it will use ``torchcsprng`` for secure random number generation.
                Comes with a significant performance cost, therefore it's recommended that you
                turn it off when just experimenting.
            batch_first: Flag to indicate if the input tensor to the corresponding module
                has the first dimension representing the batch. If set to True, dimensions on
                input tensor will be ``[batch_size, ..., ...]``.
            target_delta: The target delta. If unset, we will set it for you.
            loss_reduction: Indicates if the loss reduction (for aggregating the gradients)
                is a sum or a mean operation. Can take values "sum" or "mean".
            **misc_settings: Other arguments to the init
        """

        self.steps = 0
        self.poisson = poisson
        self.loss_reduction = loss_reduction
        self.batch_size = batch_size
        self.sample_size = sample_size
        self.sample_rate = sample_rate
        self._set_sample_rate()

        if isinstance(module, (DifferentiallyPrivateDistributedDataParallel,
                               torch.nn.parallel.DistributedDataParallel)):
            rank = torch.distributed.get_rank()
            n_replicas = torch.distributed.get_world_size()
            self.sample_rate *= n_replicas
        else:
            rank = 0
            n_replicas = 1

        self.module = GradSampleModule(module)

        if poisson:
            # TODO: Check directly if sampler is UniformSampler when sampler gets passed to the Engine (in the future)
            if sample_size is None:
                raise ValueError(
                    "If using Poisson sampling, sample_size should get passed to the PrivacyEngine."
                )

            # Number of empty batches follows a geometric distribution
            # Planck is the same distribution but its parameter is the (negative) log of the geometric's parameter
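            # Concretely: P(empty batch) = (1 - sample_rate)^sample_size, so the
            # Planck rate below is lambda = -sample_size * log(1 - sample_rate),
            # and the chance of k consecutive empty batches decays as exp(-lambda * k).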
            self._poisson_empty_batches_distribution = planck(
                -math.log(1 - self.sample_rate) * self.sample_size)

        if noise_multiplier is None:
            if target_epsilon is None or target_delta is None or epochs is None:
                raise ValueError(
                    "If noise_multiplier is not specified, (target_epsilon, target_delta, epochs) should be given to the engine."
                )
            self.noise_multiplier = get_noise_multiplier(
                target_epsilon, target_delta, self.sample_rate, epochs, alphas)
        else:
            self.noise_multiplier = noise_multiplier

        self.max_grad_norm = max_grad_norm
        self.alphas = alphas
        self.target_delta = target_delta
        self.secure_rng = secure_rng
        self.batch_first = batch_first
        self.misc_settings = misc_settings
        self.n_replicas = n_replicas
        self.rank = rank

        self.device = next(module.parameters()).device

        if self.noise_multiplier < 0:
            raise ValueError(
                f"noise_multiplier={self.noise_multiplier} is not a valid value. Please provide a float >= 0."
            )

        if isinstance(self.max_grad_norm, float) and self.max_grad_norm <= 0:
            raise ValueError(
                f"max_grad_norm={self.max_grad_norm} is not a valid value. Please provide a float > 0."
            )

        if not self.target_delta:
            if self.sample_size:
                warnings.warn(
                    "target_delta unset. Setting it to an order of magnitude less than 1/sample_size."
                )
                self.target_delta = 0.1 * (1 / self.sample_size)
            else:
                raise ValueError("Please provide a target_delta.")

        if self.secure_rng:
            self.seed = None
            try:
                import torchcsprng as csprng
            except ImportError as e:
                msg = (
                    "To use secure RNG, you must install the torchcsprng package! "
                    "Check out the instructions here: https://github.com/pytorch/csprng#installation"
                )
                raise ImportError(msg) from e

            self.random_number_generator = csprng.create_random_device_generator(
                "/dev/urandom")
        else:
            warnings.warn(
                "Secure RNG turned off. This is perfectly fine for experimentation as it allows "
                "for much faster training performance, but remember to turn it on and retrain "
                "one last time before production with ``secure_rng`` turned on."
            )
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                self.seed = int.from_bytes(os.urandom(8),
                                           byteorder="big",
                                           signed=True)
                self.random_number_generator = self._set_seed(self.seed)

        self.validator = DPModelInspector()
        self.clipper = None  # lazy initialization in attach
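

# A minimal end-to-end sketch (an assumption based on the legacy opacus 0.x
# workflow, where the engine is hooked onto an optimizer via ``attach``). It
# reuses SampleConvNet from the clipper test above and assumes the torch/opacus
# imports are already in scope.
model = SampleConvNet()
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
privacy_engine = PrivacyEngine(
    model,
    sample_rate=0.01,        # fraction of the dataset drawn into each batch
    noise_multiplier=1.1,    # ratio of noise std to the clipping norm
    max_grad_norm=1.0,       # per-sample gradient clipping threshold
)
privacy_engine.attach(optimizer)  # optimizer.step() now clips, adds noise, and updates the accountant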