Example #1
    def infer(self,
              x: np.ndarray,
              y: Optional[np.ndarray] = None,
              **kwargs) -> np.ndarray:
        """
        Infer membership in the training set of the target estimator.

        :param x: Input records to attack.
        :param y: True labels for `x`.
        :param probabilities: A boolean indicating whether to return the predicted probabilities per class, or just
                              the predicted class.
        :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member,
                 or class probabilities.
        """
        if y is None:  # pragma: no cover
            raise ValueError(
                "MembershipInferenceBlackBox requires true labels `y`.")

        if self.estimator.input_shape is not None:  # pragma: no cover
            if self.estimator.input_shape[0] != x.shape[1]:
                raise ValueError(
                    "Shape of x does not match input_shape of estimator")

        if "probabilities" in kwargs:
            probabilities = kwargs.get("probabilities")
        else:
            probabilities = False

        if not self._regressor_model:
            y = check_and_transform_label_format(y,
                                                 len(np.unique(y)),
                                                 return_one_hot=True)

        if y is None:  # check_and_transform_label_format may return None
            raise ValueError("None value detected.")

        if y.shape[0] != x.shape[0]:  # pragma: no cover
            raise ValueError("Number of rows in x and y do not match")

        if self.input_type == "prediction":
            features = self.estimator.predict(x).astype(np.float32)
        elif self.input_type == "loss":
            features = self.estimator.compute_loss(x, y).astype(
                np.float32).reshape(-1, 1)

        if self._regressor_model:
            y = y.astype(np.float32).reshape(-1, 1)

        if self.default_model and self.attack_model_type == "nn":
            import torch  # lgtm [py/repeated-import] lgtm [py/import-and-import-from]
            from torch.utils.data import DataLoader  # lgtm [py/repeated-import]
            from art.utils import to_cuda, from_cuda

            self.attack_model.eval()  # type: ignore
            inferred: Optional[np.ndarray] = None
            test_set = self._get_attack_dataset(f_1=features, f_2=y)
            test_loader = DataLoader(test_set,
                                     batch_size=self.batch_size,
                                     shuffle=False,
                                     num_workers=0)
            for input1, input2, _ in test_loader:
                input1, input2 = to_cuda(input1), to_cuda(input2)
                outputs = self.attack_model(input1, input2)  # type: ignore
                if not probabilities:
                    predicted = torch.round(outputs)
                else:
                    predicted = outputs
                predicted = from_cuda(predicted)

                if inferred is None:
                    inferred = predicted.detach().numpy()
                else:
                    inferred = np.vstack(
                        (inferred, predicted.detach().numpy()))

            if inferred is not None:
                if not probabilities:
                    inferred_return = np.round(inferred)
                else:
                    inferred_return = inferred
            else:  # pragma: no cover
                raise ValueError("No data available.")
        elif not self.default_model:
            # assumes the predict method of the supplied model returns probabilities
            pred = self.attack_model.predict(np.c_[features,
                                                   y])  # type: ignore
            if probabilities:
                inferred_return = pred
            else:
                inferred_return = np.round(pred)
        else:
            pred = self.attack_model.predict_proba(np.c_[features,
                                                         y])  # type: ignore
            if probabilities:
                inferred_return = pred[:, [1]]
            else:
                inferred_return = np.round(pred[:, [1]])

        return inferred_return
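A minimal usage sketch for the `infer` call above, assuming `attack` is a MembershipInferenceBlackBox
instance that has already been fitted against the target estimator, and `x_candidates`/`y_candidates`
are NumPy arrays of candidate records and their true labels (both names are placeholders):

import numpy as np

# `attack`, `x_candidates` and `y_candidates` are assumed to exist already
# (a fitted attack object plus the records whose membership is in question).

# Hard membership decisions: 1 = member of the training set, 0 = non-member.
membership = attack.infer(x_candidates, y_candidates)

# Per-record membership scores instead of hard 0/1 labels.
scores = attack.infer(x_candidates, y_candidates, probabilities=True)

print("records flagged as members:", int(np.sum(membership)))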
    def fit(  # pylint: disable=W0613
        self, x: np.ndarray, y: np.ndarray, test_x: np.ndarray, test_y: np.ndarray, **kwargs
    ):
        """
        Train the attack model.

        :param x: Records that were used in training the target model.
        :param y: True labels for `x`.
        :param test_x: Records that were not used in training the target model.
        :param test_y: True labels for `test_x`.
        """
        if self.estimator.input_shape is not None:
            if self.estimator.input_shape[0] != x.shape[1]:
                raise ValueError("Shape of x does not match input_shape of classifier")
            if self.estimator.input_shape[0] != test_x.shape[1]:
                raise ValueError("Shape of test_x does not match input_shape of classifier")

        y = check_and_transform_label_format(y, len(np.unique(y)), return_one_hot=True)
        test_y = check_and_transform_label_format(test_y, len(np.unique(test_y)), return_one_hot=True)

        if y.shape[0] != x.shape[0]:
            raise ValueError("Number of rows in x and y do not match")
        if test_y.shape[0] != test_x.shape[0]:
            raise ValueError("Number of rows in test_x and test_y do not match")

        # Create attack dataset
        # uses final probabilities/logits
        if self.input_type == "prediction":
            # members
            features = self.estimator.predict(x).astype(np.float32)
            # non-members
            test_features = self.estimator.predict(test_x).astype(np.float32)
        # only for models with loss
        elif self.input_type == "loss":
            if NeuralNetworkMixin not in type(self.estimator).__mro__:
                raise TypeError("loss input_type can only be used with neural networks")
            # members
            features = self.estimator.compute_loss(x, y).astype(np.float32).reshape(-1, 1)
            # non-members
            test_features = self.estimator.compute_loss(test_x, test_y).astype(np.float32).reshape(-1, 1)
        else:
            raise ValueError("Illegal value for parameter `input_type`.")

        # members
        labels = np.ones(x.shape[0])
        # non-members
        test_labels = np.zeros(test_x.shape[0])

        x_1 = np.concatenate((features, test_features))
        x_2 = np.concatenate((y, test_y))
        y_new = np.concatenate((labels, test_labels))

        if self.default_model and self.attack_model_type == "nn":
            import torch  # lgtm [py/repeated-import]
            import torch.nn as nn  # lgtm [py/repeated-import]
            import torch.optim as optim  # lgtm [py/repeated-import]
            from torch.utils.data import DataLoader  # lgtm [py/repeated-import]
            from art.utils import to_cuda

            loss_fn = nn.BCELoss()
            optimizer = optim.Adam(self.attack_model.parameters(), lr=self.learning_rate)  # type: ignore

            attack_train_set = self._get_attack_dataset(f_1=x_1, f_2=x_2, label=y_new)
            train_loader = DataLoader(attack_train_set, batch_size=self.batch_size, shuffle=True, num_workers=0)

            self.attack_model = to_cuda(self.attack_model)  # type: ignore
            self.attack_model.train()  # type: ignore

            for _ in range(self.epochs):
                for (input1, input2, targets) in train_loader:
                    input1, input2, targets = to_cuda(input1), to_cuda(input2), to_cuda(targets)
                    input1, input2 = torch.autograd.Variable(input1), torch.autograd.Variable(input2)
                    targets = torch.autograd.Variable(targets)

                    optimizer.zero_grad()
                    outputs = self.attack_model(input1, input2)  # type: ignore
                    loss = loss_fn(outputs, targets.unsqueeze(1))  # lgtm [py/call-to-non-callable]

                    loss.backward()
                    optimizer.step()
        else:
            y_ready = check_and_transform_label_format(y_new, len(np.unique(y_new)), return_one_hot=False)
            self.attack_model.fit(np.c_[x_1, x_2], y_ready)  # type: ignore
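An end-to-end sketch of how this `fit`/`infer` pair is typically driven, assuming the usual ART import
path, an already-trained ART estimator `classifier`, and data splits where `x_train`/`y_train` were used
to train the target model while `x_out`/`y_out` were not (all variable names are placeholders):

import numpy as np
from art.attacks.inference.membership_inference import MembershipInferenceBlackBox

# `classifier`, `x_train`, `y_train`, `x_out`, `y_out` are assumed to exist.
attack = MembershipInferenceBlackBox(classifier, input_type="prediction",
                                     attack_model_type="nn")

# Train the attack model on known members (training records) and known non-members.
attack.fit(x_train, y_train, x_out, y_out)

# Members should be scored close to 1, non-members close to 0.
member_pred = attack.infer(x_train, y_train)
nonmember_pred = attack.infer(x_out, y_out)
attack_acc = 0.5 * (np.mean(member_pred) + np.mean(1 - nonmember_pred))
print(f"membership inference accuracy: {attack_acc:.3f}")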
Example #3
    def fit(  # pylint: disable=W0613
            self,
            x: np.ndarray,
            y: np.ndarray,
            test_x: np.ndarray,
            test_y: np.ndarray,
            pred: Optional[np.ndarray] = None,
            test_pred: Optional[np.ndarray] = None,
            **kwargs):
        """
        Train the attack model.

        :param x: Records that were used in training the target estimator.
        :param y: True labels for `x`.
        :param test_x: Records that were not used in training the target estimator.
        :param test_y: True labels for `test_x`.
        :param pred: Estimator predictions for the records; if not supplied, they will be generated by calling the
                     estimator's `predict` function. Only relevant for input_type='prediction'.
        :param test_pred: Estimator predictions for the test records; if not supplied, they will be generated by
                          calling the estimator's `predict` function. Only relevant for input_type='prediction'.
        """
        if self.estimator.input_shape is not None:
            if self.estimator.input_shape[0] != x.shape[1]:  # pragma: no cover
                raise ValueError(
                    "Shape of x does not match input_shape of estimator")
            if self.estimator.input_shape[0] != test_x.shape[
                    1]:  # pragma: no cover
                raise ValueError(
                    "Shape of test_x does not match input_shape of estimator")

        if not self._regressor_model:
            y = check_and_transform_label_format(y,
                                                 len(np.unique(y)),
                                                 return_one_hot=True)
            test_y = check_and_transform_label_format(test_y,
                                                      len(np.unique(test_y)),
                                                      return_one_hot=True)

        if y.shape[0] != x.shape[0]:  # pragma: no cover
            raise ValueError("Number of rows in x and y do not match")
        if test_y.shape[0] != test_x.shape[0]:  # pragma: no cover
            raise ValueError(
                "Number of rows in test_x and test_y do not match")

        # Create attack dataset
        # uses final probabilities/logits
        if self.input_type == "prediction":
            # members
            if pred is None:
                features = self.estimator.predict(x).astype(np.float32)
            else:
                features = pred.astype(np.float32)
            # non-members
            if test_pred is None:
                test_features = self.estimator.predict(test_x).astype(
                    np.float32)
            else:
                test_features = test_pred.astype(np.float32)
        # only for models with loss
        elif self.input_type == "loss":
            # members
            features = self.estimator.compute_loss(x, y).astype(
                np.float32).reshape(-1, 1)
            # non-members
            test_features = self.estimator.compute_loss(test_x, test_y).astype(
                np.float32).reshape(-1, 1)
        else:  # pragma: no cover
            raise ValueError("Illegal value for parameter `input_type`.")

        # members
        labels = np.ones(x.shape[0])
        # non-members
        test_labels = np.zeros(test_x.shape[0])

        x_1 = np.concatenate((features, test_features))
        x_2 = np.concatenate((y, test_y))
        y_new = np.concatenate((labels, test_labels))

        if self._regressor_model:
            x_2 = x_2.astype(np.float32).reshape(-1, 1)

        if self.default_model and self.attack_model_type == "nn":
            import torch  # lgtm [py/repeated-import] lgtm [py/import-and-import-from]
            from torch import nn  # lgtm [py/repeated-import]
            from torch import optim  # lgtm [py/repeated-import]
            from torch.utils.data import DataLoader  # lgtm [py/repeated-import]
            from art.utils import to_cuda

            loss_fn = nn.BCELoss()
            optimizer = optim.Adam(self.attack_model.parameters(),
                                   lr=self.learning_rate)  # type: ignore

            attack_train_set = self._get_attack_dataset(f_1=x_1,
                                                        f_2=x_2,
                                                        label=y_new)
            train_loader = DataLoader(attack_train_set,
                                      batch_size=self.batch_size,
                                      shuffle=True,
                                      num_workers=0)

            self.attack_model = to_cuda(self.attack_model)  # type: ignore
            self.attack_model.train()  # type: ignore

            for _ in range(self.epochs):
                for (input1, input2, targets) in train_loader:
                    input1, input2, targets = to_cuda(input1), to_cuda(
                        input2), to_cuda(targets)
                    input1, input2 = torch.autograd.Variable(
                        input1), torch.autograd.Variable(input2)
                    targets = torch.autograd.Variable(targets)

                    optimizer.zero_grad()
                    outputs = self.attack_model(input1, input2)  # type: ignore
                    loss = loss_fn(
                        outputs,
                        targets.unsqueeze(1))  # lgtm [py/call-to-non-callable]

                    loss.backward()
                    optimizer.step()
        else:
            y_ready = check_and_transform_label_format(y_new,
                                                       len(np.unique(y_new)),
                                                       return_one_hot=False)
            self.attack_model.fit(np.c_[x_1, x_2],
                                  y_ready.ravel())  # type: ignore
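A short sketch of the optional `pred`/`test_pred` arguments, which let a caller reuse predictions that
were already computed instead of querying the target estimator a second time (only meaningful for
input_type='prediction'; all variable names are placeholders):

# `classifier`, `attack`, `x_train`, `y_train`, `x_out`, `y_out` are assumed to exist.
# Query the target model once and feed the outputs straight into the attack.
train_pred = classifier.predict(x_train)
out_pred = classifier.predict(x_out)

attack.fit(x_train, y_train, x_out, y_out, pred=train_pred, test_pred=out_pred)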
    def infer(self,
              x: np.ndarray,
              y: Optional[np.ndarray] = None,
              **kwargs) -> np.ndarray:
        """
        Infer membership in the training set of the target estimator.

        :param x: Input records to attack.
        :param y: True labels for `x`.
        :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member.
        """
        if y is None:
            raise ValueError(
                "MembershipInferenceBlackBox requires true labels `y`.")

        if self.estimator.input_shape[0] != x.shape[1]:
            raise ValueError(
                "Shape of x does not match input_shape of classifier")

        y = check_and_transform_label_format(y,
                                             len(np.unique(y)),
                                             return_one_hot=True)

        if y.shape[0] != x.shape[0]:
            raise ValueError("Number of rows in x and y do not match")

        if self.input_type == "prediction":
            features = self.estimator.predict(x).astype(np.float32)
        elif self.input_type == "loss":
            features = self.estimator.loss(x, y).astype(np.float32).reshape(
                -1, 1)

        if self.default_model and self.attack_model_type == "nn":
            import torch  # lgtm [py/repeated-import]
            from torch.utils.data import DataLoader  # lgtm [py/repeated-import]
            from art.utils import to_cuda, from_cuda

            self.attack_model.eval()
            inferred = None
            test_set = self._get_attack_dataset(f_1=features, f_2=y)
            test_loader = DataLoader(test_set,
                                     batch_size=self.batch_size,
                                     shuffle=False,
                                     num_workers=0)
            for input1, input2, _ in test_loader:
                input1, input2 = to_cuda(input1), to_cuda(input2)
                outputs = self.attack_model(input1, input2)
                predicted = torch.round(outputs)
                predicted = from_cuda(predicted)

                if inferred is None:
                    inferred = predicted.detach().numpy()
                else:
                    inferred = np.vstack(
                        (inferred, predicted.detach().numpy()))
            inferred = inferred.reshape(-1).astype(int)
        else:
            inferred = np.array([
                np.argmax(arr)
                for arr in self.attack_model.predict(np.c_[features, y])
            ])
        return inferred
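Both variants rely on `self._get_attack_dataset(...)` to wrap the attack features, the label vector and
(during training) the membership targets into a PyTorch dataset. That helper is not shown here; the
following is a minimal sketch of what such a wrapper could look like, not the actual implementation:

from typing import Optional

import numpy as np
import torch
from torch.utils.data import Dataset


class AttackDataset(Dataset):
    """Yields (feature, label-vector, membership-target) triples for the attack model."""

    def __init__(self, f_1: np.ndarray, f_2: np.ndarray, label: Optional[np.ndarray] = None):
        self.f_1 = torch.from_numpy(f_1.astype(np.float32))
        self.f_2 = torch.from_numpy(f_2.astype(np.float32))
        # At inference time no membership targets exist, so zeros are used as placeholders.
        targets = label if label is not None else np.zeros(f_1.shape[0])
        self.label = torch.from_numpy(targets.astype(np.float32))

    def __len__(self) -> int:
        return self.f_1.shape[0]

    def __getitem__(self, idx):
        return self.f_1[idx], self.f_2[idx], self.label[idx]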