def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
    """
    Infer membership in the training set of the target estimator.

    :param x: Input records to attack.
    :param y: True labels for `x`.
    :param probabilities: A boolean indicating whether to return the predicted probabilities per class,
                          or just the predicted class.
    :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member,
             or class probabilities.
    """
    if y is None:  # pragma: no cover
        raise ValueError("MembershipInferenceBlackBox requires true labels `y`.")

    if self.estimator.input_shape is not None:  # pragma: no cover
        if self.estimator.input_shape[0] != x.shape[1]:
            raise ValueError("Shape of x does not match input_shape of estimator")

    if "probabilities" in kwargs:
        probabilities = kwargs.get("probabilities")
    else:
        probabilities = False

    if not self._regressor_model:
        y = check_and_transform_label_format(y, len(np.unique(y)), return_one_hot=True)

    if y is None:
        raise ValueError("None value detected.")

    if y.shape[0] != x.shape[0]:  # pragma: no cover
        raise ValueError("Number of rows in x and y do not match")

    if self.input_type == "prediction":
        features = self.estimator.predict(x).astype(np.float32)
    elif self.input_type == "loss":
        features = self.estimator.compute_loss(x, y).astype(np.float32).reshape(-1, 1)

    if self._regressor_model:
        y = y.astype(np.float32).reshape(-1, 1)

    if self.default_model and self.attack_model_type == "nn":
        import torch  # lgtm [py/repeated-import] lgtm [py/import-and-import-from]
        from torch.utils.data import DataLoader  # lgtm [py/repeated-import]
        from art.utils import to_cuda, from_cuda

        self.attack_model.eval()  # type: ignore
        inferred: Optional[np.ndarray] = None
        test_set = self._get_attack_dataset(f_1=features, f_2=y)
        test_loader = DataLoader(test_set, batch_size=self.batch_size, shuffle=False, num_workers=0)
        for input1, input2, _ in test_loader:
            input1, input2 = to_cuda(input1), to_cuda(input2)
            outputs = self.attack_model(input1, input2)  # type: ignore
            if not probabilities:
                predicted = torch.round(outputs)
            else:
                predicted = outputs
            predicted = from_cuda(predicted)

            if inferred is None:
                inferred = predicted.detach().numpy()
            else:
                inferred = np.vstack((inferred, predicted.detach().numpy()))

        if inferred is not None:
            if not probabilities:
                inferred_return = np.round(inferred)
            else:
                inferred_return = inferred
        else:  # pragma: no cover
            raise ValueError("No data available.")
    elif not self.default_model:
        # assumes the predict method of the supplied model returns probabilities
        pred = self.attack_model.predict(np.c_[features, y])  # type: ignore
        if probabilities:
            inferred_return = pred
        else:
            inferred_return = np.round(pred)
    else:
        pred = self.attack_model.predict_proba(np.c_[features, y])  # type: ignore
        if probabilities:
            inferred_return = pred[:, [1]]
        else:
            inferred_return = np.round(pred[:, [1]])

    return inferred_return
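# Illustrative usage sketch for the `probabilities` keyword handled above (not part of the attack
# implementation; `attack`, `x_train`, `y_train`, `x_test`, `y_test` are hypothetical names for an
# already fitted MembershipInferenceBlackBox instance and the target model's train/test data):
#
#     member_preds = attack.infer(x_train, y_train)                      # 1 = member, 0 = non-member
#     member_scores = attack.infer(x_train, y_train, probabilities=True)  # raw attack-model scores
#     train_hit_rate = np.mean(member_preds == 1)
#     test_hit_rate = np.mean(attack.infer(x_test, y_test) == 0)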
def fit(  # pylint: disable=W0613
    self, x: np.ndarray, y: np.ndarray, test_x: np.ndarray, test_y: np.ndarray, **kwargs
):
    """
    Train the attack model.

    :param x: Records that were used in training the target model.
    :param y: True labels for `x`.
    :param test_x: Records that were not used in training the target model.
    :param test_y: True labels for `test_x`.
    """
    if self.estimator.input_shape is not None:
        if self.estimator.input_shape[0] != x.shape[1]:
            raise ValueError("Shape of x does not match input_shape of classifier")
        if self.estimator.input_shape[0] != test_x.shape[1]:
            raise ValueError("Shape of test_x does not match input_shape of classifier")

    y = check_and_transform_label_format(y, len(np.unique(y)), return_one_hot=True)
    test_y = check_and_transform_label_format(test_y, len(np.unique(test_y)), return_one_hot=True)

    if y.shape[0] != x.shape[0]:
        raise ValueError("Number of rows in x and y do not match")
    if test_y.shape[0] != test_x.shape[0]:
        raise ValueError("Number of rows in test_x and test_y do not match")

    # Create attack dataset
    # uses final probabilities/logits
    if self.input_type == "prediction":
        # members
        features = self.estimator.predict(x).astype(np.float32)
        # non-members
        test_features = self.estimator.predict(test_x).astype(np.float32)
    # only for models with loss
    elif self.input_type == "loss":
        if NeuralNetworkMixin not in type(self.estimator).__mro__:
            raise TypeError("loss input_type can only be used with neural networks")
        # members
        features = self.estimator.compute_loss(x, y).astype(np.float32).reshape(-1, 1)
        # non-members
        test_features = self.estimator.compute_loss(test_x, test_y).astype(np.float32).reshape(-1, 1)
    else:
        raise ValueError("Illegal value for parameter `input_type`.")

    # members
    labels = np.ones(x.shape[0])
    # non-members
    test_labels = np.zeros(test_x.shape[0])

    x_1 = np.concatenate((features, test_features))
    x_2 = np.concatenate((y, test_y))
    y_new = np.concatenate((labels, test_labels))

    if self.default_model and self.attack_model_type == "nn":
        import torch  # lgtm [py/repeated-import]
        import torch.nn as nn  # lgtm [py/repeated-import]
        import torch.optim as optim  # lgtm [py/repeated-import]
        from torch.utils.data import DataLoader  # lgtm [py/repeated-import]
        from art.utils import to_cuda

        loss_fn = nn.BCELoss()
        optimizer = optim.Adam(self.attack_model.parameters(), lr=self.learning_rate)  # type: ignore

        attack_train_set = self._get_attack_dataset(f_1=x_1, f_2=x_2, label=y_new)
        train_loader = DataLoader(attack_train_set, batch_size=self.batch_size, shuffle=True, num_workers=0)

        self.attack_model = to_cuda(self.attack_model)  # type: ignore
        self.attack_model.train()  # type: ignore

        for _ in range(self.epochs):
            for (input1, input2, targets) in train_loader:
                input1, input2, targets = to_cuda(input1), to_cuda(input2), to_cuda(targets)
                _, input2 = torch.autograd.Variable(input1), torch.autograd.Variable(input2)
                targets = torch.autograd.Variable(targets)

                optimizer.zero_grad()
                outputs = self.attack_model(input1, input2)  # type: ignore
                loss = loss_fn(outputs, targets.unsqueeze(1))  # lgtm [py/call-to-non-callable]

                loss.backward()
                optimizer.step()
    else:
        y_ready = check_and_transform_label_format(y_new, len(np.unique(y_new)), return_one_hot=False)
        self.attack_model.fit(np.c_[x_1, x_2], y_ready)  # type: ignore
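# Illustrative end-to-end sketch of training this attack (an assumption, not part of this module:
# `classifier` is a trained ART classifier, and `x_train`, `y_train`, `x_test`, `y_test` are the
# target model's original train/test splits):
#
#     from art.attacks.inference.membership_inference import MembershipInferenceBlackBox
#
#     attack = MembershipInferenceBlackBox(classifier, input_type="prediction", attack_model_type="nn")
#     # fit the attack on part of the data and keep the remainder to evaluate it afterwards
#     attack.fit(x_train[:500], y_train[:500], x_test[:500], y_test[:500])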
def fit(  # pylint: disable=W0613
    self,
    x: np.ndarray,
    y: np.ndarray,
    test_x: np.ndarray,
    test_y: np.ndarray,
    pred: Optional[np.ndarray] = None,
    test_pred: Optional[np.ndarray] = None,
    **kwargs,
):
    """
    Train the attack model.

    :param x: Records that were used in training the target estimator.
    :param y: True labels for `x`.
    :param test_x: Records that were not used in training the target estimator.
    :param test_y: True labels for `test_x`.
    :param pred: Estimator predictions for the records, if not supplied will be generated by calling the
                 estimator's `predict` function. Only relevant for input_type='prediction'.
    :param test_pred: Estimator predictions for the test records, if not supplied will be generated by calling the
                      estimator's `predict` function. Only relevant for input_type='prediction'.
    """
    if self.estimator.input_shape is not None:
        if self.estimator.input_shape[0] != x.shape[1]:  # pragma: no cover
            raise ValueError("Shape of x does not match input_shape of estimator")
        if self.estimator.input_shape[0] != test_x.shape[1]:  # pragma: no cover
            raise ValueError("Shape of test_x does not match input_shape of estimator")

    if not self._regressor_model:
        y = check_and_transform_label_format(y, len(np.unique(y)), return_one_hot=True)
        test_y = check_and_transform_label_format(test_y, len(np.unique(test_y)), return_one_hot=True)

    if y.shape[0] != x.shape[0]:  # pragma: no cover
        raise ValueError("Number of rows in x and y do not match")
    if test_y.shape[0] != test_x.shape[0]:  # pragma: no cover
        raise ValueError("Number of rows in test_x and test_y do not match")

    # Create attack dataset
    # uses final probabilities/logits
    if self.input_type == "prediction":
        # members
        if pred is None:
            features = self.estimator.predict(x).astype(np.float32)
        else:
            features = pred.astype(np.float32)
        # non-members
        if test_pred is None:
            test_features = self.estimator.predict(test_x).astype(np.float32)
        else:
            test_features = test_pred.astype(np.float32)
    # only for models with loss
    elif self.input_type == "loss":
        # members
        features = self.estimator.compute_loss(x, y).astype(np.float32).reshape(-1, 1)
        # non-members
        test_features = self.estimator.compute_loss(test_x, test_y).astype(np.float32).reshape(-1, 1)
    else:  # pragma: no cover
        raise ValueError("Illegal value for parameter `input_type`.")

    # members
    labels = np.ones(x.shape[0])
    # non-members
    test_labels = np.zeros(test_x.shape[0])

    x_1 = np.concatenate((features, test_features))
    x_2 = np.concatenate((y, test_y))
    y_new = np.concatenate((labels, test_labels))

    if self._regressor_model:
        x_2 = x_2.astype(np.float32).reshape(-1, 1)

    if self.default_model and self.attack_model_type == "nn":
        import torch  # lgtm [py/repeated-import] lgtm [py/import-and-import-from]
        from torch import nn  # lgtm [py/repeated-import]
        from torch import optim  # lgtm [py/repeated-import]
        from torch.utils.data import DataLoader  # lgtm [py/repeated-import]
        from art.utils import to_cuda

        loss_fn = nn.BCELoss()
        optimizer = optim.Adam(self.attack_model.parameters(), lr=self.learning_rate)  # type: ignore

        attack_train_set = self._get_attack_dataset(f_1=x_1, f_2=x_2, label=y_new)
        train_loader = DataLoader(attack_train_set, batch_size=self.batch_size, shuffle=True, num_workers=0)

        self.attack_model = to_cuda(self.attack_model)  # type: ignore
        self.attack_model.train()  # type: ignore

        for _ in range(self.epochs):
            for (input1, input2, targets) in train_loader:
                input1, input2, targets = to_cuda(input1), to_cuda(input2), to_cuda(targets)
                _, input2 = torch.autograd.Variable(input1), torch.autograd.Variable(input2)
                targets = torch.autograd.Variable(targets)

                optimizer.zero_grad()
                outputs = self.attack_model(input1, input2)  # type: ignore
                loss = loss_fn(outputs, targets.unsqueeze(1))  # lgtm [py/call-to-non-callable]

                loss.backward()
                optimizer.step()
    else:
        y_ready = check_and_transform_label_format(y_new, len(np.unique(y_new)), return_one_hot=False)
        self.attack_model.fit(np.c_[x_1, x_2], y_ready.ravel())  # type: ignore
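# Illustrative sketch of the precomputed-predictions path handled above (hypothetical names;
# `classifier`, `attack`, `x_member`, `y_member`, `x_nonmember`, `y_nonmember` are assumptions):
#
#     pred_member = classifier.predict(x_member)
#     pred_nonmember = classifier.predict(x_nonmember)
#     attack.fit(x_member, y_member, x_nonmember, y_nonmember,
#                pred=pred_member, test_pred=pred_nonmember)
#
# Passing `pred`/`test_pred` avoids a second forward pass through the target estimator when its
# predictions are already available, and only applies when input_type='prediction'.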
def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
    """
    Infer membership in the training set of the target estimator.

    :param x: Input records to attack.
    :param y: True labels for `x`.
    :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member.
    """
    if y is None:
        raise ValueError("MembershipInferenceBlackBox requires true labels `y`.")

    if self.estimator.input_shape[0] != x.shape[1]:
        raise ValueError("Shape of x does not match input_shape of classifier")

    y = check_and_transform_label_format(y, len(np.unique(y)), return_one_hot=True)

    if y.shape[0] != x.shape[0]:
        raise ValueError("Number of rows in x and y do not match")

    if self.input_type == "prediction":
        features = self.estimator.predict(x).astype(np.float32)
    elif self.input_type == "loss":
        features = self.estimator.compute_loss(x, y).astype(np.float32).reshape(-1, 1)

    if self.default_model and self.attack_model_type == "nn":
        import torch  # lgtm [py/repeated-import]
        from torch.utils.data import DataLoader  # lgtm [py/repeated-import]
        from art.utils import to_cuda, from_cuda

        self.attack_model.eval()
        inferred = None
        test_set = self._get_attack_dataset(f_1=features, f_2=y)
        # shuffle=False keeps the inferred labels aligned with the order of the input records
        test_loader = DataLoader(test_set, batch_size=self.batch_size, shuffle=False, num_workers=0)
        for input1, input2, _ in test_loader:
            input1, input2 = to_cuda(input1), to_cuda(input2)
            outputs = self.attack_model(input1, input2)
            predicted = torch.round(outputs)
            predicted = from_cuda(predicted)

            if inferred is None:
                inferred = predicted.detach().numpy()
            else:
                inferred = np.vstack((inferred, predicted.detach().numpy()))
        # np.int is deprecated in recent NumPy versions; use the built-in int
        inferred = inferred.reshape(-1).astype(int)
    else:
        inferred = np.array([np.argmax(arr) for arr in self.attack_model.predict(np.c_[features, y])])
    return inferred
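# Illustrative evaluation sketch (hypothetical names; "members" are records used to train the
# target model, "non-members" are held-out records, disjoint from the data used in `fit` above):
#
#     inferred_members = attack.infer(x_train[500:], y_train[500:])
#     inferred_nonmembers = attack.infer(x_test[500:], y_test[500:])
#     # overall attack accuracy: members should be predicted 1, non-members 0
#     attack_acc = (np.sum(inferred_members) + np.sum(1 - inferred_nonmembers)) / (
#         len(inferred_members) + len(inferred_nonmembers)
#     )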