Example #1
    def infer(
        self, attack_model_type: str = "nn", *args, **kwargs
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Alias method for attack().

        :param attack_model_type: Type of the attack model. One of "rf", "gb", "nn".
        :param args: Arguments of the attack.
        :param kwargs: Keyword arguments of the attack.
        :return: Two arrays holding the inferred membership status, the first for the train data and the second for
        the test data, where 1 indicates a member and 0 indicates a non-member. A perfect attack would return only
        ones in the first array and only zeros in the second.
        """
        assert attack_model_type in ["rf", "gb", "nn"]

        attack = MembershipInferenceBlackBox(
            self.target_model.art_classifier, attack_model_type=attack_model_type
        )

        attack.fit(
            self.x_train[: self.attack_train_size],
            self.y_train[: self.attack_train_size],
            self.x_test[: self.attack_test_size],
            self.y_test[: self.attack_test_size],
        )

        inferred_train_data = attack.infer(
            self.x_train[self.attack_train_size :],
            self.y_train[self.attack_train_size :],
        )
        inferred_test_data = attack.infer(
            self.x_test[self.attack_test_size :], self.y_test[self.attack_test_size :]
        )

        return inferred_train_data, inferred_test_data
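
A minimal usage sketch for the method above; evaluator, its attributes, and the printed summary are illustrative and not part of the original API, only the return convention described in the docstring is assumed:

import numpy as np

# evaluator is a hypothetical instance of the class that defines infer() above
inferred_train, inferred_test = evaluator.infer(attack_model_type="rf")

# 1 marks records inferred as members, 0 marks non-members (see the docstring),
# so these are the per-split attack accuracies
member_acc = np.mean(inferred_train == 1)
non_member_acc = np.mean(inferred_test == 0)
overall_acc = (np.sum(inferred_train == 1) + np.sum(inferred_test == 0)) / (
    len(inferred_train) + len(inferred_test)
)
print(f"member acc: {member_acc:.3f}, non-member acc: {non_member_acc:.3f}, overall: {overall_acc:.3f}")
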
Example #2
def test_meminf_black_box_slice(art_warning, decision_tree_estimator, get_iris_dataset):
    try:
        attack_feature = 2  # petal length

        # need to transform attacked feature into categorical
        def transform_feature(x):
            x[x > 0.5] = 0.6
            x[(x > 0.2) & (x <= 0.5)] = 0.35
            x[x <= 0.2] = 0.1

        values = [0.1, 0.35, 0.6]

        (x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset
        # training data without attacked feature
        x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
        # only attacked feature
        x_train_feature = x_train_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_train_feature)
        # training data with attacked feature (after transformation)
        x_train = np.concatenate((x_train_for_attack[:, :attack_feature], x_train_feature), axis=1)
        x_train = np.concatenate((x_train, x_train_for_attack[:, attack_feature:]), axis=1)

        # test data without attacked feature
        x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
        # only attacked feature
        x_test_feature = x_test_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_test_feature)
        # test data with attacked feature (after transformation)
        x_test = np.concatenate((x_test_for_attack[:, :attack_feature], x_test_feature), axis=1)
        x_test = np.concatenate((x_test, x_test_for_attack[:, attack_feature:]), axis=1)

        classifier = decision_tree_estimator()

        meminf_attack = MembershipInferenceBlackBox(classifier, attack_model_type="nn")
        attack_train_ratio = 0.5
        attack_train_size = int(len(x_train) * attack_train_ratio)
        attack_test_size = int(len(x_test) * attack_train_ratio)
        meminf_attack.fit(
            x_train[:attack_train_size],
            y_train_iris[:attack_train_size],
            x_test[:attack_test_size],
            y_test_iris[:attack_test_size],
        )
        attack = AttributeInferenceMembership(
            classifier, meminf_attack, attack_feature=slice(attack_feature, attack_feature + 1)
        )
        # infer attacked feature
        inferred_train = attack.infer(x_train_for_attack, y_train_iris, values=values)
        inferred_test = attack.infer(x_test_for_attack, y_test_iris, values=values)
        # check accuracy
        train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
        test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
        assert 0.1 <= train_acc
        assert 0.1 <= test_acc

    except ARTTestException as e:
        art_warning(e)
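
A small illustration of the discretization and column reassembly used in the test above; the sample rows are made up, and transform_feature is assumed to be the function defined in that test:

import numpy as np

x = np.array([[5.1, 3.5, 0.15, 0.2],
              [6.3, 2.9, 0.56, 1.8]])
attack_feature = 2

x_without = np.delete(x, attack_feature, 1)            # drop the attacked column
feature = x[:, attack_feature].copy().reshape(-1, 1)   # keep only the attacked column
transform_feature(feature)                             # in-place mapping to {0.1, 0.35, 0.6}

# reinsert the transformed column at its original position
rebuilt = np.concatenate((x_without[:, :attack_feature], feature), axis=1)
rebuilt = np.concatenate((rebuilt, x_without[:, attack_feature:]), axis=1)
# rebuilt -> [[5.1, 3.5, 0.1, 0.2], [6.3, 2.9, 0.6, 1.8]]
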
Example #3
def test_black_box_one_hot_float(art_warning, get_iris_dataset):
    try:
        attack_feature = 2  # petal length

        # need to transform attacked feature into categorical
        def transform_feature(x):
            x[x > 0.5] = 2
            x[(x > 0.2) & (x <= 0.5)] = 1
            x[x <= 0.2] = 0

        (x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset
        # training data without attacked feature
        x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
        # only attacked feature
        x_train_feature = x_train_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_train_feature)
        # transform to one-hot encoding
        num_columns = int(x_train_feature.max()) + 1
        train_one_hot = np.zeros((x_train_feature.size, num_columns))
        train_one_hot[np.arange(x_train_feature.size), x_train_feature.reshape(1, -1).astype(int)] = 1
        # training data with attacked feature (after transformation)
        x_train = np.concatenate((x_train_for_attack[:, :attack_feature], train_one_hot), axis=1)
        x_train = np.concatenate((x_train, x_train_for_attack[:, attack_feature:]), axis=1)

        y_train = np.array([np.argmax(y) for y in y_train_iris]).reshape(-1, 1)

        # test data without attacked feature
        x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
        # only attacked feature
        x_test_feature = x_test_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_test_feature)
        # transform to one-hot encoding
        # reuse num_columns from the training encoding so train and test have the same width
        test_one_hot = np.zeros((x_test_feature.size, num_columns))
        test_one_hot[np.arange(x_test_feature.size), x_test_feature.reshape(1, -1).astype(int)] = 1
        # test data with attacked feature (after transformation)
        x_test = np.concatenate((x_test_for_attack[:, :attack_feature], test_one_hot), axis=1)
        x_test = np.concatenate((x_test, x_test_for_attack[:, attack_feature:]), axis=1)

        # scale before training
        scaler = StandardScaler().fit(x_train)
        x_test = scaler.transform(x_test).astype(np.float32)
        x_train = scaler.transform(x_train).astype(np.float32)
        # derive dataset for attack (after scaling)
        attack_feature = slice(attack_feature, attack_feature + 3)
        x_train_for_attack = np.delete(x_train, attack_feature, 1)
        x_test_for_attack = np.delete(x_test, attack_feature, 1)
        train_one_hot = x_train[:, attack_feature]
        test_one_hot = x_test[:, attack_feature]

        tree = DecisionTreeClassifier()
        tree.fit(x_train, y_train)
        classifier = ScikitlearnDecisionTreeClassifier(tree)

        meminf_attack = MembershipInferenceBlackBox(classifier, attack_model_type="nn")
        attack_train_ratio = 0.5
        attack_train_size = int(len(x_train) * attack_train_ratio)
        attack_test_size = int(len(x_test) * attack_train_ratio)
        meminf_attack.fit(
            x_train[:attack_train_size],
            y_train_iris[:attack_train_size],
            x_test[:attack_test_size],
            y_test_iris[:attack_test_size],
        )
        attack = AttributeInferenceMembership(classifier, meminf_attack, attack_feature=attack_feature)
        # infer attacked feature
        values = [[-0.559017, 1.7888544], [-0.47003216, 2.127514], [-1.1774395, 0.84930056]]
        inferred_train = attack.infer(x_train_for_attack, y_train_iris, values=values)
        inferred_test = attack.infer(x_test_for_attack, y_test_iris, values=values)
        # check accuracy
        train_acc = np.sum(
            np.all(np.around(inferred_train, decimals=3) == np.around(train_one_hot, decimals=3), axis=1)
        ) / len(inferred_train)
        test_acc = np.sum(
            np.all(np.around(inferred_test, decimals=3) == np.around(test_one_hot, decimals=3), axis=1)
        ) / len(inferred_test)
        assert 0.1 <= train_acc
        assert 0.1 <= test_acc

    except ARTTestException as e:
        art_warning(e)
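
The one-hot step above relies on NumPy integer-array indexing; a standalone sketch of that technique on a toy label vector (values illustrative):

import numpy as np

labels = np.array([2, 0, 1, 2])           # categorical feature values
num_columns = int(labels.max()) + 1       # 3 categories -> 3 columns

one_hot = np.zeros((labels.size, num_columns))
one_hot[np.arange(labels.size), labels] = 1   # one 1 per row, in the column given by the label
# one_hot -> [[0, 0, 1],
#             [1, 0, 0],
#             [0, 1, 0],
#             [0, 0, 1]]
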
Example #4
def test_meminf_black_box_regressor(art_warning, get_diabetes_dataset):
    try:
        attack_feature = 0  # age

        bins = [
            -0.96838121,
            -0.18102872,
            0.21264752,
            1.0,
        ]

        # need to transform attacked feature into categorical
        def transform_feature(x):
            # bin against a copy of the original values so that bin indices already
            # written into x (e.g. 0) are not re-matched by a later bin range
            orig = x.copy()
            for i in range(len(bins) - 1):
                x[(orig >= bins[i]) & (orig < bins[i + 1])] = i

        values = list(range(len(bins) - 1))

        (x_train_diabetes, y_train_diabetes), (x_test_diabetes, y_test_diabetes) = get_diabetes_dataset
        # training data without attacked feature
        x_train_for_attack = np.delete(x_train_diabetes, attack_feature, 1)
        # only attacked feature
        x_train_feature = x_train_diabetes[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_train_feature)
        # training data with attacked feature (after transformation)
        x_train = np.concatenate((x_train_for_attack[:, :attack_feature], x_train_feature), axis=1)
        x_train = np.concatenate((x_train, x_train_for_attack[:, attack_feature:]), axis=1)

        # test data without attacked feature
        x_test_for_attack = np.delete(x_test_diabetes, attack_feature, 1)
        # only attacked feature
        x_test_feature = x_test_diabetes[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_test_feature)
        # test data with attacked feature (after transformation)
        x_test = np.concatenate((x_test_for_attack[:, :attack_feature], x_test_feature), axis=1)
        x_test = np.concatenate((x_test, x_test_for_attack[:, attack_feature:]), axis=1)

        from sklearn import linear_model

        regr_model = linear_model.LinearRegression()
        regr_model.fit(x_train_diabetes, y_train_diabetes)
        regressor = ScikitlearnRegressor(regr_model)

        meminf_attack = MembershipInferenceBlackBox(regressor, attack_model_type="rf", input_type="loss")
        attack_train_ratio = 0.5
        attack_train_size = int(len(x_train) * attack_train_ratio)
        attack_test_size = int(len(x_test) * attack_train_ratio)
        meminf_attack.fit(
            x_train[:attack_train_size],
            y_train_diabetes[:attack_train_size],
            x_test[:attack_test_size],
            y_test_diabetes[:attack_test_size],
        )
        attack = AttributeInferenceMembership(regressor, meminf_attack, attack_feature=attack_feature)
        # infer attacked feature
        inferred_train = attack.infer(x_train_for_attack, y_train_diabetes, values=values)
        inferred_test = attack.infer(x_test_for_attack, y_test_diabetes, values=values)
        # check accuracy
        train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
        test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
        assert 0.1 <= train_acc
        assert 0.1 <= test_acc

    except ARTTestException as e:
        art_warning(e)
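
The binning loop above can also be written with np.digitize; a hedged equivalent sketch (bin edges copied from the test, sample ages illustrative):

import numpy as np

bins = [-0.96838121, -0.18102872, 0.21264752, 1.0]
ages = np.array([-0.5, 0.0, 0.05, 0.3])

# np.digitize returns 1-based bin indices for these edges; subtract 1 to get categories 0, 1, 2
categories = np.digitize(ages, bins) - 1
# categories -> [0, 1, 1, 2]
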
Example #5
    # Earlier variant kept for reference: calibrate a distance threshold on held-out
    # slices instead of fitting the attack model, then score it the same way.
    # x_valid, y_valid = dataset_to_list(dataset.get_dataset('valid'))
    # x_valid, y_valid = to_numpy(torch.stack(x_valid)), to_numpy(y_valid)

    # attack.calibrate_distance_threshold(x_train[100:300], y_train[100:300], x_valid[100:300], y_valid[100:300])
    # result = np.concatenate((attack.infer(x_train[:100], y_train[:100]), attack.infer(x_valid[:100], y_valid[:100])))
    # y_truth = np.concatenate(([1] * len(x_train[:100]), [0] * len(x_valid[:100])))
    # print('result:')
    # print('F1 score: ', metrics.f1_score(result, y_truth))
    # print('Accuracy score: ', metrics.accuracy_score(result, y_truth))
    # print('Recall score: ', metrics.recall_score(result, y_truth))
    # print('Precision score: ', metrics.precision_score(result, y_truth))
    attack = Attack(classifier)
    x_train, y_train = dataset_to_list(dataset.get_dataset('train'))
    x_train, y_train = to_numpy(torch.stack(x_train)), to_numpy(y_train)
    x_valid, y_valid = dataset_to_list(dataset.get_dataset('valid'))
    x_valid, y_valid = to_numpy(torch.stack(x_valid)), to_numpy(y_valid)

    x_train, y_train = x_train[:1000], y_train[:1000]
    x_valid, y_valid = x_valid[:1000], y_valid[:1000]

    attack.fit(x_train[100:], y_train[100:], x_valid[100:], y_valid[100:])
    result = np.concatenate((attack.infer(x_train[:100], y_train[:100]),
                             attack.infer(x_valid[:100], y_valid[:100])))
    y_truth = np.concatenate(
        ([1] * len(x_train[:100]), [0] * len(x_valid[:100])))
    print('result:')
    # sklearn metrics expect (y_true, y_pred)
    print('F1 score: ', metrics.f1_score(y_truth, result))
    print('Accuracy score: ', metrics.accuracy_score(y_truth, result))
    print('Recall score: ', metrics.recall_score(y_truth, result))
    print('Precision score: ', metrics.precision_score(y_truth, result))
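
    # Hedged extra step (not in the original snippet): summarize the same predictions
    # with a confusion matrix and the membership-advantage metric (TPR - FPR).
    # Rows of the matrix are true labels (0 = non-member, 1 = member), columns are predictions.
    tn, fp, fn, tp = metrics.confusion_matrix(y_truth, result).ravel()
    tpr = tp / (tp + fn)  # members correctly identified
    fpr = fp / (fp + tn)  # non-members wrongly flagged as members
    print('Membership advantage (TPR - FPR): ', tpr - fpr)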