def detect_poison(self, **kwargs) -> Tuple[dict, np.ndarray]:
        """
        Returns poison detected and a report.

        :param kwargs: A dictionary of detection-specific parameters.
        :return: (report, is_clean_lst):
                where report is a dict object that contains information specified by the provenance detection method,
                and is_clean_lst is a list such that is_clean_lst[i]=1 means that x_train[i] is clean and
                is_clean_lst[i]=0 means that x_train[i] was classified as poison.
        :rtype: `tuple`
        """
        self.set_params(**kwargs)

        if self.x_val is None:
            report = self.detect_poison_untrusted()
        else:
            report = self.detect_poison_partially_trusted()

        n_train = len(self.x_train)
        indices_by_provenance = segment_by_class(np.arange(n_train),
                                                 self.p_train,
                                                 self.num_devices)
        self.is_clean_lst = np.array([1] * n_train)

        for device in report:
            self.is_clean_lst[indices_by_provenance[device]] = 0
        self.assigned_clean_by_device = segment_by_class(
            np.array(self.is_clean_lst), self.p_train, self.num_devices)

        return report, self.is_clean_lst
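
# Hypothetical illustration of the report -> is_clean_lst mapping performed above
# (device ids and values are made up for the example, not taken from the code):
# every training point whose provenance feature matches a suspected device is
# marked 0 (poison), everything else stays 1 (clean).
report = {2: 0.15}                    # device 2 flagged with a performance diff of 0.15
p_train_sparse = [0, 2, 1, 2, 0]      # device id of each x_train[i]
is_clean_lst = [0 if device in report else 1 for device in p_train_sparse]
assert is_clean_lst == [1, 0, 1, 0, 1]
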
    def detect_poison(self, **kwargs) -> Tuple[dict, List[int]]:
        """
        Returns poison detected and a report.

        :return: (report, is_clean_lst):
                where report is a dictionary whose keys are the indices of suspected poisons and whose values are
                their outlier scores, and is_clean_lst is a list such that is_clean_lst[i]=1 means that x_train[i]
                is clean and is_clean_lst[i]=0 means that x_train[i] was classified as poison.
        """
        self.set_params(**kwargs)

        if self.classifier.layer_names is not None:
            nb_layers = len(self.classifier.layer_names)
        else:
            raise ValueError("No layer names identified.")
        features_x_poisoned = self.classifier.get_activations(
            self.x_train, layer=nb_layers - 1, batch_size=self.batch_size)

        features_split = segment_by_class(features_x_poisoned,
                                          self.y_train_sparse,
                                          self.classifier.nb_classes)
        score_by_class = []
        keep_by_class = []

        for feature in features_split:
            # Check for empty list
            if len(feature):  # pylint: disable=C1801
                score = SpectralSignatureDefense.spectral_signature_scores(
                    np.vstack(feature))
                score_cutoff = np.quantile(
                    score,
                    max(1 - self.eps_multiplier * self.expected_pp_poison,
                        0.0))
                score_by_class.append(score)
                keep_by_class.append(score < score_cutoff)
            else:
                score_by_class.append([0])
                keep_by_class.append([True])

        base_indices_by_class = segment_by_class(
            np.arange(len(self.y_train_sparse)),
            self.y_train_sparse,
            self.classifier.nb_classes,
        )
        is_clean_lst = [0] * len(self.y_train_sparse)
        report = {}

        for keep_booleans, all_scores, indices in zip(keep_by_class,
                                                      score_by_class,
                                                      base_indices_by_class):
            for keep_boolean, all_score, idx in zip(keep_booleans, all_scores,
                                                    indices):
                if keep_boolean:
                    is_clean_lst[idx] = 1
                else:
                    report[idx] = all_score[0]

        return report, is_clean_lst
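
# A minimal sketch of the scoring that spectral_signature_scores is assumed to
# perform, following the standard spectral-signatures idea (Tran et al., 2018):
# centre the per-class activations, take the top right singular vector, and use
# the magnitude of each example's projection onto it as an outlier score. The
# exact ART implementation may differ; this only illustrates the technique.
import numpy as np

def spectral_signature_scores_sketch(features: np.ndarray) -> np.ndarray:
    centered = features - np.mean(features, axis=0)
    # top right singular vector of the centred activation matrix
    _, _, v_t = np.linalg.svd(centered, full_matrices=False)
    top_vec = v_t[0]
    # one score per example, shaped (n, 1) to match the all_score[0] indexing above
    return np.abs(centered @ top_vec)[:, None]
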
    def detect_poison_untrusted(self, **kwargs) -> Dict[int, float]:
        """
        Detect poison given no trusted validation data.

        :return: dictionary where keys are suspected poisonous device indices and values are performance differences
        """
        self.set_params(**kwargs)

        suspected = {}
        (
            train_data,
            valid_data,
            train_labels,
            valid_labels,
            train_prov,
            valid_prov,
        ) = train_test_split(self.x_train,
                             self.y_train,
                             self.p_train,
                             test_size=self.pp_valid)

        train_segments = segment_by_class(train_data, train_prov,
                                          self.num_devices)
        valid_segments = segment_by_class(valid_data, valid_prov,
                                          self.num_devices)

        for device_idx, (train_segment, valid_segment) in enumerate(
                zip(train_segments, valid_segments)):
            filtered_data, filtered_labels = self.filter_input(
                train_data, train_labels, train_segment)

            unfiltered_model = deepcopy(self.classifier)
            filtered_model = deepcopy(self.classifier)

            unfiltered_model.fit(train_data, train_labels)
            filtered_model.fit(filtered_data, filtered_labels)

            valid_non_device_data, valid_non_device_labels = self.filter_input(
                valid_data, valid_labels, valid_segment)
            var_w = performance_diff(
                filtered_model,
                unfiltered_model,
                valid_non_device_data,
                valid_non_device_labels,
                perf_function=self.perf_func,
            )

            if self.eps < var_w:
                suspected[device_idx] = var_w
                train_data = filtered_data
                train_labels = filtered_labels
                valid_data = valid_non_device_data
                valid_labels = valid_non_device_labels

        return suspected
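
# A minimal sketch of what performance_diff is assumed to compute: the same metric
# evaluated for both models on the held-out data, returned as
# perf(model1) - perf(model2). The sign is an assumption, but it is consistent with
# the `self.eps < var_w` check above: filtering out a poisonous device's data should
# improve validation performance. Both models are assumed to expose a predict method
# returning per-class scores.
import numpy as np

def performance_diff_sketch(model1, model2, x, y):
    y = np.asarray(y)

    def accuracy(model):
        preds = np.argmax(model.predict(x), axis=1)
        labels = np.argmax(y, axis=1) if y.ndim > 1 else y
        return float(np.mean(preds == labels))

    return accuracy(model1) - accuracy(model2)
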
Example #4
    def test_segment_by_class(self):
        data = np.array([[3, 2], [9, 2], [4, 0], [9, 0]])
        classes = to_categorical(np.array([2, 1, 0, 1]))
        num_classes = 3
        segments = segment_by_class(data, classes, num_classes)
        self.assertEqual(len(segments), num_classes)
        self.assertEqual(len(segments[1]), 2)
        self.assertTrue(np.all(np.equal(segments[0], np.array([data[2]]))))
        self.assertTrue(np.all(np.equal(segments[1], np.array([data[1], data[3]]))))
        self.assertTrue(np.all(np.equal(segments[2], np.array([data[0]]))))

        num_classes = 4
        segments = segment_by_class(data, classes, num_classes)
        self.assertEqual(len(segments), num_classes)
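
# A minimal sketch of segment_by_class that is consistent with the test above,
# assuming `classes` may be either one-hot encoded or already integer labels:
from typing import List
import numpy as np

def segment_by_class_sketch(data: np.ndarray, classes: np.ndarray, num_classes: int) -> List[np.ndarray]:
    classes = np.asarray(classes)
    labels = np.argmax(classes, axis=1) if classes.ndim > 1 else classes
    # one array per class index, empty if no example carries that label
    return [np.asarray([d for d, c in zip(data, labels) if c == i]) for i in range(num_classes)]
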
Example #5
    def evaluate_defence(self, is_clean, **kwargs):
        """
        Returns confusion matrix.

        :param is_clean: Ground truth, where is_clean[i]=1 means that x_train[i] is clean and is_clean[i]=0 means
                         x_train[i] is poisonous.
        :type is_clean: :class:`np.ndarray`
        :param kwargs: A dictionary of defence-specific parameters.
        :type kwargs: `dict`
        :return: JSON object with confusion matrix.
        :rtype: `jsonObject`
        """
        if is_clean is None or is_clean.size == 0:
            raise ValueError(
                "is_clean was not provided while invoking evaluate_defence.")
        self.set_params(**kwargs)

        if not self.assigned_clean_by_device:
            self.detect_poison()

        self.is_clean_by_device = segment_by_class(is_clean, self.p_train,
                                                   self.num_devices)
        self.errors_by_device, conf_matrix_json = self.evaluator.analyze_correctness(
            self.assigned_clean_by_device, self.is_clean_by_device)
        return conf_matrix_json
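
# A minimal sketch of the per-device bookkeeping that analyze_correctness is
# assumed to perform, treating "poison" as the positive class (1 = clean,
# 0 = poison in both lists). The actual JSON layout produced by the evaluator
# is not reproduced here.
import numpy as np

def confusion_counts_sketch(assigned_clean, true_clean):
    assigned, true = np.asarray(assigned_clean), np.asarray(true_clean)
    return {
        "TruePositive": int(np.sum((assigned == 0) & (true == 0))),   # poison correctly flagged
        "FalsePositive": int(np.sum((assigned == 0) & (true == 1))),  # clean wrongly flagged as poison
        "TrueNegative": int(np.sum((assigned == 1) & (true == 1))),
        "FalseNegative": int(np.sum((assigned == 1) & (true == 0))),  # poison missed
    }
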
    def detect_poison_partially_trusted(self, **kwargs) -> Dict[int, float]:
        """
        Detect poison given trusted validation data.

        :return: dictionary where keys are suspected poisonous device indices and values are performance differences
        """
        self.set_params(**kwargs)

        if self.x_val is None or self.y_val is None:
            raise ValueError("Trusted data unavailable.")

        suspected = {}
        unfiltered_data = np.copy(self.x_train)
        unfiltered_labels = np.copy(self.y_train)

        segments = segment_by_class(self.x_train, self.p_train, self.num_devices)
        for device_idx, segment in enumerate(segments):
            filtered_data, filtered_labels = self.filter_input(unfiltered_data, unfiltered_labels, segment)

            unfiltered_model = deepcopy(self.classifier)
            filtered_model = deepcopy(self.classifier)

            unfiltered_model.fit(unfiltered_data, unfiltered_labels)
            filtered_model.fit(filtered_data, filtered_labels)

            var_w = performance_diff(
                filtered_model, unfiltered_model, self.x_val, self.y_val, perf_function=self.perf_func,
            )
            if self.eps < var_w:
                suspected[device_idx] = var_w
                unfiltered_data = filtered_data
                unfiltered_labels = filtered_labels

        return suspected
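
# Hypothetical illustration of the sequential filtering in the loop above, with toy
# data: once a device is suspected, its points stay filtered out of the training set
# used for every later comparison.
import numpy as np

p_train = np.array([0, 0, 1, 1, 2, 2])   # device id per training point
x_train = np.arange(6)                   # toy "data", one scalar per point
suspected_devices = {1}                  # pretend device 1 exceeded eps

kept_mask = ~np.isin(p_train, list(suspected_devices))
filtered_x = x_train[kept_mask]          # data the next iteration trains on
assert filtered_x.tolist() == [0, 1, 4, 5]
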
    def evaluate_defence(self, is_clean: np.ndarray, **kwargs) -> str:
        """
        If ground truth is known, this function returns a confusion matrix in the form of a JSON object.

        :param is_clean: Ground truth, where is_clean[i]=1 means that x_train[i] is clean and is_clean[i]=0 means
                         x_train[i] is poisonous.
        :param kwargs: A dictionary of defence-specific parameters.
        :return: JSON object with confusion matrix.
        """
        if is_clean is None or is_clean.size == 0:
            raise ValueError("is_clean was not provided while invoking evaluate_defence.")
        is_clean_by_class = segment_by_class(is_clean, self.y_train_sparse, self.classifier.nb_classes)
        _, predicted_clean = self.detect_poison()
        predicted_clean_by_class = segment_by_class(predicted_clean, self.y_train_sparse, self.classifier.nb_classes)

        _, conf_matrix_json = self.evaluator.analyze_correctness(predicted_clean_by_class, is_clean_by_class)

        return conf_matrix_json
Example #8
    def _segment_by_class(self, data: np.ndarray, features: np.ndarray) -> List[np.ndarray]:
        """
        Returns segmented data according to specified features.

        :param data: Data to be segmented.
        :param features: Features used to segment data, e.g., segment according to predicted label or to `y_train`.
        :return: Segmented data according to specified features.
        """
        n_classes = self.classifier.nb_classes
        return segment_by_class(data, features, n_classes)
Example #9
    def _segment_by_class(self, data, features):
        """
        Returns segmented data according to specified features.

        :param data: to be segmented
        :type data: `np.ndarray`
        :param features: features used to segment data, e.g., segment according to predicted label or to `y_train`
        :type features: `np.ndarray`
        :return: segmented data according to specified features.
        :rtype: `list`
        """
        n_classes = self.classifier.nb_classes()
        return segment_by_class(data, features, n_classes)