def _non_zero_columns_search(array):
    """Return the indices of the columns holding at least one non-zero value.

    Parameters
    ----------
    array
        2-D array (presumably a CArray) that supports ``array[:, c]``
        indexing; ``array.shape[1]`` is taken as the number of columns.

    Returns
    -------
    CArray
        Integer array with the indices, in increasing order, of the columns
        for which ``.any()`` is true. Empty if there is no such column.

    """
    # Collect the indices in a plain list and build the CArray once, instead
    # of re-allocating the result through CArray.append for every match.
    non_zero_col = [c for c in range(array.shape[1]) if array[:, c].any()]
    return CArray(non_zero_col, dtype=int)
Exemplo n.º 2
0
def refine_roc(fpr, tpr, th):
    """Force a ROC curve to start at (0, 0) and to end at (1, 1).

    If the first point is not (0, 0), a leading (0, 0) point is inserted
    together with a threshold 1e-3 above the current first threshold.
    If the last point is not (1, 1), a trailing (1, 1) point is added
    together with a threshold 1e-3 below the current last threshold.

    Parameters
    ----------
    fpr : CArray
        False Positive Rates, as returned by `.BaseRoc.compute()`.
    tpr : CArray
        True Positive Rates, as returned by `.BaseRoc.compute()`.
    th : CArray
        Thresholds, as returned by `.BaseRoc.compute()`.

    Returns
    -------
    fpr, tpr, th
        The (possibly extended) inputs. When both bounds are already in
        place the very same objects are returned.

    """
    starts_at_origin = tpr[0] == 0 and fpr[0] == 0
    if not starts_at_origin:
        # New first threshold sits just above the current first one
        th = CArray(th[0] + 1e-3).append(th)
        tpr = CArray(0).append(tpr)
        fpr = CArray(0).append(fpr)

    ends_at_one = tpr[-1] == 1 and fpr[-1] == 1
    if not ends_at_one:
        # New last threshold sits just below the current last one
        th = th.append(th[-1] - 1e-3)
        tpr = tpr.append(1)
        fpr = fpr.append(1)

    return fpr, tpr, th
    def apply_feasible_manipulations(self, t, x: CArray) -> CArray:
        """Apply the padding practical manipulation on the input sample.

        Parameters
        ----------
        t : CArray
            The vector of manipulations in [0, 1].
        x : CArray
            The input space sample to perturb.

        Returns
        -------
        CArray
            The adversarial malware: `x` with the manipulation values,
            multiplied by 255 and cast to integers, appended to it.

        """
        # `np.int` was a plain alias of the builtin `int`; it was deprecated
        # in NumPy 1.20 and removed in NumPy 1.24, where accessing it raises
        # AttributeError. The builtin gives the very same dtype.
        byte_values = (t * 255).astype(int)
        x_adv = x.append(byte_values)
        return x_adv
Exemplo n.º 4
0
    def _test_evasion_multiclass(self, expected_x):
        """Run the PGD-LS evasion on the multiclass classifier and check it.

        The attack is run from the same starting sample once for every
        integer `dmax` from 0 to 2 included, each run being initialised
        with the point returned by the previous one. The optimal point of
        the attack is then compared with `expected_x` and the collected
        sequences are handed over to `self._make_plots`.

        Parameters
        ----------
        expected_x
            Expected optimal point; compared with the densified, ravelled
            `eva.x_opt` up to 4 decimals.

        """
        # EVASION
        self.multiclass.verbose = 2

        # Box constraint taken from the normalizer feature range, if any
        if self.normalizer is None:
            lb = ub = None
        else:
            lb = self.normalizer.feature_range[0]
            ub = self.normalizer.feature_range[1]

        dmax = 2

        self.solver_params = {'eta': 1e-1, 'eta_min': 1.0}

        eva = CAttackEvasionPGDLS(
            classifier=self.multiclass,
            surrogate_classifier=self.multiclass,
            surrogate_data=self.ds,
            distance='l2',
            dmax=dmax,
            lb=lb,
            ub=ub,
            solver_params=self.solver_params,
            y_target=self.y_target)

        eva.verbose = 0  # 2

        # Starting sample: point 1, from the class 3 region and wrongly
        # classified. Alternatives noted by the original author:
        #   0  -> class 2 region
        #   68 -> class 1 region
        #   53 -> class 3 region (evasion goes up usually)
        #   49 -> class 0 region (wrongly classified point)
        #   27 -> class 0 region (correctly classified point)
        p_idx = 1

        x0 = self.ds.X[p_idx, :]
        y0 = self.ds.Y[p_idx].item()

        # Only the final point of each run is stored (one entry per dmax);
        # for all the iterations one should use eva.x_seq and eva.f_seq
        x_seq = CArray.empty((0, x0.shape[1]))
        scores = CArray([])
        f_seq = CArray([])

        x = x0
        for d in range(dmax + 1):

            self.logger.info("Evasion at dmax: " + str(d))

            eva.dmax = d
            x, f_opt = eva._run(x0=x0, y0=y0, x_init=x)
            y_pred, score = self.multiclass.predict(
                x, return_decision_function=True)

            f_seq = f_seq.append(f_opt)
            x_seq = x_seq.append(x, axis=0)

            # Score of the target class if set, of the true class otherwise
            score_col = self.y_target if self.y_target is not None else y0
            s = score[:, score_col]
            scores = scores.append(s)

        self.logger.info("Predicted label after evasion: " + str(y_pred))
        self.logger.info("Score after evasion: {:}".format(s))
        self.logger.info("Objective function after evasion: {:}".format(f_opt))

        # Compare optimal point with expected
        x_opt = eva.x_opt.todense().ravel()
        self.assert_array_almost_equal(x_opt, expected_x, decimal=4)

        self._make_plots(x_seq, dmax, eva, x0, scores, f_seq)
Exemplo n.º 5
0
    def _make_plot(self, p_idx, eva, dmax):
        """Run the evasion for increasing `dmax` and save a 2x2 figure.

        Nothing is done when `self.make_figures` is False. Otherwise the
        attack is run from sample `p_idx` once for every integer distance
        from 0 to `dmax` included, each run being initialised with the
        point returned by the previous one, and a figure with four panels
        (decision regions, objective function, classifier score and
        objective value against dmax) is saved in `self.images_folder`.

        Parameters
        ----------
        p_idx
            Index of the starting sample inside `self.ds`.
        eva
            Attack object; its `dmax` attribute is overwritten at each run.
        dmax
            Maximum distance; also the radius of the plotted constraint.

        """
        if self.make_figures is False:
            self.logger.debug("Skipping figures...")
            return

        x0 = self.ds.X[p_idx, :]
        y0 = self.ds.Y[p_idx].item()

        # Only the final point of each run is stored (one entry per dmax);
        # for all the iterations one should use eva.x_seq and eva.f_seq
        x_seq = CArray.empty((0, x0.shape[1]))
        scores = CArray([])
        f_seq = CArray([])

        x = x0
        for d in range(dmax + 1):

            self.logger.info("Evasion at dmax: " + str(d))

            eva.dmax = d
            x, f_opt = eva._run(x0=x0, y0=y0, x_init=x)
            y_pred, score = self.multiclass.predict(
                x, return_decision_function=True)

            f_seq = f_seq.append(f_opt)
            x_seq = x_seq.append(x, axis=0)

            # Score of the target class if set, of the true class otherwise
            score_col = self.y_target if self.y_target is not None else y0
            s = score[:, score_col]
            scores = scores.append(s)

        self.logger.info("Predicted label after evasion: {:}".format(y_pred))
        self.logger.info("Score after evasion: {:}".format(s))
        self.logger.info("Objective function after evasion: {:}".format(f_opt))

        fig = CFigure(height=9, width=10, markersize=6, fontsize=12)

        # Plot bounds must contain both the dataset and the evasion path
        (min_x, max_x), (min_y, max_y) = self.ds.get_bounds()
        min_x = min(min_x, x_seq[:, 0].min())
        max_x = max(max_x, x_seq[:, 0].max())
        min_y = min(min_y, x_seq[:, 1].min())
        max_y = max(max_y, x_seq[:, 1].max())
        ds_bounds = [(min_x, max_x), (min_y, max_y)]

        # Keyword arguments shared by the first two panels
        path_kwargs = dict(path_style='-',
                           start_style='o',
                           start_facecolor='w',
                           start_edgewidth=2,
                           final_style='o',
                           final_facecolor='k',
                           final_edgewidth=2)
        constraint_kwargs = dict(multipoint=True,
                                 plot_background=False,
                                 n_grid_points=20,
                                 grid_limits=ds_bounds,
                                 colorbar=False,
                                 levels_linewidth=2.0,
                                 levels_style=':',
                                 c=x0,
                                 r=dmax)

        # Panel 1: multiclass decision regions
        fig.subplot(2, 2, 1)
        fig = self._plot_decision_function(fig, plot_background=True)

        fig.sp.plot_path(x_seq, **path_kwargs)

        # distance constraint
        fig.sp.plot_fun(func=self._rescaled_distance,
                        levels=[0],
                        levels_color='k',
                        alpha_levels=.4,
                        **constraint_kwargs)

        fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

        # Panel 2: multiclass evasion objective function
        fig.subplot(2, 2, 2)
        fig = self._plot_decision_function(fig)

        fig.sp.plot_fgrads(eva.objective_function_gradient,
                           grid_limits=ds_bounds,
                           n_grid_points=20,
                           color='k',
                           alpha=.5)

        fig.sp.plot_path(x_seq, **path_kwargs)

        # distance constraint
        fig.sp.plot_fun(func=self._rescaled_distance,
                        levels=[0],
                        levels_color='w',
                        alpha_levels=.5,
                        **constraint_kwargs)

        fig.sp.plot_fun(lambda z: eva.objective_function(z),
                        multipoint=True,
                        grid_limits=ds_bounds,
                        colorbar=False,
                        n_grid_points=20,
                        plot_levels=False)

        fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

        # Panels 3 and 4: one value per dmax
        if self.y_target is not None:
            score_title = "Classifier Score for Target Class (Targ. Evasion)"
        else:
            score_title = "Classifier Score for True Class (Indiscr. Evasion)"

        panels = ((3, score_title, scores),
                  (4, "Objective Function", f_seq))
        for panel_idx, panel_title, panel_values in panels:
            fig.subplot(2, 2, panel_idx)
            fig.sp.title(panel_title)
            fig.sp.plot(panel_values)

            fig.sp.grid()
            fig.sp.xlim(0, dmax)
            fig.sp.xlabel("dmax")

        fig.tight_layout()

        k_name = 'lin' if self.kernel is None else self.kernel.class_type
        fig_name = \
            "pgd_ls_reject_threshold_{:}c_kernel-{:}_target-{:}.pdf".format(
                self.ds.num_classes, k_name, self.y_target)
        fig.savefig(fm.join(self.images_folder, fig_name))
    def compute_indices(self, dataset):
        """Build the training and test indices of every fold.

        For each fold the dataset classes are randomly shuffled and
        scanned until the desired number of training classes is reached;
        `self.n_train_samples` random samples of each chosen class go to
        the training set, all the remaining samples of the dataset go to
        the test set. Classes with fewer than `self.n_train_samples + 1`
        samples are skipped with a warning.

        Parameters
        ----------
        dataset : CDataset
            Dataset to split.

        Returns
        -------
        CDataSplitter
            Instance of the dataset splitter with tr/ts indices.

        """
        # Start from a clean state
        self._tr_idx = []
        self._ts_idx = []
        self._tr_classes = []

        # Half of the classes are used when no custom number is set
        if self.n_train_classes is None:
            n_train_classes = int(dataset.num_classes / 2)
        else:
            n_train_classes = int(self.n_train_classes)

        for fold in range(self.num_folds):

            # A fixed random state is shifted by 1234 per fold so that
            # folds differ; None is passed through and managed by numpy
            seed = self.random_state
            if seed is not None:
                seed = seed + 1234 * fold

            # All classes are shuffled, not just 'n_train_classes' of them,
            # to have a backup when some class has to be skipped
            shuffled_classes = CArray.randsample(
                dataset.classes, dataset.num_classes, random_state=seed)

            # Accumulators for the chosen classes and their samples
            tr_samples = CArray([], dtype=int)
            tr_classes = CArray([], dtype=shuffled_classes.dtype)

            for candidate in shuffled_classes:
                if tr_classes.size >= n_train_classes:
                    break  # desired number of training classes reached

                # Indices of the samples belonging to the candidate class
                class_samples = CArray(
                    dataset.Y.find(dataset.Y == candidate))

                # At least n_train_samples + 1 samples are required
                min_samples = self.n_train_samples + 1
                if class_samples.size < min_samples:
                    self.logger.warning(
                        "skipping class {:} for training set. "
                        "{:} samples is less than {:}."
                        "".format(candidate, class_samples.size, min_samples))
                    continue

                # Random subselection of the training samples of the class
                picked = CArray.randsample(
                    class_samples, self.n_train_samples, random_state=seed)
                tr_samples = tr_samples.append(picked)
                tr_classes = tr_classes.append(candidate)

            # Training classes are stored sorted
            self._tr_classes.append(tr_classes.sort())

            # Training indices are stored sorted too
            tr_samples.sort(inplace=True)

            # Every sample not used for training goes to test
            ts_samples = CArray([
                i for i in range(dataset.num_samples)
                if i not in tr_samples
            ])

            self._tr_idx.append(tr_samples)
            self._ts_idx.append(ts_samples)

        return self
    def test_append(self):
        """Test for CArray.append() method."""
        self.logger.info("Test for CArray.append() method.")

        def _check_append(first, second):
            """Check `first.append(second)` for axis None, 0 and 1."""
            self.logger.info("a1: {:} ".format(first))
            self.logger.info("a2: {:} ".format(second))

            # Default append (axis None): result should be ravelled
            flat_res = first.append(second)
            self.logger.info("a1.append(a2): {:}".format(flat_res))
            if first.isdense:
                self.assertEqual(1, flat_res.ndim)
            else:
                # sparse results cannot be 1-D, so shape[0] is checked
                self.assertEqual(1, flat_res.shape[0])

            # The result must hold the elements of the first array
            # followed by the elements of the second one
            exp_head = first.todense().ravel()
            exp_tail = second.todense().ravel()
            if first.issparse:  # result will be sparse, so always 2d
                exp_head = exp_head.atleast_2d()
                exp_tail = exp_tail.atleast_2d()
            self.assert_array_equal(flat_res[:first.size], exp_head)
            self.assert_array_equal(flat_res[first.size:], exp_tail)

            rows1, cols1 = first.atleast_2d().shape
            rows2, cols2 = second.atleast_2d().shape

            # Vertical append (axis 0): rows add up, columns are kept
            v_res = first.append(second, axis=0)
            self.logger.info("a1.append(a2, axis=0): {:}".format(v_res))
            self.assertEqual(cols1, v_res.shape[1])
            self.assertEqual(rows1 + rows2, v_res.shape[0])
            self.assert_array_equal(v_res[rows1:, :], second)

            # Horizontal append (axis 1): columns add up, rows are kept
            h_res = first.append(second, axis=1)
            self.logger.info("a1.append(a2, axis=1): {:}".format(h_res))
            self.assertEqual(cols1 + cols2, h_res.shape[1])
            self.assertEqual(rows1, h_res.shape[0])
            self.assert_array_equal(h_res[:, cols1:], second)

        # Every dense/sparse combination
        pairs = ((self.array_dense, self.array_dense),
                 (self.array_sparse, self.array_sparse),
                 (self.array_sparse, self.array_dense),
                 (self.array_dense, self.array_sparse))
        for first, second in pairs:
            _check_append(first, second)

        # Appending empty arrays, along every axis
        empty_sparse = CArray([], tosparse=True)
        empty_dense = CArray([], tosparse=False)
        for axis in (None, 0, 1):
            res = empty_sparse.append(empty_dense, axis=axis)
            self.assertTrue((res == empty_dense).all())
Exemplo n.º 8
0
 def apply_feasible_manipulations(self, t, x: CArray):
     """Append the manipulation values, scaled by 255, to the input sample.

     Parameters
     ----------
     t
         Vector of manipulations; it is multiplied by 255 and cast to
         integers (presumably values in [0, 1] mapped to bytes -- confirm
         against the caller).
     x : CArray
         The sample the resulting values are appended to.

     Returns
     -------
     The result of ``x.append(...)`` with the integer values.

     """
     # `np.int` was a plain alias of the builtin `int`; it was deprecated
     # in NumPy 1.20 and removed in NumPy 1.24, where accessing it raises
     # AttributeError. The builtin gives the very same dtype.
     byte_values = (t * 255).astype(int)
     x_adv = x.append(byte_values)
     return x_adv