def _forward(self, x):
    """Compute the decision-function scores of the wrapped sklearn model.

    The model's ``decision_function`` is used when it is available,
    otherwise ``predict_proba`` is used.

    Parameters
    ----------
    x : CArray
        Samples to score. ``x.get_data()`` is passed to the sklearn model
        and ``x.shape[0]`` is used as the number of output rows.

    Returns
    -------
    CArray
        2-D array of scores with ``self.n_classes`` columns.

    Raises
    ------
    AttributeError
        If the wrapped model has neither ``decision_function``
        nor ``predict_proba``.
    ValueError
        If the number of score columns differs from ``self.n_classes``.

    """
    model = self._sklearn_model
    if hasattr(model, "decision_function"):
        scores = model.decision_function(x.get_data())
        probs = False
    elif hasattr(model, "predict_proba"):
        scores = model.predict_proba(x.get_data())
        probs = True
    else:
        raise AttributeError(
            "This model has neither decision_function nor predict_proba.")

    scores = CArray(scores)

    # two-class classifiers outputting only scores for class 1
    if len(scores.shape) == 1:
        # duplicate column for class 0: the complement for probabilities,
        # the negated score otherwise
        outputs = CArray.zeros(shape=(x.shape[0], self.n_classes))
        outputs[:, 1] = scores.T
        outputs[:, 0] = 1 - scores.T if probs else -scores.T
        scores = outputs

    if scores.shape[1] != self.n_classes:  # this happens in one-vs-one
        raise ValueError(
            "Number of columns is not equal to number of classes!")

    # atleast_2d() returns the reshaped array instead of working in place,
    # so its result has to be the value handed back to the caller
    return scores.atleast_2d()
def _inverse_transform(self, x):
    """Project data from the reduced space back to the original space.

    The input is multiplied by the stored components and the stored
    mean is added back.

    Parameters
    ----------
    x : CArray
        Array to map back. It must have ``self.n_components`` columns
        and expose ``ndim``, which is read to select the return branch.

    Returns
    -------
    CArray
        Input array mapped back to its original space.

    Raises
    ------
    ValueError
        If the number of columns of `x` differs from ``self.n_components``.

    Examples
    --------
    >>> from secml.array import CArray
    >>> from secml.ml.features.reduction import CPCA

    >>> array = CArray([[1., 0., 2.], [2., 5., 0.], [0., 1., -9.]])
    >>> pca = CPCA().fit(array)
    >>> array_pca = pca.transform(array)
    >>> pca.inverse_transform(array_pca).round(6)
    CArray(3, 3)(dense: [[ 1. -0. 2.] [ 2. 5. -0.] [-0. 1. -9.]])

    """
    reduced = CArray(x).atleast_2d()

    if reduced.shape[1] != self.n_components:
        raise ValueError(
            "array to revert must have {:} features (columns).".format(
                self.n_components))

    # Undo the projection, then restore the mean
    restored = CArray(reduced.dot(self._components) + self.mean)

    if x.ndim >= 2:
        return restored.atleast_2d()
    return restored
def predict(self, x: CArray, return_decision_function: bool = True):
    """Predict the label of the given sample(s), given in input space.

    Features are extracted with ``self.extract_features`` and then
    passed to ``self.classifier.predict``.

    Parameters
    ----------
    x : CArray
        The input sample in input space.
    return_decision_function : bool, default True
        Forwarded unchanged to the wrapped classifier. If True, the
        decision function value is returned along with the label.

    Returns
    -------
    CArray, (CArray)
        Whatever ``self.classifier.predict`` returns: the label and,
        if `return_decision_function` is True, also the output of the
        decision function.

    """
    samples = x.atleast_2d()
    # NOTE: a former per-sample variant that cut each row at the first
    # occurrence of the value 256 before extracting features is disabled;
    # the whole batch is handed to extract_features instead.
    extracted = self.extract_features(samples)
    return self.classifier.predict(
        extracted, return_decision_function=return_decision_function)
def _decision_function(x, y=None):
    """Return the scores of the sklearn classifier ``self.skclfs[i]``.

    ``self``, ``i`` and ``clf`` are not parameters: they are taken from
    the enclosing scope in which this function is defined.

    Parameters
    ----------
    x : CArray
        Samples to score, reshaped to 2-D before use.
    y : int or None, optional
        Index of the score column to return. If None (default), the
        scores for all columns are returned.

    Returns
    -------
    CArray
        The 2-D array of scores or, if `y` is given, the flattened
        column `y` of that array.

    """
    x = x.atleast_2d()

    # Prefer decision_function; fall back to predict_proba whenever
    # the first attempt raises AttributeError
    try:
        scores = CArray(self.skclfs[i].decision_function(x.get_data()))
        probs = False
    except AttributeError:
        scores = CArray(self.skclfs[i].predict_proba(x.get_data()))
        probs = True

    # two-class classifiers outputting only scores for class 1
    if len(scores.shape) == 1:
        # duplicate column for class 0: the complement for probabilities,
        # the negated score otherwise
        outputs = CArray.ones(shape=(x.shape[0], clf.n_classes))
        scores = scores.T
        outputs[:, 1] = scores
        outputs[:, 0] = 1 - scores if probs else -scores
        scores = outputs

    # atleast_2d() returns the reshaped array instead of working in place,
    # so the result must be bound for the call to have any effect
    scores = scores.atleast_2d()

    if y is not None:
        return scores[:, y].ravel()
    return scores
def _forward(self, x):
    """Map data to the PCA space.

    The stored mean is subtracted from the input, which is then
    multiplied by the transposed stored components.

    Parameters
    ----------
    x : array_like
        Array to be transformed. It is converted to a dense 2-D array
        whose number of columns must equal ``self.mean.size``.
        ``x.ndim`` is read to select the return branch.

    Returns
    -------
    CArray
        Input data mapped to PCA space.

    Raises
    ------
    ValueError
        If the number of columns of `x` differs from ``self.mean.size``.

    Examples
    --------
    >>> from secml.array import CArray
    >>> from secml.ml.features.reduction import CPCA

    >>> array = CArray([[1., 0., 2.], [2., 5., 0.], [0., 1., -9.]])
    >>> pca = CPCA().fit(array)
    >>> pca.transform(CArray.concatenate(array, [4., 2., -6.], axis=0))
    CArray(4, 3)(dense: [[-4.078722e+00 -2.478266e+00 1.855417e-17] [-2.727232e+00 2.829603e+00 6.708859e-17] [ 6.805954e+00 -3.513362e-01 -1.757349e-16] [ 3.209152e+00 1.129680e+00 3.296909e+00]])

    >>> pca.transform([4., 2.])
    Traceback (most recent call last):
        ...
    ValueError: array to transform must have 3 features (columns).

    """
    samples = CArray(x).todense().atleast_2d()

    if samples.shape[1] != self.mean.size:
        raise ValueError(
            "array to transform must have {:} features (columns).".format(
                self.mean.size))

    # Center the data, then project it onto the components
    centered = samples - self.mean
    projected = CArray(centered.dot(self._components.T))

    if x.ndim >= 2:
        return projected.atleast_2d()
    return projected
def _forward(self, x):
    """Map data to the LDA space.

    The transformation itself is delegated to ``self._lda.transform``,
    which receives the input as an ndarray.

    Parameters
    ----------
    x : CArray
        Array to be transformed. It is converted to a dense 2-D array
        whose number of columns must equal ``self.mean.size``.
        ``x.ndim`` is read to select the return branch.

    Returns
    -------
    CArray
        Input data mapped to LDA space.

    Raises
    ------
    ValueError
        If the number of columns of `x` differs from ``self.mean.size``.

    Examples
    --------
    >>> from secml.array import CArray
    >>> from secml.data import CDataset
    >>> from secml.ml.features.reduction import CLDA

    >>> ds = CDataset([[1., 0., 2.], [2., 5., 0.], [0., 1., -9.]], [1,1,2])
    >>> lda = CLDA().fit(ds.X, ds.Y)
    >>> lda.transform(CArray.concatenate(ds.X, [4., 2., -6.], axis=0))
    CArray(4, 1)(dense: [[-1.209938] [ 0.204275] [ 1.005663] [ 2.278455]])

    >>> lda.transform([4., 2.])
    Traceback (most recent call last):
        ...
    ValueError: array to transform must have 3 features (columns).

    """
    samples = CArray(x).todense().atleast_2d()

    if samples.shape[1] != self.mean.size:
        raise ValueError(
            "array to transform must have {:} features (columns).".format(
                self.mean.size))

    projected = CArray(self._lda.transform(samples.tondarray()))

    if x.ndim >= 2:
        return projected.atleast_2d()
    return projected
def extract_features(self, x: CArray) -> CArray:
    """Extract EMBER features from one or more program samples.

    Each row of `x` is converted to a ``bytes`` object and passed to
    ``PEFeatureExtractor.feature_vector``.

    Parameters
    ----------
    x : CArray
        Program sample(s), one per row once reshaped to 2-D. Every value
        must be in ``range(256)`` after the integer cast, as required
        by ``bytes()``.

    Returns
    -------
    CArray
        EMBER features, one float32 feature vector per input row.

    """
    # NOTE(review): the positional 2 is presumably the EMBER feature
    # version - confirm against the PEFeatureExtractor signature
    extractor = PEFeatureExtractor(2, print_feature_warning=False)

    x = x.atleast_2d()
    features = []
    for i in range(x.shape[0]):
        # The builtin int is used because the np.int alias (which pointed
        # to it) was deprecated in NumPy 1.20 and removed in NumPy 1.24.
        # tolist() of a single 2-D row gives a nested list, hence the [0].
        byte_values = x[i, :].astype(int).tolist()[0]
        features.append(
            np.array(extractor.feature_vector(bytes(byte_values)),
                     dtype=np.float32))

    return CArray(features)
class CArrayTestCases(CUnitTest):
    """Common fixtures and helper checks for the CArray unittests."""

    def setUp(self):
        """Build the dense and sparse arrays used as fixtures."""

        def sparse_copy(dense):
            # Sparse counterpart built from a deep copy of the dense array
            return CArray(dense.deepcopy(), tosparse=True)

        # Matrix with some zeros
        self.array_dense = CArray([[1, 0, 0, 5],
                                   [2, 4, 0, 0],
                                   [3, 6, 0, 0]])
        self.array_sparse = sparse_copy(self.array_dense)

        # Symmetric matrix
        self.array_dense_sym = CArray([[1, 2, 0],
                                       [2, 4, 6],
                                       [0, 6, 0]])
        self.array_sparse_sym = sparse_copy(self.array_dense_sym)

        # Matrix without zeros
        self.array_dense_nozero = CArray([[1, 2, 3, 4],
                                          [5, 6, 7, 8],
                                          [9, 10, 11, 12]])
        self.array_sparse_nozero = sparse_copy(self.array_dense_nozero)

        # Matrix of zeros only
        self.array_dense_allzero = CArray([[0] * 4 for _ in range(3)])
        self.array_sparse_allzero = sparse_copy(self.array_dense_allzero)

        # Boolean matrices: mixed, all True, all False
        self.array_dense_bool = CArray([[True, False, True, True],
                                        [False, False, False, False],
                                        [True, True, True, True]])
        self.array_sparse_bool = sparse_copy(self.array_dense_bool)

        self.array_dense_bool_true = CArray(
            [[True] * 4 for _ in range(3)])
        self.array_sparse_bool_true = sparse_copy(
            self.array_dense_bool_true)

        self.array_dense_bool_false = CArray(
            [[False] * 4 for _ in range(3)])
        self.array_sparse_bool_false = sparse_copy(
            self.array_dense_bool_false)

        # Vectors: flat, 2-D row, 2-D column
        self.row_flat_dense = CArray([4, 0, 6])
        self.row_dense = self.row_flat_dense.atleast_2d()
        self.column_dense = self.row_dense.deepcopy().T

        self.row_sparse = sparse_copy(self.row_dense)
        self.column_sparse = self.row_sparse.deepcopy().T

        # Single-element arrays (nonzero and zero)
        self.single_flat_dense = CArray([4])
        self.single_dense = self.single_flat_dense.atleast_2d()
        self.single_sparse = sparse_copy(self.single_dense)

        self.single_flat_dense_zero = CArray([0])
        self.single_dense_zero = self.single_flat_dense_zero.atleast_2d()
        self.single_sparse_zero = sparse_copy(self.single_dense_zero)

        # Single-element boolean arrays (True and False)
        self.single_bool_flat_dense = CArray([True])
        self.single_bool_dense = self.single_bool_flat_dense.atleast_2d()
        self.single_bool_sparse = sparse_copy(self.single_bool_dense)

        self.single_bool_flat_dense_false = CArray([False])
        self.single_bool_dense_false = \
            self.single_bool_flat_dense_false.atleast_2d()
        self.single_bool_sparse_false = sparse_copy(
            self.single_bool_dense_false)

        # Empty arrays
        self.empty_flat_dense = CArray([], tosparse=False)
        self.empty_dense = CArray([[]], tosparse=False)
        self.empty_sparse = CArray([], tosparse=True)

    def _test_multiple_eq(self, items_list):
        """Assert that all items are equal and return True.

        Only neighbouring items are compared (first with second, second
        with third, and so on); a mismatch is reported by
        `assert_array_equal`.
        """
        for idx in range(len(items_list) - 1):
            self.assert_array_equal(items_list[idx], items_list[idx + 1])

        # No comparison failed: every item is equal to each other
        return True

    def _test_operator_cycle(self, totest_op, totest_items, totest_result):
        """Check if operator return the expected result on given items.

        For every operator, the results obtained on all the pairs must
        also be equal to each other.

        totest_op: list of operators
        totest_items: list of items PAIR to test
        totest_result: list of expected result (class name) for each PAIR

        """

        def data_class(item):
            # Operands exposing 'isdense' report the class of their
            # internal buffer, any other operand reports its own class
            if hasattr(item, 'isdense'):
                return type(item._data)
            return type(item)

        for operator in totest_op:
            results = []
            for pair_idx, pair in enumerate(totest_items):
                left_class = data_class(pair[0])
                right_class = data_class(pair[1])
                self.logger.info(
                    "Operator {:} between {:} and {:}".format(
                        operator.__name__, left_class, right_class))

                outcome = operator(pair[0], pair[1])
                self.assertIsInstance(
                    outcome._data, totest_result[pair_idx])
                self.logger.info(
                    "Result: {:}".format(outcome._data.__class__.__name__))

                results.append(outcome)

            self.assertTrue(self._test_multiple_eq(results))

    def _test_operator_notimplemented(self, totest_op, totest_items):
        """Check if operator is not implemented for given items.

        Each operator must raise NotImplementedError on each pair.

        totest_op: list of operators
        totest_items: list of items PAIR to test

        """
        for operator in totest_op:
            for pair in totest_items:
                first, second = pair[0], pair[1]
                with self.assertRaises(NotImplementedError):
                    operator(first, second)
def test_setter(self):
    """Test the __setitem__ method of CArray.

    For several families of fixture arrays (matrices, symmetric matrices,
    vector-like arrays, single-element arrays) three parallel lists are
    built: the selectors to index with, the values to assign and the
    arrays expected after the assignment. Entry ``k`` of each list
    belongs to the same test case, so the order of the lists matters.

    """

    def test_selectors(input_array, selector_list, assignment_list, target_list):
        """Run one assignment per selector and compare with the target.

        Every case works on a deep copy of `input_array`, so the
        fixture itself is never modified.

        """
        for selector_idx, selector in enumerate(selector_list):

            self.logger.info("Set: array[{:}] = {:}".format(
                selector, assignment_list[selector_idx]))
            array_copy = input_array.deepcopy()
            # On failure the same assignment is simply executed again, so
            # the error still propagates (presumably this gives a place
            # where a debugger breakpoint can be set)
            try:  # Using a try to easier debug
                array_copy[selector] = assignment_list[selector_idx]
            except (IndexError, ValueError, TypeError):
                array_copy[selector] = assignment_list[selector_idx]
            self.logger.info("Result is: \n" + str(array_copy))

            # No element may differ from the expected array
            self.assertFalse(
                CArray(array_copy != target_list[selector_idx]).any(),
                "{:} is different from {:}".format(
                    array_copy, target_list[selector_idx]))

            # The shape is compared only for targets that have one
            if hasattr(target_list[selector_idx], 'shape'):
                self.assertEqual(array_copy.shape,
                                 target_list[selector_idx].shape)

    # 2D/1D INDEXING (MATRIX)
    arrays_list = [self.array_dense, self.array_sparse]
    for array in arrays_list:

        self.logger.info("Testing setters for matrix: \n" + str(array))

        # Pairs of (row indices, column indices). np.ravel(k)[0] gives a
        # NumPy scalar, so the same indices are tried as Python ints,
        # as NumPy scalars and as a mix of the two
        selectors = [[[1, 2, 2, 2], [2, 0, 1, 2]],
                     [[1, 2, 2, 2],
                      [np.ravel(2)[0], np.ravel(0)[0],
                       np.ravel(1)[0], np.ravel(2)[0]]],
                     [[np.ravel(1)[0], np.ravel(2)[0],
                       np.ravel(2)[0], np.ravel(2)[0]],
                      [2, 0, 1, 2]],
                     [[np.ravel(1)[0], np.ravel(2)[0],
                       np.ravel(2)[0], np.ravel(2)[0]],
                      [np.ravel(2)[0], np.ravel(0)[0],
                       np.ravel(1)[0], np.ravel(2)[0]]]]
        # Boolean mask selecting the same four elements
        selectors += 3 * [CArray([[False, False, False, False],
                                  [False, False, True, False],
                                  [True, True, True, False]])]

        assignments = [10, 10,
                       CArray([10, 20, 30, 40]), CArray([10, 20, 30, 40]),
                       CArray([10, 20, 30, 40]), CArray([10, 20, 30, 40]),
                       10]

        # targets_a: expected after assigning the scalar 10
        # targets_b: expected after assigning the vector [10, 20, 30, 40]
        targets_a = [CArray([[1, 0, 0, 5], [2, 4, 10, 0], [10, 10, 10, 0]])]
        targets_b = [CArray([[1, 0, 0, 5], [2, 4, 10, 0], [20, 30, 40, 0]])]
        targets = 2 * targets_a + 4 * targets_b + targets_a

        test_selectors(array, selectors, assignments, targets)

    # 2D/1D INDEXING (MATRIX SYMMETRIC, for easier testing of different indices)
    arrays_list = [self.array_dense_sym, self.array_sparse_sym]
    for array in arrays_list:

        self.logger.info("Testing setters for matrix: \n" + str(array))

        # 9 selector kinds: the first two pick index 2, the other seven
        # pick indices 1 and 2 (lists, CArrays, a slice, boolean masks)
        selectors_unique = [2, np.ravel(2)[0], [1, 2], CArray([1, 2]),
                            CArray([[1, 2]]), CArray([1, 2], tosparse=True),
                            slice(1, 3), [False, True, True],
                            CArray([False, True, True])]
        # All 81 (row selector, column selector) combinations, in the
        # order produced by itertools.product
        selectors = list(itertools.product(selectors_unique, repeat=2))
        # Two extra cases on element (2, 2) for the size-1 array assignments
        selectors += [(2, 2), (2, 2)]

        # One entry per column selector; assignments_a is used for the two
        # scalar row selectors, assignments_b for the seven row selectors
        # that pick two rows
        assignments_a = [10, 10] + 5 * [CArray([[10, 20]])] + 2 * [CArray([10, 20])]
        assignments_b = [CArray([[10], [20]])] + [CArray([[10], [20]], tosparse=True)] + \
            7 * [CArray([[10, 20], [30, 40]])]
        assignments_c = [CArray([10]), CArray([10], tosparse=True)]
        assignments = 2 * assignments_a + 7 * assignments_b + assignments_c

        # Expected arrays, laid out like the assignments above
        targets_a = 2 * [CArray([[1, 2, 0], [2, 4, 6], [0, 6, 10]])] + \
            7 * [CArray([[1, 2, 0], [2, 4, 6], [0, 10, 20]])]
        targets_b = 2 * [CArray([[1, 2, 0], [2, 4, 10], [0, 6, 20]])] + \
            7 * [CArray([[1, 2, 0], [2, 10, 20], [0, 30, 40]])]
        targets_c = 2 * [CArray([[1, 2, 0], [2, 4, 6], [0, 6, 10]])]
        targets = 2 * targets_a + 7 * targets_b + targets_c

        test_selectors(array, selectors, assignments, targets)

    # 2D/1D INDEXING (VECTOR-LIKE)
    arrays_list = [self.row_flat_dense, self.row_dense, self.row_sparse]
    for array in arrays_list:

        self.logger.info("Testing setters for array: \n" + str(array))

        # Explicit (rows, columns) index lists and boolean masks
        selectors_a = [[[0, 0], [2, 0]],
                       [[0, 0], [np.ravel(2)[0], np.ravel(0)[0]]],
                       [[np.ravel(0)[0], np.ravel(0)[0]], [2, 0]],
                       [[np.ravel(0)[0], np.ravel(0)[0]],
                        [np.ravel(2)[0], np.ravel(0)[0]]],
                       CArray([[True, False, True]]),
                       CArray([True, False, True]),
                       CArray([[True, False, True]]),
                       CArray([True, False, True])]
        # 12 row selectors, each combined with the 8 column selectors
        selectors_row = [0, np.ravel(0)[0], [0], CArray([0]),
                         -1, np.ravel(-1)[0], [-1], CArray([-1]),
                         True, np.ravel(True)[0], [True], CArray([True])]
        selectors_col = [[], 0, np.ravel(0)[0], [1, 2], CArray([1, 2]),
                         slice(1, 3), [False, True, True],
                         CArray([False, True, True])]
        selectors = selectors_a + [(x, y) for x in selectors_row for y in selectors_col]

        # assignments_a matches selectors_a; assignments_b has one entry
        # per column selector and is repeated for each row selector
        assignments_a = 2 * [CArray([10, 20])] + \
            2 * [CArray([[10, 20]])] + \
            2 * [CArray([10, 20])] + \
            2 * [CArray([[10, 20]])]
        assignments_b = [0] + [10, 10] + \
            2 * [CArray([[10, 20]])] + \
            3 * [CArray([10, 20])]
        assignments = assignments_a + 12 * assignments_b

        targets_a = CArray([20, 0, 10])
        targets_b = CArray([10, 0, 20])
        targets_c = CArray([10, 0, 6])
        targets_d = CArray([4, 10, 20])
        # Output always flat for flat arrays
        # (the empty column selector is expected to leave the array as is)
        if array.ndim == 1:
            targets = 4 * [targets_a] + 4 * [targets_b] + \
                12 * ([CArray([4, 0, 6])] + 2 * [targets_c] + 5 * [targets_d])
        else:
            targets = 4 * [targets_a.atleast_2d()] + 4 * [targets_b.atleast_2d()] + \
                12 * ([CArray([[4, 0, 6]])] + 2 * [targets_c.atleast_2d()] + 5 * [targets_d.atleast_2d()])

        test_selectors(array, selectors, assignments, targets)

    # 1D INDEXING (VECTOR-LIKE)
    arrays_list = [self.row_flat_dense, self.row_dense, self.row_sparse]
    for array in arrays_list:

        self.logger.info("Testing setters for vector: \n" + str(array))

        # A single selector per case; the last two repeat index 0 for the
        # size-1 dense and sparse array assignments
        selectors = [[], 0, np.ravel(0)[0], [1, 2], CArray([1, 2]),
                     slice(1, 3), slice(None), 0, 0]

        assignments = [0] + [10, 10] + 2 * [CArray([[10, 20]])] + \
            [CArray([[10, 20]], tosparse=True)] + [CArray([[10, 20, 30]])] + \
            [CArray([10]), CArray([10], tosparse=True)]

        targets_a = CArray([10, 0, 6])
        targets_b = CArray([4, 10, 20])
        targets_c = CArray([10, 20, 30])
        # Output always flat for flat arrays
        if array.ndim == 1:
            targets = [CArray([4, 0, 6])] + 2 * [targets_a] + \
                3 * [targets_b] + [targets_c] + 2 * [targets_a]
        else:
            targets = [CArray([[4, 0, 6]])] + 2 * [targets_a.atleast_2d()] + \
                3 * [targets_b.atleast_2d()] + [targets_c.atleast_2d()] + \
                2 * [targets_a.atleast_2d()]

        test_selectors(array, selectors, assignments, targets)

    # SPECIAL CASE: SIZE 1 ARRAY
    arrays_list = [self.single_flat_dense, self.single_dense, self.single_sparse]
    for array in arrays_list:

        self.logger.info("Testing setters for array: \n" + str(array))

        selectors = [[], 0, np.ravel(0)[0], True, [True], CArray([True]),
                     slice(0, 1), slice(None), 0, 0]

        assignments = 8 * [10] + [CArray([10]), CArray([10], tosparse=True)]

        targets_a = CArray([10])
        # Output always flat for flat arrays
        # (the empty selector is expected to leave the original value 4)
        if array.ndim == 1:
            targets = [CArray([4])] + 9 * [targets_a]
        else:
            targets = [CArray([[4]])] + 9 * [targets_a.atleast_2d()]

        test_selectors(array, selectors, assignments, targets)