def fit(self, X, y, sample_weight=None):
    """
    Fit the classifier on the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples.

    y : array-like, shape = [n_samples]
        The target values (class labels in classification).

    sample_weight : array-like, shape = [n_samples] or None
        Individual weights for each sample.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If fewer than two distinct classes are present in ``y``.
    """
    self._validate_params(**self.get_params())

    X, y = check_X_y(X, y, accept_sparse=True)
    # Remember whether the training data was sparse.
    self._is_sparse_train_X = True if sp.isspmatrix(X) else False
    self._n_samples, self._n_features = X.shape

    sample_weight = self._get_sample_weight(sample_weight)
    check_consistent_length(X, y, sample_weight)
    check_classification_targets(y)

    self._classes = sorted(np.unique(y))
    self._n_classes = len(self._classes)
    self._classes_map = {}

    self._set_params_with_dependencies()
    params = self._get_params()

    if self._n_classes < 2:
        raise ValueError(
            "Classifier can't predict when only one class is present.")

    if self._n_classes == 2:
        for index, label in enumerate(self._classes):
            self._classes_map[index] = label
        self._estimators = [None]
        # Binary target: samples of the first (smallest) class are encoded as 1.
        first_class = self._classes[0]
        y = (y == first_class).astype(int)
        self._fit_binary_task(X, y, sample_weight, params)
    else:
        if sp.isspmatrix_dok(X):
            # Fix to avoid scipy 7699 issue
            X = X.tocsr().tocoo()
        self._estimators = [None] * self._n_classes
        self._fit_multiclass_task(X, y, sample_weight, params)

    self._fitted = True
    self.n_features_in_ = self._n_features
    return self
def fit(self, X, y, sample_weight=None):
    """
    Fit the classifier on the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples.

    y : array-like, shape = [n_samples]
        The target values (class labels in classification).

    sample_weight : array-like, shape = [n_samples] or None
        Individual weights for each sample.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If fewer than two distinct classes are present in ``y``.
    """
    self._validate_params(**self.get_params())

    X, y = check_X_y(X, y, accept_sparse=True)
    # Remember whether the training data was sparse.
    self._is_sparse_train_X = True if sp.isspmatrix(X) else False
    self._n_samples, self._n_features = X.shape

    sample_weight = self._get_sample_weight(sample_weight)
    check_consistent_length(X, y, sample_weight)
    check_classification_targets(y)

    self._classes = sorted(np.unique(y))
    self._n_classes = len(self._classes)
    self._classes_map = {}

    self._set_params_with_dependencies()
    params = self._get_params()

    n_classes = self._n_classes
    if n_classes < 2:
        raise ValueError("Classifier can't predict when only one class is present.")

    if n_classes > 2:
        if sp.isspmatrix_dok(X):
            # Fix to avoid scipy 7699 issue
            X = X.tocsr().tocoo()
        self._estimators = [None] * n_classes
        self._fit_multiclass_task(X, y, sample_weight, params)
    else:
        # Exactly two classes: a single binary estimator is enough.
        self._classes_map[0] = self._classes[0]
        self._classes_map[1] = self._classes[1]
        self._estimators = [None]
        # Samples of the first (smallest) class are encoded as 1.
        y = (y == self._classes[0]).astype(int)
        self._fit_binary_task(X, y, sample_weight, params)

    self._fitted = True
    return self
def sparse_matrix_report(m):
    """Print a diagnostic summary of the sparse matrix ``m`` to stdout.

    The summary always contains the matrix ``repr``, its number of stored
    non-zeros and its sparsity ratio. Depending on the storage format
    (CSR/CSC, BSR, COO, DOK, DIA or LIL) the lengths and dtypes of the
    underlying arrays, a size figure and, for some formats, a preview of the
    content are printed as well. Matrices in any other format only get the
    generic header.

    :param m: the scipy sparse matrix to describe
    """

    def show(template, array):
        # Each per-array line reports the array's length and dtype.
        print(template.format(len(array), array.dtype))

    def show_corner():
        print('10 x 10 preview:')
        print(m[:10, :10].toarray())

    print(repr(m))
    print('Number of non-zeros :', m.nnz)
    n_cells = m.shape[0] * m.shape[1]
    print('Sparsity :', 1 - m.nnz / n_cells)

    if isspmatrix_csr(m) or isspmatrix_csc(m):
        show('data length : {} ({})', m.data)
        show('indptr length : {} ({})', m.indptr)
        show('indices length : {} ({})', m.indices)
        n_bytes = m.data.nbytes + m.indptr.nbytes + m.indices.nbytes
        print('Size :', size(n_bytes))
        show_corner()
    elif isspmatrix_bsr(m):
        show('data length : {} ({})', m.data)
        show('indptr length : {} ({})', m.indptr)
        show('indices length : {} ({})', m.indices)
        print('blocksize length : {}'.format(m.blocksize))
        n_bytes = m.data.nbytes + m.indptr.nbytes + m.indices.nbytes
        print('Size :', size(n_bytes))
        print('preview:')
        print(m)
    elif isspmatrix_coo(m):
        show('data length : {} ({})', m.data)
        show('row length : {} ({})', m.row)
        show('col length : {} ({})', m.col)
        n_bytes = m.data.nbytes + m.row.nbytes + m.col.nbytes
        print('Size :', size(n_bytes))
        print('preview:')
        print(m)
    elif isspmatrix_dok(m):
        # DOK exposes no flat arrays here; the size of the object itself
        # (as given by sys.getsizeof) is reported instead.
        print('Size :', size(sys.getsizeof(m)))
        show_corner()
    elif isspmatrix_dia(m):
        show('data length : {} ({})', m.data)
        show('Offsets : {} ({})', m.offsets)
        print('Size :', size(m.data.nbytes + m.offsets.nbytes))
        print('(no preview)')
    elif isspmatrix_lil(m):
        show('data length : {} ({})', m.data)
        show('rows : {} ({})', m.rows)
        print('Size :', size(m.data.nbytes + m.rows.nbytes))
        print('(no preview)')
def indexed_entries(sparse_matrix):
    """Iterate over the non-zero entries of a sparse matrix.

    Args:
        sparse_matrix: a scipy sparse matrix; it is converted to DOK format
            first unless it already is a DOK matrix.

    Return:
        A generator of (row_id, col_id, value) tuples, one per position
        reported by ``nonzero()``.
    """
    if isspmatrix_dok(sparse_matrix):
        dok = sparse_matrix
    else:
        dok = sparse_matrix.todok()
    row_ids, col_ids = dok.nonzero()
    return ((row, col, dok[row, col]) for row, col in zip(row_ids, col_ids))
def sparse_insert(a, b, i_start=0, j_start=None):
    """Copy the stored entries of ``b`` into ``a`` at a given offset.

    Every stored entry ``b[i, j]`` is written to
    ``a[i + i_start, j + j_start]``; ``a`` is modified in place and nothing
    is returned.

    :param a: target matrix supporting ``a[i, j] = value`` assignment
    :param b: source matrix; COO and DOK matrices are iterated directly, any
        other input is first converted with ``coo_matrix``
    :param i_start: row offset applied to the entries of ``b``
    :param j_start: column offset applied to the entries of ``b``; defaults
        to ``i_start`` when None
    """
    if j_start is None:
        j_start = i_start

    if spm.isspmatrix_dok(b):
        for (i, j), v in b.items():
            a[i + i_start, j + j_start] = v
        return

    # Anything that is not already COO is converted once. The original code
    # referenced an undefined ``smp`` alias here and raised NameError.
    coo = b if spm.isspmatrix_coo(b) else spm.coo_matrix(b)
    for i, j, v in zip(coo.row, coo.col, coo.data):
        a[i + i_start, j + j_start] = v
def should_enforce_sparse(m, sparse_format: SparseFormat, policy: SparsePolicy, dtype,
                          sparse_values: bool = True) -> bool:
    """
    Returns whether it is preferable to convert a given matrix into a `scipy.sparse.csr_matrix`,
    `scipy.sparse.csc_matrix` or `scipy.sparse.dok_matrix`, depending on the format of the given matrix and a given
    `SparsePolicy`:

    If the given policy is `SparsePolicy.AUTO`, the matrix will be converted into the given sparse format, if
    possible, if the sparse matrix is expected to occupy less memory than a dense matrix. To be able to convert the
    matrix into a sparse format, it must be a `scipy.sparse.lil_matrix`, `scipy.sparse.dok_matrix` or
    `scipy.sparse.coo_matrix`. If the given sparse format is `csr` or `csc` and the matrix is already in that
    format, it will not be converted.

    If the given policy is `SparsePolicy.FORCE_SPARSE`, the matrix will always be converted into the specified sparse
    format, if possible.

    If the given policy is `SparsePolicy.FORCE_DENSE`, the matrix will always be converted into a dense matrix.

    :param m:               A `np.ndarray` or `scipy.sparse.matrix` to be checked
    :param sparse_format:   The `SparseFormat` to be used
    :param policy:          The `SparsePolicy` to be used
    :param dtype:           The type of the values that should be stored in the matrix
    :param sparse_values:   True, if the values must explicitly be stored when using a sparse format, False otherwise
    :return:                True, if it is preferable to convert the matrix into a sparse matrix of the given format,
                            False otherwise
    :raises ValueError:     If the matrix cannot be converted into the given sparse format, i.e., if it is dense and
                            the policy is `SparsePolicy.FORCE_SPARSE`, or if it is a sparse matrix that is neither
                            already in the given format nor a LIL, COO or DOK matrix
    """
    if not issparse(m):
        # Given matrix is dense; only `FORCE_SPARSE` falls through to the error below
        if policy != SparsePolicy.FORCE_SPARSE:
            return False
    elif (isspmatrix_csr(m) and sparse_format == SparseFormat.CSR) or (
            isspmatrix_csc(m) and sparse_format == SparseFormat.CSC):
        # Matrix is a `scipy.sparse.csr_matrix` or `scipy.sparse.csc_matrix` and is already in the given sparse format
        return policy != SparsePolicy.FORCE_DENSE
    elif isspmatrix_lil(m) or isspmatrix_coo(m) or isspmatrix_dok(m):
        # Given matrix is in a format that might be converted into the specified sparse format
        if policy == SparsePolicy.AUTO:
            return is_sparse(m, sparse_format=sparse_format, dtype=dtype, sparse_values=sparse_values)
        else:
            return policy == SparsePolicy.FORCE_SPARSE

    raise ValueError(
        'Matrix of type ' + type(m).__name__ + ' cannot be converted to format "' + str(sparse_format) + '"')
def __init__(self, w, pair_to_node, model_file, num_clus, beta=1.e-4):
    """Set up the model state from a DOK matrix and its node pairs.

    :param w: scipy DOK sparse matrix; its rows are counted as pairs (S) and
        its columns as types / meta-paths (M)
    :param pair_to_node: node pairs, stored as given and checked with
        ``sym_checker``
    :param model_file: stored as given on the instance
    :param num_clus: number of clusters (K); must be at least the number of
        columns of ``w``
    :param beta: stored as given on the instance
    """
    assert sp.isspmatrix_dok(w), \
        "Input w must be a scipy DOK-based sparse matrix."
    self.w = w.asfptype()
    self.pair_to_node = pair_to_node  # [(, )]
    assert self.sym_checker(), "Node pairs duplicated in input matrix."
    self.node_to_pair = self.find_node_to_pair()  # [[(, )]]
    self.model_file = model_file

    n_rows, n_cols = self.w.shape
    self.num_pair = n_rows  # S
    self.num_type = n_cols  # M
    self.num_node = len(self.node_to_pair)  # V
    self.num_clus = num_clus  # K
    assert self.num_clus >= self.num_type, \
        "Number of clusters must be greater than or equal to number of meta-paths."

    self.beta = beta
    self.alpha = self.num_type * (self.num_node - 1.) + 1.
def test_check_array():
    """Exercise ``check_array`` on dense, sparse and array-like inputs.

    Covers: rejection of sparse input by default, ``ensure_2d`` and
    ``allow_nd`` handling, ``force_all_finite`` for inf and NaN,
    dtype/order/copy enforcement on dense arrays, format conversion of
    sparse matrices, and conversion of lists and array-like objects.
    """
    # accept_sparse == None
    # raise error on sparse inputs
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    assert_raises(TypeError, check_array, X_csr)

    # ensure_2d
    X_array = check_array([0, 1, 2])
    assert_equal(X_array.ndim, 2)
    X_array = check_array([0, 1, 2], ensure_2d=False)
    assert_equal(X_array.ndim, 1)

    # a 3-D array is rejected unless allow_nd=True
    X_ndim = np.arange(8).reshape(2, 2, 2)
    assert_raises(ValueError, check_array, X_ndim)
    check_array(X_ndim, allow_nd=True)  # doesn't raise

    # force_all_finite
    # NOTE: the builtins float/int/bool replace the removed NumPy aliases
    # np.float/np.int/np.bool, which were plain aliases of those builtins.
    X_inf = np.arange(4).reshape(2, 2).astype(float)
    X_inf[0, 0] = np.inf
    assert_raises(ValueError, check_array, X_inf)
    check_array(X_inf, force_all_finite=False)  # no raise

    # nan check
    X_nan = np.arange(4).reshape(2, 2).astype(float)
    X_nan[0, 0] = np.nan
    assert_raises(ValueError, check_array, X_nan)
    # The original re-checked X_inf here, leaving the NaN case untested.
    check_array(X_nan, force_all_finite=False)  # no raise

    # dtype and order enforcement.
    X_C = np.arange(4).reshape(2, 2).copy("C")
    X_F = X_C.copy("F")
    X_int = X_C.astype(int)
    X_float = X_C.astype(float)
    Xs = [X_C, X_F, X_int, X_float]
    dtypes = [np.int32, int, float, np.float32, None, bool, object]
    orders = ['C', 'F', None]
    copys = [True, False]

    for X, dtype, order, copy in product(Xs, dtypes, orders, copys):
        X_checked = check_array(X, dtype=dtype, order=order, copy=copy)
        if dtype is not None:
            assert_equal(X_checked.dtype, dtype)
        else:
            assert_equal(X_checked.dtype, X.dtype)
        if order == 'C':
            assert_true(X_checked.flags['C_CONTIGUOUS'])
            assert_false(X_checked.flags['F_CONTIGUOUS'])
        elif order == 'F':
            assert_true(X_checked.flags['F_CONTIGUOUS'])
            assert_false(X_checked.flags['C_CONTIGUOUS'])
        if copy:
            assert_false(X is X_checked)
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and
                    X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS']
                    and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']):
                assert_true(X is X_checked)

    # allowed sparse != None
    X_csc = sp.csc_matrix(X_C)
    X_coo = X_csc.tocoo()
    X_dok = X_csc.todok()
    X_int = X_csc.astype(int)
    X_float = X_csc.astype(float)

    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
                                                 copys):
        with warnings.catch_warnings(record=True) as w:
            X_checked = check_array(X, dtype=dtype,
                                    accept_sparse=accept_sparse, copy=copy)
        if (dtype is object or sp.isspmatrix_dok(X)) and len(w):
            message = str(w[0].message)
            messages = ["object dtype is not supported by sparse matrices",
                        "Can't check dok sparse matrix for nan or inf."]
            assert_true(message in messages)
        else:
            assert_equal(len(w), 0)
        if dtype is not None:
            assert_equal(X_checked.dtype, dtype)
        else:
            assert_equal(X_checked.dtype, X.dtype)
        if X.format in accept_sparse:
            # no change if allowed
            assert_equal(X.format, X_checked.format)
        else:
            # got converted
            assert_equal(X_checked.format, accept_sparse[0])
        if copy:
            assert_false(X is X_checked)
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and X.format == X_checked.format):
                assert_true(X is X_checked)

    # other input formats
    # convert lists to arrays
    X_dense = check_array([[1, 2], [3, 4]])
    assert_true(isinstance(X_dense, np.ndarray))
    # raise on too deep lists
    assert_raises(ValueError, check_array, X_ndim.tolist())
    check_array(X_ndim.tolist(), allow_nd=True)  # doesn't raise
    # convert weird stuff to arrays
    X_no_array = NotAnArray(X_dense)
    result = check_array(X_no_array)
    assert_true(isinstance(result, np.ndarray))
def test_check_array():
    """Exercise ``check_array`` on dense, sparse and array-like inputs.

    Covers: rejection of sparse input by default, ``ensure_2d`` and
    ``allow_nd`` handling, dtype/order/copy enforcement on dense arrays,
    format conversion of sparse matrices, conversion of lists and array-like
    objects, and the FutureWarning raised for string/bytes input with
    ``dtype="numeric"``.
    """
    # accept_sparse == False
    # raise error on sparse inputs
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    assert_raises(TypeError, check_array, X_csr)

    # ensure_2d=False
    X_array = check_array([0, 1, 2], ensure_2d=False)
    assert X_array.ndim == 1
    # ensure_2d=True with 1d array
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         check_array, [0, 1, 2], ensure_2d=True)
    # ensure_2d=True with scalar array
    assert_raise_message(ValueError,
                         'Expected 2D array, got scalar array instead',
                         check_array, 10, ensure_2d=True)

    # a 3-D array is rejected unless allow_nd=True
    X_ndim = np.arange(8).reshape(2, 2, 2)
    assert_raises(ValueError, check_array, X_ndim)
    check_array(X_ndim, allow_nd=True)  # doesn't raise

    # dtype and order enforcement.
    # NOTE: the builtins int/float/bool replace the removed NumPy aliases
    # np.int/np.float/np.bool, which were plain aliases of those builtins.
    X_C = np.arange(4).reshape(2, 2).copy("C")
    X_F = X_C.copy("F")
    X_int = X_C.astype(int)
    X_float = X_C.astype(float)
    Xs = [X_C, X_F, X_int, X_float]
    dtypes = [np.int32, int, float, np.float32, None, bool, object]
    orders = ['C', 'F', None]
    copys = [True, False]

    for X, dtype, order, copy in product(Xs, dtypes, orders, copys):
        X_checked = check_array(X, dtype=dtype, order=order, copy=copy)
        if dtype is not None:
            assert X_checked.dtype == dtype
        else:
            assert X_checked.dtype == X.dtype
        if order == 'C':
            assert X_checked.flags['C_CONTIGUOUS']
            assert not X_checked.flags['F_CONTIGUOUS']
        elif order == 'F':
            assert X_checked.flags['F_CONTIGUOUS']
            assert not X_checked.flags['C_CONTIGUOUS']
        if copy:
            assert X is not X_checked
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and
                    X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS']
                    and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']):
                assert X is X_checked

    # allowed sparse != None
    X_csc = sp.csc_matrix(X_C)
    X_coo = X_csc.tocoo()
    X_dok = X_csc.todok()
    X_int = X_csc.astype(int)
    X_float = X_csc.astype(float)

    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
                                                 copys):
        with warnings.catch_warnings(record=True) as w:
            X_checked = check_array(X, dtype=dtype,
                                    accept_sparse=accept_sparse, copy=copy)
        if (dtype is object or sp.isspmatrix_dok(X)) and len(w):
            message = str(w[0].message)
            messages = [
                "object dtype is not supported by sparse matrices",
                "Can't check dok sparse matrix for nan or inf."
            ]
            assert message in messages
        else:
            assert len(w) == 0
        if dtype is not None:
            assert X_checked.dtype == dtype
        else:
            assert X_checked.dtype == X.dtype
        if X.format in accept_sparse:
            # no change if allowed
            assert X.format == X_checked.format
        else:
            # got converted
            assert X_checked.format == accept_sparse[0]
        if copy:
            assert X is not X_checked
        else:
            # doesn't copy if it was already good
            if X.dtype == X_checked.dtype and X.format == X_checked.format:
                assert X is X_checked

    # other input formats
    # convert lists to arrays
    X_dense = check_array([[1, 2], [3, 4]])
    assert isinstance(X_dense, np.ndarray)
    # raise on too deep lists
    assert_raises(ValueError, check_array, X_ndim.tolist())
    check_array(X_ndim.tolist(), allow_nd=True)  # doesn't raise
    # convert weird stuff to arrays
    X_no_array = _NotAnArray(X_dense)
    result = check_array(X_no_array)
    assert isinstance(result, np.ndarray)

    # deprecation warning if string-like array with dtype="numeric"
    expected_warn_regex = r"converted to decimal numbers if dtype='numeric'"
    X_str = [['11', '12'], ['13', 'xx']]
    for X in [X_str, np.array(X_str, dtype='U'), np.array(X_str, dtype='S')]:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(X, dtype="numeric")

    # deprecation warning if byte-like array with dtype="numeric"
    X_bytes = [[b'a', b'b'], [b'c', b'd']]
    for X in [X_bytes, np.array(X_bytes, dtype='V1')]:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(X, dtype="numeric")
def test_check_array():
    """Exercise ``check_array`` on dense, sparse and array-like inputs.

    Covers: rejection of sparse input by default, ``ensure_2d`` and
    ``allow_nd`` handling, dtype/order/copy enforcement on dense arrays,
    format conversion of sparse matrices, conversion of lists and array-like
    objects, and the FutureWarning raised for string/bytes input with
    ``dtype="numeric"``.
    """
    # accept_sparse == False
    # raise error on sparse inputs
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    assert_raises(TypeError, check_array, X_csr)

    # ensure_2d=False
    X_array = check_array([0, 1, 2], ensure_2d=False)
    assert_equal(X_array.ndim, 1)
    # ensure_2d=True with 1d array
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         check_array, [0, 1, 2], ensure_2d=True)
    # ensure_2d=True with scalar array
    assert_raise_message(ValueError,
                         'Expected 2D array, got scalar array instead',
                         check_array, 10, ensure_2d=True)

    # a 3-D array is rejected unless allow_nd=True
    X_ndim = np.arange(8).reshape(2, 2, 2)
    assert_raises(ValueError, check_array, X_ndim)
    check_array(X_ndim, allow_nd=True)  # doesn't raise

    # dtype and order enforcement.
    # NOTE: the builtins int/float/bool replace the removed NumPy aliases
    # np.int/np.float/np.bool, which were plain aliases of those builtins.
    X_C = np.arange(4).reshape(2, 2).copy("C")
    X_F = X_C.copy("F")
    X_int = X_C.astype(int)
    X_float = X_C.astype(float)
    Xs = [X_C, X_F, X_int, X_float]
    dtypes = [np.int32, int, float, np.float32, None, bool, object]
    orders = ['C', 'F', None]
    copys = [True, False]

    for X, dtype, order, copy in product(Xs, dtypes, orders, copys):
        X_checked = check_array(X, dtype=dtype, order=order, copy=copy)
        if dtype is not None:
            assert_equal(X_checked.dtype, dtype)
        else:
            assert_equal(X_checked.dtype, X.dtype)
        if order == 'C':
            assert X_checked.flags['C_CONTIGUOUS']
            assert not X_checked.flags['F_CONTIGUOUS']
        elif order == 'F':
            assert X_checked.flags['F_CONTIGUOUS']
            assert not X_checked.flags['C_CONTIGUOUS']
        if copy:
            assert X is not X_checked
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and
                    X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS']
                    and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']):
                assert X is X_checked

    # allowed sparse != None
    X_csc = sp.csc_matrix(X_C)
    X_coo = X_csc.tocoo()
    X_dok = X_csc.todok()
    X_int = X_csc.astype(int)
    X_float = X_csc.astype(float)

    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
                                                 copys):
        with warnings.catch_warnings(record=True) as w:
            X_checked = check_array(X, dtype=dtype,
                                    accept_sparse=accept_sparse, copy=copy)
        if (dtype is object or sp.isspmatrix_dok(X)) and len(w):
            message = str(w[0].message)
            messages = ["object dtype is not supported by sparse matrices",
                        "Can't check dok sparse matrix for nan or inf."]
            assert message in messages
        else:
            assert_equal(len(w), 0)
        if dtype is not None:
            assert_equal(X_checked.dtype, dtype)
        else:
            assert_equal(X_checked.dtype, X.dtype)
        if X.format in accept_sparse:
            # no change if allowed
            assert_equal(X.format, X_checked.format)
        else:
            # got converted
            assert_equal(X_checked.format, accept_sparse[0])
        if copy:
            assert X is not X_checked
        else:
            # doesn't copy if it was already good
            if X.dtype == X_checked.dtype and X.format == X_checked.format:
                assert X is X_checked

    # other input formats
    # convert lists to arrays
    X_dense = check_array([[1, 2], [3, 4]])
    assert isinstance(X_dense, np.ndarray)
    # raise on too deep lists
    assert_raises(ValueError, check_array, X_ndim.tolist())
    check_array(X_ndim.tolist(), allow_nd=True)  # doesn't raise
    # convert weird stuff to arrays
    X_no_array = NotAnArray(X_dense)
    result = check_array(X_no_array)
    assert isinstance(result, np.ndarray)

    # deprecation warning if string-like array with dtype="numeric"
    expected_warn_regex = r"converted to decimal numbers if dtype='numeric'"
    X_str = [['11', '12'], ['13', 'xx']]
    for X in [X_str, np.array(X_str, dtype='U'), np.array(X_str, dtype='S')]:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(X, dtype="numeric")

    # deprecation warning if byte-like array with dtype="numeric"
    X_bytes = [[b'a', b'b'], [b'c', b'd']]
    for X in [X_bytes, np.array(X_bytes, dtype='V1')]:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(X, dtype="numeric")
def test_check_array():
    """Exercise ``check_array`` on dense, sparse and array-like inputs.

    Covers: rejection of sparse input by default, ``ensure_2d`` and
    ``allow_nd`` handling, dtype/order/copy enforcement on dense arrays,
    format conversion of sparse matrices, and conversion of lists and
    array-like objects.
    """
    # With the default accept_sparse, sparse input is a TypeError.
    nested = [[1, 2], [3, 4]]
    with pytest.raises(TypeError):
        check_array(sp.csr_matrix(nested))

    # 1-D input passes through when ensure_2d is switched off ...
    one_dim = check_array([0, 1, 2], ensure_2d=False)
    assert one_dim.ndim == 1
    # ... and is rejected otherwise, for both 1-D and scalar input.
    with pytest.raises(ValueError,
                       match="Expected 2D array, got 1D array instead"):
        check_array([0, 1, 2], ensure_2d=True)
    with pytest.raises(ValueError,
                       match="Expected 2D array, got scalar array instead"):
        check_array(10, ensure_2d=True)

    # A 3-D array needs allow_nd=True.
    cube = np.arange(8).reshape(2, 2, 2)
    with pytest.raises(ValueError):
        check_array(cube)
    check_array(cube, allow_nd=True)  # doesn't raise

    # dtype and order enforcement on dense arrays.
    c_ordered = np.arange(4).reshape(2, 2).copy("C")
    f_ordered = c_ordered.copy("F")
    dense_inputs = [c_ordered, f_ordered,
                    c_ordered.astype(int), c_ordered.astype(float)]
    dtypes = [np.int32, int, float, np.float32, None, bool, object]
    orders = ['C', 'F', None]
    copys = [True, False]

    for arr, dtype, order, copy in product(dense_inputs, dtypes, orders,
                                           copys):
        checked = check_array(arr, dtype=dtype, order=order, copy=copy)
        expected_dtype = arr.dtype if dtype is None else dtype
        assert checked.dtype == expected_dtype
        if order == 'C':
            assert checked.flags['C_CONTIGUOUS']
            assert not checked.flags['F_CONTIGUOUS']
        elif order == 'F':
            assert checked.flags['F_CONTIGUOUS']
            assert not checked.flags['C_CONTIGUOUS']
        if copy:
            assert arr is not checked
            continue
        # Without copy, an input that already satisfies the request must be
        # returned as the very same object.
        same_layout = all(
            checked.flags[flag] == arr.flags[flag]
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'))
        if arr.dtype == checked.dtype and same_layout:
            assert arr is checked

    # Sparse input with an explicit list of accepted formats.
    as_csc = sp.csc_matrix(c_ordered)
    sparse_inputs = [as_csc, as_csc.tocoo(), as_csc.todok(),
                     as_csc.astype(int), as_csc.astype(float)]
    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    for mat, dtype, accept_sparse, copy in product(sparse_inputs, dtypes,
                                                   accept_sparses, copys):
        with warnings.catch_warnings(record=True) as caught:
            checked = check_array(mat, dtype=dtype,
                                  accept_sparse=accept_sparse, copy=copy)
        if (dtype is object or sp.isspmatrix_dok(mat)) and len(caught):
            # XXX unreached code as of v0.22
            message = str(caught[0].message)
            messages = [
                "object dtype is not supported by sparse matrices",
                "Can't check dok sparse matrix for nan or inf."
            ]
            assert message in messages
        else:
            assert len(caught) == 0
        expected_dtype = mat.dtype if dtype is None else dtype
        assert checked.dtype == expected_dtype
        if mat.format in accept_sparse:
            # An accepted format is left untouched.
            assert mat.format == checked.format
        else:
            # Otherwise the first accepted format is used.
            assert checked.format == accept_sparse[0]
        if copy:
            assert mat is not checked
        elif mat.dtype == checked.dtype and mat.format == checked.format:
            # doesn't copy if it was already good
            assert mat is checked

    # Nested lists are converted to ndarrays.
    dense = check_array([[1, 2], [3, 4]])
    assert isinstance(dense, np.ndarray)
    # Lists nested too deeply are rejected unless allow_nd=True.
    with pytest.raises(ValueError):
        check_array(cube.tolist())
    check_array(cube.tolist(), allow_nd=True)  # doesn't raise
    # Array-like wrappers are converted to ndarrays as well.
    wrapped = _NotAnArray(dense)
    converted = check_array(wrapped)
    assert isinstance(converted, np.ndarray)
def test_check_array():
    """Exercise ``check_array`` on dense, sparse and array-like inputs.

    Covers: rejection of sparse input by default, ``ensure_2d`` and
    ``allow_nd`` handling, dtype/order/copy enforcement on dense arrays,
    format conversion of sparse matrices, conversion of lists and array-like
    objects, and the FutureWarning raised for string/bytes input.
    """
    # accept_sparse == None
    # raise error on sparse inputs
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    assert_raises(TypeError, check_array, X_csr)

    # ensure_2d=False
    X_array = check_array([0, 1, 2], ensure_2d=False)
    assert_equal(X_array.ndim, 1)
    # ensure_2d=True with 1d array
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         check_array, [0, 1, 2], ensure_2d=True)
    # ensure_2d=True with scalar array
    assert_raise_message(ValueError,
                         'Expected 2D array, got scalar array instead',
                         check_array, 10, ensure_2d=True)

    # a 3-D array is rejected unless allow_nd=True
    X_ndim = np.arange(8).reshape(2, 2, 2)
    assert_raises(ValueError, check_array, X_ndim)
    check_array(X_ndim, allow_nd=True)  # doesn't raise

    # dtype and order enforcement.
    # NOTE: the builtins int/float/bool replace the removed NumPy aliases
    # np.int/np.float/np.bool, which were plain aliases of those builtins.
    X_C = np.arange(4).reshape(2, 2).copy("C")
    X_F = X_C.copy("F")
    X_int = X_C.astype(int)
    X_float = X_C.astype(float)
    Xs = [X_C, X_F, X_int, X_float]
    dtypes = [np.int32, int, float, np.float32, None, bool, object]
    orders = ['C', 'F', None]
    copys = [True, False]

    for X, dtype, order, copy in product(Xs, dtypes, orders, copys):
        X_checked = check_array(X, dtype=dtype, order=order, copy=copy)
        if dtype is not None:
            assert_equal(X_checked.dtype, dtype)
        else:
            assert_equal(X_checked.dtype, X.dtype)
        if order == 'C':
            assert_true(X_checked.flags['C_CONTIGUOUS'])
            assert_false(X_checked.flags['F_CONTIGUOUS'])
        elif order == 'F':
            assert_true(X_checked.flags['F_CONTIGUOUS'])
            assert_false(X_checked.flags['C_CONTIGUOUS'])
        if copy:
            assert_false(X is X_checked)
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and
                    X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS']
                    and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']):
                assert_true(X is X_checked)

    # allowed sparse != None
    X_csc = sp.csc_matrix(X_C)
    X_coo = X_csc.tocoo()
    X_dok = X_csc.todok()
    X_int = X_csc.astype(int)
    X_float = X_csc.astype(float)

    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
                                                 copys):
        with warnings.catch_warnings(record=True) as w:
            X_checked = check_array(X, dtype=dtype,
                                    accept_sparse=accept_sparse, copy=copy)
        if (dtype is object or sp.isspmatrix_dok(X)) and len(w):
            message = str(w[0].message)
            messages = [
                "object dtype is not supported by sparse matrices",
                "Can't check dok sparse matrix for nan or inf."
            ]
            assert_true(message in messages)
        else:
            assert_equal(len(w), 0)
        if dtype is not None:
            assert_equal(X_checked.dtype, dtype)
        else:
            assert_equal(X_checked.dtype, X.dtype)
        if X.format in accept_sparse:
            # no change if allowed
            assert_equal(X.format, X_checked.format)
        else:
            # got converted
            assert_equal(X_checked.format, accept_sparse[0])
        if copy:
            assert_false(X is X_checked)
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and X.format == X_checked.format):
                assert_true(X is X_checked)

    # other input formats
    # convert lists to arrays
    X_dense = check_array([[1, 2], [3, 4]])
    assert_true(isinstance(X_dense, np.ndarray))
    # raise on too deep lists
    assert_raises(ValueError, check_array, X_ndim.tolist())
    check_array(X_ndim.tolist(), allow_nd=True)  # doesn't raise
    # convert weird stuff to arrays
    X_no_array = NotAnArray(X_dense)
    result = check_array(X_no_array)
    assert_true(isinstance(result, np.ndarray))

    # The same FutureWarning text is expected for every string-like and
    # byte-like input below, so it is spelled out once.
    numeric_warning = (
        "arrays of strings will be interpreted as decimal numbers if "
        "parameter 'dtype' is 'numeric'. It is recommended that you convert "
        "the array to type np.float64 before passing it to check_array.")

    # deprecation warning if string-like array with dtype="numeric"
    X_str = [['a', 'b'], ['c', 'd']]
    for X in [X_str, np.array(X_str, dtype='U'), np.array(X_str, dtype='S')]:
        assert_warns_message(FutureWarning, numeric_warning,
                             check_array, X, "numeric")

    # deprecation warning if byte-like array with dtype="numeric"
    X_bytes = [[b'a', b'b'], [b'c', b'd']]
    for X in [X_bytes, np.array(X_bytes, dtype='V1')]:
        assert_warns_message(FutureWarning, numeric_warning,
                             check_array, X, "numeric")
def fit(self, X, y, sample_weight=None):
    """
    Fit an RGF Classifier on the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples.

    y : array-like, shape = [n_samples]
        The target values (class labels in classification).

    sample_weight : array-like, shape = [n_samples] or None
        Individual weights for each sample.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        If any given sample weight is not positive, or if fewer than two
        distinct classes are present in ``y``.
    """
    _validate_params(**self.get_params())

    X, y = check_X_y(X, y, accept_sparse=True)
    n_samples, self._n_features = X.shape

    # Resolve the parameters whose effective value depends on other settings.
    self._sl2 = self.l2 if self.sl2 is None else self.sl2

    if isinstance(self.min_samples_leaf, _FLOATS):
        # A float is interpreted as a fraction of the number of samples.
        self._min_samples_leaf = ceil(self.min_samples_leaf * n_samples)
    else:
        self._min_samples_leaf = self.min_samples_leaf

    if self.n_iter is not None:
        self._n_iter = self.n_iter
    elif self.loss == "LS":
        self._n_iter = 10
    else:
        self._n_iter = 5

    if sample_weight is None:
        sample_weight = np.ones(n_samples, dtype=np.float32)
    else:
        sample_weight = column_or_1d(sample_weight, warn=True)
        if (sample_weight <= 0).any():
            raise ValueError("Sample weights must be positive.")
    check_consistent_length(X, y, sample_weight)

    check_classification_targets(y)
    self._classes = sorted(np.unique(y))
    self._n_classes = len(self._classes)
    self._classes_map = {}

    params = {
        'max_leaf': self.max_leaf,
        'test_interval': self.test_interval,
        'algorithm': self.algorithm,
        'loss': self.loss,
        'reg_depth': self.reg_depth,
        'l2': self.l2,
        'sl2': self._sl2,
        'normalize': self.normalize,
        'min_samples_leaf': self._min_samples_leaf,
        'n_iter': self._n_iter,
        'n_tree_search': self.n_tree_search,
        'opt_interval': self.opt_interval,
        'learning_rate': self.learning_rate,
        'memory_policy': self.memory_policy,
        'verbose': self.verbose,
    }

    if self._n_classes < 2:
        raise ValueError(
            "Classifier can't predict when only one class is present.")

    if self._n_classes == 2:
        for index, label in enumerate(self._classes):
            self._classes_map[index] = label
        self._estimators = [None]
        # Samples of the first (smallest) class are encoded as 1.
        y = (y == self._classes[0]).astype(int)
        self._estimators[0] = _RGFBinaryClassifier(**params)
        self._estimators[0].fit(X, y, sample_weight)
    else:
        if sp.isspmatrix_dok(X):
            # Fix to avoid scipy 7699 issue
            X = X.tocsr().tocoo()
        # One-vs-rest: one binary estimator and one 0/1 target per class.
        self._estimators = [None] * self._n_classes
        ovr_targets = [None] * self._n_classes
        for index, label in enumerate(self._classes):
            self._classes_map[index] = label
            ovr_targets[index] = (y == label).astype(int)
            self._estimators[index] = _RGFBinaryClassifier(**params)
        run_parallel = Parallel(n_jobs=self.n_jobs)
        self._estimators = run_parallel(
            delayed(_fit_ovr_binary)(estimator, X, target, sample_weight)
            for estimator, target in zip(self._estimators, ovr_targets))

    self._fitted = True
    return self
def fit(self, X, y, sample_weight=None):
    """
    Build a RGF Classifier from the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The training input samples.

    y : array-like, shape = [n_samples]
        The target values (class labels in classification).

    sample_weight : array-like, shape = [n_samples] or None
        Individual weights for each sample. Defaults to a weight of
        one per sample when None.

    Returns
    -------
    self : object
        Returns self.

    Raises
    ------
    ValueError
        When a sample weight is zero or negative, or when only one
        class is present in ``y``.
    """
    _validate_params(**self.get_params())

    X, y = check_X_y(X, y, accept_sparse=True)
    n_samples, self._n_features = X.shape

    # --- derived hyper-parameters -------------------------------------
    if self.sl2 is not None:
        self._sl2 = self.sl2
    else:
        self._sl2 = self.l2

    leaf_is_fraction = isinstance(self.min_samples_leaf, _FLOATS)
    self._min_samples_leaf = (
        ceil(self.min_samples_leaf * n_samples)
        if leaf_is_fraction else self.min_samples_leaf)

    if self.n_iter is None:
        # Default number of iterations depends on the loss function.
        self._n_iter = 10 if self.loss == "LS" else 5
    else:
        self._n_iter = self.n_iter

    # --- sample weights ------------------------------------------------
    if sample_weight is None:
        sample_weight = np.ones(n_samples, dtype=np.float32)
    else:
        sample_weight = column_or_1d(sample_weight, warn=True)
        if (sample_weight <= 0).any():
            raise ValueError("Sample weights must be positive.")
    check_consistent_length(X, y, sample_weight)

    # --- targets -------------------------------------------------------
    check_classification_targets(y)
    self._classes = sorted(np.unique(y))
    self._n_classes = len(self._classes)
    self._classes_map = {}

    # Settings handed unchanged to each binary estimator.
    params = dict(
        max_leaf=self.max_leaf,
        test_interval=self.test_interval,
        algorithm=self.algorithm,
        loss=self.loss,
        reg_depth=self.reg_depth,
        l2=self.l2,
        sl2=self._sl2,
        normalize=self.normalize,
        min_samples_leaf=self._min_samples_leaf,
        n_iter=self._n_iter,
        n_tree_search=self.n_tree_search,
        opt_interval=self.opt_interval,
        learning_rate=self.learning_rate,
        memory_policy=self.memory_policy,
        verbose=self.verbose,
    )

    if self._n_classes < 2:
        raise ValueError("Classifier can't predict when only one class is present.")

    if self._n_classes == 2:
        # Two classes: one estimator, target is 1 for the first class.
        self._classes_map[0] = self._classes[0]
        self._classes_map[1] = self._classes[1]
        self._estimators = [None]
        y = (y == self._classes[0]).astype(int)
        classifier = _RGFBinaryClassifier(**params)
        self._estimators[0] = classifier
        classifier.fit(X, y, sample_weight)
    else:
        # More than two classes: fit one-vs-rest estimators in parallel.
        if sp.isspmatrix_dok(X):
            X = X.tocsr().tocoo()  # Fix to avoid scipy 7699 issue
        self._estimators = [None] * self._n_classes
        ovr_list = []
        for idx, cls_num in enumerate(self._classes):
            self._classes_map[idx] = cls_num
            ovr_list.append((y == cls_num).astype(int))
            self._estimators[idx] = _RGFBinaryClassifier(**params)
        fit_job = delayed(_fit_ovr_binary)
        run_parallel = Parallel(n_jobs=self.n_jobs)
        self._estimators = run_parallel(
            fit_job(self._estimators[idx], X, ovr_list[idx], sample_weight)
            for idx in range(self._n_classes))

    self._fitted = True
    return self
def add(self, other, in_place=True, write_to_self=False):
    """
    Add a matrix: compute the sum of self._raw_matrix and the passed matrix (other).

    Args:
        other: the second operand; a StateMatrixNumpy, an instance of this
            class, or an np.ndarray.
        in_place: If True, the sum is stored in the target operand itself.
            If False, the target is copied first and the sum is stored in
            (and returned as) that copy.
        write_to_self: selects the target operand. If True the target is
            (self) and (other) is only read; if False the target is (other)
            and (self) is only read.

    Returns:
        The target operand (or its copy) holding the sum. The result keeps
        the container type of the target: a sparse target stays in its
        current sparse format, a StateMatrixNumpy or np.ndarray target
        holds a dense array.

    Raises:
        TypeError: if an operand is of an unsupported type, or if the raw
            matrix of a sparse target is not in one of the known SciPy
            sparse formats.

    """
    if write_to_self:
        # The target is this object (or a copy of it); `other` is only read.
        result_mat = self if in_place else self.copy()

        if isinstance(other, (StateMatrixNumpy, self.__class__)):
            source_matrix = other
            source_matrix_ref = other._raw_matrix
        elif isinstance(other, np.ndarray):
            source_matrix = other
            source_matrix_ref = other
        else:
            raise TypeError("matrix has to be either 'StateMatrixNumpy', or 'StateMatrixSpSciPy', or 'np.ndarray' ")
    else:
        # The target is the input matrix (or a copy of it); self is only read.
        result_mat = other if in_place else other.copy()
        source_matrix = self
        source_matrix_ref = self._raw_matrix

    if isinstance(result_mat, self.__class__):
        # Sparse target: rebuild the sum in the format the target already uses.
        result_mat_ref = result_mat.get_raw_matrix_ref()
        format_converters = (
            (sparse.isspmatrix_bsr, sparse.bsr_matrix),
            (sparse.isspmatrix_coo, sparse.coo_matrix),
            (sparse.isspmatrix_csc, sparse.csc_matrix),
            (sparse.isspmatrix_csr, sparse.csr_matrix),
            (sparse.isspmatrix_dia, sparse.dia_matrix),
            (sparse.isspmatrix_dok, sparse.dok_matrix),
            (sparse.isspmatrix_lil, sparse.lil_matrix),
        )
        for is_format, to_format in format_converters:
            if is_format(result_mat._raw_matrix):
                result_mat_ref = to_format(result_mat_ref + source_matrix_ref)
                break
        else:
            raise TypeError("Unsupported Format! My format has been tapered with!")

        result_mat.set_raw_matrix_ref(result_mat_ref)
        result_mat._update_attributes()

    elif isinstance(result_mat, StateMatrixNumpy):
        result_mat_ref = result_mat.get_raw_matrix_ref()
        if isinstance(source_matrix, self.__class__):
            result_mat_ref = result_mat_ref + source_matrix_ref
            # The mixed dense/sparse sum may not be a plain ndarray; densify it.
            try:
                result_mat_ref = result_mat_ref.toarray()
            except AttributeError:
                result_mat_ref = np.asarray(result_mat_ref)
        elif isinstance(source_matrix, (np.ndarray, StateMatrixNumpy)):
            result_mat_ref = result_mat_ref + source_matrix_ref
        result_mat.set_raw_matrix_ref(result_mat_ref)

    elif isinstance(result_mat, np.ndarray):
        result_mat_ref = result_mat
        if isinstance(source_matrix, self.__class__):
            result_mat_ref = result_mat_ref + source_matrix_ref
            try:
                result_mat_ref = result_mat_ref.toarray()
            except AttributeError:
                result_mat_ref = np.asarray(result_mat_ref)
        elif isinstance(source_matrix, (np.ndarray, StateMatrixNumpy)):
            result_mat_ref = result_mat_ref + source_matrix_ref

        # The sum above is a new array; without storing it the caller would
        # get the target back unchanged.
        if result_mat_ref is not result_mat:
            if in_place:
                # Write into the caller's array so the in-place contract
                # holds (values are cast to that array's dtype).
                result_mat[...] = result_mat_ref
            else:
                result_mat = result_mat_ref

    else:
        raise TypeError("matrix has to be either 'StateMatrixNumpy', or 'StateMatrixSpSciPy', or 'np.ndarray' ")

    return result_mat
def draw_network(self, adjacency, styles={}, axis_labels=None, vertex_labels=None, labels=False, height=False,
                 node_cb=False, edge_cb=False, node_cmap=None, edge_cmap=None):
    """
    Plot a network as a scatter of nodes joined by line segments.

    Style values are submitted via ``styles``, e.g. vertex colour values
    with ``styles={"vertex_color": values}``.

    Parameters
    ----------
    adjacency: scipy.sparse DOK matrix or dense array
        Adjacency matrix. For a DOK matrix every stored key is an edge;
        for a dense array every entry ``> 0`` is an edge. Edges are
        treated as undirected: index pairs are sorted and de-duplicated.
    styles: dict
        Overrides for the default visual style (``edge_color``,
        ``edge_width``, ``vertex_size``, ``vertex_label``). The optional
        keys ``layout``, ``vertex_color`` and ``edge_color_dict`` are also
        read. Without ``layout`` a random one is assigned. The default
        dict is never mutated here.
    axis_labels: sequence or None
        Labels for the x and y axes (and the z axis when ``height``).
    vertex_labels: sequence or None
        Per-node label, e.g. pars["input_power"]; when given, every node
        is annotated with its entry.
    labels: bool
        When ``vertex_labels`` is None, annotate each node with its index.
    height: bool
        Draw on 3D axes using a three-column layout.
    node_cb: bool
        Add a horizontal colour bar for the nodes when ``vertex_color``
        is supplied.
    edge_cb: bool
        Currently unused (edge colour bar is not implemented).
    node_cmap, edge_cmap: colormap or None
        Colour maps for nodes and edges; default is
        ``pyplot.get_cmap("linear")``.

    Returns
    -------
    fig: the created figure, which is also appended to ``self.figures``.
    """
    if height:
        # Axes3D is imported for its side effect of registering the '3d' projection.
        from mpl_toolkits.mplot3d import Axes3D
        from mpl_toolkits.mplot3d.art3d import Line3DCollection as LineCollection
    else:
        from matplotlib.collections import LineCollection
    from scipy.sparse import issparse, isspmatrix_dok

    if issparse(adjacency):
        assert isspmatrix_dok(adjacency)
        N = adjacency.shape[0]
        edgelist = sorted({tuple(np.sort(key)) for key in adjacency.keys()})
    else:
        N = len(adjacency)
        index_pairs = np.vstack(np.where(adjacency > 0)).transpose()
        edgelist = sorted({tuple(np.sort(pair)) for pair in index_pairs})

    source = [e[0] for e in edgelist]
    target = [e[1] for e in edgelist]

    if node_cmap is None:
        node_cmap = pyplot.get_cmap("linear")
    if edge_cmap is None:
        edge_cmap = pyplot.get_cmap("linear")

    visual_style = dict(edge_color=np.repeat('#8e908f', len(edgelist)),
                        edge_width=seaborn.axes_style()["axes.linewidth"],
                        vertex_size=100,
                        vertex_label=list(range(N)))

    if styles:
        visual_style.update(styles)

    if "layout" not in visual_style:
        n_dims = 3 if height else 2
        visual_style["layout"] = np.random.random([N, n_dims])
        print("Assign random layout for plotting.")

    if "edge_color_dict" in visual_style:
        edge_values = list(visual_style["edge_color_dict"].values())
        min_color = np.min(edge_values)
        max_color = np.max(edge_values)
        # NOTE(review): the per-edge value is looked up in "edge_color" while
        # the range comes from "edge_color_dict" - confirm this is intended.
        # `float` replaces the `np.float` alias, which newer NumPy no longer provides.
        visual_style["edge_color"] = [
            (float(visual_style["edge_color"][e]) - min_color) / (max_color - min_color)
            for e in edgelist
        ]
    alpha = 1.

    if height:
        fig = pyplot.figure()
        # add_subplot is used because newer Matplotlib no longer accepts
        # keyword arguments to Figure.gca.
        ax = fig.add_subplot(111, projection='3d')
        x, y, z = list(zip(*visual_style["layout"]))
        args = (x, y, z)
    else:
        fig, ax = pyplot.subplots(nrows=1, ncols=1)
        fig.tight_layout()
        x, y = list(zip(*visual_style["layout"]))
        args = (x, y)

    # One segment per edge: array of shape (n_edges, 2 endpoints, n_dims).
    layout = visual_style["layout"]
    if height:
        xyz = (np.asarray(((layout[source, 0], layout[source, 1], layout[source, 2]),
                           (layout[target, 0], layout[target, 1], layout[target, 2])))
               .transpose(2, 0, 1))
    else:
        xyz = (np.asarray(((layout[source, 0], layout[source, 1]),
                           (layout[target, 0], layout[target, 1])))
               .transpose(2, 0, 1))

    l_collection = LineCollection(xyz,
                                  linewidths=visual_style["edge_width"],
                                  antialiaseds=(1,),
                                  colors=visual_style["edge_color"],
                                  cmap=edge_cmap,
                                  alpha=alpha,
                                  zorder=1,
                                  transOffset=ax.transData)
    ax.add_collection(l_collection)

    # TODO: edge colorbar (edge_cb is accepted but not used yet)

    # Pad the x/y limits by 5% of the larger extent so nodes are not clipped.
    margin = max(0.05 * (np.max(x) - np.min(x)), 0.05 * (np.max(y) - np.min(y)))
    ax.set_xlim([np.min(x) - margin, np.max(x) + margin])
    ax.set_ylim([np.min(y) - margin, np.max(y) + margin])

    if "vertex_color" not in visual_style:
        nodes = ax.scatter(*args,
                           c='#8e908f',
                           s=visual_style["vertex_size"],
                           cmap=node_cmap,
                           edgecolor='w',
                           zorder=2)
    else:
        nodes = ax.scatter(*args,
                           c=visual_style["vertex_color"],
                           s=visual_style["vertex_size"],
                           cmap=node_cmap,
                           vmin=np.floor(np.min(visual_style["vertex_color"])),
                           vmax=np.ceil(np.max(visual_style["vertex_color"])),
                           edgecolor='w',
                           zorder=2)
        if node_cb:
            fig.colorbar(nodes, orientation='horizontal', shrink=0.66, format=r"%.2f")

    if axis_labels:
        ax.set_xlabel(axis_labels[0], labelpad=30)
        ax.set_ylabel(axis_labels[1], labelpad=30)
        if height:
            ax.set_zlabel(axis_labels[2], labelpad=30)

    if vertex_labels is None:
        if labels:
            for i in range(N):
                pyplot.annotate(str(i), xy=(x[i], y[i]), xytext=(3, 3), textcoords='offset points',
                                horizontalalignment='left', verticalalignment='bottom')
    else:
        for i in range(N):
            pyplot.annotate(str(vertex_labels[i]), xy=(x[i], y[i]), xytext=(3, -25), textcoords='offset points',
                            horizontalalignment='left', verticalalignment='bottom')

    self.figures.append(fig)

    return fig