def test_get_representations():
    """Check that exported representations reproduce ``predict`` scores.

    For each feature configuration, the row-wise dot product of the user and
    item latent vectors plus both bias terms must match ``model.predict``.
    """
    model = LightFM(random_state=SEED)
    model.fit_partial(train, epochs=10)

    num_users, num_items = train.shape

    # First case passes no feature matrices; the second passes identity
    # matrices perturbed by a random sparse matrix.
    feature_cases = (
        (None, None),
        (
            sp.identity(num_items) + sp.random(num_items, num_items),
            sp.identity(num_users) + sp.random(num_users, num_users),
        ),
    )

    for item_features, user_features in feature_cases:
        expected = model.predict(
            test.row,
            test.col,
            user_features=user_features,
            item_features=item_features,
        )

        item_biases, item_latent = model.get_item_representations(item_features)
        user_biases, user_latent = model.get_user_representations(user_features)

        assert item_latent.dtype == np.float32
        assert user_latent.dtype == np.float32

        interaction = (user_latent[test.row] * item_latent[test.col]).sum(axis=1)
        reconstructed = interaction + user_biases[test.row] + item_biases[test.col]

        assert np.allclose(expected, reconstructed, atol=0.000001)
def test_sparse_placeholder_fit():
    """Run predict, fit and evaluate on a two-input model fed CSR matrices."""
    inputs = [sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)]
    targets = [sparse.random(6, width, density=0.25).tocsr() for width in (3, 4)]

    dense_in = Input(shape=(3,))
    sparse_in = Input(shape=(3,), sparse=True)
    dropout_out = Dropout(0.5, name='dropout')(dense_in)
    dense_out = Dense(4, name='dense_1')(sparse_in)

    model = Model([dense_in, sparse_in], [dropout_out, dense_out])
    # predict is exercised before the model is compiled
    model.predict(inputs, batch_size=2)
    model.compile('rmsprop', 'mse')
    model.fit(
        inputs,
        targets,
        epochs=1,
        batch_size=2,
        validation_split=0.5,
    )
    model.evaluate(inputs, targets, batch_size=2)
def main(m=1000, n=1000, sparsity=0.01):
    """Time a sparse CSR-by-CSC product, including sorting the result indices.

    Prints the stored-entry count of both operands, the elapsed wall time,
    the stored-entry count of the product and whether its indices are sorted.

    Parameters
    ----------
    m, n : shape parameters; the left operand is m x n, the right n x m
    sparsity : density passed to ``sparse.random`` for both operands
    """
    lhs = sparse.random(m, n, density=sparsity, format='csr',
                        dtype=np.float64)  # float64?
    rhs = sparse.random(n, m, density=sparsity, format='csc',
                        dtype=np.float64)  # float64?

    print(lhs.getnnz())
    print(rhs.getnnz())

    # the timed region covers the product and the index sort
    t_begin = time.time()
    product = lhs.dot(rhs)
    product.sort_indices()
    t_end = time.time()

    elapsed = t_end - t_begin
    print('Python: ', elapsed)
    print(product.getnnz())
    print(bool(product.has_sorted_indices))
def test():
    """Demonstrate ``MutEx`` on a random binary background matrix.

    Builds a 100 x 100 background, overwrites the first three rows with
    fixed patterns, then runs ``calculate`` single-threaded, with the default
    settings, and on ten randomly sampled row groups.

    :rtype : None
    """
    import scipy.sparse as sparse

    row, col = 100, 100
    np.random.seed(77)
    # ``toarray()`` replaces the ``.A`` shortcut, which recent SciPy releases
    # no longer provide on sparse matrices.
    background = sparse.random(row, col, density=0.15).toarray()
    # ceil maps every non-zero sample in (0, 1) to 1, giving a 0/1 matrix
    df = pd.DataFrame(background).apply(np.ceil)

    n_cols = df.shape[1]
    df.loc[0] = [1 if x < 20 else 0 for x in range(n_cols)]
    df.loc[1] = [1 if 13 < x < 35 else 0 for x in range(n_cols)]
    df.loc[2] = [1 if x > 80 else 0 for x in range(n_cols)]

    m = MutEx(background=df, permutations=1000)
    pd.set_option('display.max_columns', 1000)
    print(df.loc[[0, 1, 2]])

    print("\nExample - 1 thread \n----------")
    r = m.calculate([4, 5, 6], parallel=False)
    print(r)

    print("\nExample - multi-threaded \n----------")
    r = m.calculate([0, 1, 2])
    print(r)

    random.seed(18)
    # each group holds 2, 3 or 4 distinct row labels
    group_generator = (
        random.sample(df.index.tolist(), random.sample([2, 3, 4], 1)[0])
        for _ in range(10)
    )
    result_list = [m.calculate(g) for g in group_generator]
    print(pd.DataFrame.from_records([r.__dict__ for r in result_list]))
def connectivityMatrixNew(self):
    """Draw random patterns and store a sparse outer-product connectivity.

    Reads ``self.p``, ``self.N``, ``self.c`` and ``self.Amp``; writes
    ``self.patterns`` (a ``p`` x ``N`` array of standard-normal samples) and
    ``self.connectivity`` (an ``N`` x ``N`` CSR matrix).
    """
    self.patterns = np.random.normal(0, 1, size=(self.p, self.N))
    # Only the disabled bimodal variant below uses this mask; the draw is
    # kept so the global RNG stream (and the sampled connectivity for a given
    # seed) is unchanged.
    np.random.binomial(1, 0.5, size=(self.p, self.N))
    # self.patterns = np.multiply(mybin, np.random.normal(-1, 1, size=(self.p, self.N))) \
    #     + np.multiply(1 - mybin, np.random.normal(1, 1, size=(self.p, self.N)))
    # mu1 = 0.0
    # sigma1 = 1.0
    # self.patterns = np.random.lognormal(mu1, sigma1, size=(self.p, self.N)) \
    #     - np.exp(mu1 + (sigma1 ** 2) / 2.)
    # Single-string prints produce the same text under Python 2 and 3.
    print('Patterns created. N patterns: %s' % (self.p,))
    patterns_pre = self.patterns
    patterns_post = self.patterns

    # creating connectivity with sparse matrices: bernoulli(1) always yields
    # 1, so only the random positions of the stored entries matter
    rv = bernoulli(1).rvs
    indexes = sparse.find(
        sparse.random(self.N, self.N, density=self.c, data_rvs=rv))
    print('Connectivity created. N patterns: %s' % (self.p,))
    rows, cols = indexes[0], indexes[1]

    # smart way to write down the outer product learning: for every stored
    # (row, col) pair, sum post[:, row] * pre[:, col] over the patterns
    weights = (self.Amp / (self.c * self.N)) * np.einsum(
        'ij,ij->j', patterns_post[:, rows], patterns_pre[:, cols])
    connectivity = sparse.csr_matrix(
        (weights, (rows, cols)), shape=(self.N, self.N))
    # This message was a bare expression (never printed) in the original.
    print('Connectivity loaded with patterns. N patterns: %s' % (self.p,))
    self.connectivity = connectivity
def rand_matrix(d, scaled=True, sparse=False, stype='csr', density=None):
    """Generate a random complex matrix of order `d` with normally
    distributed entries. If `scaled` is `True`, then in the limit of large
    `d` the eigenvalues will be distributed on the unit complex disk.

    Parameters
    ----------
        d: matrix dimension
        scaled: whether to scale the matrices values such that its spectrum
            approximately lies on the unit disk (for dense matrices)
        sparse: whether to produce a sparse matrix
        stype: the type of sparse matrix if so
        density: target density for the sparse matrix

    Returns
    -------
        mat: random matrix
    """
    if not sparse:
        # a dense matrix is fully populated, whatever `density` was passed
        density = 1.0
        real_part = np.random.randn(d, d)
        imag_part = np.random.randn(d, d)
        mat = np.asmatrix(real_part + 1.0j * imag_part)
    else:
        # Aim for 10 non-zero values per row unless a density is given
        if density is None:
            density = 10/d
        # clamp the density into [d**-2, 1 - d**-2]
        lowest = d**-2
        highest = 1 - d**-2
        density = min(max(density, lowest), highest)
        mat = sp.random(d, d, format=stype, density=density)
        # swap the uniform samples for complex normal ones
        real_part = np.random.randn(mat.nnz)
        imag_part = np.random.randn(mat.nnz)
        mat.data = real_part + 1.0j * imag_part

    if scaled:
        mat /= (2 * d * density)**0.5
    return mat
def test_ogf_sparse(n, p, me_solver):
    """Solve a sparse Lyapunov equation (``trans=True``) and check the residual.

    ``A`` is a random sparse matrix shifted by ``-n * I``; ``C`` is a random
    sparse ``p`` x ``n`` matrix. The solution factor may hold at most ``n``
    vectors and the relative residual must be below ``1e-10``.
    """
    np.random.seed(0)

    fill = 5 / n
    normal_rvs = stats.norm().rvs

    A = sps.random(n, n, density=fill, format='csc', data_rvs=normal_rvs)
    A -= n * sps.eye(n)
    C = sps.random(p, n, density=fill, format='csc', data_rvs=normal_rvs)

    Aop = NumpyMatrixOperator(A)
    Cop = NumpyMatrixOperator(C)

    solve_lyap = _get_solve_lyap(me_solver)
    solver_options = {'type': me_solver}
    Zva = solve_lyap(Aop, None, Cop, trans=True, options=solver_options)
    Z = Zva.to_numpy().T

    assert len(Zva) <= n
    assert relative_residual(A, None, C, Z, trans=True) < 1e-10
def sparse_lanczos(A, k):
    """Approximate ``A`` by projecting it onto a ``k``-column Lanczos basis.

    Starting from a random sparse vector, builds ``Q`` column by column with
    full reorthogonalization and returns ``Q * (Q.T * A * Q) * Q.T``, where
    ``Q`` holds the first ``k`` basis columns.

    Parameters
    ----------
    A : square matrix accepted by ``sp.lil_matrix``
    k : number of Lanczos steps / basis columns kept

    Returns
    -------
    Sparse matrix with the same shape as ``A``.
    """
    n = A.shape[0]
    # ``sp.random`` defaults to density 0.01, which rounds to zero stored
    # entries for n <= 50 and would make the normalisation below divide by
    # zero. Guarantee at least one entry; larger n keeps the 0.01 density.
    q = sp.random(n, 1, density=max(0.01, 1.0 / n))
    # shape constructor avoids allocating a dense n x (k + 1) array first
    Q = sp.lil_matrix((n, k + 1))
    A = sp.lil_matrix(A)

    Q[:, 0] = q / sparsenorm(q)

    beta = 0
    for i in range(k):
        if i == 0:
            q = A * Q[:, i]
        else:
            q = A * Q[:, i] - beta * Q[:, i - 1]
        alpha = q.T * Q[:, i]
        q = q - Q[:, i] * alpha
        q = q - Q[:, :i] * Q[:, :i].T * q  # full reorthogonalization
        beta = sparsenorm(q)
        Q[:, i + 1] = q / beta
        print(i)

    # drop the (k + 1)-th column, which is only needed inside the recurrence
    Q = Q[:, :k]
    Sigma = Q.T * A * Q
    return Q * Sigma * Q.T
def test_sparse_input_validation_split():
    """Fit a one-layer model on a CSR input using ``validation_split``."""
    features = sparse.random(6, 3, density=0.25).tocsr()

    sparse_in = Input(shape=(3,), sparse=True)
    dense_out = Dense(4)(sparse_in)
    targets = np.random.random((6, 4))

    model = Model(sparse_in, dense_out)
    model.compile('rmsprop', 'mse')
    model.fit(
        features,
        targets,
        epochs=1,
        batch_size=2,
        validation_split=0.2,
    )
def test_symmetrize(self):
    """``utils.symmetrize`` must agree on sparse and dense input.

    Also checks that the unknown method name ``'sum'`` raises ``ValueError``.
    """
    W = sparse.random(100, 100, random_state=42)
    methods = ('average', 'maximum', 'fill', 'tril', 'triu')
    for method in methods:
        # Test that the regular and sparse versions give the same result.
        from_sparse = utils.symmetrize(W, method=method)
        from_dense = utils.symmetrize(W.toarray(), method=method)
        np.testing.assert_equal(from_sparse.toarray(), from_dense)
    self.assertRaises(ValueError, utils.symmetrize, W, 'sum')
def rand_ket(d, sparse=False, stype='csr', density=0.01):
    """Generates a ket of length `d` with normally distributed entries.

    With ``sparse=True`` the ket is a ``d`` x 1 sparse matrix of format
    ``stype`` and the given ``density``; otherwise it is a dense matrix.
    The result is passed through ``nmlz`` before being returned.
    """
    if not sparse:
        real_part = np.random.randn(d, 1)
        imag_part = np.random.randn(d, 1)
        ket = np.asmatrix(real_part + 1.0j * imag_part)
        return nmlz(ket)

    ket = sp.random(d, 1, format=stype, density=density)
    # replace the stored samples with complex normal values
    real_part = np.random.randn(ket.nnz)
    imag_part = np.random.randn(ket.nnz)
    ket.data = real_part + 1.0j * imag_part
    return nmlz(ket)
def test_cgf_sparse_E(n, m, me_solver):
    """Solve a sparse generalized Lyapunov equation and check the residual.

    ``A`` and ``E`` are symmetrized random sparse matrices shifted by
    ``-n * I`` and ``+n * I``; ``B`` is a random sparse ``n`` x ``m`` matrix.
    """
    np.random.seed(0)

    fill = 5 / n
    normal_rvs = stats.norm().rvs

    A = sps.random(n, n, density=fill, format='csc', data_rvs=normal_rvs)
    A = (A + A.T) / 2
    A -= n * sps.eye(n)

    E = sps.random(n, n, density=fill, format='csc', data_rvs=normal_rvs)
    E = (E + E.T) / 2
    E += n * sps.eye(n)

    B = sps.random(n, m, density=fill, format='csc', data_rvs=normal_rvs)

    Aop = NumpyMatrixOperator(A)
    Eop = NumpyMatrixOperator(E)
    Bop = NumpyMatrixOperator(B)

    solve_lyap = _get_solve_lyap(me_solver)
    solver_options = {'type': me_solver}
    Zva = solve_lyap(Aop, Eop, Bop, options=solver_options)
    Z = Zva.to_numpy().T

    assert len(Zva) <= n
    assert relative_residual(A, E, B, Z) < 1e-10
def test_cg_nan2(self):
    """Factors must stay finite on a very sparse matrix for every backend.

    Test out NaN appearing in CG code
    (from https://github.com/benfred/implicit/issues/106).
    """
    Ciu = random(
        m=100,
        n=100,
        density=0.0005,
        format='coo',
        dtype=np.float32,
        random_state=42,
        data_rvs=None,
    ).T.tocsr()

    configs = [{'use_native': native, 'use_gpu': False}
               for native in (True, False)]
    if HAS_CUDA:
        configs.append({'use_gpu': True})

    for options in configs:
        model = AlternatingLeastSquares(
            factors=32,
            regularization=10,
            iterations=10,
            dtype=np.float32,
            **options
        )
        model.fit(Ciu, show_progress=False)

        self.assertTrue(np.isfinite(model.item_factors).all())
        self.assertTrue(np.isfinite(model.user_factors).all())
def test_coo_with_duplicate_entries():
    """Fitting on a COO matrix with duplicate entries must not crash.

    Calling .tocsr on a COO matrix with duplicate entries changes its data
    arrays in-place, leading to out-of-bounds array accesses in the WARP
    code. Reported in https://github.com/lyst/lightfm/issues/117.
    """
    rows, cols = (1000, 100)
    mat = sp.random(rows, cols)
    mat.data[:] = 1

    # Duplicate entries in the COO matrix
    mat.data = np.concatenate((mat.data, mat.data[:1000]))
    mat.row = np.concatenate((mat.row, mat.row[:1000]))
    mat.col = np.concatenate((mat.col, mat.col[:1000]))

    for loss in ("warp", "bpr", "warp-kos"):
        # Use the loop variable: the original hard-coded "warp", so the
        # other two losses were never exercised.
        model = LightFM(loss=loss)
        model.fit(mat)
def plot_it(degree):
    """Benchmark sparse vs. dense polynomial feature expansion and plot it.

    For every dimensionality in 1..5 and every density in 0.1..1.0, times
    ``SparsePolynomialFeatures`` on a CSR matrix and ``PolynomialFeatures``
    on its dense copy, averages over the repetitions, and saves the curves to
    ``D_vs_time.pdf``.

    Parameters
    ----------
    degree : polynomial degree passed to both transformers
    """
    Ds = np.arange(1, 6, 1)
    densities = np.arange(.1, 1.1, .1)
    times_sparse = np.zeros((len(densities), len(Ds)))
    times_dense = np.zeros(len(Ds))
    iters = 5

    # ``range`` works on Python 2 and 3; the original ``xrange`` is Python 2
    # only and its loop variable shadowed the ``iter`` builtin.
    for _ in range(iters):
        for col, D in enumerate(Ds):
            for j, density in enumerate(densities):
                X = random(100000, D, density).tocsr()
                X_d = X.toarray()

                sparse_trans = SparsePolynomialFeatures(degree)
                start = time()
                sparse_trans.fit_transform(X)
                times_sparse[j][col] += time() - start

                dense_trans = PolynomialFeatures(degree, include_bias=False)
                start = time()
                dense_trans.fit_transform(X_d)
                times_dense[col] += time() - start

    times_sparse = times_sparse / iters
    # Dense timings are accumulated once per (repetition, density) pair, so
    # the mean must divide by both counts; dividing by ``iters`` alone
    # inflated the dense curve by a factor of len(densities).
    times_dense = times_dense / (iters * len(densities))

    # The original ``plt.hold = True`` only rebound a module attribute and
    # had no effect; successive ``plot`` calls share the axes by default.
    # Plot sparse
    for density, times, c in zip(densities, times_sparse, colors):
        plt.plot(Ds, times, '%s:' % (c,), label='Sparse d=%s' % density)
    plt.plot(Ds, times_dense, 'k', label='scikit-learn')
    plt.xlabel('Dimensionality of Feature Matrix')
    plt.ylabel('Time to Compute Polynomial Features (seconds)')
    plt.title('Speed of scikit-learn vs Sparse Method (degree %s)' % (degree,))
    plt.legend(loc=2)
    plt.savefig('D_vs_time.pdf')
def connectivityMatrixNew(self):
    """Store a sparse outer-product connectivity built from ``patterns_fr``.

    Reads ``self.p``, ``self.N``, ``self.c``, ``self.Amp``,
    ``self.patterns_fr`` and the callables ``self.f`` / ``self.g``; writes
    ``self.connectivity`` (an ``N`` x ``N`` CSR matrix).
    """
    # Single-string prints produce the same text under Python 2 and 3.
    print('Patterns created. N patterns: %s' % (self.p,))
    patterns_pre = self.g(self.patterns_fr)
    patterns_post = self.f(self.patterns_fr)

    # creating connectivity with sparse matrices: bernoulli(1) always yields
    # 1, so only the random positions of the stored entries matter
    rv = bernoulli(1).rvs
    indexes = sparse.find(
        sparse.random(self.N, self.N, density=self.c, data_rvs=rv))
    print('Connectivity created. N patterns: %s' % (self.p,))
    rows, cols = indexes[0], indexes[1]

    # smart way to write down the outer product learning: for every stored
    # (row, col) pair, sum post[:, row] * pre[:, col] over the patterns
    weights = (self.Amp / (self.c * self.N)) * np.einsum(
        'ij,ij->j', patterns_post[:, rows], patterns_pre[:, cols])
    connectivity = sparse.csr_matrix(
        (weights, (rows, cols)), shape=(self.N, self.N))
    # This message was a bare expression (never printed) in the original.
    print('Connectivity loaded with patterns. N patterns: %s' % (self.p,))
    self.connectivity = connectivity
def test_load_sparse_matrix(self):
    """Print the raw data bytes and the text form of a random sparse matrix."""
    matrix = sparse.random(1000, 1000, .2)
    raw_bytes = matrix.data.tobytes()
    print(raw_bytes)
    print(matrix)
def test_reshape_upcast():
    """Flattening a 10x10x10 uint8-indexed array must yield uint16 coords."""
    cube = sparse.random(
        (10, 10, 10),
        density=0.5,
        format="coo",
        idx_dtype=np.uint8,
    )
    flat = cube.reshape(1000)
    assert flat.coords.dtype == np.uint16
p = 10000  # Feature
dense = 0.1  # density

# config
clusterNum1 = 5
clusterNum2 = 5
pe = 0.2
pg1 = 0.4
pg2 = 0.4

np.set_printoptions(precision=4, threshold=10000, linewidth=150)

# generate a random X matrix
# X is a n x p matrix; sparse.random draws the stored values uniformly
# from [0, 1) by default
X = sparse.random(n, p, density=0.5)
# toarray() replaces the ``.A`` shortcut, which recent SciPy releases no
# longer provide on sparse matrices
X = X.toarray()
X = np.around(X, decimals=3)
np.savetxt('../Data/RandomData/random.genotype.csv', X, '%5.3f', delimiter=',')

# NOTE: from here on ``n`` is the number of columns of X (i.e. p)
[m, n] = X.shape

# p * dense is a float; array sizes must be integers
featureNum = int(p * dense)
# numpy.random is used directly: the ``scipy.random`` and ``scipy.dot``
# aliases of the numpy functions have been removed from SciPy
idx = np.random.randint(0, n, featureNum).astype(int)
idx = sorted(idx)
w = 1*np.random.normal(0, 1, size=featureNum)
ypheno = np.dot(X[:, idx], w)
# standardise the phenotype to zero mean and unit standard deviation
ypheno = (ypheno-ypheno.mean())/ypheno.std()
ypheno = ypheno.reshape(ypheno.shape[0])
error = np.random.normal(0, 1, m)
def test_coord_dtype():
    """Coordinate dtype: uint8 for a 2x3x4 array, uint16 for length 1000."""
    small = sparse.random((2, 3, 4), density=0.5)
    assert small.coords.dtype == np.uint8

    long_vector = COO.from_numpy(np.zeros(1000))
    assert long_vector.coords.dtype == np.uint16
def test_transpose_error(axis):
    """Transposing with the given ``axis`` must raise ``ValueError``."""
    array = sparse.random((2, 3, 4), density=.25)

    with pytest.raises(ValueError):
        array.transpose(axis)
import osqp
import osqppurepy
import scipy.sparse as sparse
import scipy as sp
import numpy as np
import mathprogbasepy as mpbpy

# Seed numpy's global RNG directly: ``scipy.random`` was only an alias of
# ``numpy.random`` and has been removed from SciPy. The random stream used by
# sparse.random / np.random below is the same one.
np.random.seed(2)

n = 100
m = 1000
# random m x n constraint matrix with standard-normal stored entries
A = sparse.random(m, n, density=0.5, data_rvs=np.random.randn, format='csc')
# bounds: l lies in (-2, -1], u in [1, 2)
l = -1. - np.random.rand(m)
u = 1 + np.random.rand(m)

# A = sparse.eye(n).tocsc()
# l = -1 * np.ones(n)
# u = 1 * np.ones(n)

# l += 10
# u += 10

# l *= 1000
# u *= 1000
# A *= 1000

# Make problem infeasible
# A_temp = A[5, :]
# A[6, :] = A_temp
# l[6] = l[5] + 2.
# u[6] = l[6] + 3.
import numpy as np
from scipy.sparse import random

np.random.seed(10)
matrix = random(5, 5, format='csr', density=0.25)
# toarray() replaces the ``.A`` shortcut, which recent SciPy releases no
# longer provide on sparse matrices
dense = matrix.toarray()
print(dense)
# matrix = (10-5)*matrix + 5*matrix.ceil()
print("---------------------------------------------------")
print(matrix)

# with open('output.txt', 'w') as f:
#     for item in matrix:
#         for index, inner in enumerate(item):
#             if index == len(item)-1:
#                 f.write("%s" % str(inner), )
#             else:
#                 f.write("%s\t" % str(inner), )
#         f.write("\n")

# print the dense form one row at a time
for item in dense:
    print(item)
def test_random_nnz(shape, nnz):
    """``sparse.random`` with ``nnz`` returns a COO holding that many entries."""
    generated = sparse.random(shape, nnz=nnz)

    assert isinstance(generated, COO)
    assert generated.nnz == nnz
def test_random_invalid_density_and_nnz(density, nnz):
    """The given ``density``/``nnz`` pair must be rejected with ``ValueError``."""
    shape = (1, )
    with pytest.raises(ValueError):
        sparse.random(shape, density, nnz=nnz)
def test_concatenate_mixed(func, axis):
    """Mixing a dense array with sparse ones in ``func`` raises ``ValueError``."""
    sparse_part = sparse.random((10, 10), density=0.5)
    dense_part = sparse_part.todense()

    mixed = [dense_part, sparse_part, sparse_part]
    with pytest.raises(ValueError):
        func(mixed, axis=axis)
def test_triul(shape, k):
    """``sparse.triu`` / ``sparse.tril`` must match their NumPy counterparts."""
    array = sparse.random(shape, density=0.5)
    dense = array.todense()

    upper_expected = np.triu(dense, k)
    assert_eq(upper_expected, sparse.triu(array, k))

    lower_expected = np.tril(dense, k)
    assert_eq(lower_expected, sparse.tril(array, k))
def test_slicing_errors(index):
    """Indexing a 2x3x4 array with ``index`` must raise ``IndexError``."""
    array = sparse.random((2, 3, 4), density=0.5)

    with pytest.raises(IndexError):
        array[index]
def test_advanced_indexing(index):
    """Indexing the sparse array must match indexing its dense copy."""
    array = sparse.random((2, 3, 4), density=0.5)
    dense = array.todense()

    expected = dense[index]
    assert_eq(expected, array[index])
def test_to_scipy_sparse():
    """``to_scipy_sparse`` must equal a SciPy COO built from the dense copy."""
    array = sparse.random((3, 5), density=0.5)

    converted = array.to_scipy_sparse()
    reference = scipy.sparse.coo_matrix(array.todense())

    assert_eq(converted, reference)
def test_reshape(a, b):
    """Reshaping from shape ``a`` to ``b`` must match the dense reshape."""
    array = sparse.random(a, density=0.5)
    dense = array.todense()

    expected = dense.reshape(b)
    assert_eq(expected, array.reshape(b))
def test_two_random_unequal():
    """Two unseeded draws with the same shape and density must differ."""
    first = sparse.random((2, 3, 4), 0.3)
    second = sparse.random((2, 3, 4), 0.3)

    first_dense = first.todense()
    second_dense = second.todense()
    assert not np.allclose(first_dense, second_dense)
def test_sparse_array(self):
    """``KeplerMapper.fit_transform`` must accept a SciPy sparse matrix."""
    data = sparse.random(100, 10)
    mapper = KeplerMapper()
    # only the call itself is checked; the result is not inspected
    projected = mapper.fit_transform(data)
def test_two_random_same_seed():
    """Two draws sharing one ``random_state`` seed must be equal."""
    seed = np.random.randint(100)

    first = sparse.random((2, 3, 4), 0.3, random_state=seed)
    second = sparse.random((2, 3, 4), 0.3, random_state=seed)

    assert_eq(first, second)
dense = 0.3  # density
datapath = './Data/mediumdata/'

# other setting:
config = np.array([n, p, k, h])
np.set_printoptions(precision=4, threshold=10000, linewidth=150)

# generate a random X matrix
X = np.random.random((n, p))  # X is a n x p matrix with values in [0, 1)
X = np.around(X, decimals=2)
B = sparse.random(p, 1, density=dense)  # B is random sparse vector
# toarray() replaces the ``.A`` shortcut, which recent SciPy releases no
# longer provide on sparse matrices
B = B.toarray()
B = np.around(B, decimals=2)
Y_1 = X.dot(B)
Y_1 = np.around(Y_1, decimals=2)
# single-argument print calls behave the same on Python 2 and 3
print(X.shape)
print(B.shape)
print(Y_1.shape)

# Clustering
clf = KMeans(n_clusters=k)
s = clf.fit(Y_1)

# Generate the group
Iconst = 1. # constant external input Iext = np.ones(N) # externally applied stimulus Iext = Iconst * Iext # external input set to 0.001 [A] Iext_time = np.random.randn(N,len(time)) + Iconst rate = np.zeros((N,len(time))) # population activity (instantaneous) popAct = np.zeros(len(time)) # population activity (instantaneous) psthdt = 400 # PSTH time duration [msec] ## Synapse weight matrix print("Generate weight matrix...") g = 1. # recurrent gain parameter mu_w = 0 # zero mean sigma_w = g*(1/N) # variance 1/N for balance w_conn = 0.1 # connectivity in the weight matrix rands = st.norm(loc=mu_w,scale=sigma_w).rvs # samples from a Gaussian random distr. w_rec = sp.random(N, N, density=w_conn, data_rvs=rands) # generates sparse matrix #print synapses.nnz # prints number of nonzero elements #np.random.normal(mu_w, sigma_w, (N,N)) # Gaussian distributed weights (full network) ## LIF neurons def f_LIF(i): return -(Vm[:,i-1] - Vrest) ## QIF neurons def f_QIF(i): return (Vm[:,i-1] - Vrest)*(Vm[:,i-1] - Vth)/deltaV ## Escape rate function def esc_rate(V): return (1/tau_esc)*np.exp(beta_esc*(V-Vth))
def X_sparse():
    """Return a fixed-seed 60 x 55 CSR matrix with log-scaled stored values."""
    # Make an X that looks somewhat like a small tf-idf matrix.
    rng = check_random_state(42)
    matrix = sp.random(60, 55, density=0.2, format="csr", random_state=rng)
    # overwrite the stored values in place with 1 + log(value)
    matrix.data[:] = np.log(matrix.data) + 1
    return matrix
def test_swapaxes_error(axis1, axis2):
    """Swapping the given axes of a 2x3x4 array must raise ``ValueError``."""
    array = sparse.random((2, 3, 4), density=0.25)

    with pytest.raises(ValueError):
        array.swapaxes(axis1, axis2)
def test_transpose(axis):
    """Sparse ``transpose`` must match the dense transpose for ``axis``."""
    array = sparse.random((2, 3, 4), density=.25)
    dense = array.todense()

    sparse_result = array.transpose(axis)
    dense_result = dense.transpose(axis)

    assert_eq(sparse_result, dense_result)
# Size configuration
n = 1000  # Case Number
p = 100  # Feature
k = 10  # Number of Group
h = 0.2  # Impact of Group
dense = 0.1  # density

config = np.array([n, p, k, h])
np.set_printoptions(precision=4, threshold=10000, linewidth=150)

# generate a random X matrix
X = np.random.random((n, p))  # X is a n x p matrix with values in [0, 1)
X = np.around(X, decimals=2)
B = sparse.random(p, 1, density=dense)  # B is random sparse vector
# toarray() replaces the ``.A`` shortcut, which recent SciPy releases no
# longer provide on sparse matrices
B = B.toarray()
B = np.around(B, decimals=2)
Y_1 = X.dot(B)
Y_1 = np.around(Y_1, decimals=2)
# single-argument print calls behave the same on Python 2 and 3
print(X.shape)
print(B.shape)
print(Y_1.shape)

# Clustering
clf = KMeans(n_clusters=k)
s = clf.fit(Y_1)

# Generate the group
for c, d in enumerate(row): if d != 0: indices.append(c) data.append(d) nnz += 1 indptr.append(nnz) return csr_matrix((data, indices, indptr), shape=A.shape) if __name__ == '__main__': from scipy.sparse import random from sklearn.preprocessing import PolynomialFeatures import numpy as np from time import time X = random(2, 3, .7).tocsr() print 'actual nnz', csr_matrix(PolynomialFeatures(2, include_bias=False).fit_transform(X.toarray())).nnz print X.toarray() print second_deg(X).toarray() #a=time() xp = csr_matrix(PolynomialFeatures(2, include_bias=False).fit_transform(X.toarray())) print xp.indptr print xp.indices print xp.shape print xp.data #print time()-a
def test_reshape_same():
    """Reshaping to the current shape must return the very same object."""
    array = sparse.random((3, 5), density=0.5)
    reshaped = array.reshape(array.shape)

    assert reshaped is array
def test_swapaxes(axis1, axis2):
    """Sparse ``swapaxes`` must match the dense result for the given axes."""
    array = sparse.random((2, 3, 4), density=0.25)
    dense = array.todense()

    sparse_result = array.swapaxes(axis1, axis2)
    dense_result = dense.swapaxes(axis1, axis2)

    assert_eq(sparse_result, dense_result)
def test_moveaxis(source, destination):
    """``sparse.moveaxis`` must match ``np.moveaxis`` on the dense copy."""
    array = sparse.random((2, 3, 4, 5), density=0.25)
    dense = array.todense()

    sparse_result = sparse.moveaxis(array, source, destination)
    dense_result = np.moveaxis(dense, source, destination)

    assert_eq(sparse_result, dense_result)
#!/usr/bin/python
# Print a random n x n CSR matrix as text.
#
# Usage: <script> N DENSITY SEED
#
# Output: a header line "N N NNZ 0", then one space-separated line each for
# the stored values, the row pointers and the column indices.
import sys
from scipy.sparse import random

n = int(sys.argv[1])
density = float(sys.argv[2])
seed = int(sys.argv[3])

m = random(n, n, density, format="csr", random_state=seed)
# rescale the stored values in place: v -> 2*v - 1
m.data *= 2
m.data -= 1

# Single-string prints run under both Python 2 and Python 3 and keep the
# layout of the original Python 2 ``print x,`` loops.
print("%d %d %d %d" % (n, n, len(m.data), 0))
for values in (m.data, m.indptr, m.indices):
    print(" ".join(str(x) for x in values))
from __future__ import division, print_function from scipy import sparse import numpy as np import pandas import h5py from nose.tools import assert_raises import cooler.core class MockCooler(dict): pass binsize = 100 n_bins = 20 r = sparse.random(n_bins, n_bins, density=1, random_state=1) r = sparse.triu(r, k=1).tocsr() r_full = r.toarray() + r.toarray().T mock_cooler = MockCooler({ 'chroms': { 'name': np.array(['chr1', 'chr2'], dtype='S'), 'length': np.array([1000, 1000], dtype=np.int32), }, 'bins': { 'chrom': np.array([0,0,0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1], dtype=int), 'start': np.array([0,100,200,300,400,500,600,700,800,900, 0,100,200,300,400,500,600,700,800,900], dtype=int), 'end': np.array([100,200,300,400,500,600,700,800,900,1000,
def random_oov_handler(shape, dtype, density, random_state):
    """Return a random CSR matrix for the requested shape.

    Parameters
    ----------
    shape : sequence whose first two entries are the row and column counts
    dtype : dtype of the stored values
    density : fraction of entries that are stored
    random_state : forwarded to ``sparse.random``

    Returns
    -------
    A ``shape[0]`` x ``shape[1]`` sparse matrix in CSR format.
    """
    n_rows = shape[0]
    n_cols = shape[1]
    matrix = sparse.random(
        n_rows,
        n_cols,
        density=density,
        format='csr',
        random_state=random_state,
        dtype=dtype,
    )
    return matrix
def test_random_idx_dtype():
    """An int8 index dtype for a length-300 array must raise ``ValueError``."""
    shape = (300, )
    with pytest.raises(ValueError):
        sparse.random(shape, density=0.1, format="coo", idx_dtype=np.int8)
z = sps.hstack([z, z], format='csr') #sps.save_npz('tmp.npz', z) x = y = z print 'x shape', x.shape, x.nnz try: cpu = int(eval(sys.argv[3])) except: cpu = 1 except: N = int(eval(sys.argv[1])) try: cpu = int(eval(sys.argv[2])) except: cpu = 1 x = sps.random(N, N, 2./N, format='csr', dtype='float32') row, col = x.nonzero() row += N//2 col += N//2 x = sps.csr_matrix((x.data,(row, col)), shape=(2*N, 2*N)) #sps.save_npz('tmp.npz', x) #raise SystemExit() y = x #sps.save_npz('tmp.npz', x) print 'random matrix', time() - st #small = sps.random(100, 100, .01, format='csr') #small_xy = csrmm_ez(small, small) st = time() y2 = csrmm_ez(x, y, 'msav', cpu=cpu)
def test_moveaxis_error(source, destination):
    """``sparse.moveaxis`` with the given axes must raise ``ValueError``."""
    array = sparse.random((2, 3, 4), density=0.25)

    with pytest.raises(ValueError):
        sparse.moveaxis(array, source, destination)
__author__ = 'Guillaume Taglang <*****@*****.**>'

import pytest
import numpy as np
import scipy.sparse as sp

from gouyou.utils.sparse.operations import csr_vector_matrix_and

# Pairs of boolean CSR inputs: a 1 x 5,000,000 row vector and a
# 3 x 5,000,000 matrix. Where ``density`` is omitted, ``sp.random`` uses its
# default density.
testdata = [
    # default density on both sides
    (
        sp.random(1, 5000000, format='csr').astype('bool'),
        sp.random(3, 5000000, format='csr').astype('bool')
    ),
    # empty vector, default-density matrix
    (
        sp.random(1, 5000000, density=0, format='csr').astype('bool'),
        sp.random(3, 5000000, format='csr').astype('bool')
    ),
    # default-density vector, empty matrix
    (
        sp.random(1, 5000000, format='csr').astype('bool'),
        sp.random(3, 5000000, density=0, format='csr').astype('bool')
    ),
    # both empty
    (
        sp.random(1, 5000000, density=0, format='csr').astype('bool'),
        sp.random(3, 5000000, density=0, format='csr').astype('bool')
    ),
    # both fully populated
    (
        sp.random(1, 5000000, density=1, format='csr').astype('bool'),
        sp.random(3, 5000000, density=1, format='csr').astype('bool')
    ),
]
def gen_adata(
    shape: Tuple[int, int],
    X_type=sparse.csr_matrix,
    X_dtype=np.float32,
    # obs_dtypes,
    # var_dtypes,
    obsm_types: "Collection[Type]" = (sparse.csr_matrix, np.ndarray, pd.DataFrame),
    varm_types: "Collection[Type]" = (sparse.csr_matrix, np.ndarray, pd.DataFrame),
    layers_types: "Collection[Type]" = (sparse.csr_matrix, np.ndarray, pd.DataFrame),
) -> AnnData:
    """\
    Helper function to generate a random AnnData for testing purposes.

    Note: For `obsm_types`, `varm_types`, and `layers_types` these currently
    just filter already created objects. In future, these should choose which
    objects are created.

    Params
    ------
    shape
        What shape you want the anndata to be.
    X_type
        What kind of container should `X` be? This will be called on a randomly
        generated 2d array.
    X_dtype
        What should the dtype of the `.X` container be?
    obsm_types
        What kinds of containers should be in `.obsm`?
    varm_types
        What kinds of containers should be in `.varm`?
    layers_types
        What kinds of containers should be in `.layers`?
    """
    M, N = shape
    obs_names = pd.Index(f"cell{i}" for i in range(M))
    var_names = pd.Index(f"gene{i}" for i in range(N))
    obs = gen_typed_df(M, obs_names)
    var = gen_typed_df(N, var_names)
    # For #147
    obs.rename(columns={"cat": "obs_cat"}, inplace=True)
    var.rename(columns={"cat": "var_cat"}, inplace=True)

    # Statement order below is kept as-is so the sequence of random draws
    # does not change.
    X = (
        None
        if X_type is None
        else X_type(np.random.binomial(100, 0.005, (M, N)).astype(X_dtype))
    )

    obsm_all = {
        "array": np.random.random((M, 50)),
        "sparse": sparse.random(M, 100, format="csr"),
        "df": gen_typed_df(M, obs_names),
    }
    # keep only the containers whose exact type was requested
    obsm = {key: val for key, val in obsm_all.items() if type(val) in obsm_types}

    varm_all = {
        "array": np.random.random((N, 50)),
        "sparse": sparse.random(N, 100, format="csr"),
        "df": gen_typed_df(N, var_names),
    }
    varm = {key: val for key, val in varm_all.items() if type(val) in varm_types}

    layers_all = {
        "array": np.random.random((M, N)),
        "sparse": sparse.random(M, N, format="csr"),
    }
    layers = {
        key: val for key, val in layers_all.items() if type(val) in layers_types
    }

    obsp = {
        "array": np.random.random((M, M)),
        "sparse": sparse.random(M, M, format="csr"),
    }
    varp = {
        "array": np.random.random((N, N)),
        "sparse": sparse.random(N, N, format="csr"),
    }
    uns = {
        "O_recarray": gen_vstr_recarray(N, 5),
        "nested": {
            "scalar_str": "str",
            "scalar_int": 42,
            "scalar_float": 3.0,
            "nested_further": {"array": np.arange(5)},
        },
        # U_recarray=gen_vstr_recarray(N, 5, "U4")
    }

    return AnnData(
        X=X,
        obs=obs,
        var=var,
        obsm=obsm,
        varm=varm,
        layers=layers,
        obsp=obsp,
        varp=varp,
        dtype=X_dtype,
        uns=uns,
    )
def test_random_rvs(rvs, dtype, shape, density):
    """A custom ``data_rvs`` must yield the expected shape and dtype."""
    generated = sparse.random(shape, density, data_rvs=rvs)

    assert generated.shape == shape
    assert generated.dtype == dtype
def test_spmv_3d_feat(idtype):
    """Builtin and UDF message passing must agree on (n, 5, 5) node features.

    Each scenario runs ``update_all`` three times from the same inputs:
    builtin message + builtin reduce (the reference), UDF message + builtin
    reduce, and UDF message + UDF reduce.
    """
    def scalar_edge_message(edges):
        # expand the per-edge scalar twice so it lines up with 3-D features
        edge_weight = F.unsqueeze(F.unsqueeze(edges.data['h'], 1), 1)
        return {'sum': edges.src['h'] * edge_weight}

    def tensor_edge_message(edges):
        return {'sum': edges.src['h'] * edges.data['h']}

    def sum_udf(nodes):
        return {'h': F.sum(nodes.mailbox['sum'], 1)}

    n = 100
    p = 0.1
    a = sp.random(n, n, p, data_rvs=lambda n: np.ones(n))
    g = dgl.DGLGraph(a)
    g = g.astype(idtype).to(F.ctx())
    m = g.number_of_edges()

    def propagate(node_feat, edge_feat, message_func, reduce_func):
        # reset the inputs, run one round of message passing, return 'h'
        g.ndata['h'] = node_feat
        g.edata['h'] = edge_feat
        g.update_all(message_func=message_func, reduce_func=reduce_func)
        return g.ndata['h']

    # test#1: v2v with adj data
    h = F.randn((n, 5, 5))
    e = F.randn((m, ))
    ans = propagate(h, e, fn.src_mul_edge('h', 'h', 'sum'),
                    fn.sum('sum', 'h'))  # 1
    result = propagate(h, e, scalar_edge_message, fn.sum('sum', 'h'))  # 2
    assert F.allclose(result, ans)
    result = propagate(h, e, scalar_edge_message, sum_udf)  # 3
    assert F.allclose(result, ans)

    # test#2: e2v
    h = F.randn((n, 5, 5))
    e = F.randn((m, 5, 5))
    ans = propagate(h, e, fn.src_mul_edge('h', 'h', 'sum'),
                    fn.sum('sum', 'h'))  # 1
    result = propagate(h, e, tensor_edge_message, fn.sum('sum', 'h'))  # 2
    assert F.allclose(result, ans)
    result = propagate(h, e, tensor_edge_message, sum_udf)  # 3
    assert F.allclose(result, ans)
__author__ = 'Guillaume Taglang <*****@*****.**>' import pytest import numpy as np import scipy.sparse as sp from gouyou.utils.sparse.operations import cs_vector_vector_and testdata = [ ( sp.random(1, 10000, density=0.1, format='csr').astype('bool'), sp.random(1, 10000, density=0.1, format='csr').astype('bool') ), ( sp.random(1, 100, density=0, format='csr').astype('bool'), sp.random(1, 100, format='csr').astype('bool') ), ( sp.random(1, 100, format='csr').astype('bool'), sp.random(1, 100, density=0, format='csr').astype('bool') ), ( sp.random(1, 100, density=0, format='csr').astype('bool'), sp.random(1, 100, density=0, format='csr').astype('bool') ), ( sp.random(1, 100, density=1, format='csr').astype('bool'), sp.random(1, 100, density=1, format='csr').astype('bool') ), (
def test_random_fv(format):
    """The ``fill_value`` passed to ``sparse.random`` must be kept."""
    fill = np.random.rand()
    generated = sparse.random(
        (2, 3, 4),
        density=0.5,
        format=format,
        fill_value=fill,
    )

    assert generated.fill_value == fill
''' from scipy.sparse import random import time as t import numpy as np from scipy import ndimage import os, sys import unittest sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("voxel.py")))) import voxel as vc structure = ndimage.generate_binary_structure(3, 1) #np.ones((3,3,3)) try: input_dvar = np.load("dense_array.npy", mmap_mode="r") except: print("creating dense input array will take time...") input_dvar = random(400, 160000, density=0.7, dtype="float64") input_dvar = input_dvar.todense() input_dvar = np.array(input_dvar) input_dvar = np.reshape(input_dvar, (400, 400, 400)) np.save("dense_array.npy", input_dvar) # creating sparse input array try: input_svar = np.load("sparse_array.npy", mmap_mode="r") except: print("creating sparse input array will take time...") input_svar = random(400, 160000, density=0.3, dtype="float64") input_svar = input_svar.todense() input_svar = np.array(input_svar) input_svar = np.reshape(input_svar, (400, 400, 400)) np.save("sparse_array.npy", input_svar)
degree = 2 trials = 3 num_rows = 1000 dimensionalities = np.array([1, 2, 8, 16, 32, 64]) densities = np.array([0.01, 0.1, 1.0]) csr_times = {d: np.zeros(len(dimensionalities)) for d in densities} dense_times = {d: np.zeros(len(dimensionalities)) for d in densities} transform = PolynomialFeatures(degree=degree, include_bias=False, interaction_only=False) for trial in range(trials): for density in densities: for dim_index, dim in enumerate(dimensionalities): print(trial, density, dim) X_csr = sparse.random(num_rows, dim, density).tocsr() X_dense = X_csr.toarray() # CSR t0 = time() transform.fit_transform(X_csr) csr_times[density][dim_index] += time() - t0 # Dense t0 = time() transform.fit_transform(X_dense) dense_times[density][dim_index] += time() - t0 csr_linestyle = (0, (3, 1, 1, 1, 1, 1)) # densely dashdotdotted dense_linestyle = (0, ()) # solid fig, axes = plt.subplots(nrows=len(densities), ncols=1, figsize=(8, 10)) for density, ax in zip(densities, axes):