def test_2d_2d(self):
  # Not exactly dot with a vector; just to make sure the new feature
  # hasn't broken anything.
  # Test with row > col
  av = expr.arange((132, 100))
  bv = expr.arange((100, 77))
  na = np.arange(13200).reshape(132, 100)
  nb = np.arange(7700).reshape(100, 77)
  Assert.all_eq(expr.dot(av, bv).glom(), np.dot(na, nb))

  # Test with row < col
  av = expr.arange((67, 100))
  bv = expr.arange((100, 77))
  na = np.arange(6700).reshape(67, 100)
  nb = np.arange(7700).reshape(100, 77)
  Assert.all_eq(expr.dot(av, bv).glom(), np.dot(na, nb))

  # Dot with a numpy object
  cv = expr.arange((77, 100))
  dv = np.arange(8800).reshape(100, 88)
  nc = np.arange(7700).reshape(77, 100)
  nd = np.arange(8800).reshape(100, 88)
  Assert.all_eq(expr.dot(cv, dv).glom(), np.dot(nc, nd))
def test_reshape_dot(self):
  npa1 = np.random.random((357, 93))
  npa2 = np.random.random((31, 357))
  result = np.dot(np.reshape(npa1, (1071, 31)), npa2)
  t1 = expr.from_numpy(npa1)
  t2 = expr.from_numpy(npa2)
  t3 = expr.dot(expr.reshape(t1, (1071, 31)), t2)
  Assert.all_eq(result, t3.glom(), 10e-9)

  npa1 = np.random.random((357, 718))
  npa2 = np.random.random((718, ))
  result = np.dot(npa1, np.reshape(npa2, (718, 1)))
  t1 = expr.from_numpy(npa1)
  t2 = expr.from_numpy(npa2)
  t3 = expr.dot(t1, expr.reshape(t2, (718, 1)))
  Assert.all_eq(result, t3.glom(), 10e-9)

  npa1 = np.random.random((718, ))
  npa2 = np.random.random((1, 357))
  result = np.dot(np.reshape(npa1, (718, 1)), npa2)
  t1 = expr.from_numpy(npa1)
  t2 = expr.from_numpy(npa2)
  t3 = expr.dot(expr.reshape(t1, (718, 1)), t2)
  Assert.all_eq(result, t3.glom(), 10e-9)
def fn2():
  a = expr.ones((N, N))
  b = expr.ones((N, N/2))
  g = expr.dot(a, b) + expr.dot(expr.sum(a, axis=1).reshape((1, N)), b)
  t1 = time.time()
  g_opt = g.optimized()
  #g_opt.evaluate()
  t2 = time.time()
  print t2 - t1
  print g_opt
def fn2():
  a = expr.ones((N, N))
  b = expr.ones((N, N/2))
  g = expr.dot(a, b) + expr.dot(expr.sum(a, axis=1).reshape((1, N)), b)
  t1 = time.time()
  g_opt = g.optimized()
  #g_opt.force()
  t2 = time.time()
  print t2 - t1
  print g_opt
def update(self):
  """
  gradient_update = 2xTxw - 2xTy + 2 * lambda * w
  """
  xT = expr.transpose(self.x)
  g1 = expr.dot(expr.dot(xT, self.x), self.w)
  g2 = expr.dot(xT, self.y)
  g3 = self.ridge_lambda * self.w
  # Per the formula above, the xTy term enters with a negative sign; the
  # common factor of 2 is folded into the learning rate.
  g4 = g1 - g2 + g3
  return expr.reshape(g4, (1, self.N_DIM))
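# A minimal NumPy sketch (not part of the class above) checking the ridge
# gradient formula from the docstring against a finite-difference
# approximation; the shapes and the lambda value here are illustrative
# assumptions.
def _check_ridge_gradient():
  import numpy as np
  x = np.random.randn(50, 4)
  y = np.random.randn(50, 1)
  w = np.random.randn(4, 1)
  lam = 0.1

  def loss(w):
    r = np.dot(x, w) - y
    return float(np.dot(r.T, r) + lam * np.dot(w.T, w))

  # Analytic gradient: 2x'xw - 2x'y + 2*lambda*w.
  grad = 2 * np.dot(x.T, np.dot(x, w)) - 2 * np.dot(x.T, y) + 2 * lam * w

  # Central finite differences, one coordinate at a time.
  eps = 1e-6
  numeric = np.zeros_like(w)
  for i in range(w.shape[0]):
    dw = np.zeros_like(w)
    dw[i] = eps
    numeric[i] = (loss(w + dw) - loss(w - dw)) / (2 * eps)
  assert np.allclose(numeric, grad, atol=1e-4)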
def test_numpy_2d_vec(self):
  av = expr.arange((77, 100))
  bv = np.arange(100)
  na = np.arange(7700).reshape(77, 100)
  nb = np.arange(100)
  Assert.all_eq(expr.dot(av, bv).glom(), np.dot(na, nb))
def test_numpy_vec_vec(self):
  av = expr.arange(stop=100)
  bv = np.arange(100)
  na = np.arange(100)
  nb = np.arange(100)
  Assert.all_eq(expr.dot(av, bv).glom(), np.dot(na, nb))
def test_2d_vec(self):
  # Test with row > col
  av = expr.arange((100, 77))
  bv = expr.arange(stop=77)
  na = np.arange(7700).reshape(100, 77)
  nb = np.arange(77)
  Assert.all_eq(expr.dot(av, bv).glom(), np.dot(na, nb))

  # Test with col > row
  av = expr.arange((77, 100))
  bv = expr.arange(stop=100)
  na = np.arange(7700).reshape(77, 100)
  nb = np.arange(100)
  Assert.all_eq(expr.dot(av, bv).glom(), np.dot(na, nb))
def connectedComponents(ctx, dim, numIters):
  linkMatrix = eager(
      expr.shuffle(
          expr.ndarray((dim, dim), dtype=np.int64,
                       tile_hint=(dim / ctx.num_workers, dim)),
          make_matrix,
      ))
  power = eager(
      expr.shuffle(
          expr.ndarray((dim, dim), dtype=np.int64,
                       tile_hint=(dim / ctx.num_workers, dim)),
          make_matrix,
      ))
  eye = expr.eye(dim, tile_hint=(dim / ctx.num_workers, dim))

  startCompute = time.time()
  result = expr.logical_or(eye, linkMatrix).optimized().glom()
  for i in range(numIters):
    power = expr.dot(power, linkMatrix).optimized().glom()
    result = expr.logical_or(result, power)
    result.optimized().glom()
  final = expr.logical_and(result, expr.transpose(result.optimized())).optimized().evaluate()
  endCompute = time.time()
  return endCompute - startCompute
def jacobi_method(A, b, _iter=100):
  """
  Iterative algorithm for approximating the solution of a diagonally
  dominant system of linear equations.

  Parameters
  ----------
  A : ndarray or Expr - 2d
    Input matrix.
  b : ndarray or Expr - vector
    RHS vector.
  _iter : int
    Number of iterations; defaults to 100.

  Returns
  -------
  result : Expr - vector
    Approximated solution.
  """
  util.Assert.eq(A.shape[0], b.shape[0])

  x = expr.zeros((A.shape[0], ))
  D = expr.diag(A)
  R = A - expr.diagflat(D)

  for i in xrange(_iter):
    x = (b - expr.dot(R, x)) / D
  return x
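# A NumPy-only reference of the same Jacobi iteration, handy for checking
# jacobi_method on small inputs. The diagonally dominant test system in the
# usage note below is an illustrative assumption.
def _jacobi_reference(A, b, _iter=100):
  import numpy as np
  x = np.zeros(A.shape[0])
  D = np.diag(A)
  R = A - np.diagflat(D)
  for i in range(_iter):
    x = (b - np.dot(R, x)) / D
  return x

# Example: for a diagonally dominant A the iteration converges to A^-1 b.
#   A = np.array([[4.0, 1.0], [2.0, 5.0]])
#   b = np.array([1.0, 2.0])
#   assert np.allclose(np.dot(A, _jacobi_reference(A, b)), b)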
def cgit(A, x):
  '''
  CGIT conjugate gradient iteration.

  z = cgit(A, x) generates an approximate solution to A*z = x.

  Args:
    A(Expr): matrix to be processed.
    x(Expr): the input vector.
  '''
  z = expr.zeros(x.shape)
  r = x
  rho = expr.sum(r * r).optimized().glom()
  #util.log_warn('rho:%s', rho)
  p = r

  for i in xrange(15):
    q = expr.dot(A, p)
    alpha = rho / expr.sum(p * q).optimized().glom()
    #util.log_warn('alpha:%s', alpha)
    z = z + p * alpha
    rho0 = rho
    r = r - q * alpha
    rho = expr.sum(r * r).optimized().glom()
    beta = rho / rho0
    #util.log_warn('beta:%s', beta)
    p = r + p * beta
  return z
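# A NumPy sketch of the same fixed-iteration conjugate gradient loop,
# assuming a symmetric positive-definite A; the test matrix in the usage
# note is an illustrative assumption built to be SPD.
def _cgit_reference(A, x, iters=15):
  import numpy as np
  z = np.zeros_like(x)
  r = x.copy()
  rho = np.sum(r * r)
  p = r.copy()
  for i in range(iters):
    q = np.dot(A, p)
    alpha = rho / np.sum(p * q)
    z = z + p * alpha
    rho0 = rho
    r = r - q * alpha
    rho = np.sum(r * r)
    p = r + p * (rho / rho0)
  return z

# Example:
#   M = np.random.randn(20, 20)
#   A = np.dot(M, M.T) + 20 * np.eye(20)   # SPD by construction
#   x = np.random.randn(20)
#   assert np.allclose(np.dot(A, _cgit_reference(A, x)), x, atol=1e-4)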
def benchmark_cholesky(ctx, timer):
  print "#worker:", ctx.num_workers
  #n = int(math.pow(ctx.num_workers, 1.0 / 3.0))
  n = int(math.sqrt(ctx.num_workers))
  #ARRAY_SIZE = 1600 * 4
  ARRAY_SIZE = 1600 * n

  util.log_warn('prepare data!')
  #A = np.random.randn(ARRAY_SIZE, ARRAY_SIZE)
  #A = np.dot(A, A.T)
  #A = expr.force(from_numpy(A, tile_hint=(ARRAY_SIZE/n, ARRAY_SIZE/n)))
  #A = expr.randn(ARRAY_SIZE, ARRAY_SIZE, tile_hint=(ARRAY_SIZE/n, ARRAY_SIZE/n))
  A = expr.randn(ARRAY_SIZE, ARRAY_SIZE)

  # FIXME: Ideally we should be able to get rid of tile_hint. However,
  # extent.change_partition_axis currently relies on the one-dimensional
  # size information to change tiling to grid tiling. It assumes that every
  # extent is partitioned with the same size. Trace extent.pyx to think
  # about how to fix it!
  A = expr.dot(A, expr.transpose(A),
               tile_hint=(ARRAY_SIZE, ARRAY_SIZE / ctx.num_workers)).force()

  util.log_warn('begin cholesky!')
  t1 = datetime.now()
  L = cholesky(A).glom()
  t2 = datetime.now()

  assert np.all(np.isclose(A.glom(), np.dot(L, L.T.conj())))
  cost_time = millis(t1, t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time / n)
def fn2():
  a = expr.ones((N, N))
  b = expr.ones((N, N))
  x = expr.dot(a, b)
  g = a + b + x
  return g
def bfs(ctx, dim):
  util.log_info("start computing......")
  sGenerate = time.time()
  current = eager(
      expr.shuffle(
          expr.ndarray((dim, 1), dtype=np.int64,
                       tile_hint=(dim / ctx.num_workers, 1)),
          make_current,
      ))
  linkMatrix = eager(
      expr.shuffle(
          expr.ndarray((dim, dim), dtype=np.int64,
                       tile_hint=(dim, dim / ctx.num_workers)),
          make_matrix,
      ))
  eGenerate = time.time()

  startCompute = time.time()
  while True:
    next = expr.dot(linkMatrix, current)
    formerNum = expr.count_nonzero(current)
    laterNum = expr.count_nonzero(next)
    # Stop once the frontier no longer grows, i.e. no new vertex was reached.
    hasNew = expr.equal(formerNum, laterNum).glom()
    current = next
    if hasNew:
      break
  current.evaluate()
  endCompute = time.time()
  return (eGenerate - sGenerate, endCompute - startCompute)
def update(self):
  '''
  gradient_update = (h(w) - y) * x
  h(w) = x * w
  '''
  yp = expr.dot(self.x, self.w)
  return self.x * (yp - self.y)
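# A NumPy check (illustrative shapes) that summing the per-row updates
# x * (h(w) - y) over samples yields the familiar batch gradient
# x'(xw - y) of the squared-error loss.
def _check_lstsq_update():
  import numpy as np
  x = np.random.randn(40, 3)
  y = np.random.randn(40, 1)
  w = np.random.randn(3, 1)
  yp = np.dot(x, w)
  per_row = x * (yp - y)  # (yp - y) broadcasts across the feature axis
  assert np.allclose(per_row.sum(axis=0), np.dot(x.T, yp - y).ravel())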
def _test_optimization_ordered(self):
  na = np.random.rand(1000, 1000)
  nb = np.random.rand(1000, 1000)
  a = expr.from_numpy(na)
  b = expr.from_numpy(nb)
  c = a - b
  d = a + c
  f = c[200:900, 200:900]
  g = d[200:900, 200:900]
  h = f - g
  i = f + h
  j = h[100:500, 100:500]
  k = i[100:500, 100:500]
  l = expr.dot(j, k)
  m = j + k
  n = k - l
  o = n - m
  q = o[100:200, 100:200]

  nc = na - nb
  nd = na + nc
  nf = nc[200:900, 200:900]
  ng = nd[200:900, 200:900]
  nh = nf - ng
  ni = nf + nh
  nj = nh[100:500, 100:500]
  nk = ni[100:500, 100:500]
  nl = np.dot(nj, nk)
  nm = nj + nk
  nn = nk - nl
  no = nn - nm
  nq = no[100:200, 100:200]

  Assert.all_eq(nq, q.optimized().glom(), tolerance=1e-10)
def cgit(A, x):
  '''
  CGIT conjugate gradient iteration.

  z = cgit(A, x) generates an approximate solution to A*z = x.

  Args:
    A(Expr): matrix to be processed.
    x(Expr): the input vector.
  '''
  z = expr.zeros(x.shape, tile_hint=(A.tile_shape()[1], 1))
  r = x
  rho = expr.sum(r * r).glom()
  #util.log_warn('rho:%s', rho)
  p = r

  for i in xrange(15):
    q = expr.dot(A, p, tile_hint=(A.tile_shape()[1], 1))
    alpha = rho / expr.sum(p * q).glom()
    #util.log_warn('alpha:%s', alpha)
    z = z + p * alpha
    rho0 = rho
    r = r - q * alpha
    rho = expr.sum(r * r).glom()
    beta = rho / rho0
    #util.log_warn('beta:%s', beta)
    p = r + p * beta
  return z
def gen_dot(a, b):
  if not hasattr(a, 'shape') or not hasattr(b, 'shape') or len(a.shape) * len(b.shape) == 0:
    return [a * b]
  if a.shape[0] == b.shape[0]:
    if len(a.shape) > 1:
      return [expr.dot(expr.transpose(a), b)]
    elif len(b.shape) == 1:
      return [expr.dot(a, b)]
  if len(a.shape) > 1 and a.shape[1] == b.shape[0]:
    return [expr.dot(a, b)]
  if len(b.shape) > 1 and a.shape[0] == b.shape[1]:
    return [expr.dot(b, a)]
  if len(a.shape) > 1 and len(b.shape) > 1 and a.shape[1] == b.shape[1]:
    return [expr.dot(a, expr.transpose(b))]
  return [a, b]
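# The branches above pick whichever orientation of dot is shape-compatible.
# A plain-NumPy illustration of the four 2-D cases (the shapes are
# illustrative assumptions):
#   a: (5, 3), b: (5, 4)  -> shared leading axis, contract it:  a'. b -> (3, 4)
#   a: (5, 3), b: (3, 4)  -> inner axes already aligned:        a . b -> (5, 4)
#   a: (5, 3), b: (4, 5)  -> multiply in the other order:       b . a -> (4, 3)
#   a: (5, 3), b: (4, 3)  -> trailing axes match, transpose b:  a . b' -> (5, 4)
def _gen_dot_shape_cases():
  import numpy as np
  assert np.dot(np.ones((5, 3)).T, np.ones((5, 4))).shape == (3, 4)
  assert np.dot(np.ones((5, 3)), np.ones((3, 4))).shape == (5, 4)
  assert np.dot(np.ones((4, 5)), np.ones((5, 3))).shape == (4, 3)
  assert np.dot(np.ones((5, 3)), np.ones((4, 3)).T).shape == (5, 4)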
def update(self):
  '''
  gradient_update = (h(w) - y) * x
  h(w) = 1 / (1 + e^(-(x*w)))
  '''
  g = expr.exp(expr.dot(self.x, self.w))
  yp = g / (g + 1)  # equivalent to 1 / (1 + e^(-(x*w)))
  return self.x * (yp - self.y)
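# A quick NumPy sketch (illustrative values) showing that the form
# e^z / (e^z + 1) computed above equals the logistic function
# 1 / (1 + e^-z) from the docstring.
def _check_sigmoid_forms():
  import numpy as np
  z = np.linspace(-5, 5, 11)
  g = np.exp(z)
  assert np.allclose(g / (g + 1), 1.0 / (1.0 + np.exp(-z)))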
def test_numpy_vec_2d(self):
  av = expr.arange(stop=100)
  bv = np.arange(7700).reshape(100, 77)
  na = np.arange(100)
  nb = np.arange(7700).reshape(100, 77)
  Assert.all_eq(expr.dot(av, bv).glom(), np.dot(na, nb))
def _step():
  yp = expr.dot(x, w)
  Assert.all_eq(yp.shape, y.shape)

  diff = x * (yp - y)
  grad = expr.sum(diff, axis=0).glom().reshape((N_DIM, 1))
  wprime = w - grad * 1e-6
  wprime.evaluate()
def _step():
  yp = expr.dot(x, w)
  Assert.all_eq(yp.shape, y.shape)

  diff = x * (yp - y)
  grad = expr.sum(diff, axis=0).glom().reshape((N_DIM, 1))
  wprime = w - grad * 1e-6
  expr.force(wprime)
def train_smo_1998(self, data, labels):
  '''
  Train an SVM model using the SMO (1998) algorithm.

  Args:
    data(Expr): points to be trained.
    labels(Expr): the correct labels of the training data.
  '''
  N = data.shape[0]  # Number of instances
  D = data.shape[1]  # Number of features

  self.b = 0.0
  self.alpha = expr.zeros((N, 1), dtype=np.float64,
                          tile_hint=[N / self.ctx.num_workers, 1]).force()

  # linear kernel
  kernel_results = expr.dot(data, expr.transpose(data),
                            tile_hint=[N / self.ctx.num_workers, N])

  labels = expr.force(labels)
  self.E = expr.zeros((N, 1), dtype=np.float64,
                      tile_hint=[N / self.ctx.num_workers, 1]).force()
  for i in xrange(N):
    self.E[i, 0] = self.b + expr.reduce(self.alpha, axis=None,
                                        dtype_fn=lambda input: input.dtype,
                                        local_reduce_fn=margin_mapper,
                                        accumulate_fn=np.add,
                                        fn_kw=dict(label=labels, data=kernel_results[:, i].force())).glom() - labels[i, 0]

  util.log_info("Starting SMO")
  it = 0
  num_changed = 0
  examine_all = True
  while (num_changed > 0 or examine_all) and (it < self.maxiter):
    util.log_info("Iteration:%d", it)

    num_changed = 0
    if examine_all:
      for i in xrange(N):
        num_changed += self.examine_example(i, N, labels, kernel_results)
    else:
      for i in xrange(N):
        if self.alpha[i, 0] > 0 and self.alpha[i, 0] < self.C:
          num_changed += self.examine_example(i, N, labels, kernel_results)
    it += 1

    if examine_all:
      examine_all = False
    elif num_changed == 0:
      examine_all = True

  self.w = expr.zeros((D, 1), dtype=np.float64).force()
  for i in xrange(D):
    self.w[i, 0] = expr.reduce(self.alpha, axis=None,
                               dtype_fn=lambda input: input.dtype,
                               local_reduce_fn=margin_mapper,
                               accumulate_fn=np.add,
                               fn_kw=dict(label=labels, data=expr.force(data[:, i]))).glom()
  self.usew_ = True
  print 'iteration finish:', it
  print 'b:', self.b
  print 'w:', self.w.glom()
def svd(A, k=None):
  """
  Stochastic SVD.

  Parameters
  ----------
  A : spartan matrix
    Array to compute the SVD on, of shape (M, N).
  k : int, optional
    Number of singular values and vectors to compute.

  The operations include matrix multiplication and QR decomposition. We
  parallelize both of them.

  Returns
  -------
  U : Spartan array of shape (M, k).
  S : numpy array of shape (k,).
  V : numpy array of shape (k, N).
  """
  if k is None:
    k = A.shape[1]

  ctx = blob_ctx.get()
  Omega = expr.randn(A.shape[1], k, tile_hint=(A.shape[1]/ctx.num_workers, k))

  r = A.shape[0] / ctx.num_workers
  Y = expr.dot(A, Omega, tile_hint=(r, k)).force()

  Q, R = qr(Y)

  B = expr.dot(expr.transpose(Q), A)
  BTB = expr.dot(B, expr.transpose(B)).glom()

  S, U_ = np.linalg.eig(BTB)
  S = np.sqrt(S)

  # Sort by eigenvalues from large to small.
  si = np.argsort(S)[::-1]
  S = S[si]
  U_ = U_[:, si]

  U = expr.dot(Q, U_).force()
  V = np.dot(np.dot(expr.transpose(B).glom(), U_),
             np.diag(np.ones(S.shape[0]) / S))

  return U, S, V.T
def margins(self, data):
  '''
  Calculate the margin of the given instances.

  Args:
    data(Expr): data to be calculated.
  '''
  return expr.dot(data, self.w) + self.b
def test_matmul(self):
  x = expr.arange(XDIM, dtype=np.int).astype(np.float64)
  y = expr.arange(YDIM, dtype=np.int).astype(np.float64)
  z = expr.dot(x, y)

  nx = np.arange(np.prod(XDIM), dtype=np.int).reshape(XDIM).astype(np.float64)
  ny = np.arange(np.prod(YDIM), dtype=np.int).reshape(YDIM).astype(np.float64)
  nz = np.dot(nx, ny)

  Assert.all_eq(z.glom(), nz)
def svd(A, k=None):
  """
  Stochastic SVD.

  Parameters
  ----------
  A : spartan matrix
    Array to compute the SVD on, of shape (M, N).
  k : int, optional
    Number of singular values and vectors to compute.

  The operations include matrix multiplication and QR decomposition. We
  parallelize both of them.

  Returns
  -------
  U : Spartan array of shape (M, k).
  S : numpy array of shape (k,).
  V : numpy array of shape (k, N).
  """
  if k is None:
    k = A.shape[1]

  Omega = expr.randn(A.shape[1], k)
  Y = expr.dot(A, Omega)
  Q, R = qr(Y)

  B = expr.dot(expr.transpose(Q), A)
  BTB = expr.dot(B, expr.transpose(B)).optimized().glom()

  S, U_ = np.linalg.eig(BTB)
  S = np.sqrt(S)

  # Sort by eigenvalues from large to small.
  si = np.argsort(S)[::-1]
  S = S[si]
  U_ = U_[:, si]

  U = expr.dot(Q, U_).optimized().force()
  V = np.dot(np.dot(expr.transpose(B).optimized().glom(), U_),
             np.diag(np.ones(S.shape[0]) / S))

  return U, S, V.T
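# A NumPy-only sketch of the same stochastic SVD recipe (random projection,
# QR, then an eigendecomposition of B*B'); the matrix size is an
# illustrative assumption, and the recovered singular values are compared
# against np.linalg.svd.
def _ssvd_reference():
  import numpy as np
  A = np.random.randn(200, 20)
  k = A.shape[1]
  Omega = np.random.randn(A.shape[1], k)
  Y = np.dot(A, Omega)
  Q, _ = np.linalg.qr(Y)
  B = np.dot(Q.T, A)
  S2, U_ = np.linalg.eig(np.dot(B, B.T))
  S = np.sqrt(np.real(S2))
  S = S[np.argsort(S)[::-1]]  # sort singular values from large to small
  assert np.allclose(S, np.linalg.svd(A, compute_uv=False), atol=1e-6)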
def test_transpose_dot(self):
  npa1 = np.random.random((401, 97))
  npa2 = np.random.random((401, 97))
  result1 = np.dot(npa1, np.transpose(npa2))
  #result2 = np.dot(np.transpose(npa1), npa2)

  t1 = expr.from_numpy(npa1)
  t2 = expr.from_numpy(npa2)
  t3 = expr.dot(t1, expr.transpose(t2))
  #t4 = expr.dot(expr.transpose(t1), t2)

  assert np.all(np.isclose(result1, t3.glom()))
def sparse_multiply(wts, p, p_tile_hint):
  avg_time = 0.0
  for i in range(num_iter):
    util.log_warn('iteration %d begin!', i)
    t1 = datetime.now()
    p = expr.dot(wts, p, tile_hint=p_tile_hint).force()
    t2 = datetime.now()
    time_cost = millis(t1, t2)
    print "iteration %d sparse * dense: %s ms" % (i, time_cost)
    avg_time += time_cost
  return avg_time / num_iter
def fn1():
  a = expr.ones((N, N))
  b = expr.ones((N, N))
  x = expr.dot(a, b)
  g = a + b + x

  t1 = time.time()
  print g.optimized()
  t2 = time.time()
  print t2 - t1
def qr(Y):
  '''
  Compute the thin QR factorization of a matrix.

  Factor the matrix Y as QR, where Q is orthonormal and R is
  upper-triangular.

  Parameters
  ----------
  Y : Spartan array of shape (M, K).

  Notes
  -----
  Y'Y must fit in memory. Since this QR decomposition is mainly used in
  stochastic SVD, K will be the rank of the matrix of shape (M, N), and the
  assumption is that the rank K is far less than M or N.

  Returns
  -------
  Q : Spartan array of shape (M, K).
  R : Numpy array of shape (K, K).
  '''
  # Since K is far less than M, the matrix multiplication is the bottleneck
  # rather than the local Cholesky decomposition or finding the inverse of
  # R, so we only parallelize the matrix multiplication. If K is really
  # large, we may consider using our Spartan Cholesky decomposition; for
  # now the numpy version works fine.

  # YTY = Y'Y. YTY has shape (K, K).
  YTY = expr.dot(expr.transpose(Y), Y).optimized().glom()

  # Do the Cholesky decomposition to get R.
  R = np.linalg.cholesky(YTY).T

  # Find the inverse of R.
  inv_R = np.linalg.inv(R)

  # Q = Y * inv(R)
  Q = expr.dot(Y, inv_R).optimized().force()

  return Q, R
def qr(Y):
  '''
  Compute the thin QR factorization of a matrix.

  Factor the matrix Y as QR, where Q is orthonormal and R is
  upper-triangular.

  Parameters
  ----------
  Y : Spartan array of shape (M, K).

  Notes
  -----
  Y'Y must fit in memory. Since this QR decomposition is mainly used in
  stochastic SVD, K will be the rank of the matrix of shape (M, N), and the
  assumption is that the rank K is far less than M or N.

  Returns
  -------
  Q : Spartan array of shape (M, K).
  R : Numpy array of shape (K, K).
  '''
  # Since K is far less than M, the matrix multiplication is the bottleneck
  # rather than the local Cholesky decomposition or finding the inverse of
  # R, so we only parallelize the matrix multiplication. If K is really
  # large, we may consider using our Spartan Cholesky decomposition; for
  # now the numpy version works fine.

  # YTY = Y'Y. YTY has shape (K, K).
  YTY = expr.dot(expr.transpose(Y), Y).optimized().glom()

  # Do the Cholesky decomposition to get R.
  R = np.linalg.cholesky(YTY).T

  # Find the inverse of R.
  inv_R = np.linalg.inv(R)

  # Q = Y * inv(R)
  Q = expr.dot(Y, inv_R).optimized().evaluate()

  return Q, R
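# A NumPy sketch of the Cholesky-QR trick used above, on an illustrative
# tall matrix: R is the (upper-triangular) Cholesky factor of Y'Y, and
# Q = Y * inv(R) comes out orthonormal with Q*R == Y.
def _cholesky_qr_reference():
  import numpy as np
  Y = np.random.randn(500, 10)
  R = np.linalg.cholesky(np.dot(Y.T, Y)).T
  Q = np.dot(Y, np.linalg.inv(R))
  assert np.allclose(np.dot(Q.T, Q), np.eye(10), atol=1e-8)
  assert np.allclose(np.dot(Q, R), Y)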
def transform(self, X):
  """Reduce the dimensionality of matrix X.

  Parameters
  ----------
  X : Spartan distributed array of shape (n_samples, n_features).

  Returns
  -------
  X_new : numpy.array of shape (n_samples, n_components), the
    reduced-dimension data.
  """
  X_transformed = X - self.mean_
  X_transformed = expr.dot(X_transformed, self.components_.T).glom()
  return X_transformed
def transform(self, X):
  """Reduce the dimensionality of matrix X.

  Parameters
  ----------
  X : Spartan distributed array of shape (n_samples, n_features).

  Returns
  -------
  X_new : numpy.array of shape (n_samples, n_components), the
    reduced-dimension data.
  """
  X_transformed = X - self.mean_
  X_transformed = expr.dot(X_transformed, self.components_.T).optimized().glom()
  return X_transformed
def inverse_transform(self, X):
  """Transform data back to its original space.

  Parameters
  ----------
  X : Spartan array or numpy array of shape (n_samples, n_components).

  Returns
  -------
  X_original : numpy array of shape (n_samples, n_features).
  """
  if isinstance(X, expr.Expr) or isinstance(X, array.distarray.DistArray):
    return (expr.dot(X, self.components_) + self.mean_).optimized().evaluate()
  else:
    return np.dot(X, self.components_) + self.mean_.glom()
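# A plain-NumPy round-trip sketch (illustrative data): with orthonormal
# components and n_components equal to the full dimensionality, transform
# followed by inverse_transform reconstructs X exactly.
def _check_pca_round_trip():
  import numpy as np
  X = np.random.randn(30, 5)
  mean = X.mean(axis=0)
  # Orthonormal components from an SVD of the centered data.
  _, _, components = np.linalg.svd(X - mean, full_matrices=False)
  X_t = np.dot(X - mean, components.T)      # transform
  X_back = np.dot(X_t, components) + mean   # inverse_transform
  assert np.allclose(X_back, X)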
def predict(model, new_data):
  '''
  Predict the label of the given instance.

  Args:
    model(dict): trained naive bayes model.
    new_data(Expr or DistArray): data to be predicted.
  '''
  scores_per_label_and_feature = model['scores_per_label_and_feature']

  scoring_vector = expr.dot(scores_per_label_and_feature, expr.transpose(new_data))
  # util.log_warn('scoring_vector:%s', scoring_vector.glom().T)

  return np.argmax(scoring_vector.glom())
def margin_one(self, arr):
  '''
  Calculate the margin of a given instance.

  Args:
    arr(Expr): data to be calculated.
  '''
  f = self.b
  if self.usew_:
    f += expr.dot(arr, self.w).glom()
  return f
def test_pagerank(self):
  _skip_if_travis()
  OUTLINKS_PER_PAGE = 10
  PAGES_PER_WORKER = 1000000
  num_pages = PAGES_PER_WORKER * self.ctx.num_workers

  wts = expr.shuffle(
      expr.ndarray((num_pages, num_pages),
                   dtype=np.float32,
                   tile_hint=(num_pages, PAGES_PER_WORKER / 8)),
      make_weights,
  )

  start = time.time()
  p = expr.eager(expr.ones((num_pages, 1),
                           tile_hint=(PAGES_PER_WORKER / 8, 1),
                           dtype=np.float32))
  expr.dot(wts, p, tile_hint=(PAGES_PER_WORKER / 8, 1)).evaluate()
  cost = time.time() - start
  self._verify_cost("pagerank", cost)
def inverse_transform(self, X):
  """Transform data back to its original space.

  Parameters
  ----------
  X : Spartan array or numpy array of shape (n_samples, n_components).

  Returns
  -------
  X_original : numpy array of shape (n_samples, n_features).
  """
  if isinstance(X, expr.Expr) or isinstance(X, array.distarray.DistArray):
    return (expr.dot(X, self.components_) + self.mean_).force()
  else:
    return np.dot(X, self.components_) + self.mean_.glom()
def test_sparse_operators(self):
  x = expr.sparse_diagonal(ARRAY_SIZE)
  #print x.glom().todense()
  y = x

  print 'test add'
  z = expr.add(x, y)
  print z.glom().todense()

  print 'test minus'
  z = expr.sub(x, y)
  print z.glom().todense()

  print 'test multiply'
  z = expr.dot(x, x)
  print z.glom().todense()
def test_matrix_mult(self):
  _skip_if_travis()
  N_POINTS = 2000
  x = expr.rand(N_POINTS, N_POINTS,
                tile_hint=(N_POINTS, N_POINTS / self.ctx.num_workers)).astype(np.float32)
  y = expr.rand(N_POINTS, N_POINTS,
                tile_hint=(N_POINTS / self.ctx.num_workers, N_POINTS)).astype(np.float32)
  x = expr.eager(x)
  y = expr.eager(y)

  start = time.time()
  for i in range(5):
    res = expr.dot(x, y, tile_hint=(N_POINTS, N_POINTS / self.ctx.num_workers))
    res.evaluate()
  cost = time.time() - start
  self._verify_cost("matrix_mult", cost)
def benchmark_pagerank(ctx, timer):
  num_pages = PAGES_PER_WORKER * ctx.num_workers
  util.log_info('Total pages: %s', num_pages)

  wts = eager(
      expr.shuffle(
          expr.ndarray((num_pages, num_pages),
                       dtype=np.float32,
                       tile_hint=(num_pages, PAGES_PER_WORKER / 8)),
          make_weights,
      ))

  p = eager(expr.ones((num_pages, 1),
                      tile_hint=(PAGES_PER_WORKER / 8, 1),
                      dtype=np.float32))

  for i in range(3):
    timer.time_op('pagerank', lambda: expr.dot(wts, p).force())
def fit(self, X, y):
  """Transform to distarray if it's a numpy array."""
  if isinstance(X, np.ndarray):
    X = expr.make_from_numpy(X)
  if isinstance(y, np.ndarray):
    y = expr.make_from_numpy(y)

  X, y, X_mean, y_mean, X_std = self._center_data(
      X, y, self.fit_intercept, self.normalize)

  N_DIM = X.shape[1]
  self._coef = np.random.randn(N_DIM, 1)

  for i in range(self.iterations):
    yp = expr.dot(X, self._coef)
    print expr.sum((yp - y) ** 2).glom()
    diff = X * (yp - y)
    grad = expr.sum(diff, axis=0).glom().reshape((N_DIM, 1))
    self._coef = self._coef - grad * 1e-6

  return self