def shortestPath(ctx, dim, numIters):
    """Time repeated add/min updates of a distance column against a link matrix.

    A ``(dim, 1)`` int64 array is passed through ``expr.shuffle`` with
    ``make_dist`` and a ``(dim, dim)`` int64 array with ``make_matrix``;
    both are wrapped in ``eager``.  Each of the ``numIters`` rounds replaces
    the distance array with ``expr.add(dist, linkMatrix).min(axis=0)``
    reshaped back to ``(dim, 1)``.

    Args:
        ctx: context object; only ``ctx.num_workers`` is read, to size tiles.
        dim: side length of the link matrix.
        numIters: number of update rounds.

    Returns:
        Wall-clock seconds spent in the update loop plus the final
        ``evaluate()``.  Array construction is not included.
    """
    workers = ctx.num_workers

    # NOTE(review): `/` is true division on Python 3, which would make the
    # tile hints floats -- confirm the target interpreter.
    dist_shell = expr.ndarray(
        (dim, 1),
        dtype=np.int64,
        tile_hint=(dim / workers, 1),
    )
    dist = eager(expr.shuffle(dist_shell, make_dist))

    link_shell = expr.ndarray(
        (dim, dim),
        dtype=np.int64,
        tile_hint=(dim, dim / workers),
    )
    linkMatrix = eager(expr.shuffle(link_shell, make_matrix))

    startCompute = time.time()
    for _ in range(numIters):
        dist = expr.add(dist, linkMatrix).min(axis=0).reshape(dim, 1)
    # NOTE(review): evaluation is forced once, after the loop, right before
    # the clock is stopped -- confirm this matches the intended nesting.
    dist.evaluate()
    endCompute = time.time()

    return endCompute - startCompute
def test_netflix_sgd(ctx):
    """Run ``netflix.sgd`` twice over a small synthetic ratings matrix.

    The ratings matrix has shape ``(100, 10000)`` and is filled by
    ``netflix.fake_netflix_mapper`` with ``p_rating = 1000 / (100 * 10000)``.
    Both factor matrices are random float32 arrays with 20 columns.  The
    ``ctx`` argument is not used by the body.
    """
    num_users = 100
    num_movies = 100 * 100
    rank = 20
    splits = 8
    p_rating = 1000.0 / (num_users * num_movies)

    # Random factor matrices, materialised up front as float32.
    Mfactor = spartan.eager(spartan.rand(num_movies, rank).astype(np.float32))
    Ufactor = spartan.eager(spartan.rand(num_users, rank).astype(np.float32))

    empty = spartan.sparse_empty(
        (num_users, num_movies),
        tile_hint=(divup(num_users, splits), divup(num_movies, splits)),
        dtype=np.float32,
    )

    # Fake ratings are used here; netflix.load_netflix_mapper (with a
    # 'load_file' keyword) was the alternative left commented out.
    filled = spartan.shuffle(
        empty,
        netflix.fake_netflix_mapper,
        target=empty,
        kw={'p_rating': p_rating},
    )
    V = spartan.eager(spartan.tocoo(filled))

    for _ in range(2):
        netflix.sgd(V, Mfactor, Ufactor).force()
def bfs(ctx, dim):
    """Time a frontier expansion that repeats until the non-zero count settles.

    A ``(dim, 1)`` int64 array (shuffled with ``make_current``) is
    repeatedly replaced by ``expr.dot(linkMatrix, current)``.  The loop
    stops after the first round in which ``expr.count_nonzero`` reports the
    same value before and after the product.

    Args:
        ctx: context object; only ``ctx.num_workers`` is read, to size tiles.
        dim: side length of the link matrix.

    Returns:
        ``(generate_seconds, compute_seconds)`` -- wall-clock time spent
        building the two arrays, and time spent in the loop plus the final
        ``evaluate()``.
    """
    util.log_info("start to computing......")

    sGenerate = time.time()
    workers = ctx.num_workers
    # NOTE(review): `/` is true division on Python 3, which would make the
    # tile hints floats -- confirm the target interpreter.
    frontier_shell = expr.ndarray(
        (dim, 1),
        dtype=np.int64,
        tile_hint=(dim / workers, 1),
    )
    frontier = eager(expr.shuffle(frontier_shell, make_current))

    link_shell = expr.ndarray(
        (dim, dim),
        dtype=np.int64,
        tile_hint=(dim, dim / workers),
    )
    linkMatrix = eager(expr.shuffle(link_shell, make_matrix))
    eGenerate = time.time()

    startCompute = time.time()
    settled = False
    while not settled:
        expanded = expr.dot(linkMatrix, frontier)
        count_before = expr.count_nonzero(frontier)
        count_after = expr.count_nonzero(expanded)
        # True once a round leaves the non-zero count unchanged.
        settled = expr.equal(count_before, count_after).glom()
        frontier = expanded
    # NOTE(review): the final evaluate() is placed after the loop, right
    # before the clock is stopped -- confirm this matches the intended nesting.
    frontier.evaluate()
    endCompute = time.time()

    return (eGenerate - sGenerate, endCompute - startCompute)
def test_netflix_sgd(ctx):
    """Evaluate ``netflix.sgd`` twice on a synthetic ``(100, 10000)`` matrix.

    Factor matrices are random float32 arrays with 20 columns; the ratings
    matrix is produced by shuffling a ``sparse_empty`` array through
    ``netflix.fake_netflix_mapper`` and converting it with ``tocoo``.  The
    ``ctx`` argument is not used by the body.
    """
    user_count, movie_count = 100, 100 * 100
    factor_rank = 20
    tile_splits = 8
    rating_prob = 1000.0 / (user_count * movie_count)

    # Random factor matrices (movies first, then users).
    Mfactor = spartan.eager(
        spartan.rand(movie_count, factor_rank).astype(np.float32))
    Ufactor = spartan.eager(
        spartan.rand(user_count, factor_rank).astype(np.float32))

    tile_shape = (divup(user_count, tile_splits), divup(movie_count, tile_splits))
    ratings = spartan.sparse_empty(
        (user_count, movie_count), tile_hint=tile_shape, dtype=np.float32)

    # Synthetic ratings only; a loader-based variant using
    # netflix.load_netflix_mapper was left commented out here.
    ratings = spartan.shuffle(
        ratings,
        netflix.fake_netflix_mapper,
        target=ratings,
        kw={'p_rating': rating_prob},
    )
    V = spartan.eager(spartan.tocoo(ratings))

    for _ in range(2):
        netflix.sgd(V, Mfactor, Ufactor).evaluate()
def benchmark_netflix_sgd(ctx, timer):
    """Time ``netflix.sgd`` for factor ranks 25 and 50.

    The ratings matrix is built once under the ``'prep'`` timer label from
    the module-level ``U``, ``M`` and ``P_RATING`` values, tiled by
    ``ctx.num_workers``.  For each rank, fresh random float32 factor
    matrices are created and the ``force`` method of the resulting SGD
    expression is handed to ``timer.time_op``.

    Args:
        ctx: context object; ``ctx.num_workers`` sets the tile split count.
        timer: object providing ``time_op(label, callable)``.
    """
    d = ctx.num_workers

    V = spartan.sparse_empty(
        (U, M),
        tile_hint=(divup(U, d), divup(M, d)),
        dtype=np.float32,
    )

    def _prepare():
        # Reads ``V`` at call time, exactly as the original lambda did.
        shuffled = spartan.shuffle(
            V,
            netflix.fake_netflix_mapper,
            target=V,
            kw={'p_rating': P_RATING},
        )
        return spartan.eager(spartan.tocoo(shuffled))

    # Synthetic ratings only; loading via netflix.load_netflix_mapper was
    # left commented out at this point.
    V = timer.time_op('prep', _prepare)

    for r in (25, 50):
        movie_tiles = (divup(M, d), r)
        user_tiles = (divup(U, d), r)
        Mfactor = spartan.eager(
            spartan.rand(M, r, tile_hint=movie_tiles).astype(np.float32))
        Ufactor = spartan.eager(
            spartan.rand(U, r, tile_hint=user_tiles).astype(np.float32))

        sgd_expr = netflix.sgd(V, Mfactor, Ufactor)
        timer.time_op('rank %d' % r, sgd_expr.force)
def connectedConponents(ctx, dim, numIters):
    """Time an iterated dot / logical-or accumulation over a link matrix.

    Two ``(dim, dim)`` int64 arrays are built with ``make_matrix``.  Starting
    from ``logical_or(eye, linkMatrix)``, each of the ``numIters`` rounds
    multiplies ``power`` by ``linkMatrix`` and ORs the product into the
    running result.  Finally the result is AND-ed with its own transpose and
    evaluated.

    Args:
        ctx: context object; only ``ctx.num_workers`` is read, to size tiles.
        dim: side length of the matrices.
        numIters: number of multiply-and-accumulate rounds.

    Returns:
        Wall-clock seconds from the initial ``logical_or`` through the final
        ``evaluate()``.  Matrix construction is not included.
    """
    # NOTE(review): `/` is true division on Python 3, which would make this
    # tile hint a float -- confirm the target interpreter.
    row_tiles = (dim / ctx.num_workers, dim)

    def _shuffled_matrix():
        blank = expr.ndarray((dim, dim), dtype=np.int64, tile_hint=row_tiles)
        return eager(expr.shuffle(blank, make_matrix))

    linkMatrix = _shuffled_matrix()
    power = _shuffled_matrix()
    eye = expr.eye(dim, tile_hint=row_tiles)

    startCompute = time.time()
    result = expr.logical_or(eye, linkMatrix).optimized().glom()
    for _ in range(numIters):
        power = expr.dot(power, linkMatrix).optimized().glom()
        result = expr.logical_or(result, power)
        # NOTE(review): this forcing step is read as part of the loop body,
        # mirroring the per-round glom() of ``power`` -- confirm nesting.
        result.optimized().glom()

    mirrored = expr.transpose(result.optimized())
    final = expr.logical_and(result, mirrored).optimized().evaluate()
    endCompute = time.time()

    return endCompute - startCompute
def benchmark_pagerank(ctx, timer):
    """Time three forced evaluations of ``expr.dot(wts, p)``.

    ``wts`` is a ``(num_pages, num_pages)`` float32 array shuffled through
    ``make_weights`` and ``p`` is a ``(num_pages, 1)`` float32 array of ones,
    where ``num_pages = PAGES_PER_WORKER * ctx.num_workers``.

    Args:
        ctx: context object; only ``ctx.num_workers`` is read.
        timer: object providing ``time_op(label, callable)``.
    """
    num_pages = PAGES_PER_WORKER * ctx.num_workers
    util.log_info('Total pages: %s', num_pages)

    # NOTE(review): `/` is true division on Python 3, which would make this
    # tile width a float -- confirm the target interpreter.
    tile_width = PAGES_PER_WORKER / 8

    weights_shell = expr.ndarray(
        (num_pages, num_pages),
        dtype=np.float32,
        tile_hint=(num_pages, tile_width),
    )
    wts = eager(expr.shuffle(weights_shell, make_weights))

    p = eager(
        expr.ones((num_pages, 1), tile_hint=(tile_width, 1), dtype=np.float32))

    def _one_product():
        return expr.dot(wts, p).force()

    for _ in range(3):
        timer.time_op('pagerank', _one_product)
def saveAsTextFile(ctx, dim):
    """Build an eager ``(dim, dim)`` int32 matrix via ``make_matrix``.

    NOTE(review): despite the name, no save or write step is visible in this
    body and nothing is returned -- confirm whether the function is
    unfinished.

    Args:
        ctx: context object; only ``ctx.num_workers`` is read, to size tiles.
        dim: side length of the matrix.
    """
    # NOTE(review): `/` is true division on Python 3, which would make this
    # tile width a float -- confirm the target interpreter.
    column_tiles = (dim, dim / ctx.num_workers)
    blank = expr.ndarray((dim, dim), dtype=np.int32, tile_hint=column_tiles)
    matrix = eager(expr.shuffle(blank, make_matrix))
def benchmark_pagerank(ctx, timer):
    """Benchmark ``expr.dot(wts, p).force()`` three times under ``'pagerank'``.

    The page count is ``PAGES_PER_WORKER * ctx.num_workers``.  The weight
    matrix is square with that side length and comes from shuffling an
    ``expr.ndarray`` through ``make_weights``; the vector ``p`` is all ones.
    Both are float32 and wrapped in ``eager`` before timing starts.

    Args:
        ctx: context object; only ``ctx.num_workers`` is read.
        timer: object providing ``time_op(label, callable)``.
    """
    page_total = PAGES_PER_WORKER * ctx.num_workers
    util.log_info('Total pages: %s', page_total)

    # NOTE(review): `/` is true division on Python 3, which would make these
    # tile hints floats -- confirm the target interpreter.
    wts = eager(
        expr.shuffle(
            expr.ndarray(
                (page_total, page_total),
                dtype=np.float32,
                tile_hint=(page_total, PAGES_PER_WORKER / 8),
            ),
            make_weights,
        )
    )
    ones_column = expr.ones(
        (page_total, 1),
        tile_hint=(PAGES_PER_WORKER / 8, 1),
        dtype=np.float32,
    )
    p = eager(ones_column)

    rounds_left = 3
    while rounds_left > 0:
        timer.time_op('pagerank', lambda: expr.dot(wts, p).force())
        rounds_left -= 1
def benchmark_netflix_sgd(ctx, timer):
    """Benchmark ``netflix.sgd`` at ranks 25 and 50 on synthetic ratings.

    Ratings-matrix preparation is timed under the ``"prep"`` label.  Each
    rank is then timed under ``"rank <r>"`` by passing the SGD expression's
    bound ``force`` method to ``timer.time_op``.  Shapes and the rating
    probability come from the module-level ``U``, ``M`` and ``P_RATING``.

    Args:
        ctx: context object; ``ctx.num_workers`` sets the tile split count.
        timer: object providing ``time_op(label, callable)``.
    """
    d = ctx.num_workers
    ratings_tiles = (divup(U, d), divup(M, d))
    V = spartan.sparse_empty((U, M), tile_hint=ratings_tiles, dtype=np.float32)

    def _build_ratings():
        # ``V`` is looked up when this runs, matching the original lambda.
        populated = spartan.shuffle(
            V, netflix.fake_netflix_mapper, target=V, kw={"p_rating": P_RATING}
        )
        return spartan.eager(spartan.tocoo(populated))

    V = timer.time_op("prep", _build_ratings)

    # The real-data path (netflix.load_netflix_mapper with a 'load_file'
    # keyword) was left commented out; only fake ratings are used.
    for r in (25, 50):
        raw_movies = spartan.rand(M, r, tile_hint=(divup(M, d), r))
        Mfactor = spartan.eager(raw_movies.astype(np.float32))

        raw_users = spartan.rand(U, r, tile_hint=(divup(U, d), r))
        Ufactor = spartan.eager(raw_users.astype(np.float32))

        timer.time_op("rank %d" % r, netflix.sgd(V, Mfactor, Ufactor).force)
def pagerankDistributed(ctx, numPage, numIters, alpha):
    """Time ``numIters`` chained ``expr.dot(linkMatrix, rank)`` products.

    ``rank`` starts as a ``(numPage, 1)`` float32 array of ones and
    ``linkMatrix`` is a ``(numPage, numPage)`` float32 array shuffled through
    ``make_weights``.  Each product is requested with a tile hint of
    ``(numPage, numPage/10)``.

    Args:
        ctx: context object; only ``ctx.num_workers`` is read, to size tiles.
        numPage: number of pages (side length of the link matrix).
        numIters: number of products to chain.
        alpha: accepted but not used by the current body; the damped update
            that referenced it was left commented out.

    Returns:
        ``(generate_seconds, compute_seconds)`` -- wall-clock time spent
        building the inputs, and time spent in the loop plus the final
        ``evaluate()``.
    """
    sGenerate = time.time()
    workers = ctx.num_workers
    # NOTE(review): `/` is true division on Python 3, which would make the
    # tile hints floats -- confirm the target interpreter.
    rank = eager(
        expr.ones(
            (numPage, 1),
            tile_hint=(numPage / workers, 1),
            dtype=np.float32,
        )
    )
    link_shell = expr.ndarray(
        (numPage, numPage),
        dtype=np.float32,
        tile_hint=(numPage, numPage / workers),
    )
    linkMatrix = eager(expr.shuffle(link_shell, make_weights))
    eGenerate = time.time()
    util.log_info("**pagerank** rank init finished")

    startCompute = time.time()
    for _ in range(numIters):
        rank = expr.dot(linkMatrix, rank, tile_hint=(numPage, numPage / 10))
    # NOTE(review): evaluation is forced once, after the loop, right before
    # the clock is stopped -- confirm this matches the intended nesting.
    rank.evaluate()
    endCompute = time.time()
    util.log_info("**pagerank** compute finished")

    return (eGenerate - sGenerate, endCompute - startCompute)
def benchmark_stdev(ctx, timer):
    """Benchmark a forced, optimized ``highlight_image`` over random input.

    The input is an eager ``S.randn`` array whose first dimension is
    ``ctx.num_workers`` and whose remaining dimensions are the module-level
    ``W`` and ``H``.

    Args:
        ctx: context object; only ``ctx.num_workers`` is read.
        timer: object providing ``benchmark_op(callable)``.
    """
    X = S.eager(S.randn(ctx.num_workers, W, H))

    def _run_once():
        return highlight_image(X).optimized().force()

    timer.benchmark_op(_run_once)
def loadMatrix(ctx, filePath, numPage):
    """Load a sparse matrix from ``filePath`` and return it wrapped in ``eager``.

    The file is read with ``expr.from_file_parallel`` using the ``"numpy"``
    format argument, ``sparse=True`` and a tile hint of
    ``(numPage, numPage/10)``.  The ``ctx`` argument is not used by the body.

    Args:
        ctx: unused.
        filePath: path handed to ``expr.from_file_parallel``.
        numPage: value used to size the tile hint.
    """
    # NOTE(review): `/` is true division on Python 3, which would make the
    # second tile dimension a float -- confirm the target interpreter.
    tiles = (numPage, numPage / 10)
    loaded = expr.from_file_parallel(
        filePath,
        "numpy",
        sparse=True,
        tile_hint=tiles,
    )
    return eager(loaded)
def benchmark_stdev(ctx, timer):
    """Benchmark an evaluated, optimized ``highlight_image`` over random input.

    A random array of shape ``(ctx.num_workers, W, H)`` is created with
    ``S.randn`` and made eager; ``W`` and ``H`` are module-level values.
    The timed callable then builds ``highlight_image(X)``, optimizes it and
    calls ``evaluate()``.

    Args:
        ctx: context object; only ``ctx.num_workers`` is read.
        timer: object providing ``benchmark_op(callable)``.
    """
    random_stack = S.randn(ctx.num_workers, W, H)
    X = S.eager(random_stack)

    def _evaluate_highlight():
        optimized = highlight_image(X).optimized()
        return optimized.evaluate()

    timer.benchmark_op(_evaluate_highlight)