def test_netflix_sgd(ctx):
  U = 100
  M = 100 * 100
  r = 20
  d = 8
  P_RATING = 1000.0 / (U * M)

  # create random factor and value matrices
  Mfactor = spartan.eager(spartan.rand(M, r).astype(np.float32))
  Ufactor = spartan.eager(spartan.rand(U, r).astype(np.float32))

  V = spartan.sparse_empty((U, M),
                           tile_hint=(divup(U, d), divup(M, d)),
                           dtype=np.float32)

  # V = spartan.shuffle(V, netflix.load_netflix_mapper,
  #                     kw={ 'load_file' : '/big1/netflix.zip' })
  V = spartan.eager(
    spartan.tocoo(
      spartan.shuffle(V, netflix.fake_netflix_mapper,
                      target=V, kw={'p_rating': P_RATING})))

  for i in range(2):
    _ = netflix.sgd(V, Mfactor, Ufactor).force()

def benchmark_convnet(ctx, timer):
  image_size = BASE_IMG_SIZE
  minibatch = 64
  #minibatch = ctx.num_workers

  hint = util.divup(image_size, sqrt(ctx.num_workers))
  tile_hint = (util.divup(minibatch, ctx.num_workers),
               N_COLORS, image_size, image_size)
  util.log_info('Hint: %s', tile_hint)

  images = expr.eager(expr.ones((minibatch, N_COLORS, image_size, image_size),
                                tile_hint=tile_hint))

  w1 = expr.eager(expr.ones((N_FILTERS, N_COLORS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))
  w2 = expr.eager(expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))
  w3 = expr.eager(expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))

  def _():
    conv1 = stencil.stencil(images, w1, 2)
    pool1 = stencil.maxpool(conv1)

    conv2 = stencil.stencil(pool1, w2, 2)
    pool2 = stencil.maxpool(conv2)

    conv3 = stencil.stencil(pool2, w3, 2)
    pool3 = stencil.maxpool(conv3)
    expr.force(pool3)

  # force parakeet functions to compile before timing.
  _()

  for i in range(2):
    timer.time_op('convnet', _)

def benchmark_netflix_sgd(ctx, timer):
  d = ctx.num_workers

  V = spartan.sparse_empty((U, M),
                           tile_hint=(divup(U, d), divup(M, d)),
                           dtype=np.float32)

  V = timer.time_op(
    'prep',
    lambda: spartan.eager(
      spartan.tocoo(
        spartan.shuffle(V, netflix.fake_netflix_mapper,
                        target=V, kw={'p_rating': P_RATING}))))

  # V = spartan.shuffle(V, netflix.load_netflix_mapper,
  #                     kw={ 'load_file' : '/big1/netflix.zip' })

  for r in [25, 50]:
    Mfactor = spartan.eager(
      spartan.rand(M, r, tile_hint=(divup(M, d), r)).astype(np.float32))
    Ufactor = spartan.eager(
      spartan.rand(U, r, tile_hint=(divup(U, d), r)).astype(np.float32))

    timer.time_op('rank %d' % r, netflix.sgd(V, Mfactor, Ufactor).force)

def test_convnet(ctx):
  hint = util.divup(64, sqrt(ctx.num_workers))
  images = expr.eager(
    expr.ones((N_IMGS,) + IMG_SIZE,
              tile_hint=(N_IMGS, N_COLORS, hint, hint)))

  w1 = expr.eager(
    expr.ones((N_FILTERS, N_COLORS) + FILTER_SIZE,
              tile_hint=ONE_TILE))

  conv1 = stencil.stencil(images, w1, 2)
  pool1 = stencil.maxpool(conv1)

  w2 = expr.eager(
    expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE,
              tile_hint=ONE_TILE))

  conv2 = stencil.stencil(pool1, w2, 2)
  pool2 = stencil.maxpool(conv2)

  w3 = expr.eager(
    expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE,
              tile_hint=ONE_TILE))

  conv3 = stencil.stencil(pool2, w3, 2)
  pool3 = stencil.maxpool(conv3)

  util.log_info(pool3.shape)

def test_kmeans_expr(self):
  ctx = spartan.blob_ctx.get()
  pts = expr.rand(N_PTS, N_DIM,
                  tile_hint=(divup(N_PTS, ctx.num_workers), N_DIM)).force()

  k = KMeans(N_CENTERS, ITER)
  k.fit(pts)

def benchmark_svm(ctx, timer):
  print "#worker:", ctx.num_workers
  max_iter = 2
  #N = 200000 * ctx.num_workers
  N = 1000 * 64
  D = 64

  # Create the training data.
  data = expr.randn(N, D, dtype=np.float64,
                    tile_hint=(N, util.divup(D, ctx.num_workers)))
  labels = expr.shuffle(data, _init_label_mapper,
                        shape_hint=(data.shape[0], 1))

  t1 = datetime.now()
  w = fit(data, labels, T=max_iter).force()
  t2 = datetime.now()
  util.log_warn('train time per iteration: %s ms, final w: %s',
                millis(t1, t2) / max_iter, w.glom().T)

  # Check predictions on fresh points; a point is labeled +1 when its
  # first feature dominates the second, -1 otherwise.
  correct = 0
  for i in range(10):
    new_data = expr.randn(1, D, dtype=np.float64, tile_hint=[1, D])
    new_label = predict(w, new_data)
    #print 'point %s, predict %s' % (new_data.glom(), new_label)

    new_data = new_data.glom()
    if (new_data[0, 0] >= new_data[0, 1] and new_label == 1.0) or \
       (new_data[0, 0] < new_data[0, 1] and new_label == -1.0):
      correct += 1

  print 'predict precision:', correct * 1.0 / 10

def precompute(self):
  '''Precompute the k most similar items for each item.

  After this function returns, two attributes will be created.

  Attributes
  ----------
  top_k_similar_table : Numpy array of shape (N, k).
    Records the similarity scores of the k most similar items for each item.
  top_k_similar_indices : Numpy array of shape (N, k).
    Records the indices of the k most similar items for each item.
  '''
  M = self.rating_table.shape[0]
  N = self.rating_table.shape[1]
  self.similarity_table = expr.shuffle(
    self.rating_table, _similarity_mapper,
    kw={'item_norm': self._get_norm_of_each_item(self.rating_table),
        'step': util.divup(self.rating_table.shape[1],
                           blob_ctx.get().num_workers)},
    shape_hint=(N, N))

  # Release the memory for item_norm
  top_k_similar_indices = expr.zeros((N, self.k), dtype=np.int)

  # Find the top-k similar items for each item.
  # Store the similarity scores into top_k_similar_table and the indices
  # of the top-k items into top_k_similar_indices.
  cost = np.prod(top_k_similar_indices.shape)
  top_k_similar_table = expr.shuffle(
    self.similarity_table, _select_most_k_similar_mapper,
    kw={'top_k_similar_indices': top_k_similar_indices, 'k': self.k},
    shape_hint=(N, self.k),
    cost_hint={hash(top_k_similar_indices):
               {'00': 0, '01': cost, '10': cost, '11': cost}})

  self.top_k_similar_table = top_k_similar_table.optimized().glom()
  self.top_k_similar_indices = top_k_similar_indices.optimized().glom()

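# The mapper internals are not shown above; the self-contained NumPy sketch
# below only illustrates the row-wise top-k selection that
# _select_most_k_similar_mapper performs conceptually. The argsort-based
# implementation here is an assumption, not the actual mapper code.
import numpy as np

def _select_top_k_sketch(similarity, k):
  # For each row (item), take the indices of its k most similar items
  # and the corresponding similarity scores, in descending order.
  indices = np.argsort(-similarity, axis=1)[:, :k]
  scores = -np.sort(-similarity, axis=1)[:, :k]
  return scores, indices

# Usage on a toy 6x6 item-item similarity table:
#   scores, indices = _select_top_k_sketch(np.random.rand(6, 6), k=3)
#   scores.shape, indices.shape  # -> (6, 3), (6, 3)
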
def benchmark_matmul(ctx, timer):
  N = int(1000 * math.pow(ctx.num_workers, 1.0 / 3.0))
  #N = 4000
  M = util.divup(N, ctx.num_workers)
  T = util.divup(N, math.sqrt(ctx.num_workers))

  util.log_info('Testing with %d workers, N = %d, tile_size=%s',
                ctx.num_workers, N, T)

  #x = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(N, M)))
  #y = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(N, M)))
  x = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(T, T)))
  y = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(T, T)))

  #print expr.glom(expr.dot(x, y))
  #print expr.dag(expr.dot(x, y))

  def _step():
    expr.evaluate(expr.dot(x, y))

  timer.time_op('matmul', _step)

def start_cluster(num_workers, use_cluster_workers):
  '''Start a cluster with ``num_workers`` workers.

  If ``use_cluster_workers`` is True, use the remote workers defined in
  `spartan.config`. Otherwise, all workers are spawned on the localhost.

  :param num_workers: Total number of workers to start.
  :param use_cluster_workers: Whether to spawn workers on remote hosts.
  '''
  rpc.set_default_timeout(FLAGS.default_rpc_timeout)

  # Clean the checkpoint directory.
  if os.path.exists(FLAGS.checkpoint_path):
    shutil.rmtree(FLAGS.checkpoint_path)

  master = spartan.master.Master(FLAGS.port_base, num_workers)
  ssh_processes = []
  if not use_cluster_workers:
    start_remote_worker('localhost', 0, num_workers)
  else:
    available_workers = sum([cnt for _, cnt in FLAGS.hosts])
    assert available_workers >= num_workers, 'Insufficient slots to run all workers.'

    count = 0
    num_hosts = len(FLAGS.hosts)
    for worker, total_tasks in FLAGS.hosts:
      if FLAGS.assign_mode == AssignMode.BY_CORE:
        sz = total_tasks
      else:
        sz = util.divup(num_workers, num_hosts)

      sz = min(sz, num_workers - count)
      ssh_processes.append(start_remote_worker(worker, count, count + sz))
      count += sz
      if count == num_workers:
        break

  master.wait_for_initialization()

  # Kill the now-unnecessary ssh processes.
  # Fegin: if we kill these processes, we can't get logs from the workers.
  #for process in ssh_processes:
  #  process.kill()
  return master

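# A minimal usage sketch for start_cluster(), assuming the spartan.config
# FLAGS have already been parsed before this point. `master.shutdown()` is
# an assumption here; check spartan.master.Master for the actual teardown
# entry point.
def _example_local_cluster():
  master = start_cluster(4, use_cluster_workers=False)  # 4 local workers
  # ... run distributed spartan expressions against `master` ...
  master.shutdown()  # assumed teardown method
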
def benchmark_als(ctx, timer):
  print "#worker:", ctx.num_workers
  #USER_SIZE = 100 * ctx.num_workers
  USER_SIZE = 320
  #USER_SIZE = 200 * 64
  MOVIE_SIZE = 12800
  num_features = 20
  num_iter = 2

  A = expr.randint(USER_SIZE, MOVIE_SIZE, low=0, high=5,
                   tile_hint=(USER_SIZE,
                              util.divup(MOVIE_SIZE, ctx.num_workers)))
  #A = expr.randint(USER_SIZE, MOVIE_SIZE, low=0, high=5)

  util.log_warn('begin als!')
  t1 = datetime.now()
  U, M = als(A, implicit_feedback=True, num_features=num_features,
             num_iter=num_iter)
  U.force()
  M.force()
  t2 = datetime.now()
  cost_time = millis(t1, t2)
  print "total cost time: %s ms, per iter cost time: %s ms" % (
      cost_time, cost_time / num_iter)

def benchmark_pr(ctx, timer):
  num_pages = 300 * 1000 * 3 * ctx.num_workers
  num_outlinks = 10
  num_iter = 1  # sparse_multiply is invoked once below
  density = num_outlinks * 1.0 / num_pages
  same_site_prob = 0.9
  print "#worker:", ctx.num_workers

  col_step = util.divup(num_pages, ctx.num_workers)
  wts_tile_hint = [num_pages, col_step]
  p_tile_hint = [col_step, 1]

  #wts = expr.sparse_diagonal((num_pages, num_pages), dtype=np.float32,
  #                           tile_hint=wts_tile_hint)
  #wts = expr.eager(
  #  expr.sparse_rand((num_pages, num_pages),
  #                   density=density,
  #                   format='csr',
  #                   dtype=np.float32,
  #                   tile_hint=wts_tile_hint))
  wts = pagerank_sparse(num_pages, num_outlinks, same_site_prob)

  #res = wts.glom().todense()
  #for i in range(res.shape[0]):
  #  l = []
  #  for j in range(res.shape[1]):
  #    l.append(round(res[i, j], 1))
  #  print l

  #p = expr.sparse_empty((num_pages, 1), dtype=np.float32,
  #                      tile_hint=p_tile_hint).evaluate()
  #for i in range(num_pages):
  #  p[i, 0] = 1
  #p = expr.sparse_rand((num_pages, 1), density=1.0, format='csc',
  #                     dtype=np.float32, tile_hint=p_tile_hint)
  p = expr.rand(num_pages, 1).astype(np.float32)

  #q = expr.zeros((num_pages, 1), dtype=np.float32, tile_hint=p_tile_hint).evaluate()
  #q[:] = p.glom().todense()
  #q = expr.lazify(q)

  #r = expr.dot(wts, p)
  #print r.glom()

  t1 = datetime.now()
  sparse_multiply(wts, p, p_tile_hint)
  t2 = datetime.now()
  cost_time = millis(t1, t2)
  print 'current benchmark:', cost_time / num_iter / 1000

def test_stencil(ctx):
  st = time.time()

  IMG_SIZE = int(8 * math.sqrt(ctx.num_workers))
  FILT_SIZE = 8
  N = 8
  F = 32

  tile_size = util.divup(IMG_SIZE, math.sqrt(ctx.num_workers))
  images = expr.ones((N, 3, IMG_SIZE, IMG_SIZE),
                     dtype=np.float,
                     tile_hint=(N, 3, tile_size, tile_size))
  filters = expr.ones((F, 3, FILT_SIZE, FILT_SIZE),
                      dtype=np.float,
                      tile_hint=ONE_TILE)

  result = stencil.stencil(images, filters, 1)

  ed = time.time()
  print ed - st
