def __parallel_run_ray(self, run_async=False):
    """ Initializes a Ray pool. Asynchronous pools are not implemented yet. """
    from ray.util.multiprocessing.pool import Pool

    def set_niceness(niceness):  # pool initializer
        os.nice(niceness)

    def worker_wrapper(x):
        os.nice(self.parameters.get('niceness', 20))
        # Bind this worker's slice of the swept parameters, then run the task.
        for k, v in zip(self.parameters['parallel'], x):
            self.parameters[k] = v
        out = self.process()
        return out

    iterable_vars = list(
        zip(*[self.parameters[k] for k in self.parameters['parallel']]))
    n_cores = self.parameters.get('n_cores', 4)
    pool = Pool(processes=n_cores,
                initializer=set_niceness,
                initargs=(self.parameters.get('niceness', 20), ),
                ray_address='auto')  # run on the same host it was called from
    outs = pool.map(worker_wrapper, iterable_vars)
    return self.__process_outputs(outs)
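# Illustration only (not from the original class): a hypothetical `parameters`
# dict of the shape __parallel_run_ray above expects. The keys listed under
# 'parallel' are swept in lockstep, one tuple per worker call; 'n_cores' and
# 'niceness' are read as optional settings.
example_parameters = {
    'parallel': ['seed', 'input_file'],                   # keys iterated together
    'seed': [0, 1, 2, 3],                                 # one value per run
    'input_file': ['a.csv', 'b.csv', 'c.csv', 'd.csv'],   # one value per run
    'n_cores': 4,                                         # size of the Ray pool
    'niceness': 20,                                       # applied in each worker
}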
import collections
import itertools


class RayMapReduce(object):

    def __init__(self, map_func, reduce_func, num_workers=None, host_address=None):
        """
        map_func
            Map function.
        reduce_func
            Reduce function.
        num_workers
            The number of workers to create in the pool. If None, defaults to
            the number of CPUs available on the current host.
        host_address
            The IP address of the master node. If None, defaults to localhost.
        """
        from ray.util.multiprocessing.pool import Pool  # import within __init__()
        self.pool = Pool()
        self.map_func = map_func
        self.reduce_func = reduce_func

    def partition(self, mapped_values):
        """
        Organize the mapped values by their key.
        Returns an unsorted sequence of tuples with a key and a sequence of values.
        """
        partitioned_data = collections.defaultdict(list)
        for key, value in mapped_values:
            partitioned_data[key].append(value)
        return partitioned_data.items()

    def __call__(self, inputs, chunksize=1):
        """
        Process the inputs through the map and reduce functions given.

        inputs
            An iterable containing the input data to be processed.
        chunksize=1
            The portion of the input data to hand to each worker. This can be
            used to tune performance during the mapping phase.
        """
        map_responses = self.pool.map(self.map_func, inputs, chunksize=chunksize)
        partitioned_data = self.partition(itertools.chain(*map_responses))
        reduced_values = self.pool.map(self.reduce_func, partitioned_data)
        return reduced_values
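# Minimal usage sketch for RayMapReduce above (illustration only, not part of
# the original snippet): a word-count job. `emit_words` and `count_words` are
# hypothetical map/reduce functions written for this example.
def emit_words(text):
    # Map: one (word, 1) pair per word in the input string.
    return [(word.lower(), 1) for word in text.split()]


def count_words(item):
    # Reduce: item is (word, [1, 1, ...]) as produced by partition().
    word, occurrences = item
    return word, sum(occurrences)


if __name__ == '__main__':
    mapper = RayMapReduce(emit_words, count_words)
    counts = mapper(["to be or not to be", "that is the question"])
    print(sorted(counts))  # e.g. [('be', 2), ('is', 1), ...]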
def approximate_pi_distributed(num_samples):
    from ray.util.multiprocessing.pool import Pool  # NOTE: Only the import statement is changed.
    pool = Pool()

    start = time.time()
    num_inside = 0
    sample_batch_size = 100000
    for result in pool.map(sample,
                           [sample_batch_size
                            for _ in range(num_samples // sample_batch_size)]):
        num_inside += result

    print("pi ~= {}".format((4 * num_inside) / num_samples))
    print("Finished in: {:.2f}s".format(time.time() - start))
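# `sample` is referenced by approximate_pi_distributed above but not defined in
# this snippet. A plausible sketch (an assumption, not the original helper):
# draw `num_samples` uniform points in the square [-1, 1]^2 and count how many
# land inside the unit circle.
import math
import random
import time


def sample(num_samples):
    num_inside = 0
    for _ in range(num_samples):
        x, y = random.uniform(-1, 1), random.uniform(-1, 1)
        if math.hypot(x, y) <= 1:
            num_inside += 1
    return num_inside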
n_burn = config["Sampler"].getint("burnin")
names, par0, chain0, lnprob, sampler_type = postprocess.get_simu_parameters(
    run_config_path, intrinsic=False)
i_map = np.where(lnprob[0, :, n_burn:] == np.max(lnprob[0, :, n_burn:]))
# p_map = chain0[0, :, n_burn:, :][i_map[0][0], i_map[1][0]]
# pos0 = chain0[:, :, -1, :]
pos0 = chain0[:, :, i_map[1][0], :]
# Free memory: these arrays are no longer needed
del chain0, lnprob

# Choose the parallelization backend
multiproc = config["Sampler"].get("multiproc")
if multiproc == 'ray':
    from ray.util.multiprocessing.pool import Pool
    pool = Pool(threads)
else:
    pool = None

if (not psd_estimation) and (not imputation):
    sampler = samplers.ExtendedPTMCMC(nwalkers,
                                      len(names),
                                      log_likelihood,
                                      posteriormodel.logp,
                                      ntemps=ntemps,
                                      threads=threads,
                                      pool=pool,
                                      loglargs=[par_aux0],
                                      logpargs=(lower_bounds, upper_bounds))
def fit(self, A, y_init):
    y = np.copy(y_init)
    num_samples, num_features = A.shape
    p = self.params
    losses = np.zeros(p.num_epoch + 1)

    # Initialization of parameters
    if self.x is None:
        self.x = np.random.normal(0, INIT_WEIGHT_STD, size=(num_features, ))
        self.x = np.tile(self.x, (p.n_cores, 1)).T
    self.z = self.x
    self.w = np.ones((1, p.n_cores), dtype=np.float64)
    self.u = np.zeros(self.x.shape, dtype=np.float64)
    # self.x_estimate = np.copy(self.x)
    self.x_hat = np.copy(self.x)
    # if p.method == 'old':
    #     self.h = np.zeros_like(self.x)
    #     alpha = 1. / (A.shape[1] / p.coordinates_to_keep + 1)

    # splitting data onto machines
    if p.distribute_data:
        np.random.seed(p.split_data_random_seed)
        num_samples_per_machine = num_samples // p.n_cores
        if p.split_data_strategy == 'random':
            all_indexes = np.arange(num_samples)
            np.random.shuffle(all_indexes)
        elif p.split_data_strategy == 'naive':
            all_indexes = np.arange(num_samples)
        elif p.split_data_strategy == 'label-sorted':
            all_indexes = np.argsort(y)

        indices = []
        for machine in range(0, p.n_cores - 1):
            indices += [all_indexes[num_samples_per_machine * machine:
                                    num_samples_per_machine * (machine + 1)]]
        indices += [all_indexes[num_samples_per_machine * (p.n_cores - 1):]]
        print("length of indices:", len(indices))
        print("length of last machine indices:", len(indices[-1]))
    else:
        num_samples_per_machine = num_samples
        # should have shape (num_machines, num_samples)
        indices = np.tile(np.arange(num_samples), (p.n_cores, 1))

    # if cifar10 or mnist dataset, then make it binary
    if len(np.unique(y)) > 2:
        y[y < 5] = -1
        y[y >= 5] = 1
    print("Number of different labels:", len(np.unique(y)))

    # epoch 0 loss evaluation
    losses[0] = self.loss(A, y)
    compute_loss_every = int(num_samples_per_machine / LOSS_PER_EPOCH)
    all_losses = np.zeros(
        int(num_samples_per_machine * p.num_epoch / compute_loss_every) + 1)

    train_start = time.time()
    np.random.seed(p.random_seed)

    # Connect to the existing Ray cluster and create a worker pool on it
    ray.init(address="auto")
    pool = Pool(ray_address='auto')
    for epoch in np.arange(p.num_epoch):
        for iteration in range(num_samples_per_machine):
            t = epoch * num_samples_per_machine + iteration

            # if t % compute_loss_every == 0:
            if t % 10 == 0:
                loss = self.loss(A, y)
                print(
                    '{}: t = {}, epoch = {}, iter = {}, loss = {}, elapsed = {} s, transmitted = {} MiB'
                    .format(p, t, epoch, iteration, loss,
                            time.time() - train_start,
                            self.transmitted / 1e6))
                all_losses[t // compute_loss_every] = loss
                if np.isinf(loss) or np.isnan(loss):
                    print("finish training")
                    break

            lr = self.lr(epoch, iteration, num_samples_per_machine,
                         num_features)

            # Gradient step
            x_plus = np.zeros_like(self.x)
            # for machine in range(0, p.n_cores):
            #     sample_idx = np.random.choice(indices[machine])
            #     a = A[sample_idx]
            #     x = self.x[:, machine]
            #     z = self.z[:, machine]
            #     if p.method == "SGP":
            #         minus_grad = y[sample_idx] * a * sigmoid(-y[sample_idx] * a.dot(z).squeeze())
            #     else:
            #         minus_grad = y[sample_idx] * a * sigmoid(-y[sample_idx] * a.dot(x).squeeze())
            #     if isspmatrix(a):
            #         minus_grad = minus_grad.toarray().squeeze(0)
            #     if p.regularizer:
            #         minus_grad -= p.regularizer * x
            #     x_plus[:, machine] = lr * minus_grad
            pool_args = []
            for machine in range(0, p.n_cores):
                sample_idx = np.random.choice(indices[machine])
                pool_args.append((machine, A[sample_idx], y[sample_idx], lr))
            tmp = pool.starmap(self.gradient, pool_args)
            for machine in range(0, p.n_cores):
                x_plus[:, machine] = tmp[machine][0]

            # Communication step
            if p.method == "plain":
                self.x = (self.x + x_plus).dot(self.W)
                self.transmitted += x_plus.nbytes
            elif p.method == "ea-sgd":
                # use with centralized topology
                assert p.topology == "centralized"
                if p.comm_period is None:
                    # Sync
                    self.x = self.x + x_plus - lr * p.elasticity * (
                        self.x - self.x_hat)
                    self.x_hat = (
                        1 - p.n_cores * p.elasticity * lr
                    ) * self.x_hat + p.n_cores * p.elasticity * lr * self.x.dot(
                        self.W)
                    self.transmitted += self.x.nbytes
                else:
                    # Async
                    tmp_x = self.x
                    if t % p.comm_period == 0:
                        self.x = self.x - lr * p.elasticity * (tmp_x - self.x_hat)
                        self.x_hat = self.x_hat + p.elasticity * lr * (
                            tmp_x.dot(self.W) - self.x_hat)
                        self.transmitted += tmp_x.nbytes
                    self.x += x_plus
            elif p.method == "SGP":
                self.x = (self.x + x_plus).dot(self.W)
                self.w = self.w.dot(self.W)
                self.z = self.x / self.w
                self.transmitted += x_plus.nbytes + self.w.nbytes
            elif p.method == "choco":
                x_plus += self.x
                self.x = x_plus + p.consensus_lr * self.x_hat.dot(
                    self.W - np.eye(p.n_cores))
                quantized = self.__quantize(self.x - self.x_hat)
                self.x_hat += quantized
                self.transmitted += self.x_hat.nbytes
            elif p.method == 'dcd-psgd':
                x_plus += self.x.dot(self.W)
                quantized = self.__quantize(x_plus - self.x)
                self.x += quantized
                self.transmitted += self.x.nbytes
            elif p.method == 'ecd-psgd':
                x_plus += self.x_hat.dot(self.W)
                z = (1 - 0.5 * (t + 1)) * self.x + 0.5 * (t + 1) * x_plus
                quantized = self.__quantize(z)
                self.x = np.copy(x_plus)
                self.x_hat = (1 - 2. / (t + 1)) * self.x_hat + 2. / (
                    t + 1) * quantized
                self.transmitted += self.x_hat.nbytes

            self.update_estimate(t)

        losses[epoch + 1] = self.loss(A, y)
        print("epoch {}: loss {} score {}".format(epoch, losses[epoch + 1],
                                                  self.score(A, y)))
        if np.isinf(losses[epoch + 1]) or np.isnan(losses[epoch + 1]):
            print("finish training")
            break

    print("Training took: {}s".format(time.time() - train_start))
    ray.shutdown()
    return losses, all_losses
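# Sketch only: a `gradient` worker of the shape `pool.starmap(self.gradient, ...)`
# above expects, inferred from the commented-out serial loop. It is not the
# original implementation. The scaled negative gradient is returned first so
# that `tmp[machine][0]` can be written into `x_plus[:, machine]`.
def gradient(self, machine, a, y_sample, lr):
    x = self.x[:, machine]
    z = self.z[:, machine]
    if self.params.method == "SGP":
        minus_grad = y_sample * a * sigmoid(-y_sample * a.dot(z).squeeze())
    else:
        minus_grad = y_sample * a * sigmoid(-y_sample * a.dot(x).squeeze())
    if isspmatrix(a):
        minus_grad = minus_grad.toarray().squeeze(0)
    if self.params.regularizer:
        minus_grad -= self.params.regularizer * x
    return lr * minus_grad, machine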
# sorted_tets = sort_items_prefix(book_tets, "b")
tet_dict = {}
for i in book_tets:
    key = 'b' + str(func_get_movie(i))
    tet_dict[key] = i

list_movies = hope.index.tolist()

"##################################################"
"################## COMPUTE SIMILARITY BETWEEN MOVIE TETS ####################"

num_movies = len(tet_dict)
print(num_movies)

pool = Pool(mp.cpu_count() - 2)
results = pool.map(partial(f, tet_dict=tet_dict, list_movies=list_movies, spec=spec_book),
                   list_movies,
                   chunksize=2000)

data = [x[1] for x in results]
movies = [x[0] for x in results]
df = pd.DataFrame(data=data, index=movies, columns=movies).fillna(0)
cols = df.columns.values.tolist()