def parallel_map(client, task, args, message, batchsize=1, background=False):
    """
    Helper to map a function over a sequence of inputs, in parallel, with
    progress meter.

    :param client: IPython.parallel.Client instance
    :param task: Function
    :param args: Must be a list of tuples of arguments that the task function
        will be mapped onto. If the function takes a single argument, it
        still must be a 1-tuple.
    :param message: String for progress bar
    :param batchsize: Jobs are shipped in batches of this size. Higher numbers
        mean less network traffic, but longer execution time per job.
    :param background: If True, submit the jobs and return immediately
        without starting the progress bar or waiting for completion.
    :return: IPython.parallel.AsyncMapResult when ``background`` is False
        (returned once it reports ready); the tuple ``(AsyncMapResult,
        client)`` when ``background`` is True.
    """
    njobs = len(args)
    nproc = len(client)
    view = client.load_balanced_view()
    message += ' ({} proc)'.format(nproc)
    pbar = setup_progressbar(message, njobs, simple_progress=True)
    if not background:
        pbar.start()

    # Transpose the list of argument tuples into one sequence per positional
    # argument, which is the calling convention of ``view.map``.
    map_result = view.map(task, *zip(*args), chunksize=batchsize)
    if background:
        return map_result, client

    while not map_result.ready():
        map_result.wait(1)
        # progress * batchsize estimates the number of finished jobs; the
        # final batch may be shorter than batchsize, so clamp the estimate to
        # the bar's maximum instead of overshooting it.
        pbar.update(min(njobs, map_result.progress * batchsize))
    pbar.finish()
    return map_result
def _generic_sequential_matrix_task(task, trees, normalise):
    """
    Compute all pairwise results of ``task`` over ``trees`` sequentially,
    with progress meter, and return them as a square matrix.

    :param task: Callable invoked as ``task(t1, t2, normalise)`` for every
        unordered pair ``(t1, t2)`` drawn from ``trees``.
    :param trees: Sequence of inputs; must support ``len()``.
    :param normalise: Passed through unchanged as the third argument of
        every ``task`` call.
    :return: Result of ``scipy.spatial.distance.squareform`` applied to the
        list of pairwise results, in ``itertools.combinations`` order.
    """
    n_trees = len(trees)
    # Number of unordered pairs; n * (n - 1) is always even, so the integer
    # division is exact.
    n_jobs = n_trees * (n_trees - 1) // 2
    pbar = setup_progressbar("Getting inter-tree distances (seq)", n_jobs)
    pbar.start()

    results = []
    # Count completed jobs from 1 so the bar shows the number of pairs
    # finished, not the index of the pair just processed.
    for done, (t1, t2) in enumerate(itertools.combinations(trees, 2), start=1):
        results.append(task(t1, t2, normalise))
        pbar.update(done)
    pbar.finish()
    return scipy.spatial.distance.squareform(results)
def _generic_async_matrix_task(task, trees, normalise, batch_size=100):
    """
    Compute all pairwise results of ``task`` over ``trees`` asynchronously in
    batches, with progress meter, and return them as a square matrix.

    :param task: Task object exposing ``chunks(iterable, n).group()``; the
        grouped job is launched by calling it and must provide ``ready()``,
        ``completed_count()`` and ``get()``.
        NOTE(review): this looks like the Celery task API - confirm.
    :param trees: Sequence of inputs; must support ``len()``.
    :param normalise: Passed through unchanged as the third element of every
        argument tuple.
    :param batch_size: Number of pairwise jobs per asynchronous batch.
    :return: Result of ``scipy.spatial.distance.squareform`` applied to the
        flattened batch results.
    """
    n_trees = len(trees)
    n_pairs = n_trees * (n_trees - 1) // 2
    # Integer ceiling division: number of batches needed to cover n_pairs.
    # Avoids relying on true division, which would silently truncate (and
    # under-count batches) where ``/`` is integer division.
    n_jobs = -(-n_pairs // batch_size)
    pbar = setup_progressbar("Getting inter-tree distances (async)", n_jobs,
                             simple_progress=True)
    pbar.start()

    # Split the work into batches of 'batch_size'. Each batch is executed
    # asynchronously -- this should work better for large amounts of quick
    # tasks
    pairs = itertools.combinations(trees, 2)
    job_args = ((t1, t2, normalise) for (t1, t2) in pairs)
    job_chunks = task.chunks(job_args, batch_size).group()()

    # Poll every two seconds, reporting the number of completed batches.
    while not job_chunks.ready():
        pbar.update(job_chunks.completed_count())
        time.sleep(2)
    pbar.finish()

    results = job_chunks.get()
    return scipy.spatial.distance.squareform(flatten_list(results))
def sequential_map(task, args, message):
    """
    Helper to map a function over a sequence of inputs, sequentially, with
    progress meter.

    :param task: Function
    :param args: Must be a list of tuples of arguments that the task function
        will be mapped onto. If the function takes a single argument, it
        still must be a 1-tuple.
    :param message: String for progress bar
    :return: List holding the return value of ``task`` for each entry of
        ``args``, in input order.
    """
    njobs = len(args)
    pbar = setup_progressbar(message, njobs, simple_progress=True)
    pbar.start()

    map_result = []
    # Count completed jobs from 1 so the bar shows how many calls have
    # finished, not the index of the call just made.
    for done, arglist in enumerate(args, start=1):
        map_result.append(task(*arglist))
        pbar.update(done)
    pbar.finish()
    return map_result