def simulation(self, sweep_dict: Dict[str, List[Any]], states_list: List[Dict[str, Any]],
               configs: List[Tuple[List[Callable], List[Callable]]],
               env_processes: Dict[str, Callable], time_seq: range,
               runs: int) -> List[List[Dict[str, Any]]]:

    def execute_run(sweep_dict, states_list, configs, env_processes, time_seq, run) -> List[Dict[str, Any]]:
        run += 1

        def generate_init_sys_metrics(genesis_states_list):
            for d in genesis_states_list:
                d['run'], d['substep'], d['timestep'] = run, 0, 0
                yield d

        states_list_copy: List[Dict[str, Any]] = list(
            generate_init_sys_metrics(deepcopy(states_list)))

        first_timestep_per_run: List[Dict[str, Any]] = self.run_pipeline(
            sweep_dict, states_list_copy, configs, env_processes, time_seq, run)
        del states_list_copy

        return first_timestep_per_run

    tp = TPool(runs)
    pipe_run: List[List[Dict[str, Any]]] = flatten(
        tp.map(
            lambda run: execute_run(sweep_dict, states_list, configs, env_processes, time_seq, run),
            list(range(runs))))
    tp.clear()

    return pipe_run
class PandasParallelRunner:
    num_partitions = 10  # number of partitions to split dataframe
    num_cores = cpu_count() - 1  # number of cores on your machine
    pool = None

    def __init__(self):
        self.num_partitions = self.num_cores
        self.pool = ThreadPool(self.num_cores)
        # self.pool = ProcessPool(self.num_cores)

    def p_arr_run(self, tup):
        data, func_holder = tup
        for i, v in enumerate(data):
            data[i] = func_holder.func(v, *func_holder.args)
        return data

    def p_arr(self, arr, func_holder):
        arr_split = np.array_split(arr, self.num_partitions)
        arr = np.concatenate(
            self.pool.map(self.p_arr_run, product(arr_split, [func_holder])))
        return arr

    def p_df_run(self, tup):
        data, func_holder = tup
        return data.apply(func_holder.func, args=func_holder.args)

    def p_df(self, df, func_holder):
        df_split = np.array_split(df, self.num_partitions)
        # df = pd.concat(self.pool.map(self.p_df_run, product(df_split, [func_holder])))
        df = df.apply(func_holder.func, args=func_holder.args)
        return df
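# Hedged usage sketch for PandasParallelRunner above (not from the original source).
# Assumptions: ThreadPool/cpu_count/np/product come from the imports the class already
# relies on (pathos.pools.ThreadPool or multiprocessing.pool.ThreadPool both fit), and
# the "func_holder" argument is any object with .func and .args attributes -- FuncHolder
# below is a hypothetical stand-in.
from collections import namedtuple
import numpy as np

FuncHolder = namedtuple('FuncHolder', ['func', 'args'])

def scale(value, factor):
    return value * factor

runner = PandasParallelRunner()
result = runner.p_arr(np.arange(8), FuncHolder(func=scale, args=(10,)))
print(result)  # each chunk is mapped on the thread pool -> [ 0 10 20 30 40 50 60 70]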
def __init__(self, file_list, modification_pipeline=None, use_menpo_type=False):
    videos = OrderedDict()
    file_list_filtered = []
    video_getter_pool = ThreadPool(nodes=8)
    mpio_obj_list = video_getter_pool.map(menpo_import_video_verbose, file_list)
    for vp, mpio_obj in zip(file_list, mpio_obj_list):
        if mpio_obj is not None:
            videos[vp] = mpio_obj
            file_list_filtered.append(vp)
    # for vp in file_list:
    #     print('menpo.io.import_video importing %s' % vp)
    #     try:
    #         mpio_obj = menpo.io.import_video(vp, exact_frame_count=False, normalise=False)
    #         videos[vp] = mpio_obj
    #     except Exception as err:
    #         print('menpo.io.import_video could not import %s' % vp)
    #         print(err)
    self.file_list = file_list_filtered
    self.videos = videos
    if modification_pipeline is not None:
        self.modification_pipeline = modification_pipeline
    # fps is taken from the last successfully imported video in the loop above
    self.fps = mpio_obj.fps
    self.use_menpo_type = use_menpo_type
def fit(self, words):
    self.coherence_scores = {}
    self.pairwise_probability = {}
    self.word_probability = {}
    self.pairwise_hits = {}
    self.word_hits = {}
    # Not reset in the original; assumed here to be a dict of pair metadata,
    # matching how it is populated and consumed below.
    self.pairwise = {}

    pool = ThreadPool(N_CPUS)
    pool.map(self.compute_word_hits, words)

    # for word_i in self.words:
    sorted_desc = sorted(self.word_hits.items(), key=operator.itemgetter(1), reverse=True)
    sorted_asc = sorted(self.word_hits.items(), key=operator.itemgetter(1))
    for most_common in sorted_desc:
        most_common_ngram = most_common[0]
        most_common_hits = most_common[1]
        for most_rare in sorted_asc:
            most_rare_ngram = most_rare[0]
            most_rare_hits = most_rare[1]
            if most_common_ngram != most_rare_ngram:
                if most_rare_hits < most_common_hits:
                    pairwise_key = most_rare_ngram + "_" + most_common_ngram
                    if pairwise_key not in self.pairwise_probability:
                        self.pairwise_probability[pairwise_key] = 0
                    self.pairwise[pairwise_key] = {
                        "most_common_ngram": most_common_ngram,
                        "most_common_hits": most_common_hits,
                        "most_rare_ngram": most_rare_ngram,
                        "most_rare_hits": most_rare_hits
                    }

    pool.map(self.compute_pairwise_hits, self.pairwise.keys())
    return sum(self.coherence_scores.values())
def download_top_melee_gifs(pages=1):
    print('Looking for gifs on the top {} reddit pages'.format(pages))
    saveDir = create_timestamped_dir()
    print('Will save to {}'.format(saveDir))

    urls = get_melee_gif_urls(pages)
    print('Found {} gif urls'.format(len(urls)))

    pool = ThreadPool(50)
    results = pool.map(lambda url: download_gif_and_convert_to_images(url, saveDir), urls)
    # list() so len() also works on Python 3, where filter() returns an iterator
    print('Done downloading and converting {} gifs'.format(len(list(filter(None, results)))))
def fit(self, words):
    self.coherence_scores = {}
    self.pairwise_probability = {}
    self.word_probability = {}
    self.pairwise_hits = {}
    self.word_hits = {}
    # Not reset in the original; assumed here to be the list of pair keys
    # that compute_pairwise_hits is mapped over below.
    self.pairwise = []

    for word_i in words:
        for word_j in words:
            if word_i != word_j:
                pairwise_key = "_".join(sorted([word_i, word_j]))
                if pairwise_key not in self.pairwise_probability:
                    self.pairwise_probability[pairwise_key] = 0
                    self.pairwise.append(pairwise_key)

    pool = ThreadPool(N_CPUS)
    pool.map(self.compute_word_hits, words)
    pool.map(self.compute_pairwise_hits, self.pairwise)
    return sum(self.coherence_scores.values())
def __init__(self, image_generators=[], fps=None, modification_pipelines=None,
             structured_modification_pipelines=None, use_menpo_type=False, opts={}):
    self.video_lists = []
    self.file_lists = []
    print('GroupedVideoGenerator3()')
    for i in range(0, len(image_generators)):
        if isinstance(image_generators[i], VideoGenerator3):
            assert isinstance(image_generators[i].videos, OrderedDict)
            self.file_lists.append(image_generators[i].file_list)
            self.video_lists.append(image_generators[i].videos)
            fps = image_generators[i].fps
        elif isinstance(image_generators[i], list):
            video_getter_pool = ThreadPool(nodes=8)
            tmp_mpio_obj_list = video_getter_pool.map(menpo_import_video_verbose,
                                                      image_generators[i])
            # Record which imports succeeded before dropping the failed (None) entries,
            # so the kept file names line up with the kept videos.
            safe_idxs = [idx for idx, x in enumerate(tmp_mpio_obj_list) if x is not None]
            tmp_mpio_obj_list = [x for x in tmp_mpio_obj_list if x is not None]
            tmp_file_list = image_generators[i]
            self.file_lists.append([tmp_file_list[safe_idx] for safe_idx in safe_idxs])
            self.video_lists.append(tmp_mpio_obj_list)
        else:
            raise TypeError('You can only make a GroupedImageGenerator2 '
                            'from a list of string-lists or ImageGenerator2s')
        self.file_lists[i] = [tmp for tmp in self.file_lists[i] if tmp is not None]

    self.fps = fps
    self.modification_pipelines = \
        modification_pipelines if modification_pipelines is not None else {}
    self.structured_modification_pipelines = \
        structured_modification_pipelines if structured_modification_pipelines is not None else {}
    self.io_pool = None
    self.enable_caching = 0
    self.enable_pp_caching = 0
    self.img_pp_cache = {}
    self.img_pp_oo_cache = {}
    self.img_cache = {}
    self.opts = opts
    self.use_menpo_type = use_menpo_type
# Copyright (c) 1997-2015 California Institute of Technology.
# License: 3-clause BSD. The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE

def host(id):
    import socket
    return "Rank: %d -- %s" % (id, socket.gethostname())


if __name__ == '__main__':
    from pathos.pools import ThreadPool as TPool
    tpool = TPool()

    print "Evaluate 10 items on 1 thread"
    tpool.nthreads = 1
    res3 = tpool.map(host, range(10))
    print tpool
    print '\n'.join(res3)
    print ''

    print "Evaluate 10 items on 2 threads"
    tpool.nthreads = 2
    res5 = tpool.map(host, range(10))
    print tpool
    print '\n'.join(res5)
    print ''

    print "Evaluate 10 items on ? threads"
    tpool.nthreads = None
    res9 = tpool.map(host, range(10))
    print tpool
from pathos.helpers import freeze_support
freeze_support()
from pathos.pools import ProcessPool as Pool
from pathos.pools import ThreadPool as TPool

# NOTE (assumption): add_me and squ are defined elsewhere in the original example;
# the stand-ins below are hypothetical, added only so this snippet runs on its own.
def adder(augend):
    def add_me(addend):
        return addend + augend
    return add_me

add_me = adder(1)       # a nested ("inner") function, per the comments below
squ = lambda x: x**2    # a lambda

pool = Pool()
tpool = TPool()

# test 'dilled' multiprocessing for inner
print("Evaluate 10 items on 2 proc:")
pool.ncpus = 2
print(pool)
print(pool.map(add_me, range(10)))
print('')

# test 'dilled' multiprocessing for lambda
print("Evaluate 10 items on 4 proc:")
pool.ncpus = 4
print(pool)
print(pool.map(squ, range(10)))
print('')

# test for lambda, but with threads
print("Evaluate 10 items on 4 threads:")
tpool.nthreads = 4
print(tpool)
print(tpool.map(squ, range(10)))
print('')
# end of file
def threadcompute(self, xs):
    pool = ThreadPool(4)
    results = pool.map(self.compute, xs)
    return results
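# Hedged usage sketch (assumption): threadcompute above belongs to some class with a
# compute() method; Worker below is a hypothetical host class, and ThreadPool is assumed
# to be pathos.pools.ThreadPool (thread-based, so the bound method needs no pickling).
from pathos.pools import ThreadPool

class Worker:
    def compute(self, x):
        return x * x

    def threadcompute(self, xs):
        pool = ThreadPool(4)
        results = pool.map(self.compute, xs)
        return results

print(Worker().threadcompute(range(5)))  # -> [0, 1, 4, 9, 16]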
from pathos.helpers import freeze_support
freeze_support()
from pathos.pools import ProcessPool as Pool
from pathos.pools import ThreadPool as TPool
pool = Pool()
tpool = TPool()

# test 'dilled' multiprocessing for inner
print "Evaluate 10 items on 2 proc:"
pool.ncpus = 2
print pool
print pool.map(add_me, range(10))
print ''

# test 'dilled' multiprocessing for lambda
print "Evaluate 10 items on 4 proc:"
pool.ncpus = 4
print pool
print pool.map(squ, range(10))
print ''

# test for lambda, but with threads
print "Evaluate 10 items on 4 threads:"
tpool.nthreads = 4
print tpool
print tpool.map(squ, range(10))
print ''
# end of file
# License: 3-clause BSD. The full license text is available at:
#  - https://github.com/uqfoundation/pathos/blob/master/LICENSE

def host(id):
    import socket
    return "Rank: %d -- %s" % (id, socket.gethostname())


if __name__ == '__main__':
    from pathos.pools import ThreadPool as TPool
    tpool = TPool()

    print("Evaluate 10 items on 1 thread")
    tpool.nthreads = 1
    res3 = tpool.map(host, range(10))
    print(tpool)
    print('\n'.join(res3))
    print('')

    print("Evaluate 10 items on 2 threads")
    tpool.nthreads = 2
    res5 = tpool.map(host, range(10))
    print(tpool)
    print('\n'.join(res5))
    print('')

    print("Evaluate 10 items on ? threads")
    tpool.nthreads = None
    res9 = tpool.map(host, range(10))
    print(tpool)
def df_apply(df, f, pool=None, n_cpus=None, return_df=True):
    """Apply the function `f` to each row in `df` in a parallel fashion."""
    if pool is None:
        if n_cpus is None:
            n_cpus = cpu_count()
        pool = ThreadPool(n_cpus)

    class RecordProxy:
        """A proxy object wrapping the `DataFrame.iat[row_i, col_i]` access model
        and providing a dictionary-style interface to a single row."""
        __df = df
        __field_names = list(df.columns)

        @classmethod
        def _field_i(cls, name):
            try:
                return cls.__field_names.index(name)
            except ValueError:
                raise KeyError(
                    f"key '{name}' not found on record. Available keys are: {cls.__field_names}")

        @classmethod
        def wrap_map_func(cls, f):
            """Wrap the given function for use with a map() style call.

            Returns a function that expects a row index and calls `f` with a
            proxy object exposing a dictionary-style interface to that row."""
            return lambda row_i: f(cls(row_i))

        @property
        def index(self):
            return self.__row_i

        def __init__(self, row_i):
            self.__row_i = row_i

        def __getitem__(self, key):
            i = self._field_i(key)
            return self.__df.iat[self.__row_i, i]

        def __setitem__(self, key, value):
            i = self._field_i(key)
            self.__df.iat[self.__row_i, i] = value

        def get(self, key, value=None):
            try:
                i = self._field_i(key)
                return self.__df.iat[self.__row_i, i]
            except KeyError:
                return value

        def __str__(self):
            parts = ["Record({"]
            fields_repr = []
            for field_name in self.__field_names:
                field_repr = self.__getitem__(field_name).__repr__()
                fields_repr.append(f"'{field_name}': {field_repr}")
            # append (not extend) so the joined string is added whole,
            # rather than character by character
            parts.append(",".join(fields_repr))
            parts.append("})")
            return "".join(parts)

        def dict(self, keys=None):
            if keys is None:
                keys = self.__field_names
            return {
                key: self.__df.iat[self.__row_i, i]
                for i, key in enumerate(self.__field_names) if key in keys
            }

        def __iter__(self):
            return (self.__df.iat[self.__row_i, i]
                    for i in range(len(self.__field_names)))

    results = pool.map(RecordProxy.wrap_map_func(f), range(df.shape[0]))
    if return_df:
        return df
    else:
        return results
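# Hedged usage sketch for df_apply above (not from the original source). The column
# names and the doubling rule are illustrative assumptions; the pool is passed in
# explicitly here, assumed to be pathos.pools.ThreadPool, so cpu_count() is not needed.
import pandas as pd
from pathos.pools import ThreadPool

df = pd.DataFrame({"a": [1, 2, 3], "b": [0, 0, 0]})

def double_a_into_b(rec):
    rec["b"] = rec["a"] * 2  # __setitem__ writes back through DataFrame.iat

out = df_apply(df, double_a_into_b, pool=ThreadPool(2))
print(out)  # column "b" now holds 2, 4, 6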