def __init__(self, file_list, modification_pipeline=None, use_menpo_type=False):
    videos = OrderedDict()
    file_list_filtered = []
    # import all videos in parallel and keep only the ones menpo could read
    video_getter_pool = ThreadPool(nodes=8)
    mpio_obj_list = video_getter_pool.map(menpo_import_video_verbose, file_list)
    for vp, mpio_obj in zip(file_list, mpio_obj_list):
        if mpio_obj is not None:
            videos[vp] = mpio_obj
            file_list_filtered.append(vp)
    # for vp in file_list:
    #     print('menpo.io.import_video importing %s' % vp)
    #     try:
    #         mpio_obj = menpo.io.import_video(vp, exact_frame_count=False, normalise=False)
    #         videos[vp] = mpio_obj
    #     except Exception as err:
    #         print('menpo.io.import_video could not import %s' % vp)
    #         print(err)
    self.file_list = file_list_filtered
    self.videos = videos
    if modification_pipeline is not None:
        self.modification_pipeline = modification_pipeline
    # take the frame rate from the last successfully imported video
    # (the last entry of mpio_obj_list may be None if that import failed)
    self.fps = list(videos.values())[-1].fps if videos else None
    self.use_menpo_type = use_menpo_type
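# `menpo_import_video_verbose` is not defined in this snippet. A minimal sketch of
# what it might look like, assuming it simply wraps the commented-out try/except
# above (import one video, report failures, return None so the caller can filter
# unreadable files out of the list). This is an illustration, not the project's code.
import menpo.io

def menpo_import_video_verbose(vp):
    print('menpo.io.import_video importing %s' % vp)
    try:
        return menpo.io.import_video(vp, exact_frame_count=False, normalise=False)
    except Exception as err:
        print('menpo.io.import_video could not import %s' % vp)
        print(err)
        return None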
def __init__(self, image_generators=[], fps=None, modification_pipelines=None,
             structured_modification_pipelines=None, use_menpo_type=False, opts={}):
    self.video_lists = []
    self.file_lists = []
    print('GroupedVideoGenerator3()')
    for i in range(0, len(image_generators)):
        if isinstance(image_generators[i], VideoGenerator3):
            assert isinstance(image_generators[i].videos, OrderedDict)
            self.file_lists.append(image_generators[i].file_list)
            self.video_lists.append(image_generators[i].videos)
            fps = image_generators[i].fps
        elif isinstance(image_generators[i], list):
            video_getter_pool = ThreadPool(nodes=8)
            tmp_mpio_obj_list = video_getter_pool.map(menpo_import_video_verbose,
                                                      image_generators[i])
            # record which imports succeeded before dropping the failures,
            # so the kept file paths stay aligned with the kept videos
            safe_idxs = [idx for idx, x in enumerate(tmp_mpio_obj_list) if x is not None]
            tmp_mpio_obj_list = [x for x in tmp_mpio_obj_list if x is not None]
            tmp_file_list = image_generators[i]
            self.file_lists.append([tmp_file_list[safe_idx] for safe_idx in safe_idxs])
            self.video_lists.append(tmp_mpio_obj_list)
        else:
            raise TypeError('You can only make a GroupedImageGenerator2 '
                            'from a list of string-lists or ImageGenerator2s')
        self.file_lists[i] = [tmp for tmp in self.file_lists[i] if tmp is not None]
    self.fps = fps
    self.modification_pipelines = \
        modification_pipelines if modification_pipelines is not None else {}
    self.structured_modification_pipelines = \
        structured_modification_pipelines if structured_modification_pipelines is not None else {}
    self.io_pool = None
    self.enable_caching = 0
    self.enable_pp_caching = 0
    self.img_pp_cache = {}
    self.img_pp_oo_cache = {}
    self.img_cache = {}
    self.opts = opts
    self.use_menpo_type = use_menpo_type
def init_thread_pool(self):
    self.io_pool = ThreadPool(nodes=4)
class GroupedVideoGenerator2:

    def __init__(self, image_generators, fps=None, modification_pipelines=None,
                 structured_modification_pipelines=None):
        self.video_lists = []
        self.file_lists = []
        for i in range(0, len(image_generators)):
            if isinstance(image_generators[i], VideoGenerator):
                assert isinstance(image_generators[i].videos, OrderedDict)
                self.file_lists.append(image_generators[i].file_list)
                self.video_lists.append(image_generators[i].videos)
                fps = image_generators[i].fps
            elif isinstance(image_generators[i], list):
                self.file_lists.append(image_generators[i])
                # materialize the imports eagerly (map() is lazy in Python 3)
                tmp_mpio_obj_list = [menpo.io.import_video(vp, exact_frame_count=False,
                                                           normalise=False)
                                     for vp in image_generators[i]]
                self.video_lists.append(tmp_mpio_obj_list)
            else:
                raise TypeError('You can only make a GroupedImageGenerator2 '
                                'from a list of string-lists or ImageGenerator2s')
            self.file_lists[i] = list(filter(None, self.file_lists[i]))
        self.fps = fps
        self.modification_pipelines = \
            modification_pipelines if modification_pipelines is not None else {}
        self.structured_modification_pipelines = \
            structured_modification_pipelines if structured_modification_pipelines is not None else {}
        self.io_pool = None
        self.enable_caching = 0
        self.enable_pp_caching = 0
        self.img_pp_cache = {}
        self.img_pp_oo_cache = {}
        self.img_cache = {}
        self.opts = {}

    def group_lists(self, get_key_callback):
        new_file_lists = sbpy_utils.core.sets.group_by(self.file_lists, get_key_callback)
        self.file_lists = new_file_lists
        reordered_video_lists = []
        old_video_lists = self.video_lists
        for view_idx, file_list in enumerate(new_file_lists):
            reordered_video_list = OrderedDict()
            old_video_list = old_video_lists[view_idx]
            for g in file_list:
                reordered_video_list[g] = old_video_list[g]

    def dynamic_func(self, key, *args):
        fhandle = self.opts[key]
        out = fhandle(*args)
        return out

    def get_sample_dims(self, pipeline_mask=[], invert_pipeline_mask=True):
        modified_img, _ = self.get(0, 0, pipeline_mask, invert_pipeline_mask)
        all_dims = []
        for view in modified_img:
            dims = view[0].shape
            all_dims.append(dims)
        return all_dims

    def structured_get(self, idx, time_idxs, pipeline_mask=[], invert_pipeline_mask=True):
        return self._get(idx, time_idxs, pipeline_mask, invert_pipeline_mask,
                         collect_pipeline_multi_outputs=True)

    def get(self, idx, time_idxs, pipeline_mask=[], invert_pipeline_mask=True):
        return self._get(idx, time_idxs, pipeline_mask, invert_pipeline_mask,
                         collect_pipeline_multi_outputs=False)

    def get_num_samples(self):
        return len(self.file_lists[0])

    # Fetch the frames at time_idxs for every view in the group and run each
    # frame through that view's modification pipeline.
    def _get(self, idx, time_idxs, pipeline_mask=[], invert_pipeline_mask=True,
             collect_pipeline_multi_outputs=False):
        segments = []
        metadatas = []
        all_oos = []
        if isinstance(time_idxs, int):
            time_idxs = [time_idxs]
        for i in range(0, len(self.file_lists)):
            vid_path = list(self.video_lists[i].keys())[idx]
            video_obj = self.video_lists[i][vid_path]
            safe_time_idxs = [safe_idx for safe_idx in time_idxs if safe_idx < len(video_obj)]
            sliced_frame_list = video_obj[safe_time_idxs]
            # may stay empty when multi-outputs are not collected
            pipeline_oos = []
            if self.io_pool is None:
                processed_imgs = []
                for frame in sliced_frame_list:
                    if collect_pipeline_multi_outputs:
                        processed_img, oo = apply_func_pipeline_masked_wrapped(
                            menpo_grayscale_to_rgb(frame),
                            self.structured_modification_pipelines[i],
                            pipeline_mask, invert_pipeline_mask)
                        pipeline_oos.append(oo)
                        processed_imgs.append(processed_img)
                    else:
                        processed_img = apply_func_pipeline_masked(
                            menpo_grayscale_to_rgb(frame),
                            self.modification_pipelines[i],
                            pipeline_mask, invert_pipeline_mask)
                        processed_imgs.append(processed_img)
            else:
                if collect_pipeline_multi_outputs:
                    frame_proc = lambda frame: apply_func_pipeline_masked_wrapped(
                        menpo_grayscale_to_rgb(frame),
                        self.structured_modification_pipelines[i],
                        pipeline_mask, invert_pipeline_mask)
                    processed_imgs_oos = self.io_pool.map(frame_proc, sliced_frame_list)
                    processed_imgs, pipeline_oos = zip(*processed_imgs_oos)
                else:
                    frame_proc = lambda frame: apply_func_pipeline_masked(
                        menpo_grayscale_to_rgb(frame),
                        self.modification_pipelines[i],
                        pipeline_mask, invert_pipeline_mask)
                    processed_imgs = self.io_pool.map(frame_proc, sliced_frame_list)
            metadata = {'video_path': vid_path, 'time_idxs': time_idxs}
            # if(self.enable_caching and (mij_path in self.img_cache)):
            #     img = self.img_cache[mij_path]
            #     #print(mij_path+'(using cache) ; ')
            # elif(self.enable_caching and (not mij_path in self.img_cache)):
            #     img = self.imreader_callback(mij_path)
            #     self.img_cache[mij_path] = img
            #     #print(mij_path + '(caching) ; ')
            # else:
            #     img = self.imreader_callback(mij_path)
            #     #print(mij_path+' ; ')
            # pp_cache_key = stringman.sanitize_string(mij_path+str(pipeline_mask)+str(invert_pipeline_mask))
            # if(self.enable_pp_caching and kv_haskey(pp_cache_key, self.img_pp_cache)):
            #     modified_img = self.img_pp_cache[pp_cache_key]
            #     #print(mij_path+'(using img_pp_cache) ; ')
            # elif(self.enable_pp_caching and not (pp_cache_key in self.img_pp_cache)):
            #     modified_img = apply_func_pipeline_masked(img, self.modification_pipelines[i], pipeline_mask, invert_pipeline_mask)
            #     self.img_pp_cache = kv_set(pp_cache_key, modified_img, self.img_pp_cache)
            #     #print([mij_path,'(caching to img_pp_cache) ; '])
            # else:
            #     modified_img = apply_func_pipeline_masked(img, self.modification_pipelines[i], pipeline_mask, invert_pipeline_mask)
            #     #print([mij_path,' ; '])
            segments.append(processed_imgs)
            metadatas.append(metadata)
            all_oos.append(pipeline_oos)
        ret_val = (segments, metadatas)
        if collect_pipeline_multi_outputs:
            ret_val += (all_oos,)
        return ret_val

    # Spawn a daemon thread that keeps read_queue topped up with random segments.
    def start_fill(self, queuesize=10, pipeline_mask=[], invert_pipeline_mask=True,
                   min_length=3, max_length=5, alpha=12, beta=0.5):
        self.read_queue = Queue(maxsize=queuesize)
        self.worker_threads_events = []
        self.worker_threads = []

        def structured_get_loop(queue_obj, event_obj):
            while True:
                print('fetching sequence')
                data_obj, metadatas, data_obj_oos = self.get_random_segment_group(
                    pipeline_mask, invert_pipeline_mask, min_length, max_length, alpha, beta)
                queue_obj.put({'data_obj': data_obj,
                               'data_paths': metadatas,
                               'data_obj_oos': data_obj_oos,
                               'metadatas': metadatas})
                event_is_set = event_obj.wait()

        num_threads = 1
        for i in range(num_threads):
            worker_event = threading.Event()
            worker_event.set()
            worker = threading.Thread(target=structured_get_loop,
                                      args=(self.read_queue, worker_event))
            worker.setDaemon(True)
            self.worker_threads.append(worker)
            self.worker_threads_events.append(worker_event)
            worker.start()

    def get_random_segment_group_from_q(self, num_items=1):
        return self.structured_gets_from_q(num_items)

    def structured_gets_from_q(self, num_items=1):
        items = []
        for i in range(0, num_items):
            res = self.read_queue.get()
            item_consumed_handle = self.read_queue.task_done
            items.append((res, item_consumed_handle))
        return items

    def structured_get_from_q(self):
        return self.structured_gets_from_q()

    def pause_fill(self):
        for event_obj in self.worker_threads_events:
            event_obj.clear()
        with self.read_queue.mutex:
            self.read_queue.queue.clear()

    def resume_fill(self):
        for event_obj in self.worker_threads_events:
            event_obj.set()

    def gets(self, idxs, time_idxs_s, io_pool, pipeline_mask=[], invert_pipeline_mask=True):
        f = lambda idx, time_idxs: self.get(idx, time_idxs, pipeline_mask, invert_pipeline_mask)
        data_objs = io_pool.map(f, idxs, time_idxs_s)
        return data_objs

    def structured_gets(self, idxs, time_idxs_s, io_pool, pipeline_mask=[], invert_pipeline_mask=True):
        f = lambda idx, time_idxs: self.structured_get(idx, time_idxs, pipeline_mask, invert_pipeline_mask)
        data_objs_tmp = io_pool.map(f, idxs, time_idxs_s)
        data_obj_tmp, data_paths_tmp, data_obj_oos_tmp = [list(c) for c in zip(*data_objs_tmp)]
        data_obj, data_paths, data_obj_oos = ([list(x) for x in zip(*data_obj_tmp)],
                                              [list(x) for x in zip(*data_paths_tmp)],
                                              [list(x) for x in zip(*data_obj_oos_tmp)])
        return (data_obj, data_paths, data_obj_oos)

    def get_random_segment_group(self, pipeline_mask=[], invert_pipeline_mask=True,
                                 min_length=3, max_length=5, alpha=12, beta=0.5):
        file_group_idx = random.randint(0, len(self.video_lists[0].keys()) - 1)
        vid_paths = [vlist[file_group_idx] for vlist in self.file_lists]
        vid_lengths = [len(vlist[path]) for vlist, path in zip(self.video_lists, vid_paths)]
        vid_length = min(vid_lengths)
        start_idx = random.randint(0, vid_length - min_length)
        segment_length = min(max_length, max(random.gammavariate(alpha, beta), min_length)) * self.fps
        end_idx = int(np.round(start_idx + segment_length))
        time_idxs = range(start_idx, end_idx)
        processed_segment = self.structured_get(file_group_idx, time_idxs,
                                                pipeline_mask, invert_pipeline_mask)
        return processed_segment

    def set_image_modifiers(self, modification_pipelines):
        self.modification_pipelines = modification_pipelines

    def set_structured_image_modifiers(self, structured_modification_pipelines):
        self.structured_modification_pipelines = structured_modification_pipelines

    def get_func_pipeline(self):
        return copy.deepcopy(self.modification_pipelines)

    def get_structured_func_pipeline(self):
        return copy.deepcopy(self.structured_modification_pipelines)

    def get_num_groups(self):
        return len(self.file_lists)

    def init_thread_pool(self):
        self.io_pool = ThreadPool(nodes=4)

    def close_thread_pool(self):
        self.io_pool.close()
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2016 California Institute of Technology.
# Copyright (c) 2016-2019 The Uncertainty Quantification Foundation.
# License: 3-clause BSD.  The full license text is available at:
#  - https://github.com/uqfoundation/pathos/blob/master/LICENSE

def host(id):
    import socket
    return "Rank: %d -- %s" % (id, socket.gethostname())


if __name__ == '__main__':
    from pathos.pools import ThreadPool as TPool
    tpool = TPool()

    print("Evaluate 10 items on 1 thread")
    tpool.nthreads = 1
    res3 = tpool.map(host, range(10))
    print(tpool)
    print('\n'.join(res3))
    print('')

    print("Evaluate 10 items on 2 threads")
    tpool.nthreads = 2
    res5 = tpool.map(host, range(10))
    print(tpool)
    print('\n'.join(res5))
    print('')
# build from inner function
add_me = adder(5)

# build from lambda functions
squ = lambda x: x**2

if __name__ == '__main__':
    from pathos.helpers import freeze_support
    freeze_support()
    from pathos.pools import ProcessPool as Pool
    from pathos.pools import ThreadPool as TPool
    pool = Pool()
    tpool = TPool()

    # test 'dilled' multiprocessing for inner
    print("Evaluate 10 items on 2 proc:")
    pool.ncpus = 2
    print(pool)
    print(pool.map(add_me, range(10)))
    print('')

    # test 'dilled' multiprocessing for lambda
    print("Evaluate 10 items on 4 proc:")
    pool.ncpus = 4
    print(pool)
    print(pool.map(squ, range(10)))
    print('')
xp = np.arange(N * nodes, dtype=np.float64)[::-1]
print("Input: %s\n" % x)

# map sin_diff to the workers, then print to screen
print("Running serial python ...")
y = list(map(sin_diff, x, xp))
print("Output: %s\n" % np.asarray(y))

if HAS_PYINA:
    # map sin_diff to the workers, then print to screen
    print("Running mpi4py on %d cores..." % nodes)
    y = MpiPool(nodes).map(sin_diff, x, xp)
    print("Output: %s\n" % np.asarray(y))

# map sin_diff to the workers, then print to screen
print("Running multiprocessing on %d processors..." % nodes)
y = ProcessPool(nodes).map(sin_diff, x, xp)
print("Output: %s\n" % np.asarray(y))

# map sin_diff to the workers, then print to screen
print("Running multiprocessing on %d threads..." % nodes)
y = ThreadPool(nodes).map(sin_diff, x, xp)
print("Output: %s\n" % np.asarray(y))

# map sin_diff to the workers, then print to screen
print("Running parallelpython on %d cpus..." % nodes)
y = ParallelPool(nodes).map(sin_diff, x, xp)
print("Output: %s\n" % np.asarray(y))

# EOF
print("Running serial python ...") y = map(sin2, x) print("Output: %s\n" % np.asarray(y)) if HAS_PYINA: # map sin2 to the workers, then print to screen print("Running mpi4py on %d cores..." % nodes) y = MpiPool(nodes).map(sin2, x) print("Output: %s\n" % np.asarray(y)) # map sin2 to the workers, then print to screen print("Running multiprocesing on %d processors..." % nodes) y = ProcessPool(nodes).map(sin2, x) print("Output: %s\n" % np.asarray(y)) # map sin2 to the workers, then print to screen print("Running multiprocesing on %d threads..." % nodes) y = ThreadPool(nodes).map(sin2, x) print("Output: %s\n" % np.asarray(y)) # map sin2 to the workers, then print to screen print("Running parallelpython on %d cpus..." % nodes) y = ParallelPool(nodes).map(sin2, x) print("Output: %s\n" % np.asarray(y)) # EOF
def f(x, y):
    return x * y

x = range(10)
y = range(5)

if __name__ == '__main__':
    from pathos.helpers import freeze_support, shutdown
    freeze_support()
    from pathos.pools import ProcessPool, ThreadPool
    amap = ProcessPool().amap
    tmap = ThreadPool().map

    print(amap(f, [h(x), h(x), h(x), h(x), h(x)], y).get())

    def _f(m, g, x, y):
        return sum(m(g, x)) * y

    print(amap(_f, [tmap] * len(y), [g] * len(y), [x] * len(y), y).get())

    from math import sin, cos
    print(amap(tmap, [sin, cos], [x, x]).get())

    shutdown()

# EOF
def __init__(self):
    self.num_partitions = self.num_cores
    self.pool = ThreadPool(self.num_cores)
def threadcompute(self, xs):
    pool = ThreadPool(4)
    results = pool.map(self.compute, xs)
    return results
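# A minimal, self-contained usage sketch of the method above. The surrounding
# class and its compute() method are hypothetical; only threadcompute() mirrors
# the snippet. It shows pathos ThreadPool mapping a bound method over an iterable.
from pathos.pools import ThreadPool

class SquareWorker:
    def compute(self, x):
        return x * x

    def threadcompute(self, xs):
        pool = ThreadPool(4)
        results = pool.map(self.compute, xs)
        return results

if __name__ == '__main__':
    print(SquareWorker().threadcompute(range(10)))  # [0, 1, 4, ..., 81]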
def orm_extract(args):
    """
    Function for the ORMExtractParser

    :param args: Namespace
    :return: nothing
    """
    # Load database
    Base = databaseManage.WebsiteBase(args.database[0])
    Base.create_tables()

    if type(args.thread) is list:
        args.thread = args.thread[0]

    # Load data
    URLs = list(importData.csv_to_list(args.path[0])[1].keys())

    # ---------------------
    # Filter the results already in database
    # ---------------------
    alreadyIn = []
    for url in Base.session.query(Base.__getattribute__(args.table[0])).all():
        alreadyIn.append(url.url)

    for url in URLs:
        if "http://" in url[:7]:
            URLs[URLs.index(url)] = url[7:]
        elif "https://" in url[:8]:
            URLs[URLs.index(url)] = url[8:]

    URLs = set(URLs)
    for url in alreadyIn:
        try:
            URLs.remove(url)
        except KeyError:
            pass

    logger.info("{} websites will be added to the database".format(len(URLs)))

    itera = iter(URLs)
    URLs = zip(*[itera] * args.thread)

    # ---------------------
    # Add to the database
    # ---------------------
    dBase = databaseManage.NormalizationBase("DB/norm.db")
    normDict = {}
    for norm in dBase.session.query(dBase.Normalization).all():
        normDict[norm.feature] = {"data": norm.data,
                                  "normalizer": norm.normalizer,
                                  "scaler": norm.scaler}

    i = 1
    for url in URLs:
        logger.debug(str(i))
        logger.info("Add : {}".format(url))
        i += args.thread

        # Create URL object
        result1 = ThreadPool().map(Website.website, url)

        result2 = []
        tmp = []
        for web in result1:
            if web.html is None:
                result2.append(web)
                # result1.remove(web)
            else:
                tmp.append(web)

        if args.extraction:
            # Extract features
            fct = partial(Website.website.features_extraction, normDict=normDict)
            ThreadPool().map(fct, tmp)
            result2 += tmp
            for web in result2:
                print(web)
                # Add in database
                Base.adding(web, args.table[0])
        else:
            for web in result1:
                # Add in database
                Base.adding(web, args.table[0])

        if i % ((50 // args.thread) * args.thread) == 1 and i != 1:
            # Get new identity with tor
            with Controller.from_port(port=9051) as controller:
                controller.authenticate()
                controller.signal(Signal.NEWNYM)
class GroupedVideoGenerator3:

    def __init__(self, image_generators=[], fps=None, modification_pipelines=None,
                 structured_modification_pipelines=None, use_menpo_type=False, opts={}):
        self.video_lists = []
        self.file_lists = []
        print('GroupedVideoGenerator3()')
        for i in range(0, len(image_generators)):
            if isinstance(image_generators[i], VideoGenerator3):
                assert isinstance(image_generators[i].videos, OrderedDict)
                self.file_lists.append(image_generators[i].file_list)
                self.video_lists.append(image_generators[i].videos)
                fps = image_generators[i].fps
            elif isinstance(image_generators[i], list):
                video_getter_pool = ThreadPool(nodes=8)
                tmp_mpio_obj_list = video_getter_pool.map(menpo_import_video_verbose,
                                                          image_generators[i])
                # record which imports succeeded before dropping the failures,
                # so the kept file paths stay aligned with the kept videos
                safe_idxs = [idx for idx, x in enumerate(tmp_mpio_obj_list) if x is not None]
                tmp_mpio_obj_list = [x for x in tmp_mpio_obj_list if x is not None]
                tmp_file_list = image_generators[i]
                self.file_lists.append([tmp_file_list[safe_idx] for safe_idx in safe_idxs])
                self.video_lists.append(tmp_mpio_obj_list)
            else:
                raise TypeError('You can only make a GroupedImageGenerator2 '
                                'from a list of string-lists or ImageGenerator2s')
            self.file_lists[i] = [tmp for tmp in self.file_lists[i] if tmp is not None]
        self.fps = fps
        self.modification_pipelines = \
            modification_pipelines if modification_pipelines is not None else {}
        self.structured_modification_pipelines = \
            structured_modification_pipelines if structured_modification_pipelines is not None else {}
        self.io_pool = None
        self.enable_caching = 0
        self.enable_pp_caching = 0
        self.img_pp_cache = {}
        self.img_pp_oo_cache = {}
        self.img_cache = {}
        self.opts = opts
        self.use_menpo_type = use_menpo_type

    def copy3(self):
        ret_val = GroupedVideoGenerator3([], fps=self.fps,
                                         modification_pipelines=self.modification_pipelines,
                                         structured_modification_pipelines=self.structured_modification_pipelines,
                                         use_menpo_type=self.use_menpo_type,
                                         opts=self.opts)
        ret_val.file_lists = copy.copy(self.file_lists)
        ret_val.video_lists = copy.copy(self.video_lists)
        return ret_val

    def group_lists(self, get_key_callback):
        new_file_lists = sbpy_utils.core.sets.group_by(self.file_lists, get_key_callback)
        self.file_lists = new_file_lists
        reordered_video_lists = []
        old_video_lists = self.video_lists
        for view_idx, file_list in enumerate(new_file_lists):
            reordered_video_list = OrderedDict()
            old_video_list = old_video_lists[view_idx]
            for g in file_list:
                reordered_video_list[g] = old_video_list[g]

    def dynamic_func(self, key, *args):
        fhandle = self.opts[key]
        out = fhandle(*args)
        return out

    def get_sample_dims(self, pipeline_mask=[], invert_pipeline_mask=True):
        vidgroup_obj = self.get(0, 0, pipeline_mask, invert_pipeline_mask)
        return vidgroup_obj.shape()

    def structured_get(self, idx, time_idxs, pipeline_mask=[], invert_pipeline_mask=True):
        return self._get(idx, time_idxs, pipeline_mask, invert_pipeline_mask,
                         collect_pipeline_multi_outputs=True)

    def get(self, idx, time_idxs, pipeline_mask=[], invert_pipeline_mask=True):
        return self._get(idx, time_idxs, pipeline_mask, invert_pipeline_mask,
                         collect_pipeline_multi_outputs=False)

    def get_num_samples(self):
        return len(self.file_lists[0])

    # Fetch the frames at time_idxs for every view in the group, run each frame
    # through that view's modification pipeline, and wrap the results as a VideoGroup.
    def _get(self, idx, time_idxs, pipeline_mask=[], invert_pipeline_mask=True,
             collect_pipeline_multi_outputs=False):
        segments = []
        metadatas = []
        all_oos = []
        if isinstance(time_idxs, int):
            time_idxs = [time_idxs]
        img_type_converter = (lambda x: x) if self.use_menpo_type else menpo_to_rgb
        for i in range(0, len(self.file_lists)):
            vid_path = list(self.video_lists[i].keys())[idx]
            video_obj = self.video_lists[i][vid_path]
            safe_time_idxs = [safe_idx for safe_idx in time_idxs if safe_idx < len(video_obj)]
            sliced_frame_list = video_obj[safe_time_idxs]
            if self.io_pool is None:
                processed_imgs = []
                pipeline_oos = []
                for frame in sliced_frame_list:
                    if collect_pipeline_multi_outputs:
                        processed_img, oo = apply_func_pipeline_masked_wrapped(
                            img_type_converter(frame),
                            self.structured_modification_pipelines[i],
                            pipeline_mask, invert_pipeline_mask)
                        pipeline_oos.append(oo)
                        processed_imgs.append(processed_img)
                    else:
                        processed_img = apply_func_pipeline_masked(
                            img_type_converter(frame),
                            self.modification_pipelines[i],
                            pipeline_mask, invert_pipeline_mask)
                        processed_imgs.append(processed_img)
            else:
                if collect_pipeline_multi_outputs:
                    frame_proc = lambda frame: apply_func_pipeline_masked_wrapped(
                        img_type_converter(frame),
                        self.structured_modification_pipelines[i],
                        pipeline_mask, invert_pipeline_mask)
                    processed_imgs_oos = self.io_pool.map(frame_proc, sliced_frame_list)
                    processed_imgs, pipeline_oos = zip(*processed_imgs_oos)
                else:
                    frame_proc = lambda frame: apply_func_pipeline_masked(
                        img_type_converter(frame),
                        self.modification_pipelines[i],
                        pipeline_mask, invert_pipeline_mask)
                    processed_imgs = self.io_pool.map(frame_proc, sliced_frame_list)
            # if(self.enable_caching and (mij_path in self.img_cache)):
            #     img = self.img_cache[mij_path]
            #     #print(mij_path+'(using cache) ; ')
            # elif(self.enable_caching and (not mij_path in self.img_cache)):
            #     img = self.imreader_callback(mij_path)
            #     self.img_cache[mij_path] = img
            #     #print(mij_path + '(caching) ; ')
            # else:
            #     img = self.imreader_callback(mij_path)
            #     #print(mij_path+' ; ')
            # pp_cache_key = stringman.sanitize_string(mij_path+str(pipeline_mask)+str(invert_pipeline_mask))
            # if(self.enable_pp_caching and kv_haskey(pp_cache_key, self.img_pp_cache)):
            #     modified_img = self.img_pp_cache[pp_cache_key]
            #     #print(mij_path+'(using img_pp_cache) ; ')
            # elif(self.enable_pp_caching and not (pp_cache_key in self.img_pp_cache)):
            #     modified_img = apply_func_pipeline_masked(img, self.modification_pipelines[i], pipeline_mask, invert_pipeline_mask)
            #     self.img_pp_cache = kv_set(pp_cache_key, modified_img, self.img_pp_cache)
            #     #print([mij_path,'(caching to img_pp_cache) ; '])
            # else:
            #     modified_img = apply_func_pipeline_masked(img, self.modification_pipelines[i], pipeline_mask, invert_pipeline_mask)
            #     #print([mij_path,' ; '])
            if collect_pipeline_multi_outputs:
                per_frame_extra_data = pipeline_oos
            else:
                per_frame_extra_data = None
            new_vid_obj = Video(processed_imgs,
                                video_path=vid_path,
                                frame_idxs=safe_time_idxs,
                                per_frame_extra_data=per_frame_extra_data)
            segments.append(new_vid_obj)
        return VideoGroup(segments)

    # Spawn a daemon thread that keeps read_queue topped up with random segment groups.
    def start_fill(self, queuesize=10, pipeline_mask=[], invert_pipeline_mask=True,
                   min_length=3, max_length=5, alpha=12, beta=0.5):
        self.read_queue = Queue(maxsize=queuesize)
        self.worker_threads_events = []
        self.worker_threads = []

        def structured_get_loop(queue_obj, event_obj):
            while True:
                grouped_video_lists = self.get_random_segment_group(
                    pipeline_mask, invert_pipeline_mask, min_length, max_length, alpha, beta)
                queue_obj.put(grouped_video_lists)
                event_is_set = event_obj.wait()

        num_threads = 1
        for i in range(num_threads):
            worker_event = threading.Event()
            worker_event.set()
            worker = threading.Thread(target=structured_get_loop,
                                      args=(self.read_queue, worker_event))
            worker.setDaemon(True)
            self.worker_threads.append(worker)
            self.worker_threads_events.append(worker_event)
            worker.start()

    def get_random_segment_group_from_q(self, num_items=1):
        return self.structured_gets_from_q(num_items)

    def structured_gets_from_q(self, num_items=1):
        video_groups = []
        for i in range(0, num_items):
            video_group = self.read_queue.get()
            video_groups.append(video_group)
        return (VideoGroupList(video_groups).as_GroupedVideoLists(),
                [self.read_queue.task_done] * num_items)

    def structured_get_from_q(self):
        return self.structured_gets_from_q()

    def pause_fill(self):
        for event_obj in self.worker_threads_events:
            event_obj.clear()
        with self.read_queue.mutex:
            self.read_queue.queue.clear()

    def resume_fill(self):
        for event_obj in self.worker_threads_events:
            event_obj.set()

    def gets(self, idxs, time_idxs_s, io_pool, pipeline_mask=[], invert_pipeline_mask=True):
        f = lambda idx, time_idxs: self.get(idx, time_idxs, pipeline_mask, invert_pipeline_mask)
        data_objs = io_pool.map(f, idxs, time_idxs_s)
        return data_objs

    def structured_gets(self, idxs, time_idxs_s, io_pool, pipeline_mask=[], invert_pipeline_mask=True):
        f = lambda idx, time_idxs: self.structured_get(idx, time_idxs, pipeline_mask, invert_pipeline_mask)
        video_group_list = io_pool.map(f, idxs, time_idxs_s)
        # wrap the mapped results the same way structured_gets_from_q does
        return VideoGroupList(video_group_list).as_GroupedVideoLists()

    def get_random_segment_group(self, pipeline_mask=[], invert_pipeline_mask=True,
                                 min_length=3, max_length=5, alpha=12, beta=0.5):
        file_group_idx = random.randint(0, len(self.video_lists[0].keys()) - 1)
        vid_paths = [vlist[file_group_idx] for vlist in self.file_lists]
        vid_lengths = [len(vlist[path]) for vlist, path in zip(self.video_lists, vid_paths)]
        vid_length = min(vid_lengths)
        start_idx = random.randint(0, vid_length - min_length)
        segment_length = min(max_length, max(random.gammavariate(alpha, beta), min_length)) * self.fps
        end_idx = int(np.round(start_idx + segment_length))
        time_idxs = range(start_idx, end_idx)
        # print('intended_segment length: ' + str(len(time_idxs)))
        processed_segment = self.structured_get(file_group_idx, time_idxs,
                                                pipeline_mask, invert_pipeline_mask)
        print('processed_segment length: ' + str(len(processed_segment)))
        return processed_segment

    def set_image_modifiers(self, modification_pipelines):
        self.modification_pipelines = modification_pipelines

    def set_structured_image_modifiers(self, structured_modification_pipelines):
        self.structured_modification_pipelines = structured_modification_pipelines

    def get_func_pipeline(self):
        return copy.deepcopy(self.modification_pipelines)

    def get_structured_func_pipeline(self):
        return copy.deepcopy(self.structured_modification_pipelines)

    def get_num_groups(self):
        return len(self.file_lists)

    def init_thread_pool(self):
        self.io_pool = ThreadPool(nodes=4)

    def close_thread_pool(self):
        self.io_pool.close()
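# Hypothetical usage of the queue-backed prefetcher above; nothing here comes from
# the source except the method names. The file names are placeholders, the per-view
# generators are assumed to be VideoGenerator3 instances (so that each video_lists
# entry is an OrderedDict keyed by path, which is what _get() indexes), and empty
# modification pipelines are assumed to act as the identity.
view0 = VideoGenerator3(['view0_a.mp4', 'view0_b.mp4'])   # placeholder paths
view1 = VideoGenerator3(['view1_a.mp4', 'view1_b.mp4'])
gen = GroupedVideoGenerator3(
    [view0, view1],
    modification_pipelines={0: [], 1: []},
    structured_modification_pipelines={0: [], 1: []})
gen.start_fill(queuesize=4)                                # background thread keeps the queue filled
video_groups, done_handles = gen.structured_gets_from_q(num_items=2)
for done in done_handles:                                  # acknowledge consumption (queue.task_done)
    done()
gen.pause_fill()                                           # stop producing and drain the queue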
# build from inner function
add_me = adder(5)

# build from lambda functions
squ = lambda x: x**2

if __name__ == '__main__':
    from pathos.helpers import freeze_support, shutdown
    freeze_support()
    from pathos.pools import ProcessPool as Pool
    from pathos.pools import ThreadPool as TPool
    pool = Pool()
    tpool = TPool()

    # test 'dilled' multiprocessing for inner
    print("Evaluate 10 items on 2 proc:")
    pool.ncpus = 2
    print(pool)
    print(pool.map(add_me, range(10)))
    print('')

    # test 'dilled' multiprocessing for lambda
    print("Evaluate 10 items on 4 proc:")
    pool.ncpus = 4
    print(pool)
    print(pool.map(squ, range(10)))
    print('')
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 1997-2015 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/pathos/LICENSE
#
# (Python 2 version of the example above; note the statement-style prints.)

def host(id):
    import socket
    return "Rank: %d -- %s" % (id, socket.gethostname())


if __name__ == '__main__':
    from pathos.pools import ThreadPool as TPool
    tpool = TPool()

    print "Evaluate 10 items on 1 thread"
    tpool.nthreads = 1
    res3 = tpool.map(host, range(10))
    print tpool
    print '\n'.join(res3)
    print ''

    print "Evaluate 10 items on 2 threads"
    tpool.nthreads = 2
    res5 = tpool.map(host, range(10))
    print tpool
    print '\n'.join(res5)
    print ''

    print "Evaluate 10 items on ? threads"
def df_apply(df, f, pool=None, n_cpus=None, return_df=True):
    """Apply the function `f` to each row in `df` in a parallel fashion."""
    if pool is None:
        if n_cpus is None:
            n_cpus = cpu_count()
        pool = ThreadPool(n_cpus)

    class RecordProxy:
        """A proxy object to wrap a `DataFrame.iat[row_i, col_i]` access model
        and provide a dictionary-style interface.
        """
        __df = df
        __field_names = list(df.columns)

        @classmethod
        def _field_i(cls, name):
            try:
                return cls.__field_names.index(name)
            except ValueError:
                raise KeyError(
                    f"key '{name}' not found on record. Available keys are: {cls.__field_names}"
                )

        @classmethod
        def wrap_map_func(cls, f):
            """Wraps the given function to be passed to a map() style function.

            Returns a function that expects to be called with an index value and
            it will call the given function passing it an object with a python
            dictionary style interface to the row.
            """
            return lambda row_i: f(cls(row_i))

        @property
        def index(self):
            return self.__row_i

        def __init__(self, row_i):
            self.__row_i = row_i

        def __getitem__(self, key):
            i = self._field_i(key)
            return self.__df.iat[self.__row_i, i]

        def __setitem__(self, key, value):
            i = self._field_i(key)
            self.__df.iat[self.__row_i, i] = value

        def get(self, key, value=None):
            try:
                i = self._field_i(key)
                return self.__df.iat[self.__row_i, i]
            except KeyError:
                return value

        def __str__(self):
            parts = ["Record({"]
            fields_repr = []
            for field_name in self.__field_names:
                field_repr = self.__getitem__(field_name).__repr__()
                fields_repr.append(f"'{field_name}': {field_repr}")
            # append the joined string as a single element
            # (extend() would add it character by character)
            parts.append(",".join(fields_repr))
            parts.append("})")
            return "".join(parts)

        def dict(self, keys=None):
            if keys is None:
                keys = self.__field_names
            return {
                key: self.__df.iat[self.__row_i, i]
                for i, key in enumerate(self.__field_names)
                if key in keys
            }

        def __iter__(self):
            return (self.__df.iat[self.__row_i, i]
                    for i in range(len(self.__field_names)))

    results = pool.map(RecordProxy.wrap_map_func(f), range(df.shape[0]))
    if return_df:
        return df
    else:
        return results
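# Hypothetical usage of df_apply above; the DataFrame and column names are made up.
# Each call to the user function receives a RecordProxy, so rows are read and written
# dict-style and writes land directly in the underlying DataFrame via .iat. An explicit
# pathos ThreadPool is passed so the example does not depend on df_apply's own imports.
import pandas as pd
from pathos.pools import ThreadPool

df = pd.DataFrame({"base": [1, 2, 3], "bonus": [10, 20, 30], "total": [0, 0, 0]})

def add_bonus(row):
    # 'row' is a RecordProxy; assignment writes through to df
    row["total"] = row["base"] + row["bonus"]

df_apply(df, add_bonus, pool=ThreadPool(nodes=2))  # returns df when return_df=True
print(df["total"].tolist())                        # [11, 22, 33]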