def calculate_prob(hole_cards, num_iterations, given_board):
    import itertools
    # must pip install these libraries
    from multiprocess import Pool
    import dill as pickle

    # creates a pool of 4 worker processes
    p = Pool(4)
    deck_cards = prob_functions.generate_deck(hole_cards)
    possible_card_pairings = tuple(itertools.combinations(deck_cards, 2))
    card_combos = map(lambda x: tuple(list(hole_cards) + [x]), possible_card_pairings)
    s = pickle.dumps(lambda hc: single_prob(hc, num_iterations, given_board))
    f = pickle.loads(s)
    prob_list = p.map(f, card_combos)
    tie = 0
    win = 0
    for prob in prob_list:
        tie += prob[0]
        win += prob[1]
    l = len(prob_list)
    tie = tie / l
    win = win / l
    return (tie, win)
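# A minimal, hedged sketch (not taken from the example above): the `multiprocess`
# fork serializes with dill, so lambdas and closures can usually be passed to
# Pool.map directly -- the explicit dill.dumps/dill.loads round-trip in
# calculate_prob is a defensive extra step rather than a requirement.
def _lambda_map_sketch():
    from multiprocess import Pool
    offset = 10  # captured by the closure below
    with Pool(4) as pool:
        return pool.map(lambda x: x * x + offset, range(8))

if __name__ == '__main__':
    print(_lambda_map_sketch())  # [10, 11, 14, 19, 26, 35, 46, 59]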
def pcall_mp(fun, args, cores=cores):
    """Calls a function for every input in args"""
    mainpool = Pool(cores)  # create pool
    # print("Using", cores, "cores")
    out = mainpool.map(fun, args)  # return list
    mainpool.terminate()
    del mainpool  # delete pool
    return out
def inner(*args):
    pool = Pool(processes=1)
    res = pool.apply_async(f, args)
    try:
        v = res.get(timeout=sec)
    except Exception as inst:
        print(inst)
        v = None
    finally:
        pool.terminate()
    return v
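# Hedged reconstruction of the enclosing scope that `inner` appears to come
# from: `f` and `sec` are free variables, which suggests a timeout decorator
# along these lines (the name `with_timeout` is illustrative, not from the source).
from multiprocess import Pool

def with_timeout(sec):
    def wrap(f):
        def inner(*args):
            pool = Pool(processes=1)
            res = pool.apply_async(f, args)
            try:
                v = res.get(timeout=sec)
            except Exception as inst:
                print(inst)
                v = None
            finally:
                pool.terminate()
            return v
        return inner
    return wrap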
def __init__(self):
    super(GroupCheckerGui, self).__init__('Group Checker')
    self._group_name = ControlText('Group Name', CONFIG['group_name'])
    self._group_name.enabled = False
    self._allowed_tags = UnicodeControlList('Allowed Tags',
                                            plusFunction=self.__add_tag_action,
                                            minusFunction=self.__remove_tag_action)
    self.allowed_tags = GuiList(CONFIG['white_filters']['SubstringFilter']['substrings'],
                                self._allowed_tags)
    self._allowed_ids = ControlList('Allowed Ids',
                                    plusFunction=self.__add_id_action,
                                    minusFunction=self.__remove_id_action)
    self.allowed_ids = GuiList(CONFIG['white_filters']['SignerFilter']['ids'],
                               self._allowed_ids)

    self._bad_posts = ControlCheckBoxList('Bad posts')
    self._bad_posts._form.listWidget.itemDoubleClicked.connect(self.__show_link_action)

    self._remove_button = ControlButton('Remove')
    self._remove_button.value = self.__remove_action

    self._show_button = ControlButton('Show bad posts')
    self._show_button.value = self.__show_bad_post_action

    self.pool = Pool(processes=1)
    self.bad_posts = []

    self._formset = [
        ('', '_group_name', ''),
        ('', '_allowed_tags', '_allowed_ids', ''),
        '',
        ('', '_bad_posts', ''),
        ('', '_remove_button', '_show_button', ''),
        ''
    ]
def download_image_thread(location_q, image_q, MAX_DL_THREADS=10):
    print("Running Download Image Thread.")

    max_processes = MAX_DL_THREADS
    print("Creating a thread pool of size {} for downloading images...".format(max_processes))
    pool = Pool(processes=max_processes)
    # Allow us to have n processes running, and n processes scheduled to run
    # TODO: Manager is not necessary here, but is used to get around the fact
    # that thread-safe objects cannot be passed by reference, they must be
    # inherited. A more lightweight solution should be found.
    workers = Manager().Semaphore(max_processes * 2)

    def async_download(location):
        image = download_image(location)
        image_q.put((location, image), True)
        workers.release()

    while True:
        location = location_q.get(True)
        workers.acquire()
        pool.apply_async(async_download, (location,))
class ProcessPoolExecutor(Executor):
    """Process Pool Executor"""

    def __init__(self):
        super(ProcessPoolExecutor, self).__init__()
        import os
        from multiprocess import Pool
        self.pool = Pool(os.cpu_count() or 1)

    def submit(self, func, *args, **kwargs):
        from concurrent.futures import Future
        fut = Future()
        self.tasks[fut] = self.pool.apply_async(
            func, args, kwargs, fut.set_result, fut.set_exception
        )
        fut.add_done_callback(self.tasks.pop)
        return fut

    def shutdown(self, wait=True):
        super(ProcessPoolExecutor, self).shutdown(wait)
        self.pool.terminate()
        self.pool.join()
def __init__(self, storage, threads):
    # Manager for concurrency
    self.manager = Manager()
    # System storage
    self.storage = storage
    # Queues
    self.high_access = self.manager.list([])
    self.normal_access = self.manager.list([])
    self._pool = Pool(processes=threads)
    # Operations
    self.operation_table = self.manager.dict()
def get_new_tickets(self, from_time=utils.pre_day_to_string(1)):
    search_conditions = {
        "skip": 0,
        "query": {
            "ctimeGte": "{}T21:00:00.000Z".format(from_time)
        }
    }
    pool_size = multiprocess.cpu_count()
    pool_volume = 10 * pool_size
    index = 0
    tickets_num = self._get_number_of_tickets(from_time, to_time)
    req_num = utils.ceil_division(tickets_num, 1000)
    pool = Pool(pool_size)
    for req_count in range(req_num):
        search_tickets = self.search_tickets(search_conditions)
        while True:
            tickets = pool.map(self.add_attr_to_ticket,
                               itertools.islice(search_tickets, pool_volume))
            if tickets:
                print('Downloaded {}/{} tickets'.format(index, tickets_num), end='\r')
                index += pool_volume
                yield tickets
            else:
                break
        search_conditions['skip'] += 1000
def zte_gpon_svlan_check():
    clear_log()
    nodes = graph.cypher.execute(
        "match(n:Olt)--(c:Card) where c.name='GTGO' return n.ip,collect(c.slot)")
    olts = ((x[0], x[1]) for x in nodes)
    lzte_gpon_svlan = lambda x: zte_gpon_svlan(ip=x[0], slots=x[1])
    pool = Pool(8)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, lzte_gpon_svlan), olts))
    pool.close()
    pool.join()
def prime_calculate(self):
    break_points = []  # List that will have start and stopping points
    for i in range(cores):  # Creates start and stopping points based on length of range_finish
        break_points.append(
            {"start": int(math.ceil(((self.maximum_prime + 1) + 0.0) / cores * i)),
             "stop": int(math.ceil(((self.maximum_prime + 1) + 0.0) / cores * (i + 1)))})

    p = Pool(cores)  # Number of processes to create.
    for i in break_points:  # Cycles through the break-points list created above.
        a = p.apply_async(self.prime_calculator, kwds=i, args=tuple(),
                          callback=self.update_num)  # This will start the separate processes.
    p.close()  # Prevents any more processes being started
    p.join()  # Waits for worker processes to end
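# Worked example of the chunking used above, assuming prime_calculator scans
# [start, stop) for each chunk (maximum_prime=10, cores=4):
import math

cores = 4
maximum_prime = 10
break_points = [{"start": int(math.ceil((maximum_prime + 1) / cores * i)),
                 "stop": int(math.ceil((maximum_prime + 1) / cores * (i + 1)))}
                for i in range(cores)]
print(break_points)
# [{'start': 0, 'stop': 3}, {'start': 3, 'stop': 6},
#  {'start': 6, 'stop': 9}, {'start': 9, 'stop': 11}]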
def interface_check_m():
    clear_log()
    # cmd = "match(s: Switch) where s.model in ['S8505','S8508'] return s.ip, s.model"
    cmd = "match(s: Switch) return s.ip, s.model"
    # cmd = "match(s:Switch) where s.model='S9306' or s.model='s9303' return s.ip,s.model limit 2"
    nodes = graph.cypher.execute(cmd)
    switchs = [(x[0], x[1]) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    out_inf = partial(output_interface_m, lock)
    list(pool.map(compose(out_inf, get_interface), switchs))
    pool.close()
    pool.join()
def svlan_check():
    clear_log()
    # nodes = graph.find('Olt', property_key='ip', property_value='9.192.96.246')
    nodes = graph.find('Olt')
    # nodes = graph.find('Olt', property_key='company', property_value='zte')
    olts = [(x['ip'], x['company'], x['area']) for x in nodes]
    # list(map(compose(card_entry, get_card), olts))
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, get_svlan), olts))
    pool.close()
    pool.join()
def add_infs():
    funcs = {'zte': Zte.get_infs, 'hw': Huawei.get_infs}
    get_infs = partial(_company, funcs)
    clear_log()
    nodes = graph.cypher.execute(
        'match (n:Olt) return n.ip as ip,n.company as company')
    olts = [dict(ip=x['ip'], company=x['company']) for x in nodes]
    pool = Pool(128)
    lock = Manager().Lock()
    _add_infs_p = partial(_add_infs, lock)
    list(pool.map(compose(_add_infs_p, get_infs), olts))
    pool.close()
    pool.join()
def main(args):
    filedate = args.filedate
    database = args.database

    slablist = ['alu', 'cal', 'cam', 'car', 'cas', 'cot', 'hal', 'hel', 'him', 'hin',
                'izu', 'jap', 'ker', 'kur', 'mak', 'man', 'mue', 'pam', 'png', 'phi',
                'puy', 'ryu', 'sam', 'sco', 'sol', 'sul', 'sum', 'van']

    indices = range(len(slablist))
    pool1 = Pool(args.nCores)
    partial_loop1 = partial(calls2d, database, filedate, slablist)

    pts = pool1.map(partial_loop1, indices)
    pool1.close()
    pool1.join()
def hostname_check():
    clear_log()
    nodes = graph.find('Olt')
    # nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(hostname_entry, lock)
    list(pool.map(compose(func, get_hostname), olts))
    pool.close()
    pool.join()
    ip_hostname = (x.split(',') for x in open(result_file))
    cmd = "match (n:Olt) where n.ip={ip} set n.hostname={hostname}"
    list(map(lambda x: graph.cypher.execute(
        cmd, ip=x[0], hostname=x[1]), ip_hostname))
def get_vlan_usersP(bras):
    def _get_vlan_users(bas):
        funcs = {'m6k': M6k.get_vlan_users, 'me60': ME60.get_vlan_users}
        _gvu = partial(_model, funcs)
        return _gvu(bas)

    bras = [dict(ip=x[0], model=x[1], inf=x[2]) for x in bras]
    pool = Pool(len(bras))
    temp = pool.map(_get_vlan_users, bras)
    pool.close()
    pool.join()
    temp = [x[1] for x in temp if x[1]]
    rslt = reduce(lambda x, y: merge_with(sum, x, y), temp)
    return rslt
def calculate(self, data):
    t1 = dt.datetime.utcnow()
    LOGGER.info('Starting calculation...')
    self._data = deepcopy(data)
    self._check_inputs(data)
    dep = self._dependencies()
    sorted_dep = topological_sort(dep)
    for items in sorted_dep:
        # loading node with inputs
        for item in items:
            node = self._get_node(item)
            args = [i_name for i_name in node.input_names
                    if i_name not in node.kwargs]
            data_to_pass = []
            for arg in args:
                data_to_pass.append(self._data[arg])
            kwargs_to_pass = {}
            for kwarg in node.kwargs:
                kwargs_to_pass[kwarg] = self._data[kwarg]
            node.load_inputs(data_to_pass, kwargs_to_pass)
        # running nodes
        if self._parallel:
            pool = Pool(self._pool_size)
            results = pool.map(
                Graph.run_node,
                [self._get_node(i) for i in items]
            )
            pool.close()
            pool.join()
            results = {k: v for k, v in results}
        else:
            results = {}
            for item in items:
                node = self._get_node(item)
                res = node.run_with_loaded_inputs()
                results[node.id] = res
        # save results
        for item in items:
            node = self._get_node(item)
            res = results[node.id]
            if len(node.output_names) == 1:
                self._data[node.output_names[0]] = res
            else:
                for i, out in enumerate(node.output_names):
                    self._data[out] = res[i]
    t2 = dt.datetime.utcnow()
    LOGGER.info('Calculation finished in {}'.format(t2 - t1))
    return res
def zhongji_check():
    clear_log()
    nodes = graph.find('Olt')
    # nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(zhongji_entry, lock)
    list(pool.map(compose(func, get_zhongji), olts))
    pool.close()
    pool.join()
    ports = (x.split(',') for x in open(result_file))
    cmd = """match(n: Olt) where n.ip = {ip}
    merge(n) - [:HAS]->(m: Etrunk{name: {sm}})
    merge(m) - [:Include]->(p: Port{name: {interface}})"""
    list(map(lambda x: graph.cypher.execute(
        cmd, ip=x[0], sm=x[1], interface=x[2]), ports))
def parallel_cdist(data1, data2, n_rows_per_job=100):
    from scipy.spatial.distance import cdist

    data1 = np.array(data1)
    data2 = np.array(data2)

    pool = Pool(12)
    start_indices = np.arange(0, data1.shape[0], n_rows_per_job)
    end_indices = start_indices + n_rows_per_job - 1
    # Tuple-parameter unpacking in lambdas is Python 2 only; index the pair explicitly.
    partial_distance_matrices = pool.map(
        lambda se: cdist(data1[se[0]:se[1] + 1].copy(), data2),
        zip(start_indices, end_indices))
    pool.close()
    pool.join()

    distance_matrix = np.concatenate(partial_distance_matrices)
    return distance_matrix
def eval_EFG(self, x, num_procs=None, info=False):
    from multiprocess import Pool, cpu_count

    if not num_procs:
        num_procs = cpu_count()
    num_samples = self.parameters['num_samples']
    pool = Pool(num_procs)
    num = int(np.ceil(float(num_samples) / float(num_procs)))
    results = list(zip(*pool.map(lambda i: self.eval_EFG_sequential(x, num, i, info),
                                 range(num_procs), chunksize=1)))
    pool.terminate()
    pool.join()
    if not info:
        assert(len(results) == 4)
    else:
        assert(len(results) == 5)
    assert(all([len(vals) == num_procs for vals in results]))
    return [sum(vals) / float(num_procs) for vals in results]
def add_power_info():
    funcs = {'S8508': S85.get_power_info,
             'S8505': S85.get_power_info,
             'T64G': T64.get_power_info,
             'S8905': S89.get_power_info,
             'S8905E': S8905E.get_power_info,
             'S9306': S93.get_power_info,
             'S9303': S93.get_power_info}
    get_power_info = partial(_model, funcs)
    # clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch) where s.snmpState='normal' return s.ip as ip,s.model as model")
    switches = [dict(ip=x['ip'], model=x['model']) for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_power_info, lock)
    list(pool.map(compose(_ff, get_power_info), switches))
    pool.close()
    pool.join()
def add_traffics():
    funcs = {'S8508': S85.get_traffics,
             'S8505': S85.get_traffics,
             'T64G': T64.get_traffics,
             'S8905': S89.get_traffics,
             'S8905E': S8905E.get_traffics,
             'S9306': S93.get_traffics,
             'S9303': S93.get_traffics}
    get_traffics = partial(_model, funcs)
    # clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch)--(i:Inf) where s.snmpState='normal' return s.ip as ip,collect(i.name) as infs,s.model as model")
    switchs = [dict(ip=x['ip'], infs=x['infs'], model=x['model'])
               for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_traffics, lock)
    list(pool.map(compose(_ff, get_traffics), switchs))
    pool.close()
    pool.join()
def compute_jaccard_pairwise(indices, square_form=True, parallel=True, return_poses=False):
    n = len(indices)
    if parallel:
        pool = Pool(16)
        scores_poses_tuples = pool.map(
            lambda x: compute_jaccard_i_vs_list(x[0], x[1]),
            [(indices[i], indices[i + 1:]) for i in range(n)])
        pool.close()
        pool.join()
    else:
        scores_poses_tuples = [compute_jaccard_i_vs_list(indices[i], indices[i + 1:])
                               for i in range(n)]

    pairwise_scores = np.array([scores for scores, poses in scores_poses_tuples])

    if square_form:
        pairwise_scores = squareform(np.concatenate(pairwise_scores))

    if return_poses:
        poses = np.array([poses for scores, poses in scores_poses_tuples])
        return pairwise_scores, poses
    else:
        return pairwise_scores
def eval_EQ(self, p, num_procs=None, quiet=True):
    """
    Evaluates E[Q(p,r)] and its gradient in parallel.

    Parameters
    ----------
    p : generator powers
    num_procs : number of parallel processes
    quiet : flag
    """
    from multiprocess import Pool, cpu_count

    if not num_procs:
        num_procs = cpu_count()
    num_samples = self.parameters['num_samples']
    pool = Pool(num_procs)
    num = int(np.ceil(float(num_samples) / float(num_procs)))
    results = list(zip(*pool.map(lambda i: self.eval_EQ_sequential(p, num, i, quiet),
                                 range(num_procs), chunksize=1)))
    pool.terminate()
    pool.join()
    assert(len(results) == 2)
    assert(all([len(vals) == num_procs for vals in results]))
    return [sum(vals) / float(num_procs) for vals in results]
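# eval_EFG and eval_EQ above share one pattern: split num_samples evenly across
# processes, run a sequential evaluator per chunk, then average the results.
# A self-contained sketch of that pattern (the worker passed in below is a
# placeholder, not part of the original code):
import numpy as np
from multiprocess import Pool, cpu_count

def parallel_average(sequential, num_samples, num_procs=None):
    num_procs = num_procs or cpu_count()
    num = int(np.ceil(float(num_samples) / float(num_procs)))  # samples per process
    pool = Pool(num_procs)
    vals = pool.map(lambda i: sequential(num, i), range(num_procs), chunksize=1)
    pool.terminate()
    pool.join()
    return sum(vals) / float(num_procs)

if __name__ == '__main__':
    # e.g. Monte Carlo estimate of E[U^2] for U ~ Uniform(0, 1), seeded per process
    def mc_chunk(num, seed):
        rng = np.random.RandomState(seed)
        return np.mean(rng.rand(num) ** 2)

    print(parallel_average(mc_chunk, num_samples=100000))  # ~1/3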
    labelmap_fp = os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.bp' % dict(alg=alg)
    bp.pack_ndarray_file(big_labelmap, labelmap_fp)

    # for tile_i in range(12):
    #     execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d.tif' % \
    #                     dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i))
    #     execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d_labelmap_cellprofiler.bp' % \
    #                     dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i))

    # Generate labelmap viz
    t = time.time()

    viz = img_as_ubyte(label2rgb(big_labelmap, bg_label=0, bg_color=(0, 0, 0)))
    cv2.imwrite(os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.png' % dict(alg=alg), viz)

    sys.stderr.write('Generate labelmap viz: %.2f seconds.\n' % (time.time() - t))  # 60s

t = time.time()

pool = Pool(12)
pool.map(detect_cells, range(first_sec, last_sec + 1))
pool.close()
pool.join()

sys.stderr.write('Overall time: %.2f seconds.\n' % (time.time() - t))
        w = w_tb
        h = h_tb
    else:
        raise

    # input_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=5, version=version, resol='raw')
    out_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=2, resol=resol, version=version)
    print('out_dir:', out_dir)
    # script = os.path.join(REPO_DIR, 'preprocess', 'warp_crop_IM_v3.py')

    # ! rm -rf {out_dir}
    create_if_not_exists(out_dir)

    t = time.time()

    pool = Pool(8)
    _ = pool.map(lambda img_name: crop(stack=stack, img_name=img_name, version=version, resol=resol,
                                       x=x, y=y, w=w, h=h),
                 metadata_cache['valid_filenames'][stack])
    pool.close()
    pool.join()

    # for img_name in metadata_cache['valid_filenames'][stack]:
    #     f(stack=stack, img_name=img_name, version=version, resol=resol,
    #       x=x, y=y, w=w, h=h)

    # run_distributed('convert \"%%(input_fp)s\" -crop %(w)dx%(h)d+%(x)d+%(y)d \"%%(output_fp)s\"' % \
    #                 {'w':w_raw, 'h':h_raw, 'x':x_raw, 'y':y_raw},
    #                 kwargs_list=[{'input_fp': DataManager.get_image_filepath_v2(stack=stack, prep_id=5, resol='raw', version=version, fn=img_name),
    #                               'output_fp': DataManager.get_image_filepath_v2(stack=stack, fn=img_name, prep_id=2, version=version, resol='raw')}
    #                              for img_name in metadata_cache['valid_filenames'][stack]],
def sample_chains(self, n_sample, init_states, chain_var_funcs, n_process=1, memmap_enabled=False, memmap_path=None, stack_chain_arrays=False): """Sample one or more Markov chains from given initial states. Performs a specified number of chain iterations (each of which may be composed of multiple individual Markov transitions), recording the outputs of functions of the sampled chain state after each iteration. The chains may be run in parallel across multiple independent processes or sequentially. In all cases all chains use independent random draws. Args: n_sample (int): Number of samples (iterations) to draw per chain. init_states (Iterable[ChainState] or Iterable[array]): Initial chain states. Each entry can be either an array specifying the state or a `ChainState` instance. One chain will be run for each state in the iterable sequence. chain_var_funcs (dict[str, callable]): Dictionary of functions which compute the chain variables to be recorded at each iteration, with each function being passed the current state and returning an array corresponding to the variable(s) to be stored. The keys to the functions are used to index the chain variable arrays in the returned data. n_process (int or None): Number of parallel processes to run chains over. If set to one then chains will be run sequentially in otherwise a `multiprocessing.Pool` object will be used to dynamically assign the chains across multiple processes. If set to `None` then the number of processes will default to the output of `os.cpu_count()`. memmap_enabled (bool): Whether to memory-map arrays used to store chain data to files on disk to avoid excessive system memory usage for long chains and/or high memory chain states. The chain data is written to `.npy` files in the directory specified by `memmap_path` (or a temporary directory if not provided). These files persist after the termination of the function so should be manually deleted when no longer required. memmap_path (str): Path to directory to write memory-mapped chain data to. If not provided, a temporary directory will be created and the chain data written to files there. stack_chain_arrays (bool): Whether to stack the lists of per-chain arrays in the returned dictionaries into new arrays with the chain index as the first axis. Note if set to `True` when memory-mapping is enabled (`memmap_enabled=True`) all memory-mapped arrays will be loaded from disk in to memory. Returns: chains (dict[str, list[array]]): Chain variable array lists, with one entry per function in `chain_var_funcs` with the same key. Each entry consists of a list of arrays, one per chain, with the leading dimension of the arrays corresponding to the sampling (draw) index. chain_stats (dict[str, dict[str, list[array]]]): Dictionary of chain transition statistics. Outer dictionary contains entries for each chain transition which returns statistics (e.g. acceptance probabilities) on each iteration. For each such transition, a dictionary is returned with string keys describing the statistics recorded and list of array values with one array per chain and the leading dimension of the arrays corresponding to the sampling index. 
""" n_chain = len(init_states) # Create temp directory if memory-mapping enabled and no path provided if memmap_enabled and memmap_path is None: memmap_path = tempfile.mkdtemp() if RANDOMGEN_AVAILABLE: seed = self.rng.randint(2**64, dtype='uint64') rngs = [ randomgen.Xorshift1024(seed).jump(i).generator for i in range(n_chain) ] else: seeds = (self.rng.choice(2**16, n_chain, False) * 2**16 + self.rng.choice(2**16, n_chain, False)) rngs = [np.random.RandomState(seed) for seed in seeds] if n_process == 1: # Using single process therefore run chains sequentially chain_outputs = [] for c, (rng, init_state) in enumerate(zip(rngs, init_states)): chains, chain_stats, n_sample_chain = self._sample_chain( rng, n_sample, init_state, chain_var_funcs, chain_index=c, parallel_chains=False, memmap_enabled=memmap_enabled, memmap_path=memmap_path) chain_outputs.append((chains, chain_stats, n_sample_chain)) if n_sample_chain != n_sample: logger.error( f'Sampling manually interrupted at chain {c} iteration' f' {n_sample_chain}. Arrays containing chain variables' f' and statistics computed before interruption will' f' be returned, all entries for iteration ' f' {n_sample_chain} and above of chain {c} should be' f' ignored.') break else: # Run chains in parallel using a multiprocess(ing).Pool # Child processes made to ignore SIGINT signals to allow handling # of KeyboardInterrupts in parent process with Pool(n_process, _ignore_sigint_initialiser) as pool: try: chain_outputs = pool.starmap( self._sample_chain, zip( rngs, [n_sample] * n_chain, init_states, [chain_var_funcs] * n_chain, range(n_chain), # chain_index [True] * n_chain, # parallel_chains flags [memmap_enabled] * n_chain, [memmap_path] * n_chain, )) except KeyboardInterrupt: # Close any still running processes pool.terminate() pool.join() err_message = 'Sampling manually interrupted.' if memmap_enabled: err_message += ( f' Chain data recorded so far is available in ' f'directory {memmap_path}.') logger.error(err_message) raise # When running parallel jobs with memory-mapping enabled, data arrays # returned by processes as file paths to array memory-maps therfore # load memory-maps objects from file before returing results load_memmaps = memmap_enabled and n_process > 1 return self._collate_chain_outputs(n_sample, chain_outputs, load_memmaps, stack_chain_arrays)
def fmultiprocess(log, function, inputArray, poolSize=False, **kwargs):
    """multiprocess pool

    **Key Arguments:**
        - ``log`` -- logger
        - ``function`` -- the function to multiprocess
        - ``inputArray`` -- the array to be iterated over

    **Return:**
        - ``resultArray`` -- the array of results

    **Usage:**

        .. code-block:: python

            from fundamentals import multiprocess
            # DEFINE AN INPUT ARRAY
            inputArray = range(10000)
            results = multiprocess(log=log, function=functionName,
                                   inputArray=inputArray, otherFunctionKeyword="cheese")
    """
    log.info('starting the ``multiprocess`` function')

    # DEFINE POOL SIZE - NUMBER OF CPU CORES TO USE (BEST = ALL - 1)
    # if cpu_count() > 1:
    #     poolSize = cpu_count() - 1
    # else:
    #     poolSize = 1
    # if len(inputArray) < poolSize:
    #     poolSize = len(inputArray)
    if poolSize:
        p = Pool(processes=poolSize)
    else:
        p = Pool()

    # MAP-REDUCE THE WORK OVER MULTIPLE CPU CORES
    try:
        mapfunc = partial(function, log=log, **kwargs)
        resultArray = p.map(mapfunc, inputArray)
    except:
        try:
            mapfunc = partial(function, **kwargs)
            resultArray = p.map(mapfunc, inputArray)
        except:
            mapfunc = partial(function, log=log, **kwargs)
            resultArray = p.map(mapfunc, inputArray)

    p.close()
    p.terminate()
    p.join()

    log.info('completed the ``multiprocess`` function')
    return resultArray
        if len(a) > 0:
            ntb_to_nissl[ntb_v] = np.unique(a)[0]

    ntb_values = np.arange(0, 5000)
    ntb_matched_values = np.interp(ntb_values,
                                   [ntb_v for ntb_v, nissl_v in sorted(ntb_to_nissl.items())],
                                   [nissl_v for ntb_v, nissl_v in sorted(ntb_to_nissl.items())])

    sys.stderr.write('Compute matching: %.2f seconds.\n' % (time.time() - t))

    return ntb_matched_values, (region1_x, region1_y, region1_w, region1_h)

n_regions = 8
pool = Pool(4)
res = pool.map(f, range(n_regions))
ntb_matched_values_all_examples_one_section, region_bboxes_all_examples_one_section = zip(*res)
pool.close()
pool.join()

# for region_id in range(10):
#     while True:
#         region1_x = np.random.randint(0, w-10000, 1)[0]
#         region1_y = np.random.randint(0, h-10000, 1)[0]
#         region1_w = 5000
#         region1_h = 5000
#         print region1_x, region1_y, region1_w, region1_h
#         tb_region1_xmin = region1_x / 32
    ntb_blue_bins = np.arange(5001)
    ntb_blue_inv_bins = np.arange(5001)
    ntb_inv_to_nissl_mapping = np.interp(ntb_blue_inv_bins, ntb_inv_vals, nissl_vals)
    ntb_to_nissl_mapping = ntb_inv_to_nissl_mapping[5000 - ntb_blue_bins]
    ntb_to_nissl_mapping = np.round(ntb_to_nissl_mapping).astype(np.uint8)

    ntb_matched_values_all_examples_one_section.append(ntb_to_nissl_mapping)
    region_bboxes_all_examples_one_section.append((region1_x, region1_y, region1_w, region1_h))

    sys.stderr.write('Compute matching: %.2f seconds.\n' % (time.time() - t))

    return ntb_to_nissl_mapping, (region1_x, region1_y, region1_w, region1_h)

pool = Pool(4)
res = pool.map(match_intensity_histogram_one_region, regions)
ntb_matched_values_all_examples_one_section, region_bboxes_all_examples_one_section = zip(*res)
pool.close()
pool.join()

fp = os.path.join(DATA_DIR, stack, stack + '_intensity_mapping',
                  '%s_to_%s_intensity_mapping_all_regions.npy' % (ntb_fn, nissl_fn))
create_parent_dir_if_not_exists(fp)
np.save(fp, np.asarray(ntb_matched_values_all_examples_one_section))
upload_to_s3(fp)

fp = os.path.join(DATA_DIR, stack, stack + '_intensity_mapping',
                  '%s_to_%s_region_bboxes.npy' % (ntb_fn, nissl_fn))
np.save(fp, np.asarray(region_bboxes_all_examples_one_section))
upload_to_s3(fp)
def get_pool():
    global pool
    if pool is None:
        pool = Pool(initializer=initializer)
    return pool
def extract_features( img_input, ft_output, network_ckpt, dataset_cstor, dataset_args, batchifier_cstor, out_dir, set_type, batch_size, no_threads, gpu_ratio): # CPU/GPU option cpu_pool = Pool(no_threads, maxtasksperchild=1000) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_ratio) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)) as sess: saver = tf.train.Saver() saver.restore(sess, network_ckpt) for one_set in set_type: print("Load dataset -> set: {}".format(one_set)) dataset_args["which_set"] = one_set dataset = dataset_cstor(**dataset_args) # hack dataset to only keep one game by image image_id_set = {} games = [] for game in dataset.games: if game.image.id not in image_id_set: games.append(game) image_id_set[game.image.id] = 1 dataset.games = games no_images = len(games) source_name = os.path.basename(img_input.name[:-2]) dummy_tokenizer = DummyTokenizer() batchifier = batchifier_cstor(tokenizer=dummy_tokenizer, sources=[source_name]) iterator = Iterator(dataset, batch_size=batch_size, pool=cpu_pool, batchifier=batchifier) ############################ # CREATE FEATURES ############################ print("Start computing image features...") filepath = os.path.join(out_dir, "{}_features.h5".format(one_set)) with h5py.File(filepath, 'w') as f: ft_shape = [int(dim) for dim in ft_output.get_shape()[1:]] ft_dataset = f.create_dataset('features', shape=[no_images] + ft_shape, dtype=np.float32) idx2img = f.create_dataset('idx2img', shape=[no_images], dtype=np.int32) pt_hd5 = 0 for batch in tqdm(iterator): feat = sess.run(ft_output, feed_dict={img_input: numpy.array(batch[source_name])}) # Store dataset batch_size = len(batch["raw"]) ft_dataset[pt_hd5: pt_hd5 + batch_size] = feat # Store idx to image.id for i, game in enumerate(batch["raw"]): idx2img[pt_hd5 + i] = game.image.id # update hd5 pointer pt_hd5 += batch_size print("Start dumping file: {}".format(filepath)) print("Finished dumping file: {}".format(filepath)) print("Done!")
def predict(self, inputData, transientTime=0, update_processor=lambda x: x, verbose=0): rank = len(inputData.shape) - 1 if rank != self.n_inputDimensions: raise ValueError( "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format( self.n_inputDimensions)) manager = Manager() predictQueue = manager.Queue() # workaround as predict does not support batches atm # add dummy dimension to let embedInputData work properly (is optimized to work for batches) inputData = inputData.reshape(1, *inputData.shape) modifiedInputData = self._embedInputData(inputData) modifiedInputData = modifiedInputData[0] inputData = inputData[0] self.transientTime = transientTime self.sharedNamespace.transientTime = transientTime predictionOutput = B.zeros(np.insert(self.inputShape, 0, inputData.shape[0] - transientTime)) jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[1:]]), axis=rank).reshape(-1, rank).tolist() nJobs = len(jobs) self.resetState() iterator = PredictionArrayIterator(modifiedInputData, jobs, self._filterWidth, self._stride, self) pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_predictProcess, initargs=[predictQueue, self]) pool.map_async(self._predictProcess, iterator, chunksize=200)#, chunksize=1) def _processPoolWorkerResults(): nJobsDone = 0 if verbose > 0: bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001) bar.update(0) while nJobsDone < nJobs: data = predictQueue.get() # result of predicting indices, prediction, state = data id = self._uniqueIDFromIndices(indices) self._xs[id] = state # update the values predictionOutput[tuple([Ellipsis] + indices)] = prediction nJobsDone += 1 if verbose > 0: bar.update(nJobsDone) if verbose > 1: print(nJobsDone) if verbose > 0: bar.finish() _processPoolWorkerResults() pool.close() return predictionOutput
    os.mkdir(os.path.join(EXPORT_DIR, id))
    write_file(os.path.join(EXPORT_DIR, id, os.path.basename(file)), data)
    os.remove(file)


def mv_erase_dir(file):
    os.rename(file, os.path.join(ERASE_DIR, os.path.basename(file)))


def handle_file(file):
    try:
        data = read_file(file)
        modality = data.Modality
        if modality in ['OP', 'OPT', 'Opt', 'Op']:
            handle_and_mv_export_dir(data, file)
        else:
            mv_erase_dir(file)
    except Exception:
        try:
            os.rename(file, os.path.join(EXCEPTION_DIR, os.path.basename(file)))
        except Exception:
            pass


if __name__ == "__main__":
    pool = Pool(30)
    for _ in tqdm.tqdm(pool.imap_unordered(handle_file, files), total=len(files)):
        pass
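# Self-contained sketch of the imap_unordered + tqdm progress pattern used in
# the __main__ block above (the worker and input list here are placeholders):
import tqdm
from multiprocess import Pool

def _work(x):
    return x * x

if __name__ == '__main__':
    items = list(range(1000))
    pool = Pool(4)
    for _ in tqdm.tqdm(pool.imap_unordered(_work, items), total=len(items)):
        pass
    pool.close()
    pool.join()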
class KubernetesManager(ClusterManager): TAG = "KubernetesManager" pool = Pool(5) @memoized_property def kubernetes_home(self): try: cmd = ["which", "kubectl.sh"] output = subprocess.check_output(cmd) return output.split("/cluster/kubectl.sh")[0] except subprocess.CalledProcessError as e: error_log(self.TAG, "Could not get Kubernetes home: {}".format(e)) return None def _generate_auth_token(self): return str(hash(time.time())) def _create(self, filename, namespace=None): success = True try: cmd = ["kubectl.sh", "create", "-f", filename] if namespace: cmd.append('--namespace={0}'.format(namespace)) subprocess.check_call(cmd) except subprocess.CalledProcessError as e: msg = "Could not deploy specification: {0} on Kubernetes cluster: {1}".format( filename, e) error_log(self.TAG, msg) success = False return success def __get_service_url(self, service_name): try: cmd = ["kubectl.sh", "describe", "service", service_name] output = subprocess.check_output(cmd) ip_re = re.compile("LoadBalancer Ingress:(?P<ip>.*)\n") m = ip_re.search(output) if not m: error_log(self.TAG, "Could not extract IP from service description") return None return m.group("ip").strip() except subprocess.CalledProcessError as e: return None def _get_proxy_url(self): return self.__get_service_url("proxy-registration") def _get_registry_url(self): return self.__get_service_url("registry") def _get_lookup_url(self): #return self.__get_service_url("proxy-lookup") return ClusterManager.CLUSTER_HOST def _get_pod_ip(self, app_id): try: cmd = [ "kubectl.sh", "describe", "pod", "notebook-server", "--namespace={}".format(app_id) ] output = subprocess.check_output(cmd) ip_re = re.compile("IP:(?P<ip>.*)\n") ready_re = re.compile("State:\s+(?P<ready>.*)") m = ip_re.search(output) if not m: info_log(self.TAG, "Could not extract IP from pod description") return None return m.group("ip").strip() # TODO the following code makes the above check safer (will prevent proxy errors) but is too slow ready = ready_re.search(output) if not ready: warning_log( self.TAG, "Extracted the pod IP, but the notebook container is not ready" ) return None else: status = ready.group("ready").lower().strip() debug_log(self.TAG, "status: {}".format(status)) if status != "running": info_log( self.TAG, "Extracted the pod IP, but the notebook container is not ready" ) return None except subprocess.CalledProcessError as e: return None def _launch_registry_server(self): registry_path = os.path.join(MainSettings.ROOT, "registry") for name in os.listdir(registry_path): self._create(os.path.join(registry_path, name)) info_log(self.TAG, "Sleeping for 10 seconds so registry launch can complete...") time.sleep(10) def _launch_proxy_server(self, token): # TODO the following chunk of code is reused in App.deploy (should be abstracted away) proxy_path = os.path.join(MainSettings.ROOT, "proxy") # clean up the old deployment deploy_path = os.path.join(proxy_path, "deploy") if os.path.isdir(deploy_path): shutil.rmtree(deploy_path) os.mkdir(deploy_path) params = {"token": token} # load all the template strings templates_path = os.path.join(proxy_path, "deployment") template_names = os.listdir(templates_path) templates = {} for name in template_names: with open(os.path.join(templates_path, name), 'r') as tf: templates[name] = tf.read() # insert the notebooks container into the pod.json template for name in template_names: with open(os.path.join(deploy_path, name), 'w+') as p_file: p_string = fill_template_string(templates[name], params) p_file.write(p_string) # launch each component 
self._create(os.path.join(deploy_path, name)) def _read_proxy_info(self): with open(os.path.join(MainSettings.ROOT, ".proxy_info"), "r") as proxy_file: raw_host, raw_token = proxy_file.readlines() return "http://" + raw_host.strip( ) + "/api/routes", raw_token.strip() def _write_proxy_info(self, url, token): with open(os.path.join(MainSettings.ROOT, ".proxy_info"), "w+") as proxy_file: proxy_file.write("{}\n".format(url)) proxy_file.write("{}\n".format(token)) def _read_registry_url(self): with open(os.path.join(MainSettings.ROOT, ".registry_info"), "r") as registry_file: url = registry_file.readlines()[0] return url def _write_registry_url(self, url): with open(os.path.join(MainSettings.ROOT, ".registry_info"), "w+") as registry_file: registry_file.write("{}\n".format(url)) def _get_inactive_routes(self, min_inactive): now = datetime.utcnow() threshold = (now - timedelta(minutes=min_inactive)).isoformat() base_url, token = self._read_proxy_info() h = {"Authorization": "token {}".format(token)} proxy_url = base_url + "?inactive_since={}".format(threshold) debug_log(self.TAG, "proxy_url: {}".format(proxy_url)) try: r = requests.get(proxy_url, headers=h) if r.status_code == 200: routes = r.json().keys() return map(lambda r: r[1:], routes) except requests.exceptions.ConnectionError: warning_log( self.TAG, "Could not get all routes inactive for {} minutes".format( min_inactive)) return None def _remove_proxy_route(self, app_id): base_url, token = self._read_proxy_info() h = {"Authorization": "token {}".format(token)} proxy_url = base_url + "/" + app_id try: r = requests.delete(proxy_url, headers=h) if r.status_code == 204: info_log(self.TAG, "Removed proxy route for {}".format(app_id)) return True except requests.exceptions.ConnectionError: error_log(self.TAG, "Could not remove proxy route for {}".format(app_id)) return False def _register_proxy_route(self, app_id): num_retries = 30 pause = 1 for i in range(num_retries): # TODO should the notebook port be a parameter? ip = self._get_pod_ip(app_id) # TODO this is a stopgap solution for a race condition that should be fixed through other means time.sleep(1) if ip: base_url, token = self._read_proxy_info() body = {'target': "http://" + ip + ":8888"} h = {"Authorization": "token {}".format(token)} proxy_url = base_url + "/" + app_id debug_log( self.TAG, "body: {}, headers: {}, proxy_url: {}".format( body, h, proxy_url)) try: r = requests.post(proxy_url, data=json.dumps(body), headers=h) if r.status_code == 201: info_log( self.TAG, "Proxying {} to {}".format(proxy_url, ip + ":8888")) return True else: raise Exception( "could not register route with proxy server") except requests.exceptions.ConnectionError: error_log(self.TAG, "could not connect to proxy server") pass info_log( self.TAG, "App not yet assigned an IP address. Waiting for {} seconds..." 
.format(pause)) time.sleep(pause) return False def get_running_apps(self): try: proxy_loc = MainSettings.KUBE_PROXY_HOST + ':' + MainSettings.KUBE_PROXY_PORT url = urljoin(proxy_loc, "/api/v1/pods") r = requests.get(url) if r.status_code != 200: error_log(self.TAG, "could not get list of running pods") return None json = r.json() if 'items' not in json: error_log(self.TAG, "pods api endpoint returning malformed JSON") return None pod_specs = json['items'] pods = [] for pod_spec in pod_specs: meta = pod_spec['metadata'] if meta['namespace'] == 'kube-system' or meta[ 'namespace'] == 'default': continue if meta['name'] == 'notebook-server': full_image = pod_spec['spec']['containers'][0]['image'] image_name = full_image.split('/')[-1] pods.append((meta['namespace'], image_name)) return pods except ConnectionError as e: error_log(self.TAG, e) return None def _nodes_command(self, func, shell=False): provider = os.environ["KUBERNETES_PROVIDER"] if isinstance(func, str): func_str = func def _func(node, zone): split = node.split() if len(split) > 0: node_name = split[0] if node_name != "kubernetes-master": info_log( self.TAG, "Running {0} on {1}...".format(func, node_name)) cmd = [ "gcloud", "compute", "ssh", node_name, "--zone", zone, "--command", "{}".format(func_str) ] return subprocess.Popen(cmd, shell=shell) return None func = _func if provider == 'gce': # get zone info zone = os.environ.get("KUBE_GCE_ZONE") if not zone: zone_re = re.compile( "ZONE\=\$\{KUBE_GCE_ZONE:\-(?P<zone>.*)\}") with open( os.path.join(self.kubernetes_home, "cluster/gce/config-default.sh"), 'r') as f: m = zone_re.search(f.read()) if m: zone = m.group("zone") else: error_log(self.TAG, "zone could not be determined") if not zone: return False nodes_cmd = ["kubectl.sh", "get", "nodes"] output = subprocess.check_output(nodes_cmd) nodes = output.split("\n")[1:] return [func(node, zone) for node in nodes] elif provider == 'aws': # TODO support aws return [] else: warning_log(self.TAG, "Only aws and gce providers are currently supported") return [] def get_total_capacity(self): def _get_capacity(node, zone): pod_re = re.compile(".*pods:\s+(?P<pods>\d+)") split = node.split() if len(split) > 0: node_name = split[0] cmd = ['kubectl.sh', 'describe', 'node', node_name] output_lines = subprocess.check_output(cmd).split('\n') match_lines = [ pod_re.search(l) for l in output_lines if pod_re.search(l) ] if match_lines: return int(match_lines[0].group('pods')) return 0 return 0 caps = self._nodes_command(_get_capacity) return sum(caps) def preload_image(self, image_name): def _preload(node, zone): split = node.split() if len(split) > 0: node_name = split[0] if node_name != "kubernetes-master": info_log( self.TAG, "Preloading {0} onto {1}...".format( image_name, node_name)) docker_cmd = "sudo gcloud docker pull {0}/{1}".format( MainSettings.REGISTRY_NAME, image_name) cmd = [ "gcloud", "compute", "ssh", node_name, "--zone", zone, "--command", "{}".format(docker_cmd) ] return subprocess.Popen(cmd) return None procs = self._nodes_command(_preload) info_log(self.TAG, "Waiting for preloading to finish...") for proc in procs: if proc: proc.wait() info_log(self.TAG, "Preloaded image {} onto all nodes".format(image_name)) return True def _start_proxy_server(self): token = self._generate_auth_token() self._launch_proxy_server(token) num_retries = 5 for i in range(num_retries): debug_log(self.TAG, "Sleeping for 20s before getting proxy URL") time.sleep(20) proxy_url = self._get_proxy_url() if proxy_url: debug_log(self.TAG, "proxy_url: 
{}".format(proxy_url)) # record the proxy url and auth token self._write_proxy_info(proxy_url, token) break if not proxy_url: error_log( self.TAG, "Could not obtain the proxy server's URL. Cluster launch unsuccessful" ) return False def _start_registry_server(self): # TODO remove duplicated code here self._launch_registry_server() num_retries = 5 for i in range(num_retries): debug_log(self.TAG, "Sleeping for 20s before getting registry URL") time.sleep(20) registry_url = self._get_registry_url() if registry_url: debug_log(self.TAG, "registry_url: {}".format(registry_url)) # record the registry url self._write_registry_url(registry_url) break if not registry_url: error_log( self.TAG, "Could not obtain the registry server's URL. Cluster launch unsuccessful" ) return False def _preload_registry_server(self): try: subprocess.check_call([ "docker", "pull", "{}/binder-base".format(MainSettings.DOCKER_HUB_USER) ]) subprocess.check_call([ "docker", "tag", "{}/binder-base".format(MainSettings.DOCKER_HUB_USER), "{}/binder-base".format(MainSettings.REGISTRY_NAME) ]) subprocess.check_call([ "docker", "push", "{}/binder-base".format(MainSettings.REGISTRY_NAME) ]) return True except subprocess.CalledProcessError as e: error_log( self.TAG, "Could not preload registry server with binder-base image: {}". format(e)) return False def start(self, num_minions=3, provider="gce"): success = True try: # start the cluster os.environ["NUM_MINIONS"] = str(num_minions) os.environ["KUBERNETES_PROVIDER"] = provider subprocess.check_call(['kube-up.sh']) # launch binderd binderd_proc = subprocess.Popen(["binderd"]) # sleep just for good measure (modules starting up) time.sleep(5) # generate an auth token and launch the proxy server info_log(self.TAG, "Launching proxy server...") self._start_proxy_server() # launch the private Docker registry info_log(self.TAG, "Launching private Docker registry...") self._start_registry_server() info_log(self.TAG, "Preloading registry server with binder-base image...") self._preload_registry_server() # preload the generic base image onto all the workers info_log(self.TAG, "Preloading binder-base image onto all nodes...") success = success and self.preload_image("binder-base") # start the inactive app removal cron job cron = CronTab() cmd = " ".join([ get_env_string(), os.path.join(MainSettings.ROOT, "util", "stop-inactive-apps"), "&>/tmp/binder-cron" ]) job = cron.new(cmd, comment="binder-stop") job.minute.every(MonitoringSettings.APP_CRON_PERIOD) job.enable(True) cron.write_to_user(user=True) except subprocess.CalledProcessError as e: success = False if success: info_log(self.TAG, "Started Kubernetes cluster successfully") else: error_log(self.TAG, "Could not launch the Kubernetes cluster") return success def stop(self, provider="gce"): try: os.environ["KUBERNETES_PROVIDER"] = provider subprocess.check_call(['kube-down.sh']) # start the inactive app removal cron job cron = CronTab() jobs = cron.find_comment("binder-stop") for job in jobs: job.enable(False) cron.remove(job) cron.write_to_user(user=True) except subprocess.CalledProcessError as e: error_log(self.TAG, "Could not destroy the Kubernetes cluster") def destroy_app(self, app_id): pass def list_apps(self): pass def deploy_app(self, app_id, app_dir): success = True # first create a namespace for the app success = self._create(os.path.join(app_dir, "namespace.json")) # now launch all other components in the new namespace for f in os.listdir(app_dir): if f != "namespace.json": path = os.path.join(app_dir, f) success = success and 
self._create(path, namespace=app_id) if not success: error_log( self.TAG, "Could not deploy {0} on Kubernetes cluster".format( path)) # create a route in the proxy success = success and self._register_proxy_route(app_id) if not success: error_log(self.TAG, "Could not deploy {} on Kubernetes cluster".format(path)) return None lookup_url = self._get_lookup_url() app_url = urljoin("https://" + lookup_url, app_id) info_log(self.TAG, "Access app at: \n {}".format(app_url)) return app_url def stop_app(self, app_id): if app_id == "kube-system": return try: self._remove_proxy_route(app_id) stop_cmd = [ "kubectl.sh", "stop", "pods,services,replicationControllers", "--all", "--namespace={}".format(app_id) ] cleanup_cmd = ["kubectl.sh", "delete", "namespace", app_id] subprocess.check_call(stop_cmd) subprocess.check_call(cleanup_cmd) info_log(self.TAG, "Stopped app {}".format(app_id)) except subprocess.CalledProcessError as e: error_log(self.TAG, "Could not stop app {}".format(app_id)) def _stop_apps(self, app_ids): if not app_ids: info_log(self.TAG, "No apps to stop") return for app_id in app_ids: self.stop_app(app_id) def stop_inactive_apps(self, min_inactive): routes = self._get_inactive_routes(min_inactive) self._stop_apps(routes) def stop_all_apps(self): app_ids = map(lambda app: app[0], self.get_running_apps()) self._stop_apps(app_ids)
def parse_files(self, workers, data_dicts=None): """Parse all files""" print("\n\n### Parsing files ###") os.chdir(self.workdir) # questionable if data_dicts is None: data_dicts = [{ "filename": fn.name } for fn in os.scandir(self.textdir)] filequeue = [{ "name": d["filename"], "size": os.path.getsize(self.textdir + d["filename"]), "id": n + 1, "options": d["options"] if "options" in d else {}, "newpath": self.textdir + d["filename"], "raw": self.workdir + d["filename"] + ".raw", "words": self.workdir + d["filename"] + ".words.sorted", "toms": self.workdir + d["filename"] + ".toms", "sortedtoms": self.workdir + d["filename"] + ".toms.sorted", "pages": self.workdir + d["filename"] + ".pages", "refs": self.workdir + d["filename"] + ".refs", "graphics": self.workdir + d["filename"] + ".graphics", "lines": self.workdir + d["filename"] + ".lines", "results": self.workdir + d["filename"] + ".results", } for n, d in enumerate(data_dicts)] self.raw_files = [f["raw"] + ".lz4" for f in filequeue] self.metadata_hierarchy.append([]) # Adding in doc level metadata for d in data_dicts: for k in list(d.keys()): if k not in self.metadata_fields: self.metadata_fields.append(k) self.metadata_hierarchy[0].append(k) if k not in self.metadata_types: self.metadata_types[k] = "doc" # don't need to check for conflicts, since doc is first. # Adding non-doc level metadata for element_type in self.parser_config["metadata_to_parse"]: if element_type != "page" and element_type != "ref" and element_type != "line": self.metadata_hierarchy.append([]) for param in self.parser_config["metadata_to_parse"][ element_type]: if param not in self.metadata_fields: self.metadata_fields.append(param) self.metadata_hierarchy[-1].append(param) if param not in self.metadata_types: self.metadata_types[param] = element_type else: # we have a serious error here! Should raise going forward. pass print("%s: parsing %d files." % (time.ctime(), len(filequeue))) with tqdm(total=len(filequeue), leave=False) as pbar: with Pool(workers) as pool: for results in pool.imap_unordered(self.__parse_file, zip(filequeue, data_dicts)): with open(results, "rb") as proc_fh: vec = pickle.load( proc_fh ) # load in the results from the child's parsework() function. self.omax = [max(x, y) for x, y in zip(vec, self.omax)] pbar.update() print("%s: done parsing" % time.ctime())
def qsd_solve( H, psi0, tspan, Ls, sdeint_method, obsq=None, normalized_equation=True, normalize_state=True, multiprocessing=False, ntraj=1, processes=8, seed=1, implicit_type=None, ): ''' Args: H: NxN csr matrix, dtype = complex128 Hamiltonian. psi0: Nx1 csr matrix, dtype = complex128 input state. tspan: numpy array, dtype = float Time series of some length T. Ls: list of NxN csr matrices, dtype = complex128 System-environment interaction terms (Lindblad terms). sdeint_method (Optional) SDE solver method: Which SDE solver to use. Default is sdeint.itoSRI2. obsq (optional): list of NxN csr matrices, dtype = complex128 Observables for which to generate trajectory information. Default value is None (no observables). normalized_equation (optional): Boolean Use the normalized quantum state diffusion equations. (TODO: case False) normalize_state (optional): Boolean Whether to numerically normalize the equation at each step. multiprocessing (optional): Boolean Whether or not to use multiprocessing ntraj (optional): int number of trajectories. processes (optional): int number of processes. If processes == 1, don't use multiprocessing. seed (optional): int Seed for random noise. implicit_type (optional): string Type of implicit solver to use if the solver is implicit. Returns: A dictionary with the following keys and values: ['psis'] -> np.array with shape = (ntraj,T,N) and dtype = complex128 ['obsq_expects'] -> np.array with shape = (ntraj,T,len(obsq)) and dtype = complex128 ''' ## Check dimensions of inputs. These should be consistent with qutip Qobj.data. N = psi0.shape[0] if psi0.shape[1] != 1: raise ValueError("psi0 should have dimensions Nx1.") a, b = H.shape if a != N or b != N: raise ValueError("H should have dimensions NxN (same size as psi0).") for L in Ls: a, b = L.shape if a != N or b != N: raise ValueError( "Every L should have dimensions NxN (same size as psi0).") ## Determine seeds for the SDEs if type(seed) is list or type(seed) is tuple: assert len(seed) == ntraj seeds = seed elif type(seed) is int or seed is None: np.random.seed(seed) seeds = [np.random.randint(1000000) for _ in range(ntraj)] else: raise ValueError("Unknown seed type.") T_init = time() psi0_arr = np.asarray(psi0.todense()).T[0] x0 = np.concatenate([psi0_arr.real, psi0_arr.imag]) drift_diffusion = drift_diffusion_holder(H, Ls, tspan) f = complex_to_real_vector(drift_diffusion.f) G = complex_to_real_matrix(drift_diffusion.G) def SDE_helper(args, s): '''Let's make different wiener increments for each trajectory''' m = 2 * len(Ls) N = len(tspan) - 1 h = (tspan[N - 1] - tspan[0]) / (N - 1) np.random.seed(s) dW = np.random.normal(0.0, np.sqrt(h), (N, m)) / np.sqrt(2.) if implicit_type is None: out = sdeint_method(*args, dW=dW, normalized=normalize_state) try: out = sdeint_method(*args, dW=dW, normalized=normalize_state, implicit_type=implicit_type) except TypeError: print("Not an implicit method. 
implicit_type argument ignored.") out = sdeint_method(*args, dW=dW, normalized=normalize_state) return out ## simulation parameters params = [[f, G, x0, tspan]] * ntraj if multiprocessing: pool = Pool(processes=processes, ) outputs = pool.map(lambda z: SDE_helper(z[0], z[1]), zip(params, seeds)) else: outputs = [SDE_helper(p, s) for p, s in zip(params, seeds)] try: xs = np.array([o["trajectory"] for o in outputs]) except KeyError: print("Warning: trajectory not returned by SDE method!") try: norms = np.array([o["norms"] for o in outputs]) except KeyError: print("Warning: norms not returned by SDE method!") norms = None print("done running simulation!") psis = xs[:, :, :int(len(x0) / 2)] + 1j * xs[:, :, int(len(x0) / 2):] # Obtaining expectations of observables obsq_expects = (np.asarray([[ np.asarray([ob.dot(psi).dot(psi.conj()) for ob in obsq]) for psi in psis[i] ] for i in range(ntraj)]) if not obsq is None else None) T_fin = time() print("Run time: ", T_fin - T_init, " seconds.") return { "psis": psis, "obsq_expects": obsq_expects, "seeds": seeds, "norms": norms }
def dump_messages(dmp, **kwargs): """Сообщения dmp: Dumper object """ global users folder = os.path.join('dump', 'dialogs') os.makedirs(folder, exist_ok=True) print('[получение диалогов...]') print('\x1b[2K 0/???', end='\r') conversations = dmp._vk_tools.get_all( method='messages.getConversations', max_count=200, values={ 'extended': 1, 'fields': 'first_name, last_name, name' }) print('\x1b[2K {}/{}'.format(len(conversations['items']), conversations['count'])) if dmp._DUMP_DIALOGS_ONLY: print('[будет сохранено диалогов: {}]'.format(len(dmp._DUMP_DIALOGS_ONLY)), end='\n\n') else: print('[будет исключено диалогов: {}]'.format(len(dmp._EXCLUDED_DIALOGS)), end='\n\n') print('Сохранение диалогов:') for con in conversations['items']: did = con['conversation']['peer']['id'] pass_dialog = False if dmp._DUMP_DIALOGS_ONLY: if did not in dmp._DUMP_DIALOGS_ONLY: if dmp._settings['HIDE_EXCLUDED_DIALOGS']: continue else: pass_dialog = True elif did in dmp._EXCLUDED_DIALOGS: if dmp._settings['HIDE_EXCLUDED_DIALOGS']: continue else: pass_dialog = True if con['conversation']['peer']['type'] == 'user': if did not in users: users_add(dmp._vk, did) dialog_name = users.get(did)['name'] elif con['conversation']['peer']['type'] == 'group': if did not in users: users_add(dmp._vk, did) dialog_name = users.get(did)['name'] elif con['conversation']['peer']['type'] == 'chat': dialog_name = con['conversation']['chat_settings']['title'] else: dialog_name = r'{unknown}' for c in dmp._INVALID_CHARS: if c in dialog_name: dialog_name = dialog_name.replace(c, dmp._settings['REPLACE_CHAR']) fn = '{}_{id}'.format('_'.join(dialog_name.split(' ')), id=did) for n in os.listdir(folder): if str(did) == n.split('.txt')[0].split('_')[-1]: if dmp._settings['KEEP_DIALOG_NAMES']: fn = n.split('.txt')[0] else: shutil.move(os.path.join(folder, n), os.path.join(folder, '{}_{id}'.format('_'.join(dialog_name.split(' ')), id=did) + ('.txt' if '.txt' in n else ''))) print(' Диалог: {}{nfn}'.format(dialog_name, nfn=(' (as {})'.format(fn) if ' '.join(fn.split('_')[:-1]) != dialog_name else ''))) if pass_dialog is True: print(' [исключён]\n') continue values = { 'peer_id': con['conversation']['peer']['id'], 'extended': 1, 'fields': 'first_name, last_name' } append = {'use': dmp._settings['DIALOG_APPEND_MESSAGES'] and os.path.exists(os.path.join(folder, f'{fn}.txt'))} try: if append['use']: with open(os.path.join(folder, f'{fn}.txt'), 'rb') as t: t.seek(-2, 2) while t.read(1) != b'\n': t.seek(-2, 1) last = t.readline().decode() r = re.match('^\[last:[0-9]+\]$', last) if r: start_message_id = int(re.search(r'\d+', r.group(0)).group(0)) values['start_message_id'] = start_message_id t.seek(-len(last.encode('utf-8'))-2, 1) while True: while t.read(1) != b'\n': t.seek(-2, 1) tmp = t.readline().decode() r = re.match('^ {8}\[\d+ [а-я a-z]+ \d+\]$', tmp) # TODO: получение last_id по последнему сообщению (???) 
if r: append['prev_date'] = re.search('\d+ [а-я a-z]+ \d+', r.group(0)).group(0) break else: t.seek(-len(tmp.encode('utf-8'))-2, 1) else: values['rev'] = 1 append['use'] = False else: values['rev'] = 1 except OSError: values['rev'] = 1 append['use'] = False print(' [кэширование]') print('\x1b[2K 0/???', end='\r') try: history = dmp._vk_tools.get_all( method='messages.getHistory', max_count=200, values=values, negative_offset=append['use']) print('\x1b[2K {}/{}'.format(len(history['items']), history['count'])) if len(history['items']) == 0: print() continue except VkToolsException: print('\x1b[2K 0/0\n') continue if append['use']: def sortById(msg): return msg['id'] history['items'].sort(key=sortById) attachments = { 'photos': [], 'video_ids': [], 'docs': [], 'audio_messages': [] } if append['use']: tmp = '' else: f = open(os.path.join(folder, f'{fn}.txt'), 'w', encoding='utf-8') count = len(history['items']) print(' [сохранение сообщений]') print(' {}/{}'.format(0, count), end='\r') prev = None prev_date = None if append['use']: prev_date = append['prev_date'] for i in range(count): m = history['items'][i] if m['from_id'] not in users: users_add(dmp._vk, m['from_id']) res = message_handler(dmp._vk, m) date = time_handler(m['date']) hold = ' ' * (users.get(m['from_id'])['length'] + 2) msg = res['date'] + ' ' msg += hold if (prev and date and prev == m['from_id'] and prev_date == date) \ else users.get(m['from_id'])['name'] + ': ' if res['messages']: msg += res['messages'][0] + '\n' for r in res['messages'][1:]: msg += hold + ' '*8 + r + '\n' else: msg += '\n' for a in res['attachments']['audio_messages']: if a not in attachments['audio_messages']: attachments['audio_messages'].append(a) if dmp._settings['SAVE_DIALOG_ATTACHMENTS']: for tp in res['attachments']: for a in res['attachments'][tp]: if a not in attachments[tp]: attachments[tp].append(a) if prev_date != date: if prev_date: if append['use']: tmp += '\n' else: f.write('\n') if append['use']: tmp += f' [{date}]\n' else: f.write(f' [{date}]\n') prev_date = date if append['use']: tmp += msg else: f.write(msg) prev = m['from_id'] print('\x1b[2K {}/{}'.format(i+1, count), end='\r') if append['use']: import codecs orig_file = os.path.join(folder, f'{fn}.txt') tmp_file = os.path.join(folder, f'{fn}.new') try: with codecs.open(orig_file, 'r', encoding='utf-8') as fi,\ codecs.open(tmp_file, 'w', encoding='utf-8') as fo: for line in fi: if re.match('^\[last:[0-9]+\]$', line): line = tmp+'[last:{}]\n'.format(history['items'][-1]['id']) fo.write(line) os.remove(orig_file) os.rename(tmp_file, orig_file) except Exception: os.remove(tmp_file) else: f.write('[last:{}]\n'.format(history['items'][-1]['id'])) f.close() print() if attachments['audio_messages']: at_folder = os.path.join(folder, fn) af = os.path.join(at_folder, 'Голосовые') os.makedirs(af, exist_ok=True) print(' [сохранение голосовых сообщений]') print(' .../{}'.format(len(attachments['audio_messages'])), end='\r') with Pool(dmp._settings['POOL_PROCESSES']) as pool: res = pool.starmap(dmp._download, zip(itertools.repeat(dmp.__class__), attachments['audio_messages'], itertools.repeat(af))) print('\x1b[2K {}/{} (total: {})'.format(sum(filter(None, res)), len(attachments['audio_messages']), len(next(os.walk(af))[2]))) if dmp._settings['SAVE_DIALOG_ATTACHMENTS']: at_folder = os.path.join(folder, fn) os.makedirs(at_folder, exist_ok=True) if attachments['photos']: af = os.path.join(at_folder, 'Фото') os.makedirs(af, exist_ok=True) print(' [сохранение фото]') print(' 
.../{}'.format(len(attachments['photos'])), end='\r') with Pool(dmp._settings['POOL_PROCESSES']) as pool: res = pool.starmap(dmp._download, zip(itertools.repeat(dmp.__class__), attachments['photos'], itertools.repeat(af))) print('\x1b[2K {}/{} (total: {})'.format(sum(filter(None, res)), len(attachments['photos']), len(next(os.walk(af))[2]))) if attachments['video_ids']: af = os.path.join(at_folder, 'Видео') os.makedirs(af, exist_ok=True) videos = dmp._vk_tools.get_all( method='video.get', max_count=200, values={ 'videos': ','.join(attachments['video_ids']), 'extended': 1 } ) print(' [сохранение видео]') print(' .../{}'.format(len(videos['items'])), end='\r') try: with Pool(dmp._AVAILABLE_THREADS if dmp._settings['LIMIT_VIDEO_PROCESSES'] else dmp._settings['POOL_PROCESSES']) as pool: res = pool.starmap(dmp._download_video, zip(itertools.repeat(dmp.__class__), videos['items'], itertools.repeat(af))) print('\x1b[2K {}/{} (total: {})'.format(sum(filter(None, res)), len(videos['items']), len(next(os.walk(af))[2]))) except MaybeEncodingError: print('\x1b[2K ???/{} (total: {})'.format(len(videos['items']), len(next(os.walk(af))[2]))) if attachments['docs']: af = os.path.join(at_folder, 'Документы') os.makedirs(af, exist_ok=True) print(' [сохранение документов]') print(' .../{}'.format(len(attachments['docs'])), end='\r') with Pool(dmp._settings['POOL_PROCESSES']) as pool: res = pool.starmap(dmp._download, zip(itertools.repeat(dmp.__class__), attachments['docs'], itertools.repeat(af))) print('\x1b[2K {}/{} (total: {})'.format(sum(filter(None, res)), len(attachments['docs']), len(next(os.walk(af))[2]))) with open('users.json', 'w', encoding='utf-8') as f: json.dump(users, f, ensure_ascii=False, indent=4)
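# A minimal, hypothetical sketch of the starmap/itertools.repeat pattern used by the
# attachment-saving blocks above: the same download callable and target folder are
# repeated for every attachment, so each worker receives (tag, attachment, folder).
# The `fetch` function and the `attachments` list below are stand-ins, not part of
# the dumper's real API.
import itertools
from multiprocess import Pool

def fetch(tag, url, folder):
    # placeholder worker: pretend to download `url` into `folder`
    return '{}/{}-{}'.format(folder, tag, url.rsplit('/', 1)[-1])

if __name__ == '__main__':
    attachments = ['http://example.com/a.jpg', 'http://example.com/b.jpg']
    with Pool(4) as pool:
        saved = pool.starmap(fetch,
                             zip(itertools.repeat('photo'),
                                 attachments,
                                 itertools.repeat('dump/photo')))
    print(saved)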
def qsd_solve_two_systems(H1, H2, psi0, tspan, L1s, L2s, R, eps, n, sdeint_method, trans_phase=None, obsq=None, normalize_state=True, downsample=1, ops_on_whole_space=False, multiprocessing=False, ntraj=1, processes=8, seed=1, implicit_type=None): ''' Args: H1: N1xN1 csr matrix, dtype = complex128 Hamiltonian for system 1. H2: N2xN2 csr matrix, dtype = complex128 Hamiltonian for system 2. psi0: Nx1 csr matrix, dtype = complex128 input state. tspan: numpy array, dtype = float Time series of some length T. L1s: list of N1xN1 csr matrices, dtype = complex128 System-environment interaction terms (Lindblad terms) for system 1. L2s: list of N2xN2 csr matrices, dtype = complex128 System-environment interaction terms (Lindblad terms) for system 2. R: float reflectivity used to separate the classical versus coherent transmission eps: float The multiplier by which the classical state displaces the coherent state n: float Scalar to multiply the measurement feedback noise sdeint_method (Optional) SDE solver method: Which SDE solver to use. Default is sdeint.itoSRI2. obsq (optional): list of NxN csr matrices, dtype = complex128 Observables for which to generate trajectory information. Default value is None (no observables). normalize_state (optional): Boolean Whether to numerically normalize the equation at each step. downsample: optional, integer to indicate how frequently to save values. ops_on_whole_space (optional): Boolean whether the Given L and H operators have been defined on the whole space or individual subspaces. multiprocessing (optional): Boolean Whether or not to use multiprocessing ntraj (optional): int number of trajectories. processes (optional): int number of processes. If processes == 1, don't use multiprocessing. seed (optional): int Seed for random noise. Returns: A dictionary with the following keys and values: ['psis'] -> np.array with shape = (ntraj,T,N) and dtype = complex128 ['obsq_expects'] -> np.array with shape = (ntraj,T,len(obsq)) and dtype = complex128 ''' ## Check dimensions of inputs. These should be consistent with qutip Qobj.data. N = psi0.shape[0] if psi0.shape[1] != 1: raise ValueError("psi0 should have dimensions Nx1.") ## Determine seeds for the SDEs if type(seed) is list or type(seed) is tuple: assert len(seed) == ntraj seeds = seed elif type(seed) is int or seed is None: np.random.seed(seed) seeds = [np.random.randint(1000000) for _ in range(ntraj)] else: raise ValueError("Unknown seed type.") T_init = time() psi0_arr = np.asarray(psi0.todense()).T[0] x0 = np.concatenate([psi0_arr.real, psi0_arr.imag]) drift_diffusion = drift_diffusion_two_systems_holder( H1, H2, L1s, L2s, R, eps, n, tspan, trans_phase=trans_phase, ops_on_whole_space=ops_on_whole_space) f = complex_to_real_vector(drift_diffusion.f_normalized) G = complex_to_real_matrix(drift_diffusion.G_normalized) def SDE_helper(args, s): '''Let's make different wiener increments for each trajectory''' m = 2 * (len(L1s) + len(L2s)) N = len(tspan) - 1 h = (tspan[N - 1] - tspan[0]) / (N - 1) np.random.seed(s) dW = np.random.normal(0.0, np.sqrt(h), (N, m)) / np.sqrt(2.) 
dW_with_conj = insert_conj(dW, port=1) if sdeint_method is sdeint.itoQuasiImplicitEuler: implicit_ports = [1, 2, int(m / 2 + 1), int(m / 2) + 2] out = sdeint_method(*args, dW=dW_with_conj, normalized=normalize_state, downsample=downsample, implicit_ports=implicit_ports) return out if implicit_type is None: out = sdeint_method(*args, dW=dW_with_conj, normalized=normalize_state, downsample=downsample) return out try: out = sdeint_method(*args, dW=dW_with_conj, normalized=normalize_state, downsample=downsample, implicit_type=implicit_type) except TypeError: print("Not an implicit method. implicit_type argument ignored.") out = sdeint_method(*args, dW=dW_with_conj, normalized=normalize_state, downsample=downsample) return out ## simulation parameters params = [[f, G, x0, tspan]] * ntraj if multiprocessing: pool = Pool(processes=processes, ) outputs = pool.map(lambda z: SDE_helper(z[0], z[1]), zip(params, seeds)) else: outputs = [SDE_helper(p, s) for p, s in zip(params, seeds)] try: xs = np.array([o["trajectory"] for o in outputs]) except KeyError: print("Warning: trajectory not returned by SDE method!") try: norms = np.array([o["norms"] for o in outputs]) except KeyError: print("Warning: norms not returned by SDE method!") norms = None print("done running simulation!") psis = xs[:, :, :int(len(x0) / 2)] + 1j * xs[:, :, int(len(x0) / 2):] # Obtaining expectations of observables obsq_expects = (np.asarray([[ np.asarray([ob.dot(psi).dot(psi.conj()) for ob in obsq]) for psi in psis[i] ] for i in range(ntraj)]) if not obsq is None else None) T_fin = time() print("Run time: ", T_fin - T_init, " seconds.") return { "psis": psis, "obsq_expects": obsq_expects, "seeds": seeds, "norms": norms }
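# Sketch of why the pool.map(lambda z: SDE_helper(z[0], z[1]), zip(params, seeds))
# call above works: `multiprocess` pickles with dill, so lambdas and closures can be
# shipped to worker processes. `simulate` and its arguments here are illustrative only.
import numpy as np
from multiprocess import Pool

def simulate(params, seed):
    rng = np.random.RandomState(seed)      # one independent stream per trajectory
    return params['scale'] * rng.standard_normal(3)

if __name__ == '__main__':
    params = [{'scale': 0.5}] * 4          # same parameters for every trajectory
    seeds = [11, 22, 33, 44]               # but a distinct seed for each one
    with Pool(2) as pool:
        outputs = pool.map(lambda z: simulate(z[0], z[1]), zip(params, seeds))
    print(np.array(outputs).shape)         # (4, 3)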
def dump_fave_posts(dmp):
    """Attachments of liked posts (photos, videos, documents)

    dmp: Dumper object
    """
    # 'Понравившиеся' ("Liked") is kept as the dumper's output folder name.
    folder_photo = os.path.join('dump', 'photo', 'Понравившиеся')
    os.makedirs(folder_photo, exist_ok=True)
    folder_video = os.path.join('dump', 'video', 'Понравившиеся')
    os.makedirs(folder_video, exist_ok=True)
    folder_docs = os.path.join('dump', 'docs', 'Понравившиеся')
    os.makedirs(folder_docs, exist_ok=True)

    print('[fetching posts]')
    posts = get_fave(dmp._vk, 'posts')
    # from pprint import pprint
    # print(type(posts))
    # print(type(posts['items'][0]))

    photo = []
    video = []
    docs = []

    for p in posts['items']:
        if 'attachments' in p:
            for at in p['attachments']:
                if at['type'] == 'photo':
                    at['photo']['sizes'].sort(
                        key=itemgetter('width', 'height'))
                    obj = {
                        'url': at['photo']['sizes'][-1]['url'],
                        'prefix': '{}_{}'.format(p['owner_id'], p['id'])
                    }
                    if 'access_key' in at['photo']:
                        obj['access_key'] = at['photo']['access_key']
                    photo.append(obj)
                elif at['type'] == 'video':
                    video.append('{oid}_{id}{access_key}'.format(
                        oid=at['video']['owner_id'],
                        id=at['video']['id'],
                        access_key='_' + (at['video'].get('access_key') or '')))
                elif at['type'] == 'doc':
                    obj = {
                        'url': at['doc']['url'],
                        'prefix': '{}_{}'.format(p['owner_id'], p['id']),
                        'name': '{}_{}'.format(at['doc']['title'], at['doc']['id']),
                        'ext': at['doc']['ext']
                    }
                    if 'access_key' in at['doc']:
                        obj['access_key'] = at['doc']['access_key']
                    docs.append(obj)

    if video:
        video = dmp._vk_tools.get_all(method='video.get',
                                      max_count=200,
                                      values={
                                          'videos': ','.join(video),
                                          'extended': 1
                                      })

    print('Saving ({} attachments from {} posts):'.format(
        sum([len(photo), len(video), len(docs)]), len(posts['items'])))

    if photo:
        print('  [photos ({})]'.format(len(photo)))
        with Pool(dmp._settings['POOL_PROCESSES']) as pool:
            pool.starmap(
                dmp._download,
                zip(itertools.repeat(dmp.__class__), photo,
                    itertools.repeat(folder_photo)))

    try:
        if video:
            print('  [videos ({})]'.format(len(video['items'])))
            with Pool(dmp._settings['POOL_PROCESSES']
                      if not dmp._settings['LIMIT_VIDEO_PROCESSES']
                      else dmp._AVAILABLE_THREADS) as pool:
                pool.starmap(
                    dmp._download_video,
                    zip(itertools.repeat(dmp.__class__), video['items'],
                        itertools.repeat(folder_video)))
    except MaybeEncodingError:
        pass

    if docs:
        print('  [documents ({})]'.format(len(docs)))
        with Pool(dmp._settings['POOL_PROCESSES']) as pool:
            pool.starmap(
                dmp._download,
                zip(itertools.repeat(dmp.__class__), docs,
                    itertools.repeat(folder_docs)))
} for rob in study[result_key]: rob.update(study_info) writer.writerow(rob) def convert_keys_to_string(dictionary): """Recursively converts dictionary keys to strings.""" if not isinstance(dictionary, dict): return dictionary return dict( (str(k), convert_keys_to_string(v)) for k, v in dictionary.items()) if __name__ == '__main__': pool = Pool(processes=8) reviews = [] for subdir, dirs, files in os.walk(REVIEWS_DIR): for file in files: filepath = os.path.join(subdir, file) if filepath.endswith('.rm5'): reviews.append(filepath) reviews = [filepath for filepath in reviews if 'publication' in filepath] all_studies = pool.map(extract_review_info, reviews, chunksize=8) rob_headers = [ 'file', 'id', 'modified', 'result', 'result_description', 'rob_name', 'rob_id', 'rob_description', 'group_id', 'group_name' ] write_results_to_csv('robs.csv', rob_headers, 'robs')
setting=actual_setting) create_parent_dir_if_not_exists(viz_fp) try: if add_label_text: label_text = str(structure) else: label_text = None viz = scoremap_overlay_on(bg='original', stack=stack, sec=sec, structure=structure, downscale=downscale, label_text=label_text, setting=actual_setting) imsave(viz_fp, img_as_ubyte(viz)) upload_from_ec2_to_s3(viz_fp) except Exception as e: sys.stderr.write('%s\n' % e) return pool = Pool(NUM_CORES) pool.map(save_scoremap, all_known_structures) pool.close() pool.join() sys.stderr.write('Visualize scoremaps: %.2f seconds.\n' % (time.time() - t)) # 7s for one structure, one section, single process # 20s for all structures, one section, 8 processes
def fit(self, inputData, outputData, transientTime=0, verbose=0): rank = len(inputData.shape) - 1 if rank != self.n_inputDimensions and rank != self.n_inputDimensions + 1: raise ValueError( "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format( self.n_inputDimensions)) # reshape the input so that it has the shape (timeseries, time, input_dimension^n) if rank == self.n_inputDimensions: inputData = inputData.reshape(1, *inputData.shape) outputData = outputData.reshape(1, *outputData.shape) else: # modify rank again rank -= 1 partialLength = (inputData.shape[1] - transientTime) totalLength = inputData.shape[0] * partialLength timeseriesCount = inputData.shape[0] manager = Manager() fitQueue = manager.Queue() modifiedInputData = self._embedInputData(inputData) self.sharedNamespace.transientTime = transientTime self.sharedNamespace.partialLength = partialLength self.sharedNamespace.totalLength = totalLength self.sharedNamespace.timeseriesCount = timeseriesCount jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[2:]]), axis=rank).reshape(-1, rank).tolist() nJobs = len(jobs) self.resetState() iterator = FittingArrayIterator(modifiedInputData, outputData, jobs, self._filterWidth, self._stride, self) pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_fitProcess, initargs=[fitQueue, self]) pool.map_async(self._fitProcess, iterator, chunksize=16) def _processPoolWorkerResults(): nJobsDone = 0 if verbose > 0: bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001) bar.update(0) while nJobsDone < nJobs: data = fitQueue.get() # result of fitting indices, x, WOut = data id = self._uniqueIDFromIndices(indices) if WOut is None: import sys print("WARNING: Fit process for pixel {0} did not succeed".format(indices), file=sys.stderr) # store WOut if self._averageOutputWeights: if WOut is not None: self._WOut += WOut / np.prod(self.inputShape) else: self._WOuts[id] = WOut # store x self._xs[id] = x nJobsDone += 1 if verbose > 0: bar.update(nJobsDone) if verbose > 1: print(nJobsDone) if verbose > 0: bar.finish() _processPoolWorkerResults() pool.close()
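# Condensed sketch of the initializer/queue pattern used by the ESN fit above: each
# worker is handed a shared queue once (via initializer/initargs) and pushes its
# result there, while the parent drains the queue instead of waiting on map_async.
# Names below (`_init_worker`, `_process`) are invented, and the module-level global
# assumes the default fork start method on Linux; this is purely illustrative.
from multiprocess import Pool, Manager

_queue = None

def _init_worker(queue):
    global _queue
    _queue = queue                     # stashed once per worker process

def _process(job):
    _queue.put((job, job * job))       # (identifier, result)

if __name__ == '__main__':
    manager = Manager()
    queue = manager.Queue()
    jobs = list(range(8))
    pool = Pool(processes=2, initializer=_init_worker, initargs=[queue])
    pool.map_async(_process, jobs, chunksize=2)
    results = dict(queue.get() for _ in jobs)   # drain exactly len(jobs) items
    pool.close()
    pool.join()
    print(results)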
continue new_mask = odm.dense_mask_to_sparse_mask(binary_mask,category_ids,default_label=255) base_name = wmlu.base_name(full_path)+".png" save_path = os.path.join(save_dir,base_name) new_mask = new_mask.astype(np.uint8) if os.path.exists(save_path): print(f"WARNING: File {save_path} exists.") cv2.imwrite(save_path,new_mask) sys.stdout.write(f"\r{i}") if __name__ == "__main__": data_dir ="/home/wj/ai/mldata/mapillary_vistas/" save_dir = os.path.join(data_dir,'boe_labels_validation') name_to_id_dict = update_name_to_id(name_to_id_dict,data_dir) idxs = list(range(0,18049,50)) r_idxs = [] for i in range(len(idxs)-1): r_idxs.append([idxs[i],idxs[i+1]]) wmlu.show_list(r_idxs) pool = Pool(10) def fun(d): trans_data(data_dir,save_dir,d[0],d[1]) res = list(pool.map(fun,r_idxs)) pool.close() pool.join() print(res) #list(map(fun,r_idxs))
def integrate(self, func, min_iter=10, max_iter=20, var_thresh=0.0, max_err=10, neff=float('inf'), nmax=None, progress=False, epoch=None, verbose=True): ''' Evaluate the integral Parameters ---------- func : function Integrand function min_iter : int Minimum number of integrator iterations max_iter : int Maximum number of integrator iterations var_thresh : float Variance threshold for terminating integration max_err : int Maximum number of errors to catch before terminating integration neff : float Effective samples threshold for terminating integration nmax : int Maximum number of samples to draw progress : bool Print GMM parameters each iteration ''' err_count = 0 cumulative_eval_time = 0 if nmax is None: nmax = max_iter * self.n while self.iterations < max_iter and self.ntotal < nmax and self.eff_samp < neff: # print('Iteration:', self.iterations) if err_count >= max_err: print('Exiting due to errors...') break try: self._sample() except KeyboardInterrupt: print('KeyboardInterrupt, exiting...') break except Exception as e: print(traceback.format_exc()) print('Error sampling, resetting...') err_count += 1 self._reset() continue t1 = time.time() if self.proc_count is None: self.value_array = func(np.copy(self.sample_array)) else: split_samples = np.array_split(self.sample_array, self.proc_count) p = Pool(self.proc_count) self.value_array = np.concatenate(p.map(func, split_samples), axis=0) p.close() cumulative_eval_time += time.time() - t1 self._calculate_prior() self._calculate_results() self.iterations += 1 self.ntotal += self.n if self.iterations >= min_iter and self.var < var_thresh: break try: self._train() except KeyboardInterrupt: print('KeyboardInterrupt, exiting...') break except Exception as e: print(traceback.format_exc()) print('Error training, resetting...') err_count += 1 self._reset() if self.user_func is not None: self.user_func(self) if progress: for k in self.gmm_dict: if self.gmm_dict[k] is not None: self.gmm_dict[k].print_params() if epoch is not None and self.iterations % epoch == 0: self._reset() if verbose: # Standard mcsampler message, to monitor convergence print(" : {} {} {} {} {} ".format( (self.iterations - 1) * self.n, self.eff_samp, np.sqrt(2 * np.max(self.cumulative_values)), np.sqrt(2 * np.log(self.integral)), "-")) print('cumulative eval time: ', cumulative_eval_time) print('integrator iterations: ', self.iterations)
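# Bare-bones version of the chunked evaluation step above: the sample array is split
# into one block per process, the integrand is mapped over the blocks, and the pieces
# are concatenated back in order. `integrand` is a stand-in for `func`.
import numpy as np
from multiprocess import Pool

def integrand(samples):
    return np.exp(-np.sum(samples ** 2, axis=1))   # vectorised over a block of rows

if __name__ == '__main__':
    sample_array = np.random.uniform(-1, 1, size=(1000, 3))
    proc_count = 4
    split_samples = np.array_split(sample_array, proc_count)
    p = Pool(proc_count)
    value_array = np.concatenate(p.map(integrand, split_samples), axis=0)
    p.close()
    p.join()
    print(value_array.shape)   # (1000,)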
big_labelmap[y0:y0+5000, x0:x0+5000][labelmap != 0] = labelmap[labelmap != 0] + n n += labelmap.max() labelmap_fp = os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.bp' % dict(alg=alg) bp.pack_ndarray_file(big_labelmap, labelmap_fp) # for tile_i in range(12): # execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d.tif' % \ # dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i)) # execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d_labelmap_cellprofiler.bp' % \ # dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i)) # Generate labelmap viz t = time.time() viz = img_as_ubyte(label2rgb(big_labelmap, bg_label=0, bg_color=(0, 0, 0))) cv2.imwrite(os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.png' % dict(alg=alg), viz); sys.stderr.write('Generate labelmap viz: %.2f seconds.\n' % (time.time()-t)) # 60s t = time.time() pool = Pool(12) pool.map(detect_cells, range(first_sec, last_sec+1)) pool.close() pool.join() sys.stderr.write('Overall time: %.2f seconds.\n' % (time.time()-t))
"--image_dir", required=True, help="path to the input dir") ap.add_argument("-p", "--plot", type=bool, default=False, required=False, help="plot results") ap.add_argument("-ps", "--pool_size", type=int, default=1, required=False, help="pool size for multiprocessing") args = vars(ap.parse_args()) images = glob.glob(args["image_dir"] + '*') plot = bool(args["plot"]) pool_size = int(args["pool_size"]) print(args["image_dir"], plot) pool = Pool(pool_size) pool_outputs = pool.map(partial(get_boxes, config=config, plot=plot), images[:]) pool.close() pool.join() pool.terminate() for output in pool_outputs: cv2.imwrite(output[2].replace('\\in', '\\out'), output[3])
def balance(cool_uri, nproc=1, chunksize=int(1e7), mad_max=5, min_nnz=10, min_count=0, ignore_diags=1, tol=1e-5, max_iters=200): """ Cooler contact matrix balancing. Parameters ---------- cool_uri : str URI of cooler group. nproc : int Number of processes. (Default: 1) """ cool_path, group_path = parse_cooler_uri(cool_uri) # pre-check the weight column with h5py.File(cool_path, 'r+') as h5: grp = h5[group_path] if 'weight' in grp['bins']: del grp['bins']['weight'] # Overwrite the weight column log.info('Balancing {0}'.format(cool_uri)) clr = Cooler(cool_uri) try: if nproc > 1: pool = Pool(nproc) map_ = pool.imap_unordered else: map_ = map if clr.info['metadata']['onlyIntra']=='True': onlyIntra = True else: onlyIntra = False bias, stats = ice.iterative_correction( clr, chunksize=chunksize, cis_only=onlyIntra, trans_only=False, tol=tol, min_nnz=min_nnz, min_count=min_count, blacklist=None, mad_max=mad_max, max_iters=max_iters, ignore_diags=ignore_diags, rescale_marginals=True, use_lock=False, map=map_) finally: if nproc > 1: pool.close() if not stats['converged']: log.error('Iteration limit reached without convergence') log.error('Storing final result. Check log to assess convergence.') with h5py.File(cool_path, 'r+') as h5: grp = h5[group_path] # add the bias column to the file h5opts = dict(compression='gzip', compression_opts=6) grp['bins'].create_dataset('weight', data=bias, **h5opts) grp['bins']['weight'].attrs.update(stats)
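# Skeleton of the "pluggable map" idiom above: the balancing routine only needs some
# map-like callable, so a pool's imap_unordered is swapped in when nproc > 1 and the
# builtin map is used otherwise, with the pool torn down in `finally`.
# `heavy_work` and `chunks` are placeholders for the chunks being balanced.
from multiprocess import Pool

def heavy_work(chunk):
    return sum(chunk)

def run(chunks, nproc=1):
    pool = None
    try:
        if nproc > 1:
            pool = Pool(nproc)
            map_ = pool.imap_unordered   # order does not matter for a reduction
        else:
            map_ = map
        return sorted(map_(heavy_work, chunks))
    finally:
        if pool is not None:
            pool.close()
            pool.join()

if __name__ == '__main__':
    print(run([range(10), range(20), range(30)], nproc=2))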
benchmark = Benchmark() if args.benchmark == 'CEC13': func_num = 28 funcs = [benchmark.cec13[_] for _ in range(func_num)] elif args.benchmark == 'CEC17': func_num = 30 funcs = [benchmark.cec17[_] for _ in range(func_num)] res = np.empty((func_num, args.repetition)) cst = np.empty((func_num, args.repetition)) for i in range(func_num): if args.multiprocess: p = Pool(5) results = p.map(single_opt, [(funcs, i, args)]*args.repetition) p.close() p.join() for idx, tmp in enumerate(results): res[i, idx] = tmp[0] cst[i, idx] = tmp[1] else: for j in range(args.repetition): res[i, j], cst[i, j] = single_opt((funcs, i, args)) with open("logs/{}_{}_{}D.pkl".format(args.alg_name, args.benchmark, args.dim), "wb") as f: pkl.dump({"res": res, "cst": cst}, f)
def resample(settings, partial=False, full_cvs=False): """ Resample each shooting point in each thread with different CV definitions to produce new output files with extant aimless shooting data. This function also assesses decorrelation times and produces one or more decorrelated output files. If and only if settings.information_error_checking == True, decorrelated files are produced at each settings.information_error_freq increment. In this case, if partial == True, decorrelation will only be assessed for data lengths absent from the info_err.out file in the working directory. Parameters ---------- settings : argparse.Namespace Settings namespace object partial : bool If True, reads the info_err.out file and only builds new decorrelated output files where the corresponding lines are missing from that file. If partial == False, decorrelation is assessed for every valid data length. Has no effect if not settings.information_error_checking. full_cvs : bool If True, also resamples as_full_cvs.out using every prod trajectory in the working directory. Returns ------- None """ # todo: test this more thoroughly using a dummy thread and a manual decorrelation time calculation using different software # This function is sometimes called from outside the working directory, so make sure we're there os.chdir(settings.working_directory) # Remove pre-existing output files if any, initialize new one open(settings.working_directory + '/as_raw_resample.out', 'w').close() if settings.information_error_checking: open(settings.working_directory + '/as_raw_timestamped.out', 'w').close() # Load in allthreads from restart.pkl try: allthreads = pickle.load(open('restart.pkl', 'rb')) except FileNotFoundError: raise FileNotFoundError( 'resample = True requires restart.pkl, but could not find one in working directory: ' + settings.working_directory) # Open files for writing outside loop (much faster than opening/closing for each write) f1 = open(settings.working_directory + '/as_raw_resample.out', 'a') if settings.information_error_checking: f2 = open(settings.working_directory + '/as_raw_timestamped.out', 'a') # Iterate through each thread's history.init_coords list and obtain CV values as needed for thread in allthreads: thread.this_cvs_list = [ ] # initialize full nested list of CV values for this thread thread.cvs_for_later = [ ] # need this one with empty lists for failed moves, for indexing reasons for step_index in range(len(thread.history.prod_results)): if thread.history.prod_results[step_index][0] in ['fwd', 'bwd']: if thread.history.prod_results[step_index][0] == 'fwd': this_basin = 'B' else: # 'bwd' this_basin = 'A' # Get CVs for this shooting point # todo: a bit sloppy... can I clean this up? 
try: if not os.path.exists( thread.history.init_coords[step_index][0]): warnings.warn( 'attempted to resample ' + thread.history.init_coords[step_index][0] + ' but no such ' 'file exists in the working directory\nSkipping and continuing', RuntimeWarning) thread.cvs_for_later.append([]) continue # skip to next step_index except IndexError: # getting cv's failed (maybe corrupt coordinate file) so consider this step failed thread.cvs_for_later.append([]) continue # skip to next step_index try: this_cvs = get_cvs( thread.history.init_coords[step_index][0], settings) except IndexError: # getting cv's failed (maybe corrupt coordinate file) so consider this step failed thread.cvs_for_later.append([]) continue # skip to next step_index # Write CVs to as_raw_resample.out f1.write(this_basin + ' <- ' + this_cvs + '\n') f1.flush() if settings.information_error_checking: f2.write( str(thread.history.timestamps[step_index]) + ' ' + this_basin + ' <- ' + this_cvs + '\n') f2.flush() # Append this_cvs to running list for evaluating decorrelation time thread.this_cvs_list.append( [[float(item) for item in this_cvs.split(' ')], thread.history.timestamps[step_index]]) thread.cvs_for_later.append( [float(item) for item in this_cvs.split(' ')]) else: thread.cvs_for_later.append([]) # Close files just to be sure f1.close() if settings.information_error_checking: f2.close() if settings.information_error_checking: # sort timestamped output file shutil.copy( settings.working_directory + '/as_raw_timestamped.out', settings.working_directory + '/as_raw_timestamped_copy.out') open(settings.working_directory + '/as_raw_timestamped.out', 'w').close() with open(settings.working_directory + '/as_raw_timestamped_copy.out', 'r') as f: for line in sorted(f): open(settings.working_directory + '/as_raw_timestamped.out', 'a').write(line) open(settings.working_directory + '/as_raw_timestamped.out', 'a').close() os.remove(settings.working_directory + '/as_raw_timestamped_copy.out') # Construct list of data lengths to perform decorrelation for if settings.information_error_checking: if not partial: lengths = [ leng for leng in range( settings.information_error_freq, len( open( settings.working_directory + '/as_raw_timestamped.out', 'r').readlines()) + 1, settings.information_error_freq) ] else: # if partial lengths = [ leng for leng in range( settings.information_error_freq, len( open( settings.working_directory + '/as_raw_timestamped.out', 'r').readlines()) + 1, settings.information_error_freq) if not leng in [ int(line.split(' ')[0]) for line in open( settings.working_directory + '/info_err.out', 'r').readlines() ] ] pattern = re.compile( '[0-9]+') # pattern for reading out timestamp from string else: lengths = [ len( open(settings.working_directory + '/as_raw_resample.out', 'r').readlines()) ] pattern = None # Assess decorrelation and write as_decorr.out for length in lengths: if settings.information_error_checking: suffix = '_' + str( length ) # only use-case with multiple lengths, so this keeps them from stepping on one another's toes cutoff_timestamp = int( pattern.findall( open( settings.working_directory + '/as_raw_timestamped.out', 'r').readlines()[length - 1])[0]) else: cutoff_timestamp = math.inf suffix = '' open(settings.working_directory + '/as_decorr' + suffix + '.out', 'w').close() f3 = open(settings.working_directory + '/as_decorr' + suffix + '.out', 'a') for thread in allthreads: if thread.this_cvs_list: # if there were any 'fwd' or 'bwd' results in this thread mapped = list( map( list, zip(*[ item[0] for item in 
thread.this_cvs_list if item[1] <= cutoff_timestamp ]))) # list of lists of values of each CV slowest_lag = -1 # initialize running tally of slowest autocorrelation time among CVs in this thread if settings.include_qdot: ndims = len(thread.this_cvs_list[0] ) / 2 # number of non-rate-of-change CVs if not ndims % 1 == 0: raise ValueError( 'include_qdot = True, but an odd number of dimensions were found in the ' 'threads in restart.pkl, so they can\'t contain inertial terms.' ) ndims = int(ndims) else: ndims = len(thread.this_cvs_list[0]) for dim_index in range(ndims): slowest_lag = -1 if mapped: this_cv = mapped[dim_index] if len(this_cv) > 1: this_autocorr = stattools.acf(this_cv, nlags=len(this_cv) - 1, fft=True) for lag in range(len(this_cv) - 1): corr = this_autocorr[lag] if abs(corr) <= 1.96 / numpy.sqrt(len(this_cv)): slowest_lag = lag + 1 break if slowest_lag > 0: # only proceed to writing decorrelated output file if a slowest_lag was found # Write the same way as to as_raw_resample.out above, but starting the range at slowest_lag for step_index in range(slowest_lag, len(thread.history.prod_results)): if thread.history.prod_results[step_index][0] in [ 'fwd', 'bwd' ] and thread.history.timestamps[ step_index] <= cutoff_timestamp: if thread.history.prod_results[step_index][ 0] == 'fwd': this_basin = 'B' else: # 'bwd' this_basin = 'A' # Get CVs for this shooting point and write them to the decorrelated output file if thread.cvs_for_later[step_index]: this_cvs = thread.cvs_for_later[ step_index] # retrieve CVs from last evaluation f3.write( this_basin + ' <- ' + ' '.join([str(item) for item in this_cvs]) + '\n') f3.close() # Move resample raw output file to take its place as the only raw output file shutil.move(settings.working_directory + '/as_raw_resample.out', settings.working_directory + '/as_raw.out') # Implement full_cvs if full_cvs: open(settings.working_directory + '/as_full_cvs.out', 'w').close() temp_settings = copy.deepcopy(settings) temp_settings.include_qdot = False # never want to include_qdot in this upcoming call to get_cvs try: affinity = len(os.sched_getaffinity(0)) except AttributeError: # os.sched_getaffinity raises AttributeError on non-UNIX systems. affinity = 1 if affinity == 1: with open(settings.working_directory + '/as_full_cvs.out', 'a') as f: for thread in allthreads: for step_index in range( min([ len(thread.history.prod_results), len(thread.history.prod_trajs) ]) ): # just in case one got an extra write in over the other if thread.history.prod_results[step_index] in [[ 'fwd', 'bwd' ], ['bwd', 'fwd']]: # if step accepted for job_index in range(2): if os.path.exists( thread.history.prod_trajs[step_index] [job_index]): f.write( get_cvs( thread.history. prod_trajs[step_index][job_index], temp_settings, False, 'all') + '\n') else: # affinity > 1 # Map partial_full_cvs calls to available processes with Pool(affinity) as p: p.starmap( partial_full_cvs, zip(allthreads, [ 'partial_full_cvs_' + str(thread_index) + '.out' for thread_index in range(len(allthreads)) ], itertools.repeat(temp_settings))) # Finally, combine the partial files into the full file with open(settings.working_directory + '/as_full_cvs.out', 'w') as outfile: for fname in [ 'partial_full_cvs_' + str(thread_index) + '.out' for thread_index in range(len(allthreads)) ]: with open(fname) as infile: for line in infile: if line: # skip blank lines outfile.write(line) os.remove(fname)
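# Reduced sketch of the full_cvs branch above: pick a worker count from the CPU
# affinity mask when available (os.sched_getaffinity is Linux-only), then starmap a
# per-thread job whose shared argument is broadcast with itertools.repeat.
# `summarise` and its inputs are invented for illustration.
import itertools
import os
from multiprocess import Pool

def summarise(thread_id, out_name, settings):
    return '{}: {} (tol={})'.format(out_name, thread_id, settings['tol'])

if __name__ == '__main__':
    try:
        affinity = len(os.sched_getaffinity(0))
    except AttributeError:      # not available outside UNIX
        affinity = 1
    thread_ids = list(range(6))
    out_names = ['partial_{}.out'.format(i) for i in thread_ids]
    settings = {'tol': 1e-3}
    with Pool(affinity) as p:
        lines = p.starmap(summarise,
                          zip(thread_ids, out_names, itertools.repeat(settings)))
    print('\n'.join(lines))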
raise # input_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=5, version=version, resol='raw') out_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=2, resol=resol, version=version) print 'out_dir:', out_dir # script = os.path.join(REPO_DIR, 'preprocess', 'warp_crop_IM_v3.py') # ! rm -rf {out_dir} create_if_not_exists(out_dir) t = time.time() pool = Pool(8) _ = pool.map( lambda img_name: crop(stack=stack, img_name=img_name, version=version, resol=resol, x=x, y=y, w=w, h=h), metadata_cache['valid_filenames'][stack]) pool.close() pool.join() # for img_name in metadata_cache['valid_filenames'][stack]: # f(stack=stack, img_name=img_name, version=version, resol=resol,
def evaluate(self, split_name: str, ds: torch.utils.data.DataLoader): # Prepare data saving: flag_filepath_format = os.path.join(self.eval_dirpath, split_name, "{}.flag") # Loading model self.load_checkpoint() self.model.eval() # Create pool for multiprocessing pool = None if not self.config["eval_params"]["patch_size"]: # If single image is not being split up, then a pool to process each sample in the batch makes sense pool = Pool(processes=self.config["num_workers"]) compute_polygonization = self.config["eval_params"]["save_individual_outputs"]["poly_shapefile"] or \ self.config["eval_params"]["save_individual_outputs"]["poly_geojson"] or \ self.config["eval_params"]["save_individual_outputs"]["poly_viz"] or \ self.config["eval_params"]["save_aggregated_outputs"]["poly_coco"] # Saving individual outputs to disk: save_individual_outputs = True in self.config["eval_params"][ "save_individual_outputs"].values() saver_async = None if save_individual_outputs: save_outputs_partial = partial( save_utils.save_outputs, config=self.config, eval_dirpath=self.eval_dirpath, split_name=split_name, flag_filepath_format=flag_filepath_format) saver_async = async_utils.Async(save_outputs_partial) saver_async.start() # Saving aggregated outputs save_aggregated_outputs = True in self.config["eval_params"][ "save_aggregated_outputs"].values() tile_data_list = [] if self.gpu == 0: tile_iterator = tqdm(ds, desc="Eval {}: ".format(split_name), leave=True) else: tile_iterator = ds for tile_i, tile_data in enumerate(tile_iterator): # --- Inference, add result to tile_data_list if self.config["eval_params"]["patch_size"] is not None: # Cut image into patches for inference inference.inference_with_patching(self.config, self.model, tile_data) else: # Feed images as-is to the model inference.inference_no_patching(self.config, self.model, tile_data) tile_data_list.append(tile_data) # --- Accumulate batches into tile_data_list until capacity is reached (or this is the last batch) if self.config["eval_params"]["batch_size_mult"] <= len(tile_data_list)\ or tile_i == len(tile_iterator) - 1: # Concat tensors of tile_data_list accumulated_tile_data = {} for key in tile_data_list[0].keys(): if isinstance(tile_data_list[0][key], list): accumulated_tile_data[key] = [ item for _tile_data in tile_data_list for item in _tile_data[key] ] elif isinstance(tile_data_list[0][key], torch.Tensor): accumulated_tile_data[key] = torch.cat( [_tile_data[key] for _tile_data in tile_data_list], dim=0) else: raise TypeError( f"Type {type(tile_data_list[0][key])} is not handled!" ) tile_data_list = [] # Empty tile_data_list else: # tile_data_list is not full yet, continue running inference... continue # --- Polygonize if compute_polygonization: crossfield = accumulated_tile_data[ "crossfield"] if "crossfield" in accumulated_tile_data else None accumulated_tile_data["polygons"], accumulated_tile_data[ "polygon_probs"] = polygonize.polygonize( self.config["polygonize_params"], accumulated_tile_data["seg"], crossfield_batch=crossfield, pool=pool) # --- Save output if self.config["eval_params"]["save_individual_outputs"]["seg_mask"] or \ self.config["eval_params"]["save_aggregated_outputs"]["seg_coco"]: # Take seg_interior: seg_pred_mask = self.config["eval_params"][ "seg_threshold"] < accumulated_tile_data["seg"][:, 0, ...] 
accumulated_tile_data["seg_mask"] = seg_pred_mask accumulated_tile_data = local_utils.batch_to_cpu( accumulated_tile_data) sample_list = local_utils.split_batch(accumulated_tile_data) # Save individual outputs: if save_individual_outputs: for sample in sample_list: saver_async.add_work(sample) # Store aggregated outputs: if save_aggregated_outputs: self.shared_dict["name_list"].extend( accumulated_tile_data["name"]) if self.config["eval_params"]["save_aggregated_outputs"][ "stats"]: y_pred = accumulated_tile_data["seg"][:, 0, ...].cpu() if "gt_mask" in accumulated_tile_data: y_true = accumulated_tile_data["gt_mask"][:, 0, ...] elif "gt_polygons_image" in accumulated_tile_data: y_true = accumulated_tile_data[ "gt_polygons_image"][:, 0, ...] else: raise ValueError( "Either gt_mask or gt_polygons_image should be in accumulated_tile_data" ) iou = measures.iou( y_pred.reshape(y_pred.shape[0], -1), y_true.reshape(y_true.shape[0], -1), threshold=self.config["eval_params"]["seg_threshold"]) self.shared_dict["iou_list"].extend(iou.cpu().numpy()) if self.config["eval_params"]["save_aggregated_outputs"][ "seg_coco"]: for sample in sample_list: annotations = save_utils.seg_coco(sample) self.shared_dict["seg_coco_list"].extend(annotations) if self.config["eval_params"]["save_aggregated_outputs"][ "poly_coco"]: for sample in sample_list: annotations = save_utils.poly_coco( sample["polygons"], sample["polygon_probs"], sample["image_id"].item()) self.shared_dict["poly_coco_list"].append( annotations ) # annotations could be a dict, or a list # END of loop over samples # Save aggregated results if save_aggregated_outputs: self.barrier.wait( ) # Wait on all processes so that shared_dict is synchronized. if self.gpu == 0: if self.config["eval_params"]["save_aggregated_outputs"][ "stats"]: print("Start saving stats:") # Save sample_stats in CSV: t1 = time.time() stats_filepath = os.path.join( self.eval_dirpath, "{}.stats.csv".format(split_name)) stats_file = open(stats_filepath, "w") fnames = ["name", "iou"] writer = csv.DictWriter(stats_file, fieldnames=fnames) writer.writeheader() for name, iou in sorted(zip(self.shared_dict["name_list"], self.shared_dict["iou_list"]), key=lambda pair: pair[0]): writer.writerow({"name": name, "iou": iou}) stats_file.close() print(f"Finished in {time.time() - t1:02}s") if self.config["eval_params"]["save_aggregated_outputs"][ "seg_coco"]: print("Start saving seg_coco:") t1 = time.time() seg_coco_filepath = os.path.join( self.eval_dirpath, "{}.annotation.seg.json".format(split_name)) python_utils.save_json( seg_coco_filepath, list(self.shared_dict["seg_coco_list"])) print(f"Finished in {time.time() - t1:02}s") if self.config["eval_params"]["save_aggregated_outputs"][ "poly_coco"]: print("Start saving poly_coco:") poly_coco_base_filepath = os.path.join( self.eval_dirpath, f"{split_name}.annotation.poly") t1 = time.time() save_utils.save_poly_coco( self.shared_dict["poly_coco_list"], poly_coco_base_filepath) print(f"Finished in {time.time() - t1:02}s") # Sync point of individual outputs if save_individual_outputs: print_utils.print_info( f"GPU {self.gpu} -> INFO: Finishing saving individual outputs." ) saver_async.join() self.barrier.wait( ) # Wait on all processes so that all saver_asyncs are finished
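# Minimal form of the optional-pool handoff used by evaluate(): a pool is only
# created when whole images are processed, and downstream helpers accept `pool=None`
# and fall back to a serial map. `polygonize_batch` here is a stand-in, not the
# project's real polygonize.polygonize.
from multiprocess import Pool

def _polygonize_one(mask):
    return [cell for cell in mask if cell]     # toy per-sample work

def polygonize_batch(batch, pool=None):
    mapper = pool.map if pool is not None else map
    return list(mapper(_polygonize_one, batch))

if __name__ == '__main__':
    batch = [[0, 1, 1], [1, 0, 0], [0, 0, 0]]
    pool = Pool(processes=2)
    try:
        print(polygonize_batch(batch, pool=pool))   # parallel path
    finally:
        pool.close()
        pool.join()
    print(polygonize_batch(batch))                  # serial fallback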
for iy, y0 in enumerate(np.arange(0, img_h, 5000)): for ix, x0 in enumerate(np.arange(0, img_w, 5000)): origins.append((x0, y0)) alg = 'cellprofiler' big_labelmap = np.zeros((img_h, img_w), dtype=np.int64) n = 0 for i, input_fp in enumerate(input_fps): prefix = os.path.splitext(input_fp)[0] labelmap = labelmap_alltiles[i].astype(np.int64) # astype(np.int64) is important, otherwise results in negative label values. x0, y0 = origins[i] big_labelmap[y0:y0+5000, x0:x0+5000][labelmap != 0] = labelmap[labelmap != 0] + n n += labelmap.max() labelmap_fp = os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.bp' % dict(alg=alg) bp.pack_ndarray_file(big_labelmap, labelmap_fp) upload_to_s3(labelmap_fp) for fp in input_fps: execute_command('rm ' + fp) t = time.time() pool = Pool(NUM_CORES/2) pool.map(detect_cells, filenames) pool.close() pool.join() sys.stderr.write('Overall time: %.2f seconds.\n' % (time.time()-t))
def aggregate(self, feature_files): """ This aggregator is a front-end to the pymir3 stats module. The statistics that must be computed are found in the simple_aggregation key in the experiment file. :param feature_files: a list of FeatureTrack filenames :type feature_files: list[str] :return: :rtype: None .. note:: These keys are expected to be set in the experiment file: * ['simple_aggregation']['mean'] * ['simple_aggregation']['delta'] * ['simple_aggregation']['variance'] * ['simple_aggregation']['acceleration'] * ['simple_aggregation']['slope'] * ['simple_aggregation']['limits'] * ['simple_aggregation']['csv'] * ['simple_aggregation']['normalize'] * ['general']['scratch_directory'] * ['feature_aggregation']['aggregated_output'] """ features = load_feature_files(feature_files) if self.params['simple_aggregation']['texture_windows']: #for i in range(len(feature_files)): # feature_files[i] = feature_files[i] + "_tw" jobs = [] out_idx = 0 for f in features: jobs.append(( f, self.params['simple_aggregation']['texture_window_length'], feature_files[out_idx])) out_idx += 1 num_files = len(jobs) output_buffer_size = self.params['simple_aggregation'][ 'tw_buffer_size'] pool = Pool( processes=self.params['simple_aggregation']['tw_workers']) pool.map(calc_textures, jobs) # out_idx = 0 # for i in range(0, num_files, output_buffer_size): # print "Calculating texture windows %d through %d of %d" % (i + 1, min(i + output_buffer_size, num_files), num_files) # result = pool.map(calc_textures, jobs[i:min(i + output_buffer_size, num_files)]) # for track in result: # filename = feature_files[out_idx] # print "writing features to file %s..." % (filename) # feature_file = open(filename, "w") # track.save(feature_file) # feature_file.close() # del track # out_idx+=1 # del result # gc.collect() pool.close() pool.join() features = None if features == None: features = load_feature_files(feature_files) stats = feat_stats.Stats() m = stats.stats( features, mean=self.params['simple_aggregation']['mean'], delta=self.params['simple_aggregation']['delta'], variance=self.params['simple_aggregation']['variance'], acceleration=self.params['simple_aggregation']['acceleration'], slope=self.params['simple_aggregation']['slope'], limits=self.params['simple_aggregation']['limits'], csv=self.params['simple_aggregation']['csv'], normalize=self.params['simple_aggregation']['normalize']) out = open( self.params['general']['scratch_directory'] + "/" + self.params['feature_aggregation']['aggregated_output'], "w") m.save(out) out.close()
    rescale_factor = args.rescale_factor
else:
    w = args.width
    h = args.height

n_jobs = args.jobs

def worker(img_name):
    input_fp = input_fp_map[img_name]
    output_fp = output_fp_map[img_name]
    create_parent_dir_if_not_exists(output_fp)

    img = imread(input_fp)
    # Slice steps must be integers; 1 / rescale_factor is a float
    # (e.g. rescale_factor = 0.25 gives a step of 4), so round it first.
    step = int(round(1 / rescale_factor))
    save_data(img[::step, ::step], output_fp)

pool = Pool(n_jobs)
_ = pool.map(worker, in_image_names)
pool.close()
pool.join()

# run_distributed('convert \"%%(input_fp)s\" -crop %(w)dx%(h)d+%(x)d+%(y)d \"%%(output_fp)s\"' % \
#                 {'w':w_raw, 'h':h_raw, 'x':x_raw, 'y':y_raw},
#                 kwargs_list=[{'input_fp': ,
#                               'output_fp': output_fp_map[img_name]}
#                              for img_name in metadata_cache['valid_filenames'][stack]],
#                 argument_type='single',
#                 jobs_per_node=1,
#                 local_only=True)
sim_likelihood.increment_likelihood_legacy(simobs.dist_matrix, simobs.rts_matrix, **sim_params) fp_value = fp_likelihood.likelihood sim_value = sim_likelihood.likelihood return (fp_value, sim_value, i, j) fp_likelihoods = np.zeros((100, 100)) sim_likelihoods = np.zeros((100, 100)) sigmas = np.linspace(0.2, 3, 100) rewards = np.linspace(0.5, 1.5, 100) arglists = it.product(zip(sigmas, range(100)), zip(rewards, range(100))) pool = Pool(processes=22) output_likelihoods = pool.map(comparisons, arglists) for output in output_likelihoods: i, j = output[2:] fp_likelihoods[i, j] = output[0] sim_likelihoods[i, j] = output[1] fw = open('/home/berk/Documents/fp_likelihoods_test.p', 'wb') outdict = { 'fp_likelihoods': fp_likelihoods, 'sim_likelihoods': sim_likelihoods } pickle.dump(outdict, fw) fw.close()
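# The same scatter idea as above in miniature: each worker returns its grid indices
# along with its value, so the parent can fill the result matrix in any completion
# order. `score` replaces the likelihood comparison for illustration.
import itertools as it
import numpy as np
from multiprocess import Pool

def score(arg):
    (sigma, i), (reward, j) = arg
    return sigma * reward, i, j

if __name__ == '__main__':
    n = 10
    sigmas = np.linspace(0.2, 3, n)
    rewards = np.linspace(0.5, 1.5, n)
    grid = np.zeros((n, n))
    arglists = it.product(zip(sigmas, range(n)), zip(rewards, range(n)))
    with Pool(processes=4) as pool:
        for value, i, j in pool.map(score, arglists):
            grid[i, j] = value
    print(grid[0, 0], grid[-1, -1])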
def main(lensname, dataname, work_dir='./'): main_path = os.getcwd() sys.path.append(work_dir + "config/") config = importlib.import_module("config_" + lensname + "_" + dataname) base_lcs = pycs.gen.util.readpickle(config.data) f = open( os.path.join(config.report_directory, 'report_optimisation_%s.txt' % config.simoptfctkw), 'w') if config.mltype == "splml": if config.forcen: ml_param = config.nmlspl string_ML = "nmlspl" else: ml_param = config.mlknotsteps string_ML = "knml" elif config.mltype == "polyml": ml_param = config.degree string_ML = "deg" else: raise RuntimeError( 'I dont know your microlensing type. Choose "polyml" or "spml".') for a, kn in enumerate(config.knotstep): for b, ml in enumerate(ml_param): lcs = copy.deepcopy(base_lcs) destpath = os.path.join( main_path, config.lens_directory + config.combkw[a, b] + '/') print destpath ##### We start by shifting our curves "by eye", to get close to the result and help the optimisers to do a good job applyshifts( lcs, config.timeshifts, config.magshifts) #be carefull, this remove ml as well... # We also give them a microlensing model (here, similar to Courbin 2011) config.attachml( lcs, ml) #this is because they were saved as raw lcs, wihtout lcs. if config.max_core == None: nworkers = cpu_count() else: nworkers = config.max_core for c, opts in enumerate(config.optset): if config.simoptfctkw == "spl1": kwargs = {'kn': kn, 'name': 'spl1'} elif config.simoptfctkw == "regdiff": kwargs = config.kwargs_optimiser_simoptfct[c] else: print "Error : simoptfctkw must be spl1 or regdiff" if config.run_on_copies: print "I will run the optimiser on the copies with the parameters :", kwargs p = Pool(nworkers) if config.simoptfctkw == "spl1": job_args = [ (j, config.simset_copy, lcs, config.simoptfct, kwargs, opts, config.tsrand, destpath) for j in range(nworkers) ] success_list_copies = p.map(exec_worker_copie_aux, job_args) # success_list_copies = [exec_worker_copie_aux(job_args[0])]# DEBUG elif config.simoptfctkw == "regdiff": if a == 0 and b == 0: # for copies, run on only 1 (knstp,mlknstp) as it the same for others job_args = (0, config.simset_copy, lcs, config.simoptfct, kwargs, opts, config.tsrand, destpath) success_list_copies = exec_worker_copie_aux( job_args) success_list_copies = [ success_list_copies ] # we hace to turn it into a list to match spl format dir_link = os.path.join( destpath, "sims_%s_opt_%s" % (config.simset_copy, opts)) print "Dir link :", dir_link pkl.dump( dir_link, open( os.path.join( config.lens_directory, 'regdiff_copies_link_%s.pkl' % kwargs['name']), 'w')) # p.map(exec_worker_copie_aux, job_args)# because for some reason, regdiff does not like multiproc. f.write('COPIES, kn%i, %s%i, optimiseur %s : \n' % (kn, string_ML, ml, kwargs['name'])) write_report_optimisation(f, success_list_copies) f.write('################### \n') if config.run_on_sims: print "I will run the optimiser on the simulated lcs with the parameters :", kwargs p = Pool(nworkers) if config.simoptfctkw == "spl1": job_args = [ (j, config.simset_mock, lcs, config.simoptfct, kwargs, opts, config.tsrand, destpath) for j in range(nworkers) ] success_list_simu = p.map(exec_worker_mocks_aux, job_args) # success_list_simu = [exec_worker_mocks_aux(job_args[0])] #DEBUG elif config.simoptfctkw == "regdiff": job_args = (0, config.simset_mock, lcs, config.simoptfct, kwargs, opts, config.tsrand, destpath) success_list_simu = exec_worker_mocks_aux( job_args ) # because for some reason, regdiff does not like multiproc. 
success_list_simu = [success_list_simu] # p.map(exec_worker_copie_aux, job_args) f.write('SIMULATIONS, kn%i, %s%i, optimiseur %s : \n' % (kn, string_ML, ml, kwargs['name'])) write_report_optimisation(f, success_list_simu) f.write('################### \n') print "OPTIMISATION DONE : report written in %s" % (os.path.join( config.report_directory, 'report_optimisation_%s.txt' % config.simoptfctkw)) f.close()
from multiprocess import Pool def f(x): return x * x if __name__ == '__main__': p = Pool(4) result = p.map_async(f, range(10)) print(result.get(timeout=1))
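# The same toy example, written with the pool as a context manager so its worker
# processes are cleaned up on exit, and with a lambda to show that `multiprocess`
# (which pickles with dill) can ship callables the standard-library pool cannot.
from multiprocess import Pool

if __name__ == '__main__':
    with Pool(4) as p:
        squares = p.map(lambda x: x * x, range(10))
    print(squares)   # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]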
def error_rate(self, data, labels, other_bc_predict_args, other_bc_predict_kwargs, process_count): """ Outputs the error rate of multiclass classifier for the given data and labels. Parameters ---------- data : ndarray An ndarray where each row is a input vector consisting of the state of the visible units. labels : ndarray An ndarray where each element is the label/classification of a input vector in data for binary classification. Valid label values are -1 and 1. other_bc_predict_args : tuple Positional arguments to BinaryClassifier.predict not including input_vector. other_bc_predict_kwargs : dict Keyword arguments to BinaryClassifier.predict not including input_vector. process_count : int The number of worker processes to use when generating the predictions. Note the elements in data must correspond in sequence to the elements in labels. Returns ------- float The error rate of the multiclass classifier for the given data and labels. """ def binary_classifier_predict(binary_classifier, data, other_bc_predict_args, other_bc_predict_kwargs): """ Generate predictions for a specific binary classifier. Parameters ---------- binary_classifier : binary classifier The binary classifier to generate predictions. data : ndarray Data to generate predictions for. other_bc_predict_args : tuple Positional arguments to BinaryClassifier.predict not including input_vector. other_bc_predict_kwargs : dict Keyword arguments to BinaryClassifier.predict not including input_vector. Returns ------- list The predictions of the given binary classifier. """ bc_data_scores = [ binary_classifier.predict(input_vector, *other_bc_predict_args, **other_bc_predict_kwargs) for input_vector in data ] return bc_data_scores def error_callback(exc): """ Callback used by pool.apply_async when an error occurs. Parameters ---------- exc : Exception Exception thrown by the process pool.apply_async was running in. """ print(exc.__cause__) if process_count is not None and process_count > 1: # Unfortunately we cannot just use self.predict directly # (e.g. predictions = pool.map(self.predict, data)). # Instead must partially repeat what self.predict does here. binary_classifier_results = {} binary_classifier_scores = {} with Pool(processes=process_count) as pool: # Use the process pool to compute predictions of the binary classifiers. for (label_1, label_2 ), binary_classifier in self.binary_classifiers.items(): binary_classifier_results[(label_1, label_2)] = ( pool.apply_async(func=binary_classifier_predict, args=(binary_classifier, data, other_bc_predict_args, other_bc_predict_kwargs), error_callback=error_callback)) # Retrieve the binary classifier scores from the process pool. for (label_1, label_2) in self.binary_classifiers.keys(): binary_classifier_scores[(label_1, label_2)] = ( binary_classifier_results[(label_1, label_2)].get()) # Generate list of predictions for each data element based on the predictions of # the underlying binary classifiers. predictions = [] if self.strategy in ('OVA', 'OVO'): # Compute a confidence score for each label and set the predicted label to be # the one with the highest score. # Same technique as in self.predict with the difference here being # binary_classifier_scores values are not a single value and instead are an # iterable of scores for each data element. 
for i in range(len(data)): label_scores = { label: sum(scores[i] if label == label_1 else -scores[i] for (label_1, label_2 ), scores in binary_classifier_scores.items() if label in (label_1, label_2)) for label in self.possible_labels } predicted_label = max(label_scores, key=label_scores.get) predictions.append(predicted_label) else: raise NotImplementedError(self.strategy) predictions = np.asarray(predictions, dtype=labels.dtype) else: # Generate list of predictions for each data element using self.predict. predictions = np.asarray([ self.predict(input_vector, other_bc_predict_args, other_bc_predict_kwargs) for input_vector in data ], dtype=labels.dtype) # Gather the results of the predictions; prediction_results is an ndarray corresponding # to the predictions and the labels for the data with True meaning the prediction matched # the label and False meaning it did not. prediction_results = (predictions == labels) # Note the number of incorrect prediction results # (i.e. the number of False entries in prediction_results). num_incorrect_prediction_results = np.sum(~prediction_results) # Note the number of results. num_prediction_results = prediction_results.shape[0] # Compute the error rate. error_rate = num_incorrect_prediction_results / num_prediction_results return error_rate
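# Stripped-down version of the apply_async/error_callback bookkeeping above: one
# async job per classifier, an error callback that just reports the cause, and a
# second pass that collects results with .get(). The `score_label` worker is made up.
from multiprocess import Pool

def score_label(label, data):
    return [len(data), label]

def report_error(exc):
    print('worker failed:', exc)

if __name__ == '__main__':
    labels = ['A', 'B', 'C']
    data = list(range(5))
    results = {}
    with Pool(processes=2) as pool:
        pending = {label: pool.apply_async(func=score_label,
                                           args=(label, data),
                                           error_callback=report_error)
                   for label in labels}
        for label, res in pending.items():
            results[label] = res.get()
    print(results)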
def all_countries(base_path, multiprocess=True, overwrite=True, savefig=False, report=False): """ Main function to estimate the length of all the roads and countries we are interested in. Args: *base_path* : Base path to the location of all files and directories in this project. *multiprocess* : Set to True by default. Set to False in the case of limited processing power. *overwrite* : Set to True by default. This relates to all input data (i.e. .poly files, .osm.pbf files and shapefiles). *savefig* : Set to False by default. When set to True, it will return a figure with the roads of a country. Returns: An Excel file with the length of all **Primary**, **Secondary**, **Tertiary**, **Track** and **Other** roads for each country. """ print('The calculation of road lenghts has started!') start = time.time() # ============================================================================= # """ Set path to dirs""" # ============================================================================= dir_out = os.path.join(base_path, 'output_data') poly_dir = os.path.join(base_path, 'poly_files') osm_path_in = os.path.join(base_path, 'osm_continent') fig_dir = os.path.join(base_path, 'Figures') # ============================================================================= # """ create directories if they are not created yet """ # ============================================================================= if not os.path.exists(dir_out): os.makedirs(dir_out) if not os.path.exists(poly_dir): os.makedirs(poly_dir) if not os.path.exists(osm_path_in): os.makedirs(osm_path_in) if (savefig == True) and not os.path.exists(fig_dir): os.makedirs(fig_dir) # ============================================================================= # """Set path to files we use """ # ============================================================================= wb_country_in = os.path.join(base_path, 'input_data', 'wbccodes2014.csv') global_shape = os.path.join(base_path, 'input_data', '2015_GAUL_Dataset_Mod.gdb') # ============================================================================= # """Load country shapes and list and only save the required countries""" # ============================================================================= wb_country = pd.read_csv(wb_country_in, header=0, index_col=0) #filter high income countries from country file country_list = wb_country[['country', 'continent' ]].loc[wb_country['wbregion'] != 'YHI'] # add column to country list so we can easily look up the required continental # osm file for that continent map_continent = { 'MA': 'central-america', 'SA': 'south-america', 'EU': 'europe', 'AS': 'asia', 'AU': 'australia-oceania', 'AF': 'africa', 'AM': 'north-america' } country_list['osm-cont'] = country_list['continent'].map( lambda x: (map_continent[x])) # ============================================================================= # """ create .poly files to clip countries from osm.pbf files """ # ============================================================================= if not os.listdir(poly_dir): create_poly_files(base_path, global_shape, save_shapefile=overwrite) # ============================================================================= # """ check if we have actually downloaded the openstreetmap input files. If not, # lets download them. Note: this will take a while! 
""" # ============================================================================= continent_list = [ 'central-america', 'south-america', 'europe', 'asia', 'australia-oceania', 'africa', 'north-america' ] for continent in continent_list: url = 'http://download.geofabrik.de/%s-latest.osm.pbf' % continent if '%s-latest.osm.pbf' % (continent) not in os.listdir(osm_path_in): urllib.request.urlretrieve(url, osm_path_in) # ============================================================================= # """ create extracted osm files for each country per continent """ # ============================================================================= out = [] countries = [] continent_osms = [] base_paths = [] overwrites = [] savefigs = [] reporting = [] for country in country_list.iterrows(): country = country[1] continent_osm = os.path.join( osm_path_in, '%s-latest.osm.pbf' % (country['osm-cont'])) countries.append(country['country']) continent_osms.append(continent_osm) base_paths.append(base_path) overwrites.append(overwrite) savefigs.append(savefig) reporting.append(report) # multiprocessing will start if set to True. Set to False with limited processing capacities if multiprocess == True: pool = Pool(cpu_count() - 1) out = pool.starmap( single_country, zip(countries, continent_osms, base_paths, overwrites, savefigs, reporting)) # when multiprocessing set to False, we will just loop over the countries. else: out = [] i = 0 for country in country_list.iterrows(): country = country[1] continent_osm = os.path.join( osm_path_in, '%s-latest.osm.pbf' % (country['osm-cont'])) out.append( single_country(country['country'], continent_osm, base_path, overwrites[i], savefigs[i], reporting[i])) i += 1 df = pd.concat(out, axis=1).T map_country = dict(zip(wb_country['country'], wb_country['country_name'])) df['Country'] = df.index.to_series().map(map_country) df.set_index('Country', inplace=True, drop=True) writer = pd.ExcelWriter(os.path.join(dir_out, 'dist_roads.xlsx')) df.to_excel(writer, 'output') writer.save() end = time.time() print('It took ' + str(np.float16((end - start))) + " seconds to finish!")