def _itergroundings(self, simplify=False, unsatfailure=False):
    global global_bpll_grounding
    global_bpll_grounding = self
    if self.multicore:
        pool = Pool(maxtasksperchild=1)
        try:
            for gndresult in pool.imap(with_tracing(create_formula_groundings), self.formulas):
                for fidx, stat in gndresult:
                    for (varidx, validx, val) in stat:
                        self._varidx2fidx[varidx].add(fidx)
                        self._addstat(fidx, varidx, validx, val)
                    checkmem()
                yield None
        except CtrlCException as e:
            pool.terminate()
            raise e
        pool.close()
        pool.join()
    else:
        for gndresult in imap(create_formula_groundings, self.formulas):
            for fidx, stat in gndresult:
                for (varidx, validx, val) in stat:
                    self._varidx2fidx[varidx].add(fidx)
                    self._addstat(fidx, varidx, validx, val)
            yield None
def add_tree(self, iterations=-1, snapshot=False):
    """
    Multi-core, fully utilizes underlying CPU to create the trees
    of the forest and stores them into the forest's list of trees

    :param iterations: number of trees to make, -1 means use default setting
    :param snapshot: if True, record the error after the trees are added
    :return: None
    """
    print("Adding trees:", iterations)
    if iterations == -1:
        iterations = self.default_tree_count

    #########################
    # MULTI THREADED
    ########################
    pool = Pool()  # creates multiple processes equal to cores in machine
    outputs = pool.map(make_tree,
                       [(self.data_copy(), self.depthlimit, self.weak_learner)
                        for _ in range(iterations)])
    pool.close()
    pool.join()
    self.trees.extend(outputs)  # get the trees created and store them

    #########################
    # SINGLE THREADED
    ########################
    #for i in range(iterations):
    #    tree = Tree(self.data, self.bagging, self.bag_ratio, self.depthlimit, self.weak_learner)
    #    self.trees.append(tree)  # get the trees created and store them

    if snapshot:
        self.sum_squares(len(self.trees))  # get error after each snapshot, if this command is run multiple times
def work(host, port, processes, threads, times):
    pool = Pool(processes,
                lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
    p = Process(target=progress)
    p.daemon = True
    start = time.time()
    try:
        for chunk in divide(times, processes):
            pool.apply_async(thread, (host, port, threads, chunk))
        p.start()
        pool.close()
        pool.join()
        p.terminate()
        p.join()
    except KeyboardInterrupt:
        pool.terminate()
        p.terminate()
        p.join()
        pool.join()
    return time.time() - start
def _itergroundings(self, simplify=True, unsatfailure=True):
    # generate all groundings
    if not self.formulas:
        return
    global global_fastConjGrounding
    global_fastConjGrounding = self
    batches = list(rndbatches(self.formulas, 20))
    batchsizes = [len(b) for b in batches]
    if self.verbose:
        bar = ProgressBar(width=100, steps=sum(batchsizes), color='green')
        i = 0
    if self.multicore:
        pool = Pool()
        try:
            for gfs in pool.imap(with_tracing(create_formula_groundings), batches):
                if self.verbose:
                    bar.inc(batchsizes[i])
                    bar.label(str(cumsum(batchsizes, i + 1)))
                    i += 1
                for gf in gfs:
                    yield gf
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for gfs in imap(create_formula_groundings, batches):
            if self.verbose:
                bar.inc(batchsizes[i])
                bar.label(str(cumsum(batchsizes, i + 1)))
                i += 1
            for gf in gfs:
                yield gf
def main(datadir, convert_dir, crop_size):
    try:
        os.mkdir(convert_dir)
    except OSError:
        pass

    filenames = data_util.get_image_files(datadir)

    print('Resizing images in {} to {}'.format(datadir, convert_dir))

    n = len(filenames)
    batch_size = 500
    batches = n // batch_size + 1

    p = Pool()

    args = []
    for f in filenames:
        args.append((convert_size, (datadir, convert_dir, f, crop_size)))

    for i in range(batches):
        print('batch {:>2} / {}'.format(i + 1, batches))
        p.map(convert, args[i * batch_size: (i + 1) * batch_size])

    p.close()
    p.join()

    print('Done')
def main():
    global pool
    pool = Pool(POOL_SIZE)

    nseeds = 100
    # print("== generating seeds...")
    # generate_seeds(nseeds)

    #print("running const density experiments...")
    #run_constant_density(0.1, range(100, 1000, 100), nseeds)

    #print("running const size experiments...")
    #run_constant_size(50, range(100, 1000, 100), nseeds)

    print("== running aggregate interval experiments (const density)...")
    # run_aggregate_interval_constant_density(0.1, range(100, 1000, 100), nseeds,
    #     [100, 500] + list(range(1000, 4000, 1000)))
    run_aggregate_interval_constant_density(
        0.1, range(100, 1000, 100), nseeds,
        [3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 50000])
    reset_pool()
    run_aggregate_interval_constant_density(
        0.2, range(100, 1000, 100), nseeds,
        [100, 500, 1000, 2000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 50000])
    reset_pool()
    run_aggregate_interval_constant_density(
        0.3, range(100, 1000, 100), nseeds,
        [100, 500, 1000, 2000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 50000])
    reset_pool()
    run_aggregate_interval_constant_density(
        0.4, range(100, 1000, 100), nseeds,
        [100, 500, 1000, 2000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 50000])
    reset_pool()
    run_aggregate_interval_constant_density(
        0.5, range(100, 1000, 100), nseeds,
        [100, 500, 1000, 2000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 50000])

    pool.close()
    pool.join()
def query_tweets(query, limit=None, begindate=dt.date(2006, 3, 21),
                 enddate=dt.date.today(), poolsize=20, lang=''):
    no_days = (enddate - begindate).days
    if poolsize > no_days:
        # Since we are assigning each pool a range of dates to query,
        # the number of pools should not exceed the number of dates.
        poolsize = no_days
    dateranges = [begindate + dt.timedelta(days=elem)
                  for elem in linspace(0, no_days, poolsize + 1)]

    if limit:
        limit_per_pool = (limit // poolsize) + 1
    else:
        limit_per_pool = None

    queries = ['{} since:{} until:{}'.format(query, since, until)
               for since, until in zip(dateranges[:-1], dateranges[1:])]

    all_tweets = []
    try:
        pool = Pool(poolsize)
        logger.info('queries: {}'.format(queries))
        try:
            for new_tweets in pool.imap_unordered(
                    partial(query_tweets_once, limit=limit_per_pool, lang=lang),
                    queries):
                all_tweets.extend(new_tweets)
                logger.info('Got {} tweets ({} new).'.format(
                    len(all_tweets), len(new_tweets)))
        except KeyboardInterrupt:
            logger.info('Program interrupted by user. Returning all tweets '
                        'gathered so far.')
    finally:
        pool.close()
        pool.join()

    return all_tweets
class _MultiExecutor(_Executor):
    """Execute functions async in a process pool"""

    def __init__(self):
        super(_MultiExecutor, self).__init__()
        self._children = 0
        self.pool = Pool()

    def _collector(self, result):
        super(_MultiExecutor, self)._collector(result)
        self._children -= 1

    def execute(self, func, args):
        self._children += 1
        self.pool.apply_async(func, args, callback=self._collector)

    def wait_for_results(self):
        self.pool.close()
        # One would have hoped joining the pool would take care of this, but
        # apparently you need to first make sure that all your launched tasks
        # have returned their results properly, before calling join, or you
        # risk a deadlock.
        while self._children > 0:
            time.sleep(0.001)
        self.pool.join()
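# NOTE: A hedged alternative to the polling loop above, assuming the same
# _Executor base class and _collector hook: keep the AsyncResult handles and
# wait on them explicitly before joining. AsyncResult.wait() returns only
# after the result is set (and its callback has run), so no busy-wait is
# needed. The class and method names here are illustrative, not the original.
class _MultiExecutorNoPoll(_Executor):

    def __init__(self):
        super(_MultiExecutorNoPoll, self).__init__()
        self._handles = []  # AsyncResult handles for submitted tasks
        self.pool = Pool()

    def execute(self, func, args):
        self._handles.append(
            self.pool.apply_async(func, args, callback=self._collector))

    def wait_for_results(self):
        for h in self._handles:
            h.wait()  # blocks until this task's result and callback are done
        self.pool.close()
        self.pool.join()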
def get_correlation_parallel(s1, s2):
    """
    params s1 - series 1
    params s2 - series 2
    NOTE : series are numbered 1 to 25 when given in arguments
    returns the correlation between series
    """
    start = time.time()
    offsets = []  # this will be the arguments to all the parallel jobs
    instances = MAX_ROWS // BATCH_SIZE  # Pool needs an integer process count
    mean, std = calculate_mean_std_parallel()
    stripped_mean, stripped_std = calculate_stripped_mean_std_parallel(mean, std)
    processes = Pool(processes=instances)
    for i in range(instances):
        offsets.append((s1, s2, mean, std, stripped_mean, stripped_std, i * BATCH_SIZE))
    results = processes.map(get_correlation, offsets)
    processes.close()
    processes.join()
    pearson_corr = 0
    total = 0
    for result in results:
        pearson_corr += result[0] * result[1]
        total += result[1]
    pearson_corr = 1.0 * pearson_corr / total
    t_value = abs(pearson_corr * math.sqrt(1.0 * (total - 2) / (1 - (pearson_corr * pearson_corr))))
    p_value = t.sf(t_value, total - 2)
    print("\n ######### CORRELATION BETWEEN SERIES", s1, "AND SERIES", s2,
          "is", pearson_corr, ", t value is", t_value, "and p value is", p_value, "######### \n")
    end = time.time()
    print("EXECUTION TIME :", end - start, "sec")
    return pearson_corr
def start(self):
    """Starts a server that controls local workers.

    Calling this function starts a pool of `num_workers` workers used to run
    targets sent to the server. The server will run indefinitely unless shut
    down by the user.
    """
    try:
        serv = Listener((self.hostname, self.port))
        workers = Pool(
            processes=self.num_workers,
            initializer=Worker,
            initargs=(self.status, self.queue, self.waiting),
        )
        logging.info(
            "Started %s workers, listening on port %s",
            self.num_workers,
            serv.address[1],
        )
        self.wait_for_clients(serv)
    except OSError as e:
        if e.errno == 48:  # EADDRINUSE on macOS/BSD
            raise ServerError(
                (
                    "Could not start workers listening on port {}. "
                    "The port may already be in use."
                ).format(self.port)
            )
    except KeyboardInterrupt:
        logging.info("Shutting down...")
        workers.close()
        workers.join()
        self.manager.shutdown()
def stat_volume(stime, etime):
    tgsinfo = read_tgs_info()
    cids = list(tgsinfo.keys())  # dict views are not indexable in Python 3

    # from multiprocessing.dummy import Pool as ThreadPool
    from multiprocessing.pool import Pool
    pool = Pool()
    volume = [pool.apply_async(stat_tgs_volume, args=(stime, etime, int(cid)))
              for cid in cids]
    pool.close()
    print('waiting to join....')
    pool.join()

    print('starting to write to file...')
    volume0 = []
    for i, elem in enumerate(volume):
        volume0.append((cids[i], elem.get()))
    volume0.sort(key=lambda x: x[1], reverse=True)

    total = 0
    with open(os.path.join(root_dir, "result", "volume.txt"), "w") as f:
        for i, elem in enumerate(volume0):
            # cid = cids[i]
            # vol = elem.get()
            total += elem[1]
            line = "%5s,%s: %d\n" % (elem[0], tgsinfo[elem[0]]['kkmc'], elem[1])
            f.write(line)
    print('totally %d records.' % total)
def ingest(
        dataset, cls, skip_if_exists=True, multi_process=False,
        multi_threaded=False, cores=None):
    pool = None
    if multi_process:
        pool = Pool(cores or cpu_count())
        map_func = pool.imap_unordered
    elif multi_threaded:
        pool = ThreadPool(cores or cpu_count())
        map_func = pool.imap_unordered
    else:
        map_func = map

    cls_args = repeat(cls)
    skip_args = repeat(skip_if_exists)

    # map/imap_unordered return lazy iterators in Python 3, so consume them;
    # otherwise nothing is ingested in the single-process branch
    for _ in map_func(ingest_one, zip(dataset, cls_args, skip_args)):
        pass

    if pool is not None:
        # if we're ingesting using multiple processes or threads, the processing
        # should be parallel, but this method should be synchronous from the
        # caller's perspective
        pool.close()
        pool.join()
def main():
    print('Process (%s) start...' % os.getpid())
    p = Pool()
    for i in range(4):
        p.apply_async(long_time_task, args=(i,))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')
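# NOTE: A minimal, self-contained version of the snippet above. The worker
# long_time_task is not shown in the original, so its body here is an
# assumption made purely for illustration.
import os
import time
import random
from multiprocessing.pool import Pool


def long_time_task(name):
    # hypothetical worker: sleep a random amount and report the elapsed time
    print('Run task %s (%s)...' % (name, os.getpid()))
    start = time.time()
    time.sleep(random.random() * 3)
    print('Task %s runs %0.2f seconds.' % (name, time.time() - start))


if __name__ == '__main__':
    print('Process (%s) start...' % os.getpid())
    p = Pool(4)
    for i in range(4):
        p.apply_async(long_time_task, args=(i,))
    p.close()
    p.join()
    print('All subprocesses done.')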
def load_images_uint(files):
    p = Pool()
    process = imread
    results = p.map(process, files)
    p.close()
    p.join()
    images = np.array(results)
    images = images.transpose(0, 3, 1, 2)
    return images
def get_data():
    f2 = open('app_links1.txt', 'r')
    nprocs = 500  # nprocs is the number of processes to run
    ParsePool = Pool(nprocs)
    #ParsePool.map(btl_test,url)
    ParsedURLS = ParsePool.map(deatilsExtract, f2)
    ParsePool.close()
    ParsePool.join()
def get_word():
    domains = open('dic/newwords').readlines()
    try:
        pool = Pool(processes=2)
        pool.map(check_domain, domains)
        pool.close()
        pool.join()
    except Exception as e:
        print(e)
def calculate_stripped_mean_std_parallel(mean, std):
    """
    params - mean
    params - std
    returns stripped mean and std
    """
    stripped_mean = []
    stripped_squares = []
    stripped_std = []
    dirty_data = []
    outliers = []
    for i in range(0, NO_OF_SERIES):
        stripped_std.append(0)
        stripped_squares.append(0)
        stripped_mean.append(0)
        dirty_data.append(0)
        outliers.append(0)
    start = time.time()
    offsets = []  # this will be the arguments to all the parallel jobs
    instances = MAX_ROWS // BATCH_SIZE
    processes = Pool(processes=instances)
    for i in range(instances):
        offsets.append((mean, std, i * BATCH_SIZE))
    results = processes.map(calculate_stripped_mean_std, offsets)
    processes.close()
    processes.join()
    total = 0
    for result in results:
        for i in range(len(result[0])):
            count = result[2] - result[3][i]  # actual - dirty data
            stripped_mean[i] += result[0][i] * count
            stripped_squares[i] += result[1][i] * count
            dirty_data[i] += result[3][i]
            outliers[i] += result[4][i]
        total += result[2]
    for i in range(len(mean)):
        stripped_mean[i] = 1.0 * stripped_mean[i] / (total - dirty_data[i])
        stripped_squares[i] = 1.0 * stripped_squares[i] / (total - dirty_data[i])
        stripped_std[i] = math.sqrt(stripped_squares[i] - (stripped_mean[i] * stripped_mean[i]))
    end = time.time()
    print("######### STRIPPED MEAN ######### \n")
    print(stripped_mean)
    print("\n ######### STRIPPED STANDARD DEVIATION ######### \n")
    print(stripped_std)
    print("\n######### NAN ROWS COUNT #########\n")
    print(dirty_data)
    print("\n######### OUTLIERS ROWS COUNT #########\n")
    print(outliers)
    print("\n######### EXECUTION TIME #########\n")
    print(end - start)
    return stripped_mean, stripped_std
def run(self):
    cases = self.get_test_case()
    # create a process pool
    pool = Pool(processes=len(cases))
    result.append(pool.map_async(self.init_driver, cases.values()))
    pool.close()
    pool.join()
    while not q.empty():
        comm.Template.set_middle(q.get())
def _get(self, args):
    draft_id = args[0]
    id = args[1] if len(args) > 1 else None
    q = self.db.query(Player)
    if id is not None:
        player = q.filter(Player.id == int(id)).first()
        team = self.db.query(Team).filter(and_(Team.is_owner == True,
                                               Team.draft_id == draft_id)).first()
        available_players = self.db.query(Player).join(Player.core).filter(
            and_(PlayerCore.rank != None,
                 PlayerCore.target_price != None,
                 PlayerCore.points > 0,
                 Player.draft_id == draft_id,
                 Player.team_id == None,
                 Player.id != player.id)).order_by(PlayerCore.rank).all()

        min_price = 1
        max_price = min(player.core.target_price + 21, team.money)
        manager = Manager()
        max_starters_points = manager.dict()
        max_bench_points = manager.dict()
        pool = Pool(processes=8)
        starters, bench = get_starters_and_bench(self.db, team.id)
        max_starters_points[0] = optimizer.optimize_roster(
            starters, available_players,
            team.money - (constants.BENCH_SIZE - len(bench)))[1]
        for m in range(min_price, 10):
            pool.apply_async(wrap_optimizer,
                             args=(starters, available_players,
                                   team.money - m - (constants.BENCH_SIZE - len(bench)) + 1,
                                   max_bench_points, m))

        full_starters = True
        for s in starters:
            if s is None:
                full_starters = False
        if not full_starters:
            starters_clone = list(starters)
            bench_clone = list(bench)
            place_player(player, starters_clone, bench_clone)
            for m in range(min_price, max_price):
                pool.apply_async(wrap_optimizer,
                                 args=(starters_clone, available_players,
                                       team.money - m - (constants.BENCH_SIZE - len(bench_clone)),
                                       max_starters_points, m))

        pool.close()
        pool.join()

        ret = player.to_dict(['core'])
        ret['max_starters_points'] = dict(max_starters_points)
        ret['max_bench_points'] = dict(max_bench_points)
        return ret
    else:
        players = q.join(PlayerCore).filter(
            and_(Player.draft_id == int(draft_id),
                 PlayerCore.rank != None,
                 PlayerCore.target_price != None)).all()
        return {'players': [p.to_dict(['core']) for p in players]}
def parallel_augment(images, normalize=None, test=False):
    if normalize is not None:
        mean, std = normalize
        images = images - mean[:, np.newaxis, np.newaxis]  # assuming channel-wise normalization
        images = images / std[:, np.newaxis, np.newaxis]
    p = Pool()
    process = partial(augment, test=test)
    results = p.map(process, images)
    p.close()
    p.join()
    augmented_images = np.array(results, dtype=np.float32)
    return augmented_images
def calculate_mean_std_parallel():
    """
    call this function to compute the mean, standard deviation and NaNs
    for each series; the file name and number of jobs can be changed in
    the settings file
    """
    start = time.time()
    offsets = []
    instances = MAX_ROWS // BATCH_SIZE
    processes = Pool(processes=instances)
    for i in range(instances):
        offsets.append(i * BATCH_SIZE)
    print(offsets)
    result = processes.map(calculate_mean_std, offsets)
    processes.close()
    processes.join()
    mean = []
    std = []
    squares = []
    dirty_data = []
    # initializing
    for i in range(0, NO_OF_SERIES):
        mean.append(0)
        std.append(0)
        squares.append(0)
        dirty_data.append(0)
    total = 0
    ### here we combine the results from different processes / threads
    for r in result:
        for i in range(len(r[0])):  ### update for each time series
            count = r[2] - r[3][i]  ### actual count - the count with missing values
            mean[i] += r[0][i] * count
            squares[i] += r[1][i] * count
            dirty_data[i] += r[3][i]
        total += r[2]
    for i in range(len(mean)):
        mean[i] = 1.0 * mean[i] / (total - dirty_data[i])
        squares[i] = 1.0 * squares[i] / (total - dirty_data[i])
        std[i] = math.sqrt(squares[i] - (mean[i] * mean[i]))
    end = time.time()
    print("######### MEAN ######### \n")
    print(mean)
    print("\n ######### STANDARD DEVIATION ######### \n")
    print(std)
    print("\n######### NAN ROWS COUNT #########\n")
    print(dirty_data)
    print("\n######### EXECUTION TIME #########\n")
    print(end - start)
    return mean, std
def main(): """ Build all the models. Spin off a new process for each participant because the ANN library is not multithreaded. Process is used instead of thread to leverage multiple cores. """ parser = ArgumentParser() parser.add_argument("inputFilename") parser.add_argument("outputDirectory") args = parser.parse_args() inputFilename = args.inputFilename outputDirectory = args.outputDirectory data = pickle.load( open(inputFilename, 'rb') ) tasks = [ 'matb', 'rantask' ] participantIds = [ '001', '002', '003', '004', '005', '006', '007' ] # Cut off first row header for each data set for task in tasks: for participantId in participantIds: data[participantId][task] = data[participantId][task][1:] splits = performSplit( data ) # Record start time so that the elapsed time can be determined start_time = time.time() # Create a multicore processing pool with 7 processes ( 7 so that one core stays free # for system processes ) pool = Pool( processes = 7 ) # Build models for participants in a task for task in tasks: for participantId in participantIds: outputFilename = path.join( outputDirectory, 'testingOn-' + participantId + '-' + task + '.txt' ) # Spin off a process for the building pool.apply_async( tuneANN, ( splits[participantId][task], outputFilename ) ) # Close down the pool so that we can wait on all the processes pool.close() pool.join() # Calculate and print the elapsed time elapsed_time = time.time() - start_time print( "Elapsed time: " + str(elapsed_time) )
def multi_proc5(self, batch):
    start_time = datetime.datetime.now()
    sql = "select count(id) from records"
    count_result = db_connection.execute(sql)
    for row in count_result:
        count = row[0]
        break
    sql = "select id from records"
    result = db_connection.execute(sql)
    record_ids = []
    for idx, row in enumerate(result):
        # 4 because that is how many workers we have
        if (idx % int(count / 4) == 0) or (idx == count - 1):
            if idx == 0:
                some_records = []
            else:
                record_ids.append(some_records)
                some_records = []
        some_records.append(row[0])

    input_pool = Pool(4)
    # Add id messages to input queue
    input_pool.map(partial(add_batch_ids_to_queue, batch_size=int(batch)), record_ids)
    input_pool.close()
    input_pool.join()

    output_pool = Pool(4)
    # Read ids from input_queue, read message from DB and write it to output_queue
    worker_results = []
    for i in range(4):
        worker_results.append(output_pool.apply_async(read_id_from_queue, ()))
    output_pool.close()
    for r in worker_results:
        r.get()  # This reports results, including errors, of workers
    output_pool.join()  # This blocks until all the processes have finished
    end_time = datetime.datetime.now()
    time_taken = (end_time - start_time).total_seconds()
    return time_taken
def create_training_parallel(count):
    pool_size = 8
    batch_count = pool_size * 5
    pool = Pool(pool_size)
    print("generating")
    results = []
    for i in range(batch_count):
        # integer division so each batch gets a whole number of samples
        results.append(pool.apply_async(create_training_data, (count // batch_count,)))
    pool.close()
    pool.join()
    print("concatenating")
    output = []
    for r in results:
        output.extend(r.get(1000))
    return output
def manager_process(dir_queue, file_queue, out_queue):
    """Dispatches and manages path and scanning workers.
    """
    pool = Pool(options.num_threads)
    atexit.register(at_exit_manager, pool)
    logging.info('Gathering Files...')
    pool.apply(explore_path, (dir_queue, file_queue))
    logging.info('Files gathered. Scanning %s files...', file_queue.qsize())
    logging.info('Starting %s scan processes...', options.num_threads)
    print('~' * 80)
    # `thread` is the Python 2 module (renamed `_thread` in Python 3)
    thread.start_new_thread(print_status, (file_queue,))
    for _ in range(options.num_threads):
        pool.apply_async(parallel_scan, (file_queue, out_queue))
    pool.close()
    pool.join()
    out_queue.put(StopIteration)
def multi_proc3(self, batch):
    start_time = datetime.datetime.now()
    sql = "select count(id) from records"
    count_result = db_connection.execute(sql)
    for row in count_result:
        count = row[0]
        break
    sql = "select id from records"
    result = db_connection.execute(sql)
    record_ids = []
    for idx, row in enumerate(result):
        if (idx % int(batch) == 0) or (idx == count - 1):
            if idx == 0:
                some_records = []
            else:
                record_ids.append(some_records)
                some_records = []
        some_records.append(row[0])

    # Add id messages to input queue
    msg_handler = MessageHandler()
    for records in record_ids:
        msg_handler.add_message(json.dumps({"ids": records}), "input_queue")

    worker_results = []
    p = Pool(4)
    for i in range(4):
        worker_results.append(p.apply_async(read_id_from_queue, ()))
    p.close()
    for r in worker_results:
        r.get()
    p.join()  # This blocks until all the processes have finished
    end_time = datetime.datetime.now()
    time_taken = (end_time - start_time).total_seconds()
    return time_taken
def run(config_uri, app_name=None, username=None, types=(), batch_size=500, processes=None):
    # multiprocessing.get_context is Python 3 only.
    from multiprocessing import get_context
    from multiprocessing.pool import Pool

    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('snovault').setLevel(logging.DEBUG)

    testapp = internal_app(config_uri, app_name, username)
    connection = testapp.app.registry[CONNECTION]
    uuids = [str(uuid) for uuid in connection.__iter__(*types)]
    transaction.abort()
    logger.info('Total items: %d' % len(uuids))

    pool = Pool(
        processes=processes,
        initializer=initializer,
        initargs=(config_uri, app_name, username),
        context=get_context('forkserver'),
    )

    all_results = []
    try:
        for result in pool.imap_unordered(worker, batched(uuids, batch_size), chunksize=1):
            results = result['results']
            errors = sum(error for item_type, path, update, error in results)
            updated = sum(update for item_type, path, update, error in results)
            logger.info('Batch: Updated %d of %d (errors %d)' %
                        (updated, len(results), errors))
            all_results.extend(results)
    finally:
        pool.terminate()
        pool.join()

    def result_item_type(result):
        # Ensure we always return a string
        return result[0] or ''

    for item_type, results in itertools.groupby(
            sorted(all_results, key=result_item_type), key=result_item_type):
        results = list(results)
        errors = sum(error for item_type, path, update, error in results)
        updated = sum(update for item_type, path, update, error in results)
        logger.info('Collection %s: Updated %d of %d (errors %d)' %
                    (item_type, updated, len(results), errors))
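# NOTE: `batched` is used above but not shown. A minimal sketch of what it is
# assumed to do: yield successive fixed-size chunks of a sequence (Python
# 3.12's itertools.batched provides equivalent behaviour as tuples).
def batched(seq, size):
    """Yield lists of at most `size` items from `seq`."""
    batch = []
    for item in seq:
        batch.append(item)
        if len(batch) == size:
            yield batch
            batch = []
    if batch:
        yield batch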
def multiprocess_all_chromosomes(func, cls, *args, **kwargs):
    '''
    Convenience method for splitting up queries based on tag id.
    '''
    processes = current_settings.ALLOWED_PROCESSES
    set_chromosome_lists(cls, use_table=kwargs.get('use_table', None))
    p = Pool(processes)

    try:
        for chr_list in current_settings.CHR_LISTS:
            p.apply_async(func, args=[cls, chr_list] + list(args))
        p.close()
        p.join()
    except Exception as e:
        print('Terminating pool.')
        p.terminate()
        raise e
def propagatePrediction(self):
    sortedTargets = sorted(self.targetToTermToScore.keys())
    inputs = [self.targetToTermToScore[targeti] for targeti in sortedTargets]
    global go
    go = self.go
    p = Pool(processes=10)
    results = p.map(makeCompletePrediction, inputs, chunksize=20)
    p.close()
    p.join()
    for i, result in enumerate(results):
        self.targetToTermToScore[sortedTargets[i]] = result
    return self
def multi_proc4(self, batch):
    start_time = datetime.datetime.now()
    sql = "select count(id) from records"
    count_result = db_connection.execute(sql)
    for row in count_result:
        count = row[0]
        break
    sql = "select id from records"
    result = db_connection.execute(sql)
    record_ids = []
    for idx, row in enumerate(result):
        if (idx % int(batch) == 0) or (idx == count - 1):
            if idx == 0:
                some_records = []
            else:
                record_ids.append(some_records)
                some_records = []
        some_records.append(row[0])

    # Add id messages to input queue
    input_pool = Pool(4)
    input_pool.map(add_ids_to_queue, record_ids)
    input_pool.close()  # close and join the first pool before starting the
    input_pool.join()   # second one, otherwise its worker processes leak

    # Read ids from input_queue, read message from DB and write it to output_queue
    worker_results = []
    p = Pool(4)
    for i in range(4):
        worker_results.append(p.apply_async(read_id_from_queue, ()))
    p.close()
    for r in worker_results:
        r.get()
    p.join()  # This blocks until all the processes have finished
    end_time = datetime.datetime.now()
    time_taken = (end_time - start_time).total_seconds()
    return time_taken
def capture(interface, database_output_file, redraw_frequency, arp_resolve,
            dns_resolve, sender_lists, target_lists, color_profile,
            output_columns, display_false, pcap_output_file, force_sender,
            *args, **kwargs):

    dbfile = database_output_file

    osigint = signal.signal(signal.SIGINT, signal.SIG_IGN)
    pool = Pool(3)
    signal.signal(signal.SIGINT, osigint)

    try:
        # ==============
        # START SNIFFING
        # ==============
        '''
        The sniffer is started in a distinct process because Scapy will
        block forever when scapy.all.sniff is called. This allows us to
        interrupt execution of the sniffer by terminating the process.

        TODO: It may be easier to use threading. Pool methods were fresh
        to me at the time of original development.
        '''

        ptable = None
        pcount = 0
        # Handle new database file. When verbose, alert user that a new
        # capture must occur prior to printing results.
        arp_resolution = ('disabled', 'enabled')[arp_resolve]
        dns_resolution = ('disabled', 'enabled')[dns_resolve]

        print('\x1b[2J\x1b[H\33[F')
        print(logo + '\n')
        print(f'Capture interface: {interface}')
        print(f'ARP resolution: {arp_resolution}')
        print(f'DNS resolution: {dns_resolution}')
        sess = create_db(dbfile)

        # ======================================
        # CREATE AN IP FOR THE CURRENT INTERFACE
        # ======================================

        iface_mac, iface_ips = get_interfaces()[interface]
        for ip in iface_ips:
            ip = get_or_create_ip(ip, sess, mac_address=iface_mac)

        if not Path(dbfile).exists():
            print('- Initializing capture\n- This may take time depending '
                  'on network traffic and filter configurations')
        else:
            print(f'Requests analyzed: {pcount}\n')
            ptable = get_output_table(
                sess,
                sender_lists=sender_lists,
                target_lists=target_lists,
                dns_resolve=dns_resolve,
                color_profile=color_profile,
                arp_resolve=arp_resolve,
                columns=output_columns,
                display_false=display_false,
                force_sender=force_sender)
            print(ptable)

        # Cache packets that will be written to output file
        pkts = []
        sniff_result = None
        arp_resolve_result, dns_resolve_result = None, None

        # Loop eternally
        while True:

            # Handle sniff results
            if sniff_result and sniff_result.ready():

                packets = sniff_result.get()
                sniff_result = None

                # Capture packets for the output file
                if pcap_output_file and packets:
                    pkts += packets

                if packets:
                    pcount += packets.__len__()

                # Clear the previous table from the screen using
                # escape sequences
                # https://stackoverflow.com/questions/5290994/remove-and-replace-printed-items/5291044#5291044
                if ptable:
                    lcount = ptable.split('\n').__len__() + 2
                    stdout.write('\033[F\033[K' * lcount)

                ptable = get_output_table(
                    sess,
                    sender_lists=sender_lists,
                    target_lists=target_lists,
                    dns_resolve=dns_resolve,
                    color_profile=color_profile,
                    arp_resolve=arp_resolve,
                    columns=output_columns,
                    display_false=display_false,
                    force_sender=force_sender)

                print(f'Requests analyzed: {pcount}\n')
                print(ptable)

            # Do sniffing
            elif not sniff_result:

                sniff_result = pool.apply_async(
                    async_sniff,
                    (
                        interface,
                        redraw_frequency,
                        sender_lists,
                        target_lists,
                        database_output_file,
                    )
                )

            # ==================
            # DNS/ARP RESOLUTION
            # ==================

            # Do reverse resolution
            if dns_resolve:

                # Reset dns resolution results
                if not dns_resolve_result or dns_resolve_result.ready():

                    to_resolve = sess.query(IP) \
                        .filter(IP.reverse_dns_attempted != True) \
                        .count()

                    if to_resolve:
                        dns_resolve_result = pool.apply_async(
                            reverse_dns_resolve_ips,
                            (database_output_file,)
                        )

            # Do ARP resolution
            if arp_resolve:

                if not arp_resolve_result or arp_resolve_result.ready():

                    to_resolve = sess.query(IP) \
                        .filter(IP.arp_resolve_attempted != True) \
                        .count()

                    if to_resolve:
                        arp_resolve_result = pool.apply_async(
                            arp_resolve_ips,
                            (interface, database_output_file,)
                        )

            sleep(.2)

    except KeyboardInterrupt:
        print('\n- CTRL^C Caught...')
        sess.close()

    finally:

        # ===================
        # HANDLE OUTPUT FILES
        # ===================

        if pcap_output_file:
            wrpcap(pcap_output_file, pkts)

        # =====================
        # CLOSE CHILD PROCESSES
        # =====================

        try:
            pool.close()

            if sniff_result:
                print('- Waiting for the sniffer process...', end='')
                sniff_result.wait(5)
                print('done')

            if dns_resolve_result:
                print('- Waiting for the DNS resolver process...', end='')
                dns_resolve_result.wait(5)
                print('done')

            if arp_resolve_result:
                print('- Waiting for the ARP resolver process...', end='')
                arp_resolve_result.wait(5)
                print('done')

        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
# If the pool is not full yet, a new process is created to run the request;
# otherwise the request waits until a process in the pool finishes, and only
# then is a new process created.
import os
import time
from multiprocessing.pool import Pool
from random import random


def task(task_name):
    print("Starting my new task...", task_name, os.getpid())
    starttime = time.time()
    time.sleep(random() * 3)
    endtime = time.time()
    #print("My task --{}-- is done, took {}s, pid {}".format(task_name, endtime - starttime, os.getpid()))
    return "My task --{}-- is done, took {}s, pid {}".format(task_name, endtime - starttime, os.getpid())


def callback_func(n):
    print(n)


if __name__ == "__main__":
    # process pool
    pool = Pool(5)
    tasks = ["listen to music", "eat", "play games", "watch the kids",
             "cook", "run", "study", "spar",
             "listen to music", "eat", "play games", "watch the kids",
             "cook", "run", "study", "spar"]
    for t in tasks:
        pool.apply_async(task, args=(t,), callback=callback_func)  # asynchronous, non-blocking
    pool.close()  # no more tasks can be added to the pool
    pool.join()   # block the main process until all workers finish
class PyRAMmp():
    '''
    The PyRAMmp class sets up and runs a multiprocessing pool to enable
    parallel PyRAM model runs.
    '''

    def __init__(self, processes=None, maxtasksperchild=None):
        '''
        Initialise the pool and variable lists.
        processes and maxtasksperchild are passed to the pool.
        '''
        self.pool = Pool(processes=processes, maxtasksperchild=maxtasksperchild)
        self.results = []  # Results from PyRAM.run()
        self._outputs = []  # New outputs from PyRAM.run() for transfer to self.results
        self._waiting = []  # Waiting runs
        self._num_waiting = 0  # Number of waiting runs
        self._num_active = 0  # Number of active runs
        self._sleep_time = 1e-2  # Minimum sleep time between adding runs to pool
        self._new = True  # Flag to indicate ready for new set of runs

    def submit_runs(self, runs):
        '''
        Submit new runs to the pool as resources become available.
        runs is a list of PyRAM input tuples (args, kwargs)
        '''
        # Add to waiting list
        for run in runs:
            self._waiting.append(run)
        self._num_waiting = len(self._waiting)

        # Check how many active runs have finished
        for _ in range(len(self._outputs)):
            run = self._outputs.pop(0)
            self.results.append(run)
            self._num_active -= 1

        num_start = self.pool._processes - self._num_active
        num_start = min(num_start, self._num_waiting)

        # Start new runs if processes are free
        for _ in range(num_start):
            run = self._waiting.pop(0)
            self.pool.apply_async(run_pyram, args=(run,), callback=self._get_output)
            self._num_active += 1

        if self._new:
            self._new = False
            self._wait()

    def _wait(self):
        '''
        Wait for all submitted runs to complete.
        '''
        while self._num_active > 0:
            self.submit_runs([])
            sleep(self._sleep_time)
        self._new = True

    def close(self):
        '''
        Close the pool and wait for all processes to finish.
        '''
        self.pool.close()
        self.pool.join()

    def _get_output(self, output):
        '''
        Get a PyRAM output.
        '''
        self._outputs.append(output)

    def __del__(self):
        self.close()
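# NOTE: A hedged usage sketch for the PyRAMmp class above. The (args, kwargs)
# tuples passed to submit_runs are whatever run_pyram expects; the keyword
# names below are placeholders for illustration, not the real PyRAM signature.
if __name__ == '__main__':
    runs = [((), {'frequency': f}) for f in (25.0, 50.0, 100.0)]  # hypothetical inputs
    pyram_mp = PyRAMmp(processes=2)
    pyram_mp.submit_runs(runs)  # blocks until this batch has completed
    print(len(pyram_mp.results), 'runs finished')
    pyram_mp.close()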
    dftr = pd.DataFrame({'id': ids, 'train': 'train'})
    tdftr = pd.DataFrame({'id': ids, 'train': 'test'})
    train, test = DataProcess.train_test_between_subject(
        gdata, pd.concat((dftr, tdftr)),
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    DLogger.logger().debug("total points: " + str(get_total_pionts(train)))

    worker = GQL.get_instance(2, 1, {})
    train = DataProcess.merge_data(train)
    OptML.optimise(worker, output_path, train, test,
                   global_iters=1000, learning_rate=learning_rate)


if __name__ == '__main__':
    if len(sys.argv) == 2:
        n_proc = int(sys.argv[1])
    elif len(sys.argv) == 1:
        n_proc = 1
    else:
        raise Exception('invalid argument')

    p = Pool(n_proc)
    p.map(run_BD, range(len(configs)))
    p.close()  # no more tasks
    p.join()   # wrap up current tasks
import time
from multiprocessing.pool import Pool

min_val = float('inf')
min_item = None


def update_min(item):
    # NOTE: this mutates module-level globals, but each worker process gets
    # its own copy of them, so the parent's min_val/min_item never change.
    global min_val, min_item
    print('outside if', min_val, min_item)
    if item[0] < min_val:
        print(f'updating min from {min_val} to {item[0]}')
        min_val = item[0]
        min_item = item[1]
    time.sleep(0.5)


if __name__ == '__main__':
    lst = [(4, 'a'), (2, 'b'), (1, 'c'), (0, 'd'), (3, 'f')]
    pool = Pool(processes=4)
    pool.map(update_min, lst)
    pool.close()
    pool.join()
    # prints "None inf": the globals were only updated inside the workers
    print(min_item, min_val)
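# NOTE: A minimal corrected sketch of the snippet above. Instead of sharing
# globals across processes, each worker returns its candidate and the parent
# reduces the results; this is the idiomatic way to compute a minimum with a Pool.
import time
from multiprocessing.pool import Pool


def inspect(item):
    # return the item unchanged; any per-item work would happen here
    time.sleep(0.5)
    return item


if __name__ == '__main__':
    lst = [(4, 'a'), (2, 'b'), (1, 'c'), (0, 'd'), (3, 'f')]
    with Pool(processes=4) as pool:
        results = pool.map(inspect, lst)
    min_val, min_item = min(results)  # reduce in the parent process
    print(min_item, min_val)  # -> d 0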
def ensemble(training_output_folder1, training_output_folder2, output_folder,
             task, validation_folder, folds, allow_ensembling: bool = True):
    print("\nEnsembling folders\n", training_output_folder1, "\n", training_output_folder2)

    output_folder_base = output_folder
    output_folder = join(output_folder_base, "ensembled_raw")

    # only_keep_largest_connected_component is the same for all stages
    dataset_directory = join(preprocessing_output_dir, task)
    plans = load_pickle(join(training_output_folder1, "plans.pkl"))  # we need this only for the labels

    files1 = []
    files2 = []
    property_files = []
    out_files = []
    gt_segmentations = []

    folder_with_gt_segs = join(dataset_directory, "gt_segmentations")

    # in the correct shape and we need the original geometry to restore the niftis

    for f in folds:
        validation_folder_net1 = join(training_output_folder1, "fold_%d" % f, validation_folder)
        validation_folder_net2 = join(training_output_folder2, "fold_%d" % f, validation_folder)

        if not isdir(validation_folder_net1):
            raise AssertionError(
                "Validation directory missing: %s. Please rerun validation with "
                "`nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`" % validation_folder_net1)
        if not isdir(validation_folder_net2):
            raise AssertionError(
                "Validation directory missing: %s. Please rerun validation with "
                "`nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`" % validation_folder_net2)

        # we need to ensure the validation was successful. We can verify this via the presence of the summary.json file
        if not isfile(join(validation_folder_net1, 'summary.json')):
            raise AssertionError(
                "Validation directory incomplete: %s. Please rerun validation with "
                "`nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`" % validation_folder_net1)
        if not isfile(join(validation_folder_net2, 'summary.json')):
            raise AssertionError(
                "Validation directory incomplete: %s. Please rerun validation with "
                "`nnUNet_train CONFIG TRAINER TASK FOLD -val --npz`" % validation_folder_net2)

        patient_identifiers1_npz = [i[:-4] for i in subfiles(validation_folder_net1, False, None, 'npz', True)]
        patient_identifiers2_npz = [i[:-4] for i in subfiles(validation_folder_net2, False, None, 'npz', True)]

        # we don't do postprocessing anymore so there should not be any of that noPostProcess
        patient_identifiers1_nii = [
            i[:-7] for i in subfiles(validation_folder_net1, False, None, suffix='nii.gz', sort=True)
            if not i.endswith("noPostProcess.nii.gz") and not i.endswith('_postprocessed.nii.gz')]
        patient_identifiers2_nii = [
            i[:-7] for i in subfiles(validation_folder_net2, False, None, suffix='nii.gz', sort=True)
            if not i.endswith("noPostProcess.nii.gz") and not i.endswith('_postprocessed.nii.gz')]

        if not all([i in patient_identifiers1_npz for i in patient_identifiers1_nii]):
            raise AssertionError(
                "Missing npz files in folder %s. Please run the validation for all models "
                "and folds with the '--npz' flag." % validation_folder_net1)
        if not all([i in patient_identifiers2_npz for i in patient_identifiers2_nii]):
            raise AssertionError(
                "Missing npz files in folder %s. Please run the validation for all models "
                "and folds with the '--npz' flag." % validation_folder_net2)

        patient_identifiers1_npz.sort()
        patient_identifiers2_npz.sort()

        assert all([i == j for i, j in zip(patient_identifiers1_npz, patient_identifiers2_npz)]), \
            "npz filenames do not match. This should not happen."

        maybe_mkdir_p(output_folder)

        for p in patient_identifiers1_npz:
            files1.append(join(validation_folder_net1, p + '.npz'))
            files2.append(join(validation_folder_net2, p + '.npz'))
            property_files.append(join(validation_folder_net1, p) + ".pkl")
            out_files.append(join(output_folder, p + ".nii.gz"))
            gt_segmentations.append(join(folder_with_gt_segs, p + ".nii.gz"))

    p = Pool(default_num_threads)
    p.map(merge, zip(files1, files2, property_files, out_files))
    p.close()
    p.join()

    if not isfile(join(output_folder, "summary.json")) and len(out_files) > 0:
        aggregate_scores(tuple(zip(out_files, gt_segmentations)),
                         labels=plans['all_classes'],
                         json_output_file=join(output_folder, "summary.json"),
                         json_task=task,
                         json_name=task + "__" + output_folder_base.split("/")[-1],
                         num_threads=default_num_threads)

    if allow_ensembling and not isfile(join(output_folder_base, "postprocessing.json")):
        # now lets also look at postprocessing. We cannot just take what we determined in
        # cross-validation and apply it here because things may have changed and may also
        # be too inconsistent between the two networks
        determine_postprocessing(output_folder_base, folder_with_gt_segs,
                                 "ensembled_raw", "temp",
                                 "ensembled_postprocessed", default_num_threads,
                                 dice_threshold=0)

        out_dir_all_json = join(network_training_output_dir, "summary_jsons")

        json_out = load_json(join(output_folder_base, "ensembled_postprocessed", "summary.json"))
        json_out["experiment_name"] = output_folder_base.split("/")[-1]
        save_json(json_out, join(output_folder_base, "ensembled_postprocessed", "summary.json"))

        maybe_mkdir_p(out_dir_all_json)
        shutil_sol.copyfile(
            join(output_folder_base, "ensembled_postprocessed", "summary.json"),
            join(out_dir_all_json, "%s__%s.json" % (task, output_folder_base.split("/")[-1])))
total = 0
scores = {}

commanders = [
    'examples.Greedy', 'examples.Balanced', 'examples.Random',
    'examples.Defender'
]
maps = ['map00', 'map01', 'map10', 'map20']

pairs = itertools.permutations(commanders, 2)
games = list(itertools.product(maps, pairs))

print("Running competition with %i commanders and %i maps, for a total of %i games.\n" % (
    len(commanders), len(maps), len(games)))

try:
    # p is a multiprocessing Pool created earlier in the script
    for map, results in p.map(run, games):
        for bot, score in results.items():
            scores.setdefault(bot, [0, 0])
            scores[bot][0] += score[0]
            scores[bot][1] += score[1]
        total += 1
except KeyboardInterrupt:
    print("\nTerminating competition due to keyboard interrupt.")
    p.terminate()
    p.join()
else:
    print("\n%i total games run." % total)
    for r, s in sorted(scores.items(), key=lambda i: -i[1][0] + i[1][1]):
        print("{} for: {}, against: {}".format(
            r.replace('Commander', '').upper(), s[0], s[1]))
    input()
def __call__(self, model, max_iter, data_dir, fnames, D_config,
             model_save_dir=None, save_every_iter=1000, full_batch=False):
    """
    model: LSTM/LSTM_CNN/BiLSTM class, model for training; model should be
        initialized/loaded before passing in
    max_iter: max iterations for training
    data_dir: .npz file dir
    fnames: list of file names for training/testing
    D_config: data loader config
    model_save_dir: string, folder dir where to save all models
    save_every_iter: int, save the model into model_save_dir every
        save_every_iter iterations
    """
    # auto save model according to class name
    self.model_class = model.__class__.__name__
    self.D_config = D_config
    target_files = self.data_dir2target_files(data_dir, fnames)
    if target_files is None:
        return None

    if self.num_file_in_mem >= len(target_files):
        # if all heater data can fit in memory
        self.D_config["free_mem"] = False
    else:
        self.D_config["free_mem"] = True

    file_idx = 0
    it = 0
    # process based thread pool
    # API: https://docs.python.org/3/library/multiprocessing.html#module-multiprocessing
    pool = Pool(self.num_threads)
    load_flag = False
    print("[INFO] Notice that we are using multiprocessing to load files, "
          "so child processes won't print out on ipython-notebook, which only "
          "monitors the parent process. Please check the terminal for more logging info.")
    self.cur_file_in_mem = 0
    while True:
        if self.num_file_in_mem >= len(target_files):
            # if all data fits in memory, wait until all of it is ready;
            # otherwise we would run multiple times on the data loaded first
            if not load_flag:
                for one_file in target_files:
                    pool.apply_async(self._newthread_helper, args=(one_file,),
                                     callback=self._callback_helper,
                                     error_callback=self._error_helper)
                pool.close()
                # wait till all loaded
                pool.join()
                # flag up, so we won't load the data again
                load_flag = True
        else:
            one_file = target_files[file_idx]
            if self.cur_file_in_mem < min(self.num_file_in_mem, len(target_files)):
                # start a new process loading one_file
                pool.apply_async(self._newthread_helper, args=(one_file,),
                                 callback=self._callback_helper,
                                 error_callback=self._error_helper)
                self.cur_file_in_mem += 1
                file_idx = (file_idx + 1) % len(target_files)
                if file_idx == 0:
                    # shuffle target_files if we finish one round on all of them
                    random.shuffle(target_files)
            else:
                time.sleep(0.001)

        if len(self.DataPool) > 0:
            # some process returned Heater_Data into self.DataPool
            self.train_main(model, model_save_dir, save_every_iter, pool,
                            target_files, max_iter, full_batch)
        else:
            time.sleep(0.001)

    pool.close()
    pool.join()
    # reset DataPool for another training/testing
    self.DataPool.clear()
    col1 = db1[website]
    col2 = db2[website]
    for i in col1.find():
        temp = dict()
        temp['url'] = i['url']
        temp['grabtime'] = i['grabtime']
        temp['website'] = i['website']
        temp['status'] = i['status']
        temp['pagetime'] = i['pagetime']
        col2.insert(temp)
    t_stop = time.time()
    print("finished, took %0.2f seconds" % (t_stop - t_start))


if __name__ == "__main__":
    po = Pool(10)
    for website in websites:
        po.apply_async(worker, (website,))
    print("----start----")
    start = time.time()
    po.close()
    po.join()
    print("-----end-----")
    stop = time.time()
    print('total time: %0.2f' % (stop - start))
def star(self):
    process_pool = ProcessPool(processes=self.concurrency)
    process_pool.map(self.run, range(self.concurrency))
    process_pool.close()
    process_pool.join()
def validate(self, do_mirroring: bool = True, use_sliding_window: bool = True, step_size: float = 0.5,
             save_softmax: bool = True, use_gaussian: bool = True, overwrite: bool = True,
             validation_folder_name: str = 'validation_raw', debug: bool = False, all_in_gpu: bool = False,
             segmentation_export_kwargs: dict = None, run_postprocessing_on_folds: bool = True):
    current_mode = self.network.training
    self.network.eval()

    assert self.was_initialized, "must initialize, ideally with checkpoint (or train first)"
    if self.dataset_val is None:
        self.load_dataset()
        self.do_split()

    if segmentation_export_kwargs is None:
        if 'segmentation_export_params' in self.plans.keys():
            force_separate_z = self.plans['segmentation_export_params']['force_separate_z']
            interpolation_order = self.plans['segmentation_export_params']['interpolation_order']
            interpolation_order_z = self.plans['segmentation_export_params']['interpolation_order_z']
        else:
            force_separate_z = None
            interpolation_order = 1
            interpolation_order_z = 0
    else:
        force_separate_z = segmentation_export_kwargs['force_separate_z']
        interpolation_order = segmentation_export_kwargs['interpolation_order']
        interpolation_order_z = segmentation_export_kwargs['interpolation_order_z']

    output_folder = join(self.output_folder, validation_folder_name)
    maybe_mkdir_p(output_folder)

    if do_mirroring:
        mirror_axes = self.data_aug_params['mirror_axes']
    else:
        mirror_axes = ()

    pred_gt_tuples = []

    export_pool = Pool(2)
    results = []

    transpose_backward = self.plans.get('transpose_backward')

    for k in self.dataset_val.keys():
        properties = load_pickle(self.dataset[k]['properties_file'])
        data = np.load(self.dataset[k]['data_file'])['data']

        # concat segmentation of previous step
        seg_from_prev_stage = np.load(join(self.folder_with_segs_from_prev_stage,
                                           k + "_segFromPrevStage.npz"))['data'][None]

        print(data.shape)
        data[-1][data[-1] == -1] = 0
        data_for_net = np.concatenate((data[:-1],
                                       to_one_hot(seg_from_prev_stage[0], range(1, self.num_classes))))

        softmax_pred = self.predict_preprocessed_data_return_seg_and_softmax(
            data_for_net,
            do_mirroring=do_mirroring,
            mirror_axes=mirror_axes,
            use_sliding_window=use_sliding_window,
            step_size=step_size,
            use_gaussian=use_gaussian,
            all_in_gpu=all_in_gpu,
            mixed_precision=self.fp16)[1]

        if transpose_backward is not None:
            transpose_backward = self.plans.get('transpose_backward')
            softmax_pred = softmax_pred.transpose([0] + [i + 1 for i in transpose_backward])

        fname = Path(properties['list_of_data_files'][0]).parts[-1][:-12]

        if save_softmax:
            softmax_fname = join(output_folder, fname + ".npz")
        else:
            softmax_fname = None

        """There is a problem with python process communication that prevents us from
        communicating objects larger than 2 GB between processes (basically when the
        length of the pickle string that will be sent is communicated by the
        multiprocessing.Pipe object then the placeholder (%i I think) does not allow
        for long enough strings. This could be fixed by changing i to l (for long)
        but that would require manually patching system python code. We circumvent
        that problem here by saving softmax_pred to a npy file that will then be read
        (and finally deleted) by the Process. save_segmentation_nifti_from_softmax
        can take either filename or np.ndarray and will handle this automatically"""
        if np.prod(softmax_pred.shape) > (2e9 / 4 * 0.85):  # *0.85 just to be safe
            np.save(fname + ".npy", softmax_pred)
            softmax_pred = fname + ".npy"

        results.append(export_pool.starmap_async(
            save_segmentation_nifti_from_softmax,
            ((softmax_pred, join(output_folder, fname + ".nii.gz"),
              properties, interpolation_order, self.regions_class_order,
              None, None, softmax_fname, None, force_separate_z,
              interpolation_order_z),)))

        pred_gt_tuples.append([join(output_folder, fname + ".nii.gz"),
                               join(self.gt_niftis_folder, fname + ".nii.gz")])

    _ = [i.get() for i in results]

    task = Path(self.dataset_directory).parts[-1]
    job_name = self.experiment_name
    _ = aggregate_scores(pred_gt_tuples,
                         labels=list(range(self.num_classes)),
                         json_output_file=join(output_folder, "summary.json"),
                         json_name=job_name,
                         json_author="Fabian",
                         json_description="",
                         json_task=task)

    if run_postprocessing_on_folds:
        # in the old nnunet we would stop here. Now we add a postprocessing. This postprocessing can remove everything
        # except the largest connected component for each class. To see if this improves results, we do this for all
        # classes and then rerun the evaluation. Those classes for which this resulted in an improved dice score will
        # have this applied during inference as well
        self.print_to_log_file("determining postprocessing")
        determine_postprocessing(self.output_folder, self.gt_niftis_folder,
                                 validation_folder_name,
                                 final_subf_name=validation_folder_name + "_postprocessed",
                                 debug=debug)
        # after this the final predictions for the validation set can be found in
        # validation_folder_name_base + "_postprocessed". They are always in that
        # folder, even if no postprocessing was applied!

    # determining postprocessing on a per-fold basis may be OK for this fold but what if another fold finds another
    # postprocessing to be better? In this case we need to consolidate. At the time the consolidation is going to be
    # done we won't know what self.gt_niftis_folder was, so now we copy all the niftis into a separate folder to
    # be used later
    gt_nifti_folder = join(self.output_folder_base, "gt_niftis")
    maybe_mkdir_p(gt_nifti_folder)
    for f in subfiles(self.gt_niftis_folder, suffix=".nii.gz"):
        success = False
        attempts = 0
        while not success and attempts < 10:
            try:
                shutil.copy(f, gt_nifti_folder)
                success = True
            except OSError:
                attempts += 1
                sleep(1)

    self.network.train(current_mode)
    export_pool.close()
    export_pool.join()
def join(self):
    Pool.join(self)
    for r in self.int_results:
        # return values were already handled in the callbacks, but asking
        # for them might raise exceptions which would otherwise be lost
        self.results.append(r.get())
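# NOTE: A hedged sketch of the kind of Pool subclass the join() override above
# could belong to; int_results/results and the submission wrapper are
# assumptions for illustration, not the original class (its own join() would
# be the method shown above).
class CollectingPool(Pool):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.int_results = []  # AsyncResult handles for every submitted task
        self.results = []      # final values, collected in join()

    def apply_async_collect(self, func, args=()):
        # submit and remember the handle so join() can surface exceptions
        r = Pool.apply_async(self, func, args)
        self.int_results.append(r)
        return r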
def parallel_process(func, dataset):
    pool = Pool(4)
    # each element of dataset must be a tuple of positional args for func
    result = [pool.apply_async(func, data) for data in dataset]
    pool.close()
    pool.join()
    return [_result.get() for _result in result]
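# NOTE: A small usage example for parallel_process above, run in the same
# module; add_pair is a hypothetical worker defined here only to show the
# expected args-tuple shape of each dataset element.
def add_pair(a, b):
    return a + b


if __name__ == '__main__':
    pairs = [(1, 2), (3, 4), (5, 6)]
    print(parallel_process(add_pair, pairs))  # -> [3, 7, 11]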
def read_files_batched(filenames,
                       file_batch_size=8192,
                       file_batch_shuffle=False,
                       max_batches=math.inf,
                       return_mode='array',
                       n_jobs=-1,
                       max_batches_in_queue=1000,
                       max_queue_wait_seconds=0.5,
                       pd_kwargs={}):
    """Read multiple files in parallel."""

    def listify_generator(func, *args, **kwargs):
        listified_generator = list(func(*args, **kwargs))
        return listified_generator

    if n_jobs == -1:
        n_jobs = cpu_count() - 1
    n_jobs = min((n_jobs, len(filenames)))

    # Parallel
    if n_jobs > 1:

        # Batch queue, appended in callback
        batch_queue = deque(maxlen=max_batches_in_queue)

        def callback(batch):
            while True:
                if len(batch_queue) < max_batches_in_queue:
                    batch_queue.append(batch)
                    break
                else:
                    time.sleep(0.1)

        # Create processes
        p = Pool(n_jobs)
        for filename in filenames:
            p.apply_async(listify_generator,
                          (read_file_batched, filename),
                          dict(file_batch_size=file_batch_size,
                               file_batch_shuffle=file_batch_shuffle,
                               max_batches=max_batches,
                               return_mode=return_mode,
                               pd_kwargs=pd_kwargs),
                          callback=callback)

        # Yield from queue
        keep_trying = True
        last_non_empty_batch = None
        while keep_trying:
            if len(batch_queue) > 0:
                for batch in batch_queue.popleft():
                    yield batch
                # time.clock() was removed in Python 3.8; use perf_counter()
                last_non_empty_batch = time.perf_counter()

            if len(batch_queue) == 0:
                if last_non_empty_batch is not None:
                    if time.perf_counter() - last_non_empty_batch >= max_queue_wait_seconds:
                        keep_trying = False

        p.close()
        p.join()

    # Single process
    else:
        for filename in filenames:
            for batch in read_file_batched(filename,
                                           file_batch_size=file_batch_size,
                                           file_batch_shuffle=file_batch_shuffle,
                                           max_batches=max_batches,
                                           return_mode=return_mode,
                                           pd_kwargs=pd_kwargs):
                yield batch
def peak__partition(v, s1, s2, find_maxima=True, partition_op=None, multiprocessing_process_num=0):
    """
    partition the volume then detect peaks for each partition
    note that this will result in redundant peaks!! Clean up must be done afterwards!!
    """
    import aitom.image.vol.partition as IVP

    if multiprocessing_process_num > 0:
        pool = Pool(processes=min(multiprocessing_process_num, multiprocessing.cpu_count()))
    else:
        pool = None

    if partition_op is None:
        # in this case, just generate a single partition
        siz_max = max(v.shape)
        partition_op = {
            'nonoverlap_width': siz_max * 2,
            'overlap_width': siz_max * 2
        }

    b = IVP.gen_bases(v.shape,
                      nonoverlap_width=partition_op['nonoverlap_width'],
                      overlap_width=partition_op['overlap_width'])
    print('partition num', b.shape)

    ps = []
    if pool is not None:
        pool_re = []
        for i0 in range(b.shape[0]):
            for i1 in range(b.shape[1]):
                for i2 in range(b.shape[2]):
                    bp = N.squeeze(b[i0, i1, i2, :, :])
                    pool_re.append(
                        pool.apply_async(
                            func=peak__partition__single_job,
                            kwds={
                                'v': v[bp[0, 0]:bp[0, 1], bp[1, 0]:bp[1, 1], bp[2, 0]:bp[2, 1]],
                                's1': s1,
                                's2': s2,
                                'base': bp,
                                'find_maxima': find_maxima,
                                'partition_id': (i0, i1, i2),
                                'save_vg': (partition_op['save_vg']
                                            if 'save_vg' in partition_op else False)
                            }))
        for pool_re_t in pool_re:
            ppsj = pool_re_t.get(9999999)
            ps.extend(ppsj['ps'])
            print('\r', ppsj['partition_id'], ' ')
            sys.stdout.flush()
        pool.close()
        pool.join()
        del pool
    else:
        for i0 in range(b.shape[0]):
            for i1 in range(b.shape[1]):
                for i2 in range(b.shape[2]):
                    bp = N.squeeze(b[i0, i1, i2, :, :])
                    ppsj = peak__partition__single_job(
                        v=v[bp[0, 0]:bp[0, 1], bp[1, 0]:bp[1, 1], bp[2, 0]:bp[2, 1]],
                        s1=s1,
                        s2=s2,
                        base=bp,
                        find_maxima=find_maxima,
                        partition_id=(i0, i1, i2),
                        save_vg=(partition_op['save_vg']
                                 if 'save_vg' in partition_op else False))
                    ps.extend(ppsj['ps'])
                    print('\r', ppsj['partition_id'], ' ')
                    sys.stdout.flush()

    # order peaks in ps according to values
    if find_maxima:
        ps = sorted(ps, key=lambda _: (-_['val']))
    else:
        ps = sorted(ps, key=lambda _: _['val'])

    return ps
def main(argv):
    print('python {:s} {:s}'.format(' '.join(sys.argv), str(datetime.now())[:20]))

    if RUN_TEST_ENH == 1:
        runscripts(TEST_ENH, SAMPLE_ID, TEST_PATH,
                   SPECIES, ANALYZE_AGE, ANALYZE_BREAKS)

    if ANALYZE_SHUFFLE == 1:
        # create pool and run simulations in parallel
        shuffle_id = "shuf-" + (SAMPLE_ID).split("_enhancers")[0]
        shuffle_path = os.path.join(TEST_PATH, SAMPLE_ID, "shuffle")
        if os.path.exists(shuffle_path) == False:
            os.mkdir(shuffle_path)

        # format the enhancer bed file and sort
        test_enh_formatted = preformatBedfile(TEST_ENH, SAMPLE_ID, TEST_PATH)

        pool = Pool(NUM_THREADS)
        partial_calcExp = partial(calculateExpected,
                                  test_enh_formatted, SAMPLE_ID,
                                  shuffle_path, SPECIES)
        exp_sum_list = pool.map(partial_calcExp, [i for i in range(ITERATIONS)])
        pool.close()
        pool.join()

        if os.path.exists(shuffle_path) == False:
            os.mkdir(shuffle_path)

        if "enh_ages.bed" in TEST_ENH:
            print("SHUFFLE_ID", shuffle_id)
            runscripts(TEST_ENH, shuffle_id, TEST_PATH,
                       SPECIES, ANALYZE_AGE, ANALYZE_BREAKS)
        elif "enh_ages.bed" not in TEST_ENH:
            # get all the shuffle files
            shuf_fs = glob.glob(f"{shuffle_path}/{shuffle_id}*.bed")
            val = 0
            for shuf_f in shuf_fs:
                # age each shuffle file
                iter_id = shuffle_id + "-" + str(val)
                print("iter_id", iter_id)
                runscripts(shuf_f, iter_id, shuffle_path,
                           SPECIES, ANALYZE_AGE, ANALYZE_BREAKS)
                val += 1
        else:
            print("sarah, address these problems with shuffle not running")

    rm_cmd = f"rm {TEST_PATH}/cut-*{SAMPLE_ID}*.bed"
    os.system(rm_cmd)
def concurrency_run(num: int):
    pool = Pool(num)
    for k in range(num):
        pool.apply_async(func=run)
    pool.close()
    pool.join()
""" # author Liu shi hao # date: 2019/12/11 15:43 # file_name: process_pool_test 进程池 """ import os import time from multiprocessing.pool import Pool # 进程应该完成的任务 def task(): for i in range(3): print(os.getpid(), i) time.sleep(0.2) if __name__ == '__main__': pool1 = Pool(3) for i in range(15): # pool1.apply_async(task) # 异步 pool1.apply(task) # 同步 pool1.close() pool1.join() print('finish')
def evaluate_csv_right(self):
    """
    Evaluate the CSV file
    """
    # in_file_name = 'test_400_right'   # 400-sample test
    # in_file_name = 'test_1000_right'  # 1000-sample test
    # in_file_name = 'random_1w_urls'   # 10k-sample test
    # in_file = os.path.join(DATA_DIR, 'test_urls_files', in_file_name + ".csv")

    # in_file_name = "sanghu.zj_question_cut_sampled_jueying_url_5k_1229"  # full-page scans
    # in_file_name = "dump_write_pure.out"  # pure handwriting
    # in_file_name = "7_train_ori.out"      # full-page queries
    # in_file_name = "HW_TRAIN.out"
    # in_file_name = "biaozhu_fix.check"
    # in_file_name = "biaozhu_csv_out"
    # in_file_name = "random_1w_urls"  # ordinary queries
    # in_file_name = "zjw_url"  # small images
    # in_file_name = "xiaotu_labeled_25w_165512"  # small images
    in_file_name = "zjw_imgs_20210427_urls"  # small images

    in_file = os.path.join(DATA_DIR, 'page_dataset_files', in_file_name + ".txt")  # input file
    print('[Info] in_file: {}'.format(in_file))
    data_lines = read_file(in_file)
    print('[Info] total samples: {}'.format(len(data_lines)))
    if len(data_lines) == 0:
        print('[Info] bad file path: {}'.format(in_file))
        return

    # test subset
    n = 10000
    if len(data_lines) > n:
        random.seed(47)
        # random.seed(89)
        random.shuffle(data_lines)  # shuffle
        data_lines = data_lines[:n]
    print('[Info] sample count: {}'.format(len(data_lines)))

    # output file
    time_str = get_current_time_str()
    out_name = 'check_{}.{}.csv'.format(in_file_name, time_str)
    out_dir = os.path.join(DATA_DIR, "check_dir_20210329")
    mkdir_if_not_exist(out_dir)
    out_file = os.path.join(out_dir, out_name)

    # filtered files
    # out_dir = os.path.join(DATA_DIR, "xiaotu_dir")
    # in_file_name = '{}_good.txt'.format(in_file_name)
    # mkdir_if_not_exist(out_dir)
    # out_file = os.path.join(out_dir, in_file_name)

    # write_dir = os.path.join(out_dir, 'write_dir_{}'.format(time_str))
    # mkdir_if_not_exist(write_dir)
    write_dir = None

    pool = Pool(processes=100)
    for idx, data_line in enumerate(data_lines):
        # option 1
        # if idx == 0:
        #     continue
        # url, r_angle = data_line.split(',')

        # option 2
        url, r_angle = data_line, 0

        # name = url.split('/')[-1].split('.')[0]
        # file_name_x = in_file_name.split('.')[0]
        # url = "https://sm-transfer.oss-cn-hangzhou.aliyuncs.com/zhengsheng.wcl/problems_rotation/" \
        #       "datasets/{}_x/{}.jpg".format(file_name_x, name)
        try:
            pool.apply_async(OnlineEvaluation.process_thread_right,
                             (idx, url, r_angle, out_file, write_dir))
            # OnlineEvaluation.process_thread_right(idx, url, r_angle, out_file, write_dir)

            # filter images
            # pool.apply_async(OnlineEvaluation.process_save_img_url, (idx, url, r_angle, out_file, write_dir))
            # OnlineEvaluation.process_save_img_url(idx, url, r_angle, out_file, write_dir)
        except Exception as e:
            print('[Info] Error URL: {}'.format(url))
            continue
        # print('[Info] URL: {}'.format(url))
        # OnlineEvaluation.process_thread_right(idx, url, r_angle, out_file, write_dir)

    pool.close()
    pool.join()
    print('[Info] output file: {}'.format(out_file))
class ProcessPoolStrategy(ParallelStrategy, _PoolRunnableStrategy, _Resultable):

    _Processors_Pool: Pool = None
    _Processors_List: List[Union[ApplyResult, AsyncResult]] = None

    def __init__(self, pool_size: int):
        super().__init__(pool_size=pool_size)

    def initialization(self,
                       queue_tasks: Optional[Union[_BaseQueueTask, _BaseList]] = None,
                       features: Optional[Union[_BaseFeatureAdapterFactory, _BaseList]] = None,
                       *args, **kwargs) -> None:
        super(ProcessPoolStrategy, self).initialization(queue_tasks=queue_tasks, features=features, *args, **kwargs)

        # Activate multiprocessing.managers.BaseManager server
        activate_manager_server()

        # Initialize and build the Processes Pool.
        __pool_initializer: Callable = kwargs.get("pool_initializer", None)
        __pool_initargs: IterableType = kwargs.get("pool_initargs", None)
        self._Processors_Pool = Pool(processes=self.pool_size, initializer=__pool_initializer, initargs=__pool_initargs)

    def apply(self, tasks_size: int, function: Callable, args: Tuple = (), kwargs: Dict = {}) -> None:
        self.reset_result()
        self._Processors_List = []
        for _ in range(tasks_size):
            # Pool.apply is blocking and returns the result directly, so a
            # worker exception is raised here, at the call site. The original
            # try/except only wrapped a local assignment and could never fire.
            try:
                __process_running_result = self._Processors_Pool.apply(func=function, args=args, kwds=kwargs)
                __exception = None
                __process_run_successful = True
            except Exception as e:
                __process_running_result = None
                __exception = e
                __process_run_successful = False
            self._Processors_List.append(__process_running_result)

            # Save Running result state and Running result value as dict
            self._result_saving(successful=__process_run_successful, result=__process_running_result, exception=__exception)

    def async_apply(self, tasks_size: int, function: Callable, args: Tuple = (),
                    kwargs: Dict = {}, callback: Callable = None,
                    error_callback: Callable = None) -> None:
        self.reset_result()
        self._Processors_List = [
            self._Processors_Pool.apply_async(func=function,
                                              args=args,
                                              kwds=kwargs,
                                              callback=callback,
                                              error_callback=error_callback)
            for _ in range(tasks_size)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_process_run_successful, result=_process_running_result, exception=_exception)

    def apply_with_iter(self, functions_iter: List[Callable], args_iter: List[Tuple] = None,
                        kwargs_iter: List[Dict] = None) -> None:
        self.reset_result()
        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        self._Processors_List = []
        for _func, _args, _kwargs in zip(functions_iter, args_iter, kwargs_iter):
            # Same as apply: the blocking call is where worker errors surface.
            try:
                __process_running_result = self._Processors_Pool.apply(func=_func, args=_args, kwds=_kwargs)
                __exception = None
                __process_run_successful = True
            except Exception as e:
                __process_running_result = None
                __exception = e
                __process_run_successful = False
            self._Processors_List.append(__process_running_result)

            # Save Running result state and Running result value as dict
            self._result_saving(successful=__process_run_successful, result=__process_running_result, exception=__exception)

    def async_apply_with_iter(self, functions_iter: List[Callable],
                              args_iter: List[Tuple] = None,
                              kwargs_iter: List[Dict] = None,
                              callback_iter: List[Callable] = None,
                              error_callback_iter: List[Callable] = None) -> None:
        self.reset_result()
        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        if callback_iter is None:
            callback_iter = [None for _ in functions_iter]

        if error_callback_iter is None:
            error_callback_iter = [None for _ in functions_iter]

        self._Processors_List = [
            self._Processors_Pool.apply_async(func=_func,
                                              args=_args,
                                              kwds=_kwargs,
                                              callback=_callback,
                                              error_callback=_error_callback)
            for _func, _args, _kwargs, _callback, _error_callback in zip(
                functions_iter, args_iter, kwargs_iter, callback_iter, error_callback_iter)
        ]

        for process in self._Processors_List:
            _process_running_result = None
            _process_run_successful = None
            _exception = None

            try:
                _process_running_result = process.get()
                _process_run_successful = process.successful()
            except Exception as e:
                _exception = e
                _process_run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_process_run_successful, result=_process_running_result, exception=_exception)

    def map(self, function: Callable, args_iter: IterableType = (), chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.map(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful, result=__result, exception=_exception)

    def async_map(self, function: Callable, args_iter: IterableType = (),
                  chunksize: int = None, callback: Callable = None,
                  error_callback: Callable = None) -> None:
        self.reset_result()
        _process_running_result = None
        _exception = None

        _map_result = self._Processors_Pool.map_async(
            func=function, iterable=args_iter, chunksize=chunksize,
            callback=callback, error_callback=error_callback)
        try:
            _process_running_result = _map_result.get()
            _process_run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful, result=__result, exception=_exception)

    def map_by_args(self, function: Callable, args_iter: IterableType[IterableType] = (),
                    chunksize: int = None) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            _process_running_result = self._Processors_Pool.starmap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful, result=__result, exception=_exception)

    def async_map_by_args(self, function: Callable, args_iter: IterableType[IterableType] = (),
                          chunksize: int = None, callback: Callable = None,
                          error_callback: Callable = None) -> None:
        self.reset_result()
        _process_running_result = None
        _exception = None

        _map_result = self._Processors_Pool.starmap_async(
            func=function, iterable=args_iter, chunksize=chunksize,
            callback=callback, error_callback=error_callback)
        # Guard .get() like the other async variants do, so a worker error is
        # recorded instead of propagating out of the strategy.
        try:
            _process_running_result = _map_result.get()
            _process_run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful, result=__result, exception=_exception)

    def imap(self, function: Callable, args_iter: IterableType = (), chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap(
                func=function, iterable=args_iter, chunksize=chunksize)
            # exhausting the iterator is what actually runs (and may raise)
            _process_running_result = list(imap_running_result)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful, result=__result, exception=_exception)

    def imap_unordered(self, function: Callable, args_iter: IterableType = (), chunksize: int = 1) -> None:
        self.reset_result()
        _process_running_result = None

        try:
            imap_running_result = self._Processors_Pool.imap_unordered(
                func=function, iterable=args_iter, chunksize=chunksize)
            _process_running_result = list(imap_running_result)
            _exception = None
            _process_run_successful = True
        except Exception as e:
            _exception = e
            _process_run_successful = False

        # Save Running result state and Running result value as dict
        for __result in (_process_running_result or []):
            self._result_saving(successful=_process_run_successful, result=__result, exception=_exception)

    def _result_saving(self, successful: bool, result: List, exception: Exception) -> None:
        _process_result = {
            "successful": successful,
            "result": result,
            "exception": exception
        }
        self._Processors_Running_Result.append(_process_result)

    def close(self) -> None:
        self._Processors_Pool.close()
        self._Processors_Pool.join()

    def terminal(self) -> None:
        self._Processors_Pool.terminate()

    def get_result(self) -> List[_ProcessPoolResult]:
        return self.result()

    def _saving_process(self) -> List[_ProcessPoolResult]:
        _pool_results = []
        for __result in self._Processors_Running_Result:
            _pool_result = _ProcessPoolResult()
            _pool_result.is_successful = __result["successful"]
            _pool_result.data = __result["result"]
            _pool_result.exception = __result["exception"]
            _pool_results.append(_pool_result)
        return _pool_results
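# Hedged sketch (standalone, not the framework class above): the core pattern of
# this strategy is "run via Pool, then normalize every outcome into a
# {successful, result, exception} record". A minimal self-contained version:
from multiprocessing.pool import Pool


def _square(x):
    return x * x


def run_and_record(function, items, processes=4):
    records = []
    with Pool(processes=processes) as pool:
        handles = [pool.apply_async(function, (item,)) for item in items]
        for h in handles:
            try:
                records.append({"successful": True, "result": h.get(), "exception": None})
            except Exception as e:
                records.append({"successful": False, "result": None, "exception": e})
    return records


if __name__ == '__main__':
    print(run_and_record(_square, range(5)))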
def evaluate_regions(folder_predicted: str, folder_gt: str, regions: dict,
                     processes=default_num_threads):
    region_names = list(regions.keys())
    files_in_pred = subfiles(folder_predicted, suffix='.nii.gz', join=False)
    files_in_gt = subfiles(folder_gt, suffix='.nii.gz', join=False)
    have_no_gt = [i for i in files_in_pred if i not in files_in_gt]
    assert len(have_no_gt) == 0, \
        "Some files in folder_predicted have no ground truth in folder_gt"
    have_no_pred = [i for i in files_in_gt if i not in files_in_pred]
    if len(have_no_pred) > 0:
        print("WARNING! Some files in folder_gt were not predicted (not present in folder_predicted)!")

    files_in_gt.sort()
    files_in_pred.sort()

    # run for all cases
    full_filenames_gt = [folder_gt + "/" + i for i in files_in_pred]
    full_filenames_pred = [folder_predicted + "/" + i for i in files_in_pred]

    p = Pool(processes)
    # repeat the region list once per predicted case (files_in_pred drives the
    # iteration, since folder_gt may contain extra, unpredicted cases)
    res = p.starmap(
        evaluate_case,
        zip(full_filenames_pred, full_filenames_gt,
            [list(regions.values())] * len(files_in_pred)))
    p.close()
    p.join()

    all_results = {r: [] for r in region_names}
    with open(folder_predicted + "/" + 'summary.csv', 'w') as f:
        f.write("casename")
        for r in region_names:
            f.write(",%s" % r)
        f.write("\n")
        for i in range(len(files_in_pred)):
            f.write(files_in_pred[i][:-7])
            result_here = res[i]
            for k, r in enumerate(region_names):
                dc = result_here[k]
                f.write(",%02.4f" % dc)
                all_results[r].append(dc)
            f.write("\n")

        f.write('mean')
        for r in region_names:
            f.write(",%02.4f" % np.nanmean(all_results[r]))
        f.write("\n")
        f.write('median')
        for r in region_names:
            f.write(",%02.4f" % np.nanmedian(all_results[r]))
        f.write("\n")

        f.write('mean (nan is 1)')
        for r in region_names:
            tmp = np.array(all_results[r])
            tmp[np.isnan(tmp)] = 1
            f.write(",%02.4f" % np.mean(tmp))
        f.write("\n")
        f.write('median (nan is 1)')
        for r in region_names:
            tmp = np.array(all_results[r])
            tmp[np.isnan(tmp)] = 1
            f.write(",%02.4f" % np.median(tmp))
        f.write("\n")
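# Hedged sketch: Pool.starmap unpacks each tuple in the iterable into
# positional arguments, which is what feeds evaluate_case(pred, gt, regions)
# above. Minimal demonstration with a hypothetical per-case scoring function:
from multiprocessing import Pool


def score_case(pred, gt, weight):
    # stand-in for a per-case metric such as a Dice score
    return weight * (1.0 if pred == gt else 0.0)


if __name__ == '__main__':
    preds = ['a', 'b', 'c']
    gts = ['a', 'x', 'c']
    with Pool(2) as p:
        scores = p.starmap(score_case, zip(preds, gts, [1.0] * len(preds)))
    print(scores)  # [1.0, 0.0, 1.0]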
def fit(self, X, y, mask, subset=None, num_workers=1, queue_len=2, chunk_size=10000):
    """Fit the model according to the given training data.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target vector relative to X

    mask : array-like, shape = [n_samples]
        Control group mask vector relative to X, where
        True (1) - control, False (0) - test

    subset : array-like, optional
        Subset of feature indices to build pairs from; defaults to all features.

    num_workers : int, optional
        Number of worker processes; values > 1 enable multiprocessing.

    queue_len : int, optional
        Number of chunks each worker may have queued at once.

    chunk_size : int, optional
        Number of feature pairs per task.

    Returns
    -------
    self : object
    """
    try:
        self.X_shape = X.shape
        assert (len(self.X_shape) == 2)
        self.num_samples = self.X_shape[0]
        self.num_features = self.X_shape[1]
        if self.verbose == 1:
            print('parenclitic_graphs')
            sys.stdout.flush()

        if subset is None:
            self.partition.fit(self.num_features)
        else:
            self.partition.fit(subset)

        if not self.pair_filter is None:
            self.pair_filter.fit(X, mask, self.partition)
            self.pairs = self.pair_filter
        else:
            self.pairs = self.partition

        global num_done, num_pairs
        num_done = 0
        num_pairs = len(self.pairs)
        each_progress = int(np.sqrt(num_pairs + 0.5))
        if self.progress_bar:
            from tqdm import tqdm
            progress_bar = tqdm(total=num_pairs)

        fit = self.kernel.fit
        M, D, E = [], [], []
        need_parallel = num_workers > 1
        my_parallel_calc = parallel_calc(self.verbose)
        if need_parallel:
            global done_tasks, ready
            pool = Pool(num_workers, initializer=my_parallel_calc.init,
                        initargs=(X, y, mask, self.kernel))
            done_tasks = 0
            # one semaphore permit per in-flight chunk: caps the task queue at
            # num_workers * queue_len so memory cannot grow unboundedly
            ready = Semaphore(num_workers * queue_len)
        else:
            my_parallel_calc.init(X, y, mask, self.kernel)

        def upd_graph(res):
            global num_done, done_tasks, ready
            if self.verbose == 1:
                print('upd_graphs')
                sys.stdout.flush()
            if not type(res) is int:
                for cur in res:
                    if not cur is None:
                        m, d, i, j = cur
                        if m.any():
                            M.append(m)
                            D.append(d)
                            E.append([i, j])
                res = len(res)
            if need_parallel:
                done_tasks += 1
                ready.release()
            if self.progress_bar:
                progress_bar.set_description('Number of edges: %i' % len(M), refresh=False)
                progress_bar.update(res)
            num_done += 1
            if num_done % each_progress == 0 or num_done == num_pairs:
                stop = timeit.default_timer()
                if self.verbose == 1:
                    print('Graph for', num_done, 'pairs calculated in', stop - start)
                    sys.stdout.flush()

        if self.verbose == 1:
            print('start iterate')
            sys.stdout.flush()
        start = timeit.default_timer()
        num_tasks = 0
        for ids in chunked_iterable(self.pairs, chunk_size):
            num_tasks += 1
            if need_parallel:
                ready.acquire()
                # submit the chunk; upd_graph releases the semaphore when the
                # result comes back (the original left this call commented out,
                # which made the parallel path hang in the drain loop below)
                pool.apply_async(my_parallel_calc.calc_batch,
                                 args=(np.array(ids),), callback=upd_graph)
            else:
                if self.verbose == 1:
                    print('calc batch')
                    sys.stdout.flush()
                upd_graph(my_parallel_calc.calc_batch(np.array(ids)))

        if need_parallel:
            while done_tasks < num_tasks:
                ready.acquire()
            pool.close()
            pool.join()

        if self.verbose == 1:
            print('ready done')
            sys.stdout.flush()

        if self.progress_bar:
            progress_bar.close()

        if M == []:
            self.M = np.zeros((self.num_samples, 0), dtype=bool)
            self.D = np.zeros((self.num_samples, 0), dtype=np.float32)
            self.E = np.zeros((0, 2), dtype=np.float32)
        else:
            self.M = np.array(M).T
            self.D = np.array(D).T
            self.E = np.array(E)
        self.is_fitted = True
    except:
        if self.progress_bar:
            progress_bar.close()
        raise
    return self
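# Hedged sketch: the semaphore pattern above (acquire before apply_async,
# release in the callback) caps the number of in-flight chunks at
# num_workers * queue_len. A minimal standalone version; the callback runs in
# the parent's result-handler thread, so threading.Semaphore is the right type:
from multiprocessing.pool import Pool
from threading import Semaphore


def crunch(chunk):
    return sum(chunk)


if __name__ == '__main__':
    num_workers, queue_len = 2, 2
    ready = Semaphore(num_workers * queue_len)
    results = []

    def on_done(res):
        results.append(res)
        ready.release()  # free one slot for the producer loop

    pool = Pool(num_workers)
    chunks = [list(range(i, i + 5)) for i in range(0, 50, 5)]
    for chunk in chunks:
        ready.acquire()  # blocks once num_workers * queue_len tasks are queued
        pool.apply_async(crunch, (chunk,), callback=on_done)
    pool.close()
    pool.join()
    print(sorted(results))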
    except:
        print('Failed to save image')


# Main function: takes a page offset and downloads via a process pool.
from multiprocessing.pool import Pool


def main(offset):
    json = get_page_jrtt(offset)
    for item in get_images_jrtt(json):
        save_image_jrtt(item)


# first and last pages
start = 0
end = 20

if __name__ == '__main__':
    print('Starting image download, please wait...')
    pool = Pool()
    groups = ([x * 20 for x in range(start, end)])
    # create the process pool; map takes the function and its argument list
    pool.map(main, groups)
    # close() must be called before join(), otherwise an error is raised.
    # After close() no new tasks enter the pool; join() waits for all
    # child processes to finish.
    pool.close()
    pool.join()
    print('Image download finished.')
        local_image_url = item.get('image')
        new_image_url = local_image_url.replace('list', 'large')
        r = requests.get('http:' + new_image_url)
        if r.status_code == 200:
            file_path = img_path + os.path.sep + '{0}.{1}'.format(
                md5(r.content).hexdigest(), 'jpg')
            if not os.path.exists(file_path):
                with open(file_path, 'wb') as f:
                    f.write(r.content)


def saveToMongo(item):
    if db[MONGO_TABLE].insert(item):
        print('Saved to MongoDB successfully', item)
        return True
    return False


def main(offset):
    json = getPage(offset)
    for item in getImage(json):
        saveImage(item)
        saveToMongo(item)


if __name__ == '__main__':
    pool = Pool()
    groups = [x * 20 for x in range(2)]  # crawl two pages
    pool.map(main, groups)
    pool.close()  # close the pool so it accepts no new tasks
    pool.join()   # the main process blocks until all children exit
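# Hedged sketch: both scrapers above dedupe by naming each file after the MD5
# of its bytes, so identical content maps to the same path and is written only
# once. Standalone version with a hypothetical fetch() so it runs offline:
import os
import tempfile
from hashlib import md5
from multiprocessing.pool import Pool


def fetch(url):
    return url.encode('utf-8')  # stand-in for requests.get(url).content


def save_dedup(args):
    url, img_dir = args
    content = fetch(url)
    file_path = os.path.join(img_dir, '{}.jpg'.format(md5(content).hexdigest()))
    if not os.path.exists(file_path):  # same content -> same name -> skip
        with open(file_path, 'wb') as f:
            f.write(content)
    return file_path


if __name__ == '__main__':
    img_dir = tempfile.mkdtemp()
    urls = ['u1', 'u2', 'u1']  # duplicate on purpose
    with Pool(2) as pool:
        paths = pool.map(save_dedup, [(u, img_dir) for u in urls])
    print(len(set(paths)), 'unique files')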
def _execute_sub_tasks(task_id, params, sig_content, verbosity, runmode,
                       sigmode, monitor_interval, resource_monitor_interval):
    '''If this is a master task, execute as individual tasks'''
    m = ProcessMonitor(
        task_id,
        monitor_interval=monitor_interval,
        resource_monitor_interval=resource_monitor_interval,
        max_walltime=params.sos_dict['_runtime'].get('max_walltime', None),
        max_mem=params.sos_dict['_runtime'].get('max_mem', None),
        max_procs=params.sos_dict['_runtime'].get('max_procs', None),
        sos_dict=params.sos_dict)
    m.start()

    env.logger.info(f'{task_id} ``started``')

    master_out = os.path.join(os.path.expanduser('~'), '.sos', 'tasks', task_id + '.out')
    master_err = os.path.join(os.path.expanduser('~'), '.sos', 'tasks', task_id + '.err')
    # if this is a master task, calling each sub task
    with open(master_out, 'wb') as out, open(master_err, 'wb') as err:

        def copy_out_and_err(result):
            tid = result['task']
            out.write(
                f'{tid}: {"completed" if result["ret_code"] == 0 else "failed"}\n'
                .encode())
            if 'output' in result:
                out.write(f'output: {result["output"]}\n'.encode())
            sub_out = os.path.join(os.path.expanduser('~'), '.sos', 'tasks', tid + '.out')
            if os.path.isfile(sub_out):
                with open(sub_out, 'rb') as sout:
                    out.write(sout.read())
                try:
                    os.remove(sub_out)
                except Exception as e:
                    env.logger.warning(f'Failed to remove {sub_out}: {e}')

            sub_err = os.path.join(os.path.expanduser('~'), '.sos', 'tasks', tid + '.err')
            if 'exception' in result:
                err.write(str(result['exception']).encode())
            err.write(
                f'{tid}: {"completed" if result["ret_code"] == 0 else "failed"}\n'
                .encode())
            if os.path.isfile(sub_err):
                with open(sub_err, 'rb') as serr:
                    err.write(serr.read())
                try:
                    os.remove(sub_err)
                except Exception as e:
                    env.logger.warning(f'Failed to remove {sub_err}: {e}')

            # remove other files as well
            try:
                remove_task_files(tid, ['.out', '.err'])
            except Exception as e:
                env.logger.debug(f'Failed to remove files {tid}: {e}')

        if params.num_workers > 1:
            from multiprocessing.pool import Pool
            p = Pool(params.num_workers)
            results = []
            for t in params.task_stack:
                results.append(
                    p.apply_async(
                        _execute_task, ((*t, {
                            t[0]: sig_content.get(t[0], {})
                        }), verbosity, runmode, sigmode, None, None),
                        callback=copy_out_and_err))
            for idx, r in enumerate(results):
                results[idx] = r.get()
            p.close()
            p.join()
            # we wait for all results to be ready to return or raise, but we
            # only raise an exception for one of the subtasks
        else:
            results = []
            for tid, tdef in params.task_stack:
                # no monitor process for subtasks
                res = _execute_task((tid, tdef, {
                    tid: sig_content.get(tid, {})
                }),
                                    verbosity=verbosity,
                                    runmode=runmode,
                                    sigmode=sigmode,
                                    monitor_interval=None,
                                    resource_monitor_interval=None)
                try:
                    copy_out_and_err(res)
                except Exception as e:
                    env.logger.warning(f'Failed to copy result of subtask {tid}: {e}')
                results.append(res)

    # now we collect result
    all_res = {
        'ret_code': 0,
        'output': None,
        'subtasks': {},
        'shared': {},
        'skipped': 0,
        'signature': {}
    }
    for tid, x in zip(params.task_stack, results):
        all_res['subtasks'][tid[0]] = x

        if 'exception' in x:
            all_res['exception'] = x['exception']
            all_res['ret_code'] += 1
            continue
        all_res['ret_code'] += x['ret_code']
        if all_res['output'] is None:
            all_res['output'] = x['output']
        else:
            try:
                all_res['output'].extend(x['output'], keep_groups=True)
            except Exception:
                env.logger.warning(
                    f"Failed to extend output {all_res['output']} with {x['output']}")
        all_res['shared'].update(x['shared'])
        # count skipped subtasks regardless of whether one or all were skipped
        all_res['skipped'] += x.get('skipped', 0)
        if 'signature' in x:
            all_res['signature'].update(x['signature'])

    if all_res['ret_code'] != 0:
        if all_res['ret_code'] == len(results):
            env.logger.info(f'All {len(results)} tasks in {task_id} ``failed``')
        else:
            env.logger.info(
                f'{all_res["ret_code"]} of {len(results)} tasks in {task_id} ``failed``')
        # if some tasks failed, drop the skipped count: the master task counts
        # as failed, not skipped
        all_res.pop('skipped')
    elif all_res['skipped']:
        if all_res['skipped'] == len(results):
            env.logger.info(
                f'All {len(results)} tasks in {task_id} ``ignored`` or skipped')
        else:
            # if only partially skipped, we still save signature and result etc
            env.logger.info(
                f'{all_res["skipped"]} of {len(results)} tasks in {task_id} ``ignored`` or skipped')
            all_res.pop('skipped')
    else:
        env.logger.info(f'All {len(results)} tasks in {task_id} ``completed``')
    return all_res
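# Hedged sketch: the master/subtask pattern above pairs apply_async with a
# callback that streams each subtask's output as soon as it finishes, while
# r.get() later collects (and re-raises from) every result. Minimal version:
from multiprocessing.pool import Pool


def run_subtask(tid):
    return {'task': tid, 'ret_code': 0}


def stream_result(result):
    # runs in the parent as each subtask completes
    print('{}: {}'.format(result['task'],
                          'completed' if result['ret_code'] == 0 else 'failed'))


if __name__ == '__main__':
    p = Pool(2)
    handles = [p.apply_async(run_subtask, (tid,), callback=stream_result)
               for tid in ('t1', 't2', 't3')]
    results = [h.get() for h in handles]  # blocks; worker exceptions re-raise here
    p.close()
    p.join()
    print('ret_code sum:', sum(r['ret_code'] for r in results))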
def authorate(arguments):
    """Main function which delegates to fabric tasks."""
    global engine
    engine = create_engine('sqlite:///' + arguments['--db'])
    create_db(engine)

    global VERBOSE
    VERBOSE = arguments['--verbose']
    multi_thread = not arguments['--one']

    if arguments['-C']:
        classify.classifiers_dir = arguments['-C']

    # Assume successful return value
    ret = 0
    if arguments['load']:
        # Load in words and word counts from file
        session = get_session(engine)
        if len(session.query(Word_Count).all()) == 0:
            subprocess.call('sqlite3 ' + arguments['--db'] + ' < import_words.sql',
                            shell=True)

        prefix = arguments['--prefix']
        if os.path.exists(prefix):
            # Determine how many snippets to get per path.
            snippets_count = arguments['<snippets-per-path>']
            if not snippets_count:
                snippets_count = DEFAULT_SNIPPETS_COUNT

            pool = Pool(cpu_count() if multi_thread else 1)
            with open(arguments['<paths-file>'], 'r') as paths_file:
                paths = paths_file.readlines()
                for path in paths:
                    res = load_path(pool, path.rstrip(), prefix=prefix,
                                    multi_thread=multi_thread)
                    if not res:
                        ret = 3

            # Join the pool
            pool.close()
            pool.join()
        else:
            display_error(
                "The given prefix does not exist: {path}".format(path=prefix))
            ret = 2
    elif arguments['process']:
        # Cleanup the classifier dir
        classify.clean_classifier_dir()

        # Get and scale data from snippets
        session = get_session(engine)
        snippets = session.query(Book, Snippet).join(Snippet).all()
        data = [text_to_vector(snip.text, session) for _, snip in snippets]
        scaler = classify.create_and_save_scaler(data)
        scaled_data = scaler.transform(data)
        targets = [book.path_id for book, _ in snippets]

        # Train the classifiers
        for (Cls, kwargs) in classify.classifier_types:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                classifier = Cls(**kwargs)
                classifier.fit(scaled_data, targets)
            classify.save_classifier(classifier)
    elif arguments['classify']:
        snip_file = arguments['<snippet-file>']
        input_files = [snip_file if snip_file else '-']
        classify.classify_all(
            engine, " ".join([
                unicode(line.rstrip(), errors='ignore')
                for line in fileinput.input(input_files)
            ]))
    elif arguments['test']:
        session = get_session(engine)
        snippets = session.query(Book, Snippet).join(Snippet).all()
        if VERBOSE:
            print("Converting raw data to vectors. . .")
        data = [text_to_vector(snip.text, session) for _, snip in snippets]
        targets = [book.path_id for book, _ in snippets]
        classify.test_all(engine, data, targets)
    else:
        display_error("No subcommand given.")
        ret = 1
    return ret
def pos_type_classify(bamfile, chrom, start, end, is_single, read_length,
                      temp_dir, extension=None, center=True, maxsize=None,
                      process=20, minmapq=0, is_multmapfilter=False):
    print bamfile, chrom, start, end, is_single, read_length, temp_dir, extension, center
    if is_single:
        total_reads_type6_left = []   # 6. left of the del, second read on the breakpoint
        total_reads_type6_right = []  # 6. right of the del, first read on the breakpoint
        total_reads_type7 = []        # 7. reads within the del
        # temp_prefix = "%s/classify_%s" % (temp_dir, sub_num)
        if extension:
            rel_start = start - extension
            rel_end = end + extension
        else:
            rel_start = start
            rel_end = end
        if center:
            reads_type6_left, reads_type6_right, reads_type7, filtered_reads_num = posType_sub_single(
                bamfile, chrom, rel_start, rel_end, start, end, minmapq, is_multmapfilter)
        else:
            rel_start_left = rel_start
            rel_end_left = start + maxsize
            rel_start_right = end - maxsize
            rel_end_right = rel_end
            reads_type6_left_1, reads_type6_right_1, reads_type7_1, filtered_reads_num_1 = posType_sub_single(
                bamfile, chrom, rel_start_left, rel_end_left, start, end, minmapq, is_multmapfilter)
            reads_type6_left_2, reads_type6_right_2, reads_type7_2, filtered_reads_num_2 = posType_sub_single(
                bamfile, chrom, rel_start_right, rel_end_right, start, end, minmapq, is_multmapfilter)
            # merge the two windows type by type (the original summed
            # reads_type6_right_1 into both lists, double counting it)
            reads_type6_left = reads_type6_left_1 + reads_type6_left_2
            reads_type6_right = reads_type6_right_1 + reads_type6_right_2
            reads_type7 = reads_type7_1 + reads_type7_2
            filtered_reads_num = filtered_reads_num_1 + filtered_reads_num_2
        total_reads_type6_left.extend(reads_type6_left)
        total_reads_type6_right.extend(reads_type6_right)
        total_reads_type7.extend(reads_type7)
        total_filtered_reads = filtered_reads_num
        print total_reads_type6_left, total_reads_type6_right, total_reads_type7, total_filtered_reads
        return total_reads_type6_left, total_reads_type6_right, total_reads_type7, total_filtered_reads
    else:
        total_reads_type1_left = []   # 1. left of the del, second read on the breakpoint
        total_reads_type1_right = []  # 1. right of the del, first read on the breakpoint
        total_reads_type2_left = []   # 2. left of the del, first read on the breakpoint
        total_reads_type2_right = []  # 2. right of the del, second read on the breakpoint
        total_reads_type3_left = []   # 3. left of the del, read pair spans the breakpoint with no intersection
        total_reads_type3_right = []  # 3. right of the del, read pair spans the breakpoint with no intersection
        total_reads_type4 = []        # 4. reads within the del
        total_reads_type5_left = []   # 5. left of the del, both reads have an intersection
        total_reads_type5_right = []  # 5. right of the del, both reads have an intersection
        total_filtered_reads = 0
        length = end - start + 1
        sub_num = length / read_length
        # when start == end: translocation of chromosome
        if start == end:
            rel_start = start - maxsize
            rel_end = end + maxsize
            print rel_start, rel_end
            # temp_prefix = "%s/classify_%s" % (temp_dir, "whole")
            (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
             reads_type3_left, reads_type3_right, reads_type4,
             reads_type5_left, reads_type5_right, filtered_reads_num) \
                = posType_sub_paired(bamfile, chrom, rel_start, rel_end, start, end,
                                     read_length, minmapq, is_multmapfilter, extension=extension)
            total_reads_type1_left.extend(reads_type1_left)
            total_reads_type1_right.extend(reads_type1_right)
            total_reads_type2_left.extend(reads_type2_left)
            total_reads_type2_right.extend(reads_type2_right)
            total_reads_type3_left.extend(reads_type3_left)
            total_reads_type3_right.extend(reads_type3_right)
            total_reads_type4.extend(reads_type4)
            total_reads_type5_left.extend(reads_type5_left)
            total_reads_type5_right.extend(reads_type5_right)
            total_filtered_reads = filtered_reads_num
        # end - start < read_length and there is no need to extend the scope
        elif sub_num == 0 and not extension:
            # temp_prefix = "%s/classify_%s" % (temp_dir, sub_num)
            (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
             reads_type3_left, reads_type3_right, reads_type4,
             reads_type5_left, reads_type5_right, filtered_reads_num) \
                = posType_sub_paired(bamfile, chrom, start, end, start, end,
                                     read_length, minmapq, is_multmapfilter, extension=extension)
            total_reads_type1_left.extend(reads_type1_left)
            total_reads_type1_right.extend(reads_type1_right)
            total_reads_type2_left.extend(reads_type2_left)
            total_reads_type2_right.extend(reads_type2_right)
            total_reads_type3_left.extend(reads_type3_left)
            total_reads_type3_right.extend(reads_type3_right)
            total_reads_type4.extend(reads_type4)
            total_reads_type5_left.extend(reads_type5_left)
            total_reads_type5_right.extend(reads_type5_right)
            total_filtered_reads = filtered_reads_num
        # otherwise more than one process is needed for the calculation
        else:
            run_pool = Pool(process)
            result_list = []
            # extend the range to cover whole reads
            if extension:
                rel_start = start - extension
                rel_end = end + extension
                length = rel_end - rel_start + 1
                sub_num = length / read_length
            else:
                rel_start = start
                rel_end = end
            # the center should be considered, or it need not be considered
            # but the region is too small to exclude it
            if center or (not center and maxsize is not None and length < maxsize * 2):
                for i in range(sub_num):
                    sub_start = i * read_length + rel_start
                    if i == sub_num - 1:
                        sub_end = rel_end
                    else:
                        sub_end = sub_start + 1
                    print "Sub Process: %s" % i, sub_start, sub_end
                    result_list.append(
                        run_pool.apply_async(
                            posType_sub_paired,
                            args=(bamfile, chrom, sub_start, sub_end, start, end,
                                  read_length, minmapq, is_multmapfilter, extension)))
                run_pool.close()
                run_pool.join()
            # the center need not be considered
            else:
                rel_start_left = rel_start
                rel_end_left = start + maxsize
                rel_start_right = end - maxsize
                rel_end_right = rel_end
                # print rel_start_left, rel_end_left, rel_start_right, rel_end_right
                length = rel_end_left - rel_start_left + 1
                sub_num = length / read_length
                for i in range(sub_num):
                    sub_start = i * read_length + rel_start_left
                    if i == sub_num - 1:
                        sub_end = rel_end_left
                    else:
                        sub_end = sub_start + 1
                    print "Sub Process: %s" % i, sub_start, sub_end
                    # temp_prefix = "%s/classify_%s" % (temp_dir, i)
                    result_list.append(
                        run_pool.apply_async(
                            posType_sub_paired,
                            args=(bamfile, chrom, sub_start, sub_end, start, end,
                                  read_length, minmapq, is_multmapfilter, extension)))
                length = rel_end_right - rel_start_right + 1
                sub_num = length / read_length
                for i in range(sub_num):
                    sub_start = i * read_length + rel_start_right
                    if i == sub_num - 1:
                        sub_end = rel_end_right
                    else:
                        sub_end = sub_start + 1
                    print "Sub Process: %s" % i, sub_start, sub_end
                    # temp_prefix = "%s/classify_%s" % (temp_dir, i)
                    result_list.append(
                        run_pool.apply_async(
                            posType_sub_paired,
                            args=(bamfile, chrom, sub_start, sub_end, start, end,
                                  read_length, minmapq, is_multmapfilter, extension)))
                run_pool.close()
                run_pool.join()
            for res in result_list:
                (reads_type1_left, reads_type1_right, reads_type2_left, reads_type2_right,
                 reads_type3_left, reads_type3_right, reads_type4,
                 reads_type5_left, reads_type5_right, filtered_reads_num) = res.get()
                total_reads_type1_left.extend(reads_type1_left)
                total_reads_type1_right.extend(reads_type1_right)
                total_reads_type2_left.extend(reads_type2_left)
                total_reads_type2_right.extend(reads_type2_right)
                total_reads_type3_left.extend(reads_type3_left)
                total_reads_type3_right.extend(reads_type3_right)
                total_reads_type4.extend(reads_type4)
                total_reads_type5_left.extend(reads_type5_left)
                total_reads_type5_right.extend(reads_type5_right)
                total_filtered_reads += filtered_reads_num
        print "type1_left: %s; type1_right: %s, type2_left: %s; type2_right: %s, type3_left: %s; " \
              "type3_right: %s, type4: %s; type5_left: %s; type5_right: %s" % (
                  len(total_reads_type1_left), len(total_reads_type1_right),
                  len(total_reads_type2_left), len(total_reads_type2_right),
                  len(total_reads_type3_left), len(total_reads_type3_right),
                  len(total_reads_type4), len(total_reads_type5_left),
                  len(total_reads_type5_right))
        print "total_filtered_reads: %s" % total_filtered_reads
        return total_reads_type1_left, total_reads_type1_right, total_reads_type2_left, \
            total_reads_type2_right, total_reads_type3_left, total_reads_type3_right, \
            total_reads_type4, total_reads_type5_left, total_reads_type5_right, total_filtered_reads
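# Hedged sketch (Python 3, unlike the Python 2 snippet above): the paired-end
# branch splits [rel_start, rel_end] into sub-regions, dispatches each via
# apply_async, and merges with res.get(). A minimal, generic version of that
# split-dispatch-merge shape:
from multiprocessing.pool import Pool


def count_in_window(sub_start, sub_end):
    return sub_end - sub_start  # stand-in for a per-window BAM scan


if __name__ == '__main__':
    rel_start, rel_end, step = 0, 1000, 150
    pool = Pool(4)
    handles = []
    for sub_start in range(rel_start, rel_end, step):
        sub_end = min(sub_start + step, rel_end)  # last window absorbs the remainder
        handles.append(pool.apply_async(count_in_window, (sub_start, sub_end)))
    pool.close()
    pool.join()
    total = sum(h.get() for h in handles)
    print(total)  # 1000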
def main(args):
    """Do it all."""
    if not os.path.isdir(args.logs):
        raise Fail("Logs location '%s' is not a directory." % args.logs)

    builds = gather_builds(args)
    if args.verbose:
        print("Lined up %d builds." % len(builds))

    # The "configure" step is single-threaded.  We can run many at the same
    # time, even when we're also running a "build" step at the same time.
    # This means we may run a lot more processes than we have CPUs, but there's
    # no law against that.  There's also I/O time to be covered.
    configure_pool = Pool()

    # Builds which have failed the "configure" stage, with their errors.  This
    # queue must never stall, so that we can let results pile up here while the
    # work continues.
    configure_fails = Queue(len(builds))

    # Waiting list for the "build" stage.  It contains Build objects,
    # terminated by a final None to signify that there are no more builds to be
    # done.
    build_queue = JoinableQueue(10)

    # Builds that have failed the "build" stage.
    build_fails = Queue(len(builds))

    # Waiting list for the "test" stage.  It contains Build objects, terminated
    # by a final None.
    test_queue = JoinableQueue(10)

    # The "build" step tries to utilise all CPUs, and it may use a fair bit of
    # memory.  Run only one of these at a time, in a single worker process.
    build_worker = Process(
        target=service_builds, args=(build_queue, build_fails, test_queue))
    build_worker.start()

    # Builds that have failed the "test" stage.
    test_fails = Queue(len(builds))

    # Completed builds.  This must never stall.
    done_queue = JoinableQueue(len(builds))

    # The "test" step cannot run concurrently (yet).  So, run tests serially
    # in a single worker process.  It takes its jobs directly from the "build"
    # worker.
    test_worker = Process(
        target=service_tests, args=(test_queue, test_fails, done_queue))
    test_worker.start()

    # Feed all builds into the "configure" pool.  Each build which passes this
    # stage goes into the "build" queue.
    for build in builds:
        configure_pool.apply_async(
            build.do_configure,
            callback=partial(enqueue, build_queue, build),
            error_callback=partial(enqueue_error, configure_fails, build))

    if args.verbose:
        print("All jobs are underway.")
    configure_pool.close()
    configure_pool.join()

    # TODO: Async reporting for faster feedback.
    configure_fail_count = report_failures(configure_fails, "CONFIGURE FAIL")
    if args.verbose:
        print("Configure stage done.")

    # Mark the end of the build queue for the build worker.
    build_queue.put(None)
    build_worker.join()

    # TODO: Async reporting for faster feedback.
    build_fail_count = report_failures(build_fails, "BUILD FAIL")
    if args.verbose:
        print("Build step done.")

    # Mark the end of the test queue for the test worker.
    test_queue.put(None)
    test_worker.join()

    # TODO: Async reporting for faster feedback.
    test_fail_count = report_failures(test_fails, "TEST FAIL")
    if args.verbose:
        print("Test step done.")

    # All done.  Clean up.
    for build in builds:
        build.clean_up()

    ok_count = count_entries(done_queue)
    if ok_count == len(builds):
        print("All tests OK.")
    else:
        print(
            "Failures during configure: %d - build: %d - test: %d.  OK: %d." % (
                configure_fail_count,
                build_fail_count,
                test_fail_count,
                ok_count,
            ))
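# Hedged sketch: the staged pipeline above hands work between processes through
# queues and marks end-of-stream with a final None sentinel. Minimal version of
# that sentinel-terminated worker pattern:
from multiprocessing import Process, JoinableQueue


def consumer(queue, label):
    while True:
        item = queue.get()
        if item is None:  # sentinel: no more work is coming
            queue.task_done()
            break
        print(label, 'handled', item)
        queue.task_done()


if __name__ == '__main__':
    q = JoinableQueue(10)
    worker = Process(target=consumer, args=(q, 'build'))
    worker.start()
    for job in ['app', 'lib', 'docs']:
        q.put(job)
    q.put(None)  # mark the end of the queue for the worker
    q.join()     # wait until every item, including the sentinel, is processed
    worker.join()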
def aggregate_scores(test_ref_pairs,
                     evaluator=NiftiEvaluator,
                     labels=None,
                     nanmean=True,
                     json_output_file=None,
                     json_name="",
                     json_description="",
                     json_author="Fabian",
                     json_task="",
                     num_threads=2,
                     **metric_kwargs):
    """
    test = predicted image
    :param test_ref_pairs: list of (test, reference) file pairs
    :param evaluator:
    :param labels: must be a dict of int-> str or a list of int
    :param nanmean: if True, average scores with np.nanmean (ignoring NaNs)
    :param json_output_file:
    :param json_name:
    :param json_description:
    :param json_author:
    :param json_task:
    :param metric_kwargs:
    :return:
    """
    if type(evaluator) == type:
        evaluator = evaluator()

    if labels is not None:
        evaluator.set_labels(labels)

    all_scores = OrderedDict()
    all_scores["all"] = []
    all_scores["mean"] = OrderedDict()

    test = [i[0] for i in test_ref_pairs]
    ref = [i[1] for i in test_ref_pairs]
    p = Pool(num_threads)
    # run_evaluation takes one (test, ref, evaluator, metric_kwargs) tuple per
    # call, so plain map (not starmap) is used here
    all_res = p.map(
        run_evaluation,
        zip(test, ref, [evaluator] * len(ref), [metric_kwargs] * len(ref)))
    p.close()
    p.join()

    for i in range(len(all_res)):
        all_scores["all"].append(all_res[i])

        # append score list for mean
        for label, score_dict in all_res[i].items():
            if label in ("test", "reference"):
                continue
            if label not in all_scores["mean"]:
                all_scores["mean"][label] = OrderedDict()
            for score, value in score_dict.items():
                if score not in all_scores["mean"][label]:
                    all_scores["mean"][label][score] = []
                all_scores["mean"][label][score].append(value)

    for label in all_scores["mean"]:
        for score in all_scores["mean"][label]:
            if nanmean:
                all_scores["mean"][label][score] = float(
                    np.nanmean(all_scores["mean"][label][score]))
            else:
                all_scores["mean"][label][score] = float(
                    np.mean(all_scores["mean"][label][score]))

    # save to file if desired
    # we create a hopefully unique id by hashing the entire output dictionary
    if json_output_file is not None:
        json_dict = OrderedDict()
        json_dict["name"] = json_name
        json_dict["description"] = json_description
        timestamp = datetime.today()
        json_dict["timestamp"] = str(timestamp)
        json_dict["task"] = json_task
        json_dict["author"] = json_author
        json_dict["results"] = all_scores
        json_dict["id"] = hashlib.md5(
            json.dumps(json_dict).encode("utf-8")).hexdigest()[:12]
        save_json(json_dict, json_output_file)

    return all_scores
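# Hedged sketch: note the distinction the snippet above relies on. Pool.map
# passes each zipped tuple as a single argument; Pool.starmap unpacks it into
# positional arguments. Both calls below compute the same thing:
from multiprocessing import Pool


def evaluate_tuple(args):
    test, ref = args  # unpack manually, map-style
    return test == ref


def evaluate_args(test, ref):
    return test == ref  # unpacked automatically by starmap


if __name__ == '__main__':
    pairs = list(zip([1, 2, 3], [1, 0, 3]))
    with Pool(2) as p:
        print(p.map(evaluate_tuple, pairs))     # [True, False, True]
        print(p.starmap(evaluate_args, pairs))  # [True, False, True]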
def test(self, model, data_dir, fnames, D_config, use_logits=False):
    """
    model: LSTM/LSTM_CNN/BiLSTM class, model for testing; should be loaded before being passed in
    data_dir: .npz file dir
    fnames: list of file names for training/testing
    D_config: data loader config
    """
    self.D_config = D_config
    self.D_config["free_mem"] = True
    self.use_logits = use_logits
    # reset for testing
    self.all_y = []
    self.all_pred = []
    target_files = self.data_dir2target_files(data_dir, fnames)
    if target_files is None:
        return None
    file_idx = 0
    # indicates whether all files have started loading
    all_done = False
    pool = Pool(self.num_threads)
    print("[INFO] Note that files are loaded via multiprocessing, so child "
          "processes won't print to the ipython notebook, which only monitors "
          "the parent process. Please check the terminal for more logging info.")
    self.cur_file_in_mem = 0
    while not all_done:
        one_file = target_files[file_idx]
        if self.cur_file_in_mem < self.num_file_in_mem and not all_done:
            # start a new process for loading test data
            pool.apply_async(self._newthread_helper, args=(one_file,),
                             callback=self._callback_helper,
                             error_callback=self._error_helper)
            self.cur_file_in_mem += 1
            file_idx = file_idx + 1
            if file_idx == len(target_files):
                # all files have started loading
                all_done = True
        else:
            time.sleep(0.001)
        if len(self.DataPool) > 0:
            self._batch_test_helper(model)
        else:
            time.sleep(0.001)
    pool.close()
    # wait for all child processes to finish, i.e. until every Heater_Data
    # has been put into self.DataPool
    pool.join()
    for _ in range(len(self.DataPool)):
        self._batch_test_helper(model)
    # calc overall accuracy and AUC
    self.all_y_onehot = np.concatenate(self.all_y)
    self.all_y = np.argmax(self.all_y_onehot, axis=1)
    self.all_pred = np.concatenate(self.all_pred)
    pred_y = np.argmax(self.all_pred, axis=1)
    m_auc = roc_auc_score(self.all_y_onehot, self.all_pred)
    print("overall acc: %.4f, overall AUC: %.4f" % (np.mean(pred_y == self.all_y), m_auc))
    # reset self.DataPool for future training/testing
    self.DataPool.clear()
    return m_auc
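# Hedged sketch: the loader above throttles memory by keeping at most
# num_file_in_mem async loads in flight and consuming finished results as they
# land. A minimal standalone version of that producer/consumer loop (like the
# original, the in-flight counter is touched from the callback thread, which
# is acceptable for a sketch):
import time
from multiprocessing.pool import Pool

data_pool = []  # filled by callbacks in the parent process
in_flight = 0


def load_file(name):
    time.sleep(0.01)  # stand-in for reading an .npz file
    return name


def on_loaded(data):
    global in_flight
    data_pool.append(data)
    in_flight -= 1


if __name__ == '__main__':
    files = ['f%d.npz' % i for i in range(10)]
    max_in_mem = 3
    pool = Pool(2)
    idx = 0
    while idx < len(files) or in_flight > 0 or data_pool:
        if idx < len(files) and in_flight < max_in_mem:
            pool.apply_async(load_file, (files[idx],), callback=on_loaded)
            in_flight += 1
            idx += 1
        elif data_pool:
            print('consumed', data_pool.pop(0))  # stand-in for batch testing
        else:
            time.sleep(0.001)
    pool.close()
    pool.join()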