def main():
    proc = Popen(['docker', 'run', '-i', 'pipe_api'], stdin=PIPE, stdout=PIPE)
    stdout = proc.stdout
    stdin = proc.stdin
    with PPE(max_workers=16) as exe:
        for r in exe.map(driver, [(writer, stdin.fileno()), (reader, stdout.fileno())]):
            r
    stdin.close()
def run_experiment(config_file):
    with open(config_file) as f:
        config = yaml.safe_load(f)  # safe_load: yaml.load() without a Loader is deprecated
    pool = PPE()
    fs = []
    for dim in config['dimensions']:
        for run in range(config['num_runs']):
            for fevals in config['fevals_per_dim']:
                for alg in config['algs']:
                    for variant in config[alg]:
                        setup = {
                            'dim': dim,
                            'fevals': min(80, fevals * dim),
                            'run': run + config['runs_offset'],
                            'variant': variant['name'],
                            'alg': alg,
                            'kwargs': variant['kwargs'],
                            'result_path': config['result_path']
                        }
                        if config['name'] == 'saddle':
                            fs.append(pool.submit(run_saddle_unit_exp, setup))
                        elif config['name'] == 'mop':
                            fs.append(pool.submit(run_mop_unit_exp, setup))
                        elif config['name'] == 'adhd':
                            bkp_file = os.path.join(
                                os.path.dirname(__file__),
                                config['result_path'].replace('.json', '_bkp.json'))
                            if run == 0:
                                # remove any stale backup before the first run
                                try:
                                    os.remove(bkp_file)
                                except FileNotFoundError:
                                    pass
                            setup['n_init'] = config['n_init']
                            setup['start_seed'] = config['start_seed']
                            setup['result_path_bkp'] = bkp_file
                            fs.append(pool.submit(run_adhd_unit_exp, setup))
                        else:
                            raise Exception('unknown experiment')
    results = []
    for x in as_completed(fs):
        try:
            results.append(x.result())
        except Exception as e:
            print(e)
    print(results)
    with open(os.path.join(os.path.dirname(__file__), config['result_path']), 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)
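# A minimal example of the YAML layout run_experiment() above appears to expect,
# inferred only from the keys the function reads; the algorithm name and kwargs
# below are hypothetical placeholders, not part of the original code.
EXAMPLE_CONFIG_YAML = """
name: saddle
result_path: results/saddle.json
runs_offset: 0
num_runs: 2
dimensions: [2, 5]
fevals_per_dim: [10, 20]
algs: [my_alg]
my_alg:
  - name: default
    kwargs: {}
"""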
def build(self):
    '''
    Triggering build solves all candidate models in order to find the best
    one by score, then returns it. It also populates self.all_models and
    self.best with the collected information.
    '''
    with PPE() as exe:
        ppe = [exe.submit(self._solve, lag) for lag in range(2, self.lags)]
        for proc in as_completed(ppe):
            key, spec = proc.result()
            self.all_models[key] = spec
        del ppe
    self.best = self._check_all_models()
    return self.best
async def main(*peers):
    ts = []
    loop = aio.get_running_loop()
    bc = Blockchain.genesis(1)
    heads = {h(bytes(bc.blocks[0])): bc}
    if not peers:
        t, protocol = await loop.create_datagram_endpoint(
            lambda: Protocol(bc, heads), local_addr=('127.0.0.1', 9999))
        print('server up')
    else:
        for addr, port in peers:
            t, protocol = await loop.create_datagram_endpoint(
                lambda: Protocol(bc, heads), remote_addr=(addr, port))
            ts.append(t)
            print('get chain')
            protocol.getchain(0)
    try:
        ex = PPE(max_workers=1)
        while True:
            head = max(heads, key=lambda _h: len(heads[_h].blocks))
            bc = heads[head]
            b = await bc.mine(ex)
            if bc.verify(b):
                bc.add(b)
                del heads[head]
                heads[h(bytes(b))] = bc
                for addr in Protocol.peers:
                    protocol.sendblock(addr, b)
                print(f'MINED {b.ix}')
    finally:
        for t in ts:
            t.close()
def main():
    urls = set()
    urls |= scrape(
        (3, ['http://blog.livedoor.jp/geek/archives/cat_10022560.html']))
    print(urls)
    snapshots = sorted(glob.glob('tmp/snapshots/*'))
    for snapshot in snapshots:
        try:
            urls |= pickle.loads(open(snapshot, 'rb').read())
        except EOFError as ex:
            continue
    while True:
        urltmp = set()
        with PPE(max_workers=CPU_SIZE) as exe:
            for _urlret in exe.map(scrape, chunk_urls(urls)):
                if _urlret is not None:
                    urltmp |= _urlret
        urls = urltmp
        if len(urls) == 0:
            break
def download(self):
    while True:
        try:
            info = urllib.request.urlopen(self.video_url).info()
        except urllib.error.HTTPError:
            # retry once with browser-like headers, then stop retrying
            req = urllib.request.Request(self.video_url)
            req.headers.update(headers())
            info = urllib.request.urlopen(req).info()
            break
        except http.client.IncompleteRead:
            continue
        except AttributeError:
            exit()
        else:
            break
    self.total_length = int(info.get('content-length'))
    self.file_type = info.get('content-type').split('/')[-1]
    self.split_num = self.total_length // 300000
    print('Use cpu thread count: ', cpu_count())
    print('Split count: ', self.split_num, '\n')
    l = [(self.total_length + i) // self.split_num for i in range(self.split_num)]
    args = [(i, 0 if i == 0 else sum(l[:i]) + 1, sum(l[:i]) + val)
            for i, val in enumerate(l)]
    with PPE(max_workers=cpu_count(),
             initializer=self.pool_init,
             initargs=(Value('i', 0), )) as exe:
        exe.map(self.split_download, args)
    with open('{}.{}'.format(self.title, self.file_type), 'wb') as f:
        self.combine(f)
    return str(
        round(
            os.path.getsize('{}.{}'.format(self.title, self.file_type)) /
            (1024.0 ** 2), 1)) + 'MB'
def run():
    files = list(Path(CONFIG.HREF_PATH).glob('*'))
    random.shuffle(files)
    files = files[:100_0000]
    args = {}
    for idx, file in enumerate(files):
        key = idx % 16
        if args.get(key) is None:
            args[key] = []
        args[key].append(file)
    args = [(key, files) for key, files in args.items()]
    objs = set()
    with PPE(max_workers=4) as exe:
        for _objs in exe.map(pmap, args):
            objs |= _objs
    print('total size', len(objs))
    with open('urls.pkl.gz', 'wb') as fp:
        fp.write(gzip.compress(pickle.dumps(objs)))
    for url in list(objs)[:100]:
        print(url)
import asyncio, types
from concurrent.futures import ProcessPoolExecutor as PPE

ppe = PPE()


# IO bound
async def do_something(fname, data, loop):
    await write(fname, data, loop)


@types.coroutine
def write(f, data, loop):
    yield from loop.run_in_executor(ppe, do_write, f, data)


def do_write(f, data):
    with open(f, 'w') as f:
        for line in data:
            f.write(line)


# CPU bound
async def do_something_else(n, loop):
    await count(n, loop)


@types.coroutine
def count(n, loop):
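    # Assumed completion (the original snippet breaks off here): mirror the
    # write() wrapper above and push the CPU-bound counting onto the pool.
    yield from loop.run_in_executor(ppe, do_count, n)


# Hypothetical CPU-bound helper, added only so the sketch above is runnable;
# it is not part of the original snippet. It must stay module-level so the
# process pool can pickle it.
def do_count(n):
    total = 0
    for i in range(n):
        total += i
    return total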
def main():
    with PPE(max_workers=16) as exe:
        r = [r for r in exe.map(calc, list(range(1, 16)))]
    print(r)
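# calc() is not defined in the snippet above. A minimal stand-in so the example
# is runnable (any picklable, module-level function works); exe.map() returns
# results in input order, so r would be [1, 4, 9, ..., 225] with this placeholder.
def calc(n):
    return n * n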
def football(self, begin_date="2010-01-01", end_date=None):
    interval = DateHandler(begin_date).create_interval_till(end_date)
    with PPE(self.cpu_count) as worker_pool:
        worker_pool.map(DataCollector.fetch_day_events, interval)
    # (fragment: the enclosing per-URL function and its opening try block begin above this excerpt)
        return print(os.path.exists(f'images/{hashs}.{type}'))  # debug short-circuit: everything below is skipped while this line is present
        if os.path.exists(f'images/{hashs}.{type}'):
            return
        session = requests.session()
        session.proxies = {
            'http': 'socks5h://localhost:9050',
            'https': 'socks5h://localhost:9050'
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
        }
        try:
            r = session.get(img_url, headers=headers)
        except Exception as ex:
            print(ex)
            return
        bins = r.content
        with open(f'images/{hashs}.{type}', 'wb') as fp:
            fp.write(bins)
        print('finish', img_url)
    except Exception as ex:
        print(ex)


from concurrent.futures import ProcessPoolExecutor as PPE

PPE(max_workers=300).map(pmap, img_urls)
        # (fragment: the enclosing def pmap(...) and its per-path loop begin above this excerpt)
        try:
            obj = json.load(path.open())
            created_at = datetime.strptime(
                obj['created_at'], '%a %b %d %H:%M:%S +0000 %Y') + timedelta(hours=9)
            day = created_at.day
            hour = created_at.hour
            #print(created_at)
            text = obj['text']
            if 'オセロニア' in text:  # tweets mentioning the game "Othellonia"
                if time_freq.get((day, hour)) is None:
                    time_freq[(day, hour)] = 0
                time_freq[(day, hour)] += 1
        except Exception as ex:
            print(ex)
    return time_freq


args = [(key, paths) for key, paths in key_paths.items()]
time_freq = {}
with PPE(max_workers=12) as exe:
    for _time_freq in exe.map(pmap, args):
        for time, freq in _time_freq.items():
            if time_freq.get(time) is None:
                time_freq[time] = 0
            time_freq[time] += freq
for time, freq in sorted(time_freq.items(), key=lambda x: x[0]):
    day, hour = time
    print(f'{day}日{hour}時', freq)  # e.g. "5日13時" = day 5, 13:00
def aaf_dist(datfile, countfile, nThreads, samples, kl, long=False):
    # check executables
    if os.system('which fitch_kmerX > /dev/null'):
        if long:
            fitch = './fitch_kmerX_long'
        else:
            fitch = './fitch_kmerX'
        if not is_exe(fitch):
            print(fitch + ' not found. Make sure it is in your PATH or the')
            print('current directory, and that it is executable')
            sys.exit()
    else:
        if long:
            fitch = 'fitch_kmerX_long'
        else:
            fitch = 'fitch_kmerX'
    # process the .dat.gz file
    try:
        iptf = smartopen(datfile, 'rt')
    except IOError:
        print('Cannot open file', datfile)
        sys.exit()
    if not os.path.isfile(countfile):
        print('Cannot find file', countfile)
        sys.exit()
    try:
        total = open(countfile, 'rt')
    except IOError:
        print('Cannot open file', countfile)
        sys.exit()
    try:
        infile = open('infile', 'wt')
    except IOError:
        print('Cannot open infile for writing')
        sys.exit()
    ### Read header
    sl = []  # species list
    line = iptf.readline()
    ll = line.split()
    if kl != float(ll[1]):  # kmer length
        print("The recorded k in the shared kmer table file is not the same as the k supplied to aaf_dist; exiting now.")
        sys.exit()
    while True:
        line = iptf.readline()
        if line.startswith('#-'):
            continue
        elif line.startswith('#sample'):
            ll = line.split()
            sl.append(ll[1])
        else:
            break
    if sl != samples:
        print("The recorded sample list in the shared kmer table file is not the same as the one supplied to aaf_dist; exiting now.")
        sys.exit()
    ### Initialize shared kmers matrix
    sn = len(samples)  # species number
    nshare = [[0] * sn for i in range(sn)]
    ### It turns out to be very slow if we give very big chunks. So we will be
    ### using only 1G of RAM in total. As a result, we can use all the cores
    ### available, which was not possible for the kmer_count step.
    cpu_num = psutil.cpu_count()
    ### Compute the number of lines to process per thread (chunk size)
    line = iptf.readline()
    line_size = sys.getsizeof(line)
    chunkLength = int(1024 ** 3 / cpu_num / line_size)
    print('chunkLength = {}'.format(chunkLength))
    while True:
        lines = []
        for nLines in range(chunkLength):
            if not line:  # if empty
                break
            lines.append(line)
            line = iptf.readline()
        if not lines:  # if empty
            break
        ### Compute shared kmer matrix
        with PPE(max_workers=cpu_num) as executor:
            for result in executor.map(countShared_single, lines):
                for i in range(sn):
                    for j in range(i + 1, sn):
                        nshare[i][j] += result[i][j]
    iptf.close()
    ### Compute distance matrix
    ntotal = [0.0] * sn
    for i in range(sn):
        ntotal[i] = float(total.readline().split()[1])
    dist = [[0] * sn for i in range(sn)]
    for i in range(sn):
        for j in range(i + 1, sn):
            mintotal = min(ntotal[i], ntotal[j])
            if nshare[i][j] == 0:
                dist[j][i] = dist[i][j] = 1
            else:
                distance = (-1 / float(kl) * math.log(nshare[i][j] / mintotal))
                #print(mintotal, nshare[i][j])
                dist[j][i] = dist[i][j] = distance
            nshare[j][i] = nshare[i][j]
    total.close()
    ### Write infile
    infile.write('{} {}'.format(sn, sn))
    namedic = {}
    for i in range(sn):
        lsl = len(sl[i])
        if lsl >= 10:
            ssl = sl[i][:10]
            appendix = 1
            while ssl in namedic:
                ssl = ssl[:-len(str(appendix))] + str(appendix)
                appendix += 1
        if lsl < 10:
            ssl = sl[i] + ' ' * (10 - lsl)
        namedic[ssl] = sl[i]
        infile.write('\n{}'.format(ssl))
        for j in range(sn):
            infile.write('\t{}'.format(dist[i][j]))
    infile.close()
    ### Run fitch_kmer
    print('{} building tree'.format(time.strftime("%c")))
    if os.path.exists("./outfile"):
        os.system("rm -f outfile outtree")
    command = 'printf "K\n{}\nY" | {} > /dev/null'.format(int(kl), fitch)
    os.system(command)
    fh = open('outtree', 'rt')
    fh1 = open(datfile.split('.')[0] + '.tre', 'wt')
    for line in fh:
        for key in namedic:
            key_new = key.rstrip() + ":"
            if key_new in line:
                newline = line.replace(key_new, namedic[key].rstrip() + ":", 1)
                line = newline
        fh1.write(line)  # this can be either line or newline because when the
                         # loop exits, line == newline
    fh.close()
    fh1.close()
    command = 'mv infile {}.dist'.format(datfile.split('.')[0])
    os.system(command)
    os.system('rm -f outfile outtree')
    print('{} end'.format(time.strftime("%c")))
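# Aside: ProcessPoolExecutor.map() also accepts a chunksize argument (Python 3.5+)
# that batches several items into a single inter-process round trip, which is the
# same overhead the manual chunking in aaf_dist() tries to control. A minimal
# standalone sketch of the effect; the worker function here is a placeholder,
# not part of aaf_dist():
from concurrent.futures import ProcessPoolExecutor as PPE


def _square(x):
    return x * x


if __name__ == '__main__':
    with PPE(max_workers=4) as executor:
        # chunksize=256 ships items to the workers in batches of 256,
        # cutting per-item pickling/IPC overhead for long input iterables.
        results = list(executor.map(_square, range(10000), chunksize=256))
    print(results[:5])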
    # (fragment: the enclosing def run(arg) and its opening try block begin above this excerpt)
        driver.get(url)
        html = driver.page_source
        soup = bs4.BeautifulSoup(html, 'lxml')
        belements = soup.find_all('input', {'class': 'LinkButton'})
        selements = driver.find_elements_by_xpath(
            '//input[contains(@class,"LinkButton")]')
        #print(selements)
        element = selements[index]
        print('try', belements[index].get('value'))
        for i in range(10**10):
            print('now', i + 1, belements[index].get('value'))
            try:
                element.click()
                html = driver.page_source
                save_html_with_hash(html)
            except Exception as ex:
                print(ex)
            element = driver.find_element_by_xpath(
                '//input[contains(@name,"fwListNaviBtnNext")]')
    except Exception as ex:
        print(ex)
    driver.quit()


url = 'https://www.hellowork.go.jp/servicef/130020.do?action=initDisp&screenId=130020'
args = []
for index in range(53):
    args.append((url, index))
with PPE(max_workers=53) as exe:
    exe.map(run, args)
        # (fragment: the enclosing function that receives executor and fileListToBeScanned begins above this excerpt)
        executor.submit(ocrFunc, fileListToBeScanned[i])


if __name__ == "__main__":
    startTime = time.time()
    imagesFolderPath_1 = getAbsFolderPath("imgP1")
    imagesFolderFileList_1 = os.listdir("imgP1")
    imgP1Dir = list(
        map(lambda x: imagesFolderPath_1 + x, imagesFolderFileList_1))
    imagesFolderPath_2 = getAbsFolderPath("imgP2")
    imagesFolderFileList_2 = os.listdir("imgP2")
    imgP2Dir = list(
        map(lambda x: imagesFolderPath_2 + x, imagesFolderFileList_2))
    imagesFolderPath_3 = getAbsFolderPath("imgP3")
    imagesFolderFileList_3 = os.listdir("imgP3")
    imgP3Dir = list(
        map(lambda x: imagesFolderPath_3 + x, imagesFolderFileList_3))
    imagesFolderPath_4 = getAbsFolderPath("imgP4")
    imagesFolderFileList_4 = os.listdir("imgP4")
    imgP4Dir = list(
        map(lambda x: imagesFolderPath_4 + x, imagesFolderFileList_4))
    # Run 4 processes in parallel; each process holds 8 threads
    with PPE(multiprocessing.cpu_count() * 2) as executor:
        # compressImage("./imgP1", "./imgP1_compressed")
        submitOCRExecutor(multiProcessingDetect, imgP1Dir, imgP2Dir, imgP3Dir,
                          imgP4Dir)
    endTime = time.time()
    print(endTime - startTime, " sec used")
def initialize_pool(cls, num_processes=0):
    if num_processes > 1:
        if DDFacetSim.__exec_pool is None:
            DDFacetSim.__exec_pool = PPE(max_workers=num_processes)
            DDFacetSim.__IN_PARALLEL_INIT = True
def aaf_kmercount(dataDir, k, n, nThreads, memPerThread):
    # check executables
    if k > 25:
        if os.system('which kmer_countx > /dev/null'):
            kmerCount = './kmer_countx'
            if not is_exe(kmerCount):
                print('kmer_countx not found. Make sure it is in your PATH or the')
                print('current directory, and that it is executable')
                sys.exit(1)
        else:
            kmerCount = 'kmer_countx'
    else:
        if os.system('which kmer_count > /dev/null'):
            kmerCount = './kmer_count'
            if not is_exe(kmerCount):
                print('kmer_count not found. Make sure it is in your PATH or the')
                print('current directory, and that it is executable')
                sys.exit(1)
        else:
            kmerCount = 'kmer_count'
    ### Get sample list:
    samples = []
    for fileName in os.listdir(dataDir):
        if os.path.isdir(os.path.join(dataDir, fileName)):
            samples.append(fileName)
        else:
            if not fileName.startswith('.'):
                sample = fileName.split(".")[0]
                if sample in samples:
                    sample = fileName.split(".")[0] + fileName.split(".")[1]
                    if sample in samples:
                        print('Error, redundant sample or file names. Aborting!')
                        sys.exit(3)
                os.system("mkdir {}/{}".format(dataDir, sample))
                os.system("mv {}/{} {}/{}/".format(dataDir, fileName, dataDir, sample))
                samples.append(sample)
    samples.sort()
    print(time.strftime('%c'))
    print('SPECIES LIST:')
    for sample in samples:
        print(sample)
    ### Prepare kmer_count jobs
    jobList = []
    for sample in samples:
        outFile = '{}.pkdat.gz'.format(sample)
        command = '{} -l {} -n {} -G {} -o {} -f '.format(
            kmerCount, k, n, memPerThread, outFile)
        command1 = ''
        for inputFile in os.listdir(os.path.join(dataDir, sample)):
            inputFile = os.path.join(dataDir, sample, inputFile)
            handle = smartopen(inputFile)
            firstChar = handle.read(1)
            if firstChar == '@':
                seqFormat = 'FQ'
            elif firstChar == '>':
                seqFormat = 'FA'
            else:
                print('Error, file {} is not FA or FQ format. Aborting!'.format(inputFile))
                sys.exit(3)
            command1 += " -i '{}'".format(inputFile)
        command += '{}{}> {}.wc'.format(seqFormat, command1, sample)
        jobList.append(command)
    ### Run jobs
    with PPE(max_workers=nThreads) as executor:
        executor.map(run_command, jobList)
    return samples
def main():
    with PPE(max_workers=2) as exe:
        exe.map(rap, [random_sample, filter_words])
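# rap() is not shown in this snippet. Judging from the call above, it is
# presumably a thin wrapper that simply invokes the callable it receives,
# since Executor.map() applies one function to each item. A minimal assumed
# version (random_sample and filter_words must be picklable, module-level
# callables for this to work with a process pool):
def rap(fn):
    return fn()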
    # (fragment: the enclosing download helper that defines simple_url, link_hash and simple_hash begins above this excerpt)
    r = requests.get(simple_url)
    r.encoding = r.apparent_encoding
    html = r.text
    open(f'darturl_clean/{link_hash}', 'w').write(simple_url)
    open(f'htmls/{simple_hash}.gz', 'wb').write(gzip.compress(bytes(html, 'utf8')))
    print(simple_url)


from concurrent.futures import ProcessPoolExecutor as PPE

paths = [path for path in Path('./xml_parse').glob('*')]


def pmap(path):
    #print(path)
    obj = json.load(path.open())
    #print(obj)
    link = obj['link']
    link_hash = obj['link_hash']
    if 'rdsig.yahoo.co.jp' in link:
        # rdsig links are too numerous to evaluate, so they are ignored
        print(link)
        rdsig((link, link_hash))
        ...
    else:
        print(link)
        pickup((link, link_hash))


with PPE(max_workers=24) as exe:
    exe.map(pmap, paths)
            # (fragment: the enclosing def pmap(arg), its per-path loop, try block and the call being closed here begin above this excerpt)
                                description=description,
                                body=body,
                                hrefs=hrefs)
            ffdb.save(key=url, val=parsed)
        except UnicodeError as ex:
            Path(path).unlink()
        except UnicodeEncodeError as ex:
            Path(path).unlink()
        except EOFError as ex:
            Path(path).unlink()
        except Exception as ex:
            print(ex)
            ffdb.save(key=url, val=None)
    gc.collect()
    print('finish batch', key)


args = {}
files = list(glob.glob('./tmp/htmls/*'))
random.shuffle(files)
size = len(files)
for idx, path in enumerate(files):
    key = idx % (size // 100000)
    #key = idx % 16
    if args.get(key) is None:
        args[key] = []
    args[key].append(path)
args = [(key, paths) for key, paths in args.items()]
print('made chunks')
#[pmap(arg) for arg in args]
with PPE(max_workers=8) as exe:
    exe.map(pmap, args)
            # (fragment: the enclosing def pmap(arg), its loop over anchor tags and try block begin above this excerpt)
            #href = re.sub(r'\?.*?$', '', href)
            hrefs.add(a.get('href'))
        except Exception as ex:
            print(ex)
            continue
    return hrefs


if '--resume' in sys.argv:
    urls = pickle.load(open('urls.pkl', 'rb'))
else:
    urls = pmap((-1, [url]))
print(urls)

DIST = 1
args = {key: [] for key in range(DIST)}
[args[index % DIST].append(url) for index, url in enumerate(urls)]
args = [(key, urls) for key, urls in args.items()]
#[pmap(arg) for arg in args]

from concurrent.futures import ProcessPoolExecutor as PPE

while True:
    with PPE(max_workers=DIST) as exe:
        urls = set()
        for _hrefs in exe.map(pmap, args):
            urls |= _hrefs
    pickle.dump(urls, open('urls.pkl', 'wb'))
    args = {key: [] for key in range(DIST)}
    [args[index % DIST].append(url) for index, url in enumerate(urls)]
    args = [(key, urls) for key, urls in args.items()]
if os.path.exists('./' + selection_dir):
    command = 'rm -r {}'.format(selection_dir)
    os.system(command)
command = 'mkdir {}'.format(selection_dir)
os.system(command)

# Run ReadsSelector
reads_cmd = []
for sample in samples:
    infiles = os.listdir(os.path.join(dataDir, sample))
    command = '{} -k sba.kmer -fa 1 -o {}/{}_selected '.format(
        ReadsSelector, selection_dir, sample)
    for infile in infiles:
        # leading space so multiple -s flags do not run together
        command += ' -s {}'.format(os.path.join(dataDir, sample, infile))
    reads_cmd.append(command)
with PPE(max_workers=nThreads) as executor:
    executor.map(run_command, reads_cmd)

# After selection
samples = aaf_kmercount(selection_dir, kl, n, options.nThreads,
                        memSize / options.nThreads)

### Merge output wc files
divFile = selection_dir + '.wc'
handle = open(divFile, 'w')
handle.close()
for sample in samples:
    countfile = sample + '.wc'
    os.system('cat {} >> {}'.format(countfile, divFile))
    os.remove(countfile)