def main():
    proc = Popen(['docker', 'run', '-i', 'pipe_api'], stdin=PIPE, stdout=PIPE)
    stdout = proc.stdout
    stdin = proc.stdin
    with PPE(max_workers=16) as exe:
        for r in exe.map(driver, [(writer, stdin.fileno()), (reader, stdout.fileno())]):
            r
    stdin.close()
def run_experiment(config_file):
    with open(config_file) as f:
        config = yaml.safe_load(f)  # safe_load: yaml.load() without a Loader is deprecated
    pool = PPE()
    fs = []
    for dim in config['dimensions']:
        for run in range(config['num_runs']):
            for fevals in config['fevals_per_dim']:
                for alg in config['algs']:
                    for variant in config[alg]:
                        setup = {
                            'dim': dim,
                            'fevals': min(80, fevals * dim),
                            'run': run + config['runs_offset'],
                            'variant': variant['name'],
                            'alg': alg,
                            'kwargs': variant['kwargs'],
                            'result_path': config['result_path']
                        }
                        if config['name'] == 'saddle':
                            fs.append(pool.submit(run_saddle_unit_exp, setup))
                        elif config['name'] == 'mop':
                            fs.append(pool.submit(run_mop_unit_exp, setup))
                        elif config['name'] == 'adhd':
                            bkp_file = os.path.join(
                                os.path.dirname(__file__),
                                config['result_path'].replace('.json', '_bkp.json'))
                            if run == 0:
                                # remove any stale backup before the first run
                                try:
                                    os.remove(bkp_file)
                                except FileNotFoundError:
                                    pass
                            setup['n_init'] = config['n_init']
                            setup['start_seed'] = config['start_seed']
                            setup['result_path_bkp'] = bkp_file
                            fs.append(pool.submit(run_adhd_unit_exp, setup))
                        else:
                            raise Exception('unknown experiment')
    results = []
    for x in as_completed(fs):
        try:
            results.append(x.result())
        except Exception as e:
            print(e)
    print(results)
    with open(os.path.join(os.path.dirname(__file__), config['result_path']), 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)
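# A minimal example of the YAML layout run_experiment() above appears to expect,
# inferred only from the keys the function reads; the algorithm name and kwargs
# below are hypothetical placeholders, not part of the original code.
EXAMPLE_CONFIG_YAML = """
name: saddle
result_path: results/saddle.json
runs_offset: 0
num_runs: 2
dimensions: [2, 5]
fevals_per_dim: [10, 20]
algs: [my_alg]
my_alg:
  - name: default
    kwargs: {}
"""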
def build(self):
    '''
    Triggering build solves all candidate models in order to find the best
    one by score, then returns it. It also populates self.all_models and
    self.best with the collected information.
    '''
    with PPE() as exe:
        ppe = [exe.submit(self._solve, lag) for lag in range(2, self.lags)]
        for proc in as_completed(ppe):
            key, spec = proc.result()
            self.all_models[key] = spec
        del ppe
    self.best = self._check_all_models()
    return self.best
async def main(*peers):
    ts = []
    loop = aio.get_running_loop()
    bc = Blockchain.genesis(1)
    heads = {h(bytes(bc.blocks[0])): bc}
    if not peers:
        t, protocol = await loop.create_datagram_endpoint(
            lambda: Protocol(bc, heads), local_addr=('127.0.0.1', 9999))
        print('server up')
    else:
        for addr, port in peers:
            t, protocol = await loop.create_datagram_endpoint(
                lambda: Protocol(bc, heads), remote_addr=(addr, port))
            ts.append(t)
            print('get chain')
            protocol.getchain(0)
    try:
        ex = PPE(max_workers=1)
        while True:
            head = max(heads, key=lambda _h: len(heads[_h].blocks))
            bc = heads[head]
            b = await bc.mine(ex)
            if bc.verify(b):
                bc.add(b)
                del heads[head]
                heads[h(bytes(b))] = bc
                for addr in Protocol.peers:
                    protocol.sendblock(addr, b)
                print(f'MINED {b.ix}')
    finally:
        for t in ts:
            t.close()
def main():
    urls = set()
    urls |= scrape(
        (3, ['http://blog.livedoor.jp/geek/archives/cat_10022560.html']))
    print(urls)
    snapshots = sorted(glob.glob('tmp/snapshots/*'))
    for snapshot in snapshots:
        try:
            urls |= pickle.loads(open(snapshot, 'rb').read())
        except EOFError as ex:
            continue
    while True:
        urltmp = set()
        with PPE(max_workers=CPU_SIZE) as exe:
            for _urlret in exe.map(scrape, chunk_urls(urls)):
                if _urlret is not None:
                    urltmp |= _urlret
        urls = urltmp
        if len(urls) == 0:
            break
def download(self):
    while True:
        try:
            info = urllib.request.urlopen(self.video_url).info()
        except urllib.error.HTTPError:
            # retry once with browser-like headers, then stop retrying
            req = urllib.request.Request(self.video_url)
            req.headers.update(headers())
            info = urllib.request.urlopen(req).info()
            break
        except http.client.IncompleteRead:
            continue
        except AttributeError:
            exit()
        else:
            break
    self.total_length = int(info.get('content-length'))
    self.file_type = info.get('content-type').split('/')[-1]
    self.split_num = self.total_length // 300000
    print('Use cpu thread count: ', cpu_count())
    print('Split count: ', self.split_num, '\n')
    l = [(self.total_length + i) // self.split_num for i in range(self.split_num)]
    args = [(i, 0 if i == 0 else sum(l[:i]) + 1, sum(l[:i]) + val)
            for i, val in enumerate(l)]
    with PPE(max_workers=cpu_count(),
             initializer=self.pool_init,
             initargs=(Value('i', 0), )) as exe:
        exe.map(self.split_download, args)
    with open('{}.{}'.format(self.title, self.file_type), 'wb') as f:
        self.combine(f)
    return str(
        round(
            os.path.getsize('{}.{}'.format(self.title, self.file_type)) /
            (1024.0 ** 2), 1)) + 'MB'
def run():
    files = list(Path(CONFIG.HREF_PATH).glob('*'))
    random.shuffle(files)
    files = files[:100_0000]
    args = {}
    for idx, file in enumerate(files):
        key = idx % 16
        if args.get(key) is None:
            args[key] = []
        args[key].append(file)
    args = [(key, files) for key, files in args.items()]
    objs = set()
    with PPE(max_workers=4) as exe:
        for _objs in exe.map(pmap, args):
            objs |= _objs
    print('total size', len(objs))
    with open('urls.pkl.gz', 'wb') as fp:
        fp.write(gzip.compress(pickle.dumps(objs)))
    for url in list(objs)[:100]:
        print(url)
import asyncio, types
from concurrent.futures import ProcessPoolExecutor as PPE

ppe = PPE()


# IO bound
async def do_something(fname, data, loop):
    await write(fname, data, loop)


@types.coroutine
def write(f, data, loop):
    yield from loop.run_in_executor(ppe, do_write, f, data)


def do_write(f, data):
    with open(f, 'w') as f:
        for line in data:
            f.write(line)


# CPU bound
async def do_something_else(n, loop):
    await count(n, loop)


@types.coroutine
def count(n, loop):
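    # Assumed completion (the original snippet breaks off here): mirror the
    # write() wrapper above and push the CPU-bound counting onto the pool.
    yield from loop.run_in_executor(ppe, do_count, n)


# Hypothetical CPU-bound helper, added only so the sketch above is runnable;
# it is not part of the original snippet. It must stay module-level so the
# process pool can pickle it.
def do_count(n):
    total = 0
    for i in range(n):
        total += i
    return total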
def main():
    with PPE(max_workers=16) as exe:
        r = [r for r in exe.map(calc, list(range(1, 16)))]
    print(r)
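# calc() is not defined in the snippet above. A minimal stand-in so the example
# is runnable (any picklable, module-level function works); exe.map() returns
# results in input order, so r would be [1, 4, 9, ..., 225] with this placeholder.
def calc(n):
    return n * n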
def football(self, begin_date="2010-01-01", end_date=None):
    interval = DateHandler(begin_date).create_interval_till(end_date)
    with PPE(self.cpu_count) as worker_pool:
        worker_pool.map(DataCollector.fetch_day_events, interval)
    # (fragment: the enclosing per-URL function and its opening try block begin above this excerpt)
        return print(os.path.exists(f'images/{hashs}.{type}'))  # debug short-circuit: everything below is skipped while this line is present
        if os.path.exists(f'images/{hashs}.{type}'):
            return
        session = requests.session()
        session.proxies = {
            'http': 'socks5h://localhost:9050',
            'https': 'socks5h://localhost:9050'
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
        }
        try:
            r = session.get(img_url, headers=headers)
        except Exception as ex:
            print(ex)
            return
        bins = r.content
        with open(f'images/{hashs}.{type}', 'wb') as fp:
            fp.write(bins)
        print('finish', img_url)
    except Exception as ex:
        print(ex)


from concurrent.futures import ProcessPoolExecutor as PPE

PPE(max_workers=300).map(pmap, img_urls)
        # (fragment: the enclosing def pmap(...) and its per-path loop begin above this excerpt)
        try:
            obj = json.load(path.open())
            created_at = datetime.strptime(
                obj['created_at'], '%a %b %d %H:%M:%S +0000 %Y') + timedelta(hours=9)
            day = created_at.day
            hour = created_at.hour
            #print(created_at)
            text = obj['text']
            if 'オセロニア' in text:  # tweets mentioning the game "Othellonia"
                if time_freq.get((day, hour)) is None:
                    time_freq[(day, hour)] = 0
                time_freq[(day, hour)] += 1
        except Exception as ex:
            print(ex)
    return time_freq


args = [(key, paths) for key, paths in key_paths.items()]
time_freq = {}
with PPE(max_workers=12) as exe:
    for _time_freq in exe.map(pmap, args):
        for time, freq in _time_freq.items():
            if time_freq.get(time) is None:
                time_freq[time] = 0
            time_freq[time] += freq
for time, freq in sorted(time_freq.items(), key=lambda x: x[0]):
    day, hour = time
    print(f'{day}日{hour}時', freq)  # e.g. "5日13時" = day 5, 13:00
def aaf_dist(datfile, countfile, nThreads, samples, kl, long=False):
    # check executables
    if os.system('which fitch_kmerX > /dev/null'):
        if long:
            fitch = './fitch_kmerX_long'
        else:
            fitch = './fitch_kmerX'
        if not is_exe(fitch):
            print(fitch + ' not found. Make sure it is in your PATH or the')
            print('current directory, and that it is executable')
            sys.exit()
    else:
        if long:
            fitch = 'fitch_kmerX_long'
        else:
            fitch = 'fitch_kmerX'
    # process the .dat.gz file
    try:
        iptf = smartopen(datfile, 'rt')
    except IOError:
        print('Cannot open file', datfile)
        sys.exit()
    if not os.path.isfile(countfile):
        print('Cannot find file', countfile)
        sys.exit()
    try:
        total = open(countfile, 'rt')
    except IOError:
        print('Cannot open file', countfile)
        sys.exit()
    try:
        infile = open('infile', 'wt')
    except IOError:
        print('Cannot open infile for writing')
        sys.exit()
    ### Read header
    sl = []  # species list
    line = iptf.readline()
    ll = line.split()
    if kl != float(ll[1]):  # kmer length
        print("The recorded k in the shared kmer table file is not the same as the k supplied to aaf_dist; exiting now.")
        sys.exit()
    while True:
        line = iptf.readline()
        if line.startswith('#-'):
            continue
        elif line.startswith('#sample'):
            ll = line.split()
            sl.append(ll[1])
        else:
            break
    if sl != samples:
        print("The recorded sample list in the shared kmer table file is not the same as the one supplied to aaf_dist; exiting now.")
        sys.exit()
    ### Initialize shared kmers matrix
    sn = len(samples)  # species number
    nshare = [[0] * sn for i in range(sn)]
    ### It turns out to be very slow if we give very big chunks. So we will be
    ### using only 1G of RAM in total. As a result, we can use all the cores
    ### available, which was not possible for the kmer_count step.
    cpu_num = psutil.cpu_count()
    ### Compute the number of lines to process per thread (chunk size)
    line = iptf.readline()
    line_size = sys.getsizeof(line)
    chunkLength = int(1024 ** 3 / cpu_num / line_size)
    print('chunkLength = {}'.format(chunkLength))
    while True:
        lines = []
        for nLines in range(chunkLength):
            if not line:  # if empty
                break
            lines.append(line)
            line = iptf.readline()
        if not lines:  # if empty
            break
        ### Compute shared kmer matrix
        with PPE(max_workers=cpu_num) as executor:
            for result in executor.map(countShared_single, lines):
                for i in range(sn):
                    for j in range(i + 1, sn):
                        nshare[i][j] += result[i][j]
    iptf.close()
    ### Compute distance matrix
    ntotal = [0.0] * sn
    for i in range(sn):
        ntotal[i] = float(total.readline().split()[1])
    dist = [[0] * sn for i in range(sn)]
    for i in range(sn):
        for j in range(i + 1, sn):
            mintotal = min(ntotal[i], ntotal[j])
            if nshare[i][j] == 0:
                dist[j][i] = dist[i][j] = 1
            else:
                distance = (-1 / float(kl) * math.log(nshare[i][j] / mintotal))
                #print(mintotal, nshare[i][j])
                dist[j][i] = dist[i][j] = distance
            nshare[j][i] = nshare[i][j]
    total.close()
    ### Write infile
    infile.write('{} {}'.format(sn, sn))
    namedic = {}
    for i in range(sn):
        lsl = len(sl[i])
        if lsl >= 10:
            ssl = sl[i][:10]
            appendix = 1
            while ssl in namedic:
                ssl = ssl[:-len(str(appendix))] + str(appendix)
                appendix += 1
        if lsl < 10:
            ssl = sl[i] + ' ' * (10 - lsl)
        namedic[ssl] = sl[i]
        infile.write('\n{}'.format(ssl))
        for j in range(sn):
            infile.write('\t{}'.format(dist[i][j]))
    infile.close()
    ### Run fitch_kmer
    print('{} building tree'.format(time.strftime("%c")))
    if os.path.exists("./outfile"):
        os.system("rm -f outfile outtree")
    command = 'printf "K\n{}\nY" | {} > /dev/null'.format(int(kl), fitch)
    os.system(command)
    fh = open('outtree', 'rt')
    fh1 = open(datfile.split('.')[0] + '.tre', 'wt')
    for line in fh:
        for key in namedic:
            key_new = key.rstrip() + ":"
            if key_new in line:
                newline = line.replace(key_new, namedic[key].rstrip() + ":", 1)
                line = newline
        fh1.write(line)  # this can be either line or newline because when the
                         # loop exits, line == newline
    fh.close()
    fh1.close()
    command = 'mv infile {}.dist'.format(datfile.split('.')[0])
    os.system(command)
    os.system('rm -f outfile outtree')
    print('{} end'.format(time.strftime("%c")))
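# Aside: ProcessPoolExecutor.map() also accepts a chunksize argument (Python 3.5+)
# that batches several items into a single inter-process round trip, which is the
# same overhead the manual chunking in aaf_dist() tries to control. A minimal
# standalone sketch of the effect; the worker function here is a placeholder,
# not part of aaf_dist():
from concurrent.futures import ProcessPoolExecutor as PPE


def _square(x):
    return x * x


if __name__ == '__main__':
    with PPE(max_workers=4) as executor:
        # chunksize=256 ships items to the workers in batches of 256,
        # cutting per-item pickling/IPC overhead for long input iterables.
        results = list(executor.map(_square, range(10000), chunksize=256))
    print(results[:5])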
    # (fragment: the enclosing def run(arg) and its opening try block begin above this excerpt)
        driver.get(url)
        html = driver.page_source
        soup = bs4.BeautifulSoup(html, 'lxml')
        belements = soup.find_all('input', {'class': 'LinkButton'})
        selements = driver.find_elements_by_xpath(
            '//input[contains(@class,"LinkButton")]')
        #print(selements)
        element = selements[index]
        print('try', belements[index].get('value'))
        for i in range(10**10):
            print('now', i + 1, belements[index].get('value'))
            try:
                element.click()
                html = driver.page_source
                save_html_with_hash(html)
            except Exception as ex:
                print(ex)
            element = driver.find_element_by_xpath(
                '//input[contains(@name,"fwListNaviBtnNext")]')
    except Exception as ex:
        print(ex)
    driver.quit()


url = 'https://www.hellowork.go.jp/servicef/130020.do?action=initDisp&screenId=130020'
args = []
for index in range(53):
    args.append((url, index))
with PPE(max_workers=53) as exe:
    exe.map(run, args)
        # (fragment: the enclosing function that receives executor and fileListToBeScanned begins above this excerpt)
        executor.submit(ocrFunc, fileListToBeScanned[i])


if __name__ == "__main__":
    startTime = time.time()
    imagesFolderPath_1 = getAbsFolderPath("imgP1")
    imagesFolderFileList_1 = os.listdir("imgP1")
    imgP1Dir = list(
        map(lambda x: imagesFolderPath_1 + x, imagesFolderFileList_1))
    imagesFolderPath_2 = getAbsFolderPath("imgP2")
    imagesFolderFileList_2 = os.listdir("imgP2")
    imgP2Dir = list(
        map(lambda x: imagesFolderPath_2 + x, imagesFolderFileList_2))
    imagesFolderPath_3 = getAbsFolderPath("imgP3")
    imagesFolderFileList_3 = os.listdir("imgP3")
    imgP3Dir = list(
        map(lambda x: imagesFolderPath_3 + x, imagesFolderFileList_3))
    imagesFolderPath_4 = getAbsFolderPath("imgP4")
    imagesFolderFileList_4 = os.listdir("imgP4")
    imgP4Dir = list(
        map(lambda x: imagesFolderPath_4 + x, imagesFolderFileList_4))
    # Run 4 processes in parallel; each process holds 8 threads
    with PPE(multiprocessing.cpu_count() * 2) as executor:
        # compressImage("./imgP1", "./imgP1_compressed")
        submitOCRExecutor(multiProcessingDetect, imgP1Dir, imgP2Dir, imgP3Dir,
                          imgP4Dir)
    endTime = time.time()
    print(endTime - startTime, " sec used")
def initialize_pool(cls, num_processes=0):
    if num_processes > 1:
        if DDFacetSim.__exec_pool is None:
            DDFacetSim.__exec_pool = PPE(max_workers=num_processes)
            DDFacetSim.__IN_PARALLEL_INIT = True
def aaf_kmercount(dataDir, k, n, nThreads, memPerThread):
    # check executables
    if k > 25:
        if os.system('which kmer_countx > /dev/null'):
            kmerCount = './kmer_countx'
            if not is_exe(kmerCount):
                print('kmer_countx not found. Make sure it is in your PATH or the')
                print('current directory, and that it is executable')
                sys.exit(1)
        else:
            kmerCount = 'kmer_countx'
    else:
        if os.system('which kmer_count > /dev/null'):
            kmerCount = './kmer_count'
            if not is_exe(kmerCount):
                print('kmer_count not found. Make sure it is in your PATH or the')
                print('current directory, and that it is executable')
                sys.exit(1)
        else:
            kmerCount = 'kmer_count'
    ### Get sample list:
    samples = []
    for fileName in os.listdir(dataDir):
        if os.path.isdir(os.path.join(dataDir, fileName)):
            samples.append(fileName)
        else:
            if not fileName.startswith('.'):
                sample = fileName.split(".")[0]
                if sample in samples:
                    sample = fileName.split(".")[0] + fileName.split(".")[1]
                    if sample in samples:
                        print('Error, redundant sample or file names. Aborting!')
                        sys.exit(3)
                os.system("mkdir {}/{}".format(dataDir, sample))
                os.system("mv {}/{} {}/{}/".format(dataDir, fileName, dataDir, sample))
                samples.append(sample)
    samples.sort()
    print(time.strftime('%c'))
    print('SPECIES LIST:')
    for sample in samples:
        print(sample)
    ### Prepare kmer_count jobs
    jobList = []
    for sample in samples:
        outFile = '{}.pkdat.gz'.format(sample)
        command = '{} -l {} -n {} -G {} -o {} -f '.format(
            kmerCount, k, n, memPerThread, outFile)
        command1 = ''
        for inputFile in os.listdir(os.path.join(dataDir, sample)):
            inputFile = os.path.join(dataDir, sample, inputFile)
            handle = smartopen(inputFile)
            firstChar = handle.read(1)
            if firstChar == '@':
                seqFormat = 'FQ'
            elif firstChar == '>':
                seqFormat = 'FA'
            else:
                print('Error, file {} is not FA or FQ format. Aborting!'.format(inputFile))
                sys.exit(3)
            command1 += " -i '{}'".format(inputFile)
        command += '{}{}> {}.wc'.format(seqFormat, command1, sample)
        jobList.append(command)
    ### Run jobs
    with PPE(max_workers=nThreads) as executor:
        executor.map(run_command, jobList)
    return samples
def main():
    with PPE(max_workers=2) as exe:
        exe.map(rap, [random_sample, filter_words])
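# rap() is not shown in this snippet. Judging from the call above, it is
# presumably a thin wrapper that simply invokes the callable it receives,
# since Executor.map() applies one function to each item. A minimal assumed
# version (random_sample and filter_words must be picklable, module-level
# callables for this to work with a process pool):
def rap(fn):
    return fn()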
    # (fragment: the enclosing download helper that defines simple_url, link_hash and simple_hash begins above this excerpt)
    r = requests.get(simple_url)
    r.encoding = r.apparent_encoding
    html = r.text
    open(f'darturl_clean/{link_hash}', 'w').write(simple_url)
    open(f'htmls/{simple_hash}.gz', 'wb').write(gzip.compress(bytes(html, 'utf8')))
    print(simple_url)


from concurrent.futures import ProcessPoolExecutor as PPE

paths = [path for path in Path('./xml_parse').glob('*')]


def pmap(path):
    #print(path)
    obj = json.load(path.open())
    #print(obj)
    link = obj['link']
    link_hash = obj['link_hash']
    if 'rdsig.yahoo.co.jp' in link:
        # rdsig links are too numerous to evaluate, so they are ignored
        print(link)
        rdsig((link, link_hash))
        ...
    else:
        print(link)
        pickup((link, link_hash))


with PPE(max_workers=24) as exe:
    exe.map(pmap, paths)
            # (fragment: the enclosing def pmap(arg), its per-path loop, try block and the call being closed here begin above this excerpt)
                                description=description,
                                body=body,
                                hrefs=hrefs)
            ffdb.save(key=url, val=parsed)
        except UnicodeError as ex:
            Path(path).unlink()
        except UnicodeEncodeError as ex:
            Path(path).unlink()
        except EOFError as ex:
            Path(path).unlink()
        except Exception as ex:
            print(ex)
            ffdb.save(key=url, val=None)
    gc.collect()
    print('finish batch', key)


args = {}
files = list(glob.glob('./tmp/htmls/*'))
random.shuffle(files)
size = len(files)
for idx, path in enumerate(files):
    key = idx % (size // 100000)
    #key = idx % 16
    if args.get(key) is None:
        args[key] = []
    args[key].append(path)
args = [(key, paths) for key, paths in args.items()]
print('made chunks')
#[pmap(arg) for arg in args]
with PPE(max_workers=8) as exe:
    exe.map(pmap, args)
            # (fragment: the enclosing def pmap(arg), its loop over anchor tags and try block begin above this excerpt)
            #href = re.sub(r'\?.*?$', '', href)
            hrefs.add(a.get('href'))
        except Exception as ex:
            print(ex)
            continue
    return hrefs


if '--resume' in sys.argv:
    urls = pickle.load(open('urls.pkl', 'rb'))
else:
    urls = pmap((-1, [url]))
print(urls)

DIST = 1
args = {key: [] for key in range(DIST)}
[args[index % DIST].append(url) for index, url in enumerate(urls)]
args = [(key, urls) for key, urls in args.items()]
#[pmap(arg) for arg in args]

from concurrent.futures import ProcessPoolExecutor as PPE

while True:
    with PPE(max_workers=DIST) as exe:
        urls = set()
        for _hrefs in exe.map(pmap, args):
            urls |= _hrefs
    pickle.dump(urls, open('urls.pkl', 'wb'))
    args = {key: [] for key in range(DIST)}
    [args[index % DIST].append(url) for index, url in enumerate(urls)]
    args = [(key, urls) for key, urls in args.items()]
if os.path.exists('./' + selection_dir):
    command = 'rm -r {}'.format(selection_dir)
    os.system(command)
command = 'mkdir {}'.format(selection_dir)
os.system(command)

# Run ReadsSelector
reads_cmd = []
for sample in samples:
    infiles = os.listdir(os.path.join(dataDir, sample))
    command = '{} -k sba.kmer -fa 1 -o {}/{}_selected '.format(
        ReadsSelector, selection_dir, sample)
    for infile in infiles:
        # leading space so multiple -s flags do not run together
        command += ' -s {}'.format(os.path.join(dataDir, sample, infile))
    reads_cmd.append(command)
with PPE(max_workers=nThreads) as executor:
    executor.map(run_command, reads_cmd)

# After selection
samples = aaf_kmercount(selection_dir, kl, n, options.nThreads,
                        memSize / options.nThreads)

### Merge output wc files
divFile = selection_dir + '.wc'
handle = open(divFile, 'w')
handle.close()
for sample in samples:
    countfile = sample + '.wc'
    os.system('cat {} >> {}'.format(countfile, divFile))
    os.remove(countfile)