def fetch_item_info(session, observations, claims, verbose=False):
    """
    Fetches information about wikidata items.

    :Parameters:
        session : :class:`mwapi.Session`
            An API session to use for querying
        observations : `iterable`(`dict`)
            A collection of observations to annotate
        claims : `list` ( `str` )
            A set of property names to look up claims for
        verbose : `bool`
            Print dots and stuff

    :Returns:
        An `iterator` of observations augmented with an `autolabel` field
        containing the requested information.  Note that observations that
        can't be found will be excluded.
    """
    # Observations are looked up in batches of 25, four batches at a time.
    batches = chunkify(observations, 25)
    _fetch_item_info = build_fetch_item_info(session, claims)

    # The context manager releases the worker threads once the generator is
    # exhausted or closed, instead of leaving the pool alive indefinitely.
    with ThreadPoolExecutor(max_workers=4) as executor:
        for annotated_batch in executor.map(_fetch_item_info, batches):
            for annotated_item in annotated_batch:
                yield annotated_item
                if verbose:
                    sys.stderr.write(".")
                    sys.stderr.flush()

    if verbose:
        sys.stderr.write("\n")
def fetch(self, tiles):
    """
    Execute all tile requests.

    Every tile is handed to ``self.fetch_tile`` on a pool of up to 32 worker
    threads; the call returns once all of them have finished.

    :param tiles: List of tile requests.
    """
    # `with` shuts the pool down (waiting for the workers) even when
    # submitting the tasks raises.
    with ThreadPoolExecutor(max_workers=32) as pool:
        # NOTE(review): the iterator returned by map() is never consumed, so
        # exceptions raised by fetch_tile are not propagated and the
        # `timeout=5` (which only applies while iterating results) never
        # fires. Kept as-is to preserve the existing contract.
        pool.map(self.fetch_tile, tiles, timeout=5)
def process_seqs(input_seqs, threads=5, extract_regions=False, known_names=None):
    """Map chunks of sequences with ``map_seqs_to_ref``, optionally threaded.

    Yields every mapped row and, for each row, any extra rows produced by
    linking it against the regions registered under the row's ``RegionName``.
    Regions are only loaded when ``extract_regions`` is true; ``known_names``
    is accepted but not used here.
    """
    region_dict = load_region() if extract_regions else defaultdict(list)

    chunksize = 50
    chunk_iterable = yield_chunks(iter(input_seqs), chunksize)

    if threads > 1:
        logging.warning('Started ThreadPool with %i workers' % threads)
        mapper = ThreadPoolExecutor(max_workers=threads).map
    else:
        logging.warning('Running with NO THREADS!')
        mapper = imap
    res_iter = chain.from_iterable(mapper(map_seqs_to_ref, chunk_iterable))

    # Count of runs of consecutive rows sharing a name (tracked, not reported).
    distinct_names = 0
    last_name = None
    for row in res_iter:
        current_name = row['Name']
        if current_name != last_name:
            last_name = current_name
            distinct_names += 1
        yield row

        for region_row in region_dict[row['RegionName']]:
            linked = region_linker(deepcopy(row), region_row)
            if linked:
                yield linked
def handler(event, contest):
    """Drain an SQS queue in parallel and accumulate the messages.

    Reads ``queueUrl`` and ``messageCount`` from ``event``, issues
    ``messageCount // batch_count`` calls to ``one_request`` on a thread
    pool, and feeds every returned message to a ``Sum`` accumulator.
    ``contest`` is not used.
    """
    logger.info("Start!")
    cal = Sum()
    queue_url = event['queueUrl']
    message_count = event['messageCount']
    queue = sqs.Queue(queue_url)
    num_of_calls = message_count // batch_count
    # Every call targets the same queue object.
    queues = [queue] * num_of_calls

    fetched = 0
    # `with` guarantees the 100-thread pool is released even if a request or
    # the accumulation raises.
    with ThreadPoolExecutor(max_workers=100) as executor:
        for response in executor.map(one_request, queues):
            fetched += len(response)
            for msg in response:
                cal.add(msg)

    logger.info("Receive API count: {}".format(num_of_calls))
    logger.info("Fetched messages: {}".format(fetched))
def play(av, n):
    """Run ``play_video_1`` for ``av`` once per proxy, on ``n`` threads.

    ``get_proxy(n)`` supplies the proxies; one line is printed per finished
    call, in proxy order.
    """
    proxy_list = get_proxy(n)
    play_video_av = partial(play_video_1, av=av)
    # `with` releases the worker threads when all calls are done (or one fails).
    with ThreadPoolExecutor(max_workers=n) as executor:
        for data in executor.map(play_video_av, proxy_list):
            # NOTE(review): the message has no placeholder, so `data` is never
            # shown; output text left unchanged.
            print("in main: 1 success".format(data))
def main():
    """Collect peers for every known node and submit the graph.

    Dumps the node store, queries ``get_peers_derp`` for each node on four
    threads, builds the node and edge tables, sends them with ``send_graph``
    and exits with status 0.
    """
    db.fixkeys(key_utils.to_ipv6)
    parser = argparse.ArgumentParser(description='Submit nodes and links to fc00')
    parser.add_argument('-v', '--verbose', help='increase output verbosity',
                        dest='verbose', action='store_true')
    parser.set_defaults(verbose=False)
    # Parsed for validation/--help only; the result is not used below.
    parser.parse_args()

    con = connect()
    nodes = dump_node_store(con)
    edges = {}

    def peer_requests():
        for ip, node in nodes.items():
            yield ip, keyFromAddr(node['addr']), node['path'], node['version']

    # Transpose the per-node tuples into one iterable per positional argument.
    columns = zip(*peer_requests())

    dbnodes = {}
    with ThreadPoolExecutor(max_workers=4) as executor:
        for peers, node_id, ip in executor.map(get_peers_derp, *columns):
            get_edges_for_peers(edges, peers, node_id)
            addpeersto(dbnodes, node_id, ip, peers)
            for peer_ip, peer_id in peers:
                addpeersto(dbnodes, peer_id, peer_ip)

    print('otay!')
    send_graph(dbnodes, edges)
    sys.exit(0)
def download_all_comics(output_directory, file_prefix):
    """Download every comic getting the latest comic id from the RSS feed.

    Comics ``1..max_id`` are downloaded with ``download_single_comic``, where
    ``max_id`` is the highest id found in the feed (at least 1). When
    ``file_prefix`` is truthy the file names come from
    ``prefix(file_prefix, ids)``; otherwise the ids themselves are used.
    The call returns once all downloads have finished.
    """
    max_comic_id = 1
    for comic_id, _ in get_latest_comics_from_feed():
        max_comic_id = max(max_comic_id, int(comic_id))

    comic_id_list = list(range(1, max_comic_id + 1))
    if file_prefix:
        file_name_list = prefix(file_prefix, comic_id_list)
    else:
        file_name_list = comic_id_list

    # Kept a default of 10. We don't want to leech off XKCD.com
    # `with` waits for the downloads and releases the threads; the results
    # are still not inspected, so per-comic failures stay silent as before.
    with ThreadPoolExecutor(max_workers=10) as executor:
        executor.map(download_single_comic, comic_id_list,
                     repeat(output_directory), file_name_list)
def main():
    """Run ``run_test`` over all shader files and print the total runtime.

    Sets the environment variables the run relies on, requires
    ``bin/glslparsertest`` to exist (exits with status 1 otherwise) and writes
    each test's output to stdout in input order.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("shader",
                        nargs='*',
                        default=['shaders'],
                        metavar="<shader_file | shader dir>",
                        help="A shader file or directory containing shader "
                             "files. Defaults to 'shaders/'")
    args = parser.parse_args()

    os.environ["shader_precompile"] = "true"
    os.environ["allow_glsl_extension_directive_midshader"] = "true"
    if "INTEL_DEBUG" in os.environ:
        print("Warning: INTEL_DEBUG environment variable set!",
              file=sys.stderr)
        os.environ["INTEL_DEBUG"] += ",vs,gs,fs"
    else:
        os.environ["INTEL_DEBUG"] = "vs,gs,fs"

    try:
        os.stat("bin/glslparsertest")
    except OSError:
        print("./bin must be a symlink to a built piglit bin directory")
        sys.exit(1)

    runtimebefore = time.time()

    filenames = process_directories(args.shader)

    # One worker per CPU; `with` releases the threads when the run is over.
    with ThreadPoolExecutor(cpu_count()) as executor:
        for t in executor.map(run_test, filenames):
            sys.stdout.write(t)

    runtime = time.time() - runtimebefore
    print("shader-db run completed in {:.1f} secs".format(runtime))
def set_task(translator, translit=False):
    """
    Task Setter Coroutine

    End point destination coroutine of a purely consumer type.
    Delegates Text IO to the `write_stream` function.

    Values sent into the coroutine are queued; when the coroutine is closed,
    every queued task is run through ``translator`` on a pool of 8 threads
    and each result is passed to ``write_stream``.

    :param translator: Translator
    :type translator: Function

    :param translit: Transliteration Switch
    :type translit: Boolean
    """
    # Initialize Task Queue
    queue = []

    # Function Partial
    output = "translit" if translit else "trans"
    stream = partial(write_stream, output=output)

    try:
        while True:
            task = yield
            queue.append(task)
    except GeneratorExit:
        # `with` releases the worker threads once the queue has been flushed.
        with ThreadPoolExecutor(max_workers=8) as workers:
            for translated in workers.map(translator, queue):
                stream(translated)
def run(self):
    """
    Start the check run, the run is made of three stages:
    1. Preparation - get current date, clear counters, prepare queue of project
    2. Execution - process every project in the queue
    3. Finalize - create `db.Run` entry with counters and time
    """
    # 1. Preparation phase
    # We must convert it to datetime for comparison with sqlalchemy TIMESTAMP column
    session = db.Session()
    time = arrow.utcnow().datetime
    self.clear_counters()
    queue = self.construct_queue(time)
    total_count = len(queue)

    if not queue:
        return

    # 2. Execution
    _log.info(
        "Starting check on {} for total of {} projects".format(time, total_count)
    )
    pool_size = config.get("CRON_POOL", 10)
    # Leaving the `with` block waits for every update to finish. Without that
    # wait the counters below were read while workers were still running.
    with ThreadPoolExecutor(pool_size) as pool:
        pool.map(self.update_project, queue)

    # 3. Finalize
    # NOTE(review): the "fail" slot repeats error_counter; confirm which
    # counter was intended before changing the message.
    _log.info(
        "Check done. Checked ({}): error ({}), success ({}), fail ({})".format(
            total_count,
            self.error_counter,
            self.success_counter,
            self.error_counter,
        )
    )

    run = db.Run(
        created_on=time,
        total_count=total_count,
        error_count=self.error_counter,
        ratelimit_count=self.ratelimit_counter,
        success_count=self.success_counter,
    )
    session.add(run)
    session.commit()
def poem_from_template(template, db, sound_cache=None):
    """Turn each template line into a poem line, four lines at a time.

    Every line goes through ``extract_ruleset`` and then ``ruleset_to_line``
    (combined by ``threaded``); the resulting lines are returned as a list in
    template order.
    """
    sounds_by_letter = map_letters_to_sounds(db, template, sound_cache)
    line_pipeline = threaded(
        partial(extract_ruleset, db, sounds_by_letter),
        partial(ruleset_to_line, db),
    )
    # The pool is shut down (waiting for all lines) before results are read.
    with ThreadPoolExecutor(4) as workers:
        pending_lines = workers.map(line_pipeline, template)
    return list(pending_lines)
def __get_reports_for_domains_collection(domains, key, threads_count=1):
    """Fetch reports for ``domains`` chunk by chunk and merge the results.

    ``domains`` is split into ``CHUNK_SIZE`` pieces; each piece is fetched
    with ``__get_for_domains`` (using ``RETRY_COUNT``) on ``threads_count``
    threads, and the per-chunk results are combined by ``__merge_dicts``.
    """
    def fetch_chunk(chunk):
        return __get_for_domains(chunk, key, RETRY_COUNT)

    # `with` releases the worker threads as soon as all chunks are fetched.
    with ThreadPoolExecutor(threads_count) as executor:
        maps_list = list(executor.map(fetch_chunk, __split(domains, CHUNK_SIZE)))
    return __merge_dicts(*maps_list)
def test_adding_next_tick_from_another_thread(self):
    """Add 1000 next-tick callbacks from 1000 threads and count the calls."""
    # The test has probabilistic nature - there's a slight chance it'll give a false negative
    with LoopAndGroup(quit_after=15) as ctx:
        n = 1000
        func = _make_invocation_counter(ctx.io_loop, stop_after=n)
        # `with` joins and releases the n worker threads once every callback
        # has been registered.
        with ThreadPoolExecutor(n) as tpe:
            list(tpe.map(ctx.group.add_next_tick_callback, repeat(func, n)))
    assert n == func.count()
def run(self):
    """Process queued targets in rounds until no new targets are produced.

    Each round swaps in a fresh ``self.next_queue``, drains the previous one,
    runs ``self.check`` on every drained target using ``self.workers``
    threads, and passes each result (unpacked) to ``self.log``.
    """
    while not self.next_queue.empty():
        queue = self.next_queue
        self.next_queue = Queue()

        targets = []
        while not queue.empty():
            targets.append(queue.get())

        # A pool is created per round; `with` makes sure it is also torn down
        # per round instead of accumulating idle threads.
        with ThreadPoolExecutor(self.workers) as executor:
            for result in executor.map(self.check, targets):
                self.log(*result)
def poem_from_template(template, db: Database, corpus_id, sound_cache=None):
    """Turn each template line into a poem line using one shared connection.

    A connection is opened from the engine for ``db`` and handed to the
    letter/sound lookup and to both stages of the per-line pipeline
    (``extract_ruleset`` then ``ruleset_to_line``). Lines are processed on
    four threads and returned as a list in template order.
    """
    engine = get_engine(db)
    conn = engine.connect()
    try:
        letter_sound_map = map_letters_to_sounds(conn, corpus_id, template, sound_cache)
        process_tmpl_line = threaded(
            partial(extract_ruleset, conn, corpus_id, letter_sound_map),
            partial(ruleset_to_line, conn, corpus_id),
        )
        with ThreadPoolExecutor(4) as executor:
            poem_lines = list(executor.map(process_tmpl_line, template))
    finally:
        # Return the connection instead of leaking one per call.
        # NOTE(review): assumes the object returned by engine.connect()
        # exposes close() (as SQLAlchemy connections do) - confirm.
        conn.close()
    return poem_lines
def test_many_individual(self):
    """Issue 50 GET requests to ROOT_URL on 10 threads within the time limit."""
    def method(_):
        r = requests.get(ROOT_URL)
        data = r.json()
        self.assertTrue(data)
        print('GG')
        return data

    with self.assertTimeTakenLessThan(80):
        # `with` releases the worker threads when the requests are done.
        with ThreadPoolExecutor(max_workers=10) as pool:
            list(pool.map(method, range(50)))
def resolve_mirrors(self):
    """Resolve every mirror's hostname to its addresses, four at a time.

    Returns a list of ``(mirror ID, [address, ...])`` tuples in the order the
    mirrors come back from ``get_mirrors_from_database``.
    """
    mirrors = self.get_mirrors_from_database()

    def not_a_lambda_function(mirror):
        mirror_hostname = parse.urlparse(mirror['mirror']).hostname
        # The last element of each addrinfo tuple is the socket address; its
        # first item is the host address.
        address_list = [
            addrinfo[-1][0]
            for addrinfo in socket.getaddrinfo(mirror_hostname, 80)
        ]
        return (mirror['ID'], address_list)

    # NOTE(review): assumes TPE is a concurrent.futures executor (context
    # manager support) - confirm against the file's imports.
    with TPE(4) as threadpool:
        return list(threadpool.map(not_a_lambda_function, mirrors))
def main(args):
    """Run every config/run/repetition combination of a simulation.

    Asks ``opp_run -a`` for the configs and their run counts, builds one
    ``opp_run`` command line per (repetition, config, run) and executes them
    with ``runcmd`` on ``args.jobs`` threads. Returns when all commands have
    been run.
    """
    # Find out the configs and their number of runs
    configs = []
    # --repeat=1: do not use repeat of ini file
    cmd = ["opp_run", "-a", args.config, "--repeat=1"]
    configs_output = subprocess.check_output(cmd)
    for line in configs_output.splitlines():
        m = re.search("Config (.*): ([0-9]*)", str(line))
        if m:
            config = m.group(1)
            runs = int(m.group(2))
            if config != "General":
                configs.append({'config': config, 'runs': runs})
    print(configs)

    # Build up commands
    cmds = []
    for repetition in range(0, int(args.repetitions)):
        for config in configs:
            for run in range(0, config['runs']):
                name = str(config['config']) + "-" + str(run) + "-" + str(repetition)
                cmds.append(["opp_run", "--repeat=1", "-u", "Cmdenv",
                             "-r", str(run),
                             "--output-scalar-file=" + args.results + "/" + name + ".sca",
                             "--output-vector-file=" + args.results + "/" + name + ".vec",
                             "--seed-set=" + str(repetition),
                             "-c", config['config'],
                             "-n", INET_DSME + "simulations:" + INET_DSME + "src:" + args.inet + "/examples:" + args.inet + "/src",
                             "-l", args.inet + "/src/INET",
                             "-l", INET_DSME + "src/inet-dsme",
                             args.config])

    # `with` waits for all commands and releases the threads; results are
    # not inspected, as before.
    with ThreadPoolExecutor(max_workers=int(args.jobs)) as executor:
        executor.map(runcmd, cmds)
def seq_align_to_ref(input_seqs, ref_seq, max_workers=None):
    """Aligns all sequences to a reference.

    Each ``(name, seq)`` pair is aligned against ``ref_seq`` with
    ``call_muscle`` and mapped with ``seq_map_to_ref``. Yields
    ``(name, ref_align)`` tuples in input order. A thread pool is used only
    when ``max_workers`` is greater than 1; with the default ``None`` the
    alignments run sequentially.
    """
    check_seqs = [[(name, seq), ("__ref__", ref_seq)] for name, seq in input_seqs]

    executor = None
    # Explicit None check: `None > 1` raises TypeError on Python 3.
    if max_workers is not None and max_workers > 1:
        executor = ThreadPoolExecutor(max_workers=max_workers)
        res = executor.map(call_muscle, check_seqs)
    else:
        res = imap(call_muscle, check_seqs)

    try:
        for alignment in res:
            adict = dict(alignment)
            # The only key besides the reference marker is the sequence name.
            name = [key for key in adict.keys() if key != "__ref__"][0]
            ref_align = seq_map_to_ref(adict[name], adict["__ref__"])
            yield name, ref_align
    finally:
        if executor is not None:
            executor.shutdown()
class OpcacheManager(object):
    """Sends opcache invalidation requests to hosts' admin port in parallel."""

    # Global timeout for all objects
    TIMEOUT = 5

    def __init__(self, admin_port):
        self.admin_port = admin_port
        self.threadpool = ThreadPoolExecutor(max_workers=10)

    def _invalidate_host(self, host, filename):
        """Request an opcache free on one host.

        Returns ``(True, None)`` on success, or ``(False, message)`` when the
        request fails for any reason.
        """
        url = "http://{hostname}:{port}/opcache-free".format(
            hostname=host, port=self.admin_port)
        params = {} if filename is None else {'file': filename}
        try:
            response = requests.get(url, params=params, timeout=self.TIMEOUT)
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            return (False, "Response returned error {}".format(str(e)))
        except requests.exceptions.Timeout:
            return (False, 'A timeout happened before a response was received')
        except Exception as e:
            return (False, str(e))
        return (True, None)

    def invalidate(self, hosts, filename):
        """Invalidates files/directories (or all) opcache."""
        def invalidate_closure(host):
            return (host, self._invalidate_host(host, filename))

        # Collect all failed results and return them to the caller
        failures = {}
        for host, outcome in self.threadpool.map(invalidate_closure, hosts):
            if not outcome[0]:
                failures[host] = outcome[1]
        return failures

    def invalidate_all(self, config, filename=None):
        """Invalidate on every target group and return the merged failures."""
        target_groups = targets.DirectDshTargetList('mw_web_clusters', config)
        # Fallback if nothing was defined.
        if not target_groups.all:
            target_groups.primary_key = 'dsh_targets'
            target_groups.deploy_groups = None

        failed = {}
        for group in target_groups.groups.values():
            failed.update(self.invalidate(group.targets, filename))
        return failed
try:
    # The ABC aliases were removed from `collections` in Python 3.10.
    from collections.abc import Iterable as _IterableABC
except ImportError:  # Python 2
    from collections import Iterable as _IterableABC


class parallel_map(_IterableABC):
    """Iterable over ``function`` applied to ``iterables`` on a thread pool.

    The work is submitted to a ``ThreadPoolExecutor`` of ``pool_size`` threads
    at construction time; iterating yields the results in input order.

    Raises ``TypeError`` when ``pool_size`` is not an integer, ``function`` is
    not callable, or no iterable is given.
    """

    def __init__(self, pool_size, function, *iterables):
        if not isinstance(pool_size, numbers.Integral):
            raise TypeError('pool_size must be an integer, not ' +
                            repr(pool_size))
        elif not callable(function):
            raise TypeError('function must be callable, not ' + repr(function))
        elif not iterables:
            raise TypeError('missing iterable')
        self.pool = ThreadPoolExecutor(pool_size)
        self.results = self.pool.map(function, *iterables)

    def __iter__(self):
        # `finally` shuts the pool down even when the consumer stops early or
        # a task raises, not only after a complete pass.
        try:
            for value in self.results:
                yield value
        finally:
            self.pool.shutdown()
def batch_loader(batch_list, ignore_photos=False, photo_size=(224, 224), pad=0):
    """Collate a list of samples into a tuple of tensors.

    Sample fields 0-2 are padded with ``pad_reviews``; field 3 holds photo
    paths, which are loaded with ``get_image`` on a thread pool; field 4 is
    passed through. Returns the three padded fields, their three length
    lists, the photos and field 4, each converted to a tensor.
    """
    # load all of photos using thread pool.
    photo_paths = []
    for sample in batch_list:
        for view in sample[3]:
            photo_paths.extend(view)

    def load_photo(path):
        return get_image(path, photo_size)

    # Results are read lazily below, after the pool has finished all loads.
    with ThreadPoolExecutor() as pool:
        results = pool.map(load_photo, photo_paths)

    data = [[] for _ in batch_list[0]]
    for sample in batch_list:
        for i, val in enumerate(sample):
            if i in (0, 1, 2) or i == 4:
                # reviews val=[sent_id1, sent_id2, ...] (0-2) and ratings (4)
                data[i].append(val)
            elif i == 3 and not ignore_photos:
                # photos: consume loaded images in the order paths were listed
                data[i].append([[next(results) for _ in ps] for ps in val])

    # pad sentences Ru and Ri
    max_count, max_len = 0, 0
    for ru, ri in zip(data[0], data[1]):
        max_count = max(max_count, len(ru), len(ri))
        longest = max(max(len(s) for s in ru), max(len(s) for s in ri))
        max_len = max(max_len, longest)

    padded_ru, len_ru = pad_reviews(data[0], max_count, max_len, pad=pad)
    padded_ri, len_ri = pad_reviews(data[1], max_count, max_len, pad=pad)
    padded_third, len_third = pad_reviews(data[2], pad=pad)
    data[0], data[1], data[2] = padded_ru, padded_ri, padded_third

    return (
        torch.LongTensor(data[0]),
        torch.LongTensor(data[1]),
        torch.LongTensor(data[2]),
        torch.LongTensor(len_ru),
        torch.LongTensor(len_ri),
        torch.LongTensor(len_third),
        torch.Tensor(data[3]),
        torch.Tensor(data[4]),
    )
def main(name):
    """Look up books matching ``name`` and echo the parsed info for each.

    Prints the total reported by ``getBooks``, then runs ``parseInfo`` over
    every book's ``volumeInfo`` on a thread pool and echoes each result
    followed by a separator line.
    """
    # Displays all the information related with the query
    info = getBooks(name)
    # Gets the books
    totalBooks = info.get("totalItems")
    # "items" may be missing from the response; treat that as no books
    # instead of failing on iteration over None.
    books = info.get("items") or []
    # displays books that contains the query
    click.echo(f'TOTAL OF BOOKS FOUND: {totalBooks}')
    click.echo("BOOKS FOUND: ")

    book_list = [book.get('volumeInfo') for book in books]

    # Parse every book in parallel; `with` releases the threads afterwards.
    with ThreadPoolExecutor(100) as executor:
        result = list(executor.map(parseInfo, book_list))

    for book in result:
        click.echo(book)
        click.echo("****************************************")
def test_series_reductions_concurrency(method):
    """Call the reduction ``method`` on a Series from 10 threads, 50 times."""
    from concurrent.futures import ThreadPoolExecutor

    np.random.seed(0)
    srs = [Series(np.random.random(10000)) for _ in range(1)]

    def call_test(sr):
        fn = getattr(sr, method)
        if method in ["std", "var"]:
            return fn(ddof=1)
        else:
            return fn()

    def f(sr):
        return call_test(sr + 1)

    # `with` joins and releases the worker threads when the test is done.
    with ThreadPoolExecutor(10) as e:
        list(e.map(f, srs * 50))
def multi_get_md5(file_list):
    """
    Compute the md5 mapping of every file in ``file_list`` on five threads.

    :param file_list: files passed one by one to ``get_file_md5``
    :return: the merged mapping of all per-file results, or ``False`` as soon
        as one result is falsy
    """
    # The pool has finished all work by the time results are read.
    with ThreadPoolExecutor(max_workers=5) as pool:
        results = pool.map(get_file_md5, file_list)

    files_md5 = {}
    for rtn in results:
        if rtn:
            files_md5.update(rtn)
        else:
            return False
    return files_md5
def _upload(self, src, bucket, key, extra_args):
    """Upload ``src`` to ``bucket``/``key``, using multipart for large files.

    A file that fits in a single chunk is sent with ``put_object``. Otherwise
    a multipart upload is created, the parts are uploaded on
    ``self._threads`` threads and the upload is completed. On any error
    (including ``KeyboardInterrupt``) the multipart upload is aborted and the
    error re-raised. Returns ``True`` on success.
    """
    reader = FileChunkReader(src)
    self._total_size = reader.get_size()
    chunks = reader.get_chunks()
    self._parts_number = len(chunks)

    # upload small file by using put_object.
    if self._parts_number == 1:
        chunk = chunks[0]
        LOGGER.info("%s is not need using MultipartUpload." % key)
        self.client.put_object(Bucket=bucket, Key=key, Body=chunk.read(), **extra_args)
        self._add_finished_size(chunk.size)
        if self._progress:
            self._callback(self._total_size, self._finished_size)
        LOGGER.info("upload %s finished" % key)
        return True

    response = self.client.create_multipart_upload(
        Bucket=bucket, Key=key, **extra_args)
    upload_id = response['UploadId']
    executor = ThreadPoolExecutor(max_workers=self._threads)
    try:
        do_upload = self._upload_one_part(bucket, key, upload_id, extra_args)
        # multi upload process.
        # Materialise the results: on Python 3 a bare map() object would be
        # handed to the client instead of the list of parts.
        parts = list(executor.map(do_upload, chunks))
        self.client.complete_multipart_upload(
            Bucket=bucket, Key=key, UploadId=upload_id,
            MultipartUpload={'Parts': parts})
        LOGGER.info("upload %s finished" % key)
    except (KeyboardInterrupt, Exception):
        # Let in-flight parts finish before aborting the upload.
        executor.shutdown()
        LOGGER.error("abort upload %s %s %s" % (src, bucket, key))
        self.client.abort_multipart_upload(
            Bucket=bucket, Key=key, UploadId=upload_id)
        raise
    else:
        # Release the worker threads on the success path as well.
        executor.shutdown()
    return True
def verify(self, validator, handler, repeat=1, concurrency=10, sleep=1):
    """Verify every proxy in ``self._proxylist`` and report progress.

    For each proxy a worker sleeps ``sleep`` seconds, calls
    ``validator.verify(proxy)`` ``repeat`` times and passes
    ``{'proxy': ..., 'test_logs': [...]}`` to ``handler.handle``. Progress is
    printed (and logged when ``self._context.logger`` is available) in list
    order. ``handler.close()`` is called once all proxies are processed.
    """
    proxy_count = len(self._proxylist)
    progress_count = 0

    def run(proxy):
        time.sleep(sleep)
        test_logs = [validator.verify(proxy) for _ in range(repeat)]
        data = dict(proxy=proxy, test_logs=test_logs)
        handler.handle(data)
        return proxy

    # `with` releases the worker threads when verification is over.
    with ThreadPoolExecutor(max_workers=concurrency) as executor:
        for proxy in executor.map(run, self._proxylist):
            progress_count += 1
            progress = round(progress_count / proxy_count * 100, 2)
            print(f'Verified [ {progress}% | {progress_count}/{proxy_count} ] {proxy.proxy_url}')
            if self._context and self._context.logger:
                self._context.logger.info(f'ProxyPool: Verified [ {progress}% | {progress_count}/{proxy_count} ] {proxy.proxy_url}.')
    handler.close()
def get_all_logs(nconfigs, experiment_id, lock=threading.Lock(), split_by_job_id=False):  # noqa: B008
    """Load the logs of every run of one experiment, grouped by job and split.

    ``nconfigs[experiment_id]`` maps split keys to runs. Each run is loaded
    with ``load_logs``; every ``(job_id, r, m)`` triple it returns is stored
    as ``logs[job_id][split_key][r] = m``. The default ``lock`` is created
    once and shared by all calls; it is held only while printing.

    Returns ``(experiment_id, logs)`` with ``logs`` converted to plain dicts
    at the two outer levels.
    """
    with lock:
        print(f"Collecting metrics for {experiment_id}")
    split = nconfigs[experiment_id]
    # The first key in `xp_logs` is used to differentiate jobs when `split_by_job_id` is True.
    xp_logs = defaultdict(lambda: defaultdict(dict))
    log_loader = functools.partial(load_logs, split_by_job_id=split_by_job_id)
    for split_key, runs in split.items():
        # At least one worker: ThreadPoolExecutor rejects max_workers=0, which
        # an empty `runs` would otherwise request.
        workers = max(1, min(MAX_RUN_LOADERS, len(runs)))
        # One pool per split, released as soon as that split is loaded.
        with ThreadPoolExecutor(workers) as pool:
            for metrics_per_job in pool.map(log_loader, runs):
                for job_id, r, m in metrics_per_job:
                    xp_logs[job_id][split_key][r] = m
    # Convert to regular dict.
    xp_logs_dict = {k: dict(v) for k, v in xp_logs.items()}
    return experiment_id, xp_logs_dict
def mulAcToken(isHost, limit=''):
    """Fetch access tokens for the stored accounts in parallel.

    Reads rows from ``nikeaccount`` (optionally with a ``limit`` clause),
    keeps the rows whose columns 4 and 8 are truthy, runs ``accessToken`` on
    ``(row[0], row[4], row[8], isHost)`` for each of them on a thread pool and
    writes the list of results to the ``AccessToken`` logger.
    """
    print(time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(time.time())))
    logger = Logger('AccessToken')
    account = MySqlDao()
    # NOTE(review): `limit` is concatenated into the SQL text; make sure it
    # never comes from untrusted input.
    if limit != '':
        res = account.rows("select * from nikeaccount limit " + limit)
    else:
        res = account.rows("select * from nikeaccount")
    print("[nkBot]>get " + str(len(res)) + " accounts Access Token")

    data = [(row[0], row[4], row[8], isHost) for row in res if row[4] and row[8]]

    if data:
        # `with` releases the worker threads once all tokens are fetched.
        with ThreadPoolExecutor() as pool:
            tokens = list(pool.map(accessToken, data))
        logger.write(tokens)
def run(self, io_threads, timeout, chunk_size=10000):
    """Read lines from stdin, run ``do_io`` on them in parallel and featurize.

    Stripped stdin lines are grouped with ``self._chunker`` and each chunk is
    processed by ``self.do_io`` on ``io_threads`` threads. For every result
    whose second element is not ``None`` the output of
    ``self.featurize(*result)`` is yielded. A JSON progress record is written
    to stderr every ``self.print_interval`` results, and ``self.close()`` is
    called after the input is exhausted. ``timeout`` is not used here.
    """
    start_time = time()
    gen = (line.strip() for line in sys.stdin)
    i = 0
    # `with` releases the worker threads when the generator finishes or is
    # closed early.
    with ThreadPoolExecutor(max_workers=io_threads) as pool:
        for chunk in self._chunker(gen, chunk_size):
            for obj in pool.map(self.do_io, chunk):
                i += 1
                if obj[1] is not None:
                    yield self.featurize(*obj)
                if not i % self.print_interval:
                    # Explicit write works on Python 2 and 3; `print >> f`
                    # only works as a Python 2 statement.
                    sys.stderr.write(json.dumps({
                        "i": i,
                        "time": time() - start_time,
                    }) + "\n")
    self.close()
def _download(self, bucket, key, dest, extra_args):
    """Download ``bucket``/``key`` to ``dest`` in chunks on a thread pool.

    When ``dest`` is a directory the file is written inside it under the last
    path component of ``key``. The object size comes from ``head_object``.
    All chunks are downloaded before the write is committed; on any error
    (including ``KeyboardInterrupt``) the write is aborted and the error
    re-raised. Returns ``True`` on success.
    """
    write_file = "%s/%s" % (dest, key.split('/')[-1]) \
        if os.path.isdir(dest) else dest
    file_size = int(self.client.head_object(
        Bucket=bucket, Key=key, **extra_args)['ContentLength'])
    self._total_size = file_size
    writer = FileChunkWriter(write_file, file_size)
    chunks = writer.get_chunks()
    executor = ThreadPoolExecutor(max_workers=self._threads)
    do_download = self._download_one_part(bucket, key, extra_args)
    try:
        # Consume the results so every chunk has finished (and any worker
        # exception has surfaced) before committing. A lazy Python 3 map()
        # would not wait.
        for _ in executor.map(do_download, chunks):
            pass
        writer.commit_write()
    except (KeyboardInterrupt, Exception):
        writer.abort_write()
        executor.shutdown()
        LOGGER.error("abort download %s %s %s" % (bucket, key, dest))
        raise
    else:
        # Release the worker threads on the success path as well.
        executor.shutdown()
    return True
def main():
    """Parse every story link of the saved page and store the articles.

    The links are parsed with ``parse_article_page`` on a pool of 100
    workers; the results are appended as one JSON document to
    ``new_york_articles.json`` and each article's name and text is printed.
    """
    page = read_html("Bitcoin - The New York Times.html")
    anchors = page.find_all("a", {"class": "story-link"})
    links = [anchor.attrs["href"] for anchor in anchors]
    print(len(links))

    # Articles are parsed in parallel rather than one link at a time.
    pool = Pool(100)
    articles = list(pool.map(parse_article_page, links))

    serialized = json.dumps(articles)
    with open("new_york_articles.json", "a") as myfile:
        myfile.write(serialized)

    for article in articles:
        print(article["name"])
        print(article["text"])
def get_results(self):
    """
    Maps the resources to be downloaded, parses them and returns a
    dictionary containing the results
    """
    # Renew cookie
    self.renew_cookie()

    # Threads Powah!
    pool = ThreadPoolExecutor(max_workers=2)
    results = {}
    for res_name, res_data in pool.map(self.resource_fetch, self.resources):
        # The parser method to call carries the resource's own name.
        parse = getattr(Parser(res_data), res_name)
        results[res_name] = parse()
    pool.shutdown()
    return results
def parallel_blast(targets, reference, num_cpu=4):
    """BLAST every target sequence against ``reference`` on ``num_cpu`` threads.

    Writes ``reference`` as FASTA into the temp directory, builds a protein
    BLAST database from it with ``makeblastdb`` and runs ``_blast`` for every
    ``(id, sequence)`` item of ``targets``. Returns a dict mapping target id
    to its predictions.
    """
    blastdb_pth = os.path.join(tmp_dir, 'blast-%s' % GoAspect(ASPECT))
    records = [SeqRecord(Seq(seq), seq_id) for seq_id, seq in reference.items()]
    # Close (and thereby flush) the FASTA file before makeblastdb reads it.
    with open(blastdb_pth, 'w+') as handle:
        SeqIO.write(records, handle, "fasta")
    os.system("makeblastdb -in %s -dbtype prot" % blastdb_pth)
    predictions = dict()

    def _parallel_blast_helper(s):
        return s[0], _blast(SeqRecord(Seq(s[1]), s[0]), reference, topn=None, choose_max_prob=True)

    pbar = tqdm(range(len(targets)), desc="blast2go processed")

    # `with` releases the worker threads once every target is processed.
    with ThreadPoolExecutor(num_cpu) as e:
        for tgtid, preds in e.map(_parallel_blast_helper, targets.items()):
            predictions[tgtid] = preds
            pbar.update(1)

    pbar.close()
    return predictions
def deploy(subserver_list, deploy_type, is_restart_server, user_name, deploy_version, operation_type):
    """Run the command sequence for ``deploy_type`` on each server group.

    Groups are processed one after another; within a group every command is
    run on all servers in parallel through ``cmd_run`` before the next
    command starts. The pool size is the size of the first group.

    Returns ``False`` for an unknown ``deploy_type`` or as soon as one
    ``cmd_run`` call returns a falsy value, ``True`` otherwise.
    """
    worker_num = len(subserver_list[0])
    executor = ThreadPoolExecutor(max_workers=worker_num)
    try:
        # subserver_list format: [[1,2,3],[4,5,6],[7,8]]
        for item in subserver_list:
            if deploy_type in ['deployall', 'deploypkg', 'deploycfg']:
                cmd_list = ['prepare', 'backup', 'stop', deploy_type, 'start', 'check'] \
                    if is_restart_server else ['prepare', 'backup', deploy_type, 'check']
            elif deploy_type == 'rollback':
                cmd_list = ['stop', 'rollback', 'start', 'check'] \
                    if is_restart_server else ['rollback', 'check']
            elif deploy_type == 'stop':
                cmd_list = ['stop']
            elif deploy_type == 'start':
                cmd_list = ['start', 'check']
            elif deploy_type == 'restart':
                cmd_list = ['stop', 'start', 'check']
            else:
                return False

            cmd_len = len(cmd_list)
            for index, cmd in enumerate(cmd_list):
                # Completion percentage after this command, derived from the
                # number of commands.
                percent_value = "%.0f%%" % ((index + 1) / cmd_len * 100)
                # Multithreaded version: the workload is I/O-bound, so
                # threading is a good fit.
                server_id = list(item)
                server_len = len(server_id)
                for data in executor.map(cmd_run, server_id, [cmd] * server_len, [user_name] * server_len,
                                         [percent_value] * server_len, [deploy_version] * server_len,
                                         [operation_type] * server_len):
                    if not data:
                        return False
        return True
    finally:
        # Release the worker threads on every exit path. wait=False keeps the
        # early `return False` as prompt as before.
        executor.shutdown(wait=False)
class ThreadPool:
    """Thread pool wrapper that carries the current app/request context into workers."""

    def __init__(self, max_workers=10):
        self.app = current_app
        self.futures = FutureCollection()
        self._executor = ThreadPoolExecutor(max_workers=max_workers)

    def submit(self, func, *args, **kwargs):
        """Submit ``func`` (wrapped with the current contexts) and return its future."""
        wrapped = self._prepare_func(func)
        return self._executor.submit(wrapped, *args, **kwargs)

    def submit_stored(self, future_key, func, *args, **kwargs):
        """Submit ``func`` and register its future under ``future_key``."""
        future = self.submit(func, *args, **kwargs)
        self.futures.add(future_key, future)
        return future

    def shutdown(self):
        """Shut the underlying executor down."""
        return self._executor.shutdown()

    def map(self, func, *iterables, **kwargs):
        """Map ``func`` (wrapped with the current contexts) over ``iterables``."""
        wrapped = self._prepare_func(func)
        return self._executor.map(wrapped, *iterables, **kwargs)

    def job(self, func):
        """Wrap ``func`` in a ``ThreadPoolJob`` bound to this pool."""
        return ThreadPoolJob(executor=self, func=func)

    def _copy_current_app_context(self, func):
        # Capture the context that is on top of the stack now; the wrapper
        # re-enters it around the call.
        captured_context = _app_ctx_stack.top

        def wrapper(*args, **kwargs):
            with captured_context:
                return func(*args, **kwargs)

        return wrapper

    def _prepare_func(self, func):
        if not isinstance(self._executor, concurrent.futures.ThreadPoolExecutor):
            return func
        if _request_ctx_stack.top is not None:
            func = copy_current_request_context(func)
        return self._copy_current_app_context(func)
def run(self):
    """Fetch networks information from ASRank and push to wikibase.

    Pages are requested from the API with an ``offset`` equal to the number
    of networks processed so far. Every edge of a page is handled by
    ``self.update_net`` on a thread pool. The process exits with an error
    message when the API does not answer with status 200.
    """
    self.wh.login()  # Login once for all threads

    has_next = True
    i = 0
    # `with` releases the worker threads on every exit path, including
    # sys.exit() and exceptions raised while updating.
    with ThreadPoolExecutor() as pool:
        while has_next:
            req = requests.get(URL_API + f'?offset={i}')
            if req.status_code != 200:
                sys.exit('Error while fetching data from API')

            ranking = json.loads(req.text)['data']['asns']
            has_next = ranking['pageInfo']['hasNextPage']

            for res in pool.map(self.update_net, ranking['edges']):
                sys.stderr.write(
                    f'\rProcessing... {i+1}/{ranking["totalCount"]}')
                i += 1
def get_data(sector):
    """Build the union MOC of up to 1000 TESS image footprints for a sector.

    Queries ``obsPointing`` for the sector, parses each row's ``s_region``
    into a ``coords`` column, computes one MOC per row with
    ``get_polygon_moc`` on four threads and returns their union. The elapsed
    time of both stages is printed.
    """
    # NOTE(review): `sector` is formatted straight into the query text; it is
    # expected to be a plain number.
    query = "SELECT top 1000 * FROM obsPointing WHERE obs_collection='TESS' AND dataproduct_type='image' " \
            "AND sequence_number={}".format(sector)
    df = query_db(CAOM_OPS, query)
    df['coords'] = df.apply(lambda x: parse_s_region(x['s_region']), axis=1)

    # Generate MOC
    start_time = time.time()
    rows = [row for _, row in df.iterrows()]
    # `with` releases the worker threads once all MOCs are built.
    with ThreadPoolExecutor(max_workers=4) as pool:
        results = list(pool.map(get_polygon_moc, rows))
    end_time = time.time()
    print('Total time : {} seconds'.format(end_time - start_time))

    # Union of MOCs
    start_time = time.time()
    moc = MOC.union(*results)
    end_time = time.time()
    print('Total time : {} seconds'.format(end_time - start_time))

    return moc
def map(self,
        func: Callable,
        iterable: Iterable,
        timeout: float = None,
        callback_timeout: Callable = None,
        daemon: bool = True
        ) -> Iterable:
    """
    Run ``func`` for every item of ``iterable`` and return the results lazily.

    :param func: the function to execute
    :param iterable: an iterable of function arguments
    :param timeout: after this time, the process executing the function
        will be killed if it did not finish
    :param callback_timeout: this function will be called, if the task
        times out. It gets the same arguments as the original function
    :param daemon: define the child process as daemon
    """
    def as_submit_kwargs(p_args):
        # One keyword bundle per item, in the shape _submit_unpack_kwargs takes.
        return {
            'func': func,
            'fn_args': p_args,
            "p_kwargs": {},
            'timeout': timeout,
            'callback_timeout': callback_timeout,
            'daemon': daemon,
        }

    workers = ThreadPoolExecutor(max_workers=self.processes)
    bundles = (as_submit_kwargs(item) for item in iterable)
    return workers.map(self._submit_unpack_kwargs, bundles)
def search(word="たこ焼き"):
    """Search restaurants matching ``word`` in every large area.

    The list of large areas is fetched first; each area is then paged through
    on a pool of 32 threads. Returns a flat list of dicts with ``id``,
    ``url``, ``latitude`` and ``longitude`` for every restaurant that has
    both coordinates.
    """
    def foo(area):
        fuga = []
        for i in range(1, 100000):
            try:
                with request.urlopen(f"https://api.gnavi.co.jp/RestSearchAPI/v3/?keyid={API_KEY}&hit_per_page=100&areacode_l={area}&offset_page={i}&freeword={parse.quote(word)}&freeword_condition=2") as res:  # ','.join(word)
                    html = res.read().decode("utf-8")
            except Exception:
                # Any failed request ends the paging for this area.
                break
            a = json.loads(html)
            for x in a["rest"]:
                # Skip entries without coordinates.
                if x["latitude"] == "" or x["longitude"] == "":
                    continue
                fuga.append({"id": x["id"], "url": x["url"], "latitude": x["latitude"], "longitude": x["longitude"]})
        return fuga

    with request.urlopen(f"https://api.gnavi.co.jp/master/GAreaLargeSearchAPI/v3/?keyid={API_KEY}") as res:
        html = res.read().decode("utf-8")
    areas = [x["areacode_l"] for x in json.loads(html)["garea_large"]]

    # `with` releases the worker threads once every area is collected.
    with ThreadPoolExecutor(max_workers=32) as pool:
        hoge = list(itertools.chain.from_iterable(pool.map(foo, areas)))
    return hoge
def use_threadpool_to_get_full_data(self):
    """Collect the data of every row, one column per worker call.

    For each row index below ``self.rowcount`` the index is stored in
    ``self.row_order`` and ``self.get_singe_row_data_by_response_length`` is
    run for every entry of ``self.coname_list`` on a thread pool. The results
    of a row are stored in ``self.full_data_dict`` under the row index.
    Returns ``self.full_data_dict``.
    """
    # `with` releases the 100 worker threads once all rows are collected.
    with ThreadPoolExecutor(max_workers=100) as executor:
        for i in range(0, self.rowcount):
            # Workers read the current row from self.row_order, so a row's
            # results are fully consumed before moving to the next row.
            self.row_order = i
            for data in executor.map(
                    self.get_singe_row_data_by_response_length,
                    self.coname_list):
                self.columndata.append(data)
            print(self.columndata)
            self.full_data_dict[self.row_order] = self.columndata
            self.columndata = []
    print(self.full_data_dict)
    return self.full_data_dict
class InputProcessor:
    """Splits text contents into short pieces and tabulates them."""

    def __init__(self, max_workers=5):
        # data process
        self.pool = ThreadPoolExecutor(max_workers=max_workers)

    @staticmethod
    def split_content(text):
        """Split ``text`` on each stop word in turn and return the pieces.

        A piece that is already at most 50 characters long is kept whole;
        longer pieces are split on the current stop word, dropping empty
        fragments.
        """
        def split_w(t, s):
            return [i for i in t.split(s) if len(i) != 0]

        ft = [text]
        for stop in stop_words:
            for i in range(0, len(ft)):
                # split len over max len (50)
                if len(ft[i]) <= (52 - 2):
                    ft[i] = [ft[i]]
                else:
                    ft[i] = split_w(ft[i], stop)
            # Flatten the per-piece lists back into one list of pieces.
            if len(ft) > 0:
                ft = list(np.concatenate(ft))
        return ft

    def do(self, contents):
        """Split every content and return a DataFrame of ``text`` / ``id``.

        ``id`` is the index of the content a piece came from.
        """
        # split content
        list_cons = list(self.pool.map(self.split_content, contents))
        # convert to dataframe, [content, content_id]
        data = DataFrame([], columns=('text', 'id'))
        for i, item in enumerate(list_cons):
            if item is None:
                # Append in place: DataFrame.append returned a new frame (so
                # the row was lost) and no longer exists in pandas 2.
                data.loc[len(data)] = ['', i]
            else:
                for it in item:
                    data.loc[len(data)] = [
                        it if it is not None and len(it) != 0 else '', i
                    ]
        return data
def yearly_absolute_data(pool: ThreadPoolExecutor, session: Session, orion_cb: ContextBroker, inds: Mapping[str, indicators.Indicator], year: Optional[int] = None):
    """Get absolute data for the given indicators and years"""
    # One work item per (indicator, geographical granularity) pair.
    granularities: List[Tuple[indicators.Indicator, str, Optional[int]]] = [
        (ind, geo_grain, year)
        for ind in inds.values()
        for geo_grain in ind.geographical_granularities.values()
    ]

    for base in range(0, len(granularities), BATCH_SIZE):
        batch = granularities[base:base + BATCH_SIZE]
        logging.info(
            "Batching indicators %s",
            ", ".join("%s:%s" % (ind.code, grain) for ind, grain, _ in batch))
        kpis = pool.map(lambda g: ind_as_kpi(*g), batch)
        orion_cb.batch(session, rotate(kpis))
def to_pngs(texes, texes_outfile, dpi=200, threads=2, check=True, only_dvi=False, replace=False):
    '''
    Convert several tex inputs with ``to_png`` in parallel.

    ``texes`` and ``texes_outfile`` are parallel collections: the i-th tex is
    passed to ``to_png`` together with the i-th output file, along with
    ``check``, ``only_dvi`` and ``replace``. ``dpi`` is not forwarded.

    Returns the list of ``to_png`` results in input order.
    '''
    convert = functools.partial(to_png, check=check, only_dvi=only_dvi, replace=replace)
    # NOTE(review): with max(), `threads` is only a lower bound on the pool
    # size; confirm whether min() was intended.
    # `with` releases the worker threads once all conversions are done.
    with ThreadPoolExecutor(max_workers=max(len(texes), threads)) as pool:
        return list(pool.map(convert, texes, texes_outfile))
def enumerate_definition_descriptors(self):
    """Yield the definition descriptors found in all matching repositories.

    For every GitHub organisation of ``self.job_mapping`` the repositories
    accepted by the organisation config and by ``self.repository_filter``
    (when set) are scanned with ``self._scan_repository_for_definitions`` on a
    pool of 16 threads.
    """
    # `with` releases the worker threads when the generator finishes or is
    # closed early.
    with ThreadPoolExecutor(max_workers=16) as executor:
        # scan github repositories
        for github_org_cfg in self.job_mapping.github_organisations():
            github_cfg = self.cfg_set.github(github_org_cfg.github_cfg_name())
            github_org_name = github_org_cfg.org_name()
            logger.info(
                f'scanning github organisation {github_org_name}'
                f' with github_cfg {github_cfg.name()}'
            )

            github_api = ccc.github.github_api(github_cfg)
            github_org = github_api.organization(github_org_name)

            if self.job_mapping.secret_cfg():
                secret_cfg = self.cfg_set.secret(self.job_mapping.secret_cfg())
            else:
                secret_cfg = None

            scan_repository_for_definitions = functools.partial(
                self._scan_repository_for_definitions,
                github_cfg=github_cfg,
                org_name=github_org_name,
                secret_cfg=secret_cfg,
                job_mapping=self.job_mapping,
            )

            matching_repositories = (
                repo for repo in github_org.repositories()
                if github_org_cfg.repository_matches(repo.name)
                and (not self.repository_filter or self.repository_filter(repo))
            )

            for definition_descriptors in executor.map(
                scan_repository_for_definitions,
                matching_repositories,
            ):
                yield from definition_descriptors
def convert2tfrecords(dataset_file_path, file_suffix, tfrecord_file_path, n_workers):
    """Convert every matching file of a directory into a TFRecord file.

    Files in ``dataset_file_path`` (optionally only those ending with
    ``file_suffix``) are converted on ``n_workers`` threads. The i-th file is
    written to ``<tfrecord_file_path>/<i>.tfrecord`` and the name of every
    finished output is printed.
    """
    filenames = os.listdir(dataset_file_path)
    if file_suffix is not None:
        filenames = [f for f in filenames if f.endswith(file_suffix)]

    def processor(filepath, filename, tfrecord_filename):
        token_dicts = None
        data_field_list = []
        data_field_list.append(
            DataSchema(name='query', processor='to_np', type=tf.int32,
                       dtype='int32', shape=(None, ), is_with_len=True))
        label_field = DataSchema(name='label', processor='to_np',
                                 type=tf.float32, dtype='float32',
                                 shape=(1, ), is_with_len=False)
        parser = TextlineParser(token_dicts, data_field_list, label_field)
        generator = TFDataset(parser=parser, file_path=filepath,
                              file_suffix=filename)
        generator.to_tfrecords(tfrecord_filename)
        return tfrecord_filename

    # One (directory, file name, output path) triple per file. Outputs are
    # numbered by position: tuple(a, b, c) and zip(list, int) both raise
    # TypeError, so a tuple literal and enumerate() are used instead.
    task_param_list = [
        (dataset_file_path, filename,
         tfrecord_file_path + '/' + str(i) + '.tfrecord')
        for i, filename in enumerate(filenames)
    ]

    with ThreadPoolExecutor(max_workers=n_workers) as pool:
        # Unpack each triple into processor's three positional parameters.
        for result in pool.map(lambda params: processor(*params),
                               task_param_list):
            print(result, 'finish')
def load(self, N=None, random_order=True):
    """
    Load all or a subset of stored SleepStudy objects
    Data is loaded using a thread pool with up to 7 threads.

    Args:
        N:              Number of SleepStudy objects to load. Defaults to
                        loading all.
        random_order:   Randomly select which of the stored objects to load
                        rather than starting from the beginning. Only has
                        an effect with N != None
    Returns:
        self, reference to the SleepStudyDataset object
    Raises:
        CouldNotLoadError: if one of the studies fails to load with that error.
    """
    from concurrent.futures import ThreadPoolExecutor
    if N is None:
        N = len(self)
        random_order = False
    not_loaded = self.non_loaded_pairs
    if random_order:
        to_load = np.random.choice(not_loaded, size=N, replace=False)
    else:
        to_load = not_loaded[:N]
    self.log("Loading {}/{} SleepStudy objects...".format(
        len(to_load), len(self)))
    # At least one worker: ThreadPoolExecutor rejects max_workers=0, which
    # happened whenever there was nothing left to load.
    pool = ThreadPoolExecutor(max_workers=max(1, min(len(to_load), 7)))
    res = pool.map(lambda x: x.load(), to_load)
    try:
        for i, _ in enumerate(res):
            print(" -- {}/{}".format(i + 1, len(to_load)),
                  end="\r", flush=True)
    except CouldNotLoadError as e:
        raise CouldNotLoadError("Could not load sleep study {}."
                                " Please refer to the above "
                                "traceback.".format(e.study_id)) from e
    finally:
        pool.shutdown()
    return self
def crawl(self):
    """ Crawl a site recursively

    Starting from ``self.url``, URLs are fetched level by level through an
    executor (threads when ``self.threads`` is truthy, processes otherwise).
    Every response with status 200, 301 or 302 is stored in
    ``self.data_map`` and its child links are queued unless they have
    already been seen.  When no URLs remain, ``self.data_map`` is pickled to
    ``data_<domain>.pkl`` in the current working directory.
    """
    if self.threads:
        executor = ThreadPoolExecutor(max_workers=self.concurrency)
    else:
        executor = ProcessPoolExecutor(max_workers=self.concurrency)

    with executor:
        pending = [self.url]
        seen = {self.url}

        while pending:
            results = executor.map(self.fetch_url, pending, timeout=120)
            pending = []

            for url, status, data in results:
                if status not in (200, 301, 302):
                    continue
                # Save and parse the text
                print('Fetched', url)
                self.count += 1
                self.data_map[url] = data

                # Queue every child link that has not been seen yet
                for child_url in self.parse_child_links(url, data):
                    if child_url not in seen:
                        seen.add(child_url)
                        pending.append(child_url)

    print('\n\nCrawl complete.')
    print('Fetched', self.count, 'URLs.')

    # Dump data; the context manager makes sure the file is flushed and
    # closed instead of relying on garbage collection.
    filename = 'data_%s.pkl' % self.domain
    with open(filename, 'wb') as handle:
        pickle.dump(self.data_map, handle)
    print('Data saved to', filename)
def deploy(subserver_list, deploy_type, is_restart_server, user_name, app_name, deploy_version, deploy_no, operation_type, env):
    """Run the command sequence for ``deploy_type`` on every server group.

    Groups are processed one after another.  Within a group each command is
    executed on all servers of the group in parallel via ``cmd_run``, and the
    next command only starts once every server has finished the current one.

    Args:
        subserver_list: Server groups, e.g. ``[[1, 2, 3], [4, 5, 6], [7, 8]]``.
        deploy_type: ``'rollback'``, ``'stop'``, ``'start'``, ``'restart'``,
            or any other value, which is treated as a regular deployment
            command name.
        is_restart_server: For regular deployments and rollbacks, whether
            the service is stopped before and started after the command.
        user_name: Passed through to ``cmd_run``.
        app_name: Unused here; kept for interface compatibility.
        deploy_version: Passed through to ``cmd_run``.
        deploy_no: Unused here; kept for interface compatibility.
        operation_type: Passed through to ``cmd_run``.
        env: Unused here; kept for interface compatibility.

    Returns:
        ``False`` as soon as any ``cmd_run`` call returns a falsy value,
        ``True`` otherwise (including when there are no servers at all).
    """
    # The command sequence depends only on the arguments, so build it once
    # instead of once per server group.
    if deploy_type == 'rollback':
        cmd_list = ['stop', 'rollback', 'start', 'status'] if is_restart_server else ['rollback']
    elif deploy_type == 'stop':
        cmd_list = ['stop']
    elif deploy_type == 'start':
        cmd_list = ['start', 'status']
    elif deploy_type == 'restart':
        cmd_list = ['stop', 'start', 'status']
    else:
        # Neither rollback nor start/stop: a regular deployment.
        cmd_list = ['backup', 'stop', deploy_type, 'start', 'status'] if is_restart_server else ['backup', deploy_type]
    cmd_len = len(cmd_list)

    # Size the pool after the first group, but never below one worker:
    # an empty list or empty first group used to raise before any work ran.
    worker_num = max(1, len(subserver_list[0])) if subserver_list else 1
    executor = ThreadPoolExecutor(max_workers=worker_num)
    try:
        for item in subserver_list:
            server = list(item)
            server_len = len(server)
            for index, cmd in enumerate(cmd_list):
                # Progress percentage reported once this command completes.
                percent_value = "%.0f%%" % ((index + 1) / cmd_len * 100)
                # Threaded on purpose: the work is I/O bound.
                outcomes = executor.map(cmd_run,
                                        server,
                                        [deploy_type] * server_len,
                                        [cmd] * server_len,
                                        [user_name] * server_len,
                                        [percent_value] * server_len,
                                        [deploy_version] * server_len,
                                        [operation_type] * server_len)
                for data in outcomes:
                    if not data:
                        return False
        return True
    finally:
        # Do not block an early ``return False``; already submitted calls
        # still run to completion and the worker threads then exit.
        executor.shutdown(wait=False)
def scrape(query):
    """Collect up to 50 sounds for the heroes matching ``query``.

    ``query`` is split on ``'/'``: the first segment selects heroes by name
    (via ``is_hero_searched``), the remaining segments are concatenated
    without a separator and handed to ``collect_sounds`` as the response
    filter.  One worker thread is started per matching hero.

    Returns:
        The concatenated per-hero results, in hero order, truncated to the
        first 50 entries; an empty list when no hero matches.
    """
    segments = query.split('/')
    hero_query = segments[0]
    response_query = ''.join(segments[1:])

    matching = [
        hero for hero in heroes
        if is_hero_searched(hero_query, hero['name'])
    ]
    if not matching:
        return []

    def sounds_for(hero):
        return collect_sounds(hero, response_query)

    executor = ThreadPoolExecutor(len(matching))
    collected = []
    for results_list in executor.map(sounds_for, matching):
        for single_result in results_list:
            collected.append(single_result)
    return collected[:50]
def _transcode_single_pass(arguments: Namespace, input_file: InputFile, scenes: SceneList, executor: ThreadPoolExecutor):
    """
    Encode the scenes of one input file with Single-Pass encoding.

    The command lines are built, limited/filtered according to the
    arguments, and run on the given executor. If an encode limit is set on
    the arguments, it is reduced by the number of command lines that report
    ``finished`` afterwards.
    """
    candidates = _create_single_pass_command_lines(arguments, input_file, scenes)
    to_run_command_lines = _limit_and_filter_commands(arguments, candidates)
    logger.info(f'About to start {len(to_run_command_lines)} scene encodes for input file "{input_file.input_file}".')
    if arguments.limit_encodes:
        # A limit of 0 or None is falsy, so the division below cannot fail.
        logger.info(f"This will use {round(len(to_run_command_lines)/arguments.limit_encodes*100)}% "
                    f"of the remaining encoder contingent.")

    def _invoke(run):
        return run()

    bound_runs = [cli.run for cli in to_run_command_lines]
    outcomes = executor.map(_invoke, bound_runs)

    # Walking the map results drives execution; run() yields None, so only
    # the command's own ``finished`` flag is inspected.
    successful_encodes = 0
    for command, _ in zip(to_run_command_lines, outcomes):
        if command.finished:
            successful_encodes += 1

    if arguments.limit_encodes is not None:
        # Shared state on purpose: the next input file, if any, gets this
        # many fewer encodes.
        arguments.limit_encodes -= successful_encodes
def multithread_master(config, task_func, task_queue, result_list, db_list=None, max_thread=4):
    """Run ``multithread_slave`` over a ``DataIterator`` on a thread pool.

    Args:
        config: Configuration handed to ``Util.mysql_pool``.
        task_func: Passed through to ``DataIterator``.
        task_queue: Passed through to ``DataIterator``.
        result_list: List that receives every worker result, in order.
        db_list: Optional iterable of database keys; one MySQL pool is
            created per key.  ``None`` (the default) creates no pools.
        max_thread: Number of worker threads and size of the data queue.
    """
    Util.print_yellow("Process start: [%5d]" % os.getpid())

    # Build the connection pools.  The guard must test ``db_list``: the
    # original tested the freshly created dict, which is never None, so the
    # default ``db_list=None`` crashed with a TypeError.
    mysql_pool = {}
    if db_list is not None:
        for db_key in db_list:
            mysql_pool[db_key] = Util.mysql_pool(config, db_key)

    # Build the thread pool and the task source.
    data_queue = queue.Queue(maxsize=max_thread)
    with ThreadPoolExecutor(max_workers=max_thread) as executor:
        data_iter = DataIterator(task_func, task_queue, data_queue, mysql_pool)
        # Dispatch the tasks and collect the results.
        for res in executor.map(multithread_slave, data_iter):
            result_list.append(res)
def training_epoch_end(self, outputs: List[Any]) -> None:
    """Periodically rebuild the pseudo-label set and inject it into training.

    Nothing happens before epoch 300 or on epochs that are not a multiple
    of 7.  Otherwise the model is run in eval mode over the datamodule's
    pseudo-train dataloader, every sample is scored with
    ``seq_sum_logprob_np`` on a thread pool, and the selected
    ``(audio_path, text)`` pairs are injected into the datamodule before
    the train dataloader is reset.

    Args:
        outputs: Unused; part of the hook signature.
    """
    if self.current_epoch < 300:
        return
    if not self.current_epoch % 7 == 0:
        return

    # Confidence threshold used for the selection below.
    min_confidence = 0.01
    pesudo_labels = []  # (audio_path, text)
    self.eval()
    # The pool only lives for this hook; the context manager releases its
    # threads instead of leaking a fresh pool on every qualifying epoch.
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as pool:
        with torch.no_grad():
            for batch in tqdm(self.trainer.datamodule.pseudo_train_dataloader()):
                percentage = batch[2]
                out = self.forward(batch[0].to(self.device), percentage)
                # Actual output lengths: audio lengths differ inside a
                # batch, so the padded tail has to be masked out.
                t_lengths = torch.mul(out.size(1), percentage).int()
                texts = self.wer.ctc_decoder_predictions_tensor(
                    torch.argmax(out, dim=-1, keepdim=False), t_lengths)
                # Map: one work item per sample.
                datas_seq = [(idx, out[idx].cpu().numpy(), t_lengths[idx].cpu().numpy())
                             for idx in range(len(out))]
                # Reduce: gather the scores and order them by first field.
                results = list(pool.map(seq_sum_logprob_np, datas_seq))
                results = sorted(results, key=lambda x: x[0], reverse=False)
                avg_probs = [item[1] for item in results]
                # NOTE(review): samples are kept when the score is <=
                # min_confidence; confirm the direction against what
                # seq_sum_logprob_np returns.
                for audio_path, text, prob in zip(batch[-1], texts, avg_probs):
                    if prob <= min_confidence:
                        pesudo_labels.append((audio_path, text))
    logger.info("伪标签数据量{:d}条".format(len(pesudo_labels)))
    batch_size = self.trainer.datamodule.pseudo_train_dataloader().batch_size
    total_count = len(self.trainer.datamodule.pseudo_train_dataloader())*batch_size
    logger.info("总数据量{:d}".format(total_count))
    self.pesudo_percentage = len(pesudo_labels)/total_count
    # Inject the pseudo-labelled samples into the training set.
    self.trainer.datamodule.inject_pesudo_datasets(pesudo_labels)
    self.trainer.reset_train_dataloader(self)  # reload the training set
    self.train()
def _search_all_async(type, id, info):
    """Query every seller module for ``info`` on a thread pool.

    For each module in ``search_modules`` a cached result is returned when
    present.  Otherwise the module's ``search_<type>`` function is called;
    a truthy result is cached and returned, and when the search yields
    nothing the module's ``empty_<type>`` function supplies the result.

    Args:
        type: Item type; selects the ``search_<type>`` / ``empty_<type>``
            functions and is part of the cache key.
        id: Item id; part of the cache key.
        info: Item information handed to the module functions.

    Returns:
        A lazy iterator over the truthy per-module results.  Iterating it
        may raise a timeout error, because the underlying ``map`` call is
        bounded to 60 seconds.
    """
    logger.debug("Searching for sellers for %s/%s asynchronously"%(type, id))

    def search_module(module):
        cache_key = "vgmdb/%s/%s/sellers/%s"%(type,id,module.__name__)
        with Timer(tag=module.__name__, verbose=False):
            search = getattr(module, "search_%s"%(type,), None)
            empty = getattr(module, "empty_%s"%(type,), None)
            prev = cache.get(cache_key)
            if not search or prev:
                return prev
            ret = None
            try:
                ret = search(info)
            except Exception as e:
                logger.error("Exception while searching %s for %s/%s: %s"%(module, type, id, e))
            if ret:
                cache.set(cache_key, ret)
                return ret
            # ``empty`` is looked up with a None default, so a module that
            # lacks the function must not be called blindly.
            return empty(info) if empty else None

    executor = ThreadPoolExecutor(max_workers=5)
    results = executor.map(search_module, search_modules, timeout=60)
    # Everything is submitted; let the workers exit once they are done
    # without blocking the caller, which consumes the results lazily.
    executor.shutdown(wait=False)
    return filter(None, results)
def get_rect_tiles(self, x1, y1, x2, y2, parallel=False):
    """Return a PIL.Image of a rectangular map whose upper left and bottom
    right corner have tiles coordinates (x1, y1) and (x2, y2) respectively.

    If parallel=True: try to speed up the acquiring of tiles by running the
    needed calls to get_tile() asynchronously on a pool of 10 threads.
    Default False.
    """
    big = Image.new("RGB", (int((x2-x1) * self.xres),
                            int((y2-y1) * self.yres)))

    # rows and columns of tiles containing (x1, y1) and (x2, y2)
    tiles_x1 = floor(x1)
    tiles_x2 = floor(x2)
    tiles_y1 = floor(y1)
    tiles_y2 = floor(y2)

    # pixel offset of (x1, y1) inside its tile
    xdiff_pix = int(self.xres * (x1 - tiles_x1))
    ydiff_pix = int(self.yres * (y1 - tiles_y1))

    # row-major list of every tile that intersects the rectangle
    tiles_needed = [(x, y)
                    for y in range(tiles_y1, tiles_y2+1)
                    for x in range(tiles_x1, tiles_x2+1)]

    def paste_tile(im, x, y):
        big.paste(im, ((x-tiles_x1) * self.xres - xdiff_pix,
                       (y-tiles_y1) * self.yres - ydiff_pix))

    # acquire each tile needed and paste it into big
    if parallel:
        # The context manager releases the worker threads once all tiles
        # have been pasted.
        with ThreadPoolExecutor(10) as tpe:
            images = tpe.map(self.get_tile, *zip(*tiles_needed))
            for im, (x, y) in zip(images, tiles_needed):
                paste_tile(im, x, y)
    else:
        for x, y in tiles_needed:
            paste_tile(self.get_tile(x, y), x, y)

    return big
try:
    from collections.abc import Iterable as _Iterable
except ImportError:  # Python 2
    from collections import Iterable as _Iterable


class parallel_map(_Iterable):
    """Iterable over ``function`` applied to the iterables on a thread pool.

    All calls are submitted when the object is created.  Iterating yields
    the successful results in input order; if any call raised, the first
    such exception is re-raised with its original traceback after all
    results have been consumed.

    The base class is taken from ``collections.abc`` where available,
    because the ``collections.Iterable`` alias was removed in Python 3.10.

    :param pool_size: number of worker threads (must be an integer)
    :param function: callable applied to each group of arguments
    :param iterables: one or more iterables supplying the arguments
    :raises TypeError: on a non-integer ``pool_size``, a non-callable
        ``function`` or missing iterables
    """

    def __init__(self, pool_size, function, *iterables):
        if not isinstance(pool_size, numbers.Integral):
            raise TypeError('pool_size must be an integer, not ' +
                            repr(pool_size))
        elif not callable(function):
            raise TypeError('function must be callable, not ' +
                            repr(function))
        elif not iterables:
            raise TypeError('missing iterable')
        self.pool = ThreadPoolExecutor(pool_size)
        self.function = function
        self.results = self.pool.map(self.map_function, *iterables)

    def map_function(self, *args):
        """Call the wrapped function; return ``(ok, value_or_exc_info)``."""
        try:
            value = self.function(*args)
        except Exception:
            return False, sys.exc_info()
        return True, value

    def __iter__(self):
        errors = []
        try:
            for success, value in self.results:
                if success:
                    yield value
                else:
                    errors.append(value)
        finally:
            # Also runs when the consumer abandons the iteration early.
            self.pool.shutdown()
        if not errors:
            return
        # Only the first failure can be reported.
        _, exc, tb = errors[0]
        if sys.version_info[0] >= 3:
            raise exc.with_traceback(tb)
        exec('raise exc, None, tb')
def get_wiki_summary(phrase):
    """Return first-sentence Wikipedia summaries for ``phrase``.

    * If the page exists, the list holds the first sentence of its summary.
    * If it does not exist and the phrase has at least two words, every
      word longer than three characters is looked up on a thread pool and
      the results are concatenated.
    * If the phrase is ambiguous, the first suggestion is looked up.

    The lookup is best effort: any other error is logged and whatever was
    collected so far is returned.  Unlike the previous ``return`` inside
    ``finally``, this no longer swallows ``KeyboardInterrupt`` or
    ``SystemExit``.

    Returns:
        A (possibly empty) list of sentences.
    """
    summ = []
    log.info ("pharse=%s\n", phrase)
    try:
        try:
            sentence = wikipedia.summary(phrase).split('. ')[0]
            log.debug("got: %s\n", sentence)
            summ.append(sentence)
        except wikipedia.exceptions.PageError:
            log.debug("exepted!!\n")
            if len(phrase.split()) < 2:
                log.debug("WAT=\n")
                return []
            words = [w for w in phrase.split() if len(w)>3]
            log.debug("words=%s\n", words)
            if len(words) < 1:
                return []
            log.debug('popcorn')
            log.debug("summ before is: %s\n", summ)
            # The pool is only needed for this fan-out; release it after.
            with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
                summ += list(itertools.chain(*ex.map(get_wiki_summary, words)))
            log.debug("summ is now: %s\n", summ)
        except wikipedia.exceptions.DisambiguationError as e:
            log.debug("excepted\n")
            # The first and last lines of the message are not suggestions.
            suggestions = str(e).split("\n")[1:-1]
            log.debug('suggestions=%s', suggestions)
            summ += get_wiki_summary(suggestions[0])
    except Exception:
        # Keep the best-effort contract, but leave a trace of the failure.
        log.exception("summary lookup failed for %s", phrase)
    return summ
def _get_all_elevation_info(self):
    """Return the elevation info of every location on the route.

    ``self._get_elevation_info`` is called once per entry of
    ``self.location_info['route']['locations']`` on a thread pool with one
    worker per CPU, to overlap the elevation lookups.

    Returns:
        The per-location results in route order, or an empty list when
        ``self.location_info`` is ``None``.
    """
    if self.location_info is None:
        return []
    locations = self.location_info['route']['locations']
    # multithread the lookups to improve performance; the context manager
    # releases the worker threads once all results are in
    with ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
        return list(executor.map(self._get_elevation_info, locations))
class thread_pool(object):
    """
    Context manager wrapping a ThreadPoolExecutor of a fixed size.

    Leaving the ``with`` block normally shuts the executor down and waits
    for its threads. Leaving it because of an exception shuts the executor
    down without waiting.
    """

    def __init__(self, size):
        self.executor = ThreadPoolExecutor(size)

    def __enter__(self):
        return self

    # noinspection PyUnusedLocal
    def __exit__(self, exc_type, exc_val, exc_tb):
        left_cleanly = exc_type is None
        self.executor.shutdown(wait=left_cleanly)

    def apply_async(self, fn, args, callback=None):
        """Submit ``fn(*args)``; pass its result to ``callback`` if given."""
        pending = self.executor.submit(fn, *args)
        if callback is None:
            return

        def _forward_result(done):
            callback(done.result())

        pending.add_done_callback(_forward_result)

    def map(self, fn, iterable):
        """Apply ``fn`` to every item and return the results as a list."""
        return [outcome for outcome in self.executor.map(fn, iterable)]