def futures_executor(items, function, accumulator, workers=1,
                     status=True, unit='items', desc='Processing', **kwargs):
    """Execute a function over a list of inputs on multiple local cores via Python futures.

    Parameters
    ----------
    items : list
        List of input arguments
    function : callable
        A function to be called on each input, which returns an accumulator instance
    accumulator : AccumulatorABC
        An accumulator to collect the output of the function
    workers : int
        Number of parallel processes for futures
    status : bool
        If True (default), enable progress bar
    unit : str
        Label of progress bar unit
    desc : str
        Label of progress bar description
    """
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = set()
        futures.update(executor.submit(function, item, **kwargs) for item in items)
        futures_handler(futures, accumulator, status, unit, desc)
    return accumulator
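# `futures_handler` is not defined in this snippet. A minimal sketch of what it
# might do, modeled on the inline drain loop in a later variant of this function
# (assumptions: the accumulator supports `+=` and tqdm is installed):
import time
from tqdm import tqdm

def futures_handler(futures, accumulator, status, unit, desc):
    with tqdm(disable=not status, unit=unit, total=len(futures), desc=desc) as pbar:
        while futures:
            finished = set(job for job in futures if job.done())
            for job in finished:
                accumulator += job.result()
                pbar.update(1)
            futures -= finished
            time.sleep(0.5)  # poll interval; avoids spinning hot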
def get_all_comments(restaurants_url, pages_tracker=None, max_workers=64):
    # Avoid a mutable default argument; fall back to an empty tracker.
    if pages_tracker is None:
        pages_tracker = {}
    restaurants_url_to_do_iterator = iter(restaurants_url)
    pages_comments = []
    pbar = tqdm(total=len(restaurants_url))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        # Prime the executor with at most max_workers tasks.
        for restaurant_url, page_number in itertools.islice(
            restaurants_url_to_do_iterator, max_workers
        ):
            future = executor.submit(get_page_comments, restaurant_url, page_number)
            futures.update({future: restaurant_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                restaurant_url = futures[future]
                futures.pop(future)
                try:
                    comments = future.result()
                except Exception as exc:
                    tqdm.write(f"{restaurant_url} generated an exception: {exc}")
                else:
                    if pages_tracker:
                        pages_tracker[restaurant_url][1] += 1
                        if (
                            pages_tracker[restaurant_url][1]
                            >= pages_tracker[restaurant_url][0]
                        ):
                            with DimnaDatabase(db_path, logger) as db:
                                db.update_page_visit_status(base_url, restaurant_url, True)
                    pages_comments.append(comments)
                    with DimnaDatabase(db_path, logger) as db:
                        for comment, rating in comments["comments"]:
                            db.insert_rating(base_url, comment.replace("\x00", ""), rating)
            # Refill the queue with as many new tasks as just finished.
            for restaurant_url, page_number in itertools.islice(
                restaurants_url_to_do_iterator, len(done)
            ):
                future = executor.submit(get_page_comments, restaurant_url, page_number)
                futures.update({future: restaurant_url})
    pbar.close()
    return pages_comments
def futures_executor(items, function, accumulator, workers=2, status=True,
                     unit='items', desc='Processing', function_args=None):
    # Avoid a mutable default argument for the extra keyword arguments.
    if function_args is None:
        function_args = {}
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = set()
        try:
            futures.update(executor.submit(function, item, **function_args) for item in items)
            with tqdm(disable=not status, unit=unit, total=len(futures), desc=desc) as pbar:
                while len(futures) > 0:
                    finished = set(job for job in futures if job.done())
                    for job in finished:
                        accumulator += job.result()
                        pbar.update(1)
                    futures -= finished
                    del finished
                    time.sleep(1)
        except KeyboardInterrupt:
            for job in futures:
                job.cancel()
            if status:
                print("Received SIGINT, cancelled pending jobs. Running jobs will continue to completion.")
                print("Running jobs:", sum(1 for j in futures if j.running()))
        except Exception:
            for job in futures:
                job.cancel()
            raise
    return accumulator
def scrap_all_comments(base_url, urls, max_workers=256):
    urls_to_do = [url for (_, url, is_visited) in urls if not is_visited]
    urls_to_do_iterator = iter(urls_to_do)
    pbar = tqdm(initial=len(urls) - len(urls_to_do), total=len(urls))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        for url in itertools.islice(urls_to_do_iterator, max_workers):
            future = executor.submit(scrap_comments, url=url)
            futures.update({future: url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                url = futures[future]
                futures.pop(future)
                try:
                    comments = future.result()
                except Exception as exc:
                    tqdm.write(f"{url} generated an exception: {exc}")
                else:
                    with DimnaDatabase(db_path, logger) as db:
                        db.update_page_visit_status(base_url, url, True)
                        if comments:
                            db.insert_all_rating(base_url, comments)
            for url in itertools.islice(urls_to_do_iterator, len(done)):
                future = executor.submit(scrap_comments, url=url)
                futures.update({future: url})
    pbar.close()
def futuresum(tmp_arr):
    while np.size(tmp_arr) > 1:
        chunk_sum = []
        # Split the array into chunks of roughly two elements and sum each chunk in parallel.
        chunk_tmp_arr = np.array_split(tmp_arr, int(np.size(tmp_arr) / 2))
        if len(chunk_tmp_arr) > 1:
            with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
                futures = set()
                futures.update(
                    executor.submit(add, chunk_tmp_arr[i])
                    for i in range(0, len(chunk_tmp_arr)))
                if len(futures) == 0:
                    continue
                try:
                    while len(futures) > 0:
                        finished = set(job for job in futures if job.done())
                        for job in finished:
                            chunk_i = job.result()
                            chunk_sum.append(chunk_i)
                        futures -= finished
                        del finished
                except KeyboardInterrupt:
                    print("Ok quitter")
                    for job in futures:
                        job.cancel()
                except Exception:
                    for job in futures:
                        job.cancel()
                    raise
        else:
            chunk_sum.append(add(chunk_tmp_arr[0]))
        tmp_arr = np.array(chunk_sum)
        print(tmp_arr)
    return tmp_arr
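# `add` is not defined in this snippet; a minimal assumption is that it sums a
# single chunk of the array:
def add(chunk):
    return np.sum(chunk)

# Usage sketch (illustrative): futuresum(np.arange(1000)) reduces the array
# pairwise until a single-element array holding 499500 remains.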
def find_all_doctors_url(base_url, cities_url, max_workers=128):
    cities_url_iterator = iter(cities_url)
    pbar = tqdm(total=len(cities_url))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        for city_url in itertools.islice(cities_url_iterator, max_workers):
            future = executor.submit(find_doctors_url, base_url=base_url, city_url=city_url)
            futures.update({future: city_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED)
            for future in done:
                pbar.update(1)
                city_url = futures[future]
                futures.pop(future)
                try:
                    doctors_url = future.result()
                except Exception as exc:
                    tqdm.write(f"{city_url} generated an exception: {exc}")
                else:
                    with DimnaDatabase(db_path, logger) as db:
                        db.insert_all_pages_url(base_url, doctors_url)
            for city_url in itertools.islice(cities_url_iterator, len(done)):
                future = executor.submit(find_doctors_url, base_url=base_url, city_url=city_url)
                futures.update({future: city_url})
    pbar.close()
def futures_executor(items, function, accumulator, workers=1, status=True,
                     unit='items', desc='Processing', **kwargs):
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        futures = set()
        futures.update(executor.submit(function, item, **kwargs) for item in items)
        futures_handler(futures, accumulator, status, unit, desc)
    return accumulator
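# Usage sketch (illustrative names only): any object supporting `+=`, such as
# collections.Counter, can serve as the accumulator, and the worker function
# must be importable at module level for ProcessPoolExecutor to pickle it.
#
#   import collections
#
#   def count_lines(path):
#       with open(path) as f:
#           return collections.Counter(lines=sum(1 for _ in f))
#
#   totals = futures_executor(filenames, count_lines,
#                             collections.Counter(), workers=4)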
def batched_pool_runner(f, iterable, pool, batch_size):
    it = iter(iterable)
    # Submit the first batch of tasks.
    futures = set(pool.submit(f, x) for x in islice(it, batch_size))
    while futures:
        done, futures = concurrent.futures.wait(
            futures, return_when=concurrent.futures.FIRST_COMPLETED)
        # Replenish submitted tasks up to the number that completed.
        futures.update(pool.submit(f, x) for x in islice(it, len(done)))
        for d in done:
            yield d
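# Usage sketch: keep at most batch_size tasks in flight at any time. The worker
# function and inputs below are illustrative only.
import concurrent.futures
from itertools import islice

def _square(x):
    return x * x

with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
    for done_future in batched_pool_runner(_square, range(10), pool, batch_size=4):
        print(done_future.result())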
def write_data(dataset, topicname):
    for eachline in dataset:
        msg = str(eachline)
        futures.update({msg: None})
        # When you publish a message, the client returns a future.
        future = publisher.publish(
            topicname, msg.encode("utf-8")  # data must be a bytestring.
        )
        futures[msg] = future
        # Publish failures are handled in the callback function.
        future.add_done_callback(get_callback(future, msg))
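# `get_callback`, `publisher`, and the module-level `futures` dict live outside
# this snippet. A minimal sketch of the callback factory, following the common
# Pub/Sub publish-with-error-handling pattern (an assumption, not the original code):
def get_callback(future, msg):
    def callback(completed_future):
        try:
            # result() raises if the publish failed; otherwise it is the message ID.
            print(completed_future.result())
        except Exception as exc:
            print(f"Publishing {msg!r} raised an exception: {exc}")
        finally:
            # Remove the message from the tracking dict once it has been resolved.
            futures.pop(msg, None)
    return callback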
def find_all_comments_pages(pages_url, max_workers=128):
    book_url_to_do = [
        book_url for (_, book_url, is_visited) in pages_url if not is_visited
    ]
    book_url_to_do_iterator = iter(book_url_to_do)
    pbar = tqdm(initial=len(pages_url) - len(book_url_to_do), total=len(pages_url))
    comments_url = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        for book_url in itertools.islice(book_url_to_do_iterator, max_workers):
            book_id, book_name = book_url.split("/")[-2:]
            first_comment_url = f"{comments_base_url}/{book_id}/{book_name}.json"
            future = executor.submit(
                find_number_of_comments, comment_url=first_comment_url
            )
            futures.update({future: book_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                book_url = futures[future]
                futures.pop(future)
                book_id, book_name = book_url.split("/")[-2:]
                try:
                    num_pages = future.result()
                except Exception as exc:
                    tqdm.write(f"{book_url} generated an exception: {exc}")
                else:
                    if num_pages:
                        for page in range(1, num_pages + 1):
                            comment_url = f"{comments_base_url}/{book_id}/{book_name}.json?p={page}"
                            comments_url.append([book_url, comment_url])
                    else:
                        with DimnaDatabase(db_path, logger) as db:
                            db.update_page_visit_status(base_url, book_url, True)
            for book_url in itertools.islice(book_url_to_do_iterator, len(done)):
                book_id, book_name = book_url.split("/")[-2:]
                first_comment_url = f"{comments_base_url}/{book_id}/{book_name}.json"
                future = executor.submit(
                    find_number_of_comments, comment_url=first_comment_url
                )
                futures.update({future: book_url})
    pbar.close()
    return comments_url
def render(modelname):
    # Model files are pickled, so open in binary mode.
    with open('data/' + modelname + '.model', 'rb') as fin:
        model = pickle.load(fin)
    model_arr = []
    for ch in model:
        print('generating model for channel', ch.name)
        small_model = rl.Model(ch.name.encode("ascii"))
        small_model.addChannel(model[ch.name])
        model_arr.append(small_model)
    print(model_arr)
    print('Rendering')
    # for i in range(0, len(model_arr)):
    #     futurerender(model_arr[i], modelname)
    with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
        futures = set()
        futures.update(
            executor.submit(futurerender, model_arr[i], modelname)
            for i in range(0, len(model_arr)))
        try:
            while len(futures) > 0:
                finished = set(job for job in futures if job.done())
                for job in finished:
                    job.result()
                futures -= finished
                del finished
        except KeyboardInterrupt:
            print("Ok quitter")
            for job in futures:
                job.cancel()
        except Exception:
            for job in futures:
                job.cancel()
            raise
def read_write_file(bucket_name, filename, topicname):
    bucket = storage_client.get_bucket(bucket_name)
    # Get the bucket object as a blob and parse its newline-delimited JSON content.
    blob = bucket.get_blob(filename)
    data = ndjson.loads(blob.download_as_string())
    for eachline in data:
        df = pandas.DataFrame(eachline["Itinerary"].split("-"))
        df.columns = ['AirportCode']
        df_inner = pandas.merge(df, dataframe, on='AirportCode', how='inner')
        x = df_inner['CountryName'].unique()
        trip_type = 'International' if x.size > 1 else 'Domestic'
        y = df_inner.to_dict('records')
        eachline['CountryList'] = y
        eachline['Trip_type'] = trip_type
        msg = str(eachline)
        futures.update({msg: None})
        # When you publish a message, the client returns a future.
        future = publisher.publish(
            topicname, msg.encode("utf-8")  # data must be a bytestring.
        )
        futures[msg] = future
        # Publish failures are handled in the callback function.
        future.add_done_callback(get_callback(future, msg))
def scrap_all_comments(comments_url, max_workers=128):
    comments_url_iterator = iter(comments_url)
    pbar = tqdm(total=len(comments_url))
    # Use the max_workers argument rather than a hard-coded pool size.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {}
        for book_url, comment_url in itertools.islice(comments_url_iterator, max_workers):
            future = executor.submit(scrap_comments, comment_url=comment_url)
            futures.update({future: book_url})
        while futures:
            done, _ = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                pbar.update(1)
                book_url = futures[future]
                futures.pop(future)
                try:
                    comments = future.result()
                except Exception as exc:
                    tqdm.write(f"{book_url} generated an exception: {exc}")
                else:
                    with DimnaDatabase(db_path, logger) as db:
                        db.update_page_visit_status(base_url, book_url, True)
                        db.insert_all_rating(base_url, comments)
            for book_url, comment_url in itertools.islice(comments_url_iterator, len(done)):
                future = executor.submit(scrap_comments, comment_url=comment_url)
                futures.update({future: book_url})
    pbar.close()
def run(self):
    """
    Starts the load test by instantiating the required number of SignupUser and
    MultiUser instances. It increases the number of user instances according to
    the signup and multi-user QPS increase rates after every minute. It collects
    and returns the results returned by all user instances after the load test
    is completed.
    """
    initial_signup_instances_count = self.get_initial_signup_instances_count()
    initial_multi_instances_count = self.get_initial_multi_instances_count()
    signup_qps_increase_rate = self.get_signup_qps_increase_rate()
    multi_qps_increase_rate = self.get_multi_qps_increase_rate()
    # end_time is 1 minute more than the duration, to allow responses of
    # requests sent in the last minute to arrive.
    end_time = datetime.now() + timedelta(minutes=self.duration + 1)
    users.User.end_time = end_time
    users.MultiUser.user_details = LoadTest.all_users
    users.MultiUser.chat_details = LoadTest.all_chats
    users.MultiUser.distribution = self.get_api_distribution_without_signup()
    users.User.total_qps = 0
    results = []
    total_iterations = self.duration
    completed_iterations = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_qps) as executor:
        signup_instances = get_signup_user_instances(initial_signup_instances_count)
        multi_instances = get_multi_user_instances(initial_multi_instances_count, self.all_users_index)
        self.all_users_index += initial_multi_instances_count
        futures = {executor.submit(user.simulate) for user in signup_instances}
        futures.update({executor.submit(user.simulate) for user in multi_instances})
        while completed_iterations < total_iterations:
            time.sleep(60)
            signup_instances = get_signup_user_instances(signup_qps_increase_rate)
            multi_instances = get_multi_user_instances(multi_qps_increase_rate, self.all_users_index)
            self.all_users_index += multi_qps_increase_rate
            futures.update({executor.submit(user.simulate) for user in signup_instances})
            futures.update({executor.submit(user.simulate) for user in multi_instances})
            completed_iterations += 1
        for future in concurrent.futures.as_completed(futures):
            results.extend(future.result())
    return results
"Bu2KJpsi2KMuMu_inclusive": "{}/v1_0/files_BuToJpsiK_SoftQCDnonD.txt".format(skim_directory), } nworkers = 1 #fileslice = slice(None, 5) fileslice = slice(None) nevents = {} #with concurrent.futures.ThreadPoolExecutor(max_workers=nworkers) as executor: with concurrent.futures.ProcessPoolExecutor(max_workers=nworkers) as executor: futures = set() for dataset, filelistpath in in_txt.items(): with open(filelistpath) as filelist: files = [x.strip() for x in filelist.readlines()] print(files) futures.update(executor.submit(process_file, dataset, f) for f in files) nevents[dataset] = 0 try: total = len(futures) processed = 0 while len(futures) > 0: finished = set(job for job in futures if job.done()) for job in finished: dataset, nentries, rhistograms = job.result() nevents[dataset] += nentries for k in rhistograms.keys(): histograms[k] += rhistograms[k] processed += 1 print("Processing: done with % 4d / % 4d files" % (processed, total)) futures -= finished del finished
    if run.size == 0:
        return dataset, lumi_tools.LumiList()
    lumilist = lumi_tools.LumiList(run, lumi)
    return dataset, lumilist

dataset_lumi = {}
nworkers = 12
with concurrent.futures.ProcessPoolExecutor(max_workers=nworkers) as executor:
    futures = set()
    print(samples.keys())
    for dataset, files in samples.items():
        splitFiles = slice_it(files['files'], slices)
        for iL, iList in enumerate(splitFiles):
            futures.update(
                executor.submit(get_lumilist, dataset, file, files['treename'])
                for file in iList)
    try:
        total = len(futures)
        processed = 0
        while len(futures) > 0:
            finished = set(job for job in futures if job.done())
            for job in finished:
                dataset, accumulator = job.result()
                if dataset in dataset_lumi:
                    dataset_lumi[dataset] += accumulator
                else:
                    dataset_lumi[dataset] = accumulator
                processed += 1
                if processed % 10 == 0:
                    print("Processing: done with % 4d / % 4d files" %
def scan_profile(url, org, fid, timeout=None):
    """
    Report permutations of OFX version/prettyprint/unclosed_elements that
    successfully download OFX profile from server.

    Returns a pair of (OFXv1 results, OFXv2 results), each type(dict).
    dict values provide ``ofxget`` configs that will work to connect.
    """
    if timeout is None:
        timeout = 5

    ofxv1 = [102, 103, 151, 160]
    ofxv2 = [200, 201, 202, 203, 210, 211, 220]

    futures = {}
    client = OFXClient(url, org, fid)
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        for prettyprint in (False, True):
            for close_elements in (False, True):
                futures.update({
                    executor.submit(client.request_profile,
                                    version=version,
                                    prettyprint=prettyprint,
                                    close_elements=close_elements,
                                    timeout=timeout): (version, prettyprint, close_elements)
                    for version in ofxv1
                })
            futures.update({
                executor.submit(client.request_profile,
                                version=version,
                                prettyprint=prettyprint,
                                close_elements=True,
                                timeout=timeout): (version, prettyprint, True)
                for version in ofxv2
            })

    working = defaultdict(list)

    for future in concurrent.futures.as_completed(futures):
        try:
            response = future.result()
        except (urllib.error.URLError, urllib.error.HTTPError,
                ConnectionError, OSError) as exc:
            future.cancel()
            continue
        else:
            (version, prettyprint, close_elements) = futures[future]
            working[version].append((prettyprint, close_elements))

    def collate_results(results):
        results = list(results)
        if not results:
            return [], []
        versions, formats = zip(*results)

        # Assumption: the same formatting requirements apply to all
        # sub-versions (e.g. 1.0.2 and 1.0.3, or 2.0.3 and 2.2.0).
        # If a (pretty, close_elements) pair succeeds on most sub-versions
        # but fails on a few, we'll chalk it up to network transmission
        # errors and ignore it.
        #
        # Translation: just pick the longest sequence of successful
        # formats and assume it applies to the whole version.
        formats = max(formats, key=len)
        formats.sort()
        formats = [
            OrderedDict([("pretty", format[0]),
                         ("unclosed_elements", not format[1])])
            for format in formats
        ]
        return sorted(list(versions)), formats

    v2, v1 = utils.partition(lambda pair: pair[0] < 200, working.items())
    v1_versions, v1_formats = collate_results(v1)
    v2_versions, v2_formats = collate_results(v2)

    # V2 always has closing tags for elements; just report prettyprint
    for format in v2_formats:
        del format["unclosed_elements"]

    return json.dumps((OrderedDict([("versions", v1_versions), ("formats", v1_formats)]),
                       OrderedDict([("versions", v2_versions), ("formats", v2_formats)])))
def process_recursive(directories, *, album_gain=False, opus_output_gain=False,
                      mtime_second_offset=None, skip_tagged=False, thread_count=None,
                      ffmpeg_path=None, dry_run=False, report=False):
    """ Analyze and tag all audio files recursively found in input directories. """
    error_count = 0

    # walk directories
    albums_filepaths = []
    walk_stats = collections.OrderedDict(((k, 0) for k in ("files", "dirs")))
    with dynamic_tqdm(desc="Analyzing directories", unit=" dir",
                      postfix=walk_stats, leave=False) as progress:
        for input_directory in directories:
            for root_dir, subdirs, filepaths in os.walk(input_directory, followlinks=False):
                audio_filepaths = tuple(
                    map(functools.partial(os.path.join, root_dir),
                        filter(is_audio_filepath, filepaths)))
                if audio_filepaths:
                    albums_filepaths.append(audio_filepaths)
                if progress is not None:
                    walk_stats["files"] += len(filepaths)
                    walk_stats["dirs"] += 1
                    progress.set_postfix(walk_stats, refresh=False)
                    progress.update(1)

    # get optimal thread count
    if thread_count is None:
        thread_count = OPTIMAL_THREAD_COUNT
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=thread_count)
    start_evt = threading.Event()
    futures = {}

    with dynamic_tqdm(total=len(albums_filepaths), desc="Building work queue",
                      unit=" albums", leave=False) as progress:
        # analysis futures
        for album_filepaths in albums_filepaths:
            dir_futures = scan(album_filepaths,
                               album_gain=album_gain,
                               skip_tagged=skip_tagged,
                               ffmpeg_path=ffmpeg_path,
                               executor=executor,
                               start_evt=start_evt)
            dir_futures = {
                k: (tuple(f for f in dir_futures.keys() if f is not k), v)
                for k, v in dir_futures.items()
            }
            futures.update(dir_futures)
            if progress is not None:
                progress.update(1)

    with dynamic_tqdm(total=sum(map(len, albums_filepaths)) + int(album_gain) * len(albums_filepaths),
                      desc="Analyzing audio loudness", unit=" files",
                      leave=False, smoothing=0) as progress:
        # get results
        start_evt.set()
        pending_futures = futures
        while futures:
            done_futures, pending_futures = concurrent.futures.wait(
                pending_futures, return_when=concurrent.futures.FIRST_COMPLETED)
            to_del_futures = set()
            for done_future in done_futures:
                other_dir_futures, _ = futures[done_future]

                if progress is not None:
                    # update progress
                    progress.update(1)

                # ignore futures already processed
                if done_future in to_del_futures:
                    continue

                # only tag when the whole directory is scanned
                dir_futures = (done_future,) + other_dir_futures
                if not all(f.done() for f in dir_futures):
                    continue

                # get album filepaths
                audio_filepaths = tuple(futures[f][1] for f in dir_futures
                                        if futures[f][1] != ALBUM_GAIN_KEY)

                # get analysis results for this directory
                r128_data = {}
                for dir_future in dir_futures:
                    key = futures[dir_future][1]
                    try:
                        result = dir_future.result()
                    except Exception as e:
                        if album_gain and (key == ALBUM_GAIN_KEY):
                            logger().warning(
                                "Failed to analyze files %s: %s %s"
                                % (", ".join(repr(audio_filepath) for audio_filepath in audio_filepaths),
                                   e.__class__.__qualname__, e))
                        else:
                            logger().warning(
                                "Failed to analyze file %r: %s %s"
                                % (key, e.__class__.__qualname__, e))
                        error_count += 1
                    else:
                        if result is not None:
                            r128_data[key] = result

                if report and audio_filepaths:
                    show_scan_report(
                        audio_filepaths,
                        os.path.dirname(audio_filepaths[0]) if album_gain else None,
                        r128_data)

                if not dry_run:
                    # tag
                    try:
                        album_loudness, album_peak = r128_data[ALBUM_GAIN_KEY]
                    except KeyError:
                        album_loudness, album_peak = None, None
                    for audio_filepath in audio_filepaths:
                        try:
                            loudness, peak = r128_data[audio_filepath]
                        except KeyError:
                            if album_loudness is None:
                                # file was skipped
                                continue
                            else:
                                loudness, peak = None, None
                        try:
                            tag(audio_filepath, loudness, peak,
                                album_loudness=album_loudness, album_peak=album_peak,
                                opus_output_gain=opus_output_gain,
                                mtime_second_offset=mtime_second_offset)
                        except Exception as e:
                            logger().error("Failed to tag file '%s': %s %s"
                                           % (audio_filepath, e.__class__.__qualname__, e))
                            error_count += 1

                to_del_futures.add(done_future)
                for f in other_dir_futures:
                    to_del_futures.add(f)

            for to_del_future in to_del_futures:
                del futures[to_del_future]

    executor.shutdown(True)
    return error_count
        return val

nworkers = 22
fileslice = slice(None)
with concurrent.futures.ProcessPoolExecutor(max_workers=nworkers) as executor:
    futures = set()
    for dataset, info in datadef.items():
        if options.dataset and options.dataset not in dataset:
            continue
        for k, v in samples.items():
            if options.selection and options.selection not in k:
                continue
            for i in range(0, len(v)):
                if v[i] not in dataset:
                    continue
                print(dataset)
                futures.update(
                    executor.submit(analysis, k, options.year, dataset_xs[dataset], dataset, file)
                    for file in info['files'][fileslice])
                # for file in info['files'][fileslice]:
                #     analysis(k, options.year, dataset_xs[dataset], dataset, file)
    # Excerpt: this block presumably sits inside an outer loop in the full script,
    # which is what the `continue` below refers to.
    if len(futures) == 0:
        continue
    try:
        total = len(futures)
        processed = 0
        while len(futures) > 0:
            finished = set(job for job in futures if job.done())
            for job in finished:
                dataset, sumws, nentries, hout = job.result()
                nevents += nentries
                sumw += sumws
                for k in hout.keys():