def run(self) -> float:
    if self._complete.is_set():
        raise StopIteration("This runner has already been used")
    if self._running:
        raise StopIteration("This runner is being executed")
    self._running = True

    if (self._threads != 1 or self._processes != 1) and self._optimize_workers:
        # Time a single call to decide whether parallel workers are worth it.
        t = time.time()
        result = self._function(*next(self._raw_function_arguments))
        time_spent = time.time() - t
        if self._check_function(result):
            self._success_function(result)
        if time_spent < self.__speed_reference:
            self._threads = 1
            self._processes = 1
            self._function_arguments = self._raw_function_arguments
    else:
        self._function_arguments = self._raw_function_arguments

    if self._threads == self._processes and self._threads == 1:
        # Purely sequential execution.
        self._function_arguments: collections.Iterable
        start = time.time()
        for args in self._function_arguments:
            output = self._function(*args)
            if self._check_function(output):
                self._success_function(output)
        return time.time() - start

    self._function_arguments = Queue(self._raw_function_arguments)

    if self._processes == 1 or self._threads == 1:
        # Single worker dimension: run the worker loop in this process.
        if self._processes > self._threads:
            self._threads = self._processes
        self._blocking_success = True
        start = time.time()
        self._process_worker()
        return time.time() - start

    self._blocking_success = False
    self._success_sync_queue = multiprocessing.Queue()
    sync_thread = threading.Thread(target=self._sync_success)
    sync_thread.start()

    if any(platform in sys.platform for platform in ("win", "ios")) or self._processes_as_threads:
        process_pool = multiprocessing.pool.ThreadPool
    else:
        process_pool = multiprocessing.pool.Pool

    start = time.time()
    pool = process_pool(processes=self._processes)
    pool.imap_unordered(lambda f: f(),
                        (self._process_worker for _ in range(self._processes)),
                        chunksize=self._processes)
    pool.close()
    pool.join()
    pool.terminate()

    self._complete.set()
    self._function_arguments.stop()
    self._function_arguments.join()
    sync_thread.join()
    self._running = False
    return time.time() - start
def wrapper(iterable, *args, **kwargs):
    def starfunc(iterable):
        return func(iterable, *args, **kwargs)

    try:
        iter(iterable)
    except TypeError:
        return func(iterable, *args, **kwargs)

    if thread_count is None:
        current_thread_count = MAX_THREADS
    else:
        current_thread_count = set_threads(thread_count, set_global=False)

    with multiprocessing.pool.ThreadPool(current_thread_count) as pool:
        if return_results:
            results = []
            for result in progress_callback(
                    pool.imap(starfunc, iterable),
                    total=len(iterable),
                    include_progress_callback=include_progress_callback):
                results.append(result)
            return results
        else:
            for result in progress_callback(
                    pool.imap_unordered(starfunc, iterable),
                    total=len(iterable),
                    include_progress_callback=include_progress_callback):
                pass
def _push(self, src, dst):
    """
    Push src to dst on the remote.
    """
    force = False
    if src.startswith('+'):
        src = src[1:]
        force = True
    present = [self._refs[name][1] for name in self._refs]
    present.extend(self._pushed.values())
    # before updating the ref, write all objects that are referenced
    objects = git_list_objects(src, present)
    try:
        # upload objects in parallel
        pool = multiprocessing.pool.ThreadPool(processes=self._processes)
        res = pool.imap_unordered(Binder(self, '_put_object'), objects)
        # show progress
        total = len(objects)
        self._trace('', level=Level.INFO, exact=True)
        for done, _ in enumerate(res, 1):
            pct = float(done) / total
            message = '\rWriting objects: {:4.0%} ({}/{})'.format(pct, done, total)
            if done == total:
                message = '%s, done.\n' % message
            self._trace(message, level=Level.INFO, exact=True)
    except Exception:
        self._fatal('exception while writing objects')
    sha = git_ref_value(src)
    error = self._write_ref(sha, dst, force)
    if error is None:
        self._write('ok %s' % dst)
        self._pushed[dst] = sha
    else:
        self._write('error %s %s' % (dst, error))
def get_started_finished(gcs_client, db, todo):
    """Download started/finished.json from build dirs in todo."""
    ack_ids = []
    build_dirs = []
    pool = multiprocessing.pool.ThreadPool(16)
    try:
        for ack_id, (build_dir, started, finished) in pool.imap_unordered(
                lambda ack_id_job_build: (
                    ack_id_job_build[0],
                    gcs_client.get_started_finished(
                        ack_id_job_build[1], ack_id_job_build[2])),
                todo):
            if finished:
                if not db.insert_build(build_dir, started, finished):
                    print('build dir already present in db: ', build_dir)
                start = time.localtime(started.get('timestamp', 0) if started else 0)
                print((build_dir, bool(started), bool(finished),
                       time.strftime('%F %T %Z', start),
                       finished and finished.get('result')))
                build_dirs.append(build_dir)
                ack_ids.append(ack_id)
            else:
                print('finished.json missing?', build_dir, started, finished)
    finally:
        pool.close()
    db.commit()
    return ack_ids, build_dirs
def update_in_parallel(servers, options):
    """Update a group of servers in parallel.

    Exit the process with an error if any server failed to be updated and
    options.cont is not set.

    @param servers: A list of tuples of (server_name, server_status, roles).
    @param options: Options for the push.
    """
    args = []
    for server, status, _ in servers:
        args.append({'server': server, 'status': status, 'options': options})

    # The update actions run in parallel. If any update failed, we should wait
    # for the other running updates to finish. Aborting in the middle of an
    # update may leave the server in a bad state.
    pool = multiprocessing.pool.ThreadPool(POOL_SIZE)
    failed_servers = []
    results = pool.imap_unordered(update_server, args)
    for server, success, output in results:
        if options.dryrun:
            print('Dry run, updating server %s is skipped.' % server)
        elif success:
            print('Successfully updated server %s.' % server)
            if options.verbose:
                print(output)
                print()
        else:
            error = ('Failed to update server %s.\nError: %s' % (server, output))
            print(error)
            failed_servers.append(server)

    if failed_servers and not options.cont:
        print('Error! Failed to update following servers: %s' % failed_servers)
        sys.exit(1)
def create_indexed_database(direct_normal_csv, diffuse_csv):
    import csv
    import multiprocessing.pool
    storage = ABES.IndexedStorage(ABES.dbm_file, 'n')
    dirnorm = dict(extract_csv_data(direct_normal_csv))
    diffuse = dict(extract_csv_data(diffuse_csv))
    assert (len(dirnorm) == len(diffuse))
    pool = multiprocessing.pool.Pool()
    read_data = ((key, dirn, diffuse[key]) for key, dirn in dirnorm.items())
    i = 1
    for key, pdir, pdif in pool.imap_unordered(convert_to_power, read_data):
        storage[key] = (pdir, pdif)
        print(i, '/', len(dirnorm))
        print(key, ABES.key2coords(key))
        print(pdir)
        print(pdif)
        print('\n')
        i += 1
def main():
    if not os.path.exists('sigs'):
        os.mkdir('sigs')
    elif not os.path.isdir('sigs'):
        print('Please delete "sigs" before starting')
        sys.exit(1)

    tasks = []
    distr = 'ubuntu'
    # for version in os.listdir(distr):
    for version in ['bionic']:
        version = os.path.join(distr, version)
        for arch in os.listdir(version):
            arch = os.path.join(version, arch)
            for package in os.listdir(arch):
                package = os.path.join(arch, package)
                tasks.append(package)

    # We are going to do some hierarchical multiprocessing because there is a
    # very high pickle message-passing overhead, so a lot of CPU time gets
    # burned pickling in the main process simply passing work to worker processes.
    import subprocess
    import multiprocessing.pool
    pool = multiprocessing.pool.ThreadPool(cpu_factor)

    def do_package_in_worker(package):
        subprocess.call(['python3', __file__, '-c', package])

    for _ in pool.imap_unordered(do_package_in_worker, tasks):
        pass
def download_junit(db, threads, client_class):
    """Download junit results for builds without them."""
    builds_to_grab = db.get_builds_missing_junit()
    pool = None
    if threads > 1:
        pool = multiprocessing.pool.ThreadPool(
            threads, mp_init_worker, ('', {}, client_class, False))
        test_iterator = pool.imap_unordered(
            get_junits, builds_to_grab)
    else:
        global WORKER_CLIENT  # pylint: disable=global-statement
        WORKER_CLIENT = client_class('', {})
        test_iterator = (
            get_junits(build_path) for build_path in builds_to_grab)
    for n, (build_id, build_path, junits) in enumerate(test_iterator, 1):
        print('%d/%d' % (n, len(builds_to_grab)),
              build_path, len(junits), len(''.join(junits.values())))
        junits = {k: remove_system_out(v) for k, v in junits.iteritems()}
        db.insert_build_junits(build_id, junits)
        if n % 100 == 0:
            db.commit()
    db.commit()
    if pool:
        pool.close()
        pool.join()
def validate_states(
    make_env: Callable[[], CompilerEnv],
    states: Iterable[CompilerEnvState],
    datasets: Optional[List[str]] = None,
    nproc: Optional[int] = None,
) -> Iterable[ValidationResult]:
    """A parallelized implementation of :func:`validate_state()
    <compiler_gym.validate_state>` for batched validation.

    :param make_env: A callback which instantiates a compiler environment.
    :param states: A sequence of compiler environment states to validate.
    :param datasets: An optional list of datasets that are required.
    :param nproc: The number of parallel worker processes to run.
    :return: An iterator over validation results. The order of results may
        differ from the input states.
    """
    env = make_env()
    try:
        if not isinstance(env, LlvmEnv):
            raise ValueError("Only LLVM environment is supported for validation.")

        # Ensure that the required datasets are available.
        env.require_datasets(datasets)
        reward_space_name: str = env.reward_space.id if env.reward_space else None
    finally:
        env.close()

    with multiprocessing.Pool(processes=nproc) as pool:
        yield from pool.imap_unordered(
            _validate_states_worker, [(reward_space_name, r) for r in states])
def _find(self, query, n_threads=None, progress=None):
    p_query = query.prepare(self._nlp)
    if len(p_query) == 0:
        return []

    find_in_doc = functools.partial(self._find_in_doc, c_query=p_query.compiled)
    docs = self.session.c_documents
    total = sum([x.n_tokens for x in docs])
    done = 0

    if n_threads is None:
        n_threads = min(len(docs), self._max_threads)

    results = None
    with multiprocessing.pool.ThreadPool(processes=n_threads) as pool:
        for doc, r in pool.imap_unordered(find_in_doc, docs):
            if results is None:
                results = r
            else:
                results.extend(r)
            done += doc.n_tokens
            if progress:
                progress(done / total)

    return [CoreMatch(self, p_query, m) for m in results.best_n(-1)]
def iter_bucket(bucket_name, prefix='', accept_key=lambda key: True,
                key_limit=None, workers=16, retries=3):
    """
    Iterate and download all S3 files under `bucket/prefix`, yielding out
    `(key, key content)` 2-tuples (generator).

    `accept_key` is a function that accepts a key name (unicode string) and
    returns True/False, signalling whether the given key should be downloaded out
    or not (default: accept all keys).

    If `key_limit` is given, stop after yielding out that many results.

    The keys are processed in parallel, using `workers` processes (default: 16),
    to speed up downloads greatly. If multiprocessing is not available, thus
    _MULTIPROCESSING is False, this parameter will be ignored.

    Example::

      >>> # get all JSON files under "mybucket/foo/"
      >>> for key, content in iter_bucket(bucket_name, prefix='foo/', accept_key=lambda key: key.endswith('.json')):
      ...     print key, len(content)

      >>> # limit to 10k files, using 32 parallel workers (default is 16)
      >>> for key, content in iter_bucket(bucket_name, key_limit=10000, workers=32):
      ...     print key, len(content)
    """
    #
    # If people insist on giving us bucket instances, silently extract the name
    # before moving on. Works for boto3 as well as boto.
    #
    try:
        bucket_name = bucket_name.name
    except AttributeError:
        pass

    total_size, key_no = 0, -1
    key_iterator = _list_bucket(bucket_name, prefix=prefix, accept_key=accept_key)
    download_key = functools.partial(_download_key, bucket_name=bucket_name, retries=retries)

    with _create_process_pool(processes=workers) as pool:
        result_iterator = pool.imap_unordered(download_key, key_iterator)
        for key_no, (key, content) in enumerate(result_iterator):
            if True or key_no % 1000 == 0:
                logger.info("yielding key #%i: %s, size %i (total %.1fMB)",
                            key_no, key, len(content), total_size / 1024.0 ** 2)
            yield key, content
            total_size += len(content)

            if key_limit is not None and key_no + 1 >= key_limit:
                # we were asked to output only a limited number of keys => we're done
                break
    logger.info("processed %i keys, total size %i" % (key_no + 1, total_size))
def main():
    # scraped_count = ScrapedPage.select(fn.Count(ScrapedPage.id)).where(ScrapedPage.batch == 2)
    df_websites = pd.read_excel('data/7_opensources_co/websites_with_results.xlsx')
    domains = [u for u in df_websites.url.values]

    domain_type = {}
    websites_url = df_websites.url.values
    websites_type = df_websites.type.values
    for i, url in enumerate(websites_url):
        domain_type[url] = websites_type[i]

    urls_domains_not_found = []
    with open('data/7_opensources_co/news_cleaned_postgres_missing_domains.csv',
              'w') as out_missing_domains:
        with tqdm() as progress:
            print('Cleaning')
            pages_parsed = []
            with multiprocessing.pool.Pool(processes=multiprocessing.cpu_count()) as pool:
                # for page in pool.imap_unordered(parse_article, fetch_pages(last_id, batch_size), chunksize=100):
                for page in pool.imap_unordered(parse_article, fetch_pages_jsonl(), chunksize=100):
                    if page is None:
                        continue

                    domain = None
                    for d in domains:
                        if d in page['url']:
                            domain = d
                    if domain is None:
                        urls_domains_not_found.append(page['url'])
                        out_missing_domains.write(page['url'] + '\n')
                        domain = urlsplit(page['url']).netloc

                    page['domain'] = domain
                    page['type'] = domain_type[page['domain']] if page['domain'] in domain_type else None
                    pages_parsed.append(page)
                    progress.update()

                    if len(pages_parsed) > 1000:
                        print('Inserting cleaned articles to DB')
                        with peewee_database.atomic():
                            Page.insert_many(pages_parsed).execute()
                        pages_parsed = []

    print('Inserting cleaned articles to DB')
    with peewee_database.atomic():
        Page.insert_many(pages_parsed).execute()

    print('Urls without our domains?!:', len(urls_domains_not_found))
def generate_items(keys: Iterable, factory: Callable[[Any], tuple], method: Callable) -> Iterator:
    """Generate (key, method(*factory(key))) tuples for each key.

    The first element returned by factory is an instance of the class to which
    method is attached.

    If a process pool has been initialized, use multiprocessing; otherwise, use
    serial processing.
    """
    if pool is None:
        return (generate_items_worker((k, factory(k), method)) for k in keys)
    return pool.imap_unordered(generate_items_worker,
                               ((k, factory(k), method) for k in keys))
def s3_iter_bucket(bucket, prefix="", accept_key=lambda key: True, key_limit=None, workers=16): """ Iterate and download all S3 files under `bucket/prefix`, yielding out `(key, key content)` 2-tuples (generator). `accept_key` is a function that accepts a key name (unicode string) and returns True/False, signalling whether the given key should be downloaded out or not (default: accept all keys). If `key_limit` is given, stop after yielding out that many results. The keys are processed in parallel, using `workers` processes (default: 16), to speed up downloads greatly. If multiprocessing is not available, thus NO_MULTIPROCESSING is True, this parameter will be ignored. Example:: >>> mybucket = boto.connect_s3().get_bucket('mybucket') >>> # get all JSON files under "mybucket/foo/" >>> for key, content in s3_iter_bucket(mybucket, prefix='foo/', accept_key=lambda key: key.endswith('.json')): ... print key, len(content) >>> # limit to 10k files, using 32 parallel workers (default is 16) >>> for key, content in s3_iter_bucket(mybucket, key_limit=10000, workers=32): ... print key, len(content) """ total_size, key_no = 0, -1 keys = (key for key in bucket.list(prefix=prefix) if accept_key(key.name)) if NO_MULTIPROCESSING: logger.info("iterating over keys from %s without multiprocessing" % bucket) iterator = imap(s3_iter_bucket_process_key, keys) else: logger.info("iterating over keys from %s with %i workers" % (bucket, workers)) pool = multiprocessing.pool.Pool(processes=workers) iterator = pool.imap_unordered(s3_iter_bucket_process_key, keys) for key_no, (key, content) in enumerate(iterator): if key_no % 1000 == 0: logger.info( "yielding key #%i: %s, size %i (total %.1fMB)" % (key_no, key, len(content), total_size / 1024.0 ** 2) ) yield key, content key.close() total_size += len(content) if key_limit is not None and key_no + 1 >= key_limit: # we were asked to output only a limited number of keys => we're done break if not NO_MULTIPROCESSING: pool.terminate() logger.info("processed %i keys, total size %i" % (key_no + 1, total_size))
def progresspmap(pool: mp.pool.Pool, func: Callable[[X], Y], lst: List[X]) -> List[Y]:
    """ a parallel map with a progressbar. """
    i, maxi, result = 0, len(lst), []
    for item in pool.imap_unordered(func, lst):
        result.append(item)
        i += 1
        progress(i, maxi)
    return result
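# A minimal usage sketch for progresspmap above (hypothetical: the `slow_square`
# worker and the pool size are illustrative, and the module's `progress(i, maxi)`
# printer is assumed to exist). imap_unordered yields results in completion
# order, so the returned list is not aligned with the input order.
import multiprocessing as mp


def slow_square(x: int) -> int:
    # stand-in for an expensive, picklable worker function
    return x * x


if __name__ == "__main__":
    with mp.Pool(processes=4) as pool:
        squares = progresspmap(pool, slow_square, list(range(100)))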
def calculate_scores(final_dict, emodel_dirs, scores_db_filename,
                     use_ipyp=False, ipyp_profile=None):
    """Calculate scores of e-model morphology combinations and update the
    database accordingly.

    Args:
        scores_db_filename: path to .sqlite database with e-model morphology
            combinations
        final_dict: a dict mapping e-models to dicts with e-model parameters
        emodel_dirs: a dict mapping e-models to the directories with e-model
            input files
        use_ipyp: bool indicating whether ipyparallel is used. Default is False.
        ipyp_profile: path to ipyparallel profile. Default is None.
    """
    print('Creating argument list for parallelisation')
    arg_list = create_arg_list(scores_db_filename, emodel_dirs, final_dict)

    print('Parallelising score evaluation of %d me-combos' % len(arg_list))
    if use_ipyp:
        # use ipyparallel
        client = ipyparallel.Client(profile=ipyp_profile)
        lview = client.load_balanced_view()
        results = lview.imap(run_emodel_morph_isolated, arg_list, ordered=False)
    else:
        # use multiprocessing
        pool = NestedPool()
        results = pool.imap_unordered(run_emodel_morph_isolated, arg_list)

    # keep track of the number of received results
    uids_received = 0

    # every time a result comes in, save the score in the database
    for result in results:
        uid = result['uid']
        scores = result['scores']
        extra_values = result['extra_values']
        exception = result['exception']
        uids_received += 1

        save_scores(scores_db_filename, uid, scores, extra_values, exception)

        print('Saved scores for uid %s (%d out of %d) %s' %
              (uid, uids_received, len(arg_list),
               'with exception' if exception else ''))
        sys.stdout.flush()

    print('Converting score json strings to scores values ...')
    expand_scores_to_score_values_table(scores_db_filename)
def s3_iter_bucket(bucket, prefix='', accept_key=lambda key: True,
                   key_limit=None, workers=16, retries=3):
    """
    Iterate and download all S3 files under `bucket/prefix`, yielding out
    `(key, key content)` 2-tuples (generator).

    `accept_key` is a function that accepts a key name (unicode string) and
    returns True/False, signalling whether the given key should be downloaded out
    or not (default: accept all keys).

    If `key_limit` is given, stop after yielding out that many results.

    The keys are processed in parallel, using `workers` processes (default: 16),
    to speed up downloads greatly. If multiprocessing is not available, thus
    MULTIPROCESSING is False, this parameter will be ignored.

    Example::

      >>> mybucket = boto.connect_s3().get_bucket('mybucket')

      >>> # get all JSON files under "mybucket/foo/"
      >>> for key, content in s3_iter_bucket(mybucket, prefix='foo/', accept_key=lambda key: key.endswith('.json')):
      ...     print key, len(content)

      >>> # limit to 10k files, using 32 parallel workers (default is 16)
      >>> for key, content in s3_iter_bucket(mybucket, key_limit=10000, workers=32):
      ...     print key, len(content)
    """
    total_size, key_no = 0, -1
    keys = ({'key': key, 'retries': retries}
            for key in bucket.list(prefix=prefix) if accept_key(key.name))

    if MULTIPROCESSING:
        logger.info("iterating over keys from %s with %i workers" % (bucket, workers))
        pool = multiprocessing.pool.Pool(processes=workers)
        iterator = pool.imap_unordered(s3_iter_bucket_process_key_with_kwargs, keys)
    else:
        logger.info("iterating over keys from %s without multiprocessing" % bucket)
        iterator = imap(s3_iter_bucket_process_key_with_kwargs, keys)

    for key_no, (key, content) in enumerate(iterator):
        if key_no % 1000 == 0:
            logger.info("yielding key #%i: %s, size %i (total %.1fMB)" %
                        (key_no, key, len(content), total_size / 1024.0 ** 2))

        yield key, content
        key.close()
        total_size += len(content)

        if key_limit is not None and key_no + 1 >= key_limit:
            # we were asked to output only a limited number of keys => we're done
            break

    if MULTIPROCESSING:
        pool.terminate()

    logger.info("processed %i keys, total size %i" % (key_no + 1, total_size))
def main():
    import os
    if not os.path.isdir(const_path):
        os.mkdir(const_path)
    equipes = range(3, 11)
    pool = multiprocessing.pool.Pool()
    resultat = pool.imap_unordered(compute_lowest_duration, equipes)
    for ne, nj, txt in resultat:
        print(ne, "équipes :", nj, "jours", txt)
def eval_genomes_par(genomes, config):
    pool = Pool(4)
    genomes_list = [genome for ignored_genome_id, genome in genomes]
    for id, fitness in pool.imap_unordered(
            partial(eval_genomes, config=config, genomes_list=genomes_list),
            range(len(genomes_list))):
        genomes_list[id].fitness = fitness
    pool.close()  # should this be terminate?
    pool.join()
    for id, g in enumerate(genomes_list):
        print(id, g.fitness)
def parfor(
    f: Callable[[param_t], return_t],
    arg_list: Sequence[param_t],
    *,
    callback: Optional[
        Callable[[Union[return_t, Captured_Exception[param_t, return_t]]], None]
    ] = None,
    print_time: bool = False,
    task_name: Optional[str] = None,
) -> List[Union[return_t, Captured_Exception[param_t, return_t]]]:
    def timedelta2str(T: datetime.timedelta) -> str:
        s = str(T)
        return s[: s.rfind(".")]

    if pool is None:
        launch_parpool()
    assert pool is not None

    helper_arg_list = ((idx, f, arg) for idx, arg in enumerate(arg_list))
    result_dict: Dict[int, Union[return_t, Captured_Exception[param_t, return_t]]] = {}
    num_total = len(arg_list)
    num_finished = 0
    time_start = datetime.datetime.now()
    helper = cast(
        Callable[
            [Tuple[int, Callable[[param_t], return_t], param_t]],
            Tuple[int, Union[return_t, Captured_Exception[param_t, return_t]]],
        ],
        parfor_helper,
    )
    for idx, result in pool.imap_unordered(helper, helper_arg_list):
        num_finished += 1
        time_now = datetime.datetime.now()
        time_elapsed = time_now - time_start
        time_need = ((num_total - num_finished) / num_finished) * time_elapsed
        if isinstance(result, Captured_Exception):
            print_without_line_feed("[{}]: {}\n".format(idx, result))
        if print_time:
            assert task_name is not None
            print_without_line_feed(
                "{}\n\tfinished {}/{}, {:05.2f}%, elapsed {}, remaining {}, ETA {:%Y-%m-%d %H:%M:%S}\n".format(  # noqa: E501
                    task_name,
                    num_finished,
                    num_total,
                    100 * num_finished / num_total,
                    timedelta2str(time_elapsed),
                    timedelta2str(time_need),
                    time_now + time_need,
                )
            )
        if callback is not None:
            callback(result)
        result_dict[idx] = result

    result_list = [result_dict[idx] for idx in range(len(arg_list))]
    return result_list
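# A minimal usage sketch for parfor above, assuming its module-level helpers
# (launch_parpool, parfor_helper, Captured_Exception) are importable and the
# hypothetical `work` function is picklable. Worker exceptions come back as
# Captured_Exception entries instead of propagating, and results are re-sorted
# by index even though imap_unordered yields completions out of order.
def work(x: int) -> int:
    return x * x


if __name__ == "__main__":
    results = parfor(work, list(range(100)))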
def _map_multithread(func, iterable, chunksize=1):
    # type: (Callable[[S], T], Iterable[S], int) -> Iterator[T]
    """Chop iterable into chunks and submit them to a thread pool.

    For very long iterables using a large value for chunksize can make
    the job complete much faster than using the default value of 1.

    Return an unordered iterator of the results.
    """
    with closing(ThreadPool(DEFAULT_POOLSIZE)) as pool:
        return pool.imap_unordered(func, iterable, chunksize)
def _map_multiprocess(func: Callable[[S], T], iterable: Iterable[S],
                      chunksize: int = 1) -> Iterator[T]:
    """Chop iterable into chunks and submit them to a process pool.

    For very long iterables using a large value for chunksize can make
    the job complete much faster than using the default value of 1.

    Return an unordered iterator of the results.
    """
    with closing(ProcessPool()) as pool:
        return pool.imap_unordered(func, iterable, chunksize)
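# A minimal usage sketch for the two helpers above (hypothetical `cube` worker;
# _map_multithread/_map_multiprocess are assumed importable). The process-pool
# variant pickles both the function and each item, so the worker must be a
# module-level function.
def cube(x: int) -> int:
    return x * x * x


if __name__ == "__main__":
    # chunksize > 1 amortizes inter-process communication for long iterables;
    # results arrive in completion order, not input order.
    for value in _map_multiprocess(cube, range(10_000), chunksize=64):
        pass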
def loader(df):
    if not os.path.exists(OUT_DIR):
        os.mkdir(OUT_DIR)
    key_url_list = parse_data(df)
    pool = multiprocessing.pool.ThreadPool(processes=NUM_WORKERS)
    failures = sum(pool.imap_unordered(Downloader(), key_url_list))
    print('Total number of download failures: %s out of %s' % (failures, len(key_url_list)))
    pool.close()
    pool.terminate()
def MapNativeProcessingBinaries(
    binaries: typing.List[str],
    input_protos: typing.List[pbutil.ProtocolBuffer],
    output_proto_classes: typing.List[typing.Type],
    pool: typing.Optional[multiprocessing.Pool] = None,
    num_processes: typing.Optional[int] = None,
) -> typing.Iterator[_MapWorker]:
    """Run a protocol buffer processing binary over a set of inputs.

    Args:
        binaries: The paths of the binaries to execute, as provided to
            bazelutil.DataPath().
        input_protos: An iterable list of input protos.
        output_proto_classes: The proto classes of the outputs.
        pool: The multiprocessing pool to use.
        num_processes: The number of processes for the multiprocessing pool.

    Returns:
        A generator of _MapWorker instances. The order is random.
    """
    if not len(binaries) == len(input_protos):
        raise ValueError("Number of binaries does not equal protos")

    cmds = [[bazelutil.DataPath(b)] for b in binaries]

    # Read all inputs to a list. We need the inputs in a list so that we can
    # map an input's position in the list to a _MapWorker.id.
    input_protos = list(input_protos)
    output_proto_classes = list(output_proto_classes)

    # Create the multiprocessing pool to use, if not provided.
    pool = pool or multiprocessing.Pool(processes=num_processes)

    map_worker_iterator = (
        _MapWorker(id, cmd, input_proto)
        for id, (cmd, input_proto) in enumerate(zip(cmds, input_protos)))

    for map_worker in pool.imap_unordered(_RunNativeProtoProcessingWorker,
                                          map_worker_iterator):
        map_worker.SetProtos(input_protos[map_worker.id],
                             output_proto_classes[map_worker.id])
        yield map_worker
def parallel_perft(pool: multiprocessing.pool.Pool, depth: int, board: chess.Board) -> int:
    if depth == 1:
        return board.legal_moves.count()
    elif depth > 1:
        def successors(board: chess.Board) -> Iterator[chess.Board]:
            for move in board.legal_moves:
                board_after = board.copy(stack=False)
                board_after.push(move)
                yield board_after

        return sum(pool.imap_unordered(functools.partial(perft, depth - 1), successors(board)))
    else:
        return 1
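# A short usage sketch, assuming the serial `perft` helper that parallel_perft
# dispatches to is defined in the same module. perft(4) from the initial
# position should count 197281 nodes.
if __name__ == "__main__":
    with multiprocessing.Pool() as pool:
        print(parallel_perft(pool, 4, chess.Board()))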
def peek_total_size(urls: Iterator[str]) -> Tuple[int, int]:
    with multiprocessing.pool.ThreadPool(processes=16) as pool:
        it = pool.imap_unordered(peek_content_length, urls)
        pool.close()
        pool.join()

    total_size = 0
    unknown_files = 0
    for size in it:
        if size is None:
            unknown_files += 1
        else:
            total_size += size

    return total_size, unknown_files
def AnalyzeAllSongs(music_directory):
    song_list = []
    file_list = os.listdir(music_directory)
    for file in file_list:
        if file.endswith(".mp3"):
            song_name = file[0:len(file) - 4]
            song_list.append(song_name)

    if __name__ == '__main__':
        num_processes = 8
        pool = MyPool(num_processes)
        song_list = pool.imap_unordered(AnalyzeSong, song_list)

    return song_list
def get_all_builds(db, jobs_dir, metadata, threads, client_class, build_limit):
    """
    Adds information about tests to a dictionary.

    Args:
        jobs_dir: the GCS path containing jobs.
        metadata: a dict of metadata about the jobs_dir.
        threads: how many threads to use to download build information.
        client_class: a constructor for a GCSClient (or a subclass).
    """
    gcs = client_class(jobs_dir, metadata)

    print(f'Loading builds from {jobs_dir}')
    sys.stdout.flush()

    builds_have = db.get_existing_builds(jobs_dir)
    print(f'already have {len(builds_have)} builds')
    sys.stdout.flush()

    jobs_and_builds = gcs.get_builds(builds_have, build_limit)
    pool = None
    if threads > 1:
        pool = multiprocessing.Pool(threads, mp_init_worker,
                                    (jobs_dir, metadata, client_class))
        builds_iterator = pool.imap_unordered(get_started_finished, jobs_and_builds)
    else:
        global WORKER_CLIENT  # pylint: disable=global-statement
        WORKER_CLIENT = gcs
        builds_iterator = (get_started_finished(job_build) for job_build in jobs_and_builds)

    try:
        for n, (build_dir, started, finished) in enumerate(builds_iterator):
            if not build_dir:
                continue  # skip builds that raised exceptions
            print(f'inserting build: {build_dir}')
            if started or finished:
                db.insert_build(build_dir, started, finished)
            if n % 200 == 0:
                db.commit()
    except KeyboardInterrupt:
        if pool:
            pool.terminate()
        raise
    else:
        if pool:
            pool.close()
            pool.join()
        db.commit()
def main():
    pool = multiprocessing.pool.ThreadPool(args.parallelism)
    bots = []
    for id, botname in enumerate(args.players):
        bots.append((id, util.load_player(botname)))
    wins = [0] * len(bots)

    games = list(itertools.combinations(bots, 2))
    random.shuffle(games)
    matches = len(games) * args.matches * len(args.planets)
    rounds = matches * args.rounds
    log("{} Bots, {} Maps, {} Games, {} Matches, {} Rounds, 1 victor".format(
        len(bots), len(args.planets), len(games), matches, rounds))

    scores = lambda: sorted(zip(wins, args.players), key=lambda x: x[0], reverse=True)

    try:
        i = 0
        for ret in pool.imap_unordered(execute, gen_rounds(games)):
            i += 1
            (gid, mid, rid), winner, (pid1, pid2), (map_size, seed) = ret
            if winner is None:
                result = "DRAW"
            else:
                result = args.players[winner]
                wins[winner] += 1
            log("({}:{}:{} | {}:{} | {}:{}): {}".format(
                gid, mid, rid, map_size, seed, pid1, pid2, result), lvl=2)
            if i % NOTIFY_AMOUNT == 0:
                log("Finished {}/{} rounds ({:.2f})%. Current top 3: {}".format(
                    i, rounds, (float(i) / rounds * 100), scores()[:3]))
    except KeyboardInterrupt:
        log("Tournament interrupted by user", type="FAIL")
        pool.terminate()
        pool.join()
        sys.exit(1)

    pool.close()
    pool.join()
    log("All games finished", type="SUCCESS")

    for i, (wins, bot) in enumerate(scores()):
        log("{:3}. {:20} ({})".format(i, bot, wins))
def computeSamples(self, work, report_interval=100):
    '''compute samples according to work.

    returns a list of results.
    '''
    n = len(work)
    E.debug('sampling will work on %i items' % n)

    results = []

    if self.num_threads == 0:
        for i, w in enumerate(work):
            r = computeSample(
                (w, self.samples_outfile, self.outfile_sample_metrics, None))
            if i % report_interval == 0:
                E.info("%i/%i done (%5.2f)" % (i, n, 100.0 * i / n))
            results.append(r)
    else:
        E.info("generating processpool with %i threads for %i items" %
               (self.num_threads, len(work)))

        manager = multiprocessing.Manager()
        lock = manager.Lock()
        pool = multiprocessing.Pool(self.num_threads)

        # use file names - not files when multiprocessing
        samples_outfile, metrics_outfile = None, None
        if self.samples_outfile:
            samples_outfile = self.samples_outfile.name
            self.samples_outfile.flush()
        if self.outfile_sample_metrics:
            metrics_outfile = self.outfile_sample_metrics.name
            self.outfile_sample_metrics.flush()

        ww = [(w, samples_outfile, metrics_outfile, lock) for w in work]

        for i, r in enumerate(pool.imap_unordered(computeSample, ww)):
            if i % report_interval == 0:
                E.info("%i/%i done (%5.2f)" % (i, n, 100.0 * i / n))
            results.append(r)

        pool.close()
        pool.join()

    return results
def main():
    pool = multiprocessing.Pool(processes=args.parallelism)
    bots = []
    for id, botname in enumerate(args.players):
        bots.append(util.load_player(botname))

    matches = len(bots) * args.matches * len(args.planets)
    log("Training against {} Bots, {} Maps, {} Matches".format(
        len(bots), len(args.planets), matches))

    data, target = [], []
    try:
        i = 0
        for ret in pool.imap_unordered(execute, gen_rounds(bots)):
            i += 1
            (bid, mid), winner, state_vectors, (map_size, seed) = ret
            if winner == 1:
                result = 'won'
            elif winner == 2:
                result = 'lost'
            else:
                result = 'draw'
            data += state_vectors
            target += [result] * len(state_vectors)
            log("({}:{} | {}:{}): {}".format(bid, mid, map_size, seed, result), lvl=1)
            if i % NOTIFY_AMOUNT == 0:
                log("Finished {}/{} matches ({:.2f})%.".format(
                    i, matches, (float(i) / matches * 100)))
    except KeyboardInterrupt:
        log("Tournament interrupted by user", type="FAIL")
        pool.terminate()
        pool.join()
        sys.exit(1)

    pool.close()
    pool.join()
    log("All games finished", type="SUCCESS")

    generate_model(data, target)
def get_builds(db, jobs_dir, metadata, threads, client_class):
    """
    Adds information about tests to a dictionary.

    Args:
        jobs_dir: the GCS path containing jobs.
        metadata: a dict of metadata about the jobs_dir.
        threads: how many threads to use to download build information.
        client_class: a constructor for a GCSClient (or a subclass).
    """
    gcs = client_class(jobs_dir, metadata)

    print('Loading builds from %s' % jobs_dir)
    sys.stdout.flush()

    builds_have = db.get_existing_builds(jobs_dir)
    print('already have %d builds' % len(builds_have))
    sys.stdout.flush()

    jobs_and_builds = gcs.get_builds(builds_have)
    pool = None
    if threads > 1:
        pool = multiprocessing.Pool(threads, mp_init_worker,
                                    (jobs_dir, metadata, client_class))
        builds_iterator = pool.imap_unordered(
            get_started_finished, jobs_and_builds)
    else:
        global WORKER_CLIENT  # pylint: disable=global-statement
        WORKER_CLIENT = gcs
        builds_iterator = (
            get_started_finished(job_build) for job_build in jobs_and_builds)

    try:
        for n, (build_dir, started, finished) in enumerate(builds_iterator):
            print(build_dir)
            if started or finished:
                db.insert_build(build_dir, started, finished)
            if n % 200 == 0:
                db.commit()
    except KeyboardInterrupt:
        if pool:
            pool.terminate()
        raise
    else:
        if pool:
            pool.close()
            pool.join()
        db.commit()
def get_started_finished(gcs_client, db, todo):
    """Download started/finished.json from build dirs in todo."""
    acks = []
    build_dirs = []
    pool = multiprocessing.pool.ThreadPool(16)
    try:
        # Note: the tuple-parameter lambda below is Python 2-only syntax.
        for ack_id, (build_dir, started, finished) in pool.imap_unordered(
                lambda (ack_id, job, build): (
                    ack_id, gcs_client.get_started_finished(job, build)),
                todo):
            if finished:
                if not db.insert_build(build_dir, started, finished):
                    print('already present??')
                start = time.localtime(started.get('timestamp', 0) if started else 0)
                print(build_dir, bool(started), bool(finished),
                      time.strftime('%F %T %Z', start),
                      finished and finished.get('result'))
                build_dirs.append(build_dir)
                acks.append(ack_id)
            else:
                print('finished.json missing?', build_dir, started, finished)
    finally:
        pool.close()
    db.commit()
    return acks, build_dirs
def run_iptestall(options):
    """Run the entire IPython test suite by calling nose and trial.

    This function constructs :class:`IPTester` instances for all IPython
    modules and package and then runs each of them.  This causes the modules
    and packages of IPython to be tested each in their own subprocess using
    nose.

    Parameters
    ----------

    All parameters are passed as attributes of the options object.

    testgroups : list of str
      Run only these sections of the test suite. If empty, run all the available
      sections.

    fast : int or None
      Run the test suite in parallel, using n simultaneous processes. If None
      is passed, one process is used per CPU core. Default 1 (i.e. sequential)

    inc_slow : bool
      Include slow tests, like IPython.parallel. By default, these tests aren't
      run.

    xunit : bool
      Produce Xunit XML output. This is written to multiple foo.xunit.xml files.

    coverage : bool or str
      Measure code coverage from tests. True will store the raw coverage data,
      or pass 'html' or 'xml' to get reports.
    """
    if options.fast != 1:
        # If running in parallel, capture output so it doesn't get interleaved
        TestController.buffer_output = True

    if options.testgroups:
        to_run = [PyTestController(name) for name in options.testgroups]
        not_run = []
    else:
        to_run, not_run = prepare_py_test_controllers(inc_slow=options.all)

    configure_controllers(to_run, xunit=options.xunit, coverage=options.coverage)

    def justify(ltext, rtext, width=70, fill="-"):
        ltext += " "
        rtext = (" " + rtext).rjust(width - len(ltext), fill)
        return ltext + rtext

    # Run all test runners, tracking execution time
    failed = []
    t_start = time.time()

    print()
    if options.fast == 1:
        # This actually means sequential, i.e. with 1 job
        for controller in to_run:
            print("IPython test group:", controller.section)
            controller, res = do_run(controller)
            if res:
                failed.append(controller)
                if res == -signal.SIGINT:
                    print("Interrupted")
                    break
            print()
    else:
        # Run tests concurrently
        try:
            pool = multiprocessing.pool.ThreadPool(options.fast)
            for (controller, res) in pool.imap_unordered(do_run, to_run):
                res_string = "OK" if res == 0 else "FAILED"
                print(justify("IPython test group: " + controller.section, res_string))
                if res:
                    print(bytes_to_str(controller.stdout))
                    failed.append(controller)
                    if res == -signal.SIGINT:
                        print("Interrupted")
                        break
        except KeyboardInterrupt:
            return

    for controller in not_run:
        print(justify("IPython test group: " + controller.section, "NOT RUN"))

    t_end = time.time()
    t_tests = t_end - t_start
    nrunners = len(to_run)
    nfail = len(failed)
    # summarize results
    print("_" * 70)
    print("Test suite completed for system with the following information:")
    print(report())
    print("Ran %s test groups in %.3fs" % (nrunners, t_tests))
    print()
    print("Status: ", end="")
    if not failed:
        print("OK")
    else:
        # If anything went wrong, point out what command to rerun manually to
        # see the actual errors and individual summary
        failed_sections = [c.section for c in failed]
        print("ERROR - {} out of {} test groups failed ({}).".format(
            nfail, nrunners, ", ".join(failed_sections)))
        print()
        print("You may wish to rerun these, with:")
        print("  iptest", *failed_sections)
        print()

    if options.coverage:
        from coverage import coverage
        cov = coverage(data_file=".coverage")
        cov.combine()
        cov.save()

        # Coverage HTML report
        if options.coverage == "html":
            html_dir = "ipy_htmlcov"
            shutil.rmtree(html_dir, ignore_errors=True)
            print("Writing HTML coverage report to %s/ ... " % html_dir, end="")
            sys.stdout.flush()

            # Custom HTML reporter to clean up module names.
            from coverage.html import HtmlReporter

            class CustomHtmlReporter(HtmlReporter):
                def find_code_units(self, morfs):
                    super(CustomHtmlReporter, self).find_code_units(morfs)
                    for cu in self.code_units:
                        nameparts = cu.name.split(os.sep)
                        if "IPython" not in nameparts:
                            continue
                        ix = nameparts.index("IPython")
                        cu.name = ".".join(nameparts[ix:])

            # Reimplement the html_report method with our custom reporter
            cov._harvest_data()
            cov.config.from_args(omit="*%stests" % os.sep, html_dir=html_dir,
                                 html_title="IPython test coverage")
            reporter = CustomHtmlReporter(cov, cov.config)
            reporter.report(None)
            print("done.")

        # Coverage XML report
        elif options.coverage == "xml":
            cov.xml_report(outfile="ipy_coverage.xml")

    if failed:
        # Ensure that our exit code indicates failure
        sys.exit(1)
def run_iptestall(options):
    """Run the entire IPython test suite by calling nose and trial.

    This function constructs :class:`IPTester` instances for all IPython
    modules and package and then runs each of them.  This causes the modules
    and packages of IPython to be tested each in their own subprocess using
    nose.

    Parameters
    ----------

    All parameters are passed as attributes of the options object.

    testgroups : list of str
      Run only these sections of the test suite. If empty, run all the available
      sections.

    fast : int or None
      Run the test suite in parallel, using n simultaneous processes. If None
      is passed, one process is used per CPU core. Default 1 (i.e. sequential)

    inc_slow : bool
      Include slow tests, like IPython.parallel. By default, these tests aren't
      run.

    xunit : bool
      Produce Xunit XML output. This is written to multiple foo.xunit.xml files.

    coverage : bool or str
      Measure code coverage from tests. True will store the raw coverage data,
      or pass 'html' or 'xml' to get reports.

    extra_args : list
      Extra arguments to pass to the test subprocesses, e.g. '-v'
    """
    if options.fast != 1:
        # If running in parallel, capture output so it doesn't get interleaved
        TestController.buffer_output = True

    to_run, not_run = prepare_controllers(options)

    def justify(ltext, rtext, width=70, fill='-'):
        ltext += ' '
        rtext = (' ' + rtext).rjust(width - len(ltext), fill)
        return ltext + rtext

    # Run all test runners, tracking execution time
    failed = []
    t_start = time.time()

    print()
    if options.fast == 1:
        # This actually means sequential, i.e. with 1 job
        for controller in to_run:
            print('IPython test group:', controller.section)
            sys.stdout.flush()  # Show in correct order when output is piped
            controller, res = do_run(controller)
            if res:
                failed.append(controller)
                if res == -signal.SIGINT:
                    print("Interrupted")
                    break
            print()
    else:
        # Run tests concurrently
        try:
            pool = multiprocessing.pool.ThreadPool(options.fast)
            for (controller, res) in pool.imap_unordered(do_run, to_run):
                res_string = 'OK' if res == 0 else 'FAILED'
                print(justify('IPython test group: ' + controller.section, res_string))
                if res:
                    print(bytes_to_str(controller.stdout))
                    failed.append(controller)
                    if res == -signal.SIGINT:
                        print("Interrupted")
                        break
        except KeyboardInterrupt:
            return

    for controller in not_run:
        print(justify('IPython test group: ' + controller.section, 'NOT RUN'))

    t_end = time.time()
    t_tests = t_end - t_start
    nrunners = len(to_run)
    nfail = len(failed)
    # summarize results
    print('_'*70)
    print('Test suite completed for system with the following information:')
    print(report())
    took = "Took %.3fs." % t_tests
    print('Status: ', end='')
    if not failed:
        print('OK (%d test groups).' % nrunners, took)
    else:
        # If anything went wrong, point out what command to rerun manually to
        # see the actual errors and individual summary
        failed_sections = [c.section for c in failed]
        print('ERROR - {} out of {} test groups failed ({}).'.format(
            nfail, nrunners, ', '.join(failed_sections)), took)
        print()
        print('You may wish to rerun these, with:')
        print('  iptest', *failed_sections)
        print()

    if options.coverage:
        from coverage import coverage
        cov = coverage(data_file='.coverage')
        cov.combine()
        cov.save()

        # Coverage HTML report
        if options.coverage == 'html':
            html_dir = 'ipy_htmlcov'
            shutil.rmtree(html_dir, ignore_errors=True)
            print("Writing HTML coverage report to %s/ ... " % html_dir, end="")
            sys.stdout.flush()

            # Custom HTML reporter to clean up module names.
            from coverage.html import HtmlReporter

            class CustomHtmlReporter(HtmlReporter):
                def find_code_units(self, morfs):
                    super(CustomHtmlReporter, self).find_code_units(morfs)
                    for cu in self.code_units:
                        nameparts = cu.name.split(os.sep)
                        if 'IPython' not in nameparts:
                            continue
                        ix = nameparts.index('IPython')
                        cu.name = '.'.join(nameparts[ix:])

            # Reimplement the html_report method with our custom reporter
            cov._harvest_data()
            cov.config.from_args(omit='*{0}tests{0}*'.format(os.sep),
                                 html_dir=html_dir,
                                 html_title='IPython test coverage',
                                 )
            reporter = CustomHtmlReporter(cov, cov.config)
            reporter.report(None)
            print('done.')

        # Coverage XML report
        elif options.coverage == 'xml':
            cov.xml_report(outfile='ipy_coverage.xml')

    if failed:
        # Ensure that our exit code indicates failure
        sys.exit(1)
def generate_script(all_patches):
    """Resolve dependencies, and afterwards check if everything applies properly."""
    depends = sorted([i for i, patch in all_patches.iteritems() if not patch.disabled])
    resolved = resolve_dependencies(all_patches, depends=depends)
    max_patches = max(resolved) + 1

    # Generate timestamps based on dependencies, still required for binary patches
    # Find out which files are modified by multiple patches
    modified_files = {}
    for i, patch in [(i, all_patches[i]) for i in resolved]:
        patch.verify_time = [0] * max_patches
        patch.verify_time[i] += 1
        for j in patch.depends:
            patch.verify_time = causal_time_combine(patch.verify_time, all_patches[j].verify_time)

        for f in patch.modified_files:
            if f not in modified_files:
                modified_files[f] = []
            modified_files[f].append(i)

    # Check dependencies
    dependency_cache = _load_dict(config.path_cache)
    pool = multiprocessing.pool.ThreadPool(processes=4)
    try:
        for filename, indices in modified_files.iteritems():

            # If one of patches is a binary patch, then we cannot / won't verify it - require dependencies in this case
            if contains_binary_patch(all_patches, indices, filename):
                if not causal_time_relation_any(all_patches, indices):
                    raise PatchUpdaterError("Because of binary patch modifying file %s the following patches need explicit dependencies: %s" %
                                            (filename, ", ".join([all_patches[i].name for i in indices])))
                continue

            original_content = get_wine_file(filename)
            original_hash = _sha256(original_content)
            selected_patches = select_patches(all_patches, indices, filename)

            # Generate a unique id based on the original content, the selected patches
            # and the dependency information. Since this information only has to be compared
            # we can throw it into a single hash.
            m = hashlib.sha256()
            m.update(original_hash)
            for i in indices:
                m.update("P%s" % selected_patches[i][0])
                for j in indices:
                    if causal_time_smaller(all_patches[j].verify_time, all_patches[i].verify_time):
                        m.update("D%s" % selected_patches[j][0])
            unique_hash = m.digest()

            # Skip checks if it matches the information from the cache
            try:
                if dependency_cache[filename] == unique_hash:
                    continue
            except KeyError:
                pass

            # Show a progress bar while applying the patches - this task might take some time
            chunk_size = 20
            with progressbar.ProgressBar(desc=filename, total=2 ** len(indices) / chunk_size) as progress:

                def test_apply(current):
                    set_apply = [(i, all_patches[i]) for i in current]
                    set_skip = [(i, all_patches[i]) for i in indices if i not in current]

                    # Check if there is any patch2 which depends directly or indirectly on patch1.
                    # If this is the case we found an impossible situation, we can be skipped in this test.
                    for i, patch1 in set_apply:
                        for j, patch2 in set_skip:
                            if causal_time_smaller(patch2.verify_time, patch1.verify_time):
                                return None  # we can skip this test

                    try:
                        original = original_content
                        for i, _ in set_apply:
                            original = patchutils.apply_patch(original, selected_patches[i][1], fuzz=0)
                    except patchutils.PatchApplyError:
                        return current

                    return None  # everything is fine

                def test_apply_seq(current_list):
                    for current in current_list:
                        failed = test_apply(current)
                        if failed is not None:
                            return failed
                    return None

                iterables = []
                for i in xrange(0, len(indices) + 1):
                    iterables.append(itertools.combinations(indices, i))
                it = _split_seq(itertools.chain(*iterables), chunk_size)
                for k, failed in enumerate(pool.imap_unordered(test_apply_seq, it)):
                    if failed is not None:
                        progress.finish("<failed to apply>")
                        raise PatchUpdaterError("Changes to file %s don't apply: %s" %
                                                (filename, ", ".join([all_patches[i].name for i in failed])))
                    progress.update(k)

            # Update the dependency cache
            dependency_cache[filename] = unique_hash

    finally:
        pool.close()
        _save_dict(config.path_cache, dependency_cache)

    # Generate code for helper functions
    lines = []
    lines.append("# Enable or disable all patchsets\n")
    lines.append("patch_enable_all ()\n")
    lines.append("{\n")
    for i, patch in sorted([(i, all_patches[i]) for i in resolved], key=lambda x: x[1].name):
        if patch.is_category:
            continue
        patch.variable = "enable_%s" % patch.name.replace("-", "_").replace(".", "_")
        lines.append("\t%s=\"$1\"\n" % patch.variable)
    lines.append("}\n")
    lines.append("\n")
    lines.append("# Enable or disable all categories\n")
    lines.append("category_enable_all ()\n")
    lines.append("{\n")
    for i, patch in sorted([(i, all_patches[i]) for i in resolved], key=lambda x: x[1].name):
        if not patch.is_category:
            continue
        patch.variable = "enable_%s" % patch.name.replace("-", "_").replace(".", "_")
        lines.append("\t%s=\"$1\"\n" % patch.variable)
    lines.append("}\n")
    lines.append("\n")
    lines.append("# Enable or disable a specific patchset/category\n")
    lines.append("patch_enable ()\n")
    lines.append("{\n")
    lines.append("\tcase \"$1\" in\n")
    for i, patch in sorted([(i, all_patches[i]) for i in resolved], key=lambda x: x[1].name):
        lines.append("\t\t%s)\n" % patch.name)
        lines.append("\t\t\t%s=\"$2\"\n" % patch.variable)
        lines.append("\t\t\t;;\n")
    lines.append("\t\t*)\n")
    lines.append("\t\t\treturn 1\n")
    lines.append("\t\t\t;;\n")
    lines.append("\tesac\n")
    lines.append("\treturn 0\n")
    lines.append("}\n")
    lines_helpers = lines

    # Generate code for dependency resolver
    lines = []
    for i, patch in [(i, all_patches[i]) for i in reversed(resolved)]:
        if len(patch.depends):
            lines.append("if test \"$%s\" -eq 1; then\n" % patch.variable)
            for j in sorted(patch.depends):
                lines.append("\tif test \"$%s\" -gt 1; then\n" % all_patches[j].variable)
                lines.append("\t\tabort \"Patchset %s disabled, but %s depends on that.\"\n" %
                             (all_patches[j].name, patch.name))
                lines.append("\tfi\n")
            for j in sorted(patch.depends):
                lines.append("\t%s=1\n" % all_patches[j].variable)
            lines.append("fi\n\n")
    lines_resolver = lines

    # Generate code for applying all patchsets
    lines = []
    for i, patch in [(i, all_patches[i]) for i in resolved]:

        # Categories do not have any files associated, so just skip over
        if len(patch.files) == 0:
            continue

        lines.append("# Patchset %s\n" % patch.name)
        lines.append("# |\n")

        # List all bugs fixed by this patchset
        if any([bugid is not None for bugid, bugname in patch.fixes]):
            lines.append("# | This patchset fixes the following Wine bugs:\n")
            for bugid, bugname in patch.fixes:
                if bugid is not None:
lines.append("# | *\t%s\n" % "\n# | \t".join(textwrap.wrap("[#%d] %s" % (bugid, bugname), 120))) lines.append("# |\n") # List all modified files lines.append("# | Modified files:\n") lines.append("# | *\t%s\n" % "\n# | \t".join(textwrap.wrap(", ".join(sorted(patch.modified_files)), 120))) lines.append("# |\n") lines.append("if test \"$%s\" -eq 1; then\n" % patch.variable) for f in patch.files: lines.append("\tpatch_apply %s\n" % os.path.join(patch.name, f)) if len(patch.patches): lines.append("\t(\n") for p in _unique(patch.patches, key=lambda p: (p.patch_author, p.patch_subject, p.patch_revision)): lines.append("\t\techo '+ { \"%s\", \"%s\", %d },';\n" % (_escape(p.patch_author), _escape(p.patch_subject), p.patch_revision)) lines.append("\t) >> \"$patchlist\"\n") lines.append("fi\n\n") lines_apply = lines with open(config.path_template_script) as template_fp: template = template_fp.read() with open(config.path_script, "w") as fp: fp.write(template.format(latest_staging_version=_latest_staging_version(), latest_wine_commit=latest_wine_commit, patch_helpers="".join(lines_helpers).rstrip("\n"), patch_resolver="".join(lines_resolver).rstrip("\n"), patch_apply="".join(lines_apply).rstrip("\n"))) # Add changes to git subprocess.call(["git", "add", config.path_script])
def iter_bucket(bucket_name, prefix='', accept_key=None,
                key_limit=None, workers=16, retries=3):
    """
    Iterate and download all S3 objects under `s3://bucket_name/prefix`.

    Parameters
    ----------
    bucket_name: str
        The name of the bucket.
    prefix: str, optional
        Limits the iteration to keys starting with the prefix.
    accept_key: callable, optional
        This is a function that accepts a key name (unicode string)
        and returns True/False, signalling whether the given key should be downloaded.
        The default behavior is to accept all keys.
    key_limit: int, optional
        If specified, the iterator will stop after yielding this many results.
    workers: int, optional
        The number of subprocesses to use.
    retries: int, optional
        The number of times to retry a failed download.

    Yields
    ------
    str
        The full key name (does not include the bucket name).
    bytes
        The full contents of the key.

    Notes
    -----
    The keys are processed in parallel, using `workers` processes (default: 16),
    to speed up downloads greatly. If multiprocessing is not available, thus
    _MULTIPROCESSING is False, this parameter will be ignored.

    Examples
    --------
    >>> # get all JSON files under "mybucket/foo/"
    >>> for key, content in iter_bucket(bucket_name, prefix='foo/', accept_key=lambda key: key.endswith('.json')):
    ...     print key, len(content)

    >>> # limit to 10k files, using 32 parallel workers (default is 16)
    >>> for key, content in iter_bucket(bucket_name, key_limit=10000, workers=32):
    ...     print key, len(content)
    """
    if accept_key is None:
        accept_key = lambda key: True

    #
    # If people insist on giving us bucket instances, silently extract the name
    # before moving on. Works for boto3 as well as boto.
    #
    try:
        bucket_name = bucket_name.name
    except AttributeError:
        pass

    total_size, key_no = 0, -1
    key_iterator = _list_bucket(bucket_name, prefix=prefix, accept_key=accept_key)
    download_key = functools.partial(_download_key, bucket_name=bucket_name, retries=retries)

    with _create_process_pool(processes=workers) as pool:
        result_iterator = pool.imap_unordered(download_key, key_iterator)
        for key_no, (key, content) in enumerate(result_iterator):
            if True or key_no % 1000 == 0:
                logger.info("yielding key #%i: %s, size %i (total %.1fMB)",
                            key_no, key, len(content), total_size / 1024.0 ** 2)

            yield key, content
            total_size += len(content)

            if key_limit is not None and key_no + 1 >= key_limit:
                # we were asked to output only a limited number of keys => we're done
                break
    logger.info("processed %i keys, total size %i" % (key_no + 1, total_size))
def run_jstestall(options):
    """Run the entire Javascript test suite.

    This function constructs TestControllers and runs them in subprocesses.

    Parameters
    ----------

    All parameters are passed as attributes of the options object.

    testgroups : list of str
      Run only these sections of the test suite. If empty, run all the available
      sections.

    fast : int or None
      Run the test suite in parallel, using n simultaneous processes. If None
      is passed, one process is used per CPU core. Default 1 (i.e. sequential)

    inc_slow : bool
      Include slow tests. By default, these tests aren't run.

    slimerjs : bool
      Use slimerjs if it's installed instead of phantomjs for casperjs tests.

    url : unicode
      Address:port to use when running the JS tests.

    xunit : bool
      Produce Xunit XML output. This is written to multiple foo.xunit.xml files.

    extra_args : list
      Extra arguments to pass to the test subprocesses, e.g. '-v'
    """
    to_run, not_run = prepare_controllers(options)

    def justify(ltext, rtext, width=70, fill='-'):
        ltext += ' '
        rtext = (' ' + rtext).rjust(width - len(ltext), fill)
        return ltext + rtext

    # Run all test runners, tracking execution time
    failed = []
    t_start = time.time()

    print()
    if options.fast == 1:
        # This actually means sequential, i.e. with 1 job
        for controller in to_run:
            print('Test group:', controller.section)
            sys.stdout.flush()  # Show in correct order when output is piped
            controller, res = do_run(controller, buffer_output=False)
            if res:
                failed.append(controller)
                if res == -signal.SIGINT:
                    print("Interrupted")
                    break
            print()
    else:
        # Run tests concurrently
        try:
            pool = multiprocessing.pool.ThreadPool(options.fast)
            for (controller, res) in pool.imap_unordered(do_run, to_run):
                res_string = 'OK' if res == 0 else 'FAILED'
                print(justify('Test group: ' + controller.section, res_string))
                if res:
                    controller.print_extra_info()
                    print(bytes_to_str(controller.stdout))
                    failed.append(controller)
                    if res == -signal.SIGINT:
                        print("Interrupted")
                        break
        except KeyboardInterrupt:
            return

    for controller in not_run:
        print(justify('Test group: ' + controller.section, 'NOT RUN'))

    t_end = time.time()
    t_tests = t_end - t_start
    nrunners = len(to_run)
    nfail = len(failed)
    # summarize results
    print('_'*70)
    print('Test suite completed for system with the following information:')
    print(report())
    took = "Took %.3fs." % t_tests
    print('Status: ', end='')
    if not failed:
        print('OK (%d test groups).' % nrunners, took)
    else:
        # If anything went wrong, point out what command to rerun manually to
        # see the actual errors and individual summary
        failed_sections = [c.section for c in failed]
        print('ERROR - {} out of {} test groups failed ({}).'.format(
            nfail, nrunners, ', '.join(failed_sections)), took)
        print()
        print('You may wish to rerun these, with:')
        print('  python -m notebook.jstest', *failed_sections)
        print()

    if failed:
        # Ensure that our exit code indicates failure
        sys.exit(1)
    # (tail of the enclosing per-root tree-evaluation function; its signature and the
    # computation of gold, pred, the edge counts and path totals are not shown here;
    # note that matthews_score is only bound when gold is non-empty)
    if len(gold) > 0:
        matthews_score = matthews_corrcoef(gold, pred)
    if within_tree == 0 or nb_paths == 0:
        return (root, -5, -5, -5, -5)
    return (root, outside_edges/len(tree_nodes), one_neg_edges/within_tree,
            matthews_score, total_path_length/nb_paths)


def tree_score(inside_edges, outside_edges):
    return inside_edges - outside_edges


def merge_trees(list_of_tree):
    # sort candidate trees by their score; the remaining merge logic is not part of
    # this excerpt
    list_of_tree = sorted(list_of_tree, key=lambda x: x[1])


def cbfs(root):
    return consistent_bfs(ADJACENCY, EDGE_SIGNS, root)


if __name__ == '__main__':
    # pylint: disable=C0103
    import persistent as p
    from multiprocessing import pool
    import random

    pool = pool.Pool(13)
    roots = random.sample(list(ADJACENCY.keys()), 10000)
    # one large chunk per worker: 13 processes, chunksize = len(roots) // 13
    res = pool.imap_unordered(cbfs, roots, chunksize=len(roots)//13)
    pool.close()
    pool.join()
    # results are materialised only after the pool has been joined
    p.save_var('cbfs_val.my', list(res))
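# The __main__ block above gives each of the 13 worker processes a single large chunk
# (chunksize = len(roots) // 13), which minimises inter-process traffic but gives up
# load balancing, and it only collects the results after close()/join().  A sketch of
# a more incremental variant -- smaller chunks, results consumed as they arrive, pool
# closed by a context manager -- could look like this; it assumes cbfs, ADJACENCY and
# the project-local persistent module are importable exactly as above:
import random
from multiprocessing import Pool

import persistent as p

if __name__ == '__main__':
    roots = random.sample(list(ADJACENCY.keys()), 10000)
    with Pool(13) as pool:
        # modest chunks keep all workers busy even when individual BFS runs are slow
        results = list(pool.imap_unordered(cbfs, roots, chunksize=64))
    p.save_var('cbfs_val_alt.my', results)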
def generate_apply_order(all_patches, skip_checks=False):
    """Resolve dependencies, and afterwards check if everything applies properly."""
    depends = sorted([i for i, patch in all_patches.iteritems() if not patch.disabled])
    resolved = resolve_dependencies(all_patches, depends=depends)
    max_patches = max(resolved) + 1

    if skip_checks:
        return resolved

    # Generate timestamps based on dependencies, still required for binary patches
    # Find out which files are modified by multiple patches
    modified_files = {}
    for i, patch in [(i, all_patches[i]) for i in resolved]:
        patch.verify_time = [0]*max_patches
        patch.verify_time[i] += 1
        for j in patch.depends:
            patch.verify_time = causal_time_combine(patch.verify_time, all_patches[j].verify_time)

        for f in patch.modified_files:
            if f not in modified_files:
                modified_files[f] = []
            modified_files[f].append(i)

    # Check dependencies
    dependency_cache = _load_dict(config.path_cache)
    pool = multiprocessing.pool.ThreadPool(processes=4)
    try:
        for filename, indices in modified_files.iteritems():

            # If one of the patches is a binary patch, then we cannot / won't verify it - require dependencies in this case
            if contains_binary_patch(all_patches, indices, filename):
                if not causal_time_relation_any(all_patches, indices):
                    raise PatchUpdaterError("Because of binary patch modifying file %s the following patches need explicit dependencies: %s" %
                                            (filename, ", ".join([all_patches[i].name for i in indices])))
                continue

            original_content = get_wine_file(filename)
            original_hash = _sha256(original_content)
            selected_patches = select_patches(all_patches, indices, filename)

            # Generate a unique id based on the original content, the selected patches
            # and the dependency information. Since this information only has to be compared
            # we can throw it into a single hash.
            m = hashlib.sha256()
            m.update(original_hash)
            for i in indices:
                m.update("P%s" % selected_patches[i][0])
                for j in indices:
                    if causal_time_smaller(all_patches[j].verify_time, all_patches[i].verify_time):
                        m.update("D%s" % selected_patches[j][0])
            unique_hash = m.digest()

            # Skip checks if it matches the information from the cache
            # For backwards compatibility, convert string entries to list
            if dependency_cache.has_key(filename):
                if not isinstance(dependency_cache[filename], list):
                    dependency_cache[filename] = [dependency_cache[filename]]
                if unique_hash in dependency_cache[filename]:
                    # move the entry to the end of the list (most recently used)
                    dependency_cache[filename].append(unique_hash)
                    dependency_cache[filename].remove(unique_hash)
                    continue

            # Show a progress bar while applying the patches - this task might take some time
            chunk_size = 20
            with progressbar.ProgressBar(desc=filename, total=2 ** len(indices) / chunk_size) as progress:

                def test_apply(current):
                    set_apply = [(i, all_patches[i]) for i in current]
                    set_skip = [(i, all_patches[i]) for i in indices if i not in current]

                    # Check if there is any patch2 which depends directly or indirectly on patch1.
                    # If this is the case we found an impossible situation, which can be skipped in this test.
                    for i, patch1 in set_apply:
                        for j, patch2 in set_skip:
                            if causal_time_smaller(patch2.verify_time, patch1.verify_time):
                                return True  # we can skip this test

                    try:
                        original = original_content
                        for i, _ in set_apply:
                            original = patchutils.apply_patch(original, selected_patches[i][1], fuzz=0)
                    except patchutils.PatchApplyError:
                        return False

                    return True  # everything is fine

                def test_apply_seq(current_list):
                    for current in current_list:
                        if not test_apply(current):
                            return current
                    return None

                iterables = []
                for i in xrange(0, len(indices) + 1):
                    iterables.append(itertools.combinations(indices, i))
                it = _split_seq(itertools.chain(*iterables), chunk_size)
                for k, failed in enumerate(pool.imap_unordered(test_apply_seq, it)):
                    if failed is not None:
                        progress.finish("<failed to apply>")
                        raise PatchUpdaterError("Changes to file %s don't apply: %s" %
                                                (filename, ", ".join([all_patches[i].name for i in failed])))
                    progress.update(k)

            # Update the dependency cache, store max 10 entries per file
            if not dependency_cache.has_key(filename):
                dependency_cache[filename] = []
            dependency_cache[filename].append(unique_hash)
            dependency_cache[filename] = dependency_cache[filename][-10:]

        # Delete outdated cache information
        for filename in dependency_cache.keys():
            if not modified_files.has_key(filename):
                del dependency_cache[filename]
    finally:
        pool.close()
        _save_dict(config.path_cache, dependency_cache)

    return resolved
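# _split_seq is an internal helper that is not shown in this excerpt.  Judging by its
# use above (turning a potentially huge iterator of patch combinations into fixed-size
# work packages for pool.imap_unordered), a minimal sketch could look like the
# following; this is an assumption for illustration, not the actual implementation:
import itertools


def _split_seq(iterable, size):
    """Yield successive chunks of at most `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, size))
        if not chunk:
            break
        yield chunk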
def generate_script(all_patches):
    """Resolve dependencies, and afterwards check if everything applies properly."""
    depends = sorted([i for i, patch in all_patches.iteritems() if not patch.disabled])
    resolved = resolve_dependencies(all_patches, depends=depends)
    max_patches = max(resolved) + 1

    # Generate timestamps based on dependencies, still required for binary patches
    # Find out which files are modified by multiple patches
    modified_files = {}
    for i, patch in [(i, all_patches[i]) for i in resolved]:
        patch.verify_time = [0]*max_patches
        patch.verify_time[i] += 1
        for j in patch.depends:
            patch.verify_time = causal_time_combine(patch.verify_time, all_patches[j].verify_time)

        for f in patch.modified_files:
            if f not in modified_files:
                modified_files[f] = []
            modified_files[f].append(i)

    # Check dependencies
    pool = multiprocessing.pool.ThreadPool(processes=4)
    try:

        # Checking all dependencies takes a very long time, so to improve development speed,
        # run a first quick check with all patches enabled.
        with progressbar.ProgressBar(desc="pre-check ...", total=len(modified_files)) as progress:
            for k, (filename, indices) in enumerate(modified_files.iteritems()):

                # If one of the patches is a binary patch, then we cannot / won't verify it - require dependencies in this case
                if contains_binary_patch(all_patches, indices, filename):
                    if not causal_time_relation_any(all_patches, indices):
                        raise PatchUpdaterError("Because of binary patch modifying file %s the following patches need explicit dependencies: %s" %
                                                (filename, ", ".join([all_patches[i].name for i in indices])))
                    continue

                original = get_wine_file(filename)
                selected_patches = select_patches(all_patches, indices, filename)
                set_apply = [(i, all_patches[i]) for i in indices]

                try:
                    for i, patch in set_apply:
                        original = patchutils.apply_patch(original, selected_patches[i][1], fuzz=0)
                except patchutils.PatchApplyError:
                    progress.finish("<failed to apply>")
                    raise PatchUpdaterError("Changes to file %s don't apply: %s" %
                                            (filename, ", ".join([all_patches[i].name for i in indices])))
                progress.update(k)

        # More detailed checks, required to make sure that dependencies are set correctly
        for filename, indices in modified_files.iteritems():

            if contains_binary_patch(all_patches, indices, filename):
                continue

            original_content = get_wine_file(filename)
            selected_patches = select_patches(all_patches, indices, filename)

            # Show a progress bar while applying the patches - this task might take some time
            chunk_size = 20
            with progressbar.ProgressBar(desc=filename, total=2 ** len(indices) / chunk_size) as progress:

                def test_apply(bitstring):
                    set_apply = [(i, all_patches[i]) for u, i in zip(bitstring, indices) if u]
                    set_skip = [(i, all_patches[i]) for u, i in zip(bitstring, indices) if not u]

                    # Check if there is any patch2 which depends directly or indirectly on patch1.
                    # If this is the case we found an impossible situation, which can be skipped in this test.
                    for i, patch1 in set_apply:
                        for j, patch2 in set_skip:
                            if causal_time_smaller(patch2.verify_time, patch1.verify_time):
                                return True  # we can skip this test

                    try:
                        original = original_content
                        for i, patch in set_apply:
                            original = patchutils.apply_patch(original, selected_patches[i][1], fuzz=0)
                    except patchutils.PatchApplyError:
                        return False

                    return True  # everything is fine

                def test_apply_seq(bitstrings):
                    for bitstring in bitstrings:
                        if not test_apply(bitstring):
                            return False
                    return True

                it = _split_seq(itertools.product([0, 1], repeat=len(indices)), chunk_size)
                for k, res in enumerate(pool.imap_unordered(test_apply_seq, it)):
                    if not res:
                        progress.finish("<failed to apply>")
                        raise PatchUpdaterError("Changes to file %s don't apply: %s" %
                                                (filename, ", ".join([all_patches[i].name for i in indices])))
                    progress.update(k)
    finally:
        pool.close()

    # Generate code for helper functions
    lines = []
    lines.append("# Enable or disable all patchsets\n")
    lines.append("patch_enable_all ()\n")
    lines.append("{\n")
    for i, patch in sorted([(i, all_patches[i]) for i in resolved], key=lambda x: x[1].name):
        patch.variable = "enable_%s" % patch.name.replace("-", "_").replace(".", "_")
        lines.append("\t%s=\"$1\"\n" % patch.variable)
    lines.append("}\n")
    lines.append("\n")
    lines.append("# Enable or disable a specific patchset\n")
    lines.append("patch_enable ()\n")
    lines.append("{\n")
    lines.append("\tcase \"$1\" in\n")
    for i, patch in sorted([(i, all_patches[i]) for i in resolved], key=lambda x: x[1].name):
        lines.append("\t\t%s)\n" % patch.name)
        lines.append("\t\t\t%s=\"$2\"\n" % patch.variable)
        lines.append("\t\t\t;;\n")
    lines.append("\t\t*)\n")
    lines.append("\t\t\treturn 1\n")
    lines.append("\t\t\t;;\n")
    lines.append("\tesac\n")
    lines.append("\treturn 0\n")
    lines.append("}\n")
    lines_helpers = lines

    # Generate code for dependency resolver
    lines = []
    for i, patch in [(i, all_patches[i]) for i in reversed(resolved)]:
        if len(patch.depends):
            lines.append("if test \"$%s\" -eq 1; then\n" % patch.variable)
            for j in sorted(patch.depends):
                lines.append("\tif test \"$%s\" -gt 1; then\n" % all_patches[j].variable)
                lines.append("\t\tabort \"Patchset %s disabled, but %s depends on that.\"\n" %
                             (all_patches[j].name, patch.name))
                lines.append("\tfi\n")
            for j in sorted(patch.depends):
                lines.append("\t%s=1\n" % all_patches[j].variable)
            lines.append("fi\n\n")
    lines_resolver = lines

    # Generate code for applying all patchsets
    lines = []
    for i, patch in [(i, all_patches[i]) for i in resolved]:
        lines.append("# Patchset %s\n" % patch.name)
        lines.append("# |\n")

        # List all bugs fixed by this patchset
        if any([bugid is not None for bugid, bugname in patch.fixes]):
            lines.append("# | This patchset fixes the following Wine bugs:\n")
            for bugid, bugname in patch.fixes:
                if bugid is not None:
                    lines.append("# | *\t%s\n" % "\n# | \t".join(textwrap.wrap("[#%d] %s" % (bugid, bugname), 120)))
            lines.append("# |\n")

        # List all modified files
        lines.append("# | Modified files:\n")
        lines.append("# | *\t%s\n" % "\n# | \t".join(textwrap.wrap(", ".join(sorted(patch.modified_files)), 120)))
        lines.append("# |\n")

        lines.append("if test \"$%s\" -eq 1; then\n" % patch.variable)
        for f in patch.files:
            lines.append("\tpatch_apply %s\n" % os.path.join(patch.name, f))
        if len(patch.patches):
            lines.append("\t(\n")
            for p in _unique(patch.patches, key=lambda p: (p.patch_author, p.patch_subject, p.patch_revision)):
                lines.append("\t\techo '+ { \"%s\", \"%s\", %d },';\n" %
                             (_escape(p.patch_author), _escape(p.patch_subject), p.patch_revision))
            lines.append("\t) >> \"$patchlist\"\n")
        lines.append("fi\n\n")
    lines_apply = lines

    with open(config.path_template_script) as template_fp:
        template = template_fp.read()
    with open(config.path_script, "w") as fp:
        fp.write(template.format(patch_helpers="".join(lines_helpers).rstrip("\n"),
                                 patch_resolver="".join(lines_resolver).rstrip("\n"),
                                 patch_apply="".join(lines_apply).rstrip("\n")))

    # Add changes to git
    subprocess.call(["git", "add", config.path_script])